Compare commits
143 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2c4f3ea196 | |||
| cb12ee4b2d | |||
| a57781f8a9 | |||
| 6749e335a3 | |||
| 53814b39c3 | |||
| efd71e8914 | |||
| bc2ba1356e | |||
| e0b3fa6eb3 | |||
| 929245ba69 | |||
| 73a3de5798 | |||
| 3a26076194 | |||
| 04d3a2e2be | |||
| 70882abe9b | |||
| 2771d404a3 | |||
| 7150715e19 | |||
| 4eab358ff7 | |||
| f96db81d3b | |||
| 6f436a463e | |||
| 9d61408837 | |||
| ec2ab5bfaf | |||
| 82c2035823 | |||
| 2e509422ef | |||
| 3ac2125140 | |||
| 7dea33303a | |||
| d246f9a278 | |||
| c1e93aa331 | |||
| 8b49012a0a | |||
| 3fc715ddf5 | |||
| 9c90b3a597 | |||
| 22b0d6dc1a | |||
| 5dc232a6e2 | |||
| c25f9d1d36 | |||
| d617858896 | |||
| 2d587c5662 | |||
| caf0f30eab | |||
| 70d53d8b75 | |||
| fbdca64f73 | |||
| 07b7cf6fe4 | |||
| c52cd48e25 | |||
| d3f62c6913 | |||
| c769be344a | |||
| 372e9a18cd | |||
| b5c6d9ac08 | |||
| f6f25b9449 | |||
| e77f1ed5f7 | |||
| 4c61fb6cf6 | |||
| 1264fab156 | |||
| 4e2c66a098 | |||
| eb51fb6f50 | |||
| 4a2fa77c15 | |||
| 9896e43db5 | |||
| d08c2a016a | |||
| 0e2873a77d | |||
| 280dd4513a | |||
| bb694bad42 | |||
| 9e30ef224d | |||
| a7cd254c29 | |||
| 4d58e48cdb | |||
| bec2250d2c | |||
| e02a7e5e1c | |||
| 5ce5fe3181 | |||
| 531efe7208 | |||
| 2a474bcf72 | |||
| 6dbbf20ff4 | |||
| 5aa4727f34 | |||
| 4cc18877c6 | |||
| 3fde8c153d | |||
| 3462b097e2 | |||
| 552e9c7881 | |||
| 18cd1e5c72 | |||
| 0ce12a9241 | |||
| 56b79f12ac | |||
| 3d2f146460 | |||
| 2e3f576298 | |||
| 2ea7cf287e | |||
| ba9964ff0d | |||
| 2fdefca570 | |||
| 48be2e0e4d | |||
| 87d9239009 | |||
| c3a09f7835 | |||
| 6c26727bb3 | |||
| 5edb346c75 | |||
| f722ec723f | |||
| be0728cacc | |||
| 975e13091e | |||
| 32aea113f0 | |||
| 1c76689b28 | |||
| 24c7ce0fb8 | |||
| b4afc6546e | |||
| 127b56a61a | |||
| 4ead464f97 | |||
| 3bbe980115 | |||
| a9db0e2c74 | |||
| c6a992e3e3 | |||
| 9514ddbee2 | |||
| 59088228f6 | |||
| 5672772dab | |||
| b9b6e034d5 | |||
| 1566d71726 | |||
| f7441f9c42 | |||
| c42edd8055 | |||
| 8ad34db551 | |||
| c6a380eb6c | |||
| 8f92327891 | |||
| fc7e04e9ed | |||
| 3ce1cf2bb7 | |||
| 1a7bb988fc | |||
| 2a352f96ee | |||
| 31a0100104 | |||
| 0cc1a1d2d9 | |||
| c634c07bcc | |||
| 33a3cf5322 | |||
| b4b118c201 | |||
| 351fdcc6e6 | |||
| 971cfaa38c | |||
| 024a8e3ee9 | |||
| 1d27be0ff3 | |||
| 4d2df86281 | |||
| 57a61057f5 | |||
| 419910ee21 | |||
| fee88105f9 | |||
| 27506cc02d | |||
| 88f5186d35 | |||
| eeb747de25 | |||
| 6fc1989a5d | |||
| b6c6f650ee | |||
| 6f1a5f8597 | |||
| 9d793e8e58 | |||
| cebd480818 | |||
| c547392fd4 | |||
| ce26785187 | |||
| c29b4f55d9 | |||
| ef43938e2b | |||
| ca192cfb77 | |||
| 5af4b73f87 | |||
| 12842d32ce | |||
| 9ff98daf71 | |||
| a8a05c8ea7 | |||
| b4ba42550c | |||
| 6f3a020e62 | |||
| edb2d91057 | |||
| 42c4288411 | |||
| 258965663c |
@@ -27,9 +27,9 @@ on:
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Concurrency: push/release runs are NEVER cancelled so every merge gets its
|
||||
# own SHA-tagged image; :main and :latest are guarded separately by the
|
||||
# move-main and move-latest jobs. PR runs reuse a PR-scoped group with
|
||||
# Concurrency: push/release runs are NEVER cancelled so every merge gets
|
||||
# its own :main or release-tagged image. :latest is guarded separately
|
||||
# by the move-latest job. PR runs reuse a PR-scoped group with
|
||||
# cancel-in-progress: true so rapid pushes to the same PR collapse to the
|
||||
# latest commit.
|
||||
concurrency:
|
||||
@@ -92,10 +92,10 @@ jobs:
|
||||
# pattern for multi-runner multi-platform builds.
|
||||
#
|
||||
# We apply the OCI revision label here (and again on arm64) because
|
||||
# the move-main / move-latest jobs read it off the linux/amd64
|
||||
# sub-manifest config of the floating tag to decide whether it's safe
|
||||
# to advance. The label must be on each per-arch image — manifest
|
||||
# lists themselves don't carry image config labels.
|
||||
# the move-latest job reads it off the linux/amd64 sub-manifest
|
||||
# config of the floating tag to decide whether it's safe to advance.
|
||||
# The label must be on each per-arch image — manifest lists themselves
|
||||
# don't carry image config labels.
|
||||
- name: Push amd64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
@@ -208,8 +208,14 @@ jobs:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stitch both per-arch digests into a single tagged multi-arch manifest.
|
||||
# This is a registry-side operation — no building, no layer re-push —
|
||||
# so it runs in ~30 seconds. On main pushes it produces :sha-<sha>.
|
||||
# On releases it produces :<release_tag_name>.
|
||||
# so it runs in ~30 seconds. On main pushes it produces :main; on
|
||||
# releases it produces :<release_tag_name>.
|
||||
#
|
||||
# For main pushes the ancestor check runs BEFORE the manifest push so
|
||||
# we never overwrite :main with an older commit. The top-level
|
||||
# concurrency group (`docker-${{ github.ref }}` with
|
||||
# `cancel-in-progress: false`) already serialises runs per ref; the
|
||||
# ancestor check is defense-in-depth.
|
||||
# ---------------------------------------------------------------------------
|
||||
merge:
|
||||
if: github.repository == 'NousResearch/hermes-agent' && (github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release')
|
||||
@@ -217,10 +223,15 @@ jobs:
|
||||
needs: [build-amd64, build-arm64]
|
||||
timeout-minutes: 10
|
||||
outputs:
|
||||
pushed_sha_tag: ${{ steps.mark_pushed.outputs.pushed }}
|
||||
pushed_release_tag: ${{ steps.mark_release_pushed.outputs.pushed }}
|
||||
release_tag: ${{ steps.tag.outputs.tag }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1000
|
||||
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
@@ -237,120 +248,19 @@ jobs:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Compute the tag for this run. Main pushes use sha-<sha> (so every
|
||||
# commit gets its own immutable tag); releases use the release tag name.
|
||||
- name: Compute tag
|
||||
id: tag
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
echo "tag=${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "tag=sha-${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Create manifest list and push
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Build the arg array from each digest file (filename = the digest
|
||||
# hex, with no sha256: prefix; empty file content, only the name
|
||||
# matters). Using an array avoids shellcheck SC2046 and keeps
|
||||
# every digest a single argv token even under pathological names.
|
||||
args=()
|
||||
for digest_file in *; do
|
||||
args+=("${IMAGE_NAME}@sha256:${digest_file}")
|
||||
done
|
||||
docker buildx imagetools create \
|
||||
-t "${IMAGE_NAME}:${TAG}" \
|
||||
"${args[@]}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
docker buildx imagetools inspect "${IMAGE_NAME}:${TAG}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
# Signal to move-main that the SHA tag is live. Only on main pushes;
|
||||
# releases set pushed_release_tag instead.
|
||||
- name: Mark SHA tag pushed
|
||||
id: mark_pushed
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# Signal to move-latest that the release tag is live.
|
||||
- name: Mark release tag pushed
|
||||
id: mark_release_pushed
|
||||
if: github.event_name == 'release'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Move :main to point at the SHA tag the merge job pushed.
|
||||
#
|
||||
# :main is the floating tag that tracks the tip of the main branch. Every
|
||||
# merge to main retags :main forward. Users who want "latest dev build"
|
||||
# pull :main; users who want stable releases pull :latest.
|
||||
#
|
||||
# The real serialization guarantee comes from the top-level concurrency
|
||||
# group (`docker-${{ github.ref }}` with `cancel-in-progress: false`),
|
||||
# which ensures at most one workflow run for this ref executes at a time.
|
||||
# That means two move-main steps for the same ref cannot overlap.
|
||||
#
|
||||
# This job has its own concurrency group as defense-in-depth: if the
|
||||
# top-level group is ever loosened, queued move-mains will run serially
|
||||
# in arrival order, each one running the ancestor check below and either
|
||||
# advancing :main or skipping. `cancel-in-progress: false` matches the
|
||||
# top-level setting — we don't want rapid pushes to cancel a queued
|
||||
# move-main, because the ancestor check is the real safety mechanism
|
||||
# and queueing is cheap (move-main is a ~30s registry op).
|
||||
#
|
||||
# Combined with the ancestor check, this means :main only ever moves
|
||||
# forward in git history.
|
||||
# ---------------------------------------------------------------------------
|
||||
move-main:
|
||||
if: |
|
||||
github.repository == 'NousResearch/hermes-agent'
|
||||
&& github.event_name == 'push'
|
||||
&& github.ref == 'refs/heads/main'
|
||||
&& needs.merge.outputs.pushed_sha_tag == 'true'
|
||||
needs: merge
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
concurrency:
|
||||
group: docker-move-main-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1000
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Read the git revision label off the current :main manifest, then
|
||||
# use `git merge-base --is-ancestor` to check whether our commit is a
|
||||
# descendant of it. If :main doesn't exist yet, or its label is
|
||||
# missing, we treat that as "safe to publish". If another run already
|
||||
# advanced :main past us (or diverged), we skip and leave it alone.
|
||||
# use `git merge-base --is-ancestor` to check whether our commit is
|
||||
# a descendant of it. If :main doesn't exist yet, or its label is
|
||||
# missing, we treat that as "safe to publish". If another run
|
||||
# already advanced :main past us (or diverged), we skip and leave
|
||||
# it alone.
|
||||
- name: Decide whether to move :main
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
id: main_check
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
|
||||
# Pull the JSON for the linux/amd64 sub-manifest's config and extract
|
||||
# the OCI revision label with jq — Go template field access can't
|
||||
# handle dots in map keys, so using json+jq is the robust route.
|
||||
image_json=$(
|
||||
docker buildx imagetools inspect "${image}:main" \
|
||||
--format '{{ json (index .Image "linux/amd64") }}' \
|
||||
@@ -383,7 +293,6 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Make sure we have the :main commit locally for merge-base.
|
||||
if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
|
||||
git fetch --no-tags --prune origin \
|
||||
"+refs/heads/main:refs/remotes/origin/main" \
|
||||
@@ -396,7 +305,6 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Our SHA must be a descendant of the current :main to be safe.
|
||||
if git merge-base --is-ancestor "${current_sha}" "${GITHUB_SHA}"; then
|
||||
echo "Our commit is a descendant of :main — safe to advance."
|
||||
echo "push_main=true" >> "$GITHUB_OUTPUT"
|
||||
@@ -405,19 +313,48 @@ jobs:
|
||||
echo "push_main=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Retag the already-pushed SHA manifest as :main. This is a registry-
|
||||
# side operation — no rebuild, no layer re-push — so it's quick and
|
||||
# atomic per-tag. The ancestor check above plus the serialized, non-cancelling
|
||||
# concurrency on this job together guarantee we only ever move :main
|
||||
# forward in git history.
|
||||
- name: Move :main to this SHA
|
||||
if: steps.main_check.outputs.push_main == 'true'
|
||||
# Compute the tag for this run. Main pushes tag directly as :main
|
||||
# (no per-commit SHA tags); releases use the release tag name.
|
||||
- name: Compute tag
|
||||
id: tag
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
echo "tag=${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "tag=main" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Gate the manifest push on the ancestor check for main pushes.
|
||||
# For releases there is no gate — the check doesn't even run.
|
||||
- name: Create manifest list and push
|
||||
if: github.event_name != 'push' || steps.main_check.outputs.push_main == 'true'
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
args=()
|
||||
for digest_file in *; do
|
||||
args+=("${IMAGE_NAME}@sha256:${digest_file}")
|
||||
done
|
||||
docker buildx imagetools create \
|
||||
--tag "${image}:main" \
|
||||
"${image}:sha-${GITHUB_SHA}"
|
||||
-t "${IMAGE_NAME}:${TAG}" \
|
||||
"${args[@]}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
- name: Inspect image
|
||||
if: github.event_name != 'push' || steps.main_check.outputs.push_main == 'true'
|
||||
run: |
|
||||
docker buildx imagetools inspect "${IMAGE_NAME}:${TAG}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
# Signal to move-latest that the release tag is live.
|
||||
- name: Mark release tag pushed
|
||||
id: mark_release_pushed
|
||||
if: github.event_name == 'release'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Move :latest to point at the release tag the merge job pushed.
|
||||
@@ -427,10 +364,10 @@ jobs:
|
||||
#
|
||||
# We still run an ancestor check against the existing :latest so that a
|
||||
# backport release on an older branch (e.g. patching v1.1.5 after v1.2.3
|
||||
# is out) doesn't drag :latest backwards. The check is the same shape as
|
||||
# move-main: read the OCI revision label off the current :latest, look up
|
||||
# that commit in git, and only advance if our release commit is a strict
|
||||
# descendant.
|
||||
# is out) doesn't drag :latest backwards. The check is the same shape
|
||||
# as the ancestor check in the merge job for :main: read the OCI
|
||||
# revision label off the current :latest, look up that commit in git,
|
||||
# and only advance if our release commit is a strict descendant.
|
||||
# ---------------------------------------------------------------------------
|
||||
move-latest:
|
||||
if: |
|
||||
|
||||
@@ -23,13 +23,24 @@ concurrency:
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
RG_VERSION=15.1.0
|
||||
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
|
||||
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
|
||||
curl -sSfL -o "$RG_TARBALL" \
|
||||
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
|
||||
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
|
||||
tar -xzf "$RG_TARBALL"
|
||||
sudo mv "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl/rg" /usr/local/bin/rg
|
||||
rm -rf "$RG_TARBALL" "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl"
|
||||
rg --version
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
@@ -44,9 +55,26 @@ jobs:
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
- name: Run tests
|
||||
# Per-file isolation via scripts/run_tests_parallel.py: discovers
|
||||
# every test_*.py file under tests/ (excluding integration/ + e2e/),
|
||||
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
|
||||
# with bounded parallelism. No xdist, no shared workers, no
|
||||
# module-level state leakage between files.
|
||||
#
|
||||
# Why per-file (not per-test): per-test spawn cost (~250ms × 17k
|
||||
# tests = 70min CPU minimum) blew the wall-clock budget. Per-file
|
||||
# spawn (~250ms × ~850 files = ~3.5min) fits while still giving
|
||||
# every file a fresh interpreter — the only isolation boundary
|
||||
# that matters in practice (cross-file leakage was the original
|
||||
# flake source; intra-file is the test author's responsibility).
|
||||
#
|
||||
# Why drop xdist entirely: xdist's persistent workers accumulate
|
||||
# state across files, which is exactly the leakage we wanted to
|
||||
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
|
||||
# the job with cleaner semantics.
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto --timeout=30 --timeout-method=signal
|
||||
python scripts/run_tests_parallel.py
|
||||
env:
|
||||
# Ensure tests don't accidentally call real APIs
|
||||
OPENROUTER_API_KEY: ""
|
||||
@@ -60,8 +88,19 @@ jobs:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
RG_VERSION=15.1.0
|
||||
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
|
||||
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
|
||||
curl -sSfL -o "$RG_TARBALL" \
|
||||
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
|
||||
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
|
||||
tar -xzf "$RG_TARBALL"
|
||||
sudo mv "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl/rg" /usr/local/bin/rg
|
||||
rm -rf "$RG_TARBALL" "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl"
|
||||
rg --version
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
@@ -18,6 +18,7 @@ __pycache__/web_tools.cpython-310.pyc
|
||||
logs/
|
||||
data/
|
||||
.pytest_cache/
|
||||
.pytest-cache/
|
||||
tmp/
|
||||
temp_vision_images/
|
||||
hermes-*/*
|
||||
|
||||
@@ -1013,17 +1013,39 @@ def profile_env(tmp_path, monkeypatch):
|
||||
|
||||
**ALWAYS use `scripts/run_tests.sh`** — do not call `pytest` directly. The script enforces
|
||||
hermetic environment parity with CI (unset credential vars, TZ=UTC, LANG=C.UTF-8,
|
||||
4 xdist workers matching GHA ubuntu-latest). Direct `pytest` on a 16+ core
|
||||
developer machine with API keys set diverges from CI in ways that have caused
|
||||
multiple "works locally, fails in CI" incidents (and the reverse).
|
||||
`-n auto` xdist workers, in-tree subprocess-isolation plugin). Direct `pytest`
|
||||
on a 16+ core developer machine with API keys set diverges from CI in ways
|
||||
that have caused multiple "works locally, fails in CI" incidents (and the reverse).
|
||||
|
||||
```bash
|
||||
scripts/run_tests.sh # full suite, CI-parity
|
||||
scripts/run_tests.sh tests/gateway/ # one directory
|
||||
scripts/run_tests.sh tests/agent/test_foo.py::test_x # one test
|
||||
scripts/run_tests.sh -v --tb=long # pass-through pytest flags
|
||||
scripts/run_tests.sh --no-isolate tests/foo/ # disable subprocess isolation (faster, for debugging)
|
||||
```
|
||||
|
||||
### Subprocess-per-test isolation
|
||||
|
||||
Every test runs in a freshly-spawned Python subprocess via the in-tree plugin
|
||||
at `tests/_isolate_plugin.py`. This means module-level dicts/sets and
|
||||
ContextVars from one test cannot leak into the next — the historic
|
||||
`_reset_module_state` autouse fixture is gone.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- The plugin uses `multiprocessing.get_context("spawn")`, which works on
|
||||
Linux, macOS, and Windows alike (POSIX `fork` is not used).
|
||||
- Per-test overhead is ~0.5–1.0s (Python startup + pytest collection). xdist
|
||||
parallelism amortizes this across cores; on a 20-core box the full suite
|
||||
finishes in roughly the same wall time as before, but flake-free.
|
||||
- `isolate_timeout` (configured in `pyproject.toml`) caps each test at 30s.
|
||||
Hangs are killed and surfaced as a failure report.
|
||||
- Pass `--no-isolate` to disable isolation — useful when debugging a single
|
||||
test interactively, or when you specifically want to verify state leakage.
|
||||
- The plugin disables itself in child processes (sentinel envvar
|
||||
`HERMES_ISOLATE_CHILD=1`), so there's no fork-bomb risk.
|
||||
|
||||
### Why the wrapper (and why the old "just call pytest" doesn't work)
|
||||
|
||||
Five real sources of local-vs-CI drift the script closes:
|
||||
@@ -1034,7 +1056,7 @@ Five real sources of local-vs-CI drift the script closes:
|
||||
| HOME / `~/.hermes/` | Your real config+auth.json | Temp dir per test |
|
||||
| Timezone | Local TZ (PDT etc.) | UTC |
|
||||
| Locale | Whatever is set | C.UTF-8 |
|
||||
| xdist workers | `-n auto` = all cores (20+ on a workstation) | `-n 4` matching CI |
|
||||
| xdist workers | `-n auto` = all cores | `-n auto` (safe — subprocess isolation prevents cross-worker flakes) |
|
||||
|
||||
`tests/conftest.py` also enforces points 1-4 as an autouse fixture so ANY pytest
|
||||
invocation (including IDE integrations) gets hermetic behavior — but the wrapper
|
||||
@@ -1042,15 +1064,21 @@ is belt-and-suspenders.
|
||||
|
||||
### Running without the wrapper (only if you must)
|
||||
|
||||
If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
|
||||
pytest directly), at minimum activate the venv and pass `-n 4`:
|
||||
If you can't use the wrapper (e.g. inside an IDE that shells pytest directly),
|
||||
at minimum activate the venv. The isolation plugin loads automatically from
|
||||
`addopts` in `pyproject.toml`, so you get the same per-test process isolation
|
||||
either way.
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
python -m pytest tests/ -q -n 4
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
Worker count above 4 will surface test-ordering flakes that CI never sees.
|
||||
If you need to bypass isolation for fast feedback while debugging:
|
||||
|
||||
```bash
|
||||
python -m pytest tests/agent/test_foo.py -q --no-isolate
|
||||
```
|
||||
|
||||
Always run the full suite before pushing changes.
|
||||
|
||||
|
||||
+2
-2
@@ -210,7 +210,7 @@ hermes-agent/
|
||||
| `~/.hermes/skills/` | All active skills (bundled + hub-installed + agent-created) |
|
||||
| `~/.hermes/memories/` | Persistent memory (MEMORY.md, USER.md) |
|
||||
| `~/.hermes/state.db` | SQLite session database |
|
||||
| `~/.hermes/sessions/` | JSON session logs |
|
||||
| `~/.hermes/sessions/` | Gateway routing index (`sessions.json`), request-dump breadcrumbs, gateway `*.jsonl` transcripts, and (optionally) per-session JSON snapshots when `sessions.write_json_snapshots: true` is set. The per-session snapshots are off by default; state.db is canonical. |
|
||||
| `~/.hermes/cron/` | Scheduled job data |
|
||||
| `~/.hermes/whatsapp/session/` | WhatsApp bridge credentials |
|
||||
|
||||
@@ -239,7 +239,7 @@ User message → AIAgent._run_agent_loop()
|
||||
|
||||
- **Self-registering tools**: Each tool file calls `registry.register()` at import time. `model_tools.py` triggers discovery by importing all tool modules.
|
||||
- **Toolset grouping**: Tools are grouped into toolsets (`web`, `terminal`, `file`, `browser`, etc.) that can be enabled/disabled per platform.
|
||||
- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. JSON logs go to `~/.hermes/sessions/`.
|
||||
- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. Per-session JSON snapshots in `~/.hermes/sessions/` were superseded by the SQLite store and are off by default; opt back in with `sessions.write_json_snapshots: true` if you have external tooling that consumes the JSON files directly.
|
||||
- **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs.
|
||||
- **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint).
|
||||
- **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests.
|
||||
|
||||
+106
-3
@@ -71,6 +71,71 @@ def _ra():
|
||||
return run_agent
|
||||
|
||||
|
||||
def _normalized_custom_base_url(value: Any) -> str:
|
||||
if not isinstance(value, str):
|
||||
return ""
|
||||
return value.strip().rstrip("/")
|
||||
|
||||
|
||||
def _custom_provider_model_matches(agent_model: str, entry: Dict[str, Any]) -> bool:
|
||||
provider_model = str(entry.get("model", "") or "").strip().lower()
|
||||
if not provider_model:
|
||||
return True
|
||||
return provider_model == str(agent_model or "").strip().lower()
|
||||
|
||||
|
||||
def _custom_provider_extra_body_for_agent(
    *,
    provider: str,
    model: str,
    base_url: str,
    custom_providers: List[Dict[str, Any]],
) -> Optional[Dict[str, Any]]:
    """Pick the ``extra_body`` configured for a custom-provider agent.

    Only agents whose *provider* is ``"custom"`` (ignoring case and
    surrounding whitespace) and whose *base_url* is non-empty after
    normalization are considered. An entry of *custom_providers* is a
    candidate when it is a dict, its normalized ``base_url`` equals the
    agent's, and its ``extra_body`` is a non-empty dict.

    Returns:
        A shallow copy of the chosen ``extra_body``. The first candidate
        whose ``model`` matches *model* wins; failing that, the first
        candidate that names no model; ``None`` when nothing qualifies.
    """
    if (provider or "").strip().lower() != "custom":
        return None

    wanted_url = _normalized_custom_base_url(base_url)
    if not wanted_url:
        return None

    modelless_body: Optional[Dict[str, Any]] = None
    for candidate in custom_providers or []:
        if not isinstance(candidate, dict):
            continue
        if _normalized_custom_base_url(candidate.get("base_url")) != wanted_url:
            continue

        body = candidate.get("extra_body")
        if not (isinstance(body, dict) and body):
            continue

        pinned_model = str(candidate.get("model", "") or "").strip()
        if not pinned_model:
            # Model-less entries only serve as a fallback; keep the first one.
            if modelless_body is None:
                modelless_body = dict(body)
            continue

        # A model-specific match takes priority over any fallback seen so far.
        if _custom_provider_model_matches(model, candidate):
            return dict(body)

    return modelless_body
|
||||
|
||||
|
||||
def _merge_custom_provider_extra_body(agent, custom_providers: List[Dict[str, Any]]) -> None:
    """Fold the matching custom-provider ``extra_body`` into ``agent.request_overrides``.

    The lookup uses ``agent.provider``, ``agent.model`` and ``agent.base_url``.
    When nothing matches, the agent is left untouched. Otherwise
    ``agent.request_overrides`` is replaced with a copy whose ``extra_body``
    starts from the configured values; keys already present in the agent's
    existing ``extra_body`` take precedence over the configured ones.
    """
    configured = _custom_provider_extra_body_for_agent(
        provider=agent.provider,
        model=agent.model,
        base_url=agent.base_url,
        custom_providers=custom_providers,
    )
    if not configured:
        return

    # Work on a copy so the previous overrides mapping is never mutated in place.
    request_overrides = dict(getattr(agent, "request_overrides", {}) or {})
    already_set = request_overrides.get("extra_body")

    combined = dict(configured)
    if isinstance(already_set, dict):
        # Values the agent already carries win over the config-provided ones.
        combined.update(already_set)

    request_overrides["extra_body"] = combined
    agent.request_overrides = request_overrides
|
||||
|
||||
|
||||
def init_agent(
|
||||
agent,
|
||||
base_url: str = None,
|
||||
@@ -901,7 +966,19 @@ def init_agent(
|
||||
hermes_home = get_hermes_home()
|
||||
agent.logs_dir = hermes_home / "sessions"
|
||||
agent.logs_dir.mkdir(parents=True, exist_ok=True)
|
||||
agent.session_log_file = agent.logs_dir / f"session_{agent.session_id}.json"
|
||||
# Per-session JSON snapshot writer (~/.hermes/sessions/session_{sid}.json)
|
||||
# is opt-in via sessions.write_json_snapshots (default False). state.db
|
||||
# is canonical — the snapshot is only useful for external tooling that
|
||||
# reads the JSON files directly. See run_agent._save_session_log.
|
||||
agent._session_json_enabled = False
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_sess_cfg
|
||||
_sess_cfg = (_load_sess_cfg().get("sessions") or {})
|
||||
agent._session_json_enabled = bool(_sess_cfg.get("write_json_snapshots", False))
|
||||
except Exception:
|
||||
pass
|
||||
# logs_dir is retained unconditionally for request_dump_*.json (debug
|
||||
# breadcrumb path written by agent_runtime_helpers.dump_api_request_debug).
|
||||
|
||||
# Track conversation messages for session logging
|
||||
agent._session_messages: List[Dict[str, Any]] = []
|
||||
@@ -1048,7 +1125,18 @@ def init_agent(
|
||||
# through _ra().get_tool_definitions()). Duplicate function names cause
|
||||
# 400 errors on providers that enforce unique names (e.g. Xiaomi
|
||||
# MiMo via Nous Portal).
|
||||
if agent._memory_manager and agent.tools is not None:
|
||||
#
|
||||
# Respect the platform's enabled_toolsets configuration (#5544):
|
||||
# enabled_toolsets is None → no filter, inject (backward compat)
|
||||
# "memory" in enabled_toolsets → user opted in, inject
|
||||
# otherwise (incl. []) → user excluded memory, skip injection
|
||||
#
|
||||
# Without this gate, `platform_toolsets: telegram: []` still leaks memory
|
||||
# provider tools (fact_store, etc.) into the tool surface — a 10x latency
|
||||
# penalty on local models and a frequent trigger of tool-call loops.
|
||||
if agent._memory_manager and agent.tools is not None and (
|
||||
agent.enabled_toolsets is None or "memory" in agent.enabled_toolsets
|
||||
):
|
||||
_existing_tool_names = {
|
||||
t.get("function", {}).get("name")
|
||||
for t in agent.tools
|
||||
@@ -1201,6 +1289,7 @@ def init_agent(
|
||||
# Store for reuse by _check_compression_model_feasibility (auxiliary
|
||||
# compression model context-length detection needs the same list).
|
||||
agent._custom_providers = _custom_providers
|
||||
_merge_custom_provider_extra_body(agent, _custom_providers)
|
||||
|
||||
# Check custom_providers per-model context_length
|
||||
if _config_context_length is None and _custom_providers:
|
||||
@@ -1357,8 +1446,22 @@ def init_agent(
|
||||
# errors. Even with the cache fix, dedup is the right defense
|
||||
# against plugin paths that may register the same schemas via
|
||||
# ctx.register_tool(). Mirrors the memory tools dedup above.
|
||||
#
|
||||
# Respect the platform's enabled_toolsets configuration (#5544):
|
||||
# context engine tools follow the same gating pattern as memory
|
||||
# provider tools — without the gate, `platform_toolsets: telegram: []`
|
||||
# would still leak lcm_* tools into the tool surface and incur the
|
||||
# same local-model latency penalty.
|
||||
agent._context_engine_tool_names: set = set()
|
||||
if hasattr(agent, "context_compressor") and agent.context_compressor and agent.tools is not None:
|
||||
if (
|
||||
hasattr(agent, "context_compressor")
|
||||
and agent.context_compressor
|
||||
and agent.tools is not None
|
||||
and (
|
||||
agent.enabled_toolsets is None
|
||||
or "context_engine" in agent.enabled_toolsets
|
||||
)
|
||||
):
|
||||
_existing_tool_names = {
|
||||
t.get("function", {}).get("name")
|
||||
for t in agent.tools
|
||||
|
||||
@@ -1869,6 +1869,77 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No
|
||||
|
||||
|
||||
|
||||
def _iter_pool_sockets(client: Any):
    """Yield each distinct raw socket reachable from a client's connection pool.

    The walk goes ``client._client`` -> ``._transport`` -> ``._pool`` and then
    reads the pool's ``_connections`` (falling back to ``_pool``).  For every
    pool entry both the entry itself and its ``_connection`` attribute (where
    httpcore 1.x keeps the concrete HTTP11/HTTP2 connection) are inspected.

    All of these attributes are private transport internals that differ
    between httpx/httpcore releases, so every lookup is defensive: a missing
    link simply ends the iteration instead of raising.

    Each socket object is yielded at most once (deduplicated by ``id``).
    """

    def _resolve_socket(stream: Any) -> Any:
        """Return the raw socket behind *stream*, or ``None`` if not found."""
        # 1. Plain sync streams keep the socket directly on ``_sock``.
        raw = getattr(stream, "_sock", None)
        if raw is not None:
            return raw

        # 2. Streams exposing ``get_extra_info("socket")``.
        info_getter = getattr(stream, "get_extra_info", None)
        if callable(info_getter):
            try:
                raw = info_getter("socket")
            except Exception:
                raw = None
            if raw is not None:
                return raw

        # 3. Wrapper streams that hold the real stream under ``stream``.
        outer = getattr(stream, "stream", None)
        if outer is not None:
            raw = getattr(outer, "_sock", None)
            if raw is not None:
                return raw

        # 4. anyio-backed streams expose the raw socket through
        #    SocketAttribute.raw_socket when available.
        extra_fn = getattr(getattr(stream, "_stream", None), "extra", None)
        if callable(extra_fn):
            try:
                from anyio.abc import SocketAttribute

                return extra_fn(SocketAttribute.raw_socket)
            except Exception:
                return None
        return None

    try:
        node = client
        for attr in ("_client", "_transport", "_pool"):
            node = getattr(node, attr, None)
            if node is None:
                return
        entries = (
            getattr(node, "_connections", None)
            or getattr(node, "_pool", None)
            or []
        )
    except Exception:
        return

    yielded_ids: set[int] = set()
    # Snapshot the pool so mutation during iteration cannot break the loop.
    for entry in list(entries):
        inner = getattr(entry, "_connection", None)
        holders = [entry] if inner is None else [entry, inner]
        for holder in holders:
            stream = (
                getattr(holder, "_network_stream", None)
                or getattr(holder, "_stream", None)
            )
            if stream is None:
                continue
            raw = _resolve_socket(stream)
            if raw is None or id(raw) in yielded_ids:
                continue
            yielded_ids.add(id(raw))
            yield raw
|
||||
|
||||
|
||||
def cleanup_dead_connections(agent) -> bool:
|
||||
"""Detect and clean up dead TCP connections on the primary client.
|
||||
|
||||
@@ -1882,36 +1953,8 @@ def cleanup_dead_connections(agent) -> bool:
|
||||
if client is None:
|
||||
return False
|
||||
try:
|
||||
http_client = getattr(client, "_client", None)
|
||||
if http_client is None:
|
||||
return False
|
||||
transport = getattr(http_client, "_transport", None)
|
||||
if transport is None:
|
||||
return False
|
||||
pool = getattr(transport, "_pool", None)
|
||||
if pool is None:
|
||||
return False
|
||||
connections = (
|
||||
getattr(pool, "_connections", None)
|
||||
or getattr(pool, "_pool", None)
|
||||
or []
|
||||
)
|
||||
dead_count = 0
|
||||
for conn in list(connections):
|
||||
# Check for connections that are idle but have closed sockets
|
||||
stream = (
|
||||
getattr(conn, "_network_stream", None)
|
||||
or getattr(conn, "_stream", None)
|
||||
)
|
||||
if stream is None:
|
||||
continue
|
||||
sock = getattr(stream, "_sock", None)
|
||||
if sock is None:
|
||||
sock = getattr(stream, "stream", None)
|
||||
if sock is not None:
|
||||
sock = getattr(sock, "_sock", None)
|
||||
if sock is None:
|
||||
continue
|
||||
for sock in _iter_pool_sockets(client):
|
||||
# Probe socket health with a non-blocking recv peek
|
||||
import socket as _socket
|
||||
try:
|
||||
@@ -2087,36 +2130,7 @@ def force_close_tcp_sockets(client: Any) -> int:
|
||||
|
||||
closed = 0
|
||||
try:
|
||||
http_client = getattr(client, "_client", None)
|
||||
if http_client is None:
|
||||
return 0
|
||||
transport = getattr(http_client, "_transport", None)
|
||||
if transport is None:
|
||||
return 0
|
||||
pool = getattr(transport, "_pool", None)
|
||||
if pool is None:
|
||||
return 0
|
||||
# httpx uses httpcore connection pools; connections live in
|
||||
# _connections (list) or _pool (list) depending on version.
|
||||
connections = (
|
||||
getattr(pool, "_connections", None)
|
||||
or getattr(pool, "_pool", None)
|
||||
or []
|
||||
)
|
||||
for conn in list(connections):
|
||||
stream = (
|
||||
getattr(conn, "_network_stream", None)
|
||||
or getattr(conn, "_stream", None)
|
||||
)
|
||||
if stream is None:
|
||||
continue
|
||||
sock = getattr(stream, "_sock", None)
|
||||
if sock is None:
|
||||
sock = getattr(stream, "stream", None)
|
||||
if sock is not None:
|
||||
sock = getattr(sock, "_sock", None)
|
||||
if sock is None:
|
||||
continue
|
||||
for sock in _iter_pool_sockets(client):
|
||||
try:
|
||||
sock.shutdown(_socket.SHUT_RDWR)
|
||||
except OSError:
|
||||
@@ -2154,5 +2168,6 @@ __all__ = [
|
||||
"cleanup_dead_connections",
|
||||
"extract_api_error_context",
|
||||
"apply_pending_steer_to_tool_results",
|
||||
"_iter_pool_sockets",
|
||||
"force_close_tcp_sockets",
|
||||
]
|
||||
|
||||
+254
-230
@@ -1606,182 +1606,155 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
|
||||
return out
|
||||
|
||||
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Convert an assistant message to Anthropic content blocks.
|
||||
|
||||
Returns (system_prompt, anthropic_messages).
|
||||
System messages are extracted since Anthropic takes them as a separate param.
|
||||
system_prompt is a string or list of content blocks (when cache_control present).
|
||||
|
||||
When *base_url* is provided and points to a third-party Anthropic-compatible
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
|
||||
When *model* is provided and matches the Kimi / Moonshot family (or
|
||||
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
||||
synthesised from ``reasoning_content`` are preserved on replayed
|
||||
assistant tool-call messages — Kimi requires the field to exist, even
|
||||
if empty.
|
||||
Handles thinking blocks, regular content, tool calls, and
|
||||
reasoning_content injection for Kimi/DeepSeek endpoints.
|
||||
"""
|
||||
system = None
|
||||
result = []
|
||||
|
||||
for m in messages:
|
||||
role = m.get("role", "user")
|
||||
content = m.get("content", "")
|
||||
|
||||
if role == "system":
|
||||
if isinstance(content, list):
|
||||
# Preserve cache_control markers on content blocks
|
||||
has_cache = any(
|
||||
p.get("cache_control") for p in content if isinstance(p, dict)
|
||||
)
|
||||
if has_cache:
|
||||
system = [p for p in content if isinstance(p, dict)]
|
||||
else:
|
||||
system = "\n".join(
|
||||
p["text"] for p in content if p.get("type") == "text"
|
||||
)
|
||||
else:
|
||||
system = content
|
||||
continue
|
||||
|
||||
if role == "assistant":
|
||||
blocks = _extract_preserved_thinking_blocks(m)
|
||||
if content:
|
||||
if isinstance(content, list):
|
||||
converted_content = _convert_content_to_anthropic(content)
|
||||
if isinstance(converted_content, list):
|
||||
blocks.extend(converted_content)
|
||||
else:
|
||||
blocks.append({"type": "text", "text": str(content)})
|
||||
for tc in m.get("tool_calls", []):
|
||||
if not tc or not isinstance(tc, dict):
|
||||
continue
|
||||
fn = tc.get("function", {})
|
||||
args = fn.get("arguments", "{}")
|
||||
try:
|
||||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
parsed_args = {}
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
effective = [{"type": "text", "text": "(empty)"}]
|
||||
result.append({"role": "assistant", "content": effective})
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
# Sanitize tool_use_id and ensure non-empty content.
|
||||
# Computer-use (and other multimodal) tool results arrive as
|
||||
# either a list of OpenAI-style content parts, or a dict
|
||||
# marked `_multimodal` with an embedded `content` list. Convert
|
||||
# both into Anthropic `tool_result` inner blocks (text + image).
|
||||
multimodal_blocks: Optional[List[Dict[str, Any]]] = None
|
||||
if isinstance(content, dict) and content.get("_multimodal"):
|
||||
multimodal_blocks = _content_parts_to_anthropic_blocks(
|
||||
content.get("content") or []
|
||||
)
|
||||
# Fallback text if the conversion produced nothing usable.
|
||||
if not multimodal_blocks and content.get("text_summary"):
|
||||
multimodal_blocks = [
|
||||
{"type": "text", "text": str(content["text_summary"])}
|
||||
]
|
||||
elif isinstance(content, list):
|
||||
converted = _content_parts_to_anthropic_blocks(content)
|
||||
if any(b.get("type") == "image" for b in converted):
|
||||
multimodal_blocks = converted
|
||||
# Back-compat: some callers stash blocks under a private key.
|
||||
if multimodal_blocks is None:
|
||||
stashed = m.get("_anthropic_content_blocks")
|
||||
if isinstance(stashed, list) and stashed:
|
||||
text_content = content if isinstance(content, str) and content.strip() else None
|
||||
multimodal_blocks = (
|
||||
[{"type": "text", "text": text_content}] + stashed
|
||||
if text_content else list(stashed)
|
||||
)
|
||||
|
||||
if multimodal_blocks:
|
||||
result_content: Any = multimodal_blocks
|
||||
elif isinstance(content, str):
|
||||
result_content = content
|
||||
else:
|
||||
result_content = json.dumps(content) if content else "(no output)"
|
||||
if not result_content:
|
||||
result_content = "(no output)"
|
||||
tool_result = {
|
||||
"type": "tool_result",
|
||||
"tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
|
||||
"content": result_content,
|
||||
}
|
||||
if isinstance(m.get("cache_control"), dict):
|
||||
tool_result["cache_control"] = dict(m["cache_control"])
|
||||
# Merge consecutive tool results into one user message
|
||||
if (
|
||||
result
|
||||
and result[-1]["role"] == "user"
|
||||
and isinstance(result[-1]["content"], list)
|
||||
and result[-1]["content"]
|
||||
and result[-1]["content"][0].get("type") == "tool_result"
|
||||
):
|
||||
result[-1]["content"].append(tool_result)
|
||||
else:
|
||||
result.append({"role": "user", "content": [tool_result]})
|
||||
continue
|
||||
|
||||
# Regular user message — validate non-empty content (Anthropic rejects empty)
|
||||
content = m.get("content", "")
|
||||
blocks = _extract_preserved_thinking_blocks(m)
|
||||
if content:
|
||||
if isinstance(content, list):
|
||||
converted_blocks = _convert_content_to_anthropic(content)
|
||||
# Check if all text blocks are empty
|
||||
if not converted_blocks or all(
|
||||
b.get("text", "").strip() == ""
|
||||
for b in converted_blocks
|
||||
if isinstance(b, dict) and b.get("type") == "text"
|
||||
):
|
||||
converted_blocks = [{"type": "text", "text": "(empty message)"}]
|
||||
result.append({"role": "user", "content": converted_blocks})
|
||||
converted_content = _convert_content_to_anthropic(content)
|
||||
if isinstance(converted_content, list):
|
||||
blocks.extend(converted_content)
|
||||
else:
|
||||
# Validate string content is non-empty
|
||||
if not content or (isinstance(content, str) and not content.strip()):
|
||||
content = "(empty message)"
|
||||
result.append({"role": "user", "content": content})
|
||||
blocks.append({"type": "text", "text": str(content)})
|
||||
for tc in m.get("tool_calls", []):
|
||||
if not tc or not isinstance(tc, dict):
|
||||
continue
|
||||
fn = tc.get("function", {})
|
||||
args = fn.get("arguments", "{}")
|
||||
try:
|
||||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
parsed_args = {}
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
effective = [{"type": "text", "text": "(empty)"}]
|
||||
return {"role": "assistant", "content": effective}
|
||||
|
||||
|
||||
def _convert_tool_message_to_result(
    result: List[Dict[str, Any]], m: Dict[str, Any]
) -> None:
    """Append tool message *m* to *result* as an Anthropic ``tool_result``.

    The tool output may be a plain string, a list of content parts, or a dict
    flagged ``_multimodal`` that embeds a ``content`` list.  Multimodal output
    is converted to Anthropic inner blocks; anything else is sent as text
    (JSON-encoded when it is not already a string), and empty output becomes
    ``"(no output)"``.

    ``result`` is modified in place: when its last entry is a user message
    whose first block is already a ``tool_result``, the new block is appended
    to that message; otherwise a new user message is added.
    """
    payload = m.get("content", "")
    rich_blocks: Optional[List[Dict[str, Any]]] = None

    if isinstance(payload, dict) and payload.get("_multimodal"):
        rich_blocks = _content_parts_to_anthropic_blocks(
            payload.get("content") or []
        )
        # Nothing usable came out of the conversion: fall back to the summary.
        if not rich_blocks and payload.get("text_summary"):
            rich_blocks = [{"type": "text", "text": str(payload["text_summary"])}]
    elif isinstance(payload, list):
        candidate = _content_parts_to_anthropic_blocks(payload)
        # A part list only counts as multimodal when it carries an image.
        for block in candidate:
            if block.get("type") == "image":
                rich_blocks = candidate
                break

    # Back-compat: some callers stash blocks under a private key.
    if rich_blocks is None:
        legacy = m.get("_anthropic_content_blocks")
        if isinstance(legacy, list) and legacy:
            if isinstance(payload, str) and payload.strip():
                rich_blocks = [{"type": "text", "text": payload}] + legacy
            else:
                rich_blocks = list(legacy)

    if rich_blocks:
        body: Any = rich_blocks
    elif isinstance(payload, str):
        body = payload
    elif payload:
        body = json.dumps(payload)
    else:
        body = "(no output)"
    # Covers the empty-string case of the str branch above.
    body = body or "(no output)"

    entry = {
        "type": "tool_result",
        "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
        "content": body,
    }
    marker = m.get("cache_control")
    if isinstance(marker, dict):
        entry["cache_control"] = dict(marker)

    # Merge consecutive tool results into one user message.
    tail = result[-1] if result else None
    mergeable = bool(
        tail is not None
        and tail["role"] == "user"
        and isinstance(tail["content"], list)
        and tail["content"]
        and tail["content"][0].get("type") == "tool_result"
    )
    if mergeable:
        tail["content"].append(entry)
    else:
        result.append({"role": "user", "content": [entry]})
|
||||
|
||||
|
||||
def _convert_user_message(content: Any) -> Dict[str, Any]:
    """Validate and convert a user message to Anthropic format.

    Anthropic rejects empty user content, so a message with nothing usable
    is replaced by the ``"(empty message)"`` placeholder.

    Args:
        content: The OpenAI-style ``content`` of a user message: either a
            list of content parts or a scalar (normally a string).

    Returns:
        A ``{"role": "user", "content": ...}`` dict.  For list input the
        content is a list of blocks; otherwise it is the original value or
        the placeholder string.
    """
    placeholder = "(empty message)"

    if isinstance(content, list):
        blocks = _convert_content_to_anthropic(content)
        if blocks:
            has_text = any(
                isinstance(b, dict)
                and b.get("type") == "text"
                and isinstance(b.get("text"), str)
                and b["text"].strip()
                for b in blocks
            )
            if has_text:
                return {"role": "user", "content": blocks}
            # No meaningful text.  A plain ``all(...)`` over the text blocks
            # is vacuously true when there are none, which used to replace
            # image-only messages with the placeholder and drop the image.
            # Keep the non-text blocks and discard only the blank text ones.
            non_text = [
                b for b in blocks
                if isinstance(b, dict) and b.get("type") != "text"
            ]
            if non_text:
                return {"role": "user", "content": non_text}
        return {"role": "user", "content": [{"type": "text", "text": placeholder}]}

    # Scalar content: falsy values and whitespace-only strings are empty.
    if not content or (isinstance(content, str) and not content.strip()):
        content = placeholder
    return {"role": "user", "content": content}
|
||||
|
||||
|
||||
def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
|
||||
"""Strip tool_use blocks with no matching tool_result, and vice versa.
|
||||
|
||||
Context compression or session truncation can remove either side of a
|
||||
tool-call pair. Anthropic rejects both orphans with HTTP 400.
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
# Strip orphaned tool_use blocks (no matching tool_result follows)
|
||||
tool_result_ids = set()
|
||||
for m in result:
|
||||
@@ -1799,10 +1772,7 @@ def convert_messages_to_anthropic(
|
||||
if not m["content"]:
|
||||
m["content"] = [{"type": "text", "text": "(tool call removed)"}]
|
||||
|
||||
# Strip orphaned tool_result blocks (no matching tool_use precedes them).
|
||||
# This is the mirror of the above: context compression or session truncation
|
||||
# can remove an assistant message containing a tool_use while leaving the
|
||||
# subsequent tool_result intact. Anthropic rejects these with a 400.
|
||||
# Strip orphaned tool_result blocks (no matching tool_use precedes them)
|
||||
tool_use_ids = set()
|
||||
for m in result:
|
||||
if m["role"] == "assistant" and isinstance(m["content"], list):
|
||||
@@ -1819,12 +1789,16 @@ def convert_messages_to_anthropic(
|
||||
if not m["content"]:
|
||||
m["content"] = [{"type": "text", "text": "(tool result removed)"}]
|
||||
|
||||
# Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
|
||||
|
||||
def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Merge consecutive same-role messages to enforce Anthropic alternation.
|
||||
|
||||
Returns a new list (caller must rebind ``result``).
|
||||
"""
|
||||
fixed = []
|
||||
for m in result:
|
||||
if fixed and fixed[-1]["role"] == m["role"]:
|
||||
if m["role"] == "user":
|
||||
# Merge consecutive user messages
|
||||
prev_content = fixed[-1]["content"]
|
||||
curr_content = m["content"]
|
||||
if isinstance(prev_content, str) and isinstance(curr_content, str):
|
||||
@@ -1832,7 +1806,6 @@ def convert_messages_to_anthropic(
|
||||
elif isinstance(prev_content, list) and isinstance(curr_content, list):
|
||||
fixed[-1]["content"] = prev_content + curr_content
|
||||
else:
|
||||
# Mixed types — wrap string in list
|
||||
if isinstance(prev_content, str):
|
||||
prev_content = [{"type": "text", "text": prev_content}]
|
||||
if isinstance(curr_content, str):
|
||||
@@ -1855,7 +1828,6 @@ def convert_messages_to_anthropic(
|
||||
elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
|
||||
fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
|
||||
else:
|
||||
# Mixed types — normalize both to list and merge
|
||||
if isinstance(prev_blocks, str):
|
||||
prev_blocks = [{"type": "text", "text": prev_blocks}]
|
||||
if isinstance(curr_blocks, str):
|
||||
@@ -1863,37 +1835,34 @@ def convert_messages_to_anthropic(
|
||||
fixed[-1]["content"] = prev_blocks + curr_blocks
|
||||
else:
|
||||
fixed.append(m)
|
||||
result = fixed
|
||||
return fixed
|
||||
|
||||
# ── Thinking block signature management ──────────────────────────
|
||||
# Anthropic signs thinking blocks against the full turn content.
|
||||
# Any upstream mutation (context compression, session truncation,
|
||||
# orphan stripping, message merging) invalidates the signature,
|
||||
# causing HTTP 400 "Invalid signature in thinking block".
|
||||
#
|
||||
# Signatures are Anthropic-proprietary. Third-party endpoints
|
||||
# (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
|
||||
# them and will reject them outright. When targeting a third-party
|
||||
# endpoint, strip ALL thinking/redacted_thinking blocks from every
|
||||
# assistant message — the third-party will generate its own
|
||||
# thinking blocks if it supports extended thinking.
|
||||
#
|
||||
# For direct Anthropic (strategy following clawdbot/OpenClaw):
|
||||
# 1. Strip thinking/redacted_thinking from all assistant messages
|
||||
# EXCEPT the last one — preserves reasoning continuity on the
|
||||
# current tool-use chain while avoiding stale signature errors.
|
||||
# 2. Downgrade unsigned thinking blocks (no signature) to text —
|
||||
# Anthropic can't validate them and will reject them.
|
||||
# 3. Strip cache_control from thinking/redacted_thinking blocks —
|
||||
# cache markers can interfere with signature validation.
|
||||
|
||||
def _manage_thinking_signatures(
|
||||
result: List[Dict[str, Any]], base_url: str | None, model: str | None
|
||||
) -> None:
|
||||
"""Strip or preserve thinking blocks based on endpoint type.
|
||||
|
||||
Anthropic signs thinking blocks against the full turn content.
|
||||
Any upstream mutation (context compression, session truncation, orphan
|
||||
stripping, message merging) invalidates the signature, causing HTTP 400
|
||||
"Invalid signature in thinking block".
|
||||
|
||||
Signatures are Anthropic-proprietary. Third-party endpoints (MiniMax,
|
||||
Azure AI Foundry, AWS Bedrock, self-hosted proxies) cannot validate them
|
||||
and will reject them outright. Kimi's /coding and DeepSeek's /anthropic
|
||||
endpoints speak the Anthropic protocol upstream but require unsigned
|
||||
thinking blocks (synthesised from ``reasoning_content``) to round-trip on
|
||||
replayed assistant tool-call messages. See hermes-agent#13848 (Kimi) and
|
||||
hermes-agent#16748 (DeepSeek).
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
# Kimi /coding and DeepSeek /anthropic share a contract: both speak the
|
||||
# Anthropic Messages protocol upstream but require that thinking blocks
|
||||
# synthesised from reasoning_content round-trip on subsequent turns when
|
||||
# thinking is enabled. Signed Anthropic blocks still have to be stripped
|
||||
# (neither endpoint can validate Anthropic's signatures); unsigned blocks
|
||||
# are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
|
||||
# Kimi / DeepSeek share a contract: strip signed Anthropic blocks
|
||||
# (neither upstream can validate Anthropic signatures), preserve unsigned
|
||||
# ones synthesised from reasoning_content. See #13848, #16748.
|
||||
_preserve_unsigned_thinking = (
|
||||
_is_kimi_family_endpoint(base_url, model)
|
||||
or _is_deepseek_anthropic_endpoint(base_url)
|
||||
@@ -1910,26 +1879,19 @@ def convert_messages_to_anthropic(
|
||||
continue
|
||||
|
||||
if _preserve_unsigned_thinking:
|
||||
# Kimi's /coding and DeepSeek's /anthropic endpoints both enable
|
||||
# thinking server-side and require unsigned thinking blocks on
|
||||
# replayed assistant tool-call messages. Strip signed Anthropic
|
||||
# blocks (neither upstream can validate Anthropic signatures) but
|
||||
# preserve the unsigned ones we synthesised from reasoning_content.
|
||||
# Kimi / DeepSeek: strip signed, preserve unsigned.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("signature") or b.get("data"):
|
||||
# Anthropic-signed block — upstream can't validate, strip
|
||||
# Signed (or redacted-with-data) — upstream can't validate, strip.
|
||||
continue
|
||||
# Unsigned thinking (synthesised from reasoning_content) —
|
||||
# keep it: the upstream needs it for message-history validation.
|
||||
new_content.append(b)
|
||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||
elif _is_third_party or idx != last_assistant_idx:
|
||||
# Third-party endpoint: strip ALL thinking blocks from every
|
||||
# assistant message — signatures are Anthropic-proprietary.
|
||||
# Third-party: strip ALL thinking blocks (signatures are proprietary).
|
||||
# Direct Anthropic: strip from non-latest assistant messages only.
|
||||
stripped = [
|
||||
b for b in m["content"]
|
||||
@@ -1937,24 +1899,21 @@ def convert_messages_to_anthropic(
|
||||
]
|
||||
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
|
||||
else:
|
||||
# Latest assistant on direct Anthropic: keep signed thinking
|
||||
# blocks for reasoning continuity; downgrade unsigned ones to
|
||||
# plain text.
|
||||
# Latest assistant on direct Anthropic: keep signed, downgrade unsigned
|
||||
# to text so the reasoning isn't lost.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("type") == "redacted_thinking":
|
||||
# Redacted blocks use 'data' for the signature payload
|
||||
# Redacted blocks use 'data' for the signature payload —
|
||||
# drop the block when 'data' is missing (can't be validated).
|
||||
if b.get("data"):
|
||||
new_content.append(b)
|
||||
# else: drop — no data means it can't be validated
|
||||
elif b.get("signature"):
|
||||
# Signed thinking block — keep it
|
||||
new_content.append(b)
|
||||
else:
|
||||
# Unsigned thinking — downgrade to text so it's not lost
|
||||
thinking_text = b.get("thinking", "")
|
||||
if thinking_text:
|
||||
new_content.append({"type": "text", "text": thinking_text})
|
||||
@@ -1966,12 +1925,15 @@ def convert_messages_to_anthropic(
|
||||
if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
|
||||
b.pop("cache_control", None)
|
||||
|
||||
# ── Image eviction: keep only the most recent N screenshots ─────
|
||||
# computer_use screenshots (base64 images) sit inside tool_result
|
||||
# blocks: they accumulate and are sent with every API call. Each
|
||||
# costs ~1,465 tokens; after 10+ the conversation becomes slow
|
||||
# even for simple text queries. Walk backward, keep the most recent
|
||||
# _MAX_KEEP_IMAGES, replace older ones with a text placeholder.
|
||||
|
||||
def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
|
||||
"""Keep only the most recent ``_MAX_KEEP_IMAGES`` computer-use screenshots.
|
||||
|
||||
Base64 images cost ~1,465 tokens each and accumulate across tool calls.
|
||||
Walk backward, keep the most recent N, replace older ones with a placeholder.
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
_MAX_KEEP_IMAGES = 3
|
||||
_image_count = 0
|
||||
for msg in reversed(result):
|
||||
@@ -1998,6 +1960,68 @@ def convert_messages_to_anthropic(
|
||||
for b in inner
|
||||
]
|
||||
|
||||
|
||||
def convert_messages_to_anthropic(
    messages: List[Dict],
    base_url: str | None = None,
    model: str | None = None,
) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

    Returns (system_prompt, anthropic_messages).
    System messages are extracted since Anthropic takes them as a separate param.
    system_prompt is a string or list of content blocks (when cache_control present).
    If several system messages are present, the last one wins.

    When *base_url* is provided and points to a third-party Anthropic-compatible
    endpoint, all thinking block signatures are stripped.  Signatures are
    Anthropic-proprietary — third-party endpoints cannot validate them and will
    reject them with HTTP 400 "Invalid signature in thinking block".

    When *model* is provided and matches the Kimi / Moonshot family (or
    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
    synthesised from ``reasoning_content`` are preserved on replayed
    assistant tool-call messages — Kimi requires the field to exist, even
    if empty.

    Both *base_url* and *model* are only forwarded to
    ``_manage_thinking_signatures``; this function does not inspect them.
    """
    system = None
    result: List[Dict[str, Any]] = []

    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")

        if role == "system":
            if isinstance(content, list):
                # Only dict parts are meaningful; a stray string or None in
                # the list must not crash the conversion.
                dict_parts = [p for p in content if isinstance(p, dict)]
                # Preserve cache_control markers on content blocks
                if any(p.get("cache_control") for p in dict_parts):
                    system = dict_parts
                else:
                    # Flatten to plain text.  Parts without a string "text"
                    # are skipped rather than raising KeyError/TypeError.
                    system = "\n".join(
                        p["text"]
                        for p in dict_parts
                        if p.get("type") == "text"
                        and isinstance(p.get("text"), str)
                    )
            else:
                system = content
            continue

        if role == "assistant":
            result.append(_convert_assistant_message(m))
            continue

        if role == "tool":
            # The helper receives the running list so it can attach the tool
            # result to it itself (nothing is appended here).
            _convert_tool_message_to_result(result, m)
            continue

        # Regular user message
        result.append(_convert_user_message(content))

    # Post-processing passes; order is kept exactly as in the original.
    _strip_orphaned_tool_blocks(result)
    result = _merge_consecutive_roles(result)
    _manage_thinking_signatures(result, base_url, model)
    _evict_old_screenshots(result)

    return system, result
|
||||
|
||||
|
||||
|
||||
@@ -390,6 +390,9 @@ def _run_review_in_thread(
|
||||
# parent below so memory(action="add") writes from
|
||||
# the review still land on disk; the review just
|
||||
# has zero side effects on external providers.
|
||||
# Match parent's toolset config so ``tools[]`` is byte-identical
|
||||
# in the request body — Anthropic's cache key includes it.
|
||||
# (The runtime whitelist below still restricts dispatch.)
|
||||
review_agent = AIAgent(
|
||||
model=agent.model,
|
||||
max_iterations=16,
|
||||
@@ -401,6 +404,8 @@ def _run_review_in_thread(
|
||||
api_key=_parent_runtime.get("api_key") or None,
|
||||
credential_pool=getattr(agent, "_credential_pool", None),
|
||||
parent_session_id=agent.session_id,
|
||||
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
|
||||
disabled_toolsets=getattr(agent, "disabled_toolsets", None),
|
||||
skip_memory=True,
|
||||
)
|
||||
review_agent._memory_write_origin = "background_review"
|
||||
|
||||
@@ -92,17 +92,36 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
||||
"""
|
||||
result = {"response": None, "error": None}
|
||||
request_client_holder = {"client": None}
|
||||
request_client_lock = threading.Lock()
|
||||
|
||||
def _set_request_client(client):
|
||||
with request_client_lock:
|
||||
request_client_holder["client"] = client
|
||||
return client
|
||||
|
||||
def _take_request_client():
|
||||
with request_client_lock:
|
||||
client = request_client_holder.get("client")
|
||||
request_client_holder["client"] = None
|
||||
return client
|
||||
|
||||
def _close_request_client_once(reason: str) -> None:
|
||||
request_client = _take_request_client()
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason=reason)
|
||||
|
||||
def _call():
|
||||
try:
|
||||
if agent.api_mode == "codex_responses":
|
||||
request_client_holder["client"] = agent._create_request_openai_client(
|
||||
reason="codex_stream_request",
|
||||
api_kwargs=api_kwargs,
|
||||
request_client = _set_request_client(
|
||||
agent._create_request_openai_client(
|
||||
reason="codex_stream_request",
|
||||
api_kwargs=api_kwargs,
|
||||
)
|
||||
)
|
||||
result["response"] = agent._run_codex_stream(
|
||||
api_kwargs,
|
||||
client=request_client_holder["client"],
|
||||
client=request_client,
|
||||
on_first_delta=getattr(agent, "_codex_on_first_delta", None),
|
||||
)
|
||||
elif agent.api_mode == "anthropic_messages":
|
||||
@@ -131,17 +150,17 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
||||
raise
|
||||
result["response"] = normalize_converse_response(raw_response)
|
||||
else:
|
||||
request_client_holder["client"] = agent._create_request_openai_client(
|
||||
reason="chat_completion_request",
|
||||
api_kwargs=api_kwargs,
|
||||
request_client = _set_request_client(
|
||||
agent._create_request_openai_client(
|
||||
reason="chat_completion_request",
|
||||
api_kwargs=api_kwargs,
|
||||
)
|
||||
)
|
||||
result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs)
|
||||
result["response"] = request_client.chat.completions.create(**api_kwargs)
|
||||
except Exception as e:
|
||||
result["error"] = e
|
||||
finally:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason="request_complete")
|
||||
_close_request_client_once("request_complete")
|
||||
|
||||
# ── Stale-call timeout (mirrors streaming stale detector) ────────
|
||||
# Non-streaming calls return nothing until the full response is
|
||||
@@ -192,9 +211,7 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
||||
agent._anthropic_client.close()
|
||||
agent._rebuild_anthropic_client()
|
||||
else:
|
||||
rc = request_client_holder.get("client")
|
||||
if rc is not None:
|
||||
agent._close_request_openai_client(rc, reason="stale_call_kill")
|
||||
_close_request_client_once("stale_call_kill")
|
||||
except Exception:
|
||||
pass
|
||||
agent._touch_activity(
|
||||
@@ -218,9 +235,7 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
||||
agent._anthropic_client.close()
|
||||
agent._rebuild_anthropic_client()
|
||||
else:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason="interrupt_abort")
|
||||
_close_request_client_once("interrupt_abort")
|
||||
except Exception:
|
||||
pass
|
||||
raise InterruptedError("Agent interrupted during API call")
|
||||
@@ -1257,6 +1272,24 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
|
||||
result = {"response": None, "error": None, "partial_tool_names": []}
|
||||
request_client_holder = {"client": None, "diag": None}
|
||||
request_client_lock = threading.Lock()
|
||||
|
||||
def _set_request_client(client):
|
||||
with request_client_lock:
|
||||
request_client_holder["client"] = client
|
||||
return client
|
||||
|
||||
def _take_request_client():
|
||||
with request_client_lock:
|
||||
client = request_client_holder.get("client")
|
||||
request_client_holder["client"] = None
|
||||
return client
|
||||
|
||||
def _close_request_client_once(reason: str) -> None:
|
||||
request_client = _take_request_client()
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason=reason)
|
||||
|
||||
first_delta_fired = {"done": False}
|
||||
deltas_were_sent = {"yes": False} # Track if any deltas were fired (for fallback)
|
||||
# Wall-clock timestamp of the last real streaming chunk. The outer
|
||||
@@ -1313,9 +1346,11 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
pool=_conn_cap,
|
||||
),
|
||||
}
|
||||
request_client_holder["client"] = agent._create_request_openai_client(
|
||||
reason="chat_completion_stream_request",
|
||||
api_kwargs=stream_kwargs,
|
||||
request_client = _set_request_client(
|
||||
agent._create_request_openai_client(
|
||||
reason="chat_completion_stream_request",
|
||||
api_kwargs=stream_kwargs,
|
||||
)
|
||||
)
|
||||
# Reset stale-stream timer so the detector measures from this
|
||||
# attempt's start, not a previous attempt's last chunk.
|
||||
@@ -1326,7 +1361,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
# ``request_client_holder["diag"]`` for closure access.
|
||||
_diag = agent._stream_diag_init()
|
||||
request_client_holder["diag"] = _diag
|
||||
stream = request_client_holder["client"].chat.completions.create(**stream_kwargs)
|
||||
stream = request_client.chat.completions.create(**stream_kwargs)
|
||||
|
||||
# Capture rate limit headers from the initial HTTP response.
|
||||
# The OpenAI SDK Stream object exposes the underlying httpx
|
||||
@@ -1765,12 +1800,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
mid_tool_call=True,
|
||||
diag=request_client_holder.get("diag"),
|
||||
)
|
||||
stale = request_client_holder.get("client")
|
||||
if stale is not None:
|
||||
agent._close_request_openai_client(
|
||||
stale, reason="stream_mid_tool_retry_cleanup"
|
||||
)
|
||||
request_client_holder["client"] = None
|
||||
_close_request_client_once("stream_mid_tool_retry_cleanup")
|
||||
try:
|
||||
agent._replace_primary_openai_client(
|
||||
reason="stream_mid_tool_retry_pool_cleanup"
|
||||
@@ -1821,12 +1851,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
diag=request_client_holder.get("diag"),
|
||||
)
|
||||
# Close the stale request client before retry
|
||||
stale = request_client_holder.get("client")
|
||||
if stale is not None:
|
||||
agent._close_request_openai_client(
|
||||
stale, reason="stream_retry_cleanup"
|
||||
)
|
||||
request_client_holder["client"] = None
|
||||
_close_request_client_once("stream_retry_cleanup")
|
||||
# Also rebuild the primary client to purge
|
||||
# any dead connections from the pool.
|
||||
try:
|
||||
@@ -1894,9 +1919,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
result["error"] = e
|
||||
return
|
||||
finally:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason="stream_request_complete")
|
||||
_close_request_client_once("stream_request_complete")
|
||||
|
||||
# Provider-configured stale timeout takes priority over env default.
|
||||
_cfg_stale = get_provider_stale_timeout(agent.provider, agent.model)
|
||||
@@ -1966,9 +1989,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
f"Reconnecting..."
|
||||
)
|
||||
try:
|
||||
rc = request_client_holder.get("client")
|
||||
if rc is not None:
|
||||
agent._close_request_openai_client(rc, reason="stale_stream_kill")
|
||||
_close_request_client_once("stale_stream_kill")
|
||||
except Exception:
|
||||
pass
|
||||
# Rebuild the primary client too — its connection pool
|
||||
@@ -1990,9 +2011,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
agent._anthropic_client.close()
|
||||
agent._rebuild_anthropic_client()
|
||||
else:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason="stream_interrupt_abort")
|
||||
_close_request_client_once("stream_interrupt_abort")
|
||||
except Exception:
|
||||
pass
|
||||
raise InterruptedError("Agent interrupted during streaming API call")
|
||||
|
||||
@@ -251,13 +251,16 @@ def _chat_messages_to_responses_input(
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items.
|
||||
|
||||
``is_xai_responses=True`` strips ``encrypted_content`` from replayed
|
||||
reasoning items. xAI's OAuth/SuperGrok ``/v1/responses`` surface
|
||||
rejects encrypted reasoning blobs minted by prior turns: the request
|
||||
streams an ``error`` SSE frame before ``response.created`` and the
|
||||
OpenAI SDK collapses it into a generic stream-ordering error. Native
|
||||
Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content
|
||||
— keep the default off.
|
||||
``is_xai_responses`` is kept for transport signature compatibility but
|
||||
no longer suppresses encrypted reasoning replay. Earlier (PR #26644,
|
||||
May 2026) we believed xAI's OAuth/SuperGrok ``/v1/responses`` surface
|
||||
rejected replayed ``encrypted_content`` reasoning items minted by
|
||||
prior turns, and we stripped them. That decision was wrong — xAI
|
||||
explicitly relies on Hermes threading encrypted reasoning back across
|
||||
turns for cross-turn coherence (the whole point of their partnership
|
||||
integration). We now replay encrypted reasoning on every Responses
|
||||
transport (xAI, native Codex, custom relays) and let xAI tell us
|
||||
explicitly if a specific surface ever rejects a payload.
|
||||
"""
|
||||
items: List[Dict[str, Any]] = []
|
||||
seen_item_ids: set = set()
|
||||
@@ -284,17 +287,12 @@ def _chat_messages_to_responses_input(
|
||||
if role == "assistant":
|
||||
# Replay encrypted reasoning items from previous turns
|
||||
# so the API can maintain coherent reasoning chains.
|
||||
#
|
||||
# xAI OAuth (SuperGrok/Premium) rejects replayed
|
||||
# ``encrypted_content`` reasoning items minted by prior
|
||||
# turns — see _chat_messages_to_responses_input docstring.
|
||||
# When ``is_xai_responses`` is set we drop the replay
|
||||
# entirely; Grok still reasons on each turn server-side,
|
||||
# we just don't try to thread the prior turn's encrypted
|
||||
# blob back in.
|
||||
# This applies to every Responses transport including
|
||||
# xAI — see _chat_messages_to_responses_input docstring
|
||||
# for the May 2026 reversal of the earlier xAI gate.
|
||||
codex_reasoning = msg.get("codex_reasoning_items")
|
||||
has_codex_reasoning = False
|
||||
if isinstance(codex_reasoning, list) and not is_xai_responses:
|
||||
if isinstance(codex_reasoning, list):
|
||||
for ri in codex_reasoning:
|
||||
if isinstance(ri, dict) and ri.get("encrypted_content"):
|
||||
item_id = ri.get("id")
|
||||
|
||||
@@ -387,8 +387,6 @@ def compress_context(
|
||||
_SESSION_ID.set(agent.session_id)
|
||||
except Exception:
|
||||
pass
|
||||
# Update session_log_file to point to the new session's JSON file
|
||||
agent.session_log_file = agent.logs_dir / f"session_{agent.session_id}.json"
|
||||
agent._session_db_created = False
|
||||
agent._session_db.create_session(
|
||||
session_id=agent.session_id,
|
||||
|
||||
@@ -46,6 +46,7 @@ from agent.message_sanitization import (
|
||||
_strip_non_ascii,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
estimate_messages_tokens_rough,
|
||||
estimate_request_tokens_rough,
|
||||
get_next_probe_tier,
|
||||
@@ -73,6 +74,50 @@ from utils import base_url_host_matches, env_var_enabled
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
    """Return a user-facing error when Ollama is loaded with too little context.

    Args:
        agent: Object whose ``tools``, ``_ollama_num_ctx``, ``model``,
            ``base_url``, ``provider`` and ``session_id`` attributes are read
            via ``getattr`` (all optional).
        request_tokens: Estimated token size of the pending request; only
            used in the log line.

    Returns:
        ``None`` when the agent has no tools, when ``_ollama_num_ctx`` is not
        a positive integer, or when it is at least ``MINIMUM_CONTEXT_LENGTH``.
        Otherwise a multi-line message explaining how to raise the context.
        A warning is logged before the message is returned.
    """
    tools = getattr(agent, "tools", None)
    if not tools:
        return None

    runtime_ctx = getattr(agent, "_ollama_num_ctx", None)
    # ``bool`` is a subclass of ``int``: without the explicit check a stray
    # ``True`` would be reported as a 1-token runtime context.
    if isinstance(runtime_ctx, bool) or not isinstance(runtime_ctx, int):
        return None
    if runtime_ctx <= 0 or runtime_ctx >= MINIMUM_CONTEXT_LENGTH:
        return None

    model = getattr(agent, "model", "") or "the selected model"
    base_url = getattr(agent, "base_url", "") or "unknown base URL"
    provider = getattr(agent, "provider", "") or "unknown"
    tool_count = len(tools)

    logger.warning(
        "Ollama runtime context too small for Hermes tool use: "
        "model=%s provider=%s base_url=%s runtime_context=%d "
        "minimum_context=%d estimated_request_tokens=%d tool_count=%d "
        "session=%s",
        model,
        provider,
        base_url,
        runtime_ctx,
        MINIMUM_CONTEXT_LENGTH,
        request_tokens,
        tool_count,
        getattr(agent, "session_id", None) or "none",
    )

    return (
        f"Ollama loaded `{model}` with only {runtime_ctx:,} tokens of runtime "
        f"context, but Hermes needs at least {MINIMUM_CONTEXT_LENGTH:,} tokens "
        "for reliable tool use.\n\n"
        "Increase the Ollama context for this model and restart/reload the "
        "model before trying again. A known-good starting point is 65,536 "
        "tokens. In Hermes config, set `model.ollama_num_ctx: 65536` "
        "(and `model.context_length: 65536` if you also override the displayed "
        "model context). If you manage the model through an Ollama Modelfile, "
        "set `PARAMETER num_ctx 65536` there instead."
    )
|
||||
|
||||
|
||||
def _ra():
|
||||
"""Lazy reference to ``run_agent`` so callers can patch
|
||||
``run_agent.handle_function_call`` / ``run_agent._set_interrupt`` /
|
||||
@@ -527,6 +572,7 @@ def run_conversation(
|
||||
api_call_count = 0
|
||||
final_response = None
|
||||
interrupted = False
|
||||
failed = False
|
||||
codex_ack_continuations = 0
|
||||
length_continue_retries = 0
|
||||
truncated_tool_call_retries = 0
|
||||
@@ -883,6 +929,26 @@ def run_conversation(
|
||||
# Calculate approximate request size for logging
|
||||
total_chars = sum(len(str(msg)) for msg in api_messages)
|
||||
approx_tokens = estimate_messages_tokens_rough(api_messages)
|
||||
approx_request_tokens = estimate_request_tokens_rough(
|
||||
api_messages, tools=agent.tools or None
|
||||
)
|
||||
|
||||
_runtime_context_error = _ollama_context_limit_error(
|
||||
agent, approx_request_tokens
|
||||
)
|
||||
if _runtime_context_error:
|
||||
final_response = _runtime_context_error
|
||||
failed = True
|
||||
_turn_exit_reason = "ollama_runtime_context_too_small"
|
||||
messages.append({"role": "assistant", "content": final_response})
|
||||
agent._emit_status("❌ Ollama runtime context is too small for Hermes tool use")
|
||||
api_call_count -= 1
|
||||
agent._api_call_count = api_call_count
|
||||
try:
|
||||
agent.iteration_budget.refund()
|
||||
except Exception:
|
||||
pass
|
||||
break
|
||||
|
||||
# Thinking spinner for quiet mode (animated during API call)
|
||||
thinking_spinner = None
|
||||
@@ -923,6 +989,7 @@ def run_conversation(
|
||||
copilot_auth_retry_attempted=False
|
||||
thinking_sig_retry_attempted = False
|
||||
image_shrink_retry_attempted = False
|
||||
multimodal_tool_content_retry_attempted = False
|
||||
oauth_1m_beta_retry_attempted = False
|
||||
llama_cpp_grammar_retry_attempted = False
|
||||
has_retried_429 = False
|
||||
@@ -1454,7 +1521,6 @@ def run_conversation(
|
||||
}
|
||||
messages.append(continue_msg)
|
||||
agent._session_messages = messages
|
||||
agent._save_session_log(messages)
|
||||
restart_with_length_continuation = True
|
||||
break
|
||||
|
||||
@@ -1995,6 +2061,31 @@ def run_conversation(
|
||||
"or shrink didn't reduce size; surfacing original error."
|
||||
)
|
||||
|
||||
# Multimodal-tool-content recovery: providers that follow
|
||||
# the OpenAI spec strictly (tool message content must be a
|
||||
# string) reject our list-type content with a 400. Strip
|
||||
# image parts from any list-type tool messages, mark the
|
||||
# (provider, model) as no-list-tool-content for the rest
|
||||
# of this session so future tool results preemptively
|
||||
# downgrade, and retry once. See issue #27344.
|
||||
if (
|
||||
classified.reason == FailoverReason.multimodal_tool_content_unsupported
|
||||
and not multimodal_tool_content_retry_attempted
|
||||
):
|
||||
multimodal_tool_content_retry_attempted = True
|
||||
if agent._try_strip_image_parts_from_tool_messages(api_messages):
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}📐 Provider rejected list-type tool content — "
|
||||
f"downgraded screenshots to text and retrying...",
|
||||
force=True,
|
||||
)
|
||||
continue
|
||||
else:
|
||||
logger.info(
|
||||
"multimodal-tool-content recovery: no list-type tool "
|
||||
"messages with image parts found; surfacing original error."
|
||||
)
|
||||
|
||||
# Anthropic OAuth subscription rejected the 1M-context beta
|
||||
# header ("long context beta is not yet available for this
|
||||
# subscription"). Disable the beta for the rest of this
|
||||
@@ -3086,7 +3177,6 @@ def run_conversation(
|
||||
if not agent.quiet_mode:
|
||||
agent._vprint(f"{agent.log_prefix}↻ Codex response incomplete; continuing turn ({agent._codex_incomplete_retries}/3)")
|
||||
agent._session_messages = messages
|
||||
agent._save_session_log(messages)
|
||||
continue
|
||||
|
||||
agent._codex_incomplete_retries = 0
|
||||
@@ -3411,7 +3501,6 @@ def run_conversation(
|
||||
|
||||
# Save session log incrementally (so progress is visible even if interrupted)
|
||||
agent._session_messages = messages
|
||||
agent._save_session_log(messages)
|
||||
|
||||
# Continue loop for next response
|
||||
continue
|
||||
@@ -3578,7 +3667,6 @@ def run_conversation(
|
||||
interim_msg["_thinking_prefill"] = True
|
||||
messages.append(interim_msg)
|
||||
agent._session_messages = messages
|
||||
agent._save_session_log(messages)
|
||||
continue
|
||||
|
||||
# ── Empty response retry ──────────────────────
|
||||
@@ -3712,7 +3800,6 @@ def run_conversation(
|
||||
}
|
||||
messages.append(continue_msg)
|
||||
agent._session_messages = messages
|
||||
agent._save_session_log(messages)
|
||||
continue
|
||||
|
||||
codex_ack_continuations = 0
|
||||
@@ -3853,7 +3940,11 @@ def run_conversation(
|
||||
)
|
||||
|
||||
# Determine if conversation completed successfully
|
||||
completed = final_response is not None and api_call_count < agent.max_iterations
|
||||
completed = (
|
||||
final_response is not None
|
||||
and api_call_count < agent.max_iterations
|
||||
and not failed
|
||||
)
|
||||
|
||||
# Save trajectory if enabled. ``user_message`` may be a multimodal
|
||||
# list of parts; the trajectory format wants a plain string.
|
||||
@@ -4003,6 +4094,7 @@ def run_conversation(
|
||||
"api_calls": api_call_count,
|
||||
"completed": completed,
|
||||
"turn_exit_reason": _turn_exit_reason,
|
||||
"failed": failed,
|
||||
"partial": False, # True only when stopped due to invalid tool calls
|
||||
"interrupted": interrupted,
|
||||
"response_previewed": getattr(agent, "_response_was_previewed", False),
|
||||
|
||||
@@ -50,6 +50,7 @@ from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -176,7 +177,9 @@ def get_keep() -> int:
|
||||
|
||||
def _count_skill_files(base: Path) -> int:
    """Count ``SKILL.md`` files found recursively under *base*.

    Files for which ``is_excluded_skill_path(p)`` is true are not counted.
    Returns 0 when walking the tree raises ``OSError``.
    """
    try:
        return sum(
            1 for p in base.rglob("SKILL.md") if not is_excluded_skill_path(p)
        )
    except OSError:
        return 0
|
||||
|
||||
|
||||
@@ -50,6 +50,7 @@ class FailoverReason(enum.Enum):
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
multimodal_tool_content_unsupported = "multimodal_tool_content_unsupported" # Provider rejected list-type content in tool messages (e.g. Xiaomi MiMo) — downgrade to text and retry
|
||||
|
||||
# Provider-specific
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
@@ -165,6 +166,32 @@ _IMAGE_TOO_LARGE_PATTERNS = [
|
||||
# the likely culprit; we still try the shrink path before giving up.
|
||||
]
|
||||
|
||||
# Providers that follow the OpenAI spec strictly require tool message
|
||||
# ``content`` to be a string. Some (Anthropic native, Codex Responses,
|
||||
# Gemini native, first-party OpenAI) extend this to accept a content-parts
|
||||
# list (text + image_url) so screenshots from computer_use survive. Others
|
||||
# (Xiaomi MiMo, some Alibaba endpoints, a long tail of OpenAI-compatible
|
||||
# providers) reject the list with a 400 — the patterns below are the most
|
||||
# common error shapes we see. Recovery: strip image parts from tool
|
||||
# messages in-place, record the (provider, model) for the rest of the
|
||||
# session so we don't waste another call learning the same lesson, retry.
|
||||
#
|
||||
# See: https://github.com/NousResearch/hermes-agent/issues/27344
|
||||
_MULTIMODAL_TOOL_CONTENT_PATTERNS = [
|
||||
# Xiaomi MiMo: {"error":{"code":"400","message":"Param Incorrect","param":"text is not set"}}
|
||||
"text is not set",
|
||||
# Generic "tool message must be string" shapes
|
||||
"tool message content must be a string",
|
||||
"tool content must be a string",
|
||||
"tool message must be a string",
|
||||
# OpenAI-compat servers that reject list-type tool content with a
|
||||
# schema-validation message
|
||||
"expected string, got list",
|
||||
"expected string, got array",
|
||||
# Alibaba/DashScope variant
|
||||
"tool_call.content must be string",
|
||||
]
|
||||
|
||||
# Context overflow patterns
|
||||
_CONTEXT_OVERFLOW_PATTERNS = [
|
||||
"context length",
|
||||
@@ -781,6 +808,19 @@ def _classify_400(
|
||||
) -> ClassifiedError:
|
||||
"""Classify 400 Bad Request — context overflow, format error, or generic."""
|
||||
|
||||
# Multimodal tool content rejected from 400. Must be checked BEFORE
|
||||
# image_too_large because the recovery is different (strip image parts
|
||||
# from tool messages, mark the model as no-list-tool-content for the
|
||||
# rest of the session) and BEFORE context_overflow because some of the
|
||||
# patterns ("text is not set") are ambiguous in isolation but become
|
||||
# specific when combined with a 400 on a request known to contain
|
||||
# multimodal tool content.
|
||||
if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.multimodal_tool_content_unsupported,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
|
||||
# Must be checked BEFORE context_overflow because messages can trip both
|
||||
# patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
|
||||
@@ -922,6 +962,13 @@ def _classify_by_message(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Multimodal tool content patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.multimodal_tool_content_unsupported,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Image-too-large patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
|
||||
@@ -16,9 +16,19 @@ def _hermes_home_path() -> Path:
|
||||
return Path(os.path.expanduser("~/.hermes"))
|
||||
|
||||
|
||||
def _hermes_root_path() -> Path:
    """Return the Hermes root directory shared by all profiles.

    Delegates to ``hermes_constants.get_default_hermes_root``; if that module
    cannot be imported or the call fails for any reason, falls back to
    ``~/.hermes`` with the user directory expanded.
    """
    try:
        # Imported lazily (inside the function) to avoid an import cycle.
        import hermes_constants

        resolve_root = hermes_constants.get_default_hermes_root
        return resolve_root()
    except Exception:
        default_root = os.path.expanduser("~/.hermes")
        return Path(default_root)
|
||||
|
||||
|
||||
def build_write_denied_paths(home: str) -> set[str]:
|
||||
"""Return exact sensitive paths that must never be written."""
|
||||
hermes_home = _hermes_home_path()
|
||||
hermes_root = _hermes_root_path()
|
||||
return {
|
||||
os.path.realpath(p)
|
||||
for p in [
|
||||
@@ -26,7 +36,11 @@ def build_write_denied_paths(home: str) -> set[str]:
|
||||
os.path.join(home, ".ssh", "id_rsa"),
|
||||
os.path.join(home, ".ssh", "id_ed25519"),
|
||||
os.path.join(home, ".ssh", "config"),
|
||||
# Active profile .env (or top-level .env when not in profile mode).
|
||||
str(hermes_home / ".env"),
|
||||
# Top-level .env, even when running under a profile — overwriting it
|
||||
# leaks credentials across every profile that inherits from root (#15981).
|
||||
str(hermes_root / ".env"),
|
||||
os.path.join(home, ".bashrc"),
|
||||
os.path.join(home, ".zshrc"),
|
||||
os.path.join(home, ".profile"),
|
||||
|
||||
@@ -59,7 +59,7 @@ from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from hermes_constants import get_hermes_home, secure_parent_dir
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -491,10 +491,8 @@ def save_credentials(creds: GoogleCredentials) -> Path:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Tighten parent dir to 0o700 so siblings can't traverse to the creds file.
|
||||
# On Windows this is a no-op (POSIX mode bits aren't enforced); ignore failures.
|
||||
try:
|
||||
os.chmod(path.parent, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
# secure_parent_dir refuses to chmod / or top-level dirs (#25821).
|
||||
secure_parent_dir(path)
|
||||
payload = json.dumps(creds.to_dict(), indent=2, sort_keys=True) + "\n"
|
||||
|
||||
with _credentials_lock():
|
||||
|
||||
+93
-3
@@ -46,6 +46,84 @@ logger = logging.getLogger(__name__)
|
||||
_VALID_MODES = frozenset({"auto", "native", "text"})
|
||||
|
||||
|
||||
# Strict YAML/JSON boolean coercion for capability overrides.
|
||||
#
|
||||
# ``bool("false")`` is True in Python because non-empty strings are truthy, so
|
||||
# a user writing ``supports_vision: "false"`` (quoted — a common YAML mistake)
|
||||
# would silently enable native vision routing on a model that can't actually
|
||||
# handle it. Accept only the values YAML 1.1 / 1.2 treat as booleans, plus
|
||||
# real ``bool`` and integer 0/1. Anything else returns None so the caller
|
||||
# falls through to models.dev rather than honouring garbage.
|
||||
_TRUE_TOKENS = frozenset({"true", "yes", "on", "1"})
_FALSE_TOKENS = frozenset({"false", "no", "off", "0"})


def _coerce_capability_bool(raw: Any) -> Optional[bool]:
    """Map a config value to a strict boolean, or ``None`` if unrecognised.

    Accepted inputs: real ``bool`` values, the integers 0 and 1, and strings
    that (after stripping whitespace and lower-casing) appear in
    ``_TRUE_TOKENS`` or ``_FALSE_TOKENS``.  Everything else yields ``None``.
    """
    # bool must be tested before int because bool is an int subclass.
    if isinstance(raw, bool):
        return raw
    if isinstance(raw, int):
        return bool(raw) if raw in (0, 1) else None
    if not isinstance(raw, str):
        return None

    token = raw.strip().lower()
    if token in _TRUE_TOKENS:
        return True
    if token in _FALSE_TOKENS:
        return False
    return None
|
||||
|
||||
|
||||
def _supports_vision_override(
    cfg: Optional[Dict[str, Any]],
    provider: str,
    model: str,
) -> Optional[bool]:
    """Look up a user-declared ``supports_vision`` flag in the config dict.

    Lookup order (the first recognised boolean wins):
      1. ``model.supports_vision``
      2. ``providers.<name>.models.<model>.supports_vision`` where
         ``<name>`` is first the ``provider`` argument and then the value
         stored under ``model.provider`` (duplicates and empty names are
         skipped).

    Any section that is missing or is not a dict is treated as empty.
    Returns ``None`` when no recognised boolean is found at any level.
    """
    if not isinstance(cfg, dict):
        return None

    def _section(value: Any) -> Dict[str, Any]:
        # Non-dict sections are treated as empty.
        return value if isinstance(value, dict) else {}

    # 1. Shortcut declared directly on the active model.
    model_section = _section(cfg.get("model"))
    shortcut = _coerce_capability_bool(model_section.get("supports_vision"))
    if shortcut is not None:
        return shortcut

    # 2. Per-provider, per-model entries.  Both the caller-supplied provider
    #    and the name stored under model.provider are candidate keys.
    declared_provider = str(model_section.get("provider") or "").strip()
    providers_section = _section(cfg.get("providers"))

    candidates = []
    for name in (provider, declared_provider):
        if name and name not in candidates:
            candidates.append(name)

    for name in candidates:
        provider_entry = _section(providers_section.get(name))
        model_table = _section(provider_entry.get("models"))
        model_entry = _section(model_table.get(model))
        verdict = _coerce_capability_bool(model_entry.get("supports_vision"))
        if verdict is not None:
            return verdict
    return None
|
||||
|
||||
|
||||
def _coerce_mode(raw: Any) -> str:
|
||||
"""Normalize a config value into one of the valid modes."""
|
||||
if not isinstance(raw, str):
|
||||
@@ -81,8 +159,20 @@ def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
|
||||
"""Return True/False if we can resolve caps, None if unknown."""
|
||||
def _lookup_supports_vision(
|
||||
provider: str,
|
||||
model: str,
|
||||
cfg: Optional[Dict[str, Any]] = None,
|
||||
) -> Optional[bool]:
|
||||
"""Return True/False if we can resolve caps, None if unknown.
|
||||
|
||||
Consults the user's ``supports_vision`` override in config.yaml first
|
||||
(so custom/local models declared as vision-capable don't fall through to
|
||||
text routing in ``auto`` mode), then falls back to models.dev.
|
||||
"""
|
||||
override = _supports_vision_override(cfg, provider, model)
|
||||
if override is not None:
|
||||
return override
|
||||
if not provider or not model:
|
||||
return None
|
||||
try:
|
||||
@@ -123,7 +213,7 @@ def decide_image_input_mode(
|
||||
if _explicit_aux_vision_override(cfg):
|
||||
return "text"
|
||||
|
||||
supports = _lookup_supports_vision(provider, model)
|
||||
supports = _lookup_supports_vision(provider, model, cfg)
|
||||
if supports is True:
|
||||
return "native"
|
||||
return "text"
|
||||
|
||||
+38
-1
@@ -1258,6 +1258,10 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
"terminal",
|
||||
"process",
|
||||
"execute_code",
|
||||
"app_search_tools",
|
||||
"app_tool_schemas",
|
||||
"app_execute_tools",
|
||||
"app_manage_connections",
|
||||
}
|
||||
|
||||
if valid_names and not (valid_names & relevant_tool_names):
|
||||
@@ -1279,7 +1283,7 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
|
||||
lines = [
|
||||
"# Nous Subscription",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, browser automation (Browser Use), and app integrations (500+ apps) by default. Modal execution is optional.",
|
||||
"Current capability status:",
|
||||
]
|
||||
lines.extend(_status_line(feature) for feature in features.items())
|
||||
@@ -1294,6 +1298,39 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# App tools (500+ external integrations) behavioural prompt
|
||||
# =========================================================================
|
||||
|
||||
_APP_TOOLS_PROMPT = """\
|
||||
## App Tools (500+ External Integrations)
|
||||
|
||||
You have app_search_tools, app_tool_schemas, app_execute_tools, and app_manage_connections available RIGHT NOW as callable tools. They are already configured and connected to the Nous tool gateway — no SDK installation, no API keys, no plugin setup needed. Just call them.
|
||||
|
||||
**When to use:** When a user asks to interact with ANY external app or service — Gmail, Slack, GitHub, Jira, Notion, Google Sheets, Linear, HubSpot, Figma, Salesforce, or any of 500+ other apps. ALWAYS prefer these tools over loading skills about the same service (e.g. do NOT load the 'linear', 'airtable', 'google-workspace', 'notion', or any similar skill — use app_search_tools instead). Do NOT suggest installing SDKs, CLI tools, MCP servers, or API keys for external services — call app_search_tools directly.
|
||||
|
||||
**Workflow:**
|
||||
1. Call app_search_tools with a clear use_case description to discover available tools
|
||||
2. Check the response for connection status — if no active connection, call app_manage_connections and share the auth link with the user
|
||||
3. Review the execution plan and pitfalls in the search response before executing
|
||||
4. If a tool has schemaRef instead of input_schema, call app_tool_schemas to get the full schema
|
||||
5. Execute tools via app_execute_tools with schema-compliant arguments
|
||||
|
||||
**Session tracking:** Pass session: {generate_id: true} on your first app_search_tools call. Reuse the returned session.id in all subsequent calls. Generate a new session when the user pivots to a different task.
|
||||
|
||||
**Important:** Never fabricate tool slugs or argument field names. Only use slugs and schemas returned by app_search_tools or app_tool_schemas."""
|
||||
|
||||
|
||||
def build_app_tools_prompt(valid_tool_names: "set[str] | None" = None) -> str:
    """Return the app-tools guidance text, or ``""`` when it does not apply.

    The prompt is returned only when ``valid_tool_names`` is non-empty and
    contains ``"app_search_tools"``.  An empty or missing tool set returns
    the empty string (conservative: nothing is known about the tools).
    """
    if valid_tool_names and "app_search_tools" in valid_tool_names:
        return _APP_TOOLS_PROMPT
    return ""
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Context files (SOUL.md, AGENTS.md, .cursorrules)
|
||||
# =========================================================================
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
"""External secret source integrations.
|
||||
|
||||
A secret source is anything that can supply environment-variable-shaped
|
||||
credentials at process startup, _after_ ~/.hermes/.env has loaded. By
|
||||
default sources are non-destructive: they only set values for env vars
|
||||
that aren't already present, so .env and shell exports continue to win.
|
||||
|
||||
Currently shipped:
|
||||
|
||||
- ``bitwarden`` — Bitwarden Secrets Manager (`bws` CLI). See
|
||||
``agent.secret_sources.bitwarden`` for the integration and
|
||||
``hermes_cli.secrets_cli`` for the user-facing setup wizard.
|
||||
"""
|
||||
@@ -0,0 +1,515 @@
|
||||
"""Bitwarden Secrets Manager (`bws` CLI) integration.
|
||||
|
||||
Hermes pulls API keys from Bitwarden Secrets Manager at process startup
|
||||
so they don't have to live in plaintext in ``~/.hermes/.env``.
|
||||
|
||||
Design summary
|
||||
--------------
|
||||
|
||||
* The ``bws`` binary is auto-installed into ``<hermes_home>/bin/bws`` on
|
||||
first use. Hermes pins one version (``_BWS_VERSION``) and downloads
|
||||
the matching asset from the official GitHub Releases page, verifying
|
||||
the SHA-256 against the release's published checksum file.
|
||||
* The access token is stored in ``~/.hermes/.env`` as
|
||||
``BWS_ACCESS_TOKEN`` (or whatever name the user picked in
|
||||
``secrets.bitwarden.access_token_env``). This is the one
|
||||
bootstrap secret — every other provider key can live in Bitwarden.
|
||||
* Pulling secrets is a single ``bws secret list <project_id>
  --output json`` call.  We cache the result in-process for
  ``cache_ttl_seconds`` so repeated loads inside one process (gateway
  hot-reload, test suites) don't hammer the API.  The cache is not
  shared between separate ``hermes`` invocations.
|
||||
* Failures NEVER block Hermes startup. Missing binary, no network,
|
||||
expired token, etc. all emit a one-line warning and continue with
|
||||
whatever credentials ``.env`` already had.
|
||||
|
||||
The module is intentionally subprocess-driven rather than going through
|
||||
the ``bitwarden-sdk-secrets`` Python package: one cross-platform binary
|
||||
is easier to lazy-install than a wheels-with-Rust-extension dependency.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import stat
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import zipfile
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Pinned upstream version. Bump in a follow-up PR — never auto-resolve
|
||||
# "latest" because upstream release shape (asset names, CLI flags) is
|
||||
# allowed to change between majors and we want updates to be deliberate.
|
||||
_BWS_VERSION = "2.0.0"
|
||||
|
||||
_BWS_RELEASE_BASE = (
|
||||
f"https://github.com/bitwarden/sdk-sm/releases/download/bws-v{_BWS_VERSION}"
|
||||
)
|
||||
_BWS_CHECKSUM_NAME = f"bws-sha256-checksums-{_BWS_VERSION}.txt"
|
||||
|
||||
# How long to wait for bws subprocesses and HTTP downloads, in seconds.
|
||||
_BWS_DOWNLOAD_TIMEOUT = 60
|
||||
_BWS_RUN_TIMEOUT = 30
|
||||
|
||||
# In-process cache so repeated load_hermes_dotenv() calls (CLI startup,
|
||||
# gateway hot-reload, test suites) don't re-fetch from BSM.
|
||||
_CacheKey = Tuple[str, str] # (access_token_fingerprint, project_id)
|
||||
_CACHE: Dict[_CacheKey, "_CachedFetch"] = {}
|
||||
|
||||
|
||||
@dataclass
|
||||
class _CachedFetch:
|
||||
secrets: Dict[str, str]
|
||||
fetched_at: float
|
||||
|
||||
def is_fresh(self, ttl_seconds: float) -> bool:
|
||||
if ttl_seconds <= 0:
|
||||
return False
|
||||
return (time.time() - self.fetched_at) < ttl_seconds
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public dataclasses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class FetchResult:
    """Summary of one attempt to pull secrets from Bitwarden Secrets Manager."""

    # Secrets returned by the fetch, keyed by name.
    secrets: Dict[str, str] = field(default_factory=dict)
    # Names that were set into os.environ.
    applied: List[str] = field(default_factory=list)
    # Names that were already set and therefore not overridden.
    skipped: List[str] = field(default_factory=list)
    # Non-fatal issues.
    warnings: List[str] = field(default_factory=list)
    # Fatal problem description: nothing was fetched.  None means success.
    error: Optional[str] = None
    # The ``bws`` executable that was resolved, if any.
    binary_path: Optional[Path] = None

    @property
    def ok(self) -> bool:
        """True when no fatal error has been recorded."""
        return self.error is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Binary discovery + lazy install
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _hermes_bin_dir() -> Path:
    """Return the profile-aware directory where Hermes keeps managed binaries.

    This is the ``bin`` subdirectory of whatever ``get_hermes_home()``
    returns.
    """
    # Imported at call time rather than at module import.
    from hermes_constants import get_hermes_home

    hermes_home = get_hermes_home()
    return hermes_home / "bin"
|
||||
|
||||
|
||||
def find_bws(*, install_if_missing: bool = False) -> Optional[Path]:
    """Return a path to a usable ``bws`` binary, or None.

    Resolution order:
      1. ``<hermes_home>/bin/bws`` (our managed copy — preferred)
      2. ``shutil.which("bws")`` (system PATH)

    When ``install_if_missing`` is True and neither resolves, this calls
    :func:`install_bws` to download and verify the pinned version.  Any
    exception raised by the install is logged as a warning and converted
    into a ``None`` return, so this function never blocks startup.
    """
    managed = _hermes_bin_dir() / _platform_binary_name()
    # is_file() rather than exists(): a directory with that name also
    # passes the X_OK check but can never be run as the binary.
    if managed.is_file() and os.access(managed, os.X_OK):
        return managed

    system = shutil.which("bws")
    if system:
        return Path(system)

    if not install_if_missing:
        return None

    try:
        return install_bws()
    except Exception as exc:  # noqa: BLE001 — never block startup
        logger.warning("bws auto-install failed: %s", exc)
        return None
|
||||
|
||||
|
||||
def _platform_binary_name() -> str:
|
||||
return "bws.exe" if platform.system() == "Windows" else "bws"
|
||||
|
||||
|
||||
def _platform_asset_name() -> str:
    """Map (OS, CPU architecture, libc) to the upstream asset filename.

    Asset names follow Rust's target triple convention.  macOS uses the
    single universal archive.  Linux defaults to gnu (glibc); we switch to
    musl only if ``ldd --version`` mentions it.

    Raises:
        RuntimeError: if the OS or the CPU architecture is not one this
            function has an asset name for.  Unknown architectures used to
            be treated as x86_64, which installed a binary that could
            never execute on the host.
    """
    system = platform.system()
    machine = platform.machine().lower()

    if system == "Darwin":
        # Universal binary works on both Intel and Apple Silicon — no
        # need to pick a per-arch asset.
        return f"bws-macos-universal-{_BWS_VERSION}.zip"

    if system in ("Windows", "Linux"):
        if machine in ("arm64", "aarch64"):
            arch = "aarch64"
        elif machine in ("x86_64", "amd64"):
            arch = "x86_64"
        else:
            arch = None

        if arch is not None and system == "Windows":
            return f"bws-{arch}-pc-windows-msvc-{_BWS_VERSION}.zip"

        if arch is not None:
            libc = "gnu"
            # glibc's ldd prints its version banner on stdout, musl's
            # prints on stderr, so both streams are searched.  We don't
            # need bullet-proof detection — getting it wrong falls back to
            # a clear error from the binary loader, which we catch.
            try:
                res = subprocess.run(
                    ["ldd", "--version"],
                    capture_output=True,
                    text=True,
                    timeout=2,
                )
                if "musl" in (res.stdout + res.stderr).lower():
                    libc = "musl"
            except (OSError, subprocess.TimeoutExpired):
                pass
            return f"bws-{arch}-unknown-linux-{libc}-{_BWS_VERSION}.zip"

    raise RuntimeError(
        f"Unsupported platform for bws auto-install: {system} {machine}"
    )
|
||||
|
||||
|
||||
def install_bws(*, force: bool = False) -> Path:
    """Download, verify, and install the pinned ``bws`` binary.

    Returns the path to the installed executable.  Raises on any
    failure (network, checksum, extraction) — callers in the auto-install
    path catch these; the user-facing ``hermes secrets bitwarden setup``
    surface lets them propagate so the wizard can show a clear error.

    Args:
        force: when True, re-download even if a managed binary is already
            present.  When False, an existing managed binary is reused
            only if it is a regular, executable file; anything else is
            replaced by a fresh download.
    """
    bin_dir = _hermes_bin_dir()
    bin_dir.mkdir(parents=True, exist_ok=True)
    target = bin_dir / _platform_binary_name()

    # Only short-circuit on something that can actually be executed.
    if not force and target.is_file() and os.access(target, os.X_OK):
        return target

    asset_name = _platform_asset_name()
    asset_url = f"{_BWS_RELEASE_BASE}/{asset_name}"
    checksum_url = f"{_BWS_RELEASE_BASE}/{_BWS_CHECKSUM_NAME}"

    with tempfile.TemporaryDirectory(prefix="hermes-bws-") as tmpdir:
        tmp = Path(tmpdir)
        zip_path = tmp / asset_name
        checksum_path = tmp / _BWS_CHECKSUM_NAME

        logger.info("Downloading %s", asset_url)
        _http_download(asset_url, zip_path)
        _http_download(checksum_url, checksum_path)

        expected = _expected_sha256(checksum_path, asset_name)
        actual = _sha256_file(zip_path)
        if expected.lower() != actual.lower():
            raise RuntimeError(
                f"Checksum mismatch for {asset_name}: "
                f"expected {expected}, got {actual}"
            )

        with zipfile.ZipFile(zip_path) as zf:
            member = _pick_zip_member(zf, _platform_binary_name())
            # extract() sanitises the member name and returns the path it
            # really wrote, which may differ from ``tmp / member``.
            extracted = Path(zf.extract(member, tmp))

        # Move into place atomically.  We write to a sibling tempfile in
        # the final directory so the rename can't cross filesystems.
        fd, staged = tempfile.mkstemp(dir=str(bin_dir), prefix=".bws_")
        os.close(fd)
        try:
            shutil.copy2(extracted, staged)
            os.chmod(
                staged,
                stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
                | stat.S_IRGRP | stat.S_IXGRP
                | stat.S_IROTH | stat.S_IXOTH,
            )
            os.replace(staged, target)
        except BaseException:
            # Don't leave a stray ".bws_*" file behind in the bin dir.
            try:
                os.unlink(staged)
            except OSError:
                pass
            raise

    logger.info("Installed bws %s at %s", _BWS_VERSION, target)
    return target
|
||||
|
||||
|
||||
def _http_download(url: str, dest: Path) -> None:
    """Download ``url`` into the file ``dest``.

    Raises:
        RuntimeError: on any transport or file error.  ``URLError`` is a
            subclass of ``OSError``, so the handler below also covers
            socket timeouts and connection resets that happen while the
            body is being copied; ``http.client.HTTPException`` covers
            protocol-level failures such as a truncated response.
    """
    import http.client  # local import: only this handler needs it

    req = urllib.request.Request(url, headers={"User-Agent": "hermes-agent"})
    try:
        with urllib.request.urlopen(req, timeout=_BWS_DOWNLOAD_TIMEOUT) as resp:  # noqa: S310
            with open(dest, "wb") as f:
                shutil.copyfileobj(resp, f)
    except (OSError, http.client.HTTPException) as exc:
        raise RuntimeError(f"Failed to download {url}: {exc}") from exc
|
||||
|
||||
|
||||
def _expected_sha256(checksum_file: Path, asset_name: str) -> str:
|
||||
"""Parse the upstream ``bws-sha256-checksums-X.Y.Z.txt`` file.
|
||||
|
||||
Format is the standard ``sha256sum`` output: ``<hex> <filename>``,
|
||||
one per line.
|
||||
"""
|
||||
text = checksum_file.read_text(encoding="utf-8", errors="replace")
|
||||
for line in text.splitlines():
|
||||
parts = line.strip().split()
|
||||
if len(parts) >= 2 and parts[-1] == asset_name:
|
||||
return parts[0]
|
||||
raise RuntimeError(
|
||||
f"No checksum entry for {asset_name} in {checksum_file.name}"
|
||||
)
|
||||
|
||||
|
||||
def _sha256_file(path: Path) -> str:
|
||||
h = hashlib.sha256()
|
||||
with open(path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(65536), b""):
|
||||
h.update(chunk)
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def _pick_zip_member(zf: zipfile.ZipFile, binary_name: str) -> str:
|
||||
"""Find the binary inside the upstream zip.
|
||||
|
||||
Historically the archive has been flat (``bws`` at the root) but we
|
||||
tolerate a top-level directory just in case upstream changes.
|
||||
"""
|
||||
candidates = [n for n in zf.namelist() if n.split("/")[-1] == binary_name]
|
||||
if not candidates:
|
||||
raise RuntimeError(
|
||||
f"Could not find {binary_name} inside downloaded archive "
|
||||
f"(members: {zf.namelist()[:5]}...)"
|
||||
)
|
||||
# Prefer the shortest path (i.e. root over nested) for determinism.
|
||||
candidates.sort(key=len)
|
||||
return candidates[0]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Secret fetch + apply
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _token_fingerprint(token: str) -> str:
|
||||
"""SHA-256 prefix used as a cache key — never logged, never displayed."""
|
||||
return hashlib.sha256(token.encode("utf-8")).hexdigest()[:16]
|
||||
|
||||
|
||||
def fetch_bitwarden_secrets(
    *,
    access_token: str,
    project_id: str,
    binary: Optional[Path] = None,
    cache_ttl_seconds: float = 300,
    use_cache: bool = True,
) -> Tuple[Dict[str, str], List[str]]:
    """Pull the secrets for ``project_id`` from Bitwarden Secrets Manager.

    Returns ``(secrets_dict, warnings_list)``.  The returned dict is always
    a private copy, so callers may mutate it without corrupting the
    in-process cache.  A cache hit returns an empty warnings list.

    Args:
        access_token: the BSM access token; must be non-empty.
        project_id: the BSM project to list; must be non-empty.
        binary: explicit ``bws`` path.  When omitted, :func:`find_bws` is
            used with auto-install enabled.
        cache_ttl_seconds: how long a cached result stays usable.
        use_cache: when False the cache is not consulted; the fresh result
            is still stored afterwards.

    Raises :class:`RuntimeError` for fatal conditions (missing binary,
    auth failure, unparseable output).  Callers in the env_loader path
    catch this and emit a single warning; callers in the user-facing
    setup wizard let it propagate.
    """
    if not access_token:
        raise RuntimeError("Bitwarden access token is empty")
    if not project_id:
        raise RuntimeError("Bitwarden project_id is empty")

    cache_key = (_token_fingerprint(access_token), project_id)
    if use_cache:
        cached = _CACHE.get(cache_key)
        if cached is not None and cached.is_fresh(cache_ttl_seconds):
            # Copy so caller-side mutation can't leak into the cache.
            return dict(cached.secrets), []

    bws = binary or find_bws(install_if_missing=True)
    if bws is None:
        raise RuntimeError(
            "bws binary not available — auto-install failed and `bws` is "
            "not on PATH. Install manually from "
            "https://github.com/bitwarden/sdk-sm/releases or re-run "
            "`hermes secrets bitwarden setup`."
        )

    secrets, warnings = _run_bws_list(bws, access_token, project_id)
    # Store a copy for the same reason a copy is returned on cache hits.
    _CACHE[cache_key] = _CachedFetch(secrets=dict(secrets), fetched_at=time.time())
    return secrets, warnings
|
||||
|
||||
|
||||
def _run_bws_list(
    bws: Path, access_token: str, project_id: str
) -> Tuple[Dict[str, str], List[str]]:
    """Run ``bws secret list <project_id> --output json`` and parse the result.

    The access token is handed to the child through the
    ``BWS_ACCESS_TOKEN`` environment variable rather than on the command
    line.  Output is decoded as UTF-8 with replacement so that an
    unexpected locale codec can never raise ``UnicodeDecodeError`` here.

    Returns:
        ``(secrets, warnings)``.  Items that are not dicts, or whose
        ``key``/``value`` are not strings, are skipped silently; items
        whose key is not a valid env-var name are skipped with a warning.

    Raises:
        RuntimeError: on timeout, failure to start the process, non-zero
            exit, non-JSON output, or a JSON payload that is not a list.
    """
    import re  # local import: only used to clean up error text

    cmd = [str(bws), "secret", "list", project_id, "--output", "json"]
    env = os.environ.copy()
    env["BWS_ACCESS_TOKEN"] = access_token
    # Make sure we're not echoing telemetry / colour codes into json.
    env.setdefault("NO_COLOR", "1")

    try:
        proc = subprocess.run(  # noqa: S603 — bws path is trusted
            cmd,
            env=env,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=_BWS_RUN_TIMEOUT,
        )
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(
            f"bws timed out after {_BWS_RUN_TIMEOUT}s fetching secrets"
        ) from exc
    except OSError as exc:
        raise RuntimeError(f"failed to invoke bws: {exc}") from exc

    if proc.returncode != 0:
        # bws writes auth/network errors to stderr in plain English.
        # Strip whole ANSI CSI sequences (not just the ESC byte, which
        # would leave "[31m"-style residue) and surface the first 200 chars.
        err = proc.stderr or proc.stdout or ""
        err = re.sub(r"\x1b\[[0-?]*[ -/]*[@-~]", "", err).replace("\x1b", "")
        err = err.strip()
        raise RuntimeError(
            f"bws exited {proc.returncode}: {err[:200]}"
        )

    raw = proc.stdout.strip()
    if not raw:
        return {}, ["bws returned no output (empty project?)"]

    try:
        payload = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise RuntimeError(f"bws returned non-JSON output: {exc}") from exc

    if not isinstance(payload, list):
        raise RuntimeError(
            f"bws returned unexpected shape: {type(payload).__name__}"
        )

    secrets: Dict[str, str] = {}
    warnings: List[str] = []
    for item in payload:
        if not isinstance(item, dict):
            continue
        key = item.get("key")
        value = item.get("value")
        if not isinstance(key, str) or not isinstance(value, str):
            continue
        if not _is_valid_env_name(key):
            warnings.append(
                f"Skipping secret {key!r}: not a valid env-var name"
            )
            continue
        secrets[key] = value
    return secrets, warnings
|
||||
|
||||
|
||||
def _is_valid_env_name(name: str) -> bool:
|
||||
if not name:
|
||||
return False
|
||||
if not (name[0].isalpha() or name[0] == "_"):
|
||||
return False
|
||||
return all(c.isalnum() or c == "_" for c in name)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public entry point — called from hermes_cli.env_loader
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def apply_bitwarden_secrets(
    *,
    enabled: bool,
    access_token_env: str = "BWS_ACCESS_TOKEN",
    project_id: str = "",
    override_existing: bool = False,
    cache_ttl_seconds: float = 300,
    auto_install: bool = True,
) -> FetchResult:
    """Pull secrets from BSM and set them on ``os.environ``.

    This is the function ``load_hermes_dotenv()`` calls after the .env
    files have loaded.  It is intentionally defensive — any failure
    returns a :class:`FetchResult` with ``error`` set; it never raises.
    To honour that, unexpected exception types from binary discovery or
    the fetch are captured as well as the documented ``RuntimeError``.

    Parameters mirror the ``secrets.bitwarden.*`` config keys so the
    caller can just splat the dict in.

    Args:
        enabled: when False, return an empty result without doing anything.
        access_token_env: name of the env var holding the access token.
        project_id: BSM project to pull from.
        override_existing: when False, env vars that already hold a
            non-empty value are left alone and reported in ``skipped``.
        cache_ttl_seconds: forwarded to :func:`fetch_bitwarden_secrets`.
        auto_install: whether a missing ``bws`` may be downloaded.
    """
    result = FetchResult()

    if not enabled:
        return result

    access_token = os.environ.get(access_token_env, "").strip()
    if not access_token:
        result.error = (
            f"secrets.bitwarden.enabled is true but {access_token_env} is "
            "not set. Run `hermes secrets bitwarden setup`."
        )
        return result

    if not project_id:
        result.error = (
            "secrets.bitwarden.project_id is empty. "
            "Run `hermes secrets bitwarden setup`."
        )
        return result

    try:
        binary = find_bws(install_if_missing=auto_install)
    except Exception as exc:  # noqa: BLE001 — never block startup
        result.error = f"could not locate bws binary: {exc}"
        return result
    result.binary_path = binary
    if binary is None:
        if auto_install:
            # Auto-install was attempted and failed; say so rather than
            # claiming it is disabled.
            result.error = (
                "bws binary not available — auto-install failed and `bws` "
                "is not on PATH. "
                "Run `hermes secrets bitwarden setup` to install."
            )
        else:
            result.error = (
                "bws binary not available and auto-install is disabled. "
                "Run `hermes secrets bitwarden setup` to install."
            )
        return result

    try:
        secrets, warnings = fetch_bitwarden_secrets(
            access_token=access_token,
            project_id=project_id,
            binary=binary,
            cache_ttl_seconds=cache_ttl_seconds,
        )
    except RuntimeError as exc:
        result.error = str(exc)
        return result
    except Exception as exc:  # noqa: BLE001 — never block startup
        result.error = (
            f"unexpected {type(exc).__name__} while fetching secrets: {exc}"
        )
        return result

    result.secrets = secrets
    result.warnings.extend(warnings)

    for key, value in secrets.items():
        if key == access_token_env:
            # Don't let BSM clobber the very token we used to fetch
            # itself — that would be a footgun if someone stored the
            # token as a BSM secret too.
            result.skipped.append(key)
            continue
        if not override_existing and os.environ.get(key):
            result.skipped.append(key)
            continue
        os.environ[key] = value
        result.applied.append(key)

    return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test hook — used by hermetic tests to flush the cache between cases.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _reset_cache_for_tests() -> None:
    """Empty the module-level fetch cache so test cases don't share state."""
    _CACHE.clear()
|
||||
+58
-3
@@ -12,7 +12,7 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import get_config_path, get_skills_dir
|
||||
from hermes_constants import get_config_path, get_skills_dir, is_termux
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -24,7 +24,43 @@ PLATFORM_MAP = {
|
||||
"windows": "win32",
|
||||
}
|
||||
|
||||
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
|
||||
EXCLUDED_SKILL_DIRS = frozenset(
|
||||
(
|
||||
".git",
|
||||
".github",
|
||||
".hub",
|
||||
".archive",
|
||||
".venv",
|
||||
"venv",
|
||||
"node_modules",
|
||||
"site-packages",
|
||||
"__pycache__",
|
||||
".tox",
|
||||
".nox",
|
||||
".pytest_cache",
|
||||
".mypy_cache",
|
||||
".ruff_cache",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def is_excluded_skill_path(path) -> bool:
    """Return True when any component of *path* is in EXCLUDED_SKILL_DIRS.

    Intended for filtering SKILL.md paths produced by ``rglob`` so that
    dependency, virtualenv, VCS, and cache directories are pruned using
    one shared exclusion set.

    Accepts a Path (anything exposing ``.parts``) or a string; other
    objects are converted with ``str()`` first.
    """
    if hasattr(path, "parts"):
        components = path.parts
    else:
        from pathlib import PurePath

        components = PurePath(str(path)).parts

    for component in components:
        if component in EXCLUDED_SKILL_DIRS:
            return True
    return False
|
||||
|
||||
|
||||
# ── Lazy YAML loader ─────────────────────────────────────────────────────
|
||||
|
||||
@@ -100,6 +136,14 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
|
||||
|
||||
If the field is absent or empty the skill is compatible with **all**
|
||||
platforms (backward-compatible default).
|
||||
|
||||
Termux note: on Termux/Android, ``sys.platform`` is ``"linux"`` on
|
||||
older Pythons but became ``"android"`` on Python 3.13+. Termux is a
|
||||
Linux userland riding on the Android kernel, so skills tagged
|
||||
``linux`` are treated as compatible in Termux regardless of which
|
||||
``sys.platform`` value Python reports. Individual Linux commands
|
||||
inside a skill may still misbehave (no systemd, BusyBox utils, no
|
||||
apt/dnf, etc.) but that is on the skill, not on platform gating.
|
||||
"""
|
||||
platforms = frontmatter.get("platforms")
|
||||
if not platforms:
|
||||
@@ -107,11 +151,21 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
|
||||
if not isinstance(platforms, list):
|
||||
platforms = [platforms]
|
||||
current = sys.platform
|
||||
running_in_termux = is_termux()
|
||||
for platform in platforms:
|
||||
normalized = str(platform).lower().strip()
|
||||
mapped = PLATFORM_MAP.get(normalized, normalized)
|
||||
if current.startswith(mapped):
|
||||
return True
|
||||
# Termux runs a Linux userland on Android. Accept linux-tagged
|
||||
# skills regardless of whether sys.platform is "linux" (pre-3.13
|
||||
# Termux) or "android" (Python 3.13+ Termux, and any other
|
||||
# Android runtime).
|
||||
if running_in_termux and mapped == "linux":
|
||||
return True
|
||||
# Explicit termux/android tags match a Termux session too.
|
||||
if running_in_termux and mapped in ("termux", "android"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@@ -478,7 +532,8 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
|
||||
def iter_skill_index_files(skills_dir: Path, filename: str):
|
||||
"""Walk skills_dir yielding sorted paths matching *filename*.
|
||||
|
||||
Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
|
||||
Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
|
||||
directories so dependencies cannot register nested skills.
|
||||
"""
|
||||
matches = []
|
||||
for root, dirs, files in os.walk(skills_dir, followlinks=True):
|
||||
|
||||
@@ -130,6 +130,12 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
||||
nous_subscription_prompt = _r.build_nous_subscription_prompt(agent.valid_tool_names)
|
||||
if nous_subscription_prompt:
|
||||
stable_parts.append(nous_subscription_prompt)
|
||||
|
||||
# App tools (500+ external integrations) behavioural guidance
|
||||
app_tools_prompt = _r.build_app_tools_prompt(agent.valid_tool_names)
|
||||
if app_tools_prompt:
|
||||
stable_parts.append(app_tools_prompt)
|
||||
|
||||
# Tool-use enforcement: tells the model to actually call tools instead
|
||||
# of describing intended actions. Controlled by config.yaml
|
||||
# agent.tool_use_enforcement:
|
||||
|
||||
@@ -112,17 +112,31 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
def convert_messages(
|
||||
self, messages: list[dict[str, Any]], **kwargs
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
"""Messages are already in OpenAI format — strip internal fields
|
||||
that strict chat-completions providers reject with HTTP 400/422.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id``/``response_item_id``
|
||||
on tool_calls) that strict chat-completions providers reject with 400/422.
|
||||
Strips:
|
||||
|
||||
- Codex Responses API fields: ``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id`` /
|
||||
``response_item_id`` on ``tool_calls`` entries.
|
||||
- ``tool_name`` on tool-result messages — written by
|
||||
``make_tool_result_message()`` for the SQLite FTS index, but not
|
||||
part of the Chat Completions schema. Strict providers (Fireworks,
|
||||
Moonshot/Kimi) reject any payload containing it with
|
||||
``Extra inputs are not permitted, field: 'messages[N].tool_name'``.
|
||||
Permissive providers (OpenRouter, MiniMax) silently ignore the
|
||||
field, which masked the bug for months.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
|
||||
if (
|
||||
"codex_reasoning_items" in msg
|
||||
or "codex_message_items" in msg
|
||||
or "tool_name" in msg
|
||||
):
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
@@ -145,6 +159,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
continue
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
msg.pop("codex_message_items", None)
|
||||
msg.pop("tool_name", None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
|
||||
@@ -116,14 +116,11 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
if reasoning_enabled and is_xai_responses:
|
||||
from agent.model_metadata import grok_supports_reasoning_effort
|
||||
|
||||
# NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content``
|
||||
# any more. xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects
|
||||
# replayed encrypted reasoning items on turn 2+ — see
|
||||
# _chat_messages_to_responses_input docstring. Requesting the field
|
||||
# back would just have us cache something we then must strip. Grok
|
||||
# still reasons natively each turn; coherence across turns rides on
|
||||
# the visible message text alone.
|
||||
kwargs["include"] = []
|
||||
# Ask xAI to echo back encrypted reasoning items so we can
|
||||
# replay them on subsequent turns for cross-turn coherence.
|
||||
# See agent/codex_responses_adapter._chat_messages_to_responses_input
|
||||
# for the May 2026 reversal of the earlier suppression gate.
|
||||
kwargs["include"] = ["reasoning.encrypted_content"]
|
||||
# xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
|
||||
# / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
|
||||
# those models reason natively. Only send the effort dial when
|
||||
|
||||
@@ -6501,12 +6501,6 @@ class HermesCLI:
|
||||
if self.agent:
|
||||
self.agent.session_id = new_session_id
|
||||
self.agent.session_start = now
|
||||
# Redirect the JSON session log to the new branch session file so
|
||||
# messages written after branching land in the correct file.
|
||||
if hasattr(self.agent, "session_log_file") and hasattr(self.agent, "logs_dir"):
|
||||
self.agent.session_log_file = (
|
||||
self.agent.logs_dir / f"session_{new_session_id}.json"
|
||||
)
|
||||
self.agent.reset_session_state()
|
||||
if hasattr(self.agent, "_last_flushed_db_idx"):
|
||||
self.agent._last_flushed_db_idx = len(self.conversation_history)
|
||||
@@ -10227,6 +10221,7 @@ class HermesCLI:
|
||||
self._voice_processing = True
|
||||
|
||||
submitted = False
|
||||
transcription_failed = False
|
||||
wav_path = None
|
||||
try:
|
||||
if self._voice_recorder is None:
|
||||
@@ -10275,18 +10270,24 @@ class HermesCLI:
|
||||
else:
|
||||
error = result.get("error", "Unknown error")
|
||||
_cprint(f"\n{_DIM}Transcription failed: {error}{_RST}")
|
||||
transcription_failed = True
|
||||
|
||||
except Exception as e:
|
||||
_cprint(f"\n{_DIM}Voice processing error: {e}{_RST}")
|
||||
transcription_failed = wav_path is not None
|
||||
finally:
|
||||
with self._voice_lock:
|
||||
self._voice_processing = False
|
||||
if hasattr(self, '_app') and self._app:
|
||||
self._app.invalidate()
|
||||
# Clean up temp file
|
||||
# Clean up temp file unless transcription failed. On failure, keep
|
||||
# the source recording so long dictation is not lost.
|
||||
try:
|
||||
if wav_path and os.path.isfile(wav_path):
|
||||
os.unlink(wav_path)
|
||||
if transcription_failed:
|
||||
_cprint(f"{_DIM}Recording preserved at: {wav_path}{_RST}")
|
||||
else:
|
||||
os.unlink(wav_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -14429,13 +14430,54 @@ def main(
|
||||
# Only print the final response and parseable session info.
|
||||
cli.tool_progress_mode = "off"
|
||||
if cli._ensure_runtime_credentials():
|
||||
effective_query = query
|
||||
effective_query: Any = query
|
||||
if single_query_images:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query,
|
||||
single_query_images,
|
||||
announce=False,
|
||||
)
|
||||
# Honour the same image-routing decision used by the
|
||||
# interactive path. With a vision-capable model (incl.
|
||||
# custom-provider models declared via
|
||||
# `model.supports_vision: true`), attach images natively
|
||||
# as image_url content parts. Otherwise fall back to the
|
||||
# text-pipeline (vision_analyze pre-description).
|
||||
_img_mode = "text"
|
||||
_build_parts = None
|
||||
try:
|
||||
from agent.image_routing import (
|
||||
build_native_content_parts as _build_parts, # noqa: F811
|
||||
)
|
||||
from agent.image_routing import decide_image_input_mode
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
_img_mode = decide_image_input_mode(
|
||||
(cli.provider or "").strip(),
|
||||
(cli.model or "").strip(),
|
||||
load_config(),
|
||||
)
|
||||
except Exception:
|
||||
_img_mode = "text"
|
||||
|
||||
if _img_mode == "native" and _build_parts is not None:
|
||||
try:
|
||||
_parts, _skipped = _build_parts(
|
||||
query if isinstance(query, str) else "",
|
||||
[str(p) for p in single_query_images],
|
||||
)
|
||||
if any(p.get("type") == "image_url" for p in _parts):
|
||||
effective_query = _parts
|
||||
else:
|
||||
# All images unreadable — text fallback.
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query, single_query_images, announce=False,
|
||||
)
|
||||
except Exception:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query, single_query_images, announce=False,
|
||||
)
|
||||
else:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query,
|
||||
single_query_images,
|
||||
announce=False,
|
||||
)
|
||||
turn_route = cli._resolve_turn_agent_config(effective_query)
|
||||
if turn_route["signature"] != cli._active_agent_route_signature:
|
||||
cli.agent = None
|
||||
|
||||
+7
-1
@@ -830,6 +830,8 @@ def load_gateway_config() -> GatewayConfig:
|
||||
bridged["require_mention"] = platform_cfg["require_mention"]
|
||||
if plat == Platform.TELEGRAM and "allowed_chats" in platform_cfg:
|
||||
bridged["allowed_chats"] = platform_cfg["allowed_chats"]
|
||||
if plat == Platform.TELEGRAM and "group_allowed_chats" in platform_cfg:
|
||||
bridged["group_allowed_chats"] = platform_cfg["group_allowed_chats"]
|
||||
if plat == Platform.TELEGRAM and "allowed_topics" in platform_cfg:
|
||||
bridged["allowed_topics"] = platform_cfg["allowed_topics"]
|
||||
if "free_response_channels" in platform_cfg:
|
||||
@@ -838,6 +840,8 @@ def load_gateway_config() -> GatewayConfig:
|
||||
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
|
||||
if "exclusive_bot_mentions" in platform_cfg:
|
||||
bridged["exclusive_bot_mentions"] = platform_cfg["exclusive_bot_mentions"]
|
||||
if plat == Platform.TELEGRAM and "observe_unmentioned_group_messages" in platform_cfg:
|
||||
bridged["observe_unmentioned_group_messages"] = platform_cfg["observe_unmentioned_group_messages"]
|
||||
if "dm_policy" in platform_cfg:
|
||||
bridged["dm_policy"] = platform_cfg["dm_policy"]
|
||||
if "allow_from" in platform_cfg:
|
||||
@@ -1024,6 +1028,8 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["TELEGRAM_EXCLUSIVE_BOT_MENTIONS"] = str(telegram_cfg["exclusive_bot_mentions"]).lower()
|
||||
if "guest_mode" in telegram_cfg and not os.getenv("TELEGRAM_GUEST_MODE"):
|
||||
os.environ["TELEGRAM_GUEST_MODE"] = str(telegram_cfg["guest_mode"]).lower()
|
||||
if "observe_unmentioned_group_messages" in telegram_cfg and not os.getenv("TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES"):
|
||||
os.environ["TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES"] = str(telegram_cfg["observe_unmentioned_group_messages"]).lower()
|
||||
frc = telegram_cfg.get("free_response_chats")
|
||||
if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
|
||||
if isinstance(frc, list):
|
||||
@@ -1074,7 +1080,7 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if isinstance(group_allowed_chats, list):
|
||||
group_allowed_chats = ",".join(str(v) for v in group_allowed_chats)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_CHATS"] = str(group_allowed_chats)
|
||||
for _telegram_extra_key in ("guest_mode", "disable_link_previews"):
|
||||
for _telegram_extra_key in ("guest_mode", "disable_link_previews", "observe_unmentioned_group_messages"):
|
||||
if _telegram_extra_key in telegram_cfg:
|
||||
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
|
||||
+95
-20
@@ -18,6 +18,7 @@ Security features (based on OWASP + NIST SP 800-63-4 guidance):
|
||||
Storage: ~/.hermes/pairing/
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
@@ -148,6 +149,11 @@ class PairingStore:
|
||||
|
||||
# ----- Pending codes -----
|
||||
|
||||
@staticmethod
|
||||
def _hash_code(code: str, salt: bytes) -> str:
|
||||
"""Hash a pairing code with the given salt using SHA-256."""
|
||||
return hashlib.sha256(salt + code.encode("utf-8")).hexdigest()
|
||||
|
||||
def generate_code(
|
||||
self, platform: str, user_id: str, user_name: str = ""
|
||||
) -> Optional[str]:
|
||||
@@ -158,6 +164,9 @@ class PairingStore:
|
||||
- User is rate-limited (too recent request)
|
||||
- Max pending codes reached for this platform
|
||||
- User/platform is in lockout due to failed attempts
|
||||
|
||||
The code is NOT stored in plaintext. Only a salted SHA-256 hash is
|
||||
persisted so that reading the pending file does not reveal codes.
|
||||
"""
|
||||
with self._lock:
|
||||
self._cleanup_expired(platform)
|
||||
@@ -178,8 +187,17 @@ class PairingStore:
|
||||
# Generate cryptographically random code
|
||||
code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))
|
||||
|
||||
# Store pending request
|
||||
pending[code] = {
|
||||
# Hash the code with a random salt before storing
|
||||
salt = os.urandom(16)
|
||||
code_hash = self._hash_code(code, salt)
|
||||
|
||||
# Use a unique entry id as the key (not the code itself)
|
||||
entry_id = secrets.token_hex(8)
|
||||
|
||||
# Store pending request with hashed code
|
||||
pending[entry_id] = {
|
||||
"hash": code_hash,
|
||||
"salt": salt.hex(),
|
||||
"user_id": user_id,
|
||||
"user_name": user_name,
|
||||
"created_at": time.time(),
|
||||
@@ -195,10 +213,16 @@ class PairingStore:
|
||||
"""
|
||||
Approve a pairing code. Adds the user to the approved list.
|
||||
|
||||
Returns {user_id, user_name} on success, None if code is
|
||||
Returns ``{user_id, user_name}`` on success, ``None`` if the code is
|
||||
invalid/expired OR the platform is currently locked out after
|
||||
``MAX_FAILED_ATTEMPTS`` failed approvals (#10195). Callers can
|
||||
disambiguate with ``_is_locked_out(platform)``.
|
||||
|
||||
Verification: the user-provided code is hashed with each stored
|
||||
entry's salt and compared to the stored hash using constant-time
|
||||
comparison. Pre-hash entries (legacy plaintext-key format from
|
||||
pre-upgrade pending.json files) are silently ignored — they get
|
||||
pruned at TTL by ``_cleanup_expired``.
|
||||
"""
|
||||
with self._lock:
|
||||
self._cleanup_expired(platform)
|
||||
@@ -213,34 +237,73 @@ class PairingStore:
|
||||
return None
|
||||
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
if code not in pending:
|
||||
|
||||
# Find the entry whose hash matches the provided code.
|
||||
# Tolerate legacy plaintext-key entries (no salt/hash) and
|
||||
# malformed entries — skip them rather than KeyError, so an
|
||||
# in-place upgrade across an existing pending.json doesn't
|
||||
# crash on the first approve call. Legacy entries get pruned
|
||||
# at their TTL by _cleanup_expired.
|
||||
matched_key = None
|
||||
matched_entry = None
|
||||
for entry_id, entry in pending.items():
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
if "salt" not in entry or "hash" not in entry:
|
||||
continue
|
||||
try:
|
||||
salt = bytes.fromhex(entry["salt"])
|
||||
except ValueError:
|
||||
continue
|
||||
candidate_hash = self._hash_code(code, salt)
|
||||
if secrets.compare_digest(candidate_hash, entry["hash"]):
|
||||
matched_key = entry_id
|
||||
matched_entry = entry
|
||||
break
|
||||
|
||||
if matched_key is None:
|
||||
self._record_failed_attempt(platform)
|
||||
return None
|
||||
|
||||
entry = pending.pop(code)
|
||||
del pending[matched_key]
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
|
||||
# Add to approved list
|
||||
self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
|
||||
self._approve_user(platform, matched_entry["user_id"],
|
||||
matched_entry.get("user_name", ""))
|
||||
|
||||
return {
|
||||
"user_id": entry["user_id"],
|
||||
"user_name": entry.get("user_name", ""),
|
||||
"user_id": matched_entry["user_id"],
|
||||
"user_name": matched_entry.get("user_name", ""),
|
||||
}
|
||||
|
||||
def list_pending(self, platform: str = None) -> list:
|
||||
"""List pending pairing requests, optionally filtered by platform."""
|
||||
"""List pending pairing requests, optionally filtered by platform.
|
||||
|
||||
Codes are stored hashed — the ``code`` field is replaced with the
|
||||
first 8 hex characters of the hash so admins can distinguish entries
|
||||
without revealing the original code. Legacy plaintext-key entries
|
||||
(pre-hash format) are shown with a "legacy" placeholder so admins
|
||||
can see them age out without crashing on a missing ``hash`` field.
|
||||
"""
|
||||
results = []
|
||||
platforms = [platform] if platform else self._all_platforms("pending")
|
||||
for p in platforms:
|
||||
self._cleanup_expired(p)
|
||||
pending = self._load_json(self._pending_path(p))
|
||||
for code, info in pending.items():
|
||||
age_min = int((time.time() - info["created_at"]) / 60)
|
||||
for entry_id, info in pending.items():
|
||||
if not isinstance(info, dict):
|
||||
continue
|
||||
created_at = info.get("created_at")
|
||||
if not isinstance(created_at, (int, float)):
|
||||
continue
|
||||
age_min = int((time.time() - created_at) / 60)
|
||||
hash_val = info.get("hash")
|
||||
code_display = hash_val[:8] if isinstance(hash_val, str) else "legacy"
|
||||
results.append({
|
||||
"platform": p,
|
||||
"code": code,
|
||||
"user_id": info["user_id"],
|
||||
"code": code_display,
|
||||
"user_id": info.get("user_id", ""),
|
||||
"user_name": info.get("user_name", ""),
|
||||
"age_minutes": age_min,
|
||||
})
|
||||
@@ -297,17 +360,29 @@ class PairingStore:
|
||||
# ----- Cleanup -----
|
||||
|
||||
def _cleanup_expired(self, platform: str) -> None:
    """Remove expired pending codes for *platform*.

    Tolerant of malformed / legacy entries: anything that is not a dict,
    or that lacks a numeric ``created_at``, is treated as expired. The
    pending file is only rewritten when at least one entry was removed.
    """
    path = self._pending_path(platform)
    pending = self._load_json(path)
    now = time.time()

    def _is_stale(info) -> bool:
        # Non-dict entries cannot carry a timestamp at all.
        if not isinstance(info, dict):
            return True
        created_at = info.get("created_at")
        # A missing or non-numeric timestamp cannot be aged, so drop it.
        if not isinstance(created_at, (int, float)):
            return True
        return (now - created_at) > CODE_TTL_SECONDS

    # Collect keys first so the dict is not mutated while iterating it.
    expired = [entry_id for entry_id, info in pending.items() if _is_stale(info)]
    if expired:
        for entry_id in expired:
            del pending[entry_id]
        self._save_json(path, pending)
|
||||
|
||||
def _all_platforms(self, suffix: str) -> list:
|
||||
|
||||
@@ -2706,8 +2706,13 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
Discord's TYPING_START gateway event is unreliable in DMs for bots.
|
||||
Instead, start a background loop that hits the typing endpoint every
|
||||
8 seconds (typing indicator lasts ~10s). The loop is cancelled when
|
||||
12 seconds (typing indicator lasts ~10s). The loop is cancelled when
|
||||
stop_typing() is called (after the response is sent).
|
||||
|
||||
Rate-limit handling: if a 429 is encountered, the loop logs a
|
||||
warning, sleeps for the ``retry_after`` duration (or a sensible
|
||||
default), and continues — it does NOT die on a single rate-limit
|
||||
hit. Only CancelledError (from stop_typing) stops the loop.
|
||||
"""
|
||||
if not self._client:
|
||||
return
|
||||
@@ -2727,9 +2732,22 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except asyncio.CancelledError:
|
||||
return
|
||||
except Exception as e:
|
||||
logger.debug("Discord typing indicator failed for %s: %s", chat_id, e)
|
||||
return
|
||||
await asyncio.sleep(8)
|
||||
# Don't die on 429 — backoff and continue
|
||||
retry_after = self._extract_discord_retry_after(e)
|
||||
if retry_after is not None:
|
||||
logger.warning(
|
||||
"Typing indicator rate-limited for %s; retrying in %.1fs",
|
||||
chat_id, retry_after,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"Discord typing indicator failed for %s: %s",
|
||||
chat_id, e,
|
||||
)
|
||||
return
|
||||
await asyncio.sleep(retry_after)
|
||||
continue
|
||||
await asyncio.sleep(12)
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
finally:
|
||||
|
||||
@@ -8,12 +8,14 @@ Uses python-telegram-bot library for:
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import html as _html
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -4178,6 +4180,23 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return bool(configured)
|
||||
return os.getenv("TELEGRAM_REQUIRE_MENTION", "false").lower() in {"true", "1", "yes", "on"}
|
||||
|
||||
def _telegram_observe_unmentioned_group_messages(self) -> bool:
|
||||
"""Return whether skipped unmentioned group messages are stored as context.
|
||||
|
||||
When enabled with ``require_mention``, Telegram matches the Yuanbao /
|
||||
OpenClaw-style group UX: observe ordinary group chatter in the session
|
||||
transcript, but only dispatch the agent when the bot is explicitly
|
||||
addressed.
|
||||
"""
|
||||
configured = self.config.extra.get("observe_unmentioned_group_messages")
|
||||
if configured is None:
|
||||
configured = self.config.extra.get("ingest_unmentioned_group_messages")
|
||||
if configured is not None:
|
||||
if isinstance(configured, str):
|
||||
return configured.lower() in {"true", "1", "yes", "on"}
|
||||
return bool(configured)
|
||||
return os.getenv("TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES", "false").lower() in {"true", "1", "yes", "on"}
|
||||
|
||||
def _telegram_guest_mode(self) -> bool:
|
||||
"""Return whether non-allowlisted groups may trigger via direct @mention."""
|
||||
configured = self.config.extra.get("guest_mode")
|
||||
@@ -4219,6 +4238,30 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
def _telegram_group_allowed_chats(self) -> set[str]:
|
||||
"""Return Telegram chats authorized at group scope."""
|
||||
raw = self.config.extra.get("group_allowed_chats")
|
||||
if raw is None:
|
||||
raw = os.getenv("TELEGRAM_GROUP_ALLOWED_CHATS", "")
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
def _telegram_observe_allowed_chats(self) -> set[str]:
|
||||
"""Chats where observed group context may use a shared source.
|
||||
|
||||
``group_allowed_chats`` is the gateway authorization allowlist for
|
||||
user-less group sources. ``allowed_chats`` remains an optional response
|
||||
gate; when set, observed context must satisfy both lists.
|
||||
"""
|
||||
group_allowed = self._telegram_group_allowed_chats()
|
||||
if not group_allowed:
|
||||
return set()
|
||||
response_allowed = self._telegram_allowed_chats()
|
||||
if response_allowed:
|
||||
return group_allowed & response_allowed
|
||||
return group_allowed
|
||||
|
||||
def _telegram_allowed_topics(self) -> set[str]:
|
||||
"""Return the whitelist of Telegram forum topic IDs this bot handles.
|
||||
|
||||
@@ -4466,6 +4509,126 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
cleaned = re.sub(rf"(?i)@{username}\b[,:\-]*\s*", "", text).strip()
|
||||
return cleaned or text
|
||||
|
||||
def _should_observe_unmentioned_group_message(self, message: Message) -> bool:
    """Return True when a group message should be stored but not dispatched.

    The message qualifies only if observation is enabled, it comes from a
    group chat, its topic/thread is not filtered out, the chat is in the
    observe allowlist, and the message was skipped solely because it did
    not address the bot while ``require_mention`` is active.
    """
    if not (self._telegram_observe_unmentioned_group_messages() and self._is_group_chat(message)):
        return False

    thread = getattr(message, "message_thread_id", None)
    topics = self._telegram_allowed_topics()
    if topics:
        # Messages without a thread id are matched against the general topic id.
        topic_key = self._GENERAL_TOPIC_THREAD_ID if thread is None else str(thread)
        if topic_key not in topics:
            return False

    if thread is not None:
        try:
            ignored = int(thread) in self._telegram_ignored_threads()
        except (TypeError, ValueError):
            # A thread id that cannot be interpreted is never observed.
            return False
        if ignored:
            return False

    chat_key = str(getattr(getattr(message, "chat", None), "id", ""))
    if self._telegram_exclusive_bot_mentions() and self._explicit_bot_mentions_exclude_self(message):
        return False

    # Observed context is shared at chat/topic scope so a later trigger from
    # another user can see it. Require an explicit chat allowlist; that keeps
    # shared observed history limited to operator-approved groups and lets
    # gateway authorization pass even after the shared session source drops
    # the per-sender user_id.
    observe_scope = self._telegram_observe_allowed_chats()
    if not observe_scope or chat_key not in observe_scope:
        return False

    # Only observe messages skipped by the require_mention gate. If the
    # message would be processed normally, let the dispatcher handle it; if
    # require_mention is disabled, every group message is a request.
    if chat_key in self._telegram_free_response_chats():
        return False
    if not self._telegram_require_mention():
        return False
    addressed_to_bot = (
        self._is_reply_to_bot(message)
        or self._message_mentions_bot(message)
        or self._message_matches_mention_patterns(message)
    )
    return not addressed_to_bot
|
||||
|
||||
def _telegram_group_observe_shared_source(self, source):
|
||||
"""Return a chat/topic-scoped source for observed Telegram group context."""
|
||||
return dataclasses.replace(source, user_id=None, user_name=None, user_id_alt=None)
|
||||
|
||||
def _telegram_group_observe_attributed_text(self, event: MessageEvent) -> str:
    """Prefix the event text with a ``[sender|user_id]`` attribution line.

    A missing user id becomes ``"unknown"``; a missing user name falls back
    to the user id; missing text becomes an empty string.
    """
    source = event.source
    uid = source.user_id or "unknown"
    display_name = source.user_name or uid
    body = event.text or ""
    return f"[{display_name}|{uid}]\n{body}"
|
||||
|
||||
def _telegram_group_observe_channel_prompt(self) -> str:
|
||||
username = getattr(getattr(self, "_bot", None), "username", None) or "unknown"
|
||||
bot_id = getattr(getattr(self, "_bot", None), "id", None) or "unknown"
|
||||
return (
|
||||
"You are handling a Telegram group chat message.\n"
|
||||
f"- Your identity: user_id={bot_id}, @-mention name in this group=@{username}\n"
|
||||
"- Lines in history prefixed with `[nickname|user_id]` are observed Telegram group context "
|
||||
"and are not necessarily addressed to you.\n"
|
||||
"- Treat only the current new message as a request explicitly directed at you, "
|
||||
"and answer it directly."
|
||||
)
|
||||
|
||||
def _apply_telegram_group_observe_attribution(self, event: MessageEvent) -> MessageEvent:
    """Align triggered group turns with observed-history attribution.

    The event is returned unchanged unless observation is enabled, the
    event carries a raw group-chat message, and that chat is in the
    observe allowlist. Otherwise a copy is returned whose text carries the
    sender attribution prefix, whose source is the shared chat-scoped
    source, and whose channel prompt has the observe prompt appended.
    """
    if not self._telegram_observe_unmentioned_group_messages():
        return event

    raw = getattr(event, "raw_message", None)
    if not raw or not self._is_group_chat(raw):
        return event

    chat_key = str(getattr(getattr(raw, "chat", None), "id", ""))
    observe_scope = self._telegram_observe_allowed_chats()
    if not observe_scope or chat_key not in observe_scope:
        return event

    shared = self._telegram_group_observe_shared_source(event.source)
    observe_prompt = self._telegram_group_observe_channel_prompt()
    if event.channel_prompt:
        # Keep any existing channel prompt and append the observe guidance.
        merged_prompt = f"{event.channel_prompt}\n\n{observe_prompt}"
    else:
        merged_prompt = observe_prompt
    attributed = self._telegram_group_observe_attributed_text(event)
    return dataclasses.replace(
        event,
        text=attributed,
        source=shared,
        channel_prompt=merged_prompt,
    )
|
||||
|
||||
def _observe_unmentioned_group_message(self, message: Message, msg_type: MessageType, update_id: Optional[int] = None) -> None:
    """Append skipped group chatter to the target session without dispatching.

    Does nothing when no ``_session_store`` is attached. Any failure while
    building or storing the entry is logged as a warning and swallowed, so
    observing never breaks the caller's message handling.
    """
    transcript_store = getattr(self, "_session_store", None)
    if not transcript_store:
        return
    try:
        observed = self._build_message_event(message, msg_type, update_id=update_id)
        # Store under the chat-scoped source so the entry is shared by senders.
        scope = self._telegram_group_observe_shared_source(observed.source)
        session = transcript_store.get_or_create_session(scope)
        record = {
            "role": "user",
            "content": self._telegram_group_observe_attributed_text(observed),
            "timestamp": datetime.now(tz=timezone.utc).isoformat(),
            "observed": True,
        }
        if observed.message_id:
            record["message_id"] = str(observed.message_id)
        transcript_store.append_to_transcript(session.session_id, record)
        logger.info(
            "[%s] Telegram group message observed (no bot trigger): chat=%s from=%s",
            getattr(self, "name", "telegram"),
            getattr(getattr(message, "chat", None), "id", "unknown"),
            observed.source.user_id or "unknown",
        )
    except Exception as exc:
        logger.warning(
            "[%s] Failed to observe Telegram group message: %s",
            getattr(self, "name", "telegram"),
            exc,
        )
|
||||
|
||||
def _should_process_message(self, message: Message, *, is_command: bool = False) -> bool:
|
||||
"""Apply Telegram group trigger rules.
|
||||
|
||||
@@ -4590,11 +4753,14 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
if not msg or not msg.text:
|
||||
return
|
||||
if not self._should_process_message(msg):
|
||||
if self._should_observe_unmentioned_group_message(msg):
|
||||
self._observe_unmentioned_group_message(msg, MessageType.TEXT, update_id=update.update_id)
|
||||
return
|
||||
await self._ensure_forum_commands(update.message)
|
||||
|
||||
event = self._build_message_event(msg, MessageType.TEXT, update_id=update.update_id)
|
||||
event.text = self._clean_bot_trigger_text(event.text)
|
||||
event = self._apply_telegram_group_observe_attribution(event)
|
||||
self._enqueue_text_event(event)
|
||||
|
||||
async def _handle_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
@@ -4607,6 +4773,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
await self._ensure_forum_commands(msg)
|
||||
|
||||
event = self._build_message_event(msg, MessageType.COMMAND, update_id=update.update_id)
|
||||
event.text = self._clean_bot_trigger_text(event.text)
|
||||
event = self._apply_telegram_group_observe_attribution(event)
|
||||
await self.handle_message(event)
|
||||
|
||||
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
@@ -4615,6 +4783,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
if not msg:
|
||||
return
|
||||
if not self._should_process_message(msg):
|
||||
if self._should_observe_unmentioned_group_message(msg):
|
||||
self._observe_unmentioned_group_message(msg, MessageType.LOCATION, update_id=update.update_id)
|
||||
return
|
||||
|
||||
venue = getattr(msg, "venue", None)
|
||||
@@ -4644,6 +4814,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
event = self._build_message_event(msg, MessageType.LOCATION, update_id=update.update_id)
|
||||
event.text = "\n".join(parts)
|
||||
event = self._apply_telegram_group_observe_attribution(event)
|
||||
await self.handle_message(event)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -4788,8 +4959,23 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
if not update.message:
|
||||
return
|
||||
if not self._should_process_message(update.message):
|
||||
if self._should_observe_unmentioned_group_message(update.message):
|
||||
_m = update.message
|
||||
if _m.sticker:
|
||||
_observe_type = MessageType.STICKER
|
||||
elif _m.photo:
|
||||
_observe_type = MessageType.PHOTO
|
||||
elif _m.video:
|
||||
_observe_type = MessageType.VIDEO
|
||||
elif _m.audio:
|
||||
_observe_type = MessageType.AUDIO
|
||||
elif _m.voice:
|
||||
_observe_type = MessageType.VOICE
|
||||
else:
|
||||
_observe_type = MessageType.DOCUMENT
|
||||
self._observe_unmentioned_group_message(_m, _observe_type, update_id=update.update_id)
|
||||
return
|
||||
|
||||
|
||||
msg = update.message
|
||||
|
||||
# Determine media type
|
||||
@@ -4817,9 +5003,14 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
# Handle stickers: describe via vision tool with caching
|
||||
if msg.sticker:
|
||||
await self._handle_sticker(msg, event)
|
||||
event = self._apply_telegram_group_observe_attribution(event)
|
||||
await self.handle_message(event)
|
||||
return
|
||||
|
||||
|
||||
# Apply observe attribution after caption is set; sticker is handled above
|
||||
# because _handle_sticker overwrites event.text with its vision description.
|
||||
event = self._apply_telegram_group_observe_attribution(event)
|
||||
|
||||
# Download photo to local image cache so the vision tool can access it
|
||||
# even after Telegram's ephemeral file URLs expire (~1 hour).
|
||||
if msg.photo:
|
||||
|
||||
@@ -308,11 +308,26 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
data = json.loads(subs_path.read_text(encoding="utf-8"))
|
||||
if not isinstance(data, dict):
|
||||
return
|
||||
# Merge: static routes take precedence over dynamic ones
|
||||
self._dynamic_routes = {
|
||||
k: v for k, v in data.items()
|
||||
if k not in self._static_routes
|
||||
}
|
||||
# Merge: static routes take precedence over dynamic ones.
|
||||
# Reject any dynamic route whose effective secret is empty —
|
||||
# an empty secret would cause _handle_webhook to skip HMAC
|
||||
# validation entirely, letting unauthenticated callers in.
|
||||
new_dynamic: Dict[str, dict] = {}
|
||||
for k, v in data.items():
|
||||
if k in self._static_routes:
|
||||
continue
|
||||
effective_secret = v.get("secret", self._global_secret)
|
||||
if not effective_secret:
|
||||
logger.warning(
|
||||
"[webhook] Dynamic route '%s' skipped: 'secret' is "
|
||||
"missing or empty. Set a valid HMAC secret, or use "
|
||||
"'%s' to explicitly disable auth (testing only).",
|
||||
k,
|
||||
_INSECURE_NO_AUTH,
|
||||
)
|
||||
continue
|
||||
new_dynamic[k] = v
|
||||
self._dynamic_routes = new_dynamic
|
||||
self._routes = {**self._dynamic_routes, **self._static_routes}
|
||||
self._dynamic_routes_mtime = mtime
|
||||
logger.info(
|
||||
|
||||
@@ -1410,33 +1410,43 @@ class RecallGuardMiddleware(InboundMiddleware):
|
||||
logger.warning("[%s] Recall: failed to resolve session: %s", adapter.name, exc)
|
||||
return
|
||||
|
||||
# Load transcript from canonical store (state.db). See Branch A below
|
||||
# for why we can no longer match by platform `message_id`.
|
||||
# Load transcript from canonical store (state.db). Since PR #29278
|
||||
# added a ``platform_message_id`` column to the messages table and
|
||||
# ``append_to_transcript`` wires the incoming dict's ``message_id``
|
||||
# into it, ``load_transcript`` returns rows with ``message_id`` set
|
||||
# for any message that was observed with one — Branch A1 (exact id
|
||||
# match) is the canonical path again.
|
||||
try:
|
||||
transcript = store.load_transcript(sid)
|
||||
except Exception as exc:
|
||||
logger.warning("[%s] Recall: failed to load transcript: %s", adapter.name, exc)
|
||||
return
|
||||
|
||||
# Branch A: content-match redaction. state.db does NOT preserve the
|
||||
# platform `message_id` (only its own autoincrement primary key), so we
|
||||
# cannot redact by exact id. Match by content instead. Most yuanbao
|
||||
# recalls carry the recalled text via `recalled_content`, which is
|
||||
# sufficient for any non-duplicate message.
|
||||
#
|
||||
# TODO: add a `platform_message_id` column to state.db messages to
|
||||
# restore exact-id matching. Tracked separately.
|
||||
# Branch A1: exact platform message_id match. Authoritative when the
|
||||
# row was persisted with a platform_message_id (observed group
|
||||
# messages and any inbound message whose adapter carried a msg_id).
|
||||
target = None
|
||||
if recalled_content:
|
||||
branch_label = ""
|
||||
for entry in transcript:
|
||||
if entry.get("message_id") == recalled_id:
|
||||
target = entry
|
||||
branch_label = "branch A1: id match"
|
||||
break
|
||||
# Branch A2: content-match fallback for messages that lack an exact
|
||||
# platform id on the row — e.g. agent-processed @bot messages
|
||||
# (run.py doesn't carry msg_id through) or older rows persisted
|
||||
# before the platform_message_id column existed.
|
||||
if target is None and recalled_content:
|
||||
for entry in transcript:
|
||||
if entry.get("role") == "user" and entry.get("content") == recalled_content:
|
||||
target = entry
|
||||
branch_label = "branch A2: content match"
|
||||
break
|
||||
if target is not None:
|
||||
target["content"] = cls._REDACTED
|
||||
try:
|
||||
store.rewrite_transcript(sid, transcript)
|
||||
logger.info("[%s] Recall: redacted msg_id=%s (branch A: content match)", adapter.name, recalled_id)
|
||||
logger.info("[%s] Recall: redacted msg_id=%s (%s)", adapter.name, recalled_id, branch_label)
|
||||
except Exception as exc:
|
||||
logger.warning("[%s] Recall: rewrite_transcript failed: %s", adapter.name, exc)
|
||||
return
|
||||
|
||||
+4
-2
@@ -1109,7 +1109,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
||||
normalized = command_name.lower().replace("_", "-")
|
||||
try:
|
||||
from tools.skills_tool import _get_disabled_skill_names
|
||||
from agent.skill_utils import get_all_skills_dirs
|
||||
from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path
|
||||
disabled = _get_disabled_skill_names()
|
||||
|
||||
# Check disabled skills across all dirs (local + external)
|
||||
@@ -1117,7 +1117,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
||||
if not skills_dir.exists():
|
||||
continue
|
||||
for skill_md in skills_dir.rglob("SKILL.md"):
|
||||
if any(part in {'.git', '.github', '.hub', '.archive'} for part in skill_md.parts):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
slug, declared_name = _skill_slug_from_frontmatter(skill_md)
|
||||
if not slug or not declared_name:
|
||||
@@ -1136,6 +1136,8 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
||||
optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
|
||||
if optional_dir.exists():
|
||||
for skill_md in optional_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
slug, _declared = _skill_slug_from_frontmatter(skill_md)
|
||||
if not slug:
|
||||
continue
|
||||
|
||||
@@ -1271,6 +1271,12 @@ class SessionStore:
|
||||
reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
|
||||
codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
|
||||
codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
|
||||
# Platform-side message id (yuanbao msg_id, telegram update_id, …).
|
||||
# Accept either explicit ``platform_message_id`` or the legacy
|
||||
# ``message_id`` key the JSONL transcript used.
|
||||
platform_message_id=(
|
||||
message.get("platform_message_id") or message.get("message_id")
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
|
||||
+7
-13
@@ -48,7 +48,7 @@ import httpx
|
||||
import yaml
|
||||
|
||||
from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from hermes_constants import OPENROUTER_BASE_URL, secure_parent_dir
|
||||
from utils import atomic_replace, atomic_yaml_write, is_truthy_value
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -1030,10 +1030,8 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
|
||||
auth_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Tighten parent dir to 0o700 so siblings can't traverse to creds.
|
||||
# No-op on Windows (POSIX mode bits not enforced); ignore failures.
|
||||
try:
|
||||
os.chmod(auth_file.parent, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
# secure_parent_dir refuses to chmod / or top-level dirs (#25821).
|
||||
secure_parent_dir(auth_file)
|
||||
auth_store["version"] = AUTH_STORE_VERSION
|
||||
auth_store["updated_at"] = datetime.now(timezone.utc).isoformat()
|
||||
payload = json.dumps(auth_store, indent=2) + "\n"
|
||||
@@ -1863,10 +1861,8 @@ def _read_qwen_cli_tokens() -> Dict[str, Any]:
|
||||
def _save_qwen_cli_tokens(tokens: Dict[str, Any]) -> Path:
|
||||
auth_path = _qwen_cli_auth_path()
|
||||
auth_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
os.chmod(auth_path.parent, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
# secure_parent_dir refuses to chmod / or top-level dirs (#25821).
|
||||
secure_parent_dir(auth_path)
|
||||
# Per-process random temp suffix avoids collisions between concurrent
|
||||
# writers and stale leftovers from a crashed prior write.
|
||||
tmp_path = auth_path.with_name(f"{auth_path.name}.tmp.{os.getpid()}.{uuid.uuid4().hex}")
|
||||
@@ -4168,10 +4164,8 @@ def _write_shared_nous_state(state: Dict[str, Any]) -> None:
|
||||
with _nous_shared_store_lock():
|
||||
path = _nous_shared_store_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
os.chmod(path.parent, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
# secure_parent_dir refuses to chmod / or top-level dirs (#25821).
|
||||
secure_parent_dir(path)
|
||||
tmp = path.with_name(f"{path.name}.tmp.{os.getpid()}.{uuid.uuid4().hex}")
|
||||
# Create with 0o600 atomically via os.open(O_EXCL) — closes the TOCTOU
|
||||
# window where write_text() + post-write chmod briefly exposed Nous
|
||||
|
||||
+66
-3
@@ -508,6 +508,68 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
|
||||
return result
|
||||
|
||||
|
||||
_TELEGRAM_MENU_PRIORITY = (
|
||||
# Most-typed everyday commands first.
|
||||
"help",
|
||||
"new",
|
||||
"stop",
|
||||
"status",
|
||||
"resume",
|
||||
"sessions",
|
||||
"model",
|
||||
# Maintenance / diagnostics — the ones that prompted this priority list.
|
||||
"debug",
|
||||
"restart",
|
||||
"update",
|
||||
"verbose",
|
||||
"commands",
|
||||
# Mid-turn session control.
|
||||
"approve",
|
||||
"deny",
|
||||
"queue",
|
||||
"steer",
|
||||
"background",
|
||||
# Lower-priority but still useful operational built-ins.
|
||||
"reasoning",
|
||||
"usage",
|
||||
"platforms",
|
||||
"platform",
|
||||
"profile",
|
||||
"whoami",
|
||||
)
|
||||
"""Built-in commands that should stay visible in Telegram's capped menu.
|
||||
|
||||
Telegram only displays a small BotCommand menu in practice. The full Hermes
|
||||
registry is still dispatchable when typed manually, but operational commands
|
||||
need to survive the visible menu cap ahead of lower-priority built-ins.
|
||||
"""
|
||||
|
||||
|
||||
def _prioritize_telegram_menu_commands(
    commands: list[tuple[str, str]],
) -> list[tuple[str, str]]:
    """Reorder menu commands so the priority built-ins come first.

    Commands whose name appears in ``_TELEGRAM_MENU_PRIORITY`` (after
    ``_sanitize_telegram_name``) are moved to the front, ordered by their
    position in that tuple. Every other command follows, keeping the
    relative order it had in ``commands``.

    Args:
        commands: ``(name, description)`` pairs in registry order.

    Returns:
        A new list holding the same pairs in prioritized order.
    """
    rank_by_name: dict[str, int] = {}
    for position, raw_name in enumerate(_TELEGRAM_MENU_PRIORITY):
        rank_by_name[_sanitize_telegram_name(raw_name)] = position

    pinned: list[tuple[int, int, tuple[str, str]]] = []
    trailing: list[tuple[str, str]] = []
    for original_index, entry in enumerate(commands):
        command_name = entry[0]
        if command_name in rank_by_name:
            pinned.append((rank_by_name[command_name], original_index, entry))
        else:
            trailing.append(entry)

    # Sort on (rank, original index) only, so entries are never compared
    # with each other and ties keep their input order.
    pinned.sort(key=lambda ranked: (ranked[0], ranked[1]))
    return [entry for _rank, _index, entry in pinned] + trailing
|
||||
|
||||
|
||||
_CMD_NAME_LIMIT = 32
|
||||
"""Max command name length shared by Telegram and Discord."""
|
||||
|
||||
@@ -721,11 +783,12 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
|
||||
|
||||
Returns:
|
||||
(menu_commands, hidden_count) where hidden_count is the number of
|
||||
skill commands omitted due to the cap.
|
||||
commands omitted due to the cap.
|
||||
"""
|
||||
core_commands = list(telegram_bot_commands())
|
||||
core_commands = _prioritize_telegram_menu_commands(list(telegram_bot_commands()))
|
||||
reserved_names = {n for n, _ in core_commands}
|
||||
all_commands = list(core_commands)
|
||||
hidden_core_count = max(0, len(all_commands) - max_commands)
|
||||
|
||||
remaining_slots = max(0, max_commands - len(all_commands))
|
||||
entries, hidden_count = _collect_gateway_skill_entries(
|
||||
@@ -737,7 +800,7 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
|
||||
)
|
||||
# Drop the cmd_key — Telegram only needs (name, desc) pairs.
|
||||
all_commands.extend((n, d) for n, d, _k in entries)
|
||||
return all_commands[:max_commands], hidden_count
|
||||
return all_commands[:max_commands], hidden_count + hidden_core_count
|
||||
|
||||
|
||||
def discord_skill_commands(
|
||||
|
||||
+93
-4
@@ -1648,6 +1648,15 @@ DEFAULT_CONFIG = {
|
||||
# the sweep on every CLI invocation). Tracked via state_meta in
|
||||
# state.db itself, so it's shared across all processes.
|
||||
"min_interval_hours": 24,
|
||||
# Legacy per-session JSON snapshot writer. When true, the agent
|
||||
# rewrites ``~/.hermes/sessions/session_{sid}.json`` on every turn
|
||||
# boundary with the full message list. state.db is canonical and
|
||||
# has every field the snapshot stored (plus per-message timestamps
|
||||
# and token counts), so this is off by default — the snapshots had
|
||||
# no consumer outside their own overwrite guard and accumulated
|
||||
# GBs of disk on heavy users. Opt in only if you have an external
|
||||
# tool that consumes the JSON files directly.
|
||||
"write_json_snapshots": False,
|
||||
},
|
||||
|
||||
# Contextual first-touch onboarding hints (see agent/onboarding.py).
|
||||
@@ -1738,8 +1747,48 @@ DEFAULT_CONFIG = {
|
||||
"retries": 2,
|
||||
},
|
||||
|
||||
# =========================================================================
|
||||
# External secret sources
|
||||
# =========================================================================
|
||||
# Pull credentials from external secret managers at process startup
|
||||
# rather than storing them in ~/.hermes/.env.
|
||||
"secrets": {
|
||||
"bitwarden": {
|
||||
# Master switch. When false, BSM is never contacted and the
|
||||
# bws binary is never auto-installed — same as not having
|
||||
# this section at all.
|
||||
"enabled": False,
|
||||
# Name of the env var that holds the Bitwarden machine-account
|
||||
# access token. This is the one bootstrap secret; it lives
|
||||
# in ~/.hermes/.env (or your shell) and never in config.yaml.
|
||||
"access_token_env": "BWS_ACCESS_TOKEN",
|
||||
# UUID of the BSM project to sync from.
|
||||
"project_id": "",
|
||||
# Seconds to cache fetched secrets in-process. 0 disables.
|
||||
"cache_ttl_seconds": 300,
|
||||
# When True, BSM values overwrite existing env vars. Default
|
||||
# True because the point of using BSM is centralized rotation —
|
||||
# if .env had the final say, rotating in Bitwarden wouldn't
|
||||
# take effect until you also cleared the matching .env line.
|
||||
"override_existing": True,
|
||||
# When True, the bws binary is auto-downloaded into
|
||||
# ~/.hermes/bin/ on first use. When False you must install
|
||||
# bws yourself and have it on PATH.
|
||||
"auto_install": True,
|
||||
},
|
||||
},
|
||||
|
||||
# ── Nous Portal feature flags ──────────────────────────────────────
|
||||
"portal": {
|
||||
# App tools: 500+ external app integrations (Gmail, Slack, GitHub,
|
||||
# Notion, etc.) via the Nous tool gateway. Requires an active Nous
|
||||
# subscription. Set to False to hide the app_tools toolset even
|
||||
# when a subscription is present.
|
||||
"app_tools": True,
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 23,
|
||||
"_config_version": 24,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
@@ -2227,6 +2276,22 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
"TOOLS_GATEWAY_URL": {
|
||||
"description": "Explicit URL for the tools-gateway (app integrations). Overrides the auto-derived tools-gateway.nousresearch.com",
|
||||
"prompt": "Tools-gateway URL",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
"PORTAL_APP_TOOLS": {
|
||||
"description": "Enable app integration tools (500+ apps via Nous tool gateway). Requires Nous subscription.",
|
||||
"prompt": "Enable app tools (500+ apps)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
"TAVILY_API_KEY": {
|
||||
"description": "Tavily API key for AI-native web search, extract, and crawl",
|
||||
"prompt": "Tavily API key",
|
||||
@@ -3008,7 +3073,7 @@ def _normalize_custom_provider_entry(
|
||||
"api_mode", "transport", "model", "default_model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
"request_timeout_seconds", "stale_timeout_seconds",
|
||||
"discover_models",
|
||||
"discover_models", "extra_body",
|
||||
}
|
||||
for camel, snake in _CAMEL_ALIASES.items():
|
||||
if camel in entry and snake not in entry:
|
||||
@@ -3103,6 +3168,10 @@ def _normalize_custom_provider_entry(
|
||||
if isinstance(discover_models, bool):
|
||||
normalized["discover_models"] = discover_models
|
||||
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
normalized["extra_body"] = dict(extra_body)
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
@@ -3257,13 +3326,13 @@ _KNOWN_ROOT_KEYS = {
|
||||
"fallback_providers", "credential_pool_strategies", "toolsets",
|
||||
"agent", "terminal", "display", "compression", "delegation",
|
||||
"auxiliary", "custom_providers", "context", "memory", "gateway",
|
||||
"sessions",
|
||||
"sessions", "portal",
|
||||
}
|
||||
|
||||
# Valid fields inside a custom_providers list entry
|
||||
_VALID_CUSTOM_PROVIDER_FIELDS = {
|
||||
"name", "base_url", "api_key", "api_mode", "model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
"context_length", "rate_limit_delay", "extra_body",
|
||||
# key_env is read at runtime by runtime_provider.py and auxiliary_client.py
|
||||
# — include it here so the set accurately describes the supported schema.
|
||||
"key_env",
|
||||
@@ -3920,6 +3989,26 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
|
||||
f"{', '.join(added_aux)}"
|
||||
)
|
||||
|
||||
# ── Version 23 → 24: inject app_tools into saved platform_toolsets ──
|
||||
# The portal.app_tools config flag is handled by deep-merge (DEFAULT_CONFIG
|
||||
# has it, so load_config() always includes it). But platform_toolsets are
|
||||
# user-owned lists that deep-merge can't append to — existing users who
|
||||
# ran `hermes tools` have a saved list that won't include app_tools.
|
||||
if current_ver < 24:
|
||||
config = read_raw_config()
|
||||
pt = config.get("platform_toolsets")
|
||||
if isinstance(pt, dict):
|
||||
patched = False
|
||||
for plat_key, ts_list in pt.items():
|
||||
if isinstance(ts_list, list) and "app_tools" not in ts_list:
|
||||
ts_list.append("app_tools")
|
||||
patched = True
|
||||
if patched:
|
||||
save_config(config)
|
||||
results["config_added"].append("app_tools added to platform_toolsets")
|
||||
if not quiet:
|
||||
print(" ✓ Added app_tools to saved platform toolset lists")
|
||||
|
||||
if current_ver < latest_ver and not quiet:
|
||||
print(f"Config version: {current_ver} → {latest_ver}")
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ def curses_checklist(
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, 8, -1) # dim gray
|
||||
curses.init_pair(3, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim gray
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
|
||||
|
||||
@@ -777,7 +777,33 @@ def run_doctor(args):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_section("xAI Model Retirement (May 15, 2026)")
|
||||
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.xai_retirement import (
|
||||
MIGRATION_GUIDE_URL,
|
||||
find_retired_xai_refs,
|
||||
format_issue,
|
||||
)
|
||||
|
||||
_xai_cfg = load_config()
|
||||
retired_refs = find_retired_xai_refs(_xai_cfg)
|
||||
if not retired_refs:
|
||||
check_ok("No retired xAI models in config")
|
||||
else:
|
||||
for ref in retired_refs:
|
||||
check_warn(format_issue(ref))
|
||||
check_info(f"Migration guide: {MIGRATION_GUIDE_URL}")
|
||||
manual_issues.append(
|
||||
f"Update {len(retired_refs)} retired xAI model reference(s) "
|
||||
f"in config.yaml — see {MIGRATION_GUIDE_URL}"
|
||||
)
|
||||
except Exception as _xai_check_err:
|
||||
check_warn("xAI retirement check skipped", f"({_xai_check_err})")
|
||||
|
||||
_section("Auth Providers")
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import (
|
||||
get_nous_auth_status,
|
||||
|
||||
@@ -16,6 +16,7 @@ from pathlib import Path
|
||||
from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
|
||||
def _get_git_commit(project_root: Path) -> str:
|
||||
@@ -69,6 +70,8 @@ def _count_skills(hermes_home: Path) -> int:
|
||||
return 0
|
||||
count = 0
|
||||
for item in skills_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(item):
|
||||
continue
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
@@ -21,6 +21,44 @@ _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
|
||||
# tests) don't spam the same warning multiple times.
|
||||
_WARNED_KEYS: set[str] = set()
|
||||
|
||||
# Map of env-var name → source label ("bitwarden", etc.) for credentials
|
||||
# that were injected by an external secret source during load_hermes_dotenv().
|
||||
# Used by setup / `hermes model` flows to label detected credentials so
|
||||
# users understand WHERE a key came from when their .env doesn't contain it
|
||||
# directly (otherwise the "credentials detected ✓" line looks identical to
|
||||
# the .env case and they don't know Bitwarden is wired up).
|
||||
_SECRET_SOURCES: dict[str, str] = {}
|
||||
|
||||
|
||||
def get_secret_source(env_var: str) -> str | None:
    """Look up which external secret source provided ``env_var``.

    Args:
        env_var: Name of the environment variable to look up.

    Returns:
        The label recorded in ``_SECRET_SOURCES`` for ``env_var`` (for
        example ``"bitwarden"``), or ``None`` when no label is recorded,
        which covers values from ``.env``, the shell environment, or
        anything untracked.
    """
    try:
        return _SECRET_SOURCES[env_var]
    except KeyError:
        return None
|
||||
|
||||
|
||||
def format_secret_source_suffix(env_var: str) -> str:
    """Build a display suffix naming where ``env_var`` came from.

    Intended for lines that report a detected credential, so the user can
    tell an externally injected value from one stored in ``.env``.

    Args:
        env_var: Name of the environment variable being reported.

    Returns:
        ``" (from Bitwarden)"`` for Bitwarden-sourced values,
        ``" (from <label>)"`` for any other recorded source, and ``""``
        when no source is recorded.
    """
    label = get_secret_source(env_var)
    if label == "bitwarden":
        return " (from Bitwarden)"
    # Any other non-empty label gets the generic form, so new secret
    # sources need no change at the call sites.
    return f" (from {label})" if label else ""
|
||||
|
||||
|
||||
def _format_offending_chars(value: str, limit: int = 3) -> str:
|
||||
"""Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
|
||||
@@ -172,4 +210,87 @@ def load_hermes_dotenv(
|
||||
_load_dotenv_with_fallback(project_env_path, override=not loaded)
|
||||
loaded.append(project_env_path)
|
||||
|
||||
_apply_external_secret_sources(home_path)
|
||||
|
||||
return loaded
|
||||
|
||||
|
||||
def _apply_external_secret_sources(home_path: Path) -> None:
    """Pull secrets from external sources (currently Bitwarden) into env.

    Runs AFTER dotenv loads so .env values are visible (they are used to
    locate the access token) but BEFORE the rest of Hermes reads
    ``os.environ`` for credentials. Every failure is reported on stderr
    or ignored; external secret sources must never block startup.

    Args:
        home_path: Hermes home directory containing ``config.yaml``.
    """
    try:
        cfg = _load_secrets_config(home_path)
    except Exception:  # noqa: BLE001 — config errors must not block startup
        return

    # A hand-edited config may hold a scalar or list where a mapping is
    # expected; treat any malformed section as "not configured".
    bw_cfg = cfg.get("bitwarden") if isinstance(cfg, dict) else None
    if not isinstance(bw_cfg, dict) or not bw_cfg.get("enabled"):
        return

    try:
        from agent.secret_sources.bitwarden import apply_bitwarden_secrets
    except ImportError:
        return

    try:
        cache_ttl_seconds = float(bw_cfg.get("cache_ttl_seconds", 300))
    except (TypeError, ValueError):
        # Non-numeric TTL in config.yaml: fall back to the default.
        cache_ttl_seconds = 300.0

    try:
        result = apply_bitwarden_secrets(
            enabled=True,
            access_token_env=bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN"),
            project_id=bw_cfg.get("project_id", ""),
            # Raw config.yaml is read here without merging defaults, so
            # the fallback must match the documented default (True) or an
            # omitted key would silently stop rotated secrets applying.
            override_existing=bool(bw_cfg.get("override_existing", True)),
            cache_ttl_seconds=cache_ttl_seconds,
            auto_install=bool(bw_cfg.get("auto_install", True)),
        )
    except Exception as exc:  # noqa: BLE001 — must never block startup
        print(
            f"  Bitwarden Secrets Manager: {exc}",
            file=sys.stderr,
        )
        return

    if result.applied:
        # Re-run the ASCII sanitization pass: BSM values are user-supplied
        # and might have the same copy-paste corruption as a manually
        # edited .env (see #6843).
        _sanitize_loaded_credentials()
        # Remember where these came from so the setup / `hermes model`
        # flows can label detected credentials with "(from Bitwarden)".
        for name in result.applied:
            _SECRET_SOURCES[name] = "bitwarden"
        print(
            f"  Bitwarden Secrets Manager: applied {len(result.applied)} "
            f"secret{'s' if len(result.applied) != 1 else ''} "
            f"({', '.join(sorted(result.applied))})",
            file=sys.stderr,
        )
    if result.error:
        print(
            f"  Bitwarden Secrets Manager: {result.error}",
            file=sys.stderr,
        )
    for warn in result.warnings:
        print(
            f"  Bitwarden Secrets Manager: {warn}",
            file=sys.stderr,
        )
|
||||
|
||||
|
||||
def _load_secrets_config(home_path: Path) -> dict:
|
||||
"""Read just the ``secrets:`` section out of config.yaml.
|
||||
|
||||
Imported lazily and isolated from the main config loader so a
|
||||
malformed config can't take down dotenv loading entirely.
|
||||
"""
|
||||
config_path = home_path / "config.yaml"
|
||||
if not config_path.exists():
|
||||
return {}
|
||||
try:
|
||||
import yaml # type: ignore
|
||||
except ImportError:
|
||||
return {}
|
||||
try:
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
data = yaml.safe_load(f) or {}
|
||||
except Exception: # noqa: BLE001
|
||||
return {}
|
||||
return data.get("secrets") or {}
|
||||
|
||||
@@ -951,6 +951,58 @@ CREATE INDEX IF NOT EXISTS idx_notify_task ON kanban_notify_subs(task_
|
||||
|
||||
_INITIALIZED_PATHS: set[str] = set()
|
||||
_INIT_LOCK = threading.RLock()
|
||||
_SQLITE_HEADER = b"SQLite format 3\x00"
|
||||
|
||||
|
||||
def _looks_like_tls_record_at(data: bytes, offset: int) -> bool:
|
||||
"""Return True for a TLS record header at ``data[offset:]``."""
|
||||
if len(data) < offset + 5:
|
||||
return False
|
||||
content_type = data[offset]
|
||||
major = data[offset + 1]
|
||||
minor = data[offset + 2]
|
||||
length = int.from_bytes(data[offset + 3:offset + 5], "big")
|
||||
return (
|
||||
content_type in {0x14, 0x15, 0x16, 0x17}
|
||||
and major == 0x03
|
||||
and minor in {0x00, 0x01, 0x02, 0x03, 0x04}
|
||||
and 0 < length <= 18432
|
||||
)
|
||||
|
||||
|
||||
def _validate_sqlite_header(path: Path) -> None:
|
||||
"""Fail early with an actionable error for non-SQLite Kanban DB files.
|
||||
|
||||
``sqlite3.connect()`` creates missing and zero-byte files, so those are
|
||||
allowed. Existing non-empty files must have the SQLite header before we
|
||||
hand them to SQLite/WAL setup. This keeps corrupted page-0 failures from
|
||||
being collapsed into a generic PRAGMA error and lets the gateway's corrupt
|
||||
board handling identify the board by fingerprint.
|
||||
"""
|
||||
try:
|
||||
stat = path.stat()
|
||||
except FileNotFoundError:
|
||||
return
|
||||
except OSError:
|
||||
return
|
||||
if stat.st_size == 0:
|
||||
return
|
||||
try:
|
||||
with path.open("rb") as handle:
|
||||
head = handle.read(64)
|
||||
except OSError:
|
||||
return
|
||||
if head.startswith(_SQLITE_HEADER):
|
||||
return
|
||||
signature = ""
|
||||
if head.startswith(b"SQLit") and _looks_like_tls_record_at(head, 5):
|
||||
signature = " (TLS record header detected at byte offset 5)"
|
||||
elif _looks_like_tls_record_at(head, 0):
|
||||
signature = " (TLS record header detected at byte offset 0)"
|
||||
raise sqlite3.DatabaseError(
|
||||
"file is not a database: invalid SQLite header for "
|
||||
f"{path}{signature}; first_32={head[:32].hex(' ')}"
|
||||
)
|
||||
|
||||
|
||||
def connect(
|
||||
@@ -981,6 +1033,7 @@ def connect(
|
||||
else:
|
||||
path = kanban_db_path(board=board)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
_validate_sqlite_header(path)
|
||||
resolved = str(path.resolve())
|
||||
conn = sqlite3.connect(str(path), isolation_level=None, timeout=30)
|
||||
try:
|
||||
|
||||
+601
-108
@@ -261,11 +261,147 @@ import time as _time
|
||||
from datetime import datetime
|
||||
|
||||
from hermes_cli import __version__, __release_date__
|
||||
from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _is_termux_startup_environment(env: dict[str, str] | None = None) -> bool:
|
||||
"""Import-safe Termux check for cold-start-sensitive CLI paths."""
|
||||
check = env or os.environ
|
||||
prefix = str(check.get("PREFIX", ""))
|
||||
return bool(
|
||||
check.get("TERMUX_VERSION")
|
||||
or "com.termux/files/usr" in prefix
|
||||
or prefix.startswith("/data/data/com.termux/")
|
||||
)
|
||||
|
||||
|
||||
def _read_packed_ref(common_dir: Path, ref: str) -> str | None:
|
||||
"""Look up a ref in .git/packed-refs without spawning git.
|
||||
|
||||
packed-refs lines look like ``<sha> <ref>`` with optional ``^<sha>``
|
||||
peel lines and ``#``-prefixed comments / ``# pack-refs with:`` header.
|
||||
"""
|
||||
try:
|
||||
text = (common_dir / "packed-refs").read_text(encoding="utf-8", errors="replace")
|
||||
except OSError:
|
||||
return None
|
||||
for line in text.splitlines():
|
||||
if not line or line.startswith("#") or line.startswith("^"):
|
||||
continue
|
||||
parts = line.split(" ", 1)
|
||||
if len(parts) == 2 and parts[1].strip() == ref:
|
||||
return parts[0].strip()
|
||||
return None
|
||||
|
||||
|
||||
def _read_git_revision_fingerprint(repo_root: Path) -> str | None:
|
||||
"""Return a cheap checkout fingerprint without spawning git."""
|
||||
git_dir = repo_root / ".git"
|
||||
try:
|
||||
if git_dir.is_file():
|
||||
for line in git_dir.read_text(encoding="utf-8", errors="replace").splitlines():
|
||||
key, _, value = line.partition(":")
|
||||
if key.strip() == "gitdir" and value.strip():
|
||||
git_dir = (repo_root / value.strip()).resolve()
|
||||
break
|
||||
# Worktrees point HEAD at a per-worktree gitdir but pack their refs
|
||||
# in the main repo's gitdir (referenced via ``commondir``). Resolve
|
||||
# that up front so packed-refs lookups hit the right file.
|
||||
common_dir = git_dir
|
||||
commondir_file = git_dir / "commondir"
|
||||
if commondir_file.exists():
|
||||
try:
|
||||
rel = commondir_file.read_text(encoding="utf-8", errors="replace").strip()
|
||||
if rel:
|
||||
common_dir = (git_dir / rel).resolve()
|
||||
except OSError:
|
||||
pass
|
||||
head_file = git_dir / "HEAD"
|
||||
head = head_file.read_text(encoding="utf-8", errors="replace").strip()
|
||||
if head.startswith("ref:"):
|
||||
ref = head.split(":", 1)[1].strip()
|
||||
# Loose refs may live in the worktree gitdir OR the common dir
|
||||
# (branches created via `git worktree add` typically live in the
|
||||
# common dir's refs/heads/).
|
||||
for candidate in (git_dir, common_dir):
|
||||
ref_file = candidate / ref
|
||||
if ref_file.exists():
|
||||
return f"git:{ref}:{ref_file.read_text(encoding='utf-8', errors='replace').strip()}"
|
||||
packed_sha = _read_packed_ref(common_dir, ref)
|
||||
if packed_sha:
|
||||
return f"git:{ref}:{packed_sha}"
|
||||
# Ref name is known but unresolved — still stable across launches,
|
||||
# and the version/release fallback in the caller will invalidate
|
||||
# after `hermes update`.
|
||||
return f"git:{ref}:unresolved"
|
||||
return f"git:HEAD:{head}"
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
|
||||
def _termux_bundled_skills_fingerprint() -> str:
    """Build the invalidation key used by the Termux bundled-skill sync stamp.

    The git revision fingerprint of ``PROJECT_ROOT`` is used when available.
    Otherwise the key is derived from the package version, the release date
    and the ``skills`` directory's own mtime/size, or the literal ``missing``
    when that directory cannot be stat'ed.
    """
    revision = _read_git_revision_fingerprint(PROJECT_ROOT)
    if revision:
        return revision

    # Fallback for installs without readable git metadata.
    prefix = f"skills:{__version__}:{__release_date__}"
    try:
        info = (PROJECT_ROOT / "skills").stat()
    except OSError:
        return f"{prefix}:missing"
    return f"{prefix}:{info.st_mtime_ns}:{info.st_size}"
|
||||
|
||||
|
||||
def _termux_bundled_skills_stamp_path() -> Path:
    """Return the stamp file that records the last synced fingerprint.

    The file is ``skills/.termux_bundled_sync_stamp`` under the Hermes home
    directory returned by ``get_hermes_home()``.
    """
    skills_home = get_hermes_home() / "skills"
    return skills_home / ".termux_bundled_sync_stamp"
|
||||
|
||||
|
||||
def _termux_bundled_skills_sync_needed() -> bool:
    """Decide whether the bundled-skill sync has to run on this launch.

    Returns:
        ``True`` when any of the following holds:

        * this is not a Termux startup environment (the stamp shortcut is
          Termux-only);
        * ``HERMES_TERMUX_FORCE_SKILLS_SYNC`` is set to ``"1"``;
        * the stamp file is missing, unreadable, or not valid UTF-8;
        * the stamp content differs from the current fingerprint.

        ``False`` only when the recorded stamp matches the current
        fingerprint.
    """
    if not _is_termux_startup_environment():
        return True
    if os.environ.get("HERMES_TERMUX_FORCE_SKILLS_SYNC") == "1":
        return True
    try:
        stamp = _termux_bundled_skills_stamp_path()
        recorded = stamp.read_text(encoding="utf-8").strip()
        return recorded != _termux_bundled_skills_fingerprint()
    except (OSError, UnicodeError):
        # A corrupt (non-UTF-8) stamp raises UnicodeDecodeError, which is not
        # an OSError. Letting it escape would abort the sync before a fresh
        # stamp is written, so the same failure would repeat on every launch.
        # Treat it like a missing stamp instead.
        return True
|
||||
|
||||
|
||||
def _mark_termux_bundled_skills_synced() -> None:
    """Record the current fingerprint in the stamp file (Termux only).

    Does nothing outside a Termux startup environment. Writing is best
    effort: an ``OSError`` while creating the directory or writing the file
    is ignored.
    """
    if _is_termux_startup_environment():
        try:
            stamp_file = _termux_bundled_skills_stamp_path()
            stamp_file.parent.mkdir(parents=True, exist_ok=True)
            payload = _termux_bundled_skills_fingerprint() + "\n"
            stamp_file.write_text(payload, encoding="utf-8")
        except OSError:
            # Deliberately silent: a missing stamp simply means another sync.
            pass
|
||||
|
||||
|
||||
def _sync_bundled_skills_for_startup() -> bool:
    """Run the bundled-skill sync unless a Termux stamp says it is current.

    Hashing every bundled skill is safe but expensive on older Android
    storage, so on Termux a matching git/ref stamp skips the work. A changed
    checkout revision forces one real sync; later starts skip it again.

    Returns:
        ``True`` when ``sync_skills`` was executed, ``False`` when it was
        skipped.
    """
    can_skip = (
        _is_termux_startup_environment()
        and not _termux_bundled_skills_sync_needed()
    )
    if not can_skip:
        # Imported lazily so the skip path never pays for this module.
        from tools.skills_sync import sync_skills

        sync_skills(quiet=True)
        _mark_termux_bundled_skills_synced()
        return True
    return False
|
||||
|
||||
|
||||
def _termux_should_prefetch_update_check() -> bool:
    """Tell whether the background update check should be started.

    Always ``True`` outside Termux. On Termux it is opt-in: ``True`` only
    when ``HERMES_TERMUX_PREFETCH_UPDATES`` equals ``"1"``.
    """
    if _is_termux_startup_environment():
        return os.environ.get("HERMES_TERMUX_PREFETCH_UPDATES") == "1"
    return True
|
||||
|
||||
|
||||
def _relative_time(ts) -> str:
|
||||
"""Format a timestamp as relative time (e.g., '2h ago', 'yesterday')."""
|
||||
if not ts:
|
||||
@@ -455,7 +591,7 @@ def _session_browse_picker(sessions: list) -> Optional[str]:
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1) # selected
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1) # header
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1) # search
|
||||
curses.init_pair(4, 8, -1) # dim
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim
|
||||
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
@@ -967,6 +1103,72 @@ def _tui_need_npm_install(root: Path) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
_TUI_BUILD_INPUT_DIRS = (
|
||||
"src",
|
||||
"packages/hermes-ink/src",
|
||||
)
|
||||
|
||||
_TUI_BUILD_INPUT_FILES = (
|
||||
"package.json",
|
||||
"package-lock.json",
|
||||
"tsconfig.json",
|
||||
"tsconfig.build.json",
|
||||
"babel.compiler.config.cjs",
|
||||
"scripts/build.mjs",
|
||||
"packages/hermes-ink/package.json",
|
||||
"packages/hermes-ink/package-lock.json",
|
||||
"packages/hermes-ink/index.js",
|
||||
"packages/hermes-ink/text-input.js",
|
||||
)
|
||||
|
||||
_TUI_BUILD_INPUT_SUFFIXES = frozenset(
|
||||
{".cjs", ".js", ".jsx", ".json", ".mjs", ".ts", ".tsx"}
|
||||
)
|
||||
|
||||
|
||||
def _iter_tui_build_inputs(root: Path):
    """Yield source/config files that affect ``ui-tui/dist/entry.js``.

    Args:
        root: The TUI project directory; every entry of
            ``_TUI_BUILD_INPUT_FILES`` and ``_TUI_BUILD_INPUT_DIRS`` is
            resolved relative to it.

    Yields:
        First each listed file that exists, then every regular file under
        the listed directories whose suffix is in
        ``_TUI_BUILD_INPUT_SUFFIXES``. Missing files and directories are
        skipped silently.
    """
    for rel in _TUI_BUILD_INPUT_FILES:
        path = root / rel
        if path.is_file():
            yield path

    for rel in _TUI_BUILD_INPUT_DIRS:
        base = root / rel
        if not base.is_dir():
            continue
        for path in base.rglob("*"):
            # Test the suffix first: it is a pure string check, so entries
            # with other extensions (and plain directories) never reach the
            # is_file() filesystem check. The yielded set is unchanged.
            if path.suffix in _TUI_BUILD_INPUT_SUFFIXES and path.is_file():
                yield path
|
||||
|
||||
|
||||
def _tui_need_rebuild(root: Path) -> bool:
|
||||
"""True when ``dist/entry.js`` is missing or older than TUI inputs.
|
||||
|
||||
The TUI bundle is self-contained. Rebuilding it on every launch adds a
|
||||
visible cold-start tax on slow Termux CPUs, while a simple mtime freshness
|
||||
check still rebuilds immediately after source updates, dependency updates,
|
||||
or local edits. Set ``HERMES_TUI_FORCE_BUILD=1`` to force the old behaviour.
|
||||
"""
|
||||
force = (os.environ.get("HERMES_TUI_FORCE_BUILD") or "").strip().lower()
|
||||
if force in {"1", "true", "yes", "on"}:
|
||||
return True
|
||||
|
||||
entry = root / "dist" / "entry.js"
|
||||
try:
|
||||
output_mtime = entry.stat().st_mtime
|
||||
except OSError:
|
||||
return True
|
||||
|
||||
for path in _iter_tui_build_inputs(root):
|
||||
try:
|
||||
if path.stat().st_mtime > output_mtime:
|
||||
return True
|
||||
except OSError:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _ensure_tui_node() -> None:
|
||||
"""Make sure `node` + `npm` are on PATH for the TUI.
|
||||
|
||||
@@ -1071,16 +1273,17 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
p = Path(ext_dir)
|
||||
if (p / "dist" / "entry.js").is_file():
|
||||
node = _node_bin("node")
|
||||
return [node, str(p / "dist" / "entry.js")], p
|
||||
return [node, "--expose-gc", str(p / "dist" / "entry.js")], p
|
||||
|
||||
# 1b. Bundled in wheel (pip install)
|
||||
bundled = _find_bundled_tui()
|
||||
if bundled is not None:
|
||||
node = _node_bin("node")
|
||||
return [node, str(bundled)], bundled.parent
|
||||
return [node, "--expose-gc", str(bundled)], bundled.parent
|
||||
|
||||
# 2. Normal flow: npm install if needed, esbuild (always on desktop; only when stale on Termux), then node --expose-gc dist/entry.js.
|
||||
# --dev flow: npm install if needed, then tsx src/entry.tsx.
|
||||
did_install = False
|
||||
if _tui_need_npm_install(tui_dir):
|
||||
npm = _node_bin("npm")
|
||||
if not os.environ.get("HERMES_QUIET"):
|
||||
@@ -1100,6 +1303,7 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
if preview:
|
||||
print(preview)
|
||||
sys.exit(1)
|
||||
did_install = True
|
||||
|
||||
if tui_dev:
|
||||
# Keep the local @hermes/ink package exports in sync with source.
|
||||
@@ -1128,24 +1332,31 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
return [str(tsx), "src/entry.tsx"], tui_dir
|
||||
return [npm, "start"], tui_dir
|
||||
|
||||
# Always rebuild — esbuild is fast and this avoids staleness-edge-case bugs.
|
||||
npm = _node_bin("npm")
|
||||
result = subprocess.run(
|
||||
[npm, "run", "build"],
|
||||
cwd=str(tui_dir),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
combined = f"{result.stdout or ''}{result.stderr or ''}".strip()
|
||||
preview = "\n".join(combined.splitlines()[-30:])
|
||||
print("TUI build failed.")
|
||||
if preview:
|
||||
print(preview)
|
||||
sys.exit(1)
|
||||
# Desktop/dev launches retain the historical "always rebuild" behaviour.
|
||||
# Termux cold starts use the freshness check because esbuild startup is
|
||||
# expensive on old mobile CPUs.
|
||||
should_build = True
|
||||
if _is_termux_startup_environment():
|
||||
should_build = did_install or _tui_need_rebuild(tui_dir)
|
||||
|
||||
if should_build:
|
||||
npm = _node_bin("npm")
|
||||
result = subprocess.run(
|
||||
[npm, "run", "build"],
|
||||
cwd=str(tui_dir),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
combined = f"{result.stdout or ''}{result.stderr or ''}".strip()
|
||||
preview = "\n".join(combined.splitlines()[-30:])
|
||||
print("TUI build failed.")
|
||||
if preview:
|
||||
print(preview)
|
||||
sys.exit(1)
|
||||
|
||||
node = _node_bin("node")
|
||||
return [node, str(tui_dir / "dist" / "entry.js")], tui_dir
|
||||
return [node, "--expose-gc", str(tui_dir / "dist" / "entry.js")], tui_dir
|
||||
|
||||
|
||||
def _normalize_tui_toolsets(toolsets: object) -> list[str]:
|
||||
@@ -1267,16 +1478,16 @@ def _launch_tui(
|
||||
env["HERMES_TUI_TOOL_PROGRESS"] = "off"
|
||||
if accept_hooks:
|
||||
env["HERMES_ACCEPT_HOOKS"] = "1"
|
||||
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
|
||||
# ~1.5–4GB depending on version and can fatal-OOM on long sessions with
|
||||
# large transcripts / reasoning blobs. Token-level merge: respect any
|
||||
# user-supplied --max-old-space-size (they may have set it higher) and
|
||||
# avoid duplicating --expose-gc.
|
||||
# Guarantee an 8GB V8 heap for the TUI. Default node cap is ~1.5–4GB
|
||||
# depending on version and can fatal-OOM on long sessions with large
|
||||
# transcripts / reasoning blobs. Token-level merge: respect any
|
||||
# user-supplied --max-old-space-size (they may have set it higher).
|
||||
# --expose-gc is *not* added here: Node rejects it in NODE_OPTIONS
|
||||
# ("--expose-gc is not allowed in NODE_OPTIONS") and refuses to start.
|
||||
# It is passed as a direct argv flag in _make_tui_argv() instead.
|
||||
_tokens = env.get("NODE_OPTIONS", "").split()
|
||||
if not any(t.startswith("--max-old-space-size=") for t in _tokens):
|
||||
_tokens.append("--max-old-space-size=8192")
|
||||
if "--expose-gc" not in _tokens:
|
||||
_tokens.append("--expose-gc")
|
||||
env["NODE_OPTIONS"] = " ".join(_tokens)
|
||||
# HERMES_TUI_RESUME is an internal hand-off from the Python wrapper to the
|
||||
# Ink app. Because we start from os.environ.copy(), an exported/stale value
|
||||
@@ -1384,6 +1595,29 @@ def cmd_chat(args):
|
||||
# If resolution fails, keep the original value — _init_agent will
|
||||
# report "Session not found" with the original input
|
||||
|
||||
# xAI retirement warning — one-shot, non-blocking, never fails startup
|
||||
try:
|
||||
from hermes_cli.xai_retirement import (
|
||||
MIGRATION_GUIDE_URL,
|
||||
RETIREMENT_DATE,
|
||||
find_retired_xai_refs,
|
||||
format_issue,
|
||||
)
|
||||
from hermes_cli.config import load_config as _load_config_for_xai_check
|
||||
|
||||
_retired_xai_refs = find_retired_xai_refs(_load_config_for_xai_check())
|
||||
if _retired_xai_refs:
|
||||
sys.stderr.write(
|
||||
f"\033[33m⚠ xAI retires {len(_retired_xai_refs)} model(s) "
|
||||
f"in your config on {RETIREMENT_DATE}:\033[0m\n"
|
||||
)
|
||||
for _ref in _retired_xai_refs:
|
||||
sys.stderr.write(f" \033[33m⚠\033[0m {format_issue(_ref)}\n")
|
||||
sys.stderr.write(f" \033[2mMigration guide: {MIGRATION_GUIDE_URL}\033[0m\n")
|
||||
sys.stderr.write(" \033[2mRun 'hermes doctor' for details.\033[0m\n\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# First-run guard: check if any provider is configured before launching
|
||||
if not _has_any_provider_configured():
|
||||
print()
|
||||
@@ -1416,19 +1650,20 @@ def cmd_chat(args):
|
||||
print("You can run 'hermes setup' at any time to configure.")
|
||||
sys.exit(1)
|
||||
|
||||
# Start update check in background (runs while other init happens)
|
||||
try:
|
||||
from hermes_cli.banner import prefetch_update_check
|
||||
# Start update check in background (runs while other init happens).
|
||||
# On Termux this imports rich/prompt_toolkit in the foreground and then
|
||||
# competes for CPU on single-core devices, so keep it opt-in there.
|
||||
if _termux_should_prefetch_update_check():
|
||||
try:
|
||||
from hermes_cli.banner import prefetch_update_check
|
||||
|
||||
prefetch_update_check()
|
||||
except Exception:
|
||||
pass
|
||||
prefetch_update_check()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Sync bundled skills on CLI launch (fast -- skips unchanged skills; on Termux an unchanged checkout skips the sync entirely)
|
||||
try:
|
||||
from tools.skills_sync import sync_skills
|
||||
|
||||
sync_skills(quiet=True)
|
||||
_sync_bundled_skills_for_startup()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -2198,6 +2433,9 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
|
||||
("mcp", "MCP", "MCP tool reasoning"),
|
||||
("title_generation", "Title generation", "session titles"),
|
||||
("skills_hub", "Skills hub", "skills search/install"),
|
||||
("triage_specifier", "Triage specifier", "kanban spec fleshing"),
|
||||
("kanban_decomposer", "Kanban decomposer", "task decomposition"),
|
||||
("profile_describer", "Profile describer", "auto profile descriptions"),
|
||||
("curator", "Curator", "skill-usage review pass"),
|
||||
]
|
||||
|
||||
@@ -2566,6 +2804,7 @@ def _prompt_provider_choice(choices, *, default=0):
|
||||
|
||||
def _model_flow_openrouter(config, current_model=""):
|
||||
"""OpenRouter provider: ensure API key, then pick model."""
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from hermes_cli.auth import (
|
||||
ProviderConfig,
|
||||
_prompt_model_selection,
|
||||
@@ -2626,6 +2865,7 @@ def _model_flow_openrouter(config, current_model=""):
|
||||
|
||||
def _model_flow_ai_gateway(config, current_model=""):
|
||||
"""Vercel AI Gateway provider: ensure API key, then pick model with pricing."""
|
||||
from hermes_constants import AI_GATEWAY_BASE_URL
|
||||
from hermes_cli.auth import (
|
||||
PROVIDER_REGISTRY,
|
||||
_prompt_model_selection,
|
||||
@@ -4219,8 +4459,11 @@ def _model_flow_named_custom(config, provider_info):
|
||||
print(f" Provider: {name} ({base_url})")
|
||||
|
||||
|
||||
# Curated model lists for direct API-key providers — single source in models.py
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
# Keep the historical eager model catalog import on desktop/CI. Termux defers
|
||||
# it to the model-selection handlers so plain `hermes --tui` does not pay for
|
||||
# requests/models.dev catalog imports before the Node TUI starts.
|
||||
if not _is_termux_startup_environment():
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
|
||||
|
||||
def _current_reasoning_effort(config) -> str:
|
||||
@@ -4337,6 +4580,7 @@ def _model_flow_copilot(config, current_model=""):
|
||||
)
|
||||
from hermes_cli.config import save_env_value, load_config, save_config
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS,
|
||||
fetch_api_models,
|
||||
fetch_github_model_catalog,
|
||||
github_model_reasoning_efforts,
|
||||
@@ -4421,7 +4665,9 @@ def _model_flow_copilot(config, current_model=""):
|
||||
source = creds.get("source", "")
|
||||
else:
|
||||
if source in {"GITHUB_TOKEN", "GH_TOKEN"}:
|
||||
print(f" GitHub token: {api_key[:8]}... ✓ ({source})")
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
bw_suffix = format_secret_source_suffix(source)
|
||||
print(f" GitHub token: {api_key[:8]}... ✓ ({source}{bw_suffix})")
|
||||
elif source == "gh auth token":
|
||||
print(" GitHub token: ✓ (from `gh auth token`)")
|
||||
else:
|
||||
@@ -4529,6 +4775,7 @@ def _model_flow_copilot_acp(config, current_model=""):
|
||||
resolve_external_process_provider_credentials,
|
||||
)
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS,
|
||||
fetch_github_model_catalog,
|
||||
normalize_copilot_model_id,
|
||||
)
|
||||
@@ -4677,7 +4924,10 @@ def _prompt_api_key(pconfig, existing_key: str, provider_id: str = "") -> tuple:
|
||||
return new_key, False
|
||||
|
||||
# Already configured — offer K / R / C ────────────────────────────────
|
||||
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓")
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
|
||||
source_suffix = format_secret_source_suffix(key_env) if key_env else ""
|
||||
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓{source_suffix}")
|
||||
if not key_env:
|
||||
# Nothing we can rewrite; just acknowledge and move on.
|
||||
print()
|
||||
@@ -4732,6 +4982,7 @@ def _model_flow_kimi(config, current_model=""):
|
||||
load_config,
|
||||
save_config,
|
||||
)
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
|
||||
provider_id = "kimi-coding"
|
||||
pconfig = PROVIDER_REGISTRY[provider_id]
|
||||
@@ -4842,7 +5093,7 @@ def _model_flow_stepfun(config, current_model=""):
|
||||
load_config,
|
||||
save_config,
|
||||
)
|
||||
from hermes_cli.models import fetch_api_models
|
||||
from hermes_cli.models import _PROVIDER_MODELS, fetch_api_models
|
||||
|
||||
provider_id = "stepfun"
|
||||
pconfig = PROVIDER_REGISTRY[provider_id]
|
||||
@@ -4959,7 +5210,9 @@ def _model_flow_bedrock_api_key(config, region, current_model=""):
|
||||
# Prompt for API key
|
||||
existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
|
||||
if existing_key:
|
||||
print(f" Bedrock API Key: {existing_key[:12]}... ✓")
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK")
|
||||
print(f" Bedrock API Key: {existing_key[:12]}... ✓{source_suffix}")
|
||||
else:
|
||||
print(f" Endpoint: {mantle_base_url}")
|
||||
print()
|
||||
@@ -5222,6 +5475,7 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
save_config,
|
||||
)
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS,
|
||||
fetch_api_models,
|
||||
opencode_model_api_mode,
|
||||
normalize_opencode_model_id,
|
||||
@@ -5629,7 +5883,22 @@ def _model_flow_anthropic(config, current_model=""):
|
||||
if has_creds:
|
||||
# Show what we found
|
||||
if existing_key:
|
||||
print(f" Anthropic credentials: {existing_key[:12]}... ✓")
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
|
||||
# Surface which env var supplied the key so users with
|
||||
# Bitwarden see "(from Bitwarden)" — without this, a detected
|
||||
# BSM key looks identical to a key in .env and users assume
|
||||
# nothing is wired up.
|
||||
source_suffix = ""
|
||||
for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars:
|
||||
if os.getenv(var, "").strip() == existing_key:
|
||||
source_suffix = format_secret_source_suffix(var)
|
||||
if source_suffix:
|
||||
break
|
||||
print(
|
||||
f" Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}"
|
||||
)
|
||||
elif cc_available:
|
||||
print(" Claude Code credentials: ✓ (auto-detected)")
|
||||
print()
|
||||
@@ -5855,8 +6124,7 @@ def cmd_import(args):
|
||||
run_import(args)
|
||||
|
||||
|
||||
def cmd_version(args):
|
||||
"""Show version."""
|
||||
def _print_version_info(*, check_updates: bool = True) -> None:
|
||||
print(f"Hermes Agent v{__version__} ({__release_date__})")
|
||||
print(f"Project: {PROJECT_ROOT}")
|
||||
|
||||
@@ -5876,6 +6144,9 @@ def cmd_version(args):
|
||||
except ImportError:
|
||||
print("OpenAI SDK: Not installed")
|
||||
|
||||
if not check_updates:
|
||||
return
|
||||
|
||||
# Show update status (synchronous — acceptable since user asked for version info)
|
||||
try:
|
||||
from hermes_cli.banner import check_for_updates
|
||||
@@ -5894,6 +6165,11 @@ def cmd_version(args):
|
||||
pass
|
||||
|
||||
|
||||
def cmd_version(args):
    """Show version.

    Prints the version report through ``_print_version_info`` with the
    update check enabled. ``args`` is not read here.
    """
    _print_version_info(check_updates=True)
|
||||
|
||||
|
||||
def cmd_uninstall(args):
|
||||
"""Uninstall Hermes Agent."""
|
||||
_require_tty("uninstall")
|
||||
@@ -5970,24 +6246,36 @@ def _validate_critical_files_syntax(root) -> tuple[bool, str | None, str | None]
|
||||
them after a successful ``git pull`` so we can auto-roll-back instead of
|
||||
leaving the user with a bricked install.
|
||||
|
||||
The compiled ``.pyc`` is written to a temp directory rather than the
|
||||
source tree's ``__pycache__/`` so we don't race with concurrent test
|
||||
workers that walk the same dir, and so we don't leave a stale pyc
|
||||
behind in production if the next interpreter run picks a different
|
||||
Python version. The pyc is discarded on function return either way —
|
||||
we only care about the compile-or-not signal.
|
||||
|
||||
Returns ``(ok, failing_path, error_message)``. ``ok=True`` means every
|
||||
file parsed cleanly.
|
||||
"""
|
||||
import py_compile
|
||||
import tempfile
|
||||
|
||||
root = Path(root)
|
||||
for relpath in _UPDATE_CRITICAL_FILES:
|
||||
path = root / relpath
|
||||
if not path.exists():
|
||||
# Missing file is suspicious but not necessarily fatal — a future
|
||||
# refactor may legitimately remove one of these. Skip and move on.
|
||||
continue
|
||||
try:
|
||||
py_compile.compile(str(path), doraise=True)
|
||||
except py_compile.PyCompileError as exc:
|
||||
return False, str(path), str(exc)
|
||||
except OSError as exc:
|
||||
return False, str(path), f"could not read: {exc}"
|
||||
with tempfile.TemporaryDirectory(prefix="hermes-syntax-check-") as tmpdir:
|
||||
for relpath in _UPDATE_CRITICAL_FILES:
|
||||
path = root / relpath
|
||||
if not path.exists():
|
||||
# Missing file is suspicious but not necessarily fatal — a future
|
||||
# refactor may legitimately remove one of these. Skip and move on.
|
||||
continue
|
||||
# Mirror the relative path under the tmpdir so two different
|
||||
# files with the same basename don't collide on the cfile name.
|
||||
cfile = Path(tmpdir) / (relpath.replace("/", "__") + "c")
|
||||
try:
|
||||
py_compile.compile(str(path), cfile=str(cfile), doraise=True)
|
||||
except py_compile.PyCompileError as exc:
|
||||
return False, str(path), str(exc)
|
||||
except OSError as exc:
|
||||
return False, str(path), f"could not read: {exc}"
|
||||
return True, None, None
|
||||
|
||||
|
||||
@@ -7639,9 +7927,7 @@ def _install_python_dependencies_with_optional_fallback(
|
||||
|
||||
|
||||
def _is_termux_env(env: dict[str, str] | None = None) -> bool:
|
||||
check = env or os.environ
|
||||
prefix = str(check.get("PREFIX", ""))
|
||||
return "com.termux" in prefix or prefix.startswith("/data/data/com.termux/")
|
||||
return _is_termux_startup_environment(env)
|
||||
|
||||
|
||||
def _is_android_python() -> bool:
|
||||
@@ -10295,11 +10581,11 @@ _BUILTIN_SUBCOMMANDS = frozenset(
|
||||
"computer-use",
|
||||
"config", "cron", "curator", "dashboard", "debug", "doctor",
|
||||
"dump", "fallback", "gateway", "hooks", "import", "insights",
|
||||
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
|
||||
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
|
||||
"model", "pairing", "plugins", "postinstall", "profile", "proxy",
|
||||
"send", "sessions", "setup",
|
||||
"skills", "slack", "status", "tools", "uninstall", "update",
|
||||
"version", "webhook", "whatsapp", "chat",
|
||||
"version", "webhook", "whatsapp", "chat", "secrets",
|
||||
# Help-ish invocations — plugin commands not being listed in
|
||||
# top-level --help is an acceptable trade-off for skipping an
|
||||
# expensive eager import of every bundled plugin module.
|
||||
@@ -10389,6 +10675,178 @@ def _plugin_cli_discovery_needed() -> bool:
|
||||
return True
|
||||
|
||||
|
||||
# Top-level commands for which ``_prepare_agent_startup`` runs discovery.
# ``None`` is the value of ``args.command`` when no subcommand was given.
_AGENT_COMMANDS = {None, "chat", "acp", "rl"}
# Command groups where only some nested subcommands trigger discovery.
# Maps command -> (name of the args attribute holding the nested subcommand,
# set of nested subcommand values that qualify).
_AGENT_SUBCOMMANDS = {
    "cron": ("cron_command", {"run", "tick"}),
    "gateway": ("gateway_command", {"run"}),
    "mcp": ("mcp_action", {"serve"}),
}
|
||||
|
||||
|
||||
def _prepare_agent_startup(args) -> None:
    """Run plugin, MCP and shell-hook discovery for agent-running commands.

    Nothing happens unless ``args.command`` is listed in ``_AGENT_COMMANDS``
    or its nested subcommand is listed in ``_AGENT_SUBCOMMANDS``. Each of the
    three discovery steps is isolated: a failure is logged and the next step
    still runs, so startup is never aborted from here.

    Args:
        args: Parsed CLI namespace. Reads ``command``, the nested-subcommand
            attribute named in ``_AGENT_SUBCOMMANDS``, and the optional
            ``accept_hooks`` flag.
    """
    command = args.command
    nested_attr, nested_values = _AGENT_SUBCOMMANDS.get(command, (None, None))
    runs_agent = command in _AGENT_COMMANDS
    if not runs_agent and nested_attr:
        runs_agent = getattr(args, nested_attr, None) in nested_values
    if not runs_agent:
        return

    accept_hooks = bool(getattr(args, "accept_hooks", False))

    # Step 1: plugins. Failures here are logged at warning level.
    try:
        from hermes_cli.plugins import discover_plugins

        discover_plugins()
    except Exception:
        logger.warning("plugin discovery failed at CLI startup", exc_info=True)

    # Step 2: MCP tools. No event loop is running during CLI/TUI startup, so
    # calling this inline is safe. It lives here rather than at
    # model_tools.py module scope to avoid freezing the gateway's event loop
    # on its first message via the same lazy import path (#16856).
    try:
        from tools.mcp_tool import discover_mcp_tools

        discover_mcp_tools()
    except Exception:
        logger.debug("MCP tool discovery failed at CLI startup", exc_info=True)

    # Step 3: shell hooks declared in the config.
    try:
        from hermes_cli.config import load_config
        from agent.shell_hooks import register_from_config

        register_from_config(load_config(), accept_hooks=accept_hooks)
    except Exception:
        logger.debug("shell-hook registration failed at CLI startup", exc_info=True)
|
||||
|
||||
|
||||
def _set_chat_arg_defaults(args) -> None:
|
||||
for attr, default in [
|
||||
("query", None),
|
||||
("model", None),
|
||||
("provider", None),
|
||||
("toolsets", None),
|
||||
("verbose", False),
|
||||
("resume", None),
|
||||
("continue_last", None),
|
||||
("worktree", False),
|
||||
]:
|
||||
if not hasattr(args, attr):
|
||||
setattr(args, attr, default)
|
||||
|
||||
|
||||
def _is_termux_fast_version_argv(argv: list[str]) -> bool:
|
||||
return argv in (["--version"], ["-V"], ["version"])
|
||||
|
||||
|
||||
def _try_termux_fast_cli_launch() -> bool:
    """Run obvious Termux non-TUI chat/oneshot/version paths on a light parser.

    Handles the invocation with only the parser returned by
    ``build_top_level_parser()`` (top-level plus ``chat``) when it is a plain
    version request, a oneshot, or a chat launch.

    Returns:
        ``True`` when the invocation was fully handled here. ``False`` when
        the caller should continue with the normal startup path: not Termux,
        ``HERMES_TERMUX_DISABLE_FAST_CLI=1``, help requested, a TUI launch,
        or a command other than chat. The oneshot path does not return; it
        ends the process via ``sys.exit`` with ``run_oneshot``'s result.
    """
    if not _is_termux_startup_environment():
        return False
    # Escape hatch: lets users turn the fast path off.
    if os.environ.get("HERMES_TERMUX_DISABLE_FAST_CLI") == "1":
        return False

    argv = sys.argv[1:]
    # Help output is left to the normal path.
    if "-h" in argv or "--help" in argv:
        return False
    # TUI launches are not handled by this function.
    if os.environ.get("HERMES_TUI") == "1" or "--tui" in argv:
        return False

    # Exact version request: print without the update check and stop.
    if _is_termux_fast_version_argv(argv):
        _print_version_info(check_updates=False)
        return True

    first = _first_positional_argv()
    has_oneshot = any(
        arg == "-z" or arg == "--oneshot" or arg.startswith("--oneshot=")
        for arg in argv
    )
    # NOTE(review): when a oneshot flag is present the first positional is not
    # checked, so a non-chat subcommand combined with -z would reach the light
    # parser below -- confirm that is intended.
    if not has_oneshot and first not in {None, "chat"}:
        return False

    # Imported lazily so the early-return paths above never import it.
    from hermes_cli._parser import build_top_level_parser

    parser, _subparsers, chat_parser = build_top_level_parser()
    chat_parser.set_defaults(func=cmd_chat)
    args = parser.parse_args(_coalesce_session_name_args(argv))

    # A version flag mixed with other arguments is only visible after parsing.
    if getattr(args, "version", False):
        _print_version_info(check_updates=False)
        return True

    if getattr(args, "oneshot", None):
        _prepare_agent_startup(args)
        from hermes_cli.oneshot import run_oneshot

        sys.exit(
            run_oneshot(
                args.oneshot,
                model=getattr(args, "model", None),
                provider=getattr(args, "provider", None),
                toolsets=getattr(args, "toolsets", None),
            )
        )

    # A resume/continue flag without a subcommand is treated as chat.
    if (args.resume or args.continue_last) and args.command is None:
        args.command = "chat"

    if args.command in {None, "chat"}:
        _set_chat_arg_defaults(args)
        _prepare_agent_startup(args)
        cmd_chat(args)
        return True

    return False
|
||||
|
||||
|
||||
def _try_termux_fast_tui_launch() -> bool:
    """Launch obvious Termux TUI invocations before building every subparser.

    `hermes --tui` is the hot path on phones. The full parser setup imports
    command modules for model, fallback, migrate, kanban, bundles, plugins,
    etc. even though the TUI immediately execs Node. On Termux only, parse
    with the lightweight top-level/chat parser and hand off to ``cmd_chat``
    when the invocation is unambiguously the built-in TUI/chat path.

    Returns:
        ``True`` when ``cmd_chat`` was invoked here, ``False`` when the
        caller should continue with the normal startup path.
    """
    if not _is_termux_startup_environment():
        return False

    argv = sys.argv[1:]
    if "-h" in argv or "--help" in argv:
        return False

    # The TUI can be requested through the environment or the flag.
    if not (os.environ.get("HERMES_TUI") == "1" or "--tui" in argv):
        return False

    if _first_positional_argv() not in {None, "chat"}:
        return False

    # Imported lazily so the early-return paths above never import it.
    from hermes_cli._parser import build_top_level_parser

    parser, _subparsers, chat_parser = build_top_level_parser()
    chat_parser.set_defaults(func=cmd_chat)
    args = parser.parse_args(_coalesce_session_name_args(sys.argv[1:]))

    # Preserve top-level behaviours whose semantics are not "launch chat/TUI".
    if getattr(args, "version", False) or getattr(args, "oneshot", None):
        return False
    if getattr(args, "command", None) not in {None, "chat"}:
        return False
    tui_requested = getattr(args, "tui", False) or os.environ.get("HERMES_TUI") == "1"
    if not tui_requested:
        return False

    cmd_chat(args)
    return True
|
||||
|
||||
|
||||
def main():
|
||||
"""Main entry point for hermes CLI."""
|
||||
# Force UTF-8 stdio on Windows before anything prints. No-op elsewhere.
|
||||
@@ -10406,6 +10864,11 @@ def main():
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if _try_termux_fast_tui_launch():
|
||||
return
|
||||
if _try_termux_fast_cli_launch():
|
||||
return
|
||||
|
||||
from hermes_cli._parser import build_top_level_parser
|
||||
|
||||
parser, subparsers, chat_parser = build_top_level_parser()
|
||||
@@ -10502,6 +10965,80 @@ def main():
|
||||
)
|
||||
fallback_parser.set_defaults(func=cmd_fallback)
|
||||
|
||||
# =========================================================================
|
||||
# secrets command — external secret managers (currently: Bitwarden)
|
||||
# =========================================================================
|
||||
secrets_parser = subparsers.add_parser(
|
||||
"secrets",
|
||||
help="Manage external secret sources (Bitwarden Secrets Manager)",
|
||||
description=(
|
||||
"Pull API keys from an external secret manager at process startup "
|
||||
"instead of storing them in ~/.hermes/.env. Currently supports "
|
||||
"Bitwarden Secrets Manager. See: "
|
||||
"https://hermes-agent.nousresearch.com/docs/user-guide/secrets/bitwarden"
|
||||
),
|
||||
)
|
||||
secrets_subparsers = secrets_parser.add_subparsers(dest="secrets_command")
|
||||
|
||||
secrets_bw = secrets_subparsers.add_parser(
|
||||
"bitwarden",
|
||||
aliases=["bw"],
|
||||
help="Bitwarden Secrets Manager integration",
|
||||
)
|
||||
|
||||
# Lazy import — only pays for itself when this subcommand is actually used.
|
||||
from hermes_cli import secrets_cli as _secrets_cli
|
||||
|
||||
_secrets_cli.register_cli(secrets_bw)
|
||||
|
||||
def _dispatch_secrets(args): # noqa: ANN001
|
||||
sub = getattr(args, "secrets_command", None)
|
||||
bw_sub = getattr(args, "secrets_bw_command", None)
|
||||
if sub in ("bitwarden", "bw") and bw_sub is not None:
|
||||
return args.func(args)
|
||||
secrets_parser.print_help()
|
||||
return 0
|
||||
|
||||
secrets_parser.set_defaults(func=_dispatch_secrets)
|
||||
|
||||
# =========================================================================
|
||||
# migrate command
|
||||
# =========================================================================
|
||||
from hermes_cli.migrate import cmd_migrate, cmd_migrate_xai
|
||||
|
||||
migrate_parser = subparsers.add_parser(
|
||||
"migrate",
|
||||
help="Migrate configuration for retired models or deprecated settings",
|
||||
description=(
|
||||
"Diagnose and (optionally) rewrite the active config.yaml to "
|
||||
"replace references to retired models or deprecated settings."
|
||||
),
|
||||
)
|
||||
migrate_subparsers = migrate_parser.add_subparsers(dest="migrate_type")
|
||||
|
||||
migrate_xai = migrate_subparsers.add_parser(
|
||||
"xai",
|
||||
help="Migrate xAI models scheduled for retirement on May 15, 2026",
|
||||
description=(
|
||||
"Scan config.yaml for references to xAI models retiring on "
|
||||
"May 15, 2026 and, with --apply, rewrite them in-place to the "
|
||||
"official replacements per the xAI migration guide. The original "
|
||||
"config.yaml is backed up before any rewrite."
|
||||
),
|
||||
)
|
||||
migrate_xai.add_argument(
|
||||
"--apply",
|
||||
action="store_true",
|
||||
help="Rewrite config.yaml in-place (default: dry-run, no writes)",
|
||||
)
|
||||
migrate_xai.add_argument(
|
||||
"--no-backup",
|
||||
action="store_true",
|
||||
help="Skip the timestamped backup of config.yaml when applying",
|
||||
)
|
||||
migrate_xai.set_defaults(func=cmd_migrate_xai)
|
||||
migrate_parser.set_defaults(func=cmd_migrate)
|
||||
|
||||
# =========================================================================
|
||||
# gateway command
|
||||
# =========================================================================
|
||||
@@ -13129,51 +13666,7 @@ Examples:
|
||||
# so introspection/management commands (hermes hooks list, cron
|
||||
# list, gateway status, mcp add, ...) don't pay discovery cost or
|
||||
# trigger consent prompts for hooks the user is still inspecting.
|
||||
# Groups with mixed admin/CRUD vs. agent-running entries narrow via
|
||||
# the nested subcommand (dest varies by parser).
|
||||
_AGENT_COMMANDS = {None, "chat", "acp", "rl"}
|
||||
_AGENT_SUBCOMMANDS = {
|
||||
"cron": ("cron_command", {"run", "tick"}),
|
||||
"gateway": ("gateway_command", {"run"}),
|
||||
"mcp": ("mcp_action", {"serve"}),
|
||||
}
|
||||
_sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
|
||||
if args.command in _AGENT_COMMANDS or (
|
||||
_sub_attr and getattr(args, _sub_attr, None) in _sub_set
|
||||
):
|
||||
_accept_hooks = bool(getattr(args, "accept_hooks", False))
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins
|
||||
|
||||
discover_plugins()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"plugin discovery failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
# MCP tool discovery — no event loop running in CLI/TUI startup,
|
||||
# so inline is safe. Moved here from model_tools.py module scope
|
||||
# to avoid freezing the gateway's event loop on its first message
|
||||
# via the same lazy import path (#16856).
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"MCP tool discovery failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from agent.shell_hooks import register_from_config
|
||||
|
||||
register_from_config(load_config(), accept_hooks=_accept_hooks)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"shell-hook registration failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
_prepare_agent_startup(args)
|
||||
|
||||
# Handle top-level --oneshot / -z: single-shot mode, stdout = final
|
||||
# response only, nothing else. Bypasses cli.py entirely.
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
"""CLI handlers for ``hermes migrate ...``.
|
||||
|
||||
Currently exposes only ``hermes migrate xai`` — diagnoses and (with --apply)
|
||||
rewrites references to xAI models retired on May 15, 2026.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
|
||||
def cmd_migrate(args: Any) -> int:
    """Route ``hermes migrate <subtype>`` to the matching handler.

    Args:
        args: Parsed CLI namespace; only its optional ``migrate_type``
            attribute is consulted here.

    Returns:
        The handler's exit code for a known subtype, or ``2`` after printing
        a usage line to stderr when the subtype is missing or unrecognised.
    """
    migrate_type = getattr(args, "migrate_type", None)
    if migrate_type != "xai":
        # Only one migration exists today; anything else is a usage error.
        print("usage: hermes migrate xai [--apply] [--no-backup]", file=sys.stderr)
        return 2
    return cmd_migrate_xai(args)
|
||||
|
||||
|
||||
def cmd_migrate_xai(args: Any) -> int:
    """Report retired xAI model references and optionally rewrite config.yaml.

    Without ``--apply`` this is a dry run: the findings are printed and
    nothing is written. With ``--apply`` the work is delegated to
    ``apply_migration``; a backup is requested unless ``--no-backup`` was
    given.

    Args:
        args: Parsed CLI namespace; the optional ``apply`` and ``no_backup``
            attributes are read (both default to False).

    Returns:
        ``0`` when there is nothing to do, after a dry run, or after a
        successful apply; ``1`` when config.yaml cannot be located or the
        migration raises.
    """
    from hermes_cli.xai_retirement import (
        MIGRATION_GUIDE_URL,
        RETIREMENT_DATE,
        apply_migration,
        find_retired_xai_refs,
        format_issue,
    )

    write_changes = bool(getattr(args, "apply", False))
    skip_backup = bool(getattr(args, "no_backup", False))

    retired_refs = find_retired_xai_refs(load_config())

    # Banner.
    banner = color(
        f"◆ xAI Model Retirement Migration ({RETIREMENT_DATE})",
        Colors.CYAN, Colors.BOLD,
    )
    print()
    print(banner)
    print()

    if not retired_refs:
        ok_mark = color('✓', Colors.GREEN)
        print(f" {ok_mark} No retired xAI models in config — nothing to migrate.")
        return 0

    # Findings list, followed by a pointer to the migration guide.
    print(f" Found {len(retired_refs)} retired xAI model reference(s):")
    print()
    for ref in retired_refs:
        warn_mark = color('⚠', Colors.YELLOW)
        print(f" {warn_mark} {format_issue(ref)}")
    print()
    arrow = color('→', Colors.CYAN)
    print(f" {arrow} Migration guide: {MIGRATION_GUIDE_URL}")
    print()

    config_path = _resolve_config_path()

    if not write_changes:
        print(color("Dry-run mode — no changes written.", Colors.DIM))
        hint = (
            "Re-run with `hermes migrate xai --apply` to rewrite "
            f"{config_path} in-place (backup created automatically)."
        )
        print(color(hint, Colors.DIM))
        return 0

    if not config_path or not config_path.exists():
        fail_mark = color('✗', Colors.RED)
        print(
            f" {fail_mark} Could not locate config.yaml "
            f"(looked at: {config_path})",
            file=sys.stderr,
        )
        return 1

    try:
        outcome = apply_migration(
            config_path=config_path,
            issues=retired_refs,
            backup=not skip_backup,
        )
    except Exception as exc:
        # Top-level CLI boundary: report the failure instead of a traceback.
        fail_mark = color('✗', Colors.RED)
        print(
            f" {fail_mark} Migration failed: {exc}",
            file=sys.stderr,
        )
        return 1

    if not outcome.config_changed:
        warn_mark = color('⚠', Colors.YELLOW)
        print(f" {warn_mark} No changes written.")
        return 0

    if outcome.backup_path is not None:
        ok_mark = color('✓', Colors.GREEN)
        print(f" {ok_mark} Backup: {outcome.backup_path}")
    ok_mark = color('✓', Colors.GREEN)
    print(
        f" {ok_mark} Updated {len(outcome.issues_resolved)} "
        f"slot(s) in {outcome.file_path}"
    )
    print()
    print(color(
        "Run `hermes doctor` to confirm no retired xAI models remain.",
        Colors.DIM,
    ))
    return 0
|
||||
|
||||
|
||||
def _resolve_config_path() -> Path:
    """Return the path ``<hermes home>/config.yaml``.

    The path is built without touching the filesystem; callers decide what
    to do when the file does not exist.
    """
    from hermes_cli.config import get_hermes_home

    hermes_home = get_hermes_home()
    return hermes_home / "config.yaml"
|
||||
@@ -74,8 +74,12 @@ class NousSubscriptionFeatures:
|
||||
def modal(self) -> NousFeatureState:
|
||||
return self.features["modal"]
|
||||
|
||||
@property
|
||||
def app_tools(self) -> NousFeatureState:
|
||||
return self.features["app_tools"]
|
||||
|
||||
def items(self) -> Iterable[NousFeatureState]:
|
||||
ordered = ("web", "image_gen", "tts", "browser", "modal")
|
||||
ordered = ("web", "image_gen", "tts", "browser", "modal", "app_tools")
|
||||
for key in ordered:
|
||||
yield self.features[key]
|
||||
|
||||
@@ -225,6 +229,22 @@ def _resolve_browser_feature_state(
|
||||
return "local", available, active, False
|
||||
|
||||
|
||||
def _read_portal_app_tools_enabled(config: Optional[Dict[str, object]] = None) -> bool:
|
||||
"""Return True when the portal.app_tools config flag is on."""
|
||||
if config is not None:
|
||||
# Fast path: use the pre-loaded config snapshot from the caller
|
||||
import os
|
||||
env_val = os.getenv("PORTAL_APP_TOOLS")
|
||||
if env_val is not None:
|
||||
return is_truthy_value(env_val)
|
||||
portal = config.get("portal")
|
||||
if isinstance(portal, dict):
|
||||
return bool(portal.get("app_tools", True))
|
||||
return True
|
||||
from tools.tool_backend_helpers import portal_app_tools_enabled
|
||||
return portal_app_tools_enabled()
|
||||
|
||||
|
||||
def get_nous_subscription_features(
|
||||
config: Optional[Dict[str, object]] = None,
|
||||
) -> NousSubscriptionFeatures:
|
||||
@@ -313,6 +333,8 @@ def get_nous_subscription_features(
|
||||
managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
|
||||
managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
|
||||
managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
|
||||
app_gw_ready = bool(managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("tools"))
|
||||
app_config_on = _read_portal_app_tools_enabled(config)
|
||||
modal_state = resolve_modal_backend_state(
|
||||
modal_mode,
|
||||
has_direct=direct_modal,
|
||||
@@ -476,6 +498,17 @@ def get_nous_subscription_features(
|
||||
current_provider="Modal" if terminal_backend == "modal" else terminal_backend or "local",
|
||||
explicit_configured=terminal_backend == "modal",
|
||||
),
|
||||
"app_tools": NousFeatureState(
|
||||
key="app_tools",
|
||||
label="App tools (500+ apps)",
|
||||
included_by_default=True,
|
||||
available=app_gw_ready,
|
||||
active=app_gw_ready and app_config_on,
|
||||
managed_by_nous=app_gw_ready and app_config_on,
|
||||
direct_override=False,
|
||||
toolset_enabled=app_config_on,
|
||||
current_provider="Nous Tool Gateway",
|
||||
),
|
||||
}
|
||||
|
||||
return NousSubscriptionFeatures(
|
||||
|
||||
@@ -1051,7 +1051,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8, -1) # dim gray
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim gray
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
|
||||
@@ -1196,7 +1196,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8, -1)
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
|
||||
curses.curs_set(0)
|
||||
elif key in {curses.KEY_ENTER, 10, 13}:
|
||||
if cursor < n_plugins:
|
||||
@@ -1228,7 +1228,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8, -1)
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
|
||||
curses.curs_set(0)
|
||||
elif key in {27, ord("q")}:
|
||||
# Save plugin changes on exit
|
||||
|
||||
@@ -35,6 +35,7 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_cli import profiles as profiles_mod
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -109,8 +110,7 @@ def _collect_skills(profile_dir: Path) -> list[str]:
|
||||
return []
|
||||
names: list[str] = []
|
||||
for md in skills_dir.rglob("SKILL.md"):
|
||||
path_str = str(md)
|
||||
if "/.hub/" in path_str or "/.git/" in path_str:
|
||||
if is_excluded_skill_path(md):
|
||||
continue
|
||||
try:
|
||||
rel = md.relative_to(skills_dir)
|
||||
@@ -201,7 +201,7 @@ def describe_profile(
|
||||
skill_list = "\n".join(f" - {n}" for n in skill_names) or " (no skills installed)"
|
||||
skill_count = sum(
|
||||
1 for _ in (profile_dir / "skills").rglob("SKILL.md")
|
||||
if "/.hub/" not in str(_) and "/.git/" not in str(_)
|
||||
if not is_excluded_skill_path(_)
|
||||
) if (profile_dir / "skills").is_dir() else 0
|
||||
|
||||
# Read model + provider from the profile's config.
|
||||
|
||||
@@ -70,6 +70,8 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
@@ -463,7 +465,9 @@ def _count_skills(staged: Path) -> int:
|
||||
skills_dir = staged / "skills"
|
||||
if not skills_dir.is_dir():
|
||||
return 0
|
||||
return sum(1 for _ in skills_dir.rglob("SKILL.md"))
|
||||
return sum(
|
||||
1 for p in skills_dir.rglob("SKILL.md") if not is_excluded_skill_path(p)
|
||||
)
|
||||
|
||||
|
||||
def plan_install(
|
||||
|
||||
+48
-3
@@ -30,6 +30,8 @@ from dataclasses import dataclass
|
||||
from pathlib import Path, PurePosixPath, PureWindowsPath
|
||||
from typing import List, Optional
|
||||
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
_PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
|
||||
|
||||
# Directories bootstrapped inside every new profile
|
||||
@@ -485,8 +487,9 @@ def _count_skills(profile_dir: Path) -> int:
|
||||
return 0
|
||||
count = 0
|
||||
for md in skills_dir.rglob("SKILL.md"):
|
||||
if "/.hub/" not in str(md) and "/.git/" not in str(md):
|
||||
count += 1
|
||||
if is_excluded_skill_path(md):
|
||||
continue
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
@@ -902,7 +905,49 @@ def delete_profile(name: str, yes: bool = False) -> Path:
|
||||
|
||||
# 4. Remove profile directory
|
||||
try:
|
||||
shutil.rmtree(profile_dir)
|
||||
def _make_writable(func, path, exc):
|
||||
"""onexc/onerror handler: add +w on PermissionError so rmtree can proceed.
|
||||
|
||||
Handles two cases on NixOS (and other systems with read-only
|
||||
copies from immutable stores):
|
||||
1. The path itself isn't writable (e.g. a file with mode 0444)
|
||||
2. The *parent* directory isn't writable (e.g. mode 0555)
|
||||
|
||||
Compatible with both the ``onexc`` API (3.12+, receives an
|
||||
exception instance) and the ``onerror`` API (3.11-, receives
|
||||
``sys.exc_info()`` tuple).
|
||||
"""
|
||||
import stat as _stat
|
||||
import sys as _sys
|
||||
|
||||
# Normalise the two callback signatures:
|
||||
# onexc(func, path, exc_instance) — 3.12+
|
||||
# onerror(func, path, exc_info_tuple) — 3.11
|
||||
if isinstance(exc, tuple):
|
||||
exc = exc[1] # exc_info → actual exception object
|
||||
|
||||
if isinstance(exc, PermissionError):
|
||||
# Make the path writable
|
||||
try:
|
||||
os.chmod(path, os.stat(path).st_mode | _stat.S_IWUSR)
|
||||
except OSError:
|
||||
pass
|
||||
# Also make the parent writable (needed for unlink/rmdir)
|
||||
parent = os.path.dirname(path)
|
||||
if parent:
|
||||
try:
|
||||
os.chmod(parent, os.stat(parent).st_mode | _stat.S_IWUSR)
|
||||
except OSError:
|
||||
pass
|
||||
func(path)
|
||||
else:
|
||||
raise
|
||||
|
||||
# ``onexc`` was added in 3.12; fall back to ``onerror`` on 3.11.
|
||||
try:
|
||||
shutil.rmtree(profile_dir, onexc=_make_writable)
|
||||
except TypeError:
|
||||
shutil.rmtree(profile_dir, onerror=_make_writable)
|
||||
print(f"✓ Removed {profile_dir}")
|
||||
except Exception as e:
|
||||
print(f"⚠ Could not remove {profile_dir}: {e}")
|
||||
|
||||
@@ -100,6 +100,63 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _host_derived_api_key(base_url: str) -> str:
    """Read ``<VENDOR>_API_KEY`` from the environment, deriving VENDOR from the URL host.

    Examples (assuming ``base_url_hostname`` yields the bare hostname):
        https://api.deepseek.com/v1      → DEEPSEEK_API_KEY
        https://api.groq.com/openai/v1   → GROQ_API_KEY
        https://api.mistral.ai/v1        → MISTRAL_API_KEY
        https://generativelanguage.googleapis.com/v1beta/openai/ → GOOGLEAPIS_API_KEY

    The vendor is the second-to-last hostname label after leading ``api`` /
    ``www`` labels are dropped. Because the label is taken from the right,
    a lookalike such as ``api.deepseek.com.attacker.test`` resolves to
    ``attacker`` and never to the spoofed vendor's key.

    Args:
        base_url: Endpoint URL whose host names the vendor.

    Returns:
        The stripped environment value, or "" when no hostname is found,
        the host looks like an IP / ``localhost`` / contains ``:``, fewer
        than two labels remain, the vendor label does not start with a
        letter, or the vendor is OPENAI / OPENROUTER / OLLAMA (those keys
        are resolved by their own host-gated paths).
    """
    host = base_url_hostname(base_url)
    if not host:
        return ""

    # Reject any host whose final label contains a digit. This covers IPv4
    # literals; note it also rejects hosts whose last label merely contains
    # a digit (for example punycode TLDs such as ``xn--p1ai``).
    final_label = host.split(".")[-1]
    if any(ch.isdigit() for ch in final_label):
        return ""
    if host == "localhost" or ":" in host:
        return ""

    parts = [piece for piece in host.split(".") if piece]
    # Skip leading API/CDN-style prefixes.
    skip = 0
    while skip < len(parts) and parts[skip] in ("api", "www"):
        skip += 1
    parts = parts[skip:]
    if len(parts) < 2:
        return ""

    # Second-to-last label: "deepseek" for deepseek.com, "attacker" for
    # api.deepseek.com.attacker.test.
    vendor = parts[-2]

    # Replace every non-alphanumeric character with "_" and upper-case the
    # result so it can be used as an environment variable name stem.
    env_stem = "".join(ch if ch.isalnum() else "_" for ch in vendor).upper()
    if not env_stem or not env_stem[0].isalpha():
        return ""

    # These vendors are handled by explicit host-gated lookups elsewhere.
    if env_stem in ("OPENAI", "OPENROUTER", "OLLAMA"):
        return ""

    return (os.getenv(f"{env_stem}_API_KEY", "") or "").strip()
|
||||
|
||||
|
||||
def _auto_detect_local_model(base_url: str) -> str:
|
||||
"""Query a local server for its model name when only one model is loaded."""
|
||||
if not base_url:
|
||||
@@ -471,6 +528,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
||||
"api_key": resolved_api_key,
|
||||
"model": entry.get("default_model", ""),
|
||||
}
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
result["extra_body"] = dict(extra_body)
|
||||
# The v11→v12 migration writes the API mode under the new
|
||||
# ``transport`` field, but hand-edited configs may still
|
||||
# use the legacy ``api_mode`` spelling. Accept both —
|
||||
@@ -496,6 +556,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
||||
"api_key": resolved_api_key,
|
||||
"model": entry.get("default_model", ""),
|
||||
}
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
result["extra_body"] = dict(extra_body)
|
||||
api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
|
||||
if api_mode:
|
||||
result["api_mode"] = api_mode
|
||||
@@ -539,6 +602,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
||||
result["key_env"] = key_env
|
||||
if provider_key:
|
||||
result["provider_key"] = provider_key
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
result["extra_body"] = dict(extra_body)
|
||||
api_mode = _parse_api_mode(entry.get("api_mode"))
|
||||
if api_mode:
|
||||
result["api_mode"] = api_mode
|
||||
@@ -550,6 +616,13 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
||||
return None
|
||||
|
||||
|
||||
def _custom_provider_request_overrides(custom_provider: Dict[str, Any]) -> Dict[str, Any]:
|
||||
extra_body = custom_provider.get("extra_body")
|
||||
if not isinstance(extra_body, dict) or not extra_body:
|
||||
return {}
|
||||
return {"extra_body": dict(extra_body)}
|
||||
|
||||
|
||||
def _resolve_named_custom_runtime(
|
||||
*,
|
||||
requested_provider: str,
|
||||
@@ -582,10 +655,17 @@ def _resolve_named_custom_runtime(
|
||||
if pool_result:
|
||||
pool_result["source"] = "direct-alias"
|
||||
return pool_result
|
||||
_da_is_openai_url = base_url_host_matches(base_url, "openai.com") or base_url_host_matches(base_url, "openai.azure.com")
|
||||
_da_is_openrouter = base_url_host_matches(base_url, "openrouter.ai")
|
||||
api_key_candidates = [
|
||||
(explicit_api_key or "").strip(),
|
||||
os.getenv("OPENAI_API_KEY", "").strip(),
|
||||
os.getenv("OPENROUTER_API_KEY", "").strip(),
|
||||
# Gate env key fallbacks on authoritative hosts (#28660)
|
||||
(os.getenv("OPENAI_API_KEY", "").strip() if _da_is_openai_url else ""),
|
||||
(os.getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter else ""),
|
||||
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
|
||||
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
|
||||
# intuitive match without configuring `custom_providers` first.
|
||||
_host_derived_api_key(base_url),
|
||||
]
|
||||
api_key = next(
|
||||
(c for c in api_key_candidates if has_usable_secret(c)),
|
||||
@@ -619,14 +699,27 @@ def _resolve_named_custom_runtime(
|
||||
model_name = custom_provider.get("model")
|
||||
if model_name:
|
||||
pool_result["model"] = model_name
|
||||
request_overrides = _custom_provider_request_overrides(custom_provider)
|
||||
if request_overrides:
|
||||
pool_result["request_overrides"] = {
|
||||
**dict(pool_result.get("request_overrides") or {}),
|
||||
**request_overrides,
|
||||
}
|
||||
return pool_result
|
||||
|
||||
_cp_is_openai_url = base_url_host_matches(base_url, "openai.com") or base_url_host_matches(base_url, "openai.azure.com")
|
||||
_cp_is_openrouter = base_url_host_matches(base_url, "openrouter.ai")
|
||||
api_key_candidates = [
|
||||
(explicit_api_key or "").strip(),
|
||||
str(custom_provider.get("api_key", "") or "").strip(),
|
||||
os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
|
||||
os.getenv("OPENAI_API_KEY", "").strip(),
|
||||
os.getenv("OPENROUTER_API_KEY", "").strip(),
|
||||
# Gate provider env keys on their authoritative hosts — sending
|
||||
# OPENAI_API_KEY to a local-llm endpoint leaks credentials (#28660).
|
||||
(os.getenv("OPENAI_API_KEY", "").strip() if _cp_is_openai_url else ""),
|
||||
(os.getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter else ""),
|
||||
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host as a final
|
||||
# fallback when key_env wasn't set explicitly.
|
||||
_host_derived_api_key(base_url),
|
||||
]
|
||||
api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "")
|
||||
|
||||
@@ -643,6 +736,9 @@ def _resolve_named_custom_runtime(
|
||||
# provider name differs from the actual model string the API expects.
|
||||
if custom_provider.get("model"):
|
||||
result["model"] = custom_provider["model"]
|
||||
request_overrides = _custom_provider_request_overrides(custom_provider)
|
||||
if request_overrides:
|
||||
result["request_overrides"] = request_overrides
|
||||
return result
|
||||
|
||||
|
||||
@@ -707,7 +803,15 @@ def _resolve_openrouter_runtime(
|
||||
# OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
|
||||
# provider (issues #420, #560).
|
||||
_is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
|
||||
if _is_openrouter_url:
|
||||
# Also treat explicitly-configured OpenRouter mirrors/proxies as OpenRouter
|
||||
# for key selection — if the user set OPENROUTER_BASE_URL or requested
|
||||
# provider=openrouter explicitly, OPENROUTER_API_KEY should still be used.
|
||||
_is_openrouter_context = _is_openrouter_url or (
|
||||
requested_norm == "openrouter"
|
||||
and (env_openrouter_base_url or base_url == env_openrouter_base_url)
|
||||
and base_url == (env_openrouter_base_url or "").rstrip("/")
|
||||
)
|
||||
if _is_openrouter_context:
|
||||
api_key_candidates = [
|
||||
explicit_api_key,
|
||||
os.getenv("OPENROUTER_API_KEY"),
|
||||
@@ -721,13 +825,24 @@ def _resolve_openrouter_runtime(
|
||||
# "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
|
||||
# hostname is a look-alike (ollama.com.attacker.test) must not
|
||||
# receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
|
||||
_is_ollama_url = base_url_host_matches(base_url, "ollama.com")
|
||||
_is_ollama_url = base_url_host_matches(base_url, "ollama.com")
|
||||
_is_openai_url = base_url_host_matches(base_url, "openai.com")
|
||||
_is_openai_azure = base_url_host_matches(base_url, "openai.azure.com")
|
||||
# Gate each provider key on its own host — sending OPENAI_API_KEY or
|
||||
# OPENROUTER_API_KEY to an unrelated custom endpoint (DeepSeek, Groq,
|
||||
# Mistral, …) leaks credentials and causes 401s (issue #28660).
|
||||
# Mirrors the OLLAMA_API_KEY host-gate added in GHSA-76xc-57q6-vm5m.
|
||||
api_key_candidates = [
|
||||
explicit_api_key,
|
||||
(cfg_api_key if use_config_base_url else ""),
|
||||
(os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
|
||||
os.getenv("OPENAI_API_KEY"),
|
||||
os.getenv("OPENROUTER_API_KEY"),
|
||||
(os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
|
||||
(os.getenv("OPENAI_API_KEY") if (_is_openai_url or _is_openai_azure) else ""),
|
||||
(os.getenv("OPENROUTER_API_KEY") if _is_openrouter_url else ""),
|
||||
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
|
||||
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
|
||||
# intuitive match. Helper returns "" for IPs/loopback and for env
|
||||
# vars already handled by the explicit host-gated paths above.
|
||||
_host_derived_api_key(base_url),
|
||||
]
|
||||
api_key = next(
|
||||
(str(candidate or "").strip() for candidate in api_key_candidates if has_usable_secret(candidate)),
|
||||
|
||||
@@ -0,0 +1,445 @@
|
||||
"""CLI handlers for ``hermes secrets bitwarden ...``.
|
||||
|
||||
Subcommands:
|
||||
setup — interactive wizard: install bws, prompt for token + project, test fetch
|
||||
status — show current config + binary version + last fetch outcome
|
||||
sync — run a fetch right now and show what would be applied (dry-run friendly)
|
||||
disable — flip ``secrets.bitwarden.enabled`` to False
|
||||
install — just download the bws binary (no token / project required)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import getpass
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table
|
||||
|
||||
from agent.secret_sources import bitwarden as bw
|
||||
from hermes_cli.config import (
|
||||
get_env_path,
|
||||
load_config,
|
||||
save_config,
|
||||
save_env_value,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Argparse wiring — called from hermes_cli.main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def register_cli(parent_parser: argparse.ArgumentParser) -> None:
    """Register the ``bitwarden`` subcommands on ``parent_parser``.

    Adds ``setup``, ``status``, ``sync``, ``disable`` and ``install``; the
    chosen subcommand is stored under ``secrets_bw_command`` and each
    sub-parser sets ``func`` to its handler.

    Args:
        parent_parser: The parser that should own the subcommand tree.
    """
    commands = parent_parser.add_subparsers(dest="secrets_bw_command")

    # setup
    setup_parser = commands.add_parser(
        "setup",
        help="Interactive wizard: install bws, store access token, pick project",
    )
    setup_parser.add_argument(
        "--project-id",
        help="Pre-select a project UUID instead of prompting",
    )
    setup_parser.add_argument(
        "--access-token",
        help="Provide the access token non-interactively (will be stored in .env)",
    )
    setup_parser.set_defaults(func=cmd_setup)

    # status
    status_parser = commands.add_parser("status", help="Show config + binary + last fetch")
    status_parser.set_defaults(func=cmd_status)

    # sync
    sync_parser = commands.add_parser("sync", help="Fetch secrets now and report what changed")
    sync_parser.add_argument(
        "--apply",
        action="store_true",
        help="Actually export the secrets into the current shell's env (default: dry-run)",
    )
    sync_parser.set_defaults(func=cmd_sync)

    # disable
    disable_parser = commands.add_parser("disable", help="Turn off the Bitwarden integration")
    disable_parser.set_defaults(func=cmd_disable)

    # install
    install_parser = commands.add_parser(
        "install",
        help=f"Download and verify the pinned bws binary (v{bw._BWS_VERSION})",
    )
    install_parser.add_argument(
        "--force",
        action="store_true",
        help="Re-download even if a managed copy already exists",
    )
    install_parser.set_defaults(func=cmd_install)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def cmd_setup(args: argparse.Namespace) -> int:
    """Interactive wizard that enables the Bitwarden Secrets Manager integration.

    Steps, in order:
      1. Locate the ``bws`` binary, downloading it when none is found.
      2. Obtain the access token (``--access-token`` or a hidden prompt),
         store it via ``save_env_value`` and export it into ``os.environ``.
      3. Choose a project (``--project-id`` or an interactive pick list).
      4. Run an uncached test fetch and list the secret names found.
      5. Write ``secrets.bitwarden`` settings to the config and save it.

    Args:
        args: Parsed CLI namespace; ``access_token`` and ``project_id`` are read.

    Returns:
        ``0`` once the integration is enabled; ``1`` when the binary cannot
        be installed, the token is empty, no project is available, or the
        test fetch fails.
    """
    # Local import: ``rich`` is already a dependency of this module. Dynamic
    # text placed inside markup strings is escaped so stray brackets in it
    # cannot be parsed as Rich markup tags.
    from rich.markup import escape

    console = Console()
    console.print(
        Panel.fit(
            "[bold]Bitwarden Secrets Manager setup[/bold]\n\n"
            "Need an access token? In the Bitwarden web app:\n"
            # The bracket is escaped: unescaped, Rich treats "[your account]"
            # as a markup tag and drops it from the rendered text.
            " Secrets Manager → Machine accounts → \\[your account] →\n"
            " Access tokens → Create access token\n\n"
            "Copy the token (starts with [cyan]0.[/cyan]…) — it cannot be retrieved later.",
            border_style="cyan",
        )
    )

    # ------------------------------------------------------------------ binary
    console.print()
    console.print("[bold]Step 1[/bold] Install the bws CLI")
    try:
        binary = bw.find_bws(install_if_missing=False)
        if binary is None:
            console.print(" No bws on PATH — downloading…")
            binary = bw.install_bws()
        version = _bws_version(binary)
        console.print(f" [green]✓[/green] {binary} ({version})")
    except Exception as exc:  # noqa: BLE001
        console.print(f" [red]✗ Could not install bws: {escape(str(exc))}[/red]")
        console.print(
            " Manual install: "
            "https://github.com/bitwarden/sdk-sm/releases"
        )
        return 1

    # ------------------------------------------------------------------- token
    console.print()
    console.print("[bold]Step 2[/bold] Provide your access token")
    cfg = load_config()
    secrets_cfg = (cfg.setdefault("secrets", {})
                   .setdefault("bitwarden", {}))
    token_env = secrets_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")

    token = (args.access_token or "").strip()
    if not token:
        token = getpass.getpass(f" Paste access token ({token_env}): ").strip()
    if not token:
        console.print(" [red]Empty token, aborting.[/red]")
        return 1
    if not token.startswith("0."):
        console.print(
            " [yellow]Warning: token doesn't start with '0.' — usually that means "
            "you pasted something other than a BSM access token. Continuing anyway.[/yellow]"
        )

    save_env_value(token_env, token)
    os.environ[token_env] = token  # so the test fetch below sees it
    console.print(f" [green]✓[/green] stored in {get_env_path()} as {token_env}")

    # ------------------------------------------------------------------- project
    # Evaluated once; it decides both whether to prompt and the step number.
    preselected_project = (args.project_id or "").strip()
    if preselected_project:
        project_id = preselected_project
    else:
        console.print()
        console.print("[bold]Step 3[/bold] Pick a project")
        project_id = ""
        projects = _list_projects(binary, token, console)
        if projects is None:
            return 1
        if not projects:
            console.print(" [yellow]No projects visible to this machine account.[/yellow]")
            console.print(
                " In the Bitwarden web app, open the machine account → Projects tab "
                "and grant it access to at least one project."
            )
            return 1

        table = Table(show_header=True, header_style="bold")
        table.add_column("#", style="cyan", width=4)
        table.add_column("Name")
        table.add_column("ID", style="dim")
        for i, p in enumerate(projects, 1):
            table.add_row(str(i), p.get("name", "?"), p.get("id", "?"))
        console.print(table)

        # Keep prompting until a valid 1-based index is entered.
        while True:
            choice = console.input(f" Select project [1-{len(projects)}]: ").strip()
            if not choice:
                continue
            try:
                idx = int(choice)
            except ValueError:
                console.print(" [red]Enter a number.[/red]")
                continue
            if 1 <= idx <= len(projects):
                project_id = projects[idx - 1]["id"]
                break
            console.print(f" [red]Out of range — pick 1-{len(projects)}.[/red]")

    # ------------------------------------------------------------------- test
    console.print()
    # The project step is skipped (and not numbered) when --project-id was given.
    step_num = 3 if preselected_project else 4
    console.print(f"[bold]Step {step_num}[/bold] Test fetch")
    try:
        secrets, warnings = bw.fetch_bitwarden_secrets(
            access_token=token,
            project_id=project_id,
            binary=binary,
            use_cache=False,
        )
    except Exception as exc:  # noqa: BLE001
        console.print(f" [red]✗ Fetch failed: {escape(str(exc))}[/red]")
        return 1

    if not secrets:
        console.print(" [yellow]Fetch succeeded but the project has no secrets.[/yellow]")
    else:
        table = Table(show_header=True, header_style="bold")
        table.add_column("Name", style="cyan")
        table.add_column("Status")
        for key in sorted(secrets):
            if key == token_env:
                status = "[dim]bootstrap token — never overrides itself[/dim]"
            elif os.environ.get(key):
                status = "[yellow]already set in env (will be overwritten)[/yellow]"
            else:
                status = "[green]new[/green]"
            table.add_row(key, status)
        console.print(table)
    for w in warnings:
        console.print(f" [yellow]warning:[/yellow] {escape(str(w))}")

    # ------------------------------------------------------------------- save
    # ``setdefault`` keeps any values the user already customised.
    secrets_cfg["enabled"] = True
    secrets_cfg["project_id"] = project_id
    secrets_cfg.setdefault("access_token_env", token_env)
    secrets_cfg.setdefault("cache_ttl_seconds", 300)
    secrets_cfg.setdefault("override_existing", True)
    secrets_cfg.setdefault("auto_install", True)
    save_config(cfg)

    console.print()
    console.print(
        "[green]✓ Bitwarden Secrets Manager is enabled.[/green] "
        "Secrets will be pulled at the start of every Hermes process."
    )
    console.print(
        " Status: [cyan]hermes secrets bitwarden status[/cyan]\n"
        " Refresh: [cyan]hermes secrets bitwarden sync[/cyan]\n"
        " Disable: [cyan]hermes secrets bitwarden disable[/cyan]"
    )
    return 0
|
||||
|
||||
|
||||
def cmd_status(args: argparse.Namespace) -> int:
    """Print a summary panel of the Bitwarden Secrets Manager configuration.

    Reads the ``secrets.bitwarden`` section of the loaded config, reports
    whether the integration is enabled, which env var holds the access token
    and whether it is currently set, the configured project, and whether a
    ``bws`` binary can be located (without installing one).

    Always returns 0; problems are reported as hints below the panel.
    """
    console = Console()
    settings = (load_config().get("secrets") or {}).get("bitwarden") or {}

    is_enabled = bool(settings.get("enabled"))
    token_var = settings.get("access_token_env", "BWS_ACCESS_TOKEN")
    project = settings.get("project_id", "")
    has_token = bool(os.environ.get(token_var))

    summary = Table(show_header=False, box=None, padding=(0, 2))
    summary.add_column("", style="bold")
    summary.add_column("")
    config_rows = (
        ("Enabled", _yn(is_enabled)),
        ("Token env var", token_var),
        ("Token in env", _yn(has_token)),
        ("Project ID", project or "[dim](unset)[/dim]"),
        ("Override existing", _yn(bool(settings.get("override_existing", False)))),
        ("Cache TTL (s)", str(settings.get("cache_ttl_seconds", 300))),
        ("Auto-install", _yn(bool(settings.get("auto_install", True)))),
    )
    for label, value in config_rows:
        summary.add_row(label, value)

    # Only look for an existing binary; a status command must not install one.
    bws_path = bw.find_bws(install_if_missing=False)
    if bws_path:
        bws_cell = f"{bws_path} ({_bws_version(bws_path)})"
    else:
        bws_cell = "[yellow]not installed[/yellow]"
    summary.add_row("bws binary", bws_cell)

    console.print(Panel(summary, title="Bitwarden Secrets Manager", border_style="cyan"))

    if is_enabled:
        # Enabled: point out whatever is still missing for a fetch to work.
        if not has_token:
            console.print(
                f"\n [yellow]Enabled but {token_var} is not set — Hermes will skip BSM "
                "and warn on next startup.[/yellow]"
            )
        if not project:
            console.print(
                "\n [yellow]Enabled but no project_id — nothing to fetch.[/yellow]"
            )
    else:
        console.print("\n Run [cyan]hermes secrets bitwarden setup[/cyan] to enable.")
    return 0
|
||||
|
||||
|
||||
def cmd_sync(args: argparse.Namespace) -> int:
    """Fetch secrets from Bitwarden (bypassing the cache) and report per-key actions.

    Without ``args.apply`` this is a dry run that only prints what would be
    exported. With ``args.apply`` each eligible secret is written into
    ``os.environ`` of this process. The bootstrap token variable is never
    touched, and already-set variables are skipped unless overriding is
    enabled (``override_existing`` in config, or ``args.apply``).

    Returns 1 when the integration is disabled, the token or project id is
    missing, or the fetch raises; otherwise 0.
    """
    console = Console()
    settings = (load_config().get("secrets") or {}).get("bitwarden") or {}
    if not settings.get("enabled"):
        console.print(
            "[yellow]Bitwarden integration is disabled. Run "
            "`hermes secrets bitwarden setup` first.[/yellow]"
        )
        return 1

    token_env = settings.get("access_token_env", "BWS_ACCESS_TOKEN")
    access_token = os.environ.get(token_env, "").strip()
    if not access_token:
        console.print(f"[red]{token_env} is not set.[/red]")
        return 1

    project = settings.get("project_id", "")
    if not project:
        console.print("[red]No project_id configured.[/red]")
        return 1

    try:
        secrets, warnings = bw.fetch_bitwarden_secrets(
            access_token=access_token,
            project_id=project,
            use_cache=False,
        )
    except Exception as exc:  # noqa: BLE001
        console.print(f"[red]Fetch failed: {exc}[/red]")
        return 1

    if not secrets:
        console.print("[yellow]No secrets in project.[/yellow]")
        return 0

    override = bool(settings.get("override_existing", False)) or args.apply
    report = Table(show_header=True, header_style="bold")
    report.add_column("Name", style="cyan")
    report.add_column("Action")
    applied = 0
    for name in sorted(secrets):
        if name == token_env:
            # The token used to authenticate must never be replaced by a fetched value.
            action = "[dim]skip (bootstrap token)[/dim]"
        else:
            present = bool(os.environ.get(name))
            if present and not override:
                action = "[dim]skip (already set)[/dim]"
            elif args.apply:
                os.environ[name] = secrets[name]
                applied += 1
                action = "[green]exported[/green]" + (" (overrode)" if present else "")
            else:
                action = "[green]would export[/green]" + (" (overrides)" if present else "")
        report.add_row(name, action)

    console.print(report)
    for warning in warnings:
        console.print(f"[yellow]warning:[/yellow] {warning}")

    if args.apply:
        console.print(f"\n [green]Exported {applied} secret(s) into current process.[/green]")
    else:
        console.print(
            "\n This was a dry-run — secrets are picked up automatically on the "
            "next [cyan]hermes[/cyan] invocation. Re-run with [cyan]--apply[/cyan] "
            "to export into the current shell instead."
        )
    return 0
|
||||
|
||||
|
||||
def cmd_disable(args: argparse.Namespace) -> int:
    """Turn the Bitwarden integration off in the saved config.

    Sets ``secrets.bitwarden.enabled`` to ``False`` and saves the config.
    The access token itself is not touched; the printed message tells the
    user to remove it manually.

    A ``secrets`` or ``bitwarden`` key that exists but holds a non-mapping
    value (for example an empty YAML key, which loads as ``None``) is
    replaced with a fresh mapping instead of crashing — the same tolerance
    the status and sync commands apply when reading this section.

    Always returns 0.
    """
    console = Console()
    cfg = load_config()

    # ``setdefault`` would hand back an existing ``None`` and the chained
    # call would then raise AttributeError, so normalise each level explicitly.
    secrets_cfg = cfg.get("secrets")
    if not isinstance(secrets_cfg, dict):
        secrets_cfg = {}
        cfg["secrets"] = secrets_cfg
    bw_cfg = secrets_cfg.get("bitwarden")
    if not isinstance(bw_cfg, dict):
        bw_cfg = {}
        secrets_cfg["bitwarden"] = bw_cfg

    bw_cfg["enabled"] = False
    save_config(cfg)
    console.print(
        "[green]Disabled.[/green] Bitwarden secrets will NOT be pulled on the next "
        "Hermes invocation.\n"
        " Your access token is left in .env — remove it manually if you also want "
        "to revoke the credential."
    )
    return 0
|
||||
|
||||
|
||||
def cmd_install(args: argparse.Namespace) -> int:
    """Install the ``bws`` binary and print its path and version.

    ``args.force`` is forwarded (as a bool) to the installer. Any exception
    raised while installing or reporting is shown as an install failure.

    Returns 0 on success, 1 on failure.
    """
    console = Console()
    try:
        installed = bw.install_bws(force=bool(args.force))
        version = _bws_version(installed)
        console.print(f"[green]✓[/green] {installed} ({version})")
    except Exception as exc:  # noqa: BLE001
        console.print(f"[red]Install failed: {exc}[/red]")
        return 1
    return 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _yn(b: bool) -> str:
    """Render a flag as Rich markup: green ``yes`` when truthy, dim ``no`` otherwise."""
    if b:
        return "[green]yes[/green]"
    return "[dim]no[/dim]"
|
||||
|
||||
|
||||
def _bws_version(binary: Path) -> str:
    """Return the first line printed by ``<binary> --version``.

    Falls back to the literal ``"version unknown"`` when the binary cannot
    be executed, does not finish within 5 seconds, exits non-zero, or exits
    successfully without printing anything.

    Args:
        binary: Path of the executable to query.

    Returns:
        The first line of stdout (or of stderr when stdout is empty), or
        ``"version unknown"``.
    """
    unknown = "version unknown"
    try:
        res = subprocess.run(
            [str(binary), "--version"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        return unknown

    if res.returncode != 0:
        return unknown

    # A zero exit with no output yields an empty list here; indexing it
    # unconditionally would raise IndexError, which callers do not expect.
    lines = (res.stdout or res.stderr or "").strip().splitlines()
    return lines[0] if lines else unknown
|
||||
|
||||
|
||||
def _list_projects(
    binary: Path, token: str, console: Console
) -> Optional[List[dict]]:
    """Run ``bws project list --output json`` and return the parsed projects.

    The token is passed to the child process through ``BWS_ACCESS_TOKEN``;
    ``NO_COLOR`` is set to ``"1"`` unless the environment already defines it.

    Returns:
        A list of the dict entries that carry a truthy ``id``; an empty list
        when the JSON payload is not a list; ``None`` when the command could
        not be run, timed out, exited non-zero, or printed invalid JSON
        (each such failure is reported on *console*).
    """
    child_env = dict(os.environ)
    child_env["BWS_ACCESS_TOKEN"] = token
    if "NO_COLOR" not in child_env:
        child_env["NO_COLOR"] = "1"

    command = [str(binary), "project", "list", "--output", "json"]
    try:
        proc = subprocess.run(
            command,
            env=child_env,
            capture_output=True,
            text=True,
            timeout=15,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        console.print(f" [red]Couldn't list projects: {exc}[/red]")
        return None

    if proc.returncode != 0:
        # Cap the echoed error text at 300 characters.
        message = (proc.stderr or proc.stdout).strip()[:300]
        console.print(f" [red]bws project list failed: {message}[/red]")
        lowered = message.lower()
        if "authorization" in lowered or "invalid" in lowered:
            console.print(
                " [yellow]This usually means the access token is wrong or revoked. "
                "Double-check it in the Bitwarden web app.[/yellow]"
            )
        return None

    try:
        payload = json.loads(proc.stdout or "[]")
    except json.JSONDecodeError as exc:
        console.print(f" [red]bws returned non-JSON: {exc}[/red]")
        return None

    if isinstance(payload, list):
        return [item for item in payload if isinstance(item, dict) and item.get("id")]
    return []
|
||||
@@ -23,6 +23,7 @@ from rich.table import Table
|
||||
# Lazy imports to avoid circular dependencies and slow startup.
|
||||
# tools.skills_hub and tools.skills_guard are imported inside functions.
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
_console = Console()
|
||||
|
||||
@@ -178,9 +179,12 @@ def _existing_categories() -> List[str]:
|
||||
# top level (no category); otherwise treat as a category bucket.
|
||||
if (entry / "SKILL.md").exists():
|
||||
continue
|
||||
# Has at least one nested SKILL.md?
|
||||
# Has at least one nested SKILL.md (excluding dependency/cache dirs)?
|
||||
try:
|
||||
if any(entry.rglob("SKILL.md")):
|
||||
if any(
|
||||
not is_excluded_skill_path(p)
|
||||
for p in entry.rglob("SKILL.md")
|
||||
):
|
||||
out.append(entry.name)
|
||||
except OSError:
|
||||
continue
|
||||
@@ -319,12 +323,14 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
|
||||
c.print("[dim]No skills found in the Skills Hub.[/]\n")
|
||||
return
|
||||
|
||||
# Deduplicate by name, preferring higher trust
|
||||
# Deduplicate by identifier, preferring higher trust.
|
||||
# identifier is always unique per skill; name is not (browse-sh skills from different
|
||||
# sites can share the same task name, e.g. "search-listings" on Airbnb and Booking.com).
|
||||
seen: dict = {}
|
||||
for r in all_results:
|
||||
rank = _TRUST_RANK.get(r.trust_level, 0)
|
||||
if r.name not in seen or rank > _TRUST_RANK.get(seen[r.name].trust_level, 0):
|
||||
seen[r.name] = r
|
||||
if r.identifier not in seen or rank > _TRUST_RANK.get(seen[r.identifier].trust_level, 0):
|
||||
seen[r.identifier] = r
|
||||
deduped = list(seen.values())
|
||||
|
||||
# Sort: official first, then by trust level (desc), then alphabetically
|
||||
@@ -702,8 +708,8 @@ def browse_skills(page: int = 1, page_size: int = 20, source: str = "all") -> di
|
||||
seen: dict = {}
|
||||
for r in all_results:
|
||||
rank = _TRUST_RANK.get(r.trust_level, 0)
|
||||
if r.name not in seen or rank > _TRUST_RANK.get(seen[r.name].trust_level, 0):
|
||||
seen[r.name] = r
|
||||
if r.identifier not in seen or rank > _TRUST_RANK.get(seen[r.identifier].trust_level, 0):
|
||||
seen[r.identifier] = r
|
||||
deduped = list(seen.values())
|
||||
deduped.sort(key=lambda r: (-_TRUST_RANK.get(r.trust_level, 0), r.source != "official", r.name.lower()))
|
||||
total = len(deduped)
|
||||
|
||||
+86
-29
@@ -78,6 +78,7 @@ CONFIGURABLE_TOOLSETS = [
|
||||
("discord_admin", "🛡️ Discord Server Admin", "list channels/roles, pin, assign roles"),
|
||||
("yuanbao", "🤖 Yuanbao", "group info, member queries, DM"),
|
||||
("computer_use", "🖱️ Computer Use (macOS)", "background desktop control via cua-driver"),
|
||||
("app_tools", "🔌 App Integrations (500+)", "Gmail, Slack, GitHub, Jira, Notion, etc. via Nous tool gateway"),
|
||||
]
|
||||
|
||||
# Toolsets that are OFF by default for new installs.
|
||||
@@ -311,6 +312,16 @@ TOOL_CATEGORIES = {
|
||||
"image_gen": {
|
||||
"name": "Image Generation",
|
||||
"icon": "🎨",
|
||||
# Per-provider rows for FAL.ai (`plugins/image_gen/fal`), OpenAI,
|
||||
# OpenAI Codex, and xAI are injected at runtime from each
|
||||
# ``plugins.image_gen.<vendor>`` package via
|
||||
# ``_plugin_image_gen_providers()`` in ``_visible_providers``.
|
||||
# Only non-provider UX setup-flow rows remain here:
|
||||
# - "Nous Subscription" — managed FAL billed via the Nous
|
||||
# subscription (requires_nous_auth + override_env_vars).
|
||||
# Uses the fal plugin as the underlying backend but has a
|
||||
# distinct setup UX.
|
||||
# Mirrors the shape browser/video_gen ship today.
|
||||
"providers": [
|
||||
{
|
||||
"name": "Nous Subscription",
|
||||
@@ -322,15 +333,6 @@ TOOL_CATEGORIES = {
|
||||
"override_env_vars": ["FAL_KEY"],
|
||||
"imagegen_backend": "fal",
|
||||
},
|
||||
{
|
||||
"name": "FAL.ai",
|
||||
"badge": "paid",
|
||||
"tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
|
||||
"env_vars": [
|
||||
{"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"},
|
||||
],
|
||||
"imagegen_backend": "fal",
|
||||
},
|
||||
],
|
||||
},
|
||||
"video_gen": {
|
||||
@@ -482,6 +484,11 @@ TOOLSET_ENV_REQUIREMENTS = {
|
||||
# ─── Post-Setup Hooks ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _cua_driver_cmd() -> str:
    """Return the cua-driver executable name or path.

    ``HERMES_CUA_DRIVER_CMD`` wins when it is set to something other than
    whitespace; otherwise the default ``"cua-driver"`` is used.
    """
    override = os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip()
    if override:
        return override
    return "cua-driver"
|
||||
|
||||
|
||||
def _pip_install(
|
||||
args: List[str],
|
||||
*,
|
||||
@@ -550,6 +557,55 @@ def _pip_install(
|
||||
)
|
||||
|
||||
|
||||
|
||||
def _check_cua_driver_asset_for_arch() -> bool:
    """Report whether the latest CUA release appears to ship a build for this CPU.

    ``arm64`` machines are accepted without a network call. For anything
    else the latest GitHub release is fetched and its asset names are
    searched for ``x86_64`` / ``amd64``.

    Returns:
        ``False`` only when the release was fetched and no such asset name
        was found (a warning and pointers are printed first). ``True`` in
        every other case, including any failure while probing, so the
        caller can still try the installer.
    """
    import platform as _plat
    import urllib.request

    if _plat.machine() == "arm64":
        # arm64 (Apple Silicon) assets are always published.
        return True

    # Any other architecture: look for an Intel asset in the latest release
    # before the caller falls through to the upstream installer.
    intel_markers = ("x86_64", "amd64")
    try:
        request = urllib.request.Request(
            "https://api.github.com/repos/trycua/cua/releases/latest",
            headers={"Accept": "application/vnd.github+json"},
        )
        with urllib.request.urlopen(request, timeout=10) as response:
            release = _json.loads(response.read().decode())

        tag = release.get("tag_name", "")
        intel_asset_found = False
        for asset in release.get("assets", []):
            if any(marker in asset.get("name", "").lower() for marker in intel_markers):
                intel_asset_found = True
                break

        if not intel_asset_found:
            _print_warning(
                f" Latest CUA release ({tag}) has no Intel (x86_64) asset."
            )
            _print_info(
                " CUA Driver currently only ships Apple Silicon builds."
            )
            _print_info(
                " See: https://github.com/trycua/cua/issues/1493"
            )
            return False
    except Exception:
        # Network / API failure — proceed and let the installer handle it.
        pass
    return True
|
||||
|
||||
|
||||
def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
"""Install or refresh the cua-driver binary used by Computer Use.
|
||||
|
||||
@@ -579,7 +635,8 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
_print_warning(" Computer Use (cua-driver) is macOS-only; skipping.")
|
||||
return False
|
||||
|
||||
binary = shutil.which("cua-driver")
|
||||
driver_cmd = _cua_driver_cmd()
|
||||
binary = shutil.which(driver_cmd)
|
||||
|
||||
# Not installed → fresh install path (only when caller asked for it).
|
||||
if not binary and not upgrade:
|
||||
@@ -587,18 +644,20 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
_print_warning(" curl not found — install manually:")
|
||||
_print_info(" https://github.com/trycua/cua/blob/main/libs/cua-driver/README.md")
|
||||
return False
|
||||
if not _check_cua_driver_asset_for_arch():
|
||||
return False
|
||||
return _run_cua_driver_installer(label="Installing")
|
||||
|
||||
# Already installed and caller didn't ask to upgrade → just confirm.
|
||||
if binary and not upgrade:
|
||||
try:
|
||||
version = subprocess.run(
|
||||
["cua-driver", "--version"],
|
||||
[driver_cmd, "--version"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
).stdout.strip()
|
||||
_print_success(f" cua-driver already installed: {version or 'unknown version'}")
|
||||
_print_success(f" {driver_cmd} already installed: {version or 'unknown version'}")
|
||||
except Exception:
|
||||
_print_success(" cua-driver already installed.")
|
||||
_print_success(f" {driver_cmd} already installed.")
|
||||
_print_info(" Grant macOS permissions if not done yet:")
|
||||
_print_info(" System Settings > Privacy & Security > Accessibility")
|
||||
_print_info(" System Settings > Privacy & Security > Screen Recording")
|
||||
@@ -609,11 +668,14 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
_print_warning(" curl not found — cannot refresh cua-driver.")
|
||||
return bool(binary)
|
||||
|
||||
if not _check_cua_driver_asset_for_arch():
|
||||
return bool(binary)
|
||||
|
||||
if binary:
|
||||
# Show before/after version when we have a baseline. Best-effort.
|
||||
try:
|
||||
before = subprocess.run(
|
||||
["cua-driver", "--version"],
|
||||
[driver_cmd, "--version"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
).stdout.strip()
|
||||
except Exception:
|
||||
@@ -625,13 +687,13 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
if ok and before:
|
||||
try:
|
||||
after = subprocess.run(
|
||||
["cua-driver", "--version"],
|
||||
[driver_cmd, "--version"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
).stdout.strip()
|
||||
if after and after != before:
|
||||
_print_success(f" cua-driver upgraded: {before} → {after}")
|
||||
_print_success(f" {driver_cmd} upgraded: {before} → {after}")
|
||||
elif after:
|
||||
_print_info(f" cua-driver up to date: {after}")
|
||||
_print_info(f" {driver_cmd} up to date: {after}")
|
||||
except Exception:
|
||||
pass
|
||||
return ok
|
||||
@@ -655,11 +717,12 @@ def _run_cua_driver_installer(label: str = "Installing", verbose: bool = True) -
|
||||
_print_info(f" {label} cua-driver (macOS background computer-use)...")
|
||||
else:
|
||||
_print_info(f" {label} cua-driver...")
|
||||
driver_cmd = _cua_driver_cmd()
|
||||
try:
|
||||
result = subprocess.run(install_cmd, shell=True, timeout=300)
|
||||
if result.returncode == 0 and shutil.which("cua-driver"):
|
||||
if result.returncode == 0 and shutil.which(driver_cmd):
|
||||
if verbose:
|
||||
_print_success(" cua-driver installed.")
|
||||
_print_success(f" {driver_cmd} installed.")
|
||||
_print_info(" IMPORTANT — grant macOS permissions now:")
|
||||
_print_info(" System Settings > Privacy & Security > Accessibility")
|
||||
_print_info(" System Settings > Privacy & Security > Screen Recording")
|
||||
@@ -1506,12 +1569,9 @@ def _plugin_image_gen_providers() -> list[dict]:
|
||||
Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
|
||||
row but carries an ``image_gen_plugin_name`` marker so downstream
|
||||
code (config writing, model picker) knows to route through the
|
||||
plugin registry instead of the in-tree FAL backend.
|
||||
|
||||
FAL is skipped — it's already exposed by the hardcoded
|
||||
``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
|
||||
a plugin in a follow-up PR, the hardcoded entries go away and this
|
||||
function surfaces it alongside OpenAI automatically.
|
||||
plugin registry. Every image-gen backend is a plugin now — there
|
||||
are no hardcoded rows left in ``TOOL_CATEGORIES["image_gen"]`` for
|
||||
this function to dedupe against (see issue #26241).
|
||||
"""
|
||||
try:
|
||||
from agent.image_gen_registry import list_providers
|
||||
@@ -1524,9 +1584,6 @@ def _plugin_image_gen_providers() -> list[dict]:
|
||||
|
||||
rows: list[dict] = []
|
||||
for provider in providers:
|
||||
if getattr(provider, "name", None) == "fal":
|
||||
# FAL has its own hardcoded rows today.
|
||||
continue
|
||||
try:
|
||||
schema = provider.get_setup_schema()
|
||||
except Exception:
|
||||
@@ -1751,7 +1808,7 @@ _POST_SETUP_INSTALLED: dict = {
|
||||
# entry when (a) the post_setup is the ONLY install side-effect for
|
||||
# a no-key provider, and (b) an installed-state check is cheap and
|
||||
# doesn't trigger a heavy import.
|
||||
"cua_driver": lambda: bool(shutil.which("cua-driver")),
|
||||
"cua_driver": lambda: bool(shutil.which(_cua_driver_cmd())),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -975,11 +975,13 @@ _AUX_TASK_SLOTS: Tuple[str, ...] = (
|
||||
"vision",
|
||||
"web_extract",
|
||||
"compression",
|
||||
"session_search",
|
||||
"skills_hub",
|
||||
"approval",
|
||||
"mcp",
|
||||
"title_generation",
|
||||
"triage_specifier",
|
||||
"kanban_decomposer",
|
||||
"profile_describer",
|
||||
"curator",
|
||||
)
|
||||
|
||||
|
||||
@@ -0,0 +1,253 @@
|
||||
"""Detect xAI models retired on May 15, 2026.
|
||||
|
||||
Source: https://docs.x.ai/developers/migration/may-15-retirement
|
||||
|
||||
Pure logic: walks a Hermes config dict, returns issues for any reference
|
||||
to a retired xAI model. No I/O, no CLI dependencies — testable in isolation
|
||||
and reusable from both `hermes doctor` and a future `hermes migrate xai`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
MIGRATION_GUIDE_URL = "https://docs.x.ai/developers/migration/may-15-retirement"
RETIREMENT_DATE = "May 15, 2026"


# Retired model id -> migration details, following the xAI migration guide.
# Each value carries ``replacement``, ``reasoning_effort`` and ``note``.
# ``reasoning_effort`` is "none" for the ``*-non-reasoning`` variants: their
# replacement ``grok-4.3`` reasons by default, so keeping the old behavior
# requires turning reasoning off explicitly. ``note`` is None for every entry.
_RETIRED_MODELS: Dict[str, Dict[str, Optional[str]]] = {
    retired_id: {"replacement": successor, "reasoning_effort": effort, "note": None}
    for retired_id, successor, effort in (
        ("grok-4-0709", "grok-4.3", None),
        ("grok-4-fast-reasoning", "grok-4.3", None),
        ("grok-4-fast-non-reasoning", "grok-4.3", "none"),
        ("grok-4-1-fast-reasoning", "grok-4.3", None),
        ("grok-4-1-fast-non-reasoning", "grok-4.3", "none"),
        ("grok-code-fast-1", "grok-4.3", None),
        ("grok-3", "grok-4.3", None),
        ("grok-imagine-image-pro", "grok-imagine-image-quality", None),
    )
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RetirementIssue:
    """One config slot that still points at a retired xAI model.

    Instances are immutable. ``reasoning_effort`` and ``note`` are optional
    and default to ``None``.
    """

    # Dotted location of the slot, e.g. "principal.model" or
    # "auxiliary.vision.model".
    config_path: str
    # The value exactly as found in the config (casing and prefix preserved).
    current_model: str
    # Recommended xAI replacement model.
    replacement: str
    # Set when migrating from a non-reasoning variant.
    reasoning_effort: Optional[str] = None
    # Disambiguation note, when applicable.
    note: Optional[str] = None
|
||||
|
||||
|
||||
def _normalize(model_id: str) -> str:
    """Return *model_id* trimmed, lowercased, and without one leading provider prefix.

    Recognised prefixes are ``x-ai/`` and ``xai/``; at most one is removed
    (``x-ai/grok-4`` becomes ``grok-4``).
    """
    cleaned = model_id.strip().lower()
    if cleaned.startswith("x-ai/"):
        return cleaned[len("x-ai/"):]
    if cleaned.startswith("xai/"):
        return cleaned[len("xai/"):]
    return cleaned
|
||||
|
||||
|
||||
def _looks_like_xai(model_id: Optional[str]) -> bool:
    """Return True when *model_id* is a non-blank string whose normalized form starts with ``grok-``."""
    if isinstance(model_id, str) and model_id.strip():
        return _normalize(model_id).startswith("grok-")
    return False
|
||||
|
||||
|
||||
def find_retired_xai_refs(config: Dict[str, Any]) -> List[RetirementIssue]:
    """Collect every model slot in *config* that names a retired xAI model.

    Slots inspected, in this order:
      - ``principal.model``
      - ``auxiliary.<slot>.model`` for every mapping under ``auxiliary``
      - ``delegation.model``
      - ``tts.xai.model``
      - ``plugins.image_gen.xai.model``

    Sections that are missing or are not mappings are ignored, and a
    non-dict *config* yields an empty list.
    """
    found: List[RetirementIssue] = []
    if not isinstance(config, dict):
        return found

    def _section(*keys: str) -> Any:
        """Follow *keys* through nested dicts; return the final dict or None."""
        node: Any = config
        for key in keys:
            if not isinstance(node, dict):
                return None
            node = node.get(key)
        return node if isinstance(node, dict) else None

    def _inspect(path: str, section: Any) -> None:
        """Record an issue when ``section["model"]`` is a retired xAI model."""
        if section is None:
            return
        model = section.get("model")
        if not _looks_like_xai(model):
            return
        entry = _RETIRED_MODELS.get(_normalize(model))
        if entry is None:
            return
        found.append(RetirementIssue(
            config_path=path,
            current_model=model,
            replacement=entry["replacement"],
            reasoning_effort=entry.get("reasoning_effort"),
            note=entry.get("note"),
        ))

    _inspect("principal.model", _section("principal"))

    # Auxiliary slots are enumerated rather than hard-coded, so any slot
    # name present in the config is covered.
    aux = _section("auxiliary")
    if aux is not None:
        for slot_name, slot_cfg in aux.items():
            if isinstance(slot_cfg, dict):
                _inspect(f"auxiliary.{slot_name}.model", slot_cfg)

    _inspect("delegation.model", _section("delegation"))
    _inspect("tts.xai.model", _section("tts", "xai"))
    _inspect("plugins.image_gen.xai.model", _section("plugins", "image_gen", "xai"))

    return found
|
||||
|
||||
|
||||
def format_issue(issue: RetirementIssue) -> str:
    """Render *issue* as a single human-readable line.

    The line always names the config path, the current model and the
    replacement; a reasoning-effort hint and a note are appended only when
    the issue carries them.
    """
    headline = f"{issue.config_path}: {issue.current_model!r} → use {issue.replacement!r}"
    effort_hint = (
        f'(set reasoning_effort: "{issue.reasoning_effort}")'
        if issue.reasoning_effort
        else None
    )
    note_hint = f"[note: {issue.note}]" if issue.note else None
    return " ".join(
        piece for piece in (headline, effort_hint, note_hint) if piece is not None
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Apply migration to config.yaml (round-trip preserves comments/order/types)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
import datetime as _dt
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ApplyResult:
    """Immutable summary of what one ``apply_migration`` call did."""

    # The config file the call targeted.
    file_path: Path
    # Where the pre-rewrite copy was saved, or None when no backup was made.
    backup_path: Optional[Path]
    # The issues that were actually applied to the document.
    issues_resolved: List[RetirementIssue]
    # True when the config file was rewritten.
    config_changed: bool
|
||||
|
||||
|
||||
def _walk_to_parent(yaml_doc: Any, dotted_path: str) -> "tuple[Any, str]":
    """Resolve a dotted slot path to ``(parent_mapping, leaf_key)``.

    Example: ``"auxiliary.vision.model"`` resolves to
    ``(yaml_doc["auxiliary"]["vision"], "model")``. The leaf key itself is
    not required to exist in the parent.

    Args:
        yaml_doc: The loaded document to walk.
        dotted_path: Path with at least one parent segment and a leaf.

    Returns:
        The mapping that holds (or would hold) the leaf, and the leaf key.

    Raises:
        ValueError: If *dotted_path* has no parent segment.
        KeyError: If any segment before the leaf is missing, or if any node
            on the way — including the final parent — is not a mapping.
    """
    *ancestors, leaf = dotted_path.split(".")
    if not ancestors:
        raise ValueError(f"Path must have at least one parent: {dotted_path!r}")

    node = yaml_doc
    for segment in ancestors:
        if not isinstance(node, dict) or segment not in node:
            raise KeyError(f"Path segment {segment!r} missing in {dotted_path!r}")
        node = node[segment]

    # The loop validates each node only before descending into it, so the
    # node it ends on is still unchecked. Without this guard a scalar parent
    # would be returned and the caller's item assignment would fail with
    # TypeError instead of the documented KeyError.
    if not isinstance(node, dict):
        raise KeyError(f"Parent of {leaf!r} is not a mapping in {dotted_path!r}")
    return node, leaf
|
||||
|
||||
|
||||
def apply_migration(
    config_path: Path,
    issues: List[RetirementIssue],
    backup: bool = True,
) -> ApplyResult:
    """Rewrite the YAML file at ``config_path`` so each issue is resolved.

    For every issue whose slot can still be located, the model value is set
    to ``issue.replacement``; when the issue carries ``reasoning_effort``, a
    sibling ``reasoning_effort`` key is added or updated next to the model.
    Issues whose path can no longer be walked are skipped.

    The file is loaded and dumped with ``ruamel.yaml`` in round-trip mode
    with ``preserve_quotes`` enabled, so the existing layout is kept as far
    as that mode allows.

    Unless ``backup`` is False, the file is first copied to
    ``<config_path>.bak-pre-migrate-xai-YYYYMMDD-HHMMSS`` (local time).

    Returns:
        An ``ApplyResult``. ``config_changed`` is False — and nothing is
        written or backed up — when *issues* is empty, the document is
        empty, or no issue could be applied.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist.
    """
    from ruamel.yaml import YAML  # local import — avoid hard dep at module load

    config_path = Path(config_path)
    if not config_path.exists():
        raise FileNotFoundError(config_path)

    def _untouched() -> ApplyResult:
        """Result for every path that leaves the file as it was."""
        return ApplyResult(
            file_path=config_path,
            backup_path=None,
            issues_resolved=[],
            config_changed=False,
        )

    if not issues:
        return _untouched()

    yaml = YAML(typ="rt")
    yaml.preserve_quotes = True
    with config_path.open("r", encoding="utf-8") as source:
        doc = yaml.load(source)
    if doc is None:
        return _untouched()

    applied: List[RetirementIssue] = []
    for issue in issues:
        try:
            parent, leaf = _walk_to_parent(doc, issue.config_path)
        except KeyError:
            # Slot vanished between scan and apply — skip silently
            continue
        parent[leaf] = issue.replacement
        if issue.reasoning_effort:
            parent["reasoning_effort"] = issue.reasoning_effort
        applied.append(issue)

    if not applied:
        return _untouched()

    # Copy the on-disk file before overwriting it; only the in-memory
    # document has been modified so far.
    saved_copy: Optional[Path] = None
    if backup:
        stamp = _dt.datetime.now().strftime("%Y%m%d-%H%M%S")
        saved_copy = config_path.with_name(
            f"{config_path.name}.bak-pre-migrate-xai-{stamp}"
        )
        shutil.copy2(config_path, saved_copy)

    with config_path.open("w", encoding="utf-8") as target:
        yaml.dump(doc, target)

    return ApplyResult(
        file_path=config_path,
        backup_path=saved_copy,
        issues_resolved=applied,
        config_changed=True,
    )
|
||||
@@ -235,6 +235,26 @@ def display_hermes_home() -> str:
|
||||
return str(home)
|
||||
|
||||
|
||||
def secure_parent_dir(path: Path) -> None:
    """Restrict the directory containing *path* to mode ``0o700`` when that is safe.

    The parent is resolved first. If it is the filesystem root or one of
    its direct children (fewer than three path parts, e.g. ``/`` or
    ``/usr``), nothing is changed — this guards against locking down a
    system directory when ``HERMES_HOME`` or a similar setting points
    somewhere unexpected. A failing ``chmod`` is ignored.

    See https://github.com/NousResearch/hermes-agent/issues/25821.
    """
    target_dir = path.parent.resolve()
    # "/" has one part and "/usr" has two, so the part count covers both.
    too_shallow = len(target_dir.parts) < 3
    if too_shallow or target_dir == Path("/"):
        return
    try:
        os.chmod(target_dir, 0o700)
    except OSError:
        # Best effort: tightening permissions must not break the caller.
        pass
|
||||
|
||||
|
||||
def get_subprocess_home() -> str | None:
|
||||
"""Return a per-profile HOME directory for subprocesses, or None.
|
||||
|
||||
|
||||
+42
-7
@@ -33,7 +33,7 @@ T = TypeVar("T")
|
||||
|
||||
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
|
||||
|
||||
SCHEMA_VERSION = 11
|
||||
SCHEMA_VERSION = 12
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WAL-compatibility fallback
|
||||
@@ -236,7 +236,8 @@ CREATE TABLE IF NOT EXISTS messages (
|
||||
reasoning_content TEXT,
|
||||
reasoning_details TEXT,
|
||||
codex_reasoning_items TEXT,
|
||||
codex_message_items TEXT
|
||||
codex_message_items TEXT,
|
||||
platform_message_id TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS state_meta (
|
||||
@@ -571,6 +572,19 @@ class SessionDB:
|
||||
# column gets created here.
|
||||
self._reconcile_columns(cursor)
|
||||
|
||||
# Indexes that reference reconciler-added columns must be created
|
||||
# AFTER _reconcile_columns runs — declaring them in SCHEMA_SQL
|
||||
# makes the initial executescript fail on legacy DBs (the index's
|
||||
# WHERE clause references a column that doesn't exist yet).
|
||||
try:
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_messages_platform_msg_id "
|
||||
"ON messages(session_id, platform_message_id) "
|
||||
"WHERE platform_message_id IS NOT NULL"
|
||||
)
|
||||
except sqlite3.OperationalError as exc:
|
||||
logger.debug("idx_messages_platform_msg_id create skipped: %s", exc)
|
||||
|
||||
# ── Schema version bookkeeping ─────────────────────────────────
|
||||
# Bump to current so future data migrations (if any) can gate on
|
||||
# version. No version-gated column additions remain.
|
||||
@@ -1445,12 +1459,19 @@ class SessionDB:
|
||||
reasoning_details: Any = None,
|
||||
codex_reasoning_items: Any = None,
|
||||
codex_message_items: Any = None,
|
||||
platform_message_id: str = None,
|
||||
) -> int:
|
||||
"""
|
||||
Append a message to a session. Returns the message row ID.
|
||||
|
||||
Also increments the session's message_count (and tool_call_count
|
||||
if role is 'tool' or tool_calls is present).
|
||||
|
||||
``platform_message_id`` is the external messaging platform's own
|
||||
message ID (e.g. Telegram update_id, Yuanbao msg_id). It is
|
||||
independent of the SQLite autoincrement primary key and is used by
|
||||
platform-specific flows like yuanbao's recall guard to redact a
|
||||
message by its platform-side identifier.
|
||||
"""
|
||||
# Serialize structured fields to JSON before entering the write txn
|
||||
reasoning_details_json = (
|
||||
@@ -1480,8 +1501,8 @@ class SessionDB:
|
||||
"""INSERT INTO messages (session_id, role, content, tool_call_id,
|
||||
tool_calls, tool_name, timestamp, token_count, finish_reason,
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
|
||||
codex_message_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
codex_message_items, platform_message_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
session_id,
|
||||
role,
|
||||
@@ -1497,6 +1518,7 @@ class SessionDB:
|
||||
reasoning_details_json,
|
||||
codex_items_json,
|
||||
codex_message_items_json,
|
||||
platform_message_id,
|
||||
),
|
||||
)
|
||||
msg_id = cursor.lastrowid
|
||||
@@ -1558,13 +1580,18 @@ class SessionDB:
|
||||
json.dumps(codex_message_items) if codex_message_items else None
|
||||
)
|
||||
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
|
||||
# Accept either `platform_message_id` (new explicit name) or
|
||||
# `message_id` (yuanbao's existing convention on message dicts).
|
||||
platform_msg_id = (
|
||||
msg.get("platform_message_id") or msg.get("message_id")
|
||||
)
|
||||
|
||||
conn.execute(
|
||||
"""INSERT INTO messages (session_id, role, content, tool_call_id,
|
||||
tool_calls, tool_name, timestamp, token_count, finish_reason,
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
|
||||
codex_message_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
codex_message_items, platform_message_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
session_id,
|
||||
role,
|
||||
@@ -1580,6 +1607,7 @@ class SessionDB:
|
||||
reasoning_details_json,
|
||||
codex_items_json,
|
||||
codex_message_items_json,
|
||||
platform_msg_id,
|
||||
),
|
||||
)
|
||||
total_messages += 1
|
||||
@@ -1897,7 +1925,7 @@ class SessionDB:
|
||||
rows = self._conn.execute(
|
||||
"SELECT role, content, tool_call_id, tool_calls, tool_name, "
|
||||
"finish_reason, reasoning, reasoning_content, reasoning_details, "
|
||||
"codex_reasoning_items, codex_message_items "
|
||||
"codex_reasoning_items, codex_message_items, platform_message_id "
|
||||
f"FROM messages WHERE session_id IN ({placeholders}) ORDER BY id",
|
||||
tuple(session_ids),
|
||||
).fetchall()
|
||||
@@ -1918,6 +1946,13 @@ class SessionDB:
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning("Failed to deserialize tool_calls in conversation replay, falling back to []")
|
||||
msg["tool_calls"] = []
|
||||
# Surface the platform-side message id (e.g. yuanbao msg_id,
|
||||
# telegram update_id) so platform-specific flows like recall
|
||||
# can match by external identifier instead of having to fall
|
||||
# back to content-match heuristics. Exposed as ``message_id``
|
||||
# for backward compatibility with the JSONL transcript shape.
|
||||
if row["platform_message_id"]:
|
||||
msg["message_id"] = row["platform_message_id"]
|
||||
# Restore reasoning fields on assistant messages so providers
|
||||
# that replay reasoning (OpenRouter, OpenAI, Nous) receive
|
||||
# coherent multi-turn reasoning context.
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 1.2 MiB |
Binary file not shown.
|
After Width: | Height: | Size: 1.9 MiB |
@@ -0,0 +1,121 @@
|
||||
Create a professional infographic following these specifications:
|
||||
|
||||
## Image Specifications
|
||||
|
||||
- **Type**: Infographic
|
||||
- **Layout**: bento-grid
|
||||
- **Style**: retro-pop-grid
|
||||
- **Aspect Ratio**: 1:1 (square)
|
||||
- **Language**: en
|
||||
|
||||
## Core Principles
|
||||
|
||||
- Follow the layout structure precisely for information architecture
|
||||
- Apply style aesthetics consistently throughout
|
||||
- Keep information concise, highlight keywords and core concepts
|
||||
- Use ample whitespace for visual clarity
|
||||
- Maintain clear visual hierarchy
|
||||
|
||||
## Text Requirements
|
||||
|
||||
- All text must match the specified style treatment
|
||||
- Main titles should be prominent and readable
|
||||
- Key concepts should be visually emphasized
|
||||
- Labels should be clear and appropriately sized
|
||||
- Use English for all text content
|
||||
|
||||
## Layout Guidelines (bento-grid)
|
||||
|
||||
- Grid of rectangular cells with varied sizes (1x1, 2x1, 1x2, 2x2)
|
||||
- Hero cell ("ONE TOKEN, EVERY KEY") takes the largest position (top-center or upper-left, 2x2)
|
||||
- Supporting cells around the hero, mixed cell sizes for rhythm
|
||||
- Each cell self-contained with its own title + icon + brief content
|
||||
- Title strip at the top: "BITWARDEN SECRETS MANAGER — HERMES-AGENT PR #30035"
|
||||
- Footer strip at the bottom with commit SHA + repo
|
||||
|
||||
## Style Guidelines (retro-pop-grid)
|
||||
|
||||
- 1970s retro pop art with strict Swiss international grid
|
||||
- Background: warm vintage cream/beige (#F5F0E6)
|
||||
- Accents: salmon pink, sky blue, mustard yellow, mint green — all muted retro tones
|
||||
- Pure solid black (#000000) and solid white (#FFFFFF) for extreme-contrast cells
|
||||
- Uniform thick black outlines on ALL illustrations, text boxes, grid dividers
|
||||
- Pure 2D flat vector aesthetic with subtle screen-print texture
|
||||
- One cell inverted to black-background-with-white-text for the "NEVER BLOCKS STARTUP" warning section
|
||||
- Geometric fill patterns in empty cells: checkerboards, diagonal lines, dot grids
|
||||
- Flat abstract symbols: shields (security), wrenches (install), arrows (rotation), keyholes (auth), checkmarks (tests)
|
||||
- Vintage comic-style smiley face for "26/26 PASSING" cell
|
||||
- Bold brutalist or thick retro display fonts for headers; clean sans-serif body
|
||||
- Decorative stylistic labels acceptable: "WARNING", "NEW DEFAULT", "PINNED", "VERIFIED", "ROTATE"
|
||||
|
||||
## Avoid
|
||||
|
||||
- 3D rendering, gradients, soft shadows, sketch-like lines
|
||||
- Free-floating elements — everything anchored in grid cells
|
||||
- Pure white background — must use warm cream/beige
|
||||
|
||||
---
|
||||
|
||||
Generate the infographic based on the content below:
|
||||
|
||||
### Title (top strip)
|
||||
BITWARDEN SECRETS MANAGER → HERMES-AGENT
|
||||
PR #30035
|
||||
|
||||
### HERO CELL (largest, top-center, salmon pink background with thick black border)
|
||||
ONE TOKEN, EVERY KEY
|
||||
Rotate once in the Bitwarden web app.
|
||||
Every Hermes process picks it up on next start.
|
||||
NEW DEFAULT: override_existing = true
|
||||
|
||||
### Cell — LAZY INSTALL (sky blue background)
|
||||
~/.hermes/bin/bws
|
||||
bws v2.0.0 PINNED
|
||||
SHA-256 VERIFIED
|
||||
No apt · no brew · no sudo
|
||||
Icon: wrench + downward arrow
|
||||
|
||||
### Cell — CLI SURFACE (mustard yellow background, checkerboard accents)
|
||||
$ hermes secrets bitwarden
|
||||
setup wizard
|
||||
status diagnose
|
||||
sync fetch
|
||||
install binary
|
||||
disable off
|
||||
Icon: terminal prompt symbol
|
||||
|
||||
### Cell — SOURCE OF TRUTH (mint green background)
|
||||
BITWARDEN WINS
|
||||
Overwrites stale .env on every start
|
||||
Bootstrap token never overwritten (exception)
|
||||
Icon: keyhole + arrow
|
||||
|
||||
### Cell — INVERTED BLACK CELL with WHITE TEXT — NEVER BLOCKS STARTUP (extreme contrast)
|
||||
NEVER BLOCKS STARTUP
|
||||
Missing binary → warn + continue
|
||||
Bad token → warn + continue
|
||||
Network down → warn + continue
|
||||
Checksum mismatch → refuse + warn
|
||||
30s timeout ceiling
|
||||
Icon: white triangle warning sign
|
||||
|
||||
### Cell — TESTS (cream with thick black outline, vintage comic smiley face)
|
||||
26 / 26
|
||||
HERMETIC
|
||||
subprocess + urllib mocked
|
||||
linux · macos · windows
|
||||
x86_64 · arm64
|
||||
Icon: comic-style smiley face with checkmark
|
||||
|
||||
### Cell — CONFIG YAML (white background with black grid)
|
||||
secrets:
|
||||
bitwarden:
|
||||
enabled: true
|
||||
project_id: ...
|
||||
override_existing: true
|
||||
cache_ttl_seconds: 300
|
||||
auto_install: true
|
||||
|
||||
### Footer strip (bottom, black-on-cream)
|
||||
PR #30035 · commit 7f9b05668 · NousResearch/hermes-agent
|
||||
10 files · +1743 / -1 · agent/secret_sources/ · hermes_cli/secrets_cli.py
|
||||
@@ -0,0 +1,57 @@
|
||||
# Hermes-Agent PR #30035 — Bitwarden Secrets Manager Integration
|
||||
|
||||
## Hero
|
||||
**ONE TOKEN, EVERY KEY**
|
||||
Rotate once. Every Hermes process picks it up on next start.
|
||||
`secrets.bitwarden.override_existing: true` (default)
|
||||
|
||||
## Cells
|
||||
|
||||
### Lazy Install
|
||||
- `bws v2.0.0` pinned
|
||||
- Downloaded into `~/.hermes/bin/bws`
|
||||
- SHA-256 verified vs GitHub Releases checksum file
|
||||
- No apt, no brew, no sudo
|
||||
- Cross-platform: linux gnu+musl, macos universal, windows x86_64+arm64
|
||||
|
||||
### CLI Surface
|
||||
- `hermes secrets bitwarden setup` wizard
|
||||
- `hermes secrets bitwarden status` diagnose
|
||||
- `hermes secrets bitwarden sync` dry-run / --apply
|
||||
- `hermes secrets bitwarden install` binary only
|
||||
- `hermes secrets bitwarden disable` off switch
|
||||
|
||||
### Source of Truth
|
||||
- Bitwarden WINS on every Hermes start
|
||||
- BSM values overwrite stale `.env` lines
|
||||
- Rotate a key once → all your machines reload it
|
||||
- Bootstrap token `BWS_ACCESS_TOKEN` is the lone exception (never overwritten)
|
||||
|
||||
### Never Blocks Startup
|
||||
- Missing binary → warn + continue
|
||||
- Bad token → warn + continue
|
||||
- Checksum mismatch → refuse install + warn
|
||||
- No network → warn + continue
|
||||
- Timeout → 30s ceiling, warn + continue
|
||||
|
||||
### Tests
|
||||
- 26/26 passing, hermetic
|
||||
- subprocess + urllib mocked
|
||||
- Platform matrix tested (linux, macos, windows × x86_64, arm64)
|
||||
- Cache hit/miss, auth fail, non-JSON, timeout, override behavior
|
||||
|
||||
### Config
|
||||
```yaml
|
||||
secrets:
|
||||
bitwarden:
|
||||
enabled: true
|
||||
project_id: <uuid>
|
||||
override_existing: true # NEW DEFAULT
|
||||
cache_ttl_seconds: 300
|
||||
auto_install: true
|
||||
```
|
||||
|
||||
## Footer
|
||||
PR #30035 · commit 7f9b05668 · NousResearch/hermes-agent
|
||||
|
||||
10 files changed · +1743 / -1 · agent/secret_sources/ · hermes_cli/secrets_cli.py · tests · docs
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 2.1 MiB |
Binary file not shown.
|
After Width: | Height: | Size: 1.6 MiB |
Binary file not shown.
|
After Width: | Height: | Size: 1.4 MiB |
@@ -0,0 +1,85 @@
|
||||
Create a professional infographic following these specifications:
|
||||
|
||||
## Image Specifications
|
||||
|
||||
- **Type**: Infographic
|
||||
- **Layout**: bento-grid
|
||||
- **Style**: technical-schematic (engineering blueprint variant)
|
||||
- **Aspect Ratio**: 1:1 (square)
|
||||
- **Language**: English
|
||||
|
||||
## Core Principles
|
||||
|
||||
- Follow the bento-grid layout precisely with varied cell sizes
|
||||
- Apply technical-schematic aesthetics consistently throughout
|
||||
- Keep information concise, highlight keywords and core concepts
|
||||
- Use ample whitespace for visual clarity
|
||||
- Maintain clear visual hierarchy with a hero cell for the headline metric
|
||||
|
||||
## Style Guidelines (technical-schematic blueprint)
|
||||
|
||||
- Color palette: deep blue background (#1E3A5F), white lines and text, amber accent (#F59E0B) ONLY on the hero metric and critical deltas, cyan callouts for measurement annotations
|
||||
- Grid pattern overlay across the entire canvas — fine white grid lines on the deep blue background
|
||||
- All-caps technical stencil typography for headers; clean sans-serif for body
|
||||
- Dimension lines with arrowheads connecting metrics to their cells
|
||||
- Technical symbols where appropriate (gear icons, flow arrows, modular block diagrams)
|
||||
- Consistent stroke weights — bold for cell borders, thin for grid, medium for connector lines
|
||||
- Engineering spec-sheet aesthetic: feels like a printed architectural blueprint, austere and precise
|
||||
|
||||
## Layout Guidelines (bento-grid)
|
||||
|
||||
- Hero cell (TOP-CENTER or LEFT, occupying ~40% of canvas): "−61 COMPLEXITY · 79 → 18" headline metric in massive amber-on-blue, with subtitle "convert_messages_to_anthropic refactored"
|
||||
- 7 helper cells in a 2x4 or 3x3 grid showing each extracted helper as its own modular block — each cell has the helper name in all-caps, its complexity number, and one-line role
|
||||
- Metrics strip cell: BEFORE/AFTER table with deltas (185 statements → ~70, 79 C → 18 C, +5 violations intentional)
|
||||
- Test validation cell: "152/152 + 213/213 PASS" with checkmark stencil
|
||||
- Footer strip across bottom: "PR #27784 · agent/anthropic_adapter.py · @kshitijk4poor · NousResearch/hermes-agent"
|
||||
|
||||
## Content to render
|
||||
|
||||
**Main title (top of canvas, all caps):** "ANTHROPIC ADAPTER · 1-INTO-7 EXTRACTION"
|
||||
**Subtitle:** "PR #27784 — convert_messages_to_anthropic refactor"
|
||||
|
||||
**Hero cell (largest, amber accent):**
|
||||
- "−61"
|
||||
- "CYCLOMATIC COMPLEXITY"
|
||||
- "79 → 18 MAX (−77%)"
|
||||
- Subtext: "convert_messages_to_anthropic · pure code motion · zero behavior change"
|
||||
|
||||
**7 helper cells (one per helper, each its own modular block):**
|
||||
|
||||
1. _convert_assistant_message · C<10 · "Assistant msg → content blocks"
|
||||
2. _convert_tool_message_to_result · C=12 · "Tool msg → tool_result + merge"
|
||||
3. _convert_user_message · C<10 · "User msg validation"
|
||||
4. _strip_orphaned_tool_blocks · C=15 · "Orphan tool_use removal"
|
||||
5. _merge_consecutive_roles · C=13 · "Anthropic role-alternation"
|
||||
6. _manage_thinking_signatures · C=18 · "Strip/preserve by endpoint"
|
||||
7. _evict_old_screenshots · C<10 · "Keep most recent 3 images"
|
||||
|
||||
**Metrics cell (table format with arrows):**
|
||||
- MAX FUNCTION COMPLEXITY: 79 → 18 (−77%)
|
||||
- MAX STATEMENTS/FUNCTION: 185 → ~70 (−62%)
|
||||
- LOC FILE-WIDE: −4
|
||||
- MAIN FUNCTION LOC: 395 → 63
|
||||
|
||||
**Test validation cell (checkmark stencil):**
|
||||
- test_anthropic_adapter.py: 152/152 PASS
|
||||
- test_auxiliary_client.py: 172/172 PASS
|
||||
- test_azure_identity_adapter.py: 39/39 PASS
|
||||
- test_bedrock_1m_context.py: 2/2 PASS
|
||||
|
||||
**Behavior preservation cell:**
|
||||
"ZERO LOGIC CHANGES · ANTHROPIC + KIMI + DEEPSEEK + MINIMAX + AZURE FOUNDRY + BEDROCK SEMANTICS PRESERVED"
|
||||
|
||||
**Footer strip:**
|
||||
"PR #27784 · agent/anthropic_adapter.py · cherry-picked from #23968 · @kshitijk4poor · NousResearch/hermes-agent"
|
||||
|
||||
## Text Requirements
|
||||
|
||||
- All text in English, all-caps for headers
|
||||
- Hero metric "−61" in amber (#F59E0B), oversized, with thick blueprint stencil treatment
|
||||
- Helper names in white technical stencil
|
||||
- Complexity numbers (C=12, C=18, etc.) in cyan callouts
|
||||
- "BEFORE" labels in white-on-blue, "AFTER" labels in amber-on-blue
|
||||
- Footer in small white stencil
|
||||
|
||||
Generate the infographic now as a square engineering blueprint.
|
||||
@@ -0,0 +1,66 @@
|
||||
# Infographic: PR #27784 — convert_messages_to_anthropic refactor
|
||||
|
||||
## Hero metric
|
||||
**−61 cyclomatic complexity** in `agent/anthropic_adapter.py` (79 → 18 max).
|
||||
**−4 LOC** net file-wide. **77% drop** in single-function complexity ceiling.
|
||||
|
||||
## Title
|
||||
ANTHROPIC ADAPTER · 1-INTO-7 EXTRACTION
|
||||
PR #27784 · agent/anthropic_adapter.py · @kshitijk4poor
|
||||
|
||||
## Section 1: BEFORE (left side)
|
||||
**convert_messages_to_anthropic**
|
||||
- 185 statements
|
||||
- 90 branches
|
||||
- Cyclomatic: 79
|
||||
- Did 7 jobs in one function
|
||||
|
||||
Inline responsibilities mixed together:
|
||||
1. Walk + dispatch by role
|
||||
2. Tool-result conversion
|
||||
3. Orphan tool-use stripping
|
||||
4. Same-role merging
|
||||
5. Thinking-signature management
|
||||
6. Screenshot eviction
|
||||
7. Final assembly
|
||||
|
||||
## Section 2: AFTER (right side)
|
||||
**convert_messages_to_anthropic** — now 63 lines, C<10
|
||||
Plus 7 single-responsibility helpers:
|
||||
|
||||
| Helper | C | Role |
|
||||
|---|---|---|
|
||||
| _convert_assistant_message | <10 | Assistant msg → content blocks |
|
||||
| _convert_tool_message_to_result | 12 | Tool msg → tool_result + merge |
|
||||
| _convert_user_message | <10 | User msg validation + conversion |
|
||||
| _strip_orphaned_tool_blocks | 15 | Strip orphan tool_use + tool_result |
|
||||
| _merge_consecutive_roles | 13 | Anthropic role-alternation enforce |
|
||||
| _manage_thinking_signatures | 18 | Strip/preserve/downgrade by endpoint |
|
||||
| _evict_old_screenshots | <10 | Keep most recent 3 images |
|
||||
|
||||
## Section 3: METRICS
|
||||
| Metric | Before | After | Δ |
|
||||
|---|---:|---:|---:|
|
||||
| Max function complexity | 79 | 18 | −77% |
|
||||
| Max statements/function | 185 | ~70 | −62% |
|
||||
| LOC (file-wide) | — | — | **−4** |
|
||||
| C901 violations | 3 | 8 | +5 (intentional split) |
|
||||
|
||||
## Section 4: ZERO BEHAVIOR CHANGE
|
||||
- Pure code motion — no logic edits
|
||||
- Mutating helpers update `result` in place (same as inline)
|
||||
- `_merge_consecutive_roles` returns new list — caller rebinds
|
||||
- Anthropic / Kimi / DeepSeek / MiniMax / Azure Foundry / Bedrock semantics preserved
|
||||
- Thinking-signature handling identical to pre-refactor
|
||||
|
||||
## Section 5: TEST VALIDATION
|
||||
- tests/agent/test_anthropic_adapter.py — **152 / 152 pass**
|
||||
- tests/agent/test_auxiliary_client.py — **172 / 172 pass**
|
||||
- tests/agent/test_azure_identity_adapter.py — **39 / 39 pass**
|
||||
- tests/agent/test_bedrock_1m_context.py — **2 / 2 pass**
|
||||
|
||||
## Footer
|
||||
File: agent/anthropic_adapter.py
|
||||
Original PR: #27784 (cherry-pick of #23968)
|
||||
Salvage commit: 9c102b937 (kshitijk4poor authorship preserved)
|
||||
Repo: NousResearch/hermes-agent
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 1.9 MiB |
@@ -16,6 +16,11 @@
|
||||
openssh,
|
||||
ffmpeg,
|
||||
tirith,
|
||||
|
||||
# linux-only deps
|
||||
wl-clipboard,
|
||||
xclip,
|
||||
|
||||
# Flake inputs — passed explicitly by packages.nix and overlays.nix
|
||||
uv2nix,
|
||||
pyproject-nix,
|
||||
@@ -68,6 +73,10 @@ let
|
||||
openssh
|
||||
ffmpeg
|
||||
tirith
|
||||
]
|
||||
++ lib.optionals stdenv.isLinux [
|
||||
wl-clipboard
|
||||
xclip
|
||||
];
|
||||
|
||||
runtimePath = lib.makeBinPath runtimeDeps;
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ let
|
||||
src = ../ui-tui;
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
hash = "sha256-dNL/J4tyQQ7Ji3xfIE5b5Jdi6rQyCFjqYpzLYftJVdc=";
|
||||
hash = "sha256-F6/MzZOWc0zhW9mIfnaY+PrllPvJcsA/OdFdEM+NpLY=";
|
||||
};
|
||||
|
||||
npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ let
|
||||
src = ../web;
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
hash = "sha256-FL2E8Vv8gyeClEa5b/pHn/ekWoHWTd4YwzV6zhLEos4=";
|
||||
hash = "sha256-xSsyluzU2lNhwGqB6XMCGMv3QFHZizE6hgUyc1jvyOw=";
|
||||
};
|
||||
|
||||
npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
|
||||
|
||||
@@ -148,7 +148,7 @@ class BrowserUseBrowserProvider(BrowserProvider):
|
||||
|
||||
return {
|
||||
"api_key": managed.nous_user_token,
|
||||
"base_url": managed.gateway_origin.rstrip("/"),
|
||||
"base_url": managed.resolved_origin.rstrip("/"),
|
||||
"managed_mode": True,
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
"""FAL.ai image generation backend.
|
||||
|
||||
Wraps the 18-model FAL catalog (FLUX 2, Z-Image, Nano Banana, GPT
|
||||
Image 1.5, Recraft, Imagen 4, Qwen, Ideogram, …) as an
|
||||
:class:`ImageGenProvider` implementation.
|
||||
|
||||
The heavy lifting — model catalog, payload construction, request
|
||||
submission, managed-Nous-gateway selection, Clarity Upscaler chaining
|
||||
— lives in :mod:`tools.image_generation_tool`. This plugin reaches into
|
||||
that module via call-time indirection (``import tools.image_generation_tool as _it``)
|
||||
so:
|
||||
|
||||
* the existing test suite (``tests/tools/test_image_generation.py``,
|
||||
``tests/tools/test_managed_media_gateways.py``) keeps patching
|
||||
``image_tool._submit_fal_request`` / ``image_tool.fal_client`` /
|
||||
``image_tool._managed_fal_client`` without modification, and
|
||||
* there's exactly one canonical FAL code path on disk — the plugin is a
|
||||
registration adapter, not a parallel implementation.
|
||||
|
||||
See issue #26241 for the migration plan and the
|
||||
``plugin-extraction-test-patch-compatibility.md`` rules this follows.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.image_gen_provider import (
|
||||
DEFAULT_ASPECT_RATIO,
|
||||
ImageGenProvider,
|
||||
resolve_aspect_ratio,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FalImageGenProvider(ImageGenProvider):
    """FAL.ai image generation backend.

    Delegates to ``tools.image_generation_tool.image_generate_tool`` so
    the in-tree FAL implementation (model catalog, payload builder,
    managed-gateway selection, Clarity Upscaler chaining) is the single
    source of truth. Everything is resolved at call time via the
    ``_it`` indirection so tests can monkey-patch the legacy module.
    """

    @property
    def name(self) -> str:
        """Return the provider's registry identifier."""
        return "fal"

    @property
    def display_name(self) -> str:
        """Return the human-readable provider name."""
        return "FAL.ai"

    def is_available(self) -> bool:
        """Return True when the legacy module reports usable FAL credentials.

        Available when direct FAL_KEY is set OR the managed Nous gateway
        resolves a fal-queue origin. Both checks come from the legacy
        module so this provider tracks whatever logic ships there.

        Never raises: a failure to import the legacy module or to run its
        check is reported as "not available".
        """
        try:
            # Imported inside the guard so an ImportError in the legacy
            # module cannot escape and break the picker.
            import tools.image_generation_tool as _it

            return bool(_it.check_fal_api_key())
        except Exception as exc:  # noqa: BLE001 — defensive; never break the picker
            logger.debug("FAL availability check failed: %s", exc, exc_info=True)
            return False

    def list_models(self) -> List[Dict[str, Any]]:
        """Return one summary dict per entry in the legacy ``FAL_MODELS`` catalog.

        Each dict carries ``id``, ``display``, ``speed``, ``strengths`` and
        ``price``; missing metadata falls back to the model id (for
        ``display``) or an empty string.
        """
        import tools.image_generation_tool as _it

        return [
            {
                "id": model_id,
                "display": meta.get("display", model_id),
                "speed": meta.get("speed", ""),
                "strengths": meta.get("strengths", ""),
                "price": meta.get("price", ""),
            }
            for model_id, meta in _it.FAL_MODELS.items()
        ]

    def default_model(self) -> Optional[str]:
        """Return the legacy module's ``DEFAULT_MODEL``."""
        import tools.image_generation_tool as _it

        return _it.DEFAULT_MODEL

    def get_setup_schema(self) -> Dict[str, Any]:
        """Return the static setup description (label, badge, required env vars)."""
        return {
            "name": "FAL.ai",
            "badge": "paid",
            "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
            "env_vars": [
                {
                    "key": "FAL_KEY",
                    "prompt": "FAL API key",
                    "url": "https://fal.ai/dashboard/keys",
                },
            ],
        }

    def generate(
        self,
        prompt: str,
        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Generate an image via the legacy FAL pipeline.

        Forwards prompt + aspect_ratio (and any forward-compat extras
        the schema supports) into :func:`tools.image_generation_tool.image_generate_tool`,
        then reshapes its JSON-string response into the provider-ABC
        dict format consumed by ``_dispatch_to_plugin_provider``.

        Errors raised while importing or calling the legacy pipeline are
        logged and returned as a ``success: False`` dict rather than
        propagated. Every failure dict carries an ``error_type`` key.
        """
        aspect = resolve_aspect_ratio(aspect_ratio)
        # Only forward extras the caller actually supplied with a value.
        passthrough = {
            key: kwargs[key]
            for key in (
                "num_inference_steps",
                "guidance_scale",
                "num_images",
                "output_format",
                "seed",
            )
            if kwargs.get(key) is not None
        }

        try:
            # Imported inside the guard so an import failure is reported
            # through the same error dict as a pipeline failure.
            import tools.image_generation_tool as _it

            raw = _it.image_generate_tool(
                prompt=prompt,
                aspect_ratio=aspect,
                **passthrough,
            )
        except Exception as exc:  # noqa: BLE001 — never raise out of generate
            logger.warning("FAL image_generate_tool raised: %s", exc, exc_info=True)
            return {
                "success": False,
                "image": None,
                "error": f"FAL image generation failed: {exc}",
                "error_type": type(exc).__name__,
                "provider": "fal",
                "prompt": prompt,
                "aspect_ratio": aspect,
            }

        try:
            response = json.loads(raw) if isinstance(raw, str) else raw
        except Exception:  # noqa: BLE001
            response = {
                "success": False,
                "image": None,
                "error": "Invalid JSON from FAL pipeline",
                # Same tag as the non-dict branch below: both mean the
                # pipeline broke its response contract.
                "error_type": "provider_contract",
            }

        if not isinstance(response, dict):
            response = {
                "success": False,
                "image": None,
                "error": "FAL pipeline returned a non-dict response",
                "error_type": "provider_contract",
            }

        # Stamp provider/prompt/aspect_ratio so downstream consumers see
        # the uniform shape declared in ``agent.image_gen_provider``.
        response.setdefault("provider", "fal")
        response.setdefault("prompt", prompt)
        response.setdefault("aspect_ratio", aspect)
        # Annotate model best-effort — the legacy pipeline resolves it
        # internally, so query it after the fact for the response shape.
        if "model" not in response:
            try:
                model_id, _meta = _it._resolve_fal_model()
                response["model"] = model_id
            except Exception:  # noqa: BLE001
                pass
        return response
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def register(ctx) -> None:
    """Plugin entry point: add a ``FalImageGenProvider`` to the image-gen registry.

    ``ctx`` is the registration context handed to the plugin; only its
    ``register_image_gen_provider`` method is used here.
    """
    provider = FalImageGenProvider()
    ctx.register_image_gen_provider(provider)
|
||||
@@ -0,0 +1,7 @@
|
||||
name: fal
|
||||
version: 1.0.0
|
||||
description: "FAL.ai image generation backend (flux-2-klein, flux-2-pro, nano-banana, gpt-image-1.5, recraft-v3, etc.)."
|
||||
author: NousResearch
|
||||
kind: backend
|
||||
requires_env:
|
||||
- FAL_KEY
|
||||
+33
-25
@@ -24,6 +24,23 @@
|
||||
const { useState, useEffect, useCallback, useMemo, useRef } = SDK.hooks;
|
||||
const { cn, timeAgo } = SDK.utils;
|
||||
|
||||
// Newer host dashboards expose a DS-styled Checkbox on the plugin SDK.
|
||||
// Fall back to a native <input type="checkbox"> shim so older hosts that
|
||||
// predate the design-system rollout still render. The shim normalises
|
||||
// Radix's onCheckedChange(checked) signature to native onChange(event).
|
||||
const Checkbox = SDK.components.Checkbox || function (props) {
  // Fallback used only when the host SDK does not provide Checkbox:
  // render a native checkbox and adapt its change event to the
  // onCheckedChange(checked) callback shape.
  const { checked, onCheckedChange, className, onClick, ...rest } = props;

  const handleChange = function (e) {
    if (onCheckedChange) onCheckedChange(e.target.checked);
  };

  // Remaining props are spread last so they take precedence over the
  // defaults above, matching Object.assign(defaults, rest).
  const inputProps = {
    type: "checkbox",
    checked: !!checked,
    className: className,
    onClick: onClick,
    onChange: handleChange,
    ...rest,
  };

  return h("input", inputProps);
};
|
||||
|
||||
// useI18n is a hook each component calls locally. Older host dashboards
|
||||
// may not expose it yet; fall back to a shim so the bundle still renders
|
||||
// English against an older host SDK. English fallback strings live
|
||||
@@ -1648,11 +1665,10 @@
|
||||
h(Label, { className: "text-xs text-muted-foreground" },
|
||||
"Orchestration mode"),
|
||||
h("label", { className: "flex items-center gap-2 text-xs h-8" },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
checked: !!settings.auto_decompose,
|
||||
onChange: function (e) {
|
||||
saveSettings({ auto_decompose: !!e.target.checked });
|
||||
onCheckedChange: function (checked) {
|
||||
saveSettings({ auto_decompose: checked === true });
|
||||
},
|
||||
}),
|
||||
"Auto-decompose triage tasks",
|
||||
@@ -1908,10 +1924,9 @@
|
||||
}),
|
||||
),
|
||||
h("label", { className: "flex items-center gap-2 text-xs" },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
checked: switchTo,
|
||||
onChange: function (e) { setSwitchTo(e.target.checked); },
|
||||
onCheckedChange: function (checked) { setSwitchTo(checked === true); },
|
||||
}),
|
||||
tx(t, "switchAfterCreate", "Switch to this board after creating it"),
|
||||
),
|
||||
@@ -1981,19 +1996,17 @@
|
||||
),
|
||||
h("label", { className: "flex items-center gap-2 text-xs",
|
||||
title: "Include archived tasks in the board view. Archived tasks are hidden by default." },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
checked: props.includeArchived,
|
||||
onChange: function (e) { props.setIncludeArchived(e.target.checked); },
|
||||
onCheckedChange: function (checked) { props.setIncludeArchived(checked === true); },
|
||||
}),
|
||||
tx(t, "showArchived", "Show archived"),
|
||||
),
|
||||
h("label", { className: "flex items-center gap-2 text-xs",
|
||||
title: "Group the Running column by assigned profile" },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
checked: props.laneByProfile,
|
||||
onChange: function (e) { props.setLaneByProfile(e.target.checked); },
|
||||
onCheckedChange: function (checked) { props.setLaneByProfile(checked === true); },
|
||||
}),
|
||||
tx(t, "lanesByProfile", "Lanes by profile"),
|
||||
),
|
||||
@@ -2122,10 +2135,9 @@
|
||||
}, tx(t, "apply", "Apply")),
|
||||
),
|
||||
h("label", { className: "hermes-kanban-bulk-reclaim-first", title: "Reclaim any active claims before reassigning" },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
checked: reclaimFirst,
|
||||
onChange: function (e) { setReclaimFirst(e.target.checked); },
|
||||
onCheckedChange: function (checked) { setReclaimFirst(checked === true); },
|
||||
}),
|
||||
"Reclaim first",
|
||||
),
|
||||
@@ -2313,14 +2325,12 @@
|
||||
},
|
||||
h("div", { className: "hermes-kanban-column-header",
|
||||
title: colHelp || "" },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
className: "hermes-kanban-col-check",
|
||||
title: "Select all tasks in this column",
|
||||
"aria-label": `Select all tasks in ${colLabel || props.column.name}`,
|
||||
checked: props.column.tasks.length > 0 && props.column.tasks.every(function (t) { return props.selectedIds.has(t.id); }),
|
||||
onChange: function (e) {
|
||||
e.stopPropagation();
|
||||
onCheckedChange: function () {
|
||||
if (props.selectAllInColumn) props.selectAllInColumn(props.column.name);
|
||||
},
|
||||
onClick: function (e) { e.stopPropagation(); },
|
||||
@@ -2461,8 +2471,7 @@
|
||||
if (props.toggleSelected) props.toggleSelected(t.id, false);
|
||||
}
|
||||
};
|
||||
const handleCheckbox = function (e) {
|
||||
e.stopPropagation();
|
||||
const handleCheckedChange = function () {
|
||||
props.toggleSelected(t.id, true);
|
||||
};
|
||||
|
||||
@@ -2495,11 +2504,10 @@
|
||||
title: tx(i18n, "selectForBulk", "Select for bulk actions"),
|
||||
onClick: function (e) { e.stopPropagation(); },
|
||||
},
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
className: "hermes-kanban-card-check",
|
||||
checked: props.selected,
|
||||
onChange: handleCheckbox,
|
||||
onCheckedChange: handleCheckedChange,
|
||||
onClick: function (e) { e.stopPropagation(); },
|
||||
"aria-label": `Select task ${t.id}`,
|
||||
}),
|
||||
|
||||
@@ -47,6 +47,25 @@ _DEFAULT_ENDPOINT = "http://127.0.0.1:1933"
|
||||
_TIMEOUT = 30.0
|
||||
_REMOTE_RESOURCE_PREFIXES = ("http://", "https://", "git@", "ssh://", "git://")
|
||||
|
||||
# Maps the viking_remember `category` enum to a viking:// subdirectory.
|
||||
# Keep in sync with REMEMBER_SCHEMA.parameters.properties.category.enum.
|
||||
_CATEGORY_SUBDIR_MAP = {
|
||||
"preference": "preferences",
|
||||
"entity": "entities",
|
||||
"event": "events",
|
||||
"case": "cases",
|
||||
"pattern": "patterns",
|
||||
}
|
||||
_DEFAULT_MEMORY_SUBDIR = "preferences"
|
||||
|
||||
# Maps the built-in memory tool's `target` ("user" vs "memory") to a subdir
|
||||
# for on_memory_write mirroring. User profile facts → preferences; agent
|
||||
# notes / observations → patterns. Anything unknown falls back to the default.
|
||||
_MEMORY_WRITE_TARGET_SUBDIR_MAP = {
|
||||
"user": "preferences",
|
||||
"memory": "patterns",
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process-level atexit safety net — ensures pending sessions are committed
|
||||
@@ -607,24 +626,35 @@ class OpenVikingMemoryProvider(MemoryProvider):
|
||||
except Exception as e:
|
||||
logger.warning("OpenViking session commit failed: %s", e)
|
||||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
"""Mirror built-in memory writes to OpenViking as explicit memories."""
|
||||
def _build_memory_uri(self, subdir: str) -> str:
|
||||
"""Build a viking:// memory URI under the configured user/subdir."""
|
||||
slug = uuid.uuid4().hex[:12]
|
||||
return f"viking://user/{self._user}/memories/{subdir}/mem_{slug}.md"
|
||||
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Mirror built-in memory writes to OpenViking via content/write."""
|
||||
if not self._client or action != "add" or not content:
|
||||
return
|
||||
|
||||
subdir = _MEMORY_WRITE_TARGET_SUBDIR_MAP.get(target, _DEFAULT_MEMORY_SUBDIR)
|
||||
uri = self._build_memory_uri(subdir)
|
||||
|
||||
def _write():
|
||||
try:
|
||||
client = _VikingClient(
|
||||
self._endpoint, self._api_key,
|
||||
account=self._account, user=self._user, agent=self._agent,
|
||||
)
|
||||
# Add as a user message with memory context so the commit
|
||||
# picks it up as an explicit memory during extraction
|
||||
client.post(f"/api/v1/sessions/{self._session_id}/messages", {
|
||||
"role": "user",
|
||||
"parts": [
|
||||
{"type": "text", "text": f"[Memory note — {target}] {content}"},
|
||||
],
|
||||
client.post("/api/v1/content/write", {
|
||||
"uri": uri,
|
||||
"content": content,
|
||||
"mode": "create",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.debug("OpenViking memory mirror failed: %s", e)
|
||||
@@ -858,24 +888,27 @@ class OpenVikingMemoryProvider(MemoryProvider):
|
||||
if not content:
|
||||
return tool_error("content is required")
|
||||
|
||||
# Store as a session message that will be extracted during commit.
|
||||
# The category hint helps OpenViking's extraction classify correctly.
|
||||
category = args.get("category", "")
|
||||
text = f"[Remember] {content}"
|
||||
if category:
|
||||
text = f"[Remember — {category}] {content}"
|
||||
subdir = _CATEGORY_SUBDIR_MAP.get(category, _DEFAULT_MEMORY_SUBDIR)
|
||||
uri = self._build_memory_uri(subdir)
|
||||
|
||||
self._client.post(f"/api/v1/sessions/{self._session_id}/messages", {
|
||||
"role": "user",
|
||||
"parts": [
|
||||
{"type": "text", "text": text},
|
||||
],
|
||||
})
|
||||
|
||||
return json.dumps({
|
||||
"status": "stored",
|
||||
"message": "Memory recorded. Will be extracted and indexed on session commit.",
|
||||
})
|
||||
# Write directly via content/write API.
|
||||
# This creates the file, stores the content, and queues vector indexing
|
||||
# in a single call — no dependency on session commit / VLM extraction.
|
||||
try:
|
||||
result = self._client.post("/api/v1/content/write", {
|
||||
"uri": uri,
|
||||
"content": content,
|
||||
"mode": "create",
|
||||
})
|
||||
written = result.get("result", {}).get("written_bytes", 0)
|
||||
return json.dumps({
|
||||
"status": "stored",
|
||||
"message": f"Memory stored ({written}b) and queued for vector indexing.",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error("OpenViking content/write failed: %s", e)
|
||||
return tool_error(f"Failed to store memory: {e}")
|
||||
|
||||
def _tool_add_resource(self, args: dict) -> str:
|
||||
url = args.get("url", "")
|
||||
|
||||
@@ -282,20 +282,24 @@ def _build_payload(
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# fal_client lazy import (same pattern as image_generation_tool)
|
||||
# fal_client lazy import (shared with image_generation_tool via fal_common)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_fal_client: Any = None
|
||||
|
||||
|
||||
def _load_fal_client() -> Any:
|
||||
"""Lazy-load the ``fal_client`` SDK and cache it on this module.
|
||||
|
||||
Delegates the actual import to :func:`tools.fal_common.import_fal_client`
|
||||
so the ``lazy_deps`` ensure-install handling stays in one place.
|
||||
"""
|
||||
global _fal_client
|
||||
if _fal_client is not None:
|
||||
return _fal_client
|
||||
import fal_client # type: ignore
|
||||
|
||||
_fal_client = fal_client
|
||||
return fal_client
|
||||
from tools.fal_common import import_fal_client
|
||||
_fal_client = import_fal_client()
|
||||
return _fal_client
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -238,7 +238,7 @@ def _get_firecrawl_client() -> Any:
|
||||
|
||||
kwargs = {
|
||||
"api_key": managed_gateway.nous_user_token,
|
||||
"api_url": managed_gateway.gateway_origin,
|
||||
"api_url": managed_gateway.resolved_origin,
|
||||
}
|
||||
client_config = (
|
||||
"tool-gateway",
|
||||
|
||||
+12
-12
@@ -41,7 +41,11 @@ dependencies = [
|
||||
"ruamel.yaml==0.18.17",
|
||||
"requests==2.33.0", # CVE-2026-25645
|
||||
"jinja2==3.1.6",
|
||||
"pydantic==2.12.5",
|
||||
# Bumped from 2.12.5 to 2.13.4 to pull in pydantic-core 2.46.4.
|
||||
# pydantic-core 2.41.5 (pulled by 2.12.5) segfaults when the OpenAI SDK's
|
||||
# Responses API resource is exercised from a non-main thread, which is the
|
||||
# codex_responses dispatch in agent/chat_completion_helpers.py:_call.
|
||||
"pydantic==2.13.4",
|
||||
# Interactive CLI (prompt_toolkit is used directly by cli.py)
|
||||
"prompt_toolkit==3.0.52",
|
||||
# Cron scheduler (built-in feature — scheduled cron/interval jobs use croniter).
|
||||
@@ -80,7 +84,7 @@ modal = ["modal==1.3.4"]
|
||||
daytona = ["daytona==0.155.0"]
|
||||
vercel = ["vercel==0.5.7"]
|
||||
hindsight = ["hindsight-client==0.6.1"]
|
||||
dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-xdist==3.8.0", "pytest-split==0.11.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
|
||||
dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
|
||||
messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
|
||||
cron = [] # croniter is now a core dependency; this extra kept for back-compat
|
||||
slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.3"]
|
||||
@@ -228,16 +232,12 @@ markers = [
|
||||
"integration: marks tests requiring external services (API keys, Modal, etc.)",
|
||||
"real_concurrent_gate: opt out of the autouse stub that disables _detect_concurrent_hermes_instances",
|
||||
]
|
||||
# pytest-timeout: per-test 60s hard cap with thread method.
|
||||
# Discovered May 2026: the suite reliably hangs at ~96% on full runs even
|
||||
# though every individual test completes in <30s. Root cause is leaked
|
||||
# threads / atexit handlers accumulating across thousands of tests until
|
||||
# something deadlocks at session teardown. Adding pytest-timeout (with
|
||||
# thread method, which forces an interrupt into the test thread) breaks
|
||||
# the deadlock — the suite then completes cleanly. The 60s cap is large
|
||||
# enough that no legitimate test trips it; if a test exceeds it that's a
|
||||
# real bug worth surfacing as a Timeout failure.
|
||||
addopts = "-m 'not integration' -n auto --timeout=30 --timeout-method=signal"
|
||||
# pytest-timeout: per-test 30s hard cap with signal method.
|
||||
# This is the fallback inside each per-file pytest subprocess (see
|
||||
# scripts/run_tests_parallel.py). Per-file isolation gives every test
|
||||
# file a fresh Python interpreter; pytest-timeout catches Python-level
|
||||
# hangs within a file.
|
||||
addopts = "-m 'not integration' --timeout=30 --timeout-method=signal"
|
||||
|
||||
[tool.ty.environment]
|
||||
python-version = "3.13"
|
||||
|
||||
+132
-23
@@ -168,7 +168,7 @@ from agent.tool_result_classification import (
|
||||
file_mutation_result_landed,
|
||||
)
|
||||
from agent.trajectory import (
|
||||
convert_scratchpad_to_think, has_incomplete_scratchpad,
|
||||
convert_scratchpad_to_think,
|
||||
save_trajectory as _save_trajectory_to_file,
|
||||
)
|
||||
from agent.message_sanitization import (
|
||||
@@ -1517,23 +1517,35 @@ class AIAgent:
|
||||
return content.strip()
|
||||
|
||||
def _save_session_log(self, messages: List[Dict[str, Any]] = None):
|
||||
"""
|
||||
Save the full raw session to a JSON file.
|
||||
"""Optional per-session JSON snapshot writer.
|
||||
|
||||
Stores every message exactly as the agent sees it: user messages,
|
||||
assistant messages (with reasoning, finish_reason, tool_calls),
|
||||
tool responses (with tool_call_id, tool_name), and injected system
|
||||
messages (compression summaries, todo snapshots, etc.).
|
||||
Gated by ``sessions.write_json_snapshots`` (default False). state.db
|
||||
is the canonical message store; this writer exists only for users
|
||||
whose external tooling consumes ``~/.hermes/sessions/session_{sid}.json``
|
||||
directly. When the flag is off this is a fast no-op.
|
||||
|
||||
REASONING_SCRATCHPAD tags are converted to <think> blocks for consistency.
|
||||
Overwritten after each turn so it always reflects the latest state.
|
||||
When enabled, rewrites the snapshot after every persistence point with
|
||||
the full message list (assistant content normalized via
|
||||
``_clean_session_content`` to convert REASONING_SCRATCHPAD to think
|
||||
tags). The truncation guard ("don't overwrite a larger log with
|
||||
fewer messages") is preserved so resume + branch don't clobber a
|
||||
fuller existing snapshot.
|
||||
"""
|
||||
if not getattr(self, "_session_json_enabled", False):
|
||||
return
|
||||
messages = messages or self._session_messages
|
||||
if not messages:
|
||||
return
|
||||
|
||||
# Re-derive the target path each call so /branch and /compress
|
||||
# session-id changes land in the right file without any re-point
|
||||
# bookkeeping at the call sites.
|
||||
try:
|
||||
log_file = self.logs_dir / f"session_{self.session_id}.json"
|
||||
except Exception:
|
||||
return
|
||||
|
||||
try:
|
||||
# Clean assistant content for session logs
|
||||
cleaned = []
|
||||
for msg in messages:
|
||||
if msg.get("role") == "assistant" and msg.get("content"):
|
||||
@@ -1542,12 +1554,11 @@ class AIAgent:
|
||||
cleaned.append(msg)
|
||||
|
||||
# Guard: never overwrite a larger session log with fewer messages.
|
||||
# This protects against data loss when --resume loads a session whose
|
||||
# messages weren't fully written to SQLite — the resumed agent starts
|
||||
# with partial history and would otherwise clobber the full JSON log.
|
||||
if self.session_log_file.exists():
|
||||
# Protects against data loss when a resumed agent starts with
|
||||
# partial history and would otherwise clobber the full JSON log.
|
||||
if log_file.exists():
|
||||
try:
|
||||
existing = json.loads(self.session_log_file.read_text(encoding="utf-8"))
|
||||
existing = json.loads(log_file.read_text(encoding="utf-8"))
|
||||
existing_count = existing.get("message_count", len(existing.get("messages", [])))
|
||||
if existing_count > len(cleaned):
|
||||
logging.debug(
|
||||
@@ -1572,7 +1583,7 @@ class AIAgent:
|
||||
}
|
||||
|
||||
atomic_json_write(
|
||||
self.session_log_file,
|
||||
log_file,
|
||||
entry,
|
||||
indent=2,
|
||||
default=str,
|
||||
@@ -1582,6 +1593,7 @@ class AIAgent:
|
||||
if self.verbose_logging:
|
||||
logging.warning(f"Failed to save session log: {e}")
|
||||
|
||||
|
||||
def interrupt(self, message: str = None) -> None:
|
||||
"""
|
||||
Request the agent to interrupt its current tool-calling loop.
|
||||
@@ -3188,17 +3200,21 @@ class AIAgent:
|
||||
Used to decide whether to strip image content parts from API-bound
|
||||
messages (for non-vision models) or let the provider adapter handle
|
||||
them natively (for vision-capable models).
|
||||
|
||||
Resolution order (see ``agent.image_routing._supports_vision_override``):
|
||||
1. ``model.supports_vision`` (top-level, single-model shortcut)
|
||||
2. ``providers.<provider>.models.<model>.supports_vision``
|
||||
3. models.dev capability lookup
|
||||
Custom/local models absent from models.dev would otherwise be
|
||||
misclassified as non-vision and have their images stripped.
|
||||
"""
|
||||
try:
|
||||
from agent.models_dev import get_model_capabilities
|
||||
from hermes_cli.config import load_config
|
||||
from agent.image_routing import _lookup_supports_vision
|
||||
cfg = load_config()
|
||||
provider = (getattr(self, "provider", "") or "").strip()
|
||||
model = (getattr(self, "model", "") or "").strip()
|
||||
if not provider or not model:
|
||||
return False
|
||||
caps = get_model_capabilities(provider, model)
|
||||
if caps is None:
|
||||
return False
|
||||
return bool(caps.supports_vision)
|
||||
return _lookup_supports_vision(provider, model, cfg) is True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@@ -3341,6 +3357,25 @@ class AIAgent:
|
||||
return content
|
||||
|
||||
if self._model_supports_vision():
|
||||
# Vision-capable on paper — but if we've already learned in this
|
||||
# session that the active (provider, model) rejects list-type
|
||||
# tool content (e.g. Xiaomi MiMo's 400 "text is not set"),
|
||||
# short-circuit to a text summary so we don't burn another
|
||||
# round-trip relearning the same lesson. Cache populated by
|
||||
# the 400 recovery path in agent.conversation_loop. Transient
|
||||
# per-session; next session retries.
|
||||
key = (
|
||||
(getattr(self, "provider", "") or "").strip().lower(),
|
||||
(getattr(self, "model", "") or "").strip(),
|
||||
)
|
||||
no_list = getattr(self, "_no_list_tool_content_models", None)
|
||||
if no_list and key in no_list:
|
||||
logger.debug(
|
||||
"Tool %s: model %s/%s known to reject list-type tool "
|
||||
"content this session — sending text summary",
|
||||
tool_name, key[0], key[1],
|
||||
)
|
||||
return _multimodal_text_summary(result)
|
||||
return content
|
||||
|
||||
summary = _multimodal_text_summary(result)
|
||||
@@ -3369,6 +3404,80 @@ class AIAgent:
|
||||
from agent.conversation_compression import try_shrink_image_parts_in_messages
|
||||
return try_shrink_image_parts_in_messages(api_messages)
|
||||
|
||||
def _try_strip_image_parts_from_tool_messages(self, api_messages: list) -> bool:
|
||||
"""Downgrade list-type tool messages to text summaries in-place.
|
||||
|
||||
Recovery path for providers that reject list-type tool message content
|
||||
(e.g. Xiaomi MiMo's 400 "text is not set"; see issue #27344). Walks
|
||||
``api_messages`` for any ``role: "tool"`` message whose ``content`` is
|
||||
a list containing image parts, replaces the content with the existing
|
||||
text part(s) (or a minimal placeholder if none survive), and records
|
||||
the active (provider, model) in ``self._no_list_tool_content_models``
|
||||
so subsequent ``_tool_result_content_for_active_model`` calls in this
|
||||
session preemptively downgrade screenshots without a round-trip.
|
||||
|
||||
Returns True when at least one tool message was downgraded — the
|
||||
caller (the 400 recovery branch in ``agent.conversation_loop``) uses
|
||||
this to decide whether to retry the API call with the modified
|
||||
history or surface the original error.
|
||||
"""
|
||||
if not isinstance(api_messages, list):
|
||||
return False
|
||||
|
||||
# Record (provider, model) so we don't relearn this lesson.
|
||||
key = (
|
||||
(getattr(self, "provider", "") or "").strip().lower(),
|
||||
(getattr(self, "model", "") or "").strip(),
|
||||
)
|
||||
if not hasattr(self, "_no_list_tool_content_models"):
|
||||
self._no_list_tool_content_models = set()
|
||||
if key[1]: # only record when we actually have a model id
|
||||
self._no_list_tool_content_models.add(key)
|
||||
|
||||
changed = False
|
||||
for msg in api_messages:
|
||||
if not isinstance(msg, dict) or msg.get("role") != "tool":
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, list):
|
||||
continue
|
||||
|
||||
# Salvage any text parts so the model still sees some signal.
|
||||
text_parts: List[str] = []
|
||||
had_image = False
|
||||
for part in content:
|
||||
if not isinstance(part, dict):
|
||||
if isinstance(part, str) and part.strip():
|
||||
text_parts.append(part.strip())
|
||||
continue
|
||||
ptype = part.get("type")
|
||||
if ptype == "image_url" or ptype == "input_image":
|
||||
had_image = True
|
||||
continue
|
||||
if ptype in {"text", "input_text"}:
|
||||
text = str(part.get("text") or "").strip()
|
||||
if text:
|
||||
text_parts.append(text)
|
||||
|
||||
if not had_image:
|
||||
# List-type content but no image parts — leave alone (some
|
||||
# providers reject ANY list content, but stripping a
|
||||
# text-only list doesn't reduce ambiguity; let the caller
|
||||
# surface the original error if this turns out to be the
|
||||
# case).
|
||||
continue
|
||||
|
||||
if text_parts:
|
||||
msg["content"] = "\n\n".join(text_parts)
|
||||
else:
|
||||
msg["content"] = (
|
||||
"[image content removed — provider does not accept "
|
||||
"list-type tool message content]"
|
||||
)
|
||||
changed = True
|
||||
|
||||
return changed
|
||||
|
||||
def _anthropic_preserve_dots(self) -> bool:
|
||||
"""True when using an anthropic-compatible endpoint that preserves dots in model names.
|
||||
Alibaba/DashScope keeps dots (e.g. qwen3.5-plus).
|
||||
|
||||
+16
-1
@@ -47,6 +47,10 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
|
||||
AUTHOR_MAP = {
|
||||
# teknium (multiple emails)
|
||||
"teknium1@gmail.com": "teknium1",
|
||||
"cipherframe@users.noreply.github.com": "CipherFrame",
|
||||
"me@promplate.dev": "CNSeniorious000",
|
||||
"yichengqiao21@gmail.com": "YarrowQiao",
|
||||
"erhanyasarx@gmail.com": "erhnysr",
|
||||
"30366221+WorldWriter@users.noreply.github.com": "WorldWriter",
|
||||
"dafeng@DafengdeMacBook-Pro.local": "WorldWriter",
|
||||
"anadi.jaggia@gmail.com": "Jaggia",
|
||||
@@ -56,12 +60,18 @@ AUTHOR_MAP = {
|
||||
"mgongzai@gmail.com": "vKongv",
|
||||
"0x.badfriend@gmail.com": "discodirector",
|
||||
"altriatree@gmail.com": "TruaShamu",
|
||||
"contact-me@stark-x.cn": "Stark-X",
|
||||
"nat@nthrow.io": "nthrow",
|
||||
"m@mobrienv.dev": "mikeyobrien",
|
||||
"saeed919@pm.me": "falasi",
|
||||
"chrisdlc119@outlook.com": "chdlc",
|
||||
"omar@techdeveloper.site": "nycomar",
|
||||
"qiyin.zuo@pcitc.com": "qiyin-code",
|
||||
"mr.aashiz@gmail.com": "aashizpoudel",
|
||||
"70629228+shaun0927@users.noreply.github.com": "shaun0927",
|
||||
"98262967+Bihruze@users.noreply.github.com": "Bihruze",
|
||||
"189280367+Lempkey@users.noreply.github.com": "Lempkey",
|
||||
"leovillalbajr@gmail.com": "Lempkey",
|
||||
"nidhi2894@gmail.com": "nidhi-singh02",
|
||||
"30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
|
||||
"oleksii.lisikh@gmail.com": "olisikh",
|
||||
@@ -74,6 +84,7 @@ AUTHOR_MAP = {
|
||||
"108427749+buntingszn@users.noreply.github.com": "buntingszn",
|
||||
"yanglongwei06@gmail.com": "Alex-yang00",
|
||||
"teknium@nousresearch.com": "teknium1",
|
||||
"markuscontasul@gmail.com": "Glucksberg",
|
||||
"piyushvp1@gmail.com": "thelumiereguy",
|
||||
"dskwelmcy@163.com": "dskwe",
|
||||
"421774554@qq.com": "wuli666",
|
||||
@@ -372,6 +383,7 @@ AUTHOR_MAP = {
|
||||
"bloodcarter@gmail.com": "bloodcarter",
|
||||
"scott@scotttrinh.com": "scotttrinh",
|
||||
"quocanh261997@gmail.com": "quocanh261997",
|
||||
"savanne.kham@protonmail.com": "savanne-kham", # PR #28958 salvage (strip tool_name for strict providers)
|
||||
# contributors (from noreply pattern)
|
||||
"david.vv@icloud.com": "davidvv",
|
||||
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
|
||||
@@ -680,7 +692,7 @@ AUTHOR_MAP = {
|
||||
"hmbown@gmail.com": "Hmbown",
|
||||
"iacobs@m0n5t3r.info": "m0n5t3r",
|
||||
"jiayuw794@gmail.com": "JiayuuWang",
|
||||
"jonny@nousresearch.com": "jquesnelle",
|
||||
"jonny@nousresearch.com": "yoniebans",
|
||||
"jake@nousresearch.com": "simpolism",
|
||||
"juan.ovalle@mistral.ai": "jjovalle99",
|
||||
"julien.talbot@ergonomia.re": "Julientalbot",
|
||||
@@ -713,6 +725,7 @@ AUTHOR_MAP = {
|
||||
"9219265+cresslank@users.noreply.github.com": "cresslank",
|
||||
"trevmanthony@gmail.com": "trevthefoolish",
|
||||
"ziliangpeng@users.noreply.github.com": "ziliangpeng",
|
||||
"ziliangdotme@gmail.com": "ziliangpeng",
|
||||
"centripetal-star@users.noreply.github.com": "centripetal-star",
|
||||
"LeonSGP43@users.noreply.github.com": "LeonSGP43",
|
||||
"154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
|
||||
@@ -922,6 +935,8 @@ AUTHOR_MAP = {
|
||||
"holynn@placeholder.local": "holynn-q",
|
||||
"agent@hermes.local": "jacdevos",
|
||||
"sunsky.lau@gmail.com": "liuhao1024",
|
||||
"fabianoeq@gmail.com": "rodrigoeqnit",
|
||||
"178342791+sgtworkman@users.noreply.github.com": "sgtworkman",
|
||||
"qiuqfang98@qq.com": "keepcalmqqf",
|
||||
"261867348+ai-ag2026@users.noreply.github.com": "ai-ag2026",
|
||||
"yanzh.su@gmail.com": "YanzhongSu",
|
||||
|
||||
+40
-96
@@ -3,29 +3,36 @@
|
||||
# `pytest` directly to guarantee your local run matches CI behavior.
|
||||
#
|
||||
# What this script enforces:
|
||||
# * -n 4 xdist workers (CI has 4 cores; -n auto diverges locally)
|
||||
# * Per-file isolation via scripts/run_tests_parallel.py — each test
|
||||
# file runs in its own freshly-spawned `python -m pytest <file>`
|
||||
# subprocess. No xdist, no shared workers, no module-level leakage
|
||||
# between files.
|
||||
# * TZ=UTC, LANG=C.UTF-8, PYTHONHASHSEED=0 (deterministic)
|
||||
# * Credential env vars blanked (conftest.py also does this, but this
|
||||
# is belt-and-suspenders for anyone running `pytest` outside of
|
||||
# our conftest path — e.g. calling pytest on a single file)
|
||||
# * Proper venv activation
|
||||
# * Env vars blanked (conftest.py also does this, but this
|
||||
# is belt-and-suspenders for anyone running pytest outside our
|
||||
# conftest path — e.g. on a single file)
|
||||
# * Proper venv activation (probes .venv, venv, then ~/.hermes/...)
|
||||
#
|
||||
# Usage:
|
||||
# scripts/run_tests.sh # full suite
|
||||
# scripts/run_tests.sh tests/agent/ # one directory
|
||||
# scripts/run_tests.sh tests/agent/test_foo.py::TestClass::test_method
|
||||
# scripts/run_tests.sh --tb=long -v # pass-through pytest args
|
||||
# scripts/run_tests.sh # full suite
|
||||
# scripts/run_tests.sh -j 4 # cap parallelism
|
||||
# scripts/run_tests.sh tests/agent/ # discover only here
|
||||
# scripts/run_tests.sh tests/agent/ tests/acp/ # multiple roots
|
||||
# scripts/run_tests.sh tests/foo.py # single file
|
||||
# scripts/run_tests.sh tests/foo.py -- --tb=long # path + pytest args
|
||||
# scripts/run_tests.sh -- -v --tb=long # pytest args only
|
||||
#
|
||||
# Everything after a literal '--' is passed through to each per-file
|
||||
# pytest invocation. Positional path arguments before '--' override
|
||||
# the default discovery root (tests/).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ── Locate repo root ────────────────────────────────────────────────────────
|
||||
# Works whether this is the main checkout or a worktree.
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
|
||||
# ── Activate venv ───────────────────────────────────────────────────────────
|
||||
# Prefer a .venv in the current tree, fall back to the main checkout's venv
|
||||
# (useful for worktrees where we don't always duplicate the venv).
|
||||
VENV=""
|
||||
for candidate in "$REPO_ROOT/.venv" "$REPO_ROOT/venv" "$HOME/.hermes/hermes-agent/venv"; do
|
||||
if [ -f "$candidate/bin/activate" ]; then
|
||||
@@ -41,94 +48,31 @@ fi
|
||||
|
||||
PYTHON="$VENV/bin/python"
|
||||
|
||||
# ── Ensure pytest-split is installed (required for shard-equivalent runs) ──
|
||||
if ! "$PYTHON" -c "import pytest_split" 2>/dev/null; then
|
||||
echo "→ installing pytest-split into $VENV"
|
||||
if command -v uv >/dev/null 2>&1; then
|
||||
uv pip install --python "$PYTHON" --quiet "pytest-split>=0.9,<1"
|
||||
elif "$PYTHON" -m pip --version >/dev/null 2>&1; then
|
||||
"$PYTHON" -m pip install --quiet "pytest-split>=0.9,<1"
|
||||
else
|
||||
echo "error: neither uv nor pip is available in $VENV — pytest-split is missing" >&2
|
||||
echo " fix: run uv pip install -e \".[dev]\" from $REPO_ROOT" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# ── Hermetic environment ────────────────────────────────────────────────────
|
||||
# Mirror what CI does in .github/workflows/tests.yml + what conftest.py does.
|
||||
# Unset every credential-shaped var currently in the environment.
|
||||
while IFS='=' read -r name _; do
|
||||
case "$name" in
|
||||
*_API_KEY|*_TOKEN|*_SECRET|*_PASSWORD|*_CREDENTIALS|*_ACCESS_KEY| \
|
||||
*_SECRET_ACCESS_KEY|*_PRIVATE_KEY|*_OAUTH_TOKEN|*_WEBHOOK_SECRET| \
|
||||
*_ENCRYPT_KEY|*_APP_SECRET|*_CLIENT_SECRET|*_CORP_SECRET|*_AES_KEY| \
|
||||
AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|FAL_KEY| \
|
||||
GH_TOKEN|GITHUB_TOKEN)
|
||||
unset "$name"
|
||||
;;
|
||||
esac
|
||||
done < <(env)
|
||||
|
||||
# Unset HERMES_* behavioral vars too.
|
||||
unset HERMES_YOLO_MODE HERMES_INTERACTIVE HERMES_QUIET HERMES_TOOL_PROGRESS \
|
||||
HERMES_TOOL_PROGRESS_MODE HERMES_MAX_ITERATIONS HERMES_SESSION_PLATFORM \
|
||||
HERMES_SESSION_CHAT_ID HERMES_SESSION_CHAT_NAME HERMES_SESSION_THREAD_ID \
|
||||
HERMES_SESSION_SOURCE HERMES_SESSION_KEY HERMES_GATEWAY_SESSION \
|
||||
HERMES_CRON_SESSION \
|
||||
HERMES_PLATFORM HERMES_INFERENCE_PROVIDER HERMES_MANAGED HERMES_DEV \
|
||||
HERMES_CONTAINER HERMES_EPHEMERAL_SYSTEM_PROMPT HERMES_TIMEZONE \
|
||||
HERMES_REDACT_SECRETS HERMES_BACKGROUND_NOTIFICATIONS HERMES_EXEC_ASK \
|
||||
HERMES_HOME_MODE 2>/dev/null || true
|
||||
|
||||
# Pin deterministic runtime.
|
||||
export TZ=UTC
|
||||
export LANG=C.UTF-8
|
||||
export LC_ALL=C.UTF-8
|
||||
export PYTHONHASHSEED=0
|
||||
|
||||
# ── Live-gateway test guard (developer machines) ────────────────────────────
|
||||
# If a system-wide hermes pytest_live_guard plugin is installed at
|
||||
# $HOME/.hermes/pytest_live_guard.py, force-load it here so every test run
|
||||
# from this script gets the protection regardless of which worktree is
|
||||
# checked out (in-tree tests/conftest.py guard may be missing on stale
|
||||
# branches). Harmless on CI / fresh machines that don't have the file.
|
||||
# ── Live-gateway plugin (computed before we drop env) ───────────────────────
|
||||
EXTRA_PYTHONPATH=""
|
||||
EXTRA_PYTEST_PLUGINS=""
|
||||
if [ -f "$HOME/.hermes/pytest_live_guard.py" ]; then
|
||||
case ":${PYTHONPATH:-}:" in
|
||||
*":$HOME/.hermes:"*) ;;
|
||||
*) export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$HOME/.hermes" ;;
|
||||
esac
|
||||
if [[ ",${PYTEST_PLUGINS:-}," != *,pytest_live_guard,* ]]; then
|
||||
export PYTEST_PLUGINS="${PYTEST_PLUGINS:+$PYTEST_PLUGINS,}pytest_live_guard"
|
||||
fi
|
||||
EXTRA_PYTHONPATH="$HOME/.hermes"
|
||||
EXTRA_PYTEST_PLUGINS="pytest_live_guard"
|
||||
fi
|
||||
|
||||
# ── Worker count ────────────────────────────────────────────────────────────
|
||||
# CI uses `-n auto` on ubuntu-latest which gives 4 workers. A 20-core
|
||||
# workstation with `-n auto` gets 20 workers and exposes test-ordering
|
||||
# flakes that CI will never see. Pin to 4 so local matches CI.
|
||||
WORKERS="${HERMES_TEST_WORKERS:-4}"
|
||||
|
||||
# ── Run pytest ──────────────────────────────────────────────────────────────
|
||||
# ── Run in hermetic env ──────────────────────────────────────────────────────
|
||||
# env -i: start with empty environment, opt-in only what we need.
|
||||
# No credential var can leak — you'd have to explicitly add it here.
|
||||
echo "▶ running per-file parallel test suite via run_tests_parallel.py"
|
||||
echo " (TZ=UTC LANG=C.UTF-8 PYTHONHASHSEED=0; clean env)"
|
||||
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
# If the first argument starts with `-` treat all args as pytest flags;
|
||||
# otherwise treat them as test paths.
|
||||
ARGS=("$@")
|
||||
|
||||
echo "▶ running pytest with $WORKERS workers, hermetic env, in $REPO_ROOT"
|
||||
echo " (TZ=UTC LANG=C.UTF-8 PYTHONHASHSEED=0; all credential env vars unset)"
|
||||
|
||||
# -o "addopts=" clears pyproject.toml's `-n auto` so our -n wins.
|
||||
# We re-add --timeout/--timeout-method here because pyproject.toml's
|
||||
# addopts is wiped above. The 60s cap is essential: see pyproject.toml
|
||||
# for why (suite deadlocks at session teardown without it).
|
||||
exec "$PYTHON" -m pytest \
|
||||
-o "addopts=" \
|
||||
-n "$WORKERS" \
|
||||
--timeout=30 \
|
||||
--timeout-method=signal \
|
||||
--ignore=tests/integration \
|
||||
--ignore=tests/e2e \
|
||||
-m "not integration" \
|
||||
"${ARGS[@]}"
|
||||
exec env -i \
|
||||
PATH="$PATH" \
|
||||
HOME="$HOME" \
|
||||
TZ=UTC \
|
||||
LANG=C.UTF-8 \
|
||||
LC_ALL=C.UTF-8 \
|
||||
PYTHONHASHSEED=0 \
|
||||
${EXTRA_PYTHONPATH:+PYTHONPATH="$EXTRA_PYTHONPATH"} \
|
||||
${EXTRA_PYTEST_PLUGINS:+PYTEST_PLUGINS="$EXTRA_PYTEST_PLUGINS"} \
|
||||
"$PYTHON" "$SCRIPT_DIR/run_tests_parallel.py" "$@"
|
||||
|
||||
Executable
+650
@@ -0,0 +1,650 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Per-file parallel test runner.
|
||||
|
||||
The minimum-viable replacement for pytest-xdist + a subprocess-isolation
|
||||
plugin. Discovers test files under ``tests/`` (excluding integration/e2e
|
||||
unless explicitly requested), then runs one ``python -m pytest <file>``
|
||||
subprocess per file, with bounded parallelism (default: ``os.cpu_count() * 2``).
|
||||
|
||||
Why per-file rather than per-test?
|
||||
Per-test spawn overhead (~250ms × 17k tests = 70min CPU minimum)
|
||||
swamped the actual work. Per-file spawn (~250ms × ~850 files = ~3.5min)
|
||||
fits in the budget while still giving every file a fresh Python
|
||||
interpreter — the only isolation boundary that actually matters
|
||||
(cross-file module-level state leakage was the original flake source;
|
||||
intra-file state is the test author's responsibility).
|
||||
|
||||
Why drop xdist entirely?
|
||||
xdist's persistent workers accumulate state across files, which is
|
||||
exactly the leakage we wanted to fix. xdist also adds complexity
|
||||
(loadfile vs loadscope, --max-worker-restart, internal control plane)
|
||||
that we don't need when the unit of work is "run pytest on one file".
|
||||
A subprocess.Popen pool gated by a semaphore is ~60 lines and does
|
||||
the job.
|
||||
|
||||
Usage:
|
||||
python scripts/run_tests_parallel.py [options] [PATH ...] [-- pytest_args...]
|
||||
|
||||
Pytest args after a literal ``--`` pass through to every per-file run (e.g. ``-v``, ``-x``, ``--tb=long``,
|
||||
``-k 'pattern'``, ``--lf``).
|
||||
|
||||
Environment:
|
||||
HERMES_TEST_WORKERS Override worker count (default: os.cpu_count() * 2)
|
||||
HERMES_TEST_PATHS Override discovery roots (colon-sep, default: 'tests')
HERMES_TEST_FILE_TIMEOUT Override the per-file wall-clock cap in seconds (default: 600)
|
||||
|
||||
Exit code: 0 if every file's pytest exited 0; 1 otherwise.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, Future
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
|
||||
# Default test discovery roots.
|
||||
_DEFAULT_ROOTS = ["tests"]
|
||||
|
||||
# Directories to skip during discovery — the e2e + integration suites
|
||||
# require real services and are run separately. Match exactly the
|
||||
# ``--ignore=`` flags the previous CI command used.
|
||||
_SKIP_PARTS = {"integration", "e2e"}
|
||||
|
||||
# Per-file wall-clock cap. Generous default — pytest-timeout still
|
||||
# enforces per-test caps inside each subprocess; this is just an outer
|
||||
# safety net so a single hung file can't stall the whole suite. Override
|
||||
# via --file-timeout or HERMES_TEST_FILE_TIMEOUT.
|
||||
_DEFAULT_FILE_TIMEOUT_SECONDS = 600.0 # 10 minutes
|
||||
|
||||
|
||||
def _count_tests(
    files: List[Path], repo_root: Path, pytest_passthrough: List[str]
) -> dict[Path, int]:
    """Count collected tests per file using one ``pytest --co -q`` pass.

    A single collection-only subprocess is run from ``repo_root`` over all
    ``files`` (plus the passthrough args), and every ``path::node`` line of
    its stdout is attributed to ``repo_root / path``.

    Existing ``<root>/<part>`` directories, for each default root and each
    entry of ``_SKIP_PARTS``, are passed as ``--ignore`` so the collection
    mirrors what the per-file runs will execute.

    Returns:
        ``{file_path: test_count}``. Files that produced no node-id lines
        are simply absent. An empty dict is returned when the collection
        subprocess cannot be started or exceeds its 120 second timeout.
    """
    # Candidate directories to exclude from collection.
    skip_dirs = [
        repo_root / root_name / part
        for root_name in _DEFAULT_ROOTS
        for part in _SKIP_PARTS
    ]
    ignore_args: List[str] = []
    for skip_dir in skip_dirs:
        if skip_dir.is_dir():
            ignore_args += ["--ignore", str(skip_dir)]

    collect_cmd = [sys.executable, "-m", "pytest", "--co", "-q"]
    collect_cmd += ignore_args
    collect_cmd += [str(f) for f in files]
    collect_cmd += pytest_passthrough

    try:
        collected = subprocess.run(
            collect_cmd,
            cwd=repo_root,
            capture_output=True,
            text=True,
            timeout=120,
        )
    except (subprocess.TimeoutExpired, OSError):
        # Counting is best-effort: the caller falls back to zero totals.
        return {}

    counts: dict[Path, int] = {}
    for node_id in collected.stdout.splitlines():
        # Node ids look like: tests/acp/test_auth.py::TestClass::test_name
        file_part, sep, _rest = node_id.partition("::")
        if not sep:
            continue
        key = repo_root / file_part
        counts[key] = counts.get(key, 0) + 1

    return counts
|
||||
|
||||
|
||||
def _discover_files(roots: List[Path]) -> List[Path]:
    """Return every ``test_*.py`` under the given roots (sorted, de-duplicated).

    Roots may be directories (recursed for ``test_*.py``) or explicit
    files (included as-is, even if they don't match the ``test_*``
    prefix — the caller knows what they want). Roots that do not exist
    are ignored.

    A file found by recursion is excluded when a path component *below
    its root* is in ``_SKIP_PARTS``. Only components relative to the
    root are checked, so:

    * a checkout that happens to live under a directory called ``e2e``
      or ``integration`` does not lose every test, and
    * a root the user named explicitly (e.g. ``tests/integration``)
      overrides the skip filter, as explicit files already do.

    De-duplication is by resolved path; the first spelling seen wins.
    """
    seen: set[Path] = set()
    out: List[Path] = []
    for root in roots:
        if not root.exists():
            continue
        if root.is_file():
            # Explicit file: no skip filter, the user named it directly.
            candidates = [root]
        else:
            candidates = [
                path
                for path in root.rglob("test_*.py")
                # Judge only the part of the path beneath the root.
                if _SKIP_PARTS.isdisjoint(path.relative_to(root).parts)
            ]
        for path in candidates:
            real = path.resolve()
            if real in seen:
                continue
            seen.add(real)
            out.append(path)
    return sorted(out)
|
||||
|
||||
|
||||
def _kill_tree(proc: "subprocess.Popen", pgid: int | None = None) -> None:
    """Best-effort kill of ``proc`` and the processes it spawned.

    ``Popen.kill()`` only signals the immediate child, so descendants
    (servers, async runtimes, ...) started by a test would otherwise
    outlive the pytest subprocess.

    Args:
        proc: The pytest subprocess. Nothing is done when its ``pid`` is
            ``None``.
        pgid: POSIX only — the process group id captured by the caller
            right after ``Popen``. It is passed in rather than looked up
            here because the lookup fails once the group leader has been
            reaped. When ``None`` the group kill is skipped.

    On Windows ``taskkill /F /T /PID`` is used instead of a process
    group. Every failure is swallowed: processes that are already gone
    are the expected case, not an error.
    """
    if proc.pid is None:
        return

    if sys.platform == "win32":
        taskkill_cmd = ["taskkill", "/F", "/T", "/PID", str(proc.pid)]
        try:
            subprocess.run(
                taskkill_cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=10,
            )  # windows-footgun: ok
        except (subprocess.TimeoutExpired, OSError):
            # OSError also covers a missing taskkill binary.
            pass
    elif pgid is not None:
        # Imported locally so SIGKILL is never referenced on Windows.
        import signal as _signal

        try:
            os.killpg(pgid, _signal.SIGKILL)  # windows-footgun: ok
        except OSError:
            # Covers ProcessLookupError / PermissionError as well.
            pass

    # Belt-and-suspenders: make sure the direct child itself is signalled.
    try:
        proc.kill()
    except OSError:
        pass
|
||||
|
||||
|
||||
def _run_one_file(
    file: Path,
    pytest_args: List[str],
    repo_root: Path,
    file_timeout: float,
) -> Tuple[Path, int, str, dict[str, int]]:
    """Run ``python -m pytest <file> <pytest_args>`` in a fresh subprocess.

    Returns ``(file, returncode, combined_output, summary_counts)`` where
    ``combined_output`` is stdout with stderr merged in and
    ``summary_counts`` is ``_parse_pytest_summary(combined_output)``.

    Return code handling
    (https://docs.pytest.org/en/stable/reference/exit-codes.html):

    * pytest's own exit code is passed through, except that
    * ``5`` (no tests collected) is reported as ``0`` — every test in
      the file was skipped or filtered out, which is not a failure;
    * ``124`` is reported when ``file_timeout`` seconds elapse. The
      output is then prefixed with a timeout notice and contains
      whatever could be drained within a further 10 seconds.

    Whatever the outcome — normal exit, timeout, or an exception such as
    ``KeyboardInterrupt`` (which is re-raised) — ``_kill_tree`` is called
    so descendants of the pytest process do not outlive it. pytest-timeout
    enforces per-test caps inside the subprocess; the outer timeout only
    bounds a hung or pathologically slow file as a whole.
    """
    proc = subprocess.Popen(
        [sys.executable, "-m", "pytest", str(file), *pytest_args],
        cwd=repo_root,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        # POSIX: makes the child the leader of its own process group so
        # _kill_tree can SIGKILL the whole group at once.
        # NOTE(review): the subprocess docs describe start_new_session as
        # POSIX-only — confirm whether it has any effect on Windows, where
        # _kill_tree relies on taskkill /F /T instead.
        start_new_session=True,
    )

    # Capture the pgid immediately: once the leader has exited and been
    # reaped the lookup raises ProcessLookupError. Stays None on Windows.
    pgid: int | None = None
    if sys.platform != "win32":
        try:
            pgid = os.getpgid(proc.pid)
        except (ProcessLookupError, PermissionError):
            # Child already gone; _kill_tree skips the group kill.
            pgid = None

    timed_out = False
    output = ""
    try:
        output, _ = proc.communicate(timeout=file_timeout)
    except subprocess.TimeoutExpired:
        timed_out = True
    finally:
        # Runs on the happy path, on timeout, and before any other
        # exception propagates; already-dead processes are a no-op.
        _kill_tree(proc, pgid=pgid)

    if timed_out:
        # Drain what the child wrote before it was killed so the failure
        # dump has something to show.
        try:
            drained, _ = proc.communicate(timeout=10)
        except subprocess.TimeoutExpired:
            drained = "(file timeout exceeded; output unavailable)"
        rc = 124  # de facto convention for "killed by timeout".
        output = (
            f"(per-file timeout: {file_timeout:.0f}s exceeded; "
            f"process tree SIGKILL'd)\n{drained}"
        )
    else:
        rc = proc.returncode

    # Exit code 5 == no tests collected: treated as a pass.
    rc = 0 if rc == 5 else rc
    return file, rc, output, _parse_pytest_summary(output)
|
||||
|
||||
|
||||
def _parse_pytest_summary(output: str) -> dict[str, int]:
    """Extract test outcome counts from pytest's final summary line.

    pytest ends its output with a line such as
    ``12 passed, 3 skipped, 1 failed in 2.1s``. The output is scanned from
    the end and the first line containing any ``<N> <outcome>`` pair
    supplies the counts.

    The scan gives up (returning what it has, normally ``{}``) when it
    reaches a ``FAILED ...`` line or the ``short test summary info``
    header without having found counts: everything above those is
    failure detail, not the counts line.

    Returns:
        A dict holding only the outcomes present, keyed by ``passed``,
        ``failed``, ``skipped``, ``errors``, ``xfailed`` or ``xpassed``.
        pytest's singular ``1 error`` is reported under ``errors``.
    """
    import re

    # Word boundaries stop fragments such as "x12 passedfoo" from matching;
    # "errors?" covers both the singular and plural spellings.
    count_re = re.compile(
        r"\b(\d+)\s+(passed|failed|skipped|errors?|xfailed|xpassed)\b"
    )

    counts: dict[str, int] = {}
    # Walk backwards — the summary line is always near the tail.
    for raw_line in reversed(output.splitlines()):
        line = raw_line.strip()
        if not line:
            continue
        for number, outcome in count_re.findall(line):
            key = "errors" if outcome == "error" else outcome
            counts[key] = int(number)
        if counts:
            # Found the counts line — done.
            break
        # pytest prints the header in lower case ("short test summary
        # info"), so compare case-insensitively.
        if line.startswith("FAILED") or "short test summary info" in line.lower():
            break
    return counts
|
||||
|
||||
|
||||
def _format_file(file: Path, repo_root: Path) -> str:
    """Render a test-file path for display.

    Paths inside the repository are shown relative to ``repo_root``
    (``tests/acp/test_auth.py`` rather than the full absolute path). Both
    sides are resolved before comparing. A path outside the repository is
    returned unchanged, exactly as it was passed in.
    """
    resolved_file = file.resolve()
    resolved_root = repo_root.resolve()
    try:
        relative = resolved_file.relative_to(resolved_root)
    except ValueError:
        # Not under the repo root.
        return str(file)
    return str(relative)
|
||||
|
||||
|
||||
def _print_progress(
    tests_done: int,
    total_tests: int,
    file: Path,
    rc: int,
    dur: float,
    repo_root: Path,
    tests_passed: int,
    tests_failed: int,
    test_counts: dict[Path, int],
    file_summary: dict[str, int] | None = None,
) -> None:
    """Print one live progress line for a finished test file.

    The line shows overall completion (percentage and ``done/total``
    tests), the running passed/failed test totals, a pass/fail mark for
    this file (``rc == 0`` means pass), its display path, and its duration.

    When ``file_summary`` (parsed from the pytest output) is non-empty,
    the per-file parenthetical lists individual outcome counts; otherwise
    it falls back to the collected test count from ``test_counts``.

    The line is truncated to the terminal width when one is available.
    """
    status = "✓" if rc == 0 else "✗"
    pct = (tests_done / total_tests * 100) if total_tests else 0

    # Width of the ✓/✗ counters. Derived from the suite total so the
    # columns stay aligned for the whole run instead of widening as the
    # counters grow; falls back to the running sum when the total is
    # unknown (0).
    fw = len(str(max(total_tests, tests_passed + tests_failed)))

    if file_summary:
        # (summary key, display suffix) in display order; zero counts
        # are omitted.
        outcome_suffixes = (
            ("passed", "✓"),
            ("failed", "✗"),
            ("skipped", "s"),
            ("errors", "e"),
            ("xfailed", "xf"),
            ("xpassed", "xp"),
        )
        parts = [
            f"{file_summary[key]}{suffix}"
            for key, suffix in outcome_suffixes
            if file_summary.get(key)
        ]
        test_str = " ".join(parts) + ", " if parts else ""
    else:
        n_tests = test_counts.get(file, 0)
        test_str = f"{n_tests} tests, " if n_tests else ""

    msg = (
        f"[{pct:5.1f}% | {tests_done:>5}/{total_tests}"
        f" | ✓{tests_passed:>{fw}} | ✗{tests_failed:>{fw}}] "
        f"{status} {_format_file(file, repo_root)} ({test_str}{dur:.1f}s)"
    )

    # Truncate to the terminal width if there is a terminal.
    try:
        cols = os.get_terminal_size().columns
    except OSError:
        cols = 0  # not a tty: print the full line
    if cols > 0 and len(msg) > cols:
        msg = msg[: cols - 1] + "…"
    print(msg, flush=True)
|
||||
|
||||
|
||||
def _print_inline_failure(
    file: Path, output: str, repo_root: Path, pytest_passthrough: List[str]
) -> None:
    """Print a compact failure box as soon as a file fails.

    The box contains the last 30 lines of the pytest output and a
    copy-pasteable repro command, so the developer doesn't have to wait
    for the full run to finish before seeing what broke.

    The repro command is shell-quoted with ``shlex.join``: a passthrough
    argument containing spaces or shell metacharacters (for example the
    expression given to ``-k``) survives a copy-paste intact.

    The whole box is written with a single ``print`` call, which keeps
    it in one piece when several worker threads report at once.
    """
    import shlex

    rel = _format_file(file, repo_root)
    repro = shlex.join(["python", "-m", "pytest", rel, *pytest_passthrough])

    # Last ~30 lines: typically the FAILED summary + short test info.
    tail_lines = output.rstrip().splitlines()[-30:]

    box = [
        "",
        f" ╔╍ Failed: {rel} ╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍",
        *(f" ║ {line}" for line in tail_lines),
        " ║",
        f" ║ Repro: {repro}",
        " ╚╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍",
        "",
    ]
    print("\n".join(box), flush=True)
|
||||
|
||||
|
||||
def main() -> int:
    """Discover test files, run each in its own pytest subprocess, report.

    Command-line handling: everything after a literal ``--`` is passed
    through to every per-file pytest invocation; everything before it is
    parsed here (``-j/--jobs``, ``--paths``, ``--include-integration``,
    ``--file-timeout`` and positional discovery paths, which override
    ``--paths`` when given).

    Returns:
        ``0`` when every file's run finished with return code 0, ``1``
        when any file failed or no test files were discovered. Invalid
        arguments (including ``--jobs`` below 1) exit via
        ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=int(os.environ.get("HERMES_TEST_WORKERS") or (os.cpu_count() or 4) * 2),
        help="Parallel worker count (default: $HERMES_TEST_WORKERS or cpu_count*2)",
    )
    parser.add_argument(
        "--paths",
        default=os.environ.get("HERMES_TEST_PATHS", ":".join(_DEFAULT_ROOTS)),
        help="Colon-separated discovery roots (default: 'tests')",
    )
    parser.add_argument(
        "--include-integration",
        action="store_true",
        help="Don't skip integration/ e2e/ during discovery",
    )
    parser.add_argument(
        "--file-timeout",
        type=float,
        default=float(
            os.environ.get("HERMES_TEST_FILE_TIMEOUT", _DEFAULT_FILE_TIMEOUT_SECONDS)
        ),
        help=(
            "Per-file wall-clock cap in seconds. On timeout, the pytest "
            "subprocess and its full process tree are SIGKILL'd. "
            "Default: 600 (10 min), env: HERMES_TEST_FILE_TIMEOUT."
        ),
    )
    parser.add_argument(
        "paths_positional",
        nargs="*",
        metavar="PATH",
        help=(
            "Restrict discovery to these paths (directories or .py files). "
            "Mutually exclusive with --paths. Anything after a literal '--' "
            "separator is passed through to each per-file pytest invocation."
        ),
    )

    # Split argv on '--' by hand so positional paths and pytest passthrough
    # args don't compete: argparse's nargs="*" positional is greedy and
    # would swallow the pytest flags too.
    argv = sys.argv[1:]
    if "--" in argv:
        sep = argv.index("--")
        our_args, pytest_passthrough = argv[:sep], argv[sep + 1 :]
    else:
        our_args, pytest_passthrough = argv, []
    args = parser.parse_args(our_args)

    # Fail fast with a usage error; otherwise ThreadPoolExecutor raises a
    # bare ValueError only after discovery and collection have already run.
    if args.jobs < 1:
        parser.error(f"--jobs must be >= 1 (got {args.jobs})")

    repo_root = Path(__file__).resolve().parent.parent

    # Positional paths (directories or explicit .py files) override
    # --paths, which itself defaults to 'tests'.
    if args.paths_positional:
        roots = [repo_root / p for p in args.paths_positional]
    else:
        roots = [repo_root / p for p in args.paths.split(":") if p]

    if args.include_integration:
        # Caller takes responsibility — typically used via explicit -k filter.
        global _SKIP_PARTS  # noqa: PLW0603 — config knob
        _SKIP_PARTS = set()

    files = _discover_files(roots)
    if not files:
        print(f"No test files discovered under {[str(r) for r in roots]}", file=sys.stderr)
        return 1

    # Count individual tests per file via a single pytest --co pass.
    test_counts = _count_tests(files, repo_root, pytest_passthrough)
    total_tests = sum(test_counts.values())

    shown_roots = [
        str(r.relative_to(repo_root)) if r.is_relative_to(repo_root) else str(r)
        for r in roots
    ]
    print(
        f"Discovered {len(files)} test files ({total_tests} tests) under "
        f"{shown_roots}; "
        f"running with -j {args.jobs}",
        flush=True,
    )

    # Output is captured per file and printed on completion (out-of-order
    # is fine) rather than interleaving N parallel pytest outputs.
    failures: List[Tuple[Path, str, Dict[str, int]]] = []
    durations: Dict[Path, float] = {}
    started = time.monotonic()
    files_done = 0
    tests_done = 0
    pass_count = 0
    fail_count = 0
    tests_passed = 0
    tests_failed = 0
    lock = threading.Lock()

    def _timed_run(file: Path) -> Tuple[Path, int, str, dict[str, int]]:
        """Run one file and record how long the run itself took.

        The clock starts when a worker picks the file up, not when it was
        submitted, so time spent waiting in the queue is not counted.
        """
        t0 = time.monotonic()
        try:
            return _run_one_file(
                file, pytest_passthrough, repo_root, args.file_timeout
            )
        finally:
            durations[file] = time.monotonic() - t0

    def _on_done(file: Path, fut: "Future[Tuple[Path, int, str, dict[str, int]]]") -> None:
        """Fold one finished file into the totals and print its progress.

        Counter updates and printing happen under ``lock`` so each
        progress line reflects a consistent snapshot and multi-line
        failure boxes are not interleaved with other threads' output.
        """
        nonlocal files_done, tests_done, pass_count, fail_count, tests_passed, tests_failed
        n_tests = test_counts.get(file, 0)
        dur = durations.get(file, 0.0)
        try:
            fpath, rc, output, summary = fut.result()
        except Exception as exc:  # noqa: BLE001 — must always advance counter
            with lock:
                files_done += 1
                tests_done += n_tests
                fail_count += 1
                failures.append((file, f"runner crashed: {exc!r}", {}))
                _print_progress(
                    tests_done, total_tests, file, 1,
                    dur,
                    repo_root, tests_passed, tests_failed,
                    test_counts,
                )
            return
        with lock:
            files_done += 1
            tests_done += n_tests
            # Accumulate test-level counts from the parsed summary.
            tests_passed += summary.get("passed", 0)
            tests_failed += summary.get("failed", 0)
            if rc == 0:
                pass_count += 1
            else:
                fail_count += 1
                failures.append((fpath, output, summary))
            _print_progress(
                tests_done, total_tests, fpath, rc,
                dur,
                repo_root, tests_passed, tests_failed,
                test_counts,
                file_summary=summary,
            )
            if rc != 0:
                _print_inline_failure(fpath, output, repo_root, pytest_passthrough)

    with ThreadPoolExecutor(max_workers=args.jobs) as pool:
        futures: List[Future] = []
        for file in files:
            fut = pool.submit(_timed_run, file)
            # Bind ``file`` as a default to avoid late-binding in the lambda.
            fut.add_done_callback(lambda f, file=file: _on_done(file, f))
            futures.append(fut)
        # Block until everything's done. ThreadPoolExecutor.__exit__ would
        # wait too; doing it explicitly makes the control flow obvious.
        # exception() waits without re-raising — outcomes are handled in
        # _on_done.
        for fut in futures:
            fut.exception()

    elapsed = time.monotonic() - started
    print()
    pct = (tests_done / total_tests * 100) if total_tests else 0
    print(f"=== Summary: {len(files)} files, {tests_passed} tests passed, {tests_failed} failed ({pct:.0f}% complete) in {elapsed:.1f}s ({args.jobs} workers) ===")

    if failures:
        print()
        print("=== Failure output ===")
        for file, output, _summary in failures:
            print()
            print(f"--- {_format_file(file, repo_root)} ---")
            print(output.rstrip())
        print()
        # Split: files with actual test failures vs non-zero exit for other reasons
        test_fail_files = [(f, s) for f, _o, s in failures if s.get("failed", 0) > 0]
        all_passed_but_nonzero = [(f, s) for f, _o, s in failures
                                  if s.get("failed", 0) == 0 and s.get("passed", 0) > 0]
        no_tests_ran = [(f, s) for f, _o, s in failures
                        if s.get("failed", 0) == 0 and s.get("passed", 0) == 0]
        if test_fail_files:
            total_tf = sum(s.get("failed", 0) for _, s in test_fail_files)
            print(f"=== {len(test_fail_files)} file{'s' if len(test_fail_files) != 1 else ''} with test failures ({total_tf} test{'s' if total_tf != 1 else ''} failed) ===")
            for file, s in test_fail_files:
                nf = s.get("failed", 0)
                print(f" {_format_file(file, repo_root)} ({nf} test{'s' if nf != 1 else ''} failed)")
        if all_passed_but_nonzero:
            print(f"=== {len(all_passed_but_nonzero)} file{'s' if len(all_passed_but_nonzero) != 1 else ''} where all tests passed but pytest exited non-zero (warnings-as-errors, hook failures, etc.) ===")
            for file, s in all_passed_but_nonzero:
                print(f" {_format_file(file, repo_root)} ({s.get('passed', 0)} passed)")
        if no_tests_ran:
            print(f"=== {len(no_tests_ran)} file{'s' if len(no_tests_ran) != 1 else ''} where no tests ran (collection/import error, timeout before collection, etc.) ===")
            for file, s in no_tests_ran:
                print(f" {_format_file(file, repo_root)}")
        return 1

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
+16
-17
@@ -629,13 +629,12 @@
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/fetch": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
|
||||
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.1.tgz",
|
||||
"integrity": "sha512-GpptLrs57adMSuHi3VNj0mAF8dwh36LMaYF6XyJ6JMWlVsc+t42tm1HSEDmOs3A8fC9yyeisgLhsTVQokOZ0zw==",
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.1",
|
||||
"@protobufjs/inquire": "^1.1.0"
|
||||
"@protobufjs/aspromise": "^1.1.1"
|
||||
}
|
||||
},
|
||||
"node_modules/@protobufjs/float": {
|
||||
@@ -645,9 +644,9 @@
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/inquire": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.1.tgz",
|
||||
"integrity": "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==",
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.2.tgz",
|
||||
"integrity": "sha512-pa0vFRuws4wkvaXKK1uXZMAwAX4/t8ANaJo45iw/oQHNQ9q5xUzwgFmVJGXiga2BeN+zpX7Vf9vmsiIa2J+MUw==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/path": {
|
||||
@@ -1620,9 +1619,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/protobufjs": {
|
||||
"version": "7.5.6",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.6.tgz",
|
||||
"integrity": "sha512-M71sTMB146U3u0di3yup8iM+zv8yPRNQVr1KK4tyBitl3qFvEGucq/rGDRShD2rsJhtN02RJaJ7j5X5hmy8SJg==",
|
||||
"version": "7.6.0",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.6.0.tgz",
|
||||
"integrity": "sha512-LtESOsMPTZgyYtwxhvdgdjGL0HmXEaRA/hVD6sol4zA60hVXXXP/SGmxnqDbgGE8gy7pYex7cym+5vYPcmaXBQ==",
|
||||
"hasInstallScript": true,
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
@@ -1630,14 +1629,14 @@
|
||||
"@protobufjs/base64": "^1.1.2",
|
||||
"@protobufjs/codegen": "^2.0.5",
|
||||
"@protobufjs/eventemitter": "^1.1.0",
|
||||
"@protobufjs/fetch": "^1.1.0",
|
||||
"@protobufjs/fetch": "^1.1.1",
|
||||
"@protobufjs/float": "^1.0.2",
|
||||
"@protobufjs/inquire": "^1.1.1",
|
||||
"@protobufjs/inquire": "^1.1.2",
|
||||
"@protobufjs/path": "^1.1.2",
|
||||
"@protobufjs/pool": "^1.1.0",
|
||||
"@protobufjs/utf8": "^1.1.1",
|
||||
"@types/node": ">=13.7.0",
|
||||
"long": "^5.0.0"
|
||||
"long": "^5.3.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
@@ -2117,9 +2116,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ws": {
|
||||
"version": "8.20.0",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz",
|
||||
"integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==",
|
||||
"version": "8.20.1",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.20.1.tgz",
|
||||
"integrity": "sha512-It4dO0K5v//JtTXuPkfEOaI3uUN87iYPnqo/ZzqCoG3g8uhA66QUMs/SrM0YK7/NAu+r4LMh/9dq2A7k+rHs+w==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
|
||||
@@ -336,7 +336,8 @@ The registry of record is `hermes_cli/commands.py` — every consumer
|
||||
~/.hermes/config.yaml Main configuration
|
||||
~/.hermes/.env API keys and secrets
|
||||
$HERMES_HOME/skills/ Installed skills
|
||||
~/.hermes/sessions/ Session transcripts
|
||||
~/.hermes/sessions/ Gateway routing index, request dumps, *.jsonl transcripts (and optional per-session JSON snapshots when sessions.write_json_snapshots: true)
|
||||
~/.hermes/state.db Canonical session store (SQLite + FTS5)
|
||||
~/.hermes/logs/ Gateway and error logs
|
||||
~/.hermes/auth.json OAuth tokens and credential pools
|
||||
~/.hermes/hermes-agent/ Source code (if git-installed)
|
||||
@@ -867,7 +868,7 @@ hermes config set auxiliary.vision.model <model_name>
|
||||
| Env variables | `hermes config env-path` or [Env vars reference](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) |
|
||||
| CLI commands | `hermes --help` or [CLI reference](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) |
|
||||
| Gateway logs | `~/.hermes/logs/gateway.log` |
|
||||
| Session files | `~/.hermes/sessions/` or `hermes sessions browse` |
|
||||
| Session files | `hermes sessions browse` (reads state.db) |
|
||||
| Source code | `~/.hermes/hermes-agent/` |
|
||||
|
||||
---
|
||||
|
||||
@@ -40,6 +40,16 @@ def _clean_env(monkeypatch):
|
||||
"ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN",
|
||||
):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
# Module-level unhealthy cache (10-min TTL) leaks between tests;
|
||||
# earlier tests that call _mark_provider_unhealthy() poison the
|
||||
# cache for later ones, causing _resolve_auto to skip providers
|
||||
# that the test patched to return valid clients.
|
||||
import agent.auxiliary_client as _aux_mod
|
||||
_aux_mod._aux_unhealthy_until.clear()
|
||||
_aux_mod._aux_unhealthy_logged_at.clear()
|
||||
yield
|
||||
_aux_mod._aux_unhealthy_until.clear()
|
||||
_aux_mod._aux_unhealthy_logged_at.clear()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -461,6 +471,17 @@ class TestExpiredCodexFallback:
|
||||
import base64
|
||||
import time as _time
|
||||
|
||||
# Belt-and-suspenders: _try_openrouter marks openrouter unhealthy
|
||||
# when OPENROUTER_API_KEY is absent (which the preceding test in
|
||||
# this class exercises). The file-level _clean_env autouse fixture
|
||||
# clears the cache, but fixture ordering with the conftest
|
||||
# _hermetic_environment autouse can leave a narrow window where
|
||||
# the mark reappears. Explicitly clear here so this test is
|
||||
# independent of run order.
|
||||
import agent.auxiliary_client as _aux_mod
|
||||
_aux_mod._aux_unhealthy_until.clear()
|
||||
_aux_mod._aux_unhealthy_logged_at.clear()
|
||||
|
||||
header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
|
||||
payload_data = json.dumps({"exp": int(_time.time()) - 3600}).encode()
|
||||
payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
|
||||
@@ -1047,6 +1068,20 @@ class TestGetProviderChain:
|
||||
class TestTryPaymentFallback:
|
||||
"""_try_payment_fallback skips the failed provider and tries alternatives."""
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clear_unhealthy_cache(self):
    """Empty the auxiliary client's unhealthy-provider caches around each test.

    Earlier tests in this file call _mark_provider_unhealthy(), which
    pollutes the module-level ``_aux_unhealthy_until`` dict (10-min TTL).
    Without this cleanup the fallback chain skips providers we've patched
    to return valid clients — the patched function is never called.
    """
    from agent.auxiliary_client import _aux_unhealthy_until, _aux_unhealthy_logged_at

    caches = (_aux_unhealthy_until, _aux_unhealthy_logged_at)

    # Start every test from a clean slate ...
    for cache in caches:
        cache.clear()
    yield
    # ... and do not leak marks written by this test into later ones.
    for cache in caches:
        cache.clear()
|
||||
|
||||
def test_skips_failed_provider(self):
|
||||
mock_client = MagicMock()
|
||||
with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
from agent.agent_init import _merge_custom_provider_extra_body
|
||||
|
||||
|
||||
def test_custom_provider_extra_body_merges_into_request_overrides():
    """A matching custom-provider entry adds ``extra_body`` next to existing overrides.

    The provider entry's base_url carries a trailing slash while the agent's
    does not; the entry is still expected to apply.
    """
    custom_providers = [
        {
            "name": "gemma",
            "base_url": "https://example.test/v1/",
            "model": "google/gemma-4-31b-it",
            "extra_body": {
                "enable_thinking": True,
                "reasoning_effort": "high",
            },
        }
    ]
    agent = SimpleNamespace(
        provider="custom",
        model="google/gemma-4-31b-it",
        base_url="https://example.test/v1",
        request_overrides={"service_tier": "priority"},
    )

    _merge_custom_provider_extra_body(agent, custom_providers)

    # The pre-existing override survives and extra_body is added beside it.
    expected_overrides = {
        "service_tier": "priority",
        "extra_body": {
            "enable_thinking": True,
            "reasoning_effort": "high",
        },
    }
    assert agent.request_overrides == expected_overrides
|
||||
|
||||
|
||||
def test_custom_provider_extra_body_preserves_caller_override():
    """Caller-supplied ``extra_body`` keys win over the provider's configured ones.

    The provider contributes only the keys the caller did not set
    (``enable_thinking``); ``reasoning_effort`` keeps the caller's value and
    the caller-only key is left in place.
    """
    caller_overrides = {
        "extra_body": {
            "reasoning_effort": "low",
            "caller_only": True,
        }
    }
    agent = SimpleNamespace(
        provider="custom",
        model="google/gemma-4-31b-it",
        base_url="https://example.test/v1",
        request_overrides=caller_overrides,
    )
    provider_entry = {
        "name": "gemma",
        "base_url": "https://example.test/v1",
        "model": "google/gemma-4-31b-it",
        "extra_body": {
            "enable_thinking": True,
            "reasoning_effort": "high",
        },
    }

    _merge_custom_provider_extra_body(agent, [provider_entry])

    merged_extra_body = agent.request_overrides["extra_body"]
    assert merged_extra_body == {
        "enable_thinking": True,
        "reasoning_effort": "low",
        "caller_only": True,
    }
|
||||
|
||||
|
||||
def test_custom_provider_extra_body_ignores_other_custom_models():
    """A provider entry for a different model leaves request_overrides untouched."""
    gemma_entry = {
        "name": "gemma",
        "base_url": "https://example.test/v1",
        "model": "google/gemma-4-31b-it",
        "extra_body": {"enable_thinking": True},
    }
    # Same base_url as the entry, but the agent runs a different model.
    agent = SimpleNamespace(
        provider="custom",
        model="other-model",
        base_url="https://example.test/v1",
        request_overrides={},
    )

    _merge_custom_provider_extra_body(agent, [gemma_entry])

    assert agent.request_overrides == {}
|
||||
@@ -56,6 +56,7 @@ class TestFailoverReason:
|
||||
"overloaded", "server_error", "timeout",
|
||||
"context_overflow", "payload_too_large", "image_too_large",
|
||||
"model_not_found", "format_error",
|
||||
"multimodal_tool_content_unsupported",
|
||||
"provider_policy_blocked",
|
||||
"thinking_signature", "long_context_tier",
|
||||
"oauth_long_context_beta_forbidden",
|
||||
@@ -1256,3 +1257,66 @@ class TestRateLimitErrorWithoutStatusCode:
|
||||
e.status_code = None
|
||||
result = classify_api_error(e, provider="copilot", model="gpt-4o")
|
||||
assert result.reason != FailoverReason.rate_limit
|
||||
|
||||
|
||||
|
||||
# ── Test: multimodal_tool_content_unsupported pattern ───────────────────
|
||||
|
||||
class TestMultimodalToolContentUnsupported:
    """Issue #27344 — classification of "tool content must be a string" errors.

    Providers that reject list-type tool message content should be classified
    as ``multimodal_tool_content_unsupported`` so the retry loop can downgrade
    screenshots to text and try again.
    """

    def test_xiaomi_mimo_text_is_not_set_pattern(self):
        """The actual Xiaomi MiMo 400 wording from the bug report."""
        err = MockAPIError(
            "Error code: 400 - {'error': {'code': '400', 'message': 'Param Incorrect', 'param': 'text is not set', 'type': ''}}",
            status_code=400,
        )

        classified = classify_api_error(err, provider="xiaomi", model="mimo-v2.5")

        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported
        assert classified.retryable is True

    def test_generic_tool_message_must_be_string(self):
        """A plain "must be a string" 400 is recognised for a custom provider."""
        err = MockAPIError(
            "tool message content must be a string",
            status_code=400,
        )

        classified = classify_api_error(err, provider="custom", model="some-model")

        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_expected_string_got_list(self):
        """Schema-validation wording ("expected string, got list") is recognised."""
        err = MockAPIError(
            "Schema validation failed: expected string, got list",
            status_code=400,
        )

        classified = classify_api_error(err, provider="custom", model="some-model")

        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_multimodal_tool_content_takes_priority_over_context_overflow(self):
        """Some providers return a 400 whose message contains BOTH
        'text is not set' and a length-shaped phrase; the tool-content
        recovery is cheaper than compression so it must win the priority.
        """
        err = MockAPIError(
            "text is not set; context length exceeded",
            status_code=400,
        )

        classified = classify_api_error(err, provider="xiaomi", model="mimo-v2.5")

        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_no_status_code_path_also_classifies(self):
        """When the error reaches us without a status code (transport
        layer ate it) the message-only classifier branch must also
        recognise the pattern.
        """
        err = MockTransportError("tool_call.content must be string")

        classified = classify_api_error(err, provider="alibaba", model="qwen3.5-plus")

        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_unrelated_400_is_not_misclassified(self):
        """Make sure the patterns don't false-positive on normal 400s."""
        err = MockAPIError("bad request: missing field 'model'", status_code=400)

        classified = classify_api_error(
            err, provider="openrouter", model="anthropic/claude-sonnet-4"
        )

        assert classified.reason != FailoverReason.multimodal_tool_content_unsupported
|
||||
|
||||
@@ -9,8 +9,11 @@ from unittest.mock import patch
|
||||
import pytest
|
||||
|
||||
from agent.image_routing import (
|
||||
_coerce_capability_bool,
|
||||
_coerce_mode,
|
||||
_explicit_aux_vision_override,
|
||||
_lookup_supports_vision,
|
||||
_supports_vision_override,
|
||||
build_native_content_parts,
|
||||
decide_image_input_mode,
|
||||
)
|
||||
@@ -125,6 +128,168 @@ class TestDecideImageInputMode:
|
||||
assert decide_image_input_mode("xiaomi", "mimo-v2.5-pro", {}) == "text"
|
||||
|
||||
|
||||
# ─── _coerce_capability_bool ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestCoerceCapabilityBool:
    """Pin how ``_coerce_capability_bool`` maps raw config values to True / False / None."""

    def test_real_bool_passes_through(self):
        """Genuine booleans come back unchanged."""
        for flag in (True, False):
            assert _coerce_capability_bool(flag) is flag

    def test_int_0_and_1(self):
        """The integers 1 and 0 are accepted as True and False."""
        for raw, expected in ((1, True), (0, False)):
            assert _coerce_capability_bool(raw) is expected

    def test_other_ints_return_none(self):
        """Integers other than 0 and 1 are not treated as booleans."""
        for raw in (2, -1):
            assert _coerce_capability_bool(raw) is None

    def test_yaml_true_tokens(self):
        """Truthy tokens are accepted regardless of case or surrounding spaces."""
        truthy_tokens = ("true", "TRUE", "True", "yes", "on", "1", " true ")
        for token in truthy_tokens:
            assert _coerce_capability_bool(token) is True

    def test_yaml_false_tokens(self):
        """Falsy tokens are accepted regardless of case or surrounding spaces."""
        falsy_tokens = ("false", "FALSE", "False", "no", "off", "0", " false ")
        for token in falsy_tokens:
            assert _coerce_capability_bool(token) is False

    def test_quoted_false_does_not_silently_become_true(self):
        """Regression guard for the ``bool("false") is True`` pitfall."""
        # Regression: bool("false") is True in Python. A user writing
        # supports_vision: "false" must NOT enable native vision routing.
        coerced = _coerce_capability_bool("false")
        assert coerced is False

    def test_unrecognised_strings_return_none(self):
        """Strings that are not known tokens yield None rather than a truthy value."""
        # None == fall through to models.dev, not a silent truthy.
        for text in ("maybe", "", "definitely"):
            assert _coerce_capability_bool(text) is None

    def test_other_types_return_none(self):
        """None, containers and non-integer floats are not coerced."""
        for raw in (None, [], {}, 1.5):
            assert _coerce_capability_bool(raw) is None
|
||||
|
||||
|
||||
# ─── _supports_vision_override ───────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSupportsVisionOverride:
    """Exercise ``_supports_vision_override`` against the supported config layouts."""

    def test_no_cfg_returns_none(self):
        """A missing or empty config carries no override."""
        for cfg in (None, {}):
            assert _supports_vision_override(cfg, "custom", "my-llava") is None

    def test_top_level_shortcut_wins(self):
        """``model.supports_vision: true`` is honoured directly."""
        cfg = {"model": {"supports_vision": True}}
        override = _supports_vision_override(cfg, "custom", "my-llava")
        assert override is True

    def test_top_level_false_propagates(self):
        """``model.supports_vision: false`` is reported as False, not None."""
        cfg = {"model": {"supports_vision": False}}
        override = _supports_vision_override(cfg, "custom", "my-llava")
        assert override is False

    def test_per_provider_per_model_via_runtime_name(self):
        """A per-model flag is found under the runtime provider name."""
        model_flags = {"my-llava": {"supports_vision": True}}
        cfg = {
            "providers": {
                "custom": {"models": model_flags},
            },
        }
        assert _supports_vision_override(cfg, "custom", "my-llava") is True

    def test_per_provider_per_model_via_config_name(self):
        """A per-model flag is found under the provider name stored in config."""
        # Named custom provider — runtime self.provider == "custom", config
        # holds the original name under model.provider.
        model_flags = {"my-llava": {"supports_vision": True}}
        cfg = {
            "model": {"provider": "my-vllm"},
            "providers": {
                "my-vllm": {"models": model_flags},
            },
        }
        assert _supports_vision_override(cfg, "custom", "my-llava") is True

    def test_quoted_false_string_in_yaml_does_not_enable(self):
        """The quoted string "false" is read as False."""
        # Real-world: user writes supports_vision: "false" (quoted).
        cfg = {"model": {"supports_vision": "false"}}
        override = _supports_vision_override(cfg, "custom", "my-llava")
        assert override is False

    def test_unrecognised_value_falls_through(self):
        """An unparseable value yields None (no override)."""
        cfg = {"model": {"supports_vision": "maybe"}}
        override = _supports_vision_override(cfg, "custom", "my-llava")
        assert override is None

    def test_no_override_returns_none(self):
        """A model section without ``supports_vision`` yields None."""
        cfg = {"model": {"default": "my-llava"}}
        override = _supports_vision_override(cfg, "custom", "my-llava")
        assert override is None

    def test_malformed_sections_are_ignored(self):
        """Wrongly typed sections are tolerated and produce no override."""
        # User accidentally wrote a string where a section was expected —
        # don't blow up, just fall through.
        cfg = {"model": "some-string", "providers": ["not-a-dict"]}
        override = _supports_vision_override(cfg, "custom", "my-llava")
        assert override is None
|
||||
|
||||
|
||||
# ─── _lookup_supports_vision (override-aware) ────────────────────────────────
|
||||
|
||||
|
||||
class TestLookupSupportsVisionOverride:
    """Check how ``_lookup_supports_vision`` combines config overrides with models.dev.

    ``agent.models_dev.get_model_capabilities`` is patched in every test so the
    models.dev answer is fully controlled.
    """

    def test_config_override_short_circuits_models_dev(self):
        """A config True is returned even though models.dev knows nothing."""
        # Config says True, models.dev says None — config wins.
        cfg = {"model": {"supports_vision": True}}
        with patch("agent.models_dev.get_model_capabilities", return_value=None):
            looked_up = _lookup_supports_vision("custom", "my-llava", cfg)
            assert looked_up is True

    def test_config_override_false_beats_vision_capable_models_dev(self):
        """A config False is returned even though models.dev reports vision support."""
        # User explicitly disables vision on a models.dev-vision-capable model.
        fake_caps = type("Caps", (), {"supports_vision": True})()
        cfg = {"model": {"supports_vision": False}}
        with patch("agent.models_dev.get_model_capabilities", return_value=fake_caps):
            looked_up = _lookup_supports_vision("anthropic", "claude-sonnet-4", cfg)
            assert looked_up is False

    def test_no_override_falls_back_to_models_dev(self):
        """Without an override the models.dev capability is used."""
        fake_caps = type("Caps", (), {"supports_vision": True})()
        with patch("agent.models_dev.get_model_capabilities", return_value=fake_caps):
            looked_up = _lookup_supports_vision("anthropic", "claude-sonnet-4", {})
            assert looked_up is True

    def test_no_override_no_models_dev_entry_returns_none(self):
        """No override and no models.dev entry means the answer is unknown (None)."""
        with patch("agent.models_dev.get_model_capabilities", return_value=None):
            looked_up = _lookup_supports_vision("custom", "my-llava", {})
            assert looked_up is None

    def test_cfg_none_falls_back_to_models_dev(self):
        """Passing ``None`` as the config is accepted."""
        # Caller didn't pass cfg at all — old call sites must still work.
        with patch("agent.models_dev.get_model_capabilities", return_value=None):
            looked_up = _lookup_supports_vision("openrouter", "x", None)
            assert looked_up is None
|
||||
|
||||
|
||||
# ─── decide_image_input_mode with auto + override ────────────────────────────
|
||||
|
||||
|
||||
class TestAutoModeRespectsOverride:
    """``decide_image_input_mode`` in auto mode must honour ``supports_vision`` overrides.

    models.dev is patched to return no capabilities in every test, so the
    outcome depends only on the config passed in.
    """

    def test_auto_native_for_custom_with_supports_vision_true(self):
        """An explicit ``supports_vision: true`` makes auto mode choose native."""
        # The motivating bug: Qwen3.6 on local llama.cpp via provider=custom.
        # Without the override, auto falls back to text. With it, auto picks
        # native — no need to also set agent.image_input_mode: native.
        cfg = {"model": {"supports_vision": True}}
        with patch("agent.models_dev.get_model_capabilities", return_value=None):
            mode = decide_image_input_mode("custom", "qwen3.6-35b", cfg)
            assert mode == "native"

    def test_auto_text_for_custom_with_supports_vision_false(self):
        """An explicit ``supports_vision: false`` keeps auto mode on text."""
        cfg = {"model": {"supports_vision": False}}
        with patch("agent.models_dev.get_model_capabilities", return_value=None):
            mode = decide_image_input_mode("custom", "some-text-only", cfg)
            assert mode == "text"

    def test_auto_text_for_custom_with_no_override(self):
        """With no override, an unknown custom model stays on text."""
        # Unchanged baseline: unknown custom model → text.
        with patch("agent.models_dev.get_model_capabilities", return_value=None):
            mode = decide_image_input_mode("custom", "unknown", {})
            assert mode == "text"

    def test_explicit_aux_vision_override_still_wins(self):
        """A configured auxiliary vision backend takes precedence over the override."""
        # If the user has configured a dedicated vision aux backend, respect
        # it even when supports_vision: true is also set.
        aux_vision = {"provider": "openrouter", "model": "gemini-2.5-pro"}
        cfg = {
            "model": {"supports_vision": True},
            "auxiliary": {"vision": aux_vision},
        }
        with patch("agent.models_dev.get_model_capabilities", return_value=None):
            mode = decide_image_input_mode("custom", "qwen3.6-35b", cfg)
            assert mode == "text"
|
||||
|
||||
|
||||
# ─── build_native_content_parts ──────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
@@ -1060,3 +1060,191 @@ class TestHonchoCadenceTracking:
|
||||
p.on_turn_start(2, "second message")
|
||||
should_skip = p._injection_frequency == "first-turn" and p._turn_count > 1
|
||||
assert should_skip, "Second turn (turn 2) SHOULD be skipped"
|
||||
|
||||
|
||||
class TestMemoryToolToolsetGate:
    """Issue #5544: memory provider tools must respect platform_toolsets.

    Before the fix, MemoryManager.get_all_tool_schemas() output was appended
    to AIAgent.tools unconditionally in agent_init.py — bypassing the
    enabled_toolsets filter. Result: `platform_toolsets: telegram: []`
    still leaked fact_store and other memory tools into the tool surface,
    causing 10x latency on local models (Qwen3-30B: 1.7s → 42s) and
    tool-call loops on small models.

    These tests mirror the gate logic in agent/agent_init.py around the
    memory provider tool injection block. The gate condition is:

        enabled_toolsets is None        → no filter, inject (backward compat)
        "memory" in enabled_toolsets    → user opted in, inject
        otherwise (incl. [])            → skip injection
    """

    @staticmethod
    def _run_memory_injection(enabled_toolsets, memory_manager):
        """Simulate the gated memory-tool injection block from agent_init.py.

        Returns ``(tools, valid_tool_names)``: the tool list after injection
        and the set of non-empty tool names that were injected.
        """
        tools = []
        valid_tool_names = set()

        # Gate, part 1: nothing to inject without a manager / tool list.
        if not memory_manager or tools is None:
            return tools, valid_tool_names
        # Gate, part 2: an explicit toolset list must name "memory".
        if enabled_toolsets is not None and "memory" not in enabled_toolsets:
            return tools, valid_tool_names

        seen = {
            entry.get("function", {}).get("name")
            for entry in tools
            if isinstance(entry, dict)
        }
        for schema in memory_manager.get_all_tool_schemas():
            tool_name = schema.get("name", "")
            has_name = bool(tool_name)
            if has_name and tool_name in seen:
                # Already present under that name — do not inject twice.
                continue
            tools.append({"type": "function", "function": schema})
            if has_name:
                valid_tool_names.add(tool_name)
                seen.add(tool_name)

        return tools, valid_tool_names

    def _mgr_with_tools(self, *tool_names):
        """Build a MemoryManager whose providers expose the named tool schemas."""
        schemas = [
            {"name": name, "description": name, "parameters": {}}
            for name in tool_names
        ]
        manager = MemoryManager()
        manager.add_provider(FakeMemoryProvider("ext", tools=schemas))
        return manager

    def test_none_toolsets_injects(self):
        """enabled_toolsets=None (no filter) injects memory tools — backward compat."""
        manager = self._mgr_with_tools("fact_store")
        tools, names = self._run_memory_injection(None, manager)
        assert "fact_store" in names
        assert any(tool["function"]["name"] == "fact_store" for tool in tools)

    def test_memory_in_toolsets_injects(self):
        """enabled_toolsets including 'memory' injects memory tools."""
        manager = self._mgr_with_tools("fact_store")
        enabled = ["terminal", "memory", "web"]
        tools, names = self._run_memory_injection(enabled, manager)
        assert "fact_store" in names

    def test_empty_toolsets_blocks_injection(self):
        """`platform_toolsets: telegram: []` must suppress memory tools. (#5544)"""
        manager = self._mgr_with_tools("fact_store")
        tools, names = self._run_memory_injection([], manager)
        assert tools == []
        assert names == set()

    def test_toolsets_without_memory_blocks_injection(self):
        """Toolset list that doesn't name 'memory' must suppress injection."""
        manager = self._mgr_with_tools("fact_store")
        enabled = ["terminal", "web"]
        tools, names = self._run_memory_injection(enabled, manager)
        assert tools == []
        assert names == set()

    def test_no_memory_manager_no_injection(self):
        """Gate is moot without a memory manager."""
        tools, names = self._run_memory_injection(None, None)
        assert tools == []

    def test_multiple_schemas_all_blocked_together(self):
        """When the gate is closed, no memory tools leak — not even partially."""
        manager = self._mgr_with_tools("fact_store", "memory_search", "memory_add")
        tools, names = self._run_memory_injection(["terminal"], manager)
        assert tools == []
        assert names == set()

    def test_multiple_schemas_all_injected_when_enabled(self):
        """When the gate is open, every memory tool schema is injected."""
        manager = self._mgr_with_tools("fact_store", "memory_search", "memory_add")
        tools, names = self._run_memory_injection(None, manager)
        assert names == {"fact_store", "memory_search", "memory_add"}
|
||||
|
||||
|
||||
class TestContextEngineToolsetGate:
    """Issue #5544 (sibling): context engine tools follow the same gate.

    `agent.context_compressor.get_tool_schemas()` (e.g. lcm_grep, lcm_describe,
    lcm_expand) was appended to AIAgent.tools unconditionally. Same blind
    injection class as the memory bug; same local-model penalty. Gate name:
    "context_engine" (matches the existing plugin-system convention).
    """

    @staticmethod
    def _run_context_engine_injection(enabled_toolsets, compressor):
        """Simulate the gated context-engine injection block from agent_init.py.

        Returns ``(tools, valid_tool_names, engine_tool_names)``; both name
        sets contain only the non-empty names of schemas that were injected.
        """
        tools = []
        valid_tool_names = set()
        engine_tool_names = set()

        # Gate, part 1: nothing to inject without a compressor / tool list.
        if compressor is None or tools is None:
            return tools, valid_tool_names, engine_tool_names
        # Gate, part 2: an explicit toolset list must name "context_engine".
        if enabled_toolsets is not None and "context_engine" not in enabled_toolsets:
            return tools, valid_tool_names, engine_tool_names

        seen = {
            entry.get("function", {}).get("name")
            for entry in tools
            if isinstance(entry, dict)
        }
        for schema in compressor.get_tool_schemas():
            tool_name = schema.get("name", "")
            has_name = bool(tool_name)
            if has_name and tool_name in seen:
                # Already present under that name — do not inject twice.
                continue
            tools.append({"type": "function", "function": schema})
            if has_name:
                valid_tool_names.add(tool_name)
                engine_tool_names.add(tool_name)
                seen.add(tool_name)

        return tools, valid_tool_names, engine_tool_names

    class _FakeCompressor:
        """Minimal stand-in exposing only ``get_tool_schemas()``."""

        def __init__(self, schemas):
            self._schemas = schemas

        def get_tool_schemas(self):
            # Hand out a copy so callers cannot mutate the stored list.
            return list(self._schemas)

    def _compressor_with(self, *tool_names):
        """Build a fake compressor exposing one schema per given tool name."""
        schemas = [
            {"name": name, "description": name, "parameters": {}}
            for name in tool_names
        ]
        return self._FakeCompressor(schemas)

    def test_none_toolsets_injects(self):
        """enabled_toolsets=None injects context-engine tools — backward compat."""
        compressor = self._compressor_with("lcm_grep", "lcm_describe", "lcm_expand")
        tools, names, engine_names = self._run_context_engine_injection(None, compressor)
        assert engine_names == {"lcm_grep", "lcm_describe", "lcm_expand"}

    def test_context_engine_in_toolsets_injects(self):
        """enabled_toolsets including 'context_engine' injects the tools."""
        compressor = self._compressor_with("lcm_grep")
        enabled = ["terminal", "context_engine"]
        tools, names, engine_names = self._run_context_engine_injection(enabled, compressor)
        assert "lcm_grep" in engine_names

    def test_empty_toolsets_blocks_injection(self):
        """`platform_toolsets: telegram: []` must suppress context-engine tools."""
        compressor = self._compressor_with("lcm_grep")
        tools, names, engine_names = self._run_context_engine_injection([], compressor)
        assert tools == []
        assert engine_names == set()

    def test_toolsets_without_context_engine_blocks_injection(self):
        """A toolset list that doesn't name 'context_engine' suppresses injection."""
        compressor = self._compressor_with("lcm_grep", "lcm_describe")
        enabled = ["terminal", "memory"]
        tools, names, engine_names = self._run_context_engine_injection(enabled, compressor)
        assert tools == []
        assert engine_names == set()

    def test_no_compressor_no_injection(self):
        """Gate is moot without a context_compressor."""
        tools, names, engine_names = self._run_context_engine_injection(None, None)
        assert tools == []
|
||||
|
||||
@@ -444,6 +444,7 @@ class TestBuildNousSubscriptionPrompt:
|
||||
"tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
|
||||
"browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
|
||||
"modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
|
||||
"app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, True, True, True, False, True, "Nous Subscription"),
|
||||
},
|
||||
),
|
||||
)
|
||||
@@ -468,6 +469,7 @@ class TestBuildNousSubscriptionPrompt:
|
||||
"tts": NousFeatureState("tts", "OpenAI TTS", True, False, False, False, False, True, ""),
|
||||
"browser": NousFeatureState("browser", "Browser automation", True, False, False, False, False, True, ""),
|
||||
"modal": NousFeatureState("modal", "Modal execution", False, False, False, False, False, True, ""),
|
||||
"app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, False, False, False, False, True, ""),
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
@@ -556,10 +556,11 @@ Generate some audio.
|
||||
raising=False,
|
||||
)
|
||||
|
||||
with patch.dict(
|
||||
os.environ, {"HERMES_SESSION_PLATFORM": "telegram"}, clear=False
|
||||
):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
from gateway.session_context import clear_session_vars, set_session_vars
|
||||
|
||||
tokens = set_session_vars(platform="telegram")
|
||||
try:
|
||||
_make_skill(
|
||||
tmp_path,
|
||||
"test-skill",
|
||||
@@ -571,6 +572,8 @@ Generate some audio.
|
||||
)
|
||||
scan_skill_commands()
|
||||
msg = build_skill_invocation_message("/test-skill", "do stuff")
|
||||
finally:
|
||||
clear_session_vars(tokens)
|
||||
|
||||
assert msg is not None
|
||||
assert "local cli" in msg.lower()
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
"""Tests for agent/skill_utils.py — extract_skill_conditions metadata handling."""
|
||||
"""Tests for agent/skill_utils.py."""
|
||||
|
||||
from agent.skill_utils import extract_skill_conditions
|
||||
from unittest.mock import patch
|
||||
|
||||
from agent.skill_utils import (
|
||||
extract_skill_conditions,
|
||||
iter_skill_index_files,
|
||||
skill_matches_platform,
|
||||
)
|
||||
|
||||
|
||||
def test_metadata_as_dict_with_hermes():
|
||||
@@ -56,3 +62,138 @@ def test_metadata_missing_entirely():
|
||||
"fallback_for_tools": [],
|
||||
"requires_tools": [],
|
||||
}
|
||||
|
||||
|
||||
def test_iter_skill_index_files_prunes_dependency_dirs(tmp_path):
    """Only the real skill is yielded; SKILL.md files under .venv / node_modules are not."""
    # A genuine skill directly under the scan root.
    real = tmp_path / "real-skill"
    real.mkdir()
    (real / "SKILL.md").write_text("---\nname: real-skill\n---\n", encoding="utf-8")

    # A SKILL.md shipped inside a Python virtualenv dependency.
    venv_skill_dir = tmp_path.joinpath(
        "bring",
        "scripts",
        ".venv",
        "lib",
        "python3.13",
        "site-packages",
        "typer",
        ".agents",
        "skills",
        "typer",
    )
    # A SKILL.md shipped inside a node_modules dependency.
    node_skill_dir = tmp_path.joinpath(
        "web-skill",
        "node_modules",
        "dep",
        ".agents",
        "skills",
        "dep",
    )
    dependency_skills = (
        (venv_skill_dir, "---\nname: typer\n---\n"),
        (node_skill_dir, "---\nname: dep\n---\n"),
    )
    for skill_dir, front_matter in dependency_skills:
        skill_dir.mkdir(parents=True)
        (skill_dir / "SKILL.md").write_text(front_matter, encoding="utf-8")

    found = list(iter_skill_index_files(tmp_path, "SKILL.md"))

    assert found == [real / "SKILL.md"]
|
||||
|
||||
|
||||
# ── skill_matches_platform on Termux ──────────────────────────────────────
|
||||
|
||||
|
||||
class TestSkillMatchesPlatformTermux:
    """Termux is Linux userland on Android. Skills tagged platforms:[linux]
    must load there regardless of whether Python reports sys.platform as
    "linux" (pre-3.13) or "android" (3.13+). Reported by user @LikiusInik
    in May 2026 — only 3 built-in skills appeared on Termux because every
    github/productivity/mlops skill is tagged platforms:[linux,macos,windows]
    and sys.platform=="android" did not start with "linux".

    Every test patches ``agent.skill_utils.sys.platform`` and
    ``agent.skill_utils.is_termux`` to simulate the target environment.
    """

    def test_no_platforms_field_matches_everywhere(self):
        """Front matter without a ``platforms`` key matches on Termux."""
        # Backward-compat default — skills without a platforms tag load
        # on any OS, Termux included.
        with patch("agent.skill_utils.sys.platform", "android"):
            with patch("agent.skill_utils.is_termux", return_value=True):
                assert skill_matches_platform({}) is True
                assert skill_matches_platform({"name": "foo"}) is True

    def test_linux_skill_loads_on_termux_android_platform(self):
        """A linux-tagged skill matches when Termux reports "android"."""
        # Python 3.13+ on Termux reports sys.platform == "android".
        front_matter = {"platforms": ["linux"]}
        with patch("agent.skill_utils.sys.platform", "android"):
            with patch("agent.skill_utils.is_termux", return_value=True):
                assert skill_matches_platform(front_matter) is True

    def test_linux_macos_windows_skill_loads_on_termux(self):
        """The common three-OS tag matches on Termux."""
        # The common "[linux, macos, windows]" tag used by github-*,
        # productivity, mlops, etc.
        front_matter = {"platforms": ["linux", "macos", "windows"]}
        with patch("agent.skill_utils.sys.platform", "android"):
            with patch("agent.skill_utils.is_termux", return_value=True):
                assert skill_matches_platform(front_matter) is True

    def test_linux_skill_loads_on_termux_linux_platform(self):
        """A linux-tagged skill matches when Termux reports "linux"."""
        # Pre-3.13 Termux reports sys.platform == "linux" already — this
        # works without the Termux escape hatch but must still pass.
        front_matter = {"platforms": ["linux"]}
        with patch("agent.skill_utils.sys.platform", "linux"):
            with patch("agent.skill_utils.is_termux", return_value=True):
                assert skill_matches_platform(front_matter) is True

    def test_macos_only_skill_still_excluded_on_termux(self):
        """A macOS-only skill does not match on Termux."""
        # macOS-only skills (apple-notes, imessage, ...) should NOT load
        # on Termux. The Termux fallback only widens platforms:[linux,...].
        front_matter = {"platforms": ["macos"]}
        with patch("agent.skill_utils.sys.platform", "android"):
            with patch("agent.skill_utils.is_termux", return_value=True):
                assert skill_matches_platform(front_matter) is False

    def test_windows_only_skill_still_excluded_on_termux(self):
        """A Windows-only skill does not match on Termux."""
        front_matter = {"platforms": ["windows"]}
        with patch("agent.skill_utils.sys.platform", "android"):
            with patch("agent.skill_utils.is_termux", return_value=True):
                assert skill_matches_platform(front_matter) is False

    def test_explicit_termux_or_android_tag_matches(self):
        """Explicit ``termux`` / ``android`` tags match a Termux session."""
        # Skills can also opt in explicitly via platforms:[termux] or
        # platforms:[android] — both should match a Termux session.
        with patch("agent.skill_utils.sys.platform", "android"):
            with patch("agent.skill_utils.is_termux", return_value=True):
                assert skill_matches_platform({"platforms": ["termux"]}) is True
                assert skill_matches_platform({"platforms": ["android"]}) is True

    def test_non_termux_android_does_not_widen(self):
        """On Android without Termux, a linux-tagged skill does not match."""
        # If we're somehow on a plain Android Python (not Termux), don't
        # silently load Linux skills — Termux is the supported environment.
        front_matter = {"platforms": ["linux"]}
        with patch("agent.skill_utils.sys.platform", "android"):
            with patch("agent.skill_utils.is_termux", return_value=False):
                assert skill_matches_platform(front_matter) is False

    def test_linux_skill_on_real_linux_unaffected(self):
        """Plain Linux still matches linux-tagged skills."""
        # The non-Termux Linux path must not change.
        front_matter = {"platforms": ["linux"]}
        with patch("agent.skill_utils.sys.platform", "linux"):
            with patch("agent.skill_utils.is_termux", return_value=False):
                assert skill_matches_platform(front_matter) is True

    def test_macos_skill_on_real_macos_unaffected(self):
        """macOS ("darwin") still matches macos-tagged skills."""
        front_matter = {"platforms": ["macos"]}
        with patch("agent.skill_utils.sys.platform", "darwin"):
            with patch("agent.skill_utils.is_termux", return_value=False):
                assert skill_matches_platform(front_matter) is True
|
||||
|
||||
@@ -46,6 +46,26 @@ class TestChatCompletionsBasic:
|
||||
assert "codex_reasoning_items" in msgs[0]
|
||||
assert "codex_message_items" in msgs[0]
|
||||
|
||||
def test_convert_messages_strips_tool_name(self, transport):
|
||||
"""Internal `tool_name` (used for FTS indexing in the SQLite store) is
|
||||
not part of the OpenAI Chat Completions schema. Strict providers like
|
||||
Moonshot/Kimi reject it with HTTP 400 'Extra inputs are not permitted'.
|
||||
"""
|
||||
msgs = [
|
||||
{"role": "user", "content": "hi"},
|
||||
{"role": "assistant", "content": None,
|
||||
"tool_calls": [{"id": "call_1", "type": "function",
|
||||
"function": {"name": "execute_code", "arguments": "{}"}}]},
|
||||
{"role": "tool", "tool_call_id": "call_1", "tool_name": "execute_code",
|
||||
"content": "result"},
|
||||
]
|
||||
result = transport.convert_messages(msgs)
|
||||
assert "tool_name" not in result[2]
|
||||
assert result[2]["content"] == "result"
|
||||
assert result[2]["tool_call_id"] == "call_1"
|
||||
# Original list untouched (deepcopy-on-demand)
|
||||
assert msgs[2]["tool_name"] == "execute_code"
|
||||
|
||||
|
||||
class TestChatCompletionsBuildKwargs:
|
||||
|
||||
|
||||
@@ -196,14 +196,13 @@ class TestCodexBuildKwargs:
|
||||
)
|
||||
# xAI Responses receives reasoning.effort on the allowlisted models.
|
||||
assert kw.get("reasoning") == {"effort": "high"}
|
||||
# As of May 2026 we deliberately do NOT request
|
||||
# reasoning.encrypted_content back from xAI — the OAuth/SuperGrok
|
||||
# surface rejects replayed encrypted reasoning items on turn 2+
|
||||
# (the multi-turn "Expected to have received response.created
|
||||
# before error" failure). Grok still reasons natively each turn;
|
||||
# we just don't try to thread the prior turn's encrypted blob back
|
||||
# in. See tests/run_agent/test_codex_xai_oauth_recovery.py.
|
||||
assert "reasoning.encrypted_content" not in kw.get("include", [])
|
||||
# As of May 2026 (post-revert of PR #26644) we DO request
|
||||
# reasoning.encrypted_content back from xAI so we can replay it
|
||||
# across turns for cross-turn coherence — xAI explicitly relies
|
||||
# on this for their partnership integration. See
|
||||
# tests/run_agent/test_codex_xai_oauth_recovery.py for the
|
||||
# full history.
|
||||
assert "reasoning.encrypted_content" in kw.get("include", [])
|
||||
|
||||
def test_xai_reasoning_disabled_no_reasoning_key(self, transport):
|
||||
messages = [{"role": "user", "content": "Hi"}]
|
||||
@@ -229,9 +228,9 @@ class TestCodexBuildKwargs:
|
||||
# api.x.ai 400s with "Model X does not support parameter reasoningEffort"
|
||||
# on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*.
|
||||
# Those models reason natively but don't expose the dial. The transport
|
||||
# must omit the `reasoning` key for them. As of May 2026 we also no
|
||||
# longer request ``reasoning.encrypted_content`` back from xAI on ANY
|
||||
# model — see test_xai_reasoning_effort_passed for the rationale.
|
||||
# must omit the `reasoning` key for them. As of May 2026 we DO request
|
||||
# ``reasoning.encrypted_content`` back from xAI on every model —
|
||||
# see test_xai_reasoning_effort_passed for the rationale.
|
||||
|
||||
def test_xai_grok_4_omits_reasoning_effort(self, transport):
|
||||
"""grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400."""
|
||||
@@ -245,9 +244,9 @@ class TestCodexBuildKwargs:
|
||||
assert "reasoning" not in kw, (
|
||||
f"{model} must not receive a reasoning key (xAI rejects it)"
|
||||
)
|
||||
# We no longer ask xAI for encrypted_content back (see comment
|
||||
# above) — verify the include list is empty.
|
||||
assert "reasoning.encrypted_content" not in kw.get("include", [])
|
||||
# Even without the effort dial we still ask xAI to echo back
|
||||
# encrypted reasoning content so it can be replayed next turn.
|
||||
assert "reasoning.encrypted_content" in kw.get("include", [])
|
||||
|
||||
def test_xai_grok_4_fast_omits_reasoning_effort(self, transport):
|
||||
"""grok-4-fast and grok-4-1-fast variants reject reasoning.effort."""
|
||||
|
||||
@@ -160,30 +160,6 @@ class TestBranchCommandCLI:
|
||||
assert agent.reset_session_state.called
|
||||
assert agent._last_flushed_db_idx == 4 # len(conversation_history)
|
||||
|
||||
def test_branch_updates_agent_session_log_file(self, cli_instance, session_db, tmp_path):
|
||||
"""Branching must redirect the agent's session_log_file to the new session's path."""
|
||||
from cli import HermesCLI
|
||||
from pathlib import Path
|
||||
|
||||
logs_dir = tmp_path / "sessions"
|
||||
logs_dir.mkdir()
|
||||
|
||||
agent = MagicMock()
|
||||
agent._last_flushed_db_idx = 0
|
||||
agent.logs_dir = logs_dir
|
||||
agent.session_log_file = logs_dir / f"session_{cli_instance.session_id}.json"
|
||||
cli_instance.agent = agent
|
||||
|
||||
old_log_file = agent.session_log_file
|
||||
HermesCLI._handle_branch_command(cli_instance, "/branch")
|
||||
|
||||
new_session_id = cli_instance.session_id
|
||||
expected_log = logs_dir / f"session_{new_session_id}.json"
|
||||
assert agent.session_log_file == expected_log, (
|
||||
"session_log_file must point to the branch session, not the original"
|
||||
)
|
||||
assert agent.session_log_file != old_log_file
|
||||
|
||||
def test_branch_sets_resumed_flag(self, cli_instance, session_db):
|
||||
"""Branch should set _resumed=True to prevent auto-title generation."""
|
||||
from cli import HermesCLI
|
||||
|
||||
+34
-184
@@ -20,12 +20,9 @@ test runner at ``scripts/run_tests.sh``.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
@@ -37,6 +34,22 @@ if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
|
||||
# ── Per-file process isolation ──────────────────────────────────────────────
|
||||
# Tests run via ``scripts/run_tests_parallel.py``, which spawns a fresh
|
||||
# ``python -m pytest <file>`` subprocess per test file. Cross-file state
|
||||
# leakage (module-level dicts, ContextVars, caches) is impossible: each
|
||||
# file gets a clean Python interpreter. Intra-file ordering is the test
|
||||
# author's responsibility — if test A in foo.py mutates state that test B
|
||||
# in foo.py reads, that's a real bug to fix in the file (it would also
|
||||
# bite anyone running ``pytest tests/foo.py`` directly).
|
||||
#
|
||||
# This replaces the historic _reset_module_state autouse fixture (manual
|
||||
# state clearing) and the brief experiment with subprocess-per-test
|
||||
# isolation (too slow at ~17k tests).
|
||||
#
|
||||
# See ``scripts/run_tests_parallel.py`` for the runner.
|
||||
|
||||
|
||||
# ── Credential env-var filter ──────────────────────────────────────────────
|
||||
#
|
||||
# Any env var in the current process matching ONE of these patterns is
|
||||
@@ -279,7 +292,7 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
|
||||
"WECOM_HOME_CHANNEL_NAME",
|
||||
# Platform gating — set by load_gateway_config() as a side effect when
|
||||
# a config.yaml is present, so individual test bodies that call the
|
||||
# loader leak these values into later tests on the same xdist worker.
|
||||
# loader leak these values into later tests in the same process.
|
||||
# Force-clear on every test setup so the leak can't happen.
|
||||
"SLACK_REQUIRE_MENTION",
|
||||
"SLACK_STRICT_MENTION",
|
||||
@@ -368,144 +381,21 @@ def _isolate_hermes_home(_hermetic_environment):
|
||||
return None
|
||||
|
||||
|
||||
# ── Module-level state reset ───────────────────────────────────────────────
|
||||
# ── Module-level state reset — replaced by per-file process isolation ──────
|
||||
#
|
||||
# Python modules are singletons per process, and pytest-xdist workers are
|
||||
# long-lived. Module-level dicts/sets (tool registries, approval state,
|
||||
# interrupt flags) and ContextVars persist across tests in the same worker,
|
||||
# causing tests that pass alone to fail when run with siblings.
|
||||
# Each test FILE runs in a freshly-spawned ``python -m pytest <file>``
|
||||
# subprocess via ``scripts/run_tests_parallel.py``, so module-level dicts /
|
||||
# sets / ContextVars from tests in one file cannot leak into tests in
|
||||
# another file. No manual per-module clearing needed.
|
||||
#
|
||||
# Each entry in this fixture clears state that belongs to a specific module.
|
||||
# New state buckets go here too — this is the single gate that prevents
|
||||
# "works alone, flakes in CI" bugs from state leakage.
|
||||
# Within a single file, ordering is the author's responsibility. If your
|
||||
# tests in the same file share mutable state, either reset it explicitly
|
||||
# in a fixture or split them across files.
|
||||
#
|
||||
# The skill `test-suite-cascade-diagnosis` documents the concrete patterns
|
||||
# this closes; the running example was `test_command_guards` failing 12/15
|
||||
# CI runs because ``tools.approval._session_approved`` carried approvals
|
||||
# from one test's session into another's.
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_module_state():
|
||||
"""Clear module-level mutable state and ContextVars between tests.
|
||||
|
||||
Keeps state from leaking across tests on the same xdist worker. Modules
|
||||
that don't exist yet (test collection before production import) are
|
||||
skipped silently — production import later creates fresh empty state.
|
||||
"""
|
||||
# --- logging — quiet/one-shot paths mutate process-global logger state ---
|
||||
logging.disable(logging.NOTSET)
|
||||
for _logger_name in ("tools", "run_agent", "trajectory_compressor", "cron", "hermes_cli"):
|
||||
_logger = logging.getLogger(_logger_name)
|
||||
_logger.disabled = False
|
||||
_logger.setLevel(logging.NOTSET)
|
||||
_logger.propagate = True
|
||||
|
||||
# --- tools.approval — the single biggest source of cross-test pollution ---
|
||||
try:
|
||||
from tools import approval as _approval_mod
|
||||
_approval_mod._session_approved.clear()
|
||||
_approval_mod._session_yolo.clear()
|
||||
_approval_mod._permanent_approved.clear()
|
||||
_approval_mod._pending.clear()
|
||||
_approval_mod._gateway_queues.clear()
|
||||
_approval_mod._gateway_notify_cbs.clear()
|
||||
# ContextVar: reset to empty string so get_current_session_key()
|
||||
# falls through to the env var / default path, matching a fresh
|
||||
# process.
|
||||
_approval_mod._approval_session_key.set("")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- tools.interrupt — per-thread interrupt flag set ---
|
||||
try:
|
||||
from tools import interrupt as _interrupt_mod
|
||||
with _interrupt_mod._lock:
|
||||
_interrupt_mod._interrupted_threads.clear()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- gateway.session_context — 9 ContextVars that represent
|
||||
# the active gateway session. If set in one test and not reset,
|
||||
# the next test's get_session_env() reads stale values.
|
||||
try:
|
||||
from gateway import session_context as _sc_mod
|
||||
for _cv in (
|
||||
_sc_mod._SESSION_PLATFORM,
|
||||
_sc_mod._SESSION_CHAT_ID,
|
||||
_sc_mod._SESSION_CHAT_NAME,
|
||||
_sc_mod._SESSION_THREAD_ID,
|
||||
_sc_mod._SESSION_USER_ID,
|
||||
_sc_mod._SESSION_USER_NAME,
|
||||
_sc_mod._SESSION_KEY,
|
||||
_sc_mod._CRON_AUTO_DELIVER_PLATFORM,
|
||||
_sc_mod._CRON_AUTO_DELIVER_CHAT_ID,
|
||||
_sc_mod._CRON_AUTO_DELIVER_THREAD_ID,
|
||||
):
|
||||
_cv.set(_sc_mod._UNSET)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- tools.env_passthrough — ContextVar<set[str]> with no default ---
|
||||
# LookupError is normal if the test never set it. Setting it to an
|
||||
# empty set unconditionally normalizes the starting state.
|
||||
try:
|
||||
from tools import env_passthrough as _envp_mod
|
||||
_envp_mod._allowed_env_vars_var.set(set())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- tools.terminal_tool — active environment/cwd cache ---
|
||||
# File tools prefer a live terminal cwd when one is cached for the task.
|
||||
# Clear terminal environments between tests so a prior terminal call can't
|
||||
# override TERMINAL_CWD in path-resolution tests.
|
||||
try:
|
||||
from tools import terminal_tool as _term_mod
|
||||
_envs_to_cleanup = []
|
||||
with _term_mod._env_lock:
|
||||
_envs_to_cleanup = list(_term_mod._active_environments.values())
|
||||
_term_mod._active_environments.clear()
|
||||
_term_mod._last_activity.clear()
|
||||
_term_mod._creation_locks.clear()
|
||||
for _env in _envs_to_cleanup:
|
||||
try:
|
||||
_env.cleanup()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- tools.credential_files — ContextVar<dict> ---
|
||||
try:
|
||||
from tools import credential_files as _credf_mod
|
||||
_credf_mod._registered_files_var.set({})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- agent.auxiliary_client — runtime main provider/model override and
|
||||
# payment-error health cache. Both are process-global in production;
|
||||
# reset them per test so one worker's fallback/402 test does not make
|
||||
# later auxiliary-client tests skip otherwise-available providers.
|
||||
try:
|
||||
from agent import auxiliary_client as _aux_mod
|
||||
_aux_mod.clear_runtime_main()
|
||||
_aux_mod._reset_aux_unhealthy_cache()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- tools.file_tools — per-task read history + file-ops cache ---
|
||||
# _read_tracker accumulates per-task_id read history for loop detection,
|
||||
# capped by _READ_HISTORY_CAP. If entries from a prior test persist, the
|
||||
# cap is hit faster than expected and capacity-related tests flake.
|
||||
try:
|
||||
from tools import file_tools as _ft_mod
|
||||
with _ft_mod._read_tracker_lock:
|
||||
_ft_mod._read_tracker.clear()
|
||||
with _ft_mod._file_ops_lock:
|
||||
_ft_mod._file_ops_cache.clear()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
yield
|
||||
# The skill ``test-suite-cascade-diagnosis`` documents the cascade patterns
|
||||
# this replaces; the running example was ``test_command_guards`` failing
|
||||
# 12/15 CI runs because ``tools.approval._session_approved`` carried
|
||||
# approvals from one test's session into another's.
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
@@ -532,13 +422,12 @@ def mock_config():
|
||||
}
|
||||
|
||||
|
||||
# ── Global test timeout ─────────────────────────────────────────────────────
|
||||
# Kill any individual test that takes longer than 30 seconds.
|
||||
# Prevents hanging tests (subprocess spawns, blocking I/O) from stalling the
|
||||
# entire test suite.
|
||||
# ── Per-test timeout — handled by the isolation plugin ─────────────────────
|
||||
#
|
||||
# The subprocess-per-test plugin enforces the configured ``isolate_timeout``
|
||||
# ini key by terminating the child if it overruns. The old SIGALRM-based
|
||||
# fixture (POSIX-only, didn't work on Windows) is gone.
|
||||
|
||||
def _timeout_handler(signum, frame):
|
||||
raise TimeoutError("Test exceeded 30 second timeout")
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _ensure_current_event_loop(request):
|
||||
@@ -584,45 +473,6 @@ def _ensure_current_event_loop(request):
|
||||
asyncio.set_event_loop(None)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _enforce_test_timeout():
|
||||
"""Kill any individual test that takes longer than 30 seconds.
|
||||
SIGALRM is Unix-only; skip on Windows."""
|
||||
if sys.platform == "win32":
|
||||
yield
|
||||
return
|
||||
old = signal.signal(signal.SIGALRM, _timeout_handler)
|
||||
signal.alarm(30)
|
||||
yield
|
||||
signal.alarm(0)
|
||||
signal.signal(signal.SIGALRM, old)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_tool_registry_caches():
|
||||
"""Clear tool-registry-level caches between tests.
|
||||
|
||||
The production registry caches ``check_fn()`` results for 30 s
|
||||
(see tools/registry.py) and :func:`get_tool_definitions` memoizes
|
||||
its result (see model_tools.py). Both are keyed on state that tests
|
||||
routinely mutate (env vars, registry._generation, config.yaml mtime)
|
||||
— but a stale result from test A can still be served to test B
|
||||
because 30 s covers the entire suite, and xdist worker reuse means
|
||||
one test's cache lands in another's process. Clearing before every
|
||||
test keeps hermetic behavior.
|
||||
"""
|
||||
try:
|
||||
from tools.registry import invalidate_check_fn_cache
|
||||
invalidate_check_fn_cache()
|
||||
except ImportError:
|
||||
pass
|
||||
try:
|
||||
from model_tools import _clear_tool_defs_cache
|
||||
_clear_tool_defs_cache()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
# ── Live-system guard ──────────────────────────────────────────────────────
|
||||
#
|
||||
# Several test files exercise the gateway-restart / kill code paths
|
||||
|
||||
@@ -74,7 +74,6 @@ class _Codex401ThenSuccessAgent(run_agent.AIAgent):
|
||||
self._cleanup_task_resources = lambda task_id: None
|
||||
self._persist_session = lambda messages, history=None: None
|
||||
self._save_trajectory = lambda messages, user_message, completed: None
|
||||
self._save_session_log = lambda messages: None
|
||||
|
||||
def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
|
||||
type(self).refresh_attempts += 1
|
||||
|
||||
+116
-17
@@ -313,19 +313,30 @@ def _scan_for_plugin_adapter_antipattern(source: str) -> list[str]:
|
||||
return offenses
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
"""Reject plugin-adapter tests that use the sys.path anti-pattern.
|
||||
def _fingerprint_gateway_tests() -> str:
|
||||
"""Return a short fingerprint that changes when any gateway test file changes.
|
||||
|
||||
Runs once per pytest session on the controller, BEFORE any xdist
|
||||
worker is spawned. If any file under ``tests/gateway/`` matches the
|
||||
anti-pattern, we fail the whole session with a clear message —
|
||||
before a polluted ``sys.path`` can cascade across workers.
|
||||
Uses (mtime, size) pairs instead of content hashing — fast to compute
|
||||
(stat-only, no reads) and sufficient for cache invalidation across
|
||||
per-file subprocess runs.
|
||||
"""
|
||||
# Only run on the xdist controller (or in non-xdist runs). Skip on
|
||||
# worker subprocesses so we don't scan the filesystem N times.
|
||||
if hasattr(config, "workerinput"):
|
||||
return
|
||||
import hashlib
|
||||
|
||||
h = hashlib.sha256()
|
||||
for path in sorted(_GATEWAY_DIR.rglob("test_*.py")):
|
||||
try:
|
||||
st = path.stat()
|
||||
h.update(f"{path.name}:{st.st_mtime_ns}:{st.st_size}".encode())
|
||||
except OSError:
|
||||
h.update(f"{path.name}:missing".encode())
|
||||
return h.hexdigest()[:16]
|
||||
|
||||
|
||||
def _run_adapter_antipattern_scan() -> list[str]:
|
||||
"""Scan gateway test files for the plugin-adapter anti-pattern.
|
||||
|
||||
Returns a list of violation strings (empty if clean).
|
||||
"""
|
||||
violations: list[str] = []
|
||||
for path in _GATEWAY_DIR.rglob("test_*.py"):
|
||||
if path.name in {"_plugin_adapter_loader.py", "conftest.py"}:
|
||||
@@ -334,20 +345,108 @@ def pytest_configure(config):
|
||||
source = path.read_text(encoding="utf-8")
|
||||
except OSError:
|
||||
continue
|
||||
# Fast string pre-filter: skip files that can't possibly violate.
|
||||
# A violating file MUST contain both (a) an adapter/plugins/platforms
|
||||
# reference AND (b) either sys.path manipulation or a bare adapter import.
|
||||
if "adapter" not in source and "plugins/platforms" not in source:
|
||||
continue
|
||||
if not (
|
||||
"sys.path" in source
|
||||
or "import adapter" in source
|
||||
or "from adapter import" in source
|
||||
):
|
||||
continue
|
||||
offenses = _scan_for_plugin_adapter_antipattern(source)
|
||||
if offenses:
|
||||
violations.append(
|
||||
f" {path.relative_to(_GATEWAY_DIR.parent.parent)}:\n "
|
||||
+ "\n ".join(offenses)
|
||||
)
|
||||
return violations
|
||||
|
||||
if violations:
|
||||
raise pytest.UsageError(
|
||||
"Plugin-adapter-import anti-pattern detected in gateway tests:\n"
|
||||
+ "\n".join(violations)
|
||||
+ "\n\n"
|
||||
+ _GUARD_HINT
|
||||
)
|
||||
|
||||
def pytest_configure(config):
|
||||
"""Reject plugin-adapter tests that use the sys.path anti-pattern.
|
||||
|
||||
Runs once per pytest session on the controller, BEFORE any xdist
|
||||
worker is spawned. If any file under ``tests/gateway/`` matches the
|
||||
anti-pattern, we fail the whole session with a clear message —
|
||||
before a polluted ``sys.path`` can cascade across workers.
|
||||
|
||||
**Performance**: in the per-file subprocess isolation model (no xdist),
|
||||
every subprocess is a "controller" — so the naive scan would run 257
|
||||
times, each costing ~1s of AST walking. We avoid this with two
|
||||
strategies:
|
||||
|
||||
1. **Tight string pre-filter**: a file can only violate if it contains
|
||||
*both* an adapter/plugins/platforms reference *and* a sys.path
|
||||
manipulation or bare ``import adapter``. This drops ~95% of files
|
||||
from needing AST parsing.
|
||||
2. **File-locked cache**: the scan result is cached in
|
||||
``.pytest-cache/gw-adapter-guard-<fingerprint>`` keyed on a
|
||||
fingerprint of the gateway test file mtimes/sizes. Concurrent
|
||||
subprocesses acquire a lock; only the first performs the scan;
|
||||
the rest wait and read the cached result.
|
||||
"""
|
||||
# Only run on the xdist controller (or in non-xdist runs). Skip on
|
||||
# worker subprocesses so we don't scan the filesystem N times.
|
||||
if hasattr(config, "workerinput"):
|
||||
return
|
||||
|
||||
fp = _fingerprint_gateway_tests()
|
||||
cache_dir = Path.cwd() / ".pytest-cache"
|
||||
cache_file = cache_dir / f"gw-adapter-guard-{fp}"
|
||||
lock_file = cache_dir / f".gw-adapter-guard-{fp}.lock"
|
||||
|
||||
cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Evict stale cache entries from previous fingerprints (best-effort).
|
||||
try:
|
||||
for old in cache_dir.glob("gw-adapter-guard-*"):
|
||||
if old.name != f"gw-adapter-guard-{fp}":
|
||||
old.unlink(missing_ok=True)
|
||||
for old in cache_dir.glob(".gw-adapter-guard-*.lock"):
|
||||
if old.name != f".gw-adapter-guard-{fp}.lock":
|
||||
old.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass # Non-critical; old files are harmless.
|
||||
|
||||
# Use filelock to ensure only one process scans at a time.
|
||||
# Concurrent subprocesses all hit pytest_configure simultaneously;
|
||||
# without a lock they'd all find no cache and all run the scan.
|
||||
try:
|
||||
from filelock import FileLock
|
||||
lock = FileLock(str(lock_file), timeout=120)
|
||||
except ImportError:
|
||||
# Fallback: no locking (still correct, just slower under contention).
|
||||
import contextlib
|
||||
|
||||
class _NoLock:
|
||||
def __enter__(self):
|
||||
return self
|
||||
def __exit__(self, *a):
|
||||
pass
|
||||
lock = _NoLock()
|
||||
|
||||
with lock:
|
||||
if cache_file.exists():
|
||||
cached = cache_file.read_text(encoding="utf-8")
|
||||
if cached == "clean":
|
||||
return
|
||||
raise pytest.UsageError(cached)
|
||||
|
||||
# Slow path: this process is the first to acquire the lock.
|
||||
violations = _run_adapter_antipattern_scan()
|
||||
|
||||
if violations:
|
||||
msg = (
|
||||
"Plugin-adapter-import anti-pattern detected in gateway tests:\n"
|
||||
+ "\n".join(violations)
|
||||
+ "\n\n"
|
||||
+ _GUARD_HINT
|
||||
)
|
||||
cache_file.write_text(msg, encoding="utf-8")
|
||||
raise pytest.UsageError(msg)
|
||||
else:
|
||||
cache_file.write_text("clean", encoding="utf-8")
|
||||
|
||||
|
||||
@@ -1,31 +1,88 @@
|
||||
"""Yuanbao recall: branch A (content-match) works against DB-only transcripts."""
|
||||
"""Yuanbao recall: branch A1 (exact id) and A2 (content-match) against DB-only transcripts.
|
||||
|
||||
state.db persists the platform-side ``message_id`` via the
|
||||
``platform_message_id`` column (added in the salvage of PR #29211) and
|
||||
``load_transcript`` surfaces it back on each message dict as ``message_id``
|
||||
— so the recall guard's exact-id match path stays canonical even with the
|
||||
JSONL file gone. When a row has no platform id (e.g. agent-processed
|
||||
@bot messages whose adapter didn't carry a msg_id, or pre-column legacy
|
||||
rows), recall falls through to content-match.
|
||||
"""
|
||||
from gateway.session import SessionStore
|
||||
from gateway.config import GatewayConfig
|
||||
|
||||
|
||||
def test_recall_content_match_finds_target_in_db_transcript(tmp_path, monkeypatch):
|
||||
"""state.db doesn't preserve message_id, so recall uses content-match.
|
||||
|
||||
Pin DEFAULT_DB_PATH to tmp_path so SessionDB() can't write to the real
|
||||
~/.hermes/state.db. (Module-level constant snapshot, see test_load_transcript_db_only.)
|
||||
"""
|
||||
def _pin_db(monkeypatch, tmp_path):
|
||||
"""Force SessionDB() to write into tmp_path instead of the real ~/.hermes."""
|
||||
import hermes_state
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", tmp_path / "state.db")
|
||||
|
||||
|
||||
def test_recall_branch_a1_exact_id_match_round_trips_through_db(tmp_path, monkeypatch):
|
||||
"""A user message persisted with ``message_id`` must round-trip through
|
||||
state.db so recall can find and redact it by exact id (branch A1)."""
|
||||
_pin_db(monkeypatch, tmp_path)
|
||||
|
||||
config = GatewayConfig()
|
||||
store = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
|
||||
sid = "test-yuanbao-recall"
|
||||
sid = "test-yuanbao-recall-a1"
|
||||
store._db.create_session(session_id=sid, source="yuanbao:group:G")
|
||||
store.append_to_transcript(sid, {"role": "user", "content": "sensitive content", "timestamp": 1.0})
|
||||
store.append_to_transcript(sid, {"role": "assistant", "content": "ack", "timestamp": 2.0})
|
||||
store.append_to_transcript(sid, {
|
||||
"role": "user",
|
||||
"content": "sensitive content",
|
||||
"timestamp": 1.0,
|
||||
"message_id": "platform-msg-abc",
|
||||
})
|
||||
store.append_to_transcript(sid, {
|
||||
"role": "assistant",
|
||||
"content": "ack",
|
||||
"timestamp": 2.0,
|
||||
})
|
||||
|
||||
# DB-only history carries no platform message_id (PR #29211 dropped that path).
|
||||
history = store.load_transcript(sid)
|
||||
assert all("message_id" not in msg for msg in history)
|
||||
# The user row must carry its platform id back so the recall guard can
|
||||
# match by exact id; the assistant row had no platform id so it should
|
||||
# not gain one spuriously.
|
||||
user_msg = next(m for m in history if m["role"] == "user")
|
||||
assistant_msg = next(m for m in history if m["role"] == "assistant")
|
||||
assert user_msg.get("message_id") == "platform-msg-abc"
|
||||
assert "message_id" not in assistant_msg
|
||||
|
||||
# Branch A: content match finds the target row that recall would redact.
|
||||
target = next((m for m in history
|
||||
if m.get("role") == "user" and m.get("content") == "sensitive content"), None)
|
||||
# Branch A1: locate the row by exact platform id — no content heuristics.
|
||||
target = next(
|
||||
(m for m in history if m.get("message_id") == "platform-msg-abc"),
|
||||
None,
|
||||
)
|
||||
assert target is not None
|
||||
assert target["content"] == "sensitive content"
|
||||
|
||||
|
||||
def test_recall_branch_a2_content_match_when_no_platform_id(tmp_path, monkeypatch):
|
||||
"""Rows that lack a platform_message_id (e.g. agent-processed @bot
|
||||
messages) still match by content as a fallback."""
|
||||
_pin_db(monkeypatch, tmp_path)
|
||||
|
||||
config = GatewayConfig()
|
||||
store = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
|
||||
sid = "test-yuanbao-recall-a2"
|
||||
store._db.create_session(session_id=sid, source="yuanbao:group:G")
|
||||
# No message_id on the dict — simulates an agent-processed message
|
||||
# that did not carry the platform msg_id through.
|
||||
store.append_to_transcript(sid, {
|
||||
"role": "user",
|
||||
"content": "sensitive content",
|
||||
"timestamp": 1.0,
|
||||
})
|
||||
|
||||
history = store.load_transcript(sid)
|
||||
assert all("message_id" not in m for m in history)
|
||||
|
||||
# Branch A2: content match recovers the target.
|
||||
target = next(
|
||||
(m for m in history
|
||||
if m.get("role") == "user" and m.get("content") == "sensitive content"),
|
||||
None,
|
||||
)
|
||||
assert target is not None
|
||||
# Caller would then redact: target["content"] = REDACTED; store.rewrite_transcript(sid, history)
|
||||
|
||||
@@ -22,19 +22,26 @@ from gateway.config import PlatformConfig
|
||||
|
||||
|
||||
def _ensure_telegram_mock():
|
||||
if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
|
||||
return
|
||||
|
||||
telegram_mod = MagicMock()
|
||||
telegram_mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
|
||||
telegram_mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
|
||||
telegram_mod.constants.ChatType.GROUP = "group"
|
||||
telegram_mod.constants.ChatType.SUPERGROUP = "supergroup"
|
||||
telegram_mod.constants.ChatType.CHANNEL = "channel"
|
||||
telegram_mod.constants.ChatType.PRIVATE = "private"
|
||||
|
||||
for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"):
|
||||
sys.modules.setdefault(name, telegram_mod)
|
||||
# Register telegram.constants as a separate module mock so that
|
||||
# ``from telegram.constants import ChatType`` resolves to our mock
|
||||
# with string-valued members (not auto-generated MagicMocks).
|
||||
constants_mod = MagicMock()
|
||||
constants_mod.ParseMode.MARKDOWN_V2 = "MarkdownV2"
|
||||
constants_mod.ChatType.GROUP = "group"
|
||||
constants_mod.ChatType.SUPERGROUP = "supergroup"
|
||||
constants_mod.ChatType.CHANNEL = "channel"
|
||||
constants_mod.ChatType.PRIVATE = "private"
|
||||
|
||||
sys.modules["telegram"] = telegram_mod
|
||||
sys.modules["telegram.ext"] = telegram_mod.ext
|
||||
sys.modules["telegram.constants"] = constants_mod
|
||||
sys.modules["telegram.request"] = telegram_mod.request
|
||||
|
||||
# Force reimport so the adapter picks up the mock ChatType.
|
||||
sys.modules.pop("gateway.platforms.telegram", None)
|
||||
|
||||
|
||||
_ensure_telegram_mock()
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user