Compare commits
88 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 984e6cb5b8 | |||
| a7cd254c29 | |||
| 4d58e48cdb | |||
| bec2250d2c | |||
| e02a7e5e1c | |||
| 5ce5fe3181 | |||
| 531efe7208 | |||
| 2a474bcf72 | |||
| 6dbbf20ff4 | |||
| 5aa4727f34 | |||
| 4cc18877c6 | |||
| 3fde8c153d | |||
| 3462b097e2 | |||
| 552e9c7881 | |||
| 18cd1e5c72 | |||
| 0ce12a9241 | |||
| 56b79f12ac | |||
| 3d2f146460 | |||
| 2e3f576298 | |||
| 2ea7cf287e | |||
| ba9964ff0d | |||
| 2fdefca570 | |||
| 48be2e0e4d | |||
| 87d9239009 | |||
| c3a09f7835 | |||
| 6c26727bb3 | |||
| 5edb346c75 | |||
| f722ec723f | |||
| be0728cacc | |||
| 975e13091e | |||
| 32aea113f0 | |||
| 1c76689b28 | |||
| 24c7ce0fb8 | |||
| b4afc6546e | |||
| 127b56a61a | |||
| 4ead464f97 | |||
| 3bbe980115 | |||
| a9db0e2c74 | |||
| c6a992e3e3 | |||
| 9514ddbee2 | |||
| 59088228f6 | |||
| 5672772dab | |||
| b9b6e034d5 | |||
| 1566d71726 | |||
| f7441f9c42 | |||
| c42edd8055 | |||
| 8ad34db551 | |||
| c6a380eb6c | |||
| 8f92327891 | |||
| fc7e04e9ed | |||
| 3ce1cf2bb7 | |||
| 1a7bb988fc | |||
| 2a352f96ee | |||
| 31a0100104 | |||
| 0cc1a1d2d9 | |||
| c634c07bcc | |||
| 33a3cf5322 | |||
| b4b118c201 | |||
| 351fdcc6e6 | |||
| 971cfaa38c | |||
| 024a8e3ee9 | |||
| 1d27be0ff3 | |||
| 4d2df86281 | |||
| 57a61057f5 | |||
| 419910ee21 | |||
| fee88105f9 | |||
| 27506cc02d | |||
| 88f5186d35 | |||
| eeb747de25 | |||
| 6fc1989a5d | |||
| b6c6f650ee | |||
| 6f1a5f8597 | |||
| 9d793e8e58 | |||
| cebd480818 | |||
| c547392fd4 | |||
| ce26785187 | |||
| c29b4f55d9 | |||
| ef43938e2b | |||
| ca192cfb77 | |||
| 5af4b73f87 | |||
| 12842d32ce | |||
| 9ff98daf71 | |||
| a8a05c8ea7 | |||
| b4ba42550c | |||
| 6f3a020e62 | |||
| edb2d91057 | |||
| 42c4288411 | |||
| 258965663c |
@@ -27,9 +27,9 @@ on:
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Concurrency: push/release runs are NEVER cancelled so every merge gets its
|
||||
# own SHA-tagged image; :main and :latest are guarded separately by the
|
||||
# move-main and move-latest jobs. PR runs reuse a PR-scoped group with
|
||||
# Concurrency: push/release runs are NEVER cancelled so every merge gets
|
||||
# its own :main or release-tagged image. :latest is guarded separately
|
||||
# by the move-latest job. PR runs reuse a PR-scoped group with
|
||||
# cancel-in-progress: true so rapid pushes to the same PR collapse to the
|
||||
# latest commit.
|
||||
concurrency:
|
||||
@@ -92,10 +92,10 @@ jobs:
|
||||
# pattern for multi-runner multi-platform builds.
|
||||
#
|
||||
# We apply the OCI revision label here (and again on arm64) because
|
||||
# the move-main / move-latest jobs read it off the linux/amd64
|
||||
# sub-manifest config of the floating tag to decide whether it's safe
|
||||
# to advance. The label must be on each per-arch image — manifest
|
||||
# lists themselves don't carry image config labels.
|
||||
# the move-latest job reads it off the linux/amd64 sub-manifest
|
||||
# config of the floating tag to decide whether it's safe to advance.
|
||||
# The label must be on each per-arch image — manifest lists themselves
|
||||
# don't carry image config labels.
|
||||
- name: Push amd64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
@@ -208,8 +208,14 @@ jobs:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stitch both per-arch digests into a single tagged multi-arch manifest.
|
||||
# This is a registry-side operation — no building, no layer re-push —
|
||||
# so it runs in ~30 seconds. On main pushes it produces :sha-<sha>.
|
||||
# On releases it produces :<release_tag_name>.
|
||||
# so it runs in ~30 seconds. On main pushes it produces :main; on
|
||||
# releases it produces :<release_tag_name>.
|
||||
#
|
||||
# For main pushes the ancestor check runs BEFORE the manifest push so
|
||||
# we never overwrite :main with an older commit. The top-level
|
||||
# concurrency group (`docker-${{ github.ref }}` with
|
||||
# `cancel-in-progress: false`) already serialises runs per ref; the
|
||||
# ancestor check is defense-in-depth.
|
||||
# ---------------------------------------------------------------------------
|
||||
merge:
|
||||
if: github.repository == 'NousResearch/hermes-agent' && (github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release')
|
||||
@@ -217,10 +223,15 @@ jobs:
|
||||
needs: [build-amd64, build-arm64]
|
||||
timeout-minutes: 10
|
||||
outputs:
|
||||
pushed_sha_tag: ${{ steps.mark_pushed.outputs.pushed }}
|
||||
pushed_release_tag: ${{ steps.mark_release_pushed.outputs.pushed }}
|
||||
release_tag: ${{ steps.tag.outputs.tag }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1000
|
||||
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
@@ -237,120 +248,19 @@ jobs:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Compute the tag for this run. Main pushes use sha-<sha> (so every
|
||||
# commit gets its own immutable tag); releases use the release tag name.
|
||||
- name: Compute tag
|
||||
id: tag
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
echo "tag=${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "tag=sha-${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Create manifest list and push
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Build the arg array from each digest file (filename = the digest
|
||||
# hex, with no sha256: prefix; empty file content, only the name
|
||||
# matters). Using an array avoids shellcheck SC2046 and keeps
|
||||
# every digest a single argv token even under pathological names.
|
||||
args=()
|
||||
for digest_file in *; do
|
||||
args+=("${IMAGE_NAME}@sha256:${digest_file}")
|
||||
done
|
||||
docker buildx imagetools create \
|
||||
-t "${IMAGE_NAME}:${TAG}" \
|
||||
"${args[@]}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
docker buildx imagetools inspect "${IMAGE_NAME}:${TAG}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
# Signal to move-main that the SHA tag is live. Only on main pushes;
|
||||
# releases set pushed_release_tag instead.
|
||||
- name: Mark SHA tag pushed
|
||||
id: mark_pushed
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# Signal to move-latest that the release tag is live.
|
||||
- name: Mark release tag pushed
|
||||
id: mark_release_pushed
|
||||
if: github.event_name == 'release'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Move :main to point at the SHA tag the merge job pushed.
|
||||
#
|
||||
# :main is the floating tag that tracks the tip of the main branch. Every
|
||||
# merge to main retags :main forward. Users who want "latest dev build"
|
||||
# pull :main; users who want stable releases pull :latest.
|
||||
#
|
||||
# The real serialization guarantee comes from the top-level concurrency
|
||||
# group (`docker-${{ github.ref }}` with `cancel-in-progress: false`),
|
||||
# which ensures at most one workflow run for this ref executes at a time.
|
||||
# That means two move-main steps for the same ref cannot overlap.
|
||||
#
|
||||
# This job has its own concurrency group as defense-in-depth: if the
|
||||
# top-level group is ever loosened, queued move-mains will run serially
|
||||
# in arrival order, each one running the ancestor check below and either
|
||||
# advancing :main or skipping. `cancel-in-progress: false` matches the
|
||||
# top-level setting — we don't want rapid pushes to cancel a queued
|
||||
# move-main, because the ancestor check is the real safety mechanism
|
||||
# and queueing is cheap (move-main is a ~30s registry op).
|
||||
#
|
||||
# Combined with the ancestor check, this means :main only ever moves
|
||||
# forward in git history.
|
||||
# ---------------------------------------------------------------------------
|
||||
move-main:
|
||||
if: |
|
||||
github.repository == 'NousResearch/hermes-agent'
|
||||
&& github.event_name == 'push'
|
||||
&& github.ref == 'refs/heads/main'
|
||||
&& needs.merge.outputs.pushed_sha_tag == 'true'
|
||||
needs: merge
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
concurrency:
|
||||
group: docker-move-main-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1000
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Read the git revision label off the current :main manifest, then
|
||||
# use `git merge-base --is-ancestor` to check whether our commit is a
|
||||
# descendant of it. If :main doesn't exist yet, or its label is
|
||||
# missing, we treat that as "safe to publish". If another run already
|
||||
# advanced :main past us (or diverged), we skip and leave it alone.
|
||||
# use `git merge-base --is-ancestor` to check whether our commit is
|
||||
# a descendant of it. If :main doesn't exist yet, or its label is
|
||||
# missing, we treat that as "safe to publish". If another run
|
||||
# already advanced :main past us (or diverged), we skip and leave
|
||||
# it alone.
|
||||
- name: Decide whether to move :main
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
id: main_check
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
|
||||
# Pull the JSON for the linux/amd64 sub-manifest's config and extract
|
||||
# the OCI revision label with jq — Go template field access can't
|
||||
# handle dots in map keys, so using json+jq is the robust route.
|
||||
image_json=$(
|
||||
docker buildx imagetools inspect "${image}:main" \
|
||||
--format '{{ json (index .Image "linux/amd64") }}' \
|
||||
@@ -383,7 +293,6 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Make sure we have the :main commit locally for merge-base.
|
||||
if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
|
||||
git fetch --no-tags --prune origin \
|
||||
"+refs/heads/main:refs/remotes/origin/main" \
|
||||
@@ -396,7 +305,6 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Our SHA must be a descendant of the current :main to be safe.
|
||||
if git merge-base --is-ancestor "${current_sha}" "${GITHUB_SHA}"; then
|
||||
echo "Our commit is a descendant of :main — safe to advance."
|
||||
echo "push_main=true" >> "$GITHUB_OUTPUT"
|
||||
@@ -405,19 +313,48 @@ jobs:
|
||||
echo "push_main=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Retag the already-pushed SHA manifest as :main. This is a registry-
|
||||
# side operation — no rebuild, no layer re-push — so it's quick and
|
||||
# atomic per-tag. The ancestor check above plus the cancel-in-progress
|
||||
# concurrency on this job together guarantee we only ever move :main
|
||||
# forward in git history.
|
||||
- name: Move :main to this SHA
|
||||
if: steps.main_check.outputs.push_main == 'true'
|
||||
# Compute the tag for this run. Main pushes tag directly as :main
|
||||
# (no per-commit SHA tags); releases use the release tag name.
|
||||
- name: Compute tag
|
||||
id: tag
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
echo "tag=${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "tag=main" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Gate the manifest push on the ancestor check for main pushes.
|
||||
# For releases there is no gate — the check doesn't even run.
|
||||
- name: Create manifest list and push
|
||||
if: github.event_name != 'push' || steps.main_check.outputs.push_main == 'true'
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
args=()
|
||||
for digest_file in *; do
|
||||
args+=("${IMAGE_NAME}@sha256:${digest_file}")
|
||||
done
|
||||
docker buildx imagetools create \
|
||||
--tag "${image}:main" \
|
||||
"${image}:sha-${GITHUB_SHA}"
|
||||
-t "${IMAGE_NAME}:${TAG}" \
|
||||
"${args[@]}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
- name: Inspect image
|
||||
if: github.event_name != 'push' || steps.main_check.outputs.push_main == 'true'
|
||||
run: |
|
||||
docker buildx imagetools inspect "${IMAGE_NAME}:${TAG}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
# Signal to move-latest that the release tag is live.
|
||||
- name: Mark release tag pushed
|
||||
id: mark_release_pushed
|
||||
if: github.event_name == 'release'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Move :latest to point at the release tag the merge job pushed.
|
||||
@@ -427,10 +364,10 @@ jobs:
|
||||
#
|
||||
# We still run an ancestor check against the existing :latest so that a
|
||||
# backport release on an older branch (e.g. patching v1.1.5 after v1.2.3
|
||||
# is out) doesn't drag :latest backwards. The check is the same shape as
|
||||
# move-main: read the OCI revision label off the current :latest, look up
|
||||
# that commit in git, and only advance if our release commit is a strict
|
||||
# descendant.
|
||||
# is out) doesn't drag :latest backwards. The check is the same shape
|
||||
# as the ancestor check in the merge job for :main: read the OCI
|
||||
# revision label off the current :latest, look up that commit in git,
|
||||
# and only advance if our release commit is a strict descendant.
|
||||
# ---------------------------------------------------------------------------
|
||||
move-latest:
|
||||
if: |
|
||||
|
||||
@@ -23,13 +23,24 @@ concurrency:
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
RG_VERSION=15.1.0
|
||||
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
|
||||
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
|
||||
curl -sSfL -o "$RG_TARBALL" \
|
||||
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
|
||||
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
|
||||
tar -xzf "$RG_TARBALL"
|
||||
sudo mv "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl/rg" /usr/local/bin/rg
|
||||
rm -rf "$RG_TARBALL" "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl"
|
||||
rg --version
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
@@ -44,9 +55,26 @@ jobs:
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
- name: Run tests
|
||||
# Per-file isolation via scripts/run_tests_parallel.py: discovers
|
||||
# every test_*.py file under tests/ (excluding integration/ + e2e/),
|
||||
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
|
||||
# with bounded parallelism. No xdist, no shared workers, no
|
||||
# module-level state leakage between files.
|
||||
#
|
||||
# Why per-file (not per-test): per-test spawn cost (~250ms × 17k
|
||||
# tests = 70min CPU minimum) blew the wall-clock budget. Per-file
|
||||
# spawn (~250ms × ~850 files = ~3.5min) fits while still giving
|
||||
# every file a fresh interpreter — the only isolation boundary
|
||||
# that matters in practice (cross-file leakage was the original
|
||||
# flake source; intra-file is the test author's responsibility).
|
||||
#
|
||||
# Why drop xdist entirely: xdist's persistent workers accumulate
|
||||
# state across files, which is exactly the leakage we wanted to
|
||||
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
|
||||
# the job with cleaner semantics.
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto --timeout=30 --timeout-method=signal
|
||||
python scripts/run_tests_parallel.py
|
||||
env:
|
||||
# Ensure tests don't accidentally call real APIs
|
||||
OPENROUTER_API_KEY: ""
|
||||
@@ -60,8 +88,19 @@ jobs:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
RG_VERSION=15.1.0
|
||||
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
|
||||
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
|
||||
curl -sSfL -o "$RG_TARBALL" \
|
||||
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
|
||||
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
|
||||
tar -xzf "$RG_TARBALL"
|
||||
sudo mv "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl/rg" /usr/local/bin/rg
|
||||
rm -rf "$RG_TARBALL" "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl"
|
||||
rg --version
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
@@ -18,6 +18,7 @@ __pycache__/web_tools.cpython-310.pyc
|
||||
logs/
|
||||
data/
|
||||
.pytest_cache/
|
||||
.pytest-cache/
|
||||
tmp/
|
||||
temp_vision_images/
|
||||
hermes-*/*
|
||||
|
||||
@@ -1013,17 +1013,39 @@ def profile_env(tmp_path, monkeypatch):
|
||||
|
||||
**ALWAYS use `scripts/run_tests.sh`** — do not call `pytest` directly. The script enforces
|
||||
hermetic environment parity with CI (unset credential vars, TZ=UTC, LANG=C.UTF-8,
|
||||
4 xdist workers matching GHA ubuntu-latest). Direct `pytest` on a 16+ core
|
||||
developer machine with API keys set diverges from CI in ways that have caused
|
||||
multiple "works locally, fails in CI" incidents (and the reverse).
|
||||
`-n auto` xdist workers, in-tree subprocess-isolation plugin). Direct `pytest`
|
||||
on a 16+ core developer machine with API keys set diverges from CI in ways
|
||||
that have caused multiple "works locally, fails in CI" incidents (and the reverse).
|
||||
|
||||
```bash
|
||||
scripts/run_tests.sh # full suite, CI-parity
|
||||
scripts/run_tests.sh tests/gateway/ # one directory
|
||||
scripts/run_tests.sh tests/agent/test_foo.py::test_x # one test
|
||||
scripts/run_tests.sh -v --tb=long # pass-through pytest flags
|
||||
scripts/run_tests.sh --no-isolate tests/foo/ # disable subprocess isolation (faster, for debugging)
|
||||
```
|
||||
|
||||
### Subprocess-per-test isolation
|
||||
|
||||
Every test runs in a freshly-spawned Python subprocess via the in-tree plugin
|
||||
at `tests/_isolate_plugin.py`. This means module-level dicts/sets and
|
||||
ContextVars from one test cannot leak into the next — the historic
|
||||
`_reset_module_state` autouse fixture is gone.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- The plugin uses `multiprocessing.get_context("spawn")`, which works on
|
||||
Linux, macOS, and Windows alike (POSIX `fork` is not used).
|
||||
- Per-test overhead is ~0.5–1.0s (Python startup + pytest collection). xdist
|
||||
parallelism amortizes this across cores; on a 20-core box the full suite
|
||||
finishes in roughly the same wall time as before, but flake-free.
|
||||
- `isolate_timeout` (configured in `pyproject.toml`) caps each test at 30s.
|
||||
Hangs are killed and surfaced as a failure report.
|
||||
- Pass `--no-isolate` to disable isolation — useful when debugging a single
|
||||
test interactively, or when you specifically want to verify state leakage.
|
||||
- The plugin disables itself in child processes (sentinel envvar
|
||||
`HERMES_ISOLATE_CHILD=1`), so there's no fork-bomb risk.
|
||||
|
||||
### Why the wrapper (and why the old "just call pytest" doesn't work)
|
||||
|
||||
Five real sources of local-vs-CI drift the script closes:
|
||||
@@ -1034,7 +1056,7 @@ Five real sources of local-vs-CI drift the script closes:
|
||||
| HOME / `~/.hermes/` | Your real config+auth.json | Temp dir per test |
|
||||
| Timezone | Local TZ (PDT etc.) | UTC |
|
||||
| Locale | Whatever is set | C.UTF-8 |
|
||||
| xdist workers | `-n auto` = all cores (20+ on a workstation) | `-n 4` matching CI |
|
||||
| xdist workers | `-n auto` = all cores | `-n auto` (safe — subprocess isolation prevents cross-worker flakes) |
|
||||
|
||||
`tests/conftest.py` also enforces points 1-4 as an autouse fixture so ANY pytest
|
||||
invocation (including IDE integrations) gets hermetic behavior — but the wrapper
|
||||
@@ -1042,15 +1064,21 @@ is belt-and-suspenders.
|
||||
|
||||
### Running without the wrapper (only if you must)
|
||||
|
||||
If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
|
||||
pytest directly), at minimum activate the venv and pass `-n 4`:
|
||||
If you can't use the wrapper (e.g. inside an IDE that shells pytest directly),
|
||||
at minimum activate the venv. The isolation plugin loads automatically from
|
||||
`addopts` in `pyproject.toml`, so you get the same per-test process isolation
|
||||
either way.
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
python -m pytest tests/ -q -n 4
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
Worker count above 4 will surface test-ordering flakes that CI never sees.
|
||||
If you need to bypass isolation for fast feedback while debugging:
|
||||
|
||||
```bash
|
||||
python -m pytest tests/agent/test_foo.py -q --no-isolate
|
||||
```
|
||||
|
||||
Always run the full suite before pushing changes.
|
||||
|
||||
|
||||
+2
-2
@@ -210,7 +210,7 @@ hermes-agent/
|
||||
| `~/.hermes/skills/` | All active skills (bundled + hub-installed + agent-created) |
|
||||
| `~/.hermes/memories/` | Persistent memory (MEMORY.md, USER.md) |
|
||||
| `~/.hermes/state.db` | SQLite session database |
|
||||
| `~/.hermes/sessions/` | JSON session logs |
|
||||
| `~/.hermes/sessions/` | Gateway routing index (`sessions.json`), request-dump breadcrumbs, gateway `*.jsonl` transcripts, and (optionally) per-session JSON snapshots when `sessions.write_json_snapshots: true` is set. The per-session snapshots are off by default; state.db is canonical. |
|
||||
| `~/.hermes/cron/` | Scheduled job data |
|
||||
| `~/.hermes/whatsapp/session/` | WhatsApp bridge credentials |
|
||||
|
||||
@@ -239,7 +239,7 @@ User message → AIAgent._run_agent_loop()
|
||||
|
||||
- **Self-registering tools**: Each tool file calls `registry.register()` at import time. `model_tools.py` triggers discovery by importing all tool modules.
|
||||
- **Toolset grouping**: Tools are grouped into toolsets (`web`, `terminal`, `file`, `browser`, etc.) that can be enabled/disabled per platform.
|
||||
- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. JSON logs go to `~/.hermes/sessions/`.
|
||||
- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. Per-session JSON snapshots in `~/.hermes/sessions/` were superseded by the SQLite store and are off by default; opt back in with `sessions.write_json_snapshots: true` if you have external tooling that consumes the JSON files directly.
|
||||
- **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs.
|
||||
- **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint).
|
||||
- **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests.
|
||||
|
||||
+79
-1
@@ -71,6 +71,71 @@ def _ra():
|
||||
return run_agent
|
||||
|
||||
|
||||
def _normalized_custom_base_url(value: Any) -> str:
|
||||
if not isinstance(value, str):
|
||||
return ""
|
||||
return value.strip().rstrip("/")
|
||||
|
||||
|
||||
def _custom_provider_model_matches(agent_model: str, entry: Dict[str, Any]) -> bool:
|
||||
provider_model = str(entry.get("model", "") or "").strip().lower()
|
||||
if not provider_model:
|
||||
return True
|
||||
return provider_model == str(agent_model or "").strip().lower()
|
||||
|
||||
|
||||
def _custom_provider_extra_body_for_agent(
|
||||
*,
|
||||
provider: str,
|
||||
model: str,
|
||||
base_url: str,
|
||||
custom_providers: List[Dict[str, Any]],
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
if (provider or "").strip().lower() != "custom":
|
||||
return None
|
||||
|
||||
target_url = _normalized_custom_base_url(base_url)
|
||||
if not target_url:
|
||||
return None
|
||||
|
||||
fallback: Optional[Dict[str, Any]] = None
|
||||
for entry in custom_providers or []:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
if _normalized_custom_base_url(entry.get("base_url")) != target_url:
|
||||
continue
|
||||
extra_body = entry.get("extra_body")
|
||||
if not isinstance(extra_body, dict) or not extra_body:
|
||||
continue
|
||||
provider_model = str(entry.get("model", "") or "").strip()
|
||||
if provider_model:
|
||||
if _custom_provider_model_matches(model, entry):
|
||||
return dict(extra_body)
|
||||
elif fallback is None:
|
||||
fallback = dict(extra_body)
|
||||
|
||||
return fallback
|
||||
|
||||
|
||||
def _merge_custom_provider_extra_body(agent, custom_providers: List[Dict[str, Any]]) -> None:
|
||||
extra_body = _custom_provider_extra_body_for_agent(
|
||||
provider=agent.provider,
|
||||
model=agent.model,
|
||||
base_url=agent.base_url,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if not extra_body:
|
||||
return
|
||||
|
||||
overrides = dict(getattr(agent, "request_overrides", {}) or {})
|
||||
merged_extra_body = dict(extra_body)
|
||||
existing_extra_body = overrides.get("extra_body")
|
||||
if isinstance(existing_extra_body, dict):
|
||||
merged_extra_body.update(existing_extra_body)
|
||||
overrides["extra_body"] = merged_extra_body
|
||||
agent.request_overrides = overrides
|
||||
|
||||
|
||||
def init_agent(
|
||||
agent,
|
||||
base_url: str = None,
|
||||
@@ -901,7 +966,19 @@ def init_agent(
|
||||
hermes_home = get_hermes_home()
|
||||
agent.logs_dir = hermes_home / "sessions"
|
||||
agent.logs_dir.mkdir(parents=True, exist_ok=True)
|
||||
agent.session_log_file = agent.logs_dir / f"session_{agent.session_id}.json"
|
||||
# Per-session JSON snapshot writer (~/.hermes/sessions/session_{sid}.json)
|
||||
# is opt-in via sessions.write_json_snapshots (default False). state.db
|
||||
# is canonical — the snapshot is only useful for external tooling that
|
||||
# reads the JSON files directly. See run_agent._save_session_log.
|
||||
agent._session_json_enabled = False
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_sess_cfg
|
||||
_sess_cfg = (_load_sess_cfg().get("sessions") or {})
|
||||
agent._session_json_enabled = bool(_sess_cfg.get("write_json_snapshots", False))
|
||||
except Exception:
|
||||
pass
|
||||
# logs_dir is retained unconditionally for request_dump_*.json (debug
|
||||
# breadcrumb path written by agent_runtime_helpers.dump_api_request_debug).
|
||||
|
||||
# Track conversation messages for session logging
|
||||
agent._session_messages: List[Dict[str, Any]] = []
|
||||
@@ -1201,6 +1278,7 @@ def init_agent(
|
||||
# Store for reuse by _check_compression_model_feasibility (auxiliary
|
||||
# compression model context-length detection needs the same list).
|
||||
agent._custom_providers = _custom_providers
|
||||
_merge_custom_provider_extra_body(agent, _custom_providers)
|
||||
|
||||
# Check custom_providers per-model context_length
|
||||
if _config_context_length is None and _custom_providers:
|
||||
|
||||
@@ -1869,6 +1869,77 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No
|
||||
|
||||
|
||||
|
||||
def _iter_pool_sockets(client: Any):
|
||||
"""Yield raw sockets reachable from an OpenAI/httpx client pool.
|
||||
|
||||
httpcore 1.x stores the concrete HTTP11/HTTP2 connection under
|
||||
``conn._connection``; older versions exposed stream attributes directly
|
||||
on the pool entry. Keep the traversal defensive because these are private
|
||||
transport internals and vary across httpx/httpcore releases.
|
||||
"""
|
||||
try:
|
||||
http_client = getattr(client, "_client", None)
|
||||
if http_client is None:
|
||||
return
|
||||
transport = getattr(http_client, "_transport", None)
|
||||
if transport is None:
|
||||
return
|
||||
pool = getattr(transport, "_pool", None)
|
||||
if pool is None:
|
||||
return
|
||||
connections = (
|
||||
getattr(pool, "_connections", None)
|
||||
or getattr(pool, "_pool", None)
|
||||
or []
|
||||
)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
seen: set[int] = set()
|
||||
for conn in list(connections):
|
||||
candidates = [conn]
|
||||
inner = getattr(conn, "_connection", None)
|
||||
if inner is not None:
|
||||
candidates.append(inner)
|
||||
for candidate in candidates:
|
||||
stream = (
|
||||
getattr(candidate, "_network_stream", None)
|
||||
or getattr(candidate, "_stream", None)
|
||||
)
|
||||
if stream is None:
|
||||
continue
|
||||
sock = getattr(stream, "_sock", None)
|
||||
if sock is None:
|
||||
get_extra_info = getattr(stream, "get_extra_info", None)
|
||||
if callable(get_extra_info):
|
||||
try:
|
||||
sock = get_extra_info("socket")
|
||||
except Exception:
|
||||
sock = None
|
||||
if sock is None:
|
||||
wrapped = getattr(stream, "stream", None)
|
||||
if wrapped is not None:
|
||||
sock = getattr(wrapped, "_sock", None)
|
||||
if sock is None:
|
||||
# anyio-backed streams expose the raw socket through
|
||||
# SocketAttribute.raw_socket when available.
|
||||
wrapped = getattr(stream, "_stream", None)
|
||||
extra = getattr(wrapped, "extra", None)
|
||||
if callable(extra):
|
||||
try:
|
||||
from anyio.abc import SocketAttribute
|
||||
sock = extra(SocketAttribute.raw_socket)
|
||||
except Exception:
|
||||
sock = None
|
||||
if sock is None:
|
||||
continue
|
||||
marker = id(sock)
|
||||
if marker in seen:
|
||||
continue
|
||||
seen.add(marker)
|
||||
yield sock
|
||||
|
||||
|
||||
def cleanup_dead_connections(agent) -> bool:
|
||||
"""Detect and clean up dead TCP connections on the primary client.
|
||||
|
||||
@@ -1882,36 +1953,8 @@ def cleanup_dead_connections(agent) -> bool:
|
||||
if client is None:
|
||||
return False
|
||||
try:
|
||||
http_client = getattr(client, "_client", None)
|
||||
if http_client is None:
|
||||
return False
|
||||
transport = getattr(http_client, "_transport", None)
|
||||
if transport is None:
|
||||
return False
|
||||
pool = getattr(transport, "_pool", None)
|
||||
if pool is None:
|
||||
return False
|
||||
connections = (
|
||||
getattr(pool, "_connections", None)
|
||||
or getattr(pool, "_pool", None)
|
||||
or []
|
||||
)
|
||||
dead_count = 0
|
||||
for conn in list(connections):
|
||||
# Check for connections that are idle but have closed sockets
|
||||
stream = (
|
||||
getattr(conn, "_network_stream", None)
|
||||
or getattr(conn, "_stream", None)
|
||||
)
|
||||
if stream is None:
|
||||
continue
|
||||
sock = getattr(stream, "_sock", None)
|
||||
if sock is None:
|
||||
sock = getattr(stream, "stream", None)
|
||||
if sock is not None:
|
||||
sock = getattr(sock, "_sock", None)
|
||||
if sock is None:
|
||||
continue
|
||||
for sock in _iter_pool_sockets(client):
|
||||
# Probe socket health with a non-blocking recv peek
|
||||
import socket as _socket
|
||||
try:
|
||||
@@ -2087,36 +2130,7 @@ def force_close_tcp_sockets(client: Any) -> int:
|
||||
|
||||
closed = 0
|
||||
try:
|
||||
http_client = getattr(client, "_client", None)
|
||||
if http_client is None:
|
||||
return 0
|
||||
transport = getattr(http_client, "_transport", None)
|
||||
if transport is None:
|
||||
return 0
|
||||
pool = getattr(transport, "_pool", None)
|
||||
if pool is None:
|
||||
return 0
|
||||
# httpx uses httpcore connection pools; connections live in
|
||||
# _connections (list) or _pool (list) depending on version.
|
||||
connections = (
|
||||
getattr(pool, "_connections", None)
|
||||
or getattr(pool, "_pool", None)
|
||||
or []
|
||||
)
|
||||
for conn in list(connections):
|
||||
stream = (
|
||||
getattr(conn, "_network_stream", None)
|
||||
or getattr(conn, "_stream", None)
|
||||
)
|
||||
if stream is None:
|
||||
continue
|
||||
sock = getattr(stream, "_sock", None)
|
||||
if sock is None:
|
||||
sock = getattr(stream, "stream", None)
|
||||
if sock is not None:
|
||||
sock = getattr(sock, "_sock", None)
|
||||
if sock is None:
|
||||
continue
|
||||
for sock in _iter_pool_sockets(client):
|
||||
try:
|
||||
sock.shutdown(_socket.SHUT_RDWR)
|
||||
except OSError:
|
||||
@@ -2154,5 +2168,6 @@ __all__ = [
|
||||
"cleanup_dead_connections",
|
||||
"extract_api_error_context",
|
||||
"apply_pending_steer_to_tool_results",
|
||||
"_iter_pool_sockets",
|
||||
"force_close_tcp_sockets",
|
||||
]
|
||||
|
||||
@@ -390,6 +390,9 @@ def _run_review_in_thread(
|
||||
# parent below so memory(action="add") writes from
|
||||
# the review still land on disk; the review just
|
||||
# has zero side effects on external providers.
|
||||
# Match parent's toolset config so ``tools[]`` is byte-identical
|
||||
# in the request body — Anthropic's cache key includes it.
|
||||
# (The runtime whitelist below still restricts dispatch.)
|
||||
review_agent = AIAgent(
|
||||
model=agent.model,
|
||||
max_iterations=16,
|
||||
@@ -401,6 +404,8 @@ def _run_review_in_thread(
|
||||
api_key=_parent_runtime.get("api_key") or None,
|
||||
credential_pool=getattr(agent, "_credential_pool", None),
|
||||
parent_session_id=agent.session_id,
|
||||
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
|
||||
disabled_toolsets=getattr(agent, "disabled_toolsets", None),
|
||||
skip_memory=True,
|
||||
)
|
||||
review_agent._memory_write_origin = "background_review"
|
||||
|
||||
@@ -92,17 +92,36 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
||||
"""
|
||||
result = {"response": None, "error": None}
|
||||
request_client_holder = {"client": None}
|
||||
request_client_lock = threading.Lock()
|
||||
|
||||
def _set_request_client(client):
|
||||
with request_client_lock:
|
||||
request_client_holder["client"] = client
|
||||
return client
|
||||
|
||||
def _take_request_client():
|
||||
with request_client_lock:
|
||||
client = request_client_holder.get("client")
|
||||
request_client_holder["client"] = None
|
||||
return client
|
||||
|
||||
def _close_request_client_once(reason: str) -> None:
|
||||
request_client = _take_request_client()
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason=reason)
|
||||
|
||||
def _call():
|
||||
try:
|
||||
if agent.api_mode == "codex_responses":
|
||||
request_client_holder["client"] = agent._create_request_openai_client(
|
||||
reason="codex_stream_request",
|
||||
api_kwargs=api_kwargs,
|
||||
request_client = _set_request_client(
|
||||
agent._create_request_openai_client(
|
||||
reason="codex_stream_request",
|
||||
api_kwargs=api_kwargs,
|
||||
)
|
||||
)
|
||||
result["response"] = agent._run_codex_stream(
|
||||
api_kwargs,
|
||||
client=request_client_holder["client"],
|
||||
client=request_client,
|
||||
on_first_delta=getattr(agent, "_codex_on_first_delta", None),
|
||||
)
|
||||
elif agent.api_mode == "anthropic_messages":
|
||||
@@ -131,17 +150,17 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
||||
raise
|
||||
result["response"] = normalize_converse_response(raw_response)
|
||||
else:
|
||||
request_client_holder["client"] = agent._create_request_openai_client(
|
||||
reason="chat_completion_request",
|
||||
api_kwargs=api_kwargs,
|
||||
request_client = _set_request_client(
|
||||
agent._create_request_openai_client(
|
||||
reason="chat_completion_request",
|
||||
api_kwargs=api_kwargs,
|
||||
)
|
||||
)
|
||||
result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs)
|
||||
result["response"] = request_client.chat.completions.create(**api_kwargs)
|
||||
except Exception as e:
|
||||
result["error"] = e
|
||||
finally:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason="request_complete")
|
||||
_close_request_client_once("request_complete")
|
||||
|
||||
# ── Stale-call timeout (mirrors streaming stale detector) ────────
|
||||
# Non-streaming calls return nothing until the full response is
|
||||
@@ -192,9 +211,7 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
||||
agent._anthropic_client.close()
|
||||
agent._rebuild_anthropic_client()
|
||||
else:
|
||||
rc = request_client_holder.get("client")
|
||||
if rc is not None:
|
||||
agent._close_request_openai_client(rc, reason="stale_call_kill")
|
||||
_close_request_client_once("stale_call_kill")
|
||||
except Exception:
|
||||
pass
|
||||
agent._touch_activity(
|
||||
@@ -218,9 +235,7 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
||||
agent._anthropic_client.close()
|
||||
agent._rebuild_anthropic_client()
|
||||
else:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason="interrupt_abort")
|
||||
_close_request_client_once("interrupt_abort")
|
||||
except Exception:
|
||||
pass
|
||||
raise InterruptedError("Agent interrupted during API call")
|
||||
@@ -1257,6 +1272,24 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
|
||||
result = {"response": None, "error": None, "partial_tool_names": []}
|
||||
request_client_holder = {"client": None, "diag": None}
|
||||
request_client_lock = threading.Lock()
|
||||
|
||||
def _set_request_client(client):
|
||||
with request_client_lock:
|
||||
request_client_holder["client"] = client
|
||||
return client
|
||||
|
||||
def _take_request_client():
|
||||
with request_client_lock:
|
||||
client = request_client_holder.get("client")
|
||||
request_client_holder["client"] = None
|
||||
return client
|
||||
|
||||
def _close_request_client_once(reason: str) -> None:
|
||||
request_client = _take_request_client()
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason=reason)
|
||||
|
||||
first_delta_fired = {"done": False}
|
||||
deltas_were_sent = {"yes": False} # Track if any deltas were fired (for fallback)
|
||||
# Wall-clock timestamp of the last real streaming chunk. The outer
|
||||
@@ -1313,9 +1346,11 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
pool=_conn_cap,
|
||||
),
|
||||
}
|
||||
request_client_holder["client"] = agent._create_request_openai_client(
|
||||
reason="chat_completion_stream_request",
|
||||
api_kwargs=stream_kwargs,
|
||||
request_client = _set_request_client(
|
||||
agent._create_request_openai_client(
|
||||
reason="chat_completion_stream_request",
|
||||
api_kwargs=stream_kwargs,
|
||||
)
|
||||
)
|
||||
# Reset stale-stream timer so the detector measures from this
|
||||
# attempt's start, not a previous attempt's last chunk.
|
||||
@@ -1326,7 +1361,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
# ``request_client_holder["diag"]`` for closure access.
|
||||
_diag = agent._stream_diag_init()
|
||||
request_client_holder["diag"] = _diag
|
||||
stream = request_client_holder["client"].chat.completions.create(**stream_kwargs)
|
||||
stream = request_client.chat.completions.create(**stream_kwargs)
|
||||
|
||||
# Capture rate limit headers from the initial HTTP response.
|
||||
# The OpenAI SDK Stream object exposes the underlying httpx
|
||||
@@ -1765,12 +1800,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
mid_tool_call=True,
|
||||
diag=request_client_holder.get("diag"),
|
||||
)
|
||||
stale = request_client_holder.get("client")
|
||||
if stale is not None:
|
||||
agent._close_request_openai_client(
|
||||
stale, reason="stream_mid_tool_retry_cleanup"
|
||||
)
|
||||
request_client_holder["client"] = None
|
||||
_close_request_client_once("stream_mid_tool_retry_cleanup")
|
||||
try:
|
||||
agent._replace_primary_openai_client(
|
||||
reason="stream_mid_tool_retry_pool_cleanup"
|
||||
@@ -1821,12 +1851,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
diag=request_client_holder.get("diag"),
|
||||
)
|
||||
# Close the stale request client before retry
|
||||
stale = request_client_holder.get("client")
|
||||
if stale is not None:
|
||||
agent._close_request_openai_client(
|
||||
stale, reason="stream_retry_cleanup"
|
||||
)
|
||||
request_client_holder["client"] = None
|
||||
_close_request_client_once("stream_retry_cleanup")
|
||||
# Also rebuild the primary client to purge
|
||||
# any dead connections from the pool.
|
||||
try:
|
||||
@@ -1894,9 +1919,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
result["error"] = e
|
||||
return
|
||||
finally:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason="stream_request_complete")
|
||||
_close_request_client_once("stream_request_complete")
|
||||
|
||||
# Provider-configured stale timeout takes priority over env default.
|
||||
_cfg_stale = get_provider_stale_timeout(agent.provider, agent.model)
|
||||
@@ -1966,9 +1989,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
f"Reconnecting..."
|
||||
)
|
||||
try:
|
||||
rc = request_client_holder.get("client")
|
||||
if rc is not None:
|
||||
agent._close_request_openai_client(rc, reason="stale_stream_kill")
|
||||
_close_request_client_once("stale_stream_kill")
|
||||
except Exception:
|
||||
pass
|
||||
# Rebuild the primary client too — its connection pool
|
||||
@@ -1990,9 +2011,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
agent._anthropic_client.close()
|
||||
agent._rebuild_anthropic_client()
|
||||
else:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason="stream_interrupt_abort")
|
||||
_close_request_client_once("stream_interrupt_abort")
|
||||
except Exception:
|
||||
pass
|
||||
raise InterruptedError("Agent interrupted during streaming API call")
|
||||
|
||||
@@ -251,13 +251,16 @@ def _chat_messages_to_responses_input(
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items.
|
||||
|
||||
``is_xai_responses=True`` strips ``encrypted_content`` from replayed
|
||||
reasoning items. xAI's OAuth/SuperGrok ``/v1/responses`` surface
|
||||
rejects encrypted reasoning blobs minted by prior turns: the request
|
||||
streams an ``error`` SSE frame before ``response.created`` and the
|
||||
OpenAI SDK collapses it into a generic stream-ordering error. Native
|
||||
Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content
|
||||
— keep the default off.
|
||||
``is_xai_responses`` is kept for transport signature compatibility but
|
||||
no longer suppresses encrypted reasoning replay. Earlier (PR #26644,
|
||||
May 2026) we believed xAI's OAuth/SuperGrok ``/v1/responses`` surface
|
||||
rejected replayed ``encrypted_content`` reasoning items minted by
|
||||
prior turns, and we stripped them. That decision was wrong — xAI
|
||||
explicitly relies on Hermes threading encrypted reasoning back across
|
||||
turns for cross-turn coherence (the whole point of their partnership
|
||||
integration). We now replay encrypted reasoning on every Responses
|
||||
transport (xAI, native Codex, custom relays) and let xAI tell us
|
||||
explicitly if a specific surface ever rejects a payload.
|
||||
"""
|
||||
items: List[Dict[str, Any]] = []
|
||||
seen_item_ids: set = set()
|
||||
@@ -284,17 +287,12 @@ def _chat_messages_to_responses_input(
|
||||
if role == "assistant":
|
||||
# Replay encrypted reasoning items from previous turns
|
||||
# so the API can maintain coherent reasoning chains.
|
||||
#
|
||||
# xAI OAuth (SuperGrok/Premium) rejects replayed
|
||||
# ``encrypted_content`` reasoning items minted by prior
|
||||
# turns — see _chat_messages_to_responses_input docstring.
|
||||
# When ``is_xai_responses`` is set we drop the replay
|
||||
# entirely; Grok still reasons on each turn server-side,
|
||||
# we just don't try to thread the prior turn's encrypted
|
||||
# blob back in.
|
||||
# This applies to every Responses transport including
|
||||
# xAI — see _chat_messages_to_responses_input docstring
|
||||
# for the May 2026 reversal of the earlier xAI gate.
|
||||
codex_reasoning = msg.get("codex_reasoning_items")
|
||||
has_codex_reasoning = False
|
||||
if isinstance(codex_reasoning, list) and not is_xai_responses:
|
||||
if isinstance(codex_reasoning, list):
|
||||
for ri in codex_reasoning:
|
||||
if isinstance(ri, dict) and ri.get("encrypted_content"):
|
||||
item_id = ri.get("id")
|
||||
|
||||
@@ -387,8 +387,6 @@ def compress_context(
|
||||
_SESSION_ID.set(agent.session_id)
|
||||
except Exception:
|
||||
pass
|
||||
# Update session_log_file to point to the new session's JSON file
|
||||
agent.session_log_file = agent.logs_dir / f"session_{agent.session_id}.json"
|
||||
agent._session_db_created = False
|
||||
agent._session_db.create_session(
|
||||
session_id=agent.session_id,
|
||||
|
||||
@@ -1454,7 +1454,6 @@ def run_conversation(
|
||||
}
|
||||
messages.append(continue_msg)
|
||||
agent._session_messages = messages
|
||||
agent._save_session_log(messages)
|
||||
restart_with_length_continuation = True
|
||||
break
|
||||
|
||||
@@ -3086,7 +3085,6 @@ def run_conversation(
|
||||
if not agent.quiet_mode:
|
||||
agent._vprint(f"{agent.log_prefix}↻ Codex response incomplete; continuing turn ({agent._codex_incomplete_retries}/3)")
|
||||
agent._session_messages = messages
|
||||
agent._save_session_log(messages)
|
||||
continue
|
||||
|
||||
agent._codex_incomplete_retries = 0
|
||||
@@ -3411,7 +3409,6 @@ def run_conversation(
|
||||
|
||||
# Save session log incrementally (so progress is visible even if interrupted)
|
||||
agent._session_messages = messages
|
||||
agent._save_session_log(messages)
|
||||
|
||||
# Continue loop for next response
|
||||
continue
|
||||
@@ -3578,7 +3575,6 @@ def run_conversation(
|
||||
interim_msg["_thinking_prefill"] = True
|
||||
messages.append(interim_msg)
|
||||
agent._session_messages = messages
|
||||
agent._save_session_log(messages)
|
||||
continue
|
||||
|
||||
# ── Empty response retry ──────────────────────
|
||||
@@ -3712,7 +3708,6 @@ def run_conversation(
|
||||
}
|
||||
messages.append(continue_msg)
|
||||
agent._session_messages = messages
|
||||
agent._save_session_log(messages)
|
||||
continue
|
||||
|
||||
codex_ack_continuations = 0
|
||||
|
||||
@@ -50,6 +50,7 @@ from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -176,7 +177,9 @@ def get_keep() -> int:
|
||||
|
||||
def _count_skill_files(base: Path) -> int:
|
||||
try:
|
||||
return sum(1 for _ in base.rglob("SKILL.md"))
|
||||
return sum(
|
||||
1 for p in base.rglob("SKILL.md") if not is_excluded_skill_path(p)
|
||||
)
|
||||
except OSError:
|
||||
return 0
|
||||
|
||||
|
||||
@@ -16,9 +16,19 @@ def _hermes_home_path() -> Path:
|
||||
return Path(os.path.expanduser("~/.hermes"))
|
||||
|
||||
|
||||
def _hermes_root_path() -> Path:
|
||||
"""Resolve the Hermes root dir (always the parent of any profile, never per-profile)."""
|
||||
try:
|
||||
from hermes_constants import get_default_hermes_root # local import to avoid cycles
|
||||
return get_default_hermes_root()
|
||||
except Exception:
|
||||
return Path(os.path.expanduser("~/.hermes"))
|
||||
|
||||
|
||||
def build_write_denied_paths(home: str) -> set[str]:
|
||||
"""Return exact sensitive paths that must never be written."""
|
||||
hermes_home = _hermes_home_path()
|
||||
hermes_root = _hermes_root_path()
|
||||
return {
|
||||
os.path.realpath(p)
|
||||
for p in [
|
||||
@@ -26,7 +36,11 @@ def build_write_denied_paths(home: str) -> set[str]:
|
||||
os.path.join(home, ".ssh", "id_rsa"),
|
||||
os.path.join(home, ".ssh", "id_ed25519"),
|
||||
os.path.join(home, ".ssh", "config"),
|
||||
# Active profile .env (or top-level .env when not in profile mode).
|
||||
str(hermes_home / ".env"),
|
||||
# Top-level .env, even when running under a profile — overwriting it
|
||||
# leaks credentials across every profile that inherits from root (#15981).
|
||||
str(hermes_root / ".env"),
|
||||
os.path.join(home, ".bashrc"),
|
||||
os.path.join(home, ".zshrc"),
|
||||
os.path.join(home, ".profile"),
|
||||
|
||||
@@ -59,7 +59,7 @@ from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from hermes_constants import get_hermes_home, secure_parent_dir
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -491,10 +491,8 @@ def save_credentials(creds: GoogleCredentials) -> Path:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Tighten parent dir to 0o700 so siblings can't traverse to the creds file.
|
||||
# On Windows this is a no-op (POSIX mode bits aren't enforced); ignore failures.
|
||||
try:
|
||||
os.chmod(path.parent, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
# secure_parent_dir refuses to chmod / or top-level dirs (#25821).
|
||||
secure_parent_dir(path)
|
||||
payload = json.dumps(creds.to_dict(), indent=2, sort_keys=True) + "\n"
|
||||
|
||||
with _credentials_lock():
|
||||
|
||||
+93
-3
@@ -46,6 +46,84 @@ logger = logging.getLogger(__name__)
|
||||
_VALID_MODES = frozenset({"auto", "native", "text"})
|
||||
|
||||
|
||||
# Strict YAML/JSON boolean coercion for capability overrides.
|
||||
#
|
||||
# ``bool("false")`` is True in Python because non-empty strings are truthy, so
|
||||
# a user writing ``supports_vision: "false"`` (quoted — a common YAML mistake)
|
||||
# would silently enable native vision routing on a model that can't actually
|
||||
# handle it. Accept only the values YAML 1.1 / 1.2 treat as booleans, plus
|
||||
# real ``bool`` and integer 0/1. Anything else returns None so the caller
|
||||
# falls through to models.dev rather than honouring garbage.
|
||||
_TRUE_TOKENS = frozenset({"true", "yes", "on", "1"})
|
||||
_FALSE_TOKENS = frozenset({"false", "no", "off", "0"})
|
||||
|
||||
|
||||
def _coerce_capability_bool(raw: Any) -> Optional[bool]:
|
||||
"""Return True/False for recognised boolean values, None otherwise."""
|
||||
if isinstance(raw, bool):
|
||||
return raw
|
||||
if isinstance(raw, int):
|
||||
if raw in (0, 1):
|
||||
return bool(raw)
|
||||
return None
|
||||
if isinstance(raw, str):
|
||||
s = raw.strip().lower()
|
||||
if s in _TRUE_TOKENS:
|
||||
return True
|
||||
if s in _FALSE_TOKENS:
|
||||
return False
|
||||
return None
|
||||
|
||||
|
||||
def _supports_vision_override(
|
||||
cfg: Optional[Dict[str, Any]],
|
||||
provider: str,
|
||||
model: str,
|
||||
) -> Optional[bool]:
|
||||
"""Resolve user-declared vision capability from config.yaml.
|
||||
|
||||
Resolution order, first hit wins:
|
||||
1. ``model.supports_vision`` (top-level shortcut for the active model)
|
||||
2. ``providers.<provider>.models.<model>.supports_vision``
|
||||
(named custom providers — ``provider`` may be the runtime-resolved
|
||||
value ``"custom"`` and/or the user-declared name under
|
||||
``model.provider``; both are tried)
|
||||
|
||||
Returns None when no override is set, so the caller falls through to
|
||||
models.dev. Returns False explicitly only when the user wrote a
|
||||
recognised boolean false token.
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
return None
|
||||
|
||||
# 1. Top-level shortcut
|
||||
model_cfg_raw = cfg.get("model")
|
||||
model_cfg: Dict[str, Any] = model_cfg_raw if isinstance(model_cfg_raw, dict) else {}
|
||||
top = _coerce_capability_bool(model_cfg.get("supports_vision"))
|
||||
if top is not None:
|
||||
return top
|
||||
|
||||
# 2. Per-provider, per-model. Named custom providers (e.g. "my-vllm")
|
||||
# get rewritten to provider="custom" at runtime
|
||||
# (hermes_cli/runtime_provider.py:_resolve_named_custom_runtime), so the
|
||||
# config still holds the user-declared name under model.provider. Try
|
||||
# both as candidate provider keys.
|
||||
config_provider = str(model_cfg.get("provider") or "").strip()
|
||||
providers_raw = cfg.get("providers")
|
||||
providers_cfg: Dict[str, Any] = providers_raw if isinstance(providers_raw, dict) else {}
|
||||
for p in dict.fromkeys(filter(None, (provider, config_provider))):
|
||||
entry_raw = providers_cfg.get(p)
|
||||
entry: Dict[str, Any] = entry_raw if isinstance(entry_raw, dict) else {}
|
||||
models_raw = entry.get("models")
|
||||
models_cfg: Dict[str, Any] = models_raw if isinstance(models_raw, dict) else {}
|
||||
per_model_raw = models_cfg.get(model)
|
||||
per_model: Dict[str, Any] = per_model_raw if isinstance(per_model_raw, dict) else {}
|
||||
coerced = _coerce_capability_bool(per_model.get("supports_vision"))
|
||||
if coerced is not None:
|
||||
return coerced
|
||||
return None
|
||||
|
||||
|
||||
def _coerce_mode(raw: Any) -> str:
|
||||
"""Normalize a config value into one of the valid modes."""
|
||||
if not isinstance(raw, str):
|
||||
@@ -81,8 +159,20 @@ def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
|
||||
"""Return True/False if we can resolve caps, None if unknown."""
|
||||
def _lookup_supports_vision(
|
||||
provider: str,
|
||||
model: str,
|
||||
cfg: Optional[Dict[str, Any]] = None,
|
||||
) -> Optional[bool]:
|
||||
"""Return True/False if we can resolve caps, None if unknown.
|
||||
|
||||
Consults the user's ``supports_vision`` override in config.yaml first
|
||||
(so custom/local models declared as vision-capable don't fall through to
|
||||
text routing in ``auto`` mode), then falls back to models.dev.
|
||||
"""
|
||||
override = _supports_vision_override(cfg, provider, model)
|
||||
if override is not None:
|
||||
return override
|
||||
if not provider or not model:
|
||||
return None
|
||||
try:
|
||||
@@ -123,7 +213,7 @@ def decide_image_input_mode(
|
||||
if _explicit_aux_vision_override(cfg):
|
||||
return "text"
|
||||
|
||||
supports = _lookup_supports_vision(provider, model)
|
||||
supports = _lookup_supports_vision(provider, model, cfg)
|
||||
if supports is True:
|
||||
return "native"
|
||||
return "text"
|
||||
|
||||
+19
-2
@@ -428,6 +428,23 @@ PLATFORM_HINTS = {
|
||||
"files arrive as downloadable documents. You can also include image "
|
||||
"URLs in markdown format  and they will be sent as photos."
|
||||
),
|
||||
"whatsapp_cloud": (
|
||||
"You are on a text messaging communication platform, WhatsApp "
|
||||
"(via Meta's official Business Cloud API). Standard markdown "
|
||||
"(**bold**, ~~strike~~, # headers, [links](url)) is auto-converted "
|
||||
"to WhatsApp's native syntax (*bold*, ~strike~, etc.) — feel free "
|
||||
"to write in markdown. Tables are NOT supported — prefer bullet "
|
||||
"lists or labeled key:value pairs. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.jpg, .png) become photo attachments, "
|
||||
"videos (.mp4) play inline, audio (.mp3, .ogg) sends as voice/audio "
|
||||
"messages, other files arrive as documents. Image URLs in markdown "
|
||||
"format  also work. "
|
||||
"IMPORTANT: this platform has a 24-hour conversation window — if the "
|
||||
"user hasn't messaged in 24h, free-form replies are refused by Meta "
|
||||
"(error 131047). This rarely matters for live chat, but is worth "
|
||||
"knowing if you're scheduling a delayed message."
|
||||
),
|
||||
"telegram": (
|
||||
"You are on a text messaging communication platform, Telegram. "
|
||||
"Standard markdown is automatically converted to Telegram format. "
|
||||
@@ -1279,13 +1296,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
|
||||
lines = [
|
||||
"# Nous Subscription",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, OpenAI Whisper STT, and browser automation (Browser Use) by default. Modal execution is optional.",
|
||||
"Current capability status:",
|
||||
]
|
||||
lines.extend(_status_line(feature) for feature in features.items())
|
||||
lines.extend(
|
||||
[
|
||||
"When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.",
|
||||
"When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, OpenAI Whisper, or Browser-Use API keys.",
|
||||
"If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
|
||||
"Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
|
||||
"Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
"""External secret source integrations.
|
||||
|
||||
A secret source is anything that can supply environment-variable-shaped
|
||||
credentials at process startup, _after_ ~/.hermes/.env has loaded. By
|
||||
default sources are non-destructive: they only set values for env vars
|
||||
that aren't already present, so .env and shell exports continue to win.
|
||||
|
||||
Currently shipped:
|
||||
|
||||
- ``bitwarden`` — Bitwarden Secrets Manager (`bws` CLI). See
|
||||
``agent.secret_sources.bitwarden`` for the integration and
|
||||
``hermes_cli.secrets_cli`` for the user-facing setup wizard.
|
||||
"""
|
||||
@@ -0,0 +1,515 @@
|
||||
"""Bitwarden Secrets Manager (`bws` CLI) integration.
|
||||
|
||||
Hermes pulls API keys from Bitwarden Secrets Manager at process startup
|
||||
so they don't have to live in plaintext in ``~/.hermes/.env``.
|
||||
|
||||
Design summary
|
||||
--------------
|
||||
|
||||
* The ``bws`` binary is auto-installed into ``<hermes_home>/bin/bws`` on
|
||||
first use. Hermes pins one version (``_BWS_VERSION``) and downloads
|
||||
the matching asset from the official GitHub Releases page, verifying
|
||||
the SHA-256 against the release's published checksum file.
|
||||
* The access token is stored in ``~/.hermes/.env`` as
|
||||
``BWS_ACCESS_TOKEN`` (or whatever name the user picked in
|
||||
``secrets.bitwarden.access_token_env``). This is the one
|
||||
bootstrap secret — every other provider key can live in Bitwarden.
|
||||
* Pulling secrets is a single ``bws secret list <project_id>
|
||||
--output json`` call. We cache the result in-process for
|
||||
``cache_ttl_seconds`` so back-to-back ``hermes`` invocations don't
|
||||
hammer the API.
|
||||
* Failures NEVER block Hermes startup. Missing binary, no network,
|
||||
expired token, etc. all emit a one-line warning and continue with
|
||||
whatever credentials ``.env`` already had.
|
||||
|
||||
The module is intentionally subprocess-driven rather than going through
|
||||
the ``bitwarden-sdk-secrets`` Python package: one cross-platform binary
|
||||
is easier to lazy-install than a wheels-with-Rust-extension dependency.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import stat
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import zipfile
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Pinned upstream version. Bump in a follow-up PR — never auto-resolve
|
||||
# "latest" because upstream release shape (asset names, CLI flags) is
|
||||
# allowed to change between majors and we want updates to be deliberate.
|
||||
_BWS_VERSION = "2.0.0"
|
||||
|
||||
_BWS_RELEASE_BASE = (
|
||||
f"https://github.com/bitwarden/sdk-sm/releases/download/bws-v{_BWS_VERSION}"
|
||||
)
|
||||
_BWS_CHECKSUM_NAME = f"bws-sha256-checksums-{_BWS_VERSION}.txt"
|
||||
|
||||
# How long to wait for bws subprocesses and HTTP downloads, in seconds.
|
||||
_BWS_DOWNLOAD_TIMEOUT = 60
|
||||
_BWS_RUN_TIMEOUT = 30
|
||||
|
||||
# In-process cache so repeated load_hermes_dotenv() calls (CLI startup,
|
||||
# gateway hot-reload, test suites) don't re-fetch from BSM.
|
||||
_CacheKey = Tuple[str, str] # (access_token_fingerprint, project_id)
|
||||
_CACHE: Dict[_CacheKey, "_CachedFetch"] = {}
|
||||
|
||||
|
||||
@dataclass
|
||||
class _CachedFetch:
|
||||
secrets: Dict[str, str]
|
||||
fetched_at: float
|
||||
|
||||
def is_fresh(self, ttl_seconds: float) -> bool:
|
||||
if ttl_seconds <= 0:
|
||||
return False
|
||||
return (time.time() - self.fetched_at) < ttl_seconds
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public dataclasses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class FetchResult:
|
||||
"""Outcome of a single BSM pull."""
|
||||
|
||||
secrets: Dict[str, str] = field(default_factory=dict)
|
||||
applied: List[str] = field(default_factory=list) # set into os.environ
|
||||
skipped: List[str] = field(default_factory=list) # already set, not overridden
|
||||
warnings: List[str] = field(default_factory=list) # non-fatal issues
|
||||
error: Optional[str] = None # fatal: nothing was fetched
|
||||
binary_path: Optional[Path] = None
|
||||
|
||||
@property
|
||||
def ok(self) -> bool:
|
||||
return self.error is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Binary discovery + lazy install
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _hermes_bin_dir() -> Path:
|
||||
"""Where Hermes stores its managed binaries. Profile-aware."""
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
return get_hermes_home() / "bin"
|
||||
|
||||
|
||||
def find_bws(*, install_if_missing: bool = False) -> Optional[Path]:
|
||||
"""Return a path to a usable ``bws`` binary, or None.
|
||||
|
||||
Resolution order:
|
||||
1. ``<hermes_home>/bin/bws`` (our managed copy — preferred)
|
||||
2. ``shutil.which("bws")`` (system PATH)
|
||||
|
||||
When ``install_if_missing`` is True and neither resolves, this calls
|
||||
:func:`install_bws` to download and verify the pinned version.
|
||||
"""
|
||||
managed = _hermes_bin_dir() / _platform_binary_name()
|
||||
if managed.exists() and os.access(managed, os.X_OK):
|
||||
return managed
|
||||
|
||||
system = shutil.which("bws")
|
||||
if system:
|
||||
return Path(system)
|
||||
|
||||
if install_if_missing:
|
||||
try:
|
||||
return install_bws()
|
||||
except Exception as exc: # noqa: BLE001 — never block startup
|
||||
logger.warning("bws auto-install failed: %s", exc)
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _platform_binary_name() -> str:
|
||||
return "bws.exe" if platform.system() == "Windows" else "bws"
|
||||
|
||||
|
||||
def _platform_asset_name() -> str:
|
||||
"""Map (uname, arch, libc) → the upstream asset filename.
|
||||
|
||||
Asset names follow Rust's target triple convention. Linux defaults
|
||||
to gnu (glibc); we switch to musl only if ldd --version says so.
|
||||
"""
|
||||
system = platform.system()
|
||||
machine = platform.machine().lower()
|
||||
|
||||
if system == "Darwin":
|
||||
# Universal binary works on both Intel and Apple Silicon — no
|
||||
# need to pick a per-arch asset.
|
||||
return f"bws-macos-universal-{_BWS_VERSION}.zip"
|
||||
|
||||
if system == "Windows":
|
||||
arch = "aarch64" if machine in ("arm64", "aarch64") else "x86_64"
|
||||
return f"bws-{arch}-pc-windows-msvc-{_BWS_VERSION}.zip"
|
||||
|
||||
if system == "Linux":
|
||||
arch = "aarch64" if machine in ("arm64", "aarch64") else "x86_64"
|
||||
libc = "gnu"
|
||||
# ldd --version writes to stderr on glibc, stdout on musl. We
|
||||
# don't need bullet-proof detection — getting it wrong falls
|
||||
# back to a clear error from the binary loader, which we catch.
|
||||
try:
|
||||
res = subprocess.run(
|
||||
["ldd", "--version"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=2,
|
||||
)
|
||||
if "musl" in (res.stdout + res.stderr).lower():
|
||||
libc = "musl"
|
||||
except (OSError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
return f"bws-{arch}-unknown-linux-{libc}-{_BWS_VERSION}.zip"
|
||||
|
||||
raise RuntimeError(
|
||||
f"Unsupported platform for bws auto-install: {system} {machine}"
|
||||
)
|
||||
|
||||
|
||||
def install_bws(*, force: bool = False) -> Path:
|
||||
"""Download, verify, and install the pinned ``bws`` binary.
|
||||
|
||||
Returns the path to the installed executable. Raises on any
|
||||
failure (network, checksum, extraction) — callers in the auto-install
|
||||
path catch these; the user-facing ``hermes secrets bitwarden setup``
|
||||
surface lets them propagate so the wizard can show a clear error.
|
||||
"""
|
||||
bin_dir = _hermes_bin_dir()
|
||||
bin_dir.mkdir(parents=True, exist_ok=True)
|
||||
target = bin_dir / _platform_binary_name()
|
||||
|
||||
if target.exists() and not force:
|
||||
return target
|
||||
|
||||
asset_name = _platform_asset_name()
|
||||
asset_url = f"{_BWS_RELEASE_BASE}/{asset_name}"
|
||||
checksum_url = f"{_BWS_RELEASE_BASE}/{_BWS_CHECKSUM_NAME}"
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix="hermes-bws-") as tmpdir:
|
||||
tmp = Path(tmpdir)
|
||||
zip_path = tmp / asset_name
|
||||
checksum_path = tmp / _BWS_CHECKSUM_NAME
|
||||
|
||||
logger.info("Downloading %s", asset_url)
|
||||
_http_download(asset_url, zip_path)
|
||||
_http_download(checksum_url, checksum_path)
|
||||
|
||||
expected = _expected_sha256(checksum_path, asset_name)
|
||||
actual = _sha256_file(zip_path)
|
||||
if expected.lower() != actual.lower():
|
||||
raise RuntimeError(
|
||||
f"Checksum mismatch for {asset_name}: "
|
||||
f"expected {expected}, got {actual}"
|
||||
)
|
||||
|
||||
with zipfile.ZipFile(zip_path) as zf:
|
||||
member = _pick_zip_member(zf, _platform_binary_name())
|
||||
zf.extract(member, tmp)
|
||||
extracted = tmp / member
|
||||
|
||||
# Move into place atomically. We write to a sibling tempfile in
|
||||
# the final directory so the rename can't cross filesystems.
|
||||
fd, staged = tempfile.mkstemp(dir=str(bin_dir), prefix=".bws_")
|
||||
os.close(fd)
|
||||
shutil.copy2(extracted, staged)
|
||||
os.chmod(
|
||||
staged,
|
||||
stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
|
||||
| stat.S_IRGRP | stat.S_IXGRP
|
||||
| stat.S_IROTH | stat.S_IXOTH,
|
||||
)
|
||||
os.replace(staged, target)
|
||||
|
||||
logger.info("Installed bws %s at %s", _BWS_VERSION, target)
|
||||
return target
|
||||
|
||||
|
||||
def _http_download(url: str, dest: Path) -> None:
|
||||
req = urllib.request.Request(url, headers={"User-Agent": "hermes-agent"})
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=_BWS_DOWNLOAD_TIMEOUT) as resp: # noqa: S310
|
||||
with open(dest, "wb") as f:
|
||||
shutil.copyfileobj(resp, f)
|
||||
except urllib.error.URLError as exc:
|
||||
raise RuntimeError(f"Failed to download {url}: {exc}") from exc
|
||||
|
||||
|
||||
def _expected_sha256(checksum_file: Path, asset_name: str) -> str:
|
||||
"""Parse the upstream ``bws-sha256-checksums-X.Y.Z.txt`` file.
|
||||
|
||||
Format is the standard ``sha256sum`` output: ``<hex> <filename>``,
|
||||
one per line.
|
||||
"""
|
||||
text = checksum_file.read_text(encoding="utf-8", errors="replace")
|
||||
for line in text.splitlines():
|
||||
parts = line.strip().split()
|
||||
if len(parts) >= 2 and parts[-1] == asset_name:
|
||||
return parts[0]
|
||||
raise RuntimeError(
|
||||
f"No checksum entry for {asset_name} in {checksum_file.name}"
|
||||
)
|
||||
|
||||
|
||||
def _sha256_file(path: Path) -> str:
|
||||
h = hashlib.sha256()
|
||||
with open(path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(65536), b""):
|
||||
h.update(chunk)
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def _pick_zip_member(zf: zipfile.ZipFile, binary_name: str) -> str:
|
||||
"""Find the binary inside the upstream zip.
|
||||
|
||||
Historically the archive has been flat (``bws`` at the root) but we
|
||||
tolerate a top-level directory just in case upstream changes.
|
||||
"""
|
||||
candidates = [n for n in zf.namelist() if n.split("/")[-1] == binary_name]
|
||||
if not candidates:
|
||||
raise RuntimeError(
|
||||
f"Could not find {binary_name} inside downloaded archive "
|
||||
f"(members: {zf.namelist()[:5]}...)"
|
||||
)
|
||||
# Prefer the shortest path (i.e. root over nested) for determinism.
|
||||
candidates.sort(key=len)
|
||||
return candidates[0]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Secret fetch + apply
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _token_fingerprint(token: str) -> str:
|
||||
"""SHA-256 prefix used as a cache key — never logged, never displayed."""
|
||||
return hashlib.sha256(token.encode("utf-8")).hexdigest()[:16]
|
||||
|
||||
|
||||
def fetch_bitwarden_secrets(
|
||||
*,
|
||||
access_token: str,
|
||||
project_id: str,
|
||||
binary: Optional[Path] = None,
|
||||
cache_ttl_seconds: float = 300,
|
||||
use_cache: bool = True,
|
||||
) -> Tuple[Dict[str, str], List[str]]:
|
||||
"""Pull the secrets for ``project_id`` from Bitwarden Secrets Manager.
|
||||
|
||||
Returns ``(secrets_dict, warnings_list)``.
|
||||
|
||||
Raises :class:`RuntimeError` for fatal conditions (missing binary,
|
||||
auth failure, unparseable output). Callers in the env_loader path
|
||||
catch this and emit a single warning; callers in the user-facing
|
||||
setup wizard let it propagate.
|
||||
"""
|
||||
if not access_token:
|
||||
raise RuntimeError("Bitwarden access token is empty")
|
||||
if not project_id:
|
||||
raise RuntimeError("Bitwarden project_id is empty")
|
||||
|
||||
cache_key = (_token_fingerprint(access_token), project_id)
|
||||
if use_cache:
|
||||
cached = _CACHE.get(cache_key)
|
||||
if cached and cached.is_fresh(cache_ttl_seconds):
|
||||
return cached.secrets, []
|
||||
|
||||
bws = binary or find_bws(install_if_missing=True)
|
||||
if bws is None:
|
||||
raise RuntimeError(
|
||||
"bws binary not available — auto-install failed and `bws` is "
|
||||
"not on PATH. Install manually from "
|
||||
"https://github.com/bitwarden/sdk-sm/releases or re-run "
|
||||
"`hermes secrets bitwarden setup`."
|
||||
)
|
||||
|
||||
secrets, warnings = _run_bws_list(bws, access_token, project_id)
|
||||
_CACHE[cache_key] = _CachedFetch(secrets=secrets, fetched_at=time.time())
|
||||
return secrets, warnings
|
||||
|
||||
|
||||
def _run_bws_list(
|
||||
bws: Path, access_token: str, project_id: str
|
||||
) -> Tuple[Dict[str, str], List[str]]:
|
||||
cmd = [str(bws), "secret", "list", project_id, "--output", "json"]
|
||||
env = os.environ.copy()
|
||||
env["BWS_ACCESS_TOKEN"] = access_token
|
||||
# Make sure we're not echoing telemetry / colour codes into json.
|
||||
env.setdefault("NO_COLOR", "1")
|
||||
|
||||
try:
|
||||
proc = subprocess.run( # noqa: S603 — bws path is trusted
|
||||
cmd,
|
||||
env=env,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=_BWS_RUN_TIMEOUT,
|
||||
)
|
||||
except subprocess.TimeoutExpired as exc:
|
||||
raise RuntimeError(
|
||||
f"bws timed out after {_BWS_RUN_TIMEOUT}s fetching secrets"
|
||||
) from exc
|
||||
except OSError as exc:
|
||||
raise RuntimeError(f"failed to invoke bws: {exc}") from exc
|
||||
|
||||
if proc.returncode != 0:
|
||||
# bws writes auth/network errors to stderr in plain English.
|
||||
# Strip ANSI just in case and surface the first 200 chars.
|
||||
err = (proc.stderr or proc.stdout or "").strip().replace("\x1b", "")
|
||||
raise RuntimeError(
|
||||
f"bws exited {proc.returncode}: {err[:200]}"
|
||||
)
|
||||
|
||||
raw = proc.stdout.strip()
|
||||
if not raw:
|
||||
return {}, ["bws returned no output (empty project?)"]
|
||||
|
||||
try:
|
||||
payload = json.loads(raw)
|
||||
except json.JSONDecodeError as exc:
|
||||
raise RuntimeError(f"bws returned non-JSON output: {exc}") from exc
|
||||
|
||||
if not isinstance(payload, list):
|
||||
raise RuntimeError(
|
||||
f"bws returned unexpected shape: {type(payload).__name__}"
|
||||
)
|
||||
|
||||
secrets: Dict[str, str] = {}
|
||||
warnings: List[str] = []
|
||||
for item in payload:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
key = item.get("key")
|
||||
value = item.get("value")
|
||||
if not isinstance(key, str) or not isinstance(value, str):
|
||||
continue
|
||||
if not _is_valid_env_name(key):
|
||||
warnings.append(
|
||||
f"Skipping secret {key!r}: not a valid env-var name"
|
||||
)
|
||||
continue
|
||||
secrets[key] = value
|
||||
return secrets, warnings
|
||||
|
||||
|
||||
def _is_valid_env_name(name: str) -> bool:
|
||||
if not name:
|
||||
return False
|
||||
if not (name[0].isalpha() or name[0] == "_"):
|
||||
return False
|
||||
return all(c.isalnum() or c == "_" for c in name)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public entry point — called from hermes_cli.env_loader
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def apply_bitwarden_secrets(
|
||||
*,
|
||||
enabled: bool,
|
||||
access_token_env: str = "BWS_ACCESS_TOKEN",
|
||||
project_id: str = "",
|
||||
override_existing: bool = False,
|
||||
cache_ttl_seconds: float = 300,
|
||||
auto_install: bool = True,
|
||||
) -> FetchResult:
|
||||
"""Pull secrets from BSM and set them on ``os.environ``.
|
||||
|
||||
This is the function ``load_hermes_dotenv()`` calls after the .env
|
||||
files have loaded. It is intentionally defensive — any failure
|
||||
returns a :class:`FetchResult` with ``error`` set; it never raises.
|
||||
|
||||
Parameters mirror the ``secrets.bitwarden.*`` config keys so the
|
||||
caller can just splat the dict in.
|
||||
"""
|
||||
result = FetchResult()
|
||||
|
||||
if not enabled:
|
||||
return result
|
||||
|
||||
access_token = os.environ.get(access_token_env, "").strip()
|
||||
if not access_token:
|
||||
result.error = (
|
||||
f"secrets.bitwarden.enabled is true but {access_token_env} is "
|
||||
"not set. Run `hermes secrets bitwarden setup`."
|
||||
)
|
||||
return result
|
||||
|
||||
if not project_id:
|
||||
result.error = (
|
||||
"secrets.bitwarden.project_id is empty. "
|
||||
"Run `hermes secrets bitwarden setup`."
|
||||
)
|
||||
return result
|
||||
|
||||
binary = find_bws(install_if_missing=auto_install)
|
||||
result.binary_path = binary
|
||||
if binary is None:
|
||||
result.error = (
|
||||
"bws binary not available and auto-install is disabled. "
|
||||
"Run `hermes secrets bitwarden setup` to install."
|
||||
)
|
||||
return result
|
||||
|
||||
try:
|
||||
secrets, warnings = fetch_bitwarden_secrets(
|
||||
access_token=access_token,
|
||||
project_id=project_id,
|
||||
binary=binary,
|
||||
cache_ttl_seconds=cache_ttl_seconds,
|
||||
)
|
||||
except RuntimeError as exc:
|
||||
result.error = str(exc)
|
||||
return result
|
||||
|
||||
result.secrets = secrets
|
||||
result.warnings.extend(warnings)
|
||||
|
||||
for key, value in secrets.items():
|
||||
if key == access_token_env:
|
||||
# Don't let BSM clobber the very token we used to fetch
|
||||
# itself — that would be a footgun if someone stored the
|
||||
# token as a BSM secret too.
|
||||
result.skipped.append(key)
|
||||
continue
|
||||
if not override_existing and os.environ.get(key):
|
||||
result.skipped.append(key)
|
||||
continue
|
||||
os.environ[key] = value
|
||||
result.applied.append(key)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test hook — used by hermetic tests to flush the cache between cases.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _reset_cache_for_tests() -> None:
|
||||
_CACHE.clear()
|
||||
+39
-2
@@ -24,7 +24,43 @@ PLATFORM_MAP = {
|
||||
"windows": "win32",
|
||||
}
|
||||
|
||||
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
|
||||
EXCLUDED_SKILL_DIRS = frozenset(
|
||||
(
|
||||
".git",
|
||||
".github",
|
||||
".hub",
|
||||
".archive",
|
||||
".venv",
|
||||
"venv",
|
||||
"node_modules",
|
||||
"site-packages",
|
||||
"__pycache__",
|
||||
".tox",
|
||||
".nox",
|
||||
".pytest_cache",
|
||||
".mypy_cache",
|
||||
".ruff_cache",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def is_excluded_skill_path(path) -> bool:
|
||||
"""True if any component of *path* is in EXCLUDED_SKILL_DIRS.
|
||||
|
||||
Use this on every SKILL.md path produced by ``rglob`` to prune
|
||||
dependency, virtualenv, VCS, and cache directories. Centralising the
|
||||
check here keeps every skill-scanning site in sync with the shared
|
||||
exclusion set.
|
||||
|
||||
Accepts a Path or string.
|
||||
"""
|
||||
try:
|
||||
parts = path.parts # Path
|
||||
except AttributeError:
|
||||
from pathlib import PurePath
|
||||
parts = PurePath(str(path)).parts
|
||||
return any(part in EXCLUDED_SKILL_DIRS for part in parts)
|
||||
|
||||
|
||||
# ── Lazy YAML loader ─────────────────────────────────────────────────────
|
||||
|
||||
@@ -478,7 +514,8 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
|
||||
def iter_skill_index_files(skills_dir: Path, filename: str):
|
||||
"""Walk skills_dir yielding sorted paths matching *filename*.
|
||||
|
||||
Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
|
||||
Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
|
||||
directories so dependencies cannot register nested skills.
|
||||
"""
|
||||
matches = []
|
||||
for root, dirs, files in os.walk(skills_dir, followlinks=True):
|
||||
|
||||
@@ -112,17 +112,31 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
def convert_messages(
|
||||
self, messages: list[dict[str, Any]], **kwargs
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
"""Messages are already in OpenAI format — strip internal fields
|
||||
that strict chat-completions providers reject with HTTP 400/422.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id``/``response_item_id``
|
||||
on tool_calls) that strict chat-completions providers reject with 400/422.
|
||||
Strips:
|
||||
|
||||
- Codex Responses API fields: ``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id`` /
|
||||
``response_item_id`` on ``tool_calls`` entries.
|
||||
- ``tool_name`` on tool-result messages — written by
|
||||
``make_tool_result_message()`` for the SQLite FTS index, but not
|
||||
part of the Chat Completions schema. Strict providers (Fireworks,
|
||||
Moonshot/Kimi) reject any payload containing it with
|
||||
``Extra inputs are not permitted, field: 'messages[N].tool_name'``.
|
||||
Permissive providers (OpenRouter, MiniMax) silently ignore the
|
||||
field, which masked the bug for months.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
|
||||
if (
|
||||
"codex_reasoning_items" in msg
|
||||
or "codex_message_items" in msg
|
||||
or "tool_name" in msg
|
||||
):
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
@@ -145,6 +159,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
continue
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
msg.pop("codex_message_items", None)
|
||||
msg.pop("tool_name", None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
|
||||
@@ -116,14 +116,11 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
if reasoning_enabled and is_xai_responses:
|
||||
from agent.model_metadata import grok_supports_reasoning_effort
|
||||
|
||||
# NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content``
|
||||
# any more. xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects
|
||||
# replayed encrypted reasoning items on turn 2+ — see
|
||||
# _chat_messages_to_responses_input docstring. Requesting the field
|
||||
# back would just have us cache something we then must strip. Grok
|
||||
# still reasons natively each turn; coherence across turns rides on
|
||||
# the visible message text alone.
|
||||
kwargs["include"] = []
|
||||
# Ask xAI to echo back encrypted reasoning items so we can
|
||||
# replay them on subsequent turns for cross-turn coherence.
|
||||
# See agent/codex_responses_adapter._chat_messages_to_responses_input
|
||||
# for the May 2026 reversal of the earlier suppression gate.
|
||||
kwargs["include"] = ["reasoning.encrypted_content"]
|
||||
# xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
|
||||
# / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
|
||||
# those models reason natively. Only send the effort dial when
|
||||
|
||||
@@ -6501,12 +6501,6 @@ class HermesCLI:
|
||||
if self.agent:
|
||||
self.agent.session_id = new_session_id
|
||||
self.agent.session_start = now
|
||||
# Redirect the JSON session log to the new branch session file so
|
||||
# messages written after branching land in the correct file.
|
||||
if hasattr(self.agent, "session_log_file") and hasattr(self.agent, "logs_dir"):
|
||||
self.agent.session_log_file = (
|
||||
self.agent.logs_dir / f"session_{new_session_id}.json"
|
||||
)
|
||||
self.agent.reset_session_state()
|
||||
if hasattr(self.agent, "_last_flushed_db_idx"):
|
||||
self.agent._last_flushed_db_idx = len(self.conversation_history)
|
||||
@@ -10227,6 +10221,7 @@ class HermesCLI:
|
||||
self._voice_processing = True
|
||||
|
||||
submitted = False
|
||||
transcription_failed = False
|
||||
wav_path = None
|
||||
try:
|
||||
if self._voice_recorder is None:
|
||||
@@ -10275,18 +10270,24 @@ class HermesCLI:
|
||||
else:
|
||||
error = result.get("error", "Unknown error")
|
||||
_cprint(f"\n{_DIM}Transcription failed: {error}{_RST}")
|
||||
transcription_failed = True
|
||||
|
||||
except Exception as e:
|
||||
_cprint(f"\n{_DIM}Voice processing error: {e}{_RST}")
|
||||
transcription_failed = wav_path is not None
|
||||
finally:
|
||||
with self._voice_lock:
|
||||
self._voice_processing = False
|
||||
if hasattr(self, '_app') and self._app:
|
||||
self._app.invalidate()
|
||||
# Clean up temp file
|
||||
# Clean up temp file unless transcription failed. On failure, keep
|
||||
# the source recording so long dictation is not lost.
|
||||
try:
|
||||
if wav_path and os.path.isfile(wav_path):
|
||||
os.unlink(wav_path)
|
||||
if transcription_failed:
|
||||
_cprint(f"{_DIM}Recording preserved at: {wav_path}{_RST}")
|
||||
else:
|
||||
os.unlink(wav_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -14429,13 +14430,54 @@ def main(
|
||||
# Only print the final response and parseable session info.
|
||||
cli.tool_progress_mode = "off"
|
||||
if cli._ensure_runtime_credentials():
|
||||
effective_query = query
|
||||
effective_query: Any = query
|
||||
if single_query_images:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query,
|
||||
single_query_images,
|
||||
announce=False,
|
||||
)
|
||||
# Honour the same image-routing decision used by the
|
||||
# interactive path. With a vision-capable model (incl.
|
||||
# custom-provider models declared via
|
||||
# `model.supports_vision: true`), attach images natively
|
||||
# as image_url content parts. Otherwise fall back to the
|
||||
# text-pipeline (vision_analyze pre-description).
|
||||
_img_mode = "text"
|
||||
_build_parts = None
|
||||
try:
|
||||
from agent.image_routing import (
|
||||
build_native_content_parts as _build_parts, # noqa: F811
|
||||
)
|
||||
from agent.image_routing import decide_image_input_mode
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
_img_mode = decide_image_input_mode(
|
||||
(cli.provider or "").strip(),
|
||||
(cli.model or "").strip(),
|
||||
load_config(),
|
||||
)
|
||||
except Exception:
|
||||
_img_mode = "text"
|
||||
|
||||
if _img_mode == "native" and _build_parts is not None:
|
||||
try:
|
||||
_parts, _skipped = _build_parts(
|
||||
query if isinstance(query, str) else "",
|
||||
[str(p) for p in single_query_images],
|
||||
)
|
||||
if any(p.get("type") == "image_url" for p in _parts):
|
||||
effective_query = _parts
|
||||
else:
|
||||
# All images unreadable — text fallback.
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query, single_query_images, announce=False,
|
||||
)
|
||||
except Exception:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query, single_query_images, announce=False,
|
||||
)
|
||||
else:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query,
|
||||
single_query_images,
|
||||
announce=False,
|
||||
)
|
||||
turn_route = cli._resolve_turn_agent_config(effective_query)
|
||||
if turn_route["signature"] != cli._active_agent_route_signature:
|
||||
cli.agent = None
|
||||
|
||||
@@ -114,6 +114,7 @@ _HOME_TARGET_ENV_VARS = {
|
||||
"bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
|
||||
"qqbot": "QQBOT_HOME_CHANNEL",
|
||||
"whatsapp": "WHATSAPP_HOME_CHANNEL",
|
||||
"whatsapp_cloud": "WHATSAPP_CLOUD_HOME_CHANNEL",
|
||||
}
|
||||
|
||||
# Legacy env var names kept for back-compat. Each entry is the current
|
||||
|
||||
+66
-1
@@ -109,6 +109,7 @@ class Platform(Enum):
|
||||
TELEGRAM = "telegram"
|
||||
DISCORD = "discord"
|
||||
WHATSAPP = "whatsapp"
|
||||
WHATSAPP_CLOUD = "whatsapp_cloud"
|
||||
SLACK = "slack"
|
||||
SIGNAL = "signal"
|
||||
MATTERMOST = "mattermost"
|
||||
@@ -419,6 +420,9 @@ _PLATFORM_CONNECTED_CHECKERS: dict[Platform, Callable[[PlatformConfig], bool]] =
|
||||
cfg.extra.get("account_id") and (cfg.token or cfg.extra.get("token"))
|
||||
),
|
||||
Platform.WHATSAPP: lambda cfg: True, # bridge handles auth
|
||||
Platform.WHATSAPP_CLOUD: lambda cfg: bool(
|
||||
cfg.extra.get("phone_number_id") and cfg.extra.get("access_token")
|
||||
),
|
||||
Platform.SIGNAL: lambda cfg: bool(cfg.extra.get("http_url")),
|
||||
Platform.EMAIL: lambda cfg: bool(cfg.extra.get("address")),
|
||||
Platform.SMS: lambda cfg: bool(os.getenv("TWILIO_ACCOUNT_SID")),
|
||||
@@ -830,6 +834,8 @@ def load_gateway_config() -> GatewayConfig:
|
||||
bridged["require_mention"] = platform_cfg["require_mention"]
|
||||
if plat == Platform.TELEGRAM and "allowed_chats" in platform_cfg:
|
||||
bridged["allowed_chats"] = platform_cfg["allowed_chats"]
|
||||
if plat == Platform.TELEGRAM and "group_allowed_chats" in platform_cfg:
|
||||
bridged["group_allowed_chats"] = platform_cfg["group_allowed_chats"]
|
||||
if plat == Platform.TELEGRAM and "allowed_topics" in platform_cfg:
|
||||
bridged["allowed_topics"] = platform_cfg["allowed_topics"]
|
||||
if "free_response_channels" in platform_cfg:
|
||||
@@ -838,6 +844,8 @@ def load_gateway_config() -> GatewayConfig:
|
||||
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
|
||||
if "exclusive_bot_mentions" in platform_cfg:
|
||||
bridged["exclusive_bot_mentions"] = platform_cfg["exclusive_bot_mentions"]
|
||||
if plat == Platform.TELEGRAM and "observe_unmentioned_group_messages" in platform_cfg:
|
||||
bridged["observe_unmentioned_group_messages"] = platform_cfg["observe_unmentioned_group_messages"]
|
||||
if "dm_policy" in platform_cfg:
|
||||
bridged["dm_policy"] = platform_cfg["dm_policy"]
|
||||
if "allow_from" in platform_cfg:
|
||||
@@ -1024,6 +1032,8 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["TELEGRAM_EXCLUSIVE_BOT_MENTIONS"] = str(telegram_cfg["exclusive_bot_mentions"]).lower()
|
||||
if "guest_mode" in telegram_cfg and not os.getenv("TELEGRAM_GUEST_MODE"):
|
||||
os.environ["TELEGRAM_GUEST_MODE"] = str(telegram_cfg["guest_mode"]).lower()
|
||||
if "observe_unmentioned_group_messages" in telegram_cfg and not os.getenv("TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES"):
|
||||
os.environ["TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES"] = str(telegram_cfg["observe_unmentioned_group_messages"]).lower()
|
||||
frc = telegram_cfg.get("free_response_chats")
|
||||
if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
|
||||
if isinstance(frc, list):
|
||||
@@ -1074,7 +1084,7 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if isinstance(group_allowed_chats, list):
|
||||
group_allowed_chats = ",".join(str(v) for v in group_allowed_chats)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_CHATS"] = str(group_allowed_chats)
|
||||
for _telegram_extra_key in ("guest_mode", "disable_link_previews"):
|
||||
for _telegram_extra_key in ("guest_mode", "disable_link_previews", "observe_unmentioned_group_messages"):
|
||||
if _telegram_extra_key in telegram_cfg:
|
||||
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
@@ -1361,6 +1371,61 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
thread_id=os.getenv("WHATSAPP_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# WhatsApp Cloud API (official Business Platform via Meta).
|
||||
# Distinct from the Baileys bridge: pure HTTP graph.facebook.com calls
|
||||
# outbound, public webhook inbound. Both adapters can run in parallel
|
||||
# against different phone numbers.
|
||||
whatsapp_cloud_phone_id = os.getenv("WHATSAPP_CLOUD_PHONE_NUMBER_ID")
|
||||
whatsapp_cloud_token = os.getenv("WHATSAPP_CLOUD_ACCESS_TOKEN")
|
||||
if whatsapp_cloud_phone_id and whatsapp_cloud_token:
|
||||
if Platform.WHATSAPP_CLOUD not in config.platforms:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD] = PlatformConfig()
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].enabled = True
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra.update({
|
||||
"phone_number_id": whatsapp_cloud_phone_id,
|
||||
"access_token": whatsapp_cloud_token,
|
||||
})
|
||||
# Optional: app_id / app_secret (signature verification)
|
||||
wa_cloud_app_id = os.getenv("WHATSAPP_CLOUD_APP_ID")
|
||||
if wa_cloud_app_id:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["app_id"] = wa_cloud_app_id
|
||||
wa_cloud_app_secret = os.getenv("WHATSAPP_CLOUD_APP_SECRET")
|
||||
if wa_cloud_app_secret:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["app_secret"] = wa_cloud_app_secret
|
||||
# Optional: WABA id (analytics, future use)
|
||||
wa_cloud_waba_id = os.getenv("WHATSAPP_CLOUD_WABA_ID")
|
||||
if wa_cloud_waba_id:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["waba_id"] = wa_cloud_waba_id
|
||||
# Webhook verify token — Meta hub.verify_token shared secret
|
||||
wa_cloud_verify_token = os.getenv("WHATSAPP_CLOUD_VERIFY_TOKEN")
|
||||
if wa_cloud_verify_token:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["verify_token"] = wa_cloud_verify_token
|
||||
# Webhook server bind config (defaults baked into the adapter)
|
||||
wa_cloud_host = os.getenv("WHATSAPP_CLOUD_WEBHOOK_HOST")
|
||||
if wa_cloud_host:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["webhook_host"] = wa_cloud_host
|
||||
wa_cloud_port = os.getenv("WHATSAPP_CLOUD_WEBHOOK_PORT")
|
||||
if wa_cloud_port:
|
||||
try:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["webhook_port"] = int(wa_cloud_port)
|
||||
except ValueError:
|
||||
pass
|
||||
wa_cloud_path = os.getenv("WHATSAPP_CLOUD_WEBHOOK_PATH")
|
||||
if wa_cloud_path:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["webhook_path"] = wa_cloud_path
|
||||
# Graph API version override (rarely needed)
|
||||
wa_cloud_api_version = os.getenv("WHATSAPP_CLOUD_API_VERSION")
|
||||
if wa_cloud_api_version:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["api_version"] = wa_cloud_api_version
|
||||
whatsapp_cloud_home = os.getenv("WHATSAPP_CLOUD_HOME_CHANNEL")
|
||||
if whatsapp_cloud_home and Platform.WHATSAPP_CLOUD in config.platforms:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].home_channel = HomeChannel(
|
||||
platform=Platform.WHATSAPP_CLOUD,
|
||||
chat_id=whatsapp_cloud_home,
|
||||
name=os.getenv("WHATSAPP_CLOUD_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("WHATSAPP_CLOUD_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Slack
|
||||
slack_token = os.getenv("SLACK_BOT_TOKEN")
|
||||
if slack_token:
|
||||
|
||||
@@ -95,6 +95,12 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {
|
||||
# Tier 3 — no edit support, progress messages are permanent
|
||||
"signal": _TIER_LOW,
|
||||
"whatsapp": _TIER_MEDIUM, # Baileys bridge supports /edit
|
||||
# WhatsApp Cloud API: Meta added message editing in 2023 but the
|
||||
# Hermes Cloud adapter doesn't implement edit_message yet, so we
|
||||
# stay on TIER_LOW (tool_progress off) to avoid spamming each
|
||||
# status update as a separate message. Promote to TIER_MEDIUM once
|
||||
# Cloud's edit_message lands.
|
||||
"whatsapp_cloud": _TIER_LOW,
|
||||
"bluebubbles": _TIER_LOW,
|
||||
"weixin": _TIER_LOW,
|
||||
"wecom": _TIER_LOW,
|
||||
|
||||
@@ -64,7 +64,6 @@ def mirror_to_session(
|
||||
"mirror_source": source_label,
|
||||
}
|
||||
|
||||
_append_to_jsonl(session_id, mirror_msg)
|
||||
_append_to_sqlite(session_id, mirror_msg)
|
||||
|
||||
logger.debug("Mirror: wrote to session %s (from %s)", session_id, source_label)
|
||||
@@ -150,15 +149,6 @@ def _find_session_id(
|
||||
return best_entry.get("session_id")
|
||||
|
||||
|
||||
def _append_to_jsonl(session_id: str, message: dict) -> None:
|
||||
"""Append a message to the JSONL transcript file."""
|
||||
transcript_path = _SESSIONS_DIR / f"{session_id}.jsonl"
|
||||
try:
|
||||
with open(transcript_path, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(message, ensure_ascii=False) + "\n")
|
||||
except Exception as e:
|
||||
logger.debug("Mirror JSONL write failed: %s", e)
|
||||
|
||||
|
||||
def _append_to_sqlite(session_id: str, message: dict) -> None:
|
||||
"""Append a message to the SQLite session database."""
|
||||
|
||||
@@ -52,6 +52,22 @@ for the full pattern (Template Buttons postback at 45s, `RequestCache`
|
||||
state machine, `interrupt_session_activity` override for `/stop`
|
||||
orphans) and the developer-guide page for the prose walkthrough.
|
||||
|
||||
**Sibling adapters that share behavior.** When a single platform has
|
||||
two transport modes the user picks between — unofficial vs official
|
||||
APIs, polling vs websocket, library A vs library B — the right
|
||||
structure is two adapters that share a behavior mixin. WhatsApp does
|
||||
this: `gateway/platforms/whatsapp.py` (Baileys bridge) and
|
||||
`gateway/platforms/whatsapp_cloud.py` (Meta Cloud API) both inherit
|
||||
from `WhatsAppBehaviorMixin` in `gateway/platforms/whatsapp_common.py`.
|
||||
The mixin owns gating, allow-lists, mention parsing, broadcast
|
||||
filters, and the WhatsApp-flavored markdown conversion — everything
|
||||
that's platform-protocol-agnostic. Each adapter owns its transport.
|
||||
Both register distinct `Platform.*` enum values so the gateway can run
|
||||
both simultaneously against different phone numbers. The mixin must
|
||||
come **first** in the bases list — `class WhatsAppAdapter(Mixin,
|
||||
BasePlatformAdapter)` — so the mixin's `format_message` overrides
|
||||
`BasePlatformAdapter`'s generic default.
|
||||
|
||||
See `plugins/platforms/irc/`, `plugins/platforms/teams/`, and
|
||||
`plugins/platforms/google_chat/` for complete working examples, and
|
||||
`website/docs/developer-guide/adding-platform-adapters.md` for the full
|
||||
@@ -94,6 +110,19 @@ The adapter is a subclass of `BasePlatformAdapter` from `gateway/platforms/base.
|
||||
| `send_animation(chat_id, path, caption)` | Send a GIF/animation |
|
||||
| `send_image_file(chat_id, path, caption)` | Send image from local file |
|
||||
|
||||
### Interactive UX (recommended if your platform supports tappable buttons)
|
||||
|
||||
If your platform supports interactive button/menu messages, implement these for a more polished agent experience. They all degrade gracefully to plain text when not overridden:
|
||||
|
||||
| Method | Purpose |
|
||||
|--------|---------|
|
||||
| `send_clarify(chat_id, question, choices, clarify_id, session_key, ...)` | Render the `clarify` tool's multi-choice question as tappable buttons. Pair with inbound dispatch that routes button taps to `tools.clarify_gateway.resolve_gateway_clarify`. |
|
||||
| `send_exec_approval(chat_id, command, session_key, description, ...)` | Render dangerous-command approval as Approve/Deny buttons. Inbound dispatch routes to `tools.approval.resolve_gateway_approval`. |
|
||||
| `send_slash_confirm(chat_id, title, message, session_key, confirm_id, ...)` | Render slash-command confirmations (e.g. `/reload-mcp`) as Once/Always/Cancel buttons. Inbound dispatch routes to `tools.slash_confirm.resolve`. |
|
||||
| `send_model_picker(...)` | Interactive `/model` picker. Used by Telegram and Discord. |
|
||||
|
||||
See `gateway/platforms/telegram.py`, `discord.py`, and `whatsapp_cloud.py` for reference implementations. The button-callback id convention (`cl:<id>:<idx>`, `appr:<id>:<choice>`, `sc:<choice>:<id>`) is shared across adapters — match it so the gateway-side resolvers work without modification.
|
||||
|
||||
### Required function
|
||||
|
||||
```python
|
||||
|
||||
@@ -2706,8 +2706,13 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
Discord's TYPING_START gateway event is unreliable in DMs for bots.
|
||||
Instead, start a background loop that hits the typing endpoint every
|
||||
8 seconds (typing indicator lasts ~10s). The loop is cancelled when
|
||||
12 seconds (typing indicator lasts ~10s). The loop is cancelled when
|
||||
stop_typing() is called (after the response is sent).
|
||||
|
||||
Rate-limit handling: if a 429 is encountered, the loop logs a
|
||||
warning, sleeps for the ``retry_after`` duration (or a sensible
|
||||
default), and continues — it does NOT die on a single rate-limit
|
||||
hit. Only CancelledError (from stop_typing) stops the loop.
|
||||
"""
|
||||
if not self._client:
|
||||
return
|
||||
@@ -2727,9 +2732,22 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except asyncio.CancelledError:
|
||||
return
|
||||
except Exception as e:
|
||||
logger.debug("Discord typing indicator failed for %s: %s", chat_id, e)
|
||||
return
|
||||
await asyncio.sleep(8)
|
||||
# Don't die on 429 — backoff and continue
|
||||
retry_after = self._extract_discord_retry_after(e)
|
||||
if retry_after is not None:
|
||||
logger.warning(
|
||||
"Typing indicator rate-limited for %s; retrying in %.1fs",
|
||||
chat_id, retry_after,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"Discord typing indicator failed for %s: %s",
|
||||
chat_id, e,
|
||||
)
|
||||
return
|
||||
await asyncio.sleep(retry_after)
|
||||
continue
|
||||
await asyncio.sleep(12)
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
finally:
|
||||
|
||||
@@ -8,12 +8,14 @@ Uses python-telegram-bot library for:
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import html as _html
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -4178,6 +4180,23 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return bool(configured)
|
||||
return os.getenv("TELEGRAM_REQUIRE_MENTION", "false").lower() in {"true", "1", "yes", "on"}
|
||||
|
||||
def _telegram_observe_unmentioned_group_messages(self) -> bool:
|
||||
"""Return whether skipped unmentioned group messages are stored as context.
|
||||
|
||||
When enabled with ``require_mention``, Telegram matches the Yuanbao /
|
||||
OpenClaw-style group UX: observe ordinary group chatter in the session
|
||||
transcript, but only dispatch the agent when the bot is explicitly
|
||||
addressed.
|
||||
"""
|
||||
configured = self.config.extra.get("observe_unmentioned_group_messages")
|
||||
if configured is None:
|
||||
configured = self.config.extra.get("ingest_unmentioned_group_messages")
|
||||
if configured is not None:
|
||||
if isinstance(configured, str):
|
||||
return configured.lower() in {"true", "1", "yes", "on"}
|
||||
return bool(configured)
|
||||
return os.getenv("TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES", "false").lower() in {"true", "1", "yes", "on"}
|
||||
|
||||
def _telegram_guest_mode(self) -> bool:
|
||||
"""Return whether non-allowlisted groups may trigger via direct @mention."""
|
||||
configured = self.config.extra.get("guest_mode")
|
||||
@@ -4219,6 +4238,30 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
def _telegram_group_allowed_chats(self) -> set[str]:
|
||||
"""Return Telegram chats authorized at group scope."""
|
||||
raw = self.config.extra.get("group_allowed_chats")
|
||||
if raw is None:
|
||||
raw = os.getenv("TELEGRAM_GROUP_ALLOWED_CHATS", "")
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
def _telegram_observe_allowed_chats(self) -> set[str]:
|
||||
"""Chats where observed group context may use a shared source.
|
||||
|
||||
``group_allowed_chats`` is the gateway authorization allowlist for
|
||||
user-less group sources. ``allowed_chats`` remains an optional response
|
||||
gate; when set, observed context must satisfy both lists.
|
||||
"""
|
||||
group_allowed = self._telegram_group_allowed_chats()
|
||||
if not group_allowed:
|
||||
return set()
|
||||
response_allowed = self._telegram_allowed_chats()
|
||||
if response_allowed:
|
||||
return group_allowed & response_allowed
|
||||
return group_allowed
|
||||
|
||||
def _telegram_allowed_topics(self) -> set[str]:
|
||||
"""Return the whitelist of Telegram forum topic IDs this bot handles.
|
||||
|
||||
@@ -4466,6 +4509,126 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
cleaned = re.sub(rf"(?i)@{username}\b[,:\-]*\s*", "", text).strip()
|
||||
return cleaned or text
|
||||
|
||||
def _should_observe_unmentioned_group_message(self, message: Message) -> bool:
|
||||
"""Return True when a group message should be stored but not dispatched."""
|
||||
if not self._telegram_observe_unmentioned_group_messages():
|
||||
return False
|
||||
if not self._is_group_chat(message):
|
||||
return False
|
||||
|
||||
thread_id = getattr(message, "message_thread_id", None)
|
||||
allowed_topics = self._telegram_allowed_topics()
|
||||
if allowed_topics:
|
||||
topic_id = str(thread_id) if thread_id is not None else self._GENERAL_TOPIC_THREAD_ID
|
||||
if topic_id not in allowed_topics:
|
||||
return False
|
||||
|
||||
if thread_id is not None:
|
||||
try:
|
||||
if int(thread_id) in self._telegram_ignored_threads():
|
||||
return False
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
|
||||
chat_id_str = str(getattr(getattr(message, "chat", None), "id", ""))
|
||||
if self._telegram_exclusive_bot_mentions() and self._explicit_bot_mentions_exclude_self(message):
|
||||
return False
|
||||
|
||||
allowed = self._telegram_observe_allowed_chats()
|
||||
# Observed context is shared at chat/topic scope so a later trigger from
|
||||
# another user can see it. Require an explicit chat allowlist; that
|
||||
# keeps shared observed history limited to operator-approved groups and
|
||||
# lets gateway authorization pass even after the shared session source
|
||||
# drops the per-sender user_id.
|
||||
if not allowed or chat_id_str not in allowed:
|
||||
return False
|
||||
|
||||
# Only observe messages skipped by the require_mention gate. If the
|
||||
# message would be processed normally, let the dispatcher handle it;
|
||||
# if require_mention is disabled, every group message is a request.
|
||||
if chat_id_str in self._telegram_free_response_chats():
|
||||
return False
|
||||
if not self._telegram_require_mention():
|
||||
return False
|
||||
if self._is_reply_to_bot(message):
|
||||
return False
|
||||
if self._message_mentions_bot(message):
|
||||
return False
|
||||
if self._message_matches_mention_patterns(message):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _telegram_group_observe_shared_source(self, source):
|
||||
"""Return a chat/topic-scoped source for observed Telegram group context."""
|
||||
return dataclasses.replace(source, user_id=None, user_name=None, user_id_alt=None)
|
||||
|
||||
def _telegram_group_observe_attributed_text(self, event: MessageEvent) -> str:
|
||||
user_id = event.source.user_id or "unknown"
|
||||
sender = event.source.user_name or user_id
|
||||
return f"[{sender}|{user_id}]\n{event.text or ''}"
|
||||
|
||||
def _telegram_group_observe_channel_prompt(self) -> str:
|
||||
username = getattr(getattr(self, "_bot", None), "username", None) or "unknown"
|
||||
bot_id = getattr(getattr(self, "_bot", None), "id", None) or "unknown"
|
||||
return (
|
||||
"You are handling a Telegram group chat message.\n"
|
||||
f"- Your identity: user_id={bot_id}, @-mention name in this group=@{username}\n"
|
||||
"- Lines in history prefixed with `[nickname|user_id]` are observed Telegram group context "
|
||||
"and are not necessarily addressed to you.\n"
|
||||
"- Treat only the current new message as a request explicitly directed at you, "
|
||||
"and answer it directly."
|
||||
)
|
||||
|
||||
def _apply_telegram_group_observe_attribution(self, event: MessageEvent) -> MessageEvent:
|
||||
"""Align triggered group turns with observed-history attribution."""
|
||||
if not self._telegram_observe_unmentioned_group_messages():
|
||||
return event
|
||||
raw_message = getattr(event, "raw_message", None)
|
||||
if not raw_message or not self._is_group_chat(raw_message):
|
||||
return event
|
||||
chat_id_str = str(getattr(getattr(raw_message, "chat", None), "id", ""))
|
||||
allowed = self._telegram_observe_allowed_chats()
|
||||
if not allowed or chat_id_str not in allowed:
|
||||
return event
|
||||
shared_source = self._telegram_group_observe_shared_source(event.source)
|
||||
observe_prompt = self._telegram_group_observe_channel_prompt()
|
||||
channel_prompt = f"{event.channel_prompt}\n\n{observe_prompt}" if event.channel_prompt else observe_prompt
|
||||
return dataclasses.replace(
|
||||
event,
|
||||
text=self._telegram_group_observe_attributed_text(event),
|
||||
source=shared_source,
|
||||
channel_prompt=channel_prompt,
|
||||
)
|
||||
|
||||
def _observe_unmentioned_group_message(self, message: Message, msg_type: MessageType, update_id: Optional[int] = None) -> None:
|
||||
"""Append skipped group chatter to the target session without dispatching."""
|
||||
store = getattr(self, "_session_store", None)
|
||||
if not store:
|
||||
return
|
||||
try:
|
||||
event = self._build_message_event(message, msg_type, update_id=update_id)
|
||||
shared_source = self._telegram_group_observe_shared_source(event.source)
|
||||
session_entry = store.get_or_create_session(shared_source)
|
||||
entry = {
|
||||
"role": "user",
|
||||
"content": self._telegram_group_observe_attributed_text(event),
|
||||
"timestamp": datetime.now(tz=timezone.utc).isoformat(),
|
||||
"observed": True,
|
||||
}
|
||||
if event.message_id:
|
||||
entry["message_id"] = str(event.message_id)
|
||||
store.append_to_transcript(session_entry.session_id, entry)
|
||||
adapter_name = getattr(self, "name", "telegram")
|
||||
logger.info(
|
||||
"[%s] Telegram group message observed (no bot trigger): chat=%s from=%s",
|
||||
adapter_name,
|
||||
getattr(getattr(message, "chat", None), "id", "unknown"),
|
||||
event.source.user_id or "unknown",
|
||||
)
|
||||
except Exception as exc:
|
||||
adapter_name = getattr(self, "name", "telegram")
|
||||
logger.warning("[%s] Failed to observe Telegram group message: %s", adapter_name, exc)
|
||||
|
||||
def _should_process_message(self, message: Message, *, is_command: bool = False) -> bool:
|
||||
"""Apply Telegram group trigger rules.
|
||||
|
||||
@@ -4590,11 +4753,14 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
if not msg or not msg.text:
|
||||
return
|
||||
if not self._should_process_message(msg):
|
||||
if self._should_observe_unmentioned_group_message(msg):
|
||||
self._observe_unmentioned_group_message(msg, MessageType.TEXT, update_id=update.update_id)
|
||||
return
|
||||
await self._ensure_forum_commands(update.message)
|
||||
|
||||
event = self._build_message_event(msg, MessageType.TEXT, update_id=update.update_id)
|
||||
event.text = self._clean_bot_trigger_text(event.text)
|
||||
event = self._apply_telegram_group_observe_attribution(event)
|
||||
self._enqueue_text_event(event)
|
||||
|
||||
async def _handle_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
@@ -4607,6 +4773,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
await self._ensure_forum_commands(msg)
|
||||
|
||||
event = self._build_message_event(msg, MessageType.COMMAND, update_id=update.update_id)
|
||||
event.text = self._clean_bot_trigger_text(event.text)
|
||||
event = self._apply_telegram_group_observe_attribution(event)
|
||||
await self.handle_message(event)
|
||||
|
||||
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
@@ -4615,6 +4783,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
if not msg:
|
||||
return
|
||||
if not self._should_process_message(msg):
|
||||
if self._should_observe_unmentioned_group_message(msg):
|
||||
self._observe_unmentioned_group_message(msg, MessageType.LOCATION, update_id=update.update_id)
|
||||
return
|
||||
|
||||
venue = getattr(msg, "venue", None)
|
||||
@@ -4644,6 +4814,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
event = self._build_message_event(msg, MessageType.LOCATION, update_id=update.update_id)
|
||||
event.text = "\n".join(parts)
|
||||
event = self._apply_telegram_group_observe_attribution(event)
|
||||
await self.handle_message(event)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -4788,8 +4959,23 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
if not update.message:
|
||||
return
|
||||
if not self._should_process_message(update.message):
|
||||
if self._should_observe_unmentioned_group_message(update.message):
|
||||
_m = update.message
|
||||
if _m.sticker:
|
||||
_observe_type = MessageType.STICKER
|
||||
elif _m.photo:
|
||||
_observe_type = MessageType.PHOTO
|
||||
elif _m.video:
|
||||
_observe_type = MessageType.VIDEO
|
||||
elif _m.audio:
|
||||
_observe_type = MessageType.AUDIO
|
||||
elif _m.voice:
|
||||
_observe_type = MessageType.VOICE
|
||||
else:
|
||||
_observe_type = MessageType.DOCUMENT
|
||||
self._observe_unmentioned_group_message(_m, _observe_type, update_id=update.update_id)
|
||||
return
|
||||
|
||||
|
||||
msg = update.message
|
||||
|
||||
# Determine media type
|
||||
@@ -4817,9 +5003,14 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
# Handle stickers: describe via vision tool with caching
|
||||
if msg.sticker:
|
||||
await self._handle_sticker(msg, event)
|
||||
event = self._apply_telegram_group_observe_attribution(event)
|
||||
await self.handle_message(event)
|
||||
return
|
||||
|
||||
|
||||
# Apply observe attribution after caption is set; sticker is handled above
|
||||
# because _handle_sticker overwrites event.text with its vision description.
|
||||
event = self._apply_telegram_group_observe_attribution(event)
|
||||
|
||||
# Download photo to local image cache so the vision tool can access it
|
||||
# even after Telegram's ephemeral file URLs expire (~1 hour).
|
||||
if msg.photo:
|
||||
|
||||
@@ -16,11 +16,9 @@ with different backends via a bridge pattern.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
@@ -180,6 +178,7 @@ import sys
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.whatsapp_common import WhatsAppBehaviorMixin
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
@@ -215,7 +214,7 @@ def check_whatsapp_requirements() -> bool:
|
||||
return False
|
||||
|
||||
|
||||
class WhatsAppAdapter(BasePlatformAdapter):
|
||||
class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter):
|
||||
"""
|
||||
WhatsApp adapter.
|
||||
|
||||
@@ -237,13 +236,12 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
- allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist")
|
||||
- group_policy: "open" | "allowlist" | "disabled" — which groups are processed (default: "open")
|
||||
- group_allow_from: List of group JIDs allowed (when group_policy="allowlist")
|
||||
|
||||
Behavior (gating, mention parsing, markdown conversion, chunking) is
|
||||
provided by ``WhatsAppBehaviorMixin`` so the Cloud API adapter can
|
||||
share it. Only transport-specific code lives here.
|
||||
"""
|
||||
|
||||
# WhatsApp message limits — practical UX limit, not protocol max.
|
||||
# WhatsApp allows ~65K but long messages are unreadable on mobile.
|
||||
MAX_MESSAGE_LENGTH = 4096
|
||||
DEFAULT_REPLY_PREFIX = "⚕ *Hermes Agent*\n────────────\n"
|
||||
|
||||
|
||||
# Default bridge location relative to the hermes-agent install
|
||||
_DEFAULT_BRIDGE_DIR = Path(__file__).resolve().parents[2] / "scripts" / "whatsapp-bridge"
|
||||
|
||||
@@ -278,213 +276,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
# notification before the normal "✓ whatsapp disconnected" fires.
|
||||
self._shutting_down: bool = False
|
||||
|
||||
def _effective_reply_prefix(self) -> str:
|
||||
"""Return the prefix the Node bridge will add in self-chat mode."""
|
||||
whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
|
||||
if whatsapp_mode != "self-chat":
|
||||
return ""
|
||||
if self._reply_prefix is not None:
|
||||
return self._reply_prefix.replace("\\n", "\n")
|
||||
env_prefix = os.getenv("WHATSAPP_REPLY_PREFIX")
|
||||
if env_prefix is not None:
|
||||
return env_prefix.replace("\\n", "\n")
|
||||
return self.DEFAULT_REPLY_PREFIX
|
||||
|
||||
def _outgoing_chunk_limit(self) -> int:
|
||||
"""Reserve room for the bridge-side prefix so final WhatsApp text fits."""
|
||||
prefix_len = len(self._effective_reply_prefix())
|
||||
# Keep enough space for truncate_message's pagination indicator and
|
||||
# code-fence repair even if a user configures a very long prefix.
|
||||
return max(1024, self.MAX_MESSAGE_LENGTH - prefix_len)
|
||||
|
||||
def _whatsapp_require_mention(self) -> bool:
|
||||
configured = self.config.extra.get("require_mention")
|
||||
if configured is not None:
|
||||
if isinstance(configured, str):
|
||||
return configured.lower() in {"true", "1", "yes", "on"}
|
||||
return bool(configured)
|
||||
return os.getenv("WHATSAPP_REQUIRE_MENTION", "false").lower() in {"true", "1", "yes", "on"}
|
||||
|
||||
def _whatsapp_free_response_chats(self) -> set[str]:
|
||||
raw = self.config.extra.get("free_response_chats")
|
||||
if raw is None:
|
||||
raw = os.getenv("WHATSAPP_FREE_RESPONSE_CHATS", "")
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
@staticmethod
|
||||
def _coerce_allow_list(raw) -> set[str]:
|
||||
"""Parse allow_from / group_allow_from from config or env var."""
|
||||
if raw is None:
|
||||
return set()
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
@staticmethod
|
||||
def _is_broadcast_chat(chat_id: str) -> bool:
|
||||
"""True for WhatsApp pseudo-chats that aren't real conversations.
|
||||
|
||||
Covers Status updates (Stories) and Channel/Newsletter broadcasts.
|
||||
These show up as inbound messages on Baileys but the agent should
|
||||
never reply — answering a Story update spams the contact's status
|
||||
feed, and Channel posts aren't addressable in the first place.
|
||||
"""
|
||||
if not chat_id:
|
||||
return False
|
||||
cid = chat_id.strip().lower()
|
||||
if cid == "status@broadcast":
|
||||
return True
|
||||
# @broadcast suffix covers status@broadcast plus any future
|
||||
# broadcast-list variants. @newsletter is the Channel JID suffix.
|
||||
if cid.endswith("@broadcast") or cid.endswith("@newsletter"):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _is_dm_allowed(self, sender_id: str) -> bool:
|
||||
"""Check whether a DM from the given sender should be processed."""
|
||||
if self._dm_policy == "disabled":
|
||||
return False
|
||||
if self._dm_policy == "allowlist":
|
||||
return sender_id in self._allow_from
|
||||
# "open" — all DMs allowed
|
||||
return True
|
||||
|
||||
def _is_group_allowed(self, chat_id: str) -> bool:
|
||||
"""Check whether a group chat should be processed."""
|
||||
if self._group_policy == "disabled":
|
||||
return False
|
||||
if self._group_policy == "allowlist":
|
||||
return chat_id in self._group_allow_from
|
||||
# "open" — all groups allowed
|
||||
return True
|
||||
|
||||
def _compile_mention_patterns(self):
|
||||
patterns = self.config.extra.get("mention_patterns")
|
||||
if patterns is None:
|
||||
raw = os.getenv("WHATSAPP_MENTION_PATTERNS", "").strip()
|
||||
if raw:
|
||||
try:
|
||||
patterns = json.loads(raw)
|
||||
except Exception:
|
||||
patterns = [part.strip() for part in raw.splitlines() if part.strip()]
|
||||
if not patterns:
|
||||
patterns = [part.strip() for part in raw.split(",") if part.strip()]
|
||||
if patterns is None:
|
||||
return []
|
||||
if isinstance(patterns, str):
|
||||
patterns = [patterns]
|
||||
if not isinstance(patterns, list):
|
||||
logger.warning("[%s] whatsapp mention_patterns must be a list or string; got %s", self.name, type(patterns).__name__)
|
||||
return []
|
||||
|
||||
compiled = []
|
||||
for pattern in patterns:
|
||||
if not isinstance(pattern, str) or not pattern.strip():
|
||||
continue
|
||||
try:
|
||||
compiled.append(re.compile(pattern, re.IGNORECASE))
|
||||
except re.error as exc:
|
||||
logger.warning("[%s] Invalid WhatsApp mention pattern %r: %s", self.name, pattern, exc)
|
||||
if compiled:
|
||||
logger.info("[%s] Loaded %d WhatsApp mention pattern(s)", self.name, len(compiled))
|
||||
return compiled
|
||||
|
||||
@staticmethod
|
||||
def _normalize_whatsapp_id(value: Optional[str]) -> str:
|
||||
if not value:
|
||||
return ""
|
||||
normalized = str(value).strip()
|
||||
if ":" in normalized and "@" in normalized:
|
||||
normalized = normalized.replace(":", "@", 1)
|
||||
return normalized
|
||||
|
||||
def _bot_ids_from_message(self, data: Dict[str, Any]) -> set[str]:
|
||||
bot_ids = set()
|
||||
for candidate in data.get("botIds") or []:
|
||||
normalized = self._normalize_whatsapp_id(candidate)
|
||||
if normalized:
|
||||
bot_ids.add(normalized)
|
||||
return bot_ids
|
||||
|
||||
def _message_is_reply_to_bot(self, data: Dict[str, Any]) -> bool:
|
||||
quoted_participant = self._normalize_whatsapp_id(data.get("quotedParticipant"))
|
||||
if not quoted_participant:
|
||||
return False
|
||||
return quoted_participant in self._bot_ids_from_message(data)
|
||||
|
||||
def _message_mentions_bot(self, data: Dict[str, Any]) -> bool:
|
||||
bot_ids = self._bot_ids_from_message(data)
|
||||
if not bot_ids:
|
||||
return False
|
||||
mentioned_ids = {
|
||||
nid
|
||||
for candidate in (data.get("mentionedIds") or [])
|
||||
if (nid := self._normalize_whatsapp_id(candidate))
|
||||
}
|
||||
if mentioned_ids & bot_ids:
|
||||
return True
|
||||
|
||||
body = str(data.get("body") or "")
|
||||
lower_body = body.lower()
|
||||
for bot_id in bot_ids:
|
||||
bare_id = bot_id.split("@", 1)[0].lower()
|
||||
if bare_id and (f"@{bare_id}" in lower_body or bare_id in lower_body):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _message_matches_mention_patterns(self, data: Dict[str, Any]) -> bool:
|
||||
if not self._mention_patterns:
|
||||
return False
|
||||
body = str(data.get("body") or "")
|
||||
return any(pattern.search(body) for pattern in self._mention_patterns)
|
||||
|
||||
def _clean_bot_mention_text(self, text: str, data: Dict[str, Any]) -> str:
|
||||
if not text:
|
||||
return text
|
||||
bot_ids = self._bot_ids_from_message(data)
|
||||
cleaned = text
|
||||
for bot_id in bot_ids:
|
||||
bare_id = bot_id.split("@", 1)[0]
|
||||
if bare_id:
|
||||
cleaned = re.sub(rf"@{re.escape(bare_id)}\b[,:\-]*\s*", "", cleaned)
|
||||
return cleaned.strip() or text
|
||||
|
||||
def _should_process_message(self, data: Dict[str, Any]) -> bool:
|
||||
chat_id_raw = str(data.get("chatId") or "")
|
||||
# WhatsApp uses pseudo-chats for Status updates (Stories) and
|
||||
# Channel/Newsletter broadcasts. These are not real conversations
|
||||
# and the agent should never reply to them — even in self-chat mode
|
||||
# where the bridge may surface them as "fromMe" events.
|
||||
if self._is_broadcast_chat(chat_id_raw):
|
||||
return False
|
||||
is_group = data.get("isGroup", False)
|
||||
if is_group:
|
||||
chat_id = chat_id_raw
|
||||
if not self._is_group_allowed(chat_id):
|
||||
return False
|
||||
else:
|
||||
sender_id = str(data.get("senderId") or data.get("from") or "")
|
||||
if not self._is_dm_allowed(sender_id):
|
||||
return False
|
||||
# DMs that pass the policy gate are always processed
|
||||
return True
|
||||
# Group messages: check mention / free-response settings
|
||||
chat_id = str(data.get("chatId") or "")
|
||||
if chat_id in self._whatsapp_free_response_chats():
|
||||
return True
|
||||
if not self._whatsapp_require_mention():
|
||||
return True
|
||||
body = str(data.get("body") or "").strip()
|
||||
if body.startswith("/"):
|
||||
return True
|
||||
if self._message_is_reply_to_bot(data):
|
||||
return True
|
||||
if self._message_mentions_bot(data):
|
||||
return True
|
||||
return self._message_matches_mention_patterns(data)
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""
|
||||
Start the WhatsApp bridge.
|
||||
@@ -808,63 +599,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
self._close_bridge_log()
|
||||
print(f"[{self.name}] Disconnected")
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
"""Convert standard markdown to WhatsApp-compatible formatting.
|
||||
|
||||
WhatsApp supports: *bold*, _italic_, ~strikethrough~, ```code```,
|
||||
and monospaced `inline`. Standard markdown uses different syntax
|
||||
for bold/italic/strikethrough, so we convert here.
|
||||
|
||||
Code blocks (``` fenced) and inline code (`) are protected from
|
||||
conversion via placeholder substitution.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
# --- 1. Protect fenced code blocks from formatting changes ---
|
||||
_FENCE_PH = "\x00FENCE"
|
||||
fences: list[str] = []
|
||||
|
||||
def _save_fence(m: re.Match) -> str:
|
||||
fences.append(m.group(0))
|
||||
return f"{_FENCE_PH}{len(fences) - 1}\x00"
|
||||
|
||||
result = re.sub(r"```[\s\S]*?```", _save_fence, content)
|
||||
|
||||
# --- 2. Protect inline code ---
|
||||
_CODE_PH = "\x00CODE"
|
||||
codes: list[str] = []
|
||||
|
||||
def _save_code(m: re.Match) -> str:
|
||||
codes.append(m.group(0))
|
||||
return f"{_CODE_PH}{len(codes) - 1}\x00"
|
||||
|
||||
result = re.sub(r"`[^`\n]+`", _save_code, result)
|
||||
|
||||
# --- 3. Convert markdown formatting to WhatsApp syntax ---
|
||||
# Bold: **text** or __text__ → *text*
|
||||
result = re.sub(r"\*\*(.+?)\*\*", r"*\1*", result)
|
||||
result = re.sub(r"__(.+?)__", r"*\1*", result)
|
||||
# Strikethrough: ~~text~~ → ~text~
|
||||
result = re.sub(r"~~(.+?)~~", r"~\1~", result)
|
||||
# Italic: *text* is already WhatsApp italic — leave as-is
|
||||
# _text_ is already WhatsApp italic — leave as-is
|
||||
|
||||
# --- 4. Convert markdown headers to bold text ---
|
||||
# # Header → *Header*
|
||||
result = re.sub(r"^#{1,6}\s+(.+)$", r"*\1*", result, flags=re.MULTILINE)
|
||||
|
||||
# --- 5. Convert markdown links: [text](url) → text (url) ---
|
||||
result = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1 (\2)", result)
|
||||
|
||||
# --- 6. Restore protected sections ---
|
||||
for i, fence in enumerate(fences):
|
||||
result = result.replace(f"{_FENCE_PH}{i}\x00", fence)
|
||||
for i, code in enumerate(codes):
|
||||
result = result.replace(f"{_CODE_PH}{i}\x00", code)
|
||||
|
||||
return result
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,351 @@
|
||||
"""
|
||||
Transport-agnostic WhatsApp behavior shared by the Baileys bridge adapter
|
||||
and the official WhatsApp Cloud API adapter.
|
||||
|
||||
The mixin provides:
|
||||
- Allow-list / DM / group gating
|
||||
- Mention detection (explicit @-mentions + configurable regex patterns)
|
||||
- Quoted-reply-to-bot detection
|
||||
- Broadcast / Channel / Newsletter filtering
|
||||
- WhatsApp-flavored markdown conversion
|
||||
- Outgoing chunk length budgeting
|
||||
|
||||
It is the *behavior layer*. Transport-specific concerns (subprocess management,
|
||||
HTTP webhooks, Graph API calls, media upload protocols) live in each adapter.
|
||||
|
||||
Mixin contract — the adapter must set these on ``self`` before any of the
|
||||
mixin's methods are called (typically in ``__init__``):
|
||||
|
||||
self.config # gateway.config.PlatformConfig
|
||||
self.name # str — adapter name (used in log lines)
|
||||
self._dm_policy # str: "open" | "allowlist" | "disabled"
|
||||
self._allow_from # set[str]
|
||||
self._group_policy # str: "open" | "allowlist" | "disabled"
|
||||
self._group_allow_from # set[str]
|
||||
self._mention_patterns # list[re.Pattern]
|
||||
self._reply_prefix # Optional[str]
|
||||
|
||||
Class attributes ``MAX_MESSAGE_LENGTH`` and ``DEFAULT_REPLY_PREFIX`` are
|
||||
defined on the mixin and may be overridden per-adapter if needed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WhatsAppBehaviorMixin:
|
||||
"""Shared behavior for all WhatsApp adapters (Baileys + Cloud API).
|
||||
|
||||
See module docstring for the attribute contract the host adapter must
|
||||
satisfy. This mixin owns no state of its own — every value it touches
|
||||
is either a class attribute or set by the adapter's ``__init__``.
|
||||
"""
|
||||
|
||||
# WhatsApp message limits — practical UX limit, not protocol max.
|
||||
# WhatsApp allows ~65K but long messages are unreadable on mobile.
|
||||
MAX_MESSAGE_LENGTH: int = 4096
|
||||
|
||||
DEFAULT_REPLY_PREFIX: str = "⚕ *Hermes Agent*\n────────────\n"
|
||||
|
||||
# ------------------------------------------------------------------ config
|
||||
def _effective_reply_prefix(self) -> str:
|
||||
"""Return the prefix to add to outgoing replies in self-chat mode.
|
||||
|
||||
Subclasses that don't have a self-chat concept (the Cloud API
|
||||
adapter) can override this to always return ``""`` or apply a
|
||||
different policy.
|
||||
"""
|
||||
whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
|
||||
if whatsapp_mode != "self-chat":
|
||||
return ""
|
||||
if self._reply_prefix is not None:
|
||||
return self._reply_prefix.replace("\\n", "\n")
|
||||
env_prefix = os.getenv("WHATSAPP_REPLY_PREFIX")
|
||||
if env_prefix is not None:
|
||||
return env_prefix.replace("\\n", "\n")
|
||||
return self.DEFAULT_REPLY_PREFIX
|
||||
|
||||
def _outgoing_chunk_limit(self) -> int:
|
||||
"""Reserve room for the reply prefix so the final message fits."""
|
||||
prefix_len = len(self._effective_reply_prefix())
|
||||
# Keep enough space for truncate_message's pagination indicator and
|
||||
# code-fence repair even if a user configures a very long prefix.
|
||||
return max(1024, self.MAX_MESSAGE_LENGTH - prefix_len)
|
||||
|
||||
def _whatsapp_require_mention(self) -> bool:
|
||||
configured = self.config.extra.get("require_mention")
|
||||
if configured is not None:
|
||||
if isinstance(configured, str):
|
||||
return configured.lower() in {"true", "1", "yes", "on"}
|
||||
return bool(configured)
|
||||
return os.getenv("WHATSAPP_REQUIRE_MENTION", "false").lower() in {
|
||||
"true",
|
||||
"1",
|
||||
"yes",
|
||||
"on",
|
||||
}
|
||||
|
||||
def _whatsapp_free_response_chats(self) -> set[str]:
|
||||
raw = self.config.extra.get("free_response_chats")
|
||||
if raw is None:
|
||||
raw = os.getenv("WHATSAPP_FREE_RESPONSE_CHATS", "")
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
@staticmethod
|
||||
def _coerce_allow_list(raw) -> set[str]:
|
||||
"""Parse allow_from / group_allow_from from config or env var."""
|
||||
if raw is None:
|
||||
return set()
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
# ------------------------------------------------------------------ JID helpers
|
||||
@staticmethod
|
||||
def _normalize_whatsapp_id(value: Optional[str]) -> str:
|
||||
if not value:
|
||||
return ""
|
||||
normalized = str(value).strip()
|
||||
if ":" in normalized and "@" in normalized:
|
||||
normalized = normalized.replace(":", "@", 1)
|
||||
return normalized
|
||||
|
||||
@staticmethod
|
||||
def _is_broadcast_chat(chat_id: str) -> bool:
|
||||
"""True for WhatsApp pseudo-chats that aren't real conversations.
|
||||
|
||||
Covers Status updates (Stories) and Channel/Newsletter broadcasts.
|
||||
These show up as inbound messages on Baileys but the agent should
|
||||
never reply — answering a Story update spams the contact's status
|
||||
feed, and Channel posts aren't addressable in the first place.
|
||||
"""
|
||||
if not chat_id:
|
||||
return False
|
||||
cid = chat_id.strip().lower()
|
||||
if cid == "status@broadcast":
|
||||
return True
|
||||
# @broadcast suffix covers status@broadcast plus any future
|
||||
# broadcast-list variants. @newsletter is the Channel JID suffix.
|
||||
if cid.endswith("@broadcast") or cid.endswith("@newsletter"):
|
||||
return True
|
||||
return False
|
||||
|
||||
# ------------------------------------------------------------------ gating
|
||||
def _is_dm_allowed(self, sender_id: str) -> bool:
|
||||
"""Check whether a DM from the given sender should be processed."""
|
||||
if self._dm_policy == "disabled":
|
||||
return False
|
||||
if self._dm_policy == "allowlist":
|
||||
return sender_id in self._allow_from
|
||||
# "open" — all DMs allowed
|
||||
return True
|
||||
|
||||
def _is_group_allowed(self, chat_id: str) -> bool:
|
||||
"""Check whether a group chat should be processed."""
|
||||
if self._group_policy == "disabled":
|
||||
return False
|
||||
if self._group_policy == "allowlist":
|
||||
return chat_id in self._group_allow_from
|
||||
# "open" — all groups allowed
|
||||
return True
|
||||
|
||||
def _compile_mention_patterns(self):
|
||||
patterns = self.config.extra.get("mention_patterns")
|
||||
if patterns is None:
|
||||
raw = os.getenv("WHATSAPP_MENTION_PATTERNS", "").strip()
|
||||
if raw:
|
||||
try:
|
||||
patterns = json.loads(raw)
|
||||
except Exception:
|
||||
patterns = [
|
||||
part.strip() for part in raw.splitlines() if part.strip()
|
||||
]
|
||||
if not patterns:
|
||||
patterns = [
|
||||
part.strip() for part in raw.split(",") if part.strip()
|
||||
]
|
||||
if patterns is None:
|
||||
return []
|
||||
if isinstance(patterns, str):
|
||||
patterns = [patterns]
|
||||
if not isinstance(patterns, list):
|
||||
logger.warning(
|
||||
"[%s] whatsapp mention_patterns must be a list or string; got %s",
|
||||
self.name,
|
||||
type(patterns).__name__,
|
||||
)
|
||||
return []
|
||||
|
||||
compiled = []
|
||||
for pattern in patterns:
|
||||
if not isinstance(pattern, str) or not pattern.strip():
|
||||
continue
|
||||
try:
|
||||
compiled.append(re.compile(pattern, re.IGNORECASE))
|
||||
except re.error as exc:
|
||||
logger.warning(
|
||||
"[%s] Invalid WhatsApp mention pattern %r: %s",
|
||||
self.name,
|
||||
pattern,
|
||||
exc,
|
||||
)
|
||||
if compiled:
|
||||
logger.info(
|
||||
"[%s] Loaded %d WhatsApp mention pattern(s)", self.name, len(compiled)
|
||||
)
|
||||
return compiled
|
||||
|
||||
def _bot_ids_from_message(self, data: Dict[str, Any]) -> set[str]:
|
||||
bot_ids = set()
|
||||
for candidate in data.get("botIds") or []:
|
||||
normalized = self._normalize_whatsapp_id(candidate)
|
||||
if normalized:
|
||||
bot_ids.add(normalized)
|
||||
return bot_ids
|
||||
|
||||
def _message_is_reply_to_bot(self, data: Dict[str, Any]) -> bool:
|
||||
quoted_participant = self._normalize_whatsapp_id(data.get("quotedParticipant"))
|
||||
if not quoted_participant:
|
||||
return False
|
||||
return quoted_participant in self._bot_ids_from_message(data)
|
||||
|
||||
def _message_mentions_bot(self, data: Dict[str, Any]) -> bool:
|
||||
bot_ids = self._bot_ids_from_message(data)
|
||||
if not bot_ids:
|
||||
return False
|
||||
mentioned_ids = {
|
||||
nid
|
||||
for candidate in (data.get("mentionedIds") or [])
|
||||
if (nid := self._normalize_whatsapp_id(candidate))
|
||||
}
|
||||
if mentioned_ids & bot_ids:
|
||||
return True
|
||||
|
||||
body = str(data.get("body") or "")
|
||||
lower_body = body.lower()
|
||||
for bot_id in bot_ids:
|
||||
bare_id = bot_id.split("@", 1)[0].lower()
|
||||
if bare_id and (f"@{bare_id}" in lower_body or bare_id in lower_body):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _message_matches_mention_patterns(self, data: Dict[str, Any]) -> bool:
|
||||
if not self._mention_patterns:
|
||||
return False
|
||||
body = str(data.get("body") or "")
|
||||
return any(pattern.search(body) for pattern in self._mention_patterns)
|
||||
|
||||
def _clean_bot_mention_text(self, text: str, data: Dict[str, Any]) -> str:
|
||||
if not text:
|
||||
return text
|
||||
bot_ids = self._bot_ids_from_message(data)
|
||||
cleaned = text
|
||||
for bot_id in bot_ids:
|
||||
bare_id = bot_id.split("@", 1)[0]
|
||||
if bare_id:
|
||||
cleaned = re.sub(
|
||||
rf"@{re.escape(bare_id)}\b[,:\-]*\s*", "", cleaned
|
||||
)
|
||||
return cleaned.strip() or text
|
||||
|
||||
def _should_process_message(self, data: Dict[str, Any]) -> bool:
|
||||
chat_id_raw = str(data.get("chatId") or "")
|
||||
# WhatsApp uses pseudo-chats for Status updates (Stories) and
|
||||
# Channel/Newsletter broadcasts. These are not real conversations
|
||||
# and the agent should never reply to them — even in self-chat mode
|
||||
# where the bridge may surface them as "fromMe" events.
|
||||
if self._is_broadcast_chat(chat_id_raw):
|
||||
return False
|
||||
is_group = data.get("isGroup", False)
|
||||
if is_group:
|
||||
chat_id = chat_id_raw
|
||||
if not self._is_group_allowed(chat_id):
|
||||
return False
|
||||
else:
|
||||
sender_id = str(data.get("senderId") or data.get("from") or "")
|
||||
if not self._is_dm_allowed(sender_id):
|
||||
return False
|
||||
# DMs that pass the policy gate are always processed
|
||||
return True
|
||||
# Group messages: check mention / free-response settings
|
||||
chat_id = str(data.get("chatId") or "")
|
||||
if chat_id in self._whatsapp_free_response_chats():
|
||||
return True
|
||||
if not self._whatsapp_require_mention():
|
||||
return True
|
||||
body = str(data.get("body") or "").strip()
|
||||
if body.startswith("/"):
|
||||
return True
|
||||
if self._message_is_reply_to_bot(data):
|
||||
return True
|
||||
if self._message_mentions_bot(data):
|
||||
return True
|
||||
return self._message_matches_mention_patterns(data)
|
||||
|
||||
# ------------------------------------------------------------------ formatting
|
||||
def format_message(self, content: str) -> str:
|
||||
"""Convert standard markdown to WhatsApp-compatible formatting.
|
||||
|
||||
WhatsApp supports: *bold*, _italic_, ~strikethrough~, ```code```,
|
||||
and monospaced `inline`. Standard markdown uses different syntax
|
||||
for bold/italic/strikethrough, so we convert here.
|
||||
|
||||
Code blocks (``` fenced) and inline code (`) are protected from
|
||||
conversion via placeholder substitution.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
# --- 1. Protect fenced code blocks from formatting changes ---
|
||||
_FENCE_PH = "\x00FENCE"
|
||||
fences: list[str] = []
|
||||
|
||||
def _save_fence(m: re.Match) -> str:
|
||||
fences.append(m.group(0))
|
||||
return f"{_FENCE_PH}{len(fences) - 1}\x00"
|
||||
|
||||
result = re.sub(r"```[\s\S]*?```", _save_fence, content)
|
||||
|
||||
# --- 2. Protect inline code ---
|
||||
_CODE_PH = "\x00CODE"
|
||||
codes: list[str] = []
|
||||
|
||||
def _save_code(m: re.Match) -> str:
|
||||
codes.append(m.group(0))
|
||||
return f"{_CODE_PH}{len(codes) - 1}\x00"
|
||||
|
||||
result = re.sub(r"`[^`\n]+`", _save_code, result)
|
||||
|
||||
# --- 3. Convert markdown formatting to WhatsApp syntax ---
|
||||
# Bold: **text** or __text__ → *text*
|
||||
result = re.sub(r"\*\*(.+?)\*\*", r"*\1*", result)
|
||||
result = re.sub(r"__(.+?)__", r"*\1*", result)
|
||||
# Strikethrough: ~~text~~ → ~text~
|
||||
result = re.sub(r"~~(.+?)~~", r"~\1~", result)
|
||||
# Italic: *text* is already WhatsApp italic — leave as-is
|
||||
# _text_ is already WhatsApp italic — leave as-is
|
||||
|
||||
# --- 4. Convert markdown headers to bold text ---
|
||||
# # Header → *Header*
|
||||
result = re.sub(r"^#{1,6}\s+(.+)$", r"*\1*", result, flags=re.MULTILINE)
|
||||
|
||||
# --- 5. Convert markdown links: [text](url) → text (url) ---
|
||||
result = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1 (\2)", result)
|
||||
|
||||
# --- 6. Restore protected sections ---
|
||||
for i, fence in enumerate(fences):
|
||||
result = result.replace(f"{_FENCE_PH}{i}\x00", fence)
|
||||
for i, code in enumerate(codes):
|
||||
result = result.replace(f"{_CODE_PH}{i}\x00", code)
|
||||
|
||||
return result
|
||||
@@ -1410,41 +1410,43 @@ class RecallGuardMiddleware(InboundMiddleware):
|
||||
logger.warning("[%s] Recall: failed to resolve session: %s", adapter.name, exc)
|
||||
return
|
||||
|
||||
# Read JSONL directly — SQLite doesn't preserve message_id field.
|
||||
transcript: list = []
|
||||
# Load transcript from canonical store (state.db). Since PR #29278
|
||||
# added a ``platform_message_id`` column to the messages table and
|
||||
# ``append_to_transcript`` wires the incoming dict's ``message_id``
|
||||
# into it, ``load_transcript`` returns rows with ``message_id`` set
|
||||
# for any message that was observed with one — Branch A1 (exact id
|
||||
# match) is the canonical path again.
|
||||
try:
|
||||
path = store.get_transcript_path(sid)
|
||||
if path.exists():
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
try:
|
||||
transcript.append(json.loads(line))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
transcript = store.load_transcript(sid)
|
||||
except Exception as exc:
|
||||
logger.warning("[%s] Recall: failed to load transcript: %s", adapter.name, exc)
|
||||
return
|
||||
|
||||
# Branch A: redact — try message_id first, then content fallback.
|
||||
# Observed messages have message_id; agent-processed @bot messages
|
||||
# only have content (run.py doesn't write message_id to transcript).
|
||||
# Branch A1: exact platform message_id match. Authoritative when the
|
||||
# row was persisted with a platform_message_id (observed group
|
||||
# messages and any inbound message whose adapter carried a msg_id).
|
||||
target = None
|
||||
branch_label = ""
|
||||
for entry in transcript:
|
||||
if entry.get("message_id") == recalled_id:
|
||||
target = entry
|
||||
branch_label = "branch A1: id match"
|
||||
break
|
||||
# Branch A2: content-match fallback for messages that lack an exact
|
||||
# platform id on the row — e.g. agent-processed @bot messages
|
||||
# (run.py doesn't carry msg_id through) or older rows persisted
|
||||
# before the platform_message_id column existed.
|
||||
if target is None and recalled_content:
|
||||
for entry in transcript:
|
||||
if entry.get("role") == "user" and entry.get("content") == recalled_content:
|
||||
target = entry
|
||||
branch_label = "branch A2: content match"
|
||||
break
|
||||
if target is not None:
|
||||
target["content"] = cls._REDACTED
|
||||
try:
|
||||
store.rewrite_transcript(sid, transcript)
|
||||
logger.info("[%s] Recall: redacted msg_id=%s (branch A)", adapter.name, recalled_id)
|
||||
logger.info("[%s] Recall: redacted msg_id=%s (%s)", adapter.name, recalled_id, branch_label)
|
||||
except Exception as exc:
|
||||
logger.warning("[%s] Recall: rewrite_transcript failed: %s", adapter.name, exc)
|
||||
return
|
||||
|
||||
+22
-4
@@ -1109,7 +1109,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
||||
normalized = command_name.lower().replace("_", "-")
|
||||
try:
|
||||
from tools.skills_tool import _get_disabled_skill_names
|
||||
from agent.skill_utils import get_all_skills_dirs
|
||||
from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path
|
||||
disabled = _get_disabled_skill_names()
|
||||
|
||||
# Check disabled skills across all dirs (local + external)
|
||||
@@ -1117,7 +1117,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
||||
if not skills_dir.exists():
|
||||
continue
|
||||
for skill_md in skills_dir.rglob("SKILL.md"):
|
||||
if any(part in {'.git', '.github', '.hub', '.archive'} for part in skill_md.parts):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
slug, declared_name = _skill_slug_from_frontmatter(skill_md)
|
||||
if not slug or not declared_name:
|
||||
@@ -1136,6 +1136,8 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
||||
optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
|
||||
if optional_dir.exists():
|
||||
for skill_md in optional_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
slug, _declared = _skill_slug_from_frontmatter(skill_md)
|
||||
if not slug:
|
||||
continue
|
||||
@@ -3676,7 +3678,8 @@ class GatewayRunner:
|
||||
# Warn if no user allowlists are configured and open access is not opted in
|
||||
_builtin_allowed_vars = (
|
||||
"TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
|
||||
"WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
|
||||
"WHATSAPP_ALLOWED_USERS", "WHATSAPP_CLOUD_ALLOWED_USERS",
|
||||
"SLACK_ALLOWED_USERS",
|
||||
"SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
|
||||
"TELEGRAM_GROUP_ALLOWED_USERS",
|
||||
"TELEGRAM_GROUP_ALLOWED_CHATS",
|
||||
@@ -3694,7 +3697,8 @@ class GatewayRunner:
|
||||
)
|
||||
_builtin_allow_all_vars = (
|
||||
"TELEGRAM_ALLOW_ALL_USERS", "DISCORD_ALLOW_ALL_USERS",
|
||||
"WHATSAPP_ALLOW_ALL_USERS", "SLACK_ALLOW_ALL_USERS",
|
||||
"WHATSAPP_ALLOW_ALL_USERS", "WHATSAPP_CLOUD_ALLOW_ALL_USERS",
|
||||
"SLACK_ALLOW_ALL_USERS",
|
||||
"SIGNAL_ALLOW_ALL_USERS", "EMAIL_ALLOW_ALL_USERS",
|
||||
"SMS_ALLOW_ALL_USERS", "MATTERMOST_ALLOW_ALL_USERS",
|
||||
"MATRIX_ALLOW_ALL_USERS", "DINGTALK_ALLOW_ALL_USERS",
|
||||
@@ -5952,6 +5956,18 @@ class GatewayRunner:
|
||||
logger.warning("WhatsApp: Node.js not installed or bridge not configured")
|
||||
return None
|
||||
return WhatsAppAdapter(config)
|
||||
|
||||
elif platform == Platform.WHATSAPP_CLOUD:
|
||||
from gateway.platforms.whatsapp_cloud import (
|
||||
WhatsAppCloudAdapter,
|
||||
check_whatsapp_cloud_requirements,
|
||||
)
|
||||
if not check_whatsapp_cloud_requirements():
|
||||
logger.warning(
|
||||
"WhatsApp Cloud: aiohttp/httpx missing — reinstall hermes-agent"
|
||||
)
|
||||
return None
|
||||
return WhatsAppCloudAdapter(config)
|
||||
|
||||
elif platform == Platform.SLACK:
|
||||
from gateway.platforms.slack import SlackAdapter, check_slack_requirements
|
||||
@@ -6142,6 +6158,7 @@ class GatewayRunner:
|
||||
Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
|
||||
Platform.DISCORD: "DISCORD_ALLOWED_USERS",
|
||||
Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
|
||||
Platform.WHATSAPP_CLOUD: "WHATSAPP_CLOUD_ALLOWED_USERS",
|
||||
Platform.SLACK: "SLACK_ALLOWED_USERS",
|
||||
Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
|
||||
Platform.EMAIL: "EMAIL_ALLOWED_USERS",
|
||||
@@ -6168,6 +6185,7 @@ class GatewayRunner:
|
||||
Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
|
||||
Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS",
|
||||
Platform.WHATSAPP: "WHATSAPP_ALLOW_ALL_USERS",
|
||||
Platform.WHATSAPP_CLOUD: "WHATSAPP_CLOUD_ALLOW_ALL_USERS",
|
||||
Platform.SLACK: "SLACK_ALLOW_ALL_USERS",
|
||||
Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS",
|
||||
Platform.EMAIL: "EMAIL_ALLOW_ALL_USERS",
|
||||
|
||||
+26
-83
@@ -1248,20 +1248,15 @@ class SessionStore:
|
||||
|
||||
return entries
|
||||
|
||||
def get_transcript_path(self, session_id: str) -> Path:
|
||||
"""Get the path to a session's legacy transcript file."""
|
||||
return self.sessions_dir / f"{session_id}.jsonl"
|
||||
|
||||
def append_to_transcript(self, session_id: str, message: Dict[str, Any], skip_db: bool = False) -> None:
|
||||
"""Append a message to a session's transcript (SQLite + legacy JSONL).
|
||||
"""Append a message to a session's transcript (SQLite).
|
||||
|
||||
Args:
|
||||
skip_db: When True, only write to JSONL and skip the SQLite write.
|
||||
Used when the agent already persisted messages to SQLite
|
||||
via its own _flush_messages_to_session_db(), preventing
|
||||
the duplicate-write bug (#860).
|
||||
skip_db: When True, skip the SQLite write. Used when the agent
|
||||
already persisted messages to SQLite via its own
|
||||
_flush_messages_to_session_db(), preventing the
|
||||
duplicate-write bug (#860).
|
||||
"""
|
||||
# Write to SQLite (unless the agent already handled it)
|
||||
if self._db and not skip_db:
|
||||
try:
|
||||
self._db.append_message(
|
||||
@@ -1276,94 +1271,42 @@ class SessionStore:
|
||||
reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
|
||||
codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
|
||||
codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
|
||||
# Platform-side message id (yuanbao msg_id, telegram update_id, …).
|
||||
# Accept either explicit ``platform_message_id`` or the legacy
|
||||
# ``message_id`` key the JSONL transcript used.
|
||||
platform_message_id=(
|
||||
message.get("platform_message_id") or message.get("message_id")
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
|
||||
# Also write legacy JSONL (keeps existing tooling working during transition)
|
||||
transcript_path = self.get_transcript_path(session_id)
|
||||
try:
|
||||
with self._lock:
|
||||
with open(transcript_path, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(message, ensure_ascii=False) + "\n")
|
||||
except OSError as e:
|
||||
# Disk full / read-only fs / permission errors must not crash the
|
||||
# message handler — the SQLite write above is the primary store.
|
||||
logger.debug("Failed to write JSONL transcript for %s: %s", session_id, e)
|
||||
|
||||
def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
|
||||
"""Replace the entire transcript for a session with new messages.
|
||||
|
||||
Used by /retry, /undo, and /compress to persist modified conversation history.
|
||||
Rewrites both SQLite and legacy JSONL storage.
|
||||
|
||||
Used by /retry, /undo, and /compress to persist modified conversation
|
||||
history. state.db is the canonical store.
|
||||
"""
|
||||
# SQLite: replace atomically so a mid-rewrite failure doesn't leave
|
||||
# the session half-empty in the DB while JSONL still has history.
|
||||
if self._db:
|
||||
try:
|
||||
self._db.replace_messages(session_id, messages)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to rewrite transcript in DB: %s", e)
|
||||
|
||||
# JSONL: overwrite the file
|
||||
transcript_path = self.get_transcript_path(session_id)
|
||||
with open(transcript_path, "w", encoding="utf-8") as f:
|
||||
for msg in messages:
|
||||
f.write(json.dumps(msg, ensure_ascii=False) + "\n")
|
||||
|
||||
def load_transcript(self, session_id: str) -> List[Dict[str, Any]]:
|
||||
"""Load all messages from a session's transcript."""
|
||||
db_messages = []
|
||||
# Try SQLite first
|
||||
if self._db:
|
||||
try:
|
||||
db_messages = self._db.get_messages_as_conversation(session_id)
|
||||
except Exception as e:
|
||||
logger.debug("Could not load messages from DB: %s", e)
|
||||
"""Load all messages from a session's transcript.
|
||||
|
||||
# Load legacy JSONL transcript (may contain more history than SQLite
|
||||
# for sessions created before the DB layer was introduced).
|
||||
transcript_path = self.get_transcript_path(session_id)
|
||||
jsonl_messages = []
|
||||
if transcript_path.exists():
|
||||
try:
|
||||
with open(transcript_path, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
try:
|
||||
jsonl_messages.append(json.loads(line))
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(
|
||||
"Skipping corrupt line in transcript %s: %s",
|
||||
session_id, line[:120],
|
||||
)
|
||||
except OSError as e:
|
||||
# JSONL is the legacy compatibility store. If it becomes
|
||||
# unreadable, keep gateway recovery working by falling back to
|
||||
# SQLite rows loaded above (or [] when no DB exists).
|
||||
logger.debug("Failed to read JSONL transcript for %s: %s", session_id, e)
|
||||
|
||||
# Prefer whichever source has more messages.
|
||||
#
|
||||
# Background: when a session pre-dates SQLite storage (or when the DB
|
||||
# layer was added while a long-lived session was already active), the
|
||||
# first post-migration turn writes only the *new* messages to SQLite
|
||||
# (because _flush_messages_to_session_db skips messages already in
|
||||
# conversation_history, assuming they're persisted). On the *next*
|
||||
# turn load_transcript returns those few SQLite rows and ignores the
|
||||
# full JSONL history — the model sees a context of 1-4 messages instead
|
||||
# of hundreds. Using the longer source prevents this silent truncation.
|
||||
if len(jsonl_messages) > len(db_messages):
|
||||
if db_messages:
|
||||
logger.debug(
|
||||
"Session %s: JSONL has %d messages vs SQLite %d — "
|
||||
"using JSONL (legacy session not yet fully migrated)",
|
||||
session_id, len(jsonl_messages), len(db_messages),
|
||||
)
|
||||
return jsonl_messages
|
||||
|
||||
return db_messages
|
||||
state.db is the canonical store. The legacy JSONL fallback was removed
|
||||
in spec 002 — pre-DB sessions on existing disks have already been
|
||||
migrated (their DB row holds the full message history).
|
||||
"""
|
||||
if not self._db:
|
||||
return []
|
||||
try:
|
||||
return self._db.get_messages_as_conversation(session_id)
|
||||
except Exception as e:
|
||||
logger.debug("Could not load messages from DB: %s", e)
|
||||
return []
|
||||
|
||||
|
||||
def build_session_context(
|
||||
|
||||
+7
-13
@@ -48,7 +48,7 @@ import httpx
|
||||
import yaml
|
||||
|
||||
from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from hermes_constants import OPENROUTER_BASE_URL, secure_parent_dir
|
||||
from utils import atomic_replace, atomic_yaml_write, is_truthy_value
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -1030,10 +1030,8 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
|
||||
auth_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Tighten parent dir to 0o700 so siblings can't traverse to creds.
|
||||
# No-op on Windows (POSIX mode bits not enforced); ignore failures.
|
||||
try:
|
||||
os.chmod(auth_file.parent, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
# secure_parent_dir refuses to chmod / or top-level dirs (#25821).
|
||||
secure_parent_dir(auth_file)
|
||||
auth_store["version"] = AUTH_STORE_VERSION
|
||||
auth_store["updated_at"] = datetime.now(timezone.utc).isoformat()
|
||||
payload = json.dumps(auth_store, indent=2) + "\n"
|
||||
@@ -1863,10 +1861,8 @@ def _read_qwen_cli_tokens() -> Dict[str, Any]:
|
||||
def _save_qwen_cli_tokens(tokens: Dict[str, Any]) -> Path:
|
||||
auth_path = _qwen_cli_auth_path()
|
||||
auth_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
os.chmod(auth_path.parent, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
# secure_parent_dir refuses to chmod / or top-level dirs (#25821).
|
||||
secure_parent_dir(auth_path)
|
||||
# Per-process random temp suffix avoids collisions between concurrent
|
||||
# writers and stale leftovers from a crashed prior write.
|
||||
tmp_path = auth_path.with_name(f"{auth_path.name}.tmp.{os.getpid()}.{uuid.uuid4().hex}")
|
||||
@@ -4168,10 +4164,8 @@ def _write_shared_nous_state(state: Dict[str, Any]) -> None:
|
||||
with _nous_shared_store_lock():
|
||||
path = _nous_shared_store_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
os.chmod(path.parent, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
# secure_parent_dir refuses to chmod / or top-level dirs (#25821).
|
||||
secure_parent_dir(path)
|
||||
tmp = path.with_name(f"{path.name}.tmp.{os.getpid()}.{uuid.uuid4().hex}")
|
||||
# Create with 0o600 atomically via os.open(O_EXCL) — closes the TOCTOU
|
||||
# window where write_text() + post-write chmod briefly exposed Nous
|
||||
|
||||
+66
-3
@@ -508,6 +508,68 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
|
||||
return result
|
||||
|
||||
|
||||
_TELEGRAM_MENU_PRIORITY = (
|
||||
# Most-typed everyday commands first.
|
||||
"help",
|
||||
"new",
|
||||
"stop",
|
||||
"status",
|
||||
"resume",
|
||||
"sessions",
|
||||
"model",
|
||||
# Maintenance / diagnostics — the ones that prompted this priority list.
|
||||
"debug",
|
||||
"restart",
|
||||
"update",
|
||||
"verbose",
|
||||
"commands",
|
||||
# Mid-turn session control.
|
||||
"approve",
|
||||
"deny",
|
||||
"queue",
|
||||
"steer",
|
||||
"background",
|
||||
# Lower-priority but still useful operational built-ins.
|
||||
"reasoning",
|
||||
"usage",
|
||||
"platforms",
|
||||
"platform",
|
||||
"profile",
|
||||
"whoami",
|
||||
)
|
||||
"""Built-in commands that should stay visible in Telegram's capped menu.
|
||||
|
||||
Telegram only displays a small BotCommand menu in practice. The full Hermes
|
||||
registry is still dispatchable when typed manually, but operational commands
|
||||
need to survive the visible menu cap ahead of lower-priority built-ins.
|
||||
"""
|
||||
|
||||
|
||||
def _prioritize_telegram_menu_commands(
|
||||
commands: list[tuple[str, str]],
|
||||
) -> list[tuple[str, str]]:
|
||||
priority = {
|
||||
_sanitize_telegram_name(name): index
|
||||
for index, name in enumerate(_TELEGRAM_MENU_PRIORITY)
|
||||
}
|
||||
return [
|
||||
command
|
||||
for _index, command in sorted(
|
||||
enumerate(commands),
|
||||
key=lambda item: (
|
||||
0,
|
||||
priority[item[1][0]],
|
||||
item[0],
|
||||
)
|
||||
if item[1][0] in priority
|
||||
else (
|
||||
1,
|
||||
item[0],
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
_CMD_NAME_LIMIT = 32
|
||||
"""Max command name length shared by Telegram and Discord."""
|
||||
|
||||
@@ -721,11 +783,12 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
|
||||
|
||||
Returns:
|
||||
(menu_commands, hidden_count) where hidden_count is the number of
|
||||
skill commands omitted due to the cap.
|
||||
commands omitted due to the cap.
|
||||
"""
|
||||
core_commands = list(telegram_bot_commands())
|
||||
core_commands = _prioritize_telegram_menu_commands(list(telegram_bot_commands()))
|
||||
reserved_names = {n for n, _ in core_commands}
|
||||
all_commands = list(core_commands)
|
||||
hidden_core_count = max(0, len(all_commands) - max_commands)
|
||||
|
||||
remaining_slots = max(0, max_commands - len(all_commands))
|
||||
entries, hidden_count = _collect_gateway_skill_entries(
|
||||
@@ -737,7 +800,7 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
|
||||
)
|
||||
# Drop the cmd_key — Telegram only needs (name, desc) pairs.
|
||||
all_commands.extend((n, d) for n, d, _k in entries)
|
||||
return all_commands[:max_commands], hidden_count
|
||||
return all_commands[:max_commands], hidden_count + hidden_core_count
|
||||
|
||||
|
||||
def discord_skill_commands(
|
||||
|
||||
+46
-2
@@ -1648,6 +1648,15 @@ DEFAULT_CONFIG = {
|
||||
# the sweep on every CLI invocation). Tracked via state_meta in
|
||||
# state.db itself, so it's shared across all processes.
|
||||
"min_interval_hours": 24,
|
||||
# Legacy per-session JSON snapshot writer. When true, the agent
|
||||
# rewrites ``~/.hermes/sessions/session_{sid}.json`` on every turn
|
||||
# boundary with the full message list. state.db is canonical and
|
||||
# has every field the snapshot stored (plus per-message timestamps
|
||||
# and token counts), so this is off by default — the snapshots had
|
||||
# no consumer outside their own overwrite guard and accumulated
|
||||
# GBs of disk on heavy users. Opt in only if you have an external
|
||||
# tool that consumes the JSON files directly.
|
||||
"write_json_snapshots": False,
|
||||
},
|
||||
|
||||
# Contextual first-touch onboarding hints (see agent/onboarding.py).
|
||||
@@ -1738,6 +1747,37 @@ DEFAULT_CONFIG = {
|
||||
"retries": 2,
|
||||
},
|
||||
|
||||
# =========================================================================
|
||||
# External secret sources
|
||||
# =========================================================================
|
||||
# Pull credentials from external secret managers at process startup
|
||||
# rather than storing them in ~/.hermes/.env.
|
||||
"secrets": {
|
||||
"bitwarden": {
|
||||
# Master switch. When false, BSM is never contacted and the
|
||||
# bws binary is never auto-installed — same as not having
|
||||
# this section at all.
|
||||
"enabled": False,
|
||||
# Name of the env var that holds the Bitwarden machine-account
|
||||
# access token. This is the one bootstrap secret; it lives
|
||||
# in ~/.hermes/.env (or your shell) and never in config.yaml.
|
||||
"access_token_env": "BWS_ACCESS_TOKEN",
|
||||
# UUID of the BSM project to sync from.
|
||||
"project_id": "",
|
||||
# Seconds to cache fetched secrets in-process. 0 disables.
|
||||
"cache_ttl_seconds": 300,
|
||||
# When True, BSM values overwrite existing env vars. Default
|
||||
# True because the point of using BSM is centralized rotation —
|
||||
# if .env had the final say, rotating in Bitwarden wouldn't
|
||||
# take effect until you also cleared the matching .env line.
|
||||
"override_existing": True,
|
||||
# When True, the bws binary is auto-downloaded into
|
||||
# ~/.hermes/bin/ on first use. When False you must install
|
||||
# bws yourself and have it on PATH.
|
||||
"auto_install": True,
|
||||
},
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 23,
|
||||
}
|
||||
@@ -3008,7 +3048,7 @@ def _normalize_custom_provider_entry(
|
||||
"api_mode", "transport", "model", "default_model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
"request_timeout_seconds", "stale_timeout_seconds",
|
||||
"discover_models",
|
||||
"discover_models", "extra_body",
|
||||
}
|
||||
for camel, snake in _CAMEL_ALIASES.items():
|
||||
if camel in entry and snake not in entry:
|
||||
@@ -3103,6 +3143,10 @@ def _normalize_custom_provider_entry(
|
||||
if isinstance(discover_models, bool):
|
||||
normalized["discover_models"] = discover_models
|
||||
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
normalized["extra_body"] = dict(extra_body)
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
@@ -3263,7 +3307,7 @@ _KNOWN_ROOT_KEYS = {
|
||||
# Valid fields inside a custom_providers list entry
|
||||
_VALID_CUSTOM_PROVIDER_FIELDS = {
|
||||
"name", "base_url", "api_key", "api_mode", "model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
"context_length", "rate_limit_delay", "extra_body",
|
||||
# key_env is read at runtime by runtime_provider.py and auxiliary_client.py
|
||||
# — include it here so the set accurately describes the supported schema.
|
||||
"key_env",
|
||||
|
||||
@@ -777,7 +777,33 @@ def run_doctor(args):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_section("xAI Model Retirement (May 15, 2026)")
|
||||
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.xai_retirement import (
|
||||
MIGRATION_GUIDE_URL,
|
||||
find_retired_xai_refs,
|
||||
format_issue,
|
||||
)
|
||||
|
||||
_xai_cfg = load_config()
|
||||
retired_refs = find_retired_xai_refs(_xai_cfg)
|
||||
if not retired_refs:
|
||||
check_ok("No retired xAI models in config")
|
||||
else:
|
||||
for ref in retired_refs:
|
||||
check_warn(format_issue(ref))
|
||||
check_info(f"Migration guide: {MIGRATION_GUIDE_URL}")
|
||||
manual_issues.append(
|
||||
f"Update {len(retired_refs)} retired xAI model reference(s) "
|
||||
f"in config.yaml — see {MIGRATION_GUIDE_URL}"
|
||||
)
|
||||
except Exception as _xai_check_err:
|
||||
check_warn("xAI retirement check skipped", f"({_xai_check_err})")
|
||||
|
||||
_section("Auth Providers")
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import (
|
||||
get_nous_auth_status,
|
||||
|
||||
@@ -16,6 +16,7 @@ from pathlib import Path
|
||||
from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
|
||||
def _get_git_commit(project_root: Path) -> str:
|
||||
@@ -69,6 +70,8 @@ def _count_skills(hermes_home: Path) -> int:
|
||||
return 0
|
||||
count = 0
|
||||
for item in skills_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(item):
|
||||
continue
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
@@ -172,4 +172,81 @@ def load_hermes_dotenv(
|
||||
_load_dotenv_with_fallback(project_env_path, override=not loaded)
|
||||
loaded.append(project_env_path)
|
||||
|
||||
_apply_external_secret_sources(home_path)
|
||||
|
||||
return loaded
|
||||
|
||||
|
||||
def _apply_external_secret_sources(home_path: Path) -> None:
|
||||
"""Pull secrets from external sources (currently Bitwarden) into env.
|
||||
|
||||
Runs AFTER dotenv loads so .env values are visible (we use them to
|
||||
locate the access token) but BEFORE the rest of Hermes reads
|
||||
``os.environ`` for credentials. Any failure here is logged and
|
||||
swallowed — external secret sources must never block startup.
|
||||
"""
|
||||
try:
|
||||
cfg = _load_secrets_config(home_path)
|
||||
except Exception: # noqa: BLE001 — config errors must not block startup
|
||||
return
|
||||
|
||||
bw_cfg = (cfg or {}).get("bitwarden") or {}
|
||||
if not bw_cfg.get("enabled"):
|
||||
return
|
||||
|
||||
try:
|
||||
from agent.secret_sources.bitwarden import apply_bitwarden_secrets
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
result = apply_bitwarden_secrets(
|
||||
enabled=True,
|
||||
access_token_env=bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN"),
|
||||
project_id=bw_cfg.get("project_id", ""),
|
||||
override_existing=bool(bw_cfg.get("override_existing", False)),
|
||||
cache_ttl_seconds=float(bw_cfg.get("cache_ttl_seconds", 300)),
|
||||
auto_install=bool(bw_cfg.get("auto_install", True)),
|
||||
)
|
||||
|
||||
if result.applied:
|
||||
# Re-run the ASCII sanitization pass: BSM values are user-supplied
|
||||
# and might have the same copy-paste corruption as a manually
|
||||
# edited .env (see #6843).
|
||||
_sanitize_loaded_credentials()
|
||||
print(
|
||||
f" Bitwarden Secrets Manager: applied {len(result.applied)} "
|
||||
f"secret{'s' if len(result.applied) != 1 else ''} "
|
||||
f"({', '.join(sorted(result.applied))})",
|
||||
file=sys.stderr,
|
||||
)
|
||||
if result.error:
|
||||
print(
|
||||
f" Bitwarden Secrets Manager: {result.error}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
for warn in result.warnings:
|
||||
print(
|
||||
f" Bitwarden Secrets Manager: {warn}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
|
||||
def _load_secrets_config(home_path: Path) -> dict:
|
||||
"""Read just the ``secrets:`` section out of config.yaml.
|
||||
|
||||
Imported lazily and isolated from the main config loader so a
|
||||
malformed config can't take down dotenv loading entirely.
|
||||
"""
|
||||
config_path = home_path / "config.yaml"
|
||||
if not config_path.exists():
|
||||
return {}
|
||||
try:
|
||||
import yaml # type: ignore
|
||||
except ImportError:
|
||||
return {}
|
||||
try:
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
data = yaml.safe_load(f) or {}
|
||||
except Exception: # noqa: BLE001
|
||||
return {}
|
||||
return data.get("secrets") or {}
|
||||
|
||||
@@ -951,6 +951,58 @@ CREATE INDEX IF NOT EXISTS idx_notify_task ON kanban_notify_subs(task_
|
||||
|
||||
_INITIALIZED_PATHS: set[str] = set()
|
||||
_INIT_LOCK = threading.RLock()
|
||||
_SQLITE_HEADER = b"SQLite format 3\x00"
|
||||
|
||||
|
||||
def _looks_like_tls_record_at(data: bytes, offset: int) -> bool:
|
||||
"""Return True for a TLS record header at ``data[offset:]``."""
|
||||
if len(data) < offset + 5:
|
||||
return False
|
||||
content_type = data[offset]
|
||||
major = data[offset + 1]
|
||||
minor = data[offset + 2]
|
||||
length = int.from_bytes(data[offset + 3:offset + 5], "big")
|
||||
return (
|
||||
content_type in {0x14, 0x15, 0x16, 0x17}
|
||||
and major == 0x03
|
||||
and minor in {0x00, 0x01, 0x02, 0x03, 0x04}
|
||||
and 0 < length <= 18432
|
||||
)
|
||||
|
||||
|
||||
def _validate_sqlite_header(path: Path) -> None:
|
||||
"""Fail early with an actionable error for non-SQLite Kanban DB files.
|
||||
|
||||
``sqlite3.connect()`` creates missing and zero-byte files, so those are
|
||||
allowed. Existing non-empty files must have the SQLite header before we
|
||||
hand them to SQLite/WAL setup. This keeps corrupted page-0 failures from
|
||||
being collapsed into a generic PRAGMA error and lets the gateway's corrupt
|
||||
board handling identify the board by fingerprint.
|
||||
"""
|
||||
try:
|
||||
stat = path.stat()
|
||||
except FileNotFoundError:
|
||||
return
|
||||
except OSError:
|
||||
return
|
||||
if stat.st_size == 0:
|
||||
return
|
||||
try:
|
||||
with path.open("rb") as handle:
|
||||
head = handle.read(64)
|
||||
except OSError:
|
||||
return
|
||||
if head.startswith(_SQLITE_HEADER):
|
||||
return
|
||||
signature = ""
|
||||
if head.startswith(b"SQLit") and _looks_like_tls_record_at(head, 5):
|
||||
signature = " (TLS record header detected at byte offset 5)"
|
||||
elif _looks_like_tls_record_at(head, 0):
|
||||
signature = " (TLS record header detected at byte offset 0)"
|
||||
raise sqlite3.DatabaseError(
|
||||
"file is not a database: invalid SQLite header for "
|
||||
f"{path}{signature}; first_32={head[:32].hex(' ')}"
|
||||
)
|
||||
|
||||
|
||||
def connect(
|
||||
@@ -981,6 +1033,7 @@ def connect(
|
||||
else:
|
||||
path = kanban_db_path(board=board)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
_validate_sqlite_header(path)
|
||||
resolved = str(path.resolve())
|
||||
conn = sqlite3.connect(str(path), isolation_level=None, timeout=30)
|
||||
try:
|
||||
|
||||
+606
-103
@@ -261,11 +261,147 @@ import time as _time
|
||||
from datetime import datetime
|
||||
|
||||
from hermes_cli import __version__, __release_date__
|
||||
from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _is_termux_startup_environment(env: dict[str, str] | None = None) -> bool:
|
||||
"""Import-safe Termux check for cold-start-sensitive CLI paths."""
|
||||
check = env or os.environ
|
||||
prefix = str(check.get("PREFIX", ""))
|
||||
return bool(
|
||||
check.get("TERMUX_VERSION")
|
||||
or "com.termux/files/usr" in prefix
|
||||
or prefix.startswith("/data/data/com.termux/")
|
||||
)
|
||||
|
||||
|
||||
def _read_packed_ref(common_dir: Path, ref: str) -> str | None:
|
||||
"""Look up a ref in .git/packed-refs without spawning git.
|
||||
|
||||
packed-refs lines look like ``<sha> <ref>`` with optional ``^<sha>``
|
||||
peel lines and ``#``-prefixed comments / ``# pack-refs with:`` header.
|
||||
"""
|
||||
try:
|
||||
text = (common_dir / "packed-refs").read_text(encoding="utf-8", errors="replace")
|
||||
except OSError:
|
||||
return None
|
||||
for line in text.splitlines():
|
||||
if not line or line.startswith("#") or line.startswith("^"):
|
||||
continue
|
||||
parts = line.split(" ", 1)
|
||||
if len(parts) == 2 and parts[1].strip() == ref:
|
||||
return parts[0].strip()
|
||||
return None
|
||||
|
||||
|
||||
def _read_git_revision_fingerprint(repo_root: Path) -> str | None:
|
||||
"""Return a cheap checkout fingerprint without spawning git."""
|
||||
git_dir = repo_root / ".git"
|
||||
try:
|
||||
if git_dir.is_file():
|
||||
for line in git_dir.read_text(encoding="utf-8", errors="replace").splitlines():
|
||||
key, _, value = line.partition(":")
|
||||
if key.strip() == "gitdir" and value.strip():
|
||||
git_dir = (repo_root / value.strip()).resolve()
|
||||
break
|
||||
# Worktrees point HEAD at a per-worktree gitdir but pack their refs
|
||||
# in the main repo's gitdir (referenced via ``commondir``). Resolve
|
||||
# that up front so packed-refs lookups hit the right file.
|
||||
common_dir = git_dir
|
||||
commondir_file = git_dir / "commondir"
|
||||
if commondir_file.exists():
|
||||
try:
|
||||
rel = commondir_file.read_text(encoding="utf-8", errors="replace").strip()
|
||||
if rel:
|
||||
common_dir = (git_dir / rel).resolve()
|
||||
except OSError:
|
||||
pass
|
||||
head_file = git_dir / "HEAD"
|
||||
head = head_file.read_text(encoding="utf-8", errors="replace").strip()
|
||||
if head.startswith("ref:"):
|
||||
ref = head.split(":", 1)[1].strip()
|
||||
# Loose refs may live in the worktree gitdir OR the common dir
|
||||
# (branches created via `git worktree add` typically live in the
|
||||
# common dir's refs/heads/).
|
||||
for candidate in (git_dir, common_dir):
|
||||
ref_file = candidate / ref
|
||||
if ref_file.exists():
|
||||
return f"git:{ref}:{ref_file.read_text(encoding='utf-8', errors='replace').strip()}"
|
||||
packed_sha = _read_packed_ref(common_dir, ref)
|
||||
if packed_sha:
|
||||
return f"git:{ref}:{packed_sha}"
|
||||
# Ref name is known but unresolved — still stable across launches,
|
||||
# and the version/release fallback in the caller will invalidate
|
||||
# after `hermes update`.
|
||||
return f"git:{ref}:unresolved"
|
||||
return f"git:HEAD:{head}"
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
|
||||
def _termux_bundled_skills_fingerprint() -> str:
|
||||
"""Cheap invalidation key for Termux bundled-skill startup sync."""
|
||||
git_fp = _read_git_revision_fingerprint(PROJECT_ROOT)
|
||||
if git_fp:
|
||||
return git_fp
|
||||
skills_dir = PROJECT_ROOT / "skills"
|
||||
try:
|
||||
stat = skills_dir.stat()
|
||||
return f"skills:{__version__}:{__release_date__}:{stat.st_mtime_ns}:{stat.st_size}"
|
||||
except OSError:
|
||||
return f"skills:{__version__}:{__release_date__}:missing"
|
||||
|
||||
|
||||
def _termux_bundled_skills_stamp_path() -> Path:
|
||||
return get_hermes_home() / "skills" / ".termux_bundled_sync_stamp"
|
||||
|
||||
|
||||
def _termux_bundled_skills_sync_needed() -> bool:
|
||||
if not _is_termux_startup_environment():
|
||||
return True
|
||||
if os.environ.get("HERMES_TERMUX_FORCE_SKILLS_SYNC") == "1":
|
||||
return True
|
||||
try:
|
||||
stamp = _termux_bundled_skills_stamp_path()
|
||||
return stamp.read_text(encoding="utf-8").strip() != _termux_bundled_skills_fingerprint()
|
||||
except OSError:
|
||||
return True
|
||||
|
||||
|
||||
def _mark_termux_bundled_skills_synced() -> None:
|
||||
if not _is_termux_startup_environment():
|
||||
return
|
||||
try:
|
||||
stamp = _termux_bundled_skills_stamp_path()
|
||||
stamp.parent.mkdir(parents=True, exist_ok=True)
|
||||
stamp.write_text(_termux_bundled_skills_fingerprint() + "\n", encoding="utf-8")
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def _sync_bundled_skills_for_startup() -> bool:
|
||||
"""Sync bundled skills, but skip unchanged Termux checkouts cheaply.
|
||||
|
||||
Hashing every bundled skill is safe but expensive on older Android
|
||||
storage. The git/ref stamp keeps post-update correctness: a changed
|
||||
checkout revision forces one real sync, then later starts skip it.
|
||||
"""
|
||||
if _is_termux_startup_environment() and not _termux_bundled_skills_sync_needed():
|
||||
return False
|
||||
|
||||
from tools.skills_sync import sync_skills
|
||||
|
||||
sync_skills(quiet=True)
|
||||
_mark_termux_bundled_skills_synced()
|
||||
return True
|
||||
|
||||
|
||||
def _termux_should_prefetch_update_check() -> bool:
|
||||
if not _is_termux_startup_environment():
|
||||
return True
|
||||
return os.environ.get("HERMES_TERMUX_PREFETCH_UPDATES") == "1"
|
||||
|
||||
|
||||
def _relative_time(ts) -> str:
|
||||
"""Format a timestamp as relative time (e.g., '2h ago', 'yesterday')."""
|
||||
if not ts:
|
||||
@@ -967,6 +1103,72 @@ def _tui_need_npm_install(root: Path) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
_TUI_BUILD_INPUT_DIRS = (
|
||||
"src",
|
||||
"packages/hermes-ink/src",
|
||||
)
|
||||
|
||||
_TUI_BUILD_INPUT_FILES = (
|
||||
"package.json",
|
||||
"package-lock.json",
|
||||
"tsconfig.json",
|
||||
"tsconfig.build.json",
|
||||
"babel.compiler.config.cjs",
|
||||
"scripts/build.mjs",
|
||||
"packages/hermes-ink/package.json",
|
||||
"packages/hermes-ink/package-lock.json",
|
||||
"packages/hermes-ink/index.js",
|
||||
"packages/hermes-ink/text-input.js",
|
||||
)
|
||||
|
||||
_TUI_BUILD_INPUT_SUFFIXES = frozenset(
|
||||
{".cjs", ".js", ".jsx", ".json", ".mjs", ".ts", ".tsx"}
|
||||
)
|
||||
|
||||
|
||||
def _iter_tui_build_inputs(root: Path):
|
||||
"""Yield source/config files that affect ``ui-tui/dist/entry.js``."""
|
||||
for rel in _TUI_BUILD_INPUT_FILES:
|
||||
path = root / rel
|
||||
if path.is_file():
|
||||
yield path
|
||||
|
||||
for rel in _TUI_BUILD_INPUT_DIRS:
|
||||
base = root / rel
|
||||
if not base.is_dir():
|
||||
continue
|
||||
for path in base.rglob("*"):
|
||||
if path.is_file() and path.suffix in _TUI_BUILD_INPUT_SUFFIXES:
|
||||
yield path
|
||||
|
||||
|
||||
def _tui_need_rebuild(root: Path) -> bool:
|
||||
"""True when ``dist/entry.js`` is missing or older than TUI inputs.
|
||||
|
||||
The TUI bundle is self-contained. Rebuilding it on every launch adds a
|
||||
visible cold-start tax on slow Termux CPUs, while a simple mtime freshness
|
||||
check still rebuilds immediately after source updates, dependency updates,
|
||||
or local edits. Set ``HERMES_TUI_FORCE_BUILD=1`` to force the old behaviour.
|
||||
"""
|
||||
force = (os.environ.get("HERMES_TUI_FORCE_BUILD") or "").strip().lower()
|
||||
if force in {"1", "true", "yes", "on"}:
|
||||
return True
|
||||
|
||||
entry = root / "dist" / "entry.js"
|
||||
try:
|
||||
output_mtime = entry.stat().st_mtime
|
||||
except OSError:
|
||||
return True
|
||||
|
||||
for path in _iter_tui_build_inputs(root):
|
||||
try:
|
||||
if path.stat().st_mtime > output_mtime:
|
||||
return True
|
||||
except OSError:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _ensure_tui_node() -> None:
|
||||
"""Make sure `node` + `npm` are on PATH for the TUI.
|
||||
|
||||
@@ -1071,16 +1273,17 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
p = Path(ext_dir)
|
||||
if (p / "dist" / "entry.js").is_file():
|
||||
node = _node_bin("node")
|
||||
return [node, str(p / "dist" / "entry.js")], p
|
||||
return [node, "--expose-gc", str(p / "dist" / "entry.js")], p
|
||||
|
||||
# 1b. Bundled in wheel (pip install)
|
||||
bundled = _find_bundled_tui()
|
||||
if bundled is not None:
|
||||
node = _node_bin("node")
|
||||
return [node, str(bundled)], bundled.parent
|
||||
return [node, "--expose-gc", str(bundled)], bundled.parent
|
||||
|
||||
# 2. Normal flow: npm install if needed, always esbuild, then node dist/entry.js.
|
||||
# --dev flow: npm install if needed, then tsx src/entry.tsx.
|
||||
did_install = False
|
||||
if _tui_need_npm_install(tui_dir):
|
||||
npm = _node_bin("npm")
|
||||
if not os.environ.get("HERMES_QUIET"):
|
||||
@@ -1100,6 +1303,7 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
if preview:
|
||||
print(preview)
|
||||
sys.exit(1)
|
||||
did_install = True
|
||||
|
||||
if tui_dev:
|
||||
# Keep the local @hermes/ink package exports in sync with source.
|
||||
@@ -1128,24 +1332,31 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
return [str(tsx), "src/entry.tsx"], tui_dir
|
||||
return [npm, "start"], tui_dir
|
||||
|
||||
# Always rebuild — esbuild is fast and this avoids staleness-edge-case bugs.
|
||||
npm = _node_bin("npm")
|
||||
result = subprocess.run(
|
||||
[npm, "run", "build"],
|
||||
cwd=str(tui_dir),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
combined = f"{result.stdout or ''}{result.stderr or ''}".strip()
|
||||
preview = "\n".join(combined.splitlines()[-30:])
|
||||
print("TUI build failed.")
|
||||
if preview:
|
||||
print(preview)
|
||||
sys.exit(1)
|
||||
# Desktop/dev launches retain the historical "always rebuild" behaviour.
|
||||
# Termux cold starts use the freshness check because esbuild startup is
|
||||
# expensive on old mobile CPUs.
|
||||
should_build = True
|
||||
if _is_termux_startup_environment():
|
||||
should_build = did_install or _tui_need_rebuild(tui_dir)
|
||||
|
||||
if should_build:
|
||||
npm = _node_bin("npm")
|
||||
result = subprocess.run(
|
||||
[npm, "run", "build"],
|
||||
cwd=str(tui_dir),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
combined = f"{result.stdout or ''}{result.stderr or ''}".strip()
|
||||
preview = "\n".join(combined.splitlines()[-30:])
|
||||
print("TUI build failed.")
|
||||
if preview:
|
||||
print(preview)
|
||||
sys.exit(1)
|
||||
|
||||
node = _node_bin("node")
|
||||
return [node, str(tui_dir / "dist" / "entry.js")], tui_dir
|
||||
return [node, "--expose-gc", str(tui_dir / "dist" / "entry.js")], tui_dir
|
||||
|
||||
|
||||
def _normalize_tui_toolsets(toolsets: object) -> list[str]:
|
||||
@@ -1267,16 +1478,16 @@ def _launch_tui(
|
||||
env["HERMES_TUI_TOOL_PROGRESS"] = "off"
|
||||
if accept_hooks:
|
||||
env["HERMES_ACCEPT_HOOKS"] = "1"
|
||||
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
|
||||
# ~1.5–4GB depending on version and can fatal-OOM on long sessions with
|
||||
# large transcripts / reasoning blobs. Token-level merge: respect any
|
||||
# user-supplied --max-old-space-size (they may have set it higher) and
|
||||
# avoid duplicating --expose-gc.
|
||||
# Guarantee an 8GB V8 heap for the TUI. Default node cap is ~1.5–4GB
|
||||
# depending on version and can fatal-OOM on long sessions with large
|
||||
# transcripts / reasoning blobs. Token-level merge: respect any
|
||||
# user-supplied --max-old-space-size (they may have set it higher).
|
||||
# --expose-gc is *not* added here: Node rejects it in NODE_OPTIONS
|
||||
# ("--expose-gc is not allowed in NODE_OPTIONS") and refuses to start.
|
||||
# It is passed as a direct argv flag in _make_tui_argv() instead.
|
||||
_tokens = env.get("NODE_OPTIONS", "").split()
|
||||
if not any(t.startswith("--max-old-space-size=") for t in _tokens):
|
||||
_tokens.append("--max-old-space-size=8192")
|
||||
if "--expose-gc" not in _tokens:
|
||||
_tokens.append("--expose-gc")
|
||||
env["NODE_OPTIONS"] = " ".join(_tokens)
|
||||
# HERMES_TUI_RESUME is an internal hand-off from the Python wrapper to the
|
||||
# Ink app. Because we start from os.environ.copy(), an exported/stale value
|
||||
@@ -1384,6 +1595,29 @@ def cmd_chat(args):
|
||||
# If resolution fails, keep the original value — _init_agent will
|
||||
# report "Session not found" with the original input
|
||||
|
||||
# xAI retirement warning — one-shot, non-blocking, never fails startup
|
||||
try:
|
||||
from hermes_cli.xai_retirement import (
|
||||
MIGRATION_GUIDE_URL,
|
||||
RETIREMENT_DATE,
|
||||
find_retired_xai_refs,
|
||||
format_issue,
|
||||
)
|
||||
from hermes_cli.config import load_config as _load_config_for_xai_check
|
||||
|
||||
_retired_xai_refs = find_retired_xai_refs(_load_config_for_xai_check())
|
||||
if _retired_xai_refs:
|
||||
sys.stderr.write(
|
||||
f"\033[33m⚠ xAI retires {len(_retired_xai_refs)} model(s) "
|
||||
f"in your config on {RETIREMENT_DATE}:\033[0m\n"
|
||||
)
|
||||
for _ref in _retired_xai_refs:
|
||||
sys.stderr.write(f" \033[33m⚠\033[0m {format_issue(_ref)}\n")
|
||||
sys.stderr.write(f" \033[2mMigration guide: {MIGRATION_GUIDE_URL}\033[0m\n")
|
||||
sys.stderr.write(" \033[2mRun 'hermes doctor' for details.\033[0m\n\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# First-run guard: check if any provider is configured before launching
|
||||
if not _has_any_provider_configured():
|
||||
print()
|
||||
@@ -1416,19 +1650,20 @@ def cmd_chat(args):
|
||||
print("You can run 'hermes setup' at any time to configure.")
|
||||
sys.exit(1)
|
||||
|
||||
# Start update check in background (runs while other init happens)
|
||||
try:
|
||||
from hermes_cli.banner import prefetch_update_check
|
||||
# Start update check in background (runs while other init happens).
|
||||
# On Termux this imports rich/prompt_toolkit in the foreground and then
|
||||
# competes for CPU on single-core devices, so keep it opt-in there.
|
||||
if _termux_should_prefetch_update_check():
|
||||
try:
|
||||
from hermes_cli.banner import prefetch_update_check
|
||||
|
||||
prefetch_update_check()
|
||||
except Exception:
|
||||
pass
|
||||
prefetch_update_check()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Sync bundled skills on every CLI launch (fast -- skips unchanged skills)
|
||||
try:
|
||||
from tools.skills_sync import sync_skills
|
||||
|
||||
sync_skills(quiet=True)
|
||||
_sync_bundled_skills_for_startup()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -1746,6 +1981,25 @@ def cmd_whatsapp(args):
|
||||
print("⚠ Pairing may not have completed. Run 'hermes whatsapp' to try again.")
|
||||
|
||||
|
||||
def cmd_whatsapp_cloud(args):
|
||||
"""Set up WhatsApp Business Cloud API (official Meta integration).
|
||||
|
||||
Walks the user through the Meta-side credentials (Phone Number ID,
|
||||
Access Token, App Secret, optional App/WABA IDs) plus webhook
|
||||
configuration. Includes field-shape validators that catch the most
|
||||
common setup mistakes (e.g. pasting a phone number into the Phone
|
||||
Number ID field).
|
||||
|
||||
Distinct from ``hermes whatsapp`` (the Baileys bridge wizard) — the
|
||||
two adapters are complementary, not alternatives. See
|
||||
``hermes_cli/setup_whatsapp_cloud.py``.
|
||||
"""
|
||||
_require_tty("whatsapp-cloud")
|
||||
from hermes_cli.setup_whatsapp_cloud import run_whatsapp_cloud_setup
|
||||
|
||||
return run_whatsapp_cloud_setup()
|
||||
|
||||
|
||||
def cmd_setup(args):
|
||||
"""Interactive setup wizard."""
|
||||
from hermes_cli.setup import run_setup_wizard
|
||||
@@ -2566,6 +2820,7 @@ def _prompt_provider_choice(choices, *, default=0):
|
||||
|
||||
def _model_flow_openrouter(config, current_model=""):
|
||||
"""OpenRouter provider: ensure API key, then pick model."""
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from hermes_cli.auth import (
|
||||
ProviderConfig,
|
||||
_prompt_model_selection,
|
||||
@@ -2626,6 +2881,7 @@ def _model_flow_openrouter(config, current_model=""):
|
||||
|
||||
def _model_flow_ai_gateway(config, current_model=""):
|
||||
"""Vercel AI Gateway provider: ensure API key, then pick model with pricing."""
|
||||
from hermes_constants import AI_GATEWAY_BASE_URL
|
||||
from hermes_cli.auth import (
|
||||
PROVIDER_REGISTRY,
|
||||
_prompt_model_selection,
|
||||
@@ -4219,8 +4475,11 @@ def _model_flow_named_custom(config, provider_info):
|
||||
print(f" Provider: {name} ({base_url})")
|
||||
|
||||
|
||||
# Curated model lists for direct API-key providers — single source in models.py
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
# Keep the historical eager model catalog import on desktop/CI. Termux defers
|
||||
# it to the model-selection handlers so plain `hermes --tui` does not pay for
|
||||
# requests/models.dev catalog imports before the Node TUI starts.
|
||||
if not _is_termux_startup_environment():
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
|
||||
|
||||
def _current_reasoning_effort(config) -> str:
|
||||
@@ -4337,6 +4596,7 @@ def _model_flow_copilot(config, current_model=""):
|
||||
)
|
||||
from hermes_cli.config import save_env_value, load_config, save_config
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS,
|
||||
fetch_api_models,
|
||||
fetch_github_model_catalog,
|
||||
github_model_reasoning_efforts,
|
||||
@@ -4529,6 +4789,7 @@ def _model_flow_copilot_acp(config, current_model=""):
|
||||
resolve_external_process_provider_credentials,
|
||||
)
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS,
|
||||
fetch_github_model_catalog,
|
||||
normalize_copilot_model_id,
|
||||
)
|
||||
@@ -4732,6 +4993,7 @@ def _model_flow_kimi(config, current_model=""):
|
||||
load_config,
|
||||
save_config,
|
||||
)
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
|
||||
provider_id = "kimi-coding"
|
||||
pconfig = PROVIDER_REGISTRY[provider_id]
|
||||
@@ -4842,7 +5104,7 @@ def _model_flow_stepfun(config, current_model=""):
|
||||
load_config,
|
||||
save_config,
|
||||
)
|
||||
from hermes_cli.models import fetch_api_models
|
||||
from hermes_cli.models import _PROVIDER_MODELS, fetch_api_models
|
||||
|
||||
provider_id = "stepfun"
|
||||
pconfig = PROVIDER_REGISTRY[provider_id]
|
||||
@@ -5222,6 +5484,7 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
save_config,
|
||||
)
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS,
|
||||
fetch_api_models,
|
||||
opencode_model_api_mode,
|
||||
normalize_opencode_model_id,
|
||||
@@ -5855,8 +6118,7 @@ def cmd_import(args):
|
||||
run_import(args)
|
||||
|
||||
|
||||
def cmd_version(args):
|
||||
"""Show version."""
|
||||
def _print_version_info(*, check_updates: bool = True) -> None:
|
||||
print(f"Hermes Agent v{__version__} ({__release_date__})")
|
||||
print(f"Project: {PROJECT_ROOT}")
|
||||
|
||||
@@ -5876,6 +6138,9 @@ def cmd_version(args):
|
||||
except ImportError:
|
||||
print("OpenAI SDK: Not installed")
|
||||
|
||||
if not check_updates:
|
||||
return
|
||||
|
||||
# Show update status (synchronous — acceptable since user asked for version info)
|
||||
try:
|
||||
from hermes_cli.banner import check_for_updates
|
||||
@@ -5894,6 +6159,11 @@ def cmd_version(args):
|
||||
pass
|
||||
|
||||
|
||||
def cmd_version(args):
|
||||
"""Show version."""
|
||||
_print_version_info(check_updates=True)
|
||||
|
||||
|
||||
def cmd_uninstall(args):
|
||||
"""Uninstall Hermes Agent."""
|
||||
_require_tty("uninstall")
|
||||
@@ -5970,24 +6240,36 @@ def _validate_critical_files_syntax(root) -> tuple[bool, str | None, str | None]
|
||||
them after a successful ``git pull`` so we can auto-roll-back instead of
|
||||
leaving the user with a bricked install.
|
||||
|
||||
The compiled ``.pyc`` is written to a temp directory rather than the
|
||||
source tree's ``__pycache__/`` so we don't race with concurrent test
|
||||
workers that walk the same dir, and so we don't leave a stale pyc
|
||||
behind in production if the next interpreter run picks a different
|
||||
Python version. The pyc is discarded on function return either way —
|
||||
we only care about the compile-or-not signal.
|
||||
|
||||
Returns ``(ok, failing_path, error_message)``. ``ok=True`` means every
|
||||
file parsed cleanly.
|
||||
"""
|
||||
import py_compile
|
||||
import tempfile
|
||||
|
||||
root = Path(root)
|
||||
for relpath in _UPDATE_CRITICAL_FILES:
|
||||
path = root / relpath
|
||||
if not path.exists():
|
||||
# Missing file is suspicious but not necessarily fatal — a future
|
||||
# refactor may legitimately remove one of these. Skip and move on.
|
||||
continue
|
||||
try:
|
||||
py_compile.compile(str(path), doraise=True)
|
||||
except py_compile.PyCompileError as exc:
|
||||
return False, str(path), str(exc)
|
||||
except OSError as exc:
|
||||
return False, str(path), f"could not read: {exc}"
|
||||
with tempfile.TemporaryDirectory(prefix="hermes-syntax-check-") as tmpdir:
|
||||
for relpath in _UPDATE_CRITICAL_FILES:
|
||||
path = root / relpath
|
||||
if not path.exists():
|
||||
# Missing file is suspicious but not necessarily fatal — a future
|
||||
# refactor may legitimately remove one of these. Skip and move on.
|
||||
continue
|
||||
# Mirror the relative path under the tmpdir so two different
|
||||
# files with the same basename don't collide on the cfile name.
|
||||
cfile = Path(tmpdir) / (relpath.replace("/", "__") + "c")
|
||||
try:
|
||||
py_compile.compile(str(path), cfile=str(cfile), doraise=True)
|
||||
except py_compile.PyCompileError as exc:
|
||||
return False, str(path), str(exc)
|
||||
except OSError as exc:
|
||||
return False, str(path), f"could not read: {exc}"
|
||||
return True, None, None
|
||||
|
||||
|
||||
@@ -7639,9 +7921,7 @@ def _install_python_dependencies_with_optional_fallback(
|
||||
|
||||
|
||||
def _is_termux_env(env: dict[str, str] | None = None) -> bool:
|
||||
check = env or os.environ
|
||||
prefix = str(check.get("PREFIX", ""))
|
||||
return "com.termux" in prefix or prefix.startswith("/data/data/com.termux/")
|
||||
return _is_termux_startup_environment(env)
|
||||
|
||||
|
||||
def _is_android_python() -> bool:
|
||||
@@ -9438,6 +9718,7 @@ def _coalesce_session_name_args(argv: list) -> list:
|
||||
"gateway",
|
||||
"setup",
|
||||
"whatsapp",
|
||||
"whatsapp-cloud",
|
||||
"login",
|
||||
"logout",
|
||||
"auth",
|
||||
@@ -10295,11 +10576,11 @@ _BUILTIN_SUBCOMMANDS = frozenset(
|
||||
"computer-use",
|
||||
"config", "cron", "curator", "dashboard", "debug", "doctor",
|
||||
"dump", "fallback", "gateway", "hooks", "import", "insights",
|
||||
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
|
||||
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
|
||||
"model", "pairing", "plugins", "postinstall", "profile", "proxy",
|
||||
"send", "sessions", "setup",
|
||||
"skills", "slack", "status", "tools", "uninstall", "update",
|
||||
"version", "webhook", "whatsapp", "chat",
|
||||
"version", "webhook", "whatsapp", "whatsapp-cloud", "chat", "secrets",
|
||||
# Help-ish invocations — plugin commands not being listed in
|
||||
# top-level --help is an acceptable trade-off for skipping an
|
||||
# expensive eager import of every bundled plugin module.
|
||||
@@ -10389,6 +10670,178 @@ def _plugin_cli_discovery_needed() -> bool:
|
||||
return True
|
||||
|
||||
|
||||
_AGENT_COMMANDS = {None, "chat", "acp", "rl"}
|
||||
_AGENT_SUBCOMMANDS = {
|
||||
"cron": ("cron_command", {"run", "tick"}),
|
||||
"gateway": ("gateway_command", {"run"}),
|
||||
"mcp": ("mcp_action", {"serve"}),
|
||||
}
|
||||
|
||||
|
||||
def _prepare_agent_startup(args) -> None:
|
||||
"""Discover plugins/MCP/hooks for commands that can run an agent turn."""
|
||||
_sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
|
||||
if not (
|
||||
args.command in _AGENT_COMMANDS
|
||||
or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set)
|
||||
):
|
||||
return
|
||||
|
||||
_accept_hooks = bool(getattr(args, "accept_hooks", False))
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins
|
||||
|
||||
discover_plugins()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"plugin discovery failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
# MCP tool discovery — no event loop running in CLI/TUI startup,
|
||||
# so inline is safe. Moved here from model_tools.py module scope
|
||||
# to avoid freezing the gateway's event loop on its first message
|
||||
# via the same lazy import path (#16856).
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"MCP tool discovery failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from agent.shell_hooks import register_from_config
|
||||
|
||||
register_from_config(load_config(), accept_hooks=_accept_hooks)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"shell-hook registration failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
|
||||
def _set_chat_arg_defaults(args) -> None:
|
||||
for attr, default in [
|
||||
("query", None),
|
||||
("model", None),
|
||||
("provider", None),
|
||||
("toolsets", None),
|
||||
("verbose", False),
|
||||
("resume", None),
|
||||
("continue_last", None),
|
||||
("worktree", False),
|
||||
]:
|
||||
if not hasattr(args, attr):
|
||||
setattr(args, attr, default)
|
||||
|
||||
|
||||
def _is_termux_fast_version_argv(argv: list[str]) -> bool:
|
||||
return argv in (["--version"], ["-V"], ["version"])
|
||||
|
||||
|
||||
def _try_termux_fast_cli_launch() -> bool:
|
||||
"""Run obvious Termux non-TUI chat/oneshot/version paths on a light parser."""
|
||||
if not _is_termux_startup_environment():
|
||||
return False
|
||||
if os.environ.get("HERMES_TERMUX_DISABLE_FAST_CLI") == "1":
|
||||
return False
|
||||
|
||||
argv = sys.argv[1:]
|
||||
if "-h" in argv or "--help" in argv:
|
||||
return False
|
||||
if os.environ.get("HERMES_TUI") == "1" or "--tui" in argv:
|
||||
return False
|
||||
|
||||
if _is_termux_fast_version_argv(argv):
|
||||
_print_version_info(check_updates=False)
|
||||
return True
|
||||
|
||||
first = _first_positional_argv()
|
||||
has_oneshot = any(
|
||||
arg == "-z" or arg == "--oneshot" or arg.startswith("--oneshot=")
|
||||
for arg in argv
|
||||
)
|
||||
if not has_oneshot and first not in {None, "chat"}:
|
||||
return False
|
||||
|
||||
from hermes_cli._parser import build_top_level_parser
|
||||
|
||||
parser, _subparsers, chat_parser = build_top_level_parser()
|
||||
chat_parser.set_defaults(func=cmd_chat)
|
||||
args = parser.parse_args(_coalesce_session_name_args(argv))
|
||||
|
||||
if getattr(args, "version", False):
|
||||
_print_version_info(check_updates=False)
|
||||
return True
|
||||
|
||||
if getattr(args, "oneshot", None):
|
||||
_prepare_agent_startup(args)
|
||||
from hermes_cli.oneshot import run_oneshot
|
||||
|
||||
sys.exit(
|
||||
run_oneshot(
|
||||
args.oneshot,
|
||||
model=getattr(args, "model", None),
|
||||
provider=getattr(args, "provider", None),
|
||||
toolsets=getattr(args, "toolsets", None),
|
||||
)
|
||||
)
|
||||
|
||||
if (args.resume or args.continue_last) and args.command is None:
|
||||
args.command = "chat"
|
||||
|
||||
if args.command in {None, "chat"}:
|
||||
_set_chat_arg_defaults(args)
|
||||
_prepare_agent_startup(args)
|
||||
cmd_chat(args)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _try_termux_fast_tui_launch() -> bool:
|
||||
"""Launch obvious Termux TUI invocations before building every subparser.
|
||||
|
||||
`hermes --tui` is the hot path on phones. The full parser setup imports
|
||||
command modules for model, fallback, migrate, kanban, bundles, plugins,
|
||||
etc. even though the TUI immediately execs Node. On Termux only, parse the
|
||||
lightweight top-level/chat parser and hand off to ``cmd_chat`` when the
|
||||
invocation is unambiguously the built-in TUI/chat path.
|
||||
"""
|
||||
if not _is_termux_startup_environment():
|
||||
return False
|
||||
|
||||
if "-h" in sys.argv[1:] or "--help" in sys.argv[1:]:
|
||||
return False
|
||||
|
||||
wants_tui = os.environ.get("HERMES_TUI") == "1" or "--tui" in sys.argv[1:]
|
||||
if not wants_tui:
|
||||
return False
|
||||
|
||||
first = _first_positional_argv()
|
||||
if first not in {None, "chat"}:
|
||||
return False
|
||||
|
||||
from hermes_cli._parser import build_top_level_parser
|
||||
|
||||
parser, _subparsers, chat_parser = build_top_level_parser()
|
||||
chat_parser.set_defaults(func=cmd_chat)
|
||||
args = parser.parse_args(_coalesce_session_name_args(sys.argv[1:]))
|
||||
|
||||
# Preserve top-level behaviours whose semantics are not "launch chat/TUI".
|
||||
if getattr(args, "version", False) or getattr(args, "oneshot", None):
|
||||
return False
|
||||
if getattr(args, "command", None) not in {None, "chat"}:
|
||||
return False
|
||||
if not (getattr(args, "tui", False) or os.environ.get("HERMES_TUI") == "1"):
|
||||
return False
|
||||
|
||||
cmd_chat(args)
|
||||
return True
|
||||
|
||||
|
||||
def main():
|
||||
"""Main entry point for hermes CLI."""
|
||||
# Force UTF-8 stdio on Windows before anything prints. No-op elsewhere.
|
||||
@@ -10406,6 +10859,11 @@ def main():
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if _try_termux_fast_tui_launch():
|
||||
return
|
||||
if _try_termux_fast_cli_launch():
|
||||
return
|
||||
|
||||
from hermes_cli._parser import build_top_level_parser
|
||||
|
||||
parser, subparsers, chat_parser = build_top_level_parser()
|
||||
@@ -10502,6 +10960,80 @@ def main():
|
||||
)
|
||||
fallback_parser.set_defaults(func=cmd_fallback)
|
||||
|
||||
# =========================================================================
|
||||
# secrets command — external secret managers (currently: Bitwarden)
|
||||
# =========================================================================
|
||||
secrets_parser = subparsers.add_parser(
|
||||
"secrets",
|
||||
help="Manage external secret sources (Bitwarden Secrets Manager)",
|
||||
description=(
|
||||
"Pull API keys from an external secret manager at process startup "
|
||||
"instead of storing them in ~/.hermes/.env. Currently supports "
|
||||
"Bitwarden Secrets Manager. See: "
|
||||
"https://hermes-agent.nousresearch.com/docs/user-guide/secrets/bitwarden"
|
||||
),
|
||||
)
|
||||
secrets_subparsers = secrets_parser.add_subparsers(dest="secrets_command")
|
||||
|
||||
secrets_bw = secrets_subparsers.add_parser(
|
||||
"bitwarden",
|
||||
aliases=["bw"],
|
||||
help="Bitwarden Secrets Manager integration",
|
||||
)
|
||||
|
||||
# Lazy import — only pays for itself when this subcommand is actually used.
|
||||
from hermes_cli import secrets_cli as _secrets_cli
|
||||
|
||||
_secrets_cli.register_cli(secrets_bw)
|
||||
|
||||
def _dispatch_secrets(args): # noqa: ANN001
|
||||
sub = getattr(args, "secrets_command", None)
|
||||
bw_sub = getattr(args, "secrets_bw_command", None)
|
||||
if sub in ("bitwarden", "bw") and bw_sub is not None:
|
||||
return args.func(args)
|
||||
secrets_parser.print_help()
|
||||
return 0
|
||||
|
||||
secrets_parser.set_defaults(func=_dispatch_secrets)
|
||||
|
||||
# =========================================================================
|
||||
# migrate command
|
||||
# =========================================================================
|
||||
from hermes_cli.migrate import cmd_migrate, cmd_migrate_xai
|
||||
|
||||
migrate_parser = subparsers.add_parser(
|
||||
"migrate",
|
||||
help="Migrate configuration for retired models or deprecated settings",
|
||||
description=(
|
||||
"Diagnose and (optionally) rewrite the active config.yaml to "
|
||||
"replace references to retired models or deprecated settings."
|
||||
),
|
||||
)
|
||||
migrate_subparsers = migrate_parser.add_subparsers(dest="migrate_type")
|
||||
|
||||
migrate_xai = migrate_subparsers.add_parser(
|
||||
"xai",
|
||||
help="Migrate xAI models scheduled for retirement on May 15, 2026",
|
||||
description=(
|
||||
"Scan config.yaml for references to xAI models retiring on "
|
||||
"May 15, 2026 and, with --apply, rewrite them in-place to the "
|
||||
"official replacements per the xAI migration guide. The original "
|
||||
"config.yaml is backed up before any rewrite."
|
||||
),
|
||||
)
|
||||
migrate_xai.add_argument(
|
||||
"--apply",
|
||||
action="store_true",
|
||||
help="Rewrite config.yaml in-place (default: dry-run, no writes)",
|
||||
)
|
||||
migrate_xai.add_argument(
|
||||
"--no-backup",
|
||||
action="store_true",
|
||||
help="Skip the timestamped backup of config.yaml when applying",
|
||||
)
|
||||
migrate_xai.set_defaults(func=cmd_migrate_xai)
|
||||
migrate_parser.set_defaults(func=cmd_migrate)
|
||||
|
||||
# =========================================================================
|
||||
# gateway command
|
||||
# =========================================================================
|
||||
@@ -10799,6 +11331,21 @@ def main():
|
||||
)
|
||||
whatsapp_parser.set_defaults(func=cmd_whatsapp)
|
||||
|
||||
# =========================================================================
|
||||
# whatsapp-cloud command (official Meta Cloud API; complement to Baileys)
|
||||
# =========================================================================
|
||||
whatsapp_cloud_parser = subparsers.add_parser(
|
||||
"whatsapp-cloud",
|
||||
help="Set up WhatsApp Business Cloud API integration",
|
||||
description=(
|
||||
"Configure the official Meta WhatsApp Business Cloud API "
|
||||
"adapter (Business account required, public webhook URL "
|
||||
"required). Distinct from `hermes whatsapp` which sets up "
|
||||
"the Baileys bridge for personal accounts."
|
||||
),
|
||||
)
|
||||
whatsapp_cloud_parser.set_defaults(func=cmd_whatsapp_cloud)
|
||||
|
||||
# =========================================================================
|
||||
# slack command
|
||||
# =========================================================================
|
||||
@@ -13129,51 +13676,7 @@ Examples:
|
||||
# so introspection/management commands (hermes hooks list, cron
|
||||
# list, gateway status, mcp add, ...) don't pay discovery cost or
|
||||
# trigger consent prompts for hooks the user is still inspecting.
|
||||
# Groups with mixed admin/CRUD vs. agent-running entries narrow via
|
||||
# the nested subcommand (dest varies by parser).
|
||||
_AGENT_COMMANDS = {None, "chat", "acp", "rl"}
|
||||
_AGENT_SUBCOMMANDS = {
|
||||
"cron": ("cron_command", {"run", "tick"}),
|
||||
"gateway": ("gateway_command", {"run"}),
|
||||
"mcp": ("mcp_action", {"serve"}),
|
||||
}
|
||||
_sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
|
||||
if args.command in _AGENT_COMMANDS or (
|
||||
_sub_attr and getattr(args, _sub_attr, None) in _sub_set
|
||||
):
|
||||
_accept_hooks = bool(getattr(args, "accept_hooks", False))
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins
|
||||
|
||||
discover_plugins()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"plugin discovery failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
# MCP tool discovery — no event loop running in CLI/TUI startup,
|
||||
# so inline is safe. Moved here from model_tools.py module scope
|
||||
# to avoid freezing the gateway's event loop on its first message
|
||||
# via the same lazy import path (#16856).
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"MCP tool discovery failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from agent.shell_hooks import register_from_config
|
||||
|
||||
register_from_config(load_config(), accept_hooks=_accept_hooks)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"shell-hook registration failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
_prepare_agent_startup(args)
|
||||
|
||||
# Handle top-level --oneshot / -z: single-shot mode, stdout = final
|
||||
# response only, nothing else. Bypasses cli.py entirely.
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
"""CLI handlers for ``hermes migrate ...``.
|
||||
|
||||
Currently exposes only ``hermes migrate xai`` — diagnoses and (with --apply)
|
||||
rewrites references to xAI models retired on May 15, 2026.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
|
||||
def cmd_migrate(args: Any) -> int:
|
||||
"""Dispatcher for ``hermes migrate <subtype>``."""
|
||||
sub = getattr(args, "migrate_type", None)
|
||||
if sub == "xai":
|
||||
return cmd_migrate_xai(args)
|
||||
|
||||
print("usage: hermes migrate xai [--apply] [--no-backup]", file=sys.stderr)
|
||||
return 2
|
||||
|
||||
|
||||
def cmd_migrate_xai(args: Any) -> int:
|
||||
"""Run xAI May-15 model migration in dry-run or apply mode."""
|
||||
from hermes_cli.xai_retirement import (
|
||||
MIGRATION_GUIDE_URL,
|
||||
RETIREMENT_DATE,
|
||||
apply_migration,
|
||||
find_retired_xai_refs,
|
||||
format_issue,
|
||||
)
|
||||
|
||||
apply = bool(getattr(args, "apply", False))
|
||||
no_backup = bool(getattr(args, "no_backup", False))
|
||||
|
||||
config = load_config()
|
||||
issues = find_retired_xai_refs(config)
|
||||
|
||||
print()
|
||||
print(color(
|
||||
f"◆ xAI Model Retirement Migration ({RETIREMENT_DATE})",
|
||||
Colors.CYAN, Colors.BOLD,
|
||||
))
|
||||
print()
|
||||
|
||||
if not issues:
|
||||
print(f" {color('✓', Colors.GREEN)} No retired xAI models in config — nothing to migrate.")
|
||||
return 0
|
||||
|
||||
print(f" Found {len(issues)} retired xAI model reference(s):")
|
||||
print()
|
||||
for issue in issues:
|
||||
print(f" {color('⚠', Colors.YELLOW)} {format_issue(issue)}")
|
||||
print()
|
||||
print(f" {color('→', Colors.CYAN)} Migration guide: {MIGRATION_GUIDE_URL}")
|
||||
print()
|
||||
|
||||
config_path = _resolve_config_path()
|
||||
|
||||
if not apply:
|
||||
print(color("Dry-run mode — no changes written.", Colors.DIM))
|
||||
print(color(
|
||||
"Re-run with `hermes migrate xai --apply` to rewrite "
|
||||
f"{config_path} in-place (backup created automatically).",
|
||||
Colors.DIM,
|
||||
))
|
||||
return 0
|
||||
|
||||
if not config_path or not config_path.exists():
|
||||
print(
|
||||
f" {color('✗', Colors.RED)} Could not locate config.yaml "
|
||||
f"(looked at: {config_path})",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 1
|
||||
|
||||
try:
|
||||
result = apply_migration(
|
||||
config_path=config_path,
|
||||
issues=issues,
|
||||
backup=not no_backup,
|
||||
)
|
||||
except Exception as exc:
|
||||
print(
|
||||
f" {color('✗', Colors.RED)} Migration failed: {exc}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 1
|
||||
|
||||
if not result.config_changed:
|
||||
print(f" {color('⚠', Colors.YELLOW)} No changes written.")
|
||||
return 0
|
||||
|
||||
if result.backup_path is not None:
|
||||
print(f" {color('✓', Colors.GREEN)} Backup: {result.backup_path}")
|
||||
print(
|
||||
f" {color('✓', Colors.GREEN)} Updated {len(result.issues_resolved)} "
|
||||
f"slot(s) in {result.file_path}"
|
||||
)
|
||||
print()
|
||||
print(color(
|
||||
"Run `hermes doctor` to confirm no retired xAI models remain.",
|
||||
Colors.DIM,
|
||||
))
|
||||
return 0
|
||||
|
||||
|
||||
def _resolve_config_path() -> Path:
|
||||
"""Best-effort: locate the active config.yaml on disk."""
|
||||
from hermes_cli.config import get_hermes_home
|
||||
|
||||
return get_hermes_home() / "config.yaml"
|
||||
@@ -66,6 +66,10 @@ class NousSubscriptionFeatures:
|
||||
def tts(self) -> NousFeatureState:
|
||||
return self.features["tts"]
|
||||
|
||||
@property
|
||||
def stt(self) -> NousFeatureState:
|
||||
return self.features["stt"]
|
||||
|
||||
@property
|
||||
def browser(self) -> NousFeatureState:
|
||||
return self.features["browser"]
|
||||
@@ -75,7 +79,7 @@ class NousSubscriptionFeatures:
|
||||
return self.features["modal"]
|
||||
|
||||
def items(self) -> Iterable[NousFeatureState]:
|
||||
ordered = ("web", "image_gen", "tts", "browser", "modal")
|
||||
ordered = ("web", "image_gen", "tts", "stt", "browser", "modal")
|
||||
for key in ordered:
|
||||
yield self.features[key]
|
||||
|
||||
@@ -159,6 +163,16 @@ def _tts_label(current_provider: str) -> str:
|
||||
return mapping.get(current_provider or "edge", current_provider or "Edge TTS")
|
||||
|
||||
|
||||
def _stt_label(current_provider: str) -> str:
|
||||
mapping = {
|
||||
"openai": "OpenAI Whisper",
|
||||
"groq": "Groq Whisper",
|
||||
"mistral": "Mistral Voxtral Transcribe",
|
||||
"local": "Local faster-whisper",
|
||||
}
|
||||
return mapping.get(current_provider or "local", current_provider or "Local faster-whisper")
|
||||
|
||||
|
||||
def _resolve_browser_feature_state(
|
||||
*,
|
||||
browser_tool_enabled: bool,
|
||||
@@ -251,6 +265,7 @@ def get_nous_subscription_features(
|
||||
|
||||
web_cfg = config.get("web") if isinstance(config.get("web"), dict) else {}
|
||||
tts_cfg = config.get("tts") if isinstance(config.get("tts"), dict) else {}
|
||||
stt_cfg = config.get("stt") if isinstance(config.get("stt"), dict) else {}
|
||||
browser_cfg = config.get("browser") if isinstance(config.get("browser"), dict) else {}
|
||||
terminal_cfg = config.get("terminal") if isinstance(config.get("terminal"), dict) else {}
|
||||
|
||||
@@ -260,6 +275,11 @@ def get_nous_subscription_features(
|
||||
web_search_backend = str(web_cfg.get("search_backend") or "").strip().lower()
|
||||
web_extract_backend = str(web_cfg.get("extract_backend") or "").strip().lower()
|
||||
tts_provider = str(tts_cfg.get("provider") or "edge").strip().lower()
|
||||
# STT default is "local" (faster-whisper) per DEFAULT_CONFIG, which
|
||||
# requires `pip install faster-whisper`. For Nous subscribers we'd
|
||||
# rather route through the managed OpenAI audio gateway — see
|
||||
# apply_nous_managed_defaults below.
|
||||
stt_provider = str(stt_cfg.get("provider") or "local").strip().lower()
|
||||
browser_provider_explicit = "cloud_provider" in browser_cfg
|
||||
browser_provider = normalize_browser_cloud_provider(
|
||||
browser_cfg.get("cloud_provider") if browser_provider_explicit else None
|
||||
@@ -276,6 +296,7 @@ def get_nous_subscription_features(
|
||||
# prevent gateway routing.
|
||||
web_use_gateway = _uses_gateway(web_cfg)
|
||||
tts_use_gateway = _uses_gateway(tts_cfg)
|
||||
stt_use_gateway = _uses_gateway(stt_cfg)
|
||||
browser_use_gateway = _uses_gateway(browser_cfg)
|
||||
image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}
|
||||
image_use_gateway = _uses_gateway(image_gen_cfg)
|
||||
@@ -293,6 +314,22 @@ def get_nous_subscription_features(
|
||||
direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY"))
|
||||
direct_modal = has_direct_modal_credentials()
|
||||
|
||||
# STT direct providers. OpenAI Whisper reuses the same audio key as
|
||||
# OpenAI TTS — resolve_openai_audio_api_key() reads VOICE_TOOLS_OPENAI_KEY
|
||||
# and falls back to OPENAI_API_KEY. The local provider's "direct"
|
||||
# signal is whether faster-whisper is importable; we lazy-import so
|
||||
# this module stays cheap on the happy path.
|
||||
direct_openai_stt = bool(resolve_openai_audio_api_key())
|
||||
direct_groq_stt = bool(get_env_value("GROQ_API_KEY"))
|
||||
direct_mistral_stt = bool(get_env_value("MISTRAL_API_KEY"))
|
||||
try:
|
||||
from tools.transcription_tools import _HAS_FASTER_WHISPER
|
||||
local_stt_available = bool(_HAS_FASTER_WHISPER) or bool(
|
||||
get_env_value("HERMES_LOCAL_STT_COMMAND")
|
||||
)
|
||||
except Exception:
|
||||
local_stt_available = bool(get_env_value("HERMES_LOCAL_STT_COMMAND"))
|
||||
|
||||
# When use_gateway is set, suppress direct credentials for managed detection
|
||||
if web_use_gateway:
|
||||
direct_firecrawl = False
|
||||
@@ -304,6 +341,11 @@ def get_nous_subscription_features(
|
||||
if tts_use_gateway:
|
||||
direct_openai_tts = False
|
||||
direct_elevenlabs = False
|
||||
if stt_use_gateway:
|
||||
direct_openai_stt = False
|
||||
direct_groq_stt = False
|
||||
direct_mistral_stt = False
|
||||
local_stt_available = False
|
||||
if browser_use_gateway:
|
||||
direct_browser_use = False
|
||||
direct_browserbase = False
|
||||
@@ -311,6 +353,10 @@ def get_nous_subscription_features(
|
||||
managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
|
||||
managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
|
||||
managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
|
||||
# STT and TTS share the same managed gateway endpoint ("openai-audio")
|
||||
# because the OpenAI audio API covers both /audio/speech (TTS) and
|
||||
# /audio/transcriptions (STT). One probe, used by both.
|
||||
managed_stt_available = managed_tts_available
|
||||
managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
|
||||
managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
|
||||
modal_state = resolve_modal_backend_state(
|
||||
@@ -361,6 +407,24 @@ def get_nous_subscription_features(
|
||||
)
|
||||
tts_active = bool(tts_tool_enabled and tts_available)
|
||||
|
||||
# STT availability per provider. Unlike TTS, STT isn't a model-callable
|
||||
# tool — the gateway voice middleware calls it on every inbound voice
|
||||
# message — so toolset_enabled is N/A and we treat stt as always
|
||||
# "enabled" if a usable provider is configured.
|
||||
stt_current_provider = stt_provider or "local"
|
||||
stt_managed = (
|
||||
stt_current_provider == "openai"
|
||||
and managed_stt_available
|
||||
and not direct_openai_stt
|
||||
)
|
||||
stt_available = bool(
|
||||
(stt_current_provider == "local" and local_stt_available)
|
||||
or (stt_current_provider == "openai" and (managed_stt_available or direct_openai_stt))
|
||||
or (stt_current_provider == "groq" and direct_groq_stt)
|
||||
or (stt_current_provider == "mistral" and direct_mistral_stt)
|
||||
)
|
||||
stt_active = stt_available
|
||||
|
||||
browser_local_available = _has_agent_browser()
|
||||
(
|
||||
browser_current_provider,
|
||||
@@ -415,6 +479,13 @@ def get_nous_subscription_features(
|
||||
if isinstance(raw_tts_cfg, dict) and "provider" in raw_tts_cfg:
|
||||
tts_explicit_configured = tts_provider not in {"", "edge"}
|
||||
|
||||
# STT considers any non-default provider explicit. "local" is the
|
||||
# DEFAULT_CONFIG seed, so seeing it doesn't mean the user picked it.
|
||||
stt_explicit_configured = False
|
||||
raw_stt_cfg = config.get("stt")
|
||||
if isinstance(raw_stt_cfg, dict) and "provider" in raw_stt_cfg:
|
||||
stt_explicit_configured = stt_provider not in {"", "local"}
|
||||
|
||||
features = {
|
||||
"web": NousFeatureState(
|
||||
key="web",
|
||||
@@ -452,6 +523,21 @@ def get_nous_subscription_features(
|
||||
current_provider=_tts_label(tts_current_provider),
|
||||
explicit_configured=tts_explicit_configured,
|
||||
),
|
||||
"stt": NousFeatureState(
|
||||
key="stt",
|
||||
label="Speech-to-text",
|
||||
included_by_default=True,
|
||||
available=stt_available,
|
||||
active=stt_active,
|
||||
managed_by_nous=stt_managed,
|
||||
direct_override=stt_active and not stt_managed,
|
||||
# STT isn't toolset-gated (gateway middleware calls it
|
||||
# unconditionally on inbound voice), so report True so the
|
||||
# status display doesn't flag it as "tool disabled".
|
||||
toolset_enabled=True,
|
||||
current_provider=_stt_label(stt_current_provider),
|
||||
explicit_configured=stt_explicit_configured,
|
||||
),
|
||||
"browser": NousFeatureState(
|
||||
key="browser",
|
||||
label="Browser automation",
|
||||
@@ -514,6 +600,11 @@ def apply_nous_managed_defaults(
|
||||
tts_cfg = {}
|
||||
config["tts"] = tts_cfg
|
||||
|
||||
stt_cfg = config.get("stt")
|
||||
if not isinstance(stt_cfg, dict):
|
||||
stt_cfg = {}
|
||||
config["stt"] = stt_cfg
|
||||
|
||||
browser_cfg = config.get("browser")
|
||||
if not isinstance(browser_cfg, dict):
|
||||
browser_cfg = {}
|
||||
@@ -535,6 +626,18 @@ def apply_nous_managed_defaults(
|
||||
tts_cfg["provider"] = "openai"
|
||||
changed.add("tts")
|
||||
|
||||
# STT: same pattern as TTS. The DEFAULT_CONFIG seed is "local"
|
||||
# (requires `pip install faster-whisper`); for Nous subscribers we
|
||||
# flip it to "openai" so the managed audio gateway handles transcription
|
||||
# via the same auth as TTS. Skipped when the user has explicitly
|
||||
# configured STT or has direct credentials for a non-managed provider.
|
||||
if not features.stt.explicit_configured and not (
|
||||
get_env_value("GROQ_API_KEY")
|
||||
or get_env_value("MISTRAL_API_KEY")
|
||||
):
|
||||
stt_cfg["provider"] = "openai"
|
||||
changed.add("stt")
|
||||
|
||||
if "browser" in selected_toolsets and not features.browser.explicit_configured and not (
|
||||
get_env_value("BROWSER_USE_API_KEY")
|
||||
or get_env_value("BROWSERBASE_API_KEY")
|
||||
@@ -556,6 +659,7 @@ _GATEWAY_TOOL_LABELS = {
|
||||
"web": "Web search & extract (Firecrawl)",
|
||||
"image_gen": "Image generation (FAL)",
|
||||
"tts": "Text-to-speech (OpenAI TTS)",
|
||||
"stt": "Speech-to-text (OpenAI Whisper)",
|
||||
"browser": "Browser automation (Browser Use)",
|
||||
}
|
||||
|
||||
@@ -575,6 +679,15 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
|
||||
resolve_openai_audio_api_key()
|
||||
or get_env_value("ELEVENLABS_API_KEY")
|
||||
),
|
||||
# STT direct credentials. OpenAI Whisper shares the audio key
|
||||
# with TTS via resolve_openai_audio_api_key() — counting it here
|
||||
# too is intentional: if the user has an OpenAI audio key they
|
||||
# don't need the gateway for either.
|
||||
"stt": bool(
|
||||
resolve_openai_audio_api_key()
|
||||
or get_env_value("GROQ_API_KEY")
|
||||
or get_env_value("MISTRAL_API_KEY")
|
||||
),
|
||||
"browser": bool(
|
||||
get_env_value("BROWSER_USE_API_KEY")
|
||||
or (get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID"))
|
||||
@@ -586,10 +699,11 @@ _GATEWAY_DIRECT_LABELS = {
|
||||
"web": "Firecrawl/Exa/Parallel/Tavily key",
|
||||
"image_gen": "FAL key",
|
||||
"tts": "OpenAI/ElevenLabs key",
|
||||
"stt": "OpenAI/Groq/Mistral key",
|
||||
"browser": "Browser Use/Browserbase key",
|
||||
}
|
||||
|
||||
_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "browser")
|
||||
_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "stt", "browser")
|
||||
|
||||
|
||||
def get_gateway_eligible_tools(
|
||||
@@ -625,6 +739,7 @@ def get_gateway_eligible_tools(
|
||||
"web": _uses_gateway(config.get("web")),
|
||||
"image_gen": _uses_gateway(config.get("image_gen")),
|
||||
"tts": _uses_gateway(config.get("tts")),
|
||||
"stt": _uses_gateway(config.get("stt")),
|
||||
"browser": _uses_gateway(config.get("browser")),
|
||||
}
|
||||
|
||||
@@ -664,6 +779,11 @@ def apply_gateway_defaults(
|
||||
tts_cfg = {}
|
||||
config["tts"] = tts_cfg
|
||||
|
||||
stt_cfg = config.get("stt")
|
||||
if not isinstance(stt_cfg, dict):
|
||||
stt_cfg = {}
|
||||
config["stt"] = stt_cfg
|
||||
|
||||
browser_cfg = config.get("browser")
|
||||
if not isinstance(browser_cfg, dict):
|
||||
browser_cfg = {}
|
||||
@@ -679,6 +799,11 @@ def apply_gateway_defaults(
|
||||
tts_cfg["use_gateway"] = True
|
||||
changed.add("tts")
|
||||
|
||||
if "stt" in tool_keys:
|
||||
stt_cfg["provider"] = "openai"
|
||||
stt_cfg["use_gateway"] = True
|
||||
changed.add("stt")
|
||||
|
||||
if "browser" in tool_keys:
|
||||
browser_cfg["cloud_provider"] = "browser-use"
|
||||
browser_cfg["use_gateway"] = True
|
||||
@@ -717,8 +842,9 @@ def prompt_enable_tool_gateway(config: Dict[str, object]) -> set[str]:
|
||||
desc_parts: list[str] = [
|
||||
"",
|
||||
" The Tool Gateway gives you access to web search, image generation,",
|
||||
" text-to-speech, and browser automation through your Nous subscription.",
|
||||
" No need to sign up for separate API keys — just pick the tools you want.",
|
||||
" text-to-speech, speech-to-text, and browser automation through your",
|
||||
" Nous subscription. No need to sign up for separate API keys — just",
|
||||
" pick the tools you want.",
|
||||
"",
|
||||
]
|
||||
if already_managed:
|
||||
|
||||
@@ -24,6 +24,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
|
||||
("discord", PlatformInfo(label="💬 Discord", default_toolset="hermes-discord")),
|
||||
("slack", PlatformInfo(label="💼 Slack", default_toolset="hermes-slack")),
|
||||
("whatsapp", PlatformInfo(label="📱 WhatsApp", default_toolset="hermes-whatsapp")),
|
||||
("whatsapp_cloud", PlatformInfo(label="📱 WhatsApp Business (Cloud)", default_toolset="hermes-whatsapp")),
|
||||
("signal", PlatformInfo(label="📡 Signal", default_toolset="hermes-signal")),
|
||||
("bluebubbles", PlatformInfo(label="💙 BlueBubbles", default_toolset="hermes-bluebubbles")),
|
||||
("email", PlatformInfo(label="📧 Email", default_toolset="hermes-email")),
|
||||
|
||||
@@ -35,6 +35,7 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_cli import profiles as profiles_mod
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -109,8 +110,7 @@ def _collect_skills(profile_dir: Path) -> list[str]:
|
||||
return []
|
||||
names: list[str] = []
|
||||
for md in skills_dir.rglob("SKILL.md"):
|
||||
path_str = str(md)
|
||||
if "/.hub/" in path_str or "/.git/" in path_str:
|
||||
if is_excluded_skill_path(md):
|
||||
continue
|
||||
try:
|
||||
rel = md.relative_to(skills_dir)
|
||||
@@ -201,7 +201,7 @@ def describe_profile(
|
||||
skill_list = "\n".join(f" - {n}" for n in skill_names) or " (no skills installed)"
|
||||
skill_count = sum(
|
||||
1 for _ in (profile_dir / "skills").rglob("SKILL.md")
|
||||
if "/.hub/" not in str(_) and "/.git/" not in str(_)
|
||||
if not is_excluded_skill_path(_)
|
||||
) if (profile_dir / "skills").is_dir() else 0
|
||||
|
||||
# Read model + provider from the profile's config.
|
||||
|
||||
@@ -70,6 +70,8 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
@@ -463,7 +465,9 @@ def _count_skills(staged: Path) -> int:
|
||||
skills_dir = staged / "skills"
|
||||
if not skills_dir.is_dir():
|
||||
return 0
|
||||
return sum(1 for _ in skills_dir.rglob("SKILL.md"))
|
||||
return sum(
|
||||
1 for p in skills_dir.rglob("SKILL.md") if not is_excluded_skill_path(p)
|
||||
)
|
||||
|
||||
|
||||
def plan_install(
|
||||
|
||||
+48
-3
@@ -30,6 +30,8 @@ from dataclasses import dataclass
|
||||
from pathlib import Path, PurePosixPath, PureWindowsPath
|
||||
from typing import List, Optional
|
||||
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
_PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
|
||||
|
||||
# Directories bootstrapped inside every new profile
|
||||
@@ -485,8 +487,9 @@ def _count_skills(profile_dir: Path) -> int:
|
||||
return 0
|
||||
count = 0
|
||||
for md in skills_dir.rglob("SKILL.md"):
|
||||
if "/.hub/" not in str(md) and "/.git/" not in str(md):
|
||||
count += 1
|
||||
if is_excluded_skill_path(md):
|
||||
continue
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
@@ -902,7 +905,49 @@ def delete_profile(name: str, yes: bool = False) -> Path:
|
||||
|
||||
# 4. Remove profile directory
|
||||
try:
|
||||
shutil.rmtree(profile_dir)
|
||||
def _make_writable(func, path, exc):
|
||||
"""onexc/onerror handler: add +w on PermissionError so rmtree can proceed.
|
||||
|
||||
Handles two cases on NixOS (and other systems with read-only
|
||||
copies from immutable stores):
|
||||
1. The path itself isn't writable (e.g. a file with mode 0444)
|
||||
2. The *parent* directory isn't writable (e.g. mode 0555)
|
||||
|
||||
Compatible with both the ``onexc`` API (3.12+, receives an
|
||||
exception instance) and the ``onerror`` API (3.11-, receives
|
||||
``sys.exc_info()`` tuple).
|
||||
"""
|
||||
import stat as _stat
|
||||
import sys as _sys
|
||||
|
||||
# Normalise the two callback signatures:
|
||||
# onexc(func, path, exc_instance) — 3.12+
|
||||
# onerror(func, path, exc_info_tuple) — 3.11
|
||||
if isinstance(exc, tuple):
|
||||
exc = exc[1] # exc_info → actual exception object
|
||||
|
||||
if isinstance(exc, PermissionError):
|
||||
# Make the path writable
|
||||
try:
|
||||
os.chmod(path, os.stat(path).st_mode | _stat.S_IWUSR)
|
||||
except OSError:
|
||||
pass
|
||||
# Also make the parent writable (needed for unlink/rmdir)
|
||||
parent = os.path.dirname(path)
|
||||
if parent:
|
||||
try:
|
||||
os.chmod(parent, os.stat(parent).st_mode | _stat.S_IWUSR)
|
||||
except OSError:
|
||||
pass
|
||||
func(path)
|
||||
else:
|
||||
raise
|
||||
|
||||
# ``onexc`` was added in 3.12; fall back to ``onerror`` on 3.11.
|
||||
try:
|
||||
shutil.rmtree(profile_dir, onexc=_make_writable)
|
||||
except TypeError:
|
||||
shutil.rmtree(profile_dir, onerror=_make_writable)
|
||||
print(f"✓ Removed {profile_dir}")
|
||||
except Exception as e:
|
||||
print(f"⚠ Could not remove {profile_dir}: {e}")
|
||||
|
||||
@@ -100,6 +100,63 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _host_derived_api_key(base_url: str) -> str:
|
||||
"""Look up `<VENDOR>_API_KEY` in the env, derived from the base URL host.
|
||||
|
||||
Examples:
|
||||
https://api.deepseek.com/v1 → DEEPSEEK_API_KEY
|
||||
https://api.groq.com/openai/v1 → GROQ_API_KEY
|
||||
https://api.mistral.ai/v1 → MISTRAL_API_KEY
|
||||
https://generativelanguage.googleapis.com/v1beta/openai/ → GOOGLEAPIS_API_KEY
|
||||
|
||||
Returns the env value (stripped) or "". Never returns env vars whose names
|
||||
are already explicitly checked elsewhere — those are handled by their own
|
||||
host-gated paths (OPENAI/OPENROUTER/OLLAMA).
|
||||
|
||||
The vendor label is the *registrable* portion of the hostname: strip
|
||||
``api.`` / ``www.`` prefixes, then take the second-to-last label
|
||||
(``api.deepseek.com`` → ``deepseek``). Falls back to "" for hostnames
|
||||
that don't yield a usable vendor label (IPs, loopback, single-label
|
||||
hosts).
|
||||
"""
|
||||
hostname = base_url_hostname(base_url)
|
||||
if not hostname:
|
||||
return ""
|
||||
# Reject IPv4 / IPv6 / loopback — no meaningful vendor label.
|
||||
if any(ch.isdigit() for ch in hostname.split(".")[-1]):
|
||||
# Last label starts with a digit → likely IP. (TLDs are never numeric.)
|
||||
return ""
|
||||
if hostname in ("localhost",) or ":" in hostname:
|
||||
return ""
|
||||
labels = [lbl for lbl in hostname.split(".") if lbl]
|
||||
# Strip common API/CDN prefixes.
|
||||
while labels and labels[0] in ("api", "www"):
|
||||
labels.pop(0)
|
||||
if len(labels) < 2:
|
||||
return ""
|
||||
# Take the *registrable* label (second-to-last). For typical provider
|
||||
# hosts this is what users intuitively call "the vendor":
|
||||
# deepseek.com → labels[-2] = "deepseek" ✓
|
||||
# api.groq.com → groq.com → labels[-2] = "groq" ✓
|
||||
# api.mistral.ai → labels[-2] = "mistral" ✓
|
||||
# Crucially, lookalike hosts pick the ATTACKER's label, not the spoofed
|
||||
# vendor:
|
||||
# api.deepseek.com.attacker.test → labels[-2] = "attacker"
|
||||
# so DEEPSEEK_API_KEY stays put and the chain falls through to
|
||||
# no-key-required. This mirrors how `base_url_host_matches` resists the
|
||||
# same lookalike attack for explicit hosts.
|
||||
vendor = labels[-2]
|
||||
# Sanitize to env var charset: A-Z, 0-9, underscore.
|
||||
sanitized = "".join(ch if ch.isalnum() else "_" for ch in vendor).upper()
|
||||
if not sanitized or not sanitized[0].isalpha():
|
||||
return ""
|
||||
# Don't re-derive env vars already handled by explicit host-gated paths.
|
||||
if sanitized in ("OPENAI", "OPENROUTER", "OLLAMA"):
|
||||
return ""
|
||||
env_name = f"{sanitized}_API_KEY"
|
||||
return (os.getenv(env_name, "") or "").strip()
|
||||
|
||||
|
||||
def _auto_detect_local_model(base_url: str) -> str:
|
||||
"""Query a local server for its model name when only one model is loaded."""
|
||||
if not base_url:
|
||||
@@ -471,6 +528,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
||||
"api_key": resolved_api_key,
|
||||
"model": entry.get("default_model", ""),
|
||||
}
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
result["extra_body"] = dict(extra_body)
|
||||
# The v11→v12 migration writes the API mode under the new
|
||||
# ``transport`` field, but hand-edited configs may still
|
||||
# use the legacy ``api_mode`` spelling. Accept both —
|
||||
@@ -496,6 +556,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
||||
"api_key": resolved_api_key,
|
||||
"model": entry.get("default_model", ""),
|
||||
}
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
result["extra_body"] = dict(extra_body)
|
||||
api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
|
||||
if api_mode:
|
||||
result["api_mode"] = api_mode
|
||||
@@ -539,6 +602,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
||||
result["key_env"] = key_env
|
||||
if provider_key:
|
||||
result["provider_key"] = provider_key
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
result["extra_body"] = dict(extra_body)
|
||||
api_mode = _parse_api_mode(entry.get("api_mode"))
|
||||
if api_mode:
|
||||
result["api_mode"] = api_mode
|
||||
@@ -550,6 +616,13 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
||||
return None
|
||||
|
||||
|
||||
def _custom_provider_request_overrides(custom_provider: Dict[str, Any]) -> Dict[str, Any]:
|
||||
extra_body = custom_provider.get("extra_body")
|
||||
if not isinstance(extra_body, dict) or not extra_body:
|
||||
return {}
|
||||
return {"extra_body": dict(extra_body)}
|
||||
|
||||
|
||||
def _resolve_named_custom_runtime(
|
||||
*,
|
||||
requested_provider: str,
|
||||
@@ -582,10 +655,17 @@ def _resolve_named_custom_runtime(
|
||||
if pool_result:
|
||||
pool_result["source"] = "direct-alias"
|
||||
return pool_result
|
||||
_da_is_openai_url = base_url_host_matches(base_url, "openai.com") or base_url_host_matches(base_url, "openai.azure.com")
|
||||
_da_is_openrouter = base_url_host_matches(base_url, "openrouter.ai")
|
||||
api_key_candidates = [
|
||||
(explicit_api_key or "").strip(),
|
||||
os.getenv("OPENAI_API_KEY", "").strip(),
|
||||
os.getenv("OPENROUTER_API_KEY", "").strip(),
|
||||
# Gate env key fallbacks on authoritative hosts (#28660)
|
||||
(os.getenv("OPENAI_API_KEY", "").strip() if _da_is_openai_url else ""),
|
||||
(os.getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter else ""),
|
||||
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
|
||||
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
|
||||
# intuitive match without configuring `custom_providers` first.
|
||||
_host_derived_api_key(base_url),
|
||||
]
|
||||
api_key = next(
|
||||
(c for c in api_key_candidates if has_usable_secret(c)),
|
||||
@@ -619,14 +699,27 @@ def _resolve_named_custom_runtime(
|
||||
model_name = custom_provider.get("model")
|
||||
if model_name:
|
||||
pool_result["model"] = model_name
|
||||
request_overrides = _custom_provider_request_overrides(custom_provider)
|
||||
if request_overrides:
|
||||
pool_result["request_overrides"] = {
|
||||
**dict(pool_result.get("request_overrides") or {}),
|
||||
**request_overrides,
|
||||
}
|
||||
return pool_result
|
||||
|
||||
_cp_is_openai_url = base_url_host_matches(base_url, "openai.com") or base_url_host_matches(base_url, "openai.azure.com")
|
||||
_cp_is_openrouter = base_url_host_matches(base_url, "openrouter.ai")
|
||||
api_key_candidates = [
|
||||
(explicit_api_key or "").strip(),
|
||||
str(custom_provider.get("api_key", "") or "").strip(),
|
||||
os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
|
||||
os.getenv("OPENAI_API_KEY", "").strip(),
|
||||
os.getenv("OPENROUTER_API_KEY", "").strip(),
|
||||
# Gate provider env keys on their authoritative hosts — sending
|
||||
# OPENAI_API_KEY to a local-llm endpoint leaks credentials (#28660).
|
||||
(os.getenv("OPENAI_API_KEY", "").strip() if _cp_is_openai_url else ""),
|
||||
(os.getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter else ""),
|
||||
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host as a final
|
||||
# fallback when key_env wasn't set explicitly.
|
||||
_host_derived_api_key(base_url),
|
||||
]
|
||||
api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "")
|
||||
|
||||
@@ -643,6 +736,9 @@ def _resolve_named_custom_runtime(
|
||||
# provider name differs from the actual model string the API expects.
|
||||
if custom_provider.get("model"):
|
||||
result["model"] = custom_provider["model"]
|
||||
request_overrides = _custom_provider_request_overrides(custom_provider)
|
||||
if request_overrides:
|
||||
result["request_overrides"] = request_overrides
|
||||
return result
|
||||
|
||||
|
||||
@@ -707,7 +803,15 @@ def _resolve_openrouter_runtime(
|
||||
# OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
|
||||
# provider (issues #420, #560).
|
||||
_is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
|
||||
if _is_openrouter_url:
|
||||
# Also treat explicitly-configured OpenRouter mirrors/proxies as OpenRouter
|
||||
# for key selection — if the user set OPENROUTER_BASE_URL or requested
|
||||
# provider=openrouter explicitly, OPENROUTER_API_KEY should still be used.
|
||||
_is_openrouter_context = _is_openrouter_url or (
|
||||
requested_norm == "openrouter"
|
||||
and (env_openrouter_base_url or base_url == env_openrouter_base_url)
|
||||
and base_url == (env_openrouter_base_url or "").rstrip("/")
|
||||
)
|
||||
if _is_openrouter_context:
|
||||
api_key_candidates = [
|
||||
explicit_api_key,
|
||||
os.getenv("OPENROUTER_API_KEY"),
|
||||
@@ -721,13 +825,24 @@ def _resolve_openrouter_runtime(
|
||||
# "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
|
||||
# hostname is a look-alike (ollama.com.attacker.test) must not
|
||||
# receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
|
||||
_is_ollama_url = base_url_host_matches(base_url, "ollama.com")
|
||||
_is_ollama_url = base_url_host_matches(base_url, "ollama.com")
|
||||
_is_openai_url = base_url_host_matches(base_url, "openai.com")
|
||||
_is_openai_azure = base_url_host_matches(base_url, "openai.azure.com")
|
||||
# Gate each provider key on its own host — sending OPENAI_API_KEY or
|
||||
# OPENROUTER_API_KEY to an unrelated custom endpoint (DeepSeek, Groq,
|
||||
# Mistral, …) leaks credentials and causes 401s (issue #28660).
|
||||
# Mirrors the OLLAMA_API_KEY host-gate added in GHSA-76xc-57q6-vm5m.
|
||||
api_key_candidates = [
|
||||
explicit_api_key,
|
||||
(cfg_api_key if use_config_base_url else ""),
|
||||
(os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
|
||||
os.getenv("OPENAI_API_KEY"),
|
||||
os.getenv("OPENROUTER_API_KEY"),
|
||||
(os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
|
||||
(os.getenv("OPENAI_API_KEY") if (_is_openai_url or _is_openai_azure) else ""),
|
||||
(os.getenv("OPENROUTER_API_KEY") if _is_openrouter_url else ""),
|
||||
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
|
||||
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
|
||||
# intuitive match. Helper returns "" for IPs/loopback and for env
|
||||
# vars already handled by the explicit host-gated paths above.
|
||||
_host_derived_api_key(base_url),
|
||||
]
|
||||
api_key = next(
|
||||
(str(candidate or "").strip() for candidate in api_key_candidates if has_usable_secret(candidate)),
|
||||
|
||||
@@ -0,0 +1,445 @@
|
||||
"""CLI handlers for ``hermes secrets bitwarden ...``.
|
||||
|
||||
Subcommands:
|
||||
setup — interactive wizard: install bws, prompt for token + project, test fetch
|
||||
status — show current config + binary version + last fetch outcome
|
||||
sync — run a fetch right now and show what would be applied (dry-run friendly)
|
||||
disable — flip ``secrets.bitwarden.enabled`` to False
|
||||
install — just download the bws binary (no token / project required)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import getpass
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table
|
||||
|
||||
from agent.secret_sources import bitwarden as bw
|
||||
from hermes_cli.config import (
|
||||
get_env_path,
|
||||
load_config,
|
||||
save_config,
|
||||
save_env_value,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Argparse wiring — called from hermes_cli.main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def register_cli(parent_parser: argparse.ArgumentParser) -> None:
|
||||
"""Attach the ``bitwarden`` subcommand tree to a parent parser.
|
||||
|
||||
Called from ``hermes_cli.main`` as part of building the top-level
|
||||
``hermes secrets`` parser.
|
||||
"""
|
||||
sub = parent_parser.add_subparsers(dest="secrets_bw_command")
|
||||
|
||||
setup = sub.add_parser(
|
||||
"setup",
|
||||
help="Interactive wizard: install bws, store access token, pick project",
|
||||
)
|
||||
setup.add_argument(
|
||||
"--project-id",
|
||||
help="Pre-select a project UUID instead of prompting",
|
||||
)
|
||||
setup.add_argument(
|
||||
"--access-token",
|
||||
help="Provide the access token non-interactively (will be stored in .env)",
|
||||
)
|
||||
setup.set_defaults(func=cmd_setup)
|
||||
|
||||
status = sub.add_parser("status", help="Show config + binary + last fetch")
|
||||
status.set_defaults(func=cmd_status)
|
||||
|
||||
sync = sub.add_parser("sync", help="Fetch secrets now and report what changed")
|
||||
sync.add_argument(
|
||||
"--apply",
|
||||
action="store_true",
|
||||
help="Actually export the secrets into the current shell's env (default: dry-run)",
|
||||
)
|
||||
sync.set_defaults(func=cmd_sync)
|
||||
|
||||
disable = sub.add_parser("disable", help="Turn off the Bitwarden integration")
|
||||
disable.set_defaults(func=cmd_disable)
|
||||
|
||||
install = sub.add_parser(
|
||||
"install",
|
||||
help=f"Download and verify the pinned bws binary (v{bw._BWS_VERSION})",
|
||||
)
|
||||
install.add_argument(
|
||||
"--force",
|
||||
action="store_true",
|
||||
help="Re-download even if a managed copy already exists",
|
||||
)
|
||||
install.set_defaults(func=cmd_install)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def cmd_setup(args: argparse.Namespace) -> int:
|
||||
console = Console()
|
||||
console.print(
|
||||
Panel.fit(
|
||||
"[bold]Bitwarden Secrets Manager setup[/bold]\n\n"
|
||||
"Need an access token? In the Bitwarden web app:\n"
|
||||
" Secrets Manager → Machine accounts → [your account] →\n"
|
||||
" Access tokens → Create access token\n\n"
|
||||
"Copy the token (starts with [cyan]0.[/cyan]…) — it cannot be retrieved later.",
|
||||
border_style="cyan",
|
||||
)
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------ binary
|
||||
console.print()
|
||||
console.print("[bold]Step 1[/bold] Install the bws CLI")
|
||||
try:
|
||||
binary = bw.find_bws(install_if_missing=False)
|
||||
if binary is None:
|
||||
console.print(" No bws on PATH — downloading…")
|
||||
binary = bw.install_bws()
|
||||
version = _bws_version(binary)
|
||||
console.print(f" [green]✓[/green] {binary} ({version})")
|
||||
except Exception as exc: # noqa: BLE001
|
||||
console.print(f" [red]✗ Could not install bws: {exc}[/red]")
|
||||
console.print(
|
||||
" Manual install: "
|
||||
"https://github.com/bitwarden/sdk-sm/releases"
|
||||
)
|
||||
return 1
|
||||
|
||||
# ------------------------------------------------------------------- token
|
||||
console.print()
|
||||
console.print("[bold]Step 2[/bold] Provide your access token")
|
||||
cfg = load_config()
|
||||
secrets_cfg = (cfg.setdefault("secrets", {})
|
||||
.setdefault("bitwarden", {}))
|
||||
token_env = secrets_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")
|
||||
|
||||
token = (args.access_token or "").strip()
|
||||
if not token:
|
||||
token = getpass.getpass(f" Paste access token ({token_env}): ").strip()
|
||||
if not token:
|
||||
console.print(" [red]Empty token, aborting.[/red]")
|
||||
return 1
|
||||
if not token.startswith("0."):
|
||||
console.print(
|
||||
" [yellow]Warning: token doesn't start with '0.' — usually that means "
|
||||
"you pasted something other than a BSM access token. Continuing anyway.[/yellow]"
|
||||
)
|
||||
|
||||
save_env_value(token_env, token)
|
||||
os.environ[token_env] = token # so the test fetch below sees it
|
||||
console.print(f" [green]✓[/green] stored in {get_env_path()} as {token_env}")
|
||||
|
||||
# ------------------------------------------------------------------- project
|
||||
if args.project_id and args.project_id.strip():
|
||||
project_id = args.project_id.strip()
|
||||
else:
|
||||
console.print()
|
||||
console.print("[bold]Step 3[/bold] Pick a project")
|
||||
project_id = ""
|
||||
projects = _list_projects(binary, token, console)
|
||||
if projects is None:
|
||||
return 1
|
||||
if not projects:
|
||||
console.print(" [yellow]No projects visible to this machine account.[/yellow]")
|
||||
console.print(
|
||||
" In the Bitwarden web app, open the machine account → Projects tab "
|
||||
"and grant it access to at least one project."
|
||||
)
|
||||
return 1
|
||||
|
||||
table = Table(show_header=True, header_style="bold")
|
||||
table.add_column("#", style="cyan", width=4)
|
||||
table.add_column("Name")
|
||||
table.add_column("ID", style="dim")
|
||||
for i, p in enumerate(projects, 1):
|
||||
table.add_row(str(i), p.get("name", "?"), p.get("id", "?"))
|
||||
console.print(table)
|
||||
|
||||
while True:
|
||||
choice = console.input(f" Select project [1-{len(projects)}]: ").strip()
|
||||
if not choice:
|
||||
continue
|
||||
try:
|
||||
idx = int(choice)
|
||||
except ValueError:
|
||||
console.print(" [red]Enter a number.[/red]")
|
||||
continue
|
||||
if 1 <= idx <= len(projects):
|
||||
project_id = projects[idx - 1]["id"]
|
||||
break
|
||||
console.print(f" [red]Out of range — pick 1-{len(projects)}.[/red]")
|
||||
|
||||
# ------------------------------------------------------------------- test
|
||||
console.print()
|
||||
step_num = 4 if not (args.project_id and args.project_id.strip()) else 3
|
||||
console.print(f"[bold]Step {step_num}[/bold] Test fetch")
|
||||
try:
|
||||
secrets, warnings = bw.fetch_bitwarden_secrets(
|
||||
access_token=token,
|
||||
project_id=project_id,
|
||||
binary=binary,
|
||||
use_cache=False,
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
console.print(f" [red]✗ Fetch failed: {exc}[/red]")
|
||||
return 1
|
||||
|
||||
if not secrets:
|
||||
console.print(" [yellow]Fetch succeeded but the project has no secrets.[/yellow]")
|
||||
else:
|
||||
table = Table(show_header=True, header_style="bold")
|
||||
table.add_column("Name", style="cyan")
|
||||
table.add_column("Status")
|
||||
for key in sorted(secrets):
|
||||
if key == token_env:
|
||||
status = "[dim]bootstrap token — never overrides itself[/dim]"
|
||||
elif os.environ.get(key):
|
||||
status = "[yellow]already set in env (will be overwritten)[/yellow]"
|
||||
else:
|
||||
status = "[green]new[/green]"
|
||||
table.add_row(key, status)
|
||||
console.print(table)
|
||||
for w in warnings:
|
||||
console.print(f" [yellow]warning:[/yellow] {w}")
|
||||
|
||||
# ------------------------------------------------------------------- save
|
||||
secrets_cfg["enabled"] = True
|
||||
secrets_cfg["project_id"] = project_id
|
||||
secrets_cfg.setdefault("access_token_env", token_env)
|
||||
secrets_cfg.setdefault("cache_ttl_seconds", 300)
|
||||
secrets_cfg.setdefault("override_existing", True)
|
||||
secrets_cfg.setdefault("auto_install", True)
|
||||
save_config(cfg)
|
||||
|
||||
console.print()
|
||||
console.print(
|
||||
"[green]✓ Bitwarden Secrets Manager is enabled.[/green] "
|
||||
"Secrets will be pulled at the start of every Hermes process."
|
||||
)
|
||||
console.print(
|
||||
" Status: [cyan]hermes secrets bitwarden status[/cyan]\n"
|
||||
" Refresh: [cyan]hermes secrets bitwarden sync[/cyan]\n"
|
||||
" Disable: [cyan]hermes secrets bitwarden disable[/cyan]"
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_status(args: argparse.Namespace) -> int:
|
||||
console = Console()
|
||||
cfg = load_config()
|
||||
bw_cfg = (cfg.get("secrets") or {}).get("bitwarden") or {}
|
||||
|
||||
enabled = bool(bw_cfg.get("enabled"))
|
||||
token_env = bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")
|
||||
project_id = bw_cfg.get("project_id", "")
|
||||
token_set = bool(os.environ.get(token_env))
|
||||
|
||||
table = Table(show_header=False, box=None, padding=(0, 2))
|
||||
table.add_column("", style="bold")
|
||||
table.add_column("")
|
||||
table.add_row("Enabled", _yn(enabled))
|
||||
table.add_row("Token env var", token_env)
|
||||
table.add_row("Token in env", _yn(token_set))
|
||||
table.add_row("Project ID", project_id or "[dim](unset)[/dim]")
|
||||
table.add_row("Override existing", _yn(bool(bw_cfg.get("override_existing", False))))
|
||||
table.add_row("Cache TTL (s)", str(bw_cfg.get("cache_ttl_seconds", 300)))
|
||||
table.add_row("Auto-install", _yn(bool(bw_cfg.get("auto_install", True))))
|
||||
|
||||
binary = bw.find_bws(install_if_missing=False)
|
||||
if binary:
|
||||
table.add_row("bws binary", f"{binary} ({_bws_version(binary)})")
|
||||
else:
|
||||
table.add_row("bws binary", "[yellow]not installed[/yellow]")
|
||||
|
||||
console.print(Panel(table, title="Bitwarden Secrets Manager", border_style="cyan"))
|
||||
|
||||
if not enabled:
|
||||
console.print("\n Run [cyan]hermes secrets bitwarden setup[/cyan] to enable.")
|
||||
return 0
|
||||
if not token_set:
|
||||
console.print(
|
||||
f"\n [yellow]Enabled but {token_env} is not set — Hermes will skip BSM "
|
||||
"and warn on next startup.[/yellow]"
|
||||
)
|
||||
if not project_id:
|
||||
console.print(
|
||||
"\n [yellow]Enabled but no project_id — nothing to fetch.[/yellow]"
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_sync(args: argparse.Namespace) -> int:
|
||||
console = Console()
|
||||
cfg = load_config()
|
||||
bw_cfg = (cfg.get("secrets") or {}).get("bitwarden") or {}
|
||||
if not bw_cfg.get("enabled"):
|
||||
console.print(
|
||||
"[yellow]Bitwarden integration is disabled. Run "
|
||||
"`hermes secrets bitwarden setup` first.[/yellow]"
|
||||
)
|
||||
return 1
|
||||
|
||||
token_env = bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")
|
||||
token = os.environ.get(token_env, "").strip()
|
||||
if not token:
|
||||
console.print(f"[red]{token_env} is not set.[/red]")
|
||||
return 1
|
||||
|
||||
project_id = bw_cfg.get("project_id", "")
|
||||
if not project_id:
|
||||
console.print("[red]No project_id configured.[/red]")
|
||||
return 1
|
||||
|
||||
try:
|
||||
secrets, warnings = bw.fetch_bitwarden_secrets(
|
||||
access_token=token,
|
||||
project_id=project_id,
|
||||
use_cache=False,
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
console.print(f"[red]Fetch failed: {exc}[/red]")
|
||||
return 1
|
||||
|
||||
if not secrets:
|
||||
console.print("[yellow]No secrets in project.[/yellow]")
|
||||
return 0
|
||||
|
||||
override = bool(bw_cfg.get("override_existing", False)) or args.apply
|
||||
table = Table(show_header=True, header_style="bold")
|
||||
table.add_column("Name", style="cyan")
|
||||
table.add_column("Action")
|
||||
applied = 0
|
||||
for key in sorted(secrets):
|
||||
if key == token_env:
|
||||
table.add_row(key, "[dim]skip (bootstrap token)[/dim]")
|
||||
continue
|
||||
already = bool(os.environ.get(key))
|
||||
if already and not override:
|
||||
table.add_row(key, "[dim]skip (already set)[/dim]")
|
||||
continue
|
||||
if args.apply:
|
||||
os.environ[key] = secrets[key]
|
||||
applied += 1
|
||||
table.add_row(key, "[green]exported[/green]" + (" (overrode)" if already else ""))
|
||||
else:
|
||||
table.add_row(key, "[green]would export[/green]" + (" (overrides)" if already else ""))
|
||||
|
||||
console.print(table)
|
||||
for w in warnings:
|
||||
console.print(f"[yellow]warning:[/yellow] {w}")
|
||||
|
||||
if not args.apply:
|
||||
console.print(
|
||||
"\n This was a dry-run — secrets are picked up automatically on the "
|
||||
"next [cyan]hermes[/cyan] invocation. Re-run with [cyan]--apply[/cyan] "
|
||||
"to export into the current shell instead."
|
||||
)
|
||||
else:
|
||||
console.print(f"\n [green]Exported {applied} secret(s) into current process.[/green]")
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_disable(args: argparse.Namespace) -> int:
|
||||
console = Console()
|
||||
cfg = load_config()
|
||||
bw_cfg = (cfg.setdefault("secrets", {})
|
||||
.setdefault("bitwarden", {}))
|
||||
bw_cfg["enabled"] = False
|
||||
save_config(cfg)
|
||||
console.print(
|
||||
"[green]Disabled.[/green] Bitwarden secrets will NOT be pulled on the next "
|
||||
"Hermes invocation.\n"
|
||||
" Your access token is left in .env — remove it manually if you also want "
|
||||
"to revoke the credential."
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_install(args: argparse.Namespace) -> int:
|
||||
console = Console()
|
||||
try:
|
||||
path = bw.install_bws(force=bool(args.force))
|
||||
console.print(f"[green]✓[/green] {path} ({_bws_version(path)})")
|
||||
return 0
|
||||
except Exception as exc: # noqa: BLE001
|
||||
console.print(f"[red]Install failed: {exc}[/red]")
|
||||
return 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _yn(b: bool) -> str:
|
||||
return "[green]yes[/green]" if b else "[dim]no[/dim]"
|
||||
|
||||
|
||||
def _bws_version(binary: Path) -> str:
|
||||
try:
|
||||
res = subprocess.run(
|
||||
[str(binary), "--version"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if res.returncode == 0:
|
||||
return (res.stdout or res.stderr).strip().splitlines()[0]
|
||||
except (OSError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
return "version unknown"
|
||||
|
||||
|
||||
def _list_projects(
|
||||
binary: Path, token: str, console: Console
|
||||
) -> Optional[List[dict]]:
|
||||
"""Call ``bws project list`` and return the parsed list, or None on failure."""
|
||||
env = os.environ.copy()
|
||||
env["BWS_ACCESS_TOKEN"] = token
|
||||
env.setdefault("NO_COLOR", "1")
|
||||
try:
|
||||
res = subprocess.run(
|
||||
[str(binary), "project", "list", "--output", "json"],
|
||||
env=env,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
except (OSError, subprocess.TimeoutExpired) as exc:
|
||||
console.print(f" [red]Couldn't list projects: {exc}[/red]")
|
||||
return None
|
||||
|
||||
if res.returncode != 0:
|
||||
err = (res.stderr or res.stdout).strip()[:300]
|
||||
console.print(f" [red]bws project list failed: {err}[/red]")
|
||||
if "authorization" in err.lower() or "invalid" in err.lower():
|
||||
console.print(
|
||||
" [yellow]This usually means the access token is wrong or revoked. "
|
||||
"Double-check it in the Bitwarden web app.[/yellow]"
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
data = json.loads(res.stdout or "[]")
|
||||
except json.JSONDecodeError as exc:
|
||||
console.print(f" [red]bws returned non-JSON: {exc}[/red]")
|
||||
return None
|
||||
if not isinstance(data, list):
|
||||
return []
|
||||
return [p for p in data if isinstance(p, dict) and p.get("id")]
|
||||
@@ -0,0 +1,530 @@
|
||||
"""
|
||||
Interactive setup wizard for the WhatsApp Cloud API adapter.
|
||||
|
||||
Entry point: ``hermes whatsapp-cloud`` (dispatched from
|
||||
``cmd_whatsapp_cloud`` in ``hermes_cli/main.py``).
|
||||
|
||||
Walks the user through the 6 credentials Meta requires + recipient
|
||||
allowlist, auto-generates the verify token, and prints exact follow-up
|
||||
instructions for the parts that can't happen inside the wizard process
|
||||
(starting cloudflared, starting the gateway, configuring Meta's
|
||||
webhook dashboard, adding their phone to the recipient list).
|
||||
|
||||
Heavy emphasis on field-shape validation to catch the most common
|
||||
configuration mistakes:
|
||||
|
||||
- Putting the actual phone number in ``WHATSAPP_CLOUD_PHONE_NUMBER_ID``
|
||||
(the field expects Meta's 15-17 digit internal ID, not a phone number).
|
||||
This is the #1 trap — caught us during Phase 3 live testing.
|
||||
- Pasting tokens with trailing whitespace.
|
||||
- Pasting an OpenAI / Slack / GitHub key by mistake.
|
||||
- Confusing App ID with WABA ID with Phone Number ID.
|
||||
|
||||
Each prompt has contextual help showing exactly where to find the value
|
||||
in Meta's App Dashboard, with a one-line description and the field's
|
||||
expected shape ("starts with EAA", "15-17 digits", "32 hex chars", etc.).
|
||||
|
||||
The wizard intentionally does NOT smoke-test the webhook itself — the
|
||||
Hermes gateway and the cloudflared tunnel both run in separate
|
||||
processes the user starts AFTER this wizard exits, so any in-wizard
|
||||
probe would fail by design. Instead the final SETUP COMPLETE block
|
||||
prints the exact curl command the user can run from a third terminal
|
||||
to verify the loop end-to-end once everything's running.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import secrets
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Field-shape validators
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Each validator returns (ok, reason_if_not_ok). The wizard uses them to
|
||||
# reject obviously-malformed input before saving — saves users a round
|
||||
# trip with Meta's 401 / 400 errors.
|
||||
|
||||
|
||||
def _validate_phone_number_id(value: str) -> tuple[bool, Optional[str]]:
|
||||
"""Phone Number ID is a 15-17 digit numeric ID assigned by Meta.
|
||||
|
||||
It's NOT a phone number. The #1 setup mistake is pasting the actual
|
||||
phone number (e.g. ``15556422442``) into this field — that's only
|
||||
10-11 digits and gets rejected by Graph as "Object with ID does
|
||||
not exist."
|
||||
"""
|
||||
if not value:
|
||||
return False, "Phone Number ID is required"
|
||||
s = value.strip()
|
||||
if not s.isdigit():
|
||||
return False, "Phone Number ID must be numeric (no '+', spaces, or dashes)"
|
||||
# Real phone numbers are 10-11 digits (US/CA country code + area code
|
||||
# + 7 digits). Meta's internal IDs are 15-17 digits. If we see a
|
||||
# phone-number-sized value, the user almost certainly pasted the
|
||||
# phone number by mistake.
|
||||
if 10 <= len(s) <= 12:
|
||||
return False, (
|
||||
"That looks like a phone number — but this field needs the "
|
||||
"Phone Number ID (Meta's internal ID, 15-17 digits, e.g. "
|
||||
"'7794189252778687'). Look just BELOW the 'From' dropdown in "
|
||||
"API Setup → it's labelled 'Phone number ID'."
|
||||
)
|
||||
if len(s) < 13:
|
||||
return False, "Phone Number ID looks too short (expected 13-18 digits)"
|
||||
if len(s) > 20:
|
||||
return False, "Phone Number ID looks too long (expected 13-18 digits)"
|
||||
return True, None
|
||||
|
||||
|
||||
def _validate_waba_id(value: str) -> tuple[bool, Optional[str]]:
|
||||
"""WABA ID is numeric, similar length range as Phone Number ID."""
|
||||
if not value:
|
||||
return False, "WABA ID is required"
|
||||
s = value.strip()
|
||||
if not s.isdigit():
|
||||
return False, "WABA ID must be numeric"
|
||||
if len(s) < 10 or len(s) > 25:
|
||||
return False, "WABA ID looks wrong (expected 10-25 digits)"
|
||||
return True, None
|
||||
|
||||
|
||||
def _validate_app_id(value: str) -> tuple[bool, Optional[str]]:
|
||||
"""Meta App ID is numeric, typically 15-16 digits."""
|
||||
if not value:
|
||||
return False, "App ID is required"
|
||||
s = value.strip()
|
||||
if not s.isdigit():
|
||||
return False, "App ID must be numeric"
|
||||
if len(s) < 13 or len(s) > 20:
|
||||
return False, "App ID looks wrong (expected 15-16 digits)"
|
||||
return True, None
|
||||
|
||||
|
||||
def _validate_app_secret(value: str) -> tuple[bool, Optional[str]]:
|
||||
"""App Secret is a 32-character lowercase hex string."""
|
||||
if not value:
|
||||
return False, "App Secret is required"
|
||||
s = value.strip()
|
||||
if not re.fullmatch(r"[0-9a-f]+", s.lower()):
|
||||
return False, (
|
||||
"App Secret should be a hex string (only digits 0-9 and "
|
||||
"letters a-f). Make sure you copied the 'App secret' from "
|
||||
"Settings → Basic, not some other token."
|
||||
)
|
||||
if len(s) != 32:
|
||||
return False, f"App Secret should be exactly 32 hex characters (got {len(s)})"
|
||||
return True, None
|
||||
|
||||
|
||||
def _validate_access_token(value: str) -> tuple[bool, Optional[str]]:
|
||||
"""Meta access tokens start with ``EAA`` and are 100-300+ characters.
|
||||
|
||||
Both temp tokens (24h) and System User permanent tokens share this
|
||||
prefix. We don't try to distinguish them.
|
||||
"""
|
||||
if not value:
|
||||
return False, "Access token is required"
|
||||
s = value.strip()
|
||||
if not s.startswith("EAA"):
|
||||
# Diagnose common paste mistakes
|
||||
if s.startswith("sk-"):
|
||||
return False, (
|
||||
"That's an OpenAI key (starts with 'sk-'), not a Meta "
|
||||
"WhatsApp access token. Meta tokens start with 'EAA'."
|
||||
)
|
||||
if s.startswith("xoxb-") or s.startswith("xoxp-"):
|
||||
return False, (
|
||||
"That's a Slack token, not a Meta WhatsApp access token. "
|
||||
"Meta tokens start with 'EAA'."
|
||||
)
|
||||
if s.startswith("ghp_") or s.startswith("gho_"):
|
||||
return False, (
|
||||
"That's a GitHub token, not a Meta WhatsApp access "
|
||||
"token. Meta tokens start with 'EAA'."
|
||||
)
|
||||
return False, (
|
||||
"Meta WhatsApp access tokens start with 'EAA'. Check that "
|
||||
"you're copying from the right place (API Setup → 'Generate "
|
||||
"access token', or Business Settings → System Users → "
|
||||
"'Generate token' for a permanent one)."
|
||||
)
|
||||
if len(s) < 100:
|
||||
return False, f"Access token looks too short ({len(s)} chars, expected 100+)"
|
||||
return True, None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prompt helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _prompt(message: str, default: Optional[str] = None) -> str:
|
||||
"""Read one line of input. Returns "" on EOF / Ctrl+C / empty input.
|
||||
|
||||
The ``default`` parameter is shown to the user but NOT auto-applied
|
||||
on empty input — callers handle the "user kept existing" case
|
||||
explicitly so they can distinguish between a real value and a
|
||||
display preview (e.g. ``"abc12345..."`` for masked secrets).
|
||||
"""
|
||||
try:
|
||||
suffix = f" [{default}]" if default else ""
|
||||
raw = input(f"{message}{suffix}: ").strip()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print()
|
||||
return ""
|
||||
return raw
|
||||
|
||||
|
||||
def _prompt_validated(
|
||||
message: str,
|
||||
validator,
|
||||
*,
|
||||
current: Optional[str] = None,
|
||||
help_text: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
"""Repeat the prompt until the user enters a valid value or aborts.
|
||||
|
||||
Returns the validated value, or None if the user gave up (empty
|
||||
response after an error, or Ctrl+C). ``current`` is shown as a
|
||||
default for re-runs of the wizard with existing config.
|
||||
"""
|
||||
if help_text:
|
||||
for line in help_text.strip().splitlines():
|
||||
print(f" {line}")
|
||||
attempts = 0
|
||||
while True:
|
||||
attempts += 1
|
||||
value = _prompt(f" → {message}", default=current)
|
||||
if not value:
|
||||
return None
|
||||
ok, reason = validator(value)
|
||||
if ok:
|
||||
return value.strip()
|
||||
print(f" ✗ {reason}")
|
||||
if attempts >= 3:
|
||||
try:
|
||||
cont = input(" Try again, or press Enter to skip: ").strip()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
return None
|
||||
if not cont:
|
||||
return None
|
||||
attempts = 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Wizard
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def run_whatsapp_cloud_setup() -> int:
|
||||
"""Interactive wizard for the WhatsApp Cloud API adapter.
|
||||
|
||||
Returns 0 on full success, 1 on user abort, 2 on partial completion
|
||||
(some fields written but the user bailed before finishing).
|
||||
"""
|
||||
from hermes_cli.config import get_env_value, save_env_value
|
||||
|
||||
print()
|
||||
print("⚕ WhatsApp Business Cloud API Setup")
|
||||
print("=" * 50)
|
||||
print()
|
||||
print("This wizard configures Hermes to talk to WhatsApp via Meta's")
|
||||
print("official Cloud API. It's the production-grade path:")
|
||||
print()
|
||||
print(" • No QR codes, no Node.js bridge subprocess")
|
||||
print(" • Stable connection — no account-ban risk")
|
||||
print(" • Business account required (not personal WhatsApp)")
|
||||
print(" • Public webhook URL required (Cloudflare Tunnel, ngrok,")
|
||||
print(" or your own reverse proxy with TLS)")
|
||||
print()
|
||||
print("If you don't have a Meta app set up yet, follow these steps")
|
||||
print("FIRST, then come back and re-run this wizard:")
|
||||
print()
|
||||
print(" 1. https://developers.facebook.com/apps → Create App")
|
||||
print(" → 'Connect with customers through WhatsApp'")
|
||||
print(" 2. App Dashboard → WhatsApp → API Setup")
|
||||
print(" 3. Click 'Generate access token' (temp 24h token is fine to")
|
||||
print(" start; switch to a System User permanent token later)")
|
||||
print()
|
||||
try:
|
||||
proceed = input("Press Enter to continue, or Ctrl+C to abort... ").strip()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print("\nSetup cancelled.")
|
||||
return 1
|
||||
|
||||
print()
|
||||
print("─" * 50)
|
||||
print("STEP 1 — Phone Number ID")
|
||||
print("─" * 50)
|
||||
current_phone_id = get_env_value("WHATSAPP_CLOUD_PHONE_NUMBER_ID") or None
|
||||
phone_id = _prompt_validated(
|
||||
"Phone Number ID",
|
||||
_validate_phone_number_id,
|
||||
current=current_phone_id,
|
||||
help_text=(
|
||||
"Found in: App Dashboard → WhatsApp → API Setup, in the\n"
|
||||
"'Send and receive messages' section.\n"
|
||||
"Look BELOW the 'From' dropdown — there's a 'Phone number ID'\n"
|
||||
"line with the value (15-17 digits, e.g. '7794189252778687').\n"
|
||||
"It is NOT the phone number itself (+1 555-...). That's the\n"
|
||||
"single most common setup mistake."
|
||||
),
|
||||
)
|
||||
if not phone_id:
|
||||
if current_phone_id:
|
||||
phone_id = current_phone_id
|
||||
print(f" ✓ Keeping existing: {phone_id}")
|
||||
else:
|
||||
print("\n✗ Phone Number ID is required. Aborting.")
|
||||
return 1
|
||||
else:
|
||||
save_env_value("WHATSAPP_CLOUD_PHONE_NUMBER_ID", phone_id)
|
||||
print(f" ✓ Saved: {phone_id}")
|
||||
print()
|
||||
|
||||
print("─" * 50)
|
||||
print("STEP 2 — Access Token")
|
||||
print("─" * 50)
|
||||
current_token = get_env_value("WHATSAPP_CLOUD_ACCESS_TOKEN") or None
|
||||
current_display = (current_token[:15] + "...") if current_token else None
|
||||
token = _prompt_validated(
|
||||
"Access Token",
|
||||
_validate_access_token,
|
||||
current=current_display,
|
||||
help_text=(
|
||||
"Two options for getting one:\n\n"
|
||||
" (a) TEMP — App Dashboard → WhatsApp → API Setup →\n"
|
||||
" 'Generate access token' button. Lasts 24 hours.\n"
|
||||
" Fine for testing today; you'll have to regenerate\n"
|
||||
" tomorrow.\n\n"
|
||||
" (b) PERMANENT (production) — System User token. One-time\n"
|
||||
" setup, never expires:\n"
|
||||
" • business.facebook.com → Settings → System users →\n"
|
||||
" Add → Admin role\n"
|
||||
" • Assign Assets → your app (Manage app), your\n"
|
||||
" WhatsApp account (Manage WABAs)\n"
|
||||
" • Generate token → expiration: Never → permissions:\n"
|
||||
" business_management, whatsapp_business_messaging,\n"
|
||||
" whatsapp_business_management\n\n"
|
||||
"Tokens start with 'EAA'."
|
||||
),
|
||||
)
|
||||
# If they had a current token and just hit Enter, keep it.
|
||||
if not token:
|
||||
if current_token:
|
||||
token = current_token
|
||||
print(" ✓ Keeping existing token")
|
||||
else:
|
||||
print("\n✗ Access Token is required. Aborting.")
|
||||
return 1
|
||||
else:
|
||||
save_env_value("WHATSAPP_CLOUD_ACCESS_TOKEN", token)
|
||||
print(" ✓ Saved (token hidden)")
|
||||
print()
|
||||
|
||||
print("─" * 50)
|
||||
print("STEP 3 — App Secret (required for webhook signature verification)")
|
||||
print("─" * 50)
|
||||
current_secret = get_env_value("WHATSAPP_CLOUD_APP_SECRET") or None
|
||||
current_secret_display = (current_secret[:8] + "...") if current_secret else None
|
||||
app_secret = _prompt_validated(
|
||||
"App Secret",
|
||||
_validate_app_secret,
|
||||
current=current_secret_display,
|
||||
help_text=(
|
||||
"Found in: App Dashboard → Settings → Basic →\n"
|
||||
"'App secret' field (click 'Show', enter your Facebook password).\n\n"
|
||||
"If 'Show' doesn't appear, you may need Admin role on the app.\n"
|
||||
"It's a 32-character lowercase hex string.\n\n"
|
||||
"Without the App Secret, inbound webhook POSTs are refused\n"
|
||||
"with HTTP 503 (we can't verify they actually came from Meta)."
|
||||
),
|
||||
)
|
||||
if not app_secret:
|
||||
if current_secret:
|
||||
app_secret = current_secret
|
||||
print(" ✓ Keeping existing App Secret")
|
||||
else:
|
||||
print("\n⚠ Skipping App Secret — inbound webhooks will be refused")
|
||||
print(" until you set WHATSAPP_CLOUD_APP_SECRET manually.")
|
||||
else:
|
||||
save_env_value("WHATSAPP_CLOUD_APP_SECRET", app_secret)
|
||||
print(" ✓ Saved (secret hidden)")
|
||||
print()
|
||||
|
||||
print("─" * 50)
|
||||
print("STEP 4 — App ID & WABA ID (optional, for analytics)")
|
||||
print("─" * 50)
|
||||
current_app_id = get_env_value("WHATSAPP_CLOUD_APP_ID") or None
|
||||
app_id = _prompt_validated(
|
||||
"App ID (optional, press Enter to skip)",
|
||||
lambda v: (True, None) if not v else _validate_app_id(v),
|
||||
current=current_app_id,
|
||||
help_text=(
|
||||
"Found in: App Dashboard → Settings → Basic → 'App ID' at the\n"
|
||||
"top of the page. Numeric, ~15-16 digits.\n"
|
||||
"Not required for messaging — useful only for analytics later."
|
||||
),
|
||||
)
|
||||
if app_id:
|
||||
save_env_value("WHATSAPP_CLOUD_APP_ID", app_id)
|
||||
print(f" ✓ Saved: {app_id}")
|
||||
elif current_app_id:
|
||||
print(f" ✓ Keeping existing: {current_app_id}")
|
||||
|
||||
current_waba_id = get_env_value("WHATSAPP_CLOUD_WABA_ID") or None
|
||||
waba_id = _prompt_validated(
|
||||
"WABA ID (optional, press Enter to skip)",
|
||||
lambda v: (True, None) if not v else _validate_waba_id(v),
|
||||
current=current_waba_id,
|
||||
help_text=(
|
||||
"WhatsApp Business Account ID. Found in: App Dashboard →\n"
|
||||
"WhatsApp → API Setup, near the top — 'WhatsApp Business\n"
|
||||
"Account ID'. Numeric, ~15+ digits.\n"
|
||||
"Not required for messaging — useful for analytics."
|
||||
),
|
||||
)
|
||||
if waba_id:
|
||||
save_env_value("WHATSAPP_CLOUD_WABA_ID", waba_id)
|
||||
print(f" ✓ Saved: {waba_id}")
|
||||
elif current_waba_id:
|
||||
print(f" ✓ Keeping existing: {current_waba_id}")
|
||||
print()
|
||||
|
||||
print("─" * 50)
|
||||
print("STEP 5 — Verify Token (auto-generated)")
|
||||
print("─" * 50)
|
||||
current_verify = get_env_value("WHATSAPP_CLOUD_VERIFY_TOKEN") or None
|
||||
if current_verify:
|
||||
print(f" An existing verify token is already set ({current_verify[:8]}...).")
|
||||
try:
|
||||
regen = input(" Generate a new one? [y/N]: ").strip().lower()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
regen = "n"
|
||||
if regen in {"y", "yes"}:
|
||||
verify_token = secrets.token_urlsafe(32)
|
||||
save_env_value("WHATSAPP_CLOUD_VERIFY_TOKEN", verify_token)
|
||||
print(f" ✓ New verify token: {verify_token}")
|
||||
else:
|
||||
verify_token = current_verify
|
||||
print(" ✓ Keeping existing verify token")
|
||||
else:
|
||||
verify_token = secrets.token_urlsafe(32)
|
||||
save_env_value("WHATSAPP_CLOUD_VERIFY_TOKEN", verify_token)
|
||||
print(f" ✓ Generated: {verify_token}")
|
||||
print()
|
||||
print(" → COPY THIS TOKEN NOW. You'll paste it into Meta's webhook")
|
||||
print(" configuration dialog (next step).")
|
||||
print()
|
||||
|
||||
print("─" * 50)
|
||||
print("STEP 6 — Recipient Allowlist")
|
||||
print("─" * 50)
|
||||
print()
|
||||
print(" Who is allowed to message the bot? (Comma-separated phone")
|
||||
print(" numbers with country code, no '+' / spaces / dashes. Use '*'")
|
||||
print(" to allow anyone — only safe if you've also configured Meta's")
|
||||
print(" recipient whitelist for app-development mode.)")
|
||||
print()
|
||||
current_allow = get_env_value("WHATSAPP_CLOUD_ALLOWED_USERS") or None
|
||||
allow_default = current_allow if current_allow else None
|
||||
try:
|
||||
allowed = input(
|
||||
f" → Allowed users{' [' + allow_default + ']' if allow_default else ''}: "
|
||||
).strip() or (allow_default or "")
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
allowed = ""
|
||||
if allowed:
|
||||
# Light normalization — strip spaces and dashes from each entry.
|
||||
allowed = ",".join(
|
||||
re.sub(r"[\s\-+]", "", part) for part in allowed.split(",") if part.strip()
|
||||
)
|
||||
save_env_value("WHATSAPP_CLOUD_ALLOWED_USERS", allowed)
|
||||
print(f" ✓ Saved: {allowed}")
|
||||
else:
|
||||
print(" ⚠ No allowlist — every inbound message will be denied.")
|
||||
print(" Re-run this wizard or set WHATSAPP_CLOUD_ALLOWED_USERS manually.")
|
||||
print()
|
||||
|
||||
print("─" * 50)
|
||||
print("SETUP COMPLETE — Next steps")
|
||||
print("─" * 50)
|
||||
print()
|
||||
print(" Hermes needs a public HTTPS URL to receive WhatsApp messages.")
|
||||
print(" The recommended path is Cloudflare Tunnel (free, no port")
|
||||
print(" forwarding, no DNS setup).")
|
||||
print()
|
||||
print(" 1. Install cloudflared (one-time, if you don't have it):")
|
||||
print(" Windows: winget install Cloudflare.cloudflared")
|
||||
print(" macOS: brew install cloudflared")
|
||||
print(" Linux: https://github.com/cloudflare/cloudflared/releases")
|
||||
print()
|
||||
print(" Alternatives: ngrok, or your own domain + reverse proxy")
|
||||
print(" with TLS.")
|
||||
print()
|
||||
print(" 2. Start the tunnel in a separate terminal:")
|
||||
print(" cloudflared tunnel --url http://localhost:8090")
|
||||
print(" Note the printed https://<random>.trycloudflare.com URL.")
|
||||
print()
|
||||
print(" 3. Start the Hermes gateway in another terminal:")
|
||||
print(" hermes gateway")
|
||||
print()
|
||||
print(" 4. Verify your local config is reachable. From a third")
|
||||
print(" terminal, with the tunnel URL substituted:")
|
||||
print()
|
||||
print(" curl 'https://YOUR-TUNNEL.trycloudflare.com/whatsapp/webhook?\\")
|
||||
print(f" hub.mode=subscribe&hub.verify_token={verify_token}&\\")
|
||||
print(" hub.challenge=hello'")
|
||||
print()
|
||||
print(" Expected: HTTP 200 with body 'hello'.")
|
||||
print(" Also try: curl https://YOUR-TUNNEL.trycloudflare.com/health")
|
||||
print(" (should return JSON with verify_token_configured: true).")
|
||||
print()
|
||||
print(" 5. Configure Meta to point at your tunnel:")
|
||||
print(" App Dashboard → WhatsApp → Configuration → Edit webhook")
|
||||
print(" Callback URL: <tunnel-url>/whatsapp/webhook")
|
||||
print(f" Verify Token: {verify_token}")
|
||||
print(" → Click 'Verify and save'")
|
||||
print(" → Then 'Manage' webhook fields → subscribe to 'messages'")
|
||||
print()
|
||||
print(" 6. Add your phone to Meta's recipient list:")
|
||||
print(" App Dashboard → WhatsApp → API Setup → 'To' →")
|
||||
print(" 'Manage phone number list'")
|
||||
print()
|
||||
print(" 7. DM the bot's test number from your phone.")
|
||||
print()
|
||||
print("─" * 50)
|
||||
print("Optional: polish your bot's WhatsApp profile")
|
||||
print("─" * 50)
|
||||
print()
|
||||
print(" WhatsApp shows a display name and profile picture for your bot")
|
||||
print(" in every chat header and contact list. These are set in Meta's")
|
||||
print(" Business Manager, not via this wizard — but here's where to do")
|
||||
print(" it once you're up and running:")
|
||||
print()
|
||||
effective_waba = waba_id or current_waba_id
|
||||
if effective_waba:
|
||||
print(" • Display name + profile picture:")
|
||||
print(" https://business.facebook.com/wa/manage/phone-numbers/"
|
||||
f"?waba_id={effective_waba}")
|
||||
else:
|
||||
print(" • Display name + profile picture:")
|
||||
print(" https://business.facebook.com/wa/manage/phone-numbers/")
|
||||
print(" (select your WhatsApp Business Account on that page)")
|
||||
print(" Display-name changes go through a ~24-48h Meta review.")
|
||||
print()
|
||||
print(" • About, description, website, hours, business category:")
|
||||
print(" Same page → click your phone number → 'Edit profile'.")
|
||||
print()
|
||||
print(" • Verified badge (the green check):")
|
||||
print(" Requires Meta's business verification process —")
|
||||
print(" Business Manager → Security Center → Start Verification.")
|
||||
print()
|
||||
print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/")
|
||||
print(" messaging/whatsapp-cloud")
|
||||
print()
|
||||
return 0
|
||||
@@ -23,6 +23,7 @@ from rich.table import Table
|
||||
# Lazy imports to avoid circular dependencies and slow startup.
|
||||
# tools.skills_hub and tools.skills_guard are imported inside functions.
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
_console = Console()
|
||||
|
||||
@@ -178,9 +179,12 @@ def _existing_categories() -> List[str]:
|
||||
# top level (no category); otherwise treat as a category bucket.
|
||||
if (entry / "SKILL.md").exists():
|
||||
continue
|
||||
# Has at least one nested SKILL.md?
|
||||
# Has at least one nested SKILL.md (excluding dependency/cache dirs)?
|
||||
try:
|
||||
if any(entry.rglob("SKILL.md")):
|
||||
if any(
|
||||
not is_excluded_skill_path(p)
|
||||
for p in entry.rglob("SKILL.md")
|
||||
):
|
||||
out.append(entry.name)
|
||||
except OSError:
|
||||
continue
|
||||
@@ -319,12 +323,14 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
|
||||
c.print("[dim]No skills found in the Skills Hub.[/]\n")
|
||||
return
|
||||
|
||||
# Deduplicate by name, preferring higher trust
|
||||
# Deduplicate by identifier, preferring higher trust.
|
||||
# identifier is always unique per skill; name is not (browse-sh skills from different
|
||||
# sites can share the same task name, e.g. "search-listings" on Airbnb and Booking.com).
|
||||
seen: dict = {}
|
||||
for r in all_results:
|
||||
rank = _TRUST_RANK.get(r.trust_level, 0)
|
||||
if r.name not in seen or rank > _TRUST_RANK.get(seen[r.name].trust_level, 0):
|
||||
seen[r.name] = r
|
||||
if r.identifier not in seen or rank > _TRUST_RANK.get(seen[r.identifier].trust_level, 0):
|
||||
seen[r.identifier] = r
|
||||
deduped = list(seen.values())
|
||||
|
||||
# Sort: official first, then by trust level (desc), then alphabetically
|
||||
@@ -702,8 +708,8 @@ def browse_skills(page: int = 1, page_size: int = 20, source: str = "all") -> di
|
||||
seen: dict = {}
|
||||
for r in all_results:
|
||||
rank = _TRUST_RANK.get(r.trust_level, 0)
|
||||
if r.name not in seen or rank > _TRUST_RANK.get(seen[r.name].trust_level, 0):
|
||||
seen[r.name] = r
|
||||
if r.identifier not in seen or rank > _TRUST_RANK.get(seen[r.identifier].trust_level, 0):
|
||||
seen[r.identifier] = r
|
||||
deduped = list(seen.values())
|
||||
deduped.sort(key=lambda r: (-_TRUST_RANK.get(r.trust_level, 0), r.source != "official", r.name.lower()))
|
||||
total = len(deduped)
|
||||
|
||||
@@ -309,7 +309,7 @@ def show_status(args):
|
||||
print()
|
||||
print(color("◆ Nous Tool Gateway", Colors.CYAN, Colors.BOLD))
|
||||
print(" Your free-tier Nous account does not include Tool Gateway access.")
|
||||
print(" Upgrade your subscription to unlock managed web, image, TTS, and browser tools.")
|
||||
print(" Upgrade your subscription to unlock managed web, image, TTS, STT, and browser tools.")
|
||||
try:
|
||||
portal_url = nous_status.get("portal_base_url", "").rstrip("/")
|
||||
if portal_url:
|
||||
|
||||
@@ -0,0 +1,253 @@
|
||||
"""Detect xAI models retired on May 15, 2026.
|
||||
|
||||
Source: https://docs.x.ai/developers/migration/may-15-retirement
|
||||
|
||||
Pure logic: walks a Hermes config dict, returns issues for any reference
|
||||
to a retired xAI model. No I/O, no CLI dependencies — testable in isolation
|
||||
and reusable from both `hermes doctor` and a future `hermes migrate xai`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
MIGRATION_GUIDE_URL = "https://docs.x.ai/developers/migration/may-15-retirement"
|
||||
RETIREMENT_DATE = "May 15, 2026"
|
||||
|
||||
|
||||
# Official mapping per xAI migration guide.
|
||||
# Some entries set ``reasoning_effort`` because non-reasoning variants don't
|
||||
# have a one-to-one replacement: ``grok-4.3`` reasons by default, so emulating
|
||||
# ``*-non-reasoning`` behavior on it requires ``reasoning_effort="none"``.
|
||||
_RETIRED_MODELS: Dict[str, Dict[str, Optional[str]]] = {
|
||||
"grok-4-0709": {"replacement": "grok-4.3", "reasoning_effort": None, "note": None},
|
||||
"grok-4-fast-reasoning": {"replacement": "grok-4.3", "reasoning_effort": None, "note": None},
|
||||
"grok-4-fast-non-reasoning": {"replacement": "grok-4.3", "reasoning_effort": "none", "note": None},
|
||||
"grok-4-1-fast-reasoning": {"replacement": "grok-4.3", "reasoning_effort": None, "note": None},
|
||||
"grok-4-1-fast-non-reasoning": {"replacement": "grok-4.3", "reasoning_effort": "none", "note": None},
|
||||
"grok-code-fast-1": {"replacement": "grok-4.3", "reasoning_effort": None, "note": None},
|
||||
"grok-3": {"replacement": "grok-4.3", "reasoning_effort": None, "note": None},
|
||||
"grok-imagine-image-pro": {"replacement": "grok-imagine-image-quality", "reasoning_effort": None, "note": None},
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RetirementIssue:
|
||||
"""A reference to a retired xAI model found in a Hermes config."""
|
||||
|
||||
config_path: str # e.g. "principal.model" or "auxiliary.vision.model"
|
||||
current_model: str # exact value found in config (preserves casing/prefix)
|
||||
replacement: str # recommended xAI replacement
|
||||
reasoning_effort: Optional[str] = None # set if non-reasoning variant migration
|
||||
note: Optional[str] = None # disambiguation note when applicable
|
||||
|
||||
|
||||
def _normalize(model_id: str) -> str:
|
||||
"""Strip provider prefix (``x-ai/grok-4`` → ``grok-4``) and lowercase."""
|
||||
m = model_id.strip().lower()
|
||||
for prefix in ("x-ai/", "xai/"):
|
||||
if m.startswith(prefix):
|
||||
m = m[len(prefix):]
|
||||
break
|
||||
return m
|
||||
|
||||
|
||||
def _looks_like_xai(model_id: Optional[str]) -> bool:
|
||||
if not isinstance(model_id, str) or not model_id.strip():
|
||||
return False
|
||||
return _normalize(model_id).startswith("grok-")
|
||||
|
||||
|
||||
def find_retired_xai_refs(config: Dict[str, Any]) -> List[RetirementIssue]:
|
||||
"""Walk all model slots in a Hermes config and return retirement issues.
|
||||
|
||||
Slots scanned:
|
||||
- ``principal.model``
|
||||
- ``auxiliary.<any>.model`` (introspective — covers future aux slots)
|
||||
- ``delegation.model``
|
||||
- ``tts.xai.model``
|
||||
- ``plugins.image_gen.xai.model``
|
||||
"""
|
||||
issues: List[RetirementIssue] = []
|
||||
|
||||
def _check(path: str, model: Any) -> None:
|
||||
if not _looks_like_xai(model):
|
||||
return
|
||||
norm = _normalize(model)
|
||||
entry = _RETIRED_MODELS.get(norm)
|
||||
if entry is None:
|
||||
return
|
||||
issues.append(RetirementIssue(
|
||||
config_path=path,
|
||||
current_model=model,
|
||||
replacement=entry["replacement"],
|
||||
reasoning_effort=entry.get("reasoning_effort"),
|
||||
note=entry.get("note"),
|
||||
))
|
||||
|
||||
if not isinstance(config, dict):
|
||||
return issues
|
||||
|
||||
principal = config.get("principal")
|
||||
if isinstance(principal, dict):
|
||||
_check("principal.model", principal.get("model"))
|
||||
|
||||
aux = config.get("auxiliary")
|
||||
if isinstance(aux, dict):
|
||||
for slot_name, slot_cfg in aux.items():
|
||||
if isinstance(slot_cfg, dict):
|
||||
_check(f"auxiliary.{slot_name}.model", slot_cfg.get("model"))
|
||||
|
||||
delegation = config.get("delegation")
|
||||
if isinstance(delegation, dict):
|
||||
_check("delegation.model", delegation.get("model"))
|
||||
|
||||
tts = config.get("tts")
|
||||
if isinstance(tts, dict):
|
||||
tts_xai = tts.get("xai")
|
||||
if isinstance(tts_xai, dict):
|
||||
_check("tts.xai.model", tts_xai.get("model"))
|
||||
|
||||
plugins = config.get("plugins")
|
||||
if isinstance(plugins, dict):
|
||||
image_gen = plugins.get("image_gen")
|
||||
if isinstance(image_gen, dict):
|
||||
ig_xai = image_gen.get("xai")
|
||||
if isinstance(ig_xai, dict):
|
||||
_check("plugins.image_gen.xai.model", ig_xai.get("model"))
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
def format_issue(issue: RetirementIssue) -> str:
|
||||
"""One-line human-readable rendering of a retirement issue."""
|
||||
parts = [
|
||||
f"{issue.config_path}: {issue.current_model!r} → use {issue.replacement!r}"
|
||||
]
|
||||
if issue.reasoning_effort:
|
||||
parts.append(f'(set reasoning_effort: "{issue.reasoning_effort}")')
|
||||
if issue.note:
|
||||
parts.append(f"[note: {issue.note}]")
|
||||
return " ".join(parts)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Apply migration to config.yaml (round-trip preserves comments/order/types)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
import datetime as _dt
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ApplyResult:
|
||||
"""Outcome of an apply_migration call."""
|
||||
|
||||
file_path: Path
|
||||
backup_path: Optional[Path]
|
||||
issues_resolved: List[RetirementIssue]
|
||||
config_changed: bool
|
||||
|
||||
|
||||
def _walk_to_parent(yaml_doc: Any, dotted_path: str) -> "tuple[Any, str]":
|
||||
"""Resolve a dotted slot path to (parent_mapping, leaf_key).
|
||||
|
||||
Example: "auxiliary.vision.model" -> (yaml_doc["auxiliary"]["vision"], "model").
|
||||
Raises KeyError if any intermediate node is missing or not a mapping.
|
||||
"""
|
||||
parts = dotted_path.split(".")
|
||||
if len(parts) < 2:
|
||||
raise ValueError(f"Path must have at least one parent: {dotted_path!r}")
|
||||
node = yaml_doc
|
||||
for segment in parts[:-1]:
|
||||
if not isinstance(node, dict) or segment not in node:
|
||||
raise KeyError(f"Path segment {segment!r} missing in {dotted_path!r}")
|
||||
node = node[segment]
|
||||
return node, parts[-1]
|
||||
|
||||
|
||||
def apply_migration(
|
||||
config_path: Path,
|
||||
issues: List[RetirementIssue],
|
||||
backup: bool = True,
|
||||
) -> ApplyResult:
|
||||
"""Rewrite ``config_path`` in-place so each issue is resolved.
|
||||
|
||||
For every issue, the model name is replaced by ``issue.replacement``. If the
|
||||
issue has ``reasoning_effort`` set (i.e. the migration is from a
|
||||
``*-non-reasoning`` variant), a sibling ``reasoning_effort`` key is added
|
||||
or updated alongside the model.
|
||||
|
||||
Uses ``ruamel.yaml`` round-trip mode so comments, key order, indentation,
|
||||
and type literals (booleans, ints) are preserved.
|
||||
|
||||
A backup copy is written to
|
||||
``<config_path>.bak-pre-migrate-xai-YYYYMMDD-HHMMSS`` before rewriting,
|
||||
unless ``backup=False``.
|
||||
"""
|
||||
from ruamel.yaml import YAML # local import — avoid hard dep at module load
|
||||
|
||||
config_path = Path(config_path)
|
||||
if not config_path.exists():
|
||||
raise FileNotFoundError(config_path)
|
||||
|
||||
if not issues:
|
||||
return ApplyResult(
|
||||
file_path=config_path,
|
||||
backup_path=None,
|
||||
issues_resolved=[],
|
||||
config_changed=False,
|
||||
)
|
||||
|
||||
yaml = YAML(typ="rt")
|
||||
yaml.preserve_quotes = True
|
||||
with config_path.open("r", encoding="utf-8") as fh:
|
||||
doc = yaml.load(fh)
|
||||
|
||||
if doc is None:
|
||||
return ApplyResult(
|
||||
file_path=config_path,
|
||||
backup_path=None,
|
||||
issues_resolved=[],
|
||||
config_changed=False,
|
||||
)
|
||||
|
||||
resolved: List[RetirementIssue] = []
|
||||
for issue in issues:
|
||||
try:
|
||||
parent, leaf = _walk_to_parent(doc, issue.config_path)
|
||||
except KeyError:
|
||||
# Slot vanished between scan and apply — skip silently
|
||||
continue
|
||||
parent[leaf] = issue.replacement
|
||||
if issue.reasoning_effort:
|
||||
parent["reasoning_effort"] = issue.reasoning_effort
|
||||
resolved.append(issue)
|
||||
|
||||
if not resolved:
|
||||
return ApplyResult(
|
||||
file_path=config_path,
|
||||
backup_path=None,
|
||||
issues_resolved=[],
|
||||
config_changed=False,
|
||||
)
|
||||
|
||||
backup_path: Optional[Path] = None
|
||||
if backup:
|
||||
ts = _dt.datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
backup_path = config_path.with_name(
|
||||
f"{config_path.name}.bak-pre-migrate-xai-{ts}"
|
||||
)
|
||||
shutil.copy2(config_path, backup_path)
|
||||
|
||||
with config_path.open("w", encoding="utf-8") as fh:
|
||||
yaml.dump(doc, fh)
|
||||
|
||||
return ApplyResult(
|
||||
file_path=config_path,
|
||||
backup_path=backup_path,
|
||||
issues_resolved=resolved,
|
||||
config_changed=True,
|
||||
)
|
||||
@@ -235,6 +235,26 @@ def display_hermes_home() -> str:
|
||||
return str(home)
|
||||
|
||||
|
||||
def secure_parent_dir(path: Path) -> None:
|
||||
"""Chmod ``0o700`` on the parent directory of *path*, but only if safe.
|
||||
|
||||
Refuses to chmod ``/`` or any top-level directory (resolved parent with
|
||||
fewer than 3 parts, i.e. ``/`` or any direct child like ``/usr``) to
|
||||
prevent catastrophic host bricking when ``HERMES_HOME`` or other path
|
||||
env vars resolve to an unexpected location.
|
||||
|
||||
See https://github.com/NousResearch/hermes-agent/issues/25821.
|
||||
"""
|
||||
parent = path.parent.resolve()
|
||||
# Refuse root and its direct children (/usr, /home, /var, /tmp, …).
|
||||
if parent == Path("/") or len(parent.parts) < 3:
|
||||
return
|
||||
try:
|
||||
os.chmod(parent, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def get_subprocess_home() -> str | None:
|
||||
"""Return a per-profile HOME directory for subprocesses, or None.
|
||||
|
||||
|
||||
+42
-7
@@ -33,7 +33,7 @@ T = TypeVar("T")
|
||||
|
||||
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
|
||||
|
||||
SCHEMA_VERSION = 11
|
||||
SCHEMA_VERSION = 12
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WAL-compatibility fallback
|
||||
@@ -236,7 +236,8 @@ CREATE TABLE IF NOT EXISTS messages (
|
||||
reasoning_content TEXT,
|
||||
reasoning_details TEXT,
|
||||
codex_reasoning_items TEXT,
|
||||
codex_message_items TEXT
|
||||
codex_message_items TEXT,
|
||||
platform_message_id TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS state_meta (
|
||||
@@ -571,6 +572,19 @@ class SessionDB:
|
||||
# column gets created here.
|
||||
self._reconcile_columns(cursor)
|
||||
|
||||
# Indexes that reference reconciler-added columns must be created
|
||||
# AFTER _reconcile_columns runs — declaring them in SCHEMA_SQL
|
||||
# makes the initial executescript fail on legacy DBs (the index's
|
||||
# WHERE clause references a column that doesn't exist yet).
|
||||
try:
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_messages_platform_msg_id "
|
||||
"ON messages(session_id, platform_message_id) "
|
||||
"WHERE platform_message_id IS NOT NULL"
|
||||
)
|
||||
except sqlite3.OperationalError as exc:
|
||||
logger.debug("idx_messages_platform_msg_id create skipped: %s", exc)
|
||||
|
||||
# ── Schema version bookkeeping ─────────────────────────────────
|
||||
# Bump to current so future data migrations (if any) can gate on
|
||||
# version. No version-gated column additions remain.
|
||||
@@ -1445,12 +1459,19 @@ class SessionDB:
|
||||
reasoning_details: Any = None,
|
||||
codex_reasoning_items: Any = None,
|
||||
codex_message_items: Any = None,
|
||||
platform_message_id: str = None,
|
||||
) -> int:
|
||||
"""
|
||||
Append a message to a session. Returns the message row ID.
|
||||
|
||||
Also increments the session's message_count (and tool_call_count
|
||||
if role is 'tool' or tool_calls is present).
|
||||
|
||||
``platform_message_id`` is the external messaging platform's own
|
||||
message ID (e.g. Telegram update_id, Yuanbao msg_id). It is
|
||||
independent of the SQLite autoincrement primary key and is used by
|
||||
platform-specific flows like yuanbao's recall guard to redact a
|
||||
message by its platform-side identifier.
|
||||
"""
|
||||
# Serialize structured fields to JSON before entering the write txn
|
||||
reasoning_details_json = (
|
||||
@@ -1480,8 +1501,8 @@ class SessionDB:
|
||||
"""INSERT INTO messages (session_id, role, content, tool_call_id,
|
||||
tool_calls, tool_name, timestamp, token_count, finish_reason,
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
|
||||
codex_message_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
codex_message_items, platform_message_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
session_id,
|
||||
role,
|
||||
@@ -1497,6 +1518,7 @@ class SessionDB:
|
||||
reasoning_details_json,
|
||||
codex_items_json,
|
||||
codex_message_items_json,
|
||||
platform_message_id,
|
||||
),
|
||||
)
|
||||
msg_id = cursor.lastrowid
|
||||
@@ -1558,13 +1580,18 @@ class SessionDB:
|
||||
json.dumps(codex_message_items) if codex_message_items else None
|
||||
)
|
||||
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
|
||||
# Accept either `platform_message_id` (new explicit name) or
|
||||
# `message_id` (yuanbao's existing convention on message dicts).
|
||||
platform_msg_id = (
|
||||
msg.get("platform_message_id") or msg.get("message_id")
|
||||
)
|
||||
|
||||
conn.execute(
|
||||
"""INSERT INTO messages (session_id, role, content, tool_call_id,
|
||||
tool_calls, tool_name, timestamp, token_count, finish_reason,
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
|
||||
codex_message_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
codex_message_items, platform_message_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
session_id,
|
||||
role,
|
||||
@@ -1580,6 +1607,7 @@ class SessionDB:
|
||||
reasoning_details_json,
|
||||
codex_items_json,
|
||||
codex_message_items_json,
|
||||
platform_msg_id,
|
||||
),
|
||||
)
|
||||
total_messages += 1
|
||||
@@ -1897,7 +1925,7 @@ class SessionDB:
|
||||
rows = self._conn.execute(
|
||||
"SELECT role, content, tool_call_id, tool_calls, tool_name, "
|
||||
"finish_reason, reasoning, reasoning_content, reasoning_details, "
|
||||
"codex_reasoning_items, codex_message_items "
|
||||
"codex_reasoning_items, codex_message_items, platform_message_id "
|
||||
f"FROM messages WHERE session_id IN ({placeholders}) ORDER BY id",
|
||||
tuple(session_ids),
|
||||
).fetchall()
|
||||
@@ -1918,6 +1946,13 @@ class SessionDB:
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning("Failed to deserialize tool_calls in conversation replay, falling back to []")
|
||||
msg["tool_calls"] = []
|
||||
# Surface the platform-side message id (e.g. yuanbao msg_id,
|
||||
# telegram update_id) so platform-specific flows like recall
|
||||
# can match by external identifier instead of having to fall
|
||||
# back to content-match heuristics. Exposed as ``message_id``
|
||||
# for backward compatibility with the JSONL transcript shape.
|
||||
if row["platform_message_id"]:
|
||||
msg["message_id"] = row["platform_message_id"]
|
||||
# Restore reasoning fields on assistant messages so providers
|
||||
# that replay reasoning (OpenRouter, OpenAI, Nous) receive
|
||||
# coherent multi-turn reasoning context.
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 1.9 MiB |
@@ -0,0 +1,121 @@
|
||||
Create a professional infographic following these specifications:
|
||||
|
||||
## Image Specifications
|
||||
|
||||
- **Type**: Infographic
|
||||
- **Layout**: bento-grid
|
||||
- **Style**: retro-pop-grid
|
||||
- **Aspect Ratio**: 1:1 (square)
|
||||
- **Language**: en
|
||||
|
||||
## Core Principles
|
||||
|
||||
- Follow the layout structure precisely for information architecture
|
||||
- Apply style aesthetics consistently throughout
|
||||
- Keep information concise, highlight keywords and core concepts
|
||||
- Use ample whitespace for visual clarity
|
||||
- Maintain clear visual hierarchy
|
||||
|
||||
## Text Requirements
|
||||
|
||||
- All text must match the specified style treatment
|
||||
- Main titles should be prominent and readable
|
||||
- Key concepts should be visually emphasized
|
||||
- Labels should be clear and appropriately sized
|
||||
- Use English for all text content
|
||||
|
||||
## Layout Guidelines (bento-grid)
|
||||
|
||||
- Grid of rectangular cells with varied sizes (1x1, 2x1, 1x2, 2x2)
|
||||
- Hero cell ("ONE TOKEN, EVERY KEY") takes the largest position (top-center or upper-left, 2x2)
|
||||
- Supporting cells around the hero, mixed cell sizes for rhythm
|
||||
- Each cell self-contained with its own title + icon + brief content
|
||||
- Title strip at the top: "BITWARDEN SECRETS MANAGER — HERMES-AGENT PR #30035"
|
||||
- Footer strip at the bottom with commit SHA + repo
|
||||
|
||||
## Style Guidelines (retro-pop-grid)
|
||||
|
||||
- 1970s retro pop art with strict Swiss international grid
|
||||
- Background: warm vintage cream/beige (#F5F0E6)
|
||||
- Accents: salmon pink, sky blue, mustard yellow, mint green — all muted retro tones
|
||||
- Pure solid black (#000000) and solid white (#FFFFFF) for extreme-contrast cells
|
||||
- Uniform thick black outlines on ALL illustrations, text boxes, grid dividers
|
||||
- Pure 2D flat vector aesthetic with subtle screen-print texture
|
||||
- One cell inverted to black-background-with-white-text for the "NEVER BLOCKS STARTUP" warning section
|
||||
- Geometric fill patterns in empty cells: checkerboards, diagonal lines, dot grids
|
||||
- Flat abstract symbols: shields (security), wrenches (install), arrows (rotation), keyholes (auth), checkmarks (tests)
|
||||
- Vintage comic-style smiley face for "26/26 PASSING" cell
|
||||
- Bold brutalist or thick retro display fonts for headers; clean sans-serif body
|
||||
- Decorative stylistic labels acceptable: "WARNING", "NEW DEFAULT", "PINNED", "VERIFIED", "ROTATE"
|
||||
|
||||
## Avoid
|
||||
|
||||
- 3D rendering, gradients, soft shadows, sketch-like lines
|
||||
- Free-floating elements — everything anchored in grid cells
|
||||
- Pure white background — must use warm cream/beige
|
||||
|
||||
---
|
||||
|
||||
Generate the infographic based on the content below:
|
||||
|
||||
### Title (top strip)
|
||||
BITWARDEN SECRETS MANAGER → HERMES-AGENT
|
||||
PR #30035
|
||||
|
||||
### HERO CELL (largest, top-center, salmon pink background with thick black border)
|
||||
ONE TOKEN, EVERY KEY
|
||||
Rotate once in the Bitwarden web app.
|
||||
Every Hermes process picks it up on next start.
|
||||
NEW DEFAULT: override_existing = true
|
||||
|
||||
### Cell — LAZY INSTALL (sky blue background)
|
||||
~/.hermes/bin/bws
|
||||
bws v2.0.0 PINNED
|
||||
SHA-256 VERIFIED
|
||||
No apt · no brew · no sudo
|
||||
Icon: wrench + downward arrow
|
||||
|
||||
### Cell — CLI SURFACE (mustard yellow background, checkerboard accents)
|
||||
$ hermes secrets bitwarden
|
||||
setup wizard
|
||||
status diagnose
|
||||
sync fetch
|
||||
install binary
|
||||
disable off
|
||||
Icon: terminal prompt symbol
|
||||
|
||||
### Cell — SOURCE OF TRUTH (mint green background)
|
||||
BITWARDEN WINS
|
||||
Overwrites stale .env on every start
|
||||
Bootstrap token never overwritten (exception)
|
||||
Icon: keyhole + arrow
|
||||
|
||||
### Cell — INVERTED BLACK CELL with WHITE TEXT — NEVER BLOCKS STARTUP (extreme contrast)
|
||||
WARNING-FREE STARTUP
|
||||
Missing binary → warn + continue
|
||||
Bad token → warn + continue
|
||||
Network down → warn + continue
|
||||
Checksum mismatch → refuse + warn
|
||||
30s timeout ceiling
|
||||
Icon: white triangle warning sign
|
||||
|
||||
### Cell — TESTS (cream with thick black outline, vintage comic smiley face)
|
||||
26 / 26
|
||||
HERMETIC
|
||||
subprocess + urllib mocked
|
||||
linux · macos · windows
|
||||
x86_64 · arm64
|
||||
Icon: comic-style smiley face with checkmark
|
||||
|
||||
### Cell — CONFIG YAML (white background with black grid)
|
||||
secrets:
|
||||
bitwarden:
|
||||
enabled: true
|
||||
project_id: ...
|
||||
override_existing: true
|
||||
cache_ttl_seconds: 300
|
||||
auto_install: true
|
||||
|
||||
### Footer strip (bottom, black-on-cream)
|
||||
PR #30035 · commit 7f9b05668 · NousResearch/hermes-agent
|
||||
10 files · +1743 / -1 · agent/secret_sources/ · hermes_cli/secrets_cli.py
|
||||
@@ -0,0 +1,57 @@
|
||||
# Hermes-Agent PR #30035 — Bitwarden Secrets Manager Integration
|
||||
|
||||
## Hero
|
||||
**ONE TOKEN, EVERY KEY**
|
||||
Rotate once. Every Hermes process picks it up on next start.
|
||||
`secrets.bitwarden.override_existing: true` (default)
|
||||
|
||||
## Cells
|
||||
|
||||
### Lazy Install
|
||||
- `bws v2.0.0` pinned
|
||||
- Downloaded into `~/.hermes/bin/bws`
|
||||
- SHA-256 verified vs GitHub Releases checksum file
|
||||
- No apt, no brew, no sudo
|
||||
- Cross-platform: linux gnu+musl, macos universal, windows x86_64+arm64
|
||||
|
||||
### CLI Surface
|
||||
- `hermes secrets bitwarden setup` wizard
|
||||
- `hermes secrets bitwarden status` diagnose
|
||||
- `hermes secrets bitwarden sync` dry-run / --apply
|
||||
- `hermes secrets bitwarden install` binary only
|
||||
- `hermes secrets bitwarden disable` off switch
|
||||
|
||||
### Source of Truth
|
||||
- Bitwarden WINS on every Hermes start
|
||||
- BSM values overwrite stale `.env` lines
|
||||
- Rotate a key once → all your machines reload it
|
||||
- Bootstrap token `BWS_ACCESS_TOKEN` is the lone exception (never overwritten)
|
||||
|
||||
### Never Blocks Startup
|
||||
- Missing binary → warn + continue
|
||||
- Bad token → warn + continue
|
||||
- Checksum mismatch → refuse install + warn
|
||||
- No network → warn + continue
|
||||
- Timeout → 30s ceiling, warn + continue
|
||||
|
||||
### Tests
|
||||
- 26/26 passing, hermetic
|
||||
- subprocess + urllib mocked
|
||||
- Platform matrix tested (linux, macos, windows × x86_64, arm64)
|
||||
- Cache hit/miss, auth fail, non-JSON, timeout, override behavior
|
||||
|
||||
### Config
|
||||
```yaml
|
||||
secrets:
|
||||
bitwarden:
|
||||
enabled: true
|
||||
project_id: <uuid>
|
||||
override_existing: true # NEW DEFAULT
|
||||
cache_ttl_seconds: 300
|
||||
auto_install: true
|
||||
```
|
||||
|
||||
## Footer
|
||||
PR #30035 · commit 7f9b05668 · NousResearch/hermes-agent
|
||||
|
||||
10 files changed · +1743 / -1 · agent/secret_sources/ · hermes_cli/secrets_cli.py · tests · docs
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 1.9 MiB |
@@ -16,6 +16,11 @@
|
||||
openssh,
|
||||
ffmpeg,
|
||||
tirith,
|
||||
|
||||
# linux-only deps
|
||||
wl-clipboard,
|
||||
xclip,
|
||||
|
||||
# Flake inputs — passed explicitly by packages.nix and overlays.nix
|
||||
uv2nix,
|
||||
pyproject-nix,
|
||||
@@ -68,6 +73,10 @@ let
|
||||
openssh
|
||||
ffmpeg
|
||||
tirith
|
||||
]
|
||||
++ lib.optionals stdenv.isLinux [
|
||||
wl-clipboard
|
||||
xclip
|
||||
];
|
||||
|
||||
runtimePath = lib.makeBinPath runtimeDeps;
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ let
|
||||
src = ../ui-tui;
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
hash = "sha256-dNL/J4tyQQ7Ji3xfIE5b5Jdi6rQyCFjqYpzLYftJVdc=";
|
||||
hash = "sha256-F6/MzZOWc0zhW9mIfnaY+PrllPvJcsA/OdFdEM+NpLY=";
|
||||
};
|
||||
|
||||
npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ let
|
||||
src = ../web;
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
hash = "sha256-FL2E8Vv8gyeClEa5b/pHn/ekWoHWTd4YwzV6zhLEos4=";
|
||||
hash = "sha256-xSsyluzU2lNhwGqB6XMCGMv3QFHZizE6hgUyc1jvyOw=";
|
||||
};
|
||||
|
||||
npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
|
||||
|
||||
+33
-25
@@ -24,6 +24,23 @@
|
||||
const { useState, useEffect, useCallback, useMemo, useRef } = SDK.hooks;
|
||||
const { cn, timeAgo } = SDK.utils;
|
||||
|
||||
// Newer host dashboards expose a DS-styled Checkbox on the plugin SDK.
|
||||
// Fall back to a native <input type="checkbox"> shim so older hosts that
|
||||
// predate the design-system rollout still render. The shim normalises
|
||||
// Radix's onCheckedChange(checked) signature to native onChange(event).
|
||||
const Checkbox = SDK.components.Checkbox || function (props) {
|
||||
const { checked, onCheckedChange, className, onClick, ...rest } = props;
|
||||
return h("input", Object.assign({
|
||||
type: "checkbox",
|
||||
checked: !!checked,
|
||||
className: className,
|
||||
onClick: onClick,
|
||||
onChange: function (e) {
|
||||
if (onCheckedChange) onCheckedChange(e.target.checked);
|
||||
},
|
||||
}, rest));
|
||||
};
|
||||
|
||||
// useI18n is a hook each component calls locally. Older host dashboards
|
||||
// may not expose it yet; fall back to a shim so the bundle still renders
|
||||
// English against an older host SDK. English fallback strings live
|
||||
@@ -1648,11 +1665,10 @@
|
||||
h(Label, { className: "text-xs text-muted-foreground" },
|
||||
"Orchestration mode"),
|
||||
h("label", { className: "flex items-center gap-2 text-xs h-8" },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
checked: !!settings.auto_decompose,
|
||||
onChange: function (e) {
|
||||
saveSettings({ auto_decompose: !!e.target.checked });
|
||||
onCheckedChange: function (checked) {
|
||||
saveSettings({ auto_decompose: checked === true });
|
||||
},
|
||||
}),
|
||||
"Auto-decompose triage tasks",
|
||||
@@ -1908,10 +1924,9 @@
|
||||
}),
|
||||
),
|
||||
h("label", { className: "flex items-center gap-2 text-xs" },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
checked: switchTo,
|
||||
onChange: function (e) { setSwitchTo(e.target.checked); },
|
||||
onCheckedChange: function (checked) { setSwitchTo(checked === true); },
|
||||
}),
|
||||
tx(t, "switchAfterCreate", "Switch to this board after creating it"),
|
||||
),
|
||||
@@ -1981,19 +1996,17 @@
|
||||
),
|
||||
h("label", { className: "flex items-center gap-2 text-xs",
|
||||
title: "Include archived tasks in the board view. Archived tasks are hidden by default." },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
checked: props.includeArchived,
|
||||
onChange: function (e) { props.setIncludeArchived(e.target.checked); },
|
||||
onCheckedChange: function (checked) { props.setIncludeArchived(checked === true); },
|
||||
}),
|
||||
tx(t, "showArchived", "Show archived"),
|
||||
),
|
||||
h("label", { className: "flex items-center gap-2 text-xs",
|
||||
title: "Group the Running column by assigned profile" },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
checked: props.laneByProfile,
|
||||
onChange: function (e) { props.setLaneByProfile(e.target.checked); },
|
||||
onCheckedChange: function (checked) { props.setLaneByProfile(checked === true); },
|
||||
}),
|
||||
tx(t, "lanesByProfile", "Lanes by profile"),
|
||||
),
|
||||
@@ -2122,10 +2135,9 @@
|
||||
}, tx(t, "apply", "Apply")),
|
||||
),
|
||||
h("label", { className: "hermes-kanban-bulk-reclaim-first", title: "Reclaim any active claims before reassigning" },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
checked: reclaimFirst,
|
||||
onChange: function (e) { setReclaimFirst(e.target.checked); },
|
||||
onCheckedChange: function (checked) { setReclaimFirst(checked === true); },
|
||||
}),
|
||||
"Reclaim first",
|
||||
),
|
||||
@@ -2313,14 +2325,12 @@
|
||||
},
|
||||
h("div", { className: "hermes-kanban-column-header",
|
||||
title: colHelp || "" },
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
className: "hermes-kanban-col-check",
|
||||
title: "Select all tasks in this column",
|
||||
"aria-label": `Select all tasks in ${colLabel || props.column.name}`,
|
||||
checked: props.column.tasks.length > 0 && props.column.tasks.every(function (t) { return props.selectedIds.has(t.id); }),
|
||||
onChange: function (e) {
|
||||
e.stopPropagation();
|
||||
onCheckedChange: function () {
|
||||
if (props.selectAllInColumn) props.selectAllInColumn(props.column.name);
|
||||
},
|
||||
onClick: function (e) { e.stopPropagation(); },
|
||||
@@ -2461,8 +2471,7 @@
|
||||
if (props.toggleSelected) props.toggleSelected(t.id, false);
|
||||
}
|
||||
};
|
||||
const handleCheckbox = function (e) {
|
||||
e.stopPropagation();
|
||||
const handleCheckedChange = function () {
|
||||
props.toggleSelected(t.id, true);
|
||||
};
|
||||
|
||||
@@ -2495,11 +2504,10 @@
|
||||
title: tx(i18n, "selectForBulk", "Select for bulk actions"),
|
||||
onClick: function (e) { e.stopPropagation(); },
|
||||
},
|
||||
h("input", {
|
||||
type: "checkbox",
|
||||
h(Checkbox, {
|
||||
className: "hermes-kanban-card-check",
|
||||
checked: props.selected,
|
||||
onChange: handleCheckbox,
|
||||
onCheckedChange: handleCheckedChange,
|
||||
onClick: function (e) { e.stopPropagation(); },
|
||||
"aria-label": `Select task ${t.id}`,
|
||||
}),
|
||||
|
||||
+12
-12
@@ -41,7 +41,11 @@ dependencies = [
|
||||
"ruamel.yaml==0.18.17",
|
||||
"requests==2.33.0", # CVE-2026-25645
|
||||
"jinja2==3.1.6",
|
||||
"pydantic==2.12.5",
|
||||
# Bumped from 2.12.5 to 2.13.4 to pull in pydantic-core 2.46.4.
|
||||
# pydantic-core 2.41.5 (pulled by 2.12.5) segfaults when the OpenAI SDK's
|
||||
# Responses API resource is exercised from a non-main thread, which is the
|
||||
# codex_responses dispatch in agent/chat_completion_helpers.py:_call.
|
||||
"pydantic==2.13.4",
|
||||
# Interactive CLI (prompt_toolkit is used directly by cli.py)
|
||||
"prompt_toolkit==3.0.52",
|
||||
# Cron scheduler (built-in feature — scheduled cron/interval jobs use croniter).
|
||||
@@ -80,7 +84,7 @@ modal = ["modal==1.3.4"]
|
||||
daytona = ["daytona==0.155.0"]
|
||||
vercel = ["vercel==0.5.7"]
|
||||
hindsight = ["hindsight-client==0.6.1"]
|
||||
dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-xdist==3.8.0", "pytest-split==0.11.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
|
||||
dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
|
||||
messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
|
||||
cron = [] # croniter is now a core dependency; this extra kept for back-compat
|
||||
slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.3"]
|
||||
@@ -228,16 +232,12 @@ markers = [
|
||||
"integration: marks tests requiring external services (API keys, Modal, etc.)",
|
||||
"real_concurrent_gate: opt out of the autouse stub that disables _detect_concurrent_hermes_instances",
|
||||
]
|
||||
# pytest-timeout: per-test 60s hard cap with thread method.
|
||||
# Discovered May 2026: the suite reliably hangs at ~96% on full runs even
|
||||
# though every individual test completes in <30s. Root cause is leaked
|
||||
# threads / atexit handlers accumulating across thousands of tests until
|
||||
# something deadlocks at session teardown. Adding pytest-timeout (with
|
||||
# thread method, which forces an interrupt into the test thread) breaks
|
||||
# the deadlock — the suite then completes cleanly. The 60s cap is large
|
||||
# enough that no legitimate test trips it; if a test exceeds it that's a
|
||||
# real bug worth surfacing as a Timeout failure.
|
||||
addopts = "-m 'not integration' -n auto --timeout=30 --timeout-method=signal"
|
||||
# pytest-timeout: per-test 30s hard cap with signal method.
|
||||
# This is the fallback inside each per-file pytest subprocess (see
|
||||
# scripts/run_tests_parallel.py). Per-file isolation gives every test
|
||||
# file a fresh Python interpreter; pytest-timeout catches Python-level
|
||||
# hangs within a file.
|
||||
addopts = "-m 'not integration' --timeout=30 --timeout-method=signal"
|
||||
|
||||
[tool.ty.environment]
|
||||
python-version = "3.13"
|
||||
|
||||
+39
-23
@@ -168,7 +168,7 @@ from agent.tool_result_classification import (
|
||||
file_mutation_result_landed,
|
||||
)
|
||||
from agent.trajectory import (
|
||||
convert_scratchpad_to_think, has_incomplete_scratchpad,
|
||||
convert_scratchpad_to_think,
|
||||
save_trajectory as _save_trajectory_to_file,
|
||||
)
|
||||
from agent.message_sanitization import (
|
||||
@@ -1517,23 +1517,35 @@ class AIAgent:
|
||||
return content.strip()
|
||||
|
||||
def _save_session_log(self, messages: List[Dict[str, Any]] = None):
|
||||
"""
|
||||
Save the full raw session to a JSON file.
|
||||
"""Optional per-session JSON snapshot writer.
|
||||
|
||||
Stores every message exactly as the agent sees it: user messages,
|
||||
assistant messages (with reasoning, finish_reason, tool_calls),
|
||||
tool responses (with tool_call_id, tool_name), and injected system
|
||||
messages (compression summaries, todo snapshots, etc.).
|
||||
Gated by ``sessions.write_json_snapshots`` (default False). state.db
|
||||
is the canonical message store; this writer exists only for users
|
||||
whose external tooling consumes ``~/.hermes/sessions/session_{sid}.json``
|
||||
directly. When the flag is off this is a fast no-op.
|
||||
|
||||
REASONING_SCRATCHPAD tags are converted to <think> blocks for consistency.
|
||||
Overwritten after each turn so it always reflects the latest state.
|
||||
When enabled, rewrites the snapshot after every persistence point with
|
||||
the full message list (assistant content normalized via
|
||||
``_clean_session_content`` to convert REASONING_SCRATCHPAD to think
|
||||
tags). The truncation guard ("don't overwrite a larger log with
|
||||
fewer messages") is preserved so resume + branch don't clobber a
|
||||
fuller existing snapshot.
|
||||
"""
|
||||
if not getattr(self, "_session_json_enabled", False):
|
||||
return
|
||||
messages = messages or self._session_messages
|
||||
if not messages:
|
||||
return
|
||||
|
||||
# Re-derive the target path each call so /branch and /compress
|
||||
# session-id changes land in the right file without any re-point
|
||||
# bookkeeping at the call sites.
|
||||
try:
|
||||
log_file = self.logs_dir / f"session_{self.session_id}.json"
|
||||
except Exception:
|
||||
return
|
||||
|
||||
try:
|
||||
# Clean assistant content for session logs
|
||||
cleaned = []
|
||||
for msg in messages:
|
||||
if msg.get("role") == "assistant" and msg.get("content"):
|
||||
@@ -1542,12 +1554,11 @@ class AIAgent:
|
||||
cleaned.append(msg)
|
||||
|
||||
# Guard: never overwrite a larger session log with fewer messages.
|
||||
# This protects against data loss when --resume loads a session whose
|
||||
# messages weren't fully written to SQLite — the resumed agent starts
|
||||
# with partial history and would otherwise clobber the full JSON log.
|
||||
if self.session_log_file.exists():
|
||||
# Protects against data loss when a resumed agent starts with
|
||||
# partial history and would otherwise clobber the full JSON log.
|
||||
if log_file.exists():
|
||||
try:
|
||||
existing = json.loads(self.session_log_file.read_text(encoding="utf-8"))
|
||||
existing = json.loads(log_file.read_text(encoding="utf-8"))
|
||||
existing_count = existing.get("message_count", len(existing.get("messages", [])))
|
||||
if existing_count > len(cleaned):
|
||||
logging.debug(
|
||||
@@ -1572,7 +1583,7 @@ class AIAgent:
|
||||
}
|
||||
|
||||
atomic_json_write(
|
||||
self.session_log_file,
|
||||
log_file,
|
||||
entry,
|
||||
indent=2,
|
||||
default=str,
|
||||
@@ -1582,6 +1593,7 @@ class AIAgent:
|
||||
if self.verbose_logging:
|
||||
logging.warning(f"Failed to save session log: {e}")
|
||||
|
||||
|
||||
def interrupt(self, message: str = None) -> None:
|
||||
"""
|
||||
Request the agent to interrupt its current tool-calling loop.
|
||||
@@ -3188,17 +3200,21 @@ class AIAgent:
|
||||
Used to decide whether to strip image content parts from API-bound
|
||||
messages (for non-vision models) or let the provider adapter handle
|
||||
them natively (for vision-capable models).
|
||||
|
||||
Resolution order (see ``agent.image_routing._supports_vision_override``):
|
||||
1. ``model.supports_vision`` (top-level, single-model shortcut)
|
||||
2. ``providers.<provider>.models.<model>.supports_vision``
|
||||
3. models.dev capability lookup
|
||||
Custom/local models absent from models.dev would otherwise be
|
||||
misclassified as non-vision and have their images stripped.
|
||||
"""
|
||||
try:
|
||||
from agent.models_dev import get_model_capabilities
|
||||
from hermes_cli.config import load_config
|
||||
from agent.image_routing import _lookup_supports_vision
|
||||
cfg = load_config()
|
||||
provider = (getattr(self, "provider", "") or "").strip()
|
||||
model = (getattr(self, "model", "") or "").strip()
|
||||
if not provider or not model:
|
||||
return False
|
||||
caps = get_model_capabilities(provider, model)
|
||||
if caps is None:
|
||||
return False
|
||||
return bool(caps.supports_vision)
|
||||
return _lookup_supports_vision(provider, model, cfg) is True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
+9
-1
@@ -47,6 +47,9 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
|
||||
AUTHOR_MAP = {
|
||||
# teknium (multiple emails)
|
||||
"teknium1@gmail.com": "teknium1",
|
||||
"me@promplate.dev": "CNSeniorious000",
|
||||
"yichengqiao21@gmail.com": "YarrowQiao",
|
||||
"erhanyasarx@gmail.com": "erhnysr",
|
||||
"30366221+WorldWriter@users.noreply.github.com": "WorldWriter",
|
||||
"dafeng@DafengdeMacBook-Pro.local": "WorldWriter",
|
||||
"anadi.jaggia@gmail.com": "Jaggia",
|
||||
@@ -56,8 +59,10 @@ AUTHOR_MAP = {
|
||||
"mgongzai@gmail.com": "vKongv",
|
||||
"0x.badfriend@gmail.com": "discodirector",
|
||||
"altriatree@gmail.com": "TruaShamu",
|
||||
"nat@nthrow.io": "nthrow",
|
||||
"m@mobrienv.dev": "mikeyobrien",
|
||||
"saeed919@pm.me": "falasi",
|
||||
"omar@techdeveloper.site": "nycomar",
|
||||
"qiyin.zuo@pcitc.com": "qiyin-code",
|
||||
"mr.aashiz@gmail.com": "aashizpoudel",
|
||||
"70629228+shaun0927@users.noreply.github.com": "shaun0927",
|
||||
@@ -74,6 +79,7 @@ AUTHOR_MAP = {
|
||||
"108427749+buntingszn@users.noreply.github.com": "buntingszn",
|
||||
"yanglongwei06@gmail.com": "Alex-yang00",
|
||||
"teknium@nousresearch.com": "teknium1",
|
||||
"markuscontasul@gmail.com": "Glucksberg",
|
||||
"piyushvp1@gmail.com": "thelumiereguy",
|
||||
"dskwelmcy@163.com": "dskwe",
|
||||
"421774554@qq.com": "wuli666",
|
||||
@@ -372,6 +378,7 @@ AUTHOR_MAP = {
|
||||
"bloodcarter@gmail.com": "bloodcarter",
|
||||
"scott@scotttrinh.com": "scotttrinh",
|
||||
"quocanh261997@gmail.com": "quocanh261997",
|
||||
"savanne.kham@protonmail.com": "savanne-kham", # PR #28958 salvage (strip tool_name for strict providers)
|
||||
# contributors (from noreply pattern)
|
||||
"david.vv@icloud.com": "davidvv",
|
||||
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
|
||||
@@ -680,7 +687,7 @@ AUTHOR_MAP = {
|
||||
"hmbown@gmail.com": "Hmbown",
|
||||
"iacobs@m0n5t3r.info": "m0n5t3r",
|
||||
"jiayuw794@gmail.com": "JiayuuWang",
|
||||
"jonny@nousresearch.com": "jquesnelle",
|
||||
"jonny@nousresearch.com": "yoniebans",
|
||||
"jake@nousresearch.com": "simpolism",
|
||||
"juan.ovalle@mistral.ai": "jjovalle99",
|
||||
"julien.talbot@ergonomia.re": "Julientalbot",
|
||||
@@ -713,6 +720,7 @@ AUTHOR_MAP = {
|
||||
"9219265+cresslank@users.noreply.github.com": "cresslank",
|
||||
"trevmanthony@gmail.com": "trevthefoolish",
|
||||
"ziliangpeng@users.noreply.github.com": "ziliangpeng",
|
||||
"ziliangdotme@gmail.com": "ziliangpeng",
|
||||
"centripetal-star@users.noreply.github.com": "centripetal-star",
|
||||
"LeonSGP43@users.noreply.github.com": "LeonSGP43",
|
||||
"154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
|
||||
|
||||
+40
-96
@@ -3,29 +3,36 @@
|
||||
# `pytest` directly to guarantee your local run matches CI behavior.
|
||||
#
|
||||
# What this script enforces:
|
||||
# * -n 4 xdist workers (CI has 4 cores; -n auto diverges locally)
|
||||
# * Per-file isolation via scripts/run_tests_parallel.py — each test
|
||||
# file runs in its own freshly-spawned `python -m pytest <file>`
|
||||
# subprocess. No xdist, no shared workers, no module-level leakage
|
||||
# between files.
|
||||
# * TZ=UTC, LANG=C.UTF-8, PYTHONHASHSEED=0 (deterministic)
|
||||
# * Credential env vars blanked (conftest.py also does this, but this
|
||||
# is belt-and-suspenders for anyone running `pytest` outside of
|
||||
# our conftest path — e.g. calling pytest on a single file)
|
||||
# * Proper venv activation
|
||||
# * Env vars blanked (conftest.py also does this, but this
|
||||
# is belt-and-suspenders for anyone running pytest outside our
|
||||
# conftest path — e.g. on a single file)
|
||||
# * Proper venv activation (probes .venv, venv, then ~/.hermes/...)
|
||||
#
|
||||
# Usage:
|
||||
# scripts/run_tests.sh # full suite
|
||||
# scripts/run_tests.sh tests/agent/ # one directory
|
||||
# scripts/run_tests.sh tests/agent/test_foo.py::TestClass::test_method
|
||||
# scripts/run_tests.sh --tb=long -v # pass-through pytest args
|
||||
# scripts/run_tests.sh # full suite
|
||||
# scripts/run_tests.sh -j 4 # cap parallelism
|
||||
# scripts/run_tests.sh tests/agent/ # discover only here
|
||||
# scripts/run_tests.sh tests/agent/ tests/acp/ # multiple roots
|
||||
# scripts/run_tests.sh tests/foo.py # single file
|
||||
# scripts/run_tests.sh tests/foo.py -- --tb=long # path + pytest args
|
||||
# scripts/run_tests.sh -- -v --tb=long # pytest args only
|
||||
#
|
||||
# Everything after a literal '--' is passed through to each per-file
|
||||
# pytest invocation. Positional path arguments before '--' override
|
||||
# the default discovery root (tests/).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ── Locate repo root ────────────────────────────────────────────────────────
|
||||
# Works whether this is the main checkout or a worktree.
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
|
||||
# ── Activate venv ───────────────────────────────────────────────────────────
|
||||
# Prefer a .venv in the current tree, fall back to the main checkout's venv
|
||||
# (useful for worktrees where we don't always duplicate the venv).
|
||||
VENV=""
|
||||
for candidate in "$REPO_ROOT/.venv" "$REPO_ROOT/venv" "$HOME/.hermes/hermes-agent/venv"; do
|
||||
if [ -f "$candidate/bin/activate" ]; then
|
||||
@@ -41,94 +48,31 @@ fi
|
||||
|
||||
PYTHON="$VENV/bin/python"
|
||||
|
||||
# ── Ensure pytest-split is installed (required for shard-equivalent runs) ──
|
||||
if ! "$PYTHON" -c "import pytest_split" 2>/dev/null; then
|
||||
echo "→ installing pytest-split into $VENV"
|
||||
if command -v uv >/dev/null 2>&1; then
|
||||
uv pip install --python "$PYTHON" --quiet "pytest-split>=0.9,<1"
|
||||
elif "$PYTHON" -m pip --version >/dev/null 2>&1; then
|
||||
"$PYTHON" -m pip install --quiet "pytest-split>=0.9,<1"
|
||||
else
|
||||
echo "error: neither uv nor pip is available in $VENV — pytest-split is missing" >&2
|
||||
echo " fix: run uv pip install -e \".[dev]\" from $REPO_ROOT" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# ── Hermetic environment ────────────────────────────────────────────────────
|
||||
# Mirror what CI does in .github/workflows/tests.yml + what conftest.py does.
|
||||
# Unset every credential-shaped var currently in the environment.
|
||||
while IFS='=' read -r name _; do
|
||||
case "$name" in
|
||||
*_API_KEY|*_TOKEN|*_SECRET|*_PASSWORD|*_CREDENTIALS|*_ACCESS_KEY| \
|
||||
*_SECRET_ACCESS_KEY|*_PRIVATE_KEY|*_OAUTH_TOKEN|*_WEBHOOK_SECRET| \
|
||||
*_ENCRYPT_KEY|*_APP_SECRET|*_CLIENT_SECRET|*_CORP_SECRET|*_AES_KEY| \
|
||||
AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|FAL_KEY| \
|
||||
GH_TOKEN|GITHUB_TOKEN)
|
||||
unset "$name"
|
||||
;;
|
||||
esac
|
||||
done < <(env)
|
||||
|
||||
# Unset HERMES_* behavioral vars too.
|
||||
unset HERMES_YOLO_MODE HERMES_INTERACTIVE HERMES_QUIET HERMES_TOOL_PROGRESS \
|
||||
HERMES_TOOL_PROGRESS_MODE HERMES_MAX_ITERATIONS HERMES_SESSION_PLATFORM \
|
||||
HERMES_SESSION_CHAT_ID HERMES_SESSION_CHAT_NAME HERMES_SESSION_THREAD_ID \
|
||||
HERMES_SESSION_SOURCE HERMES_SESSION_KEY HERMES_GATEWAY_SESSION \
|
||||
HERMES_CRON_SESSION \
|
||||
HERMES_PLATFORM HERMES_INFERENCE_PROVIDER HERMES_MANAGED HERMES_DEV \
|
||||
HERMES_CONTAINER HERMES_EPHEMERAL_SYSTEM_PROMPT HERMES_TIMEZONE \
|
||||
HERMES_REDACT_SECRETS HERMES_BACKGROUND_NOTIFICATIONS HERMES_EXEC_ASK \
|
||||
HERMES_HOME_MODE 2>/dev/null || true
|
||||
|
||||
# Pin deterministic runtime.
|
||||
export TZ=UTC
|
||||
export LANG=C.UTF-8
|
||||
export LC_ALL=C.UTF-8
|
||||
export PYTHONHASHSEED=0
|
||||
|
||||
# ── Live-gateway test guard (developer machines) ────────────────────────────
|
||||
# If a system-wide hermes pytest_live_guard plugin is installed at
|
||||
# $HOME/.hermes/pytest_live_guard.py, force-load it here so every test run
|
||||
# from this script gets the protection regardless of which worktree is
|
||||
# checked out (in-tree tests/conftest.py guard may be missing on stale
|
||||
# branches). Harmless on CI / fresh machines that don't have the file.
|
||||
# ── Live-gateway plugin (computed before we drop env) ───────────────────────
|
||||
EXTRA_PYTHONPATH=""
|
||||
EXTRA_PYTEST_PLUGINS=""
|
||||
if [ -f "$HOME/.hermes/pytest_live_guard.py" ]; then
|
||||
case ":${PYTHONPATH:-}:" in
|
||||
*":$HOME/.hermes:"*) ;;
|
||||
*) export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$HOME/.hermes" ;;
|
||||
esac
|
||||
if [[ ",${PYTEST_PLUGINS:-}," != *,pytest_live_guard,* ]]; then
|
||||
export PYTEST_PLUGINS="${PYTEST_PLUGINS:+$PYTEST_PLUGINS,}pytest_live_guard"
|
||||
fi
|
||||
EXTRA_PYTHONPATH="$HOME/.hermes"
|
||||
EXTRA_PYTEST_PLUGINS="pytest_live_guard"
|
||||
fi
|
||||
|
||||
# ── Worker count ────────────────────────────────────────────────────────────
|
||||
# CI uses `-n auto` on ubuntu-latest which gives 4 workers. A 20-core
|
||||
# workstation with `-n auto` gets 20 workers and exposes test-ordering
|
||||
# flakes that CI will never see. Pin to 4 so local matches CI.
|
||||
WORKERS="${HERMES_TEST_WORKERS:-4}"
|
||||
|
||||
# ── Run pytest ──────────────────────────────────────────────────────────────
|
||||
# ── Run in hermetic env ──────────────────────────────────────────────────────
|
||||
# env -i: start with empty environment, opt-in only what we need.
|
||||
# No credential var can leak — you'd have to explicitly add it here.
|
||||
echo "▶ running per-file parallel test suite via run_tests_parallel.py"
|
||||
echo " (TZ=UTC LANG=C.UTF-8 PYTHONHASHSEED=0; clean env)"
|
||||
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
# If the first argument starts with `-` treat all args as pytest flags;
|
||||
# otherwise treat them as test paths.
|
||||
ARGS=("$@")
|
||||
|
||||
echo "▶ running pytest with $WORKERS workers, hermetic env, in $REPO_ROOT"
|
||||
echo " (TZ=UTC LANG=C.UTF-8 PYTHONHASHSEED=0; all credential env vars unset)"
|
||||
|
||||
# -o "addopts=" clears pyproject.toml's `-n auto` so our -n wins.
|
||||
# We re-add --timeout/--timeout-method here because pyproject.toml's
|
||||
# addopts is wiped above. The 60s cap is essential: see pyproject.toml
|
||||
# for why (suite deadlocks at session teardown without it).
|
||||
exec "$PYTHON" -m pytest \
|
||||
-o "addopts=" \
|
||||
-n "$WORKERS" \
|
||||
--timeout=30 \
|
||||
--timeout-method=signal \
|
||||
--ignore=tests/integration \
|
||||
--ignore=tests/e2e \
|
||||
-m "not integration" \
|
||||
"${ARGS[@]}"
|
||||
exec env -i \
|
||||
PATH="$PATH" \
|
||||
HOME="$HOME" \
|
||||
TZ=UTC \
|
||||
LANG=C.UTF-8 \
|
||||
LC_ALL=C.UTF-8 \
|
||||
PYTHONHASHSEED=0 \
|
||||
${EXTRA_PYTHONPATH:+PYTHONPATH="$EXTRA_PYTHONPATH"} \
|
||||
${EXTRA_PYTEST_PLUGINS:+PYTEST_PLUGINS="$EXTRA_PYTEST_PLUGINS"} \
|
||||
"$PYTHON" "$SCRIPT_DIR/run_tests_parallel.py" "$@"
|
||||
|
||||
Executable
+650
@@ -0,0 +1,650 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Per-file parallel test runner.
|
||||
|
||||
The minimum-viable replacement for pytest-xdist + a subprocess-isolation
|
||||
plugin. Discovers test files under ``tests/`` (excluding integration/e2e
|
||||
unless explicitly requested), then runs one ``python -m pytest <file>``
|
||||
subprocess per file, with bounded parallelism (default: ``os.cpu_count()``).
|
||||
|
||||
Why per-file rather than per-test?
|
||||
Per-test spawn overhead (~250ms × 17k tests = 70min CPU minimum)
|
||||
swamped the actual work. Per-file spawn (~250ms × ~850 files = ~3.5min)
|
||||
fits in the budget while still giving every file a fresh Python
|
||||
interpreter — the only isolation boundary that actually matters
|
||||
(cross-file module-level state leakage was the original flake source;
|
||||
intra-file state is the test author's responsibility).
|
||||
|
||||
Why drop xdist entirely?
|
||||
xdist's persistent workers accumulate state across files, which is
|
||||
exactly the leakage we wanted to fix. xdist also adds complexity
|
||||
(loadfile vs loadscope, --max-worker-restart, internal control plane)
|
||||
that we don't need when the unit of work is "run pytest on one file".
|
||||
A subprocess.Popen pool gated by a semaphore is ~60 lines and does
|
||||
the job.
|
||||
|
||||
Usage:
|
||||
python scripts/run_tests_parallel.py [pytest_args...]
|
||||
|
||||
Common pytest args pass through (e.g. ``-v``, ``-x``, ``--tb=long``,
|
||||
``-k 'pattern'``, ``--lf``).
|
||||
|
||||
Environment:
|
||||
HERMES_TEST_WORKERS Override worker count (default: os.cpu_count())
|
||||
HERMES_TEST_PATHS Override discovery roots (colon-sep, default: 'tests')
|
||||
|
||||
Exit code: 0 if every file's pytest exited 0; 1 otherwise.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, Future
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
|
||||
# Default test discovery roots.
|
||||
_DEFAULT_ROOTS = ["tests"]
|
||||
|
||||
# Directories to skip during discovery — the e2e + integration suites
|
||||
# require real services and are run separately. Match exactly the
|
||||
# ``--ignore=`` flags the previous CI command used.
|
||||
_SKIP_PARTS = {"integration", "e2e"}
|
||||
|
||||
# Per-file wall-clock cap. Generous default — pytest-timeout still
|
||||
# enforces per-test caps inside each subprocess; this is just an outer
|
||||
# safety net so a single hung file can't stall the whole suite. Override
|
||||
# via --file-timeout or HERMES_TEST_FILE_TIMEOUT.
|
||||
_DEFAULT_FILE_TIMEOUT_SECONDS = 600.0 # 10 minutes
|
||||
|
||||
|
||||
def _count_tests(
|
||||
files: List[Path], repo_root: Path, pytest_passthrough: List[str]
|
||||
) -> dict[Path, int]:
|
||||
"""Run ``pytest --co -q`` once to count individual tests per file.
|
||||
|
||||
Returns a mapping ``{file_path: test_count}``. Files with zero
|
||||
collected tests are omitted from the dict (not an error — e.g. the
|
||||
file only defines fixtures / conftest helpers).
|
||||
|
||||
This is a single subprocess call (~2-5s for ~1k files) that gives
|
||||
us the total test count for the discovery announcement and
|
||||
per-file counts for the progress lines.
|
||||
|
||||
``--ignore`` flags for directories in ``_SKIP_PARTS`` are added
|
||||
automatically so that pytest's own collection machinery (conftest
|
||||
walking, directory traversal) doesn't pull in tests we intend to
|
||||
skip — matching what the per-file runs will actually execute.
|
||||
"""
|
||||
# Build --ignore flags for skipped dirs so the --co collection
|
||||
# mirrors what we'll actually run (not what pytest might find via
|
||||
# conftest walking or directory traversal).
|
||||
ignore_args: List[str] = []
|
||||
for root in [repo_root / p for p in _DEFAULT_ROOTS]:
|
||||
for part in _SKIP_PARTS:
|
||||
d = root / part
|
||||
if d.is_dir():
|
||||
ignore_args.extend(["--ignore", str(d)])
|
||||
|
||||
cmd = [
|
||||
sys.executable, "-m", "pytest",
|
||||
"--co", "-q",
|
||||
*ignore_args,
|
||||
*[str(f) for f in files],
|
||||
*pytest_passthrough,
|
||||
]
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
cwd=repo_root,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120,
|
||||
)
|
||||
except (subprocess.TimeoutExpired, OSError):
|
||||
return {}
|
||||
|
||||
counts: dict[Path, int] = {}
|
||||
for line in result.stdout.splitlines():
|
||||
# Lines look like: tests/acp/test_auth.py::TestClass::test_name
|
||||
if "::" not in line:
|
||||
continue
|
||||
file_part = line.split("::", 1)[0]
|
||||
key = repo_root / file_part
|
||||
counts[key] = counts.get(key, 0) + 1
|
||||
|
||||
return counts
|
||||
|
||||
|
||||
def _discover_files(roots: List[Path]) -> List[Path]:
|
||||
"""Return every ``test_*.py`` under the given roots (sorted).
|
||||
|
||||
Roots may be directories (recursed for ``test_*.py``) or explicit
|
||||
``.py`` files (included as-is, even if they don't match the
|
||||
``test_*`` prefix — caller knows what they want).
|
||||
|
||||
Exclude any file whose path contains a component in ``_SKIP_PARTS``,
|
||||
UNLESS the user explicitly named it as a root (in which case the
|
||||
user's intent overrides the skip filter).
|
||||
"""
|
||||
seen: set[Path] = set()
|
||||
out: List[Path] = []
|
||||
for root in roots:
|
||||
if not root.exists():
|
||||
continue
|
||||
if root.is_file():
|
||||
# Explicit file: include it as-is, skip the _SKIP_PARTS filter
|
||||
# since the user named it directly.
|
||||
real = root.resolve()
|
||||
if real not in seen:
|
||||
seen.add(real)
|
||||
out.append(root)
|
||||
continue
|
||||
for path in root.rglob("test_*.py"):
|
||||
if any(part in _SKIP_PARTS for part in path.parts):
|
||||
continue
|
||||
real = path.resolve()
|
||||
if real in seen:
|
||||
continue
|
||||
seen.add(real)
|
||||
out.append(path)
|
||||
return sorted(out)
|
||||
|
||||
|
||||
def _kill_tree(proc: "subprocess.Popen", pgid: int | None = None) -> None:
|
||||
"""Kill the pytest subprocess and every descendant it spawned.
|
||||
|
||||
A test run can spin up uvicorn servers, async runtimes, or other
|
||||
long-running grandchildren that survive the pytest subprocess exit
|
||||
if we don't kill the whole tree. ``subprocess.Popen.kill()`` only
|
||||
targets the immediate child; grandchildren reparent to PID 1
|
||||
(Linux) / get adopted by services.exe (Windows) and leak.
|
||||
|
||||
POSIX: the caller must pass ``pgid`` — the process group id captured
|
||||
immediately after Popen (via ``os.getpgid(proc.pid)``). We can't
|
||||
look it up here in the happy path because by the time we get
|
||||
called the leader process has already been reaped and its pid is
|
||||
gone from the kernel's process table, even though descendants in
|
||||
the group are still alive. SIGKILL'ing the captured pgid takes out
|
||||
everything in that group atomically.
|
||||
|
||||
Windows: ``taskkill /F /T /PID`` walks the recorded ppid chain and
|
||||
terminates the whole tree, even when the root has already exited.
|
||||
|
||||
Why not psutil: psutil walks the parent-child tree, but in the
|
||||
happy path the root has already been reaped so ``psutil.Process(pid)``
|
||||
can't find it; grandchildren reparented to PID 1 are also
|
||||
unreachable by tree walk at that point. The platform-native
|
||||
primitives (process groups / taskkill) handle both cases correctly
|
||||
without an extra abstraction layer.
|
||||
"""
|
||||
if proc.pid is None:
|
||||
return
|
||||
|
||||
if sys.platform == "win32":
|
||||
try:
|
||||
|
||||
subprocess.run(
|
||||
["taskkill", "/F", "/T", "/PID", str(proc.pid)],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
timeout=10,
|
||||
) # windows-footgun: ok
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
pass
|
||||
else:
|
||||
# POSIX: kill the captured pgid. Local-import signal so the
|
||||
# SIGKILL attribute is never referenced on Windows.
|
||||
if pgid is not None:
|
||||
try:
|
||||
import signal as _signal
|
||||
os.killpg(pgid, _signal.SIGKILL) # windows-footgun: ok
|
||||
except (ProcessLookupError, PermissionError, OSError):
|
||||
pass
|
||||
|
||||
# Belt-and-suspenders: ensure subprocess.communicate() sees the exit.
|
||||
try:
|
||||
proc.kill()
|
||||
except (ProcessLookupError, OSError):
|
||||
pass
|
||||
|
||||
|
||||
def _run_one_file(
|
||||
file: Path,
|
||||
pytest_args: List[str],
|
||||
repo_root: Path,
|
||||
file_timeout: float,
|
||||
) -> Tuple[Path, int, str, dict[str, int]]:
|
||||
"""Run ``python -m pytest <file> <pytest_args>`` in a fresh subprocess.
|
||||
|
||||
Returns (file, returncode, captured_combined_output, summary_counts).
|
||||
|
||||
``summary_counts`` is the result of ``_parse_pytest_summary(output)`` —
|
||||
|
||||
pytest exit codes (https://docs.pytest.org/en/stable/reference/exit-codes.html):
|
||||
0 = all tests passed
|
||||
1 = some tests failed
|
||||
2 = test execution interrupted
|
||||
3 = internal error
|
||||
4 = pytest CLI usage error
|
||||
5 = no tests collected
|
||||
|
||||
We treat exit 5 as a pass: it just means every test in the file was
|
||||
skipped or filtered by a marker (e.g. ``-m 'not integration'`` skips
|
||||
files where every test is marked integration). That's intentional and
|
||||
not a failure mode.
|
||||
|
||||
On per-file timeout (``file_timeout`` seconds) or any other exception
|
||||
during ``communicate()``, we kill the whole process group / process
|
||||
tree so grandchildren (uvicorn servers, async runtimes, etc.) do not
|
||||
orphan onto PID 1. The pytest-timeout plugin enforces per-test
|
||||
timeouts inside the subprocess; this outer timeout exists only to
|
||||
bound a pathologically slow or hung file as a whole.
|
||||
"""
|
||||
cmd = [sys.executable, "-m", "pytest", str(file), *pytest_args]
|
||||
proc = subprocess.Popen(
|
||||
cmd,
|
||||
cwd=repo_root,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
# POSIX: place the child at the head of its own process group so
|
||||
# _kill_tree can SIGKILL the group atomically.
|
||||
# Windows: this maps to CREATE_NEW_PROCESS_GROUP in CPython 3.12+;
|
||||
# _kill_tree handles the Windows path via taskkill /F /T.
|
||||
start_new_session=True,
|
||||
)
|
||||
|
||||
# Capture the pgid NOW, before the leader can exit and be reaped.
|
||||
# Once the leader is reaped, os.getpgid(proc.pid) raises
|
||||
# ProcessLookupError even though grandchildren in that group are
|
||||
# still alive — defeating the whole cleanup. None on Windows where
|
||||
# the pgid concept doesn't apply (taskkill walks ppid chain instead).
|
||||
pgid: int | None = None
|
||||
if sys.platform != "win32":
|
||||
try:
|
||||
pgid = os.getpgid(proc.pid)
|
||||
except (ProcessLookupError, PermissionError):
|
||||
# Astonishingly fast child? Already dead. _kill_tree's
|
||||
# fallback will handle this case as a no-op.
|
||||
pgid = None
|
||||
|
||||
try:
|
||||
output, _ = proc.communicate(timeout=file_timeout)
|
||||
rc = proc.returncode
|
||||
except subprocess.TimeoutExpired:
|
||||
_kill_tree(proc, pgid=pgid)
|
||||
# Drain whatever the child wrote before we killed it so we have
|
||||
# something to surface in the failure dump.
|
||||
try:
|
||||
output, _ = proc.communicate(timeout=10)
|
||||
except subprocess.TimeoutExpired:
|
||||
output = "(file timeout exceeded; output unavailable)"
|
||||
rc = 124 # de facto convention for "killed by timeout".
|
||||
output = (
|
||||
f"(per-file timeout: {file_timeout:.0f}s exceeded; "
|
||||
f"process tree SIGKILL'd)\n{output}"
|
||||
)
|
||||
except BaseException:
|
||||
# KeyboardInterrupt / runner crash — make sure no zombie
|
||||
# grandchildren outlive us.
|
||||
_kill_tree(proc, pgid=pgid)
|
||||
raise
|
||||
else:
|
||||
# Happy path: pytest exited on its own. The child process already
|
||||
# cleaned up its grandchildren if it's well-behaved, but
|
||||
# well-behaved is not universal — kill the group anyway. Already-
|
||||
# dead processes are a no-op.
|
||||
_kill_tree(proc, pgid=pgid)
|
||||
|
||||
if rc == 5:
|
||||
# No tests collected — every test in the file was filtered out.
|
||||
# Treat as a pass; surface info in a slightly distinct status
|
||||
# so the operator can spot it.
|
||||
rc = 0
|
||||
summary = _parse_pytest_summary(output)
|
||||
return file, rc, output, summary
|
||||
|
||||
|
||||
def _parse_pytest_summary(output: str) -> dict[str, int]:
|
||||
"""Extract per-file test pass/fail/skip counts from pytest output.
|
||||
|
||||
pytest prints a summary line like ``12 passed, 3 skipped, 1 failed in 2.1s``
|
||||
as the last non-empty line before the short test summary. We scrape that
|
||||
line for the individual counts so the progress display can show test-level
|
||||
granularity instead of just file-level pass/fail.
|
||||
|
||||
Returns a dict with keys ``passed``, ``failed``, ``skipped``, ``errors``,
|
||||
``xfailed``, ``xpassed`` (only keys found in the output are present).
|
||||
"""
|
||||
import re
|
||||
|
||||
result: dict[str, int] = {}
|
||||
# Walk backwards from the end — the summary line is always near the tail.
|
||||
for line in reversed(output.splitlines()):
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
# Match "N passed", "N failed", "N skipped", "N errors", "N xfailed", "N xpassed"
|
||||
for m in re.finditer(r"(\d+)\s+(passed|failed|skipped|errors|xfailed|xpassed)", line):
|
||||
result[m.group(2)] = int(m.group(1))
|
||||
# Also match "N error" (singular — pytest uses this sometimes).
|
||||
for m in re.finditer(r"(\d+)\s+error\b", line):
|
||||
result.setdefault("errors", result.get("errors", 0) + int(m.group(1)))
|
||||
if result:
|
||||
# Found the counts line — done.
|
||||
break
|
||||
# Stop at the short test summary header (if any) — everything above
|
||||
# that is individual failure details, not the counts line.
|
||||
if line.startswith("FAILED") or line.startswith("SHORT TEST SUMMARY"):
|
||||
break
|
||||
return result
|
||||
|
||||
|
||||
def _format_file(file: Path, repo_root: Path) -> str:
|
||||
"""Render a test-file path for display: strip the repo-root prefix
|
||||
when possible so output reads ``tests/acp/test_auth.py`` instead of
|
||||
``/home/runner/work/hermes-agent/hermes-agent/tests/acp/test_auth.py``.
|
||||
|
||||
Falls back to the absolute path for anything outside the repo root.
|
||||
"""
|
||||
try:
|
||||
return str(file.resolve().relative_to(repo_root.resolve()))
|
||||
except ValueError:
|
||||
return str(file)
|
||||
|
||||
|
||||
def _print_progress(
|
||||
tests_done: int,
|
||||
total_tests: int,
|
||||
file: Path,
|
||||
rc: int,
|
||||
dur: float,
|
||||
repo_root: Path,
|
||||
tests_passed: int,
|
||||
tests_failed: int,
|
||||
test_counts: dict[Path, int],
|
||||
file_summary: dict[str, int] | None = None,
|
||||
) -> None:
|
||||
"""Single-line live progress.
|
||||
|
||||
When ``file_summary`` is provided (parsed from pytest output), the
|
||||
per-file parenthetical shows individual test pass/fail counts instead
|
||||
of just the total test count.
|
||||
"""
|
||||
status = "✓" if rc == 0 else "✗"
|
||||
pct = (tests_done / total_tests * 100) if total_tests else 0
|
||||
# Digit width for left-side counter padding (derived from total file count).
|
||||
fw = len(str(tests_passed + tests_failed))
|
||||
# Build per-file test count string.
|
||||
if file_summary:
|
||||
parts = []
|
||||
p = file_summary.get("passed", 0)
|
||||
f = file_summary.get("failed", 0)
|
||||
s = file_summary.get("skipped", 0)
|
||||
e = file_summary.get("errors", 0)
|
||||
if p:
|
||||
parts.append(f"{p}✓")
|
||||
if f:
|
||||
parts.append(f"{f}✗")
|
||||
if s:
|
||||
parts.append(f"{s}s")
|
||||
if e:
|
||||
parts.append(f"{e}e")
|
||||
# xfailed/xpassed are rare; include if present.
|
||||
xf = file_summary.get("xfailed", 0)
|
||||
xp = file_summary.get("xpassed", 0)
|
||||
if xf:
|
||||
parts.append(f"{xf}xf")
|
||||
if xp:
|
||||
parts.append(f"{xp}xp")
|
||||
test_str = " ".join(parts) + ", " if parts else ""
|
||||
else:
|
||||
n_tests = test_counts.get(file, 0)
|
||||
test_str = f"{n_tests} tests, " if n_tests else ""
|
||||
msg = (
|
||||
f"[{pct:5.1f}% | {tests_done:>5}/{total_tests}"
|
||||
f" | ✓{tests_passed:>{fw}} | ✗{tests_failed:>{fw}}] "
|
||||
f"{status} {_format_file(file, repo_root)} ({test_str}{dur:.1f}s)"
|
||||
)
|
||||
# Truncate to terminal width if available (no clobbering ANSI lines).
|
||||
try:
|
||||
cols = os.get_terminal_size().columns
|
||||
if len(msg) > cols:
|
||||
msg = msg[: cols - 1] + "…"
|
||||
except OSError:
|
||||
pass
|
||||
print(msg, flush=True)
|
||||
|
||||
|
||||
def _print_inline_failure(
|
||||
file: Path, output: str, repo_root: Path, pytest_passthrough: List[str]
|
||||
) -> None:
|
||||
"""Print a compact failure summary immediately when a file fails.
|
||||
|
||||
Shows the tail of the pytest output (the failure section with stack
|
||||
traces) and a ready-to-run repro command, so the developer doesn't
|
||||
have to wait for the full run to finish before seeing what broke.
|
||||
"""
|
||||
rel = _format_file(file, repo_root)
|
||||
# Build a repro command the developer can copy-paste.
|
||||
passthrough_str = " ".join(pytest_passthrough) if pytest_passthrough else ""
|
||||
repro = f"python -m pytest {rel}"
|
||||
if passthrough_str:
|
||||
repro += f" {passthrough_str}"
|
||||
|
||||
# Grab just the failure lines (last ~30 lines of pytest output —
|
||||
# typically the FAILED summary + short test info).
|
||||
lines = output.rstrip().splitlines()
|
||||
tail = "\n".join(lines[-30:])
|
||||
|
||||
print(flush=True)
|
||||
print(f" ╔╍ Failed: {rel} ╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍", flush=True)
|
||||
for line in tail.splitlines():
|
||||
print(f" ║ {line}", flush=True)
|
||||
print(f" ║", flush=True)
|
||||
print(f" ║ Repro: {repro}", flush=True)
|
||||
print(f" ╚╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍", flush=True)
|
||||
print(flush=True)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(
|
||||
description=__doc__,
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
)
|
||||
parser.add_argument(
|
||||
"-j",
|
||||
"--jobs",
|
||||
type=int,
|
||||
default=int(os.environ.get("HERMES_TEST_WORKERS") or (os.cpu_count() or 4) * 2),
|
||||
help="Parallel worker count (default: $HERMES_TEST_WORKERS or cpu_count*2)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--paths",
|
||||
default=os.environ.get("HERMES_TEST_PATHS", ":".join(_DEFAULT_ROOTS)),
|
||||
help="Colon-separated discovery roots (default: 'tests')",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--include-integration",
|
||||
action="store_true",
|
||||
help="Don't skip integration/ e2e/ during discovery",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--file-timeout",
|
||||
type=float,
|
||||
default=float(
|
||||
os.environ.get("HERMES_TEST_FILE_TIMEOUT", _DEFAULT_FILE_TIMEOUT_SECONDS)
|
||||
),
|
||||
help=(
|
||||
"Per-file wall-clock cap in seconds. On timeout, the pytest "
|
||||
"subprocess and its full process tree are SIGKILL'd. "
|
||||
"Default: 600 (10 min), env: HERMES_TEST_FILE_TIMEOUT."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"paths_positional",
|
||||
nargs="*",
|
||||
metavar="PATH",
|
||||
help=(
|
||||
"Restrict discovery to these paths (directories or .py files). "
|
||||
"Mutually exclusive with --paths. Anything after a literal '--' "
|
||||
"separator is passed through to each per-file pytest invocation."
|
||||
),
|
||||
)
|
||||
# Manually split argv on '--' so positional paths and pytest passthrough
|
||||
# args don't fight over each other. argparse's nargs="*" positional is
|
||||
# greedy and will swallow everything after '--' including the pytest
|
||||
# flags, defeating the convention.
|
||||
argv = sys.argv[1:]
|
||||
if "--" in argv:
|
||||
sep = argv.index("--")
|
||||
our_args, pytest_passthrough = argv[:sep], argv[sep + 1 :]
|
||||
else:
|
||||
our_args, pytest_passthrough = argv, []
|
||||
args = parser.parse_args(our_args)
|
||||
|
||||
repo_root = Path(__file__).resolve().parent.parent
|
||||
|
||||
# Resolve discovery roots: positional path args override --paths if any
|
||||
# were supplied, otherwise --paths (which itself defaults to 'tests').
|
||||
if args.paths_positional:
|
||||
# Positionals can be directories OR explicit .py files. Either is
|
||||
# fine — _discover_files handles both via rglob('test_*.py') for
|
||||
# dirs and direct inclusion for files.
|
||||
roots = [repo_root / p for p in args.paths_positional]
|
||||
else:
|
||||
roots = [repo_root / p for p in args.paths.split(":") if p]
|
||||
|
||||
if args.include_integration:
|
||||
# Caller takes responsibility — typically used via explicit -k filter.
|
||||
global _SKIP_PARTS # noqa: PLW0603 — config knob
|
||||
_SKIP_PARTS = set()
|
||||
|
||||
files = _discover_files(roots)
|
||||
if not files:
|
||||
print(f"No test files discovered under {[str(r) for r in roots]}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Count individual tests per file via a single pytest --co pass.
|
||||
test_counts = _count_tests(files, repo_root, pytest_passthrough)
|
||||
total_tests = sum(test_counts.values())
|
||||
|
||||
print(
|
||||
f"Discovered {len(files)} test files ({total_tests} tests) under "
|
||||
f"{[str(r.relative_to(repo_root)) if r.is_relative_to(repo_root) else str(r) for r in roots]}; "
|
||||
f"running with -j {args.jobs}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
# Capture and print on completion (out-of-order is fine — keeps the
|
||||
# terminal clean rather than interleaving N parallel pytest outputs).
|
||||
failures: List[Tuple[Path, str, Dict[str, int]]] = []
|
||||
started = time.monotonic()
|
||||
files_done = 0
|
||||
tests_done = 0
|
||||
pass_count = 0
|
||||
fail_count = 0
|
||||
tests_passed = 0
|
||||
tests_failed = 0
|
||||
lock = threading.Lock()
|
||||
|
||||
def _on_done(file: Path, started_at: float, fut: "Future[Tuple[Path, int, str, dict[str, int]]]") -> None:
|
||||
nonlocal files_done, tests_done, pass_count, fail_count, tests_passed, tests_failed
|
||||
n_tests = test_counts.get(file, 0)
|
||||
try:
|
||||
fpath, rc, output, summary = fut.result()
|
||||
except Exception as exc: # noqa: BLE001 — must always advance counter
|
||||
with lock:
|
||||
files_done += 1
|
||||
tests_done += n_tests
|
||||
fail_count += 1
|
||||
failures.append((file, f"runner crashed: {exc!r}", {}))
|
||||
_print_progress(
|
||||
tests_done, total_tests, file, 1,
|
||||
time.monotonic() - started_at,
|
||||
repo_root, tests_passed, tests_failed,
|
||||
test_counts,
|
||||
)
|
||||
return
|
||||
with lock:
|
||||
files_done += 1
|
||||
tests_done += n_tests
|
||||
# Accumulate test-level counts from parsed summary.
|
||||
tests_passed += summary.get("passed", 0)
|
||||
tests_failed += summary.get("failed", 0)
|
||||
if rc == 0:
|
||||
pass_count += 1
|
||||
else:
|
||||
fail_count += 1
|
||||
failures.append((fpath, output, summary))
|
||||
_print_progress(
|
||||
tests_done, total_tests, fpath, rc,
|
||||
time.monotonic() - started_at,
|
||||
repo_root, tests_passed, tests_failed,
|
||||
test_counts,
|
||||
file_summary=summary,
|
||||
)
|
||||
if rc != 0:
|
||||
_print_inline_failure(fpath, output, repo_root, pytest_passthrough)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=args.jobs) as pool:
|
||||
futures: List[Future] = []
|
||||
for file in files:
|
||||
t0 = time.monotonic()
|
||||
fut = pool.submit(
|
||||
_run_one_file, file, pytest_passthrough, repo_root, args.file_timeout
|
||||
)
|
||||
fut.add_done_callback(lambda f, file=file, t0=t0: _on_done(file, t0, f))
|
||||
futures.append(fut)
|
||||
# Block until everything's done. ThreadPoolExecutor.__exit__ waits
|
||||
# for all submitted work, but doing it explicitly here makes the
|
||||
# control flow obvious.
|
||||
for fut in futures:
|
||||
fut.result() if fut.exception() is None else None
|
||||
|
||||
elapsed = time.monotonic() - started
|
||||
print()
|
||||
pct = (tests_done / total_tests * 100) if total_tests else 0
|
||||
print(f"=== Summary: {len(files)} files, {tests_passed} tests passed, {tests_failed} failed ({pct:.0f}% complete) in {elapsed:.1f}s ({args.jobs} workers) ===")
|
||||
|
||||
if failures:
|
||||
print()
|
||||
print("=== Failure output ===")
|
||||
for file, output, _summary in failures:
|
||||
print()
|
||||
print(f"--- {_format_file(file, repo_root)} ---")
|
||||
print(output.rstrip())
|
||||
print()
|
||||
# Split: files with actual test failures vs non-zero exit for other reasons
|
||||
test_fail_files = [(f, s) for f, _o, s in failures if s.get("failed", 0) > 0]
|
||||
all_passed_but_nonzero = [(f, s) for f, _o, s in failures
|
||||
if s.get("failed", 0) == 0 and s.get("passed", 0) > 0]
|
||||
no_tests_ran = [(f, s) for f, _o, s in failures
|
||||
if s.get("failed", 0) == 0 and s.get("passed", 0) == 0]
|
||||
if test_fail_files:
|
||||
total_tf = sum(s.get("failed", 0) for _, s in test_fail_files)
|
||||
print(f"=== {len(test_fail_files)} file{'s' if len(test_fail_files) != 1 else ''} with test failures ({total_tf} test{'s' if total_tf != 1 else ''} failed) ===")
|
||||
for file, s in test_fail_files:
|
||||
nf = s.get("failed", 0)
|
||||
print(f" {_format_file(file, repo_root)} ({nf} test{'s' if nf != 1 else ''} failed)")
|
||||
if all_passed_but_nonzero:
|
||||
print(f"=== {len(all_passed_but_nonzero)} file{'s' if len(all_passed_but_nonzero) != 1 else ''} where all tests passed but pytest exited non-zero (warnings-as-errors, hook failures, etc.) ===")
|
||||
for file, s in all_passed_but_nonzero:
|
||||
print(f" {_format_file(file, repo_root)} ({s.get('passed', 0)} passed)")
|
||||
if no_tests_ran:
|
||||
print(f"=== {len(no_tests_ran)} file{'s' if len(no_tests_ran) != 1 else ''} where no tests ran (collection/import error, timeout before collection, etc.) ===")
|
||||
for file, s in no_tests_ran:
|
||||
print(f" {_format_file(file, repo_root)}")
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
+16
-17
@@ -629,13 +629,12 @@
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/fetch": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
|
||||
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.1.tgz",
|
||||
"integrity": "sha512-GpptLrs57adMSuHi3VNj0mAF8dwh36LMaYF6XyJ6JMWlVsc+t42tm1HSEDmOs3A8fC9yyeisgLhsTVQokOZ0zw==",
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.1",
|
||||
"@protobufjs/inquire": "^1.1.0"
|
||||
"@protobufjs/aspromise": "^1.1.1"
|
||||
}
|
||||
},
|
||||
"node_modules/@protobufjs/float": {
|
||||
@@ -645,9 +644,9 @@
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/inquire": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.1.tgz",
|
||||
"integrity": "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==",
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.2.tgz",
|
||||
"integrity": "sha512-pa0vFRuws4wkvaXKK1uXZMAwAX4/t8ANaJo45iw/oQHNQ9q5xUzwgFmVJGXiga2BeN+zpX7Vf9vmsiIa2J+MUw==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/path": {
|
||||
@@ -1620,9 +1619,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/protobufjs": {
|
||||
"version": "7.5.6",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.6.tgz",
|
||||
"integrity": "sha512-M71sTMB146U3u0di3yup8iM+zv8yPRNQVr1KK4tyBitl3qFvEGucq/rGDRShD2rsJhtN02RJaJ7j5X5hmy8SJg==",
|
||||
"version": "7.6.0",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.6.0.tgz",
|
||||
"integrity": "sha512-LtESOsMPTZgyYtwxhvdgdjGL0HmXEaRA/hVD6sol4zA60hVXXXP/SGmxnqDbgGE8gy7pYex7cym+5vYPcmaXBQ==",
|
||||
"hasInstallScript": true,
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
@@ -1630,14 +1629,14 @@
|
||||
"@protobufjs/base64": "^1.1.2",
|
||||
"@protobufjs/codegen": "^2.0.5",
|
||||
"@protobufjs/eventemitter": "^1.1.0",
|
||||
"@protobufjs/fetch": "^1.1.0",
|
||||
"@protobufjs/fetch": "^1.1.1",
|
||||
"@protobufjs/float": "^1.0.2",
|
||||
"@protobufjs/inquire": "^1.1.1",
|
||||
"@protobufjs/inquire": "^1.1.2",
|
||||
"@protobufjs/path": "^1.1.2",
|
||||
"@protobufjs/pool": "^1.1.0",
|
||||
"@protobufjs/utf8": "^1.1.1",
|
||||
"@types/node": ">=13.7.0",
|
||||
"long": "^5.0.0"
|
||||
"long": "^5.3.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
@@ -2117,9 +2116,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ws": {
|
||||
"version": "8.20.0",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz",
|
||||
"integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==",
|
||||
"version": "8.20.1",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.20.1.tgz",
|
||||
"integrity": "sha512-It4dO0K5v//JtTXuPkfEOaI3uUN87iYPnqo/ZzqCoG3g8uhA66QUMs/SrM0YK7/NAu+r4LMh/9dq2A7k+rHs+w==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
|
||||
@@ -336,7 +336,8 @@ The registry of record is `hermes_cli/commands.py` — every consumer
|
||||
~/.hermes/config.yaml Main configuration
|
||||
~/.hermes/.env API keys and secrets
|
||||
$HERMES_HOME/skills/ Installed skills
|
||||
~/.hermes/sessions/ Session transcripts
|
||||
~/.hermes/sessions/ Gateway routing index, request dumps, *.jsonl transcripts (and optional per-session JSON snapshots when sessions.write_json_snapshots: true)
|
||||
~/.hermes/state.db Canonical session store (SQLite + FTS5)
|
||||
~/.hermes/logs/ Gateway and error logs
|
||||
~/.hermes/auth.json OAuth tokens and credential pools
|
||||
~/.hermes/hermes-agent/ Source code (if git-installed)
|
||||
@@ -867,7 +868,7 @@ hermes config set auxiliary.vision.model <model_name>
|
||||
| Env variables | `hermes config env-path` or [Env vars reference](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) |
|
||||
| CLI commands | `hermes --help` or [CLI reference](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) |
|
||||
| Gateway logs | `~/.hermes/logs/gateway.log` |
|
||||
| Session files | `~/.hermes/sessions/` or `hermes sessions browse` |
|
||||
| Session files | `hermes sessions browse` (reads state.db) |
|
||||
| Source code | `~/.hermes/hermes-agent/` |
|
||||
|
||||
---
|
||||
|
||||
@@ -40,6 +40,16 @@ def _clean_env(monkeypatch):
|
||||
"ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN",
|
||||
):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
# Module-level unhealthy cache (10-min TTL) leaks between tests;
|
||||
# earlier tests that call _mark_provider_unhealthy() poison the
|
||||
# cache for later ones, causing _resolve_auto to skip providers
|
||||
# that the test patched to return valid clients.
|
||||
import agent.auxiliary_client as _aux_mod
|
||||
_aux_mod._aux_unhealthy_until.clear()
|
||||
_aux_mod._aux_unhealthy_logged_at.clear()
|
||||
yield
|
||||
_aux_mod._aux_unhealthy_until.clear()
|
||||
_aux_mod._aux_unhealthy_logged_at.clear()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -461,6 +471,17 @@ class TestExpiredCodexFallback:
|
||||
import base64
|
||||
import time as _time
|
||||
|
||||
# Belt-and-suspenders: _try_openrouter marks openrouter unhealthy
|
||||
# when OPENROUTER_API_KEY is absent (which the preceding test in
|
||||
# this class exercises). The file-level _clean_env autouse fixture
|
||||
# clears the cache, but fixture ordering with the conftest
|
||||
# _hermetic_environment autouse can leave a narrow window where
|
||||
# the mark reappears. Explicitly clear here so this test is
|
||||
# independent of run order.
|
||||
import agent.auxiliary_client as _aux_mod
|
||||
_aux_mod._aux_unhealthy_until.clear()
|
||||
_aux_mod._aux_unhealthy_logged_at.clear()
|
||||
|
||||
header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
|
||||
payload_data = json.dumps({"exp": int(_time.time()) - 3600}).encode()
|
||||
payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
|
||||
@@ -1047,6 +1068,20 @@ class TestGetProviderChain:
|
||||
class TestTryPaymentFallback:
|
||||
"""_try_payment_fallback skips the failed provider and tries alternatives."""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _clear_unhealthy_cache(self):
|
||||
"""Earlier tests in this file call _mark_provider_unhealthy() which
|
||||
pollutes the module-level ``_aux_unhealthy_until`` dict (10-min TTL).
|
||||
Without this cleanup the fallback chain skips providers we've patched
|
||||
to return valid clients — the patched function is never called.
|
||||
"""
|
||||
from agent.auxiliary_client import _aux_unhealthy_until, _aux_unhealthy_logged_at
|
||||
_aux_unhealthy_until.clear()
|
||||
_aux_unhealthy_logged_at.clear()
|
||||
yield
|
||||
_aux_unhealthy_until.clear()
|
||||
_aux_unhealthy_logged_at.clear()
|
||||
|
||||
def test_skips_failed_provider(self):
|
||||
mock_client = MagicMock()
|
||||
with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
from agent.agent_init import _merge_custom_provider_extra_body
|
||||
|
||||
|
||||
def test_custom_provider_extra_body_merges_into_request_overrides():
|
||||
agent = SimpleNamespace(
|
||||
provider="custom",
|
||||
model="google/gemma-4-31b-it",
|
||||
base_url="https://example.test/v1",
|
||||
request_overrides={"service_tier": "priority"},
|
||||
)
|
||||
|
||||
_merge_custom_provider_extra_body(
|
||||
agent,
|
||||
[
|
||||
{
|
||||
"name": "gemma",
|
||||
"base_url": "https://example.test/v1/",
|
||||
"model": "google/gemma-4-31b-it",
|
||||
"extra_body": {
|
||||
"enable_thinking": True,
|
||||
"reasoning_effort": "high",
|
||||
},
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
assert agent.request_overrides == {
|
||||
"service_tier": "priority",
|
||||
"extra_body": {
|
||||
"enable_thinking": True,
|
||||
"reasoning_effort": "high",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_custom_provider_extra_body_preserves_caller_override():
|
||||
agent = SimpleNamespace(
|
||||
provider="custom",
|
||||
model="google/gemma-4-31b-it",
|
||||
base_url="https://example.test/v1",
|
||||
request_overrides={
|
||||
"extra_body": {
|
||||
"reasoning_effort": "low",
|
||||
"caller_only": True,
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
_merge_custom_provider_extra_body(
|
||||
agent,
|
||||
[
|
||||
{
|
||||
"name": "gemma",
|
||||
"base_url": "https://example.test/v1",
|
||||
"model": "google/gemma-4-31b-it",
|
||||
"extra_body": {
|
||||
"enable_thinking": True,
|
||||
"reasoning_effort": "high",
|
||||
},
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
assert agent.request_overrides["extra_body"] == {
|
||||
"enable_thinking": True,
|
||||
"reasoning_effort": "low",
|
||||
"caller_only": True,
|
||||
}
|
||||
|
||||
|
||||
def test_custom_provider_extra_body_ignores_other_custom_models():
|
||||
agent = SimpleNamespace(
|
||||
provider="custom",
|
||||
model="other-model",
|
||||
base_url="https://example.test/v1",
|
||||
request_overrides={},
|
||||
)
|
||||
|
||||
_merge_custom_provider_extra_body(
|
||||
agent,
|
||||
[
|
||||
{
|
||||
"name": "gemma",
|
||||
"base_url": "https://example.test/v1",
|
||||
"model": "google/gemma-4-31b-it",
|
||||
"extra_body": {"enable_thinking": True},
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
assert agent.request_overrides == {}
|
||||
@@ -9,8 +9,11 @@ from unittest.mock import patch
|
||||
import pytest
|
||||
|
||||
from agent.image_routing import (
|
||||
_coerce_capability_bool,
|
||||
_coerce_mode,
|
||||
_explicit_aux_vision_override,
|
||||
_lookup_supports_vision,
|
||||
_supports_vision_override,
|
||||
build_native_content_parts,
|
||||
decide_image_input_mode,
|
||||
)
|
||||
@@ -125,6 +128,168 @@ class TestDecideImageInputMode:
|
||||
assert decide_image_input_mode("xiaomi", "mimo-v2.5-pro", {}) == "text"
|
||||
|
||||
|
||||
# ─── _coerce_capability_bool ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestCoerceCapabilityBool:
|
||||
def test_real_bool_passes_through(self):
|
||||
assert _coerce_capability_bool(True) is True
|
||||
assert _coerce_capability_bool(False) is False
|
||||
|
||||
def test_int_0_and_1(self):
|
||||
assert _coerce_capability_bool(1) is True
|
||||
assert _coerce_capability_bool(0) is False
|
||||
|
||||
def test_other_ints_return_none(self):
|
||||
assert _coerce_capability_bool(2) is None
|
||||
assert _coerce_capability_bool(-1) is None
|
||||
|
||||
def test_yaml_true_tokens(self):
|
||||
for s in ("true", "TRUE", "True", "yes", "on", "1", " true "):
|
||||
assert _coerce_capability_bool(s) is True
|
||||
|
||||
def test_yaml_false_tokens(self):
|
||||
for s in ("false", "FALSE", "False", "no", "off", "0", " false "):
|
||||
assert _coerce_capability_bool(s) is False
|
||||
|
||||
def test_quoted_false_does_not_silently_become_true(self):
|
||||
# Regression: bool("false") is True in Python. A user writing
|
||||
# supports_vision: "false" must NOT enable native vision routing.
|
||||
assert _coerce_capability_bool("false") is False
|
||||
|
||||
def test_unrecognised_strings_return_none(self):
|
||||
# None == fall through to models.dev, not a silent truthy.
|
||||
assert _coerce_capability_bool("maybe") is None
|
||||
assert _coerce_capability_bool("") is None
|
||||
assert _coerce_capability_bool("definitely") is None
|
||||
|
||||
def test_other_types_return_none(self):
|
||||
assert _coerce_capability_bool(None) is None
|
||||
assert _coerce_capability_bool([]) is None
|
||||
assert _coerce_capability_bool({}) is None
|
||||
assert _coerce_capability_bool(1.5) is None
|
||||
|
||||
|
||||
# ─── _supports_vision_override ───────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSupportsVisionOverride:
|
||||
def test_no_cfg_returns_none(self):
|
||||
assert _supports_vision_override(None, "custom", "my-llava") is None
|
||||
assert _supports_vision_override({}, "custom", "my-llava") is None
|
||||
|
||||
def test_top_level_shortcut_wins(self):
|
||||
cfg = {"model": {"supports_vision": True}}
|
||||
assert _supports_vision_override(cfg, "custom", "my-llava") is True
|
||||
|
||||
def test_top_level_false_propagates(self):
|
||||
cfg = {"model": {"supports_vision": False}}
|
||||
assert _supports_vision_override(cfg, "custom", "my-llava") is False
|
||||
|
||||
def test_per_provider_per_model_via_runtime_name(self):
|
||||
cfg = {
|
||||
"providers": {
|
||||
"custom": {"models": {"my-llava": {"supports_vision": True}}},
|
||||
},
|
||||
}
|
||||
assert _supports_vision_override(cfg, "custom", "my-llava") is True
|
||||
|
||||
def test_per_provider_per_model_via_config_name(self):
|
||||
# Named custom provider — runtime self.provider == "custom", config
|
||||
# holds the original name under model.provider.
|
||||
cfg = {
|
||||
"model": {"provider": "my-vllm"},
|
||||
"providers": {
|
||||
"my-vllm": {"models": {"my-llava": {"supports_vision": True}}},
|
||||
},
|
||||
}
|
||||
assert _supports_vision_override(cfg, "custom", "my-llava") is True
|
||||
|
||||
def test_quoted_false_string_in_yaml_does_not_enable(self):
|
||||
# Real-world: user writes supports_vision: "false" (quoted).
|
||||
cfg = {"model": {"supports_vision": "false"}}
|
||||
assert _supports_vision_override(cfg, "custom", "my-llava") is False
|
||||
|
||||
def test_unrecognised_value_falls_through(self):
|
||||
cfg = {"model": {"supports_vision": "maybe"}}
|
||||
assert _supports_vision_override(cfg, "custom", "my-llava") is None
|
||||
|
||||
def test_no_override_returns_none(self):
|
||||
cfg = {"model": {"default": "my-llava"}}
|
||||
assert _supports_vision_override(cfg, "custom", "my-llava") is None
|
||||
|
||||
def test_malformed_sections_are_ignored(self):
|
||||
# User accidentally wrote a string where a section was expected —
|
||||
# don't blow up, just fall through.
|
||||
cfg = {"model": "some-string", "providers": ["not-a-dict"]}
|
||||
assert _supports_vision_override(cfg, "custom", "my-llava") is None
|
||||
|
||||
|
||||
# ─── _lookup_supports_vision (override-aware) ────────────────────────────────
|
||||
|
||||
|
||||
class TestLookupSupportsVisionOverride:
|
||||
def test_config_override_short_circuits_models_dev(self):
|
||||
# Config says True, models.dev says None — config wins.
|
||||
cfg = {"model": {"supports_vision": True}}
|
||||
with patch("agent.models_dev.get_model_capabilities", return_value=None):
|
||||
assert _lookup_supports_vision("custom", "my-llava", cfg) is True
|
||||
|
||||
def test_config_override_false_beats_vision_capable_models_dev(self):
|
||||
# User explicitly disables vision on a models.dev-vision-capable model.
|
||||
fake_caps = type("Caps", (), {"supports_vision": True})()
|
||||
cfg = {"model": {"supports_vision": False}}
|
||||
with patch("agent.models_dev.get_model_capabilities", return_value=fake_caps):
|
||||
assert _lookup_supports_vision("anthropic", "claude-sonnet-4", cfg) is False
|
||||
|
||||
def test_no_override_falls_back_to_models_dev(self):
|
||||
fake_caps = type("Caps", (), {"supports_vision": True})()
|
||||
with patch("agent.models_dev.get_model_capabilities", return_value=fake_caps):
|
||||
assert _lookup_supports_vision("anthropic", "claude-sonnet-4", {}) is True
|
||||
|
||||
def test_no_override_no_models_dev_entry_returns_none(self):
|
||||
with patch("agent.models_dev.get_model_capabilities", return_value=None):
|
||||
assert _lookup_supports_vision("custom", "my-llava", {}) is None
|
||||
|
||||
def test_cfg_none_falls_back_to_models_dev(self):
|
||||
# Caller didn't pass cfg at all — old call sites must still work.
|
||||
with patch("agent.models_dev.get_model_capabilities", return_value=None):
|
||||
assert _lookup_supports_vision("openrouter", "x", None) is None
|
||||
|
||||
|
||||
# ─── decide_image_input_mode with auto + override ────────────────────────────
|
||||
|
||||
|
||||
class TestAutoModeRespectsOverride:
|
||||
def test_auto_native_for_custom_with_supports_vision_true(self):
|
||||
# The motivating bug: Qwen3.6 on local llama.cpp via provider=custom.
|
||||
# Without the override, auto falls back to text. With it, auto picks
|
||||
# native — no need to also set agent.image_input_mode: native.
|
||||
cfg = {"model": {"supports_vision": True}}
|
||||
with patch("agent.models_dev.get_model_capabilities", return_value=None):
|
||||
assert decide_image_input_mode("custom", "qwen3.6-35b", cfg) == "native"
|
||||
|
||||
def test_auto_text_for_custom_with_supports_vision_false(self):
|
||||
cfg = {"model": {"supports_vision": False}}
|
||||
with patch("agent.models_dev.get_model_capabilities", return_value=None):
|
||||
assert decide_image_input_mode("custom", "some-text-only", cfg) == "text"
|
||||
|
||||
def test_auto_text_for_custom_with_no_override(self):
|
||||
# Unchanged baseline: unknown custom model → text.
|
||||
with patch("agent.models_dev.get_model_capabilities", return_value=None):
|
||||
assert decide_image_input_mode("custom", "unknown", {}) == "text"
|
||||
|
||||
def test_explicit_aux_vision_override_still_wins(self):
|
||||
# If the user has configured a dedicated vision aux backend, respect
|
||||
# it even when supports_vision: true is also set.
|
||||
cfg = {
|
||||
"model": {"supports_vision": True},
|
||||
"auxiliary": {"vision": {"provider": "openrouter", "model": "gemini-2.5-pro"}},
|
||||
}
|
||||
with patch("agent.models_dev.get_model_capabilities", return_value=None):
|
||||
assert decide_image_input_mode("custom", "qwen3.6-35b", cfg) == "text"
|
||||
|
||||
|
||||
# ─── build_native_content_parts ──────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
@@ -442,6 +442,7 @@ class TestBuildNousSubscriptionPrompt:
|
||||
"web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"),
|
||||
"image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"),
|
||||
"tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
|
||||
"stt": NousFeatureState("stt", "Speech-to-text", True, True, True, True, False, True, "OpenAI Whisper"),
|
||||
"browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
|
||||
"modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
|
||||
},
|
||||
@@ -452,7 +453,7 @@ class TestBuildNousSubscriptionPrompt:
|
||||
|
||||
assert "Browser Use" in prompt
|
||||
assert "Modal execution is optional" in prompt
|
||||
assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys" in prompt
|
||||
assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, OpenAI Whisper, or Browser-Use API keys" in prompt
|
||||
|
||||
def test_non_subscriber_prompt_includes_relevant_upgrade_guidance(self, monkeypatch):
|
||||
monkeypatch.setattr("tools.tool_backend_helpers.managed_nous_tools_enabled", lambda: True)
|
||||
@@ -466,6 +467,7 @@ class TestBuildNousSubscriptionPrompt:
|
||||
"web": NousFeatureState("web", "Web tools", True, False, False, False, False, True, ""),
|
||||
"image_gen": NousFeatureState("image_gen", "Image generation", True, False, False, False, False, True, ""),
|
||||
"tts": NousFeatureState("tts", "OpenAI TTS", True, False, False, False, False, True, ""),
|
||||
"stt": NousFeatureState("stt", "Speech-to-text", True, False, False, False, False, True, ""),
|
||||
"browser": NousFeatureState("browser", "Browser automation", True, False, False, False, False, True, ""),
|
||||
"modal": NousFeatureState("modal", "Modal execution", False, False, False, False, False, True, ""),
|
||||
},
|
||||
@@ -784,6 +786,7 @@ class TestPromptBuilderConstants:
|
||||
|
||||
def test_platform_hints_known_platforms(self):
|
||||
assert "whatsapp" in PLATFORM_HINTS
|
||||
assert "whatsapp_cloud" in PLATFORM_HINTS
|
||||
assert "telegram" in PLATFORM_HINTS
|
||||
assert "discord" in PLATFORM_HINTS
|
||||
assert "cron" in PLATFORM_HINTS
|
||||
@@ -791,6 +794,22 @@ class TestPromptBuilderConstants:
|
||||
assert "api_server" in PLATFORM_HINTS
|
||||
assert "webui" in PLATFORM_HINTS
|
||||
|
||||
def test_whatsapp_cloud_hint_mentions_24h_window(self):
|
||||
"""The Cloud API's 24-hour conversation window is a hard rule the
|
||||
agent should know about. Phase 5 (template fallback) was deferred,
|
||||
so the model needs to know free-form replies outside the window
|
||||
will fail with Graph error 131047 — otherwise it'll cheerfully
|
||||
try to schedule delayed messages that silently break."""
|
||||
hint = PLATFORM_HINTS["whatsapp_cloud"]
|
||||
assert "24-hour" in hint or "24h" in hint or "24 hour" in hint
|
||||
assert "131047" in hint
|
||||
|
||||
def test_whatsapp_cloud_hint_advertises_media(self):
|
||||
"""Cloud adapter supports the same MEDIA:/path/ convention as
|
||||
Baileys for outbound attachments."""
|
||||
hint = PLATFORM_HINTS["whatsapp_cloud"]
|
||||
assert "MEDIA:" in hint
|
||||
|
||||
def test_cli_hint_does_not_suggest_media_tags(self):
|
||||
# Regression: MEDIA:/path tags are intercepted only by messaging
|
||||
# gateway platforms. On the CLI they render as literal text and
|
||||
|
||||
@@ -556,10 +556,11 @@ Generate some audio.
|
||||
raising=False,
|
||||
)
|
||||
|
||||
with patch.dict(
|
||||
os.environ, {"HERMES_SESSION_PLATFORM": "telegram"}, clear=False
|
||||
):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
from gateway.session_context import clear_session_vars, set_session_vars
|
||||
|
||||
tokens = set_session_vars(platform="telegram")
|
||||
try:
|
||||
_make_skill(
|
||||
tmp_path,
|
||||
"test-skill",
|
||||
@@ -571,6 +572,8 @@ Generate some audio.
|
||||
)
|
||||
scan_skill_commands()
|
||||
msg = build_skill_invocation_message("/test-skill", "do stuff")
|
||||
finally:
|
||||
clear_session_vars(tokens)
|
||||
|
||||
assert msg is not None
|
||||
assert "local cli" in msg.lower()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Tests for agent/skill_utils.py — extract_skill_conditions metadata handling."""
|
||||
"""Tests for agent/skill_utils.py."""
|
||||
|
||||
from agent.skill_utils import extract_skill_conditions
|
||||
from agent.skill_utils import extract_skill_conditions, iter_skill_index_files
|
||||
|
||||
|
||||
def test_metadata_as_dict_with_hermes():
|
||||
@@ -56,3 +56,41 @@ def test_metadata_missing_entirely():
|
||||
"fallback_for_tools": [],
|
||||
"requires_tools": [],
|
||||
}
|
||||
|
||||
|
||||
def test_iter_skill_index_files_prunes_dependency_dirs(tmp_path):
|
||||
real = tmp_path / "real-skill"
|
||||
real.mkdir()
|
||||
(real / "SKILL.md").write_text("---\nname: real-skill\n---\n", encoding="utf-8")
|
||||
|
||||
nested = (
|
||||
tmp_path
|
||||
/ "bring"
|
||||
/ "scripts"
|
||||
/ ".venv"
|
||||
/ "lib"
|
||||
/ "python3.13"
|
||||
/ "site-packages"
|
||||
/ "typer"
|
||||
/ ".agents"
|
||||
/ "skills"
|
||||
/ "typer"
|
||||
)
|
||||
nested.mkdir(parents=True)
|
||||
(nested / "SKILL.md").write_text("---\nname: typer\n---\n", encoding="utf-8")
|
||||
|
||||
node_module = (
|
||||
tmp_path
|
||||
/ "web-skill"
|
||||
/ "node_modules"
|
||||
/ "dep"
|
||||
/ ".agents"
|
||||
/ "skills"
|
||||
/ "dep"
|
||||
)
|
||||
node_module.mkdir(parents=True)
|
||||
(node_module / "SKILL.md").write_text("---\nname: dep\n---\n", encoding="utf-8")
|
||||
|
||||
found = list(iter_skill_index_files(tmp_path, "SKILL.md"))
|
||||
|
||||
assert found == [real / "SKILL.md"]
|
||||
|
||||
@@ -46,6 +46,26 @@ class TestChatCompletionsBasic:
|
||||
assert "codex_reasoning_items" in msgs[0]
|
||||
assert "codex_message_items" in msgs[0]
|
||||
|
||||
def test_convert_messages_strips_tool_name(self, transport):
|
||||
"""Internal `tool_name` (used for FTS indexing in the SQLite store) is
|
||||
not part of the OpenAI Chat Completions schema. Strict providers like
|
||||
Moonshot/Kimi reject it with HTTP 400 'Extra inputs are not permitted'.
|
||||
"""
|
||||
msgs = [
|
||||
{"role": "user", "content": "hi"},
|
||||
{"role": "assistant", "content": None,
|
||||
"tool_calls": [{"id": "call_1", "type": "function",
|
||||
"function": {"name": "execute_code", "arguments": "{}"}}]},
|
||||
{"role": "tool", "tool_call_id": "call_1", "tool_name": "execute_code",
|
||||
"content": "result"},
|
||||
]
|
||||
result = transport.convert_messages(msgs)
|
||||
assert "tool_name" not in result[2]
|
||||
assert result[2]["content"] == "result"
|
||||
assert result[2]["tool_call_id"] == "call_1"
|
||||
# Original list untouched (deepcopy-on-demand)
|
||||
assert msgs[2]["tool_name"] == "execute_code"
|
||||
|
||||
|
||||
class TestChatCompletionsBuildKwargs:
|
||||
|
||||
|
||||
@@ -196,14 +196,13 @@ class TestCodexBuildKwargs:
|
||||
)
|
||||
# xAI Responses receives reasoning.effort on the allowlisted models.
|
||||
assert kw.get("reasoning") == {"effort": "high"}
|
||||
# As of May 2026 we deliberately do NOT request
|
||||
# reasoning.encrypted_content back from xAI — the OAuth/SuperGrok
|
||||
# surface rejects replayed encrypted reasoning items on turn 2+
|
||||
# (the multi-turn "Expected to have received response.created
|
||||
# before error" failure). Grok still reasons natively each turn;
|
||||
# we just don't try to thread the prior turn's encrypted blob back
|
||||
# in. See tests/run_agent/test_codex_xai_oauth_recovery.py.
|
||||
assert "reasoning.encrypted_content" not in kw.get("include", [])
|
||||
# As of May 2026 (post-revert of PR #26644) we DO request
|
||||
# reasoning.encrypted_content back from xAI so we can replay it
|
||||
# across turns for cross-turn coherence — xAI explicitly relies
|
||||
# on this for their partnership integration. See
|
||||
# tests/run_agent/test_codex_xai_oauth_recovery.py for the
|
||||
# full history.
|
||||
assert "reasoning.encrypted_content" in kw.get("include", [])
|
||||
|
||||
def test_xai_reasoning_disabled_no_reasoning_key(self, transport):
|
||||
messages = [{"role": "user", "content": "Hi"}]
|
||||
@@ -229,9 +228,9 @@ class TestCodexBuildKwargs:
|
||||
# api.x.ai 400s with "Model X does not support parameter reasoningEffort"
|
||||
# on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*.
|
||||
# Those models reason natively but don't expose the dial. The transport
|
||||
# must omit the `reasoning` key for them. As of May 2026 we also no
|
||||
# longer request ``reasoning.encrypted_content`` back from xAI on ANY
|
||||
# model — see test_xai_reasoning_effort_passed for the rationale.
|
||||
# must omit the `reasoning` key for them. As of May 2026 we DO request
|
||||
# ``reasoning.encrypted_content`` back from xAI on every model —
|
||||
# see test_xai_reasoning_effort_passed for the rationale.
|
||||
|
||||
def test_xai_grok_4_omits_reasoning_effort(self, transport):
|
||||
"""grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400."""
|
||||
@@ -245,9 +244,9 @@ class TestCodexBuildKwargs:
|
||||
assert "reasoning" not in kw, (
|
||||
f"{model} must not receive a reasoning key (xAI rejects it)"
|
||||
)
|
||||
# We no longer ask xAI for encrypted_content back (see comment
|
||||
# above) — verify the include list is empty.
|
||||
assert "reasoning.encrypted_content" not in kw.get("include", [])
|
||||
# Even without the effort dial we still ask xAI to echo back
|
||||
# encrypted reasoning content so it can be replayed next turn.
|
||||
assert "reasoning.encrypted_content" in kw.get("include", [])
|
||||
|
||||
def test_xai_grok_4_fast_omits_reasoning_effort(self, transport):
|
||||
"""grok-4-fast and grok-4-1-fast variants reject reasoning.effort."""
|
||||
|
||||
@@ -160,30 +160,6 @@ class TestBranchCommandCLI:
|
||||
assert agent.reset_session_state.called
|
||||
assert agent._last_flushed_db_idx == 4 # len(conversation_history)
|
||||
|
||||
def test_branch_updates_agent_session_log_file(self, cli_instance, session_db, tmp_path):
|
||||
"""Branching must redirect the agent's session_log_file to the new session's path."""
|
||||
from cli import HermesCLI
|
||||
from pathlib import Path
|
||||
|
||||
logs_dir = tmp_path / "sessions"
|
||||
logs_dir.mkdir()
|
||||
|
||||
agent = MagicMock()
|
||||
agent._last_flushed_db_idx = 0
|
||||
agent.logs_dir = logs_dir
|
||||
agent.session_log_file = logs_dir / f"session_{cli_instance.session_id}.json"
|
||||
cli_instance.agent = agent
|
||||
|
||||
old_log_file = agent.session_log_file
|
||||
HermesCLI._handle_branch_command(cli_instance, "/branch")
|
||||
|
||||
new_session_id = cli_instance.session_id
|
||||
expected_log = logs_dir / f"session_{new_session_id}.json"
|
||||
assert agent.session_log_file == expected_log, (
|
||||
"session_log_file must point to the branch session, not the original"
|
||||
)
|
||||
assert agent.session_log_file != old_log_file
|
||||
|
||||
def test_branch_sets_resumed_flag(self, cli_instance, session_db):
|
||||
"""Branch should set _resumed=True to prevent auto-title generation."""
|
||||
from cli import HermesCLI
|
||||
|
||||
+34
-184
@@ -20,12 +20,9 @@ test runner at ``scripts/run_tests.sh``.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
@@ -37,6 +34,22 @@ if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
|
||||
# ── Per-file process isolation ──────────────────────────────────────────────
|
||||
# Tests run via ``scripts/run_tests_parallel.py``, which spawns a fresh
|
||||
# ``python -m pytest <file>`` subprocess per test file. Cross-file state
|
||||
# leakage (module-level dicts, ContextVars, caches) is impossible: each
|
||||
# file gets a clean Python interpreter. Intra-file ordering is the test
|
||||
# author's responsibility — if test A in foo.py mutates state that test B
|
||||
# in foo.py reads, that's a real bug to fix in the file (it would also
|
||||
# bite anyone running ``pytest tests/foo.py`` directly).
|
||||
#
|
||||
# This replaces the historic _reset_module_state autouse fixture (manual
|
||||
# state clearing) and the brief experiment with subprocess-per-test
|
||||
# isolation (too slow at ~17k tests).
|
||||
#
|
||||
# See ``scripts/run_tests_parallel.py`` for the runner.
|
||||
|
||||
|
||||
# ── Credential env-var filter ──────────────────────────────────────────────
|
||||
#
|
||||
# Any env var in the current process matching ONE of these patterns is
|
||||
@@ -279,7 +292,7 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
|
||||
"WECOM_HOME_CHANNEL_NAME",
|
||||
# Platform gating — set by load_gateway_config() as a side effect when
|
||||
# a config.yaml is present, so individual test bodies that call the
|
||||
# loader leak these values into later tests on the same xdist worker.
|
||||
# loader leak these values into later tests in the same process.
|
||||
# Force-clear on every test setup so the leak can't happen.
|
||||
"SLACK_REQUIRE_MENTION",
|
||||
"SLACK_STRICT_MENTION",
|
||||
@@ -368,144 +381,21 @@ def _isolate_hermes_home(_hermetic_environment):
|
||||
return None
|
||||
|
||||
|
||||
# ── Module-level state reset ───────────────────────────────────────────────
|
||||
# ── Module-level state reset — replaced by per-file process isolation ──────
|
||||
#
|
||||
# Python modules are singletons per process, and pytest-xdist workers are
|
||||
# long-lived. Module-level dicts/sets (tool registries, approval state,
|
||||
# interrupt flags) and ContextVars persist across tests in the same worker,
|
||||
# causing tests that pass alone to fail when run with siblings.
|
||||
# Each test FILE runs in a freshly-spawned ``python -m pytest <file>``
|
||||
# subprocess via ``scripts/run_tests_parallel.py``, so module-level dicts /
|
||||
# sets / ContextVars from tests in one file cannot leak into tests in
|
||||
# another file. No manual per-module clearing needed.
|
||||
#
|
||||
# Each entry in this fixture clears state that belongs to a specific module.
|
||||
# New state buckets go here too — this is the single gate that prevents
|
||||
# "works alone, flakes in CI" bugs from state leakage.
|
||||
# Within a single file, ordering is the author's responsibility. If your
|
||||
# tests in the same file share mutable state, either reset it explicitly
|
||||
# in a fixture or split them across files.
|
||||
#
|
||||
# The skill `test-suite-cascade-diagnosis` documents the concrete patterns
|
||||
# this closes; the running example was `test_command_guards` failing 12/15
|
||||
# CI runs because ``tools.approval._session_approved`` carried approvals
|
||||
# from one test's session into another's.
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_module_state():
|
||||
"""Clear module-level mutable state and ContextVars between tests.
|
||||
|
||||
Keeps state from leaking across tests on the same xdist worker. Modules
|
||||
that don't exist yet (test collection before production import) are
|
||||
skipped silently — production import later creates fresh empty state.
|
||||
"""
|
||||
# --- logging — quiet/one-shot paths mutate process-global logger state ---
|
||||
logging.disable(logging.NOTSET)
|
||||
for _logger_name in ("tools", "run_agent", "trajectory_compressor", "cron", "hermes_cli"):
|
||||
_logger = logging.getLogger(_logger_name)
|
||||
_logger.disabled = False
|
||||
_logger.setLevel(logging.NOTSET)
|
||||
_logger.propagate = True
|
||||
|
||||
# --- tools.approval — the single biggest source of cross-test pollution ---
|
||||
try:
|
||||
from tools import approval as _approval_mod
|
||||
_approval_mod._session_approved.clear()
|
||||
_approval_mod._session_yolo.clear()
|
||||
_approval_mod._permanent_approved.clear()
|
||||
_approval_mod._pending.clear()
|
||||
_approval_mod._gateway_queues.clear()
|
||||
_approval_mod._gateway_notify_cbs.clear()
|
||||
# ContextVar: reset to empty string so get_current_session_key()
|
||||
# falls through to the env var / default path, matching a fresh
|
||||
# process.
|
||||
_approval_mod._approval_session_key.set("")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- tools.interrupt — per-thread interrupt flag set ---
|
||||
try:
|
||||
from tools import interrupt as _interrupt_mod
|
||||
with _interrupt_mod._lock:
|
||||
_interrupt_mod._interrupted_threads.clear()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- gateway.session_context — 9 ContextVars that represent
|
||||
# the active gateway session. If set in one test and not reset,
|
||||
# the next test's get_session_env() reads stale values.
|
||||
try:
|
||||
from gateway import session_context as _sc_mod
|
||||
for _cv in (
|
||||
_sc_mod._SESSION_PLATFORM,
|
||||
_sc_mod._SESSION_CHAT_ID,
|
||||
_sc_mod._SESSION_CHAT_NAME,
|
||||
_sc_mod._SESSION_THREAD_ID,
|
||||
_sc_mod._SESSION_USER_ID,
|
||||
_sc_mod._SESSION_USER_NAME,
|
||||
_sc_mod._SESSION_KEY,
|
||||
_sc_mod._CRON_AUTO_DELIVER_PLATFORM,
|
||||
_sc_mod._CRON_AUTO_DELIVER_CHAT_ID,
|
||||
_sc_mod._CRON_AUTO_DELIVER_THREAD_ID,
|
||||
):
|
||||
_cv.set(_sc_mod._UNSET)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- tools.env_passthrough — ContextVar<set[str]> with no default ---
|
||||
# LookupError is normal if the test never set it. Setting it to an
|
||||
# empty set unconditionally normalizes the starting state.
|
||||
try:
|
||||
from tools import env_passthrough as _envp_mod
|
||||
_envp_mod._allowed_env_vars_var.set(set())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- tools.terminal_tool — active environment/cwd cache ---
|
||||
# File tools prefer a live terminal cwd when one is cached for the task.
|
||||
# Clear terminal environments between tests so a prior terminal call can't
|
||||
# override TERMINAL_CWD in path-resolution tests.
|
||||
try:
|
||||
from tools import terminal_tool as _term_mod
|
||||
_envs_to_cleanup = []
|
||||
with _term_mod._env_lock:
|
||||
_envs_to_cleanup = list(_term_mod._active_environments.values())
|
||||
_term_mod._active_environments.clear()
|
||||
_term_mod._last_activity.clear()
|
||||
_term_mod._creation_locks.clear()
|
||||
for _env in _envs_to_cleanup:
|
||||
try:
|
||||
_env.cleanup()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- tools.credential_files — ContextVar<dict> ---
|
||||
try:
|
||||
from tools import credential_files as _credf_mod
|
||||
_credf_mod._registered_files_var.set({})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- agent.auxiliary_client — runtime main provider/model override and
|
||||
# payment-error health cache. Both are process-global in production;
|
||||
# reset them per test so one worker's fallback/402 test does not make
|
||||
# later auxiliary-client tests skip otherwise-available providers.
|
||||
try:
|
||||
from agent import auxiliary_client as _aux_mod
|
||||
_aux_mod.clear_runtime_main()
|
||||
_aux_mod._reset_aux_unhealthy_cache()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- tools.file_tools — per-task read history + file-ops cache ---
|
||||
# _read_tracker accumulates per-task_id read history for loop detection,
|
||||
# capped by _READ_HISTORY_CAP. If entries from a prior test persist, the
|
||||
# cap is hit faster than expected and capacity-related tests flake.
|
||||
try:
|
||||
from tools import file_tools as _ft_mod
|
||||
with _ft_mod._read_tracker_lock:
|
||||
_ft_mod._read_tracker.clear()
|
||||
with _ft_mod._file_ops_lock:
|
||||
_ft_mod._file_ops_cache.clear()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
yield
|
||||
# The skill ``test-suite-cascade-diagnosis`` documents the cascade patterns
|
||||
# this replaces; the running example was ``test_command_guards`` failing
|
||||
# 12/15 CI runs because ``tools.approval._session_approved`` carried
|
||||
# approvals from one test's session into another's.
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
@@ -532,13 +422,12 @@ def mock_config():
|
||||
}
|
||||
|
||||
|
||||
# ── Global test timeout ─────────────────────────────────────────────────────
|
||||
# Kill any individual test that takes longer than 30 seconds.
|
||||
# Prevents hanging tests (subprocess spawns, blocking I/O) from stalling the
|
||||
# entire test suite.
|
||||
# ── Per-test timeout — handled by the isolation plugin ─────────────────────
|
||||
#
|
||||
# The subprocess-per-test plugin enforces the configured ``isolate_timeout``
|
||||
# ini key by terminating the child if it overruns. The old SIGALRM-based
|
||||
# fixture (POSIX-only, didn't work on Windows) is gone.
|
||||
|
||||
def _timeout_handler(signum, frame):
|
||||
raise TimeoutError("Test exceeded 30 second timeout")
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _ensure_current_event_loop(request):
|
||||
@@ -584,45 +473,6 @@ def _ensure_current_event_loop(request):
|
||||
asyncio.set_event_loop(None)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _enforce_test_timeout():
|
||||
"""Kill any individual test that takes longer than 30 seconds.
|
||||
SIGALRM is Unix-only; skip on Windows."""
|
||||
if sys.platform == "win32":
|
||||
yield
|
||||
return
|
||||
old = signal.signal(signal.SIGALRM, _timeout_handler)
|
||||
signal.alarm(30)
|
||||
yield
|
||||
signal.alarm(0)
|
||||
signal.signal(signal.SIGALRM, old)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_tool_registry_caches():
|
||||
"""Clear tool-registry-level caches between tests.
|
||||
|
||||
The production registry caches ``check_fn()`` results for 30 s
|
||||
(see tools/registry.py) and :func:`get_tool_definitions` memoizes
|
||||
its result (see model_tools.py). Both are keyed on state that tests
|
||||
routinely mutate (env vars, registry._generation, config.yaml mtime)
|
||||
— but a stale result from test A can still be served to test B
|
||||
because 30 s covers the entire suite, and xdist worker reuse means
|
||||
one test's cache lands in another's process. Clearing before every
|
||||
test keeps hermetic behavior.
|
||||
"""
|
||||
try:
|
||||
from tools.registry import invalidate_check_fn_cache
|
||||
invalidate_check_fn_cache()
|
||||
except ImportError:
|
||||
pass
|
||||
try:
|
||||
from model_tools import _clear_tool_defs_cache
|
||||
_clear_tool_defs_cache()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
# ── Live-system guard ──────────────────────────────────────────────────────
|
||||
#
|
||||
# Several test files exercise the gateway-restart / kill code paths
|
||||
|
||||
@@ -74,7 +74,6 @@ class _Codex401ThenSuccessAgent(run_agent.AIAgent):
|
||||
self._cleanup_task_resources = lambda task_id: None
|
||||
self._persist_session = lambda messages, history=None: None
|
||||
self._save_trajectory = lambda messages, user_message, completed: None
|
||||
self._save_session_log = lambda messages: None
|
||||
|
||||
def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
|
||||
type(self).refresh_attempts += 1
|
||||
|
||||
@@ -2510,3 +2510,26 @@ class TestSendMediaTimeoutCancelsFuture:
|
||||
# 2. Second file still got dispatched — one timeout doesn't abort the batch
|
||||
adapter.send_video.assert_called_once()
|
||||
assert adapter.send_video.call_args[1]["video_path"] == "/tmp/fast.mp4"
|
||||
|
||||
|
||||
class TestHomeTargetEnvVarRegistry:
|
||||
"""Regression: ``_HOME_TARGET_ENV_VARS`` must include every gateway
|
||||
platform that supports cron-driven outbound delivery. Missing an
|
||||
entry means ``hermes cron create --deliver=<platform>`` silently
|
||||
fails to route through the platform's home channel."""
|
||||
|
||||
def test_whatsapp_cloud_registered(self):
|
||||
"""``deliver=whatsapp_cloud`` routes through
|
||||
WHATSAPP_CLOUD_HOME_CHANNEL — added alongside the existing
|
||||
``whatsapp`` Baileys entry."""
|
||||
from cron.scheduler import _HOME_TARGET_ENV_VARS
|
||||
|
||||
assert "whatsapp_cloud" in _HOME_TARGET_ENV_VARS
|
||||
assert _HOME_TARGET_ENV_VARS["whatsapp_cloud"] == "WHATSAPP_CLOUD_HOME_CHANNEL"
|
||||
|
||||
def test_baileys_whatsapp_still_registered(self):
|
||||
"""Sanity guard: the Cloud addition didn't disturb Baileys
|
||||
whatsapp routing."""
|
||||
from cron.scheduler import _HOME_TARGET_ENV_VARS
|
||||
|
||||
assert _HOME_TARGET_ENV_VARS.get("whatsapp") == "WHATSAPP_HOME_CHANNEL"
|
||||
|
||||
+116
-17
@@ -313,19 +313,30 @@ def _scan_for_plugin_adapter_antipattern(source: str) -> list[str]:
|
||||
return offenses
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
"""Reject plugin-adapter tests that use the sys.path anti-pattern.
|
||||
def _fingerprint_gateway_tests() -> str:
|
||||
"""Return a short fingerprint that changes when any gateway test file changes.
|
||||
|
||||
Runs once per pytest session on the controller, BEFORE any xdist
|
||||
worker is spawned. If any file under ``tests/gateway/`` matches the
|
||||
anti-pattern, we fail the whole session with a clear message —
|
||||
before a polluted ``sys.path`` can cascade across workers.
|
||||
Uses (mtime, size) pairs instead of content hashing — fast to compute
|
||||
(stat-only, no reads) and sufficient for cache invalidation across
|
||||
per-file subprocess runs.
|
||||
"""
|
||||
# Only run on the xdist controller (or in non-xdist runs). Skip on
|
||||
# worker subprocesses so we don't scan the filesystem N times.
|
||||
if hasattr(config, "workerinput"):
|
||||
return
|
||||
import hashlib
|
||||
|
||||
h = hashlib.sha256()
|
||||
for path in sorted(_GATEWAY_DIR.rglob("test_*.py")):
|
||||
try:
|
||||
st = path.stat()
|
||||
h.update(f"{path.name}:{st.st_mtime_ns}:{st.st_size}".encode())
|
||||
except OSError:
|
||||
h.update(f"{path.name}:missing".encode())
|
||||
return h.hexdigest()[:16]
|
||||
|
||||
|
||||
def _run_adapter_antipattern_scan() -> list[str]:
|
||||
"""Scan gateway test files for the plugin-adapter anti-pattern.
|
||||
|
||||
Returns a list of violation strings (empty if clean).
|
||||
"""
|
||||
violations: list[str] = []
|
||||
for path in _GATEWAY_DIR.rglob("test_*.py"):
|
||||
if path.name in {"_plugin_adapter_loader.py", "conftest.py"}:
|
||||
@@ -334,20 +345,108 @@ def pytest_configure(config):
|
||||
source = path.read_text(encoding="utf-8")
|
||||
except OSError:
|
||||
continue
|
||||
# Fast string pre-filter: skip files that can't possibly violate.
|
||||
# A violating file MUST contain both (a) an adapter/plugins/platforms
|
||||
# reference AND (b) either sys.path manipulation or a bare adapter import.
|
||||
if "adapter" not in source and "plugins/platforms" not in source:
|
||||
continue
|
||||
if not (
|
||||
"sys.path" in source
|
||||
or "import adapter" in source
|
||||
or "from adapter import" in source
|
||||
):
|
||||
continue
|
||||
offenses = _scan_for_plugin_adapter_antipattern(source)
|
||||
if offenses:
|
||||
violations.append(
|
||||
f" {path.relative_to(_GATEWAY_DIR.parent.parent)}:\n "
|
||||
+ "\n ".join(offenses)
|
||||
)
|
||||
return violations
|
||||
|
||||
if violations:
|
||||
raise pytest.UsageError(
|
||||
"Plugin-adapter-import anti-pattern detected in gateway tests:\n"
|
||||
+ "\n".join(violations)
|
||||
+ "\n\n"
|
||||
+ _GUARD_HINT
|
||||
)
|
||||
|
||||
def pytest_configure(config):
|
||||
"""Reject plugin-adapter tests that use the sys.path anti-pattern.
|
||||
|
||||
Runs once per pytest session on the controller, BEFORE any xdist
|
||||
worker is spawned. If any file under ``tests/gateway/`` matches the
|
||||
anti-pattern, we fail the whole session with a clear message —
|
||||
before a polluted ``sys.path`` can cascade across workers.
|
||||
|
||||
**Performance**: in the per-file subprocess isolation model (no xdist),
|
||||
every subprocess is a "controller" — so the naive scan would run 257
|
||||
times, each costing ~1s of AST walking. We avoid this with two
|
||||
strategies:
|
||||
|
||||
1. **Tight string pre-filter**: a file can only violate if it contains
|
||||
*both* an adapter/plugins/platforms reference *and* a sys.path
|
||||
manipulation or bare ``import adapter``. This drops ~95% of files
|
||||
from needing AST parsing.
|
||||
2. **File-locked cache**: the scan result is cached in
|
||||
``.pytest-cache/gw-adapter-guard-<fingerprint>`` keyed on a
|
||||
fingerprint of the gateway test file mtimes/sizes. Concurrent
|
||||
subprocesses acquire a lock; only the first performs the scan;
|
||||
the rest wait and read the cached result.
|
||||
"""
|
||||
# Only run on the xdist controller (or in non-xdist runs). Skip on
|
||||
# worker subprocesses so we don't scan the filesystem N times.
|
||||
if hasattr(config, "workerinput"):
|
||||
return
|
||||
|
||||
fp = _fingerprint_gateway_tests()
|
||||
cache_dir = Path.cwd() / ".pytest-cache"
|
||||
cache_file = cache_dir / f"gw-adapter-guard-{fp}"
|
||||
lock_file = cache_dir / f".gw-adapter-guard-{fp}.lock"
|
||||
|
||||
cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Evict stale cache entries from previous fingerprints (best-effort).
|
||||
try:
|
||||
for old in cache_dir.glob("gw-adapter-guard-*"):
|
||||
if old.name != f"gw-adapter-guard-{fp}":
|
||||
old.unlink(missing_ok=True)
|
||||
for old in cache_dir.glob(".gw-adapter-guard-*.lock"):
|
||||
if old.name != f".gw-adapter-guard-{fp}.lock":
|
||||
old.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass # Non-critical; old files are harmless.
|
||||
|
||||
# Use filelock to ensure only one process scans at a time.
|
||||
# Concurrent subprocesses all hit pytest_configure simultaneously;
|
||||
# without a lock they'd all find no cache and all run the scan.
|
||||
try:
|
||||
from filelock import FileLock
|
||||
lock = FileLock(str(lock_file), timeout=120)
|
||||
except ImportError:
|
||||
# Fallback: no locking (still correct, just slower under contention).
|
||||
import contextlib
|
||||
|
||||
class _NoLock:
|
||||
def __enter__(self):
|
||||
return self
|
||||
def __exit__(self, *a):
|
||||
pass
|
||||
lock = _NoLock()
|
||||
|
||||
with lock:
|
||||
if cache_file.exists():
|
||||
cached = cache_file.read_text(encoding="utf-8")
|
||||
if cached == "clean":
|
||||
return
|
||||
raise pytest.UsageError(cached)
|
||||
|
||||
# Slow path: this process is the first to acquire the lock.
|
||||
violations = _run_adapter_antipattern_scan()
|
||||
|
||||
if violations:
|
||||
msg = (
|
||||
"Plugin-adapter-import anti-pattern detected in gateway tests:\n"
|
||||
+ "\n".join(violations)
|
||||
+ "\n\n"
|
||||
+ _GUARD_HINT
|
||||
)
|
||||
cache_file.write_text(msg, encoding="utf-8")
|
||||
raise pytest.UsageError(msg)
|
||||
else:
|
||||
cache_file.write_text("clean", encoding="utf-8")
|
||||
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
"""Yuanbao recall: branch A1 (exact id) and A2 (content-match) against DB-only transcripts.
|
||||
|
||||
state.db persists the platform-side ``message_id`` via the
|
||||
``platform_message_id`` column (added in the salvage of PR #29211) and
|
||||
``load_transcript`` surfaces it back on each message dict as ``message_id``
|
||||
— so the recall guard's exact-id match path stays canonical even with the
|
||||
JSONL file gone. When a row has no platform id (e.g. agent-processed
|
||||
@bot messages whose adapter didn't carry a msg_id, or pre-column legacy
|
||||
rows), recall falls through to content-match.
|
||||
"""
|
||||
from gateway.session import SessionStore
|
||||
from gateway.config import GatewayConfig
|
||||
|
||||
|
||||
def _pin_db(monkeypatch, tmp_path):
|
||||
"""Force SessionDB() to write into tmp_path instead of the real ~/.hermes."""
|
||||
import hermes_state
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", tmp_path / "state.db")
|
||||
|
||||
|
||||
def test_recall_branch_a1_exact_id_match_round_trips_through_db(tmp_path, monkeypatch):
|
||||
"""A user message persisted with ``message_id`` must round-trip through
|
||||
state.db so recall can find and redact it by exact id (branch A1)."""
|
||||
_pin_db(monkeypatch, tmp_path)
|
||||
|
||||
config = GatewayConfig()
|
||||
store = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
|
||||
sid = "test-yuanbao-recall-a1"
|
||||
store._db.create_session(session_id=sid, source="yuanbao:group:G")
|
||||
store.append_to_transcript(sid, {
|
||||
"role": "user",
|
||||
"content": "sensitive content",
|
||||
"timestamp": 1.0,
|
||||
"message_id": "platform-msg-abc",
|
||||
})
|
||||
store.append_to_transcript(sid, {
|
||||
"role": "assistant",
|
||||
"content": "ack",
|
||||
"timestamp": 2.0,
|
||||
})
|
||||
|
||||
history = store.load_transcript(sid)
|
||||
# The user row must carry its platform id back so the recall guard can
|
||||
# match by exact id; the assistant row had no platform id so it should
|
||||
# not gain one spuriously.
|
||||
user_msg = next(m for m in history if m["role"] == "user")
|
||||
assistant_msg = next(m for m in history if m["role"] == "assistant")
|
||||
assert user_msg.get("message_id") == "platform-msg-abc"
|
||||
assert "message_id" not in assistant_msg
|
||||
|
||||
# Branch A1: locate the row by exact platform id — no content heuristics.
|
||||
target = next(
|
||||
(m for m in history if m.get("message_id") == "platform-msg-abc"),
|
||||
None,
|
||||
)
|
||||
assert target is not None
|
||||
assert target["content"] == "sensitive content"
|
||||
|
||||
|
||||
def test_recall_branch_a2_content_match_when_no_platform_id(tmp_path, monkeypatch):
|
||||
"""Rows that lack a platform_message_id (e.g. agent-processed @bot
|
||||
messages) still match by content as a fallback."""
|
||||
_pin_db(monkeypatch, tmp_path)
|
||||
|
||||
config = GatewayConfig()
|
||||
store = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
|
||||
sid = "test-yuanbao-recall-a2"
|
||||
store._db.create_session(session_id=sid, source="yuanbao:group:G")
|
||||
# No message_id on the dict — simulates an agent-processed message
|
||||
# that did not carry the platform msg_id through.
|
||||
store.append_to_transcript(sid, {
|
||||
"role": "user",
|
||||
"content": "sensitive content",
|
||||
"timestamp": 1.0,
|
||||
})
|
||||
|
||||
history = store.load_transcript(sid)
|
||||
assert all("message_id" not in m for m in history)
|
||||
|
||||
# Branch A2: content match recovers the target.
|
||||
target = next(
|
||||
(m for m in history
|
||||
if m.get("role") == "user" and m.get("content") == "sensitive content"),
|
||||
None,
|
||||
)
|
||||
assert target is not None
|
||||
@@ -206,9 +206,23 @@ class TestPlatformDefaults:
|
||||
"""Signal, BlueBubbles, etc. default to 'off' tool progress."""
|
||||
from gateway.display_config import resolve_display_setting
|
||||
|
||||
for plat in ("signal", "bluebubbles", "weixin", "wecom", "dingtalk"):
|
||||
for plat in ("signal", "bluebubbles", "weixin", "wecom", "dingtalk", "whatsapp_cloud"):
|
||||
assert resolve_display_setting({}, plat, "tool_progress") == "off", plat
|
||||
|
||||
def test_whatsapp_cloud_locked_to_low_tier_until_edit_message_lands(self):
|
||||
"""Regression guard: ``whatsapp_cloud`` must stay TIER_LOW until the
|
||||
adapter implements edit_message. Without an edit endpoint, raising
|
||||
the tier to MEDIUM would spam separate WhatsApp messages for every
|
||||
tool-progress update, which is the exact failure mode this entry
|
||||
exists to avoid.
|
||||
|
||||
When/if Cloud's edit_message lands, update _PLATFORM_DEFAULTS to
|
||||
TIER_MEDIUM and update this test to assert ``"new"`` accordingly.
|
||||
"""
|
||||
from gateway.display_config import resolve_display_setting
|
||||
assert resolve_display_setting({}, "whatsapp_cloud", "tool_progress") == "off"
|
||||
assert resolve_display_setting({}, "whatsapp_cloud", "streaming") is False
|
||||
|
||||
def test_minimal_tier_platforms(self):
|
||||
"""Email, SMS, webhook default to 'off' tool progress."""
|
||||
from gateway.display_config import resolve_display_setting
|
||||
|
||||
@@ -22,19 +22,26 @@ from gateway.config import PlatformConfig
|
||||
|
||||
|
||||
def _ensure_telegram_mock():
|
||||
if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
|
||||
return
|
||||
|
||||
telegram_mod = MagicMock()
|
||||
telegram_mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
|
||||
telegram_mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
|
||||
telegram_mod.constants.ChatType.GROUP = "group"
|
||||
telegram_mod.constants.ChatType.SUPERGROUP = "supergroup"
|
||||
telegram_mod.constants.ChatType.CHANNEL = "channel"
|
||||
telegram_mod.constants.ChatType.PRIVATE = "private"
|
||||
|
||||
for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"):
|
||||
sys.modules.setdefault(name, telegram_mod)
|
||||
# Register telegram.constants as a separate module mock so that
|
||||
# ``from telegram.constants import ChatType`` resolves to our mock
|
||||
# with string-valued members (not auto-generated MagicMocks).
|
||||
constants_mod = MagicMock()
|
||||
constants_mod.ParseMode.MARKDOWN_V2 = "MarkdownV2"
|
||||
constants_mod.ChatType.GROUP = "group"
|
||||
constants_mod.ChatType.SUPERGROUP = "supergroup"
|
||||
constants_mod.ChatType.CHANNEL = "channel"
|
||||
constants_mod.ChatType.PRIVATE = "private"
|
||||
|
||||
sys.modules["telegram"] = telegram_mod
|
||||
sys.modules["telegram.ext"] = telegram_mod.ext
|
||||
sys.modules["telegram.constants"] = constants_mod
|
||||
sys.modules["telegram.request"] = telegram_mod.request
|
||||
|
||||
# Force reimport so the adapter picks up the mock ChatType.
|
||||
sys.modules.pop("gateway.platforms.telegram", None)
|
||||
|
||||
|
||||
_ensure_telegram_mock()
|
||||
|
||||
@@ -22,6 +22,11 @@ import pytest
|
||||
|
||||
from gateway.config import Platform, PlatformConfig, load_gateway_config
|
||||
|
||||
# Platform uses _missing_() for dynamic members, so "google_chat" is
|
||||
# resolvable via Platform("google_chat") even without a static
|
||||
# GOOGLE_CHAT attribute on the enum class.
|
||||
_GC = Platform("google_chat")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mock the google-* packages if they are not installed
|
||||
@@ -229,7 +234,7 @@ def _make_chat_envelope(text="hello", sender_email="u@example.com", sender_type=
|
||||
|
||||
class TestPlatformRegistration:
|
||||
def test_enum_value(self):
|
||||
assert Platform.GOOGLE_CHAT.value == "google_chat"
|
||||
assert _GC.value == "google_chat"
|
||||
|
||||
def test_requirements_check_returns_true_when_available(self):
|
||||
# The shim flag is True in this test module.
|
||||
@@ -266,14 +271,14 @@ class TestEnvConfigLoading:
|
||||
monkeypatch.setenv("GOOGLE_CHAT_PROJECT_ID", "p")
|
||||
# No subscription.
|
||||
cfg = load_gateway_config()
|
||||
assert Platform.GOOGLE_CHAT not in cfg.platforms
|
||||
assert _GC not in cfg.platforms
|
||||
|
||||
def test_missing_project_does_not_enable(self, monkeypatch):
|
||||
self._clean_env(monkeypatch)
|
||||
monkeypatch.setenv("GOOGLE_CHAT_SUBSCRIPTION_NAME",
|
||||
"projects/p/subscriptions/s")
|
||||
cfg = load_gateway_config()
|
||||
assert Platform.GOOGLE_CHAT not in cfg.platforms
|
||||
assert _GC not in cfg.platforms
|
||||
|
||||
|
||||
|
||||
@@ -2583,7 +2588,7 @@ class TestAuthorizationEmailMatch:
|
||||
runner.pairing_store.is_approved = MagicMock(return_value=False)
|
||||
|
||||
source = SessionSource(
|
||||
platform=Platform.GOOGLE_CHAT,
|
||||
platform=_GC,
|
||||
chat_id="spaces/S",
|
||||
chat_type="dm",
|
||||
user_id="alice@example.com", # post-swap: email is canonical
|
||||
@@ -2604,7 +2609,7 @@ class TestAuthorizationEmailMatch:
|
||||
runner.pairing_store.is_approved = MagicMock(return_value=False)
|
||||
|
||||
source = SessionSource(
|
||||
platform=Platform.GOOGLE_CHAT,
|
||||
platform=_GC,
|
||||
chat_id="spaces/S",
|
||||
chat_type="dm",
|
||||
user_id="bob@example.com",
|
||||
@@ -2630,7 +2635,7 @@ class TestAuthorizationEmailMatch:
|
||||
runner.pairing_store.is_approved = MagicMock(return_value=False)
|
||||
|
||||
source = SessionSource(
|
||||
platform=Platform.GOOGLE_CHAT,
|
||||
platform=_GC,
|
||||
chat_id="spaces/S",
|
||||
chat_type="dm",
|
||||
user_id="users/77777", # no email available — resource name wins
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
"""Verify load_transcript returns SQLite messages without any JSONL file."""
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.session import SessionStore
|
||||
from gateway.config import GatewayConfig
|
||||
|
||||
|
||||
def test_load_transcript_returns_db_messages_when_no_jsonl(tmp_path, monkeypatch):
|
||||
"""Reading a transcript must work from SQLite alone — no JSONL fallback needed.
|
||||
|
||||
Pin DEFAULT_DB_PATH to tmp_path so this test cannot write to the real
|
||||
~/.hermes/state.db. (DEFAULT_DB_PATH is a module-level constant computed
|
||||
at hermes_state import time, before pytest's HERMES_HOME monkeypatch
|
||||
fires — the autouse fixture's HERMES_HOME override doesn't help here.)
|
||||
"""
|
||||
import hermes_state
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", tmp_path / "state.db")
|
||||
|
||||
config = GatewayConfig()
|
||||
store = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
|
||||
sid = "test-session-db-only"
|
||||
store._db.create_session(session_id=sid, source="test")
|
||||
store.append_to_transcript(sid, {"role": "user", "content": "hello", "timestamp": 1.0})
|
||||
store.append_to_transcript(sid, {"role": "assistant", "content": "world", "timestamp": 2.0})
|
||||
|
||||
history = store.load_transcript(sid)
|
||||
assert len(history) == 2
|
||||
assert history[0]["content"] == "hello"
|
||||
assert history[1]["content"] == "world"
|
||||
@@ -8,7 +8,6 @@ import gateway.mirror as mirror_mod
|
||||
from gateway.mirror import (
|
||||
mirror_to_session,
|
||||
_find_session_id,
|
||||
_append_to_jsonl,
|
||||
)
|
||||
|
||||
|
||||
@@ -152,33 +151,6 @@ class TestFindSessionId:
|
||||
assert result == "sess_1"
|
||||
|
||||
|
||||
class TestAppendToJsonl:
|
||||
def test_appends_message(self, tmp_path):
|
||||
sessions_dir = tmp_path / "sessions"
|
||||
sessions_dir.mkdir()
|
||||
|
||||
with patch.object(mirror_mod, "_SESSIONS_DIR", sessions_dir):
|
||||
_append_to_jsonl("sess_1", {"role": "assistant", "content": "Hello"})
|
||||
|
||||
transcript = sessions_dir / "sess_1.jsonl"
|
||||
lines = transcript.read_text().strip().splitlines()
|
||||
assert len(lines) == 1
|
||||
msg = json.loads(lines[0])
|
||||
assert msg["role"] == "assistant"
|
||||
assert msg["content"] == "Hello"
|
||||
|
||||
def test_appends_multiple_messages(self, tmp_path):
|
||||
sessions_dir = tmp_path / "sessions"
|
||||
sessions_dir.mkdir()
|
||||
|
||||
with patch.object(mirror_mod, "_SESSIONS_DIR", sessions_dir):
|
||||
_append_to_jsonl("sess_1", {"role": "assistant", "content": "msg1"})
|
||||
_append_to_jsonl("sess_1", {"role": "assistant", "content": "msg2"})
|
||||
|
||||
transcript = sessions_dir / "sess_1.jsonl"
|
||||
lines = transcript.read_text().strip().splitlines()
|
||||
assert len(lines) == 2
|
||||
|
||||
|
||||
class TestMirrorToSession:
|
||||
def test_successful_mirror(self, tmp_path):
|
||||
@@ -192,15 +164,16 @@ class TestMirrorToSession:
|
||||
|
||||
with patch.object(mirror_mod, "_SESSIONS_DIR", sessions_dir), \
|
||||
patch.object(mirror_mod, "_SESSIONS_INDEX", index_file), \
|
||||
patch("gateway.mirror._append_to_sqlite"):
|
||||
patch("gateway.mirror._append_to_sqlite") as mock_sqlite:
|
||||
result = mirror_to_session("telegram", "12345", "Hello!", source_label="cli")
|
||||
|
||||
assert result is True
|
||||
|
||||
# Check JSONL was written
|
||||
transcript = sessions_dir / "sess_abc.jsonl"
|
||||
assert transcript.exists()
|
||||
msg = json.loads(transcript.read_text().strip())
|
||||
# Check SQLite writer was called with the mirror message
|
||||
mock_sqlite.assert_called_once()
|
||||
call_args = mock_sqlite.call_args
|
||||
assert call_args[0][0] == "sess_abc"
|
||||
msg = call_args[0][1]
|
||||
assert msg["content"] == "Hello!"
|
||||
assert msg["role"] == "assistant"
|
||||
assert msg["mirror"] is True
|
||||
@@ -222,12 +195,12 @@ class TestMirrorToSession:
|
||||
|
||||
with patch.object(mirror_mod, "_SESSIONS_DIR", sessions_dir), \
|
||||
patch.object(mirror_mod, "_SESSIONS_INDEX", index_file), \
|
||||
patch("gateway.mirror._append_to_sqlite"):
|
||||
patch("gateway.mirror._append_to_sqlite") as mock_sqlite:
|
||||
result = mirror_to_session("telegram", "-1001", "Hello topic!", source_label="cron", thread_id="10")
|
||||
|
||||
assert result is True
|
||||
assert (sessions_dir / "sess_topic_a.jsonl").exists()
|
||||
assert not (sessions_dir / "sess_topic_b.jsonl").exists()
|
||||
mock_sqlite.assert_called_once()
|
||||
assert mock_sqlite.call_args[0][0] == "sess_topic_a"
|
||||
|
||||
def test_successful_mirror_uses_user_id_for_group_session(self, tmp_path):
|
||||
sessions_dir, index_file = _setup_sessions(tmp_path, {
|
||||
@@ -245,7 +218,7 @@ class TestMirrorToSession:
|
||||
|
||||
with patch.object(mirror_mod, "_SESSIONS_DIR", sessions_dir), \
|
||||
patch.object(mirror_mod, "_SESSIONS_INDEX", index_file), \
|
||||
patch("gateway.mirror._append_to_sqlite"):
|
||||
patch("gateway.mirror._append_to_sqlite") as mock_sqlite:
|
||||
result = mirror_to_session(
|
||||
"telegram",
|
||||
"-1001",
|
||||
@@ -255,8 +228,8 @@ class TestMirrorToSession:
|
||||
)
|
||||
|
||||
assert result is True
|
||||
assert (sessions_dir / "sess_alice.jsonl").exists()
|
||||
assert not (sessions_dir / "sess_bob.jsonl").exists()
|
||||
mock_sqlite.assert_called_once()
|
||||
assert mock_sqlite.call_args[0][0] == "sess_alice"
|
||||
|
||||
def test_no_matching_session(self, tmp_path):
|
||||
sessions_dir, index_file = _setup_sessions(tmp_path, {})
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Regression tests for /retry replacement semantics."""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -11,14 +11,17 @@ from gateway.session import SessionStore
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_gateway_retry_replaces_last_user_turn_in_transcript(tmp_path):
|
||||
async def test_gateway_retry_replaces_last_user_turn_in_transcript(tmp_path, monkeypatch):
|
||||
# Pin DEFAULT_DB_PATH so SessionDB() doesn't write to the real ~/.hermes/state.db.
|
||||
# (Module-level constant snapshot, see test_load_transcript_db_only.)
|
||||
import hermes_state
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", tmp_path / "state.db")
|
||||
|
||||
config = GatewayConfig()
|
||||
with patch("gateway.session.SessionStore._ensure_loaded"):
|
||||
store = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
store._db = None
|
||||
store._loaded = True
|
||||
store = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
|
||||
session_id = "retry_session"
|
||||
store._db.create_session(session_id=session_id, source="test")
|
||||
for msg in [
|
||||
{"role": "session_meta", "tools": []},
|
||||
{"role": "user", "content": "first question"},
|
||||
|
||||
+25
-169
@@ -1,6 +1,4 @@
|
||||
"""Tests for gateway session management."""
|
||||
|
||||
import builtins
|
||||
import json
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
@@ -503,19 +501,19 @@ class TestSenderPrefixWithBackfill:
|
||||
|
||||
|
||||
class TestSessionStoreRewriteTranscript:
|
||||
"""Regression: /retry and /undo must persist truncated history to disk."""
|
||||
"""Regression: /retry and /undo must persist truncated history to DB."""
|
||||
|
||||
@pytest.fixture()
|
||||
def store(self, tmp_path):
|
||||
def store(self, tmp_path, monkeypatch):
|
||||
import hermes_state
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", tmp_path / "state.db")
|
||||
config = GatewayConfig()
|
||||
with patch("gateway.session.SessionStore._ensure_loaded"):
|
||||
s = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
s._db = None # no SQLite for these tests
|
||||
s._loaded = True
|
||||
s = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
return s
|
||||
|
||||
def test_rewrite_replaces_jsonl(self, store, tmp_path):
|
||||
def test_rewrite_replaces_transcript(self, store, tmp_path):
|
||||
session_id = "test_session_1"
|
||||
store._db.create_session(session_id=session_id, source="test")
|
||||
# Write initial transcript
|
||||
for msg in [
|
||||
{"role": "user", "content": "hello"},
|
||||
@@ -538,6 +536,7 @@ class TestSessionStoreRewriteTranscript:
|
||||
|
||||
def test_rewrite_with_empty_list(self, store):
|
||||
session_id = "test_session_2"
|
||||
store._db.create_session(session_id=session_id, source="test")
|
||||
store.append_to_transcript(session_id, {"role": "user", "content": "hi"})
|
||||
|
||||
store.rewrite_transcript(session_id, [])
|
||||
@@ -546,171 +545,28 @@ class TestSessionStoreRewriteTranscript:
|
||||
assert reloaded == []
|
||||
|
||||
|
||||
class TestLoadTranscriptCorruptLines:
|
||||
"""Regression: corrupt JSONL lines (e.g. from mid-write crash) must be
|
||||
skipped instead of crashing the entire transcript load. GH-1193."""
|
||||
class TestLoadTranscriptDBOnly:
|
||||
"""After spec 002, load_transcript reads only from state.db."""
|
||||
|
||||
@pytest.fixture()
|
||||
def store(self, tmp_path):
|
||||
def test_db_only_returns_empty_for_nonexistent(self, tmp_path, monkeypatch):
|
||||
import hermes_state
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", tmp_path / "state.db")
|
||||
config = GatewayConfig()
|
||||
with patch("gateway.session.SessionStore._ensure_loaded"):
|
||||
s = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
s._db = None
|
||||
s._loaded = True
|
||||
return s
|
||||
|
||||
def test_corrupt_line_skipped(self, store, tmp_path):
|
||||
session_id = "corrupt_test"
|
||||
transcript_path = store.get_transcript_path(session_id)
|
||||
transcript_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(transcript_path, "w") as f:
|
||||
f.write('{"role": "user", "content": "hello"}\n')
|
||||
f.write('{"role": "assistant", "content": "hi th') # truncated
|
||||
f.write("\n")
|
||||
f.write('{"role": "user", "content": "goodbye"}\n')
|
||||
|
||||
messages = store.load_transcript(session_id)
|
||||
assert len(messages) == 2
|
||||
assert messages[0]["content"] == "hello"
|
||||
assert messages[1]["content"] == "goodbye"
|
||||
|
||||
def test_all_lines_corrupt_returns_empty(self, store, tmp_path):
|
||||
session_id = "all_corrupt"
|
||||
transcript_path = store.get_transcript_path(session_id)
|
||||
transcript_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(transcript_path, "w") as f:
|
||||
f.write("not json at all\n")
|
||||
f.write("{truncated\n")
|
||||
|
||||
messages = store.load_transcript(session_id)
|
||||
assert messages == []
|
||||
|
||||
def test_valid_transcript_unaffected(self, store, tmp_path):
|
||||
session_id = "valid_test"
|
||||
store.append_to_transcript(session_id, {"role": "user", "content": "a"})
|
||||
store.append_to_transcript(session_id, {"role": "assistant", "content": "b"})
|
||||
|
||||
messages = store.load_transcript(session_id)
|
||||
assert len(messages) == 2
|
||||
assert messages[0]["content"] == "a"
|
||||
assert messages[1]["content"] == "b"
|
||||
|
||||
|
||||
class TestLoadTranscriptPreferLongerSource:
|
||||
"""Regression: load_transcript must return whichever source (SQLite or JSONL)
|
||||
has more messages to prevent silent truncation. GH-3212."""
|
||||
|
||||
@pytest.fixture()
|
||||
def store_with_db(self, tmp_path):
|
||||
"""SessionStore with both SQLite and JSONL active."""
|
||||
from hermes_state import SessionDB
|
||||
|
||||
config = GatewayConfig()
|
||||
with patch("gateway.session.SessionStore._ensure_loaded"):
|
||||
s = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
s._db = SessionDB(db_path=tmp_path / "state.db")
|
||||
s._loaded = True
|
||||
return s
|
||||
|
||||
def test_jsonl_longer_than_sqlite_returns_jsonl(self, store_with_db):
|
||||
"""Legacy session: JSONL has full history, SQLite has only recent turn."""
|
||||
sid = "legacy_session"
|
||||
store_with_db._db.create_session(session_id=sid, source="gateway", model="m")
|
||||
# JSONL has 10 messages (legacy history — written before SQLite existed)
|
||||
for i in range(10):
|
||||
role = "user" if i % 2 == 0 else "assistant"
|
||||
store_with_db.append_to_transcript(
|
||||
sid, {"role": role, "content": f"msg-{i}"}, skip_db=True,
|
||||
)
|
||||
# SQLite has only 2 messages (recent turn after migration)
|
||||
store_with_db._db.append_message(session_id=sid, role="user", content="new-q")
|
||||
store_with_db._db.append_message(session_id=sid, role="assistant", content="new-a")
|
||||
|
||||
result = store_with_db.load_transcript(sid)
|
||||
assert len(result) == 10
|
||||
assert result[0]["content"] == "msg-0"
|
||||
|
||||
def test_sqlite_longer_than_jsonl_returns_sqlite(self, store_with_db):
|
||||
"""Fully migrated session: SQLite has more (JSONL stopped growing)."""
|
||||
sid = "migrated_session"
|
||||
store_with_db._db.create_session(session_id=sid, source="gateway", model="m")
|
||||
# JSONL has 2 old messages
|
||||
store_with_db.append_to_transcript(
|
||||
sid, {"role": "user", "content": "old-q"}, skip_db=True,
|
||||
)
|
||||
store_with_db.append_to_transcript(
|
||||
sid, {"role": "assistant", "content": "old-a"}, skip_db=True,
|
||||
)
|
||||
# SQLite has 4 messages (superset after migration)
|
||||
for i in range(4):
|
||||
role = "user" if i % 2 == 0 else "assistant"
|
||||
store_with_db._db.append_message(session_id=sid, role=role, content=f"db-{i}")
|
||||
|
||||
result = store_with_db.load_transcript(sid)
|
||||
assert len(result) == 4
|
||||
assert result[0]["content"] == "db-0"
|
||||
|
||||
def test_sqlite_empty_falls_back_to_jsonl(self, store_with_db):
|
||||
"""No SQLite rows — falls back to JSONL (original behavior preserved)."""
|
||||
sid = "no_db_rows"
|
||||
store_with_db.append_to_transcript(
|
||||
sid, {"role": "user", "content": "hello"}, skip_db=True,
|
||||
)
|
||||
store_with_db.append_to_transcript(
|
||||
sid, {"role": "assistant", "content": "hi"}, skip_db=True,
|
||||
)
|
||||
|
||||
result = store_with_db.load_transcript(sid)
|
||||
assert len(result) == 2
|
||||
assert result[0]["content"] == "hello"
|
||||
|
||||
def test_both_empty_returns_empty(self, store_with_db):
|
||||
"""Neither source has data — returns empty list."""
|
||||
result = store_with_db.load_transcript("nonexistent")
|
||||
store = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
result = store.load_transcript("nonexistent")
|
||||
assert result == []
|
||||
|
||||
def test_equal_length_prefers_sqlite(self, store_with_db):
|
||||
"""When both have same count, SQLite wins (has richer fields like reasoning)."""
|
||||
sid = "equal_session"
|
||||
store_with_db._db.create_session(session_id=sid, source="gateway", model="m")
|
||||
# Write 2 messages to JSONL only
|
||||
store_with_db.append_to_transcript(
|
||||
sid, {"role": "user", "content": "jsonl-q"}, skip_db=True,
|
||||
)
|
||||
store_with_db.append_to_transcript(
|
||||
sid, {"role": "assistant", "content": "jsonl-a"}, skip_db=True,
|
||||
)
|
||||
# Write 2 different messages to SQLite only
|
||||
store_with_db._db.append_message(session_id=sid, role="user", content="db-q")
|
||||
store_with_db._db.append_message(session_id=sid, role="assistant", content="db-a")
|
||||
def test_db_only_returns_messages(self, tmp_path, monkeypatch):
|
||||
import hermes_state
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", tmp_path / "state.db")
|
||||
config = GatewayConfig()
|
||||
store = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
sid = "db_only_session"
|
||||
store._db.create_session(session_id=sid, source="gateway", model="m")
|
||||
store._db.append_message(session_id=sid, role="user", content="db-q")
|
||||
store._db.append_message(session_id=sid, role="assistant", content="db-a")
|
||||
|
||||
result = store_with_db.load_transcript(sid)
|
||||
assert len(result) == 2
|
||||
# Should be the SQLite version (equal count → prefers SQLite)
|
||||
assert result[0]["content"] == "db-q"
|
||||
|
||||
def test_unreadable_jsonl_returns_sqlite(self, store_with_db, monkeypatch):
|
||||
"""Unreadable legacy JSONL must not hide valid SQLite history."""
|
||||
sid = "unreadable_jsonl"
|
||||
store_with_db._db.create_session(session_id=sid, source="gateway", model="m")
|
||||
store_with_db._db.append_message(session_id=sid, role="user", content="db-q")
|
||||
store_with_db._db.append_message(session_id=sid, role="assistant", content="db-a")
|
||||
|
||||
transcript_path = store_with_db.get_transcript_path(sid)
|
||||
transcript_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
transcript_path.write_text('{"role": "user", "content": "jsonl-q"}\n', encoding="utf-8")
|
||||
|
||||
real_open = builtins.open
|
||||
|
||||
def raise_for_transcript(path, *args, **kwargs):
|
||||
mode = args[0] if args else kwargs.get("mode", "r")
|
||||
if Path(path) == transcript_path and "r" in mode:
|
||||
raise OSError("simulated unreadable transcript")
|
||||
return real_open(path, *args, **kwargs)
|
||||
|
||||
monkeypatch.setattr(builtins, "open", raise_for_transcript)
|
||||
|
||||
result = store_with_db.load_transcript(sid)
|
||||
result = store.load_transcript(sid)
|
||||
assert len(result) == 2
|
||||
assert result[0]["content"] == "db-q"
|
||||
assert result[1]["content"] == "db-a"
|
||||
|
||||
@@ -22,13 +22,18 @@ from gateway.session import SessionSource, SessionStore, build_session_key
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def store(tmp_path):
|
||||
"""SessionStore with no SQLite, for fast unit tests."""
|
||||
def store(tmp_path, monkeypatch):
|
||||
"""SessionStore with SQLite — load_transcript reads from DB only.
|
||||
|
||||
Pin DEFAULT_DB_PATH to tmp_path so SessionDB() can't write to the real
|
||||
~/.hermes/state.db. (DEFAULT_DB_PATH is a module-level constant computed
|
||||
at hermes_state import time, before pytest's HERMES_HOME monkeypatch
|
||||
fires — the autouse fixture's HERMES_HOME override doesn't help here.)
|
||||
"""
|
||||
import hermes_state
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", tmp_path / "state.db")
|
||||
config = GatewayConfig()
|
||||
with patch("gateway.session.SessionStore._ensure_loaded"):
|
||||
s = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
s._db = None
|
||||
s._loaded = True
|
||||
s = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
return s
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
import asyncio
|
||||
import json
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
from gateway.config import Platform, PlatformConfig, load_gateway_config
|
||||
from gateway.platforms.base import MessageType
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
def _make_adapter(
|
||||
@@ -15,7 +18,9 @@ def _make_adapter(
|
||||
allow_from=None,
|
||||
group_allow_from=None,
|
||||
allowed_chats=None,
|
||||
group_allowed_chats=None,
|
||||
guest_mode=None,
|
||||
observe_unmentioned_group_messages=None,
|
||||
bot_username="hermes_bot",
|
||||
):
|
||||
from gateway.platforms.telegram import TelegramAdapter
|
||||
@@ -49,8 +54,14 @@ def _make_adapter(
|
||||
# environment; production adapters without this explicit key still fall
|
||||
# back to the env var.
|
||||
extra["allowed_chats"] = []
|
||||
if group_allowed_chats is not None:
|
||||
extra["group_allowed_chats"] = group_allowed_chats
|
||||
else:
|
||||
extra["group_allowed_chats"] = []
|
||||
if guest_mode is not None:
|
||||
extra["guest_mode"] = guest_mode
|
||||
if observe_unmentioned_group_messages is not None:
|
||||
extra["observe_unmentioned_group_messages"] = observe_unmentioned_group_messages
|
||||
|
||||
adapter = object.__new__(TelegramAdapter)
|
||||
adapter.platform = Platform.TELEGRAM
|
||||
@@ -60,7 +71,12 @@ def _make_adapter(
|
||||
adapter._pending_text_batches = {}
|
||||
adapter._pending_text_batch_tasks = {}
|
||||
adapter._text_batch_delay_seconds = 0.01
|
||||
adapter._text_batch_split_delay_seconds = 0.01
|
||||
adapter._mention_patterns = adapter._compile_mention_patterns()
|
||||
adapter._forum_lock = asyncio.Lock()
|
||||
adapter._forum_command_registered = set()
|
||||
adapter._active_sessions = {}
|
||||
adapter._pending_messages = {}
|
||||
# Trigger-gating tests don't exercise the allowlist gate (added by
|
||||
# #23795 + #24468). Force-authorize all senders so the trigger logic
|
||||
# under test runs. Without this, every fake message hits the new
|
||||
@@ -74,6 +90,7 @@ def _group_message(
|
||||
*,
|
||||
chat_id=-100,
|
||||
from_user_id=111,
|
||||
from_user_name="Alice Example",
|
||||
thread_id=None,
|
||||
reply_to_bot=False,
|
||||
entities=None,
|
||||
@@ -82,29 +99,34 @@ def _group_message(
|
||||
):
|
||||
reply_to_message = None
|
||||
if reply_to_bot:
|
||||
reply_to_message = SimpleNamespace(from_user=SimpleNamespace(id=999))
|
||||
reply_to_message = SimpleNamespace(from_user=SimpleNamespace(id=999), message_id=10, text="previous bot reply", caption=None)
|
||||
return SimpleNamespace(
|
||||
message_id=42,
|
||||
text=text,
|
||||
caption=caption,
|
||||
entities=entities or [],
|
||||
caption_entities=caption_entities or [],
|
||||
message_thread_id=thread_id,
|
||||
chat=SimpleNamespace(id=chat_id, type="group"),
|
||||
from_user=SimpleNamespace(id=from_user_id),
|
||||
is_topic_message=thread_id is not None,
|
||||
chat=SimpleNamespace(id=chat_id, type="group", title="Test Group", is_forum=thread_id is not None),
|
||||
from_user=SimpleNamespace(id=from_user_id, full_name=from_user_name, first_name=from_user_name.split()[0]),
|
||||
reply_to_message=reply_to_message,
|
||||
date=None,
|
||||
)
|
||||
|
||||
|
||||
def _dm_message(text="hello", *, from_user_id=111):
|
||||
return SimpleNamespace(
|
||||
message_id=43,
|
||||
text=text,
|
||||
caption=None,
|
||||
entities=[],
|
||||
caption_entities=[],
|
||||
message_thread_id=None,
|
||||
chat=SimpleNamespace(id=from_user_id, type="private"),
|
||||
from_user=SimpleNamespace(id=from_user_id),
|
||||
chat=SimpleNamespace(id=from_user_id, type="private", full_name="Alice Example", title=None, is_forum=False),
|
||||
from_user=SimpleNamespace(id=from_user_id, full_name="Alice Example", first_name="Alice"),
|
||||
reply_to_message=None,
|
||||
date=None,
|
||||
)
|
||||
|
||||
|
||||
@@ -134,6 +156,157 @@ def test_group_messages_can_be_opened_via_config():
|
||||
assert adapter._should_process_message(_group_message("hello everyone")) is True
|
||||
|
||||
|
||||
def test_unmentioned_group_messages_can_be_observed_without_dispatching():
|
||||
async def _run():
|
||||
adapter = _make_adapter(
|
||||
require_mention=True,
|
||||
allowed_chats=["-100"],
|
||||
group_allowed_chats=["-100"],
|
||||
observe_unmentioned_group_messages=True,
|
||||
)
|
||||
store = _FakeSessionStore()
|
||||
adapter._session_store = store
|
||||
update = SimpleNamespace(
|
||||
update_id=1001,
|
||||
message=_group_message("side chatter"),
|
||||
effective_message=None,
|
||||
)
|
||||
|
||||
await adapter._handle_text_message(update, SimpleNamespace())
|
||||
|
||||
adapter._message_handler.assert_not_awaited()
|
||||
assert len(store.messages) == 1
|
||||
session_id, message, skip_db = store.messages[0]
|
||||
assert session_id == "telegram-group-session"
|
||||
assert skip_db is False
|
||||
assert message["role"] == "user"
|
||||
assert message["content"] == "[Alice Example|111]\nside chatter"
|
||||
assert message["observed"] is True
|
||||
assert message["message_id"] == "42"
|
||||
assert store.sources[0].chat_id == "-100"
|
||||
assert store.sources[0].chat_type == "group"
|
||||
assert store.sources[0].user_id is None
|
||||
assert store.sources[0].user_name is None
|
||||
|
||||
asyncio.run(_run())
|
||||
|
||||
|
||||
def test_observed_group_context_uses_shared_source_and_prompt_for_later_mentions():
|
||||
async def _run():
|
||||
adapter = _make_adapter(
|
||||
require_mention=True,
|
||||
allowed_chats=["-100"],
|
||||
group_allowed_chats=["-100"],
|
||||
observe_unmentioned_group_messages=True,
|
||||
)
|
||||
adapter._session_store = _FakeSessionStore()
|
||||
text = "@hermes_bot what did Alice say?"
|
||||
msg = _group_message(
|
||||
text,
|
||||
from_user_id=222,
|
||||
from_user_name="Bob Example",
|
||||
entities=[_mention_entity(text)],
|
||||
)
|
||||
event = adapter._build_message_event(msg, MessageType.TEXT, update_id=1003)
|
||||
event.text = adapter._clean_bot_trigger_text(event.text)
|
||||
event.channel_prompt = "Existing topic prompt"
|
||||
|
||||
event = adapter._apply_telegram_group_observe_attribution(event)
|
||||
|
||||
assert event.source.chat_id == "-100"
|
||||
assert event.source.chat_type == "group"
|
||||
assert event.source.user_id is None
|
||||
assert event.source.user_name is None
|
||||
assert event.text == "[Bob Example|222]\nwhat did Alice say?"
|
||||
assert "Existing topic prompt" in event.channel_prompt
|
||||
assert "observed Telegram group context" in event.channel_prompt
|
||||
assert "current new message" in event.channel_prompt
|
||||
|
||||
asyncio.run(_run())
|
||||
|
||||
|
||||
def test_unmentioned_group_observe_requires_chat_allowlist_for_shared_context():
|
||||
async def _run():
|
||||
adapter = _make_adapter(
|
||||
require_mention=True,
|
||||
allowed_chats=["-100"],
|
||||
observe_unmentioned_group_messages=True,
|
||||
)
|
||||
store = _FakeSessionStore()
|
||||
adapter._session_store = store
|
||||
update = SimpleNamespace(
|
||||
update_id=1004,
|
||||
message=_group_message("side chatter"),
|
||||
effective_message=None,
|
||||
)
|
||||
|
||||
await adapter._handle_text_message(update, SimpleNamespace())
|
||||
|
||||
adapter._message_handler.assert_not_awaited()
|
||||
assert store.messages == []
|
||||
|
||||
asyncio.run(_run())
|
||||
|
||||
|
||||
def test_shared_group_observe_source_is_authorized_by_group_allowed_chats(monkeypatch):
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id="-100",
|
||||
chat_type="group",
|
||||
user_id=None,
|
||||
user_name=None,
|
||||
)
|
||||
|
||||
monkeypatch.setenv("TELEGRAM_GROUP_ALLOWED_CHATS", "-100")
|
||||
monkeypatch.delenv("TELEGRAM_ALLOWED_CHATS", raising=False)
|
||||
|
||||
assert runner._is_user_authorized(source) is True
|
||||
|
||||
|
||||
def test_unmentioned_group_observe_respects_chat_allowlist():
|
||||
async def _run():
|
||||
adapter = _make_adapter(
|
||||
require_mention=True,
|
||||
allowed_chats=["-200"],
|
||||
group_allowed_chats=["-200"],
|
||||
observe_unmentioned_group_messages=True,
|
||||
)
|
||||
store = _FakeSessionStore()
|
||||
adapter._session_store = store
|
||||
update = SimpleNamespace(
|
||||
update_id=1002,
|
||||
message=_group_message("side chatter", chat_id=-201),
|
||||
effective_message=None,
|
||||
)
|
||||
|
||||
await adapter._handle_text_message(update, SimpleNamespace())
|
||||
|
||||
adapter._message_handler.assert_not_awaited()
|
||||
assert store.messages == []
|
||||
|
||||
asyncio.run(_run())
|
||||
|
||||
|
||||
class _FakeSessionEntry:
|
||||
session_id = "telegram-group-session"
|
||||
|
||||
|
||||
class _FakeSessionStore:
|
||||
def __init__(self):
|
||||
self.sources = []
|
||||
self.messages = []
|
||||
|
||||
def get_or_create_session(self, source):
|
||||
self.sources.append(source)
|
||||
return _FakeSessionEntry()
|
||||
|
||||
def append_to_transcript(self, session_id, message, skip_db=False):
|
||||
self.messages.append((session_id, message, skip_db))
|
||||
|
||||
|
||||
def test_group_messages_can_require_direct_trigger_via_config():
|
||||
adapter = _make_adapter(require_mention=True)
|
||||
|
||||
@@ -349,12 +522,15 @@ def test_config_bridges_telegram_group_settings(monkeypatch, tmp_path):
|
||||
" require_mention: true\n"
|
||||
" guest_mode: true\n"
|
||||
" exclusive_bot_mentions: true\n"
|
||||
" observe_unmentioned_group_messages: true\n"
|
||||
" mention_patterns:\n"
|
||||
" - \"^\\\\s*chompy\\\\b\"\n"
|
||||
" free_response_chats:\n"
|
||||
" - \"-123\"\n"
|
||||
" allowed_chats:\n"
|
||||
" - \"-100\"\n"
|
||||
" group_allowed_chats:\n"
|
||||
" - \"-100\"\n"
|
||||
" allowed_topics:\n"
|
||||
" - 8\n",
|
||||
encoding="utf-8",
|
||||
@@ -365,8 +541,10 @@ def test_config_bridges_telegram_group_settings(monkeypatch, tmp_path):
|
||||
monkeypatch.delenv("TELEGRAM_MENTION_PATTERNS", raising=False)
|
||||
monkeypatch.delenv("TELEGRAM_EXCLUSIVE_BOT_MENTIONS", raising=False)
|
||||
monkeypatch.delenv("TELEGRAM_GUEST_MODE", raising=False)
|
||||
monkeypatch.delenv("TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES", raising=False)
|
||||
monkeypatch.delenv("TELEGRAM_FREE_RESPONSE_CHATS", raising=False)
|
||||
monkeypatch.delenv("TELEGRAM_ALLOWED_CHATS", raising=False)
|
||||
monkeypatch.delenv("TELEGRAM_GROUP_ALLOWED_CHATS", raising=False)
|
||||
monkeypatch.delenv("TELEGRAM_ALLOWED_TOPICS", raising=False)
|
||||
|
||||
config = load_gateway_config()
|
||||
@@ -374,17 +552,21 @@ def test_config_bridges_telegram_group_settings(monkeypatch, tmp_path):
|
||||
assert config is not None
|
||||
assert __import__("os").environ["TELEGRAM_REQUIRE_MENTION"] == "true"
|
||||
assert __import__("os").environ["TELEGRAM_GUEST_MODE"] == "true"
|
||||
assert __import__("os").environ["TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES"] == "true"
|
||||
assert __import__("os").environ["TELEGRAM_EXCLUSIVE_BOT_MENTIONS"] == "true"
|
||||
assert json.loads(__import__("os").environ["TELEGRAM_MENTION_PATTERNS"]) == [r"^\s*chompy\b"]
|
||||
assert __import__("os").environ["TELEGRAM_FREE_RESPONSE_CHATS"] == "-123"
|
||||
assert __import__("os").environ["TELEGRAM_ALLOWED_CHATS"] == "-100"
|
||||
assert __import__("os").environ["TELEGRAM_GROUP_ALLOWED_CHATS"] == "-100"
|
||||
assert __import__("os").environ["TELEGRAM_ALLOWED_TOPICS"] == "8"
|
||||
tg_cfg = config.platforms.get(Platform.TELEGRAM)
|
||||
assert tg_cfg is not None
|
||||
assert tg_cfg.extra.get("guest_mode") is True
|
||||
assert tg_cfg.extra.get("allowed_chats") == ["-100"]
|
||||
assert tg_cfg.extra.get("group_allowed_chats") == ["-100"]
|
||||
assert tg_cfg.extra.get("allowed_topics") == [8]
|
||||
assert tg_cfg.extra.get("exclusive_bot_mentions") is True
|
||||
assert tg_cfg.extra.get("observe_unmentioned_group_messages") is True
|
||||
|
||||
|
||||
def test_config_bridges_telegram_user_allowlists(monkeypatch, tmp_path):
|
||||
@@ -518,3 +700,186 @@ def test_config_bridges_telegram_ignored_threads(monkeypatch, tmp_path):
|
||||
|
||||
assert config is not None
|
||||
assert __import__("os").environ["TELEGRAM_IGNORED_THREADS"] == "31,42"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers for location / media observe+attribution tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _group_location_message(
|
||||
*,
|
||||
chat_id=-100,
|
||||
from_user_id=111,
|
||||
from_user_name="Alice Example",
|
||||
lat=37.7749,
|
||||
lon=-122.4194,
|
||||
):
|
||||
return SimpleNamespace(
|
||||
message_id=50,
|
||||
text=None,
|
||||
caption=None,
|
||||
entities=[],
|
||||
caption_entities=[],
|
||||
message_thread_id=None,
|
||||
is_topic_message=False,
|
||||
chat=SimpleNamespace(id=chat_id, type="group", title="Test Group", is_forum=False),
|
||||
from_user=SimpleNamespace(
|
||||
id=from_user_id, full_name=from_user_name,
|
||||
first_name=from_user_name.split()[0],
|
||||
),
|
||||
reply_to_message=None,
|
||||
date=None,
|
||||
location=SimpleNamespace(latitude=lat, longitude=lon),
|
||||
venue=None,
|
||||
sticker=None,
|
||||
photo=None,
|
||||
video=None,
|
||||
audio=None,
|
||||
voice=None,
|
||||
document=None,
|
||||
)
|
||||
|
||||
|
||||
def _group_voice_message(
|
||||
*,
|
||||
chat_id=-100,
|
||||
from_user_id=111,
|
||||
from_user_name="Alice Example",
|
||||
caption=None,
|
||||
):
|
||||
return SimpleNamespace(
|
||||
message_id=51,
|
||||
text=None,
|
||||
caption=caption,
|
||||
entities=[],
|
||||
caption_entities=[],
|
||||
message_thread_id=None,
|
||||
is_topic_message=False,
|
||||
chat=SimpleNamespace(id=chat_id, type="group", title="Test Group", is_forum=False),
|
||||
from_user=SimpleNamespace(
|
||||
id=from_user_id, full_name=from_user_name,
|
||||
first_name=from_user_name.split()[0],
|
||||
),
|
||||
reply_to_message=None,
|
||||
date=None,
|
||||
location=None,
|
||||
venue=None,
|
||||
sticker=None,
|
||||
photo=None,
|
||||
video=None,
|
||||
audio=None,
|
||||
voice=SimpleNamespace(
|
||||
get_file=AsyncMock(side_effect=Exception("simulated download failure"))
|
||||
),
|
||||
document=None,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Observe + attribution parity: location messages
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_unmentioned_location_message_observed_in_group():
|
||||
async def _run():
|
||||
adapter = _make_adapter(
|
||||
require_mention=True,
|
||||
allowed_chats=["-100"],
|
||||
group_allowed_chats=["-100"],
|
||||
observe_unmentioned_group_messages=True,
|
||||
)
|
||||
store = _FakeSessionStore()
|
||||
adapter._session_store = store
|
||||
update = SimpleNamespace(
|
||||
update_id=2001,
|
||||
message=_group_location_message(),
|
||||
effective_message=None,
|
||||
)
|
||||
|
||||
await adapter._handle_location_message(update, SimpleNamespace())
|
||||
|
||||
adapter._message_handler.assert_not_awaited()
|
||||
assert len(store.messages) == 1
|
||||
_, message, _ = store.messages[0]
|
||||
assert message["observed"] is True
|
||||
assert store.sources[0].user_id is None
|
||||
|
||||
asyncio.run(_run())
|
||||
|
||||
|
||||
def test_triggered_location_message_uses_shared_session_in_observe_mode():
|
||||
async def _run():
|
||||
adapter = _make_adapter(
|
||||
require_mention=False,
|
||||
group_allowed_chats=["-100"],
|
||||
observe_unmentioned_group_messages=True,
|
||||
)
|
||||
adapter.handle_message = AsyncMock()
|
||||
update = SimpleNamespace(
|
||||
update_id=2002,
|
||||
message=_group_location_message(),
|
||||
effective_message=None,
|
||||
)
|
||||
|
||||
await adapter._handle_location_message(update, SimpleNamespace())
|
||||
|
||||
adapter.handle_message.assert_awaited_once()
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert event.source.user_id is None
|
||||
assert "[Alice Example|111]" in event.text
|
||||
|
||||
asyncio.run(_run())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Observe + attribution parity: media messages (voice as representative)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_unmentioned_voice_message_observed_in_group():
|
||||
async def _run():
|
||||
adapter = _make_adapter(
|
||||
require_mention=True,
|
||||
allowed_chats=["-100"],
|
||||
group_allowed_chats=["-100"],
|
||||
observe_unmentioned_group_messages=True,
|
||||
)
|
||||
store = _FakeSessionStore()
|
||||
adapter._session_store = store
|
||||
update = SimpleNamespace(
|
||||
update_id=3001,
|
||||
message=_group_voice_message(),
|
||||
effective_message=None,
|
||||
)
|
||||
|
||||
await adapter._handle_media_message(update, SimpleNamespace())
|
||||
|
||||
adapter._message_handler.assert_not_awaited()
|
||||
assert len(store.messages) == 1
|
||||
_, message, _ = store.messages[0]
|
||||
assert message["observed"] is True
|
||||
assert store.sources[0].user_id is None
|
||||
|
||||
asyncio.run(_run())
|
||||
|
||||
|
||||
def test_triggered_voice_message_uses_shared_session_in_observe_mode():
|
||||
async def _run():
|
||||
adapter = _make_adapter(
|
||||
require_mention=False,
|
||||
group_allowed_chats=["-100"],
|
||||
observe_unmentioned_group_messages=True,
|
||||
)
|
||||
adapter.handle_message = AsyncMock()
|
||||
update = SimpleNamespace(
|
||||
update_id=3002,
|
||||
message=_group_voice_message(caption="check this audio"),
|
||||
effective_message=None,
|
||||
)
|
||||
|
||||
await adapter._handle_media_message(update, SimpleNamespace())
|
||||
|
||||
adapter.handle_message.assert_awaited_once()
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert event.source.user_id is None
|
||||
assert "[Alice Example|111]" in event.text
|
||||
|
||||
asyncio.run(_run())
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -951,6 +951,30 @@ class TestTelegramMenuCommands:
|
||||
f"Command '{name}' is {len(name)} chars (limit {_TG_NAME_LIMIT})"
|
||||
)
|
||||
|
||||
def test_operational_builtins_survive_thirty_command_cap(self, tmp_path, monkeypatch):
|
||||
(tmp_path / "config.yaml").write_text(
|
||||
"display:\n tool_progress_command: true\n"
|
||||
)
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
|
||||
menu, hidden = telegram_menu_commands(max_commands=30)
|
||||
names = [name for name, _desc in menu]
|
||||
|
||||
assert len(names) == 30
|
||||
assert hidden > 0
|
||||
for name in (
|
||||
"debug",
|
||||
"restart",
|
||||
"update",
|
||||
"verbose",
|
||||
"commands",
|
||||
"help",
|
||||
"new",
|
||||
"stop",
|
||||
"status",
|
||||
):
|
||||
assert name in names
|
||||
|
||||
def test_includes_plugin_commands_via_lazy_discovery(self, tmp_path, monkeypatch):
|
||||
"""Telegram menu generation should discover plugin slash commands on first access."""
|
||||
from unittest.mock import patch
|
||||
|
||||
@@ -48,6 +48,27 @@ def test_init_creates_expected_tables(kanban_home):
|
||||
assert {"tasks", "task_links", "task_comments", "task_events"} <= names
|
||||
|
||||
|
||||
def test_connect_rejects_tls_record_in_sqlite_header(tmp_path, monkeypatch):
|
||||
"""Kanban should classify TLS-looking page-0 clobbers before WAL setup."""
|
||||
home = tmp_path / ".hermes"
|
||||
home.mkdir()
|
||||
monkeypatch.setenv("HERMES_HOME", str(home))
|
||||
monkeypatch.delenv("HERMES_KANBAN_DB", raising=False)
|
||||
monkeypatch.delenv("HERMES_KANBAN_HOME", raising=False)
|
||||
monkeypatch.setattr(Path, "home", lambda: tmp_path)
|
||||
|
||||
corrupt = home / "kanban.db"
|
||||
corrupt.write_bytes(b"SQLit" + bytes.fromhex("17 03 03 00 13") + b"x" * 32)
|
||||
|
||||
with pytest.raises(sqlite3.DatabaseError) as exc_info:
|
||||
kb.connect(board="default")
|
||||
|
||||
msg = str(exc_info.value)
|
||||
assert "file is not a database" in msg
|
||||
assert "TLS record header detected at byte offset 5" in msg
|
||||
assert "53 51 4c 69 74 17 03 03 00 13" in msg
|
||||
|
||||
|
||||
def test_connect_migrates_legacy_db_before_optional_column_indexes(tmp_path):
|
||||
"""Legacy DBs missing additive indexed columns must migrate cleanly.
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user