Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f3058fbec7 | |||
| bd3bad232b | |||
| a2546ed4fe | |||
| 2f253a4f55 | |||
| 80a0c829d7 | |||
| f4b32301ce | |||
| b3309f3c0f | |||
| 2882899925 |
@@ -29,13 +29,9 @@ runs:
|
||||
- name: hermes --help
|
||||
shell: bash
|
||||
run: |
|
||||
# Use the image's real ENTRYPOINT (/init + main-wrapper.sh) so
|
||||
# this exercises the actual production startup path. PR #30136
|
||||
# review caught that an --entrypoint override here had been
|
||||
# silently neutered by the s6-overlay migration — stage2-hook
|
||||
# ignores its CMD args, so the smoke test was a no-op.
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
--entrypoint /opt/hermes/docker/entrypoint.sh \
|
||||
"${{ inputs.image }}" --help
|
||||
|
||||
- name: hermes dashboard --help
|
||||
@@ -47,4 +43,5 @@ runs:
|
||||
# installed package.
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
--entrypoint /opt/hermes/docker/entrypoint.sh \
|
||||
"${{ inputs.image }}" dashboard --help
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
name: Docker / shell lint
|
||||
|
||||
# Lints the container build inputs: Dockerfile (via hadolint) and any shell
|
||||
# scripts under docker/ (via shellcheck). These catch the class of regression
|
||||
# the behavioral docker-publish smoke test can't — unquoted variable
|
||||
# expansions, silently-failing RUN commands, etc.
|
||||
#
|
||||
# Rules and ignores are documented in .hadolint.yaml at the repo root.
|
||||
# shellcheck severity is pinned to `error` so SC1091-style "can't follow
|
||||
# sourced script" info-level warnings don't fail the job — the .venv
|
||||
# activate script doesn't exist at lint time.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- Dockerfile
|
||||
- docker/**
|
||||
- .hadolint.yaml
|
||||
- .github/workflows/docker-lint.yml
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- Dockerfile
|
||||
- docker/**
|
||||
- .hadolint.yaml
|
||||
- .github/workflows/docker-lint.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: docker-lint-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
hadolint:
|
||||
name: Lint Dockerfile (hadolint)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: hadolint
|
||||
uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0
|
||||
with:
|
||||
dockerfile: Dockerfile
|
||||
config: .hadolint.yaml
|
||||
failure-threshold: warning
|
||||
|
||||
shellcheck:
|
||||
name: Lint docker/ shell scripts (shellcheck)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: shellcheck
|
||||
uses: ludeeus/action-shellcheck@00cae500b08a931fb5698e11e79bfbd38e612a38 # v2.0.0
|
||||
env:
|
||||
# Severity = error: SC1091 (can't follow sourced script) is info-
|
||||
# level and would otherwise fail when the venv activate script
|
||||
# doesn't exist at lint time.
|
||||
SHELLCHECK_OPTS: --severity=error
|
||||
with:
|
||||
scandir: ./docker
|
||||
@@ -27,9 +27,9 @@ on:
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Concurrency: push/release runs are NEVER cancelled so every merge gets
|
||||
# its own :main or release-tagged image. :latest is guarded separately
|
||||
# by the move-latest job. PR runs reuse a PR-scoped group with
|
||||
# Concurrency: push/release runs are NEVER cancelled so every merge gets its
|
||||
# own SHA-tagged image; :main and :latest are guarded separately by the
|
||||
# move-main and move-latest jobs. PR runs reuse a PR-scoped group with
|
||||
# cancel-in-progress: true so rapid pushes to the same PR collapse to the
|
||||
# latest commit.
|
||||
concurrency:
|
||||
@@ -80,56 +80,6 @@ jobs:
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Run the docker-integration test suite against the freshly-built
|
||||
# image already loaded into the local daemon (`:test`). These tests
|
||||
# are excluded from the sharded `tests.yml :: test` matrix on purpose
|
||||
# (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
|
||||
# shard would otherwise reach the session-scoped ``built_image``
|
||||
# fixture in ``tests/docker/conftest.py`` and start a 3-7min
|
||||
# ``docker build`` under a 180s pytest-timeout cap — guaranteed to
|
||||
# die in fixture setup.
|
||||
#
|
||||
# Piggybacking here avoids a second image build: the smoke test
|
||||
# already proved the image loads + runs, so the daemon has it under
|
||||
# `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
|
||||
# that. The fixture's ``HERMES_TEST_IMAGE`` branch (see
|
||||
# tests/docker/conftest.py:62-63) short-circuits the rebuild.
|
||||
#
|
||||
# Why this job and not a standalone one: the image is 5GB+; passing
|
||||
# it between jobs via ``docker save``/``upload-artifact`` is slower
|
||||
# than the build itself. Reusing the existing daemon state is the
|
||||
# cheapest path to coverage on every PR that touches docker code.
|
||||
# ---------------------------------------------------------------------
|
||||
- name: Install uv (for docker tests)
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Set up Python 3.11 (for docker tests)
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install Python dependencies (for docker tests)
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
# ``dev`` extra pulls in pytest, pytest-asyncio, pytest-timeout —
|
||||
# everything tests/docker/ needs. We deliberately avoid ``all``
|
||||
# here because the docker tests only drive the container via
|
||||
# subprocess and don't import hermes_agent's optional deps.
|
||||
uv pip install -e ".[dev]"
|
||||
|
||||
- name: Run docker integration tests
|
||||
env:
|
||||
# Skip rebuild; use the image already loaded by the build step.
|
||||
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
|
||||
# Match the policy in tests.yml :: test job — no accidental
|
||||
# real-API calls from inside the harness.
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/docker/ -v --tb=short
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
@@ -142,10 +92,10 @@ jobs:
|
||||
# pattern for multi-runner multi-platform builds.
|
||||
#
|
||||
# We apply the OCI revision label here (and again on arm64) because
|
||||
# the move-latest job reads it off the linux/amd64 sub-manifest
|
||||
# config of the floating tag to decide whether it's safe to advance.
|
||||
# The label must be on each per-arch image — manifest lists themselves
|
||||
# don't carry image config labels.
|
||||
# the move-main / move-latest jobs read it off the linux/amd64
|
||||
# sub-manifest config of the floating tag to decide whether it's safe
|
||||
# to advance. The label must be on each per-arch image — manifest
|
||||
# lists themselves don't carry image config labels.
|
||||
- name: Push amd64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
@@ -258,14 +208,8 @@ jobs:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stitch both per-arch digests into a single tagged multi-arch manifest.
|
||||
# This is a registry-side operation — no building, no layer re-push —
|
||||
# so it runs in ~30 seconds. On main pushes it produces :main; on
|
||||
# releases it produces :<release_tag_name>.
|
||||
#
|
||||
# For main pushes the ancestor check runs BEFORE the manifest push so
|
||||
# we never overwrite :main with an older commit. The top-level
|
||||
# concurrency group (`docker-${{ github.ref }}` with
|
||||
# `cancel-in-progress: false`) already serialises runs per ref; the
|
||||
# ancestor check is defense-in-depth.
|
||||
# so it runs in ~30 seconds. On main pushes it produces :sha-<sha>.
|
||||
# On releases it produces :<release_tag_name>.
|
||||
# ---------------------------------------------------------------------------
|
||||
merge:
|
||||
if: github.repository == 'NousResearch/hermes-agent' && (github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release')
|
||||
@@ -273,15 +217,10 @@ jobs:
|
||||
needs: [build-amd64, build-arm64]
|
||||
timeout-minutes: 10
|
||||
outputs:
|
||||
pushed_sha_tag: ${{ steps.mark_pushed.outputs.pushed }}
|
||||
pushed_release_tag: ${{ steps.mark_release_pushed.outputs.pushed }}
|
||||
release_tag: ${{ steps.tag.outputs.tag }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1000
|
||||
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
@@ -298,19 +237,120 @@ jobs:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Read the git revision label off the current :main manifest, then
|
||||
# use `git merge-base --is-ancestor` to check whether our commit is
|
||||
# a descendant of it. If :main doesn't exist yet, or its label is
|
||||
# missing, we treat that as "safe to publish". If another run
|
||||
# already advanced :main past us (or diverged), we skip and leave
|
||||
# it alone.
|
||||
- name: Decide whether to move :main
|
||||
# Compute the tag for this run. Main pushes use sha-<sha> (so every
|
||||
# commit gets its own immutable tag); releases use the release tag name.
|
||||
- name: Compute tag
|
||||
id: tag
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
echo "tag=${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "tag=sha-${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Create manifest list and push
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Build the arg array from each digest file (filename = the digest
|
||||
# hex, with no sha256: prefix; empty file content, only the name
|
||||
# matters). Using an array avoids shellcheck SC2046 and keeps
|
||||
# every digest a single argv token even under pathological names.
|
||||
args=()
|
||||
for digest_file in *; do
|
||||
args+=("${IMAGE_NAME}@sha256:${digest_file}")
|
||||
done
|
||||
docker buildx imagetools create \
|
||||
-t "${IMAGE_NAME}:${TAG}" \
|
||||
"${args[@]}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
docker buildx imagetools inspect "${IMAGE_NAME}:${TAG}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
# Signal to move-main that the SHA tag is live. Only on main pushes;
|
||||
# releases set pushed_release_tag instead.
|
||||
- name: Mark SHA tag pushed
|
||||
id: mark_pushed
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# Signal to move-latest that the release tag is live.
|
||||
- name: Mark release tag pushed
|
||||
id: mark_release_pushed
|
||||
if: github.event_name == 'release'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Move :main to point at the SHA tag the merge job pushed.
|
||||
#
|
||||
# :main is the floating tag that tracks the tip of the main branch. Every
|
||||
# merge to main retags :main forward. Users who want "latest dev build"
|
||||
# pull :main; users who want stable releases pull :latest.
|
||||
#
|
||||
# The real serialization guarantee comes from the top-level concurrency
|
||||
# group (`docker-${{ github.ref }}` with `cancel-in-progress: false`),
|
||||
# which ensures at most one workflow run for this ref executes at a time.
|
||||
# That means two move-main steps for the same ref cannot overlap.
|
||||
#
|
||||
# This job has its own concurrency group as defense-in-depth: if the
|
||||
# top-level group is ever loosened, queued move-mains will run serially
|
||||
# in arrival order, each one running the ancestor check below and either
|
||||
# advancing :main or skipping. `cancel-in-progress: false` matches the
|
||||
# top-level setting — we don't want rapid pushes to cancel a queued
|
||||
# move-main, because the ancestor check is the real safety mechanism
|
||||
# and queueing is cheap (move-main is a ~30s registry op).
|
||||
#
|
||||
# Combined with the ancestor check, this means :main only ever moves
|
||||
# forward in git history.
|
||||
# ---------------------------------------------------------------------------
|
||||
move-main:
|
||||
if: |
|
||||
github.repository == 'NousResearch/hermes-agent'
|
||||
&& github.event_name == 'push'
|
||||
&& github.ref == 'refs/heads/main'
|
||||
&& needs.merge.outputs.pushed_sha_tag == 'true'
|
||||
needs: merge
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
concurrency:
|
||||
group: docker-move-main-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1000
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Read the git revision label off the current :main manifest, then
|
||||
# use `git merge-base --is-ancestor` to check whether our commit is a
|
||||
# descendant of it. If :main doesn't exist yet, or its label is
|
||||
# missing, we treat that as "safe to publish". If another run already
|
||||
# advanced :main past us (or diverged), we skip and leave it alone.
|
||||
- name: Decide whether to move :main
|
||||
id: main_check
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
|
||||
# Pull the JSON for the linux/amd64 sub-manifest's config and extract
|
||||
# the OCI revision label with jq — Go template field access can't
|
||||
# handle dots in map keys, so using json+jq is the robust route.
|
||||
image_json=$(
|
||||
docker buildx imagetools inspect "${image}:main" \
|
||||
--format '{{ json (index .Image "linux/amd64") }}' \
|
||||
@@ -343,6 +383,7 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Make sure we have the :main commit locally for merge-base.
|
||||
if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
|
||||
git fetch --no-tags --prune origin \
|
||||
"+refs/heads/main:refs/remotes/origin/main" \
|
||||
@@ -355,6 +396,7 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Our SHA must be a descendant of the current :main to be safe.
|
||||
if git merge-base --is-ancestor "${current_sha}" "${GITHUB_SHA}"; then
|
||||
echo "Our commit is a descendant of :main — safe to advance."
|
||||
echo "push_main=true" >> "$GITHUB_OUTPUT"
|
||||
@@ -363,48 +405,19 @@ jobs:
|
||||
echo "push_main=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Compute the tag for this run. Main pushes tag directly as :main
|
||||
# (no per-commit SHA tags); releases use the release tag name.
|
||||
- name: Compute tag
|
||||
id: tag
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
echo "tag=${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "tag=main" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Gate the manifest push on the ancestor check for main pushes.
|
||||
# For releases there is no gate — the check doesn't even run.
|
||||
- name: Create manifest list and push
|
||||
if: github.event_name != 'push' || steps.main_check.outputs.push_main == 'true'
|
||||
working-directory: /tmp/digests
|
||||
# Retag the already-pushed SHA manifest as :main. This is a registry-
|
||||
# side operation — no rebuild, no layer re-push — so it's quick and
|
||||
# atomic per-tag. The ancestor check above plus the non-cancelling
|
||||
# concurrency group on this job together guarantee we only ever move :main
|
||||
# forward in git history.
|
||||
- name: Move :main to this SHA
|
||||
if: steps.main_check.outputs.push_main == 'true'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
args=()
|
||||
for digest_file in *; do
|
||||
args+=("${IMAGE_NAME}@sha256:${digest_file}")
|
||||
done
|
||||
image=nousresearch/hermes-agent
|
||||
docker buildx imagetools create \
|
||||
-t "${IMAGE_NAME}:${TAG}" \
|
||||
"${args[@]}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
- name: Inspect image
|
||||
if: github.event_name != 'push' || steps.main_check.outputs.push_main == 'true'
|
||||
run: |
|
||||
docker buildx imagetools inspect "${IMAGE_NAME}:${TAG}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
# Signal to move-latest that the release tag is live.
|
||||
- name: Mark release tag pushed
|
||||
id: mark_release_pushed
|
||||
if: github.event_name == 'release'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
--tag "${image}:main" \
|
||||
"${image}:sha-${GITHUB_SHA}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Move :latest to point at the release tag the merge job pushed.
|
||||
@@ -414,10 +427,10 @@ jobs:
|
||||
#
|
||||
# We still run an ancestor check against the existing :latest so that a
|
||||
# backport release on an older branch (e.g. patching v1.1.5 after v1.2.3
|
||||
# is out) doesn't drag :latest backwards. The check is the same shape
|
||||
# as the ancestor check in the merge job for :main: read the OCI
|
||||
# revision label off the current :latest, look up that commit in git,
|
||||
# and only advance if our release commit is a strict descendant.
|
||||
# is out) doesn't drag :latest backwards. The check is the same shape as
|
||||
# move-main: read the OCI revision label off the current :latest, look up
|
||||
# that commit in git, and only advance if our release commit is a strict
|
||||
# descendant.
|
||||
# ---------------------------------------------------------------------------
|
||||
move-latest:
|
||||
if: |
|
||||
|
||||
@@ -47,17 +47,14 @@ jobs:
|
||||
HEAD="${{ github.event.pull_request.head.sha }}"
|
||||
|
||||
# Added lines only, excluding lockfiles.
|
||||
# Three-dot diff (base...head) diffs from the merge base to HEAD,
|
||||
# so only changes introduced by this PR are included — not changes
|
||||
# that landed on main after the PR branched off.
|
||||
DIFF=$(git diff "$BASE"..."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)
|
||||
DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)
|
||||
|
||||
FINDINGS=""
|
||||
|
||||
# --- .pth files (auto-execute on Python startup) ---
|
||||
# The exact mechanism used in the litellm supply chain attack:
|
||||
# https://github.com/BerriAI/litellm/issues/24512
|
||||
PTH_FILES=$(git diff --name-only "$BASE"..."$HEAD" | grep '\.pth$' || true)
|
||||
PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true)
|
||||
if [ -n "$PTH_FILES" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: .pth file added or modified
|
||||
@@ -100,7 +97,7 @@ jobs:
|
||||
|
||||
# --- Install-hook files (setup.py/setup.cfg/sitecustomize.py/usercustomize.py/__init__.pth) ---
|
||||
# These execute during pip install or interpreter startup.
|
||||
SETUP_HITS=$(git diff --name-only "$BASE"..."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
|
||||
SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
|
||||
if [ -n "$SETUP_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: Install-hook file added or modified
|
||||
@@ -161,7 +158,7 @@ jobs:
|
||||
HEAD="${{ github.event.pull_request.head.sha }}"
|
||||
|
||||
# Only check added lines in pyproject.toml
|
||||
ADDED=$(git diff "$BASE"..."$HEAD" -- pyproject.toml | grep '^+' | grep -v '^+++' || true)
|
||||
ADDED=$(git diff "$BASE".."$HEAD" -- pyproject.toml | grep '^+' | grep -v '^+++' || true)
|
||||
|
||||
if [ -z "$ADDED" ]; then
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
|
||||
+7
-103
@@ -24,34 +24,12 @@ jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
slice: [1, 2, 3, 4, 5, 6]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Restore duration cache
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: test_durations.json
|
||||
# Single stable key. main always overwrites, PRs always find it.
|
||||
key: test-durations
|
||||
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
RG_VERSION=15.1.0
|
||||
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
|
||||
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
|
||||
curl -sSfL -o "$RG_TARBALL" \
|
||||
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
|
||||
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
|
||||
tar -xzf "$RG_TARBALL"
|
||||
sudo mv "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl/rg" /usr/local/bin/rg
|
||||
rm -rf "$RG_TARBALL" "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl"
|
||||
rg --version
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
@@ -65,79 +43,16 @@ jobs:
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
- name: Run tests (slice ${{ matrix.slice }}/6)
|
||||
# Per-file isolation via scripts/run_tests_parallel.py: discovers
|
||||
# every test_*.py file under tests/ (excluding integration/ + e2e/),
|
||||
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
|
||||
# with bounded parallelism. No xdist, no shared workers, no
|
||||
# module-level state leakage between files.
|
||||
#
|
||||
# Why per-file (not per-test): per-test spawn cost (~250ms × 17k
|
||||
# tests = 70min CPU minimum) blew the wall-clock budget. Per-file
|
||||
# spawn (~250ms × ~850 files = ~3.5min) fits while still giving
|
||||
# every file a fresh interpreter — the only isolation boundary
|
||||
# that matters in practice (cross-file leakage was the original
|
||||
# flake source; intra-file is the test author's responsibility).
|
||||
#
|
||||
# Why drop xdist entirely: xdist's persistent workers accumulate
|
||||
# state across files, which is exactly the leakage we wanted to
|
||||
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
|
||||
# the job with cleaner semantics.
|
||||
#
|
||||
# Matrix slicing (--slice I/N): files are distributed across 6
|
||||
# jobs by cached duration (LPT algorithm) so each job gets
|
||||
# roughly equal wall time. Without a cache, files default to 2s
|
||||
# estimate and get split roughly evenly by count — still correct,
|
||||
# just not perfectly balanced.
|
||||
- name: Run tests
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python scripts/run_tests_parallel.py --slice ${{ matrix.slice }}/6
|
||||
python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto --timeout=30 --timeout-method=signal
|
||||
env:
|
||||
# Ensure tests don't accidentally call real APIs
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
|
||||
- name: Upload per-slice durations
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: test-durations-slice-${{ matrix.slice }}
|
||||
path: test_durations.json
|
||||
retention-days: 1
|
||||
|
||||
# Merge per-slice duration data into a single cache, so future runs
|
||||
# (including PRs) get balanced slicing.
|
||||
save-durations:
|
||||
needs: test
|
||||
if: always() && github.ref == 'refs/heads/main'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Download all slice durations
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
with:
|
||||
pattern: test-durations-slice-*
|
||||
path: durations
|
||||
merge-multiple: true
|
||||
|
||||
- name: Merge into single durations file
|
||||
run: |
|
||||
python3 -c "
|
||||
import json, glob, os
|
||||
merged = {}
|
||||
for f in glob.glob('durations/*test_durations.json'):
|
||||
with open(f) as fh:
|
||||
merged.update(json.load(fh))
|
||||
with open('test_durations.json', 'w') as fh:
|
||||
json.dump(merged, fh, indent=2, sort_keys=True)
|
||||
print(f'Merged {len(merged)} file durations')
|
||||
"
|
||||
|
||||
- name: Save merged duration cache
|
||||
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: test_durations.json
|
||||
key: test-durations
|
||||
|
||||
e2e:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
@@ -145,19 +60,8 @@ jobs:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
RG_VERSION=15.1.0
|
||||
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
|
||||
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
|
||||
curl -sSfL -o "$RG_TARBALL" \
|
||||
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
|
||||
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
|
||||
tar -xzf "$RG_TARBALL"
|
||||
sudo mv "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl/rg" /usr/local/bin/rg
|
||||
rm -rf "$RG_TARBALL" "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl"
|
||||
rg --version
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
@@ -178,4 +82,4 @@ jobs:
|
||||
env:
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
|
||||
@@ -18,8 +18,6 @@ __pycache__/web_tools.cpython-310.pyc
|
||||
logs/
|
||||
data/
|
||||
.pytest_cache/
|
||||
test_durations.json
|
||||
.pytest-cache/
|
||||
tmp/
|
||||
temp_vision_images/
|
||||
hermes-*/*
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
# hadolint configuration for the Hermes Agent Dockerfile.
|
||||
# See https://github.com/hadolint/hadolint#configure for rules.
|
||||
#
|
||||
# We want hadolint to surface NEW Dockerfile lint regressions, but we
|
||||
# don't want to rewrite the existing image to silence rules that are
|
||||
# either intentional or pragmatic tradeoffs for this project. Each
|
||||
# ignore below has a one-line justification.
|
||||
failure-threshold: warning
|
||||
|
||||
ignored:
|
||||
# Pin versions in apt get install. We intentionally don't pin common
|
||||
# tools (curl, git, openssh-client, etc.) — security updates flow in
|
||||
# via the periodic base-image rebuild, and pinning would lock us to
|
||||
# superseded patch releases. Same rationale as nearly every distro-
|
||||
# base official image (python, node, debian).
|
||||
- DL3008
|
||||
# Use WORKDIR to switch to a directory. The image uses `(cd web && …)`
|
||||
# / `(cd ../ui-tui && …)` inline subshells for one-off build steps
|
||||
# because they don't affect later RUN commands; promoting them to
|
||||
# full WORKDIR switches with restores would obscure intent.
|
||||
- DL3003
|
||||
# Multiple consecutive RUN instructions. The `touch README.md` + `uv
|
||||
# sync` split is intentional — `touch` is cheap, `uv sync` is the
|
||||
# expensive layer-cached step we want isolated, and merging them
|
||||
# would invalidate the cache for trivial changes.
|
||||
- DL3059
|
||||
# Last USER should not be root. /init (s6-overlay) runs as root so the
|
||||
# stage2 hook can usermod/groupmod and chown the data volume per
|
||||
# HERMES_UID at runtime; each supervised service then drops to the
|
||||
# hermes user via `s6-setuidgid`.
|
||||
- DL3002
|
||||
|
||||
# Only allow base images from these trusted registries. (Base images are also pinned by SHA256 digest — we already do this.)
|
||||
trustedRegistries:
|
||||
- docker.io
|
||||
- ghcr.io
|
||||
@@ -1013,39 +1013,17 @@ def profile_env(tmp_path, monkeypatch):
|
||||
|
||||
**ALWAYS use `scripts/run_tests.sh`** — do not call `pytest` directly. The script enforces
|
||||
hermetic environment parity with CI (unset credential vars, TZ=UTC, LANG=C.UTF-8,
|
||||
`-n auto` xdist workers, in-tree subprocess-isolation plugin). Direct `pytest`
|
||||
on a 16+ core developer machine with API keys set diverges from CI in ways
|
||||
that have caused multiple "works locally, fails in CI" incidents (and the reverse).
|
||||
4 xdist workers matching GHA ubuntu-latest). Direct `pytest` on a 16+ core
|
||||
developer machine with API keys set diverges from CI in ways that have caused
|
||||
multiple "works locally, fails in CI" incidents (and the reverse).
|
||||
|
||||
```bash
|
||||
scripts/run_tests.sh # full suite, CI-parity
|
||||
scripts/run_tests.sh tests/gateway/ # one directory
|
||||
scripts/run_tests.sh tests/agent/test_foo.py::test_x # one test
|
||||
scripts/run_tests.sh -v --tb=long # pass-through pytest flags
|
||||
scripts/run_tests.sh --no-isolate tests/foo/ # disable subprocess isolation (faster, for debugging)
|
||||
```
|
||||
|
||||
### Subprocess-per-test isolation
|
||||
|
||||
Every test runs in a freshly-spawned Python subprocess via the in-tree plugin
|
||||
at `tests/_isolate_plugin.py`. This means module-level dicts/sets and
|
||||
ContextVars from one test cannot leak into the next — the historic
|
||||
`_reset_module_state` autouse fixture is gone.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- The plugin uses `multiprocessing.get_context("spawn")`, which works on
|
||||
Linux, macOS, and Windows alike (POSIX `fork` is not used).
|
||||
- Per-test overhead is ~0.5–1.0s (Python startup + pytest collection). xdist
|
||||
parallelism amortizes this across cores; on a 20-core box the full suite
|
||||
finishes in roughly the same wall time as before, but flake-free.
|
||||
- `isolate_timeout` (configured in `pyproject.toml`) caps each test at 30s.
|
||||
Hangs are killed and surfaced as a failure report.
|
||||
- Pass `--no-isolate` to disable isolation — useful when debugging a single
|
||||
test interactively, or when you specifically want to verify state leakage.
|
||||
- The plugin disables itself in child processes (sentinel envvar
|
||||
`HERMES_ISOLATE_CHILD=1`), so there's no fork-bomb risk.
|
||||
|
||||
### Why the wrapper (and why the old "just call pytest" doesn't work)
|
||||
|
||||
Five real sources of local-vs-CI drift the script closes:
|
||||
@@ -1056,7 +1034,7 @@ Five real sources of local-vs-CI drift the script closes:
|
||||
| HOME / `~/.hermes/` | Your real config+auth.json | Temp dir per test |
|
||||
| Timezone | Local TZ (PDT etc.) | UTC |
|
||||
| Locale | Whatever is set | C.UTF-8 |
|
||||
| xdist workers | `-n auto` = all cores | `-n auto` (safe — subprocess isolation prevents cross-worker flakes) |
|
||||
| xdist workers | `-n auto` = all cores (20+ on a workstation) | `-n 4` matching CI |
|
||||
|
||||
`tests/conftest.py` also enforces points 1-4 as an autouse fixture so ANY pytest
|
||||
invocation (including IDE integrations) gets hermetic behavior — but the wrapper
|
||||
@@ -1064,21 +1042,15 @@ is belt-and-suspenders.
|
||||
|
||||
### Running without the wrapper (only if you must)
|
||||
|
||||
If you can't use the wrapper (e.g. inside an IDE that shells pytest directly),
|
||||
at minimum activate the venv. The isolation plugin loads automatically from
|
||||
`addopts` in `pyproject.toml`, so you get the same per-test process isolation
|
||||
either way.
|
||||
If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
|
||||
pytest directly), at minimum activate the venv and pass `-n 4`:
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
python -m pytest tests/ -q
|
||||
python -m pytest tests/ -q -n 4
|
||||
```
|
||||
|
||||
If you need to bypass isolation for fast feedback while debugging:
|
||||
|
||||
```bash
|
||||
python -m pytest tests/agent/test_foo.py -q --no-isolate
|
||||
```
|
||||
Worker count above 4 will surface test-ordering flakes that CI never sees.
|
||||
|
||||
Always run the full suite before pushing changes.
|
||||
|
||||
|
||||
+10
-114
@@ -1,4 +1,5 @@
|
||||
FROM ghcr.io/astral-sh/uv:0.11.6-python3.13-trixie@sha256:b3c543b6c4f23a5f2df22866bd7857e5d304b67a564f4feab6ac22044dde719b AS uv_source
|
||||
FROM tianon/gosu:1.19-trixie@sha256:3b176695959c71e123eb390d427efc665eeb561b1540e82679c15e992006b8b9 AS gosu_source
|
||||
FROM debian:13.4
|
||||
|
||||
# Disable Python stdout buffering to ensure logs are printed immediately
|
||||
@@ -8,68 +9,18 @@ ENV PYTHONUNBUFFERED=1
|
||||
# install survives the /opt/data volume overlay at runtime.
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
|
||||
# Install system dependencies in one layer, clear APT cache.
|
||||
# tini was previously PID 1 to reap orphaned zombie processes (MCP stdio
|
||||
# subprocesses, git, bun, etc.) that would otherwise accumulate when hermes
|
||||
# ran as PID 1. See #15012. Phase 2 of the s6-overlay supervision plan
|
||||
# replaces tini with s6-overlay's /init (PID 1 = s6-svscan), which reaps
|
||||
# zombies non-blockingly on SIGCHLD and additionally supervises the main
|
||||
# hermes process, the dashboard, and per-profile gateways.
|
||||
# Install system dependencies in one layer, clear APT cache
|
||||
# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
|
||||
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential curl nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli xz-utils && \
|
||||
build-essential curl nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# ---------- s6-overlay install ----------
|
||||
# s6-overlay provides supervision for the main hermes process, the dashboard,
|
||||
# and per-profile gateways. /init becomes PID 1 below — see ENTRYPOINT.
|
||||
#
|
||||
# Multi-arch: BuildKit auto-populates TARGETARCH (amd64 / arm64). s6-overlay
|
||||
# uses tarball names keyed on the kernel arch string (x86_64 / aarch64), so
|
||||
# we map between them inline. The noarch + symlinks tarballs are
|
||||
# architecture-independent and reused as-is.
|
||||
#
|
||||
# We use `curl` instead of `ADD` for the per-arch tarball because `ADD`
|
||||
# only expands ARG values verbatim: it cannot map TARGETARCH (amd64 / arm64)
|
||||
# to the x86_64 / aarch64 name used in the URL. One ADD per arch would
|
||||
# download both on every build and leave dead bytes in the cache. A single
|
||||
# curl + arch-keyed URL is simpler and cache-friendlier.
|
||||
#
|
||||
# Supply-chain integrity: every tarball is checksum-verified against the
|
||||
# upstream-published SHA256. To bump S6_OVERLAY_VERSION, fetch the four
|
||||
# `.sha256` files from the corresponding release and update the ARGs. The
|
||||
# checksum verification happens during build, so a compromised release artifact
|
||||
# fails the build loudly instead of silently producing a tampered image.
|
||||
ARG TARGETARCH
|
||||
ARG S6_OVERLAY_VERSION=3.2.3.0
|
||||
ARG S6_OVERLAY_NOARCH_SHA256=b720f9d9340efc8bb07528b9743813c836e4b02f8693d90241f047998b4c53cf
|
||||
ARG S6_OVERLAY_X86_64_SHA256=a93f02882c6ed46b21e7adb5c0add86154f01236c93cd82c7d682722e8840563
|
||||
ARG S6_OVERLAY_AARCH64_SHA256=0952056ff913482163cc30e35b2e944b507ba1025d78f5becbb89367bf344581
|
||||
ARG S6_OVERLAY_SYMLINKS_SHA256=a60dc5235de3ecbcf874b9c1f18d73263ab99b289b9329aa950e8729c4789f0e
|
||||
ADD https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-noarch.tar.xz /tmp/
|
||||
ADD https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-symlinks-noarch.tar.xz /tmp/
|
||||
RUN set -eu; \
|
||||
case "${TARGETARCH:-amd64}" in \
|
||||
amd64) s6_arch="x86_64"; s6_arch_sha="${S6_OVERLAY_X86_64_SHA256}" ;; \
|
||||
arm64) s6_arch="aarch64"; s6_arch_sha="${S6_OVERLAY_AARCH64_SHA256}" ;; \
|
||||
*) echo "Unsupported TARGETARCH=${TARGETARCH} for s6-overlay" >&2; exit 1 ;; \
|
||||
esac; \
|
||||
curl -fsSL --retry 3 -o /tmp/s6-overlay-arch.tar.xz \
|
||||
"https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-${s6_arch}.tar.xz"; \
|
||||
{ \
|
||||
printf '%s %s\n' "${S6_OVERLAY_NOARCH_SHA256}" /tmp/s6-overlay-noarch.tar.xz; \
|
||||
printf '%s %s\n' "${s6_arch_sha}" /tmp/s6-overlay-arch.tar.xz; \
|
||||
printf '%s %s\n' "${S6_OVERLAY_SYMLINKS_SHA256}" /tmp/s6-overlay-symlinks-noarch.tar.xz; \
|
||||
} > /tmp/s6-overlay.sha256; \
|
||||
sha256sum -c /tmp/s6-overlay.sha256; \
|
||||
tar -C / -Jxpf /tmp/s6-overlay-noarch.tar.xz; \
|
||||
tar -C / -Jxpf /tmp/s6-overlay-arch.tar.xz; \
|
||||
tar -C / -Jxpf /tmp/s6-overlay-symlinks-noarch.tar.xz; \
|
||||
rm /tmp/s6-overlay-*.tar.xz /tmp/s6-overlay.sha256
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
RUN useradd -u 10000 -m -d /opt/data hermes
|
||||
|
||||
COPY --chmod=0755 --from=gosu_source /gosu /usr/local/bin/
|
||||
COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/
|
||||
|
||||
WORKDIR /opt/hermes
|
||||
@@ -152,73 +103,18 @@ RUN cd web && npm run build && \
|
||||
USER root
|
||||
RUN chmod -R a+rX /opt/hermes && \
|
||||
chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules
|
||||
# Start as root so the s6-overlay stage2 hook can usermod/groupmod and chown
|
||||
# the data volume. Each supervised service then drops to the hermes user via
|
||||
# `s6-setuidgid hermes` in its run script. If HERMES_UID is unset, services
|
||||
# run as the default hermes user (UID 10000).
|
||||
# Start as root so the entrypoint can usermod/groupmod + gosu.
|
||||
# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
|
||||
|
||||
# ---------- Link hermes-agent itself (editable) ----------
|
||||
# Deps are already installed in the cached layer above; `--no-deps` makes
|
||||
# this a fast (~1s) egg-link creation with no resolution or downloads.
|
||||
RUN uv pip install --no-cache-dir --no-deps -e "."
|
||||
|
||||
# ---------- s6-overlay service wiring ----------
|
||||
# Static services declared at build time: main-hermes + dashboard.
|
||||
# Per-profile gateway services are registered dynamically at runtime by
|
||||
# the profile create/delete hooks (Phase 4); they live under
|
||||
# /run/service/ (tmpfs) and are reconciled on container restart by
|
||||
# /etc/cont-init.d/02-reconcile-profiles (Phase 4 Task 4.0).
|
||||
COPY docker/s6-rc.d/ /etc/s6-overlay/s6-rc.d/
|
||||
|
||||
# stage2-hook handles UID/GID remap, volume chown, config seeding,
|
||||
# skills sync — all the work the old entrypoint.sh did before
|
||||
# `exec hermes`. Wired in as cont-init.d/01- so it
|
||||
# runs before user services start.
|
||||
#
|
||||
# 02-reconcile-profiles re-creates per-profile gateway s6 service
|
||||
# slots from $HERMES_HOME/profiles/<name>/ after a container restart
|
||||
# (the /run/service/ scandir is tmpfs and wiped on restart). Phase 4.
|
||||
RUN mkdir -p /etc/cont-init.d && \
|
||||
printf '#!/bin/sh\nexec /opt/hermes/docker/stage2-hook.sh\n' \
|
||||
> /etc/cont-init.d/01-hermes-setup && \
|
||||
chmod +x /etc/cont-init.d/01-hermes-setup
|
||||
COPY --chmod=0755 docker/cont-init.d/015-supervise-perms /etc/cont-init.d/015-supervise-perms
|
||||
COPY --chmod=0755 docker/cont-init.d/02-reconcile-profiles /etc/cont-init.d/02-reconcile-profiles
|
||||
|
||||
# ---------- Runtime ----------
|
||||
ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
|
||||
ENV HERMES_HOME=/opt/data
|
||||
# Pre-s6 entrypoint.sh did `source .venv/bin/activate` which exported
|
||||
# the venv bin onto PATH; Architecture B's main-wrapper.sh does the
|
||||
# same for the container's main process, but `docker exec` and our
|
||||
# cont-init.d scripts don't pass through the wrapper. Expose the venv
|
||||
# bin globally so `docker exec <container> hermes ...` and any
|
||||
# subprocess that doesn't activate the venv first still find hermes.
|
||||
ENV PATH="/opt/hermes/.venv/bin:/opt/data/.local/bin:${PATH}"
|
||||
ENV PATH="/opt/data/.local/bin:${PATH}"
|
||||
RUN mkdir -p /opt/data
|
||||
VOLUME [ "/opt/data" ]
|
||||
|
||||
# s6-overlay's /init is PID 1. It sets up the supervision tree, runs
|
||||
# /etc/cont-init.d/* (our stage2 hook), starts s6-rc services
|
||||
# declared in /etc/s6-overlay/s6-rc.d/, then exec's its remaining
|
||||
# argv as the container's "main program" with stdin/stdout/stderr
|
||||
# inherited (this is what makes interactive --tui work). When the
|
||||
# main program exits, /init begins stage 3 shutdown and the container
|
||||
# exits with the program's exit code. Replaces tini — see Phase 2 of
|
||||
# docs/plans/2026-05-07-s6-overlay-dynamic-subagent-gateways.md.
|
||||
#
|
||||
# We use the ENTRYPOINT+CMD split rather than CMD alone so the
|
||||
# wrapper is prepended to user-supplied args automatically:
|
||||
#
|
||||
# docker run <image> → /init main-wrapper.sh (CMD default)
|
||||
# docker run <image> chat -q "hi" → /init main-wrapper.sh chat -q hi
|
||||
# docker run <image> sleep infinity → /init main-wrapper.sh sleep infinity
|
||||
# docker run <image> --tui → /init main-wrapper.sh --tui
|
||||
#
|
||||
# main-wrapper.sh handles arg routing (bare-exec vs. hermes
|
||||
# subcommand vs. no-args), drops to the hermes user via s6-setuidgid,
|
||||
# and exec's the final program so its exit code becomes the container
|
||||
# exit code. Without the wrapper-as-ENTRYPOINT, leading-dash args
|
||||
# like `--version` would be intercepted by /init's POSIX shell.
|
||||
ENTRYPOINT [ "/init", "/opt/hermes/docker/main-wrapper.sh" ]
|
||||
CMD [ ]
|
||||
ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
|
||||
|
||||
@@ -79,27 +79,6 @@ hermes doctor # Diagnose any issues
|
||||
|
||||
📖 **[Full documentation →](https://hermes-agent.nousresearch.com/docs/)**
|
||||
|
||||
---
|
||||
|
||||
## Skip the API-key collection — Nous Portal
|
||||
|
||||
Hermes works with whatever provider you want — that's not changing. But if you'd rather not collect five separate API keys for the model, web search, image generation, TTS, and a cloud browser, **[Nous Portal](https://portal.nousresearch.com)** covers all of them under one subscription:
|
||||
|
||||
- **300+ models** — pick any of them with `/model <name>`
|
||||
- **Tool Gateway** — web search (Firecrawl), image generation (FAL), text-to-speech (OpenAI), cloud browser (Browser Use), all routed through your subscription. No extra accounts.
|
||||
|
||||
One command from a fresh install:
|
||||
|
||||
```bash
|
||||
hermes setup --portal
|
||||
```
|
||||
|
||||
That logs you in via OAuth, sets Nous as your provider, and turns on the Tool Gateway. Check what's wired up any time with `hermes portal status`. Full details on the [Tool Gateway docs page](https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway).
|
||||
|
||||
You can still bring your own keys per-tool whenever you want — the gateway is per-backend, not all-or-nothing.
|
||||
|
||||
---
|
||||
|
||||
## CLI vs Messaging Quick Reference
|
||||
|
||||
Hermes has two entry points: start the terminal UI with `hermes`, or run the gateway and talk to it from Telegram, Discord, Slack, WhatsApp, Signal, or Email. Once you're in a conversation, many slash commands are shared across both interfaces.
|
||||
|
||||
@@ -65,27 +65,6 @@ hermes doctor # 诊断问题
|
||||
|
||||
📖 **[完整文档 →](https://hermes-agent.nousresearch.com/docs/)**
|
||||
|
||||
---
|
||||
|
||||
## 省去到处收集 API Key — Nous Portal
|
||||
|
||||
Hermes 始终允许你使用任意服务商,这点不会改变。但如果你不想为模型、网页搜索、图像生成、TTS、云浏览器分别去申请五个不同的 API Key,**[Nous Portal](https://portal.nousresearch.com)** 用一个订阅就能覆盖全部:
|
||||
|
||||
- **300+ 模型** — 用 `/model <name>` 随时切换
|
||||
- **Tool Gateway** — 网页搜索(Firecrawl)、图像生成(FAL)、文本转语音(OpenAI)、云浏览器(Browser Use),全部通过订阅托管。无需额外注册任何账户。
|
||||
|
||||
全新安装时一条命令即可:
|
||||
|
||||
```bash
|
||||
hermes setup --portal
|
||||
```
|
||||
|
||||
它会通过 OAuth 登录、把 Nous 设为推理服务商,并启用 Tool Gateway。随时用 `hermes portal status` 查看路由状态。完整说明见 [Tool Gateway 文档](https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway)。
|
||||
|
||||
你随时可以按工具单独切回自己的 API Key — Gateway 是按工具粒度生效的,不是一刀切。
|
||||
|
||||
---
|
||||
|
||||
## CLI 与消息平台 快速对照
|
||||
|
||||
Hermes 有两种入口:用 `hermes` 启动终端 UI,或运行网关从 Telegram、Discord、Slack、WhatsApp、Signal 或 Email 与之对话。进入对话后,许多斜杠命令在两种界面中通用。
|
||||
|
||||
@@ -1534,11 +1534,7 @@ class HermesACPAgent(acp.Agent):
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
|
||||
if final_response and conn and (not streamed_message or result.get("response_transformed")):
|
||||
# Deliver the final response when streaming did not already send it,
|
||||
# or when a plugin hook transformed the response after streaming
|
||||
# finished (e.g. transform_llm_output) — otherwise the appended /
|
||||
# rewritten text never reaches the client.
|
||||
if final_response and conn and not streamed_message:
|
||||
update = acp.update_agent_message_text(final_response)
|
||||
await conn.session_update(session_id, update)
|
||||
|
||||
|
||||
+11
-126
@@ -71,71 +71,6 @@ def _ra():
|
||||
return run_agent
|
||||
|
||||
|
||||
def _normalized_custom_base_url(value: Any) -> str:
|
||||
if not isinstance(value, str):
|
||||
return ""
|
||||
return value.strip().rstrip("/")
|
||||
|
||||
|
||||
def _custom_provider_model_matches(agent_model: str, entry: Dict[str, Any]) -> bool:
|
||||
provider_model = str(entry.get("model", "") or "").strip().lower()
|
||||
if not provider_model:
|
||||
return True
|
||||
return provider_model == str(agent_model or "").strip().lower()
|
||||
|
||||
|
||||
def _custom_provider_extra_body_for_agent(
    *,
    provider: str,
    model: str,
    base_url: str,
    custom_providers: List[Dict[str, Any]],
) -> Optional[Dict[str, Any]]:
    """Pick the ``extra_body`` configured for the agent's custom endpoint.

    Only applies when ``provider`` is ``"custom"`` (case-insensitive) and
    ``base_url`` normalizes to a non-empty string. Entries are considered
    when they are dicts, their normalized ``base_url`` equals the agent's,
    and their ``extra_body`` is a non-empty dict.

    Returns:
        A shallow copy of the ``extra_body`` of the first entry whose
        ``model`` matches ``model``; otherwise a shallow copy of the first
        matching entry that names no model; otherwise ``None``.
    """
    if (provider or "").strip().lower() != "custom":
        return None

    target_url = _normalized_custom_base_url(base_url)
    if not target_url:
        return None

    fallback: Optional[Dict[str, Any]] = None
    for entry in custom_providers or []:
        if not isinstance(entry, dict):
            continue
        if _normalized_custom_base_url(entry.get("base_url")) != target_url:
            continue
        extra_body = entry.get("extra_body")
        if not isinstance(extra_body, dict) or not extra_body:
            continue
        provider_model = str(entry.get("model", "") or "").strip()
        if provider_model:
            # Model-specific entry: wins immediately, but only for its model.
            if _custom_provider_model_matches(model, entry):
                return dict(extra_body)
        elif fallback is None:
            # Model-less entry: remember the first one as the fallback.
            fallback = dict(extra_body)

    return fallback
|
||||
|
||||
|
||||
def _merge_custom_provider_extra_body(agent, custom_providers: List[Dict[str, Any]]) -> None:
    """Fold the matching custom-provider ``extra_body`` into ``agent.request_overrides``.

    Looks up the ``extra_body`` for the agent's provider / model / base URL
    and, when one is found, stores a merged copy under the ``"extra_body"``
    key of a new ``request_overrides`` dict assigned back onto ``agent``.
    Keys already present in the agent's existing ``extra_body`` override take
    precedence over the provider-configured ones. Does nothing when no
    provider ``extra_body`` applies.
    """
    extra_body = _custom_provider_extra_body_for_agent(
        provider=agent.provider,
        model=agent.model,
        base_url=agent.base_url,
        custom_providers=custom_providers,
    )
    if not extra_body:
        return

    # Work on copies so the agent's current overrides dict is not mutated.
    overrides = dict(getattr(agent, "request_overrides", {}) or {})
    merged_extra_body = dict(extra_body)
    existing_extra_body = overrides.get("extra_body")
    if isinstance(existing_extra_body, dict):
        # Existing per-agent values win over provider-level defaults.
        merged_extra_body.update(existing_extra_body)
    overrides["extra_body"] = merged_extra_body
    agent.request_overrides = overrides
|
||||
|
||||
|
||||
def init_agent(
|
||||
agent,
|
||||
base_url: str = None,
|
||||
@@ -607,31 +542,6 @@ def init_agent(
|
||||
# Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401).
|
||||
_is_native_anthropic = agent.provider == "anthropic"
|
||||
effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "")
|
||||
|
||||
# MiniMax OAuth issues short-lived (~15-min) access tokens. The
|
||||
# Anthropic SDK caches ``api_key`` as a static string at client
|
||||
# construction time, so a session that resolves the bearer once
|
||||
# at startup will keep sending the same token until MiniMax
|
||||
# returns 401 mid-session. Swap the static string for a callable
|
||||
# token provider — ``build_anthropic_client`` recognizes the
|
||||
# callable and installs an httpx event hook that mints a fresh
|
||||
# bearer per outbound request (re-reading auth.json so a refresh
|
||||
# persisted by another process is visible immediately).
|
||||
# The cached refresh path is a no-op when the token still has
|
||||
# ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of life left, so steady-
|
||||
# state cost is one file read + one timestamp compare per request.
|
||||
if agent.provider == "minimax-oauth" and isinstance(effective_key, str) and effective_key:
|
||||
try:
|
||||
from hermes_cli.auth import build_minimax_oauth_token_provider
|
||||
effective_key = build_minimax_oauth_token_provider()
|
||||
except Exception as _mm_exc: # noqa: BLE001 — never block startup on this
|
||||
import logging as _logging
|
||||
_logging.getLogger(__name__).warning(
|
||||
"MiniMax OAuth: failed to install per-request token provider "
|
||||
"(%s); falling back to static bearer that will expire ~15min in.",
|
||||
_mm_exc,
|
||||
)
|
||||
|
||||
agent.api_key = effective_key
|
||||
agent._anthropic_api_key = effective_key
|
||||
agent._anthropic_base_url = base_url
|
||||
@@ -643,7 +553,7 @@ def init_agent(
|
||||
# that cause 401/403 on their endpoints. Guards #1739 and
|
||||
# the third-party identity-injection bug.
|
||||
from agent.anthropic_adapter import _is_oauth_token as _is_oat
|
||||
agent._is_anthropic_oauth = _is_oat(effective_key) if (_is_native_anthropic and isinstance(effective_key, str)) else False
|
||||
agent._is_anthropic_oauth = _is_oat(effective_key) if _is_native_anthropic else False
|
||||
agent._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout)
|
||||
# No OpenAI client needed for Anthropic mode
|
||||
agent.client = None
|
||||
@@ -976,14 +886,16 @@ def init_agent(
|
||||
|
||||
# Expose session ID to tools (terminal, execute_code) so agents can
|
||||
# reference their own session for --resume commands, cross-session
|
||||
# coordination, and logging. Keep the ContextVar and os.environ
|
||||
# fallback synchronized because different tool paths still read both.
|
||||
# coordination, and logging. Uses the ContextVar system from
|
||||
# session_context.py for concurrency safety (gateway runs multiple
|
||||
# sessions in one process). Also writes os.environ as fallback for
|
||||
# CLI mode where ContextVars aren't used.
|
||||
os.environ["HERMES_SESSION_ID"] = agent.session_id
|
||||
try:
|
||||
from gateway.session_context import set_current_session_id
|
||||
|
||||
set_current_session_id(agent.session_id)
|
||||
from gateway.session_context import _SESSION_ID
|
||||
_SESSION_ID.set(agent.session_id)
|
||||
except Exception:
|
||||
os.environ["HERMES_SESSION_ID"] = agent.session_id
|
||||
pass # CLI/test mode — ContextVar not needed
|
||||
|
||||
# Session logs go into ~/.hermes/sessions/ alongside gateway sessions
|
||||
hermes_home = get_hermes_home()
|
||||
@@ -1148,18 +1060,7 @@ def init_agent(
|
||||
# through _ra().get_tool_definitions()). Duplicate function names cause
|
||||
# 400 errors on providers that enforce unique names (e.g. Xiaomi
|
||||
# MiMo via Nous Portal).
|
||||
#
|
||||
# Respect the platform's enabled_toolsets configuration (#5544):
|
||||
# enabled_toolsets is None → no filter, inject (backward compat)
|
||||
# "memory" in enabled_toolsets → user opted in, inject
|
||||
# otherwise (incl. []) → user excluded memory, skip injection
|
||||
#
|
||||
# Without this gate, `platform_toolsets: telegram: []` still leaks memory
|
||||
# provider tools (fact_store, etc.) into the tool surface — a 10x latency
|
||||
# penalty on local models and a frequent trigger of tool-call loops.
|
||||
if agent._memory_manager and agent.tools is not None and (
|
||||
agent.enabled_toolsets is None or "memory" in agent.enabled_toolsets
|
||||
):
|
||||
if agent._memory_manager and agent.tools is not None:
|
||||
_existing_tool_names = {
|
||||
t.get("function", {}).get("name")
|
||||
for t in agent.tools
|
||||
@@ -1312,7 +1213,6 @@ def init_agent(
|
||||
# Store for reuse by _check_compression_model_feasibility (auxiliary
|
||||
# compression model context-length detection needs the same list).
|
||||
agent._custom_providers = _custom_providers
|
||||
_merge_custom_provider_extra_body(agent, _custom_providers)
|
||||
|
||||
# Check custom_providers per-model context_length
|
||||
if _config_context_length is None and _custom_providers:
|
||||
@@ -1427,7 +1327,6 @@ def init_agent(
|
||||
base_url=agent.base_url,
|
||||
api_key=getattr(agent, "api_key", ""),
|
||||
provider=agent.provider,
|
||||
api_mode=agent.api_mode,
|
||||
)
|
||||
if not agent.quiet_mode:
|
||||
_ra().logger.info("Using context engine: %s", _selected_engine.name)
|
||||
@@ -1470,22 +1369,8 @@ def init_agent(
|
||||
# errors. Even with the cache fix, dedup is the right defense
|
||||
# against plugin paths that may register the same schemas via
|
||||
# ctx.register_tool(). Mirrors the memory tools dedup above.
|
||||
#
|
||||
# Respect the platform's enabled_toolsets configuration (#5544):
|
||||
# context engine tools follow the same gating pattern as memory
|
||||
# provider tools — without the gate, `platform_toolsets: telegram: []`
|
||||
# would still leak lcm_* tools into the tool surface and incur the
|
||||
# same local-model latency penalty.
|
||||
agent._context_engine_tool_names: set = set()
|
||||
if (
|
||||
hasattr(agent, "context_compressor")
|
||||
and agent.context_compressor
|
||||
and agent.tools is not None
|
||||
and (
|
||||
agent.enabled_toolsets is None
|
||||
or "context_engine" in agent.enabled_toolsets
|
||||
)
|
||||
):
|
||||
if hasattr(agent, "context_compressor") and agent.context_compressor and agent.tools is not None:
|
||||
_existing_tool_names = {
|
||||
t.get("function", {}).get("name")
|
||||
for t in agent.tools
|
||||
|
||||
@@ -132,7 +132,7 @@ def convert_to_trajectory_format(agent, messages: List[Dict[str, Any]], user_que
|
||||
except json.JSONDecodeError:
|
||||
# This shouldn't happen since we validate and retry during conversation,
|
||||
# but if it does, log warning and use empty dict
|
||||
logger.warning(f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}")
|
||||
logging.warning(f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}")
|
||||
arguments = {}
|
||||
|
||||
tool_call_json = {
|
||||
@@ -617,28 +617,9 @@ def recover_with_credential_pool(
|
||||
# existing entitlement keyword set in ``_is_entitlement_failure``.
|
||||
# Any 403 against ``xai-oauth`` is treated as entitlement here so
|
||||
# the refresh loop can't spin in those cases either.
|
||||
#
|
||||
# Exception (#29344): xAI's ``[WKE=unauthenticated:...]`` suffix and
|
||||
# the ``OAuth2 access token could not be validated`` phrasing are
|
||||
# xAI's authoritative "this is a stale token, not entitlement"
|
||||
# signal. When either fires we must NOT apply the catch-all
|
||||
# override — refresh is the recoverable path for these bodies, and
|
||||
# blanket-classifying them as entitlement was the bug that left
|
||||
# long-running TUI sessions stuck on stale tokens until the user
|
||||
# exited and reopened.
|
||||
is_entitlement = agent._is_entitlement_failure(error_context, status_code)
|
||||
if not is_entitlement and status_code == 403 and (agent.provider or "") == "xai-oauth":
|
||||
_disambiguator_haystack = " ".join(
|
||||
str(error_context.get(k) or "").lower()
|
||||
for k in ("message", "reason", "code", "error")
|
||||
if isinstance(error_context, dict)
|
||||
)
|
||||
_is_xai_auth_failure = (
|
||||
"[wke=unauthenticated:" in _disambiguator_haystack
|
||||
or "oauth2 access token could not be validated" in _disambiguator_haystack
|
||||
)
|
||||
if not _is_xai_auth_failure:
|
||||
is_entitlement = True
|
||||
is_entitlement = True
|
||||
if is_entitlement:
|
||||
_ra().logger.info(
|
||||
"Credential %s — entitlement-shaped 403 from %s; "
|
||||
@@ -747,7 +728,7 @@ def try_recover_primary_transport(
|
||||
time.sleep(wait_time)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.warning("Primary transport recovery failed: %s", e)
|
||||
logging.warning("Primary transport recovery failed: %s", e)
|
||||
return False
|
||||
|
||||
# ── End provider fallback ──────────────────────────────────────────────
|
||||
@@ -910,20 +891,19 @@ def restore_primary_runtime(agent) -> bool:
|
||||
base_url=rt["compressor_base_url"],
|
||||
api_key=rt["compressor_api_key"],
|
||||
provider=rt["compressor_provider"],
|
||||
api_mode=rt.get("compressor_api_mode", ""),
|
||||
)
|
||||
|
||||
# ── Reset fallback chain for the new turn ──
|
||||
agent._fallback_activated = False
|
||||
agent._fallback_index = 0
|
||||
|
||||
logger.info(
|
||||
logging.info(
|
||||
"Primary runtime restored for new turn: %s (%s)",
|
||||
agent.model, agent.provider,
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.warning("Failed to restore primary runtime: %s", e)
|
||||
logging.warning("Failed to restore primary runtime: %s", e)
|
||||
return False
|
||||
|
||||
# Which error types indicate a transient transport failure worth
|
||||
@@ -1084,7 +1064,10 @@ def dump_api_request_debug(
|
||||
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
||||
dump_file = agent.logs_dir / f"request_dump_{agent.session_id}_{timestamp}.json"
|
||||
atomic_json_write(dump_file, dump_payload, default=str)
|
||||
dump_file.write_text(
|
||||
json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
agent._vprint(f"{agent.log_prefix}🧾 Request debug dump written to: {dump_file}")
|
||||
|
||||
@@ -1094,7 +1077,7 @@ def dump_api_request_debug(
|
||||
return dump_file
|
||||
except Exception as dump_error:
|
||||
if agent.verbose_logging:
|
||||
logger.warning(f"Failed to dump API request debug payload: {dump_error}")
|
||||
logging.warning(f"Failed to dump API request debug payload: {dump_error}")
|
||||
return None
|
||||
|
||||
|
||||
@@ -1369,22 +1352,6 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
||||
# API key — falling back would send Anthropic credentials to third-party endpoints.
|
||||
_is_native_anthropic = new_provider == "anthropic"
|
||||
effective_key = (api_key or agent.api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or agent.api_key or "")
|
||||
|
||||
# MiniMax OAuth: swap static string for a per-request callable token
|
||||
# provider so the rebuilt client survives 15-min token expiry. See
|
||||
# the matching block in agent_init.py for the full rationale.
|
||||
if new_provider == "minimax-oauth" and isinstance(effective_key, str) and effective_key:
|
||||
try:
|
||||
from hermes_cli.auth import build_minimax_oauth_token_provider
|
||||
effective_key = build_minimax_oauth_token_provider()
|
||||
except Exception as _mm_exc: # noqa: BLE001
|
||||
import logging as _logging
|
||||
_logging.getLogger(__name__).warning(
|
||||
"MiniMax OAuth: failed to install per-request token provider "
|
||||
"on switch (%s); using static bearer.",
|
||||
_mm_exc,
|
||||
)
|
||||
|
||||
agent.api_key = effective_key
|
||||
agent._anthropic_api_key = effective_key
|
||||
agent._anthropic_base_url = base_url or getattr(agent, "_anthropic_base_url", None)
|
||||
@@ -1392,7 +1359,7 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
||||
effective_key, agent._anthropic_base_url,
|
||||
timeout=get_provider_request_timeout(agent.provider, agent.model),
|
||||
)
|
||||
agent._is_anthropic_oauth = _is_oauth_token(effective_key) if (_is_native_anthropic and isinstance(effective_key, str)) else False
|
||||
agent._is_anthropic_oauth = _is_oauth_token(effective_key) if _is_native_anthropic else False
|
||||
agent.client = None
|
||||
agent._client_kwargs = {}
|
||||
else:
|
||||
@@ -1479,7 +1446,6 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
||||
"compressor_api_key": getattr(_cc, "api_key", "") if _cc else "",
|
||||
"compressor_provider": getattr(_cc, "provider", agent.provider) if _cc else agent.provider,
|
||||
"compressor_context_length": _cc.context_length if _cc else 0,
|
||||
"compressor_api_mode": getattr(_cc, "api_mode", agent.api_mode) if _cc else agent.api_mode,
|
||||
"compressor_threshold_tokens": _cc.threshold_tokens if _cc else 0,
|
||||
}
|
||||
if api_mode == "anthropic_messages":
|
||||
@@ -1511,7 +1477,7 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
||||
agent._fallback_chain = fallback_chain
|
||||
agent._fallback_model = fallback_chain[0] if fallback_chain else None
|
||||
|
||||
logger.info(
|
||||
logging.info(
|
||||
"Model switched in-place: %s (%s) -> %s (%s)",
|
||||
old_model, old_provider, new_model, new_provider,
|
||||
)
|
||||
@@ -2150,56 +2116,33 @@ def apply_pending_steer_to_tool_results(agent, messages: list, num_tool_msgs: in
|
||||
|
||||
|
||||
def force_close_tcp_sockets(client: Any) -> int:
    """Abort in-flight TCP I/O by shutting down sockets WITHOUT closing FDs.

    When a provider drops a connection mid-stream — or the user issues an
    interrupt — we want to unblock httpx's reader/writer immediately rather
    than waiting for the kernel's per-connection timeout.
    ``shutdown(SHUT_RDWR)`` achieves that: it sends FIN and breaks any pending
    ``recv``/``send`` with EOF or ``EPIPE``, but does NOT release the file
    descriptor.

    This helper must never call ``socket.close()``. It runs on a different
    thread than the one driving the request (interrupt-abort path and
    stale-call kill path), and closing from here is unsafe:

    * The SSL wrapper (``ssl.SSLSocket``'s underlying OpenSSL ``BIO``) caches
      the raw integer FD. Once the FD is released, the kernel may immediately
      recycle that integer for the next ``open()`` call — e.g. the kanban
      dispatcher opening ``kanban.db``.
    * The owning worker thread then unwinds httpx, the SSL layer flushes a
      pending TLS record, and the encrypted bytes get written into the wrong
      file (issue #29507: a 24-byte TLS application-data record clobbering
      SQLite header bytes 5..28).

    ``shutdown()`` from any thread is FD-safe; ``close()`` is not. The httpx
    connection's own close path — run by the worker thread when it unwinds —
    releases the FD, so only one thread ever closes it.

    Args:
        client: Client object whose pooled sockets are enumerated by
            ``_iter_pool_sockets``.

    Returns:
        The number of pooled sockets processed. A socket whose ``shutdown``
        raised ``OSError`` (already shut down / not connected) is still
        counted. (The value is reported as ``tcp_force_closed=N`` in the log
        line for backwards-compatible parsing.)
    """
    import socket as _socket

    shutdown_count = 0
    try:
        for sock in _iter_pool_sockets(client):
            try:
                sock.shutdown(_socket.SHUT_RDWR)
            except OSError:
                # Already shut down / not connected / FD invalid — all benign.
                pass
            # IMPORTANT (#29507): do NOT call sock.close() here. Releasing the
            # FD from this thread lets the kernel recycle it while the owning
            # thread's TLS layer still holds the raw descriptor number.
            shutdown_count += 1
    except Exception as exc:
        # Best-effort sweep: a failure while walking the pool must not
        # propagate into the interrupt / kill path that called us.
        _ra().logger.debug("Force-close TCP sockets sweep error: %s", exc)
    return shutdown_count
|
||||
|
||||
|
||||
|
||||
|
||||
+227
-255
@@ -1606,155 +1606,182 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
|
||||
return out
|
||||
|
||||
|
||||
def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Convert an assistant message to Anthropic content blocks.
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
|
||||
Handles thinking blocks, regular content, tool calls, and
|
||||
reasoning_content injection for Kimi/DeepSeek endpoints.
|
||||
Returns (system_prompt, anthropic_messages).
|
||||
System messages are extracted since Anthropic takes them as a separate param.
|
||||
system_prompt is a string or list of content blocks (when cache_control present).
|
||||
|
||||
When *base_url* is provided and points to a third-party Anthropic-compatible
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
|
||||
When *model* is provided and matches the Kimi / Moonshot family (or
|
||||
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
||||
synthesised from ``reasoning_content`` are preserved on replayed
|
||||
assistant tool-call messages — Kimi requires the field to exist, even
|
||||
if empty.
|
||||
"""
|
||||
content = m.get("content", "")
|
||||
blocks = _extract_preserved_thinking_blocks(m)
|
||||
if content:
|
||||
if isinstance(content, list):
|
||||
converted_content = _convert_content_to_anthropic(content)
|
||||
if isinstance(converted_content, list):
|
||||
blocks.extend(converted_content)
|
||||
else:
|
||||
blocks.append({"type": "text", "text": str(content)})
|
||||
for tc in m.get("tool_calls", []):
|
||||
if not tc or not isinstance(tc, dict):
|
||||
system = None
|
||||
result = []
|
||||
|
||||
for m in messages:
|
||||
role = m.get("role", "user")
|
||||
content = m.get("content", "")
|
||||
|
||||
if role == "system":
|
||||
if isinstance(content, list):
|
||||
# Preserve cache_control markers on content blocks
|
||||
has_cache = any(
|
||||
p.get("cache_control") for p in content if isinstance(p, dict)
|
||||
)
|
||||
if has_cache:
|
||||
system = [p for p in content if isinstance(p, dict)]
|
||||
else:
|
||||
system = "\n".join(
|
||||
p["text"] for p in content if p.get("type") == "text"
|
||||
)
|
||||
else:
|
||||
system = content
|
||||
continue
|
||||
fn = tc.get("function", {})
|
||||
args = fn.get("arguments", "{}")
|
||||
try:
|
||||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
parsed_args = {}
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
effective = [{"type": "text", "text": "(empty)"}]
|
||||
return {"role": "assistant", "content": effective}
|
||||
|
||||
|
||||
def _convert_tool_message_to_result(
|
||||
result: List[Dict[str, Any]], m: Dict[str, Any]
|
||||
) -> None:
|
||||
"""Convert a tool message to an Anthropic tool_result, merging consecutive
|
||||
results into one user message.
|
||||
|
||||
Mutates ``result`` in place — either appends a new user message or extends
|
||||
the trailing user message's tool_result list.
|
||||
"""
|
||||
content = m.get("content", "")
|
||||
multimodal_blocks: Optional[List[Dict[str, Any]]] = None
|
||||
if isinstance(content, dict) and content.get("_multimodal"):
|
||||
multimodal_blocks = _content_parts_to_anthropic_blocks(
|
||||
content.get("content") or []
|
||||
)
|
||||
# Fallback text if the conversion produced nothing usable.
|
||||
if not multimodal_blocks and content.get("text_summary"):
|
||||
multimodal_blocks = [
|
||||
{"type": "text", "text": str(content["text_summary"])}
|
||||
]
|
||||
elif isinstance(content, list):
|
||||
converted = _content_parts_to_anthropic_blocks(content)
|
||||
if any(b.get("type") == "image" for b in converted):
|
||||
multimodal_blocks = converted
|
||||
# Back-compat: some callers stash blocks under a private key.
|
||||
if multimodal_blocks is None:
|
||||
stashed = m.get("_anthropic_content_blocks")
|
||||
if isinstance(stashed, list) and stashed:
|
||||
text_content = content if isinstance(content, str) and content.strip() else None
|
||||
multimodal_blocks = (
|
||||
[{"type": "text", "text": text_content}] + stashed
|
||||
if text_content else list(stashed)
|
||||
if role == "assistant":
|
||||
blocks = _extract_preserved_thinking_blocks(m)
|
||||
if content:
|
||||
if isinstance(content, list):
|
||||
converted_content = _convert_content_to_anthropic(content)
|
||||
if isinstance(converted_content, list):
|
||||
blocks.extend(converted_content)
|
||||
else:
|
||||
blocks.append({"type": "text", "text": str(content)})
|
||||
for tc in m.get("tool_calls", []):
|
||||
if not tc or not isinstance(tc, dict):
|
||||
continue
|
||||
fn = tc.get("function", {})
|
||||
args = fn.get("arguments", "{}")
|
||||
try:
|
||||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
parsed_args = {}
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
effective = [{"type": "text", "text": "(empty)"}]
|
||||
result.append({"role": "assistant", "content": effective})
|
||||
continue
|
||||
|
||||
if multimodal_blocks:
|
||||
result_content: Any = multimodal_blocks
|
||||
elif isinstance(content, str):
|
||||
result_content = content
|
||||
else:
|
||||
result_content = json.dumps(content) if content else "(no output)"
|
||||
if not result_content:
|
||||
result_content = "(no output)"
|
||||
tool_result = {
|
||||
"type": "tool_result",
|
||||
"tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
|
||||
"content": result_content,
|
||||
}
|
||||
if isinstance(m.get("cache_control"), dict):
|
||||
tool_result["cache_control"] = dict(m["cache_control"])
|
||||
# Merge consecutive tool results into one user message
|
||||
if (
|
||||
result
|
||||
and result[-1]["role"] == "user"
|
||||
and isinstance(result[-1]["content"], list)
|
||||
and result[-1]["content"]
|
||||
and result[-1]["content"][0].get("type") == "tool_result"
|
||||
):
|
||||
result[-1]["content"].append(tool_result)
|
||||
else:
|
||||
result.append({"role": "user", "content": [tool_result]})
|
||||
if role == "tool":
|
||||
# Sanitize tool_use_id and ensure non-empty content.
|
||||
# Computer-use (and other multimodal) tool results arrive as
|
||||
# either a list of OpenAI-style content parts, or a dict
|
||||
# marked `_multimodal` with an embedded `content` list. Convert
|
||||
# both into Anthropic `tool_result` inner blocks (text + image).
|
||||
multimodal_blocks: Optional[List[Dict[str, Any]]] = None
|
||||
if isinstance(content, dict) and content.get("_multimodal"):
|
||||
multimodal_blocks = _content_parts_to_anthropic_blocks(
|
||||
content.get("content") or []
|
||||
)
|
||||
# Fallback text if the conversion produced nothing usable.
|
||||
if not multimodal_blocks and content.get("text_summary"):
|
||||
multimodal_blocks = [
|
||||
{"type": "text", "text": str(content["text_summary"])}
|
||||
]
|
||||
elif isinstance(content, list):
|
||||
converted = _content_parts_to_anthropic_blocks(content)
|
||||
if any(b.get("type") == "image" for b in converted):
|
||||
multimodal_blocks = converted
|
||||
# Back-compat: some callers stash blocks under a private key.
|
||||
if multimodal_blocks is None:
|
||||
stashed = m.get("_anthropic_content_blocks")
|
||||
if isinstance(stashed, list) and stashed:
|
||||
text_content = content if isinstance(content, str) and content.strip() else None
|
||||
multimodal_blocks = (
|
||||
[{"type": "text", "text": text_content}] + stashed
|
||||
if text_content else list(stashed)
|
||||
)
|
||||
|
||||
if multimodal_blocks:
|
||||
result_content: Any = multimodal_blocks
|
||||
elif isinstance(content, str):
|
||||
result_content = content
|
||||
else:
|
||||
result_content = json.dumps(content) if content else "(no output)"
|
||||
if not result_content:
|
||||
result_content = "(no output)"
|
||||
tool_result = {
|
||||
"type": "tool_result",
|
||||
"tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
|
||||
"content": result_content,
|
||||
}
|
||||
if isinstance(m.get("cache_control"), dict):
|
||||
tool_result["cache_control"] = dict(m["cache_control"])
|
||||
# Merge consecutive tool results into one user message
|
||||
if (
|
||||
result
|
||||
and result[-1]["role"] == "user"
|
||||
and isinstance(result[-1]["content"], list)
|
||||
and result[-1]["content"]
|
||||
and result[-1]["content"][0].get("type") == "tool_result"
|
||||
):
|
||||
result[-1]["content"].append(tool_result)
|
||||
else:
|
||||
result.append({"role": "user", "content": [tool_result]})
|
||||
continue
|
||||
|
||||
def _convert_user_message(content: Any) -> Dict[str, Any]:
|
||||
"""Validate and convert a user message to anthropic format."""
|
||||
if isinstance(content, list):
|
||||
converted_blocks = _convert_content_to_anthropic(content)
|
||||
if not converted_blocks or all(
|
||||
b.get("text", "").strip() == ""
|
||||
for b in converted_blocks
|
||||
if isinstance(b, dict) and b.get("type") == "text"
|
||||
):
|
||||
converted_blocks = [{"type": "text", "text": "(empty message)"}]
|
||||
return {"role": "user", "content": converted_blocks}
|
||||
else:
|
||||
if not content or (isinstance(content, str) and not content.strip()):
|
||||
content = "(empty message)"
|
||||
return {"role": "user", "content": content}
|
||||
# Regular user message — validate non-empty content (Anthropic rejects empty)
|
||||
if isinstance(content, list):
|
||||
converted_blocks = _convert_content_to_anthropic(content)
|
||||
# Check if all text blocks are empty
|
||||
if not converted_blocks or all(
|
||||
b.get("text", "").strip() == ""
|
||||
for b in converted_blocks
|
||||
if isinstance(b, dict) and b.get("type") == "text"
|
||||
):
|
||||
converted_blocks = [{"type": "text", "text": "(empty message)"}]
|
||||
result.append({"role": "user", "content": converted_blocks})
|
||||
else:
|
||||
# Validate string content is non-empty
|
||||
if not content or (isinstance(content, str) and not content.strip()):
|
||||
content = "(empty message)"
|
||||
result.append({"role": "user", "content": content})
|
||||
|
||||
|
||||
def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
|
||||
"""Strip tool_use blocks with no matching tool_result, and vice versa.
|
||||
|
||||
Context compression or session truncation can remove either side of a
|
||||
tool-call pair. Anthropic rejects both orphans with HTTP 400.
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
# Strip orphaned tool_use blocks (no matching tool_result follows)
|
||||
tool_result_ids = set()
|
||||
for m in result:
|
||||
@@ -1772,7 +1799,10 @@ def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
|
||||
if not m["content"]:
|
||||
m["content"] = [{"type": "text", "text": "(tool call removed)"}]
|
||||
|
||||
# Strip orphaned tool_result blocks (no matching tool_use precedes them)
|
||||
# Strip orphaned tool_result blocks (no matching tool_use precedes them).
|
||||
# This is the mirror of the above: context compression or session truncation
|
||||
# can remove an assistant message containing a tool_use while leaving the
|
||||
# subsequent tool_result intact. Anthropic rejects these with a 400.
|
||||
tool_use_ids = set()
|
||||
for m in result:
|
||||
if m["role"] == "assistant" and isinstance(m["content"], list):
|
||||
@@ -1789,16 +1819,12 @@ def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
|
||||
if not m["content"]:
|
||||
m["content"] = [{"type": "text", "text": "(tool result removed)"}]
|
||||
|
||||
|
||||
def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Merge consecutive same-role messages to enforce Anthropic alternation.
|
||||
|
||||
Returns a new list (caller must rebind ``result``).
|
||||
"""
|
||||
# Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
|
||||
fixed = []
|
||||
for m in result:
|
||||
if fixed and fixed[-1]["role"] == m["role"]:
|
||||
if m["role"] == "user":
|
||||
# Merge consecutive user messages
|
||||
prev_content = fixed[-1]["content"]
|
||||
curr_content = m["content"]
|
||||
if isinstance(prev_content, str) and isinstance(curr_content, str):
|
||||
@@ -1806,6 +1832,7 @@ def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any
|
||||
elif isinstance(prev_content, list) and isinstance(curr_content, list):
|
||||
fixed[-1]["content"] = prev_content + curr_content
|
||||
else:
|
||||
# Mixed types — wrap string in list
|
||||
if isinstance(prev_content, str):
|
||||
prev_content = [{"type": "text", "text": prev_content}]
|
||||
if isinstance(curr_content, str):
|
||||
@@ -1828,6 +1855,7 @@ def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any
|
||||
elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
|
||||
fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
|
||||
else:
|
||||
# Mixed types — normalize both to list and merge
|
||||
if isinstance(prev_blocks, str):
|
||||
prev_blocks = [{"type": "text", "text": prev_blocks}]
|
||||
if isinstance(curr_blocks, str):
|
||||
@@ -1835,34 +1863,37 @@ def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any
|
||||
fixed[-1]["content"] = prev_blocks + curr_blocks
|
||||
else:
|
||||
fixed.append(m)
|
||||
return fixed
|
||||
result = fixed
|
||||
|
||||
|
||||
def _manage_thinking_signatures(
|
||||
result: List[Dict[str, Any]], base_url: str | None, model: str | None
|
||||
) -> None:
|
||||
"""Strip or preserve thinking blocks based on endpoint type.
|
||||
|
||||
Anthropic signs thinking blocks against the full turn content.
|
||||
Any upstream mutation (context compression, session truncation, orphan
|
||||
stripping, message merging) invalidates the signature, causing HTTP 400
|
||||
"Invalid signature in thinking block".
|
||||
|
||||
Signatures are Anthropic-proprietary. Third-party endpoints (MiniMax,
|
||||
Azure AI Foundry, AWS Bedrock, self-hosted proxies) cannot validate them
|
||||
and will reject them outright. Kimi's /coding and DeepSeek's /anthropic
|
||||
endpoints speak the Anthropic protocol upstream but require unsigned
|
||||
thinking blocks (synthesised from ``reasoning_content``) to round-trip on
|
||||
replayed assistant tool-call messages. See hermes-agent#13848 (Kimi) and
|
||||
hermes-agent#16748 (DeepSeek).
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
# ── Thinking block signature management ──────────────────────────
|
||||
# Anthropic signs thinking blocks against the full turn content.
|
||||
# Any upstream mutation (context compression, session truncation,
|
||||
# orphan stripping, message merging) invalidates the signature,
|
||||
# causing HTTP 400 "Invalid signature in thinking block".
|
||||
#
|
||||
# Signatures are Anthropic-proprietary. Third-party endpoints
|
||||
# (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
|
||||
# them and will reject them outright. When targeting a third-party
|
||||
# endpoint, strip ALL thinking/redacted_thinking blocks from every
|
||||
# assistant message — the third-party will generate its own
|
||||
# thinking blocks if it supports extended thinking.
|
||||
#
|
||||
# For direct Anthropic (strategy following clawdbot/OpenClaw):
|
||||
# 1. Strip thinking/redacted_thinking from all assistant messages
|
||||
# EXCEPT the last one — preserves reasoning continuity on the
|
||||
# current tool-use chain while avoiding stale signature errors.
|
||||
# 2. Downgrade unsigned thinking blocks (no signature) to text —
|
||||
# Anthropic can't validate them and will reject them.
|
||||
# 3. Strip cache_control from thinking/redacted_thinking blocks —
|
||||
# cache markers can interfere with signature validation.
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
# Kimi / DeepSeek share a contract: strip signed Anthropic blocks
|
||||
# (neither upstream can validate Anthropic signatures), preserve unsigned
|
||||
# ones synthesised from reasoning_content. See #13848, #16748.
|
||||
# Kimi /coding and DeepSeek /anthropic share a contract: both speak the
|
||||
# Anthropic Messages protocol upstream but require that thinking blocks
|
||||
# synthesised from reasoning_content round-trip on subsequent turns when
|
||||
# thinking is enabled. Signed Anthropic blocks still have to be stripped
|
||||
# (neither endpoint can validate Anthropic's signatures); unsigned blocks
|
||||
# are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
|
||||
_preserve_unsigned_thinking = (
|
||||
_is_kimi_family_endpoint(base_url, model)
|
||||
or _is_deepseek_anthropic_endpoint(base_url)
|
||||
@@ -1879,19 +1910,26 @@ def _manage_thinking_signatures(
|
||||
continue
|
||||
|
||||
if _preserve_unsigned_thinking:
|
||||
# Kimi / DeepSeek: strip signed, preserve unsigned.
|
||||
# Kimi's /coding and DeepSeek's /anthropic endpoints both enable
|
||||
# thinking server-side and require unsigned thinking blocks on
|
||||
# replayed assistant tool-call messages. Strip signed Anthropic
|
||||
# blocks (neither upstream can validate Anthropic signatures) but
|
||||
# preserve the unsigned ones we synthesised from reasoning_content.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("signature") or b.get("data"):
|
||||
# Signed (or redacted-with-data) — upstream can't validate, strip.
|
||||
# Anthropic-signed block — upstream can't validate, strip
|
||||
continue
|
||||
# Unsigned thinking (synthesised from reasoning_content) —
|
||||
# keep it: the upstream needs it for message-history validation.
|
||||
new_content.append(b)
|
||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||
elif _is_third_party or idx != last_assistant_idx:
|
||||
# Third-party: strip ALL thinking blocks (signatures are proprietary).
|
||||
# Third-party endpoint: strip ALL thinking blocks from every
|
||||
# assistant message — signatures are Anthropic-proprietary.
|
||||
# Direct Anthropic: strip from non-latest assistant messages only.
|
||||
stripped = [
|
||||
b for b in m["content"]
|
||||
@@ -1899,21 +1937,24 @@ def _manage_thinking_signatures(
|
||||
]
|
||||
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
|
||||
else:
|
||||
# Latest assistant on direct Anthropic: keep signed, downgrade unsigned
|
||||
# to text so the reasoning isn't lost.
|
||||
# Latest assistant on direct Anthropic: keep signed thinking
|
||||
# blocks for reasoning continuity; downgrade unsigned ones to
|
||||
# plain text.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("type") == "redacted_thinking":
|
||||
# Redacted blocks use 'data' for the signature payload —
|
||||
# drop the block when 'data' is missing (can't be validated).
|
||||
# Redacted blocks use 'data' for the signature payload
|
||||
if b.get("data"):
|
||||
new_content.append(b)
|
||||
# else: drop — no data means it can't be validated
|
||||
elif b.get("signature"):
|
||||
# Signed thinking block — keep it
|
||||
new_content.append(b)
|
||||
else:
|
||||
# Unsigned thinking — downgrade to text so it's not lost
|
||||
thinking_text = b.get("thinking", "")
|
||||
if thinking_text:
|
||||
new_content.append({"type": "text", "text": thinking_text})
|
||||
@@ -1925,15 +1966,12 @@ def _manage_thinking_signatures(
|
||||
if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
|
||||
b.pop("cache_control", None)
|
||||
|
||||
|
||||
def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
|
||||
"""Keep only the most recent ``_MAX_KEEP_IMAGES`` computer-use screenshots.
|
||||
|
||||
Base64 images cost ~1,465 tokens each and accumulate across tool calls.
|
||||
Walk backward, keep the most recent N, replace older ones with a placeholder.
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
# ── Image eviction: keep only the most recent N screenshots ─────
|
||||
# computer_use screenshots (base64 images) sit inside tool_result
|
||||
# blocks: they accumulate and are sent with every API call. Each
|
||||
# costs ~1,465 tokens; after 10+ the conversation becomes slow
|
||||
# even for simple text queries. Walk backward, keep the most recent
|
||||
# _MAX_KEEP_IMAGES, replace older ones with a text placeholder.
|
||||
_MAX_KEEP_IMAGES = 3
|
||||
_image_count = 0
|
||||
for msg in reversed(result):
|
||||
@@ -1960,68 +1998,6 @@ def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
|
||||
for b in inner
|
||||
]
|
||||
|
||||
|
||||
def convert_messages_to_anthropic(
    messages: List[Dict],
    base_url: str | None = None,
    model: str | None = None,
) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

    System messages are extracted since Anthropic takes them as a separate
    param; every other message is converted by a role-specific helper and the
    resulting list is then post-processed (orphaned tool blocks stripped,
    consecutive same-role messages merged, thinking-block signatures managed,
    old screenshots evicted).

    When *base_url* is provided and points to a third-party Anthropic-compatible
    endpoint, all thinking block signatures are stripped. Signatures are
    Anthropic-proprietary — third-party endpoints cannot validate them and will
    reject them with HTTP 400 "Invalid signature in thinking block".

    When *model* is provided and matches the Kimi / Moonshot family (or
    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
    synthesised from ``reasoning_content`` are preserved on replayed
    assistant tool-call messages — Kimi requires the field to exist, even
    if empty.

    Args:
        messages: OpenAI-style message dicts (``role`` / ``content`` / ...).
        base_url: Endpoint base URL, forwarded to the thinking-signature pass.
        model: Model name, forwarded to the thinking-signature pass.

    Returns:
        ``(system_prompt, anthropic_messages)``. ``system_prompt`` is ``None``
        when no system message was seen, a list of content blocks when any
        system block carries ``cache_control``, and otherwise the joined text
        (or the raw non-list content). If several system messages are present
        the last one wins.
    """
    system = None
    result: List[Dict[str, Any]] = []

    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")

        if role == "system":
            if isinstance(content, list):
                # Only dict parts are meaningful. Filtering once keeps the
                # cache_control check and the text join consistent; the join
                # previously called ``.get`` on every part and raised on a
                # non-dict part or a text part without a ``"text"`` key.
                parts = [p for p in content if isinstance(p, dict)]
                if any(p.get("cache_control") for p in parts):
                    # Preserve cache_control markers on content blocks.
                    system = parts
                else:
                    system = "\n".join(
                        p["text"]
                        for p in parts
                        if p.get("type") == "text" and isinstance(p.get("text"), str)
                    )
            else:
                system = content
            continue

        if role == "assistant":
            result.append(_convert_assistant_message(m))
            continue

        if role == "tool":
            # Appends to / merges into ``result`` in place.
            _convert_tool_message_to_result(result, m)
            continue

        # Regular user message
        result.append(_convert_user_message(content))

    _strip_orphaned_tool_blocks(result)
    # The merge returns a new list, so ``result`` must be rebound.
    result = _merge_consecutive_roles(result)
    _manage_thinking_signatures(result, base_url, model)
    _evict_old_screenshots(result)

    return system, result
|
||||
|
||||
|
||||
@@ -2122,13 +2098,9 @@ def build_anthropic_kwargs(
|
||||
block["text"] = text
|
||||
|
||||
# 3. Prefix tool names with mcp_ (Claude Code convention)
|
||||
# Skip names that already begin with the marker — native MCP server
|
||||
# tools (from mcp_servers: in config.yaml) are registered under their
|
||||
# full mcp_<server>_<tool> name and would double-prefix otherwise,
|
||||
# breaking round-trip registry lookup in normalize_response. GH-25255.
|
||||
if anthropic_tools:
|
||||
for tool in anthropic_tools:
|
||||
if "name" in tool and not tool["name"].startswith(_MCP_TOOL_PREFIX):
|
||||
if "name" in tool:
|
||||
tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
|
||||
|
||||
# 4. Prefix tool names in message history (tool_use and tool_result blocks)
|
||||
|
||||
+2
-116
@@ -3730,37 +3730,6 @@ _VISION_AUTO_PROVIDER_ORDER = (
|
||||
)
|
||||
|
||||
|
||||
def _main_model_supports_vision(provider: str, model: Optional[str]) -> bool:
|
||||
"""Return True when ``provider``/``model`` is known to accept image input.
|
||||
|
||||
Used by the vision auto-detect chain to skip the user's main provider
|
||||
when it's known to be text-only (e.g. DeepSeek, gpt-oss without vision).
|
||||
Without this guard, ``resolve_vision_provider_client(provider="auto")``
|
||||
would happily return the main-provider client and any subsequent image
|
||||
payload would surface as a cryptic provider-side error
|
||||
(``unknown variant `image_url`, expected `text```, #31179).
|
||||
|
||||
Returns True when capability lookup is unknown — preserves the historical
|
||||
behaviour of attempting the call, so providers we haven't catalogued yet
|
||||
don't silently regress to text-only.
|
||||
"""
|
||||
try:
|
||||
from agent.image_routing import _lookup_supports_vision
|
||||
from hermes_cli.config import load_config
|
||||
except ImportError:
|
||||
return True
|
||||
try:
|
||||
supports = _lookup_supports_vision(provider, model, load_config())
|
||||
except Exception: # pragma: no cover - defensive
|
||||
return True
|
||||
if supports is None:
|
||||
# No capability data — keep current behaviour and let the call attempt
|
||||
# happen rather than silently skipping. This avoids false-positive
|
||||
# skips for new/custom providers.
|
||||
return True
|
||||
return bool(supports)
|
||||
|
||||
|
||||
def _normalize_vision_provider(provider: Optional[str]) -> str:
|
||||
return _normalize_aux_provider(provider)
|
||||
|
||||
@@ -3901,23 +3870,6 @@ def resolve_vision_provider_client(
|
||||
"vision support) — falling through to aggregator chain",
|
||||
main_provider,
|
||||
)
|
||||
elif not _main_model_supports_vision(main_provider, vision_model):
|
||||
# The main model is known to be text-only (e.g. DeepSeek V4,
|
||||
# gpt-oss-120b without vision). Building a client and sending
|
||||
# an image would produce a cryptic provider-side error like
|
||||
# ``unknown variant `image_url`, expected `text``` (#31179).
|
||||
# Fall through to the aggregator chain instead.
|
||||
#
|
||||
# Only log the provider name (not the model) — mirrors the
|
||||
# sibling _PROVIDERS_WITHOUT_VISION branch above, and avoids
|
||||
# CodeQL py/clear-text-logging-sensitive-data heuristic false
|
||||
# positives on multi-value interpolations.
|
||||
logger.debug(
|
||||
"Vision auto-detect: skipping main provider %s "
|
||||
"(reports no vision capability) — falling through to "
|
||||
"aggregator chain",
|
||||
main_provider,
|
||||
)
|
||||
else:
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, vision_model,
|
||||
@@ -4329,23 +4281,6 @@ def _get_cached_client(
|
||||
return client, model or default_model
|
||||
|
||||
|
||||
# Aliases that target direct REST APIs not modeled as first-class providers
|
||||
# in PROVIDER_REGISTRY. Used for ``auxiliary.<task>.provider`` so users can
|
||||
# write the obvious name and have it resolve to a working ``custom`` endpoint
|
||||
# without needing to know our internal provider IDs.
|
||||
#
|
||||
# Why these specifically: PROVIDER_REGISTRY has ``openai-codex`` (OAuth) and
|
||||
# ``custom`` (manual base_url + OPENAI_API_KEY) but no plain ``openai`` for
|
||||
# direct API-key access. Users predictably type ``provider: openai`` and
|
||||
# expect it to use OPENAI_API_KEY against api.openai.com. Previously this
|
||||
# silently fell back to the user's main provider, sending OpenAI model names
|
||||
# to e.g. DeepSeek and producing cryptic ``unknown variant 'image_url'``
|
||||
# errors (issue #31179).
|
||||
_AUX_DIRECT_API_BASE_URLS: Dict[str, str] = {
|
||||
"openai": "https://api.openai.com/v1",
|
||||
}
|
||||
|
||||
|
||||
def _resolve_task_provider_model(
|
||||
task: str = None,
|
||||
provider: str = None,
|
||||
@@ -4382,25 +4317,6 @@ def _resolve_task_provider_model(
|
||||
resolved_model = model or cfg_model
|
||||
resolved_api_mode = cfg_api_mode
|
||||
|
||||
# Convenience aliases for direct API-key endpoints that aren't first-class
|
||||
# providers (e.g. ``provider: openai`` → custom + api.openai.com/v1).
|
||||
# Applied to both explicit args and config-derived values. When the user
|
||||
# has already supplied a base_url we keep their endpoint but still rewrite
|
||||
# the provider to ``custom`` so resolution doesn't hit the
|
||||
# PROVIDER_REGISTRY-only path (which has no ``openai`` entry).
|
||||
def _expand_direct_api_alias(prov: Optional[str], existing_base: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
|
||||
if not prov:
|
||||
return prov, existing_base
|
||||
target_base = _AUX_DIRECT_API_BASE_URLS.get(prov.strip().lower())
|
||||
if target_base is None:
|
||||
return prov, existing_base
|
||||
return "custom", existing_base or target_base
|
||||
|
||||
if provider:
|
||||
provider, base_url = _expand_direct_api_alias(provider, base_url)
|
||||
if cfg_provider:
|
||||
cfg_provider, cfg_base_url = _expand_direct_api_alias(cfg_provider, cfg_base_url)
|
||||
|
||||
if base_url:
|
||||
return "custom", resolved_model, base_url, api_key, resolved_api_mode
|
||||
if provider:
|
||||
@@ -4428,17 +4344,7 @@ _DEFAULT_AUX_TIMEOUT = 30.0
|
||||
|
||||
|
||||
def _get_auxiliary_task_config(task: str) -> Dict[str, Any]:
|
||||
"""Return the config dict for auxiliary.<task>, or {} when unavailable.
|
||||
|
||||
For plugin-registered auxiliary tasks (see
|
||||
:meth:`hermes_cli.plugins.PluginContext.register_auxiliary_task`) the
|
||||
plugin's declared *defaults* are layered underneath the user's config
|
||||
so an unconfigured plugin task still works:
|
||||
|
||||
plugin defaults ← config.yaml auxiliary.<task> (user wins)
|
||||
|
||||
Built-in tasks ignore this path (their defaults live in DEFAULT_CONFIG).
|
||||
"""
|
||||
"""Return the config dict for auxiliary.<task>, or {} when unavailable."""
|
||||
if not task:
|
||||
return {}
|
||||
try:
|
||||
@@ -4448,27 +4354,7 @@ def _get_auxiliary_task_config(task: str) -> Dict[str, Any]:
|
||||
return {}
|
||||
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
|
||||
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
|
||||
if not isinstance(task_config, dict):
|
||||
task_config = {}
|
||||
|
||||
# Layer plugin-declared defaults underneath user config so
|
||||
# ctx.register_auxiliary_task(defaults={...}) takes effect without
|
||||
# forcing the user to write config.yaml entries.
|
||||
try:
|
||||
from hermes_cli.plugins import get_plugin_auxiliary_tasks
|
||||
for _entry in get_plugin_auxiliary_tasks():
|
||||
if _entry.get("key") == task:
|
||||
_defaults = _entry.get("defaults") or {}
|
||||
if isinstance(_defaults, dict):
|
||||
merged = dict(_defaults)
|
||||
merged.update(task_config)
|
||||
return merged
|
||||
break
|
||||
except Exception:
|
||||
# Plugin discovery failure must not break aux task config reads.
|
||||
pass
|
||||
|
||||
return task_config
|
||||
return task_config if isinstance(task_config, dict) else {}
|
||||
|
||||
|
||||
def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float:
|
||||
|
||||
@@ -115,10 +115,7 @@ _SKILL_REVIEW_PROMPT = (
|
||||
"Protected skills (DO NOT edit these):\n"
|
||||
" • Bundled skills (shipped with Hermes, e.g. 'hermes-agent').\n"
|
||||
" • Hub-installed skills (installed via 'hermes skills install').\n"
|
||||
"Pinned skills (marked via 'hermes curator pin') CAN be improved — "
|
||||
"pin only blocks deletion/archive/consolidation by the curator, not "
|
||||
"content updates. Patch them when a pitfall or missing step turns up, "
|
||||
"same as any other agent-created skill.\n"
|
||||
" • Pinned skills (marked via 'hermes curator pin').\n"
|
||||
"If the only skills that need updating are protected, say\n"
|
||||
"'Nothing to save.' and stop.\n\n"
|
||||
"Do NOT capture (these become persistent self-imposed constraints "
|
||||
@@ -201,10 +198,7 @@ _COMBINED_REVIEW_PROMPT = (
|
||||
"Protected skills (DO NOT edit these):\n"
|
||||
" • Bundled skills (shipped with Hermes, e.g. 'hermes-agent').\n"
|
||||
" • Hub-installed skills (installed via 'hermes skills install').\n"
|
||||
"Pinned skills (marked via 'hermes curator pin') CAN be improved — "
|
||||
"pin only blocks deletion/archive/consolidation by the curator, not "
|
||||
"content updates. Patch them when a pitfall or missing step turns up, "
|
||||
"same as any other agent-created skill.\n"
|
||||
" • Pinned skills (marked via 'hermes curator pin').\n"
|
||||
"If the only skills that need updating are protected, say\n"
|
||||
"'Nothing to save.' and stop.\n\n"
|
||||
"Do NOT capture as skills (these become persistent self-imposed "
|
||||
|
||||
@@ -91,55 +91,23 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
||||
provider fallback.
|
||||
"""
|
||||
result = {"response": None, "error": None}
|
||||
request_client_holder = {"client": None, "owner_tid": None}
|
||||
request_client_holder = {"client": None}
|
||||
request_client_lock = threading.Lock()
|
||||
|
||||
def _set_request_client(client):
|
||||
with request_client_lock:
|
||||
request_client_holder["client"] = client
|
||||
# #29507: stamp the owning thread so a stranger-thread interrupt
|
||||
# only shuts the connection down rather than racing the worker
|
||||
# for FD ownership during ``client.close()``.
|
||||
request_client_holder["owner_tid"] = threading.get_ident()
|
||||
return client
|
||||
|
||||
def _take_request_client():
|
||||
with request_client_lock:
|
||||
client = request_client_holder.get("client")
|
||||
request_client_holder["client"] = None
|
||||
request_client_holder["owner_tid"] = None
|
||||
return client
|
||||
|
||||
def _close_request_client_once(reason: str) -> None:
|
||||
# #29507: dispatch on the calling thread.
|
||||
#
|
||||
# When ``_call`` (the worker) reaches its ``finally`` it owns the
|
||||
# close and we pop + fully close as before. When a *stranger* thread
|
||||
# (the interrupt-check loop, the stale-call detector) drives the
|
||||
# close, only shut the sockets down so the worker's blocked
|
||||
# ``recv``/``send`` unwinds with an ``EPIPE`` / EOF — and let the
|
||||
# worker close ``client`` from its own thread on its way out. That
|
||||
# avoids the FD-recycling race where the kernel reassigned a
|
||||
# just-closed TLS socket FD to ``kanban.db``, and the still-live SSL
|
||||
# BIO on the worker thread then wrote a 24-byte TLS application-data
|
||||
# record into the SQLite header (#29507).
|
||||
with request_client_lock:
|
||||
request_client = request_client_holder.get("client")
|
||||
owner_tid = request_client_holder.get("owner_tid")
|
||||
stranger_thread = (
|
||||
request_client is not None
|
||||
and owner_tid is not None
|
||||
and owner_tid != threading.get_ident()
|
||||
)
|
||||
if not stranger_thread:
|
||||
# Owning thread (or no recorded owner) → pop and fully close.
|
||||
request_client_holder["client"] = None
|
||||
request_client_holder["owner_tid"] = None
|
||||
if request_client is None:
|
||||
return
|
||||
if stranger_thread:
|
||||
agent._abort_request_openai_client(request_client, reason=reason)
|
||||
else:
|
||||
request_client = _take_request_client()
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason=reason)
|
||||
|
||||
def _call():
|
||||
@@ -757,7 +725,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
||||
current_base_url = str(getattr(agent, "base_url", "") or "").rstrip("/").lower()
|
||||
fb_base_url_for_dedup = (fb.get("base_url") or "").strip().rstrip("/").lower()
|
||||
if fb_provider == current_provider and fb_model == current_model:
|
||||
logger.warning(
|
||||
logging.warning(
|
||||
"Fallback skip: chain entry %s/%s matches current provider/model",
|
||||
fb_provider, fb_model,
|
||||
)
|
||||
@@ -768,7 +736,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
||||
and fb_base_url_for_dedup == current_base_url
|
||||
and fb_model == current_model
|
||||
):
|
||||
logger.warning(
|
||||
logging.warning(
|
||||
"Fallback skip: chain entry base_url %s matches current backend",
|
||||
fb_base_url_for_dedup,
|
||||
)
|
||||
@@ -800,7 +768,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
||||
explicit_base_url=fb_base_url_hint,
|
||||
explicit_api_key=fb_api_key_hint)
|
||||
if fb_client is None:
|
||||
logger.warning(
|
||||
logging.warning(
|
||||
"Fallback to %s failed: provider not configured",
|
||||
fb_provider)
|
||||
return agent._try_activate_fallback() # try next in chain
|
||||
@@ -808,11 +776,8 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
||||
from hermes_cli.model_normalize import normalize_model_for_provider
|
||||
|
||||
fb_model = normalize_model_for_provider(fb_model, fb_provider)
|
||||
except Exception as _norm_err:
|
||||
logger.warning(
|
||||
"Could not normalize fallback model %r for provider %r: %s",
|
||||
fb_model, fb_provider, _norm_err,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Determine api_mode from provider / base URL / model
|
||||
fb_api_mode = "chat_completions"
|
||||
@@ -940,20 +905,19 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
||||
base_url=agent.base_url,
|
||||
api_key=getattr(agent, "api_key", ""), # callable preserved → call_llm
|
||||
provider=agent.provider,
|
||||
api_mode=agent.api_mode,
|
||||
)
|
||||
|
||||
agent._emit_status(
|
||||
f"🔄 Primary model failed — switching to fallback: "
|
||||
f"{fb_model} via {fb_provider}"
|
||||
)
|
||||
logger.info(
|
||||
logging.info(
|
||||
"Fallback activated: %s → %s (%s)",
|
||||
old_model, fb_model, fb_provider,
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error("Failed to activate fallback %s: %s", fb_model, e)
|
||||
logging.error("Failed to activate fallback %s: %s", fb_model, e)
|
||||
return agent._try_activate_fallback() # try next in chain
|
||||
|
||||
|
||||
@@ -1169,7 +1133,7 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
|
||||
final_response = "I reached the iteration limit and couldn't generate a summary."
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get summary response: {e}")
|
||||
logging.warning(f"Failed to get summary response: {e}")
|
||||
final_response = f"I reached the maximum iterations ({agent.max_iterations}) but couldn't summarize. Error: {str(e)}"
|
||||
|
||||
return final_response
|
||||
@@ -1198,12 +1162,12 @@ def cleanup_task_resources(agent, task_id: str) -> None:
|
||||
_ra().cleanup_vm(task_id)
|
||||
except Exception as e:
|
||||
if agent.verbose_logging:
|
||||
logger.warning(f"Failed to cleanup VM for task {task_id}: {e}")
|
||||
logging.warning(f"Failed to cleanup VM for task {task_id}: {e}")
|
||||
try:
|
||||
_ra().cleanup_browser(task_id)
|
||||
except Exception as e:
|
||||
if agent.verbose_logging:
|
||||
logger.warning(f"Failed to cleanup browser for task {task_id}: {e}")
|
||||
logging.warning(f"Failed to cleanup browser for task {task_id}: {e}")
|
||||
|
||||
|
||||
|
||||
@@ -1307,44 +1271,23 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
return result["response"]
|
||||
|
||||
result = {"response": None, "error": None, "partial_tool_names": []}
|
||||
request_client_holder = {"client": None, "diag": None, "owner_tid": None}
|
||||
request_client_holder = {"client": None, "diag": None}
|
||||
request_client_lock = threading.Lock()
|
||||
|
||||
def _set_request_client(client):
|
||||
with request_client_lock:
|
||||
request_client_holder["client"] = client
|
||||
# See #29507 explanation in the non-streaming variant above.
|
||||
request_client_holder["owner_tid"] = threading.get_ident()
|
||||
return client
|
||||
|
||||
def _take_request_client():
|
||||
with request_client_lock:
|
||||
client = request_client_holder.get("client")
|
||||
request_client_holder["client"] = None
|
||||
request_client_holder["owner_tid"] = None
|
||||
return client
|
||||
|
||||
def _close_request_client_once(reason: str) -> None:
|
||||
# See #29507 explanation in the non-streaming variant above. A
|
||||
# stranger thread (the interrupt-check / stale-stream detector loop)
|
||||
# only aborts sockets — never pops, never calls ``client.close()`` —
|
||||
# so the worker thread retains ownership of the FD release.
|
||||
with request_client_lock:
|
||||
request_client = request_client_holder.get("client")
|
||||
owner_tid = request_client_holder.get("owner_tid")
|
||||
stranger_thread = (
|
||||
request_client is not None
|
||||
and owner_tid is not None
|
||||
and owner_tid != threading.get_ident()
|
||||
)
|
||||
if not stranger_thread:
|
||||
request_client_holder["client"] = None
|
||||
request_client_holder["owner_tid"] = None
|
||||
if request_client is None:
|
||||
return
|
||||
if stranger_thread:
|
||||
agent._abort_request_openai_client(request_client, reason=reason)
|
||||
else:
|
||||
request_client = _take_request_client()
|
||||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason=reason)
|
||||
|
||||
first_delta_fired = {"done": False}
|
||||
@@ -2077,21 +2020,8 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
# Streaming failed AFTER some tokens were already delivered to
|
||||
# the platform. Re-raising would let the outer retry loop make
|
||||
# a new API call, creating a duplicate message. Return a
|
||||
# partial response stub instead and let the outer loop decide:
|
||||
#
|
||||
# - text-only partials → finish_reason="length" so the
|
||||
# conversation loop persists the partial assistant content
|
||||
# and asks the model to continue from where the stream
|
||||
# died (issue #30963: partial stop misclassified as a
|
||||
# clean completion was exiting the loop with budget
|
||||
# remaining and an unfinished goal).
|
||||
#
|
||||
# - partial mid-tool-call → finish_reason="stop" stays.
|
||||
# The user-visible warning we append says "Ask me to
|
||||
# retry if you want to continue", so the agent should
|
||||
# hand control back rather than auto-retry a tool call
|
||||
# that may have side-effects.
|
||||
#
|
||||
# partial "stop" response instead so the outer loop treats this
|
||||
# turn as complete (no retry, no fallback).
|
||||
# Recover whatever content was already streamed to the user.
|
||||
# _current_streamed_assistant_text accumulates text fired
|
||||
# through _fire_stream_delta, so it has exactly what the
|
||||
@@ -2129,17 +2059,14 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
"of text; surfaced warning to user: %s",
|
||||
_partial_names, len(_partial_text or ""), result["error"],
|
||||
)
|
||||
_stub_finish_reason = "stop"
|
||||
else:
|
||||
logger.warning(
|
||||
"Partial stream delivered before error; returning "
|
||||
"length-truncated stub with %s chars of recovered "
|
||||
"content so the loop can continue from where the "
|
||||
"stream died: %s",
|
||||
"Partial stream delivered before error; returning stub "
|
||||
"response with %s chars of recovered content to prevent "
|
||||
"duplicate messages: %s",
|
||||
len(_partial_text or ""),
|
||||
result["error"],
|
||||
)
|
||||
_stub_finish_reason = "length"
|
||||
_stub_msg = SimpleNamespace(
|
||||
role="assistant", content=_partial_text, tool_calls=None,
|
||||
reasoning_content=None,
|
||||
@@ -2148,7 +2075,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
id="partial-stream-stub",
|
||||
model=getattr(agent, "model", "unknown"),
|
||||
choices=[SimpleNamespace(
|
||||
index=0, message=_stub_msg, finish_reason=_stub_finish_reason,
|
||||
index=0, message=_stub_msg, finish_reason="stop",
|
||||
)],
|
||||
usage=None,
|
||||
)
|
||||
|
||||
@@ -609,7 +609,6 @@ class ContextCompressor(ContextEngine):
|
||||
"""Update tracked token usage from API response."""
|
||||
self.last_prompt_tokens = usage.get("prompt_tokens", 0)
|
||||
self.last_completion_tokens = usage.get("completion_tokens", 0)
|
||||
self.last_total_tokens = usage.get("total_tokens", self.last_prompt_tokens + self.last_completion_tokens)
|
||||
|
||||
def should_compress(self, prompt_tokens: int = None) -> bool:
|
||||
"""Check if context exceeds the compression threshold.
|
||||
@@ -898,7 +897,7 @@ class ContextCompressor(ContextEngine):
|
||||
into the warning log.
|
||||
"""
|
||||
self._summary_model_fallen_back = True
|
||||
logger.warning(
|
||||
logging.warning(
|
||||
"Summary model '%s' %s (%s). "
|
||||
"Falling back to main model '%s' for compression.",
|
||||
self.summary_model, reason, e, self.model,
|
||||
@@ -1087,7 +1086,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
# No provider configured — long cooldown, unlikely to self-resolve
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
||||
self._last_summary_error = "no auxiliary LLM provider configured"
|
||||
logger.warning("Context compression: no provider available for "
|
||||
logging.warning("Context compression: no provider available for "
|
||||
"summary. Middle turns will be dropped without summary "
|
||||
"for %d seconds.",
|
||||
_SUMMARY_FAILURE_COOLDOWN_SECONDS)
|
||||
@@ -1183,7 +1182,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
if len(err_text) > 220:
|
||||
err_text = err_text[:217].rstrip() + "..."
|
||||
self._last_summary_error = err_text
|
||||
logger.warning(
|
||||
logging.warning(
|
||||
"Failed to generate context summary: %s. "
|
||||
"Further summary attempts paused for %d seconds.",
|
||||
e,
|
||||
|
||||
@@ -200,7 +200,6 @@ class ContextEngine(ABC):
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
provider: str = "",
|
||||
api_mode: str = "",
|
||||
) -> None:
|
||||
"""Called when the user switches models or on fallback activation.
|
||||
|
||||
|
||||
@@ -381,12 +381,12 @@ def compress_context(
|
||||
agent._session_db.end_session(agent.session_id, "compression")
|
||||
old_session_id = agent.session_id
|
||||
agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
os.environ["HERMES_SESSION_ID"] = agent.session_id
|
||||
try:
|
||||
from gateway.session_context import set_current_session_id
|
||||
|
||||
set_current_session_id(agent.session_id)
|
||||
from gateway.session_context import _SESSION_ID
|
||||
_SESSION_ID.set(agent.session_id)
|
||||
except Exception:
|
||||
os.environ["HERMES_SESSION_ID"] = agent.session_id
|
||||
pass
|
||||
agent._session_db_created = False
|
||||
agent._session_db.create_session(
|
||||
session_id=agent.session_id,
|
||||
@@ -504,7 +504,7 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
|
||||
return False
|
||||
|
||||
try:
|
||||
from tools.vision_tools import _resize_image_for_vision
|
||||
from tools.vision_tools import _resize_image_for_vision, _is_anthropic_provider
|
||||
except Exception as exc:
|
||||
logger.warning("image-shrink recovery: vision_tools unavailable — %s", exc)
|
||||
return False
|
||||
@@ -546,6 +546,7 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
|
||||
Path(tmp.name),
|
||||
mime_type=mime,
|
||||
max_base64_bytes=target_bytes,
|
||||
clamp_dimensions=_is_anthropic_provider(),
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
|
||||
+25
-189
@@ -46,7 +46,6 @@ from agent.message_sanitization import (
|
||||
_strip_non_ascii,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
estimate_messages_tokens_rough,
|
||||
estimate_request_tokens_rough,
|
||||
get_next_probe_tier,
|
||||
@@ -74,50 +73,6 @@ from utils import base_url_host_matches, env_var_enabled
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
|
||||
"""Return a user-facing error when Ollama is loaded with too little context."""
|
||||
if not getattr(agent, "tools", None):
|
||||
return None
|
||||
|
||||
runtime_ctx = getattr(agent, "_ollama_num_ctx", None)
|
||||
if not isinstance(runtime_ctx, int) or runtime_ctx <= 0:
|
||||
return None
|
||||
if runtime_ctx >= MINIMUM_CONTEXT_LENGTH:
|
||||
return None
|
||||
|
||||
model = getattr(agent, "model", "") or "the selected model"
|
||||
base_url = getattr(agent, "base_url", "") or "unknown base URL"
|
||||
provider = getattr(agent, "provider", "") or "unknown"
|
||||
tool_count = len(getattr(agent, "tools", None) or [])
|
||||
|
||||
logger.warning(
|
||||
"Ollama runtime context too small for Hermes tool use: "
|
||||
"model=%s provider=%s base_url=%s runtime_context=%d "
|
||||
"minimum_context=%d estimated_request_tokens=%d tool_count=%d "
|
||||
"session=%s",
|
||||
model,
|
||||
provider,
|
||||
base_url,
|
||||
runtime_ctx,
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
request_tokens,
|
||||
tool_count,
|
||||
getattr(agent, "session_id", None) or "none",
|
||||
)
|
||||
|
||||
return (
|
||||
f"Ollama loaded `{model}` with only {runtime_ctx:,} tokens of runtime "
|
||||
f"context, but Hermes needs at least {MINIMUM_CONTEXT_LENGTH:,} tokens "
|
||||
"for reliable tool use.\n\n"
|
||||
"Increase the Ollama context for this model and restart/reload the "
|
||||
"model before trying again. A known-good starting point is 65,536 "
|
||||
"tokens. In Hermes config, set `model.ollama_num_ctx: 65536` "
|
||||
"(and `model.context_length: 65536` if you also override the displayed "
|
||||
"model context). If you manage the model through an Ollama Modelfile, "
|
||||
"set `PARAMETER num_ctx 65536` there instead."
|
||||
)
|
||||
|
||||
|
||||
def _ra():
|
||||
"""Lazy reference to ``run_agent`` so callers can patch
|
||||
``run_agent.handle_function_call`` / ``run_agent._set_interrupt`` /
|
||||
@@ -572,7 +527,6 @@ def run_conversation(
|
||||
api_call_count = 0
|
||||
final_response = None
|
||||
interrupted = False
|
||||
failed = False
|
||||
codex_ack_continuations = 0
|
||||
length_continue_retries = 0
|
||||
truncated_tool_call_retries = 0
|
||||
@@ -929,26 +883,6 @@ def run_conversation(
|
||||
# Calculate approximate request size for logging
|
||||
total_chars = sum(len(str(msg)) for msg in api_messages)
|
||||
approx_tokens = estimate_messages_tokens_rough(api_messages)
|
||||
approx_request_tokens = estimate_request_tokens_rough(
|
||||
api_messages, tools=agent.tools or None
|
||||
)
|
||||
|
||||
_runtime_context_error = _ollama_context_limit_error(
|
||||
agent, approx_request_tokens
|
||||
)
|
||||
if _runtime_context_error:
|
||||
final_response = _runtime_context_error
|
||||
failed = True
|
||||
_turn_exit_reason = "ollama_runtime_context_too_small"
|
||||
messages.append({"role": "assistant", "content": final_response})
|
||||
agent._emit_status("❌ Ollama runtime context is too small for Hermes tool use")
|
||||
api_call_count -= 1
|
||||
agent._api_call_count = api_call_count
|
||||
try:
|
||||
agent.iteration_budget.refund()
|
||||
except Exception:
|
||||
pass
|
||||
break
|
||||
|
||||
# Thinking spinner for quiet mode (animated during API call)
|
||||
thinking_spinner = None
|
||||
@@ -989,7 +923,6 @@ def run_conversation(
|
||||
copilot_auth_retry_attempted=False
|
||||
thinking_sig_retry_attempted = False
|
||||
image_shrink_retry_attempted = False
|
||||
multimodal_tool_content_retry_attempted = False
|
||||
oauth_1m_beta_retry_attempted = False
|
||||
llama_cpp_grammar_retry_attempted = False
|
||||
has_retried_429 = False
|
||||
@@ -1183,7 +1116,7 @@ def run_conversation(
|
||||
else str(_codex_error_obj) if _codex_error_obj
|
||||
else f"Responses API returned status '{_codex_resp_status}'"
|
||||
)
|
||||
logger.warning(
|
||||
logging.warning(
|
||||
"Codex response status='%s' (error=%s). Routing to fallback. %s",
|
||||
_codex_resp_status, _codex_error_msg,
|
||||
agent._client_log_context(),
|
||||
@@ -1335,7 +1268,7 @@ def run_conversation(
|
||||
primary_recovery_attempted = False
|
||||
continue
|
||||
agent._emit_status(f"❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
|
||||
logger.error(f"{agent.log_prefix}Invalid API response after {max_retries} retries.")
|
||||
logging.error(f"{agent.log_prefix}Invalid API response after {max_retries} retries.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"messages": messages,
|
||||
@@ -1348,7 +1281,7 @@ def run_conversation(
|
||||
# Backoff before retry — jittered exponential: 5s base, 120s cap
|
||||
wait_time = jittered_backoff(retry_count, base_delay=5.0, max_delay=120.0)
|
||||
agent._vprint(f"{agent.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...", force=True)
|
||||
logger.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
|
||||
logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
|
||||
|
||||
# Sleep in small increments to stay responsive to interrupts
|
||||
sleep_end = time.time() + wait_time
|
||||
@@ -1414,18 +1347,7 @@ def run_conversation(
|
||||
finish_reason = "length"
|
||||
|
||||
if finish_reason == "length":
|
||||
if getattr(response, "id", "") == "partial-stream-stub":
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}⚠️ Stream interrupted by network error "
|
||||
f"(finish_reason='length' on partial-stream-stub)",
|
||||
force=True,
|
||||
)
|
||||
else:
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}⚠️ Response truncated "
|
||||
f"(finish_reason='length') - model hit max output tokens",
|
||||
force=True,
|
||||
)
|
||||
agent._vprint(f"{agent.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True)
|
||||
|
||||
# Normalize the truncated response to a single OpenAI-style
|
||||
# message shape so text-continuation and tool-call retry
|
||||
@@ -1518,40 +1440,17 @@ def run_conversation(
|
||||
truncated_response_parts.append(assistant_message.content)
|
||||
|
||||
if length_continue_retries < 3:
|
||||
# Distinguish a real output-token truncation
|
||||
# from a partial-stream-stub network error
|
||||
# (#30963). Same continuation machinery,
|
||||
# but the prompt has to tell the truth or
|
||||
# the model goes off rails ("I wasn't
|
||||
# truncated, I'm done").
|
||||
_is_partial_stream_stub = (
|
||||
getattr(response, "id", "") == "partial-stream-stub"
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}↻ Requesting continuation "
|
||||
f"({length_continue_retries}/3)..."
|
||||
)
|
||||
if _is_partial_stream_stub:
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}↻ Stream interrupted — "
|
||||
f"requesting continuation "
|
||||
f"({length_continue_retries}/3)..."
|
||||
)
|
||||
_continue_content = (
|
||||
"[System: The previous response was cut off by a "
|
||||
"network error mid-stream. Continue exactly where "
|
||||
"you left off. Do not restart or repeat prior text. "
|
||||
"Finish the answer directly.]"
|
||||
)
|
||||
else:
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}↻ Requesting continuation "
|
||||
f"({length_continue_retries}/3)..."
|
||||
)
|
||||
_continue_content = (
|
||||
continue_msg = {
|
||||
"role": "user",
|
||||
"content": (
|
||||
"[System: Your previous response was truncated by the output "
|
||||
"length limit. Continue exactly where you left off. Do not "
|
||||
"restart or repeat prior text. Finish the answer directly.]"
|
||||
)
|
||||
continue_msg = {
|
||||
"role": "user",
|
||||
"content": _continue_content,
|
||||
),
|
||||
}
|
||||
messages.append(continue_msg)
|
||||
agent._session_messages = messages
|
||||
@@ -2095,31 +1994,6 @@ def run_conversation(
|
||||
"or shrink didn't reduce size; surfacing original error."
|
||||
)
|
||||
|
||||
# Multimodal-tool-content recovery: providers that follow
|
||||
# the OpenAI spec strictly (tool message content must be a
|
||||
# string) reject our list-type content with a 400. Strip
|
||||
# image parts from any list-type tool messages, mark the
|
||||
# (provider, model) as no-list-tool-content for the rest
|
||||
# of this session so future tool results preemptively
|
||||
# downgrade, and retry once. See issue #27344.
|
||||
if (
|
||||
classified.reason == FailoverReason.multimodal_tool_content_unsupported
|
||||
and not multimodal_tool_content_retry_attempted
|
||||
):
|
||||
multimodal_tool_content_retry_attempted = True
|
||||
if agent._try_strip_image_parts_from_tool_messages(api_messages):
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}📐 Provider rejected list-type tool content — "
|
||||
f"downgraded screenshots to text and retrying...",
|
||||
force=True,
|
||||
)
|
||||
continue
|
||||
else:
|
||||
logger.info(
|
||||
"multimodal-tool-content recovery: no list-type tool "
|
||||
"messages with image parts found; surfacing original error."
|
||||
)
|
||||
|
||||
# Anthropic OAuth subscription rejected the 1M-context beta
|
||||
# header ("long context beta is not yet available for this
|
||||
# subscription"). Disable the beta for the rest of this
|
||||
@@ -2259,7 +2133,7 @@ def run_conversation(
|
||||
f"stripped all thinking blocks, retrying...",
|
||||
force=True,
|
||||
)
|
||||
logger.warning(
|
||||
logging.warning(
|
||||
"%sThinking block signature recovery: stripped "
|
||||
"reasoning_details from %d messages",
|
||||
agent.log_prefix, len(messages),
|
||||
@@ -2284,7 +2158,7 @@ def run_conversation(
|
||||
from tools.schema_sanitizer import strip_pattern_and_format
|
||||
_, _stripped = strip_pattern_and_format(agent.tools)
|
||||
except Exception as _strip_exc: # pragma: no cover — defensive
|
||||
logger.warning(
|
||||
logging.warning(
|
||||
"%sllama.cpp grammar recovery: strip helper failed: %s",
|
||||
agent.log_prefix, _strip_exc,
|
||||
)
|
||||
@@ -2295,7 +2169,7 @@ def run_conversation(
|
||||
f"stripped {_stripped} pattern/format keyword(s), retrying...",
|
||||
force=True,
|
||||
)
|
||||
logger.warning(
|
||||
logging.warning(
|
||||
"%sllama.cpp grammar recovery: stripped %d "
|
||||
"pattern/format keyword(s) from tool schemas",
|
||||
agent.log_prefix, _stripped,
|
||||
@@ -2303,7 +2177,7 @@ def run_conversation(
|
||||
continue
|
||||
# No keywords found to strip — fall through to normal
|
||||
# retry path rather than loop forever on the same error.
|
||||
logger.warning(
|
||||
logging.warning(
|
||||
"%sllama.cpp grammar error but no pattern/format "
|
||||
"keywords to strip — falling through to normal retry",
|
||||
agent.log_prefix,
|
||||
@@ -2404,7 +2278,6 @@ def run_conversation(
|
||||
base_url=agent.base_url,
|
||||
api_key=getattr(agent, "api_key", ""),
|
||||
provider=agent.provider,
|
||||
api_mode=agent.api_mode,
|
||||
)
|
||||
# Context probing flags — only set on built-in
|
||||
# compressor (plugin engines manage their own).
|
||||
@@ -2518,7 +2391,7 @@ def run_conversation(
|
||||
error_context=error_context,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
logging.info(
|
||||
"Nous 429 looks like upstream capacity "
|
||||
"(no exhausted bucket in headers or "
|
||||
"last-known state) -- not tripping "
|
||||
@@ -2578,7 +2451,7 @@ def run_conversation(
|
||||
if compression_attempts > max_compression_attempts:
|
||||
agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
|
||||
agent._vprint(f"{agent.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logger.error(f"{agent.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
|
||||
logging.error(f"{agent.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"messages": messages,
|
||||
@@ -2609,7 +2482,7 @@ def run_conversation(
|
||||
else:
|
||||
agent._vprint(f"{agent.log_prefix}❌ Payload too large and cannot compress further.", force=True)
|
||||
agent._vprint(f"{agent.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logger.error(f"{agent.log_prefix}413 payload too large. Cannot compress further.")
|
||||
logging.error(f"{agent.log_prefix}413 payload too large. Cannot compress further.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"messages": messages,
|
||||
@@ -2662,7 +2535,7 @@ def run_conversation(
|
||||
if compression_attempts > max_compression_attempts:
|
||||
agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
|
||||
agent._vprint(f"{agent.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logger.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
|
||||
logging.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"messages": messages,
|
||||
@@ -2714,7 +2587,6 @@ def run_conversation(
|
||||
base_url=agent.base_url,
|
||||
api_key=getattr(agent, "api_key", ""),
|
||||
provider=agent.provider,
|
||||
api_mode=agent.api_mode,
|
||||
)
|
||||
# Context probing flags — only set on built-in
|
||||
# compressor (plugin engines manage their own).
|
||||
@@ -2736,7 +2608,7 @@ def run_conversation(
|
||||
if compression_attempts > max_compression_attempts:
|
||||
agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
|
||||
agent._vprint(f"{agent.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logger.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
|
||||
logging.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"messages": messages,
|
||||
@@ -2769,7 +2641,7 @@ def run_conversation(
|
||||
# Can't compress further and already at minimum tier
|
||||
agent._vprint(f"{agent.log_prefix}❌ Context length exceeded and cannot compress further.", force=True)
|
||||
agent._vprint(f"{agent.log_prefix} 💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.", force=True)
|
||||
logger.error(f"{agent.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
|
||||
logging.error(f"{agent.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"messages": messages,
|
||||
@@ -2806,21 +2678,6 @@ def run_conversation(
|
||||
# retryable=True mapping takes effect instead.
|
||||
and not isinstance(api_error, ssl.SSLError)
|
||||
)
|
||||
# ``FailoverReason.billing`` (HTTP 402) is NOT in this
|
||||
# exclusion set. By the time we reach this block:
|
||||
# • credential-pool rotation (line ~2031) has already
|
||||
# fired for billing and either ``continue``d or
|
||||
# returned (False, ...) — pool is exhausted or absent.
|
||||
# • the eager-fallback branch above (line ~2422) also
|
||||
# fires on billing and ``continue``s if a fallback
|
||||
# provider is configured.
|
||||
# Falling through to here means BOTH recovery paths
|
||||
# gave up. Treating 402 as retryable from this point
|
||||
# just burns more paid requests against a depleted
|
||||
# balance with no recovery mechanism left — see #31273
|
||||
# (real-world: ~$40 in 48h on a 24/7 gateway). Aborting
|
||||
# mirrors how 401/403 (also ``should_fallback=True``)
|
||||
# already behave once their recovery paths have failed.
|
||||
is_client_error = (
|
||||
is_local_validation_error
|
||||
or (
|
||||
@@ -2828,6 +2685,7 @@ def run_conversation(
|
||||
and not classified.should_compress
|
||||
and classified.reason not in {
|
||||
FailoverReason.rate_limit,
|
||||
FailoverReason.billing,
|
||||
FailoverReason.overloaded,
|
||||
FailoverReason.context_overflow,
|
||||
FailoverReason.payload_too_large,
|
||||
@@ -2876,7 +2734,7 @@ def run_conversation(
|
||||
agent._vprint(f"{agent.log_prefix} • Check credits: https://openrouter.ai/settings/credits", force=True)
|
||||
else:
|
||||
agent._vprint(f"{agent.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True)
|
||||
logger.error(f"{agent.log_prefix}Non-retryable client error: {api_error}")
|
||||
logging.error(f"{agent.log_prefix}Non-retryable client error: {api_error}")
|
||||
# Skip session persistence when the error is likely
|
||||
# context-overflow related (status 400 + large session).
|
||||
# Persisting the failed user message would make the
|
||||
@@ -2953,7 +2811,7 @@ def run_conversation(
|
||||
force=True,
|
||||
)
|
||||
|
||||
logger.error(
|
||||
logging.error(
|
||||
"%sAPI call failed after %s retries. %s | provider=%s model=%s msgs=%s tokens=~%s",
|
||||
agent.log_prefix, max_retries, _final_summary,
|
||||
_provider, _model, len(api_messages), f"{approx_tokens:,}",
|
||||
@@ -3484,19 +3342,6 @@ def run_conversation(
|
||||
f"⚠️ Tool guardrail halted {decision.tool_name}: {decision.code}"
|
||||
)
|
||||
messages.append({"role": "assistant", "content": final_response})
|
||||
# Emit the halt message to the client so it's not
|
||||
# indistinguishable from a crash. The stream display
|
||||
# was flushed (callback(None)) before tool execution,
|
||||
# but the callback is still alive — fire the text
|
||||
# through it so SSE/TUI clients see the explanation.
|
||||
if final_response:
|
||||
agent._safe_print(f"\n{final_response}\n")
|
||||
if agent.stream_delta_callback:
|
||||
try:
|
||||
agent.stream_delta_callback(final_response)
|
||||
agent.stream_delta_callback(None)
|
||||
except Exception:
|
||||
pass
|
||||
break
|
||||
|
||||
# Reset per-turn retry counters after successful tool
|
||||
@@ -4003,11 +3848,7 @@ def run_conversation(
|
||||
)
|
||||
|
||||
# Determine if conversation completed successfully
|
||||
completed = (
|
||||
final_response is not None
|
||||
and api_call_count < agent.max_iterations
|
||||
and not failed
|
||||
)
|
||||
completed = final_response is not None and api_call_count < agent.max_iterations
|
||||
|
||||
# Save trajectory if enabled. ``user_message`` may be a multimodal
|
||||
# list of parts; the trajectory format wants a plain string.
|
||||
@@ -4092,8 +3933,6 @@ def run_conversation(
|
||||
except Exception as _ver_err:
|
||||
logger.debug("file-mutation verifier footer failed: %s", _ver_err)
|
||||
|
||||
_response_transformed = False
|
||||
|
||||
# Plugin hook: transform_llm_output
|
||||
# Fired once per turn after the tool-calling loop completes.
|
||||
# Plugins can transform the LLM's output text before it's returned.
|
||||
@@ -4111,7 +3950,6 @@ def run_conversation(
|
||||
for _hook_result in _transform_results:
|
||||
if isinstance(_hook_result, str) and _hook_result:
|
||||
final_response = _hook_result
|
||||
_response_transformed = True
|
||||
break # First non-empty string wins
|
||||
except Exception as exc:
|
||||
logger.warning("transform_llm_output hook failed: %s", exc)
|
||||
@@ -4160,10 +3998,8 @@ def run_conversation(
|
||||
"api_calls": api_call_count,
|
||||
"completed": completed,
|
||||
"turn_exit_reason": _turn_exit_reason,
|
||||
"failed": failed,
|
||||
"partial": False, # True only when stopped due to invalid tool calls
|
||||
"interrupted": interrupted,
|
||||
"response_transformed": _response_transformed,
|
||||
"response_previewed": getattr(agent, "_response_was_previewed", False),
|
||||
"model": agent.model,
|
||||
"provider": agent.provider,
|
||||
|
||||
@@ -50,7 +50,6 @@ from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -177,9 +176,7 @@ def get_keep() -> int:
|
||||
|
||||
def _count_skill_files(base: Path) -> int:
|
||||
try:
|
||||
return sum(
|
||||
1 for p in base.rglob("SKILL.md") if not is_excluded_skill_path(p)
|
||||
)
|
||||
return sum(1 for _ in base.rglob("SKILL.md"))
|
||||
except OSError:
|
||||
return 0
|
||||
|
||||
|
||||
+6
-56
@@ -787,65 +787,33 @@ class KawaiiSpinner:
|
||||
# Cute tool message (completion line that replaces the spinner)
|
||||
# =========================================================================
|
||||
|
||||
_ERROR_SUFFIX_MAX_LEN = 48
|
||||
|
||||
|
||||
def _trim_error(msg: str) -> str:
|
||||
"""Shrink an error message for inline display in a tool status line.
|
||||
|
||||
Strips overly long absolute paths down to just the filename so the
|
||||
suffix stays readable on narrow terminals.
|
||||
"""
|
||||
msg = msg.strip()
|
||||
# Common case: "File not found: /very/long/absolute/path/foo.py"
|
||||
if "File not found:" in msg:
|
||||
_, _, tail = msg.partition("File not found:")
|
||||
tail = tail.strip()
|
||||
if "/" in tail:
|
||||
msg = f"File not found: {tail.rsplit('/', 1)[-1]}"
|
||||
if len(msg) > _ERROR_SUFFIX_MAX_LEN:
|
||||
msg = msg[: _ERROR_SUFFIX_MAX_LEN - 3] + "..."
|
||||
return msg
|
||||
|
||||
|
||||
def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
|
||||
"""Inspect a tool result string for signs of failure.
|
||||
|
||||
Returns ``(is_failure, suffix)`` where *suffix* is a short informational
|
||||
tag like ``" [exit 1]"`` for terminal failures, ``" [full]"`` for memory
|
||||
overflow, or a trimmed error message (``" [File not found: foo.py]"``).
|
||||
On success returns ``(False, "")``.
|
||||
Returns ``(is_failure, suffix)`` where *suffix* is an informational tag
|
||||
like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic
|
||||
failures. On success, returns ``(False, "")``.
|
||||
"""
|
||||
if result is None:
|
||||
return False, ""
|
||||
if file_mutation_result_landed(tool_name, result):
|
||||
return False, ""
|
||||
|
||||
data = safe_json_loads(result)
|
||||
|
||||
# Terminal: non-zero exit code is the canonical failure signal.
|
||||
if tool_name == "terminal":
|
||||
data = safe_json_loads(result)
|
||||
if isinstance(data, dict):
|
||||
exit_code = data.get("exit_code")
|
||||
if exit_code is not None and exit_code != 0:
|
||||
err_msg = data.get("error")
|
||||
if err_msg:
|
||||
return True, f" [{_trim_error(str(err_msg))}]"
|
||||
return True, f" [exit {exit_code}]"
|
||||
return False, ""
|
||||
|
||||
# Memory: distinguish "store full" from real errors.
|
||||
# Memory-specific: distinguish "full" from real errors
|
||||
if tool_name == "memory":
|
||||
data = safe_json_loads(result)
|
||||
if isinstance(data, dict):
|
||||
if data.get("success") is False and "exceed the limit" in data.get("error", ""):
|
||||
return True, " [full]"
|
||||
|
||||
# Structured error in JSON result (any tool that surfaces {"error": ...}).
|
||||
if isinstance(data, dict):
|
||||
err = data.get("error") or data.get("message")
|
||||
if err and (data.get("success") is False or "error" in data):
|
||||
return True, f" [{_trim_error(str(err))}]"
|
||||
|
||||
# Generic heuristic for non-terminal tools
|
||||
# Multimodal tool results (dicts with _multimodal=True) are not strings —
|
||||
# treat them as successes since failures would be JSON-encoded strings.
|
||||
@@ -953,29 +921,11 @@ def get_cute_tool_message(
|
||||
if tool_name == "todo":
|
||||
todos_arg = args.get("todos")
|
||||
merge = args.get("merge", False)
|
||||
# Parse result for completion progress
|
||||
total = 0
|
||||
done = 0
|
||||
if result:
|
||||
try:
|
||||
data = safe_json_loads(result)
|
||||
if data:
|
||||
s = data.get("summary", {})
|
||||
total = s.get("total", 0)
|
||||
done = s.get("completed", 0)
|
||||
except Exception:
|
||||
pass
|
||||
if todos_arg is None:
|
||||
if total > 0:
|
||||
return _wrap(f"┊ 📋 plan {done}/{total} task(s) {dur}")
|
||||
return _wrap(f"┊ 📋 plan reading tasks {dur}")
|
||||
elif merge:
|
||||
if total > 0 and done > 0:
|
||||
return _wrap(f"┊ 📋 plan update {done}/{total} ✓ {dur}")
|
||||
return _wrap(f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}")
|
||||
else:
|
||||
if total > 0 and done > 0:
|
||||
return _wrap(f"┊ 📋 plan {done}/{total} task(s) {dur}")
|
||||
return _wrap(f"┊ 📋 plan {len(todos_arg)} task(s) {dur}")
|
||||
if tool_name == "session_search":
|
||||
return _wrap(f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}")
|
||||
|
||||
@@ -50,7 +50,6 @@ class FailoverReason(enum.Enum):
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
multimodal_tool_content_unsupported = "multimodal_tool_content_unsupported" # Provider rejected list-type content in tool messages (e.g. Xiaomi MiMo) — downgrade to text and retry
|
||||
|
||||
# Provider-specific
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
@@ -166,32 +165,6 @@ _IMAGE_TOO_LARGE_PATTERNS = [
|
||||
# the likely culprit; we still try the shrink path before giving up.
|
||||
]
|
||||
|
||||
# Providers that follow the OpenAI spec strictly require tool message
|
||||
# ``content`` to be a string. Some (Anthropic native, Codex Responses,
|
||||
# Gemini native, first-party OpenAI) extend this to accept a content-parts
|
||||
# list (text + image_url) so screenshots from computer_use survive. Others
|
||||
# (Xiaomi MiMo, some Alibaba endpoints, a long tail of OpenAI-compatible
|
||||
# providers) reject the list with a 400 — the patterns below are the most
|
||||
# common error shapes we see. Recovery: strip image parts from tool
|
||||
# messages in-place, record the (provider, model) for the rest of the
|
||||
# session so we don't waste another call learning the same lesson, retry.
|
||||
#
|
||||
# See: https://github.com/NousResearch/hermes-agent/issues/27344
|
||||
_MULTIMODAL_TOOL_CONTENT_PATTERNS = [
|
||||
# Xiaomi MiMo: {"error":{"code":"400","message":"Param Incorrect","param":"text is not set"}}
|
||||
"text is not set",
|
||||
# Generic "tool message must be string" shapes
|
||||
"tool message content must be a string",
|
||||
"tool content must be a string",
|
||||
"tool message must be a string",
|
||||
# OpenAI-compat servers that reject list-type tool content with a
|
||||
# schema-validation message
|
||||
"expected string, got list",
|
||||
"expected string, got array",
|
||||
# Alibaba/DashScope variant
|
||||
"tool_call.content must be string",
|
||||
]
|
||||
|
||||
# Context overflow patterns
|
||||
_CONTEXT_OVERFLOW_PATTERNS = [
|
||||
"context length",
|
||||
@@ -240,24 +213,6 @@ _MODEL_NOT_FOUND_PATTERNS = [
|
||||
"unsupported model",
|
||||
]
|
||||
|
||||
# Request-validation patterns — the request is malformed and will fail
|
||||
# identically on every retry. Some OpenAI-compatible gateways (notably
|
||||
# codex.nekos.me) return these as 5xx instead of the standard 4xx, which
|
||||
# makes the generic "5xx → retryable server_error" rule misfire: the retry
|
||||
# loop hammers the same deterministic rejection 3+ times, then the
|
||||
# transport-recovery path resets the counter and does it again, producing
|
||||
# a request flood. When a 5xx body carries one of these unambiguous
|
||||
# request-validation signals, classify as a non-retryable format_error so
|
||||
# the loop fails fast and falls back instead of looping.
|
||||
_REQUEST_VALIDATION_PATTERNS = [
|
||||
"unknown parameter",
|
||||
"unsupported parameter",
|
||||
"unrecognized request argument",
|
||||
"invalid_request_error",
|
||||
"unknown_parameter",
|
||||
"unsupported_parameter",
|
||||
]
|
||||
|
||||
# OpenRouter aggregator policy-block patterns.
|
||||
#
|
||||
# When a user's OpenRouter account privacy setting (or a per-request
|
||||
@@ -763,23 +718,6 @@ def _classify_by_status(
|
||||
)
|
||||
|
||||
if status_code in {500, 502}:
|
||||
# Some OpenAI-compatible gateways return request-validation errors
|
||||
# with a 5xx status (codex.nekos.me returns 502 for unknown/
|
||||
# unsupported parameters). These are deterministic — every retry
|
||||
# gets the identical rejection — so the generic "5xx → retryable
|
||||
# server_error" rule turns one bad request into a retry flood.
|
||||
# Detect the unambiguous request-validation signals (in either the
|
||||
# message text or the structured error code) and fail fast.
|
||||
if (
|
||||
any(p in error_msg for p in _REQUEST_VALIDATION_PATTERNS)
|
||||
or error_code.lower() in {"invalid_request_error", "unknown_parameter",
|
||||
"unsupported_parameter"}
|
||||
):
|
||||
return result_fn(
|
||||
FailoverReason.format_error,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
return result_fn(FailoverReason.server_error, retryable=True)
|
||||
|
||||
if status_code in {503, 529}:
|
||||
@@ -843,19 +781,6 @@ def _classify_400(
|
||||
) -> ClassifiedError:
|
||||
"""Classify 400 Bad Request — context overflow, format error, or generic."""
|
||||
|
||||
# Multimodal tool content rejected from 400. Must be checked BEFORE
|
||||
# image_too_large because the recovery is different (strip image parts
|
||||
# from tool messages, mark the model as no-list-tool-content for the
|
||||
# rest of the session) and BEFORE context_overflow because some of the
|
||||
# patterns ("text is not set") are ambiguous in isolation but become
|
||||
# specific when combined with a 400 on a request known to contain
|
||||
# multimodal tool content.
|
||||
if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.multimodal_tool_content_unsupported,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
|
||||
# Must be checked BEFORE context_overflow because messages can trip both
|
||||
# patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
|
||||
@@ -997,13 +922,6 @@ def _classify_by_message(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Multimodal tool content patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.multimodal_tool_content_unsupported,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Image-too-large patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
|
||||
+11
-293
@@ -97,43 +97,6 @@ def is_write_denied(path: str) -> bool:
|
||||
if resolved.startswith(prefix):
|
||||
return True
|
||||
|
||||
# Hermes control-plane files: block both the ACTIVE profile's view
|
||||
# (hermes_home) AND the global root view. Without the root pass, a
|
||||
# profile-mode session leaves <root>/auth.json + <root>/config.yaml
|
||||
# writable — letting a prompt-injected write_file overwrite the global
|
||||
# files that every profile inherits from (same shape as #15981).
|
||||
control_file_names = ("auth.json", "config.yaml", "webhook_subscriptions.json")
|
||||
mcp_tokens_dir_name = "mcp-tokens"
|
||||
|
||||
hermes_dirs = []
|
||||
for base in (_hermes_home_path(), _hermes_root_path()):
|
||||
try:
|
||||
real = os.path.realpath(base)
|
||||
if real not in hermes_dirs:
|
||||
hermes_dirs.append(real)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
for base_real in hermes_dirs:
|
||||
for name in control_file_names:
|
||||
try:
|
||||
if resolved == os.path.realpath(os.path.join(base_real, name)):
|
||||
return True
|
||||
except Exception:
|
||||
continue
|
||||
try:
|
||||
mcp_real = os.path.realpath(os.path.join(base_real, mcp_tokens_dir_name))
|
||||
if resolved == mcp_real or resolved.startswith(mcp_real + os.sep):
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
pairing_real = os.path.realpath(os.path.join(base_real, "pairing"))
|
||||
if resolved == pairing_real or resolved.startswith(pairing_real + os.sep):
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
safe_root = get_safe_write_root()
|
||||
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
|
||||
return True
|
||||
@@ -142,266 +105,21 @@ def is_write_denied(path: str) -> bool:
|
||||
|
||||
|
||||
def get_read_block_error(path: str) -> Optional[str]:
|
||||
"""Return an error message when a read targets a denied Hermes path.
|
||||
|
||||
Two categories are blocked:
|
||||
|
||||
* Internal Hermes cache files under ``HERMES_HOME/skills/.hub`` —
|
||||
readable metadata that an attacker could use as a prompt-injection
|
||||
carrier.
|
||||
* Credential / secret stores under HERMES_HOME and the global Hermes
|
||||
root: ``auth.json``, ``auth.lock``, ``.anthropic_oauth.json``,
|
||||
``.env``, ``webhook_subscriptions.json``, and anything under
|
||||
``mcp-tokens/``. These hold plaintext provider keys, OAuth tokens,
|
||||
and HMAC secrets that the agent never needs to read directly —
|
||||
provider tools / gateway adapters consume them through internal
|
||||
channels.
|
||||
|
||||
**This is NOT a security boundary.** The terminal tool runs as the
|
||||
same OS user with shell access; the agent can still ``cat auth.json``
|
||||
or ``cat ~/.hermes/.env`` and exfiltrate the file. The read-deny exists
|
||||
as defense-in-depth that:
|
||||
|
||||
* Returns a clear error to models that respect tool denials, which
|
||||
empirically prompts most modern models to stop rather than reach
|
||||
for the shell.
|
||||
* Surfaces a visible audit trail when something tries to read
|
||||
credentials — easier to spot in logs than a generic ``cat``.
|
||||
|
||||
Treat any user-visible framing around this as "may help" rather than
|
||||
"stops attackers." A determined model or malicious instruction can
|
||||
always shell out.
|
||||
|
||||
Callers that resolve relative paths against a non-process cwd
|
||||
(e.g. ``TERMINAL_CWD`` in ``tools/file_tools.py``) MUST pre-resolve
|
||||
and pass the absolute path string. This function's own ``resolve()``
|
||||
is anchored at the Python process cwd, so a relative input like
|
||||
``"auth.json"`` would otherwise miss the denylist when the task's
|
||||
terminal cwd differs from the process cwd.
|
||||
"""
|
||||
"""Return an error message when a read targets internal Hermes cache files."""
|
||||
resolved = Path(path).expanduser().resolve()
|
||||
|
||||
# Resolve BOTH the active HERMES_HOME (profile-aware) AND the global
|
||||
# Hermes root so credential stores at <root>/auth.json etc. are also
|
||||
# blocked when running under a profile (HERMES_HOME points at
|
||||
# <root>/profiles/<name> in profile mode). Same shape as the write
|
||||
# deny widening (#15981, #14157).
|
||||
hermes_dirs: list[Path] = []
|
||||
for base in (_hermes_home_path(), _hermes_root_path()):
|
||||
hermes_home = _hermes_home_path().resolve()
|
||||
blocked_dirs = [
|
||||
hermes_home / "skills" / ".hub" / "index-cache",
|
||||
hermes_home / "skills" / ".hub",
|
||||
]
|
||||
for blocked in blocked_dirs:
|
||||
try:
|
||||
real = base.resolve()
|
||||
if real not in hermes_dirs:
|
||||
hermes_dirs.append(real)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# Skills .hub: prompt-injection carriers.
|
||||
for hd in hermes_dirs:
|
||||
blocked_dirs = [
|
||||
hd / "skills" / ".hub" / "index-cache",
|
||||
hd / "skills" / ".hub",
|
||||
]
|
||||
for blocked in blocked_dirs:
|
||||
try:
|
||||
resolved.relative_to(blocked)
|
||||
except ValueError:
|
||||
continue
|
||||
return (
|
||||
f"Access denied: {path} is an internal Hermes cache file "
|
||||
"and cannot be read directly to prevent prompt injection. "
|
||||
"Use the skills_list or skill_view tools instead."
|
||||
)
|
||||
|
||||
# Credential / secret stores. Exact-file matches under either
|
||||
# HERMES_HOME or <root>.
|
||||
credential_file_names = (
|
||||
"auth.json",
|
||||
"auth.lock",
|
||||
".anthropic_oauth.json",
|
||||
".env",
|
||||
"webhook_subscriptions.json",
|
||||
)
|
||||
for hd in hermes_dirs:
|
||||
for name in credential_file_names:
|
||||
try:
|
||||
blocked = (hd / name).resolve()
|
||||
except Exception:
|
||||
continue
|
||||
if resolved == blocked:
|
||||
return (
|
||||
f"Access denied: {path} is a Hermes credential store "
|
||||
"and cannot be read directly. Provider tools consume "
|
||||
"these credentials through internal channels. "
|
||||
"(Defense-in-depth — not a security boundary; the "
|
||||
"terminal tool can still bypass.)"
|
||||
)
|
||||
|
||||
# mcp-tokens/: directory prefix match — anything inside is OAuth
|
||||
# token material.
|
||||
for hd in hermes_dirs:
|
||||
try:
|
||||
mcp_tokens = (hd / "mcp-tokens").resolve()
|
||||
except Exception:
|
||||
continue
|
||||
if resolved == mcp_tokens:
|
||||
return (
|
||||
f"Access denied: {path} is the Hermes MCP token directory "
|
||||
"and cannot be read directly. (Defense-in-depth — not a "
|
||||
"security boundary; the terminal tool can still bypass.)"
|
||||
)
|
||||
try:
|
||||
resolved.relative_to(mcp_tokens)
|
||||
resolved.relative_to(blocked)
|
||||
except ValueError:
|
||||
continue
|
||||
return (
|
||||
f"Access denied: {path} is a Hermes MCP token file "
|
||||
"and cannot be read directly. (Defense-in-depth — not a "
|
||||
"security boundary; the terminal tool can still bypass.)"
|
||||
f"Access denied: {path} is an internal Hermes cache file "
|
||||
"and cannot be read directly to prevent prompt injection. "
|
||||
"Use the skills_list or skill_view tools instead."
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cross-profile write guard (#TBD)
|
||||
#
|
||||
# Hermes profiles are separate HERMES_HOME dirs under
|
||||
# ``<root>/profiles/<name>/``. Each profile has its own skills/, plugins/,
|
||||
# cron/, memories/. When an agent runs under one profile, writing into
|
||||
# ANOTHER profile's directories is almost always wrong — those skills /
|
||||
# plugins / cron jobs / memories affect a different session the user runs
|
||||
# from a different shell.
|
||||
#
|
||||
# Soft guard, NOT a security boundary: the agent runs as the same OS user
|
||||
# and has unrestricted terminal access, so this returns a warning the model
|
||||
# can choose to honor or override with ``cross_profile=True``. Same shape
|
||||
# as the dangerous-command approval flow — the agent is told the boundary
|
||||
# exists, and explicit user direction is required to cross it.
|
||||
#
|
||||
# Reference: May 2026 incident where a hermes-security profile session
|
||||
# edited skills under both ``~/.hermes/profiles/hermes-security/skills/``
|
||||
# AND ``~/.hermes/skills/`` (the default profile's skills) without realizing
|
||||
# the second path belonged to a different profile.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Profile-scoped directories under HERMES_HOME / <root> / <root>/profiles/<X>/
|
||||
# that should be guarded. Adding a new area here extends the guard with no
|
||||
# other code change.
|
||||
PROFILE_SCOPED_AREAS = ("skills", "plugins", "cron", "memories")
|
||||
|
||||
|
||||
def _resolve_active_profile_name() -> str:
|
||||
"""Return the active profile name derived from HERMES_HOME.
|
||||
|
||||
``~/.hermes`` -> ``"default"``
|
||||
``~/.hermes/profiles/X`` -> ``"X"``
|
||||
|
||||
Falls back to ``"default"`` on any resolution failure so the guard
|
||||
never raises into the tool path.
|
||||
"""
|
||||
try:
|
||||
home_real = _hermes_home_path().resolve()
|
||||
root_real = _hermes_root_path().resolve()
|
||||
except (OSError, RuntimeError):
|
||||
return "default"
|
||||
profiles_dir = root_real / "profiles"
|
||||
try:
|
||||
rel = home_real.relative_to(profiles_dir)
|
||||
parts = rel.parts
|
||||
if len(parts) >= 1:
|
||||
return parts[0]
|
||||
except ValueError:
|
||||
pass
|
||||
return "default"
|
||||
|
||||
|
||||
def classify_cross_profile_target(path: str) -> Optional[dict]:
    """Decide whether ``path`` points into another profile's scoped area.

    The path is user-expanded and resolved, then interpreted relative to
    the Hermes root:

    * ``<root>/<area>/...`` belongs to the ``"default"`` profile.
    * ``<root>/profiles/<name>/<area>/...`` belongs to profile ``<name>``.

    where ``<area>`` is one of ``PROFILE_SCOPED_AREAS``.

    Returns ``None`` when the path cannot be resolved, lies outside the
    Hermes root, does not hit a scoped area, or belongs to the profile the
    agent is currently running as. Otherwise returns a dict with the keys
    ``active_profile``, ``target_profile``, ``area`` and ``target_path``
    (the resolved path as a string). What to do with that information —
    warn, prompt, or proceed — is left to the caller.
    """
    try:
        resolved_target = Path(os.path.expanduser(str(path))).resolve()
        hermes_root = _hermes_root_path().resolve()
    except (OSError, RuntimeError):
        return None

    try:
        segments = resolved_target.relative_to(hermes_root).parts
    except ValueError:
        # Not under the Hermes root at all.
        return None
    if not segments:
        # The target is the Hermes root itself.
        return None

    head = segments[0]
    if head in PROFILE_SCOPED_AREAS:
        # ``<root>/<area>/...`` → default profile.
        owner, scoped_area = "default", head
    elif (
        head == "profiles"
        and len(segments) >= 3
        and segments[2] in PROFILE_SCOPED_AREAS
    ):
        # ``<root>/profiles/<name>/<area>/...`` → named profile.
        owner, scoped_area = segments[1], segments[2]
    else:
        return None

    running_as = _resolve_active_profile_name()
    if owner == running_as:
        # Writing inside the active profile is not a cross-profile event.
        return None

    return {
        "active_profile": running_as,
        "target_profile": owner,
        "area": scoped_area,
        "target_path": str(resolved_target),
    }
|
||||
|
||||
|
||||
def get_cross_profile_warning(path: str) -> Optional[str]:
    """Build the model-facing warning for a cross-profile write, if any.

    Delegates the decision to ``classify_cross_profile_target``. When that
    returns ``None`` (same profile, or not a profile-scoped Hermes path)
    this returns ``None`` too; otherwise it returns a message naming the
    resolved path, the owning profile, the active profile and the affected
    area, and explaining the ``cross_profile=True`` bypass.

    The guard is a confusion-reducer rather than a security boundary: the
    message itself states that the terminal tool can still bypass it.
    """
    verdict = classify_cross_profile_target(path)
    if verdict is None:
        return None

    target_path = verdict["target_path"]
    owner = verdict["target_profile"]
    running_as = verdict["active_profile"]
    area = verdict["area"]
    return (
        f"Cross-profile write blocked by soft guard: {target_path} "
        f"belongs to Hermes profile {owner!r}, but the "
        f"agent is running under profile {running_as!r}. "
        f"Editing another profile's {area}/ will affect that "
        "profile's future sessions, not the one you are currently in. "
        "Confirm with the user before proceeding. To bypass this guard "
        "after explicit user direction, retry the call with "
        "``cross_profile=True``. (Defense-in-depth — not a security "
        "boundary; the terminal tool can still bypass.)"
    )
|
||||
|
||||
@@ -209,7 +209,6 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
# via a custom provider. Values sourced from models.dev (2026-04).
|
||||
# Keys use substring matching (longest-first), so e.g. "grok-4.20"
|
||||
# matches "grok-4.20-0309-reasoning" / "-non-reasoning" / "-multi-agent-0309".
|
||||
"grok-build": 256000, # grok-build-0.1
|
||||
"grok-code-fast": 256000, # grok-code-fast-1
|
||||
"grok-4-1-fast": 2000000, # grok-4-1-fast-(non-)reasoning
|
||||
"grok-2-vision": 8192, # grok-2-vision, -1212, -latest
|
||||
@@ -641,7 +640,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
|
||||
return cache
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
|
||||
logging.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
|
||||
return _model_metadata_cache or {}
|
||||
|
||||
|
||||
|
||||
@@ -167,9 +167,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"gemini": "google",
|
||||
"google": "google",
|
||||
"xai": "xai",
|
||||
# xAI OAuth is an authentication/transport path for the same xAI model
|
||||
# catalog, so model metadata should resolve through the xAI provider.
|
||||
"xai-oauth": "xai",
|
||||
"xiaomi": "xiaomi",
|
||||
"nvidia": "nvidia",
|
||||
"groq": "groq",
|
||||
|
||||
@@ -176,15 +176,6 @@ _URL_USERINFO_RE = re.compile(
|
||||
r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
|
||||
)
|
||||
|
||||
# HTTP access logs often use a relative request target rather than a full URL:
|
||||
# `"POST /webhook?password=... HTTP/1.1"`. The full-URL redactor above only
|
||||
# sees strings containing `://`, so handle request-target query strings too.
|
||||
_HTTP_REQUEST_TARGET_QUERY_RE = re.compile(
|
||||
r"\b((?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS|TRACE|CONNECT)\s+[^ \t\r\n\"']*?)"
|
||||
r"\?([^ \t\r\n\"']+)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Form-urlencoded body detection: conservative — only applies when the entire
|
||||
# text looks like a query string (k=v&k=v pattern with no newlines).
|
||||
_FORM_BODY_RE = re.compile(
|
||||
@@ -302,15 +293,6 @@ def _redact_url_userinfo(text: str) -> str:
|
||||
)
|
||||
|
||||
|
||||
def _redact_http_request_target_query_params(text: str) -> str:
    """Redact query strings found in HTTP access-log request targets.

    Every match of ``_HTTP_REQUEST_TARGET_QUERY_RE`` is rewritten as the
    captured prefix (group 1), a literal ``?``, and the captured query
    string (group 2) passed through ``_redact_query_string``.
    """
    return _HTTP_REQUEST_TARGET_QUERY_RE.sub(
        lambda hit: f"{hit.group(1)}?{_redact_query_string(hit.group(2))}",
        text,
    )
|
||||
|
||||
|
||||
def _redact_form_body(text: str) -> str:
|
||||
"""Redact sensitive values in a form-urlencoded body.
|
||||
|
||||
@@ -415,11 +397,6 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
|
||||
if "?" in text:
|
||||
text = _redact_url_query_params(text)
|
||||
|
||||
# HTTP access logs can contain relative request targets with query params
|
||||
# and no URL scheme, e.g. `"POST /hook?password=... HTTP/1.1"`.
|
||||
if "?" in text and "=" in text and _has_http_method_substring(text):
|
||||
text = _redact_http_request_target_query_params(text)
|
||||
|
||||
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
|
||||
if "&" in text and "=" in text:
|
||||
text = _redact_form_body(text)
|
||||
@@ -479,25 +456,6 @@ def _has_known_prefix_substring(text: str) -> bool:
|
||||
return any(p in text for p in _PREFIX_SUBSTRINGS)
|
||||
|
||||
|
||||
_HTTP_METHOD_SUBSTRINGS = (
|
||||
"GET ",
|
||||
"POST ",
|
||||
"PUT ",
|
||||
"PATCH ",
|
||||
"DELETE ",
|
||||
"HEAD ",
|
||||
"OPTIONS ",
|
||||
"TRACE ",
|
||||
"CONNECT ",
|
||||
)
|
||||
|
||||
|
||||
def _has_http_method_substring(text: str) -> bool:
    """Report whether ``text`` contains any HTTP method token, ignoring case.

    This is a cheap substring pre-check meant to run before the more
    expensive access-log request-target scan; it compares the upper-cased
    text against ``_HTTP_METHOD_SUBSTRINGS``.
    """
    shouted = text.upper()
    for verb in _HTTP_METHOD_SUBSTRINGS:
        if verb in shouted:
            return True
    return False
|
||||
|
||||
|
||||
class RedactingFormatter(logging.Formatter):
|
||||
"""Log formatter that redacts secrets from all log messages."""
|
||||
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
"""External secret source integrations.
|
||||
|
||||
A secret source is anything that can supply environment-variable-shaped
|
||||
credentials at process startup, _after_ ~/.hermes/.env has loaded. By
|
||||
default sources are non-destructive: they only set values for env vars
|
||||
that aren't already present, so .env and shell exports continue to win.
|
||||
|
||||
Currently shipped:
|
||||
|
||||
- ``bitwarden`` — Bitwarden Secrets Manager (`bws` CLI). See
|
||||
``agent.secret_sources.bitwarden`` for the integration and
|
||||
``hermes_cli.secrets_cli`` for the user-facing setup wizard.
|
||||
"""
|
||||
@@ -1,535 +0,0 @@
|
||||
"""Bitwarden Secrets Manager (`bws` CLI) integration.
|
||||
|
||||
Hermes pulls API keys from Bitwarden Secrets Manager at process startup
|
||||
so they don't have to live in plaintext in ``~/.hermes/.env``.
|
||||
|
||||
Design summary
|
||||
--------------
|
||||
|
||||
* The ``bws`` binary is auto-installed into ``<hermes_home>/bin/bws`` on
|
||||
first use. Hermes pins one version (``_BWS_VERSION``) and downloads
|
||||
the matching asset from the official GitHub Releases page, verifying
|
||||
the SHA-256 against the release's published checksum file.
|
||||
* The access token is stored in ``~/.hermes/.env`` as
|
||||
``BWS_ACCESS_TOKEN`` (or whatever name the user picked in
|
||||
``secrets.bitwarden.access_token_env``). This is the one
|
||||
bootstrap secret — every other provider key can live in Bitwarden.
|
||||
* Pulling secrets is a single ``bws secret list <project_id>
|
||||
--output json`` call. We cache the result in-process for
|
||||
``cache_ttl_seconds`` so back-to-back ``hermes`` invocations don't
|
||||
hammer the API.
|
||||
* Failures NEVER block Hermes startup. Missing binary, no network,
|
||||
expired token, etc. all emit a one-line warning and continue with
|
||||
whatever credentials ``.env`` already had.
|
||||
|
||||
The module is intentionally subprocess-driven rather than going through
|
||||
the ``bitwarden-sdk-secrets`` Python package: one cross-platform binary
|
||||
is easier to lazy-install than a wheels-with-Rust-extension dependency.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import stat
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import zipfile
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Pinned upstream version. Bump in a follow-up PR — never auto-resolve
|
||||
# "latest" because upstream release shape (asset names, CLI flags) is
|
||||
# allowed to change between majors and we want updates to be deliberate.
|
||||
_BWS_VERSION = "2.0.0"
|
||||
|
||||
_BWS_RELEASE_BASE = (
|
||||
f"https://github.com/bitwarden/sdk-sm/releases/download/bws-v{_BWS_VERSION}"
|
||||
)
|
||||
_BWS_CHECKSUM_NAME = f"bws-sha256-checksums-{_BWS_VERSION}.txt"
|
||||
|
||||
# How long to wait for bws subprocesses and HTTP downloads, in seconds.
|
||||
_BWS_DOWNLOAD_TIMEOUT = 60
|
||||
_BWS_RUN_TIMEOUT = 30
|
||||
|
||||
# In-process cache so repeated load_hermes_dotenv() calls (CLI startup,
|
||||
# gateway hot-reload, test suites) don't re-fetch from BSM.
|
||||
_CacheKey = Tuple[str, str, str] # (access_token_fingerprint, project_id, server_url)
|
||||
_CACHE: Dict[_CacheKey, "_CachedFetch"] = {}
|
||||
|
||||
|
||||
@dataclass
class _CachedFetch:
    """A previously fetched secret mapping plus the time it was fetched."""

    # Secret name -> secret value, as returned by the fetch.
    secrets: Dict[str, str]
    # ``time.time()`` timestamp recorded when the fetch completed.
    fetched_at: float

    def is_fresh(self, ttl_seconds: float) -> bool:
        """Return True while this entry is younger than ``ttl_seconds``.

        A zero or negative TTL disables caching: the entry is never fresh.
        """
        if ttl_seconds > 0:
            age = time.time() - self.fetched_at
            return age < ttl_seconds
        return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public dataclasses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class FetchResult:
    """Summary of one attempt to pull secrets from Bitwarden Secrets Manager."""

    # Everything that was fetched (name -> value).
    secrets: Dict[str, str] = field(default_factory=dict)
    # Names that were written into os.environ.
    applied: List[str] = field(default_factory=list)
    # Names left alone because they were already set and not overridden.
    skipped: List[str] = field(default_factory=list)
    # Non-fatal issues encountered along the way.
    warnings: List[str] = field(default_factory=list)
    # Fatal problem description; when set, nothing was fetched.
    error: Optional[str] = None
    # The ``bws`` executable that was used, when one was found.
    binary_path: Optional[Path] = None

    @property
    def ok(self) -> bool:
        """True when no fatal error was recorded."""
        return self.error is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Binary discovery + lazy install
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _hermes_bin_dir() -> Path:
    """Return the ``bin`` directory under the current Hermes home.

    The home is looked up via ``hermes_constants.get_hermes_home`` on every
    call (imported lazily here), so the result follows whichever home that
    function reports.
    """
    from hermes_constants import get_hermes_home

    hermes_home = get_hermes_home()
    return hermes_home / "bin"
|
||||
|
||||
|
||||
def find_bws(*, install_if_missing: bool = False) -> Optional[Path]:
    """Locate a usable ``bws`` executable.

    Candidates are tried in this order:

    1. the Hermes-managed copy in ``<hermes_home>/bin`` (must exist and be
       executable),
    2. whatever ``shutil.which("bws")`` finds on ``PATH``.

    If neither is found and ``install_if_missing`` is true,
    :func:`install_bws` is attempted; any exception it raises is logged as
    a warning and turned into ``None`` so startup is never blocked.

    Returns the path to the binary, or ``None`` when none is available.
    """
    bundled = _hermes_bin_dir() / _platform_binary_name()
    if bundled.exists() and os.access(bundled, os.X_OK):
        return bundled

    on_path = shutil.which("bws")
    if on_path:
        return Path(on_path)

    if not install_if_missing:
        return None

    try:
        return install_bws()
    except Exception as exc:  # noqa: BLE001 — never block startup
        logger.warning("bws auto-install failed: %s", exc)
        return None
|
||||
|
||||
|
||||
def _platform_binary_name() -> str:
    """Return the executable file name for ``bws`` on this OS."""
    if platform.system() == "Windows":
        return "bws.exe"
    return "bws"
|
||||
|
||||
|
||||
def _platform_asset_name() -> str:
    """Pick the upstream release asset file name for this machine.

    * macOS always uses the universal zip.
    * Windows and Linux choose ``aarch64`` when ``platform.machine()``
      reports ``arm64``/``aarch64`` and ``x86_64`` otherwise.
    * Linux additionally chooses ``musl`` when the combined output of
      ``ldd --version`` mentions it, and ``gnu`` in every other case
      (including when ``ldd`` cannot be run or times out).

    Raises:
        RuntimeError: for any other operating system.
    """
    os_name = platform.system()
    cpu = platform.machine().lower()

    if os_name == "Darwin":
        # One universal binary covers both Intel and Apple Silicon.
        return f"bws-macos-universal-{_BWS_VERSION}.zip"

    if os_name not in ("Windows", "Linux"):
        raise RuntimeError(
            f"Unsupported platform for bws auto-install: {os_name} {cpu}"
        )

    arch = "aarch64" if cpu in ("arm64", "aarch64") else "x86_64"
    if os_name == "Windows":
        return f"bws-{arch}-pc-windows-msvc-{_BWS_VERSION}.zip"

    # Linux: default to glibc and only switch when ldd identifies musl.
    # Detection is best-effort; both output streams are inspected.
    libc = "gnu"
    try:
        probe = subprocess.run(
            ["ldd", "--version"],
            capture_output=True,
            text=True,
            timeout=2,
        )
    except (OSError, subprocess.TimeoutExpired):
        pass
    else:
        if "musl" in (probe.stdout + probe.stderr).lower():
            libc = "musl"
    return f"bws-{arch}-unknown-linux-{libc}-{_BWS_VERSION}.zip"
|
||||
|
||||
|
||||
def install_bws(*, force: bool = False) -> Path:
    """Download, verify, and install the pinned ``bws`` binary.

    The release zip and the release checksum file are downloaded into a
    temporary directory, the zip's SHA-256 is compared against the
    checksum entry for the asset, and the binary is extracted and moved
    into ``<hermes_home>/bin``.

    Args:
        force: Reinstall even when the target file already exists.

    Returns:
        The path to the installed executable.

    Raises:
        RuntimeError: on download failure, a missing checksum entry, a
            checksum mismatch, or an archive without the expected binary.
        OSError: on filesystem failures while extracting or installing.
    """
    bin_dir = _hermes_bin_dir()
    bin_dir.mkdir(parents=True, exist_ok=True)
    target = bin_dir / _platform_binary_name()

    if target.exists() and not force:
        return target

    asset_name = _platform_asset_name()
    asset_url = f"{_BWS_RELEASE_BASE}/{asset_name}"
    checksum_url = f"{_BWS_RELEASE_BASE}/{_BWS_CHECKSUM_NAME}"

    with tempfile.TemporaryDirectory(prefix="hermes-bws-") as tmpdir:
        tmp = Path(tmpdir)
        zip_path = tmp / asset_name
        checksum_path = tmp / _BWS_CHECKSUM_NAME

        logger.info("Downloading %s", asset_url)
        _http_download(asset_url, zip_path)
        _http_download(checksum_url, checksum_path)

        expected = _expected_sha256(checksum_path, asset_name)
        actual = _sha256_file(zip_path)
        if expected.lower() != actual.lower():
            raise RuntimeError(
                f"Checksum mismatch for {asset_name}: "
                f"expected {expected}, got {actual}"
            )

        with zipfile.ZipFile(zip_path) as zf:
            member = _pick_zip_member(zf, _platform_binary_name())
            # extract() sanitizes member names and returns the path it
            # actually wrote, which may differ from ``tmp / member``.
            extracted = Path(zf.extract(member, tmp))

        # Move into place atomically. We stage a sibling tempfile in the
        # final directory so the rename can't cross filesystems.
        fd, staged = tempfile.mkstemp(dir=str(bin_dir), prefix=".bws_")
        os.close(fd)
        try:
            shutil.copy2(extracted, staged)
            os.chmod(
                staged,
                stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
                | stat.S_IRGRP | stat.S_IXGRP
                | stat.S_IROTH | stat.S_IXOTH,
            )
            os.replace(staged, target)
        except BaseException:
            # Don't leave a stray ``.bws_*`` file behind in the bin dir
            # when staging fails part-way through.
            try:
                os.unlink(staged)
            except OSError:
                pass
            raise

    logger.info("Installed bws %s at %s", _BWS_VERSION, target)
    return target
|
||||
|
||||
|
||||
def _http_download(url: str, dest: Path) -> None:
    """Stream the response body of ``url`` into the file ``dest``.

    The request carries a ``hermes-agent`` User-Agent header and uses
    ``_BWS_DOWNLOAD_TIMEOUT`` as its timeout.

    Raises:
        RuntimeError: when urllib reports a ``URLError``; the original
            exception is chained as the cause.
    """
    request = urllib.request.Request(url, headers={"User-Agent": "hermes-agent"})
    try:
        with urllib.request.urlopen(  # noqa: S310
            request, timeout=_BWS_DOWNLOAD_TIMEOUT
        ) as response, open(dest, "wb") as sink:
            shutil.copyfileobj(response, sink)
    except urllib.error.URLError as exc:
        raise RuntimeError(f"Failed to download {url}: {exc}") from exc
|
||||
|
||||
|
||||
def _expected_sha256(checksum_file: Path, asset_name: str) -> str:
    """Look up the expected SHA-256 for ``asset_name`` in a checksum file.

    The file is expected to be in ``sha256sum`` output format, one entry
    per line: ``<hex> <filename>``. Entries written in binary mode, where
    the file name is prefixed with ``*`` (``<hex> *<filename>``), are
    accepted as well.

    Args:
        checksum_file: Path to the downloaded checksum listing.
        asset_name: Exact file name of the release asset to look up.

    Returns:
        The hex digest exactly as it appears in the file.

    Raises:
        RuntimeError: if no line names ``asset_name``.
    """
    text = checksum_file.read_text(encoding="utf-8", errors="replace")
    for line in text.splitlines():
        fields = line.split()
        if len(fields) < 2:
            continue
        listed_name = fields[-1]
        # sha256sum marks binary-mode entries with a leading '*'.
        if listed_name.startswith("*"):
            listed_name = listed_name[1:]
        if listed_name == asset_name:
            return fields[0]
    raise RuntimeError(
        f"No checksum entry for {asset_name} in {checksum_file.name}"
    )
|
||||
|
||||
|
||||
def _sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``path``.

    The file is read in 64 KiB chunks so large downloads are never held
    in memory all at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            piece = stream.read(65536)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def _pick_zip_member(zf: zipfile.ZipFile, binary_name: str) -> str:
    """Choose the archive member whose file name equals ``binary_name``.

    Members are matched on the last ``/``-separated path component, so a
    binary nested in a directory is accepted as well as one at the root.
    When several members match, the one with the shortest path wins.

    Raises:
        RuntimeError: if no member matches; the message lists up to the
            first five member names.
    """
    names = zf.namelist()
    matches = [entry for entry in names if entry.rsplit("/", 1)[-1] == binary_name]
    if not matches:
        raise RuntimeError(
            f"Could not find {binary_name} inside downloaded archive "
            f"(members: {names[:5]}...)"
        )
    # Shortest path first (root over nested) keeps the choice deterministic.
    return min(matches, key=len)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Secret fetch + apply
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _token_fingerprint(token: str) -> str:
    """Return the first 16 hex characters of the token's SHA-256 digest.

    The fingerprint stands in for the token inside cache keys, so the
    token itself is not kept there.
    """
    digest = hashlib.sha256(token.encode("utf-8"))
    return digest.hexdigest()[:16]
|
||||
|
||||
|
||||
def fetch_bitwarden_secrets(
    *,
    access_token: str,
    project_id: str,
    binary: Optional[Path] = None,
    cache_ttl_seconds: float = 300,
    use_cache: bool = True,
    server_url: str = "",
) -> Tuple[Dict[str, str], List[str]]:
    """Pull the secrets for ``project_id`` from Bitwarden Secrets Manager.

    Results are cached in-process, keyed by a fingerprint of the access
    token, the project id and the server URL. A cache hit returns a copy
    of the cached mapping with an empty warnings list.

    Args:
        access_token: Bitwarden access token; must be non-empty.
        project_id: Project whose secrets are listed; must be non-empty.
        binary: Path to the ``bws`` executable. When omitted, one is
            located via :func:`find_bws` with auto-install enabled.
        cache_ttl_seconds: Maximum age of a cache entry that may be reused.
        use_cache: When false, the cache is not consulted (the fresh
            result is still stored).
        server_url: Non-default Bitwarden region or self-hosted endpoint,
            e.g. ``https://vault.bitwarden.eu``. Passed to the subprocess
            as ``BWS_SERVER_URL``; empty means ``bws`` uses its default.

    Returns:
        ``(secrets_dict, warnings_list)``.

    Raises:
        RuntimeError: for fatal conditions — empty token or project id,
            no usable binary, or any failure reported while running
            ``bws`` and parsing its output.
    """
    if not access_token:
        raise RuntimeError("Bitwarden access token is empty")
    if not project_id:
        raise RuntimeError("Bitwarden project_id is empty")

    cache_key = (_token_fingerprint(access_token), project_id, server_url or "")
    if use_cache:
        cached = _CACHE.get(cache_key)
        if cached is not None and cached.is_fresh(cache_ttl_seconds):
            # Hand out a copy so callers mutating the result cannot
            # corrupt the shared cache entry.
            return dict(cached.secrets), []

    bws = binary or find_bws(install_if_missing=True)
    if bws is None:
        raise RuntimeError(
            "bws binary not available — auto-install failed and `bws` is "
            "not on PATH. Install manually from "
            "https://github.com/bitwarden/sdk-sm/releases or re-run "
            "`hermes secrets bitwarden setup`."
        )

    secrets, warnings = _run_bws_list(bws, access_token, project_id, server_url)
    # Store a private copy for the same reason: the dict returned below
    # belongs to the caller.
    _CACHE[cache_key] = _CachedFetch(secrets=dict(secrets), fetched_at=time.time())
    return secrets, warnings
|
||||
|
||||
|
||||
def _run_bws_list(
    bws: Path, access_token: str, project_id: str, server_url: str = ""
) -> Tuple[Dict[str, str], List[str]]:
    """Run ``bws secret list <project_id> --output json`` and parse it.

    The subprocess inherits a copy of the current environment with
    ``BWS_ACCESS_TOKEN`` set to ``access_token``, ``NO_COLOR`` defaulted
    to ``1`` and, when ``server_url`` is non-empty, ``BWS_SERVER_URL``
    set to it.

    Args:
        bws: Path to the ``bws`` executable.
        access_token: Token exported to the subprocess.
        project_id: Project whose secrets are listed.
        server_url: Optional region / self-hosted endpoint override.

    Returns:
        ``(secrets, warnings)``. Items that are not dicts, or whose
        ``key``/``value`` are not strings, are dropped silently; keys that
        are not valid env-var names are dropped with a warning.

    Raises:
        RuntimeError: on timeout, failure to start the process, a
            non-zero exit status, non-JSON output, or JSON that is not a
            list.
    """
    import re  # local import: only needed to clean up error output

    cmd = [str(bws), "secret", "list", project_id, "--output", "json"]
    env = os.environ.copy()
    env["BWS_ACCESS_TOKEN"] = access_token
    # Make sure we're not echoing telemetry / colour codes into json.
    env.setdefault("NO_COLOR", "1")
    # Region / self-hosted support. bws defaults to https://vault.bitwarden.com
    # (US Cloud); EU Cloud users need https://vault.bitwarden.eu, and
    # self-hosted users need their own URL. When unset, fall back to whatever
    # BWS_SERVER_URL the caller already had in their shell env (preserved by
    # the copy above) so manual overrides keep working too.
    if server_url:
        env["BWS_SERVER_URL"] = server_url

    try:
        proc = subprocess.run(  # noqa: S603 — bws path is trusted
            cmd,
            env=env,
            capture_output=True,
            text=True,
            timeout=_BWS_RUN_TIMEOUT,
        )
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(
            f"bws timed out after {_BWS_RUN_TIMEOUT}s fetching secrets"
        ) from exc
    except OSError as exc:
        raise RuntimeError(f"failed to invoke bws: {exc}") from exc

    if proc.returncode != 0:
        # bws writes auth/network errors to stderr in plain English.
        # Strip whole ANSI CSI sequences (not just the ESC byte, which
        # would leave "[31m"-style debris), then any stray ESC, and
        # surface the first 200 chars.
        err = proc.stderr or proc.stdout or ""
        err = re.sub(r"\x1b\[[0-?]*[ -/]*[@-~]", "", err)
        err = err.replace("\x1b", "").strip()
        raise RuntimeError(
            f"bws exited {proc.returncode}: {err[:200]}"
        )

    raw = proc.stdout.strip()
    if not raw:
        return {}, ["bws returned no output (empty project?)"]

    try:
        payload = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise RuntimeError(f"bws returned non-JSON output: {exc}") from exc

    if not isinstance(payload, list):
        raise RuntimeError(
            f"bws returned unexpected shape: {type(payload).__name__}"
        )

    secrets: Dict[str, str] = {}
    warnings: List[str] = []
    for item in payload:
        if not isinstance(item, dict):
            continue
        key = item.get("key")
        value = item.get("value")
        if not isinstance(key, str) or not isinstance(value, str):
            continue
        if not _is_valid_env_name(key):
            warnings.append(
                f"Skipping secret {key!r}: not a valid env-var name"
            )
            continue
        secrets[key] = value
    return secrets, warnings
|
||||
|
||||
|
||||
def _is_valid_env_name(name: str) -> bool:
    """Return True if ``name`` is a portable environment-variable name.

    A valid name is non-empty, pure ASCII, starts with a letter or
    underscore, and otherwise contains only letters, digits and
    underscores (``[A-Za-z_][A-Za-z0-9_]*``).

    The explicit ASCII check matters because ``str.isalpha`` and
    ``str.isalnum`` also accept non-ASCII letters and digits such as
    ``É`` or ``²``.
    """
    if not name or not name.isascii():
        return False
    if not (name[0].isalpha() or name[0] == "_"):
        return False
    return all(c.isalnum() or c == "_" for c in name)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public entry point — called from hermes_cli.env_loader
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def apply_bitwarden_secrets(
    *,
    enabled: bool,
    access_token_env: str = "BWS_ACCESS_TOKEN",
    project_id: str = "",
    override_existing: bool = False,
    cache_ttl_seconds: float = 300,
    auto_install: bool = True,
    server_url: str = "",
) -> FetchResult:
    """Pull secrets from BSM and set them on ``os.environ``.

    Intended to run after the ``.env`` files have loaded. It is
    deliberately defensive: any failure is reported through
    :attr:`FetchResult.error` (fatal) or :attr:`FetchResult.warnings`
    (per-secret) instead of being raised.

    Args:
        enabled: When false, nothing happens and an empty result is returned.
        access_token_env: Name of the env var holding the access token.
        project_id: Bitwarden project whose secrets are pulled.
        override_existing: When false, env vars that already hold a
            non-empty value are left untouched and reported as skipped.
        cache_ttl_seconds: Passed through to the fetch cache.
        auto_install: Whether a missing ``bws`` binary may be installed.
        server_url: Bitwarden region or self-hosted endpoint (e.g.
            ``https://vault.bitwarden.eu``); empty means ``bws``'s default.

    Returns:
        A :class:`FetchResult` describing what was fetched, applied and
        skipped. The secret named ``access_token_env`` is never applied.
    """
    result = FetchResult()

    if not enabled:
        return result

    access_token = os.environ.get(access_token_env, "").strip()
    if not access_token:
        result.error = (
            f"secrets.bitwarden.enabled is true but {access_token_env} is "
            "not set.  Run `hermes secrets bitwarden setup`."
        )
        return result

    if not project_id:
        result.error = (
            "secrets.bitwarden.project_id is empty.  "
            "Run `hermes secrets bitwarden setup`."
        )
        return result

    binary = find_bws(install_if_missing=auto_install)
    result.binary_path = binary
    if binary is None:
        # Report the real cause: with auto_install enabled the install
        # was attempted and failed, it was not disabled.
        if auto_install:
            result.error = (
                "bws binary not available — auto-install failed and `bws` "
                "is not on PATH.  "
                "Run `hermes secrets bitwarden setup` to install."
            )
        else:
            result.error = (
                "bws binary not available and auto-install is disabled.  "
                "Run `hermes secrets bitwarden setup` to install."
            )
        return result

    try:
        secrets, warnings = fetch_bitwarden_secrets(
            access_token=access_token,
            project_id=project_id,
            binary=binary,
            cache_ttl_seconds=cache_ttl_seconds,
            server_url=server_url,
        )
    except Exception as exc:  # noqa: BLE001 — contract: never raise
        # RuntimeError is the documented failure type, but anything else
        # (e.g. a decode error on subprocess output) must not escape into
        # startup either.
        result.error = str(exc)
        return result

    result.secrets = secrets
    result.warnings.extend(warnings)

    for key, value in secrets.items():
        if key == access_token_env:
            # Don't let BSM clobber the very token we used to fetch
            # itself — that would be a footgun if someone stored the
            # token as a BSM secret too.
            result.skipped.append(key)
            continue
        if not override_existing and os.environ.get(key):
            result.skipped.append(key)
            continue
        try:
            os.environ[key] = value
        except (ValueError, OSError) as exc:
            # The OS can reject a value (e.g. an embedded NUL byte);
            # report it and carry on with the remaining secrets.
            result.warnings.append(
                f"Skipping secret {key!r}: could not set env var ({exc})"
            )
            continue
        result.applied.append(key)

    return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test hook — used by hermetic tests to flush the cache between cases.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _reset_cache_for_tests() -> None:
    """Drop every entry from the in-process secrets cache (test hook)."""
    _CACHE.clear()
|
||||
+3
-58
@@ -12,7 +12,7 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import get_config_path, get_skills_dir, is_termux
|
||||
from hermes_constants import get_config_path, get_skills_dir
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -24,43 +24,7 @@ PLATFORM_MAP = {
|
||||
"windows": "win32",
|
||||
}
|
||||
|
||||
EXCLUDED_SKILL_DIRS = frozenset(
|
||||
(
|
||||
".git",
|
||||
".github",
|
||||
".hub",
|
||||
".archive",
|
||||
".venv",
|
||||
"venv",
|
||||
"node_modules",
|
||||
"site-packages",
|
||||
"__pycache__",
|
||||
".tox",
|
||||
".nox",
|
||||
".pytest_cache",
|
||||
".mypy_cache",
|
||||
".ruff_cache",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def is_excluded_skill_path(path) -> bool:
    """Tell whether any component of *path* names an excluded directory.

    A path is excluded as soon as one of its components appears in
    ``EXCLUDED_SKILL_DIRS``. Objects exposing ``.parts`` (such as
    ``Path``) are used directly; anything else is converted with
    ``str()`` and parsed as a ``PurePath``.
    """
    try:
        components = path.parts  # Path-like
    except AttributeError:
        from pathlib import PurePath

        components = PurePath(str(path)).parts

    for component in components:
        if component in EXCLUDED_SKILL_DIRS:
            return True
    return False
|
||||
|
||||
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
|
||||
|
||||
# ── Lazy YAML loader ─────────────────────────────────────────────────────
|
||||
|
||||
@@ -136,14 +100,6 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
|
||||
|
||||
If the field is absent or empty the skill is compatible with **all**
|
||||
platforms (backward-compatible default).
|
||||
|
||||
Termux note: on Termux/Android, ``sys.platform`` is ``"linux"`` on
|
||||
older Pythons but became ``"android"`` on Python 3.13+. Termux is a
|
||||
Linux userland riding on the Android kernel, so skills tagged
|
||||
``linux`` are treated as compatible in Termux regardless of which
|
||||
``sys.platform`` value Python reports. Individual Linux commands
|
||||
inside a skill may still misbehave (no systemd, BusyBox utils, no
|
||||
apt/dnf, etc.) but that is on the skill, not on platform gating.
|
||||
"""
|
||||
platforms = frontmatter.get("platforms")
|
||||
if not platforms:
|
||||
@@ -151,21 +107,11 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
|
||||
if not isinstance(platforms, list):
|
||||
platforms = [platforms]
|
||||
current = sys.platform
|
||||
running_in_termux = is_termux()
|
||||
for platform in platforms:
|
||||
normalized = str(platform).lower().strip()
|
||||
mapped = PLATFORM_MAP.get(normalized, normalized)
|
||||
if current.startswith(mapped):
|
||||
return True
|
||||
# Termux runs a Linux userland on Android. Accept linux-tagged
|
||||
# skills regardless of whether sys.platform is "linux" (pre-3.13
|
||||
# Termux) or "android" (Python 3.13+ Termux, and any other
|
||||
# Android runtime).
|
||||
if running_in_termux and mapped == "linux":
|
||||
return True
|
||||
# Explicit termux/android tags match a Termux session too.
|
||||
if running_in_termux and mapped in ("termux", "android"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@@ -532,8 +478,7 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
|
||||
def iter_skill_index_files(skills_dir: Path, filename: str):
|
||||
"""Walk skills_dir yielding sorted paths matching *filename*.
|
||||
|
||||
Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
|
||||
directories so dependencies cannot register nested skills.
|
||||
Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
|
||||
"""
|
||||
matches = []
|
||||
for root, dirs, files in os.walk(skills_dir, followlinks=True):
|
||||
|
||||
@@ -205,40 +205,6 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
||||
if _env_hints:
|
||||
stable_parts.append(_env_hints)
|
||||
|
||||
# Active-profile hint — names the Hermes profile the agent is running
|
||||
# under so it doesn't conflate ~/.hermes/skills/ (default profile) with
|
||||
# ~/.hermes/profiles/<active>/skills/ (this profile's). Deterministic
|
||||
# for the lifetime of the agent — profile name doesn't change
|
||||
# mid-session, so this doesn't break the prompt cache.
|
||||
# See file_safety._resolve_active_profile_name + classify_cross_profile_target
|
||||
# for the matching tool-side guard.
|
||||
try:
|
||||
from agent.file_safety import _resolve_active_profile_name
|
||||
active_profile = _resolve_active_profile_name()
|
||||
except Exception:
|
||||
active_profile = "default"
|
||||
if active_profile == "default":
|
||||
stable_parts.append(
|
||||
"Active Hermes profile: default. Other profiles (if any) live "
|
||||
"under ~/.hermes/profiles/<name>/. Each profile has its own "
|
||||
"skills/, plugins/, cron/, and memories/ that affect a different "
|
||||
"session than this one. Do not modify another profile's "
|
||||
"skills/plugins/cron/memories unless the user explicitly directs "
|
||||
"you to."
|
||||
)
|
||||
else:
|
||||
stable_parts.append(
|
||||
f"Active Hermes profile: {active_profile}. This session reads "
|
||||
f"and writes ~/.hermes/profiles/{active_profile}/. The default "
|
||||
f"profile's data lives at ~/.hermes/skills/, ~/.hermes/plugins/, "
|
||||
f"~/.hermes/cron/, ~/.hermes/memories/ — those belong to a "
|
||||
f"different session run from a different shell. Do NOT modify "
|
||||
f"another profile's skills/plugins/cron/memories unless the user "
|
||||
f"explicitly directs you to. The cross-profile write guard will "
|
||||
f"refuse such writes by default; pass cross_profile=True only "
|
||||
f"after explicit direction."
|
||||
)
|
||||
|
||||
platform_key = (agent.platform or "").lower().strip()
|
||||
if platform_key in PLATFORM_HINTS:
|
||||
stable_parts.append(PLATFORM_HINTS[platform_key])
|
||||
|
||||
@@ -388,7 +388,6 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
|
||||
agent.tool_progress_callback(
|
||||
"tool.completed", function_name, None, None,
|
||||
duration=tool_duration, is_error=is_error,
|
||||
result=function_result,
|
||||
)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
@@ -492,7 +491,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
try:
|
||||
function_args = json.loads(tool_call.function.arguments)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Unexpected JSON error after validation: {e}")
|
||||
logging.warning(f"Unexpected JSON error after validation: {e}")
|
||||
function_args = {}
|
||||
if not isinstance(function_args, dict):
|
||||
function_args = {}
|
||||
@@ -823,7 +822,6 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
agent.tool_progress_callback(
|
||||
"tool.completed", function_name, None, None,
|
||||
duration=tool_duration, is_error=_is_error_result,
|
||||
result=function_result,
|
||||
)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
@@ -106,17 +106,7 @@ class AnthropicTransport(ProviderTransport):
|
||||
elif block.type == "tool_use":
|
||||
name = block.name
|
||||
if strip_tool_prefix and name.startswith(_MCP_PREFIX):
|
||||
stripped = name[len(_MCP_PREFIX):]
|
||||
# Only strip the mcp_ prefix for OAuth-injected tools
|
||||
# (where Hermes adds the prefix when sending to Anthropic
|
||||
# and must remove it on the way back). Native MCP server
|
||||
# tools (from mcp_servers: in config.yaml) are registered
|
||||
# in the tool registry under their FULL mcp_<server>_<tool>
|
||||
# name and must NOT be stripped. GH-25255.
|
||||
from tools.registry import registry as _tool_registry
|
||||
if (_tool_registry.get_entry(stripped)
|
||||
and not _tool_registry.get_entry(name)):
|
||||
name = stripped
|
||||
name = name[len(_MCP_PREFIX):]
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=block.id,
|
||||
|
||||
@@ -113,8 +113,9 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
self, messages: list[dict[str, Any]], **kwargs
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — strip internal fields
|
||||
that strict chat-completions providers reject with HTTP 400/422
|
||||
(or, in the case of some OpenAI-compatible gateways, 5xx):
|
||||
that strict chat-completions providers reject with HTTP 400/422.
|
||||
|
||||
Strips:
|
||||
|
||||
- Codex Responses API fields: ``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id`` /
|
||||
@@ -126,16 +127,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
``Extra inputs are not permitted, field: 'messages[N].tool_name'``.
|
||||
Permissive providers (OpenRouter, MiniMax) silently ignore the
|
||||
field, which masked the bug for months.
|
||||
- Hermes-internal scaffolding markers — any top-level message key
|
||||
starting with ``_`` (e.g. ``_empty_recovery_synthetic``,
|
||||
``_empty_terminal_sentinel``, ``_thinking_prefill``). These are
|
||||
bookkeeping flags the agent loop attaches to messages so the
|
||||
persistence layer can later strip its own scaffolding; they must
|
||||
never reach the wire. Permissive providers (real OpenAI,
|
||||
Anthropic) silently drop unknown message keys, but strict
|
||||
gateways (e.g. opencode-go, codex.nekos.me) reject with
|
||||
``Extra inputs are not permitted, field: 'messages[N]._empty_recovery_synthetic'``,
|
||||
which then poisons every subsequent request in the session.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
@@ -148,9 +139,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
):
|
||||
needs_sanitize = True
|
||||
break
|
||||
if any(isinstance(k, str) and k.startswith("_") for k in msg):
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
@@ -172,11 +160,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
msg.pop("codex_message_items", None)
|
||||
msg.pop("tool_name", None)
|
||||
# Drop all Hermes-internal scaffolding markers (``_``-prefixed).
|
||||
# OpenAI's message schema has no ``_``-prefixed fields, so this
|
||||
# is safe and future-proofs against new markers being added.
|
||||
for key in [k for k in msg if isinstance(k, str) and k.startswith("_")]:
|
||||
msg.pop(key, None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
|
||||
@@ -87,39 +87,6 @@ class TurnResult:
|
||||
_TURN_ABORTED_MARKERS = ("<turn_aborted>", "<turn_aborted/>")
|
||||
|
||||
|
||||
def _coerce_turn_input_text(user_input: Any) -> str:
|
||||
"""Collapse Hermes/OpenAI rich content into app-server text input.
|
||||
|
||||
The current `turn/start` path sends text items only. TUI image attachment
|
||||
can hand us OpenAI-style content parts, so keep the text/path hints and
|
||||
replace opaque image payloads with a small marker instead of putting a
|
||||
Python list into the `text` field.
|
||||
"""
|
||||
if isinstance(user_input, str):
|
||||
return user_input
|
||||
if isinstance(user_input, list):
|
||||
parts: list[str] = []
|
||||
for item in user_input:
|
||||
if isinstance(item, str):
|
||||
if item.strip():
|
||||
parts.append(item)
|
||||
continue
|
||||
if not isinstance(item, dict):
|
||||
if item is not None:
|
||||
parts.append(str(item))
|
||||
continue
|
||||
item_type = item.get("type")
|
||||
if item_type in {"text", "input_text"}:
|
||||
text = item.get("text") or item.get("content") or ""
|
||||
if text:
|
||||
parts.append(str(text))
|
||||
elif item_type in {"image", "image_url", "input_image"}:
|
||||
parts.append("[image attached]")
|
||||
text = "\n\n".join(p for p in parts if p).strip()
|
||||
return text or "What do you see in this image?"
|
||||
return "" if user_input is None else str(user_input)
|
||||
|
||||
|
||||
# Substrings in codex stderr / JSON-RPC error messages that signal the
|
||||
# subprocess died because its OAuth credentials are no longer valid.
|
||||
# Kept conservative: we only redirect users to `codex login` when we're
|
||||
@@ -360,7 +327,7 @@ class CodexAppServerSession:
|
||||
|
||||
def run_turn(
|
||||
self,
|
||||
user_input: Any,
|
||||
user_input: str,
|
||||
*,
|
||||
turn_timeout: float = 600.0,
|
||||
notification_poll_timeout: float = 0.25,
|
||||
@@ -398,8 +365,6 @@ class CodexAppServerSession:
|
||||
self._interrupt_event.clear()
|
||||
projector = CodexEventProjector()
|
||||
|
||||
user_input_text = _coerce_turn_input_text(user_input)
|
||||
|
||||
# Send turn/start with the user input. Text-only for now (codex
|
||||
# supports rich content but Hermes' text path is the common case).
|
||||
try:
|
||||
@@ -407,7 +372,7 @@ class CodexAppServerSession:
|
||||
"turn/start",
|
||||
{
|
||||
"threadId": self._thread_id,
|
||||
"input": [{"type": "text", "text": user_input_text}],
|
||||
"input": [{"type": "text", "text": user_input}],
|
||||
},
|
||||
timeout=10,
|
||||
)
|
||||
|
||||
@@ -39,7 +39,7 @@ model:
|
||||
# LM Studio is first-class and uses provider: "lmstudio".
|
||||
# It works with both no-auth and auth-enabled server modes.
|
||||
#
|
||||
# Can also be overridden for a single invocation with the --provider flag.
|
||||
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
|
||||
provider: "auto"
|
||||
|
||||
# API configuration (falls back to OPENROUTER_API_KEY env var)
|
||||
|
||||
@@ -51,8 +51,6 @@ os.environ["HERMES_QUIET"] = "1" # Our own modules
|
||||
|
||||
import yaml
|
||||
|
||||
from hermes_cli.fallback_config import get_fallback_chain
|
||||
|
||||
# prompt_toolkit for fixed input area TUI
|
||||
from prompt_toolkit.history import FileHistory
|
||||
from prompt_toolkit.styles import Style as PTStyle
|
||||
@@ -83,73 +81,17 @@ except Exception:
|
||||
import threading
|
||||
import queue
|
||||
|
||||
def CanonicalUsage(*args, **kwargs):
|
||||
from agent.usage_pricing import CanonicalUsage as _CanonicalUsage
|
||||
|
||||
return _CanonicalUsage(*args, **kwargs)
|
||||
|
||||
|
||||
def estimate_usage_cost(*args, **kwargs):
|
||||
from agent.usage_pricing import estimate_usage_cost as _estimate_usage_cost
|
||||
|
||||
return _estimate_usage_cost(*args, **kwargs)
|
||||
|
||||
|
||||
def format_duration_compact(*args, **kwargs):
|
||||
seconds = float(args[0] if args else kwargs.get("seconds", 0.0))
|
||||
if seconds < 60:
|
||||
return f"{seconds:.0f}s"
|
||||
minutes = seconds / 60
|
||||
if minutes < 60:
|
||||
return f"{minutes:.0f}m"
|
||||
hours = minutes / 60
|
||||
if hours < 24:
|
||||
remaining_min = int(minutes % 60)
|
||||
return f"{int(hours)}h {remaining_min}m" if remaining_min else f"{int(hours)}h"
|
||||
days = hours / 24
|
||||
return f"{days:.1f}d"
|
||||
|
||||
|
||||
def format_token_count_compact(*args, **kwargs):
|
||||
value = int(args[0] if args else kwargs.get("value", 0))
|
||||
abs_value = abs(value)
|
||||
if abs_value < 1_000:
|
||||
return str(value)
|
||||
|
||||
sign = "-" if value < 0 else ""
|
||||
units = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K"))
|
||||
for threshold, suffix in units:
|
||||
if abs_value >= threshold:
|
||||
scaled = abs_value / threshold
|
||||
if scaled < 10:
|
||||
text = f"{scaled:.2f}"
|
||||
elif scaled < 100:
|
||||
text = f"{scaled:.1f}"
|
||||
else:
|
||||
text = f"{scaled:.0f}"
|
||||
if "." in text:
|
||||
text = text.rstrip("0").rstrip(".")
|
||||
return f"{sign}{text}{suffix}"
|
||||
|
||||
return f"{value:,}"
|
||||
|
||||
|
||||
def is_table_divider(*args, **kwargs):
|
||||
from agent.markdown_tables import is_table_divider as _is_table_divider
|
||||
|
||||
return _is_table_divider(*args, **kwargs)
|
||||
|
||||
|
||||
def looks_like_table_row(*args, **kwargs):
|
||||
from agent.markdown_tables import looks_like_table_row as _looks_like_table_row
|
||||
|
||||
return _looks_like_table_row(*args, **kwargs)
|
||||
|
||||
|
||||
def realign_markdown_tables(*args, **kwargs):
|
||||
from agent.markdown_tables import realign_markdown_tables as _realign_markdown_tables
|
||||
|
||||
return _realign_markdown_tables(*args, **kwargs)
|
||||
from agent.usage_pricing import (
|
||||
CanonicalUsage,
|
||||
estimate_usage_cost,
|
||||
format_duration_compact,
|
||||
format_token_count_compact,
|
||||
)
|
||||
from agent.markdown_tables import (
|
||||
is_table_divider,
|
||||
looks_like_table_row,
|
||||
realign_markdown_tables,
|
||||
)
|
||||
# NOTE: `from agent.account_usage import ...` is deliberately NOT at module
|
||||
# top — it transitively pulls the OpenAI SDK chain (~230 ms cold) and is only
|
||||
# needed when the user runs `/limits`. Lazy-imported inside the handler below.
|
||||
@@ -415,12 +357,6 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"display": {
|
||||
"compact": False,
|
||||
"resume_display": "full",
|
||||
# Recap tuning for /resume — see hermes_cli/config.py DEFAULT_CONFIG.
|
||||
"resume_exchanges": 10,
|
||||
"resume_max_user_chars": 300,
|
||||
"resume_max_assistant_chars": 200,
|
||||
"resume_max_assistant_lines": 3,
|
||||
"resume_skip_tool_only": True,
|
||||
"show_reasoning": False,
|
||||
"streaming": True,
|
||||
"busy_input_mode": "interrupt",
|
||||
@@ -474,9 +410,7 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
if config_path.exists():
|
||||
try:
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
from hermes_cli.config import _normalize_root_model_keys
|
||||
|
||||
file_config = _normalize_root_model_keys(yaml.safe_load(f) or {})
|
||||
file_config = yaml.safe_load(f) or {}
|
||||
|
||||
_file_has_terminal_config = "terminal" in file_config
|
||||
|
||||
@@ -497,6 +431,21 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
if "model" in file_config["model"] and "default" not in file_config["model"]:
|
||||
defaults["model"]["default"] = file_config["model"]["model"]
|
||||
|
||||
# Legacy root-level provider/base_url fallback.
|
||||
# Some users (or old code) put provider: / base_url: at the
|
||||
# config root instead of inside the model: section. These are
|
||||
# only used as a FALLBACK when model.provider / model.base_url
|
||||
# is not already set — never as an override. The canonical
|
||||
# location is model.provider (written by `hermes model`).
|
||||
if not defaults["model"].get("provider"):
|
||||
root_provider = file_config.get("provider")
|
||||
if root_provider:
|
||||
defaults["model"]["provider"] = root_provider
|
||||
if not defaults["model"].get("base_url"):
|
||||
root_base_url = file_config.get("base_url")
|
||||
if root_base_url:
|
||||
defaults["model"]["base_url"] = root_base_url
|
||||
|
||||
# Deep merge file_config into defaults.
|
||||
# First: merge keys that exist in both (deep-merge dicts, overwrite scalars)
|
||||
for key in defaults:
|
||||
@@ -768,142 +717,31 @@ from rich.markup import escape as _escape
|
||||
from rich.panel import Panel
|
||||
from rich.text import Text as _RichText
|
||||
|
||||
# Import agent and tool systems lazily. Bare interactive startup only needs the
|
||||
# prompt; the full agent/tool registry is initialized on first use.
|
||||
def AIAgent(*args, **kwargs):
|
||||
from run_agent import AIAgent as _AIAgent
|
||||
import fire
|
||||
|
||||
return _AIAgent(*args, **kwargs)
|
||||
|
||||
|
||||
def get_tool_definitions(*args, **kwargs):
|
||||
from model_tools import get_tool_definitions as _get_tool_definitions
|
||||
|
||||
return _get_tool_definitions(*args, **kwargs)
|
||||
|
||||
|
||||
def get_toolset_for_tool(*args, **kwargs):
|
||||
from model_tools import get_toolset_for_tool as _get_toolset_for_tool
|
||||
|
||||
return _get_toolset_for_tool(*args, **kwargs)
|
||||
# Import the agent and tool systems
|
||||
from run_agent import AIAgent
|
||||
from model_tools import get_tool_definitions, get_toolset_for_tool
|
||||
|
||||
# Extracted CLI modules (Phase 3)
|
||||
from hermes_cli.banner import build_welcome_banner
|
||||
from hermes_cli.commands import SlashCommandCompleter, SlashCommandAutoSuggest
|
||||
|
||||
|
||||
def get_all_toolsets(*args, **kwargs):
|
||||
from toolsets import get_all_toolsets as _get_all_toolsets
|
||||
|
||||
return _get_all_toolsets(*args, **kwargs)
|
||||
|
||||
|
||||
def get_toolset_info(*args, **kwargs):
|
||||
from toolsets import get_toolset_info as _get_toolset_info
|
||||
|
||||
return _get_toolset_info(*args, **kwargs)
|
||||
|
||||
|
||||
def validate_toolset(*args, **kwargs):
|
||||
from toolsets import validate_toolset as _validate_toolset
|
||||
|
||||
return _validate_toolset(*args, **kwargs)
|
||||
|
||||
|
||||
def _sync_process_session_id(session_id: str) -> None:
|
||||
"""Keep process-local session-id consumers aligned after CLI switches."""
|
||||
from gateway.session_context import set_current_session_id
|
||||
|
||||
set_current_session_id(session_id)
|
||||
from toolsets import get_all_toolsets, get_toolset_info, validate_toolset
|
||||
|
||||
# Cron job system for scheduled tasks (execution is handled by the gateway)
|
||||
def get_job(*args, **kwargs):
|
||||
from cron import get_job as _get_job
|
||||
|
||||
return _get_job(*args, **kwargs)
|
||||
from cron import get_job
|
||||
|
||||
# Resource cleanup imports for safe shutdown (terminal VMs, browser sessions)
|
||||
from tools.terminal_tool import cleanup_all_environments as _cleanup_all_terminals
|
||||
from tools.terminal_tool import set_sudo_password_callback, set_approval_callback
|
||||
from tools.skills_tool import set_secret_capture_callback
|
||||
from hermes_cli.callbacks import prompt_for_secret
|
||||
|
||||
|
||||
def _cleanup_all_terminals(*args, **kwargs):
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
|
||||
return cleanup_all_environments(*args, **kwargs)
|
||||
|
||||
|
||||
def set_sudo_password_callback(*args, **kwargs):
|
||||
from tools.terminal_tool import set_sudo_password_callback as _set_sudo_password_callback
|
||||
|
||||
return _set_sudo_password_callback(*args, **kwargs)
|
||||
|
||||
|
||||
def set_approval_callback(*args, **kwargs):
|
||||
from tools.terminal_tool import set_approval_callback as _set_approval_callback
|
||||
|
||||
return _set_approval_callback(*args, **kwargs)
|
||||
|
||||
|
||||
def set_secret_capture_callback(*args, **kwargs):
|
||||
from tools.skills_tool import set_secret_capture_callback as _set_secret_capture_callback
|
||||
|
||||
return _set_secret_capture_callback(*args, **kwargs)
|
||||
|
||||
|
||||
def _cleanup_all_browsers(*args, **kwargs):
|
||||
from tools.browser_tool import _emergency_cleanup_all_sessions
|
||||
|
||||
return _emergency_cleanup_all_sessions(*args, **kwargs)
|
||||
from tools.browser_tool import _emergency_cleanup_all_sessions as _cleanup_all_browsers
|
||||
|
||||
# Guard to prevent cleanup from running multiple times on exit
|
||||
_cleanup_done = False
|
||||
# Weak reference to the active AIAgent for memory provider shutdown at exit
|
||||
_active_agent_ref = None
|
||||
_deferred_agent_startup_done = False
|
||||
|
||||
|
||||
def _prepare_deferred_agent_startup() -> None:
|
||||
"""Run Termux-deferred agent discovery before the first real agent turn."""
|
||||
global _deferred_agent_startup_done
|
||||
if _deferred_agent_startup_done:
|
||||
return
|
||||
if os.environ.get("HERMES_DEFER_AGENT_STARTUP") != "1":
|
||||
return
|
||||
_deferred_agent_startup_done = True
|
||||
_accept_hooks = os.environ.get("HERMES_ACCEPT_HOOKS", "").lower() in {
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
"on",
|
||||
}
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins
|
||||
|
||||
discover_plugins()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"plugin discovery failed at deferred CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"MCP tool discovery failed at deferred CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
from agent.shell_hooks import register_from_config
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
register_from_config(load_config(), accept_hooks=_accept_hooks)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"shell-hook registration failed at deferred CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
def _run_cleanup():
|
||||
"""Run resource cleanup exactly once."""
|
||||
@@ -2617,13 +2455,7 @@ def _build_compact_banner() -> str:
|
||||
line1 = f"{agent_name} - AI Agent Framework"
|
||||
tiny_line = agent_name
|
||||
|
||||
if os.environ.get("HERMES_FAST_STARTUP_BANNER") == "1":
|
||||
from hermes_cli import __release_date__ as _release_date
|
||||
from hermes_cli import __version__ as _version
|
||||
|
||||
version_line = f"Hermes Agent v{_version} ({_release_date})"
|
||||
else:
|
||||
version_line = format_banner_version_label()
|
||||
version_line = format_banner_version_label()
|
||||
|
||||
w = min(shutil.get_terminal_size().columns - 2, 88)
|
||||
if w < 30:
|
||||
@@ -2672,48 +2504,19 @@ def _looks_like_slash_command(text: str) -> bool:
|
||||
# Skill Slash Commands — dynamic commands generated from installed skills
|
||||
# ============================================================================
|
||||
|
||||
_skill_commands = None
|
||||
_skill_bundles = None
|
||||
from agent.skill_commands import (
|
||||
scan_skill_commands,
|
||||
get_skill_commands,
|
||||
build_skill_invocation_message,
|
||||
build_preloaded_skills_prompt,
|
||||
)
|
||||
from agent.skill_bundles import (
|
||||
get_skill_bundles,
|
||||
build_bundle_invocation_message,
|
||||
)
|
||||
|
||||
|
||||
def _ensure_skill_commands() -> dict:
|
||||
global _skill_commands
|
||||
if _skill_commands is None:
|
||||
from agent.skill_commands import scan_skill_commands
|
||||
|
||||
_skill_commands = scan_skill_commands()
|
||||
return _skill_commands
|
||||
|
||||
|
||||
def get_skill_commands() -> dict:
|
||||
return _ensure_skill_commands()
|
||||
|
||||
|
||||
def build_skill_invocation_message(*args, **kwargs):
|
||||
from agent.skill_commands import build_skill_invocation_message as _impl
|
||||
|
||||
return _impl(*args, **kwargs)
|
||||
|
||||
|
||||
def build_preloaded_skills_prompt(*args, **kwargs):
|
||||
from agent.skill_commands import build_preloaded_skills_prompt as _impl
|
||||
|
||||
return _impl(*args, **kwargs)
|
||||
|
||||
|
||||
def get_skill_bundles() -> dict:
|
||||
global _skill_bundles
|
||||
if _skill_bundles is None:
|
||||
from agent.skill_bundles import get_skill_bundles as _impl
|
||||
|
||||
_skill_bundles = _impl()
|
||||
return _skill_bundles
|
||||
|
||||
|
||||
def build_bundle_invocation_message(*args, **kwargs):
|
||||
from agent.skill_bundles import build_bundle_invocation_message as _impl
|
||||
|
||||
return _impl(*args, **kwargs)
|
||||
_skill_commands = scan_skill_commands()
|
||||
_skill_bundles = get_skill_bundles()
|
||||
|
||||
|
||||
def _get_plugin_cmd_handler_names() -> set:
|
||||
@@ -2812,7 +2615,7 @@ class HermesCLI:
|
||||
api_key: str = None,
|
||||
base_url: str = None,
|
||||
max_turns: int = None,
|
||||
verbose: Optional[bool] = None,
|
||||
verbose: bool = False,
|
||||
compact: bool = False,
|
||||
resume: str = None,
|
||||
checkpoints: bool = False,
|
||||
@@ -2863,12 +2666,7 @@ class HermesCLI:
|
||||
else:
|
||||
self.busy_input_mode = "interrupt"
|
||||
|
||||
# self.verbose ONLY controls global DEBUG logging (root logger level).
|
||||
# display.tool_progress="verbose" controls tool-call rendering (full args,
|
||||
# results, think blocks) and is independent — see _apply_logging_levels.
|
||||
# Coupling the two (PR #6a1aa420e) caused all module DEBUG logs to spew
|
||||
# to console whenever a user set tool_progress: verbose in config.
|
||||
self.verbose = bool(verbose) if verbose is not None else False
|
||||
self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose")
|
||||
|
||||
# streaming: stream tokens to the terminal as they arrive (display.streaming in config.yaml)
|
||||
self.streaming_enabled = CLI_CONFIG["display"].get("streaming", False)
|
||||
@@ -3054,9 +2852,12 @@ class HermesCLI:
|
||||
pass
|
||||
|
||||
# Fallback provider chain — tried in order when primary fails after retries.
|
||||
# Merge new ``fallback_providers`` entries with any legacy
|
||||
# ``fallback_model`` entries so old configs still participate.
|
||||
self._fallback_model = get_fallback_chain(CLI_CONFIG)
|
||||
# Supports new list format (fallback_providers) and legacy single-dict (fallback_model).
|
||||
fb = CLI_CONFIG.get("fallback_providers") or CLI_CONFIG.get("fallback_model") or []
|
||||
# Normalize legacy single-dict to a one-element list
|
||||
if isinstance(fb, dict):
|
||||
fb = [fb] if fb.get("provider") and fb.get("model") else []
|
||||
self._fallback_model = fb
|
||||
|
||||
# Signature of the currently-initialised agent's runtime. Used to
|
||||
# rebuild the agent when provider / model / base_url changes across
|
||||
@@ -3064,9 +2865,7 @@ class HermesCLI:
|
||||
self._active_agent_route_signature = None
|
||||
|
||||
# Agent will be initialized on first use
|
||||
self.agent: Optional[Any] = None
|
||||
self._tool_callbacks_installed = False
|
||||
self._tirith_security_checked = False
|
||||
self.agent: Optional[AIAgent] = None
|
||||
self._app = None # prompt_toolkit Application (set in run())
|
||||
|
||||
# Conversation state
|
||||
@@ -4689,41 +4488,6 @@ class HermesCLI:
|
||||
route["request_overrides"] = overrides
|
||||
return route
|
||||
|
||||
def _install_tool_callbacks(self) -> None:
|
||||
"""Install tool callbacks that need the live prompt UI."""
|
||||
if getattr(self, "_tool_callbacks_installed", False):
|
||||
return
|
||||
set_sudo_password_callback(self._sudo_password_callback)
|
||||
set_approval_callback(self._approval_callback)
|
||||
set_secret_capture_callback(self._secret_capture_callback)
|
||||
try:
|
||||
from tools.computer_use_tool import set_approval_callback as _set_cu_cb
|
||||
|
||||
_set_cu_cb(self._computer_use_approval_callback)
|
||||
except ImportError:
|
||||
pass
|
||||
self._tool_callbacks_installed = True
|
||||
|
||||
def _ensure_tirith_security(self) -> None:
|
||||
"""Check tirith availability once before tools can run terminal commands."""
|
||||
if getattr(self, "_tirith_security_checked", False):
|
||||
return
|
||||
self._tirith_security_checked = True
|
||||
try:
|
||||
from tools.tirith_security import ensure_installed, is_platform_supported
|
||||
|
||||
tirith_path = ensure_installed(log_failures=False)
|
||||
if tirith_path is None and is_platform_supported():
|
||||
security_cfg = self.config.get("security", {}) or {}
|
||||
tirith_enabled = security_cfg.get("tirith_enabled", True)
|
||||
if tirith_enabled:
|
||||
_cprint(
|
||||
f" {_DIM}⚠ tirith security scanner enabled but not available "
|
||||
f"— command scanning will use pattern matching only{_RST}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, request_overrides: dict | None = None) -> bool:
|
||||
"""
|
||||
Initialize the agent on first use.
|
||||
@@ -4735,10 +4499,6 @@ class HermesCLI:
|
||||
if self.agent is not None:
|
||||
return True
|
||||
|
||||
_prepare_deferred_agent_startup()
|
||||
self._install_tool_callbacks()
|
||||
self._ensure_tirith_security()
|
||||
|
||||
if not self._ensure_runtime_credentials():
|
||||
return False
|
||||
|
||||
@@ -4953,10 +4713,8 @@ class HermesCLI:
|
||||
context_length=ctx_len,
|
||||
)
|
||||
|
||||
# Tool discovery is intentionally deferred on the Termux bare prompt
|
||||
# path; availability warnings are shown once tools are initialized.
|
||||
if os.environ.get("HERMES_DEFER_AGENT_STARTUP") != "1":
|
||||
self._show_tool_availability_warnings()
|
||||
# Show tool availability warnings if any tools are disabled
|
||||
self._show_tool_availability_warnings()
|
||||
|
||||
# Warn about very low context lengths (common with local servers)
|
||||
if ctx_len and ctx_len <= 8192:
|
||||
@@ -5094,13 +4852,10 @@ class HermesCLI:
|
||||
if self.resume_display == "minimal":
|
||||
return
|
||||
|
||||
# Read limits from config (with hardcoded defaults)
|
||||
_disp = CLI_CONFIG.get("display", {})
|
||||
MAX_DISPLAY_EXCHANGES = int(_disp.get("resume_exchanges", 10))
|
||||
MAX_USER_LEN = int(_disp.get("resume_max_user_chars", 300))
|
||||
MAX_ASST_LEN = int(_disp.get("resume_max_assistant_chars", 200))
|
||||
MAX_ASST_LINES = int(_disp.get("resume_max_assistant_lines", 3))
|
||||
SKIP_TOOL_ONLY = _disp.get("resume_skip_tool_only", True)
|
||||
MAX_DISPLAY_EXCHANGES = 10 # max user+assistant pairs to show
|
||||
MAX_USER_LEN = 300 # truncate user messages
|
||||
MAX_ASST_LEN = 200 # truncate assistant text
|
||||
MAX_ASST_LINES = 3 # max lines of assistant text
|
||||
|
||||
# Collect displayable entries (skip system, tool-result messages)
|
||||
entries = [] # list of (role, display_text)
|
||||
@@ -5163,10 +4918,6 @@ class HermesCLI:
|
||||
if not parts:
|
||||
# Skip pure-reasoning messages that have no visible output
|
||||
continue
|
||||
# Skip tool-call-only entries when SKIP_TOOL_ONLY is enabled
|
||||
has_text = bool(text)
|
||||
if SKIP_TOOL_ONLY and not has_text and tool_calls:
|
||||
continue
|
||||
entries.append(("assistant", " ".join(parts)))
|
||||
_last_asst_idx = len(entries) - 1
|
||||
_last_asst_full = " ".join(full_parts)
|
||||
@@ -5740,13 +5491,9 @@ class HermesCLI:
|
||||
|
||||
def _show_status(self):
|
||||
"""Show compact startup status line."""
|
||||
# Avoid pulling the full tool registry into the bare Termux prompt path.
|
||||
if os.environ.get("HERMES_DEFER_AGENT_STARTUP") == "1":
|
||||
tool_status = "tools deferred"
|
||||
else:
|
||||
tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets, quiet_mode=True)
|
||||
tool_count = len(tools) if tools else 0
|
||||
tool_status = f"{tool_count} tools"
|
||||
# Get tool count
|
||||
tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets, quiet_mode=True)
|
||||
tool_count = len(tools) if tools else 0
|
||||
|
||||
# Format model name (shorten if needed)
|
||||
model_short = self.model.split("/")[-1] if "/" in self.model else self.model
|
||||
@@ -5778,7 +5525,7 @@ class HermesCLI:
|
||||
|
||||
self._console_print(
|
||||
f" {api_indicator} [{accent_color}]{model_short}[/] "
|
||||
f"[dim {separator_color}]·[/] [bold {label_color}]{tool_status}[/]"
|
||||
f"[dim {separator_color}]·[/] [bold {label_color}]{tool_count} tools[/]"
|
||||
f"{toolsets_info}{provider_info}"
|
||||
)
|
||||
|
||||
@@ -5891,10 +5638,9 @@ class HermesCLI:
|
||||
continue
|
||||
ChatConsole().print(f" [bold {_accent_hex()}]{cmd:<15}[/] [dim]-[/] {_escape(desc)}")
|
||||
|
||||
skill_commands = _ensure_skill_commands()
|
||||
if skill_commands:
|
||||
_cprint(f"\n ⚡ {_BOLD}Skill Commands{_RST} ({len(skill_commands)} installed):")
|
||||
for cmd, info in sorted(skill_commands.items()):
|
||||
if _skill_commands:
|
||||
_cprint(f"\n ⚡ {_BOLD}Skill Commands{_RST} ({len(_skill_commands)} installed):")
|
||||
for cmd, info in sorted(_skill_commands.items()):
|
||||
ChatConsole().print(
|
||||
f" [bold {_accent_hex()}]{cmd:<22}[/] [dim]-[/] {_escape(info['description'])}"
|
||||
)
|
||||
@@ -6172,16 +5918,15 @@ class HermesCLI:
|
||||
else:
|
||||
print(" Recent sessions:")
|
||||
print()
|
||||
print(f" {'#':<3} {'Title':<32} {'Preview':<40} {'Last Active':<13} {'ID'}")
|
||||
print(f" {'─' * 3} {'─' * 32} {'─' * 40} {'─' * 13} {'─' * 24}")
|
||||
for idx, session in enumerate(sessions, start=1):
|
||||
title = session.get("title") or "—"
|
||||
print(f" {'Title':<32} {'Preview':<40} {'Last Active':<13} {'ID'}")
|
||||
print(f" {'─' * 32} {'─' * 40} {'─' * 13} {'─' * 24}")
|
||||
for session in sessions:
|
||||
title = (session.get("title") or "—")[:30]
|
||||
preview = (session.get("preview") or "")[:38]
|
||||
last_active = _relative_time(session.get("last_active"))
|
||||
print(f" {idx:<3} {title:<32} {preview:<40} {last_active:<13} {session['id']}")
|
||||
print(f" {title:<32} {preview:<40} {last_active:<13} {session['id']}")
|
||||
print()
|
||||
print(" Use /resume <number>, /resume <session id>, or /resume <session title> to continue.")
|
||||
print(" Example: /resume 2")
|
||||
print(" Use /resume <session id or title> to continue where you left off.")
|
||||
print()
|
||||
return True
|
||||
|
||||
@@ -6292,7 +6037,6 @@ class HermesCLI:
|
||||
self.conversation_history = []
|
||||
self._pending_title = None
|
||||
self._resumed = False
|
||||
_sync_process_session_id(self.session_id)
|
||||
|
||||
if self.agent:
|
||||
self.agent.session_id = self.session_id
|
||||
@@ -6526,7 +6270,7 @@ class HermesCLI:
|
||||
target = parts[1].strip() if len(parts) > 1 else ""
|
||||
|
||||
if not target:
|
||||
_cprint(" Usage: /resume <number|session_id_or_title>")
|
||||
_cprint(" Usage: /resume <session_id_or_title>")
|
||||
if self._show_recent_sessions(reason="resume"):
|
||||
return
|
||||
_cprint(" Tip: Use /history or `hermes sessions list` to find sessions.")
|
||||
@@ -6537,20 +6281,10 @@ class HermesCLI:
|
||||
_cprint(f" {format_session_db_unavailable()}")
|
||||
return
|
||||
|
||||
# Resolve numbered selection, title, or ID
|
||||
if target.isdigit():
|
||||
sessions = self._list_recent_sessions(limit=10)
|
||||
index = int(target)
|
||||
if index < 1 or index > len(sessions):
|
||||
_cprint(f" Resume index {index} is out of range.")
|
||||
_cprint(" Use /resume with no arguments to see available sessions.")
|
||||
return
|
||||
selected = sessions[index - 1]
|
||||
target_id = selected["id"]
|
||||
else:
|
||||
from hermes_cli.main import _resolve_session_by_name_or_id
|
||||
resolved = _resolve_session_by_name_or_id(target)
|
||||
target_id = resolved or target
|
||||
# Resolve title or ID
|
||||
from hermes_cli.main import _resolve_session_by_name_or_id
|
||||
resolved = _resolve_session_by_name_or_id(target)
|
||||
target_id = resolved or target
|
||||
|
||||
session_meta = self._session_db.get_session(target_id)
|
||||
if not session_meta:
|
||||
@@ -6589,7 +6323,6 @@ class HermesCLI:
|
||||
self.session_id = target_id
|
||||
self._resumed = True
|
||||
self._pending_title = None
|
||||
_sync_process_session_id(target_id)
|
||||
|
||||
# Load conversation history (strip transcript-only metadata entries)
|
||||
restored = self._session_db.get_messages_as_conversation(target_id)
|
||||
@@ -6641,7 +6374,6 @@ class HermesCLI:
|
||||
f" ({msg_count} user message{'s' if msg_count != 1 else ''},"
|
||||
f" {len(self.conversation_history)} total)"
|
||||
)
|
||||
self._display_resumed_history()
|
||||
else:
|
||||
_cprint(f" ↻ Resumed session {target_id}{title_part} — no messages, starting fresh.")
|
||||
|
||||
@@ -6764,7 +6496,6 @@ class HermesCLI:
|
||||
self.session_start = now
|
||||
self._pending_title = None
|
||||
self._resumed = True # Prevents auto-title generation
|
||||
_sync_process_session_id(new_session_id)
|
||||
|
||||
# Sync the agent
|
||||
if self.agent:
|
||||
@@ -8126,7 +7857,6 @@ class HermesCLI:
|
||||
"clear",
|
||||
"This clears the screen and starts a new session.\n"
|
||||
"The current conversation history will be discarded.",
|
||||
cmd_original=cmd_original,
|
||||
) is None:
|
||||
return
|
||||
self.new_session(silent=True)
|
||||
@@ -8251,16 +7981,12 @@ class HermesCLI:
|
||||
if not self._handle_handoff_command(cmd_original):
|
||||
return False
|
||||
elif canonical == "new":
|
||||
# Strip inline-skip tokens (now/--yes/-y) before deriving the title
|
||||
# so "/new now My Session" yields title="My Session" instead of
|
||||
# title="now My Session". See _split_destructive_skip.
|
||||
_new_args, _ = self._split_destructive_skip(cmd_original)
|
||||
title = _new_args.strip() or None
|
||||
parts = cmd_original.split(maxsplit=1)
|
||||
title = parts[1].strip() if len(parts) > 1 else None
|
||||
if self._confirm_destructive_slash(
|
||||
"new",
|
||||
"This starts a fresh session.\n"
|
||||
"The current conversation history will be discarded.",
|
||||
cmd_original=cmd_original,
|
||||
) is None:
|
||||
return
|
||||
self.new_session(title=title)
|
||||
@@ -8287,7 +8013,6 @@ class HermesCLI:
|
||||
if self._confirm_destructive_slash(
|
||||
"undo",
|
||||
"This removes the last user/assistant exchange from history.",
|
||||
cmd_original=cmd_original,
|
||||
) is None:
|
||||
return
|
||||
self.undo_last()
|
||||
@@ -8436,8 +8161,6 @@ class HermesCLI:
|
||||
else:
|
||||
# Check for user-defined quick commands (bypass agent loop, no LLM call)
|
||||
base_cmd = cmd_lower.split()[0]
|
||||
skill_commands = _ensure_skill_commands()
|
||||
skill_bundles = get_skill_bundles()
|
||||
quick_commands = self.config.get("quick_commands", {})
|
||||
if base_cmd.lstrip("/") in quick_commands:
|
||||
qcmd = quick_commands[base_cmd.lstrip("/")]
|
||||
@@ -8493,14 +8216,14 @@ class HermesCLI:
|
||||
_cprint(f"\033[1;31mPlugin command error: {e}{_RST}")
|
||||
# Skill bundles take precedence over individual skills — /<bundle>
|
||||
# loads multiple skills at once. Rescans cheaply when files change.
|
||||
elif base_cmd in skill_bundles:
|
||||
elif base_cmd in get_skill_bundles():
|
||||
user_instruction = cmd_original[len(base_cmd):].strip()
|
||||
bundle_result = build_bundle_invocation_message(
|
||||
base_cmd, user_instruction, task_id=self.session_id
|
||||
)
|
||||
if bundle_result:
|
||||
msg, loaded_names, missing = bundle_result
|
||||
bundle_info = skill_bundles[base_cmd]
|
||||
bundle_info = get_skill_bundles()[base_cmd]
|
||||
print(
|
||||
f"\n⚡ Loading bundle: {bundle_info['name']} "
|
||||
f"({len(loaded_names)} skills)"
|
||||
@@ -8516,13 +8239,13 @@ class HermesCLI:
|
||||
f"[bold red]Failed to load bundle for {base_cmd}[/]"
|
||||
)
|
||||
# Check for skill slash commands (/gif-search, /axolotl, etc.)
|
||||
elif base_cmd in skill_commands:
|
||||
elif base_cmd in _skill_commands:
|
||||
user_instruction = cmd_original[len(base_cmd):].strip()
|
||||
msg = build_skill_invocation_message(
|
||||
base_cmd, user_instruction, task_id=self.session_id
|
||||
)
|
||||
if msg:
|
||||
skill_name = skill_commands[base_cmd]["name"]
|
||||
skill_name = _skill_commands[base_cmd]["name"]
|
||||
print(f"\n⚡ Loading skill: {skill_name}")
|
||||
if hasattr(self, '_pending_input'):
|
||||
self._pending_input.put(msg)
|
||||
@@ -8534,7 +8257,7 @@ class HermesCLI:
|
||||
# that execution-time resolution agrees with tab-completion.
|
||||
from hermes_cli.commands import COMMANDS
|
||||
typed_base = cmd_lower.split()[0]
|
||||
all_known = set(COMMANDS) | set(skill_commands) | set(skill_bundles)
|
||||
all_known = set(COMMANDS) | set(_skill_commands) | set(get_skill_bundles())
|
||||
matches = [c for c in all_known if c.startswith(typed_base)]
|
||||
if len(matches) > 1:
|
||||
# Prefer an exact match (typed the full command name)
|
||||
@@ -9365,23 +9088,18 @@ class HermesCLI:
|
||||
_cprint(" Failed to save runtime_footer setting to config.yaml")
|
||||
|
||||
def _toggle_verbose(self):
|
||||
"""Cycle tool progress mode: off → new → all → verbose → off.
|
||||
|
||||
Tool-progress display (full args / results / think blocks at the
|
||||
``verbose`` step) is INDEPENDENT of global DEBUG logging. Cycling
|
||||
through here does not change ``self.verbose`` or the agent's
|
||||
``verbose_logging`` / ``quiet_mode`` — those remain under the
|
||||
explicit ``-v``/``--verbose`` flag and the ``/verbose-logging``
|
||||
toggle. See PR #6a1aa420e for the history that decoupled them.
|
||||
"""
|
||||
"""Cycle tool progress mode: off → new → all → verbose → off."""
|
||||
cycle = ["off", "new", "all", "verbose"]
|
||||
try:
|
||||
idx = cycle.index(self.tool_progress_mode)
|
||||
except ValueError:
|
||||
idx = 2 # default to "all"
|
||||
self.tool_progress_mode = cycle[(idx + 1) % len(cycle)]
|
||||
self.verbose = self.tool_progress_mode == "verbose"
|
||||
|
||||
if self.agent:
|
||||
self.agent.verbose_logging = self.verbose
|
||||
self.agent.quiet_mode = not self.verbose
|
||||
self.agent.reasoning_callback = self._current_reasoning_callback()
|
||||
|
||||
# Use raw ANSI codes via _cprint so the output is routed through
|
||||
@@ -9393,7 +9111,7 @@ class HermesCLI:
|
||||
"off": f"{_Colors.DIM}Tool progress: OFF{_Colors.RESET} — silent mode, just the final response.",
|
||||
"new": f"{_Colors.YELLOW}Tool progress: NEW{_Colors.RESET} — show each new tool (skip repeats).",
|
||||
"all": f"{_Colors.GREEN}Tool progress: ALL{_Colors.RESET} — show every tool call.",
|
||||
"verbose": f"{_Colors.BOLD}{_Colors.GREEN}Tool progress: VERBOSE{_Colors.RESET} — full args, results, and think blocks.",
|
||||
"verbose": f"{_Colors.BOLD}{_Colors.GREEN}Tool progress: VERBOSE{_Colors.RESET} — full args, results, think blocks, and debug logs.",
|
||||
}
|
||||
_cprint(labels.get(self.tool_progress_mode, ""))
|
||||
|
||||
@@ -9939,49 +9657,7 @@ class HermesCLI:
|
||||
if _reload_thread.is_alive():
|
||||
print(" ⚠️ MCP reload timed out (30s). Some servers may not have reconnected.")
|
||||
|
||||
# Inline-skip tokens that bypass the destructive-slash confirmation modal.
|
||||
# Matches the escape-hatch pattern users on broken modal platforms
|
||||
# (currently native Windows PowerShell — issue #30768) need to self-serve
|
||||
# without having to flip approvals.destructive_slash_confirm in config.
|
||||
_DESTRUCTIVE_SKIP_TOKENS = frozenset({"now", "--yes", "-y"})
|
||||
|
||||
@classmethod
|
||||
def _split_destructive_skip(cls, cmd_text: Optional[str]) -> tuple[str, bool]:
|
||||
"""Split inline-skip tokens out of a destructive slash command.
|
||||
|
||||
Returns ``(remainder, skip)`` where ``remainder`` is the original
|
||||
text with the command word and any recognized skip tokens removed,
|
||||
and ``skip`` is True iff at least one skip token was found.
|
||||
|
||||
Examples:
|
||||
"/reset now" -> ("", True)
|
||||
"/reset --yes My title" -> ("My title", True)
|
||||
"/new My title" -> ("My title", False)
|
||||
"/clear" -> ("", False)
|
||||
"""
|
||||
if not cmd_text:
|
||||
return "", False
|
||||
tokens = cmd_text.strip().split()
|
||||
if not tokens:
|
||||
return "", False
|
||||
# Drop leading "/cmd" word — callers pass the full command text.
|
||||
if tokens[0].startswith("/"):
|
||||
tokens = tokens[1:]
|
||||
skip = False
|
||||
kept: list[str] = []
|
||||
for tok in tokens:
|
||||
if tok.lower() in cls._DESTRUCTIVE_SKIP_TOKENS:
|
||||
skip = True
|
||||
continue
|
||||
kept.append(tok)
|
||||
return " ".join(kept), skip
|
||||
|
||||
def _confirm_destructive_slash(
|
||||
self,
|
||||
command: str,
|
||||
detail: str,
|
||||
cmd_original: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
def _confirm_destructive_slash(self, command: str, detail: str) -> Optional[str]:
|
||||
"""Prompt the user to confirm a destructive session slash command.
|
||||
|
||||
Used by ``/clear``, ``/new``/``/reset``, and ``/undo`` before they
|
||||
@@ -9997,24 +9673,9 @@ class HermesCLI:
|
||||
gate is off the function returns ``"once"`` immediately without
|
||||
prompting.
|
||||
|
||||
Inline-skip: if ``cmd_original`` contains ``now``, ``--yes``, or
|
||||
``-y`` as an argument (e.g. ``/reset now``, ``/new --yes My title``),
|
||||
the modal is bypassed and ``"once"`` is returned immediately. This is
|
||||
an escape hatch for platforms where the prompt_toolkit modal hangs
|
||||
(issue #30768 — native Windows PowerShell). Callers are responsible
|
||||
for stripping the skip tokens from any remaining argument parsing
|
||||
(see :meth:`_split_destructive_skip`).
|
||||
|
||||
Returns ``"once"``, ``"always"``, or ``None`` (cancelled). Callers
|
||||
proceed with the destructive action when the result is non-None.
|
||||
"""
|
||||
# Inline-skip escape hatch — works regardless of platform/modal state.
|
||||
# See class-level _DESTRUCTIVE_SKIP_TOKENS for the accepted tokens.
|
||||
if cmd_original:
|
||||
_, _skip = self._split_destructive_skip(cmd_original)
|
||||
if _skip:
|
||||
return "once"
|
||||
|
||||
# Gate check — respects prior "Always Approve" clicks.
|
||||
try:
|
||||
cfg = load_cli_config()
|
||||
@@ -10349,7 +10010,9 @@ class HermesCLI:
|
||||
self._last_scrollback_tool = function_name
|
||||
try:
|
||||
from agent.display import get_cute_tool_message
|
||||
line = get_cute_tool_message(function_name, stored_args, duration, result=kwargs.get("result"))
|
||||
line = get_cute_tool_message(function_name, stored_args, duration)
|
||||
if is_error:
|
||||
line = f"{line} [error]"
|
||||
_cprint(f" {line}")
|
||||
except Exception:
|
||||
pass
|
||||
@@ -10558,7 +10221,6 @@ class HermesCLI:
|
||||
self._voice_processing = True
|
||||
|
||||
submitted = False
|
||||
transcription_failed = False
|
||||
wav_path = None
|
||||
try:
|
||||
if self._voice_recorder is None:
|
||||
@@ -10607,24 +10269,18 @@ class HermesCLI:
|
||||
else:
|
||||
error = result.get("error", "Unknown error")
|
||||
_cprint(f"\n{_DIM}Transcription failed: {error}{_RST}")
|
||||
transcription_failed = True
|
||||
|
||||
except Exception as e:
|
||||
_cprint(f"\n{_DIM}Voice processing error: {e}{_RST}")
|
||||
transcription_failed = wav_path is not None
|
||||
finally:
|
||||
with self._voice_lock:
|
||||
self._voice_processing = False
|
||||
if hasattr(self, '_app') and self._app:
|
||||
self._app.invalidate()
|
||||
# Clean up temp file unless transcription failed. On failure, keep
|
||||
# the source recording so long dictation is not lost.
|
||||
# Clean up temp file
|
||||
try:
|
||||
if wav_path and os.path.isfile(wav_path):
|
||||
if transcription_failed:
|
||||
_cprint(f"{_DIM}Recording preserved at: {wav_path}{_RST}")
|
||||
else:
|
||||
os.unlink(wav_path)
|
||||
os.unlink(wav_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -12360,11 +12016,37 @@ class HermesCLI:
|
||||
self._voice_tts_done = threading.Event() # Signals TTS playback finished
|
||||
self._voice_tts_done.set() # Initially "done" (no TTS pending)
|
||||
|
||||
if os.environ.get("HERMES_DEFER_AGENT_STARTUP") != "1":
|
||||
self._install_tool_callbacks()
|
||||
# Register callbacks so terminal_tool prompts route through our UI
|
||||
set_sudo_password_callback(self._sudo_password_callback)
|
||||
set_approval_callback(self._approval_callback)
|
||||
set_secret_capture_callback(self._secret_capture_callback)
|
||||
|
||||
if os.environ.get("HERMES_DEFER_AGENT_STARTUP") != "1":
|
||||
self._ensure_tirith_security()
|
||||
# Computer-use shares the same approval UI (prompt_toolkit dialog).
|
||||
# The tool handler expects a 3-arg callback (action, args, summary)
|
||||
# and returns "approve_once" | "approve_session" | "always_approve"
|
||||
# | "deny". Adapt our existing generic callback.
|
||||
try:
|
||||
from tools.computer_use_tool import set_approval_callback as _set_cu_cb
|
||||
_set_cu_cb(self._computer_use_approval_callback)
|
||||
except ImportError:
|
||||
pass # computer_use extras not installed
|
||||
|
||||
# Ensure tirith security scanner is available (downloads if needed).
|
||||
# Warn the user if tirith is enabled in config but not available,
|
||||
# so they know command security scanning is degraded. Suppressed
|
||||
# on platforms where tirith ships no binary (Windows etc.) — the
|
||||
# user can't act on it and pattern-matching guards still run.
|
||||
try:
|
||||
from tools.tirith_security import ensure_installed, is_platform_supported
|
||||
tirith_path = ensure_installed(log_failures=False)
|
||||
if tirith_path is None and is_platform_supported():
|
||||
security_cfg = self.config.get("security", {}) or {}
|
||||
tirith_enabled = security_cfg.get("tirith_enabled", True)
|
||||
if tirith_enabled:
|
||||
_cprint(f" {_DIM}⚠ tirith security scanner enabled but not available "
|
||||
f"— command scanning will use pattern matching only{_RST}")
|
||||
except Exception:
|
||||
pass # Non-fatal — fail-open at scan time if unavailable
|
||||
|
||||
# Key bindings for the input area
|
||||
kb = KeyBindings()
|
||||
@@ -14522,7 +14204,7 @@ def main(
|
||||
api_key: str = None,
|
||||
base_url: str = None,
|
||||
max_turns: int = None,
|
||||
verbose: Optional[bool] = None,
|
||||
verbose: bool = False,
|
||||
quiet: bool = False,
|
||||
compact: bool = False,
|
||||
list_tools: bool = False,
|
||||
@@ -14868,6 +14550,4 @@ def main(
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import fire
|
||||
|
||||
fire.Fire(main)
|
||||
|
||||
+1
-4
@@ -529,9 +529,7 @@ def _send_media_via_adapter(
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
from gateway.platforms.base import BasePlatformAdapter, should_send_media_as_audio
|
||||
|
||||
media_files = BasePlatformAdapter.filter_media_delivery_paths(media_files)
|
||||
from gateway.platforms.base import should_send_media_as_audio
|
||||
|
||||
for media_path, _is_voice in media_files:
|
||||
try:
|
||||
@@ -616,7 +614,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
# Extract MEDIA: tags so attachments are forwarded as files, not raw text
|
||||
from gateway.platforms.base import BasePlatformAdapter
|
||||
media_files, cleaned_delivery_content = BasePlatformAdapter.extract_media(delivery_content)
|
||||
media_files = BasePlatformAdapter.filter_media_delivery_paths(media_files)
|
||||
|
||||
try:
|
||||
config = load_gateway_config()
|
||||
|
||||
+5
-10
@@ -6,22 +6,17 @@
|
||||
#
|
||||
# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so
|
||||
# files created inside the container stay readable/writable on the host.
|
||||
# The s6-overlay stage2 hook remaps the internal `hermes` user to these
|
||||
# values via usermod/groupmod; each supervised service then drops to that
|
||||
# user via `s6-setuidgid`.
|
||||
# The entrypoint remaps the internal `hermes` user to these values via
|
||||
# usermod/groupmod + gosu.
|
||||
#
|
||||
# Security notes:
|
||||
# - The dashboard service binds to 127.0.0.1 by default. It stores API
|
||||
# keys; exposing it on LAN without auth is unsafe. If you want remote
|
||||
# access, use an SSH tunnel or put it behind a reverse proxy that
|
||||
# adds authentication — do NOT pass --insecure --host 0.0.0.0.
|
||||
# - If you override entrypoint, keep `/init` as the first command in
|
||||
# the chain (or let docker use the image's default ENTRYPOINT,
|
||||
# which is `["/init", "/opt/hermes/docker/main-wrapper.sh"]`).
|
||||
# `/init` is s6-overlay's PID 1 — it runs the cont-init.d scripts
|
||||
# (chown, profile reconcile, dashboard toggle) and sets up the
|
||||
# supervision tree before any service starts. Bypassing it skips
|
||||
# all of that setup and the gateway will not work correctly.
|
||||
# - If you override entrypoint, keep /opt/hermes/docker/entrypoint.sh in
|
||||
# the command chain. It drops root to the hermes user before gateway
|
||||
# files such as gateway.lock are created.
|
||||
# - The gateway's API server is off unless you uncomment API_SERVER_KEY
|
||||
# and API_SERVER_HOST. See docs/user-guide/api-server.md before doing
|
||||
# this on an internet-facing host.
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
#!/command/with-contenv sh
|
||||
# shellcheck shell=sh
|
||||
# Make supervise/ trees for ALL declared s6 services queryable and
|
||||
# controllable by the unprivileged hermes user (UID 10000).
|
||||
#
|
||||
# Background (PR #30136 review item I4): the entire s6 lifecycle
|
||||
# (s6-svc, s6-svstat, s6-svwait) is dispatched as the hermes user
|
||||
# inside the container (every Hermes runtime path runs under
|
||||
# ``s6-setuidgid hermes``). But s6-supervise creates each service's
|
||||
# ``supervise/`` and top-level ``event/`` directory with mode 0700
|
||||
# owned by its effective UID — which is root, because s6-supervise
|
||||
# is spawned by s6-svscan running as PID 1. So unprivileged clients
|
||||
# get EACCES on every probe / control call against the slot.
|
||||
#
|
||||
# Two fixes, one in each registration path:
|
||||
#
|
||||
# 1. For RUNTIME-registered profile gateways (created via the s6
|
||||
# runtime register hooks in profiles.py): the Python helper
|
||||
# ``_seed_supervise_skeleton`` pre-creates supervise/ + event/ +
|
||||
# supervise/control owned by hermes BEFORE s6-svscanctl -a fires.
|
||||
# s6-supervise's mkdir/mkfifo are EEXIST-safe, so it inherits our
|
||||
# ownership and never tries to chown back to root.
|
||||
#
|
||||
# 2. For STATIC s6-rc services (dashboard, main-hermes) declared at
|
||||
# image-build time under /etc/s6-overlay/s6-rc.d/*: these are
|
||||
# compiled by s6-rc at boot, and s6-supervise spawns BEFORE
|
||||
# cont-init.d gets to run — so by the time we're here, the
|
||||
# supervise/ tree is already there as root:root 0700. We chown
|
||||
# it here. s6-supervise will keep using the same files; it never
|
||||
# re-asserts ownership on a running service.
|
||||
#
|
||||
# This script runs as root after 01-hermes-setup but before
|
||||
# 02-reconcile-profiles, so the chowns are settled before the
|
||||
# Python reconciler walks the scandir. Lexicographic ordering
|
||||
# guarantees this — the suffix is unusual because we want to slot
|
||||
# in between 01 and the existing 02-reconcile-profiles without
|
||||
# renumbering both (which would be a churn-noise patch on its own).
|
||||
|
||||
set -eu
|
||||
|
||||
# /run/s6-rc/servicedirs holds the live, compiled service directories
|
||||
# for every static (s6-rc) service. Symlinks under /run/service/*
|
||||
# point here. Per-service supervise/ + event/ both need hermes
|
||||
# ownership for s6-svstat etc. to work as hermes.
|
||||
SVC_ROOT=/run/s6-rc/servicedirs
|
||||
|
||||
if [ ! -d "$SVC_ROOT" ]; then
|
||||
echo "[supervise-perms] $SVC_ROOT not present; skipping"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
for svc in "$SVC_ROOT"/*; do
|
||||
[ -d "$svc" ] || continue
|
||||
name=$(basename "$svc")
|
||||
|
||||
# Skip s6-overlay-internal services (they need to stay root-only;
|
||||
# the s6rc-* helpers manage the supervision tree itself).
|
||||
case "$name" in
|
||||
s6rc-*|s6-linux-*)
|
||||
continue
|
||||
;;
|
||||
esac
|
||||
|
||||
# supervise/ tree — needed by s6-svstat / s6-svc.
|
||||
if [ -d "$svc/supervise" ]; then
|
||||
chown -R hermes:hermes "$svc/supervise" 2>/dev/null || \
|
||||
echo "[supervise-perms] could not chown $svc/supervise"
|
||||
# 0710 = group searchable. ``s6-svstat`` only needs to openat
|
||||
# status, not list the dir, but giving the hermes group +x is
|
||||
# the minimum that lets group members access the contents.
|
||||
chmod 0710 "$svc/supervise" 2>/dev/null || true
|
||||
# supervise/control is a FIFO that s6-svc writes commands
|
||||
# into; the hermes user needs +w. Owner is already hermes
|
||||
# after the recursive chown above; widen perms to 0660 so
|
||||
# ``s6-svc`` works for any member of the hermes group too.
|
||||
if [ -p "$svc/supervise/control" ]; then
|
||||
chmod 0660 "$svc/supervise/control" 2>/dev/null || true
|
||||
fi
|
||||
fi
|
||||
|
||||
# Top-level event/ dir — s6-svlisten1 / s6-svwait subscribe here.
|
||||
if [ -d "$svc/event" ]; then
|
||||
chown hermes:hermes "$svc/event" 2>/dev/null || \
|
||||
echo "[supervise-perms] could not chown $svc/event"
|
||||
# Preserve s6's 03730 mode (setgid + g+rwx + sticky).
|
||||
chmod 03730 "$svc/event" 2>/dev/null || true
|
||||
fi
|
||||
done
|
||||
|
||||
echo "[supervise-perms] chowned supervise/ trees for static s6-rc services"
|
||||
@@ -1,46 +0,0 @@
|
||||
#!/command/with-contenv sh
|
||||
# shellcheck shell=sh
|
||||
# Container-boot reconciliation of per-profile gateway s6 services.
|
||||
#
|
||||
# Runs as root after 01-hermes-setup (the stage2 hook) has chowned
|
||||
# the volume and seeded $HERMES_HOME, but before s6-rc starts user
|
||||
# services. /etc/cont-init.d/* scripts run in lexicographic order,
|
||||
# so the `02-` prefix guarantees ordering.
|
||||
#
|
||||
# Service directories under /run/service/ live on tmpfs and are
|
||||
# wiped on every container restart. Profile directories under
|
||||
# $HERMES_HOME/profiles/ live on the persistent VOLUME. This script
|
||||
# walks the persistent profiles, recreates the s6 service slots,
|
||||
# and auto-starts only those whose last recorded state was
|
||||
# `running` — see hermes_cli/container_boot.py.
|
||||
#
|
||||
# Phase 4 also needs hermes-user writes to /run/service/ (so the
|
||||
# profile create/delete hooks can register/unregister at runtime),
|
||||
# so we chown the scandir before invoking the reconciler. We
|
||||
# additionally chown the s6-svscan control FIFO so the hermes user
|
||||
# can send rescan signals via ``s6-svscanctl -a``; without this the
|
||||
# entire runtime-registration path is inert under UID 10000 (the
|
||||
# Python wrapper catches the resulting EACCES, prints a warning,
|
||||
# and swallows the failure).
|
||||
set -e
|
||||
|
||||
# Make the dynamic scandir hermes-writable. The directory itself
|
||||
# starts root-owned by s6-overlay.
|
||||
chown hermes:hermes /run/service 2>/dev/null || true
|
||||
|
||||
# Make the svscan control FIFO hermes-writable so s6-svscanctl -a
|
||||
# / -an work for the hermes user. The FIFO is created by s6-svscan
|
||||
# at PID-1 startup, so by the time this cont-init.d script runs it
|
||||
# already exists. Both ``control`` and ``lock`` need to be writable
|
||||
# for the various svscanctl operations; the directory itself stays
|
||||
# root-owned (we only need to touch the two FIFOs/locks inside).
|
||||
if [ -d /run/service/.s6-svscan ]; then
|
||||
for entry in control lock; do
|
||||
if [ -e "/run/service/.s6-svscan/$entry" ]; then
|
||||
chown hermes:hermes "/run/service/.s6-svscan/$entry" 2>/dev/null || true
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
exec s6-setuidgid hermes /opt/hermes/.venv/bin/python -m hermes_cli.container_boot
|
||||
|
||||
+158
-25
@@ -1,27 +1,160 @@
|
||||
#!/bin/sh
|
||||
# s6-overlay shim. The real logic lives in docker/stage2-hook.sh, invoked
|
||||
# by /etc/cont-init.d/01-hermes-setup (installed by the Dockerfile). This
|
||||
# file exists so external references to docker/entrypoint.sh still work,
|
||||
# but it's no longer the ENTRYPOINT — /init is.
|
||||
#!/bin/bash
|
||||
# Docker/Podman entrypoint: bootstrap config files into the mounted volume, then run hermes.
|
||||
set -e
|
||||
|
||||
HERMES_HOME="${HERMES_HOME:-/opt/data}"
|
||||
INSTALL_DIR="/opt/hermes"
|
||||
|
||||
# --- Privilege dropping via gosu ---
|
||||
# When started as root (the default for Docker, or fakeroot in rootless Podman),
|
||||
# optionally remap the hermes user/group to match host-side ownership, fix volume
|
||||
# permissions, then re-exec as hermes.
|
||||
if [ "$(id -u)" = "0" ]; then
|
||||
if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "$(id -u hermes)" ]; then
|
||||
echo "Changing hermes UID to $HERMES_UID"
|
||||
usermod -u "$HERMES_UID" hermes
|
||||
fi
|
||||
|
||||
if [ -n "$HERMES_GID" ] && [ "$HERMES_GID" != "$(id -g hermes)" ]; then
|
||||
echo "Changing hermes GID to $HERMES_GID"
|
||||
# -o allows non-unique GID (e.g. macOS GID 20 "staff" may already exist
|
||||
# as "dialout" in the Debian-based container image)
|
||||
groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Fix ownership of the data volume. When HERMES_UID remaps the hermes user,
|
||||
# files created by previous runs (under the old UID) become inaccessible.
|
||||
# Always chown -R when UID was remapped; otherwise only if top-level is wrong.
|
||||
actual_hermes_uid=$(id -u hermes)
|
||||
needs_chown=false
|
||||
if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then
|
||||
needs_chown=true
|
||||
elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
needs_chown=true
|
||||
fi
|
||||
if [ "$needs_chown" = true ]; then
|
||||
echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)"
|
||||
# In rootless Podman the container's "root" is mapped to an unprivileged
|
||||
# host UID — chown will fail. That's fine: the volume is already owned
|
||||
# by the mapped user on the host side.
|
||||
chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
|
||||
echo "Warning: chown failed (rootless container?) — continuing anyway"
|
||||
# The .venv must also be re-chowned when UID is remapped, otherwise
|
||||
# lazy_deps.py cannot install platform packages (discord.py, etc.).
|
||||
chown -R hermes:hermes "$INSTALL_DIR/.venv" 2>/dev/null || \
|
||||
echo "Warning: chown .venv failed (rootless container?) — continuing anyway"
|
||||
fi
|
||||
|
||||
# Ensure config.yaml is readable by the hermes runtime user even if it was
|
||||
# edited on the host after initial ownership setup. Must run here (as root)
|
||||
# rather than after the gosu drop, otherwise a non-root caller like
|
||||
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
echo "Dropping root privileges"
|
||||
exec gosu hermes "$0" "$@"
|
||||
fi
|
||||
|
||||
# --- Running as hermes from here ---
|
||||
source "${INSTALL_DIR}/.venv/bin/activate"
|
||||
|
||||
# Stamp install method for detect_install_method()
|
||||
echo "docker" > "${HERMES_HOME:=/opt/data}/.install_method" 2>/dev/null || true
|
||||
|
||||
# Create essential directory structure. Cache and platform directories
|
||||
# (cache/images, cache/audio, platforms/whatsapp, etc.) are created on
|
||||
# demand by the application — don't pre-create them here so new installs
|
||||
# get the consolidated layout from get_hermes_dir().
|
||||
# The "home/" subdirectory is a per-profile HOME for subprocesses (git,
|
||||
# ssh, gh, npm …). Without it those tools write to /root which is
|
||||
# ephemeral and shared across profiles. See issue #4426.
|
||||
mkdir -p "$HERMES_HOME"/{cron,sessions,logs,hooks,memories,skills,skins,plans,workspace,home}
|
||||
|
||||
# .env
|
||||
if [ ! -f "$HERMES_HOME/.env" ]; then
|
||||
cp "$INSTALL_DIR/.env.example" "$HERMES_HOME/.env"
|
||||
fi
|
||||
|
||||
# config.yaml
|
||||
if [ ! -f "$HERMES_HOME/config.yaml" ]; then
|
||||
cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# SOUL.md
|
||||
if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
|
||||
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
|
||||
fi
|
||||
|
||||
# auth.json: bootstrap from env on first boot only. Used by orchestrators
|
||||
# (e.g. provisioning a Hermes VPS from an account-management service) that
|
||||
# need to seed the OAuth refresh credential non-interactively, instead of
|
||||
# walking the user through `hermes setup` + the device-flow login dance.
|
||||
# Subsequent token rotations write back to the same file, which lives on a
|
||||
# persistent volume — so this env var is consumed exactly once at first
|
||||
# boot. The `[ ! -f ... ]` guard is critical: without it, a container
|
||||
# restart would clobber a rotated refresh token with the now-stale value
|
||||
# the orchestrator originally seeded.
|
||||
if [ ! -f "$HERMES_HOME/auth.json" ] && [ -n "$HERMES_AUTH_JSON_BOOTSTRAP" ]; then
|
||||
printf '%s' "$HERMES_AUTH_JSON_BOOTSTRAP" > "$HERMES_HOME/auth.json"
|
||||
chmod 600 "$HERMES_HOME/auth.json"
|
||||
fi
|
||||
|
||||
# Sync bundled skills (manifest-based so user edits are preserved)
|
||||
if [ -d "$INSTALL_DIR/skills" ]; then
|
||||
python3 "$INSTALL_DIR/tools/skills_sync.py"
|
||||
fi
|
||||
|
||||
# Optionally start `hermes dashboard` as a side-process.
|
||||
#
|
||||
# When called directly (e.g. by an old wrapper script that hard-coded
|
||||
# docker/entrypoint.sh as the container ENTRYPOINT, or by an external
|
||||
# orchestration script that invokes it inside the container), forward to
|
||||
# the stage2 hook for parity with the pre-s6 entrypoint behavior. The
|
||||
# stage2 hook only handles cont-init bootstrap (UID remap, chown, config
|
||||
# seed, skills sync); it does NOT exec the CMD. Callers that depended
|
||||
# on the pre-s6 contract "entrypoint.sh sets up state then execs hermes"
|
||||
# will see the bootstrap happen but the CMD will not run from this shim.
|
||||
# Toggled by HERMES_DASHBOARD=1 (also accepts "true"/"yes", case-insensitive).
|
||||
# Host/port/TUI can be overridden via:
|
||||
# HERMES_DASHBOARD_HOST (default 0.0.0.0 — exposed outside the container)
|
||||
# HERMES_DASHBOARD_PORT (default 9119, matches `hermes dashboard` default)
|
||||
# HERMES_DASHBOARD_TUI (already honored by `hermes dashboard` itself)
|
||||
#
|
||||
# Deprecation: this shim is preserved for one release cycle to give
|
||||
# downstream users time to migrate their wrappers to the image's real
|
||||
# ENTRYPOINT (`/init`). It will be removed in a future major release.
|
||||
# Surface a warning to stderr so anyone still invoking this path
|
||||
# sees the migration notice in their logs.
|
||||
echo "[hermes] WARNING: docker/entrypoint.sh is a deprecated shim under " \
|
||||
"s6-overlay. The container's real ENTRYPOINT is /init + " \
|
||||
"main-wrapper.sh; this script only runs the stage2 cont-init hook " \
|
||||
"and does NOT exec the CMD. If you hard-coded docker/entrypoint.sh " \
|
||||
"as your ENTRYPOINT, drop the override — docker will use the image's " \
|
||||
"default ENTRYPOINT (/init), which handles bootstrap AND CMD." >&2
|
||||
exec /opt/hermes/docker/stage2-hook.sh "$@"
|
||||
# The dashboard is a long-lived server. We background it *before* the final
|
||||
# `exec hermes "$@"` so the user's chosen foreground command (chat, gateway,
|
||||
# sleep infinity, …) remains PID-of-interest for the container runtime. When
|
||||
# the container stops the whole process tree is torn down, so no explicit
|
||||
# cleanup is needed.
|
||||
case "${HERMES_DASHBOARD:-}" in
|
||||
1|true|TRUE|True|yes|YES|Yes)
|
||||
dash_host="${HERMES_DASHBOARD_HOST:-0.0.0.0}"
|
||||
dash_port="${HERMES_DASHBOARD_PORT:-9119}"
|
||||
dash_args=(--host "$dash_host" --port "$dash_port" --no-open)
|
||||
# Binding to anything other than localhost requires --insecure — the
|
||||
# dashboard refuses otherwise because it exposes API keys. Inside a
|
||||
# container this is the expected deployment (host reaches it via
|
||||
# published port), so opt in automatically.
|
||||
if [ "$dash_host" != "127.0.0.1" ] && [ "$dash_host" != "localhost" ]; then
|
||||
dash_args+=(--insecure)
|
||||
fi
|
||||
echo "Starting hermes dashboard on ${dash_host}:${dash_port} (background)"
|
||||
# Prefix dashboard output so it's distinguishable from the main
|
||||
# process in `docker logs`. stdbuf keeps the pipe line-buffered.
|
||||
(
|
||||
stdbuf -oL -eL hermes dashboard "${dash_args[@]}" 2>&1 \
|
||||
| sed -u 's/^/[dashboard] /'
|
||||
) &
|
||||
;;
|
||||
esac
|
||||
|
||||
# Final exec: two supported invocation patterns — wrap the args with `hermes`, or exec the command directly.
|
||||
#
|
||||
# docker run <image> -> exec `hermes` with no args (legacy default)
|
||||
# docker run <image> chat -q "..." -> exec `hermes chat -q "..."` (legacy wrap)
|
||||
# docker run <image> sleep infinity -> exec `sleep infinity` directly
|
||||
# docker run <image> bash -> exec `bash` directly
|
||||
#
|
||||
# If the first positional arg resolves to an executable on PATH, we assume the
|
||||
# caller wants to run it directly (needed by the launcher which runs long-lived
|
||||
# `sleep infinity` sandbox containers — see tools/environments/docker.py).
|
||||
# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
|
||||
# preserving the documented `docker run <image> <subcommand>` behavior.
|
||||
if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
|
||||
exec "$@"
|
||||
fi
|
||||
exec hermes "$@"
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
#!/bin/sh
|
||||
# /opt/hermes/docker/main-wrapper.sh — wraps the container's CMD with
|
||||
# the same argument-routing logic the pre-s6 entrypoint.sh used. Runs
|
||||
# as /init's "main program" (Docker CMD) so it inherits stdin/stdout/
|
||||
# stderr from the container.
|
||||
#
|
||||
# Routing:
|
||||
# no args → exec `hermes` (the default)
|
||||
# first arg is an executable → exec it directly (sleep, bash, sh, …)
|
||||
# first arg is anything else → exec `hermes <args>` (subcommand passthrough)
|
||||
#
|
||||
# We drop to the hermes user via `s6-setuidgid` so the supervised
|
||||
# workload runs unprivileged (UID 10000 by default).
|
||||
set -e
|
||||
|
||||
cd /opt/data
|
||||
# shellcheck disable=SC1091
|
||||
. /opt/hermes/.venv/bin/activate
|
||||
|
||||
if [ $# -eq 0 ]; then
|
||||
exec s6-setuidgid hermes hermes
|
||||
fi
|
||||
|
||||
if command -v "$1" >/dev/null 2>&1; then
|
||||
# Bare executable — pass through directly.
|
||||
exec s6-setuidgid hermes "$@"
|
||||
fi
|
||||
|
||||
# Hermes subcommand pass-through.
|
||||
exec s6-setuidgid hermes hermes "$@"
|
||||
@@ -1,30 +0,0 @@
|
||||
#!/command/with-contenv sh
|
||||
# shellcheck shell=sh
|
||||
# Dashboard finish script. Companion to ./run.
|
||||
#
|
||||
# When HERMES_DASHBOARD is unset (or falsy), ./run exits 0 immediately.
|
||||
# Without this finish script, s6-supervise would just restart the run
|
||||
# script in a tight loop. By exiting 125 here, we tell s6-supervise
|
||||
# "this service has permanently failed; do not restart" — equivalent
|
||||
# to `s6-svc -O`. The supervise slot reports as down, matching reality
|
||||
# (no dashboard process is running).
|
||||
#
|
||||
# When HERMES_DASHBOARD IS enabled and the run script later exits or
|
||||
# is killed, we want s6-supervise to restart it (the whole point of
|
||||
# supervised lifecycle). So we exit non-125 in that case.
|
||||
|
||||
# Arguments passed to a finish script: $1=run-exit-code, $2=signal-num,
|
||||
# $3=service-dir-name, $4=run-pgid. See servicedir(7).
|
||||
|
||||
case "${HERMES_DASHBOARD:-}" in
|
||||
1|true|TRUE|True|yes|YES|Yes)
|
||||
# Dashboard was enabled — let s6-supervise restart on crash by
|
||||
# exiting non-125. (Any exit code other than 125 works; we use 0.)
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
# Dashboard disabled — permanent-failure marker so s6-supervise
|
||||
# leaves the slot in 'down' state and s6-svstat reflects that.
|
||||
exit 125
|
||||
;;
|
||||
esac
|
||||
@@ -1,40 +0,0 @@
|
||||
#!/command/with-contenv sh
|
||||
# shellcheck shell=sh
|
||||
# Dashboard service. Always declared so s6 has a supervised slot; if
|
||||
# HERMES_DASHBOARD isn't truthy the run script exits cleanly and the
|
||||
# companion finish script returns 125 (s6's "permanent failure, do
|
||||
# not restart" marker), so s6-svstat reports the slot as down. See
|
||||
# also docker/s6-rc.d/dashboard/finish.
|
||||
|
||||
case "${HERMES_DASHBOARD:-}" in
|
||||
1|true|TRUE|True|yes|YES|Yes) ;;
|
||||
*)
|
||||
# Exit 0; the finish script will exit 125 → s6-supervise won't
|
||||
# restart us and the slot reports down. Using a clean exit
|
||||
# (rather than `exec sleep infinity`) means s6-svstat reflects
|
||||
# reality: when HERMES_DASHBOARD is unset, the service is NOT
|
||||
# running, just supervised-with-permanent-failure. See PR
|
||||
# #30136 review item I3.
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
|
||||
cd /opt/data
|
||||
# shellcheck disable=SC1091
|
||||
. /opt/hermes/.venv/bin/activate
|
||||
|
||||
dash_host="${HERMES_DASHBOARD_HOST:-0.0.0.0}"
|
||||
dash_port="${HERMES_DASHBOARD_PORT:-9119}"
|
||||
|
||||
# Binding to anything other than localhost requires --insecure — the
|
||||
# dashboard refuses otherwise because it exposes API keys. Inside a
|
||||
# container this is the expected deployment.
|
||||
insecure=""
|
||||
case "$dash_host" in
|
||||
127.0.0.1|localhost) ;;
|
||||
*) insecure="--insecure" ;;
|
||||
esac
|
||||
|
||||
# shellcheck disable=SC2086 # word-splitting of $insecure is intentional
|
||||
exec s6-setuidgid hermes hermes dashboard \
|
||||
--host "$dash_host" --port "$dash_port" --no-open $insecure
|
||||
@@ -1 +0,0 @@
|
||||
longrun
|
||||
@@ -1,27 +0,0 @@
|
||||
#!/command/with-contenv sh
|
||||
# shellcheck shell=sh
|
||||
# Main hermes service.
|
||||
#
|
||||
# IMPORTANT — this is NOT how the user's CMD runs.
|
||||
#
|
||||
# We chose Architecture B from the plan: the container's CMD (the bare
|
||||
# command the user passes to `docker run <image> …`) runs as /init's
|
||||
# "main program" via Docker's CMD mechanism, NOT as an s6-supervised
|
||||
# service. This is the canonical s6-overlay pattern for "container
|
||||
# exits when the program exits" semantics, and it lets us preserve
|
||||
# every pre-s6 invocation contract (chat passthrough, sleep infinity,
|
||||
# bash, --tui) without re-implementing argument routing through
|
||||
# /run/s6/container_environment.
|
||||
#
|
||||
# So why does this service exist at all? Two reasons:
|
||||
# 1. s6-rc requires at least one user service for the "user" bundle
|
||||
# to be valid. We can't ship an empty bundle.
|
||||
# 2. Future work may want to supervise a long-lived hermes process
|
||||
# (e.g. for gateway-server containers); having the slot already
|
||||
# wired in keeps that change small.
|
||||
#
|
||||
# For now this service is a no-op: it sleeps forever, doing nothing.
|
||||
# The dashboard runs as a real s6 service alongside it (see
|
||||
# ../dashboard/run) and per-profile gateways register dynamically via
|
||||
# /run/service/ at runtime (Phase 4).
|
||||
exec sleep infinity
|
||||
@@ -1 +0,0 @@
|
||||
longrun
|
||||
@@ -1,134 +0,0 @@
|
||||
#!/bin/sh
|
||||
# s6-overlay stage2 hook — runs as root after the supervision tree is
|
||||
# up but before user services start. Handles UID/GID remap, volume
|
||||
# chown, config seeding, and skills sync.
|
||||
#
|
||||
# Per-service privilege drop happens inside each service's `run` script
|
||||
# (and in main-wrapper.sh) via s6-setuidgid, not here.
|
||||
#
|
||||
# Wired into the image as /etc/cont-init.d/01-hermes-setup by the
|
||||
# Dockerfile. The shim at docker/entrypoint.sh forwards to this script
|
||||
# so external references to docker/entrypoint.sh still work.
|
||||
#
|
||||
# NB: cont-init.d scripts run with no arguments — the user's CMD args
|
||||
# are NOT visible here. That's fine: we use Architecture B (s6-overlay
|
||||
# main-program model), so main-wrapper.sh runs the CMD with full
|
||||
# stdin/stdout/stderr access and handles arg parsing there.
|
||||
|
||||
set -eu
|
||||
|
||||
HERMES_HOME="${HERMES_HOME:-/opt/data}"
|
||||
INSTALL_DIR="/opt/hermes"
|
||||
|
||||
# --- UID/GID remap ---
|
||||
if [ -n "${HERMES_UID:-}" ] && [ "$HERMES_UID" != "$(id -u hermes)" ]; then
|
||||
echo "[stage2] Changing hermes UID to $HERMES_UID"
|
||||
usermod -u "$HERMES_UID" hermes
|
||||
fi
|
||||
if [ -n "${HERMES_GID:-}" ] && [ "$HERMES_GID" != "$(id -g hermes)" ]; then
|
||||
echo "[stage2] Changing hermes GID to $HERMES_GID"
|
||||
# -o allows non-unique GID (e.g. macOS GID 20 "staff" may already
|
||||
# exist as "dialout" in the Debian-based container image).
|
||||
groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# --- Fix ownership of data volume ---
|
||||
actual_hermes_uid=$(id -u hermes)
|
||||
needs_chown=false
|
||||
if [ -n "${HERMES_UID:-}" ] && [ "$HERMES_UID" != "10000" ]; then
|
||||
needs_chown=true
|
||||
elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
needs_chown=true
|
||||
fi
|
||||
if [ "$needs_chown" = true ]; then
|
||||
echo "[stage2] Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)"
|
||||
# In rootless Podman the container's "root" is mapped to an
|
||||
# unprivileged host UID — chown will fail. That's fine: the volume
|
||||
# is already owned by the mapped user on the host side.
|
||||
chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
|
||||
echo "[stage2] Warning: chown failed (rootless container?) — continuing"
|
||||
# The .venv must also be re-chowned when UID is remapped, otherwise
|
||||
# lazy_deps.py cannot install platform packages (discord.py, etc.).
|
||||
chown -R hermes:hermes "$INSTALL_DIR/.venv" 2>/dev/null || \
|
||||
echo "[stage2] Warning: chown .venv failed (rootless container?) — continuing"
|
||||
fi
|
||||
|
||||
# Always reset ownership of $HERMES_HOME/profiles to hermes on every
|
||||
# boot. Profile dirs and files can land owned by root when commands
|
||||
# are invoked via `docker exec <container> hermes …` (which defaults
|
||||
# to root unless `-u` is passed), and that breaks the cont-init
|
||||
# reconciler (02-reconcile-profiles) which runs as hermes and walks
|
||||
# the profiles dir. Idempotent; errors are ignored on rootless containers where
|
||||
# chown would fail.
|
||||
if [ -d "$HERMES_HOME/profiles" ]; then
|
||||
chown -R hermes:hermes "$HERMES_HOME/profiles" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# --- config.yaml permissions ---
|
||||
# Ensure config.yaml is readable by the hermes runtime user even if it
|
||||
# was edited on the host after initial ownership setup.
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# --- Seed directory structure as hermes user ---
|
||||
# Run as hermes via s6-setuidgid so dirs end up owned correctly (matters
|
||||
# under rootless Podman where chown back to root would fail).
|
||||
#
|
||||
# Use direct `mkdir -p` invocation (no `sh -c "..."` wrapper) so the
|
||||
# shell isn't a second interpreter — defends against $HERMES_HOME values
|
||||
# containing shell metacharacters. PR #30136 review item O2.
|
||||
s6-setuidgid hermes mkdir -p \
|
||||
"$HERMES_HOME/cron" \
|
||||
"$HERMES_HOME/sessions" \
|
||||
"$HERMES_HOME/logs" \
|
||||
"$HERMES_HOME/hooks" \
|
||||
"$HERMES_HOME/memories" \
|
||||
"$HERMES_HOME/skills" \
|
||||
"$HERMES_HOME/skins" \
|
||||
"$HERMES_HOME/plans" \
|
||||
"$HERMES_HOME/workspace" \
|
||||
"$HERMES_HOME/home"
|
||||
|
||||
# --- Install-method stamp (read by detect_install_method() in hermes status) ---
|
||||
# Preserved from the tini-era entrypoint (PR #27843). Must be written as
|
||||
# the hermes user so ownership matches the file's documented owner.
|
||||
# tee is invoked directly via s6-setuidgid (no `sh -c` wrapper) for the
|
||||
# same shell-metacharacter safety described above.
|
||||
printf 'docker\n' | s6-setuidgid hermes tee "$HERMES_HOME/.install_method" >/dev/null \
|
||||
|| true
|
||||
|
||||
# --- Seed config files (only on first boot) ---
|
||||
seed_one() {
|
||||
dest=$1
|
||||
src=$2
|
||||
if [ ! -f "$HERMES_HOME/$dest" ] && [ -f "$INSTALL_DIR/$src" ]; then
|
||||
s6-setuidgid hermes cp "$INSTALL_DIR/$src" "$HERMES_HOME/$dest"
|
||||
fi
|
||||
}
|
||||
seed_one ".env" ".env.example"
|
||||
seed_one "config.yaml" "cli-config.yaml.example"
|
||||
seed_one "SOUL.md" "docker/SOUL.md"
|
||||
|
||||
# auth.json: bootstrap from env on first boot only. Same semantics as the
|
||||
# pre-s6 entrypoint — the [ ! -f ] guard is critical to avoid clobbering
|
||||
# rotated refresh tokens on container restart.
|
||||
if [ ! -f "$HERMES_HOME/auth.json" ] && [ -n "${HERMES_AUTH_JSON_BOOTSTRAP:-}" ]; then
|
||||
printf '%s' "$HERMES_AUTH_JSON_BOOTSTRAP" > "$HERMES_HOME/auth.json"
|
||||
chown hermes:hermes "$HERMES_HOME/auth.json" 2>/dev/null || true
|
||||
chmod 600 "$HERMES_HOME/auth.json"
|
||||
fi
|
||||
|
||||
# --- Sync bundled skills ---
|
||||
# Invoke the venv's python by absolute path so we don't need a `sh -c`
|
||||
# wrapper to source the activate script. This is safe because
|
||||
# skills_sync.py doesn't depend on any environment exports beyond what
|
||||
# the python binary's own bin-stub already sets up (sys.path is rooted
|
||||
# at the venv's site-packages by virtue of running .venv/bin/python).
|
||||
if [ -d "$INSTALL_DIR/skills" ]; then
|
||||
s6-setuidgid hermes "$INSTALL_DIR/.venv/bin/python" "$INSTALL_DIR/tools/skills_sync.py" \
|
||||
|| echo "[stage2] Warning: skills_sync.py failed; continuing"
|
||||
fi
|
||||
|
||||
echo "[stage2] Setup complete; starting user services"
|
||||
@@ -1,434 +0,0 @@
|
||||
# s6-overlay Supervision for Per-Profile Gateways in Docker — Implementation Plan
|
||||
|
||||
> **Status: shipped.** Phases 0–5 landed via PR
|
||||
> [NousResearch/hermes-agent#30136](https://github.com/NousResearch/hermes-agent/pull/30136)
|
||||
> in May 2026. This document is preserved as a post-implementation reference
|
||||
> for the architecture and the resolved design questions. The phase-by-phase
|
||||
> TDD walkthrough (≈2,800 lines) and the v2/v3 re-validation preambles have
|
||||
> been removed — the canonical implementation history is the PR commit log
|
||||
> (`git log --oneline a957ef083..a6f7171a5 -- 'docker/*' 'hermes_cli/service_manager.py' …`).
|
||||
> Open Questions are collapsed into a single Decision Log table; full
|
||||
> deliberations live in PR review comments.
|
||||
|
||||
**Goal:** Replace `tini` with s6-overlay as PID 1 in the Hermes Docker image so
|
||||
that the main hermes process, the dashboard, and dynamically-created
|
||||
per-profile gateways all run as supervised services (auto-restart on crash,
|
||||
clean shutdown, signal forwarding, zombie reaping). Preserve every existing
|
||||
`docker run …` invocation pattern — including interactive TUI.
|
||||
|
||||
**Architecture:** s6-overlay's `/init` is the container ENTRYPOINT, running
|
||||
s6-svscan as PID 1. Main hermes and the dashboard are declared as static
|
||||
s6-rc services at image build time. Per-profile gateways — which users create
|
||||
*after* the image is built (`hermes profile create coder` →
|
||||
`coder gateway start`) — are registered dynamically by writing service
|
||||
directories under a scandir watched by s6-svscan. A `ServiceManager` protocol
|
||||
abstracts the install/start/stop/restart surface across the init systems we
|
||||
care about (systemd on Linux host, launchd on macOS host, Scheduled Tasks on
|
||||
native Windows host, s6 inside container) and adds a second tier for runtime
|
||||
service registration that only s6 implements.
|
||||
|
||||
**Tech Stack:**
|
||||
|
||||
- [s6-overlay](https://github.com/just-containers/s6-overlay) v3.2.3.0
|
||||
(noarch + per-arch tarballs ~15 MB). SHA256-pinned via build ARGs;
|
||||
multi-arch via `TARGETARCH` (amd64 → `x86_64`, arm64 → `aarch64`).
|
||||
- Debian 13.4 base image (unchanged).
|
||||
- [hadolint](https://github.com/hadolint/hadolint) for the Dockerfile +
|
||||
[shellcheck](https://github.com/koalaman/shellcheck) for entrypoint scripts.
|
||||
- Python subprocess wrappers for `s6-svc`, `s6-svstat`, `s6-svscanctl`.
|
||||
- Existing systemd/launchd/windows surface in `hermes_cli/gateway.py` and
|
||||
`hermes_cli/gateway_windows.py`.
|
||||
|
||||
**Scope:**
|
||||
|
||||
- Container-only (host-side systemd/launchd/windows behavior is preserved,
|
||||
not modified).
|
||||
- s6-overlay only (no pure-Python fallback).
|
||||
- Architecture A (s6 owns PID 1; tini is removed).
|
||||
- Interactive TUI must keep working:
|
||||
`docker run -it --rm nousresearch/hermes-agent:latest --tui`.
|
||||
- Dynamic registration is limited to per-profile gateways — one service per
|
||||
profile, created when a profile is created, torn down when deleted. A
|
||||
`gateway-default` slot is always registered for the root HERMES_HOME
|
||||
profile so `hermes gateway start` (no `-p`) has somewhere to land.
|
||||
|
||||
**Out of scope:**
|
||||
|
||||
- Host-side dynamic supervision (systemd-run / launchd transient plists) —
|
||||
not needed.
|
||||
- Pure-Python supervisor fallback — not needed.
|
||||
- Arbitrary user-defined supervised processes inside the container — only
|
||||
profile gateways.
|
||||
- Migration of existing per-profile systemd unit generation to s6 on the
|
||||
host side.
|
||||
- Non-Docker container runtimes (Podman rootless validated reactively).
|
||||
- UX polish around in-container profile lifecycle (e.g. a nice status view
|
||||
of all supervised profile gateways) — deferred to follow-up.
|
||||
|
||||
---
|
||||
|
||||
## Background From The Codebase
|
||||
|
||||
> **Note on line numbers:** This section refers to functions and structures
|
||||
> by name only. Use `grep -n 'def <name>' <file>` to locate anything below
|
||||
> if you need the current line.
|
||||
|
||||
### Pre-s6 container init (what we replaced)
|
||||
|
||||
The original `Dockerfile` declared
|
||||
`ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]`.
|
||||
tini was PID 1, reaped zombies, forwarded SIGTERM to the process group. The
|
||||
old `docker/entrypoint.sh`:
|
||||
|
||||
1. `gosu` privilege drop from root → `hermes` UID.
|
||||
2. Copied `.env.example`, `cli-config.yaml.example`, `SOUL.md` into
|
||||
`$HERMES_HOME` if missing.
|
||||
3. Synced bundled skills via `tools/skills_sync.py`.
|
||||
4. Optionally backgrounded `hermes dashboard` in a subshell when
|
||||
`HERMES_DASHBOARD=1` — **not supervised**, no restart.
|
||||
5. `exec hermes "$@"` — tini's sole direct child.
|
||||
|
||||
Known limitations: dashboard crash → stays dead; dashboard fails at startup →
|
||||
silent; gateway crash → dashboard dies too. The May 4, 2026 decision was
|
||||
"leave as is" because nothing in the container needed supervision then.
|
||||
Adding per-profile gateway supervision changed that.
|
||||
|
||||
### ServiceManager surface (what we wrapped, not refactored)
|
||||
|
||||
All init-system logic lives in **`hermes_cli/gateway.py`** (~5,400 LOC at
|
||||
re-validation). The systemd/launchd code is ~1,500 lines of that, plus a
|
||||
separate **`hermes_cli/gateway_windows.py`** (~690 LOC) for Windows
|
||||
Scheduled Tasks.
|
||||
|
||||
| Layer | Systemd functions | Launchd functions | Windows functions |
|
||||
|---|---|---|---|
|
||||
| **Detection** | `supports_systemd_services()`, `_systemd_operational()`, `_wsl_systemd_operational()`, `_container_systemd_operational()` | `is_macos()` | `is_windows()`, `gateway_windows.is_installed()` |
|
||||
| **Paths** | `get_systemd_unit_path(system)`, `get_service_name()` | `get_launchd_plist_path()`, `get_launchd_label()` | `gateway_windows.get_task_name()`, `get_task_script_path()`, `get_startup_entry_path()` |
|
||||
| **Install/lifecycle** | `systemd_install(force, system, run_as_user)`, `systemd_uninstall(system)`, `systemd_start/stop/restart(system)` | `launchd_install(force)`, `launchd_uninstall/start/stop/restart` | `gateway_windows.install/uninstall/start/stop/restart` |
|
||||
| **Probes** | `_probe_systemd_service_running(system)`, `_read_systemd_unit_properties(system)`, `_wait_for_systemd_service_restart`, `_recover_pending_systemd_restart` | `_probe_launchd_service_running()` | `gateway_windows.is_task_registered()`, `_pid_exists` helper |
|
||||
| **D-Bus plumbing** | `_ensure_user_systemd_env`, `_user_systemd_socket_ready`, `_user_systemd_private_socket_path`, `get_systemd_linger_status` | — | — |
|
||||
| **Unit/plist generation** | `generate_systemd_unit(system, run_as_user)`, `systemd_unit_is_current`, `refresh_systemd_unit_if_needed` | plist templating in `launchd_install` | `_build_gateway_cmd_script`, `_build_startup_launcher`, `_write_task_script` |
|
||||
|
||||
Container-relevant callers outside `gateway.py`:
|
||||
|
||||
- `hermes_cli/status.py` — gained an `s6` branch for in-container runs.
|
||||
- `hermes_cli/profiles.py` — `create_profile` / `delete_profile` register and
|
||||
unregister with s6 inside the container (no-op on host).
|
||||
- `hermes_cli/doctor.py` — `_check_gateway_service_linger` skips on s6, and a
|
||||
new "Service Supervisor" section reports main-hermes / dashboard /
|
||||
profile-gateway counts via the ServiceManager.
|
||||
- `hermes_cli/gateway.py::gateway_command` — the
|
||||
`elif is_container():` rejection arms that refused gateway lifecycle
|
||||
operations were removed; the `_dispatch_via_service_manager_if_s6` helper
|
||||
intercepts start/stop/restart and routes them through s6.
|
||||
|
||||
### Per-profile gateway spawning
|
||||
|
||||
`hermes gateway start`, `coder gateway start` (profile alias), and
|
||||
`hermes -p <profile> gateway start` all spawn a gateway process scoped to a
|
||||
given profile. See
|
||||
[Profiles: Running Gateways](https://hermes-agent.nousresearch.com/docs/user-guide/profiles#running-gateways).
|
||||
On host, lifecycle is managed via per-profile systemd units
|
||||
(`hermes-gateway-<profile>.service`); inside the container, an s6 service at
|
||||
`/run/service/gateway-<name>/` is registered when the profile is created and
|
||||
torn down when it's deleted.
|
||||
|
||||
**Persistence across container restart:** `/run/service/` is tmpfs —
|
||||
service registrations are wiped when the container restarts. Profile
|
||||
directories at `/opt/data/profiles/<name>/` live on the persistent VOLUME,
|
||||
and each one records its gateway's last state in `gateway_state.json`.
|
||||
`/etc/cont-init.d/02-reconcile-profiles` walks the persistent profiles on
|
||||
every container boot, recreates the s6 service slots via
|
||||
`hermes_cli/container_boot.py`, and auto-starts those whose last recorded
|
||||
state was `running`. Profiles whose last state was `stopped`,
|
||||
`startup_failed`, `starting`, or absent get their slot recreated in the
|
||||
`down` state and wait for explicit user action. `docker restart` is therefore
|
||||
invisible to a user with running profile gateways: they come back up;
|
||||
stopped ones stay stopped.
|
||||
|
||||
### s6-overlay constraints
|
||||
|
||||
- **Root/non-root model:** `/init` runs as root to set up the supervision
|
||||
tree, install signal handlers, and run the stage2 hook that does
|
||||
`usermod`/`chown`. Each supervised service drops to UID 10000 via
|
||||
`s6-setuidgid hermes` in its `run` script. The per-service `s6-supervise`
|
||||
monitor stays root so it can signal its child regardless of UID. Net
|
||||
effect: hermes and all its subprocesses run as UID 10000 exactly as
|
||||
before; only the supervision tree itself runs as root.
|
||||
- v3.2.3.0 has only limited support for running `/init` itself as
|
||||
non-root — some tools (`fix-attrs`, `logutil-service`) assume root. We
|
||||
don't hit this because `/init` runs as root.
|
||||
- Scandir hard cap: `services_max` default 1000, configurable to 160,000.
|
||||
- `/command/with-contenv` sources `/run/s6/container_environment/*` into
|
||||
service env — convenient for passing `HERMES_HOME` etc.
|
||||
- s6 restart semantics: a service crash triggers an `s6-supervise` restart after
|
||||
1s; override with a `finish` script.
|
||||
- Zombie reaping: PID 1 (s6-svscan) reaps all zombies non-blockingly on
|
||||
SIGCHLD. Any subagent subprocess spawned by the main hermes process is
|
||||
reaped automatically.
|
||||
|
||||
---
|
||||
|
||||
## Key Design Decisions
|
||||
|
||||
### D1. s6-overlay replaces tini entirely
|
||||
|
||||
Container ENTRYPOINT is `/init`, PID 1 is s6-svscan. The main hermes
|
||||
process, the dashboard, and every per-profile gateway run as supervised
|
||||
services. This is a single breaking change to the container contract.
|
||||
|
||||
### D2. Main hermes is an s6 service with container-exit semantics
|
||||
|
||||
The contract "container exits when `hermes` exits" is preserved via a
|
||||
service `finish` script that writes to
|
||||
`/run/s6-linux-init-container-results/exitcode` and calls
|
||||
`/run/s6/basedir/bin/halt`. All five supported invocations work:
|
||||
|
||||
| `docker run <image> …` | Behavior |
|
||||
|---|---|
|
||||
| (no args) | `hermes` with no args, container exits when hermes exits |
|
||||
| `chat -q "..."` | `hermes chat -q "..."`, container exits with hermes exit code |
|
||||
| `sleep infinity` | `sleep infinity` directly (long-lived sandbox mode) |
|
||||
| `bash` | interactive `bash` directly |
|
||||
| `docker run -it … --tui` | interactive Ink TUI with real TTY — see D9 |
|
||||
|
||||
`docker/main-wrapper.sh` detects whether `$1` is an executable on PATH and
|
||||
routes either to "run this as a one-shot main service" or "wrap with
|
||||
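hermes". (Note: as shipped, the user's CMD runs as `/init`'s main program via `docker/main-wrapper.sh`; the main hermes s6 service itself is a no-op placeholder that just sleeps.)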
|
||||
|
||||
### D3. Static services at build time; dynamic (per-profile) services at runtime
|
||||
|
||||
s6 offers two mechanisms:
|
||||
|
||||
- **s6-rc** (declarative, compile-then-swap): used for main hermes and the
|
||||
dashboard — they're known at image build time.
|
||||
- **scandir** (drop a directory + `s6-svscanctl -a`): used for per-profile
|
||||
gateways — profiles are user-created after the image is built.
|
||||
|
||||
Per-profile gateway service dirs live at `/run/service/gateway-<profile>/`
|
||||
(tmpfs, hermes-writable). s6-svscan picks them up on rescan.
|
||||
|
||||
### D4. ServiceManager protocol with two methods for runtime registration
|
||||
|
||||
Host paths (systemd, launchd, Windows Scheduled Tasks) need only
|
||||
install/start/stop/restart of pre-declared services. Inside the container,
|
||||
we additionally need to register services at runtime when a profile is
|
||||
created. The protocol exposes this directly:
|
||||
|
||||
```python
|
||||
class ServiceManager(Protocol):
|
||||
kind: ServiceManagerKind # "systemd" | "launchd" | "windows" | "s6" | "none"
|
||||
|
||||
# Lifecycle of an already-declared service
|
||||
def start(self, name: str) -> None: ...
|
||||
def stop(self, name: str) -> None: ...
|
||||
def restart(self, name: str) -> None: ...
|
||||
def is_running(self, name: str) -> bool: ...
|
||||
|
||||
# Runtime registration (container-only; hosts raise NotImplementedError)
|
||||
def supports_runtime_registration(self) -> bool: ...
|
||||
def register_profile_gateway(
|
||||
self, profile: str, *,
|
||||
extra_env: dict[str, str] | None = None,
|
||||
) -> None: ...
|
||||
def unregister_profile_gateway(self, profile: str) -> None: ...
|
||||
def list_profile_gateways(self) -> list[str]: ...
|
||||
```
|
||||
|
||||
Systemd, launchd, and Windows backends raise `NotImplementedError` on the
|
||||
registration methods. Only the s6 backend implements them. Callers check
|
||||
`supports_runtime_registration()` before calling.
|
||||
|
||||
The scope is intentionally narrow: it's specifically "register/unregister a
|
||||
profile gateway," not a general-purpose process-management API.
|
||||
|
||||
### D5. Per-profile gateway service spec is fixed, not user-provided
|
||||
|
||||
Every profile gateway has the same command shape
|
||||
(`hermes -p <profile> gateway run`, or `hermes gateway run` for the default
|
||||
profile). The s6 backend generates the `run` script from a fixed template
|
||||
given the profile name — no arbitrary command list. This keeps the API
|
||||
surface tight and prevents callers from accidentally registering
|
||||
non-gateway services.
|
||||
|
||||
Port selection is governed by the profile's `config.yaml`
|
||||
(the `port` key under `gateway`) — the single source of truth. (The original plan
|
||||
proposed a Python-side SHA-256 port allocator with a 600-port range; it was
|
||||
retired during PR review because it was dead code through the entire stack.)
|
||||
|
||||
### D6. Add detect_service_manager() alongside supports_systemd_services()
|
||||
|
||||
`supports_systemd_services()` stays as-is (host code paths unchanged). A new
|
||||
`detect_service_manager() -> Literal["systemd", "launchd", "windows", "s6", "none"]`
|
||||
composes existing detection functions (`is_macos()`, `is_windows()`,
|
||||
`supports_systemd_services()`, `is_container()` + `_s6_running()`) and adds
|
||||
an s6 branch for container detection. Host call sites continue to use the
|
||||
existing functions; container-only code (the profile hooks) uses the new one.
|
||||
|
||||
`_s6_running()` probes `/proc/1/comm` (world-readable) and
|
||||
`/run/s6/basedir`. The earlier `/proc/1/exe` probe was root-only readable
|
||||
and silently failed for the unprivileged hermes user (UID 10000), making
|
||||
the entire runtime-registration path inert in production — caught in PR
|
||||
review.
|
||||
|
||||
### D7. Wrap existing systemd/launchd/windows functions, don't rewrite them
|
||||
|
||||
`SystemdServiceManager` / `LaunchdServiceManager` / `WindowsServiceManager`
|
||||
are thin adapters over the existing `systemd_*` / `launchd_*` module-level
|
||||
functions in `hermes_cli/gateway.py` and the
|
||||
`gateway_windows.install/uninstall/start/stop/restart/is_installed`
|
||||
functions in `hermes_cli/gateway_windows.py`. We get the abstraction
|
||||
without rewriting ~2,200 LOC of working code.
|
||||
|
||||
### D8. Profile create/delete hooks register/unregister the s6 service
|
||||
|
||||
When `hermes profile create <name>` runs inside the container, the
|
||||
profile-creation code path calls
|
||||
`ServiceManager.register_profile_gateway(<name>)` if
|
||||
`supports_runtime_registration()` is True. When `hermes profile delete
|
||||
<name>` runs, it calls `unregister_profile_gateway(<name>)`. On host, both
|
||||
calls are no-ops (registration not supported; existing systemd unit
|
||||
generation continues to handle install/uninstall).
|
||||
|
||||
Existing per-profile `hermes -p <profile> gateway start/stop/restart` CLI
|
||||
commands continue to work — in the container they dispatch to
|
||||
`ServiceManager.start/stop/restart("gateway-<profile>")`, which translates
|
||||
to `s6-svc -u`/`-d`/`-t` on the service dir.
|
||||
|
||||
`hermes gateway start` (no `-p`) targets a special `gateway-default` slot
|
||||
that's always registered by the cont-init reconciler. Its run script omits
|
||||
the `-p` flag and runs against the root `$HERMES_HOME` profile.
|
||||
|
||||
`--all` lifecycle (`hermes gateway stop --all`, `... restart --all`)
|
||||
iterates `mgr.list_profile_gateways()` through s6 so s6's `want up`/`want
|
||||
down` flips correctly. Without this, `--all` fell through to `pkill`
|
||||
followed by s6-supervise auto-restart — net effect: kick instead of stop.
|
||||
|
||||
### D9. Interactive TUI bypasses s6 service-mode and runs as CMD for TTY passthrough
|
||||
|
||||
`docker run -it --rm <image> --tui` needs a real TTY connected to container
|
||||
stdin/stdout for Ink raw-mode keyboard input, cursor control, and SIGWINCH.
|
||||
Running the TUI as a normal s6 service fails because s6-supervise
|
||||
disconnects service stdio from the container TTY (documented:
|
||||
[s6-overlay#230](https://github.com/just-containers/s6-overlay/issues/230)).
|
||||
|
||||
**The pattern:** s6-overlay's `/init` execs a CMD as the container's "main
|
||||
program" after the supervision tree is up. The CMD inherits
|
||||
stdin/stdout/stderr from `/init` — which in `-it` mode is the container
|
||||
TTY. The stage2 hook detects the TUI case and short-circuits the
|
||||
main-hermes service so the hermes CMD becomes that main program.
|
||||
|
||||
```sh
|
||||
# In docker/stage2-hook.sh
|
||||
_is_tui_invocation() {
|
||||
for arg in "$@"; do
|
||||
case "$arg" in --tui|-T) return 0 ;; esac
|
||||
done
|
||||
case "${HERMES_TUI:-}" in 1|true|TRUE|yes) return 0 ;; esac
|
||||
if [ -t 0 ] && [ $# -eq 0 ]; then return 0; fi
|
||||
return 1
|
||||
}
|
||||
```
|
||||
|
||||
And in `docker/s6-rc.d/main-hermes/run`:
|
||||
|
||||
```sh
|
||||
if [ -f /var/run/s6/container_environment/HERMES_TUI_MODE ]; then
|
||||
exec sleep infinity # s6-overlay will exec CMD as the TTY-connected main
|
||||
fi
|
||||
exec s6-setuidgid hermes hermes ${HERMES_ARGS:-}
|
||||
```
|
||||
|
||||
In TUI mode main hermes is effectively unsupervised (same as the pre-s6
|
||||
behavior with tini — acceptable because the user is interactively
|
||||
present). Dashboard and profile gateways still get full s6 supervision via
|
||||
their separate services.
|
||||
|
||||
The integration test `test_tty_passthrough_to_container` uses `tput cols`
|
||||
and `COLUMNS=123` as the probe.
|
||||
|
||||
---
|
||||
|
||||
## Risk Register
|
||||
|
||||
| Risk | Likelihood | Impact | Mitigation |
|
||||
|---|---|---|---|
|
||||
| Phase 2 breaks a downstream user's Dockerfile that `FROM`s ours | Medium | Medium | Release notes call out ENTRYPOINT change; the test harness (`tests/docker/`) gives high confidence in behavior parity |
|
||||
| TUI TTY passthrough fails on some Docker versions | Low | High | Harness includes `test_tty_passthrough_to_container` as a hard gate; fallback plan = s6-fdholder ([s6-overlay#230](https://github.com/just-containers/s6-overlay/issues/230) Solution 2) |
|
||||
| s6-overlay non-root quirks (logutil-service, fix-attrs) bite us | Low | Low | Supervisor runs as root, services drop — sidesteps these issues |
|
||||
| Podman rootless UID mapping confuses s6 | Medium | Low | Documented as supported, fix reactively; a Podman + Docker environment is stood up for validation |
|
||||
| Test harness is flaky (docker daemon issues, timing) | Medium | Low | Generous timeouts; skip when docker unavailable; polling helpers replace fixed sleeps in `test_container_restart.py` |
|
||||
| Profile gateway crash loop masks a real config error | Low | Medium | s6 `finish` script `max_restarts` cap (planned follow-up); operators see crash-looping logs in `$HERMES_HOME/logs/gateways/<profile>/` |
|
||||
| Newly added linting (hadolint/shellcheck) of the Dockerfile and entrypoint scripts reveals latent bugs | Low | Low | CI lint jobs catch them; fix or document ignore with rationale |
|
||||
| Stale `gateway.pid` from a dead container collides with an unrelated live PID in the restarted container | Low | Medium | Cont-init reconciliation removes `gateway.pid` and `processes.json` from every profile dir on boot, before any new gateway starts |
|
||||
| `docker restart` silently loses per-profile gateway registrations (tmpfs scandir wiped) | High (without mitigation) | High | Cont-init reconciliation re-registers from persistent `$HERMES_HOME/profiles/` and auto-starts those last seen `running`; outcome recorded to `$HERMES_HOME/logs/container-boot.log` (size-bounded, rotates to `.1` at 256 KiB) |
|
||||
| A `running` gateway that's actually broken auto-restarts into a crash loop after every container restart | Low | Medium | s6 `finish` script `max_restarts` cap (planned); follow-up: `hermes doctor` alerts when N consecutive container restarts ended in `startup_failed` |
|
||||
| `_s6_running()` detection works as root but silently fails for unprivileged hermes user, making runtime-registration path inert | High (without mitigation) | High | **Caught in PR review.** Detection now probes `/proc/1/comm` (world-readable) + `/run/s6/basedir`. Docker integration tests refactored to `docker exec -u hermes` so the realistic runtime user is exercised |
|
||||
| `s6-svscanctl` from hermes hits EACCES on the root-owned control FIFO | Medium | Medium | `02-reconcile-profiles` chowns `/run/service/.s6-svscan/{control,lock}` to hermes after stage1 creates them |
|
||||
| Per-service `supervise/control` FIFO is root-owned by s6-supervise, blocking `s6-svc` from hermes | Known | Medium | Surfaced cleanly as `S6CommandError` (with rc + stderr) instead of raw `CalledProcessError`. Permission fix tracked as a follow-up (small SUID helper, polling chown loop in cont-init.d, or replace `s6-svc` with `down`-marker manipulation) |
|
||||
|
||||
---
|
||||
|
||||
## Decision Log
|
||||
|
||||
| # | Question | Decision |
|
||||
|---|---|---|
|
||||
| OQ1 | Gate Phase 2 behind env var? | Ship directly (Hermes is pre-1.0; users can pin the previous image) |
|
||||
| OQ2 | s6 root model | Root `/init`, drop per-service via `s6-setuidgid hermes` |
|
||||
| OQ3 | Dashboard opt-in mechanism | Always declared as an s6 service; `03-dashboard-toggle` cont-init script writes a `down` marker when `HERMES_DASHBOARD` is unset so `s6-svstat` reports the slot's real state |
|
||||
| OQ4 | Podman rootless | Supported, fix reactively |
|
||||
| OQ5 | Service naming | `gateway-<profile>` (matches pre-existing `hermes-gateway-<profile>.service` systemd convention) |
|
||||
| OQ6 | — (retired; no subagent gateways in scope) | — |
|
||||
| OQ7 | Resource limits per profile gateway | Defer (no per-cgroup limits; rely on the container's overall limit) |
|
||||
| OQ8 | Log persistence | `$HERMES_HOME/logs/gateways/<profile>/`. The log path is sourced from runtime `$HERMES_HOME` via `with-contenv`, NOT Python-substituted at registration time |
|
||||
| OQ9 | TUI passthrough | Trust the documented [s6-overlay#230](https://github.com/just-containers/s6-overlay/issues/230) Solution 1; harness includes a TTY passthrough hard-gate test |
|
||||
|
||||
**Post-merge additions from PR #30136 review:**
|
||||
|
||||
- **Multi-arch tarballs:** `TARGETARCH` mapped to `x86_64` / `aarch64`;
|
||||
per-arch tarball fetched via `curl` because `ADD` doesn't honor BuildKit
|
||||
args.
|
||||
- **SHA256 verification:** all three tarballs (noarch, symlinks, per-arch)
|
||||
pinned via build ARGs and verified with `sha256sum -c` against a single
|
||||
checksum file (avoids hadolint DL4006 piped-shell warning).
|
||||
- **`gateway-default` slot:** always registered by the reconciler so
|
||||
`hermes gateway start` (no `-p`) has somewhere to land.
|
||||
- **Friendly lifecycle errors:** `GatewayNotRegisteredError` and
|
||||
`S6CommandError` translate `CalledProcessError` into actionable CLI
|
||||
messages.
|
||||
- **Atomic publication in the reconciler:** mirrors
|
||||
`register_profile_gateway`'s tmp+rename pattern.
|
||||
- **`container-boot.log` rotation:** 256 KiB soft cap, rotated to `.1`.
|
||||
- **`port` parameter retired:** allocator + kwarg were dead code through
|
||||
the entire stack; `config.yaml` is the single source of truth.
|
||||
|
||||
---
|
||||
|
||||
## Verification Checklist
|
||||
|
||||
- [x] Test harness (`tests/docker/`) passes against the s6 image
|
||||
- [x] hadolint + shellcheck run green in CI
|
||||
- [x] `docker run -it --rm hermes-agent --tui` starts the Ink TUI with
|
||||
working keyboard input, cursor control, and resize (SIGWINCH)
|
||||
- [x] Dashboard crashes are recovered by s6 within ~2s
|
||||
- [x] `hermes profile create test` inside a container creates
|
||||
`/run/service/gateway-test/`
|
||||
- [x] `hermes -p test gateway start` inside a container dispatches through s6
|
||||
- [x] `hermes -p test gateway stop` inside a container cleanly stops via s6
|
||||
- [x] `hermes profile delete test` inside a container removes
|
||||
`/run/service/gateway-test/`
|
||||
- [x] Profile gateway logs persist at
|
||||
`$HERMES_HOME/logs/gateways/test/current`
|
||||
- [x] `hermes status` inside the container shows `Manager: s6`
|
||||
- [x] `hermes gateway start` (no `-p`) inside a container targets
|
||||
`gateway-default` and runs against the root profile
|
||||
- [x] `hermes gateway stop --all` / `... restart --all` iterate every
|
||||
profile gateway under s6 instead of pkill-then-supervise-restart
|
||||
- [x] Per-profile gateway registrations survive `docker restart` via the
|
||||
cont-init reconciler; running gateways come back up, stopped ones
|
||||
stay down
|
||||
- [x] Multi-arch image builds for both `linux/amd64` and `linux/arm64`
|
||||
- [x] s6-overlay tarballs are SHA256-verified at build time
|
||||
- [x] No systemd/launchd host-side functions were modified (only wrapped)
|
||||
- [x] `hermes gateway install/start/stop` on Linux host and macOS host
|
||||
behave identically to pre-change
|
||||
+90
-101
@@ -424,9 +424,7 @@ _PLATFORM_CONNECTED_CHECKERS: dict[Platform, Callable[[PlatformConfig], bool]] =
|
||||
Platform.SMS: lambda cfg: bool(os.getenv("TWILIO_ACCOUNT_SID")),
|
||||
Platform.API_SERVER: lambda cfg: True,
|
||||
Platform.WEBHOOK: lambda cfg: True,
|
||||
Platform.MSGRAPH_WEBHOOK: lambda cfg: bool(
|
||||
str(cfg.extra.get("client_state") or "").strip()
|
||||
),
|
||||
Platform.MSGRAPH_WEBHOOK: lambda cfg: True,
|
||||
Platform.FEISHU: lambda cfg: bool(cfg.extra.get("app_id")),
|
||||
Platform.WECOM: lambda cfg: bool(cfg.extra.get("bot_id")),
|
||||
Platform.WECOM_CALLBACK: lambda cfg: bool(
|
||||
@@ -928,6 +926,73 @@ def load_gateway_config() -> GatewayConfig:
|
||||
ac = ",".join(str(v) for v in ac)
|
||||
os.environ["SLACK_ALLOWED_CHANNELS"] = str(ac)
|
||||
|
||||
# Discord settings → env vars (env vars take precedence)
|
||||
discord_cfg = yaml_cfg.get("discord", {})
|
||||
if isinstance(discord_cfg, dict):
|
||||
if "require_mention" in discord_cfg and not os.getenv("DISCORD_REQUIRE_MENTION"):
|
||||
os.environ["DISCORD_REQUIRE_MENTION"] = str(discord_cfg["require_mention"]).lower()
|
||||
if "thread_require_mention" in discord_cfg and not os.getenv("DISCORD_THREAD_REQUIRE_MENTION"):
|
||||
os.environ["DISCORD_THREAD_REQUIRE_MENTION"] = str(discord_cfg["thread_require_mention"]).lower()
|
||||
frc = discord_cfg.get("free_response_channels")
|
||||
if frc is not None and not os.getenv("DISCORD_FREE_RESPONSE_CHANNELS"):
|
||||
if isinstance(frc, list):
|
||||
frc = ",".join(str(v) for v in frc)
|
||||
os.environ["DISCORD_FREE_RESPONSE_CHANNELS"] = str(frc)
|
||||
if "auto_thread" in discord_cfg and not os.getenv("DISCORD_AUTO_THREAD"):
|
||||
os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
|
||||
if "reactions" in discord_cfg and not os.getenv("DISCORD_REACTIONS"):
|
||||
os.environ["DISCORD_REACTIONS"] = str(discord_cfg["reactions"]).lower()
|
||||
# ignored_channels: channels where bot never responds (even when mentioned)
|
||||
ic = discord_cfg.get("ignored_channels")
|
||||
if ic is not None and not os.getenv("DISCORD_IGNORED_CHANNELS"):
|
||||
if isinstance(ic, list):
|
||||
ic = ",".join(str(v) for v in ic)
|
||||
os.environ["DISCORD_IGNORED_CHANNELS"] = str(ic)
|
||||
# allowed_channels: if set, bot ONLY responds in these channels (whitelist)
|
||||
ac = discord_cfg.get("allowed_channels")
|
||||
if ac is not None and not os.getenv("DISCORD_ALLOWED_CHANNELS"):
|
||||
if isinstance(ac, list):
|
||||
ac = ",".join(str(v) for v in ac)
|
||||
os.environ["DISCORD_ALLOWED_CHANNELS"] = str(ac)
|
||||
# no_thread_channels: channels where bot responds directly without creating thread
|
||||
ntc = discord_cfg.get("no_thread_channels")
|
||||
if ntc is not None and not os.getenv("DISCORD_NO_THREAD_CHANNELS"):
|
||||
if isinstance(ntc, list):
|
||||
ntc = ",".join(str(v) for v in ntc)
|
||||
os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc)
|
||||
# history_backfill: recover missed channel messages for shared sessions
|
||||
# when require_mention is active. Fetches messages between bot turns
|
||||
# and prepends them to the user message for context.
|
||||
if "history_backfill" in discord_cfg and not os.getenv("DISCORD_HISTORY_BACKFILL"):
|
||||
os.environ["DISCORD_HISTORY_BACKFILL"] = str(discord_cfg["history_backfill"]).lower()
|
||||
hbl = discord_cfg.get("history_backfill_limit")
|
||||
if hbl is not None and not os.getenv("DISCORD_HISTORY_BACKFILL_LIMIT"):
|
||||
os.environ["DISCORD_HISTORY_BACKFILL_LIMIT"] = str(hbl)
|
||||
# allow_mentions: granular control over what the bot can ping.
|
||||
# Safe defaults (no @everyone/roles) are applied in the adapter;
|
||||
# these YAML keys only override when set and let users opt back
|
||||
# into unsafe modes (e.g. roles=true) if they actually want it.
|
||||
allow_mentions_cfg = discord_cfg.get("allow_mentions")
|
||||
if isinstance(allow_mentions_cfg, dict):
|
||||
for yaml_key, env_key in (
|
||||
("everyone", "DISCORD_ALLOW_MENTION_EVERYONE"),
|
||||
("roles", "DISCORD_ALLOW_MENTION_ROLES"),
|
||||
("users", "DISCORD_ALLOW_MENTION_USERS"),
|
||||
("replied_user", "DISCORD_ALLOW_MENTION_REPLIED_USER"),
|
||||
):
|
||||
if yaml_key in allow_mentions_cfg and not os.getenv(env_key):
|
||||
os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower()
|
||||
# reply_to_mode: top-level preferred, falls back to extra.reply_to_mode
|
||||
# YAML 1.1 parses bare 'off' as boolean False — coerce to string "off".
|
||||
_discord_extra = discord_cfg.get("extra") if isinstance(discord_cfg.get("extra"), dict) else {}
|
||||
_discord_rtm = (
|
||||
discord_cfg["reply_to_mode"] if "reply_to_mode" in discord_cfg
|
||||
else _discord_extra.get("reply_to_mode")
|
||||
)
|
||||
if _discord_rtm is not None and not os.getenv("DISCORD_REPLY_TO_MODE"):
|
||||
_rtm_str = "off" if _discord_rtm is False else str(_discord_rtm).lower()
|
||||
os.environ["DISCORD_REPLY_TO_MODE"] = _rtm_str
|
||||
|
||||
# Bridge top-level require_mention to Telegram when the telegram: section
|
||||
# does not already provide one. Users often write "require_mention: true"
|
||||
# at the top level alongside group_sessions_per_user, expecting it to work
|
||||
@@ -1813,17 +1878,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
# need to seed ``PlatformConfig.extra`` from env vars (e.g. Google Chat's
|
||||
# project_id / subscription_name) can supply ``env_enablement_fn`` on
|
||||
# their PlatformEntry — called here BEFORE adapter construction.
|
||||
#
|
||||
# Enablement gate (#31116): when a plugin registers ``is_connected``
|
||||
# (the "has the user actually configured credentials for this?" check),
|
||||
# we MUST consult it before flipping ``enabled = True``. Otherwise
|
||||
# ``check_fn`` alone — which for adapter plugins typically just
|
||||
# verifies the SDK is importable / lazy-installs it — silently enables
|
||||
# platforms the user never opted into, and the gateway then tries to
|
||||
# connect to Discord / Teams / Google Chat with no token and emits
|
||||
# noisy retry-forever errors. ``_platform_status`` was already fixed
|
||||
# for the same bug class in commit 7849a3d73; this is the runtime
|
||||
# counterpart.
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins
|
||||
discover_plugins() # idempotent
|
||||
@@ -1836,99 +1890,34 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
logger.debug("check_fn for %s raised: %s", entry.name, e)
|
||||
continue
|
||||
platform = Platform(entry.name)
|
||||
existing_cfg = config.platforms.get(platform)
|
||||
# Seed candidate extras from ``env_enablement_fn`` so plugins
|
||||
# whose ``is_connected`` reads ``config.extra`` (e.g. Google
|
||||
# Chat's ``_is_connected`` checks ``config.extra["project_id"]``)
|
||||
# see the same state they will after enablement. Without this,
|
||||
# Google-Chat-on-env-vars-only setups silently fail the gate
|
||||
# below even though the user is configured. Plugins whose
|
||||
# ``is_connected`` reads env vars directly (Discord, IRC,
|
||||
# Teams, LINE, ntfy, Simplex) are unaffected; this only
|
||||
# restores Google Chat.
|
||||
seed_for_probe = None
|
||||
if platform not in config.platforms:
|
||||
config.platforms[platform] = PlatformConfig()
|
||||
config.platforms[platform].enabled = True
|
||||
# Seed extras from env if the plugin opted in.
|
||||
if entry.env_enablement_fn is not None:
|
||||
try:
|
||||
seed_for_probe = entry.env_enablement_fn()
|
||||
seed = entry.env_enablement_fn()
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"env_enablement_fn for %s raised: %s", entry.name, e
|
||||
)
|
||||
seed_for_probe = None
|
||||
|
||||
# Only consult is_connected for platforms that are NOT already
|
||||
# explicitly configured in YAML / env (existing_cfg with
|
||||
# enabled=True means the user wrote it themselves or another
|
||||
# env-var bridge enabled it — keep that decision).
|
||||
if existing_cfg is None or not existing_cfg.enabled:
|
||||
if entry.is_connected is not None:
|
||||
try:
|
||||
# Probe with ``enabled=True`` since we're asking
|
||||
# "would this plugin BE configured if we enabled
|
||||
# it?" not "is it currently enabled?". Google
|
||||
# Chat's ``_is_connected`` short-circuits on
|
||||
# ``config.enabled`` being False, which on the
|
||||
# default ``PlatformConfig()`` would fail the
|
||||
# gate even with proper env vars set.
|
||||
if existing_cfg is not None:
|
||||
probe_cfg = existing_cfg
|
||||
if not probe_cfg.enabled:
|
||||
probe_cfg = PlatformConfig(
|
||||
enabled=True,
|
||||
extra=dict(probe_cfg.extra or {}),
|
||||
)
|
||||
else:
|
||||
probe_cfg = PlatformConfig(enabled=True)
|
||||
if isinstance(seed_for_probe, dict) and seed_for_probe:
|
||||
# Don't mutate ``existing_cfg``; the probe gets
|
||||
# a transient view with env-seeded extras layered
|
||||
# on top of whatever's already there.
|
||||
probe_extra = dict(getattr(probe_cfg, "extra", {}) or {})
|
||||
for k, v in seed_for_probe.items():
|
||||
if k == "home_channel":
|
||||
continue
|
||||
probe_extra.setdefault(k, v)
|
||||
probe_cfg = PlatformConfig(
|
||||
enabled=True,
|
||||
extra=probe_extra,
|
||||
)
|
||||
configured = bool(entry.is_connected(probe_cfg))
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"is_connected for %s raised: %s — skipping enablement",
|
||||
entry.name, exc,
|
||||
seed = None
|
||||
if isinstance(seed, dict) and seed:
|
||||
# Extract the home_channel dict (if provided) so we wire it
|
||||
# up as a proper HomeChannel dataclass. Everything else is
|
||||
# merged into ``extra``.
|
||||
home = seed.pop("home_channel", None)
|
||||
config.platforms[platform].extra.update(seed)
|
||||
if isinstance(home, dict) and home.get("chat_id"):
|
||||
config.platforms[platform].home_channel = HomeChannel(
|
||||
platform=platform,
|
||||
chat_id=str(home["chat_id"]),
|
||||
name=str(home.get("name") or "Home"),
|
||||
thread_id=(
|
||||
str(home["thread_id"])
|
||||
if home.get("thread_id")
|
||||
else None
|
||||
),
|
||||
)
|
||||
configured = False
|
||||
if not configured:
|
||||
logger.debug(
|
||||
"Plugin platform '%s' available but not configured "
|
||||
"(is_connected returned False) — skipping enable",
|
||||
entry.name,
|
||||
)
|
||||
continue
|
||||
if platform not in config.platforms:
|
||||
config.platforms[platform] = PlatformConfig()
|
||||
config.platforms[platform].enabled = True
|
||||
# Commit env-seeded extras onto the now-enabled platform.
|
||||
# We've already called ``env_enablement_fn`` above (for the
|
||||
# probe); reuse that result instead of calling it twice.
|
||||
if isinstance(seed_for_probe, dict) and seed_for_probe:
|
||||
seed = dict(seed_for_probe)
|
||||
# Extract the home_channel dict (if provided) so we wire it
|
||||
# up as a proper HomeChannel dataclass. Everything else is
|
||||
# merged into ``extra``.
|
||||
home = seed.pop("home_channel", None)
|
||||
config.platforms[platform].extra.update(seed)
|
||||
if isinstance(home, dict) and home.get("chat_id"):
|
||||
config.platforms[platform].home_channel = HomeChannel(
|
||||
platform=platform,
|
||||
chat_id=str(home["chat_id"]),
|
||||
name=str(home.get("name") or "Home"),
|
||||
thread_id=(
|
||||
str(home["thread_id"])
|
||||
if home.get("thread_id")
|
||||
else None
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Plugin platform enable pass failed: %s", e)
|
||||
|
||||
+39
-168
@@ -18,7 +18,6 @@ Security features (based on OWASP + NIST SP 800-63-4 guidance):
|
||||
Storage: ~/.hermes/pairing/
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
@@ -28,10 +27,6 @@ import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from gateway.whatsapp_identity import (
|
||||
expand_whatsapp_aliases,
|
||||
normalize_whatsapp_identifier,
|
||||
)
|
||||
from hermes_constants import get_hermes_dir
|
||||
from utils import atomic_replace
|
||||
|
||||
@@ -114,40 +109,12 @@ class PairingStore:
|
||||
def _save_json(self, path: Path, data: dict) -> None:
|
||||
_secure_write(path, json.dumps(data, indent=2, ensure_ascii=False))
|
||||
|
||||
def _normalize_user_id(self, platform: str, user_id: str) -> str:
|
||||
"""Normalize platform-specific user IDs before persisting them."""
|
||||
raw_user_id = str(user_id or "").strip()
|
||||
if platform == "whatsapp":
|
||||
return normalize_whatsapp_identifier(raw_user_id) or raw_user_id
|
||||
return raw_user_id
|
||||
|
||||
def _user_id_aliases(self, platform: str, user_id: str) -> set[str]:
|
||||
"""Return all known equivalent user IDs for auth/rate-limit checks."""
|
||||
raw_user_id = str(user_id or "").strip()
|
||||
if not raw_user_id:
|
||||
return set()
|
||||
|
||||
aliases = {raw_user_id, self._normalize_user_id(platform, raw_user_id)}
|
||||
if platform == "whatsapp":
|
||||
aliases.update(expand_whatsapp_aliases(raw_user_id))
|
||||
aliases.discard("")
|
||||
return aliases
|
||||
|
||||
def _user_ids_match(self, platform: str, left: str, right: str) -> bool:
|
||||
"""Return True when two user IDs represent the same principal."""
|
||||
left_aliases = self._user_id_aliases(platform, left)
|
||||
right_aliases = self._user_id_aliases(platform, right)
|
||||
return bool(left_aliases and right_aliases and (left_aliases & right_aliases))
|
||||
|
||||
# ----- Approved users -----
|
||||
|
||||
def is_approved(self, platform: str, user_id: str) -> bool:
|
||||
"""Check if a user is approved (paired) on a platform."""
|
||||
approved = self._load_json(self._approved_path(platform))
|
||||
for approved_user_id in approved:
|
||||
if self._user_ids_match(platform, approved_user_id, user_id):
|
||||
return True
|
||||
return False
|
||||
return user_id in approved
|
||||
|
||||
def list_approved(self, platform: str = None) -> list:
|
||||
"""List approved users, optionally filtered by platform."""
|
||||
@@ -162,16 +129,7 @@ class PairingStore:
|
||||
def _approve_user(self, platform: str, user_id: str, user_name: str = "") -> None:
|
||||
"""Add a user to the approved list. Must be called under self._lock."""
|
||||
approved = self._load_json(self._approved_path(platform))
|
||||
normalized_user_id = self._normalize_user_id(platform, user_id)
|
||||
duplicate_ids = [
|
||||
approved_user_id
|
||||
for approved_user_id in approved
|
||||
if self._user_ids_match(platform, approved_user_id, normalized_user_id)
|
||||
]
|
||||
for approved_user_id in duplicate_ids:
|
||||
del approved[approved_user_id]
|
||||
|
||||
approved[normalized_user_id] = {
|
||||
approved[user_id] = {
|
||||
"user_name": user_name,
|
||||
"approved_at": time.time(),
|
||||
}
|
||||
@@ -182,25 +140,14 @@ class PairingStore:
|
||||
path = self._approved_path(platform)
|
||||
with self._lock:
|
||||
approved = self._load_json(path)
|
||||
matching_ids = [
|
||||
approved_user_id
|
||||
for approved_user_id in approved
|
||||
if self._user_ids_match(platform, approved_user_id, user_id)
|
||||
]
|
||||
if matching_ids:
|
||||
for approved_user_id in matching_ids:
|
||||
del approved[approved_user_id]
|
||||
if user_id in approved:
|
||||
del approved[user_id]
|
||||
self._save_json(path, approved)
|
||||
return True
|
||||
return False
|
||||
|
||||
# ----- Pending codes -----
|
||||
|
||||
@staticmethod
|
||||
def _hash_code(code: str, salt: bytes) -> str:
|
||||
"""Hash a pairing code with the given salt using SHA-256."""
|
||||
return hashlib.sha256(salt + code.encode("utf-8")).hexdigest()
|
||||
|
||||
def generate_code(
|
||||
self, platform: str, user_id: str, user_name: str = ""
|
||||
) -> Optional[str]:
|
||||
@@ -211,13 +158,9 @@ class PairingStore:
|
||||
- User is rate-limited (too recent request)
|
||||
- Max pending codes reached for this platform
|
||||
- User/platform is in lockout due to failed attempts
|
||||
|
||||
The code is NOT stored in plaintext. Only a salted SHA-256 hash is
|
||||
persisted so that reading the pending file does not reveal codes.
|
||||
"""
|
||||
with self._lock:
|
||||
self._cleanup_expired(platform)
|
||||
normalized_user_id = self._normalize_user_id(platform, user_id)
|
||||
|
||||
# Check lockout
|
||||
if self._is_locked_out(platform):
|
||||
@@ -235,18 +178,9 @@ class PairingStore:
|
||||
# Generate cryptographically random code
|
||||
code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))
|
||||
|
||||
# Hash the code with a random salt before storing
|
||||
salt = os.urandom(16)
|
||||
code_hash = self._hash_code(code, salt)
|
||||
|
||||
# Use a unique entry id as the key (not the code itself)
|
||||
entry_id = secrets.token_hex(8)
|
||||
|
||||
# Store pending request with hashed code
|
||||
pending[entry_id] = {
|
||||
"hash": code_hash,
|
||||
"salt": salt.hex(),
|
||||
"user_id": normalized_user_id,
|
||||
# Store pending request
|
||||
pending[code] = {
|
||||
"user_id": user_id,
|
||||
"user_name": user_name,
|
||||
"created_at": time.time(),
|
||||
}
|
||||
@@ -261,16 +195,10 @@ class PairingStore:
|
||||
"""
|
||||
Approve a pairing code. Adds the user to the approved list.
|
||||
|
||||
Returns ``{user_id, user_name}`` on success, ``None`` if the code is
|
||||
Returns {user_id, user_name} on success, None if code is
|
||||
invalid/expired OR the platform is currently locked out after
|
||||
``MAX_FAILED_ATTEMPTS`` failed approvals (#10195). Callers can
|
||||
disambiguate with ``_is_locked_out(platform)``.
|
||||
|
||||
Verification: the user-provided code is hashed with each stored
|
||||
entry's salt and compared to the stored hash using constant-time
|
||||
comparison. Pre-hash entries (legacy plaintext-key format from
|
||||
pre-upgrade pending.json files) are silently ignored — they get
|
||||
pruned at TTL by ``_cleanup_expired``.
|
||||
"""
|
||||
with self._lock:
|
||||
self._cleanup_expired(platform)
|
||||
@@ -285,77 +213,37 @@ class PairingStore:
|
||||
return None
|
||||
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
|
||||
# Find the entry whose hash matches the provided code.
|
||||
# Tolerate legacy plaintext-key entries (no salt/hash) and
|
||||
# malformed entries — skip them rather than KeyError, so an
|
||||
# in-place upgrade across an existing pending.json doesn't
|
||||
# crash on the first approve call. Legacy entries get pruned
|
||||
# at their TTL by _cleanup_expired.
|
||||
matched_key = None
|
||||
matched_entry = None
|
||||
for entry_id, entry in pending.items():
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
if "salt" not in entry or "hash" not in entry:
|
||||
continue
|
||||
try:
|
||||
salt = bytes.fromhex(entry["salt"])
|
||||
except ValueError:
|
||||
continue
|
||||
candidate_hash = self._hash_code(code, salt)
|
||||
if secrets.compare_digest(candidate_hash, entry["hash"]):
|
||||
matched_key = entry_id
|
||||
matched_entry = entry
|
||||
break
|
||||
|
||||
if matched_key is None:
|
||||
if code not in pending:
|
||||
self._record_failed_attempt(platform)
|
||||
return None
|
||||
|
||||
del pending[matched_key]
|
||||
entry = pending.pop(code)
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
|
||||
# Add to approved list
|
||||
self._approve_user(platform, matched_entry["user_id"],
|
||||
matched_entry.get("user_name", ""))
|
||||
self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
|
||||
|
||||
return {
|
||||
"user_id": matched_entry["user_id"],
|
||||
"user_name": matched_entry.get("user_name", ""),
|
||||
"user_id": entry["user_id"],
|
||||
"user_name": entry.get("user_name", ""),
|
||||
}
|
||||
|
||||
def list_pending(self, platform: str = None) -> list:
|
||||
"""List pending pairing requests, optionally filtered by platform.
|
||||
|
||||
Codes are stored hashed — the ``code`` field is replaced with the
|
||||
first 8 hex characters of the hash so admins can distinguish entries
|
||||
without revealing the original code. Legacy plaintext-key entries
|
||||
(pre-hash format) are shown with a "legacy" placeholder so admins
|
||||
can see them age out without crashing on a missing ``hash`` field.
|
||||
"""
|
||||
"""List pending pairing requests, optionally filtered by platform."""
|
||||
results = []
|
||||
with self._lock:
|
||||
platforms = [platform] if platform else self._all_platforms("pending")
|
||||
for p in platforms:
|
||||
self._cleanup_expired(p)
|
||||
pending = self._load_json(self._pending_path(p))
|
||||
for entry_id, info in pending.items():
|
||||
if not isinstance(info, dict):
|
||||
continue
|
||||
created_at = info.get("created_at")
|
||||
if not isinstance(created_at, (int, float)):
|
||||
continue
|
||||
age_min = int((time.time() - created_at) / 60)
|
||||
hash_val = info.get("hash")
|
||||
code_display = hash_val[:8] if isinstance(hash_val, str) else "legacy"
|
||||
results.append({
|
||||
"platform": p,
|
||||
"code": code_display,
|
||||
"user_id": info.get("user_id", ""),
|
||||
"user_name": info.get("user_name", ""),
|
||||
"age_minutes": age_min,
|
||||
})
|
||||
platforms = [platform] if platform else self._all_platforms("pending")
|
||||
for p in platforms:
|
||||
self._cleanup_expired(p)
|
||||
pending = self._load_json(self._pending_path(p))
|
||||
for code, info in pending.items():
|
||||
age_min = int((time.time() - info["created_at"]) / 60)
|
||||
results.append({
|
||||
"platform": p,
|
||||
"code": code,
|
||||
"user_id": info["user_id"],
|
||||
"user_name": info.get("user_name", ""),
|
||||
"age_minutes": age_min,
|
||||
})
|
||||
return results
|
||||
|
||||
def clear_pending(self, platform: str = None) -> int:
|
||||
@@ -374,20 +262,15 @@ class PairingStore:
|
||||
def _is_rate_limited(self, platform: str, user_id: str) -> bool:
|
||||
"""Check if a user has requested a code too recently."""
|
||||
limits = self._load_json(self._rate_limit_path())
|
||||
for alias in self._user_id_aliases(platform, user_id):
|
||||
key = f"{platform}:{alias}"
|
||||
last_request = limits.get(key, 0)
|
||||
if (time.time() - last_request) < RATE_LIMIT_SECONDS:
|
||||
return True
|
||||
return False
|
||||
key = f"{platform}:{user_id}"
|
||||
last_request = limits.get(key, 0)
|
||||
return (time.time() - last_request) < RATE_LIMIT_SECONDS
|
||||
|
||||
def _record_rate_limit(self, platform: str, user_id: str) -> None:
|
||||
"""Record the time of a pairing request for rate limiting."""
|
||||
limits = self._load_json(self._rate_limit_path())
|
||||
now = time.time()
|
||||
for alias in self._user_id_aliases(platform, user_id):
|
||||
key = f"{platform}:{alias}"
|
||||
limits[key] = now
|
||||
key = f"{platform}:{user_id}"
|
||||
limits[key] = time.time()
|
||||
self._save_json(self._rate_limit_path(), limits)
|
||||
|
||||
def _is_locked_out(self, platform: str) -> bool:
|
||||
@@ -414,29 +297,17 @@ class PairingStore:
|
||||
# ----- Cleanup -----
|
||||
|
||||
def _cleanup_expired(self, platform: str) -> None:
|
||||
"""Remove expired pending codes.
|
||||
|
||||
Tolerant of malformed / legacy entries — anything without a numeric
|
||||
``created_at`` is treated as expired (it's effectively unusable
|
||||
with the new hash-keyed schema anyway).
|
||||
"""
|
||||
"""Remove expired pending codes."""
|
||||
path = self._pending_path(platform)
|
||||
pending = self._load_json(path)
|
||||
now = time.time()
|
||||
expired = []
|
||||
for entry_id, info in pending.items():
|
||||
if not isinstance(info, dict):
|
||||
expired.append(entry_id)
|
||||
continue
|
||||
created_at = info.get("created_at")
|
||||
if not isinstance(created_at, (int, float)):
|
||||
expired.append(entry_id)
|
||||
continue
|
||||
if (now - created_at) > CODE_TTL_SECONDS:
|
||||
expired.append(entry_id)
|
||||
expired = [
|
||||
code for code, info in pending.items()
|
||||
if (now - info["created_at"]) > CODE_TTL_SECONDS
|
||||
]
|
||||
if expired:
|
||||
for entry_id in expired:
|
||||
del pending[entry_id]
|
||||
for code in expired:
|
||||
del pending[code]
|
||||
self._save_json(path, pending)
|
||||
|
||||
def _all_platforms(self, suffix: str) -> list:
|
||||
|
||||
@@ -35,7 +35,6 @@ import re
|
||||
import sqlite3
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
@@ -338,12 +337,10 @@ class ResponseStore:
|
||||
db_path = str(get_hermes_home() / "response_store.db")
|
||||
except Exception:
|
||||
db_path = ":memory:"
|
||||
self._db_path: Optional[str] = db_path if db_path != ":memory:" else None
|
||||
try:
|
||||
self._conn = sqlite3.connect(db_path, check_same_thread=False)
|
||||
except Exception:
|
||||
self._conn = sqlite3.connect(":memory:", check_same_thread=False)
|
||||
self._db_path = None
|
||||
# Use shared WAL-fallback helper so response_store.db degrades
|
||||
# gracefully on NFS/SMB/FUSE-mounted HERMES_HOME (same filesystem
|
||||
# issue addressed for state.db/kanban.db — see
|
||||
@@ -364,31 +361,6 @@ class ResponseStore:
|
||||
)"""
|
||||
)
|
||||
self._conn.commit()
|
||||
# response_store.db contains conversation history (tool payloads,
|
||||
# prompts, results). Tighten to owner-only after creation so other
|
||||
# local users on a shared box can't read it. Run once at __init__
|
||||
# rather than after every commit — chmod-on-every-write is wasted
|
||||
# syscalls on a hot path.
|
||||
self._tighten_file_permissions()
|
||||
|
||||
def _tighten_file_permissions(self) -> None:
|
||||
"""Force owner-only permissions on the DB and SQLite sidecars."""
|
||||
if not self._db_path:
|
||||
return
|
||||
for candidate in (
|
||||
Path(self._db_path),
|
||||
Path(f"{self._db_path}-wal"),
|
||||
Path(f"{self._db_path}-shm"),
|
||||
):
|
||||
try:
|
||||
if candidate.exists():
|
||||
candidate.chmod(0o600)
|
||||
except OSError:
|
||||
logger.debug(
|
||||
"Failed to restrict response store permissions for %s",
|
||||
candidate,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
def get(self, response_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Retrieve a stored response by ID (updates access time for LRU)."""
|
||||
|
||||
+26
-340
@@ -15,7 +15,6 @@ import re
|
||||
import socket as _socket
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from urllib.parse import urlsplit
|
||||
@@ -41,16 +40,6 @@ def _platform_name(platform) -> str:
|
||||
return str(value or "").lower()
|
||||
|
||||
|
||||
def _float_env(name: str, default: float) -> float:
|
||||
raw = os.environ.get(name, "").strip()
|
||||
if not raw:
|
||||
return default
|
||||
try:
|
||||
return float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _thread_metadata_for_source(source, reply_to_message_id: str | None = None) -> dict | None:
|
||||
"""Build platform-aware thread metadata for adapter sends.
|
||||
|
||||
@@ -483,7 +472,7 @@ sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.session import SessionSource, build_session_key
|
||||
from hermes_constants import get_hermes_dir, get_hermes_home
|
||||
from hermes_constants import get_hermes_dir
|
||||
|
||||
|
||||
GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = (
|
||||
@@ -824,86 +813,6 @@ def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DOCUMENT_CACHE_DIR = get_hermes_dir("cache/documents", "document_cache")
|
||||
SCREENSHOT_CACHE_DIR = get_hermes_dir("cache/screenshots", "browser_screenshots")
|
||||
_HERMES_HOME = get_hermes_home()
|
||||
MEDIA_DELIVERY_ALLOW_DIRS_ENV = "HERMES_MEDIA_ALLOW_DIRS"
|
||||
MEDIA_DELIVERY_SAFE_ROOTS = (
|
||||
IMAGE_CACHE_DIR,
|
||||
AUDIO_CACHE_DIR,
|
||||
VIDEO_CACHE_DIR,
|
||||
DOCUMENT_CACHE_DIR,
|
||||
SCREENSHOT_CACHE_DIR,
|
||||
_HERMES_HOME / "image_cache",
|
||||
_HERMES_HOME / "audio_cache",
|
||||
_HERMES_HOME / "video_cache",
|
||||
_HERMES_HOME / "document_cache",
|
||||
_HERMES_HOME / "browser_screenshots",
|
||||
)
|
||||
|
||||
|
||||
def _media_delivery_allowed_roots() -> List[Path]:
|
||||
"""Return roots from which model-emitted local media may be delivered."""
|
||||
roots = [Path(root) for root in MEDIA_DELIVERY_SAFE_ROOTS]
|
||||
extra_roots = os.environ.get(MEDIA_DELIVERY_ALLOW_DIRS_ENV, "")
|
||||
for chunk in extra_roots.split(os.pathsep):
|
||||
for raw_root in chunk.split(","):
|
||||
raw_root = raw_root.strip()
|
||||
if not raw_root:
|
||||
continue
|
||||
root = Path(os.path.expanduser(raw_root))
|
||||
if root.is_absolute():
|
||||
roots.append(root)
|
||||
return roots
|
||||
|
||||
|
||||
def _path_is_within(path: Path, root: Path) -> bool:
|
||||
try:
|
||||
path.relative_to(root)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def validate_media_delivery_path(path: str) -> Optional[str]:
|
||||
"""Return a safe absolute file path for native media delivery, else None.
|
||||
|
||||
MEDIA tags and bare local paths in model output are untrusted text. Only
|
||||
existing regular files under Hermes-managed media caches, or roots the
|
||||
operator explicitly allowlists, may be uploaded as native attachments.
|
||||
Symlinks are resolved before the containment check.
|
||||
"""
|
||||
if not path:
|
||||
return None
|
||||
|
||||
candidate = str(path).strip()
|
||||
if len(candidate) >= 2 and candidate[0] == candidate[-1] and candidate[0] in "`\"'":
|
||||
candidate = candidate[1:-1].strip()
|
||||
candidate = candidate.lstrip("`\"'").rstrip("`\"',.;:)}]")
|
||||
if not candidate:
|
||||
return None
|
||||
|
||||
expanded = Path(os.path.expanduser(candidate))
|
||||
if not expanded.is_absolute():
|
||||
return None
|
||||
|
||||
try:
|
||||
resolved = expanded.resolve(strict=True)
|
||||
except (OSError, RuntimeError, ValueError):
|
||||
return None
|
||||
|
||||
if not resolved.is_file():
|
||||
return None
|
||||
|
||||
for root in _media_delivery_allowed_roots():
|
||||
try:
|
||||
resolved_root = root.expanduser().resolve(strict=False)
|
||||
except (OSError, RuntimeError, ValueError):
|
||||
continue
|
||||
if _path_is_within(resolved, resolved_root):
|
||||
return str(resolved)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
SUPPORTED_DOCUMENT_TYPES = {
|
||||
".pdf": "application/pdf",
|
||||
@@ -1114,14 +1023,6 @@ class MessageEvent:
|
||||
return args
|
||||
|
||||
|
||||
@dataclass
|
||||
class TextDebounceState:
|
||||
event: MessageEvent
|
||||
task: asyncio.Task | None
|
||||
first_ts: float
|
||||
last_ts: float
|
||||
|
||||
|
||||
_PLAINTEXT_GATEWAY_RESTART_PATTERNS: tuple[re.Pattern[str], ...] = (
|
||||
re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?gateway[.!?\s]*$", re.IGNORECASE),
|
||||
re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?hermes\s+gateway[.!?\s]*$", re.IGNORECASE),
|
||||
@@ -1417,17 +1318,6 @@ class BasePlatformAdapter(ABC):
|
||||
self._active_sessions: Dict[str, asyncio.Event] = {}
|
||||
self._pending_messages: Dict[str, MessageEvent] = {}
|
||||
self._session_tasks: Dict[str, asyncio.Task] = {}
|
||||
self._busy_text_mode: str = (
|
||||
os.environ.get("HERMES_GATEWAY_BUSY_TEXT_MODE", "queue").strip().lower()
|
||||
or "queue"
|
||||
)
|
||||
self._busy_text_debounce_seconds: float = _float_env(
|
||||
"HERMES_GATEWAY_BUSY_TEXT_DEBOUNCE_SECONDS", 0.35
|
||||
)
|
||||
self._busy_text_hard_cap_seconds: float = _float_env(
|
||||
"HERMES_GATEWAY_BUSY_TEXT_HARD_CAP_SECONDS", 1.0
|
||||
)
|
||||
self._text_debounce: dict[str, TextDebounceState] = {}
|
||||
# Background message-processing tasks spawned by handle_message().
|
||||
# Gateway shutdown cancels these so an old gateway instance doesn't keep
|
||||
# working on a task after --replace or manual restarts.
|
||||
@@ -2229,35 +2119,6 @@ class BasePlatformAdapter(ABC):
|
||||
text = f"{caption}\n{text}"
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to, metadata=metadata)
|
||||
|
||||
@staticmethod
|
||||
def validate_media_delivery_path(path: str) -> Optional[str]:
|
||||
"""Return a resolved path if it is safe for native attachment upload."""
|
||||
return validate_media_delivery_path(path)
|
||||
|
||||
@staticmethod
|
||||
def filter_media_delivery_paths(media_files) -> List[Tuple[str, bool]]:
|
||||
"""Drop unsafe MEDIA paths and normalize accepted paths."""
|
||||
safe_media: List[Tuple[str, bool]] = []
|
||||
for media_path, is_voice in media_files or []:
|
||||
safe_path = validate_media_delivery_path(str(media_path))
|
||||
if safe_path:
|
||||
safe_media.append((safe_path, bool(is_voice)))
|
||||
else:
|
||||
logger.warning("Skipping unsafe MEDIA directive path outside allowed roots")
|
||||
return safe_media
|
||||
|
||||
@staticmethod
|
||||
def filter_local_delivery_paths(file_paths) -> List[str]:
|
||||
"""Drop unsafe bare local file paths and normalize accepted paths."""
|
||||
safe_paths: List[str] = []
|
||||
for file_path in file_paths or []:
|
||||
safe_path = validate_media_delivery_path(str(file_path))
|
||||
if safe_path:
|
||||
safe_paths.append(safe_path)
|
||||
else:
|
||||
logger.warning("Skipping unsafe local file path outside allowed roots")
|
||||
return safe_paths
|
||||
|
||||
@staticmethod
|
||||
def extract_media(content: str) -> Tuple[List[Tuple[str, bool]], str]:
|
||||
"""
|
||||
@@ -2755,161 +2616,6 @@ class BasePlatformAdapter(ABC):
|
||||
return f"{existing_text}\n\n{new_text}".strip()
|
||||
return existing_text
|
||||
|
||||
def _text_debounce_store(self) -> dict[str, TextDebounceState]:
|
||||
store = getattr(self, "_text_debounce", None)
|
||||
if store is None:
|
||||
store = {}
|
||||
self._text_debounce = store
|
||||
return store
|
||||
|
||||
def _is_queue_text_debounce_candidate(self, event: MessageEvent) -> bool:
|
||||
"""Return True for normal text eligible for queue-mode debounce."""
|
||||
result = (
|
||||
getattr(self, "_busy_text_mode", "queue") == "queue"
|
||||
and event.message_type == MessageType.TEXT
|
||||
and not getattr(event, "internal", False)
|
||||
and not event.is_command()
|
||||
and bool((event.text or "").strip())
|
||||
)
|
||||
if result:
|
||||
logger.debug(
|
||||
"[%s] Queue-text debounce candidate accepted: session=%s text_len=%d",
|
||||
self.name,
|
||||
getattr(event, "session_key", "?"),
|
||||
len(event.text or ""),
|
||||
)
|
||||
return result
|
||||
|
||||
def _can_merge_text_debounce_events(self, existing: MessageEvent, event: MessageEvent) -> bool:
|
||||
"""Return True when two text debounce events came from the same sender."""
|
||||
|
||||
def _identity(candidate: MessageEvent) -> tuple[str, ...] | None:
|
||||
source = getattr(candidate, "source", None)
|
||||
if source is None:
|
||||
return None
|
||||
platform = _platform_name(getattr(source, "platform", None))
|
||||
sender = getattr(source, "user_id_alt", None) or getattr(source, "user_id", None)
|
||||
if sender:
|
||||
return (platform, str(sender))
|
||||
if getattr(source, "chat_type", None) in {"dm", "private"} and getattr(source, "chat_id", None):
|
||||
return (platform, "dm", str(source.chat_id))
|
||||
return None
|
||||
|
||||
existing_sender = _identity(existing)
|
||||
incoming_sender = _identity(event)
|
||||
return existing_sender is not None and existing_sender == incoming_sender
|
||||
|
||||
def _text_debounce_delay(self, session_key: str) -> float:
|
||||
"""Return bounded busy-text debounce delay for ``session_key``."""
|
||||
state = self._text_debounce_store().get(session_key)
|
||||
if state is None:
|
||||
return 0.0
|
||||
now = time.monotonic()
|
||||
window_deadline = state.last_ts + self._busy_text_debounce_seconds
|
||||
hard_cap_deadline = state.first_ts + self._busy_text_hard_cap_seconds
|
||||
return max(0.0, min(window_deadline, hard_cap_deadline) - now)
|
||||
|
||||
async def _queue_text_debounce(self, session_key: str, event: MessageEvent) -> None:
|
||||
"""Buffer normal queue-mode busy text and schedule a bounded flush."""
|
||||
store = self._text_debounce_store()
|
||||
state = store.get(session_key)
|
||||
|
||||
if state is not None and not self._can_merge_text_debounce_events(state.event, event):
|
||||
# Preserve sender attribution in shared sessions. The current
|
||||
# buffer becomes the next pending turn; the new sender starts a
|
||||
# fresh debounce burst when the pending slot allows it.
|
||||
await self._flush_text_debounce_now(session_key)
|
||||
state = store.get(session_key)
|
||||
if state is not None and not self._can_merge_text_debounce_events(state.event, event):
|
||||
existing_pending = self._pending_messages.get(session_key)
|
||||
if existing_pending is not None and self._can_merge_text_debounce_events(existing_pending, event):
|
||||
merge_pending_message_event(
|
||||
self._pending_messages,
|
||||
session_key,
|
||||
event,
|
||||
merge_text=True,
|
||||
)
|
||||
return
|
||||
|
||||
now = time.monotonic()
|
||||
if state is None:
|
||||
state = TextDebounceState(
|
||||
event=event,
|
||||
task=None,
|
||||
first_ts=now,
|
||||
last_ts=now,
|
||||
)
|
||||
store[session_key] = state
|
||||
else:
|
||||
if event.text:
|
||||
state.event.text = (
|
||||
f"{state.event.text}\n{event.text}"
|
||||
if state.event.text
|
||||
else event.text
|
||||
)
|
||||
latest_message_id = getattr(event, "message_id", None)
|
||||
latest_anchor = latest_message_id or getattr(event, "reply_to_message_id", None)
|
||||
if latest_message_id is not None:
|
||||
state.event.message_id = str(latest_message_id)
|
||||
if latest_anchor is not None and hasattr(state.event, "reply_to_message_id"):
|
||||
state.event.reply_to_message_id = str(latest_anchor)
|
||||
state.last_ts = now
|
||||
|
||||
if state.task is not None and not state.task.done():
|
||||
state.task.cancel()
|
||||
|
||||
delay = self._text_debounce_delay(session_key)
|
||||
state.task = asyncio.create_task(self._flush_text_debounce(session_key, delay))
|
||||
|
||||
async def _flush_text_debounce(self, session_key: str, delay: float) -> None:
|
||||
"""Timer task that flushes the debounced text buffer."""
|
||||
try:
|
||||
await asyncio.sleep(delay)
|
||||
await self._flush_text_debounce_now(session_key)
|
||||
except asyncio.CancelledError:
|
||||
return
|
||||
finally:
|
||||
current = asyncio.current_task()
|
||||
state = self._text_debounce_store().get(session_key)
|
||||
if state is not None and state.task is current:
|
||||
state.task = None
|
||||
|
||||
async def _flush_text_debounce_now(self, session_key: str) -> bool:
|
||||
"""Force-flush one debounced busy-text burst into the pending slot."""
|
||||
store = self._text_debounce_store()
|
||||
state = store.get(session_key)
|
||||
if state is None:
|
||||
return False
|
||||
|
||||
current = asyncio.current_task()
|
||||
if state.task is not None and state.task is not current and not state.task.done():
|
||||
state.task.cancel()
|
||||
state.task = None
|
||||
|
||||
existing_pending = self._pending_messages.get(session_key)
|
||||
if (
|
||||
existing_pending is not None
|
||||
and not self._can_merge_text_debounce_events(existing_pending, state.event)
|
||||
):
|
||||
return False
|
||||
|
||||
state = store.pop(session_key, None)
|
||||
if state is None:
|
||||
return False
|
||||
merge_pending_message_event(
|
||||
self._pending_messages,
|
||||
session_key,
|
||||
state.event,
|
||||
merge_text=True,
|
||||
)
|
||||
return True
|
||||
|
||||
def _discard_text_debounce(self, session_key: str) -> None:
|
||||
"""Cancel and drop pending text debounce state for control commands."""
|
||||
state = self._text_debounce_store().pop(session_key, None)
|
||||
if state is not None and state.task is not None and not state.task.done():
|
||||
state.task.cancel()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Session task + guard ownership helpers
|
||||
# ------------------------------------------------------------------
|
||||
@@ -2979,7 +2685,6 @@ class BasePlatformAdapter(ABC):
|
||||
self._active_sessions.pop(session_key, None)
|
||||
self._pending_messages.pop(session_key, None)
|
||||
self._session_tasks.pop(session_key, None)
|
||||
self._discard_text_debounce(session_key)
|
||||
return True
|
||||
|
||||
def _start_session_processing(
|
||||
@@ -3061,7 +2766,6 @@ class BasePlatformAdapter(ABC):
|
||||
)
|
||||
if discard_pending:
|
||||
self._pending_messages.pop(session_key, None)
|
||||
self._discard_text_debounce(session_key)
|
||||
if release_guard:
|
||||
self._release_session_guard(session_key)
|
||||
|
||||
@@ -3076,7 +2780,6 @@ class BasePlatformAdapter(ABC):
|
||||
command-scoped guard, then — if a follow-up message landed while the
|
||||
command was running — spawns a fresh processing task for it.
|
||||
"""
|
||||
await self._flush_text_debounce_now(session_key)
|
||||
pending_event = self._pending_messages.pop(session_key, None)
|
||||
self._release_session_guard(session_key, guard=command_guard)
|
||||
if pending_event is None:
|
||||
@@ -3208,7 +2911,6 @@ class BasePlatformAdapter(ABC):
|
||||
# through the dedicated handoff path that serializes
|
||||
# cancellation + runner response + pending drain.
|
||||
if cmd in {"stop", "new", "reset"}:
|
||||
self._discard_text_debounce(session_key)
|
||||
try:
|
||||
await self._dispatch_active_session_command(event, session_key, cmd)
|
||||
except Exception as e:
|
||||
@@ -3253,9 +2955,8 @@ class BasePlatformAdapter(ABC):
|
||||
# clarify-intercept can resolve it and unblock the agent.
|
||||
#
|
||||
# Without this bypass: the message gets queued in
|
||||
# _pending_messages as a follow-up turn instead of reaching the
|
||||
# clarify resolver, leaving the agent blocked and discarding the
|
||||
# user's answer.
|
||||
# _pending_messages AND triggers an interrupt, killing the
|
||||
# agent run mid-clarify and discarding the user's answer.
|
||||
# Same shape as the /approve deadlock fix (PR #4926) — both
|
||||
# cases are "agent thread blocked on Event.wait, message must
|
||||
# reach the resolver before being treated as a new turn."
|
||||
@@ -3314,28 +3015,27 @@ class BasePlatformAdapter(ABC):
|
||||
merge_pending_message_event(self._pending_messages, session_key, event)
|
||||
return # Don't interrupt now - will run after current task completes
|
||||
|
||||
if self._is_queue_text_debounce_candidate(event):
|
||||
logger.debug(
|
||||
"[%s] New text message while session %s is active — "
|
||||
"debouncing follow-up (busy_text_mode=queue, window=%.2fs)",
|
||||
self.name,
|
||||
session_key,
|
||||
self._busy_text_debounce_seconds,
|
||||
)
|
||||
await self._queue_text_debounce(session_key, event)
|
||||
else:
|
||||
logger.debug(
|
||||
"[%s] New message while session %s is active — queuing follow-up "
|
||||
"(no interrupt, will cascade after current turn)",
|
||||
self.name,
|
||||
session_key,
|
||||
)
|
||||
merge_pending_message_event(
|
||||
self._pending_messages,
|
||||
session_key,
|
||||
event,
|
||||
merge_text=event.message_type == MessageType.TEXT,
|
||||
)
|
||||
# Default behavior for non-photo follow-ups: interrupt the running agent.
|
||||
#
|
||||
# Use merge_text=True so rapid TEXT follow-ups (#4469) accumulate
|
||||
# into the single pending slot instead of clobbering each other.
|
||||
# Without merging, three rapid messages "A", "B", "C" land like:
|
||||
# _pending_messages[k] = A (interrupts)
|
||||
# _pending_messages[k] = B (replaces A before consumer reads)
|
||||
# _pending_messages[k] = C (replaces B)
|
||||
# ...and only "C" reaches the next turn. merge_pending_message_event
|
||||
# already does the right thing for photo/media bursts; the
|
||||
# ``merge_text=True`` flag extends that to plain TEXT events.
|
||||
# Same shape as the Telegram bursty-grace path in gateway/run.py.
|
||||
logger.debug("[%s] New message while session %s is active — triggering interrupt", self.name, session_key)
|
||||
merge_pending_message_event(
|
||||
self._pending_messages,
|
||||
session_key,
|
||||
event,
|
||||
merge_text=True,
|
||||
)
|
||||
# Signal the interrupt (the processing task checks this)
|
||||
self._active_sessions[session_key].set()
|
||||
return # Don't process now - will be handled after current task finishes
|
||||
|
||||
# Mark session as active BEFORE spawning background task to close
|
||||
@@ -3466,7 +3166,6 @@ class BasePlatformAdapter(ABC):
|
||||
|
||||
# Extract MEDIA:<path> tags (from TTS tool) before other processing
|
||||
media_files, response = self.extract_media(response)
|
||||
media_files = self.filter_media_delivery_paths(media_files)
|
||||
|
||||
# Extract image URLs and send them as native platform attachments
|
||||
images, text_content = self.extract_images(response)
|
||||
@@ -3480,7 +3179,6 @@ class BasePlatformAdapter(ABC):
|
||||
# Auto-detect bare local file paths for native media delivery
|
||||
# (helps small models that don't use MEDIA: syntax)
|
||||
local_files, text_content = self.extract_local_files(text_content)
|
||||
local_files = self.filter_local_delivery_paths(local_files)
|
||||
if local_files:
|
||||
logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
|
||||
|
||||
@@ -3689,15 +3387,10 @@ class BasePlatformAdapter(ABC):
|
||||
ProcessingOutcome.SUCCESS if processing_ok else ProcessingOutcome.FAILURE,
|
||||
)
|
||||
|
||||
# The active drain owns debounce state. If a queue-mode timer has
|
||||
# not fired yet, force-flush into _pending_messages here and let
|
||||
# this task hand off the follow-up.
|
||||
await self._flush_text_debounce_now(session_key)
|
||||
|
||||
# Check if there's a pending message that was queued during our processing
|
||||
if session_key in self._pending_messages:
|
||||
pending_event = self._pending_messages.pop(session_key)
|
||||
logger.debug("[%s] Processing queued follow-up message", self.name)
|
||||
logger.debug("[%s] Processing queued message from interrupt", self.name)
|
||||
# Keep the _active_sessions entry live across the turn chain
|
||||
# and only CLEAR the interrupt Event — do NOT delete the entry.
|
||||
# If we deleted here, a concurrent inbound message arriving
|
||||
@@ -3706,7 +3399,7 @@ class BasePlatformAdapter(ABC):
|
||||
# with the recursive drain below. Two agents on one
|
||||
# session_key = duplicate responses, duplicate tool calls.
|
||||
# Clearing the Event keeps the guard live so follow-ups take
|
||||
# the busy-handler path as intended.
|
||||
# the busy-handler path (queue + interrupt) as intended.
|
||||
_active = self._active_sessions.get(session_key)
|
||||
if _active is not None:
|
||||
_active.clear()
|
||||
@@ -3799,9 +3492,6 @@ class BasePlatformAdapter(ABC):
|
||||
await self.stop_typing(event.source.chat_id)
|
||||
except Exception:
|
||||
pass
|
||||
# Final drain/release boundary: force-flush any timer that missed
|
||||
# the in-band drain before deciding whether the guard can clear.
|
||||
await self._flush_text_debounce_now(session_key)
|
||||
# Late-arrival drain: a message may have arrived during the
|
||||
# cleanup awaits above (typing_task cancel, stop_typing). Such
|
||||
# messages passed the Level-1 guard (entry still live, Event
|
||||
@@ -3921,10 +3611,6 @@ class BasePlatformAdapter(ABC):
|
||||
self._session_tasks.clear()
|
||||
self._pending_messages.clear()
|
||||
self._active_sessions.clear()
|
||||
for state in list(self._text_debounce_store().values()):
|
||||
if state.task is not None and not state.task.done():
|
||||
state.task.cancel()
|
||||
self._text_debounce_store().clear()
|
||||
|
||||
def has_pending_interrupt(self, session_key: str) -> bool:
|
||||
"""Check if there's a pending interrupt for a session."""
|
||||
|
||||
@@ -189,10 +189,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
app = web.Application()
|
||||
app.router.add_get("/health", lambda _: web.Response(text="ok"))
|
||||
app.router.add_post(self.webhook_path, self._handle_webhook)
|
||||
# The webhook auth value is carried in the query string because the
|
||||
# BlueBubbles webhook API cannot send custom headers. Do not let
|
||||
# aiohttp access logs write that request target to agent.log.
|
||||
self._runner = web.AppRunner(app, access_log=None)
|
||||
self._runner = web.AppRunner(app)
|
||||
await self._runner.setup()
|
||||
site = web.TCPSite(self._runner, self.webhook_host, self.webhook_port)
|
||||
await site.start()
|
||||
@@ -245,14 +242,6 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
return f"{base}?password={quote(self.password, safe='')}"
|
||||
return base
|
||||
|
||||
@property
|
||||
def _webhook_register_url_for_log(self) -> str:
|
||||
"""Webhook registration URL safe for logs."""
|
||||
base = self._webhook_url
|
||||
if self.password:
|
||||
return f"{base}?password=***"
|
||||
return base
|
||||
|
||||
async def _find_registered_webhooks(self, url: str) -> list:
|
||||
"""Return list of BB webhook entries matching *url*."""
|
||||
try:
|
||||
@@ -280,8 +269,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
existing = await self._find_registered_webhooks(webhook_url)
|
||||
if existing:
|
||||
logger.info(
|
||||
"[bluebubbles] webhook already registered: %s",
|
||||
self._webhook_register_url_for_log,
|
||||
"[bluebubbles] webhook already registered: %s", webhook_url
|
||||
)
|
||||
return True
|
||||
|
||||
@@ -296,7 +284,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
if 200 <= status < 300:
|
||||
logger.info(
|
||||
"[bluebubbles] webhook registered with server: %s",
|
||||
self._webhook_register_url_for_log,
|
||||
webhook_url,
|
||||
)
|
||||
return True
|
||||
else:
|
||||
@@ -336,8 +324,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
removed = True
|
||||
if removed:
|
||||
logger.info(
|
||||
"[bluebubbles] webhook unregistered: %s",
|
||||
self._webhook_register_url_for_log,
|
||||
"[bluebubbles] webhook unregistered: %s", webhook_url
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
@@ -947,3 +934,4 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
asyncio.create_task(self.mark_read(session_chat_id))
|
||||
|
||||
return web.Response(text="ok")
|
||||
|
||||
|
||||
@@ -358,19 +358,6 @@ class DingTalkAdapter(BasePlatformAdapter):
|
||||
await asyncio.gather(*self._bg_tasks, return_exceptions=True)
|
||||
self._bg_tasks.clear()
|
||||
|
||||
# Finalize any open streaming cards before the HTTP client closes so
|
||||
# they don't stay stuck in streaming state on DingTalk's UI after
|
||||
# a gateway restart. _close_streaming_siblings handles its own
|
||||
# per-card exceptions; the outer try is a safety net for token fetch.
|
||||
for _chat_id in list(self._streaming_cards):
|
||||
try:
|
||||
await self._close_streaming_siblings(_chat_id)
|
||||
except Exception as _exc:
|
||||
logger.debug(
|
||||
"[%s] Failed to finalize streaming card on disconnect for %s: %s",
|
||||
self.name, _chat_id, _exc,
|
||||
)
|
||||
|
||||
if self._http_client:
|
||||
await self._http_client.aclose()
|
||||
self._http_client = None
|
||||
|
||||
@@ -1489,8 +1489,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
reported in ``raw_response['warnings']`` so the caller can surface
|
||||
partial-send issues.
|
||||
"""
|
||||
# _derive_forum_thread_name is defined further down in this same
|
||||
# module — no cross-module import needed.
|
||||
from tools.send_message_tool import _derive_forum_thread_name
|
||||
|
||||
formatted = self.format_message(content)
|
||||
chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
|
||||
@@ -1552,8 +1551,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
ForumChannel accepts the same file/files/content kwargs as
|
||||
``channel.send``, creating the thread and starter message atomically.
|
||||
"""
|
||||
# _derive_forum_thread_name is defined further down in this same
|
||||
# module — no cross-module import needed.
|
||||
from tools.send_message_tool import _derive_forum_thread_name
|
||||
|
||||
if not thread_name:
|
||||
# Prefer the text content, fall back to the first attached
|
||||
@@ -5701,492 +5699,7 @@ def _define_discord_view_classes() -> None:
|
||||
self.resolved = True
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
||||
|
||||
if DISCORD_AVAILABLE:
|
||||
_define_discord_view_classes()
|
||||
|
||||
|
||||
# ── Standalone (out-of-process) sender ────────────────────────────────────────
|
||||
# Used by ``tools/send_message_tool._send_via_adapter`` when the gateway runner
|
||||
# is not in this process (e.g. ``hermes cron`` running standalone) and no live
|
||||
# DiscordAdapter instance is available. Implements the same forum/thread/
|
||||
# multipart logic the live adapter would use, via Discord's REST API directly.
|
||||
#
|
||||
# This block was previously hosted in ``tools/send_message_tool.py`` as
|
||||
# ``_send_discord``. It moved into the plugin so all Discord-specific HTTP
|
||||
# logic lives next to the adapter — same shape as Teams' ``_standalone_send``.
|
||||
|
||||
# Process-local cache for Discord channel-type probes. Avoids re-probing the
|
||||
# same channel on every send when the directory cache has no entry (e.g. fresh
|
||||
# install, or channel created after the last directory build).
|
||||
_DISCORD_CHANNEL_TYPE_PROBE_CACHE: Dict[str, bool] = {}


def _remember_channel_is_forum(chat_id: str, is_forum: bool) -> None:
    """Store the forum/non-forum verdict for *chat_id* in the probe cache.

    The channel id is normalised to ``str`` and the flag to ``bool`` so
    lookups behave the same regardless of the caller's argument types.
    """
    cache_key = str(chat_id)
    _DISCORD_CHANNEL_TYPE_PROBE_CACHE[cache_key] = bool(is_forum)


def _probe_is_forum_cached(chat_id: str) -> Optional[bool]:
    """Return the cached forum flag for *chat_id*, or ``None`` if absent."""
    cache_key = str(chat_id)
    return _DISCORD_CHANNEL_TYPE_PROBE_CACHE.get(cache_key)
|
||||
|
||||
|
||||
def _derive_forum_thread_name(message: str) -> str:
    """Build a forum thread title from the first line of *message*.

    Leading ``#`` characters (markdown heading markers) and surrounding
    whitespace are removed. If nothing is left, ``"New Post"`` is used.
    The result is truncated to 100 characters.
    """
    head, _, _ = message.strip().partition("\n")
    # Drop markdown heading markers, then any whitespace they exposed.
    title = head.strip().lstrip("#").strip()
    return (title or "New Post")[:100]
|
||||
|
||||
|
||||
def _standalone_sanitize_error(text) -> str:
    """Return ``str(text)`` with any ``Authorization: Bot <token>`` value masked.

    The header name and ``Bot`` scheme are matched case-insensitively and
    kept as written; only the token that follows is replaced by ``***``.
    """
    import re as _re_san

    bot_auth = _re_san.compile(
        r"(Authorization:\s*Bot\s+)\S+",
        _re_san.IGNORECASE,
    )
    # Group 1 keeps the header prefix; the secret itself is dropped.
    return bot_auth.sub(r"\1***", str(text))
|
||||
|
||||
|
||||
async def _standalone_send(
    pconfig,
    chat_id: str,
    message: str,
    *,
    thread_id: Optional[str] = None,
    media_files: Optional[list] = None,
    force_document: bool = False,
) -> Dict[str, Any]:
    """Deliver a message through Discord's REST API without a live adapter.

    The bot token is taken from ``pconfig.token`` and, failing that, from
    the ``DISCORD_BOT_TOKEN`` environment variable.

    Routing:
      * ``thread_id`` given -> post straight to that thread's messages
        endpoint.
      * otherwise the channel is checked for being a forum (channel
        directory lookup, then the process-local probe cache, then a
        ``GET /channels/{id}`` probe whose result is cached). Forum
        channels get a new thread via ``POST /channels/{id}/threads``,
        with any readable media attached to the starter message.
      * everything else -> text message followed by one multipart upload
        per media file.

    ``media_files`` is iterated as ``(path, is_voice)`` pairs; missing
    files are skipped and reported under ``"warnings"``. ``force_document``
    is accepted but never read.

    Returns a dict with ``"success": True`` plus ids on success, or a dict
    with an ``"error"`` key (and optionally ``"warnings"``) on failure.
    """
    try:
        import aiohttp
    except ImportError:
        return {"error": "aiohttp not installed. Run: pip install aiohttp"}

    configured_token = getattr(pconfig, "token", None)
    token = (configured_token or os.getenv("DISCORD_BOT_TOKEN", "")).strip()
    if not token:
        return {"error": "Discord standalone send: DISCORD_BOT_TOKEN is not set"}

    api_root = "https://discord.com/api/v10"
    ok_statuses = {200, 201}

    try:
        from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp

        proxy_url = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
        session_kwargs, request_kwargs = proxy_kwargs_for_aiohttp(proxy_url)
        auth_headers = {"Authorization": f"Bot {token}"}
        json_headers = {**auth_headers, "Content-Type": "application/json"}
        media_files = media_files or []
        last_data = None
        warnings = []

        if thread_id:
            # Threads are addressed like channels: post to the thread id.
            url = f"{api_root}/channels/{thread_id}/messages"
        else:
            # Forum detection, cheapest source first.
            channel_type = None
            try:
                from gateway.channel_directory import lookup_channel_type

                channel_type = lookup_channel_type("discord", chat_id)
            except Exception:
                pass

            if channel_type == "forum":
                is_forum = True
            elif channel_type is not None:
                is_forum = False
            else:
                remembered = _probe_is_forum_cached(chat_id)
                if remembered is not None:
                    is_forum = remembered
                else:
                    # Last resort: ask Discord, and memoize a 200 answer.
                    is_forum = False
                    try:
                        info_url = f"{api_root}/channels/{chat_id}"
                        probe_timeout = aiohttp.ClientTimeout(total=15)
                        async with aiohttp.ClientSession(timeout=probe_timeout, **session_kwargs) as info_sess:
                            async with info_sess.get(info_url, headers=json_headers, **request_kwargs) as info_resp:
                                if info_resp.status == 200:
                                    info = await info_resp.json()
                                    is_forum = info.get("type") == 15
                                    _remember_channel_is_forum(chat_id, is_forum)
                    except Exception:
                        logger.debug("Failed to probe channel type for %s", chat_id, exc_info=True)

            if is_forum:
                thread_name = _derive_forum_thread_name(message)
                thread_url = f"{api_root}/channels/{chat_id}/threads"

                # Decide JSON vs multipart before opening the session by
                # keeping only media paths that exist on disk.
                readable_media = []
                for media_path, _is_voice in media_files:
                    if os.path.exists(media_path):
                        readable_media.append(media_path)
                        continue
                    warning = f"Media file not found, skipping: {media_path}"
                    logger.warning(warning)
                    warnings.append(warning)

                forum_timeout = aiohttp.ClientTimeout(total=60)
                async with aiohttp.ClientSession(timeout=forum_timeout, **session_kwargs) as session:
                    if readable_media:
                        # One multipart call: payload_json + files[N].
                        attachments_meta = [
                            {"id": str(position), "filename": os.path.basename(path)}
                            for position, path in enumerate(readable_media)
                        ]
                        starter_message = {"content": message, "attachments": attachments_meta}
                        payload_json = json.dumps({"name": thread_name, "message": starter_message})

                        form = aiohttp.FormData()
                        form.add_field("payload_json", payload_json, content_type="application/json")

                        try:
                            for position, media_path in enumerate(readable_media):
                                with open(media_path, "rb") as handle:
                                    form.add_field(
                                        f"files[{position}]",
                                        handle.read(),
                                        filename=os.path.basename(media_path),
                                    )
                            async with session.post(thread_url, headers=auth_headers, data=form, **request_kwargs) as resp:
                                if resp.status not in ok_statuses:
                                    body = await resp.text()
                                    return {"error": f"Discord forum thread creation error ({resp.status}): {body}"}
                                data = await resp.json()
                        except Exception as e:
                            return {"error": _standalone_sanitize_error(f"Discord forum thread upload failed: {e}")}
                    else:
                        # Text-only starter message: plain JSON body.
                        thread_payload = {
                            "name": thread_name,
                            "message": {"content": message},
                        }
                        async with session.post(
                            thread_url,
                            headers=json_headers,
                            json=thread_payload,
                            **request_kwargs,
                        ) as resp:
                            if resp.status not in ok_statuses:
                                body = await resp.text()
                                return {"error": f"Discord forum thread creation error ({resp.status}): {body}"}
                            data = await resp.json()

                thread_id_created = data.get("id")
                starter_msg_id = (data.get("message") or {}).get("id", thread_id_created)
                result = {
                    "success": True,
                    "platform": "discord",
                    "chat_id": chat_id,
                    "thread_id": thread_id_created,
                    "message_id": starter_msg_id,
                }
                if warnings:
                    result["warnings"] = warnings
                return result

            url = f"{api_root}/channels/{chat_id}/messages"

        send_timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=send_timeout, **session_kwargs) as session:
            # Text goes first; it is skipped only when it is blank AND
            # there is media to send instead.
            if message.strip() or not media_files:
                async with session.post(url, headers=json_headers, json={"content": message}, **request_kwargs) as resp:
                    if resp.status not in ok_statuses:
                        body = await resp.text()
                        return {"error": f"Discord API error ({resp.status}): {body}"}
                    last_data = await resp.json()

            # Each media file is its own multipart request; failures are
            # collected as warnings rather than aborting the send.
            for media_path, _is_voice in media_files:
                if not os.path.exists(media_path):
                    warning = f"Media file not found, skipping: {media_path}"
                    logger.warning(warning)
                    warnings.append(warning)
                    continue
                try:
                    form = aiohttp.FormData()
                    filename = os.path.basename(media_path)
                    with open(media_path, "rb") as handle:
                        form.add_field("files[0]", handle, filename=filename)
                        # The handle is passed to the form unread, so the
                        # request is issued while the file is still open.
                        async with session.post(url, headers=auth_headers, data=form, **request_kwargs) as resp:
                            if resp.status not in ok_statuses:
                                body = await resp.text()
                                warning = _standalone_sanitize_error(f"Failed to send media {media_path}: Discord API error ({resp.status}): {body}")
                                logger.error(warning)
                                warnings.append(warning)
                                continue
                            last_data = await resp.json()
                except Exception as e:
                    warning = _standalone_sanitize_error(f"Failed to send media {media_path}: {e}")
                    logger.error(warning)
                    warnings.append(warning)

        if last_data is None:
            failure = {"error": "No deliverable text or media remained after processing"}
            if warnings:
                failure["warnings"] = warnings
            return failure

        result = {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": last_data.get("id")}
        if warnings:
            result["warnings"] = warnings
        return result
    except Exception as e:
        return {"error": _standalone_sanitize_error(f"Discord send failed: {e}")}
|
||||
|
||||
|
||||
# ── Plugin entry point ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _clean_discord_user_ids(raw: str) -> list:
    """Split a comma-separated ID string and strip Discord mention wrappers.

    Spaces are removed before splitting. Entries written as ``<@...>`` /
    ``<@!...>`` lose the mention markers, a leading ``user:`` prefix (any
    letter case) is dropped, and entries that end up empty are discarded.
    """

    def _normalize(entry: str) -> str:
        # Reduce one raw entry to its bare identifier.
        entry = entry.strip()
        if entry.startswith("<@") and entry.endswith(">"):
            entry = entry.lstrip("<@!").rstrip(">")
        if entry.lower().startswith("user:"):
            entry = entry[5:]
        return entry

    normalized = (_normalize(part) for part in raw.replace(" ", "").split(","))
    return [uid for uid in normalized if uid]
|
||||
|
||||
|
||||
def interactive_setup() -> None:
    """Walk the user through configuring the Discord bot from the CLI.

    The CLI helpers are imported inside the function. Flow:

    1. If a bot token already exists, offer to reconfigure. When the user
       declines, optionally collect an allowlist (only if none is set) and
       stop.
    2. Prompt for the bot token; stop if nothing is entered.
    3. Prompt for an allowlist of users.
    4. Prompt for an optional home channel id.

    Collected values are persisted with ``save_env_value``.
    """
    from hermes_cli.config import get_env_value, save_env_value
    from hermes_cli.cli_output import (
        prompt,
        prompt_yes_no,
        print_header,
        print_info,
        print_success,
    )

    def _store_allowlist(raw_ids: str) -> None:
        # Normalise the entered ids and persist them as one CSV value.
        cleaned_ids = _clean_discord_user_ids(raw_ids)
        save_env_value("DISCORD_ALLOWED_USERS", ",".join(cleaned_ids))
        print_success("Discord allowlist configured")

    print_header("Discord")
    existing = get_env_value("DISCORD_BOT_TOKEN")
    if existing:
        print_info("Discord: already configured")
        keep_current = not prompt_yes_no("Reconfigure Discord?", False)
        if keep_current:
            # Keeping the token: only nudge about a missing allowlist.
            if not get_env_value("DISCORD_ALLOWED_USERS"):
                print_info("⚠️ Discord has no user allowlist - anyone can use your bot!")
                if prompt_yes_no("Add allowed users now?", True):
                    print_info(" To find Discord ID: Enable Developer Mode, right-click name → Copy ID")
                    allowed_users = prompt("Allowed user IDs (comma-separated)")
                    if allowed_users:
                        _store_allowlist(allowed_users)
            return

    print_info("Create a bot at https://discord.com/developers/applications")
    token = prompt("Discord bot token", password=True)
    if not token:
        return
    save_env_value("DISCORD_BOT_TOKEN", token)
    print_success("Discord token saved")

    print()
    print_info("🔒 Security: Restrict who can use your bot")
    print_info(" To find your Discord user ID:")
    print_info(" 1. Enable Developer Mode in Discord settings")
    print_info(" 2. Right-click your name → Copy ID")
    print()
    print_info(" You can also use Discord usernames (resolved on gateway start).")
    print()
    allowed_users = prompt(
        "Allowed user IDs or usernames (comma-separated, leave empty for open access)"
    )
    if allowed_users:
        _store_allowlist(allowed_users)
    else:
        print_info("⚠️ No allowlist set - anyone in servers with your bot can use it!")

    print()
    print_info("📬 Home Channel: where Hermes delivers cron job results,")
    print_info(" cross-platform messages, and notifications.")
    print_info(" To get a channel ID: right-click a channel → Copy Channel ID")
    print_info(" (requires Developer Mode in Discord settings)")
    print_info(" You can also set this later by typing /set-home in a Discord channel.")
    home_channel = prompt("Home channel ID (leave empty to set later with /set-home)")
    if home_channel:
        save_env_value("DISCORD_HOME_CHANNEL", home_channel)
|
||||
|
||||
|
||||
def _apply_yaml_config(yaml_cfg: dict, discord_cfg: dict) -> dict | None:
|
||||
"""Translate ``config.yaml`` ``discord:`` keys into env vars.
|
||||
|
||||
Implements the ``apply_yaml_config_fn`` contract (#24836). Mirrors the
|
||||
legacy ``discord_cfg`` block that used to live in
|
||||
``gateway/config.py::load_gateway_config()`` before this migration.
|
||||
|
||||
The DiscordAdapter reads its runtime configuration via ``os.getenv()``
|
||||
throughout the connect / handle code paths (``DISCORD_REQUIRE_MENTION``,
|
||||
``DISCORD_FREE_RESPONSE_CHANNELS``, ``DISCORD_AUTO_THREAD``,
|
||||
``DISCORD_REACTIONS``, ``DISCORD_IGNORED_CHANNELS``,
|
||||
``DISCORD_ALLOWED_CHANNELS``, ``DISCORD_NO_THREAD_CHANNELS``,
|
||||
``DISCORD_HISTORY_BACKFILL``, ``DISCORD_HISTORY_BACKFILL_LIMIT``,
|
||||
``DISCORD_ALLOW_MENTION_*``, ``DISCORD_REPLY_TO_MODE``,
|
||||
``DISCORD_THREAD_REQUIRE_MENTION``). Rather than rewrite ~50 call sites
|
||||
inside the adapter to read from ``PlatformConfig.extra`` instead, this
|
||||
hook keeps the existing env-driven model and merely owns the
|
||||
YAML→env translation here, next to the adapter that consumes it.
|
||||
|
||||
Env vars take precedence over YAML — every assignment is guarded by
|
||||
``not os.getenv(...)`` so explicit env vars survive a config.yaml
|
||||
update. Returns ``None`` because no extras are seeded into
|
||||
``PlatformConfig.extra`` directly (everything flows through env).
|
||||
"""
|
||||
if "require_mention" in discord_cfg and not os.getenv("DISCORD_REQUIRE_MENTION"):
|
||||
os.environ["DISCORD_REQUIRE_MENTION"] = str(discord_cfg["require_mention"]).lower()
|
||||
if "thread_require_mention" in discord_cfg and not os.getenv("DISCORD_THREAD_REQUIRE_MENTION"):
|
||||
os.environ["DISCORD_THREAD_REQUIRE_MENTION"] = str(discord_cfg["thread_require_mention"]).lower()
|
||||
frc = discord_cfg.get("free_response_channels")
|
||||
if frc is not None and not os.getenv("DISCORD_FREE_RESPONSE_CHANNELS"):
|
||||
if isinstance(frc, list):
|
||||
frc = ",".join(str(v) for v in frc)
|
||||
os.environ["DISCORD_FREE_RESPONSE_CHANNELS"] = str(frc)
|
||||
if "auto_thread" in discord_cfg and not os.getenv("DISCORD_AUTO_THREAD"):
|
||||
os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
|
||||
if "reactions" in discord_cfg and not os.getenv("DISCORD_REACTIONS"):
|
||||
os.environ["DISCORD_REACTIONS"] = str(discord_cfg["reactions"]).lower()
|
||||
# ignored_channels: channels where bot never responds (even when mentioned)
|
||||
ic = discord_cfg.get("ignored_channels")
|
||||
if ic is not None and not os.getenv("DISCORD_IGNORED_CHANNELS"):
|
||||
if isinstance(ic, list):
|
||||
ic = ",".join(str(v) for v in ic)
|
||||
os.environ["DISCORD_IGNORED_CHANNELS"] = str(ic)
|
||||
# allowed_channels: if set, bot ONLY responds in these channels (whitelist)
|
||||
ac = discord_cfg.get("allowed_channels")
|
||||
if ac is not None and not os.getenv("DISCORD_ALLOWED_CHANNELS"):
|
||||
if isinstance(ac, list):
|
||||
ac = ",".join(str(v) for v in ac)
|
||||
os.environ["DISCORD_ALLOWED_CHANNELS"] = str(ac)
|
||||
# no_thread_channels: channels where bot responds directly without creating thread
|
||||
ntc = discord_cfg.get("no_thread_channels")
|
||||
if ntc is not None and not os.getenv("DISCORD_NO_THREAD_CHANNELS"):
|
||||
if isinstance(ntc, list):
|
||||
ntc = ",".join(str(v) for v in ntc)
|
||||
os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc)
|
||||
# history_backfill: recover missed channel messages for shared sessions
|
||||
# when require_mention is active. Fetches messages between bot turns
|
||||
# and prepends them to the user message for context.
|
||||
if "history_backfill" in discord_cfg and not os.getenv("DISCORD_HISTORY_BACKFILL"):
|
||||
os.environ["DISCORD_HISTORY_BACKFILL"] = str(discord_cfg["history_backfill"]).lower()
|
||||
hbl = discord_cfg.get("history_backfill_limit")
|
||||
if hbl is not None and not os.getenv("DISCORD_HISTORY_BACKFILL_LIMIT"):
|
||||
os.environ["DISCORD_HISTORY_BACKFILL_LIMIT"] = str(hbl)
|
||||
# allow_mentions: granular control over what the bot can ping.
|
||||
# Safe defaults (no @everyone/roles) are applied in the adapter;
|
||||
# these YAML keys only override when set and let users opt back
|
||||
# into unsafe modes (e.g. roles=true) if they actually want it.
|
||||
allow_mentions_cfg = discord_cfg.get("allow_mentions")
|
||||
if isinstance(allow_mentions_cfg, dict):
|
||||
for yaml_key, env_key in (
|
||||
("everyone", "DISCORD_ALLOW_MENTION_EVERYONE"),
|
||||
("roles", "DISCORD_ALLOW_MENTION_ROLES"),
|
||||
("users", "DISCORD_ALLOW_MENTION_USERS"),
|
||||
("replied_user", "DISCORD_ALLOW_MENTION_REPLIED_USER"),
|
||||
):
|
||||
if yaml_key in allow_mentions_cfg and not os.getenv(env_key):
|
||||
os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower()
|
||||
# reply_to_mode: top-level preferred, falls back to extra.reply_to_mode.
|
||||
# YAML 1.1 parses bare 'off' as boolean False — coerce to string "off".
|
||||
_discord_extra = discord_cfg.get("extra") if isinstance(discord_cfg.get("extra"), dict) else {}
|
||||
_discord_rtm = (
|
||||
discord_cfg["reply_to_mode"] if "reply_to_mode" in discord_cfg
|
||||
else _discord_extra.get("reply_to_mode")
|
||||
)
|
||||
if _discord_rtm is not None and not os.getenv("DISCORD_REPLY_TO_MODE"):
|
||||
_rtm_str = "off" if _discord_rtm is False else str(_discord_rtm).lower()
|
||||
os.environ["DISCORD_REPLY_TO_MODE"] = _rtm_str
|
||||
return None # all settings flow through env; nothing to merge into extras
|
||||
|
||||
|
||||
def _is_connected(config) -> bool:
    """Report whether a non-blank ``DISCORD_BOT_TOKEN`` is configured.

    ``hermes_cli.gateway`` is imported and ``get_env_value`` resolved on
    that module at call time, so a patched ``gateway_mod.get_env_value`` is
    what gets consulted. ``config`` is not read.
    """
    import hermes_cli.gateway as gateway_mod

    token = gateway_mod.get_env_value("DISCORD_BOT_TOKEN") or ""
    return bool(token.strip())
|
||||
|
||||
|
||||
def _build_adapter(config):
    """Create and return a ``DiscordAdapter`` for the given platform config."""
    adapter = DiscordAdapter(config)
    return adapter
|
||||
|
||||
|
||||
def register(ctx) -> None:
    """Register the Discord platform with the plugin context.

    Collects every registration option into one mapping and forwards it to
    ``ctx.register_platform`` as keyword arguments.
    """
    platform_options = {
        "name": "discord",
        "label": "Discord",
        "adapter_factory": _build_adapter,
        "check_fn": check_discord_requirements,
        "is_connected": _is_connected,
        "required_env": ["DISCORD_BOT_TOKEN"],
        "install_hint": "pip install 'hermes-agent[discord]'",
        # Interactive setup wizard for this platform.
        "setup_fn": interactive_setup,
        # YAML -> env bridge: turns ``discord:`` keys from config.yaml into
        # the ``DISCORD_*`` environment variables.
        "apply_yaml_config_fn": _apply_yaml_config,
        # Env vars consulted for user authorization.
        "allowed_users_env": "DISCORD_ALLOWED_USERS",
        "allow_all_env": "DISCORD_ALLOW_ALL_USERS",
        # Env var naming the channel used for cron home-channel delivery.
        "cron_deliver_env_var": "DISCORD_HOME_CHANNEL",
        # REST-based sender used when no live adapter is in this process.
        "standalone_sender_fn": _standalone_send,
        # Discord hard limit per message.
        "max_message_length": 2000,
        # Display.
        "emoji": "🎮",
        "allow_update_command": True,
    }
    ctx.register_platform(**platform_options)
|
||||
+10
-72
@@ -1514,10 +1514,8 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
connection_mode=str(
|
||||
extra.get("connection_mode") or os.getenv("FEISHU_CONNECTION_MODE", "websocket")
|
||||
).strip().lower(),
|
||||
encrypt_key=str(extra.get("encrypt_key") or os.getenv("FEISHU_ENCRYPT_KEY", "")).strip(),
|
||||
verification_token=str(
|
||||
extra.get("verification_token") or os.getenv("FEISHU_VERIFICATION_TOKEN", "")
|
||||
).strip(),
|
||||
encrypt_key=os.getenv("FEISHU_ENCRYPT_KEY", "").strip(),
|
||||
verification_token=os.getenv("FEISHU_VERIFICATION_TOKEN", "").strip(),
|
||||
group_policy=os.getenv("FEISHU_GROUP_POLICY", "allowlist").strip().lower(),
|
||||
allowed_group_users=frozenset(
|
||||
item.strip()
|
||||
@@ -1644,11 +1642,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
self._connection_mode,
|
||||
)
|
||||
return False
|
||||
if self._connection_mode == "webhook" and not (self._verification_token or self._encrypt_key):
|
||||
logger.error(
|
||||
"[Feishu] Webhook mode requires FEISHU_VERIFICATION_TOKEN or FEISHU_ENCRYPT_KEY."
|
||||
)
|
||||
return False
|
||||
|
||||
try:
|
||||
self._app_lock_identity = self._app_id
|
||||
@@ -2570,44 +2563,13 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
if approval_id is None:
|
||||
logger.debug("[Feishu] Card action missing approval_id, ignoring")
|
||||
return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
|
||||
state = self._approval_state.get(approval_id)
|
||||
if not state:
|
||||
logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id)
|
||||
return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
|
||||
choice = _APPROVAL_CHOICE_MAP.get(action_value.get("hermes_action"), "deny")
|
||||
|
||||
operator = getattr(event, "operator", None)
|
||||
open_id = str(getattr(operator, "open_id", "") or "")
|
||||
sender_id = SimpleNamespace(open_id=open_id, user_id=str(getattr(operator, "user_id", "") or ""))
|
||||
if not self._allow_group_message(sender_id, state.get("chat_id", ""), is_bot=False):
|
||||
logger.warning("[Feishu] Unauthorized approval click by %s", open_id or "<unknown>")
|
||||
return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
|
||||
|
||||
callback_chat_id = str(getattr(getattr(event, "context", None), "open_chat_id", "") or "")
|
||||
expected_chat_id = str(state.get("chat_id", "") or "")
|
||||
if callback_chat_id and expected_chat_id and callback_chat_id != expected_chat_id:
|
||||
logger.warning(
|
||||
"[Feishu] Approval callback chat mismatch for %s (expected=%s, got=%s)",
|
||||
approval_id,
|
||||
expected_chat_id,
|
||||
callback_chat_id,
|
||||
)
|
||||
return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
|
||||
|
||||
user_name = self._get_cached_sender_name(open_id) or open_id
|
||||
|
||||
chat_context = getattr(event, "context", None)
|
||||
chat_id = str(getattr(chat_context, "open_chat_id", "") or "")
|
||||
if not self._submit_on_loop(
|
||||
loop,
|
||||
self._resolve_approval(
|
||||
approval_id=approval_id,
|
||||
choice=choice,
|
||||
user_name=user_name,
|
||||
open_id=open_id,
|
||||
chat_id=chat_id,
|
||||
),
|
||||
):
|
||||
if not self._submit_on_loop(loop, self._resolve_approval(approval_id, choice, user_name)):
|
||||
return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
|
||||
|
||||
if P2CardActionTriggerResponse is None:
|
||||
@@ -2655,33 +2617,11 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
response.card = card
|
||||
return response
|
||||
|
||||
async def _resolve_approval(
|
||||
self,
|
||||
approval_id: Any,
|
||||
choice: str,
|
||||
user_name: str,
|
||||
*,
|
||||
open_id: str = "",
|
||||
chat_id: str = "",
|
||||
) -> None:
|
||||
async def _resolve_approval(self, approval_id: Any, choice: str, user_name: str) -> None:
|
||||
"""Pop approval state and unblock the waiting agent thread."""
|
||||
state = self._approval_state.get(approval_id)
|
||||
if not state:
|
||||
logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id)
|
||||
return
|
||||
if not self._is_interactive_operator_authorized(open_id):
|
||||
logger.warning("[Feishu] Unauthorized approval click by %s for approval %s", open_id or "<unknown>", approval_id)
|
||||
return
|
||||
expected_chat_id = str(state.get("chat_id", "") or "")
|
||||
if expected_chat_id and chat_id and expected_chat_id != chat_id:
|
||||
logger.warning(
|
||||
"[Feishu] Approval %s chat mismatch (expected=%s, got=%s)",
|
||||
approval_id, expected_chat_id, chat_id,
|
||||
)
|
||||
return
|
||||
state = self._approval_state.pop(approval_id, None)
|
||||
if not state:
|
||||
logger.debug("[Feishu] Approval %s already resolved while validating callback", approval_id)
|
||||
logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id)
|
||||
return
|
||||
try:
|
||||
from tools.approval import resolve_gateway_approval
|
||||
@@ -3289,6 +3229,11 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
self._record_webhook_anomaly(remote_ip, "400")
|
||||
return web.json_response({"code": 400, "msg": "invalid json"}, status=400)
|
||||
|
||||
# URL verification challenge — respond before other checks so that Feishu's
|
||||
# subscription setup works even before encrypt_key is wired.
|
||||
if payload.get("type") == "url_verification":
|
||||
return web.json_response({"challenge": payload.get("challenge", "")})
|
||||
|
||||
# Verification token check — second layer of defence beyond signature (matches openclaw).
|
||||
if self._verification_token:
|
||||
header = payload.get("header") or {}
|
||||
@@ -3298,13 +3243,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
self._record_webhook_anomaly(remote_ip, "401-token")
|
||||
return web.Response(status=401, text="Invalid verification token")
|
||||
|
||||
# URL verification challenge — Feishu includes the verification token in
|
||||
# challenge requests. Validate the token (above) before reflecting the
|
||||
# challenge so an unauthenticated remote request cannot prove endpoint
|
||||
# control by getting attacker-supplied challenge data echoed back.
|
||||
if payload.get("type") == "url_verification":
|
||||
return web.json_response({"challenge": payload.get("challenge", "")})
|
||||
|
||||
# Timing-safe signature verification (only enforced when encrypt_key is set).
|
||||
if self._encrypt_key and not self._is_webhook_signature_valid(request.headers, body_bytes):
|
||||
logger.warning("[Feishu] Webhook rejected: invalid signature from %s", remote_ip)
|
||||
|
||||
@@ -138,8 +138,7 @@ _OUTBOUND_MENTION_RE = re.compile(
|
||||
)
|
||||
|
||||
_E2EE_INSTALL_HINT = (
|
||||
"Install with: pip install 'mautrix[encryption]' asyncpg aiosqlite "
|
||||
"(requires libolm C library)"
|
||||
"Install with: pip install 'mautrix[encryption]' (requires libolm C library)"
|
||||
)
|
||||
|
||||
_MATRIX_IMAGE_FILENAME_EXTS = frozenset({
|
||||
@@ -215,22 +214,9 @@ def _create_matrix_session(proxy_url: str | None):
|
||||
|
||||
|
||||
def _check_e2ee_deps() -> bool:
|
||||
"""Return True if mautrix E2EE dependencies are available.
|
||||
|
||||
Verifies python-olm (via mautrix.crypto.OlmMachine), the SQLite crypto
|
||||
store backend (mautrix.crypto.store.asyncpg.PgCryptoStore — yes, the
|
||||
PgCryptoStore class also drives the sqlite backend in mautrix 0.21),
|
||||
and the database drivers actually used at connect time (``asyncpg`` for
|
||||
the underlying upgrade_table machinery, ``aiosqlite`` for the
|
||||
``sqlite:///`` URL we pass to ``Database.create``). Without all four,
|
||||
encrypted rooms fail at connect time with a confusing
|
||||
``No module named 'asyncpg'`` (#31116).
|
||||
"""
|
||||
"""Return True if mautrix E2EE dependencies (python-olm) are available."""
|
||||
try:
|
||||
from mautrix.crypto import OlmMachine # noqa: F401
|
||||
from mautrix.crypto.store.asyncpg import PgCryptoStore # noqa: F401
|
||||
import asyncpg # noqa: F401
|
||||
import aiosqlite # noqa: F401
|
||||
|
||||
return True
|
||||
except (ImportError, AttributeError):
|
||||
@@ -240,13 +226,8 @@ def _check_e2ee_deps() -> bool:
|
||||
def check_matrix_requirements() -> bool:
|
||||
"""Return True if the Matrix adapter can be used.
|
||||
|
||||
Lazy-installs the full ``platform.matrix`` feature group via
|
||||
``tools.lazy_deps.ensure_and_bind`` whenever any of the declared
|
||||
packages (mautrix, Markdown, aiosqlite, asyncpg, aiohttp-socks) is
|
||||
missing — not just mautrix itself. Previously this short-circuited on
|
||||
``import mautrix``, which left the other four packages uninstalled
|
||||
forever and broke E2EE connect with ``No module named 'asyncpg'``
|
||||
(#31116). Rebinds module-level type globals on success.
|
||||
Lazy-installs mautrix via ``tools.lazy_deps.ensure("platform.matrix")``
|
||||
on first call if not present. Rebinds all module-level type globals on success.
|
||||
"""
|
||||
token = os.getenv("MATRIX_ACCESS_TOKEN", "")
|
||||
password = os.getenv("MATRIX_PASSWORD", "")
|
||||
@@ -258,20 +239,9 @@ def check_matrix_requirements() -> bool:
|
||||
if not homeserver:
|
||||
logger.warning("Matrix: MATRIX_HOMESERVER not set")
|
||||
return False
|
||||
|
||||
# Check whether any package in the platform.matrix feature group is
|
||||
# missing. ``feature_missing`` is cheap (per-spec importlib.metadata
|
||||
# lookups) and correctly handles ``mautrix[encryption]`` by stripping
|
||||
# the extras marker before checking the bare package.
|
||||
try:
|
||||
from tools.lazy_deps import feature_missing, ensure_and_bind
|
||||
missing = feature_missing("platform.matrix")
|
||||
except Exception as exc: # pragma: no cover — defensive
|
||||
logger.debug("Matrix: lazy_deps lookup failed: %s", exc)
|
||||
missing = ()
|
||||
ensure_and_bind = None # type: ignore[assignment]
|
||||
|
||||
if missing or ensure_and_bind is None:
|
||||
import mautrix # noqa: F401
|
||||
except ImportError:
|
||||
def _import():
|
||||
from mautrix.types import (
|
||||
ContentURI, EventID, EventType, PaginationDirection,
|
||||
@@ -291,14 +261,10 @@ def check_matrix_requirements() -> bool:
|
||||
"UserID": UserID,
|
||||
}
|
||||
|
||||
if ensure_and_bind is None:
|
||||
return False
|
||||
from tools.lazy_deps import ensure_and_bind
|
||||
if not ensure_and_bind("platform.matrix", _import, globals(), prompt=False):
|
||||
logger.warning(
|
||||
"Matrix: required packages not installed (%s). "
|
||||
"Run: pip install 'mautrix[encryption]' asyncpg aiosqlite "
|
||||
"Markdown aiohttp-socks",
|
||||
", ".join(missing) if missing else "platform.matrix",
|
||||
"Matrix: mautrix not installed. Run: pip install 'mautrix[encryption]'"
|
||||
)
|
||||
return False
|
||||
|
||||
|
||||
@@ -133,12 +133,6 @@ class MSGraphWebhookAdapter(BasePlatformAdapter):
|
||||
self._notification_scheduler = scheduler
|
||||
|
||||
async def connect(self) -> bool:
|
||||
if self._client_state is None:
|
||||
logger.error(
|
||||
"[msgraph_webhook] Refusing to start without extra.client_state configured"
|
||||
)
|
||||
return False
|
||||
|
||||
app = web.Application()
|
||||
app.router.add_get(self._health_path, self._handle_health)
|
||||
app.router.add_get(self._webhook_path, self._handle_validation)
|
||||
@@ -316,7 +310,7 @@ class MSGraphWebhookAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
expected = self._client_state
|
||||
if expected is None:
|
||||
return False
|
||||
return True
|
||||
provided = self._string_or_none(notification.get("clientState"))
|
||||
if provided is None:
|
||||
return False
|
||||
|
||||
@@ -534,30 +534,9 @@ class QQAdapter(BasePlatformAdapter):
|
||||
self._mark_transport_disconnected()
|
||||
self._fail_pending("Connection closed")
|
||||
|
||||
# Stop reconnecting for fatal codes (unrecoverable errors)
|
||||
if code in {
|
||||
4001, # Invalid opcode
|
||||
4002, # Invalid payload
|
||||
4010, # Invalid shard
|
||||
4011, # Sharding required
|
||||
4012, # Invalid API version
|
||||
4013, # Invalid intent
|
||||
4014, # Intent not authorized
|
||||
4914, # Offline/sandbox-only
|
||||
4915, # Banned
|
||||
}:
|
||||
fatal_descriptions = {
|
||||
4001: "invalid opcode",
|
||||
4002: "invalid payload",
|
||||
4010: "invalid shard",
|
||||
4011: "sharding required",
|
||||
4012: "invalid API version",
|
||||
4013: "invalid intent",
|
||||
4014: "intent not authorized",
|
||||
4914: "offline/sandbox-only",
|
||||
4915: "banned",
|
||||
}
|
||||
desc = fatal_descriptions.get(code, f"fatal error (code={code})")
|
||||
# Stop reconnecting for fatal codes
|
||||
if code in {4914, 4915}:
|
||||
desc = "offline/sandbox-only" if code == 4914 else "banned"
|
||||
logger.error(
|
||||
"[%s] Bot is %s. Check QQ Open Platform.", self._log_tag, desc
|
||||
)
|
||||
@@ -594,11 +573,10 @@ class QQAdapter(BasePlatformAdapter):
|
||||
self._token_expires_at = 0.0
|
||||
|
||||
# Session invalid → clear session, will re-identify on next Hello
|
||||
# Note: 4009 (connection timeout) is NOT included here — it is
|
||||
# resumable per the QQ protocol and should preserve session state.
|
||||
if code in {
|
||||
4006,
|
||||
4007,
|
||||
4009,
|
||||
4900,
|
||||
4901,
|
||||
4902,
|
||||
@@ -727,8 +705,9 @@ class QQAdapter(BasePlatformAdapter):
|
||||
"token": f"QQBot {token}",
|
||||
"intents": (1 << 25)
|
||||
| (1 << 30)
|
||||
| (1 << 12)
|
||||
| (1 << 26), # C2C_GROUP_AT_MESSAGES + PUBLIC_GUILD_MESSAGES + DIRECT_MESSAGE + INTERACTION
|
||||
| (
|
||||
1 << 12
|
||||
), # C2C_GROUP_AT_MESSAGES + PUBLIC_GUILD_MESSAGES + DIRECT_MESSAGE
|
||||
"shard": [0, 1],
|
||||
"properties": {
|
||||
"$os": "macOS",
|
||||
@@ -847,32 +826,6 @@ class QQAdapter(BasePlatformAdapter):
|
||||
if op == 11:
|
||||
return
|
||||
|
||||
# op 7 = Server Reconnect — server asks client to reconnect (e.g.
|
||||
# load-balancing, maintenance). Close the WS so _read_events raises
|
||||
# and the outer loop triggers a reconnect with Resume.
|
||||
if op == 7:
|
||||
logger.info("[%s] Server requested reconnect (op 7)", self._log_tag)
|
||||
if self._ws and not self._ws.closed:
|
||||
self._create_task(self._ws.close())
|
||||
return
|
||||
|
||||
# op 9 = Invalid Session — d=True means session is resumable,
|
||||
# d=False means we must re-identify from scratch.
|
||||
if op == 9:
|
||||
resumable = bool(d) if d is not None else False
|
||||
if not resumable:
|
||||
logger.info(
|
||||
"[%s] Invalid session (op 9, not resumable), clearing session",
|
||||
self._log_tag,
|
||||
)
|
||||
self._session_id = None
|
||||
self._last_seq = None
|
||||
else:
|
||||
logger.info("[%s] Invalid session (op 9, resumable)", self._log_tag)
|
||||
if self._ws and not self._ws.closed:
|
||||
self._create_task(self._ws.close())
|
||||
return
|
||||
|
||||
logger.debug("[%s] Unknown op: %s", self._log_tag, op)
|
||||
|
||||
def _handle_ready(self, d: Any) -> None:
|
||||
@@ -1054,46 +1007,6 @@ class QQAdapter(BasePlatformAdapter):
|
||||
"deny": "deny",
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _parse_gateway_session_key(session_key: str) -> Optional[Dict[str, str]]:
|
||||
"""Parse ``agent:main:<platform>:<chat_type>:<chat_id>[:<user_id>]``."""
|
||||
parts = str(session_key or "").split(":")
|
||||
if len(parts) < 5 or parts[0] != "agent" or parts[1] != "main":
|
||||
return None
|
||||
parsed = {
|
||||
"platform": parts[2],
|
||||
"chat_type": parts[3],
|
||||
"chat_id": parts[4],
|
||||
}
|
||||
if len(parts) > 5:
|
||||
parsed["user_id"] = parts[5]
|
||||
return parsed
|
||||
|
||||
def _is_authorized_interaction_for_session(
|
||||
self,
|
||||
event: InteractionEvent,
|
||||
session_key: str,
|
||||
) -> bool:
|
||||
"""Authorize approval/update interactions against session + operator."""
|
||||
parsed = self._parse_gateway_session_key(session_key)
|
||||
operator = str(event.operator_openid or "").strip()
|
||||
if not parsed or parsed.get("platform") != "qqbot" or not operator:
|
||||
return False
|
||||
|
||||
chat_type = parsed.get("chat_type", "")
|
||||
chat_id = parsed.get("chat_id", "")
|
||||
if chat_type == "c2c":
|
||||
return bool(chat_id) and operator == chat_id
|
||||
|
||||
if chat_type in {"group", "guild"}:
|
||||
event_chat = str(event.group_openid or event.guild_id or "").strip()
|
||||
if not event_chat or event_chat != chat_id:
|
||||
return False
|
||||
session_user = str(parsed.get("user_id", "")).strip()
|
||||
return bool(session_user) and operator == session_user
|
||||
|
||||
return False
|
||||
|
||||
async def _default_interaction_dispatch(
|
||||
self,
|
||||
event: InteractionEvent,
|
||||
@@ -1127,13 +1040,6 @@ class QQAdapter(BasePlatformAdapter):
|
||||
self._log_tag, decision, session_key,
|
||||
)
|
||||
return
|
||||
if not self._is_authorized_interaction_for_session(event, session_key):
|
||||
logger.warning(
|
||||
"[%s] Rejected unauthorized approval click for session %s "
|
||||
"(operator=%s)",
|
||||
self._log_tag, session_key, event.operator_openid,
|
||||
)
|
||||
return
|
||||
try:
|
||||
# Import lazily to keep the adapter importable in tests that
|
||||
# don't exercise the approval subsystem.
|
||||
@@ -1154,13 +1060,6 @@ class QQAdapter(BasePlatformAdapter):
|
||||
|
||||
update_answer = parse_update_prompt_button_data(button_data)
|
||||
if update_answer is not None:
|
||||
update_session_key = f"agent:main:qqbot:{event.scene}:{event.group_openid or event.guild_id or event.user_openid}"
|
||||
if not self._is_authorized_interaction_for_session(event, update_session_key):
|
||||
logger.warning(
|
||||
"[%s] Rejected unauthorized update prompt click (operator=%s)",
|
||||
self._log_tag, event.operator_openid,
|
||||
)
|
||||
return
|
||||
self._write_update_response(update_answer, event.operator_openid)
|
||||
return
|
||||
|
||||
@@ -1708,7 +1607,7 @@ class QQAdapter(BasePlatformAdapter):
|
||||
elif ct.startswith("image/"):
|
||||
# Image: download and cache locally.
|
||||
try:
|
||||
cached_path = await self._download_and_cache(url, ct, filename)
|
||||
cached_path = await self._download_and_cache(url, ct)
|
||||
if cached_path and os.path.isfile(cached_path):
|
||||
image_urls.append(cached_path)
|
||||
image_media_types.append(ct or "image/jpeg")
|
||||
@@ -1721,15 +1620,11 @@ class QQAdapter(BasePlatformAdapter):
|
||||
except Exception as exc:
|
||||
logger.debug("[%s] Failed to cache image: %s", self._log_tag, exc)
|
||||
else:
|
||||
# Other attachments (video, file, etc.): download and record with path.
|
||||
# Other attachments (video, file, etc.): record as text.
|
||||
try:
|
||||
cached_path = await self._download_and_cache(url, ct, filename)
|
||||
cached_path = await self._download_and_cache(url, ct)
|
||||
if cached_path:
|
||||
name = filename or ct
|
||||
if ct.startswith("video/"):
|
||||
other_attachments.append(f"[video: {name} ({cached_path})]")
|
||||
else:
|
||||
other_attachments.append(f"[file: {name} ({cached_path})]")
|
||||
other_attachments.append(f"[Attachment: {filename or ct}]")
|
||||
except Exception as exc:
|
||||
logger.debug("[%s] Failed to cache attachment: %s", self._log_tag, exc)
|
||||
|
||||
@@ -1741,14 +1636,8 @@ class QQAdapter(BasePlatformAdapter):
|
||||
"attachment_info": attachment_info,
|
||||
}
|
||||
|
||||
async def _download_and_cache(
|
||||
self, url: str, content_type: str, original_name: str = "",
|
||||
) -> Optional[str]:
|
||||
"""Download a URL and cache it locally.
|
||||
|
||||
:param original_name: Preferred filename from attachment metadata.
|
||||
Falls back to the URL path basename if empty.
|
||||
"""
|
||||
async def _download_and_cache(self, url: str, content_type: str) -> Optional[str]:
|
||||
"""Download a URL and cache it locally."""
|
||||
from tools.url_safety import is_safe_url
|
||||
|
||||
if not is_safe_url(url):
|
||||
@@ -1779,11 +1668,7 @@ class QQAdapter(BasePlatformAdapter):
|
||||
# Convert to .wav using ffmpeg so STT engines can process it.
|
||||
return await self._convert_audio_to_wav(data, url)
|
||||
else:
|
||||
filename = (
|
||||
original_name
|
||||
or Path(urlparse(url).path).name
|
||||
or "qq_attachment"
|
||||
)
|
||||
filename = Path(urlparse(url).path).name or "qq_attachment"
|
||||
return cache_document_from_bytes(data, filename)
|
||||
|
||||
@staticmethod
|
||||
@@ -1996,7 +1881,7 @@ class QQAdapter(BasePlatformAdapter):
|
||||
@staticmethod
|
||||
def _guess_ext_from_data(data: bytes) -> str:
|
||||
"""Guess file extension from magic bytes."""
|
||||
if data[:9] == b"#!SILK_V3" or data[:6] == b"#!SILK":
|
||||
if data[:9] == b"#!SILK_V3" or data[:5] == b"#!SILK":
|
||||
return ".silk"
|
||||
if data[:2] == b"\x02!":
|
||||
return ".silk"
|
||||
@@ -2016,7 +1901,7 @@ class QQAdapter(BasePlatformAdapter):
|
||||
@staticmethod
|
||||
def _looks_like_silk(data: bytes) -> bool:
|
||||
"""Check if bytes look like a SILK audio file."""
|
||||
return data[:6] == b"#!SILK" or data[:2] == b"\x02!" or data[:9] == b"#!SILK_V3"
|
||||
return data[:4] == b"#!SILK" or data[:2] == b"\x02!" or data[:9] == b"#!SILK_V3"
|
||||
|
||||
async def _convert_silk_to_wav(self, src_path: str, wav_path: str) -> Optional[str]:
|
||||
"""Convert audio file to WAV using the pilk library.
|
||||
|
||||
@@ -429,13 +429,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
self._polling_conflict_count: int = 0
|
||||
self._polling_network_error_count: int = 0
|
||||
self._polling_error_callback_ref = None
|
||||
# After sustained reconnect storms the PTB httpx pool can return
|
||||
# SendResult(success=True) for sends that never actually transmit.
|
||||
# _handle_polling_network_error sets this; _verify_polling_after_reconnect
|
||||
# clears it once getMe() confirms the Bot client is healthy.
|
||||
# While True, send() short-circuits to a failure so callers
|
||||
# (cron live-adapter branch) fall through to standalone delivery.
|
||||
self._send_path_degraded: bool = False
|
||||
# DM Topics: map of topic_name -> message_thread_id (populated at startup)
|
||||
self._dm_topics: Dict[str, int] = {}
|
||||
# Track forum chats where we've already registered bot commands
|
||||
@@ -475,10 +468,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
# "all" — every message triggers a push notification (legacy
|
||||
# behavior; opt-in via display.platforms.telegram.notifications).
|
||||
self._notifications_mode: str = "important"
|
||||
# send_or_update_status() bookkeeping: {(chat_id, status_key) -> bot message_id}
|
||||
# Tracks status bubbles owned by this adapter so subsequent calls with the
|
||||
# same key edit the same message instead of appending new ones (#30045).
|
||||
self._status_message_ids: Dict[tuple, str] = {}
|
||||
|
||||
def _notification_kwargs(
|
||||
self, metadata: Optional[Dict[str, Any]]
|
||||
@@ -881,7 +870,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
MAX_DELAY = 60
|
||||
|
||||
self._polling_network_error_count += 1
|
||||
self._send_path_degraded = True
|
||||
attempt = self._polling_network_error_count
|
||||
|
||||
if attempt > MAX_NETWORK_RETRIES:
|
||||
@@ -979,7 +967,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
try:
|
||||
await asyncio.wait_for(self._app.bot.get_me(), PROBE_TIMEOUT)
|
||||
self._send_path_degraded = False
|
||||
except Exception as probe_err:
|
||||
logger.warning(
|
||||
"[%s] Polling heartbeat probe failed %ds after reconnect: %s",
|
||||
@@ -1692,11 +1679,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
"""Send a message to a Telegram chat."""
|
||||
if not self._bot:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
# getattr() — tests build adapters via object.__new__() (no __init__).
|
||||
if getattr(self, "_send_path_degraded", False):
|
||||
return SendResult(success=False, error="send_path_degraded", retryable=True)
|
||||
|
||||
|
||||
# Skip whitespace-only text to prevent Telegram 400 empty-text errors.
|
||||
if not content or not content.strip():
|
||||
return SendResult(success=True, message_id=None)
|
||||
@@ -1925,40 +1908,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
is_connect_timeout = self._looks_like_connect_timeout(e)
|
||||
return SendResult(success=False, error=str(e), retryable=(is_connect_timeout or not is_timeout))
|
||||
|
||||
async def send_or_update_status(
|
||||
self,
|
||||
chat_id: str,
|
||||
status_key: str,
|
||||
content: str,
|
||||
*,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a status message, or edit the previous one with the same key.
|
||||
|
||||
Issue #30045: progress/status callbacks (context-pressure, lifecycle,
|
||||
compression, etc.) used to append a fresh bubble on every call. With
|
||||
this method, the first call sends and the message id is remembered;
|
||||
subsequent calls with the same (chat_id, status_key) edit that same
|
||||
message in place. If the edit fails (message deleted, too old, etc.)
|
||||
we drop the cached id and send fresh.
|
||||
"""
|
||||
key = (str(chat_id), str(status_key))
|
||||
cached_id = self._status_message_ids.get(key)
|
||||
if cached_id is not None:
|
||||
result = await self.edit_message(
|
||||
chat_id, cached_id, content, finalize=True, metadata=metadata,
|
||||
)
|
||||
if result.success:
|
||||
if result.message_id:
|
||||
self._status_message_ids[key] = str(result.message_id)
|
||||
return result
|
||||
# Edit failed — clear the cached id and fall through to a fresh send.
|
||||
self._status_message_ids.pop(key, None)
|
||||
result = await self.send(chat_id, content, metadata=metadata)
|
||||
if result.success and result.message_id:
|
||||
self._status_message_ids[key] = str(result.message_id)
|
||||
return result
|
||||
|
||||
async def edit_message(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -4624,10 +4573,10 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return (
|
||||
"You are handling a Telegram group chat message.\n"
|
||||
f"- Your identity: user_id={bot_id}, @-mention name in this group=@{username}\n"
|
||||
"- observed Telegram group context may be provided in a separate context-only block "
|
||||
"before the current message; it is not necessarily addressed to you.\n"
|
||||
"- Lines in history prefixed with `[nickname|user_id]` are observed Telegram group context "
|
||||
"and are not necessarily addressed to you.\n"
|
||||
"- Treat only the current new message as a request explicitly directed at you, "
|
||||
"and use observed context only when the current message asks for it."
|
||||
"and answer it directly."
|
||||
)
|
||||
|
||||
def _apply_telegram_group_observe_attribution(self, event: MessageEvent) -> MessageEvent:
|
||||
@@ -4644,12 +4593,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
shared_source = self._telegram_group_observe_shared_source(event.source)
|
||||
observe_prompt = self._telegram_group_observe_channel_prompt()
|
||||
channel_prompt = f"{event.channel_prompt}\n\n{observe_prompt}" if event.channel_prompt else observe_prompt
|
||||
if event.message_type == MessageType.COMMAND:
|
||||
return dataclasses.replace(
|
||||
event,
|
||||
source=shared_source,
|
||||
channel_prompt=channel_prompt,
|
||||
)
|
||||
return dataclasses.replace(
|
||||
event,
|
||||
text=self._telegram_group_observe_attributed_text(event),
|
||||
|
||||
@@ -27,8 +27,6 @@ Security:
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import binascii
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
@@ -310,37 +308,11 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
data = json.loads(subs_path.read_text(encoding="utf-8"))
|
||||
if not isinstance(data, dict):
|
||||
return
|
||||
# Merge: static routes take precedence over dynamic ones.
|
||||
# Reject any dynamic route whose effective secret is empty —
|
||||
# an empty secret would cause _handle_webhook to skip HMAC
|
||||
# validation entirely, letting unauthenticated callers in.
|
||||
new_dynamic: Dict[str, dict] = {}
|
||||
for k, v in data.items():
|
||||
if k in self._static_routes:
|
||||
continue
|
||||
effective_secret = v.get("secret", self._global_secret)
|
||||
if not effective_secret:
|
||||
logger.warning(
|
||||
"[webhook] Dynamic route '%s' skipped: 'secret' is "
|
||||
"missing or empty. Set a valid HMAC secret, or use "
|
||||
"'%s' to explicitly disable auth (testing only).",
|
||||
k,
|
||||
_INSECURE_NO_AUTH,
|
||||
)
|
||||
continue
|
||||
if (
|
||||
effective_secret == _INSECURE_NO_AUTH
|
||||
and not _is_loopback_host(self._host)
|
||||
):
|
||||
logger.warning(
|
||||
"[webhook] Dynamic route '%s' skipped: INSECURE_NO_AUTH "
|
||||
"is only allowed on loopback hosts. Current host: '%s'.",
|
||||
k,
|
||||
self._host,
|
||||
)
|
||||
continue
|
||||
new_dynamic[k] = v
|
||||
self._dynamic_routes = new_dynamic
|
||||
# Merge: static routes take precedence over dynamic ones
|
||||
self._dynamic_routes = {
|
||||
k: v for k, v in data.items()
|
||||
if k not in self._static_routes
|
||||
}
|
||||
self._routes = {**self._dynamic_routes, **self._static_routes}
|
||||
self._dynamic_routes_mtime = mtime
|
||||
logger.info(
|
||||
@@ -379,21 +351,9 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
logger.error("[webhook] Failed to read body: %s", e)
|
||||
return web.json_response({"error": "Bad request"}, status=400)
|
||||
|
||||
# Validate HMAC signature FIRST (skip only for the explicit local-test
|
||||
# INSECURE_NO_AUTH mode). Missing/empty secrets must fail closed here,
|
||||
# not only during connect(), so direct handler reuse cannot turn a
|
||||
# network webhook route into an unauthenticated agent-dispatch surface.
|
||||
# Validate HMAC signature FIRST (skip for INSECURE_NO_AUTH testing mode)
|
||||
secret = route_config.get("secret", self._global_secret)
|
||||
if not secret:
|
||||
logger.error(
|
||||
"[webhook] Route %s has no HMAC secret; refusing request",
|
||||
route_name,
|
||||
)
|
||||
return web.json_response(
|
||||
{"error": "Webhook route is missing an HMAC secret"},
|
||||
status=403,
|
||||
)
|
||||
if secret != _INSECURE_NO_AUTH:
|
||||
if secret and secret != _INSECURE_NO_AUTH:
|
||||
if not self._validate_signature(request, raw_body, secret):
|
||||
logger.warning(
|
||||
"[webhook] Invalid signature for route %s", route_name
|
||||
@@ -433,7 +393,6 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
request.headers.get("X-GitHub-Event", "")
|
||||
or request.headers.get("X-GitLab-Event", "")
|
||||
or payload.get("event_type", "")
|
||||
or payload.get("type", "")
|
||||
or "unknown"
|
||||
)
|
||||
allowed_events = route_config.get("events", [])
|
||||
@@ -486,10 +445,7 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
# Build a unique delivery ID
|
||||
delivery_id = request.headers.get(
|
||||
"X-GitHub-Delivery",
|
||||
request.headers.get(
|
||||
"svix-id",
|
||||
request.headers.get("X-Request-ID", str(int(time.time() * 1000))),
|
||||
),
|
||||
request.headers.get("X-Request-ID", str(int(time.time() * 1000))),
|
||||
)
|
||||
|
||||
# ── Idempotency ─────────────────────────────────────────
|
||||
@@ -634,32 +590,7 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
def _validate_signature(
|
||||
self, request: "web.Request", body: bytes, secret: str
|
||||
) -> bool:
|
||||
"""Validate webhook signature (GitHub, GitLab, Svix, generic HMAC-SHA256)."""
|
||||
def _header(name: str) -> str:
|
||||
return (
|
||||
request.headers.get(name, "")
|
||||
or request.headers.get(name.lower(), "")
|
||||
or request.headers.get(name.upper(), "")
|
||||
)
|
||||
|
||||
# Svix / AgentMail:
|
||||
# svix-id: msg_...
|
||||
# svix-timestamp: unix seconds
|
||||
# svix-signature: v1,<base64-hmac> [v1,<base64-hmac> ...]
|
||||
# Signed content is: "{id}.{timestamp}.{raw_body}". Svix secrets
|
||||
# usually start with "whsec_" and the remainder is base64-encoded.
|
||||
svix_id = _header("svix-id")
|
||||
svix_timestamp = _header("svix-timestamp")
|
||||
svix_signature = _header("svix-signature")
|
||||
if svix_id or svix_timestamp or svix_signature:
|
||||
return self._validate_svix_signature(
|
||||
body=body,
|
||||
secret=secret,
|
||||
msg_id=svix_id,
|
||||
timestamp=svix_timestamp,
|
||||
signature_header=svix_signature,
|
||||
)
|
||||
|
||||
"""Validate webhook signature (GitHub, GitLab, generic HMAC-SHA256)."""
|
||||
# GitHub: X-Hub-Signature-256 = sha256=<hex>
|
||||
gh_sig = request.headers.get("X-Hub-Signature-256", "")
|
||||
if gh_sig:
|
||||
@@ -687,56 +618,6 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
)
|
||||
return False
|
||||
|
||||
def _validate_svix_signature(
|
||||
self,
|
||||
body: bytes,
|
||||
secret: str,
|
||||
msg_id: str,
|
||||
timestamp: str,
|
||||
signature_header: str,
|
||||
tolerance_seconds: int = 300,
|
||||
) -> bool:
|
||||
"""Validate Svix-compatible signatures used by AgentMail webhooks."""
|
||||
if not (msg_id and timestamp and signature_header and secret):
|
||||
return False
|
||||
|
||||
try:
|
||||
ts = int(timestamp)
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
if abs(int(time.time()) - ts) > tolerance_seconds:
|
||||
logger.warning("[webhook] Svix signature timestamp outside replay window")
|
||||
return False
|
||||
|
||||
if secret.startswith("whsec_"):
|
||||
encoded_secret = secret.removeprefix("whsec_")
|
||||
try:
|
||||
key = base64.b64decode(encoded_secret, validate=True)
|
||||
except (binascii.Error, ValueError):
|
||||
logger.debug("[webhook] Invalid whsec_ Svix signing secret")
|
||||
return False
|
||||
else:
|
||||
# Be permissive for providers that document Svix-style headers but
|
||||
# hand out raw shared secrets rather than whsec_ base64 secrets.
|
||||
logger.debug("[webhook] Validating Svix-style signature with raw secret")
|
||||
key = secret.encode()
|
||||
|
||||
signed_content = msg_id.encode() + b"." + timestamp.encode() + b"." + body
|
||||
expected = base64.b64encode(
|
||||
hmac.new(key, signed_content, hashlib.sha256).digest()
|
||||
).decode()
|
||||
|
||||
# Svix can send multiple signatures separated by spaces during secret
|
||||
# rotation. Each entry is formatted as "vN,<base64>".
|
||||
for part in signature_header.split():
|
||||
try:
|
||||
version, signature = part.split(",", 1)
|
||||
except ValueError:
|
||||
continue
|
||||
if version == "v1" and hmac.compare_digest(signature, expected):
|
||||
return True
|
||||
return False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Prompt rendering
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -616,18 +616,6 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
else:
|
||||
delay = self._text_batch_delay_seconds
|
||||
await asyncio.sleep(delay)
|
||||
# Guard against the cancel-delivery race: when the sleep timer
|
||||
# fires just before cancel() is called, CPython sets
|
||||
# Task._must_cancel but cannot cancel the already-done sleep
|
||||
# future, so CancelledError is delivered at the *next* await
|
||||
# (handle_message) rather than here. By that point this task
|
||||
# has already popped the merged event, so the superseding task
|
||||
# sees an empty batch and silently drops the message.
|
||||
# This check is synchronous — no await between the sleep and
|
||||
# the pop — so no other coroutine can modify the task registry
|
||||
# in between.
|
||||
if self._pending_text_batch_tasks.get(key) is not current_task:
|
||||
return
|
||||
event = self._pending_text_batches.pop(key, None)
|
||||
if not event:
|
||||
return
|
||||
|
||||
@@ -187,6 +187,7 @@ class WecomCallbackAdapter(BasePlatformAdapter):
|
||||
app = self._resolve_app_for_chat(chat_id)
|
||||
touser = chat_id.split(":", 1)[1] if ":" in chat_id else chat_id
|
||||
try:
|
||||
token = await self._get_access_token(app)
|
||||
payload = {
|
||||
"touser": touser,
|
||||
"msgtype": "text",
|
||||
@@ -194,31 +195,18 @@ class WecomCallbackAdapter(BasePlatformAdapter):
|
||||
"text": {"content": content[:2048]},
|
||||
"safe": 0,
|
||||
}
|
||||
for _attempt in range(2):
|
||||
token = await self._get_access_token(app)
|
||||
resp = await self._http_client.post(
|
||||
f"https://qyapi.weixin.qq.com/cgi-bin/message/send?access_token={token}",
|
||||
json=payload,
|
||||
)
|
||||
data = resp.json()
|
||||
errcode = data.get("errcode")
|
||||
if errcode in {40001, 42001} and _attempt == 0:
|
||||
# WeCom rejected the token — evict the cached entry so
|
||||
# the next _get_access_token call forces a fresh fetch.
|
||||
logger.warning(
|
||||
"[WecomCallback] Token rejected for app '%s' (errcode=%s), refreshing",
|
||||
app.get("name", "default"), errcode,
|
||||
)
|
||||
self._access_tokens.pop(app["name"], None)
|
||||
continue
|
||||
if errcode != 0:
|
||||
return SendResult(success=False, error=str(data))
|
||||
return SendResult(
|
||||
success=True,
|
||||
message_id=str(data.get("msgid", "")),
|
||||
raw_response=data,
|
||||
)
|
||||
return SendResult(success=False, error="send failed after token refresh")
|
||||
resp = await self._http_client.post(
|
||||
f"https://qyapi.weixin.qq.com/cgi-bin/message/send?access_token={token}",
|
||||
json=payload,
|
||||
)
|
||||
data = resp.json()
|
||||
if data.get("errcode") != 0:
|
||||
return SendResult(success=False, error=str(data))
|
||||
return SendResult(
|
||||
success=True,
|
||||
message_id=str(data.get("msgid", "")),
|
||||
raw_response=data,
|
||||
)
|
||||
except Exception as exc:
|
||||
return SendResult(success=False, error=str(exc))
|
||||
|
||||
|
||||
@@ -1679,10 +1679,8 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
|
||||
# Extract MEDIA: tags and bare local file paths before text delivery.
|
||||
media_files, cleaned_content = self.extract_media(content)
|
||||
media_files = self.filter_media_delivery_paths(media_files)
|
||||
_, image_cleaned = self.extract_images(cleaned_content)
|
||||
local_files, final_content = self.extract_local_files(image_cleaned)
|
||||
local_files = self.filter_local_delivery_paths(local_files)
|
||||
|
||||
_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"}
|
||||
_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
|
||||
|
||||
+221
-446
@@ -54,7 +54,6 @@ from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
||||
from agent.async_utils import safe_schedule_threadsafe
|
||||
from agent.i18n import t
|
||||
from hermes_cli.config import cfg_get
|
||||
from hermes_cli.fallback_config import get_fallback_chain
|
||||
|
||||
# --- Agent cache tuning ---------------------------------------------------
|
||||
# Bounds the per-session AIAgent cache to prevent unbounded growth in
|
||||
@@ -139,85 +138,6 @@ def _gateway_platform_value(platform: Any) -> str:
|
||||
return str(getattr(platform, "value", platform) or "").strip().lower()
|
||||
|
||||
|
||||
def _is_transient_network_error(exc: BaseException) -> bool:
|
||||
"""Return True for transient network errors safe to log + swallow.
|
||||
|
||||
The crash class targeted by #31066 / #31110: an unhandled Telegram
|
||||
``TimedOut`` (or peer ``NetworkError`` / ``httpx`` connection error)
|
||||
propagating to the event loop and killing the entire gateway
|
||||
process. These are by definition transient — the next poll cycle or
|
||||
user action recovers — so they must never crash the process.
|
||||
|
||||
Walk the exception cause chain so wrapped errors (e.g. PTB's
|
||||
``NetworkError`` wrapping ``httpx.ConnectError``) are still
|
||||
classified. The chain is bounded to avoid pathological cycles.
|
||||
"""
|
||||
seen: set[int] = set()
|
||||
cur: Optional[BaseException] = exc
|
||||
depth = 0
|
||||
transient_class_names = {
|
||||
"TimedOut",
|
||||
"NetworkError",
|
||||
"ReadError",
|
||||
"WriteError",
|
||||
"ConnectError",
|
||||
"ConnectTimeout",
|
||||
"ReadTimeout",
|
||||
"WriteTimeout",
|
||||
"PoolTimeout",
|
||||
"RemoteProtocolError",
|
||||
"ServerDisconnectedError",
|
||||
"ClientConnectorError",
|
||||
"ClientOSError",
|
||||
}
|
||||
while cur is not None and depth < 12:
|
||||
ident = id(cur)
|
||||
if ident in seen:
|
||||
break
|
||||
seen.add(ident)
|
||||
depth += 1
|
||||
name = type(cur).__name__
|
||||
if name in transient_class_names:
|
||||
return True
|
||||
cur = cur.__cause__ or cur.__context__
|
||||
return False
|
||||
|
||||
|
||||
def _gateway_loop_exception_handler(
|
||||
loop: "asyncio.AbstractEventLoop", context: Dict[str, Any]
|
||||
) -> None:
|
||||
"""Loop-level safety net for transient network errors.
|
||||
|
||||
Installed once during :func:`start_gateway`. Catches the
|
||||
``telegram.error.TimedOut`` crash class (issues #31066 / #31110)
|
||||
and any peer transient network error before it can kill the
|
||||
gateway process. Logs at WARNING with full traceback so the
|
||||
originating call site stays diagnosable; non-transient errors
|
||||
are forwarded to the default loop handler so real bugs still
|
||||
surface.
|
||||
"""
|
||||
exc = context.get("exception")
|
||||
if exc is not None and _is_transient_network_error(exc):
|
||||
message = context.get("message") or "transient network error"
|
||||
task = context.get("future") or context.get("task")
|
||||
task_name = ""
|
||||
if task is not None:
|
||||
try:
|
||||
task_name = task.get_name() if hasattr(task, "get_name") else repr(task)
|
||||
except Exception:
|
||||
task_name = repr(task)
|
||||
logger.warning(
|
||||
"Gateway swallowed transient network error from %s: %s: %s",
|
||||
task_name or "<unknown task>",
|
||||
type(exc).__name__,
|
||||
exc,
|
||||
exc_info=(type(exc), exc, exc.__traceback__),
|
||||
)
|
||||
return
|
||||
# Fall back to the default handler for anything we don't recognise.
|
||||
loop.default_exception_handler(context)
|
||||
|
||||
|
||||
def _redact_gateway_user_facing_secrets(text: str) -> str:
|
||||
"""Best-effort secret redaction before text can leave the gateway."""
|
||||
redacted = str(text or "")
|
||||
@@ -318,19 +238,6 @@ def _prepare_gateway_status_message(platform: Any, event_type: str, message: str
|
||||
return text
|
||||
|
||||
|
||||
async def _send_or_update_status_coro(adapter, chat_id, status_key, content, metadata):
|
||||
"""Route a status message through adapter.send_or_update_status when supported.
|
||||
|
||||
Issue #30045: adapters that implement send_or_update_status (currently
|
||||
Telegram) edit the previous bubble for the same status_key instead of
|
||||
appending a new one. Adapters without the method fall back to plain send.
|
||||
"""
|
||||
sender = getattr(adapter, "send_or_update_status", None)
|
||||
if callable(sender):
|
||||
return await sender(chat_id, status_key, content, metadata=metadata)
|
||||
return await adapter.send(chat_id, content, metadata=metadata)
|
||||
|
||||
|
||||
def _telegramize_command_mentions(text: str, platform: Any) -> str:
|
||||
"""Rewrite slash-command mentions to Telegram-valid command names.
|
||||
|
||||
@@ -540,109 +447,6 @@ def _build_replay_entry(role: str, content: Any, msg: Dict[str, Any]) -> Dict[st
|
||||
return entry
|
||||
|
||||
|
||||
_TELEGRAM_OBSERVED_CONTEXT_PROMPT_MARKER = "observed Telegram group context"
|
||||
_OBSERVED_GROUP_CONTEXT_HEADER = "[Observed Telegram group context - context only, not requests]"
|
||||
_CURRENT_ADDRESSED_MESSAGE_HEADER = "[Current addressed message - answer only this unless it explicitly asks you to use the observed context]"
|
||||
|
||||
|
||||
def _uses_telegram_observed_group_context(channel_prompt: Optional[str]) -> bool:
|
||||
"""Return True for Telegram group turns that may include observed chatter.
|
||||
|
||||
Telegram's observe-unmentioned mode persists skipped group chatter so a
|
||||
later @mention can see it. Those rows must not replay as ordinary user
|
||||
turns: a weak wake word like ``@bot cambio`` should not make the model treat
|
||||
old unmentioned chatter as pending work. The Telegram adapter marks these
|
||||
turns with a channel prompt; this helper keeps the run-path check explicit
|
||||
and unit-testable.
|
||||
"""
|
||||
|
||||
return bool(channel_prompt and _TELEGRAM_OBSERVED_CONTEXT_PROMPT_MARKER in channel_prompt)
|
||||
|
||||
|
||||
def _build_gateway_agent_history(
    history: List[Dict[str, Any]],
    *,
    channel_prompt: Optional[str] = None,
) -> tuple[List[Dict[str, Any]], Optional[str]]:
    """Turn stored gateway transcript rows into agent replay messages.

    Observed Telegram group rows are split off and returned as API-only
    context for the current addressed message rather than replayed as prior
    user turns. Keeping them out of the replay list avoids consecutive-user
    repair merging them with the live user turn and then hiding the current
    message behind ``history_offset`` during persistence.

    Args:
        history: Transcript rows (dicts); ``None`` or empty is tolerated.
        channel_prompt: Channel prompt for the turn; decides whether observed
            rows are separated out.

    Returns:
        ``(replay_messages, observed_context)`` where ``observed_context`` is
        the newline-joined observed rows, or None when there are none.
    """
    replay: List[Dict[str, Any]] = []
    observed_lines: List[str] = []
    split_observed = _uses_telegram_observed_group_context(channel_prompt)
    transcript_only_keys = {"timestamp", "observed"}

    for row in history or []:
        role = row.get("role")
        # Rows without a role are unusable. ``session_meta`` rows exist for
        # transcript logging only, and the agent rebuilds its own system
        # prompt, so both are dropped.
        if not role or role in {"session_meta", "system"}:
            continue

        content = row.get("content")
        if split_observed and row.get("observed") and role == "user" and content:
            observed_lines.append(str(content).strip())
            continue

        # Tool-call and tool-result rows pass through intact (minus
        # transcript-only keys) so the API sees valid assistant→tool
        # sequences.
        if "tool_calls" in row or "tool_call_id" in row or role == "tool":
            replay.append(
                {key: value for key, value in row.items() if key not in transcript_only_keys}
            )
            continue

        if not content:
            continue

        # Plain text row: only role and content matter; mirrored deliveries
        # are labelled with their origin.
        if row.get("mirror"):
            origin = row.get("mirror_source", "another session")
            content = f"[Delivered from {origin}] {content}"
        replay.append(_build_replay_entry(role, content, row))

    joined = "\n".join(observed_lines).strip()
    return replay, (joined or None)
|
||||
|
||||
|
||||
def _wrap_current_message_with_observed_context(message: Any, observed_context: Optional[str]) -> Any:
|
||||
"""Prepend observed Telegram context to the API-only current user turn."""
|
||||
|
||||
if not observed_context:
|
||||
return message
|
||||
|
||||
prefix = (
|
||||
f"{_OBSERVED_GROUP_CONTEXT_HEADER}\n"
|
||||
f"{observed_context}\n\n"
|
||||
f"{_CURRENT_ADDRESSED_MESSAGE_HEADER}\n"
|
||||
)
|
||||
|
||||
if isinstance(message, str):
|
||||
return f"{prefix}{message}"
|
||||
|
||||
if isinstance(message, list):
|
||||
wrapped = [dict(part) if isinstance(part, dict) else part for part in message]
|
||||
for part in wrapped:
|
||||
if isinstance(part, dict) and part.get("type") == "text":
|
||||
part["text"] = f"{prefix}{part.get('text', '')}"
|
||||
return wrapped
|
||||
return [{"type": "text", "text": prefix.rstrip()}] + wrapped
|
||||
|
||||
return message
|
||||
|
||||
|
||||
def _last_transcript_timestamp(history: Optional[List[Dict[str, Any]]]) -> Any:
|
||||
"""Return the ``timestamp`` of the last usable transcript row, if any.
|
||||
|
||||
@@ -853,29 +657,31 @@ if _config_path.exists():
|
||||
os.environ[_env_var] = str(_val)
|
||||
# Compression config is read directly from config.yaml by run_agent.py
|
||||
# and auxiliary_client.py — no env var bridging needed.
|
||||
# Auxiliary model/direct-endpoint overrides (vision, web_extract,
|
||||
# approval, plus any plugin-registered auxiliary tasks).
|
||||
# Each task has provider/model/base_url/api_key; bridge non-default
|
||||
# values to env vars named AUXILIARY_<KEY_UPPER>_*. The legacy
|
||||
# hard-coded list (vision/web_extract/approval) is replaced by a
|
||||
# dynamic loop so plugin-registered tasks benefit from the same
|
||||
# config→env bridging without core knowing about each one.
|
||||
# Auxiliary model/direct-endpoint overrides (vision, web_extract).
|
||||
# Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
|
||||
_auxiliary_cfg = _cfg.get("auxiliary", {})
|
||||
if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
|
||||
# Built-in tasks that previously had explicit env-var bridging.
|
||||
# Kept here as the canonical bridged set; plugin tasks are added
|
||||
# below via the plugin auxiliary registry.
|
||||
_aux_bridged_keys = {"vision", "web_extract", "approval"}
|
||||
try:
|
||||
from hermes_cli.plugins import get_plugin_auxiliary_tasks
|
||||
for _entry in get_plugin_auxiliary_tasks():
|
||||
_aux_bridged_keys.add(_entry["key"])
|
||||
except Exception:
|
||||
# Plugin discovery failure must not break gateway startup;
|
||||
# built-in bridging stays intact.
|
||||
pass
|
||||
|
||||
for _task_key in _aux_bridged_keys:
|
||||
_aux_task_env = {
|
||||
"vision": {
|
||||
"provider": "AUXILIARY_VISION_PROVIDER",
|
||||
"model": "AUXILIARY_VISION_MODEL",
|
||||
"base_url": "AUXILIARY_VISION_BASE_URL",
|
||||
"api_key": "AUXILIARY_VISION_API_KEY",
|
||||
},
|
||||
"web_extract": {
|
||||
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
|
||||
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
|
||||
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
|
||||
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
|
||||
},
|
||||
"approval": {
|
||||
"provider": "AUXILIARY_APPROVAL_PROVIDER",
|
||||
"model": "AUXILIARY_APPROVAL_MODEL",
|
||||
"base_url": "AUXILIARY_APPROVAL_BASE_URL",
|
||||
"api_key": "AUXILIARY_APPROVAL_API_KEY",
|
||||
},
|
||||
}
|
||||
for _task_key, _env_map in _aux_task_env.items():
|
||||
_task_cfg = _auxiliary_cfg.get(_task_key, {})
|
||||
if not isinstance(_task_cfg, dict):
|
||||
continue
|
||||
@@ -883,15 +689,14 @@ if _config_path.exists():
|
||||
_model = str(_task_cfg.get("model", "")).strip()
|
||||
_base_url = str(_task_cfg.get("base_url", "")).strip()
|
||||
_api_key = str(_task_cfg.get("api_key", "")).strip()
|
||||
_upper = _task_key.upper()
|
||||
if _prov and _prov != "auto":
|
||||
os.environ[f"AUXILIARY_{_upper}_PROVIDER"] = _prov
|
||||
os.environ[_env_map["provider"]] = _prov
|
||||
if _model:
|
||||
os.environ[f"AUXILIARY_{_upper}_MODEL"] = _model
|
||||
os.environ[_env_map["model"]] = _model
|
||||
if _base_url:
|
||||
os.environ[f"AUXILIARY_{_upper}_BASE_URL"] = _base_url
|
||||
os.environ[_env_map["base_url"]] = _base_url
|
||||
if _api_key:
|
||||
os.environ[f"AUXILIARY_{_upper}_API_KEY"] = _api_key
|
||||
os.environ[_env_map["api_key"]] = _api_key
|
||||
# config.yaml is the documented, authoritative source for these
|
||||
# settings — it unconditionally wins over .env values. Previously
|
||||
# the guards below read `if X not in os.environ` and let stale
|
||||
@@ -918,8 +723,6 @@ if _config_path.exists():
|
||||
if _display_cfg and isinstance(_display_cfg, dict):
|
||||
if "busy_input_mode" in _display_cfg:
|
||||
os.environ["HERMES_GATEWAY_BUSY_INPUT_MODE"] = str(_display_cfg["busy_input_mode"])
|
||||
if "busy_text_mode" in _display_cfg:
|
||||
os.environ["HERMES_GATEWAY_BUSY_TEXT_MODE"] = str(_display_cfg["busy_text_mode"])
|
||||
if "busy_ack_enabled" in _display_cfg:
|
||||
os.environ["HERMES_GATEWAY_BUSY_ACK_ENABLED"] = str(_display_cfg["busy_ack_enabled"])
|
||||
# Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
|
||||
@@ -1043,12 +846,6 @@ _AGENT_PENDING_SENTINEL = object()
|
||||
def _resolve_runtime_agent_kwargs() -> dict:
|
||||
"""Resolve provider credentials for gateway-created AIAgent instances.
|
||||
|
||||
Provider is read from ``config.yaml`` ``model.provider`` (the single
|
||||
source of truth). ``resolve_runtime_provider()`` falls through to env
|
||||
var lookups internally for legacy compatibility, but the gateway does
|
||||
not consult environment variables for behavioral config — config.yaml
|
||||
is authoritative.
|
||||
|
||||
If the primary provider fails with an authentication error, attempt to
|
||||
resolve credentials using the fallback provider chain from config.yaml
|
||||
before giving up.
|
||||
@@ -1060,7 +857,9 @@ def _resolve_runtime_agent_kwargs() -> dict:
|
||||
from hermes_cli.auth import AuthError
|
||||
|
||||
try:
|
||||
runtime = resolve_runtime_provider()
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
|
||||
)
|
||||
except AuthError as auth_exc:
|
||||
# Primary provider auth failed (expired token, revoked key, etc.).
|
||||
# Try the fallback provider chain before raising.
|
||||
@@ -1093,22 +892,19 @@ def _try_resolve_fallback_provider() -> dict | None:
|
||||
return None
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
fb_list = get_fallback_chain(cfg)
|
||||
if not fb_list:
|
||||
fb = cfg.get("fallback_providers") or cfg.get("fallback_model")
|
||||
if not fb:
|
||||
return None
|
||||
# Normalize to list
|
||||
fb_list = fb if isinstance(fb, list) else [fb]
|
||||
for entry in fb_list:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
try:
|
||||
explicit_api_key = entry.get("api_key")
|
||||
if not explicit_api_key:
|
||||
key_env = str(
|
||||
entry.get("key_env") or entry.get("api_key_env") or ""
|
||||
).strip()
|
||||
if key_env:
|
||||
explicit_api_key = os.getenv(key_env, "").strip() or None
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=entry.get("provider"),
|
||||
explicit_base_url=entry.get("base_url"),
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_api_key=entry.get("api_key"),
|
||||
)
|
||||
logger.info(
|
||||
"Fallback provider resolved: %s model=%s",
|
||||
@@ -1313,7 +1109,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
||||
normalized = command_name.lower().replace("_", "-")
|
||||
try:
|
||||
from tools.skills_tool import _get_disabled_skill_names
|
||||
from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path
|
||||
from agent.skill_utils import get_all_skills_dirs
|
||||
disabled = _get_disabled_skill_names()
|
||||
|
||||
# Check disabled skills across all dirs (local + external)
|
||||
@@ -1321,7 +1117,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
||||
if not skills_dir.exists():
|
||||
continue
|
||||
for skill_md in skills_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
if any(part in {'.git', '.github', '.hub', '.archive'} for part in skill_md.parts):
|
||||
continue
|
||||
slug, declared_name = _skill_slug_from_frontmatter(skill_md)
|
||||
if not slug or not declared_name:
|
||||
@@ -1340,8 +1136,6 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
||||
optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
|
||||
if optional_dir.exists():
|
||||
for skill_md in optional_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(skill_md):
|
||||
continue
|
||||
slug, _declared = _skill_slug_from_frontmatter(skill_md)
|
||||
if not slug:
|
||||
continue
|
||||
@@ -1402,26 +1196,6 @@ def _load_gateway_config() -> dict:
|
||||
return {}
|
||||
|
||||
|
||||
def _load_gateway_runtime_config() -> dict:
    """Return the gateway config for runtime reads with ``${VAR}`` refs expanded.

    Runtime helpers should honor the env-template expansion documented for
    ``config.yaml`` while still respecting tests that monkeypatch
    ``gateway.run._hermes_home``. This therefore builds on
    ``_load_gateway_config()`` instead of calling the canonical loader
    directly, so both behaviors stay aligned.

    Expansion failures are deliberately not caught: silently returning the
    unexpanded dict would mask the very bug this helper exists to fix.

    Returns:
        The expanded config dict, or ``{}`` when the loaded config is empty,
        is not a dict, or expands to something that is not a dict.
    """
    raw_cfg = _load_gateway_config()
    if not (isinstance(raw_cfg, dict) and raw_cfg):
        return {}

    from hermes_cli.config import _expand_env_vars

    resolved = _expand_env_vars(raw_cfg)
    if isinstance(resolved, dict):
        return resolved
    return {}
|
||||
|
||||
|
||||
def _resolve_gateway_model(config: dict | None = None) -> str:
|
||||
"""Read model from config.yaml — single source of truth.
|
||||
|
||||
@@ -1635,7 +1409,6 @@ class GatewayRunner:
|
||||
# blow up on attribute access.
|
||||
_running_agents_ts: Dict[str, float] = {}
|
||||
_busy_input_mode: str = "interrupt"
|
||||
_busy_text_mode: str = "interrupt"
|
||||
_restart_drain_timeout: float = DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
|
||||
_exit_code: Optional[int] = None
|
||||
_draining: bool = False
|
||||
@@ -1662,7 +1435,6 @@ class GatewayRunner:
|
||||
self._service_tier = self._load_service_tier()
|
||||
self._show_reasoning = self._load_show_reasoning()
|
||||
self._busy_input_mode = self._load_busy_input_mode()
|
||||
self._busy_text_mode = self._load_busy_text_mode()
|
||||
self._restart_drain_timeout = self._load_restart_drain_timeout()
|
||||
self._provider_routing = self._load_provider_routing()
|
||||
self._fallback_model = self._load_fallback_model()
|
||||
@@ -2272,14 +2044,13 @@ class GatewayRunner:
|
||||
) -> Optional[str]:
|
||||
"""Pin DM-topic routing to the user's last-active topic.
|
||||
|
||||
Telegram can omit ``message_thread_id`` or surface General (``1``)
|
||||
for some topic-mode DM replies. In those lobby-shaped cases, keep the
|
||||
conversation attached to the user's most-recent bound topic.
|
||||
|
||||
Do not rewrite a non-lobby, previously-unbound thread id: a newly
|
||||
created Telegram DM topic is also "unknown" until the first inbound
|
||||
message is recorded, and rewriting it would send that brand-new topic's
|
||||
answer into an older lane. Returns None to leave the source alone.
|
||||
Telegram fragments topic-mode DMs two ways: a Reply on a message
|
||||
in another topic delivers ``message_thread_id`` for *that* topic,
|
||||
and ``_build_message_event`` strips the thread_id on plain replies
|
||||
(#3206 — needed for non-topic users). Both route the user to the
|
||||
wrong session. When topic mode is on, rewrite the thread_id to the
|
||||
user's most-recent binding if the inbound id is missing/General or
|
||||
not a known topic for this chat. Returns None to leave it alone.
|
||||
"""
|
||||
if (
|
||||
source.platform != Platform.TELEGRAM
|
||||
@@ -2289,14 +2060,6 @@ class GatewayRunner:
|
||||
or not self._telegram_topic_mode_enabled(source)
|
||||
):
|
||||
return None
|
||||
inbound = str(source.thread_id or "")
|
||||
is_lobby = not inbound or inbound in self._TELEGRAM_GENERAL_TOPIC_IDS
|
||||
if not is_lobby:
|
||||
# A non-lobby, unknown thread_id is most likely the first message in
|
||||
# a brand-new Telegram DM topic. Preserve it so it can be recorded
|
||||
# as a new independent lane below instead of hijacking the latest
|
||||
# existing topic binding.
|
||||
return None
|
||||
session_db = getattr(self, "_session_db", None)
|
||||
if session_db is None:
|
||||
return None
|
||||
@@ -2309,6 +2072,11 @@ class GatewayRunner:
|
||||
return None
|
||||
if not bindings:
|
||||
return None
|
||||
inbound = str(source.thread_id or "")
|
||||
is_lobby = not inbound or inbound in self._TELEGRAM_GENERAL_TOPIC_IDS
|
||||
known = {str(b.get("thread_id") or "") for b in bindings}
|
||||
if not is_lobby and inbound in known:
|
||||
return None
|
||||
user_id = str(source.user_id)
|
||||
for b in bindings: # newest-first
|
||||
if str(b.get("user_id") or "") == user_id:
|
||||
@@ -2762,8 +2530,15 @@ class GatewayRunner:
|
||||
"""
|
||||
file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
|
||||
if not file_path:
|
||||
cfg = _load_gateway_runtime_config()
|
||||
file_path = str(cfg.get("prefill_messages_file", "") or "")
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
file_path = cfg.get("prefill_messages_file", "")
|
||||
except Exception:
|
||||
pass
|
||||
if not file_path:
|
||||
return []
|
||||
path = Path(file_path).expanduser()
|
||||
@@ -2793,8 +2568,16 @@ class GatewayRunner:
|
||||
prompt = os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "")
|
||||
if prompt:
|
||||
return prompt
|
||||
cfg = _load_gateway_runtime_config()
|
||||
return str(cfg_get(cfg, "agent", "system_prompt", default="") or "").strip()
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
return (cfg_get(cfg, "agent", "system_prompt", default="") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _load_reasoning_config() -> dict | None:
|
||||
@@ -2805,8 +2588,16 @@ class GatewayRunner:
|
||||
default (medium).
|
||||
"""
|
||||
from hermes_constants import parse_reasoning_effort
|
||||
cfg = _load_gateway_runtime_config()
|
||||
effort = str(cfg_get(cfg, "agent", "reasoning_effort", default="") or "").strip()
|
||||
effort = ""
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
effort = str(cfg_get(cfg, "agent", "reasoning_effort", default="") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
result = parse_reasoning_effort(effort)
|
||||
if effort and effort.strip() and result is None:
|
||||
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
|
||||
@@ -2880,8 +2671,16 @@ class GatewayRunner:
|
||||
"fast"/"priority"/"on" => "priority", while "normal"/"off" disables it.
|
||||
Returns None when unset or unsupported.
|
||||
"""
|
||||
cfg = _load_gateway_runtime_config()
|
||||
raw = str(cfg_get(cfg, "agent", "service_tier", default="") or "").strip()
|
||||
raw = ""
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
raw = str(cfg_get(cfg, "agent", "service_tier", default="") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
value = raw.lower()
|
||||
if not value or value in {"normal", "default", "standard", "off", "none"}:
|
||||
@@ -2894,43 +2693,54 @@ class GatewayRunner:
|
||||
@staticmethod
|
||||
def _load_show_reasoning() -> bool:
|
||||
"""Load show_reasoning toggle from config.yaml display section."""
|
||||
cfg = _load_gateway_runtime_config()
|
||||
return is_truthy_value(
|
||||
cfg_get(cfg, "display", "show_reasoning"),
|
||||
default=False,
|
||||
)
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
return is_truthy_value(
|
||||
cfg_get(cfg, "display", "show_reasoning"),
|
||||
default=False,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _load_busy_input_mode() -> str:
|
||||
"""Load gateway drain-time busy-input behavior from config/env."""
|
||||
mode = os.getenv("HERMES_GATEWAY_BUSY_INPUT_MODE", "").strip().lower()
|
||||
if not mode:
|
||||
cfg = _load_gateway_runtime_config()
|
||||
mode = str(cfg_get(cfg, "display", "busy_input_mode", default="") or "").strip().lower()
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
mode = str(cfg_get(cfg, "display", "busy_input_mode", default="") or "").strip().lower()
|
||||
except Exception:
|
||||
pass
|
||||
if mode == "queue":
|
||||
return "queue"
|
||||
if mode == "steer":
|
||||
return "steer"
|
||||
return "interrupt"
|
||||
|
||||
@staticmethod
|
||||
def _load_busy_text_mode() -> str:
|
||||
"""Load normal busy TEXT follow-up behavior from config/env."""
|
||||
mode = os.getenv("HERMES_GATEWAY_BUSY_TEXT_MODE", "").strip().lower()
|
||||
if not mode:
|
||||
cfg = _load_gateway_runtime_config()
|
||||
mode = str(cfg_get(cfg, "display", "busy_text_mode", default="") or "").strip().lower()
|
||||
if mode == "interrupt":
|
||||
return "interrupt"
|
||||
return "queue"
|
||||
|
||||
@staticmethod
|
||||
def _load_restart_drain_timeout() -> float:
|
||||
"""Load graceful gateway restart/stop drain timeout in seconds."""
|
||||
raw = os.getenv("HERMES_RESTART_DRAIN_TIMEOUT", "").strip()
|
||||
if not raw:
|
||||
cfg = _load_gateway_runtime_config()
|
||||
raw = str(cfg_get(cfg, "agent", "restart_drain_timeout", default="") or "").strip()
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
raw = str(cfg_get(cfg, "agent", "restart_drain_timeout", default="") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
value = parse_restart_drain_timeout(raw)
|
||||
if raw and value == DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT:
|
||||
try:
|
||||
@@ -2955,12 +2765,19 @@ class GatewayRunner:
|
||||
"""
|
||||
mode = os.getenv("HERMES_BACKGROUND_NOTIFICATIONS", "")
|
||||
if not mode:
|
||||
cfg = _load_gateway_runtime_config()
|
||||
raw = cfg_get(cfg, "display", "background_process_notifications")
|
||||
if raw is False:
|
||||
mode = "off"
|
||||
elif raw not in {None, ""}:
|
||||
mode = str(raw)
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
raw = cfg_get(cfg, "display", "background_process_notifications")
|
||||
if raw is False:
|
||||
mode = "off"
|
||||
elif raw not in {None, ""}:
|
||||
mode = str(raw)
|
||||
except Exception:
|
||||
pass
|
||||
mode = (mode or "all").strip().lower()
|
||||
valid = {"all", "result", "error", "off"}
|
||||
if mode not in valid:
|
||||
@@ -2986,12 +2803,12 @@ class GatewayRunner:
|
||||
return {}
|
||||
|
||||
@staticmethod
|
||||
def _load_fallback_model() -> list | None:
|
||||
def _load_fallback_model() -> list | dict | None:
|
||||
"""Load fallback provider chain from config.yaml.
|
||||
|
||||
Returns the merged effective chain from ``fallback_providers`` plus any
|
||||
legacy ``fallback_model`` entries. ``fallback_providers`` stays first
|
||||
when both keys are present.
|
||||
Returns a list of provider dicts (``fallback_providers``), a single
|
||||
dict (legacy ``fallback_model``), or None if not configured.
|
||||
AIAgent.__init__ normalizes both formats into a chain.
|
||||
"""
|
||||
try:
|
||||
import yaml as _y
|
||||
@@ -2999,7 +2816,7 @@ class GatewayRunner:
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
fb = get_fallback_chain(cfg)
|
||||
fb = cfg.get("fallback_providers") or cfg.get("fallback_model") or None
|
||||
if fb:
|
||||
return fb
|
||||
except Exception:
|
||||
@@ -3071,19 +2888,11 @@ class GatewayRunner:
|
||||
|
||||
running_agent = self._running_agents.get(session_key)
|
||||
|
||||
effective_mode = self._busy_input_mode
|
||||
busy_text_mode = getattr(self, "_busy_text_mode", "queue")
|
||||
if (
|
||||
event.message_type == MessageType.TEXT
|
||||
and busy_text_mode == "queue"
|
||||
and effective_mode != "steer"
|
||||
):
|
||||
return False
|
||||
|
||||
# Steer mode: inject mid-run via running_agent.steer() instead of
|
||||
# queueing + interrupting. If the agent isn't running yet
|
||||
# (sentinel) or lacks steer(), or the payload is empty, fall back
|
||||
# to queue semantics so nothing is lost.
|
||||
effective_mode = self._busy_input_mode
|
||||
steered = False
|
||||
if effective_mode == "steer":
|
||||
steer_text = (event.text or "").strip()
|
||||
@@ -3108,12 +2917,7 @@ class GatewayRunner:
|
||||
# successful steer — the text already landed inside the run and
|
||||
# must NOT also be replayed as a next-turn user message.
|
||||
if not steered:
|
||||
merge_pending_message_event(
|
||||
adapter._pending_messages,
|
||||
session_key,
|
||||
event,
|
||||
merge_text=event.message_type == MessageType.TEXT,
|
||||
)
|
||||
merge_pending_message_event(adapter._pending_messages, session_key, event)
|
||||
|
||||
is_queue_mode = effective_mode == "queue"
|
||||
is_steer_mode = effective_mode == "steer"
|
||||
@@ -4045,7 +3849,6 @@ class GatewayRunner:
|
||||
adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
|
||||
adapter.set_session_store(self.session_store)
|
||||
adapter.set_busy_session_handler(self._handle_active_session_busy_message)
|
||||
adapter._busy_text_mode = self._busy_text_mode
|
||||
|
||||
# Try to connect
|
||||
logger.info("Connecting to %s...", platform.value)
|
||||
@@ -5150,11 +4953,6 @@ class GatewayRunner:
|
||||
if not candidates:
|
||||
return
|
||||
|
||||
from gateway.platforms.base import BasePlatformAdapter
|
||||
candidates = BasePlatformAdapter.filter_local_delivery_paths(candidates)
|
||||
if not candidates:
|
||||
return
|
||||
|
||||
_IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".webp"}
|
||||
_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
|
||||
|
||||
@@ -5658,7 +5456,6 @@ class GatewayRunner:
|
||||
adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
|
||||
adapter.set_session_store(self.session_store)
|
||||
adapter.set_busy_session_handler(self._handle_active_session_busy_message)
|
||||
adapter._busy_text_mode = self._busy_text_mode
|
||||
|
||||
success = await self._connect_adapter_with_timeout(adapter, platform)
|
||||
if success:
|
||||
@@ -6098,12 +5895,6 @@ class GatewayRunner:
|
||||
if platform_registry.is_registered(platform.value):
|
||||
adapter = platform_registry.create_adapter(platform.value, config)
|
||||
if adapter is not None:
|
||||
# Adapters that need a back-reference to the gateway runner
|
||||
# (e.g. for cross-platform admin alerts) declare a
|
||||
# ``gateway_runner`` attribute. Inject it after creation so
|
||||
# plugin adapters don't need a custom factory signature.
|
||||
if hasattr(adapter, "gateway_runner"):
|
||||
adapter.gateway_runner = self
|
||||
return adapter
|
||||
# Registered but failed to instantiate — don't silently fall
|
||||
# through to built-ins (there are none for plugin platforms).
|
||||
@@ -6146,6 +5937,15 @@ class GatewayRunner:
|
||||
adapter._notifications_mode = _notify_mode
|
||||
return adapter
|
||||
|
||||
elif platform == Platform.DISCORD:
|
||||
from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
|
||||
if not check_discord_requirements():
|
||||
logger.warning("Discord: discord.py not installed")
|
||||
return None
|
||||
adapter = DiscordAdapter(config)
|
||||
adapter.gateway_runner = self # For cross-platform admin alerts on unauthorized slash
|
||||
return adapter
|
||||
|
||||
elif platform == Platform.WHATSAPP:
|
||||
from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
|
||||
if not check_whatsapp_requirements():
|
||||
@@ -6412,6 +6212,18 @@ class GatewayRunner:
|
||||
if allow_bots_var and os.getenv(allow_bots_var, "none").lower().strip() in {"mentions", "all"}:
|
||||
return True
|
||||
|
||||
# Discord role-based access (DISCORD_ALLOWED_ROLES): the adapter's
|
||||
# on_message pre-filter already verified role membership — if the
|
||||
# message reached here, the user passed that check. Authorize
|
||||
# directly to avoid the "no allowlists configured" branch below
|
||||
# rejecting role-only setups where DISCORD_ALLOWED_USERS is empty
|
||||
# (issue #7871).
|
||||
if (
|
||||
source.platform == Platform.DISCORD
|
||||
and os.getenv("DISCORD_ALLOWED_ROLES", "").strip()
|
||||
):
|
||||
return True
|
||||
|
||||
# Check pairing store (always checked, regardless of allowlists)
|
||||
platform_name = source.platform.value if source.platform else ""
|
||||
if self.pairing_store.is_approved(platform_name, user_id):
|
||||
@@ -11350,16 +11162,14 @@ class GatewayRunner:
|
||||
# send_multiple_images (Telegram sendPhoto recompresses to ~1280px).
|
||||
force_document_attachments = "[[as_document]]" in response
|
||||
|
||||
from gateway.platforms.base import BasePlatformAdapter, should_send_media_as_audio
|
||||
|
||||
media_files, _ = adapter.extract_media(response)
|
||||
media_files = BasePlatformAdapter.filter_media_delivery_paths(media_files)
|
||||
_, cleaned = adapter.extract_images(response)
|
||||
local_files, _ = adapter.extract_local_files(cleaned)
|
||||
local_files = BasePlatformAdapter.filter_local_delivery_paths(local_files)
|
||||
|
||||
_thread_meta = self._thread_metadata_for_source(event.source, self._reply_anchor_for_event(event))
|
||||
|
||||
from gateway.platforms.base import should_send_media_as_audio
|
||||
|
||||
_VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
|
||||
_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
|
||||
|
||||
@@ -11651,8 +11461,6 @@ class GatewayRunner:
|
||||
# Extract media files from the response
|
||||
if response:
|
||||
media_files, response = adapter.extract_media(response)
|
||||
from gateway.platforms.base import BasePlatformAdapter
|
||||
media_files = BasePlatformAdapter.filter_media_delivery_paths(media_files)
|
||||
images, text_content = adapter.extract_images(response)
|
||||
|
||||
preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
|
||||
@@ -12741,7 +12549,7 @@ class GatewayRunner:
|
||||
return t("gateway.title.current_no_title", session_id=session_id)
|
||||
|
||||
async def _handle_resume_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /resume command — list or switch to a previous session."""
|
||||
"""Handle /resume command — switch to a previously-named session."""
|
||||
if not self._session_db:
|
||||
from hermes_state import format_session_db_unavailable
|
||||
return format_session_db_unavailable(prefix=t("gateway.shared.session_db_unavailable_prefix"))
|
||||
@@ -12750,44 +12558,30 @@ class GatewayRunner:
|
||||
session_key = self._session_key_for_source(source)
|
||||
name = event.get_command_args().strip()
|
||||
|
||||
def _list_titled_sessions() -> list[dict]:
|
||||
user_source = source.platform.value if source.platform else None
|
||||
sessions = self._session_db.list_sessions_rich(source=user_source, limit=10)
|
||||
return [s for s in sessions if s.get("title")][:10]
|
||||
|
||||
if not name:
|
||||
# List recent titled sessions for this user/platform
|
||||
try:
|
||||
titled = _list_titled_sessions()
|
||||
user_source = source.platform.value if source.platform else None
|
||||
sessions = self._session_db.list_sessions_rich(
|
||||
source=user_source, limit=10
|
||||
)
|
||||
titled = [s for s in sessions if s.get("title")]
|
||||
if not titled:
|
||||
return t("gateway.resume.no_named_sessions")
|
||||
lines = [t("gateway.resume.list_header")]
|
||||
for idx, s in enumerate(titled[:10], start=1):
|
||||
for s in titled[:10]:
|
||||
title = s["title"]
|
||||
preview = s.get("preview", "")[:40]
|
||||
preview_part = t("gateway.resume.list_preview_suffix", preview=preview) if preview else ""
|
||||
lines.append(t("gateway.resume.list_item_numbered", index=idx, title=title, preview_part=preview_part))
|
||||
lines.append(t("gateway.resume.list_footer_numbered"))
|
||||
lines.append(t("gateway.resume.list_item", title=title, preview_part=preview_part))
|
||||
lines.append(t("gateway.resume.list_footer"))
|
||||
return "\n".join(lines)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to list titled sessions: %s", e)
|
||||
return t("gateway.resume.list_failed", error=e)
|
||||
|
||||
# Resolve a numbered choice or a title to a session ID.
|
||||
if name.isdigit():
|
||||
try:
|
||||
titled = _list_titled_sessions()
|
||||
except Exception as e:
|
||||
logger.debug("Failed to list titled sessions for numeric resume: %s", e)
|
||||
return t("gateway.resume.list_failed", error=e)
|
||||
index = int(name)
|
||||
if index < 1 or index > len(titled):
|
||||
return t("gateway.resume.out_of_range", index=index)
|
||||
target = titled[index - 1]
|
||||
target_id = target.get("id")
|
||||
name = target.get("title") or name
|
||||
else:
|
||||
target_id = self._session_db.resolve_session_by_title(name)
|
||||
# Resolve the name to a session ID.
|
||||
target_id = self._session_db.resolve_session_by_title(name)
|
||||
if not target_id:
|
||||
return t("gateway.resume.not_found", name=name)
|
||||
# Compression creates child continuations that hold the live transcript.
|
||||
@@ -16269,7 +16063,11 @@ class GatewayRunner:
|
||||
)
|
||||
return
|
||||
_fut = safe_schedule_threadsafe(
|
||||
_send_or_update_status_coro(_status_adapter, _status_chat_id, event_type, prepared_message, _status_thread_metadata),
|
||||
_status_adapter.send(
|
||||
_status_chat_id,
|
||||
prepared_message,
|
||||
metadata=_status_thread_metadata,
|
||||
),
|
||||
_loop_for_step,
|
||||
logger=logger,
|
||||
log_message=f"status_callback ({event_type}) scheduling error",
|
||||
@@ -16670,16 +16468,45 @@ class GatewayRunner:
|
||||
# that may include tool_calls, tool_call_id, reasoning, etc.
|
||||
# - These must be passed through intact so the API sees valid
|
||||
# assistant→tool sequences (dropping tool_calls causes 500 errors)
|
||||
#
|
||||
# Telegram observed group context is handled structurally here:
|
||||
# observed=True transcript rows are withheld from replayable
|
||||
# history and attached to the current addressed message as
|
||||
# API-only context, so persisted history stores only the real
|
||||
# addressed user turn.
|
||||
agent_history, observed_group_context = _build_gateway_agent_history(
|
||||
history,
|
||||
channel_prompt=channel_prompt,
|
||||
)
|
||||
agent_history = []
|
||||
for msg in history:
|
||||
role = msg.get("role")
|
||||
if not role:
|
||||
continue
|
||||
|
||||
# Skip metadata entries (tool definitions, session info)
|
||||
# -- these are for transcript logging, not for the LLM
|
||||
if role in {"session_meta",}:
|
||||
continue
|
||||
|
||||
# Skip system messages -- the agent rebuilds its own system prompt
|
||||
if role == "system":
|
||||
continue
|
||||
|
||||
# Rich agent messages (tool_calls, tool results) must be passed
|
||||
# through intact so the API sees valid assistant→tool sequences
|
||||
has_tool_calls = "tool_calls" in msg
|
||||
has_tool_call_id = "tool_call_id" in msg
|
||||
is_tool_message = role == "tool"
|
||||
|
||||
if has_tool_calls or has_tool_call_id or is_tool_message:
|
||||
clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}
|
||||
agent_history.append(clean_msg)
|
||||
else:
|
||||
# Simple text message - just need role and content
|
||||
content = msg.get("content")
|
||||
if content:
|
||||
# Tag cross-platform mirror messages so the agent knows their origin
|
||||
if msg.get("mirror"):
|
||||
mirror_src = msg.get("mirror_source", "another session")
|
||||
content = f"[Delivered from {mirror_src}] {content}"
|
||||
# Preserve assistant reasoning + Codex replay fields so
|
||||
# multi-turn reasoning context, prefix-cache hits, and
|
||||
# provider-specific echo requirements survive session
|
||||
# reload. See ``_ASSISTANT_REPLAY_FIELDS`` for the full
|
||||
# whitelist and rationale.
|
||||
entry = _build_replay_entry(role, content, msg)
|
||||
agent_history.append(entry)
|
||||
|
||||
# Collect MEDIA paths already in history so we can exclude them
|
||||
# from the current turn's extraction. This is compression-safe:
|
||||
@@ -16912,17 +16739,7 @@ class GatewayRunner:
|
||||
else:
|
||||
_run_message = message
|
||||
|
||||
_api_run_message = _wrap_current_message_with_observed_context(
|
||||
_run_message,
|
||||
observed_group_context,
|
||||
)
|
||||
_conversation_kwargs = {
|
||||
"conversation_history": agent_history,
|
||||
"task_id": session_id,
|
||||
}
|
||||
if observed_group_context:
|
||||
_conversation_kwargs["persist_user_message"] = message
|
||||
result = agent.run_conversation(_api_run_message, **_conversation_kwargs)
|
||||
result = agent.run_conversation(_run_message, conversation_history=agent_history, task_id=session_id)
|
||||
finally:
|
||||
unregister_gateway_notify(_approval_session_key)
|
||||
# Cancel any pending clarify entries so blocked agent
|
||||
@@ -17138,7 +16955,6 @@ class GatewayRunner:
|
||||
"context_length": _context_length,
|
||||
"session_id": effective_session_id,
|
||||
"response_previewed": result.get("response_previewed", False),
|
||||
"response_transformed": result.get("response_transformed", False),
|
||||
}
|
||||
|
||||
# Start progress message sender if enabled
|
||||
@@ -17776,11 +17592,7 @@ class GatewayRunner:
|
||||
_content_delivered = bool(
|
||||
_sc and getattr(_sc, "final_content_delivered", False)
|
||||
)
|
||||
# Plugin hooks (e.g. transform_llm_output) may have appended content
|
||||
# after streaming finished — when the response was transformed, always
|
||||
# send the final version so the appended content reaches the client.
|
||||
_transformed = bool(response.get("response_transformed"))
|
||||
if not _is_empty_sentinel and not _transformed and (_streamed or _previewed or _content_delivered):
|
||||
if not _is_empty_sentinel and (_streamed or _previewed or _content_delivered):
|
||||
logger.info(
|
||||
"Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s content_delivered=%s).",
|
||||
session_key or "?",
|
||||
@@ -17789,28 +17601,6 @@ class GatewayRunner:
|
||||
_content_delivered,
|
||||
)
|
||||
response["already_sent"] = True
|
||||
elif not _is_empty_sentinel and _transformed and _sc is not None:
|
||||
# Plugin hooks transformed the response after streaming — edit the
|
||||
# existing streamed message instead of sending a duplicate.
|
||||
_sc_msg_id = _sc.message_id
|
||||
if _sc_msg_id:
|
||||
try:
|
||||
await _sc.adapter.edit_message(
|
||||
chat_id=source.chat_id,
|
||||
message_id=_sc_msg_id,
|
||||
content=response["final_response"],
|
||||
finalize=True,
|
||||
)
|
||||
response["already_sent"] = True
|
||||
logger.info(
|
||||
"Edited streamed message %s for session %s to include plugin-transformed content.",
|
||||
_sc_msg_id, session_key or "?",
|
||||
)
|
||||
except Exception as _edit_err:
|
||||
logger.warning(
|
||||
"Failed to edit streamed message for session %s: %s",
|
||||
session_key or "?", _edit_err,
|
||||
)
|
||||
|
||||
# Schedule deletion of tracked temporary progress bubbles after the
|
||||
# final response lands. Failed runs skip this so bubbles remain as
|
||||
@@ -18237,21 +18027,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
||||
runner.request_restart(detached=False, via_service=True)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
# Install a loop-level exception handler that swallows transient
|
||||
# network errors from background tasks. Issues #31066 / #31110:
|
||||
# an unhandled ``telegram.error.TimedOut`` (or peer NetworkError /
|
||||
# httpx connection error) in any awaited coroutine would propagate
|
||||
# to the loop and kill the gateway process, taking down every
|
||||
# profile attached to the same runner. systemd then restarts the
|
||||
# service after ~5s but the active conversation turn is lost.
|
||||
#
|
||||
# The fix is intentionally narrow: only well-known transient
|
||||
# network errors are swallowed (and logged with full traceback so
|
||||
# the originating call site is still discoverable). Anything else
|
||||
# is forwarded to the default handler so real bugs still surface.
|
||||
loop.set_exception_handler(_gateway_loop_exception_handler)
|
||||
|
||||
if threading.current_thread() is threading.main_thread():
|
||||
for sig in (signal.SIGINT, signal.SIGTERM):
|
||||
try:
|
||||
|
||||
@@ -1277,7 +1277,6 @@ class SessionStore:
|
||||
platform_message_id=(
|
||||
message.get("platform_message_id") or message.get("message_id")
|
||||
),
|
||||
observed=bool(message.get("observed")),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
|
||||
@@ -83,21 +83,6 @@ _VAR_MAP = {
|
||||
}
|
||||
|
||||
|
||||
def set_current_session_id(session_id: str) -> None:
|
||||
"""Synchronize ``HERMES_SESSION_ID`` across ContextVar and ``os.environ``.
|
||||
|
||||
Long-lived single-process entrypoints like the CLI can rotate sessions via
|
||||
``/new``, ``/resume``, ``/branch``, or compression splits without
|
||||
reconstructing the entire agent. Tools still consult
|
||||
``get_session_env("HERMES_SESSION_ID")`` with an ``os.environ`` fallback,
|
||||
so both storage paths must move together when the active session changes.
|
||||
"""
|
||||
import os
|
||||
|
||||
os.environ["HERMES_SESSION_ID"] = session_id
|
||||
_SESSION_ID.set(session_id)
|
||||
|
||||
|
||||
def set_session_vars(
|
||||
platform: str = "",
|
||||
chat_id: str = "",
|
||||
|
||||
@@ -192,11 +192,6 @@ class GatewayStreamConsumer:
|
||||
"""True when the stream consumer delivered the final assistant reply."""
|
||||
return self._final_response_sent
|
||||
|
||||
@property
|
||||
def message_id(self) -> str | None:
|
||||
"""The Discord/chat message ID of the last-sent or edited message."""
|
||||
return self._message_id
|
||||
|
||||
@property
|
||||
def final_content_delivered(self) -> bool:
|
||||
"""True when the final response content reached the user, even if
|
||||
|
||||
@@ -129,8 +129,7 @@ def build_top_level_parser():
|
||||
default=None,
|
||||
help=(
|
||||
"Provider override for this invocation (e.g. openrouter, anthropic). "
|
||||
"Applies to -z/--oneshot and --tui. The persistent provider lives in config.yaml "
|
||||
"under model.provider — use `hermes setup` or edit the file to change it."
|
||||
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
@@ -269,11 +268,7 @@ def build_top_level_parser():
|
||||
help="Inference provider (default: auto). Built-in or a user-defined name from `providers:` in config.yaml.",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"-v",
|
||||
"--verbose",
|
||||
action="store_true",
|
||||
default=argparse.SUPPRESS,
|
||||
help="Verbose output",
|
||||
"-v", "--verbose", action="store_true", help="Verbose output"
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"-Q",
|
||||
|
||||
+27
-164
@@ -41,7 +41,7 @@ from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer, ThreadingHTTPServer
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, FrozenSet, List, Optional, Tuple
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||
from urllib.parse import parse_qs, urlencode, urlparse
|
||||
|
||||
import httpx
|
||||
@@ -553,7 +553,6 @@ _PLACEHOLDER_SECRET_VALUES = {
|
||||
"***",
|
||||
"changeme",
|
||||
"your_api_key",
|
||||
"your_api_key_here",
|
||||
"your-api-key",
|
||||
"placeholder",
|
||||
"example",
|
||||
@@ -1560,67 +1559,6 @@ def _optional_base_url(value: Any) -> Optional[str]:
|
||||
return cleaned if cleaned else None
|
||||
|
||||
|
||||
# Allowlist of hosts the Nous Portal proxy is willing to forward minted
|
||||
# bearer tokens to. The bearer is a long-lived agent_key minted by
|
||||
# portal.nousresearch.com — sending it anywhere else would leak it.
|
||||
#
|
||||
# This is consulted only for URLs coming from the NETWORK side (Portal
|
||||
# refresh / agent-key-mint responses). User-controlled env-var overrides
|
||||
# (NOUS_INFERENCE_BASE_URL) bypass validation — that's the documented
|
||||
# dev/staging escape hatch and the env source is already trusted (the
|
||||
# user set it themselves).
|
||||
_ALLOWED_NOUS_INFERENCE_HOSTS: FrozenSet[str] = frozenset({
|
||||
"inference-api.nousresearch.com",
|
||||
})
|
||||
|
||||
|
||||
def _validate_nous_inference_url_from_network(url: Optional[str]) -> Optional[str]:
|
||||
"""Validate a Portal-returned inference URL against the host allowlist.
|
||||
|
||||
Returns ``url`` (normalised by stripping trailing slashes) if it's a
|
||||
well-formed ``https://<allowlisted-host>/...`` URL. Returns ``None``
|
||||
if the URL is missing, malformed, non-https, or points at an
|
||||
unexpected host — letting the caller fall back to the configured
|
||||
default rather than persist or forward a poisoned value.
|
||||
|
||||
Defense-in-depth: a compromised refresh / mint response from the
|
||||
Portal API (MITM, malicious response injection) could otherwise
|
||||
redirect every subsequent proxy request — bearing the user's
|
||||
legitimately-minted agent_key — to an attacker-controlled endpoint.
|
||||
Validating scheme + host at the source closes that loop before the
|
||||
poisoned URL ever lands in ``auth.json``.
|
||||
|
||||
The env-var override path (``NOUS_INFERENCE_BASE_URL``) bypasses
|
||||
this — env values come from the trusted OS user, not from the
|
||||
network, and the override is documented for staging/dev use.
|
||||
|
||||
Co-authored-by: memosr <mehmet.sr35@gmail.com>
|
||||
"""
|
||||
if not isinstance(url, str):
|
||||
return None
|
||||
cleaned = url.strip()
|
||||
if not cleaned:
|
||||
return None
|
||||
try:
|
||||
parsed = urlparse(cleaned)
|
||||
except Exception:
|
||||
return None
|
||||
if parsed.scheme != "https":
|
||||
logger.warning(
|
||||
"nous: refusing non-https inference URL scheme %r from Portal response",
|
||||
parsed.scheme,
|
||||
)
|
||||
return None
|
||||
if parsed.hostname not in _ALLOWED_NOUS_INFERENCE_HOSTS:
|
||||
logger.warning(
|
||||
"nous: refusing inference URL host %r from Portal response "
|
||||
"(not in allowlist); falling back to default",
|
||||
parsed.hostname,
|
||||
)
|
||||
return None
|
||||
return cleaned.rstrip("/")
|
||||
|
||||
|
||||
def _decode_jwt_claims(token: Any) -> Dict[str, Any]:
|
||||
if not isinstance(token, str) or token.count(".") != 2:
|
||||
return {}
|
||||
@@ -2066,10 +2004,7 @@ def resolve_qwen_runtime_credentials(
|
||||
def get_qwen_auth_status() -> Dict[str, Any]:
|
||||
auth_path = _qwen_cli_auth_path()
|
||||
try:
|
||||
# Validate the runtime credentials, including refresh when the cached
|
||||
# CLI token is expired. Otherwise stale tokens show up as "logged in"
|
||||
# and `hermes model` walks users into a broken Qwen setup flow.
|
||||
creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True)
|
||||
creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False)
|
||||
return {
|
||||
"logged_in": True,
|
||||
"auth_file": str(auth_path),
|
||||
@@ -4841,7 +4776,7 @@ def refresh_nous_oauth_pure(
|
||||
state["refresh_token"] = refreshed.get("refresh_token") or state["refresh_token"]
|
||||
state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
|
||||
state["scope"] = refreshed.get("scope") or state.get("scope")
|
||||
refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
|
||||
refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
|
||||
if refreshed_url:
|
||||
state["inference_base_url"] = refreshed_url
|
||||
state["obtained_at"] = now.isoformat()
|
||||
@@ -4877,7 +4812,7 @@ def refresh_nous_oauth_pure(
|
||||
state["agent_key_expires_in"] = mint_payload.get("expires_in")
|
||||
state["agent_key_reused"] = bool(mint_payload.get("reused", False))
|
||||
state["agent_key_obtained_at"] = now.isoformat()
|
||||
minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
|
||||
minted_url = _optional_base_url(mint_payload.get("inference_base_url"))
|
||||
if minted_url:
|
||||
state["inference_base_url"] = minted_url
|
||||
|
||||
@@ -5155,7 +5090,7 @@ def resolve_nous_runtime_credentials(
|
||||
state["refresh_token"] = refreshed.get("refresh_token") or refresh_token
|
||||
state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
|
||||
state["scope"] = refreshed.get("scope") or state.get("scope")
|
||||
refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
|
||||
refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
|
||||
if refreshed_url:
|
||||
inference_base_url = refreshed_url
|
||||
state["obtained_at"] = now.isoformat()
|
||||
@@ -5263,7 +5198,7 @@ def resolve_nous_runtime_credentials(
|
||||
state["refresh_token"] = refreshed.get("refresh_token") or latest_refresh_token
|
||||
state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
|
||||
state["scope"] = refreshed.get("scope") or state.get("scope")
|
||||
refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
|
||||
refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
|
||||
if refreshed_url:
|
||||
inference_base_url = refreshed_url
|
||||
state["obtained_at"] = now.isoformat()
|
||||
@@ -5318,7 +5253,7 @@ def resolve_nous_runtime_credentials(
|
||||
state["agent_key_expires_in"] = mint_payload.get("expires_in")
|
||||
state["agent_key_reused"] = bool(mint_payload.get("reused", False))
|
||||
state["agent_key_obtained_at"] = now.isoformat()
|
||||
minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
|
||||
minted_url = _optional_base_url(mint_payload.get("inference_base_url"))
|
||||
if minted_url:
|
||||
inference_base_url = minted_url
|
||||
_oauth_trace(
|
||||
@@ -7110,95 +7045,10 @@ def _refresh_minimax_oauth_state(
|
||||
return new_state
|
||||
|
||||
|
||||
def _minimax_oauth_quarantine_on_terminal_refresh(state: Dict[str, Any], exc: AuthError) -> None:
|
||||
"""Wipe dead tokens from auth.json after a terminal refresh failure.
|
||||
|
||||
Shared by both the eager-resolve path and the lazy per-request token
|
||||
provider. Mirrors the Nous / xAI-OAuth / Codex-OAuth quarantine pattern
|
||||
so subsequent calls fail fast without a network retry.
|
||||
"""
|
||||
if not (exc.relogin_required and state.get("refresh_token")):
|
||||
return
|
||||
for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
|
||||
state.pop(_k, None)
|
||||
state["last_auth_error"] = {
|
||||
"provider": "minimax-oauth",
|
||||
"code": exc.code or "refresh_failed",
|
||||
"message": str(exc),
|
||||
"reason": "runtime_refresh_failure",
|
||||
"relogin_required": True,
|
||||
"at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
try:
|
||||
_minimax_save_auth_state(state)
|
||||
except Exception as _save_exc:
|
||||
logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
|
||||
|
||||
|
||||
def build_minimax_oauth_token_provider() -> Callable[[], str]:
|
||||
"""Return a zero-arg callable that yields a fresh MiniMax access token.
|
||||
|
||||
The Anthropic SDK caches ``api_key`` as a static string at construction
|
||||
time, so a session that resolves credentials once at startup will keep
|
||||
sending the same bearer until MiniMax's server returns 401 — typically
|
||||
~15 minutes in, because MiniMax issues short-lived access tokens.
|
||||
|
||||
Returning a *callable* instead of a string lets us hook into the
|
||||
existing Entra-ID bearer infrastructure in
|
||||
:mod:`agent.anthropic_adapter`: ``build_anthropic_client`` detects a
|
||||
callable and routes through ``_build_anthropic_client_with_bearer_hook``,
|
||||
which mints a fresh ``Authorization`` header on every outbound request.
|
||||
Each invocation re-reads the persisted state from ``auth.json`` and
|
||||
calls :func:`_refresh_minimax_oauth_state` — that helper is a no-op
|
||||
when the token still has more than ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS``
|
||||
of life left, so the steady-state cost is one file read + one
|
||||
timestamp compare per request.
|
||||
|
||||
Reading state fresh each time also means a refresh persisted by one
|
||||
process (CLI, gateway, cron) is immediately visible to every other
|
||||
process sharing the same ``auth.json``.
|
||||
"""
|
||||
def _provide() -> str:
|
||||
state = get_provider_auth_state("minimax-oauth")
|
||||
if not state or not state.get("access_token"):
|
||||
raise AuthError(
|
||||
"Not logged into MiniMax OAuth. Run `hermes model` and select "
|
||||
"MiniMax (OAuth).",
|
||||
provider="minimax-oauth", code="not_logged_in", relogin_required=True,
|
||||
)
|
||||
try:
|
||||
state = _refresh_minimax_oauth_state(state)
|
||||
except AuthError as exc:
|
||||
_minimax_oauth_quarantine_on_terminal_refresh(state, exc)
|
||||
raise
|
||||
token = state.get("access_token")
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"MiniMax OAuth state has no access_token after refresh.",
|
||||
provider="minimax-oauth", code="no_access_token", relogin_required=True,
|
||||
)
|
||||
return token
|
||||
|
||||
return _provide
|
||||
|
||||
|
||||
def resolve_minimax_oauth_runtime_credentials(
|
||||
*, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
|
||||
as_token_provider: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Return {provider, api_key, base_url, source} for minimax-oauth.
|
||||
|
||||
When ``as_token_provider`` is True, ``api_key`` is a zero-arg callable
|
||||
that mints a fresh access token per call (proactively refreshing if
|
||||
the cached token is within ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` of
|
||||
expiry). This is what the runtime provider path uses so that long
|
||||
sessions survive MiniMax's short access-token lifetime — see
|
||||
:func:`build_minimax_oauth_token_provider` for the rationale.
|
||||
|
||||
The default (string ``api_key``) preserves the historical contract for
|
||||
diagnostic call sites like ``hermes status`` that just want to know
|
||||
whether a valid token exists right now.
|
||||
"""
|
||||
"""Return {provider, api_key, base_url, source} for minimax-oauth."""
|
||||
state = get_provider_auth_state("minimax-oauth")
|
||||
if not state or not state.get("access_token"):
|
||||
raise AuthError(
|
||||
@@ -7209,15 +7059,28 @@ def resolve_minimax_oauth_runtime_credentials(
|
||||
try:
|
||||
state = _refresh_minimax_oauth_state(state)
|
||||
except AuthError as exc:
|
||||
_minimax_oauth_quarantine_on_terminal_refresh(state, exc)
|
||||
if exc.relogin_required and state.get("refresh_token"):
|
||||
# Terminal refresh failure — clear dead tokens from auth.json so
|
||||
# subsequent calls fail fast without a network retry, mirroring
|
||||
# the Nous / xAI-OAuth / Codex-OAuth quarantine pattern.
|
||||
for _k in ("access_token", "refresh_token", "expires_at", "expires_in", "obtained_at"):
|
||||
state.pop(_k, None)
|
||||
state["last_auth_error"] = {
|
||||
"provider": "minimax-oauth",
|
||||
"code": exc.code or "refresh_failed",
|
||||
"message": str(exc),
|
||||
"reason": "runtime_refresh_failure",
|
||||
"relogin_required": True,
|
||||
"at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
try:
|
||||
_minimax_save_auth_state(state)
|
||||
except Exception as _save_exc:
|
||||
logger.debug("MiniMax OAuth: failed to persist quarantined state: %s", _save_exc)
|
||||
raise
|
||||
if as_token_provider:
|
||||
api_key: Any = build_minimax_oauth_token_provider()
|
||||
else:
|
||||
api_key = state["access_token"]
|
||||
return {
|
||||
"provider": "minimax-oauth",
|
||||
"api_key": api_key,
|
||||
"api_key": state["access_token"],
|
||||
"base_url": state["inference_base_url"].rstrip("/"),
|
||||
"source": "oauth",
|
||||
}
|
||||
|
||||
@@ -164,7 +164,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
cli_only=True),
|
||||
CommandDef("skills", "Search, install, inspect, or manage skills",
|
||||
"Tools & Skills", cli_only=True,
|
||||
subcommands=("search", "browse", "inspect", "install", "audit")),
|
||||
subcommands=("search", "browse", "inspect", "install")),
|
||||
CommandDef("bundles", "List skill bundles (aliases /<name> for multiple skills)",
|
||||
"Tools & Skills"),
|
||||
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
|
||||
@@ -449,7 +449,7 @@ def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
|
||||
:func:`hermes_cli.plugins.PluginContext.register_command`. They behave
|
||||
like ``CommandDef`` entries for gateway surfacing: they appear in the
|
||||
Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
|
||||
(via :func:`plugins.platforms.discord.adapter._register_slash_commands`) in
|
||||
(via :func:`gateway.platforms.discord._register_slash_commands`) in
|
||||
Discord's native slash command picker.
|
||||
|
||||
Lookup is lazy so importing this module never forces plugin discovery
|
||||
|
||||
+3
-60
@@ -658,8 +658,7 @@ DEFAULT_CONFIG = {
|
||||
# are owned by your host user instead of root, which avoids needing
|
||||
# `sudo chown` after container runs. Default off to preserve behavior
|
||||
# for images whose entrypoints expect to start as root (e.g. the
|
||||
# bundled Hermes image, which drops to the `hermes` user via
|
||||
# s6-setuidgid inside each supervised service).
|
||||
# bundled Hermes image, which drops to the `hermes` user via gosu).
|
||||
# When on, SETUID/SETGID caps are omitted from the container since
|
||||
# no privilege drop is needed.
|
||||
"docker_run_as_host_user": False,
|
||||
@@ -1009,19 +1008,6 @@ DEFAULT_CONFIG = {
|
||||
"compact": False,
|
||||
"personality": "kawaii",
|
||||
"resume_display": "full",
|
||||
# Recap tuning for /resume and startup resume. The defaults match the
|
||||
# historical hardcoded values; expose them as config so power users can
|
||||
# widen or tighten the snapshot to taste.
|
||||
"resume_exchanges": 10, # max user+assistant pairs to show
|
||||
"resume_max_user_chars": 300, # truncate user message text
|
||||
"resume_max_assistant_chars": 200, # truncate non-last assistant text
|
||||
"resume_max_assistant_lines": 3, # truncate non-last assistant lines
|
||||
# When True (default), assistant entries that are *only* tool calls
|
||||
# (no visible text) are skipped in the recap. This prevents the recap
|
||||
# from being dominated by `[2 tool calls: terminal, read_file]` lines
|
||||
# when an exchange was tool-heavy. Set False to restore the legacy
|
||||
# behavior of showing tool-call summaries inline.
|
||||
"resume_skip_tool_only": True,
|
||||
"busy_input_mode": "interrupt", # interrupt | queue | steer
|
||||
# When true, `hermes --tui` auto-resumes the most recent human-
|
||||
# facing session on launch instead of forging a fresh one.
|
||||
@@ -1761,45 +1747,6 @@ DEFAULT_CONFIG = {
|
||||
"retries": 2,
|
||||
},
|
||||
|
||||
# =========================================================================
|
||||
# External secret sources
|
||||
# =========================================================================
|
||||
# Pull credentials from external secret managers at process startup
|
||||
# rather than storing them in ~/.hermes/.env.
|
||||
"secrets": {
|
||||
"bitwarden": {
|
||||
# Master switch. When false, BSM is never contacted and the
|
||||
# bws binary is never auto-installed — same as not having
|
||||
# this section at all.
|
||||
"enabled": False,
|
||||
# Name of the env var that holds the Bitwarden machine-account
|
||||
# access token. This is the one bootstrap secret; it lives
|
||||
# in ~/.hermes/.env (or your shell) and never in config.yaml.
|
||||
"access_token_env": "BWS_ACCESS_TOKEN",
|
||||
# UUID of the BSM project to sync from.
|
||||
"project_id": "",
|
||||
# Seconds to cache fetched secrets in-process. 0 disables.
|
||||
"cache_ttl_seconds": 300,
|
||||
# When True, BSM values overwrite existing env vars. Default
|
||||
# True because the point of using BSM is centralized rotation —
|
||||
# if .env had the final say, rotating in Bitwarden wouldn't
|
||||
# take effect until you also cleared the matching .env line.
|
||||
"override_existing": True,
|
||||
# When True, the bws binary is auto-downloaded into
|
||||
# ~/.hermes/bin/ on first use. When False you must install
|
||||
# bws yourself and have it on PATH.
|
||||
"auto_install": True,
|
||||
# Bitwarden region / self-hosted endpoint. Empty string
|
||||
# means use the bws CLI default (US Cloud,
|
||||
# https://vault.bitwarden.com). Set to
|
||||
# https://vault.bitwarden.eu for EU Cloud, or your own URL
|
||||
# for self-hosted Bitwarden. Plumbed into the bws subprocess
|
||||
# as BWS_SERVER_URL. Prompted for during
|
||||
# `hermes secrets bitwarden setup`.
|
||||
"server_url": "",
|
||||
},
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 23,
|
||||
}
|
||||
@@ -3070,7 +3017,7 @@ def _normalize_custom_provider_entry(
|
||||
"api_mode", "transport", "model", "default_model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
"request_timeout_seconds", "stale_timeout_seconds",
|
||||
"discover_models", "extra_body",
|
||||
"discover_models",
|
||||
}
|
||||
for camel, snake in _CAMEL_ALIASES.items():
|
||||
if camel in entry and snake not in entry:
|
||||
@@ -3165,10 +3112,6 @@ def _normalize_custom_provider_entry(
|
||||
if isinstance(discover_models, bool):
|
||||
normalized["discover_models"] = discover_models
|
||||
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
normalized["extra_body"] = dict(extra_body)
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
@@ -3329,7 +3272,7 @@ _KNOWN_ROOT_KEYS = {
|
||||
# Valid fields inside a custom_providers list entry
|
||||
_VALID_CUSTOM_PROVIDER_FIELDS = {
|
||||
"name", "base_url", "api_key", "api_mode", "model", "models",
|
||||
"context_length", "rate_limit_delay", "extra_body",
|
||||
"context_length", "rate_limit_delay",
|
||||
# key_env is read at runtime by runtime_provider.py and auxiliary_client.py
|
||||
# — include it here so the set accurately describes the supported schema.
|
||||
"key_env",
|
||||
|
||||
@@ -1,325 +0,0 @@
|
||||
"""Container-boot reconciliation of per-profile gateway s6 services.
|
||||
|
||||
Service directories under /run/service/ live on **tmpfs** and are wiped
|
||||
on every container restart. Profile directories under
|
||||
``$HERMES_HOME/profiles/<name>/`` live on the persistent VOLUME, and
|
||||
each one records its gateway's last state in ``gateway_state.json``.
|
||||
This module bridges the two: on every container boot, walk the
|
||||
persistent profiles, recreate the s6 service slots, and auto-start
|
||||
only those whose last recorded state was ``running``.
|
||||
|
||||
Wired into the image as /etc/cont-init.d/02-reconcile-profiles by the
|
||||
Dockerfile (Phase 4 Task 4.0). Runs as root after 01-hermes-setup
|
||||
(the stage2 hook) has chowned the volume and seeded $HERMES_HOME, but
|
||||
before s6-rc starts user services.
|
||||
|
||||
Without this module, every ``docker restart`` would silently wipe
|
||||
every per-profile gateway, even though the user's profiles still
|
||||
exist on disk.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Only this prior state triggers automatic restart. Everything else
|
||||
# (startup_failed, starting, stopped, missing) registers the slot in
|
||||
# the down state and waits for explicit user action — this avoids the
|
||||
# crash-loop where a broken gateway keeps being restarted across
|
||||
# `docker restart` cycles.
|
||||
_AUTOSTART_STATES = frozenset({"running"})
|
||||
|
||||
# Stale runtime files we sweep before recreating service slots. These
|
||||
# all hold container-namespaced state (PIDs, process tables) that's
|
||||
# garbage post-restart — a numerically-equal PID in the new container
|
||||
# is a different process. See the Risk Register in the plan.
|
||||
_STALE_RUNTIME_FILES = ("gateway.pid", "processes.json")
|
||||
|
||||
ReconcileActionLabel = Literal["started", "registered", "skipped"]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ReconcileAction:
    """Immutable record of what one reconciliation pass did for a profile."""

    # Profile name; "default" is used for the root profile.
    profile: str
    # ``gateway_state`` value read from the profile's gateway_state.json,
    # or None when no usable prior state was found.
    prior_state: str | None
    # Outcome label recorded for this profile.
    action: ReconcileActionLabel
|
||||
|
||||
|
||||
def reconcile_profile_gateways(
    *,
    hermes_home: Path,
    scandir: Path,
    dry_run: bool = False,
) -> list[ReconcileAction]:
    """Rebuild the s6 gateway service slot for every persistent profile.

    The root profile (stored directly in ``hermes_home``) is always
    handled first under the reserved name ``default``, whether or not
    anything has populated it, so a ``gateway-default`` slot always
    exists. Named profiles are then taken from
    ``<hermes_home>/profiles/`` in sorted order. A slot is brought up
    automatically only when the profile's recorded prior state is in
    ``_AUTOSTART_STATES``; otherwise it is registered in the down state.

    Args:
        hermes_home: The HERMES_HOME directory. Named profiles live
            under ``<hermes_home>/profiles/<name>/``.
        scandir: Directory in which ``gateway-<profile>/`` service
            directories are created.
        dry_run: When True, compute and return the actions without
            cleaning runtime files, registering services, or writing
            the boot log.

    Returns:
        One :class:`ReconcileAction` per handled profile: ``default``
        first, followed by the named profiles in sorted order.
    """

    def _reconcile_one(name: str, profile_dir: Path) -> ReconcileAction:
        # Read the recorded state *before* sweeping runtime files, then
        # (unless dry-running) recreate the slot for this profile.
        previous = _read_prior_state(profile_dir)
        autostart = previous in _AUTOSTART_STATES
        if not dry_run:
            _cleanup_stale_runtime_files(profile_dir)
            _register_service(scandir, name, start=autostart)
        return ReconcileAction(
            profile=name,
            prior_state=previous,
            action="started" if autostart else "registered",
        )

    outcomes: list[ReconcileAction] = [_reconcile_one("default", hermes_home)]

    profiles_root = hermes_home / "profiles"
    candidates = sorted(profiles_root.iterdir()) if profiles_root.is_dir() else []
    for candidate in candidates:
        # Only directories carrying a SOUL.md marker count as profiles;
        # this keeps stray directories (backups, manual mkdir) out.
        if not candidate.is_dir() or not (candidate / "SOUL.md").exists():
            continue
        # "default" is reserved for the root profile's slot, so a
        # profiles/default/ directory is skipped rather than allowed to
        # overwrite that slot.
        if candidate.name == "default":
            log.warning(
                "profiles/default/ exists — skipping to avoid colliding "
                "with the reserved root-profile s6 slot",
            )
            continue
        outcomes.append(_reconcile_one(candidate.name, candidate))

    if not dry_run:
        _write_reconcile_log(hermes_home, outcomes)
    return outcomes
|
||||
|
||||
|
||||
def _read_prior_state(profile_dir: Path) -> str | None:
    """Return the ``gateway_state`` recorded in ``gateway_state.json``.

    Any problem with the file is treated as "no prior state" so that a
    single corrupt profile cannot abort the whole reconciliation pass.

    Args:
        profile_dir: Directory expected to contain ``gateway_state.json``.

    Returns:
        The recorded state string, or None when the file is missing,
        unreadable, not valid JSON, not a JSON object, or when the
        ``gateway_state`` field is absent or not a string.
    """
    state_file = profile_dir / "gateway_state.json"
    try:
        payload = json.loads(state_file.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # A profile that never ran a gateway simply has no state file.
        return None
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and
        # UnicodeDecodeError (undecodable bytes in the file).
        log.warning(
            "could not read %s; treating as no prior state", state_file,
        )
        return None

    if not isinstance(payload, dict):
        # Valid JSON but not an object (e.g. a list or bare string).
        log.warning(
            "unexpected content in %s; treating as no prior state", state_file,
        )
        return None

    state = payload.get("gateway_state")
    if state is None:
        return None
    if not isinstance(state, str):
        # Callers test the result for membership in a frozenset; an
        # unhashable value such as a list would raise TypeError there.
        log.warning(
            "unexpected content in %s; treating as no prior state", state_file,
        )
        return None
    return state
|
||||
|
||||
|
||||
def _cleanup_stale_runtime_files(profile_dir: Path) -> None:
    """Delete leftover runtime files from ``profile_dir``.

    Every name listed in ``_STALE_RUNTIME_FILES`` is unlinked if present;
    names that do not exist are ignored.
    """
    stale_paths = [profile_dir / filename for filename in _STALE_RUNTIME_FILES]
    for stale in stale_paths:
        stale.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def _register_service(scandir: Path, profile: str, *, start: bool) -> None:
    """Build and publish the ``gateway-<profile>`` service directory.

    The layout is assembled in a sibling ``gateway-<profile>.tmp``
    staging directory and only renamed to its final name once complete,
    so an interrupted run leaves staging debris rather than a
    half-populated slot. The run and log scripts come from
    ``S6ServiceManager``'s renderers so this path produces the same
    scripts as the manager's own registration.

    Args:
        scandir: Directory that holds the service slots.
        profile: Profile name; checked with ``validate_profile_name``.
        start: When False a ``down`` marker file is added to the slot.

    Raises:
        Exception: Whatever the filesystem or the imported helpers
            raise once staging has begun; the staging directory is
            removed (best effort) before the exception is re-raised.
    """
    import shutil

    from hermes_cli.service_manager import (
        S6ServiceManager,
        _seed_supervise_skeleton,
        validate_profile_name,
    )

    validate_profile_name(profile)
    slot = scandir / f"gateway-{profile}"
    staging = slot.with_name(slot.name + ".tmp")

    # Start from a clean staging dir; one may be left over from a
    # previous run that was interrupted.
    if staging.exists():
        shutil.rmtree(staging, ignore_errors=True)
    staging.mkdir(parents=True)

    try:
        (staging / "type").write_text("longrun\n")

        # Main run script, rendered with no extra environment.
        run_script = staging / "run"
        run_script.write_text(
            S6ServiceManager._render_run_script(profile, extra_env={})
        )
        run_script.chmod(0o755)

        # Logger service lives in the log/ subdirectory.
        logger_dir = staging / "log"
        logger_dir.mkdir()
        logger_script = logger_dir / "run"
        logger_script.write_text(S6ServiceManager._render_log_run(profile))
        logger_script.chmod(0o755)

        # A `down` marker keeps the service from being started
        # automatically; it is omitted only when auto-start is wanted.
        if not start:
            (staging / "down").touch()

        # Seed the supervise/ skeleton while the slot is still private,
        # i.e. before it is renamed into place.
        _seed_supervise_skeleton(staging)

        # Publish: any existing slot is removed first, then the staging
        # directory is renamed to the final name. This is a
        # remove-then-rename sequence, not a single-step swap.
        if slot.exists():
            shutil.rmtree(slot)
        staging.replace(slot)
    except Exception:
        shutil.rmtree(staging, ignore_errors=True)
        raise
|
||||
|
||||
|
||||
def _write_reconcile_log(
    hermes_home: Path, actions: list[ReconcileAction],
) -> None:
    """Append one line per action to ``<hermes_home>/logs/container-boot.log``.

    Each line has the form
    ``<timestamp> profile=<name> prior_state=<state> action=<label>``.

    Before appending, a log that has reached ``_LOG_ROTATE_BYTES`` is
    renamed to ``container-boot.log.1`` (replacing any earlier rotation)
    so the new entries start a fresh file. A failed rotation is only
    logged as a warning; the entries are then appended to the existing
    file instead.

    Args:
        hermes_home: Directory under which ``logs/`` is created if needed.
        actions: Outcomes to record, written in the given order.
    """
    import time

    log_dir = hermes_home / "logs"
    log_dir.mkdir(parents=True, exist_ok=True)
    log_path = log_dir / "container-boot.log"
    rotated_path = log_dir / "container-boot.log.1"

    try:
        over_limit = (
            log_path.exists() and log_path.stat().st_size >= _LOG_ROTATE_BYTES
        )
        if over_limit:
            log_path.replace(rotated_path)
    except OSError as exc:
        # Non-fatal: better to keep appending than to lose the entries.
        log.warning("could not rotate %s: %s", log_path, exc)

    # One timestamp is shared by every line written in this pass.
    stamp = time.strftime("%Y-%m-%dT%H:%M:%S%z")
    with log_path.open("a", encoding="utf-8") as handle:
        handle.writelines(
            f"{stamp} profile={item.profile} prior_state={item.prior_state} "
            f"action={item.action}\n"
            for item in actions
        )
|
||||
|
||||
|
||||
# 256 KiB soft cap on container-boot.log; rotated to .1 when crossed.
|
||||
# At ~80 B per reconcile-action line this is ~3000 lines, or about a
|
||||
# year of daily reboots on a 5-profile container. Two files = ~512 KiB
|
||||
# worst case. Tuned for visibility (small enough to grep / cat without
|
||||
# scrolling forever) more than space (the persistent volume has GB).
|
||||
_LOG_ROTATE_BYTES = 256 * 1024
|
||||
|
||||
|
||||
def main() -> int:
    """Run one reconciliation pass and print a summary line per profile.

    Locations come from the environment: ``HERMES_HOME`` (falling back
    to ``/opt/data``) and ``S6_PROFILE_GATEWAY_SCANDIR`` (falling back
    to ``/run/service``).

    Returns:
        Always 0.
    """
    env = os.environ
    hermes_home = Path(env.get("HERMES_HOME", "/opt/data"))
    scandir = Path(env.get("S6_PROFILE_GATEWAY_SCANDIR", "/run/service"))

    outcomes = reconcile_profile_gateways(
        hermes_home=hermes_home, scandir=scandir,
    )
    for outcome in outcomes:
        print(
            f"reconcile: profile={outcome.profile} "
            f"prior_state={outcome.prior_state} action={outcome.action}"
        )
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -71,7 +71,7 @@ def curses_checklist(
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim gray
|
||||
curses.init_pair(3, 8, -1) # dim gray
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
|
||||
|
||||
+1
-9
@@ -14,7 +14,6 @@ Currently supports:
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
@@ -37,12 +36,6 @@ _REDACTION_BANNER = (
|
||||
"run with --no-redact to disable]\n"
|
||||
)
|
||||
|
||||
_EMAIL_ADDRESS_RE = re.compile(
|
||||
r"(?<![A-Za-z0-9._%+-])"
|
||||
r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"
|
||||
r"(?![A-Za-z0-9._%+-])"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Paste services — try paste.rs first, dpaste.com as fallback.
|
||||
@@ -405,8 +398,7 @@ def _redact_log_text(text: str) -> str:
|
||||
return text
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
text = redact_sensitive_text(text, force=True)
|
||||
return _EMAIL_ADDRESS_RE.sub("[REDACTED_EMAIL]", text)
|
||||
return redact_sensitive_text(text, force=True)
|
||||
|
||||
|
||||
def _capture_log_snapshot(
|
||||
|
||||
+1
-85
@@ -207,69 +207,14 @@ def _fail_and_issue(text: str, detail: str, fix: str, issues: list[str]) -> None
|
||||
issues.append(fix)
|
||||
|
||||
|
||||
def _check_s6_supervision(issues: list[str]) -> None:
    """Report s6 supervision state when the detected service manager is s6.

    Returns silently when the service-manager helpers cannot be imported
    or when ``detect_service_manager()`` reports anything other than
    ``"s6"``. Otherwise prints an "s6 Supervision" section with:

    - the up/down status of the ``main-hermes`` and ``dashboard`` services;
    - how many registered per-profile gateways are running, plus their
      sorted names when there are at most eight.

    Args:
        issues: Accepted for signature parity with the other doctor
            checks; this function does not read or modify it.
    """
    try:
        from hermes_cli.service_manager import (
            S6ServiceManager,
            detect_service_manager,
        )
    except Exception:
        return

    if detect_service_manager() != "s6":
        return

    _section("s6 Supervision")
    manager = S6ServiceManager()

    # Static services first.
    for service_name in ("main-hermes", "dashboard"):
        if not manager.is_running(service_name):
            check_info(f"{service_name}: down (expected if not enabled via env)")
        else:
            check_ok(f"{service_name}: up")

    registered = manager.list_profile_gateways()
    if not registered:
        check_info("No per-profile gateways registered yet — create one with `hermes profile create <name>`")
        return

    running = len(
        [name for name in registered if manager.is_running(f"gateway-{name}")]
    )
    # Only spell out the names when the list is short enough to read.
    if len(registered) <= 8:
        listing = f" ({', '.join(sorted(registered))})"
    else:
        listing = ""
    check_ok(
        f"Per-profile gateways: {running}/{len(registered)} supervised up" + listing
    )
|
||||
|
||||
|
||||
def _check_gateway_service_linger(issues: list[str]) -> None:
|
||||
"""Warn when a systemd user gateway service will stop after logout.
|
||||
|
||||
Skipped inside a container running under s6 — the linger concept
|
||||
(user-systemd surviving SSH logout) doesn't apply there, and the
|
||||
s6 supervision state is surfaced separately by
|
||||
``_check_s6_supervision``.
|
||||
"""
|
||||
"""Warn when a systemd user gateway service will stop after logout."""
|
||||
try:
|
||||
from hermes_cli.gateway import (
|
||||
get_systemd_linger_status,
|
||||
get_systemd_unit_path,
|
||||
is_linux,
|
||||
)
|
||||
from hermes_cli.service_manager import detect_service_manager
|
||||
except Exception as e:
|
||||
check_warn("Gateway service linger", f"(could not import gateway helpers: {e})")
|
||||
return
|
||||
@@ -277,12 +222,6 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
|
||||
if not is_linux():
|
||||
return
|
||||
|
||||
# Inside a container under our s6 /init, _check_s6_supervision
|
||||
# reports the live supervision state; the linger warning would be
|
||||
# confusing here (no systemd, no logout, no "lingering" concept).
|
||||
if detect_service_manager() == "s6":
|
||||
return
|
||||
|
||||
unit_path = get_systemd_unit_path()
|
||||
if not unit_path.exists():
|
||||
return
|
||||
@@ -1045,7 +984,6 @@ def run_doctor(args):
|
||||
pass
|
||||
|
||||
_check_gateway_service_linger(issues)
|
||||
_check_s6_supervision(issues)
|
||||
|
||||
if sys.platform != "win32":
|
||||
_section("Command Installation")
|
||||
@@ -1138,26 +1076,6 @@ def run_doctor(args):
|
||||
|
||||
# Docker (optional)
|
||||
terminal_env = os.getenv("TERMINAL_ENV", "local")
|
||||
try:
|
||||
from hermes_constants import is_container as _is_container
|
||||
running_in_container = _is_container()
|
||||
except Exception:
|
||||
running_in_container = False
|
||||
|
||||
if running_in_container:
|
||||
# Inside our container the Docker terminal backend is not
|
||||
# configured by default (Docker-in-Docker isn't set up); the
|
||||
# local backend is the intended one. Skip the noisy "docker
|
||||
# not found" warning. If the user has explicitly chosen
|
||||
# TERMINAL_ENV=docker inside the container they likely mounted
|
||||
# /var/run/docker.sock, so fall through to the normal check.
|
||||
if terminal_env != "docker":
|
||||
check_info(
|
||||
"Running inside a container — using local terminal backend "
|
||||
"(docker-in-docker is not configured by default)"
|
||||
)
|
||||
# Skip to next section; Docker isn't relevant here.
|
||||
terminal_env = "local"
|
||||
if terminal_env == "docker":
|
||||
if _safe_which("docker"):
|
||||
# Check if docker daemon is running
|
||||
@@ -1180,8 +1098,6 @@ def run_doctor(args):
|
||||
check_ok("docker", "(optional)")
|
||||
elif _is_termux():
|
||||
check_info("Docker backend is not available inside Termux (expected on Android)")
|
||||
elif running_in_container:
|
||||
pass # already explained above
|
||||
else:
|
||||
check_warn("docker not found", "(optional)")
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@ from pathlib import Path
|
||||
from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
|
||||
def _get_git_commit(project_root: Path) -> str:
|
||||
@@ -70,8 +69,6 @@ def _count_skills(hermes_home: Path) -> int:
|
||||
return 0
|
||||
count = 0
|
||||
for item in skills_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(item):
|
||||
continue
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
+1
-131
@@ -21,44 +21,6 @@ _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
|
||||
# tests) don't spam the same warning multiple times.
|
||||
_WARNED_KEYS: set[str] = set()
|
||||
|
||||
# Map of env-var name → source label ("bitwarden", etc.) for credentials
|
||||
# that were injected by an external secret source during load_hermes_dotenv().
|
||||
# Used by setup / `hermes model` flows to label detected credentials so
|
||||
# users understand WHERE a key came from when their .env doesn't contain it
|
||||
# directly (otherwise the "credentials detected ✓" line looks identical to
|
||||
# the .env case and they don't know Bitwarden is wired up).
|
||||
_SECRET_SOURCES: dict[str, str] = {}
|
||||
|
||||
|
||||
def get_secret_source(env_var: str) -> str | None:
    """Look up which external secret source supplied ``env_var``.

    Returns:
        The label stored for ``env_var`` in ``_SECRET_SOURCES`` (for
        example ``"bitwarden"``), or None when the variable has no
        entry there.
    """
    source_label = _SECRET_SOURCES.get(env_var)
    return source_label
|
||||
|
||||
|
||||
def format_secret_source_suffix(env_var: str) -> str:
    """Build a display suffix naming the secret source of ``env_var``.

    Returns:
        ``" (from Bitwarden)"`` when the recorded source is
        ``"bitwarden"``, ``" (from <source>)"`` for any other recorded
        source, and ``""`` when no source is recorded.
    """
    source = get_secret_source(env_var)
    if source == "bitwarden":
        return " (from Bitwarden)"
    # Any other non-empty label is shown verbatim, so new sources need
    # no changes at the call sites.
    return f" (from {source})" if source else ""
|
||||
|
||||
|
||||
def _format_offending_chars(value: str, limit: int = 3) -> str:
|
||||
"""Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
|
||||
@@ -140,10 +102,6 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
|
||||
This produces mangled values — e.g. a bot token duplicated 8×
|
||||
(see #8908).
|
||||
|
||||
Also strips embedded null bytes which crash ``os.environ[k] = v``
|
||||
with ``ValueError: embedded null byte`` — typically introduced by
|
||||
copy-pasting API keys from terminals or rich-text editors.
|
||||
|
||||
We delegate to ``hermes_cli.config._sanitize_env_lines`` which
|
||||
already knows all valid Hermes env-var names and can split
|
||||
concatenated lines correctly.
|
||||
@@ -159,11 +117,7 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
|
||||
try:
|
||||
with open(path, **read_kw) as f:
|
||||
original = f.readlines()
|
||||
# Strip null bytes before _sanitize_env_lines so they never
|
||||
# reach python-dotenv (which passes them to os.environ and
|
||||
# crashes with ValueError).
|
||||
stripped = [line.replace("\x00", "") for line in original]
|
||||
sanitized = _sanitize_env_lines(stripped)
|
||||
sanitized = _sanitize_env_lines(original)
|
||||
if sanitized != original:
|
||||
import tempfile
|
||||
fd, tmp = tempfile.mkstemp(
|
||||
@@ -218,88 +172,4 @@ def load_hermes_dotenv(
|
||||
_load_dotenv_with_fallback(project_env_path, override=not loaded)
|
||||
loaded.append(project_env_path)
|
||||
|
||||
_apply_external_secret_sources(home_path)
|
||||
|
||||
return loaded
|
||||
|
||||
|
||||
def _apply_external_secret_sources(home_path: Path) -> None:
    """Pull secrets from external sources (currently Bitwarden) into env.

    Called at the end of ``load_hermes_dotenv`` so that values loaded
    from .env (such as the access token) are already in the
    environment. External secret sources must never block startup:
    every failure in here is either ignored or reported on stderr, and
    the function returns normally.

    Args:
        home_path: Hermes home directory whose ``config.yaml`` holds the
            ``secrets:`` section.
    """
    try:
        cfg = _load_secrets_config(home_path)
    except Exception:  # noqa: BLE001 — config errors must not block startup
        return

    # Guard against hand-edited configs where `secrets:` or
    # `secrets.bitwarden:` is a scalar or list rather than a mapping.
    if not isinstance(cfg, dict):
        return
    bw_cfg = cfg.get("bitwarden")
    if not isinstance(bw_cfg, dict) or not bw_cfg.get("enabled"):
        return

    try:
        from agent.secret_sources.bitwarden import apply_bitwarden_secrets
    except ImportError:
        return

    # A non-numeric TTL falls back to the default instead of raising.
    try:
        cache_ttl_seconds = float(bw_cfg.get("cache_ttl_seconds", 300))
    except (TypeError, ValueError):
        cache_ttl_seconds = 300.0

    try:
        result = apply_bitwarden_secrets(
            enabled=True,
            access_token_env=bw_cfg.get("access_token_env") or "BWS_ACCESS_TOKEN",
            project_id=bw_cfg.get("project_id") or "",
            # Fallback matches the documented default in DEFAULT_CONFIG
            # (True): Bitwarden values win over existing env vars unless
            # the user explicitly opts out.
            override_existing=bool(bw_cfg.get("override_existing", True)),
            cache_ttl_seconds=cache_ttl_seconds,
            auto_install=bool(bw_cfg.get("auto_install", True)),
            server_url=str(bw_cfg.get("server_url", "") or "").strip(),
        )
    except Exception as exc:  # noqa: BLE001 — must not block startup
        print(
            f" Bitwarden Secrets Manager: {exc}",
            file=sys.stderr,
        )
        return

    if result.applied:
        # Re-run the credential sanitization pass: BSM values are
        # user-supplied and can carry the same copy-paste corruption as
        # a manually edited .env.
        _sanitize_loaded_credentials()
        # Record the origin of each applied variable so callers of
        # get_secret_source() can label it.
        for name in result.applied:
            _SECRET_SOURCES[name] = "bitwarden"
        print(
            f" Bitwarden Secrets Manager: applied {len(result.applied)} "
            f"secret{'s' if len(result.applied) != 1 else ''} "
            f"({', '.join(sorted(result.applied))})",
            file=sys.stderr,
        )
    if result.error:
        print(
            f" Bitwarden Secrets Manager: {result.error}",
            file=sys.stderr,
        )
    for warn in result.warnings:
        print(
            f" Bitwarden Secrets Manager: {warn}",
            file=sys.stderr,
        )
|
||||
|
||||
|
||||
def _load_secrets_config(home_path: Path) -> dict:
    """Read just the ``secrets:`` section out of ``config.yaml``.

    PyYAML is imported lazily and the file is read independently of the
    main config loader, so a malformed config cannot take down dotenv
    loading.

    Args:
        home_path: Directory expected to contain ``config.yaml``.

    Returns:
        The ``secrets`` mapping, or an empty dict when the file is
        missing, PyYAML is unavailable, the file cannot be read or
        parsed, the document is not a mapping, or ``secrets`` itself is
        not a mapping.
    """
    config_path = home_path / "config.yaml"
    if not config_path.exists():
        return {}
    try:
        import yaml  # type: ignore
    except ImportError:
        return {}
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception:  # noqa: BLE001
        return {}
    # safe_load can yield a list, scalar, or None for valid YAML; only
    # a mapping can carry a `secrets` section.
    if not isinstance(data, dict):
        return {}
    secrets = data.get("secrets")
    return secrets if isinstance(secrets, dict) else {}
|
||||
|
||||
@@ -21,8 +21,6 @@ from __future__ import annotations
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from hermes_cli.fallback_config import get_fallback_chain
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
@@ -32,11 +30,20 @@ def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Return the normalized fallback chain as a list of dicts.
|
||||
|
||||
Accepts both the new list format (``fallback_providers``) and the legacy
|
||||
``fallback_model`` format. When both are present, the effective chain is
|
||||
merged with ``fallback_providers`` entries kept first. The returned list is
|
||||
always a fresh copy — callers can mutate without touching the config dict.
|
||||
single-dict format (``fallback_model``). The returned list is always a
|
||||
fresh copy — callers can mutate without touching the config dict.
|
||||
"""
|
||||
return get_fallback_chain(config)
|
||||
chain = config.get("fallback_providers") or []
|
||||
if isinstance(chain, list):
|
||||
result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
|
||||
if result:
|
||||
return result
|
||||
legacy = config.get("fallback_model")
|
||||
if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
|
||||
return [dict(legacy)]
|
||||
if isinstance(legacy, list):
|
||||
return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
|
||||
return []
|
||||
|
||||
|
||||
def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
"""Helpers for reading the effective fallback provider chain from config."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
|
||||
def _normalized_base_url(value: Any) -> str:
|
||||
if not isinstance(value, str):
|
||||
return ""
|
||||
return value.strip().rstrip("/")
|
||||
|
||||
|
||||
def _iter_fallback_entries(raw: Any) -> list[dict[str, Any]]:
|
||||
if isinstance(raw, dict):
|
||||
candidates = [raw]
|
||||
elif isinstance(raw, list):
|
||||
candidates = raw
|
||||
else:
|
||||
return []
|
||||
|
||||
entries: list[dict[str, Any]] = []
|
||||
for entry in candidates:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
provider = str(entry.get("provider") or "").strip()
|
||||
model = str(entry.get("model") or "").strip()
|
||||
if not provider or not model:
|
||||
continue
|
||||
|
||||
normalized = dict(entry)
|
||||
normalized["provider"] = provider
|
||||
normalized["model"] = model
|
||||
|
||||
base_url = _normalized_base_url(entry.get("base_url"))
|
||||
if base_url:
|
||||
normalized["base_url"] = base_url
|
||||
|
||||
entries.append(normalized)
|
||||
return entries
|
||||
|
||||
|
||||
def _entry_identity(entry: dict[str, Any]) -> tuple[str, str, str]:
    """Return the case-insensitive ``(provider, model, base_url)`` key for *entry*.

    Provider and model are stringified, stripped and lower-cased; the base
    URL goes through ``_normalized_base_url`` before lower-casing. Missing
    or falsy fields contribute an empty string.
    """
    provider_key = str(entry.get("provider") or "").strip().lower()
    model_key = str(entry.get("model") or "").strip().lower()
    url_key = _normalized_base_url(entry.get("base_url")).lower()
    return provider_key, model_key, url_key
|
||||
|
||||
|
||||
def get_fallback_chain(config: dict[str, Any] | None) -> list[dict[str, Any]]:
    """Return the effective fallback chain merged across old and new config keys.

    Entries from ``fallback_providers`` come first, in their configured
    order. Entries from the legacy ``fallback_model`` key follow, except
    those whose provider/model/base_url identity was already seen.

    Args:
        config: The loaded configuration mapping; ``None`` or an empty
            mapping yields an empty chain.

    Returns:
        A list of normalized entry dicts. Each is a fresh copy produced by
        ``_iter_fallback_entries``.
    """
    if not config:
        config = {}

    merged: list[dict[str, Any]] = []
    known_routes: set[tuple[str, str, str]] = set()

    # Key order matters: the new-style list wins over the legacy key when
    # both describe the same route.
    for source_key in ("fallback_providers", "fallback_model"):
        for candidate in _iter_fallback_entries(config.get(source_key)):
            route = _entry_identity(candidate)
            if route not in known_routes:
                known_routes.add(route)
                merged.append(candidate)

    return merged
|
||||
+35
-191
@@ -981,18 +981,6 @@ def get_gateway_runtime_snapshot(system: bool = False) -> GatewayRuntimeSnapshot
|
||||
from hermes_constants import is_container
|
||||
|
||||
if is_linux() and is_container():
|
||||
# Phase 4: report s6 supervision when running under our /init.
|
||||
# Other container runtimes (or containers built before Phase 2)
|
||||
# still get the original "docker (foreground)" label.
|
||||
try:
|
||||
from hermes_cli.service_manager import detect_service_manager
|
||||
if detect_service_manager() == "s6":
|
||||
return GatewayRuntimeSnapshot(
|
||||
manager="s6 (container supervisor)",
|
||||
gateway_pids=gateway_pids,
|
||||
)
|
||||
except Exception:
|
||||
pass # Fall through to the legacy label on any detection error.
|
||||
return GatewayRuntimeSnapshot(
|
||||
manager="docker (foreground)",
|
||||
gateway_pids=gateway_pids,
|
||||
@@ -1214,17 +1202,7 @@ def _systemd_operational(system: bool = False) -> bool:
|
||||
|
||||
|
||||
def _container_systemd_operational() -> bool:
|
||||
"""Return True when a container exposes working user or system systemd.
|
||||
|
||||
This is NOT our Hermes Docker image — that one runs s6-overlay as
|
||||
PID 1 (since Phase 2 of the s6-overlay supervision plan) and is
|
||||
detected via ``service_manager.detect_service_manager() == "s6"``.
|
||||
This function handles the "container managed by something else"
|
||||
case: systemd-nspawn, certain k8s pods, containers built FROM
|
||||
systemd-bearing distros where the user has wired systemd as their
|
||||
init. In those environments systemctl behaves identically to the
|
||||
host case, so we fall through to the normal systemd code paths.
|
||||
"""
|
||||
"""Return True when a container exposes working user or system systemd."""
|
||||
if _systemd_operational(system=False):
|
||||
return True
|
||||
if _systemd_operational(system=True):
|
||||
@@ -3349,9 +3327,34 @@ _PLATFORMS = [
|
||||
"help": "For DMs, this is your user ID. You can set it later by typing /set-home in chat."},
|
||||
],
|
||||
},
|
||||
# Discord moved to plugins/platforms/discord/ — its setup metadata is
|
||||
# discovered dynamically via _all_platforms() from the platform registry
|
||||
# entry registered by plugins/platforms/discord/adapter.py::register().
|
||||
{
|
||||
"key": "discord",
|
||||
"label": "Discord",
|
||||
"emoji": "💬",
|
||||
"token_var": "DISCORD_BOT_TOKEN",
|
||||
"setup_instructions": [
|
||||
"1. Go to https://discord.com/developers/applications → New Application",
|
||||
"2. Go to Bot → Reset Token → copy the bot token",
|
||||
"3. Enable: Bot → Privileged Gateway Intents → Message Content Intent",
|
||||
"4. Invite the bot to your server:",
|
||||
" OAuth2 → URL Generator → check BOTH scopes:",
|
||||
" - bot",
|
||||
" - applications.commands (required for slash commands!)",
|
||||
" Bot Permissions: Send Messages, Read Message History, Attach Files",
|
||||
" Copy the URL and open it in your browser to invite.",
|
||||
"5. Get your user ID: enable Developer Mode in Discord settings,",
|
||||
" then right-click your name → Copy ID",
|
||||
],
|
||||
"vars": [
|
||||
{"name": "DISCORD_BOT_TOKEN", "prompt": "Bot token", "password": True,
|
||||
"help": "Paste the token from step 2 above."},
|
||||
{"name": "DISCORD_ALLOWED_USERS", "prompt": "Allowed user IDs or usernames (comma-separated)", "password": False,
|
||||
"is_allowlist": True,
|
||||
"help": "Paste your user ID from step 5 above."},
|
||||
{"name": "DISCORD_HOME_CHANNEL", "prompt": "Home channel ID (for cron/notification delivery, or empty to set later with /set-home)", "password": False,
|
||||
"help": "Right-click a channel → Copy Channel ID (requires Developer Mode)."},
|
||||
],
|
||||
},
|
||||
{
|
||||
"key": "slack",
|
||||
"label": "Slack",
|
||||
@@ -3759,12 +3762,7 @@ def _platform_status(platform: dict) -> str:
|
||||
configured = bool(entry.is_connected(synthetic))
|
||||
except Exception:
|
||||
configured = False
|
||||
else:
|
||||
# No is_connected hook — fall back to check_fn as a coarse
|
||||
# "are deps present" gate. Don't fall back when is_connected
|
||||
# is defined and returned False; that would let "SDK is
|
||||
# installed" override "no token configured" and incorrectly
|
||||
# report the platform as ready.
|
||||
if not configured:
|
||||
try:
|
||||
configured = bool(entry.check_fn())
|
||||
except Exception:
|
||||
@@ -4020,11 +4018,15 @@ def _setup_dingtalk():
|
||||
client_id, client_secret = result
|
||||
save_env_value("DINGTALK_CLIENT_ID", client_id)
|
||||
save_env_value("DINGTALK_CLIENT_SECRET", client_secret)
|
||||
save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")
|
||||
print()
|
||||
print_success(f"{emoji} {label} configured via QR scan!")
|
||||
else:
|
||||
# ── Manual entry ──
|
||||
_setup_standard_platform(dingtalk_platform)
|
||||
# Also enable allow-all by default for convenience
|
||||
if get_env_value("DINGTALK_CLIENT_ID"):
|
||||
save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")
|
||||
|
||||
|
||||
def _setup_wecom():
|
||||
@@ -4745,9 +4747,7 @@ def _builtin_setup_fn(key: str):
|
||||
from hermes_cli import setup as _s
|
||||
return {
|
||||
"telegram": _s._setup_telegram,
|
||||
# discord moved into the plugin: setup_fn is registered by
|
||||
# plugins/platforms/discord/adapter.py::register() and dispatched
|
||||
# via the plugin path in _configure_platform().
|
||||
"discord": _s._setup_discord,
|
||||
"slack": _s._setup_slack,
|
||||
"matrix": _s._setup_matrix,
|
||||
"mattermost": _s._setup_mattermost,
|
||||
@@ -5025,108 +5025,6 @@ def gateway_setup():
|
||||
# Main Command Handler
|
||||
# =============================================================================
|
||||
|
||||
def _dispatch_via_service_manager_if_s6(
    action: str, profile: str | None = None,
) -> bool:
    """Route a gateway lifecycle action through s6 when s6 is the service manager.

    Args:
        action: One of ``start`` / ``stop`` / ``restart``. Any other value
            is not dispatched.
        profile: Profile whose ``gateway-<profile>`` service is targeted.
            When ``None`` it is taken from ``_profile_suffix()``, falling
            back to ``"default"`` if that returns an empty value.

    Returns:
        True iff the action was handed to the service manager (the caller
        should ``return``); False when s6 is not detected or *action* is
        unrecognized, so the caller continues with the host-side code path.

    ``GatewayNotRegisteredError`` and ``S6CommandError`` from
    :mod:`hermes_cli.service_manager` are printed as a one-line CLI message
    and the process exits with status 1 instead of surfacing a traceback.
    """
    from hermes_cli.service_manager import (
        GatewayNotRegisteredError,
        S6CommandError,
        detect_service_manager,
        get_service_manager,
    )

    if detect_service_manager() != "s6":
        return False
    if profile is None:
        # An empty suffix is mapped to "default" so the default-profile
        # gateway is addressed as gateway-default.
        profile = _profile_suffix() or "default"
    manager = get_service_manager()
    target = f"gateway-{profile}"
    if action not in ("start", "stop", "restart"):
        return False
    try:
        # The manager exposes one method per supported action name.
        getattr(manager, action)(target)
    except (GatewayNotRegisteredError, S6CommandError) as exc:
        print(f"✗ {exc}")
        sys.exit(1)
    return True
|
||||
|
||||
|
||||
def _dispatch_all_via_service_manager_if_s6(action: str) -> bool:
    """Apply an ``--all`` stop/restart to every profile gateway registered under s6.

    Sending the command through the service manager (rather than killing
    gateway processes) is what lets a stop actually stay stopped: per the
    original design note, s6-supervise would otherwise treat the kill as a
    crash and bring each gateway straight back up.

    Args:
        action: ``stop`` or ``restart``. Other values are not dispatched
            (``start --all`` is not a supported CLI surface).

    Returns:
        True iff s6 handled the request (the caller should ``return``),
        including the case where no profile gateways are registered; False
        when s6 is not detected or *action* is unsupported.

    Failures for individual gateways are collected and reported after the
    loop; they do not stop the remaining gateways from being processed.
    """
    from hermes_cli.service_manager import (
        detect_service_manager,
        get_service_manager,
    )

    if detect_service_manager() != "s6" or action not in ("stop", "restart"):
        return False

    manager = get_service_manager()
    names = manager.list_profile_gateways()
    if not names:
        print("✗ No profile gateways registered under s6")
        return True

    if action == "stop":
        operate, past_tense = manager.stop, "stopped"
    else:
        operate, past_tense = manager.restart, "restarted"

    failures: list[tuple[str, Exception]] = []
    for name in names:
        try:
            operate(f"gateway-{name}")
        except Exception as exc:  # noqa: BLE001 — report and continue
            failures.append((name, exc))

    ok_count = len(names) - len(failures)
    if ok_count:
        print(f"✓ {past_tense.capitalize()} {ok_count} profile gateway(s) under s6")
    for name, exc in failures:
        print(f"✗ Could not {action} gateway-{name}: {exc}")
    return True
|
||||
|
||||
|
||||
def gateway_command(args):
|
||||
"""Handle gateway subcommands."""
|
||||
try:
|
||||
@@ -5211,21 +5109,6 @@ def _gateway_command_inner(args):
|
||||
print(" nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 & # background")
|
||||
sys.exit(1)
|
||||
elif is_container():
|
||||
# Phase 4: inside a container with s6 the gateway service is
|
||||
# auto-registered when the profile is created (and reconciled
|
||||
# at every container boot). `install` is therefore informational.
|
||||
from hermes_cli.service_manager import detect_service_manager
|
||||
if detect_service_manager() == "s6":
|
||||
print("Per-profile gateways are auto-registered when you create a profile.")
|
||||
print()
|
||||
print(" hermes profile create <name> # creates the s6 service slot")
|
||||
print(" hermes -p <name> gateway start # bring it up via s6")
|
||||
print(" hermes status # see currently-supervised gateways")
|
||||
return
|
||||
# Fallback for pre-s6 containers or other container runtimes
|
||||
# we haven't taught about supervision (Podman without our
|
||||
# /init, k8s plain runs, etc.) — the historical guidance still
|
||||
# applies.
|
||||
print("Service installation is not needed inside a Docker container.")
|
||||
print("The container runtime is your service manager — use Docker restart policies instead:")
|
||||
print()
|
||||
@@ -5256,13 +5139,6 @@ def _gateway_command_inner(args):
|
||||
from hermes_cli import gateway_windows
|
||||
gateway_windows.uninstall()
|
||||
elif is_container():
|
||||
from hermes_cli.service_manager import detect_service_manager
|
||||
if detect_service_manager() == "s6":
|
||||
print("Per-profile gateways are auto-unregistered when you delete the profile.")
|
||||
print()
|
||||
print(" hermes profile delete <name> # tears down the s6 service slot")
|
||||
print(" hermes -p <name> gateway stop # stop without deleting the profile")
|
||||
return
|
||||
print("Service uninstall is not applicable inside a Docker container.")
|
||||
print("To stop the gateway, stop or remove the container:")
|
||||
print()
|
||||
@@ -5277,14 +5153,6 @@ def _gateway_command_inner(args):
|
||||
system = getattr(args, 'system', False)
|
||||
start_all = getattr(args, 'all', False)
|
||||
|
||||
# Phase 4: inside a container with s6, dispatch via the service
|
||||
# manager instead of falling through to systemd/launchd/windows.
|
||||
# `--all` isn't meaningful here (each profile has its own service
|
||||
# slot — start them individually via `hermes -p <name> gateway
|
||||
# start`), so just bring up the current profile's slot.
|
||||
if not start_all and _dispatch_via_service_manager_if_s6("start"):
|
||||
return
|
||||
|
||||
if start_all:
|
||||
# Kill all stale gateway processes across all profiles before starting
|
||||
killed = kill_gateway_processes(all_profiles=True)
|
||||
@@ -5314,11 +5182,6 @@ def _gateway_command_inner(args):
|
||||
print("To enable systemd: add systemd=true to /etc/wsl.conf and run 'wsl --shutdown' from PowerShell.")
|
||||
sys.exit(1)
|
||||
elif is_container():
|
||||
# Reached only when s6 ISN'T running (the early dispatch
|
||||
# above handles the s6 case). Pre-s6 containers or other
|
||||
# container runtimes that don't ship our /init get the
|
||||
# historical guidance: the gateway is the container's main
|
||||
# process, so use docker lifecycle commands.
|
||||
print("Service start is not applicable inside a Docker container.")
|
||||
print("The gateway runs as the container's main process.")
|
||||
print()
|
||||
@@ -5335,15 +5198,6 @@ def _gateway_command_inner(args):
|
||||
stop_all = getattr(args, 'all', False)
|
||||
system = getattr(args, 'system', False)
|
||||
|
||||
# Phase 4: inside a container with s6, dispatch via the service
|
||||
# manager. ``--all`` iterates every registered profile gateway
|
||||
# through s6 (otherwise it would fall through to ``pkill``,
|
||||
# which s6-supervise observes as a crash and immediately restarts).
|
||||
if stop_all and _dispatch_all_via_service_manager_if_s6("stop"):
|
||||
return
|
||||
if not stop_all and _dispatch_via_service_manager_if_s6("stop"):
|
||||
return
|
||||
|
||||
if stop_all:
|
||||
# --all: kill every gateway process on the machine
|
||||
service_available = False
|
||||
@@ -5413,16 +5267,6 @@ def _gateway_command_inner(args):
|
||||
restart_all = getattr(args, 'all', False)
|
||||
service_configured = False
|
||||
|
||||
# Phase 4: inside a container with s6, dispatch via the service
|
||||
# manager (s6-svc -t restarts the supervised process). ``--all``
|
||||
# iterates every registered profile gateway through s6; without
|
||||
# this it would fall through to ``pkill``, which s6-supervise
|
||||
# would observe as a crash and immediately restart anyway.
|
||||
if restart_all and _dispatch_all_via_service_manager_if_s6("restart"):
|
||||
return
|
||||
if not restart_all and _dispatch_via_service_manager_if_s6("restart"):
|
||||
return
|
||||
|
||||
if restart_all:
|
||||
# --all: stop every gateway process across all profiles, then start fresh
|
||||
service_stopped = False
|
||||
|
||||
@@ -365,9 +365,7 @@ def _write_task_script() -> Path:
|
||||
|
||||
content = _build_gateway_cmd_script(python_path, working_dir, hermes_home, profile_arg)
|
||||
script_path = get_task_script_path()
|
||||
tmp = script_path.with_suffix(".tmp")
|
||||
tmp.write_text(content, encoding="utf-8", newline="")
|
||||
tmp.replace(script_path)
|
||||
script_path.write_text(content, encoding="utf-8", newline="")
|
||||
return script_path
|
||||
|
||||
|
||||
@@ -438,9 +436,7 @@ def _install_startup_entry(script_path: Path) -> Path:
|
||||
"""Write the Startup-folder fallback launcher. Returns its path."""
|
||||
entry = get_startup_entry_path()
|
||||
entry.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = entry.with_suffix(".tmp")
|
||||
tmp.write_text(_build_startup_launcher(script_path), encoding="utf-8", newline="")
|
||||
tmp.replace(entry)
|
||||
entry.write_text(_build_startup_launcher(script_path), encoding="utf-8", newline="")
|
||||
return entry
|
||||
|
||||
|
||||
|
||||
@@ -550,39 +550,6 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
|
||||
p_unblock = sub.add_parser("unblock", help="Return one or more blocked/scheduled tasks to ready")
|
||||
p_unblock.add_argument("task_ids", nargs="+")
|
||||
|
||||
p_promote = sub.add_parser(
|
||||
"promote",
|
||||
help="Manually move one or more todo/blocked tasks to ready (recovery path)",
|
||||
)
|
||||
p_promote.add_argument("task_id")
|
||||
p_promote.add_argument(
|
||||
"reason",
|
||||
nargs="*",
|
||||
help="Audit-trail reason (recorded on the task_events row)",
|
||||
)
|
||||
p_promote.add_argument(
|
||||
"--ids",
|
||||
nargs="+",
|
||||
default=None,
|
||||
help="Additional task ids to promote with the same reason (bulk mode)",
|
||||
)
|
||||
p_promote.add_argument(
|
||||
"--force",
|
||||
action="store_true",
|
||||
help="Promote even if parent dependencies are not yet done/archived",
|
||||
)
|
||||
p_promote.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="Validate the promotion without mutating state",
|
||||
)
|
||||
p_promote.add_argument(
|
||||
"--json",
|
||||
dest="json",
|
||||
action="store_true",
|
||||
help="Emit machine-readable JSON result",
|
||||
)
|
||||
|
||||
p_archive = sub.add_parser("archive", help="Archive one or more tasks")
|
||||
p_archive.add_argument("task_ids", nargs="*",
|
||||
help="Task ids to archive (default mode)")
|
||||
@@ -932,7 +899,6 @@ def kanban_command(args: argparse.Namespace) -> int:
|
||||
"block": _cmd_block,
|
||||
"schedule": _cmd_schedule,
|
||||
"unblock": _cmd_unblock,
|
||||
"promote": _cmd_promote,
|
||||
"archive": _cmd_archive,
|
||||
"tail": _cmd_tail,
|
||||
"dispatch": _cmd_dispatch,
|
||||
@@ -1989,57 +1955,6 @@ def _cmd_unblock(args: argparse.Namespace) -> int:
|
||||
return 0 if not failed else 1
|
||||
|
||||
|
||||
def _cmd_promote(args: argparse.Namespace) -> int:
    """Handle ``kanban promote``: move one or more tasks to ready.

    The positional ``task_id`` plus any ``--ids`` values are de-duplicated
    (first occurrence wins, positional id first) and each is passed to
    ``kb.promote_task`` on a single connection with the same reason,
    ``--force`` and ``--dry-run`` settings.

    Output is JSON when ``--json`` is set (a flat object for one id, a list
    for several); otherwise one line per task, with failures on stderr.

    Returns:
        0 when every task was promoted, 1 if any promotion failed.
    """
    reason = None
    if args.reason:
        reason = " ".join(args.reason).strip()
    author = _profile_author()
    emit_json = getattr(args, "json", False)
    forced = bool(args.force)
    dry = bool(args.dry_run)

    # dict.fromkeys keeps first-seen order, so the positional id stays first.
    requested = [args.task_id, *(getattr(args, "ids", None) or [])]
    ids: list[str] = list(dict.fromkeys(requested))

    results: list[dict[str, object]] = []
    with kb.connect() as conn:
        for tid in ids:
            ok, err = kb.promote_task(
                conn,
                tid,
                actor=author,
                reason=reason,
                force=forced,
                dry_run=dry,
            )
            results.append({
                "task_id": tid,
                "promoted": ok,
                "dry_run": dry,
                "forced": forced,
                "reason": reason,
                "error": err,
            })

    exit_code = 0 if all(r["promoted"] for r in results) else 1

    if emit_json:
        # Single-id stays a flat object for back-compat; bulk emits a list.
        payload: object = results[0] if len(results) == 1 else results
        print(json.dumps(payload, indent=2, ensure_ascii=False))
        return exit_code

    if dry:
        tag, label = " (dry)", "Would promote"
    else:
        tag, label = "", "Promoted"
    suffix = f": {reason}" if reason else ""
    for r in results:
        if not r["promoted"]:
            print(f"cannot promote {r['task_id']}: {r['error']}", file=sys.stderr)
            continue
        print(f"{label} {r['task_id']} -> ready{tag}{suffix}")
    return exit_code
|
||||
|
||||
|
||||
def _cmd_archive(args: argparse.Namespace) -> int:
|
||||
ids = list(args.task_ids or [])
|
||||
purge_ids = list(getattr(args, "purge_ids", None) or [])
|
||||
|
||||
+4
-392
@@ -75,7 +75,6 @@ import json
|
||||
import os
|
||||
import re
|
||||
import secrets
|
||||
import shutil
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
@@ -83,7 +82,6 @@ import threading
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
@@ -1007,131 +1005,6 @@ def _validate_sqlite_header(path: Path) -> None:
|
||||
)
|
||||
|
||||
|
||||
class KanbanDbCorruptError(RuntimeError):
    """Signal that an existing kanban DB file failed its integrity checks.

    Raised instead of silently recreating the board on top of a damaged
    file, which would destroy the user's tasks. The instance records the
    DB path, the backup path (``None`` when the backup copy failed) and
    the reason, and its message mentions all three.
    """

    def __init__(self, db_path: Path, backup_path: Optional[Path], reason: str):
        if backup_path is None:
            backup_label = "<backup failed>"
        else:
            backup_label = str(backup_path)
        message = (
            f"Refusing to open corrupt kanban DB at {db_path}: {reason}. "
            f"Original preserved; backup at {backup_label}."
        )
        super().__init__(message)
        self.db_path = db_path
        self.backup_path = backup_path
        self.reason = reason
|
||||
|
||||
|
||||
def _backup_corrupt_db(path: Path) -> Optional[Path]:
|
||||
"""Copy a corrupt DB (and its WAL/SHM sidecars) to a timestamped backup.
|
||||
|
||||
Returns the backup path of the main DB file, or ``None`` if the copy
|
||||
itself failed (the caller still raises loudly in that case).
|
||||
|
||||
Writes are confined to the original DB's parent directory. The
|
||||
backup basename is derived purely from ``path.name``, never from
|
||||
caller-supplied directory segments — no traversal is possible.
|
||||
"""
|
||||
# Resolve once and pin the parent so subsequent path operations cannot
|
||||
# escape it. ``Path.resolve()`` collapses any ``..`` segments and
|
||||
# symlinks, and we only ever write inside ``parent``.
|
||||
resolved = path.resolve()
|
||||
parent = resolved.parent
|
||||
base_name = resolved.name # basename only
|
||||
stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
candidate = parent / f"{base_name}.corrupt.{stamp}.bak"
|
||||
# Defensive: candidate must still be inside parent after construction.
|
||||
# f-string interpolation of ``base_name`` cannot escape ``parent``
|
||||
# because ``base_name`` is itself a resolved basename, but assert it
|
||||
# anyway so static analyzers can see the containment guarantee.
|
||||
if candidate.parent != parent:
|
||||
return None
|
||||
counter = 0
|
||||
while candidate.exists():
|
||||
counter += 1
|
||||
candidate = parent / f"{base_name}.corrupt.{stamp}.{counter}.bak"
|
||||
if candidate.parent != parent:
|
||||
return None
|
||||
try:
|
||||
shutil.copy2(resolved, candidate)
|
||||
except OSError:
|
||||
return None
|
||||
for suffix in ("-wal", "-shm"):
|
||||
sidecar = parent / (base_name + suffix)
|
||||
if sidecar.parent != parent or not sidecar.exists():
|
||||
continue
|
||||
try:
|
||||
sidecar_backup = parent / (candidate.name + suffix)
|
||||
if sidecar_backup.parent != parent:
|
||||
continue
|
||||
shutil.copy2(sidecar, sidecar_backup)
|
||||
except OSError:
|
||||
pass
|
||||
return candidate
|
||||
|
||||
|
||||
def _guard_existing_db_is_healthy(path: Path) -> None:
|
||||
"""Run ``PRAGMA integrity_check`` on an existing non-empty DB file.
|
||||
|
||||
Opens the probe in read/write mode so SQLite can recover or
|
||||
checkpoint a healthy WAL/hot-journal DB before we declare it
|
||||
corrupt. If the file is malformed, copy it (and any WAL/SHM
|
||||
sidecars) to a timestamped backup and raise
|
||||
:class:`KanbanDbCorruptError` so callers cannot silently recreate
|
||||
the schema on top of a damaged DB.
|
||||
|
||||
Transient lock/busy errors (``sqlite3.OperationalError``) are NOT
|
||||
treated as corruption; they propagate raw so the caller sees a
|
||||
normal lock failure and no spurious ``.corrupt`` backup is made.
|
||||
|
||||
No-op for missing files, zero-byte files (treated as fresh), and
|
||||
paths already proven healthy this process (cache hit).
|
||||
|
||||
Path-trust note: ``path`` arrives via :func:`connect`, which itself
|
||||
resolves it from an explicit ``db_path`` argument, the
|
||||
:func:`kanban_db_path` env-var chain, or the kanban-home default —
|
||||
all sources Hermes treats as user-controlled-but-trusted on the
|
||||
user's own machine. We additionally resolve the path here and
|
||||
confine all filesystem writes to its parent directory so any
|
||||
accidental ``..`` segments are collapsed before any I/O happens.
|
||||
"""
|
||||
# Resolve before any I/O. ``Path.resolve()`` normalizes ``..`` and
|
||||
# symlinks, giving us a canonical path whose parent dir we can pin.
|
||||
try:
|
||||
resolved = path.resolve()
|
||||
except OSError:
|
||||
return
|
||||
try:
|
||||
if not resolved.exists() or resolved.stat().st_size == 0:
|
||||
return
|
||||
except OSError:
|
||||
return
|
||||
if str(resolved) in _INITIALIZED_PATHS:
|
||||
return
|
||||
reason: Optional[str] = None
|
||||
try:
|
||||
probe = sqlite3.connect(str(resolved), timeout=5, isolation_level=None)
|
||||
try:
|
||||
row = probe.execute("PRAGMA integrity_check").fetchone()
|
||||
finally:
|
||||
probe.close()
|
||||
if not row or (row[0] or "").lower() != "ok":
|
||||
reason = f"integrity_check returned {row[0] if row else '<no row>'!r}"
|
||||
except sqlite3.OperationalError:
|
||||
# Lock contention, busy, transient IO — not corruption. Let it propagate.
|
||||
raise
|
||||
except sqlite3.DatabaseError as exc:
|
||||
reason = f"sqlite refused to open file: {exc}"
|
||||
if reason is None:
|
||||
return
|
||||
backup = _backup_corrupt_db(resolved)
|
||||
raise KanbanDbCorruptError(resolved, backup, reason)
|
||||
|
||||
|
||||
def connect(
|
||||
db_path: Optional[Path] = None,
|
||||
*,
|
||||
@@ -1160,13 +1033,7 @@ def connect(
|
||||
else:
|
||||
path = kanban_db_path(board=board)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Cheap byte-level check first — catches the #29507 TLS-overwrite shape
|
||||
# and other invalid-header cases without opening a sqlite connection.
|
||||
_validate_sqlite_header(path)
|
||||
# Full integrity probe — catches corruption past the header (malformed
|
||||
# pages, broken internal metadata). Cached per-path after first success
|
||||
# via _INITIALIZED_PATHS so it only runs once per process per path.
|
||||
_guard_existing_db_is_healthy(path)
|
||||
resolved = str(path.resolve())
|
||||
conn = sqlite3.connect(str(path), isolation_level=None, timeout=30)
|
||||
try:
|
||||
@@ -1651,15 +1518,8 @@ def create_task(
|
||||
now = int(time.time())
|
||||
|
||||
# Resolve workspace_path from board-level default_workdir when the
|
||||
# caller did not specify one explicitly. Board defaults represent
|
||||
# persistent project checkouts, so only persistent workspace kinds may
|
||||
# inherit them. Scratch workspaces are auto-deleted on completion and
|
||||
# must stay under the per-board scratch root created by
|
||||
# ``resolve_workspace``; inheriting ``default_workdir`` for a scratch
|
||||
# task would point cleanup at the user's source tree (#28818). The
|
||||
# containment guard in ``_cleanup_workspace`` is the safety rail, but
|
||||
# we also stop the bad state from being created in the first place.
|
||||
if workspace_path is None and workspace_kind in {"dir", "worktree"}:
|
||||
# caller did not specify one explicitly.
|
||||
if workspace_path is None:
|
||||
board_slug = board if board else get_current_board()
|
||||
board_meta = read_board_metadata(board_slug)
|
||||
board_default = board_meta.get("default_workdir")
|
||||
@@ -3044,81 +2904,6 @@ def complete_task(
|
||||
# Workspace / tmux cleanup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _managed_scratch_roots() -> list[Path]:
    """Collect the resolved kanban-managed ``workspaces/`` roots.

    Sources, in order:

    * ``HERMES_KANBAN_WORKSPACES_ROOT`` when set to a non-blank value.
    * ``<kanban_home>/kanban/workspaces``.
    * ``<kanban_home>/kanban/boards/<entry>/workspaces`` for every directory
      currently present under ``boards/``.

    Any root whose discovery raises ``OSError`` is skipped; the roots that
    could be determined are still returned.
    """
    roots: list[Path] = []

    override = os.environ.get("HERMES_KANBAN_WORKSPACES_ROOT", "").strip()
    if override:
        try:
            roots.append(Path(override).expanduser().resolve(strict=False))
        except OSError:
            pass

    try:
        home = kanban_home()
    except OSError:
        return roots
    if home is None:
        return roots

    try:
        roots.append((home / "kanban" / "workspaces").resolve(strict=False))
    except OSError:
        pass

    try:
        boards_parent = (home / "kanban" / "boards").resolve(strict=False)
        entries = list(boards_parent.iterdir())
    except OSError:
        # Missing / unreadable boards directory: no per-board roots to add.
        return roots

    for entry in entries:
        try:
            if entry.is_dir():
                roots.append((entry / "workspaces").resolve(strict=False))
        except OSError:
            continue
    return roots


def _is_managed_scratch_path(p: Path) -> bool:
    """Return True iff *p* is a strict descendant of a kanban-managed scratch root.

    A managed root is exclusively a ``workspaces/`` directory (see
    :func:`_managed_scratch_roots`) — never the broader kanban home, a board
    root, or sibling subtrees such as ``logs/``.

    The check requires strict descendancy: a path equal to one of the roots
    is NOT managed, because deleting a workspaces root would wipe every
    task's scratch dir at once. Paths are compared after
    ``resolve(strict=False)``; if *p* cannot be resolved the answer is
    ``False``.

    Used by :func:`_cleanup_workspace` to refuse to ``shutil.rmtree`` paths
    outside Hermes-managed storage. A board ``default_workdir`` pointing at a
    real source tree can otherwise pair with ``workspace_kind='scratch'`` and
    cause task completion to delete user data (#28818).
    """
    try:
        p_abs = p.resolve(strict=False)
    except OSError:
        return False
    # ``is_relative_to`` returns a bool (it never raises ValueError), so no
    # exception handling is needed around the containment test.
    return any(
        p_abs != root and p_abs.is_relative_to(root)
        for root in _managed_scratch_roots()
    )
|
||||
|
||||
|
||||
def _cleanup_workspace(conn: sqlite3.Connection, task_id: str) -> None:
|
||||
"""Remove a task's scratch workspace dir and kill its stale tmux session.
|
||||
|
||||
@@ -3141,21 +2926,8 @@ def _cleanup_workspace(conn: sqlite3.Connection, task_id: str) -> None:
|
||||
import shutil
|
||||
wp = Path(path)
|
||||
if wp.is_dir():
|
||||
# Containment guard (#28818): a board's ``default_workdir`` can
|
||||
# pair ``workspace_kind='scratch'`` with a user-supplied path
|
||||
# pointing at a real source tree. Without this check, task
|
||||
# completion would unconditionally ``shutil.rmtree`` that path
|
||||
# and silently delete the user's source data.
|
||||
if _is_managed_scratch_path(wp):
|
||||
shutil.rmtree(wp, ignore_errors=True)
|
||||
_log.debug("Removed scratch workspace: %s", wp)
|
||||
else:
|
||||
_log.warning(
|
||||
"Refusing to remove out-of-scratch workspace for task %s: %s "
|
||||
"(workspace_kind='scratch' but path is outside any "
|
||||
"kanban-managed workspaces root)",
|
||||
task_id, wp,
|
||||
)
|
||||
shutil.rmtree(wp, ignore_errors=True)
|
||||
_log.debug("Removed scratch workspace: %s", wp)
|
||||
# Also kill the tmux session for the worker that owned this task,
|
||||
# if the tmux session is now dead (worker process exited).
|
||||
_cleanup_worker_tmux(conn, task_id)
|
||||
@@ -3189,93 +2961,6 @@ def _cleanup_worker_tmux(conn: sqlite3.Connection, task_id: str) -> None:
|
||||
pass # best-effort — never block completion
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# First-use tip for scratch workspaces
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Scratch workspaces are intentionally ephemeral — ``_cleanup_workspace``
|
||||
# removes them as soon as ``complete_task`` runs. New users often don't
|
||||
# realize that and lose worker output (community report, May 2026). The
|
||||
# behavior is right; the lack of warning is the bug.
|
||||
#
|
||||
# On the FIRST scratch workspace materialization across the whole install
|
||||
# we:
|
||||
# 1. Log a warning line on the dispatcher logger.
|
||||
# 2. Append a ``tip_scratch_workspace`` event on the task so it's visible
|
||||
# via ``hermes kanban show <id>`` and the dashboard.
|
||||
# 3. Touch a sentinel file under ``kanban_home() / '.scratch_tip_shown'``
|
||||
# so we don't repeat the tip — once you know, you know.
|
||||
#
|
||||
# Scope is per-install, not per-board: a user creating a second board
|
||||
# already learned the lesson on board #1.
|
||||
|
||||
# File name of the sentinel created under ``kanban_home()`` once the tip has
# been emitted.
_SCRATCH_TIP_SENTINEL_NAME = ".scratch_tip_shown"

# Tip text; used both for the warning log line and for the payload of the
# ``tip_scratch_workspace`` task event.
_SCRATCH_TIP_MESSAGE = (
    "scratch workspaces are ephemeral — they're deleted when the task "
    "completes. Use --workspace worktree: (git worktree) or "
    "--workspace dir:/abs/path (existing dir) to preserve worker output."
)
|
||||
|
||||
|
||||
def _scratch_tip_sentinel_path() -> Path:
    """Return the location of the per-install scratch-tip sentinel file."""
    home = kanban_home()
    return home / _SCRATCH_TIP_SENTINEL_NAME
|
||||
|
||||
|
||||
def _scratch_tip_shown() -> bool:
    """Report whether the scratch-workspace tip was already emitted.

    Best-effort: an ``OSError`` while locating or probing the sentinel is
    treated as "not shown", so the tip is re-emitted — the safer failure
    mode for a help message.
    """
    try:
        sentinel = _scratch_tip_sentinel_path()
        return sentinel.exists()
    except OSError:
        return False
|
||||
|
||||
|
||||
def _mark_scratch_tip_shown() -> None:
    """Create the sentinel file so later scratch workspaces stay silent.

    Best-effort: if the sentinel cannot be written the tip may simply appear
    once more, which beats crashing dispatch over a help message.
    """
    try:
        sentinel = _scratch_tip_sentinel_path()
        sentinel.parent.mkdir(parents=True, exist_ok=True)
        sentinel.touch(exist_ok=True)
    except OSError:
        return
|
||||
|
||||
|
||||
def _maybe_emit_scratch_tip(
    conn: sqlite3.Connection,
    task_id: str,
    workspace_kind: Optional[str],
) -> None:
    """Emit the first-use scratch-workspace tip at most once per install.

    A missing/empty ``workspace_kind`` is treated as ``"scratch"``. Nothing
    happens for any other kind, or once the sentinel already exists.
    Otherwise the tip is logged as a warning and recorded as a
    ``tip_scratch_workspace`` event on the task; the sentinel is written
    afterwards whether or not that succeeded.
    """
    kind = workspace_kind if workspace_kind else "scratch"
    if kind != "scratch" or _scratch_tip_shown():
        return
    try:
        _log.warning("kanban: %s (task %s)", _SCRATCH_TIP_MESSAGE, task_id)
        with write_txn(conn):
            _append_event(
                conn,
                task_id,
                "tip_scratch_workspace",
                {"message": _SCRATCH_TIP_MESSAGE},
            )
    except Exception:
        # Best-effort — a help message must never block the spawn loop.
        pass
    finally:
        _mark_scratch_tip_shown()
|
||||
|
||||
|
||||
def edit_completed_task_result(
|
||||
conn: sqlite3.Connection,
|
||||
task_id: str,
|
||||
@@ -3398,77 +3083,6 @@ def block_task(
|
||||
return True
|
||||
|
||||
|
||||
|
||||
def promote_task(
    conn: sqlite3.Connection,
    task_id: str,
    *,
    actor: str,
    reason: Optional[str] = None,
    force: bool = False,
    dry_run: bool = False,
) -> tuple[bool, Optional[str]]:
    """Manually move a ``todo`` or ``blocked`` task to ``ready``.

    Operator-driven promotion that records a ``promoted_manual`` event
    (actor, reason, forced flag) alongside the status change. Assignee and
    claim state are not touched.

    Returns ``(True, None)`` on success, or ``(False, message)`` when the
    task does not exist, is not ``todo``/``blocked``, has a parent that is
    not ``done``/``archived`` (skipped when ``force=True``), or changed
    status before the update landed. With ``dry_run=True`` only the
    validation runs and nothing is written.
    """
    found = conn.execute(
        "SELECT status FROM tasks WHERE id = ?", (task_id,)
    ).fetchone()
    if found is None:
        return False, f"task {task_id} not found"

    status_now = found["status"]
    if status_now not in ("todo", "blocked"):
        message = (
            f"task {task_id} is {status_now!r}; promote only applies to "
            f"'todo' or 'blocked'"
        )
        return False, message

    if not force:
        parent_rows = conn.execute(
            "SELECT t.id, t.status FROM tasks t "
            "JOIN task_links l ON l.parent_id = t.id "
            "WHERE l.child_id = ?",
            (task_id,),
        ).fetchall()
        pending = []
        for parent in parent_rows:
            if parent["status"] in ("done", "archived"):
                continue
            pending.append(parent["id"])
        if pending:
            joined = ", ".join(pending)
            return False, (
                f"unsatisfied parent dependencies: "
                f"{joined} (use --force to override)"
            )

    if dry_run:
        return True, None

    with write_txn(conn):
        # The WHERE clause re-checks the status so a concurrent transition
        # between the read above and this write is detected via rowcount.
        result = conn.execute(
            "UPDATE tasks SET status = 'ready' "
            "WHERE id = ? AND status IN ('todo', 'blocked')",
            (task_id,),
        )
        if result.rowcount != 1:
            return False, f"task {task_id} status changed during promotion"
        audit = {"actor": actor, "reason": reason, "forced": force}
        _append_event(conn, task_id, "promoted_manual", audit)

    return True, None
|
||||
|
||||
|
||||
def unblock_task(conn: sqlite3.Connection, task_id: str) -> bool:
|
||||
"""Transition ``blocked``/``scheduled`` -> ready or todo.
|
||||
|
||||
@@ -5278,7 +4892,6 @@ def dispatch_once(
|
||||
continue
|
||||
# Persist the resolved workspace path so the worker can cd there.
|
||||
set_workspace_path(conn, claimed.id, str(workspace))
|
||||
_maybe_emit_scratch_tip(conn, claimed.id, claimed.workspace_kind)
|
||||
_spawn = spawn_fn if spawn_fn is not None else _default_spawn
|
||||
try:
|
||||
# Back-compat: older spawn_fn signatures accept only
|
||||
@@ -5357,7 +4970,6 @@ def dispatch_once(
|
||||
continue
|
||||
# Persist the resolved workspace path so the worker can cd there.
|
||||
set_workspace_path(conn, claimed.id, str(workspace))
|
||||
_maybe_emit_scratch_tip(conn, claimed.id, claimed.workspace_kind)
|
||||
# Force-load sdlc-review skill for review agents. The
|
||||
# _default_spawn function already auto-loads kanban-worker, and
|
||||
# appends task.skills via --skills. Setting task.skills here
|
||||
|
||||
+104
-588
@@ -61,76 +61,12 @@ try:
|
||||
except ModuleNotFoundError:
|
||||
pass
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def _is_termux_startup_environment_fast() -> bool:
|
||||
"""Tiny Termux check for pre-import startup shortcuts."""
|
||||
prefix = os.environ.get("PREFIX", "")
|
||||
return bool(
|
||||
os.environ.get("TERMUX_VERSION")
|
||||
or "com.termux/files/usr" in prefix
|
||||
or prefix.startswith("/data/data/com.termux/")
|
||||
)
|
||||
|
||||
|
||||
def _is_termux_fast_version_argv(argv: list[str]) -> bool:
|
||||
return argv in (["--version"], ["-V"], ["version"])
|
||||
|
||||
|
||||
def _read_openai_version_fast() -> str | None:
|
||||
"""Read OpenAI SDK version without importing ``importlib.metadata``."""
|
||||
for base in sys.path:
|
||||
if not base:
|
||||
base = os.getcwd()
|
||||
version_file = os.path.join(base, "openai", "_version.py")
|
||||
try:
|
||||
with open(version_file, encoding="utf-8") as handle:
|
||||
for line in handle:
|
||||
stripped = line.strip()
|
||||
if not stripped.startswith("__version__"):
|
||||
continue
|
||||
_key, _sep, value = stripped.partition("=")
|
||||
value = value.split("#", 1)[0].strip().strip("\"'")
|
||||
return value or None
|
||||
except OSError:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def _print_fast_version_info() -> None:
    """Print the version banner using only cheap lookups.

    Emits the Hermes version/release date, the project root (the parent of
    this file's directory), the running Python version, and the OpenAI SDK
    version as read by ``_read_openai_version_fast``.
    """
    from hermes_cli import __release_date__, __version__

    here = os.path.dirname(__file__)
    project_root = os.path.abspath(os.path.join(here, os.pardir))
    python_version = sys.version.split()[0]

    print(f"Hermes Agent v{__version__} ({__release_date__})")
    print(f"Project: {project_root}")
    print(f"Python: {python_version}")

    openai_version = _read_openai_version_fast()
    if openai_version:
        print(f"OpenAI SDK: {openai_version}")
    else:
        print("OpenAI SDK: Not installed")
|
||||
|
||||
|
||||
def _try_termux_ultrafast_version() -> bool:
    """Serve ``hermes --version`` on Termux before config/logging imports.

    Returns True after printing the version banner; False (printing
    nothing) when ``HERMES_TERMUX_DISABLE_FAST_CLI`` is ``"1"``, when not
    running on Termux, or when the arguments are not a bare version request.
    """
    opted_out = os.environ.get("HERMES_TERMUX_DISABLE_FAST_CLI") == "1"
    if (
        opted_out
        or not _is_termux_startup_environment_fast()
        or not _is_termux_fast_version_argv(sys.argv[1:])
    ):
        return False

    _print_fast_version_info()
    return True
|
||||
|
||||
|
||||
# Exit right away when the Termux fast path already printed the version.
if _try_termux_ultrafast_version():
    raise SystemExit(0)
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
@@ -339,133 +275,6 @@ def _is_termux_startup_environment(env: dict[str, str] | None = None) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def _read_packed_ref(common_dir: Path, ref: str) -> str | None:
|
||||
"""Look up a ref in .git/packed-refs without spawning git.
|
||||
|
||||
packed-refs lines look like ``<sha> <ref>`` with optional ``^<sha>``
|
||||
peel lines and ``#``-prefixed comments / ``# pack-refs with:`` header.
|
||||
"""
|
||||
try:
|
||||
text = (common_dir / "packed-refs").read_text(encoding="utf-8", errors="replace")
|
||||
except OSError:
|
||||
return None
|
||||
for line in text.splitlines():
|
||||
if not line or line.startswith("#") or line.startswith("^"):
|
||||
continue
|
||||
parts = line.split(" ", 1)
|
||||
if len(parts) == 2 and parts[1].strip() == ref:
|
||||
return parts[0].strip()
|
||||
return None
|
||||
|
||||
|
||||
def _read_git_revision_fingerprint(repo_root: Path) -> str | None:
|
||||
"""Return a cheap checkout fingerprint without spawning git."""
|
||||
git_dir = repo_root / ".git"
|
||||
try:
|
||||
if git_dir.is_file():
|
||||
for line in git_dir.read_text(encoding="utf-8", errors="replace").splitlines():
|
||||
key, _, value = line.partition(":")
|
||||
if key.strip() == "gitdir" and value.strip():
|
||||
git_dir = (repo_root / value.strip()).resolve()
|
||||
break
|
||||
# Worktrees point HEAD at a per-worktree gitdir but pack their refs
|
||||
# in the main repo's gitdir (referenced via ``commondir``). Resolve
|
||||
# that up front so packed-refs lookups hit the right file.
|
||||
common_dir = git_dir
|
||||
commondir_file = git_dir / "commondir"
|
||||
if commondir_file.exists():
|
||||
try:
|
||||
rel = commondir_file.read_text(encoding="utf-8", errors="replace").strip()
|
||||
if rel:
|
||||
common_dir = (git_dir / rel).resolve()
|
||||
except OSError:
|
||||
pass
|
||||
head_file = git_dir / "HEAD"
|
||||
head = head_file.read_text(encoding="utf-8", errors="replace").strip()
|
||||
if head.startswith("ref:"):
|
||||
ref = head.split(":", 1)[1].strip()
|
||||
# Loose refs may live in the worktree gitdir OR the common dir
|
||||
# (branches created via `git worktree add` typically live in the
|
||||
# common dir's refs/heads/).
|
||||
for candidate in (git_dir, common_dir):
|
||||
ref_file = candidate / ref
|
||||
if ref_file.exists():
|
||||
return f"git:{ref}:{ref_file.read_text(encoding='utf-8', errors='replace').strip()}"
|
||||
packed_sha = _read_packed_ref(common_dir, ref)
|
||||
if packed_sha:
|
||||
return f"git:{ref}:{packed_sha}"
|
||||
# Ref name is known but unresolved — still stable across launches,
|
||||
# and the version/release fallback in the caller will invalidate
|
||||
# after `hermes update`.
|
||||
return f"git:{ref}:unresolved"
|
||||
return f"git:HEAD:{head}"
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
|
||||
def _termux_bundled_skills_fingerprint() -> str:
    """Return a cheap invalidation key for the Termux bundled-skill sync.

    Prefers the git checkout fingerprint of ``PROJECT_ROOT``. Without one,
    falls back to the package version/release date combined with the
    ``skills`` directory's mtime and size, or ``missing`` when that
    directory cannot be stat'ed.
    """
    git_fp = _read_git_revision_fingerprint(PROJECT_ROOT)
    if git_fp:
        return git_fp

    try:
        info = (PROJECT_ROOT / "skills").stat()
    except OSError:
        return f"skills:{__version__}:{__release_date__}:missing"
    return f"skills:{__version__}:{__release_date__}:{info.st_mtime_ns}:{info.st_size}"
|
||||
|
||||
|
||||
def _termux_bundled_skills_stamp_path() -> Path:
    """Return the path of the stamp file recording the last synced fingerprint."""
    skills_home = get_hermes_home() / "skills"
    return skills_home / ".termux_bundled_sync_stamp"
|
||||
|
||||
|
||||
def _termux_bundled_skills_sync_needed() -> bool:
    """Decide whether the bundled-skill sync has to run on this launch.

    Returns True when not on Termux, when
    ``HERMES_TERMUX_FORCE_SKILLS_SYNC`` is ``"1"``, when the stamp file is
    missing or unreadable, or when its content differs from the current
    fingerprint. Returns False only when the stamp matches.
    """
    if not _is_termux_startup_environment():
        return True
    if os.environ.get("HERMES_TERMUX_FORCE_SKILLS_SYNC") == "1":
        return True
    try:
        stamp = _termux_bundled_skills_stamp_path()
        # errors="replace": a corrupt (non-UTF-8) stamp must read as a
        # mismatch and trigger a resync, not raise UnicodeDecodeError.
        recorded = stamp.read_text(encoding="utf-8", errors="replace").strip()
        return recorded != _termux_bundled_skills_fingerprint()
    except OSError:
        return True
|
||||
|
||||
|
||||
def _mark_termux_bundled_skills_synced() -> None:
    """Record the current fingerprint in the stamp file (Termux only).

    Best-effort: an ``OSError`` while creating the directory or writing the
    stamp is ignored.
    """
    if _is_termux_startup_environment():
        try:
            stamp = _termux_bundled_skills_stamp_path()
            stamp.parent.mkdir(parents=True, exist_ok=True)
            fingerprint = _termux_bundled_skills_fingerprint()
            stamp.write_text(fingerprint + "\n", encoding="utf-8")
        except OSError:
            pass
|
||||
|
||||
|
||||
def _sync_bundled_skills_for_startup() -> bool:
    """Sync bundled skills, cheaply skipping unchanged Termux checkouts.

    Hashing every bundled skill is safe but expensive on older Android
    storage. The fingerprint stamp keeps post-update correctness: a changed
    checkout revision forces one real sync, and later starts skip it.

    Returns True when a sync ran, False when it was skipped.
    """
    on_termux = _is_termux_startup_environment()
    if on_termux and not _termux_bundled_skills_sync_needed():
        return False

    from tools.skills_sync import sync_skills

    sync_skills(quiet=True)
    _mark_termux_bundled_skills_synced()
    return True
|
||||
|
||||
|
||||
def _termux_should_prefetch_update_check() -> bool:
    """Return whether the background update check should be started.

    Always True off Termux; on Termux only when
    ``HERMES_TERMUX_PREFETCH_UPDATES`` is ``"1"``.
    """
    if _is_termux_startup_environment():
        return os.environ.get("HERMES_TERMUX_PREFETCH_UPDATES") == "1"
    return True
|
||||
|
||||
|
||||
def _relative_time(ts) -> str:
|
||||
"""Format a timestamp as relative time (e.g., '2h ago', 'yesterday')."""
|
||||
if not ts:
|
||||
@@ -655,7 +464,7 @@ def _session_browse_picker(sessions: list) -> Optional[str]:
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1) # selected
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1) # header
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1) # search
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim
|
||||
curses.init_pair(4, 8, -1) # dim
|
||||
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
@@ -1337,13 +1146,13 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
p = Path(ext_dir)
|
||||
if (p / "dist" / "entry.js").is_file():
|
||||
node = _node_bin("node")
|
||||
return [node, "--expose-gc", str(p / "dist" / "entry.js")], p
|
||||
return [node, str(p / "dist" / "entry.js")], p
|
||||
|
||||
# 1b. Bundled in wheel (pip install)
|
||||
bundled = _find_bundled_tui()
|
||||
if bundled is not None:
|
||||
node = _node_bin("node")
|
||||
return [node, "--expose-gc", str(bundled)], bundled.parent
|
||||
return [node, str(bundled)], bundled.parent
|
||||
|
||||
# 2. Normal flow: npm install if needed, always esbuild, then node dist/entry.js.
|
||||
# --dev flow: npm install if needed, then tsx src/entry.tsx.
|
||||
@@ -1420,7 +1229,7 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
sys.exit(1)
|
||||
|
||||
node = _node_bin("node")
|
||||
return [node, "--expose-gc", str(tui_dir / "dist" / "entry.js")], tui_dir
|
||||
return [node, str(tui_dir / "dist" / "entry.js")], tui_dir
|
||||
|
||||
|
||||
def _normalize_tui_toolsets(toolsets: object) -> list[str]:
|
||||
@@ -1454,7 +1263,7 @@ def _launch_tui(
|
||||
provider: Optional[str] = None,
|
||||
toolsets: object = None,
|
||||
skills: object = None,
|
||||
verbose: Optional[bool] = None,
|
||||
verbose: bool = False,
|
||||
quiet: bool = False,
|
||||
query: Optional[str] = None,
|
||||
image: Optional[str] = None,
|
||||
@@ -1542,16 +1351,16 @@ def _launch_tui(
|
||||
env["HERMES_TUI_TOOL_PROGRESS"] = "off"
|
||||
if accept_hooks:
|
||||
env["HERMES_ACCEPT_HOOKS"] = "1"
|
||||
# Guarantee an 8GB V8 heap for the TUI. Default node cap is ~1.5–4GB
|
||||
# depending on version and can fatal-OOM on long sessions with large
|
||||
# transcripts / reasoning blobs. Token-level merge: respect any
|
||||
# user-supplied --max-old-space-size (they may have set it higher).
|
||||
# --expose-gc is *not* added here: Node rejects it in NODE_OPTIONS
|
||||
# ("--expose-gc is not allowed in NODE_OPTIONS") and refuses to start.
|
||||
# It is passed as a direct argv flag in _make_tui_argv() instead.
|
||||
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
|
||||
# ~1.5–4GB depending on version and can fatal-OOM on long sessions with
|
||||
# large transcripts / reasoning blobs. Token-level merge: respect any
|
||||
# user-supplied --max-old-space-size (they may have set it higher) and
|
||||
# avoid duplicating --expose-gc.
|
||||
_tokens = env.get("NODE_OPTIONS", "").split()
|
||||
if not any(t.startswith("--max-old-space-size=") for t in _tokens):
|
||||
_tokens.append("--max-old-space-size=8192")
|
||||
if "--expose-gc" not in _tokens:
|
||||
_tokens.append("--expose-gc")
|
||||
env["NODE_OPTIONS"] = " ".join(_tokens)
|
||||
# HERMES_TUI_RESUME is an internal hand-off from the Python wrapper to the
|
||||
# Ink app. Because we start from os.environ.copy(), an exported/stale value
|
||||
@@ -1714,20 +1523,19 @@ def cmd_chat(args):
|
||||
print("You can run 'hermes setup' at any time to configure.")
|
||||
sys.exit(1)
|
||||
|
||||
# Start update check in background (runs while other init happens).
|
||||
# On Termux this imports rich/prompt_toolkit in the foreground and then
|
||||
# competes for CPU on single-core devices, so keep it opt-in there.
|
||||
if _termux_should_prefetch_update_check():
|
||||
try:
|
||||
from hermes_cli.banner import prefetch_update_check
|
||||
# Start update check in background (runs while other init happens)
|
||||
try:
|
||||
from hermes_cli.banner import prefetch_update_check
|
||||
|
||||
prefetch_update_check()
|
||||
except Exception:
|
||||
pass
|
||||
prefetch_update_check()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Sync bundled skills on every CLI launch (fast -- skips unchanged skills)
|
||||
try:
|
||||
_sync_bundled_skills_for_startup()
|
||||
from tools.skills_sync import sync_skills
|
||||
|
||||
sync_skills(quiet=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -1763,7 +1571,7 @@ def cmd_chat(args):
|
||||
provider=getattr(args, "provider", None),
|
||||
toolsets=getattr(args, "toolsets", None),
|
||||
skills=getattr(args, "skills", None),
|
||||
verbose=getattr(args, "verbose", None),
|
||||
verbose=getattr(args, "verbose", False),
|
||||
quiet=getattr(args, "quiet", False),
|
||||
query=getattr(args, "query", None),
|
||||
image=getattr(args, "image", None),
|
||||
@@ -1783,7 +1591,7 @@ def cmd_chat(args):
|
||||
"provider": getattr(args, "provider", None),
|
||||
"toolsets": args.toolsets,
|
||||
"skills": getattr(args, "skills", None),
|
||||
"verbose": getattr(args, "verbose", None),
|
||||
"verbose": args.verbose,
|
||||
"quiet": getattr(args, "quiet", False),
|
||||
"query": args.query,
|
||||
"image": getattr(args, "image", None),
|
||||
@@ -1794,7 +1602,6 @@ def cmd_chat(args):
|
||||
"max_turns": getattr(args, "max_turns", None),
|
||||
"ignore_rules": getattr(args, "ignore_rules", False),
|
||||
"ignore_user_config": getattr(args, "ignore_user_config", False),
|
||||
"compact": getattr(args, "compact", False),
|
||||
}
|
||||
# Filter out None values
|
||||
kwargs = {k: v for k, v in kwargs.items() if v is not None}
|
||||
@@ -2498,34 +2305,10 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
|
||||
("mcp", "MCP", "MCP tool reasoning"),
|
||||
("title_generation", "Title generation", "session titles"),
|
||||
("skills_hub", "Skills hub", "skills search/install"),
|
||||
("triage_specifier", "Triage specifier", "kanban spec fleshing"),
|
||||
("kanban_decomposer", "Kanban decomposer", "task decomposition"),
|
||||
("profile_describer", "Profile describer", "auto profile descriptions"),
|
||||
("curator", "Curator", "skill-usage review pass"),
|
||||
]
|
||||
|
||||
|
||||
def _all_aux_tasks() -> list[tuple[str, str, str]]:
    """Return built-in plus plugin-registered auxiliary tasks.

    Built-in entries from ``_AUX_TASKS`` come first, in their existing
    order, followed by the entries reported by
    ``hermes_cli.plugins.get_plugin_auxiliary_tasks`` in the order that
    function yields them. Each item is ``(key, display_name, description)``.

    Plugin discovery is best-effort: any exception stops plugin collection
    and the tasks gathered so far are returned.
    """
    combined = list(_AUX_TASKS)
    try:
        from hermes_cli.plugins import get_plugin_auxiliary_tasks

        for plugin_task in get_plugin_auxiliary_tasks():
            row = (
                plugin_task["key"],
                plugin_task["display_name"],
                plugin_task["description"],
            )
            combined.append(row)
    except Exception:
        # A plugin discovery failure must not break the aux config UI;
        # built-in tasks remain available.
        pass
    return combined
|
||||
|
||||
|
||||
def _format_aux_current(task_cfg: dict) -> str:
|
||||
"""Render the current aux config for display in the task menu."""
|
||||
if not isinstance(task_cfg, dict):
|
||||
@@ -2576,11 +2359,7 @@ def _save_aux_choice(
|
||||
|
||||
|
||||
def _reset_aux_to_auto() -> int:
|
||||
"""Reset every known aux task back to auto/empty. Returns number reset.
|
||||
|
||||
Includes plugin-registered tasks (via ``_all_aux_tasks``) so a plugin
|
||||
that contributed an auxiliary task gets reset alongside built-ins.
|
||||
"""
|
||||
"""Reset every known aux task back to auto/empty. Returns number reset."""
|
||||
from hermes_cli.config import load_config, save_config
|
||||
|
||||
cfg = load_config()
|
||||
@@ -2589,7 +2368,7 @@ def _reset_aux_to_auto() -> int:
|
||||
aux = {}
|
||||
cfg["auxiliary"] = aux
|
||||
count = 0
|
||||
for task, _name, _desc in _all_aux_tasks():
|
||||
for task, _name, _desc in _AUX_TASKS:
|
||||
entry = aux.setdefault(task, {})
|
||||
if not isinstance(entry, dict):
|
||||
entry = {}
|
||||
@@ -2632,11 +2411,10 @@ def _aux_config_menu() -> None:
|
||||
print()
|
||||
|
||||
# Build the task menu with current settings inline
|
||||
all_tasks = _all_aux_tasks()
|
||||
name_col = max(len(name) for _, name, _ in all_tasks) + 2
|
||||
desc_col = max(len(desc) for _, _, desc in all_tasks) + 4
|
||||
name_col = max(len(name) for _, name, _ in _AUX_TASKS) + 2
|
||||
desc_col = max(len(desc) for _, _, desc in _AUX_TASKS) + 4
|
||||
entries: list[tuple[str, str]] = []
|
||||
for task_key, name, desc in all_tasks:
|
||||
for task_key, name, desc in _AUX_TASKS:
|
||||
task_cfg = (
|
||||
aux.get(task_key, {}) if isinstance(aux.get(task_key), dict) else {}
|
||||
)
|
||||
@@ -2687,7 +2465,7 @@ def _aux_select_for_task(task: str) -> None:
|
||||
current_model = str(task_cfg.get("model") or "").strip()
|
||||
current_base_url = str(task_cfg.get("base_url") or "").strip()
|
||||
|
||||
display_name = next((name for key, name, _ in _all_aux_tasks() if key == task), task)
|
||||
display_name = next((name for key, name, _ in _AUX_TASKS if key == task), task)
|
||||
|
||||
# Gather authenticated providers (has credentials + curated model list)
|
||||
try:
|
||||
@@ -2758,7 +2536,7 @@ def _aux_flow_provider_model(
|
||||
from hermes_cli.auth import _prompt_model_selection
|
||||
from hermes_cli.models import get_pricing_for_provider
|
||||
|
||||
display_name = next((name for key, name, _ in _all_aux_tasks() if key == task), task)
|
||||
display_name = next((name for key, name, _ in _AUX_TASKS if key == task), task)
|
||||
|
||||
# Fetch live pricing for this provider (non-blocking)
|
||||
pricing: dict = {}
|
||||
@@ -2804,7 +2582,7 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
|
||||
"""Prompt for a direct OpenAI-compatible base_url + optional api_key/model."""
|
||||
import getpass
|
||||
|
||||
display_name = next((name for key, name, _ in _all_aux_tasks() if key == task), task)
|
||||
display_name = next((name for key, name, _ in _AUX_TASKS if key == task), task)
|
||||
current_base_url = str(task_cfg.get("base_url") or "").strip()
|
||||
current_model = str(task_cfg.get("model") or "").strip()
|
||||
|
||||
@@ -4756,9 +4534,7 @@ def _model_flow_copilot(config, current_model=""):
|
||||
source = creds.get("source", "")
|
||||
else:
|
||||
if source in {"GITHUB_TOKEN", "GH_TOKEN"}:
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
bw_suffix = format_secret_source_suffix(source)
|
||||
print(f" GitHub token: {api_key[:8]}... ✓ ({source}{bw_suffix})")
|
||||
print(f" GitHub token: {api_key[:8]}... ✓ ({source})")
|
||||
elif source == "gh auth token":
|
||||
print(" GitHub token: ✓ (from `gh auth token`)")
|
||||
else:
|
||||
@@ -5015,10 +4791,7 @@ def _prompt_api_key(pconfig, existing_key: str, provider_id: str = "") -> tuple:
|
||||
return new_key, False
|
||||
|
||||
# Already configured — offer K / R / C ────────────────────────────────
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
|
||||
source_suffix = format_secret_source_suffix(key_env) if key_env else ""
|
||||
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓{source_suffix}")
|
||||
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓")
|
||||
if not key_env:
|
||||
# Nothing we can rewrite; just acknowledge and move on.
|
||||
print()
|
||||
@@ -5301,9 +5074,7 @@ def _model_flow_bedrock_api_key(config, region, current_model=""):
|
||||
# Prompt for API key
|
||||
existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
|
||||
if existing_key:
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK")
|
||||
print(f" Bedrock API Key: {existing_key[:12]}... ✓{source_suffix}")
|
||||
print(f" Bedrock API Key: {existing_key[:12]}... ✓")
|
||||
else:
|
||||
print(f" Endpoint: {mantle_base_url}")
|
||||
print()
|
||||
@@ -5974,22 +5745,7 @@ def _model_flow_anthropic(config, current_model=""):
|
||||
if has_creds:
|
||||
# Show what we found
|
||||
if existing_key:
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
|
||||
# Surface which env var supplied the key so users with
|
||||
# Bitwarden see "(from Bitwarden)" — without this, a detected
|
||||
# BSM key looks identical to a key in .env and users assume
|
||||
# nothing is wired up.
|
||||
source_suffix = ""
|
||||
for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars:
|
||||
if os.getenv(var, "").strip() == existing_key:
|
||||
source_suffix = format_secret_source_suffix(var)
|
||||
if source_suffix:
|
||||
break
|
||||
print(
|
||||
f" Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}"
|
||||
)
|
||||
print(f" Anthropic credentials: {existing_key[:12]}... ✓")
|
||||
elif cc_available:
|
||||
print(" Claude Code credentials: ✓ (auto-detected)")
|
||||
print()
|
||||
@@ -6123,13 +5879,6 @@ def cmd_webhook(args):
|
||||
webhook_command(args)
|
||||
|
||||
|
||||
def cmd_portal(args):
|
||||
"""Nous Portal status and Tool Gateway routing surface."""
|
||||
from hermes_cli.portal_cli import portal_command
|
||||
|
||||
return portal_command(args)
|
||||
|
||||
|
||||
def cmd_slack(args):
|
||||
"""Slack integration helpers.
|
||||
|
||||
@@ -6182,19 +5931,6 @@ def cmd_doctor(args):
|
||||
run_doctor(args)
|
||||
|
||||
|
||||
def cmd_security(args):
|
||||
"""Dispatch `hermes security <subcmd>`."""
|
||||
sub = getattr(args, "security_command", None)
|
||||
if sub in ("audit", None):
|
||||
from hermes_cli.security_audit import cmd_security_audit
|
||||
|
||||
# Default subcommand is `audit` when no subcmd is given.
|
||||
code = cmd_security_audit(args)
|
||||
sys.exit(int(code or 0))
|
||||
print(f"unknown security subcommand: {sub}", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
def cmd_dump(args):
|
||||
"""Dump setup summary for support/debugging."""
|
||||
from hermes_cli.dump import run_dump
|
||||
@@ -6235,7 +5971,8 @@ def cmd_import(args):
|
||||
run_import(args)
|
||||
|
||||
|
||||
def _print_version_info(*, check_updates: bool = True) -> None:
|
||||
def cmd_version(args):
|
||||
"""Show version."""
|
||||
print(f"Hermes Agent v{__version__} ({__release_date__})")
|
||||
print(f"Project: {PROJECT_ROOT}")
|
||||
|
||||
@@ -6255,9 +5992,6 @@ def _print_version_info(*, check_updates: bool = True) -> None:
|
||||
except ImportError:
|
||||
print("OpenAI SDK: Not installed")
|
||||
|
||||
if not check_updates:
|
||||
return
|
||||
|
||||
# Show update status (synchronous — acceptable since user asked for version info)
|
||||
try:
|
||||
from hermes_cli.banner import check_for_updates
|
||||
@@ -6276,11 +6010,6 @@ def _print_version_info(*, check_updates: bool = True) -> None:
|
||||
pass
|
||||
|
||||
|
||||
def cmd_version(args):
|
||||
"""Show version."""
|
||||
_print_version_info(check_updates=True)
|
||||
|
||||
|
||||
def cmd_uninstall(args):
|
||||
"""Uninstall Hermes Agent."""
|
||||
_require_tty("uninstall")
|
||||
@@ -6357,36 +6086,24 @@ def _validate_critical_files_syntax(root) -> tuple[bool, str | None, str | None]
|
||||
them after a successful ``git pull`` so we can auto-roll-back instead of
|
||||
leaving the user with a bricked install.
|
||||
|
||||
The compiled ``.pyc`` is written to a temp directory rather than the
|
||||
source tree's ``__pycache__/`` so we don't race with concurrent test
|
||||
workers that walk the same dir, and so we don't leave a stale pyc
|
||||
behind in production if the next interpreter run picks a different
|
||||
Python version. The pyc is discarded on function return either way —
|
||||
we only care about the compile-or-not signal.
|
||||
|
||||
Returns ``(ok, failing_path, error_message)``. ``ok=True`` means every
|
||||
file parsed cleanly.
|
||||
"""
|
||||
import py_compile
|
||||
import tempfile
|
||||
|
||||
root = Path(root)
|
||||
with tempfile.TemporaryDirectory(prefix="hermes-syntax-check-") as tmpdir:
|
||||
for relpath in _UPDATE_CRITICAL_FILES:
|
||||
path = root / relpath
|
||||
if not path.exists():
|
||||
# Missing file is suspicious but not necessarily fatal — a future
|
||||
# refactor may legitimately remove one of these. Skip and move on.
|
||||
continue
|
||||
# Mirror the relative path under the tmpdir so two different
|
||||
# files with the same basename don't collide on the cfile name.
|
||||
cfile = Path(tmpdir) / (relpath.replace("/", "__") + "c")
|
||||
try:
|
||||
py_compile.compile(str(path), cfile=str(cfile), doraise=True)
|
||||
except py_compile.PyCompileError as exc:
|
||||
return False, str(path), str(exc)
|
||||
except OSError as exc:
|
||||
return False, str(path), f"could not read: {exc}"
|
||||
for relpath in _UPDATE_CRITICAL_FILES:
|
||||
path = root / relpath
|
||||
if not path.exists():
|
||||
# Missing file is suspicious but not necessarily fatal — a future
|
||||
# refactor may legitimately remove one of these. Skip and move on.
|
||||
continue
|
||||
try:
|
||||
py_compile.compile(str(path), doraise=True)
|
||||
except py_compile.PyCompileError as exc:
|
||||
return False, str(path), str(exc)
|
||||
except OSError as exc:
|
||||
return False, str(path), f"could not read: {exc}"
|
||||
return True, None, None
|
||||
|
||||
|
||||
@@ -6971,8 +6688,8 @@ def _update_via_zip(args):
|
||||
)
|
||||
|
||||
print("→ Downloading latest version...")
|
||||
tmp_dir = tempfile.mkdtemp(prefix="hermes-update-")
|
||||
try:
|
||||
tmp_dir = tempfile.mkdtemp(prefix="hermes-update-")
|
||||
zip_path = os.path.join(tmp_dir, f"hermes-agent-{branch}.zip")
|
||||
urlretrieve(zip_url, zip_path)
|
||||
|
||||
@@ -7019,11 +6736,12 @@ def _update_via_zip(args):
|
||||
|
||||
print(f"✓ Updated {update_count} items from ZIP")
|
||||
|
||||
# Cleanup
|
||||
shutil.rmtree(tmp_dir, ignore_errors=True)
|
||||
|
||||
except Exception as e:
|
||||
print(f"✗ ZIP update failed: {e}")
|
||||
sys.exit(1)
|
||||
finally:
|
||||
shutil.rmtree(tmp_dir, ignore_errors=True)
|
||||
|
||||
# Clear stale bytecode after ZIP extraction
|
||||
removed = _clear_bytecode_cache(PROJECT_ROOT)
|
||||
@@ -9855,7 +9573,6 @@ def _coalesce_session_name_args(argv: list) -> list:
|
||||
"honcho",
|
||||
"claw",
|
||||
"plugins",
|
||||
"security",
|
||||
"acp",
|
||||
"webhook",
|
||||
"memory",
|
||||
@@ -10693,10 +10410,10 @@ _BUILTIN_SUBCOMMANDS = frozenset(
|
||||
"config", "cron", "curator", "dashboard", "debug", "doctor",
|
||||
"dump", "fallback", "gateway", "hooks", "import", "insights",
|
||||
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
|
||||
"model", "pairing", "plugins", "portal", "postinstall", "profile", "proxy",
|
||||
"model", "pairing", "plugins", "postinstall", "profile", "proxy",
|
||||
"send", "sessions", "setup",
|
||||
"skills", "slack", "status", "tools", "uninstall", "update",
|
||||
"version", "webhook", "whatsapp", "chat", "secrets", "security",
|
||||
"version", "webhook", "whatsapp", "chat",
|
||||
# Help-ish invocations — plugin commands not being listed in
|
||||
# top-level --help is an acceptable trade-off for skipping an
|
||||
# expensive eager import of every bundled plugin module.
|
||||
@@ -10786,143 +10503,6 @@ def _plugin_cli_discovery_needed() -> bool:
|
||||
return True
|
||||
|
||||
|
||||
_AGENT_COMMANDS = {None, "chat", "acp", "rl"}
|
||||
_AGENT_SUBCOMMANDS = {
|
||||
"cron": ("cron_command", {"run", "tick"}),
|
||||
"gateway": ("gateway_command", {"run"}),
|
||||
"mcp": ("mcp_action", {"serve"}),
|
||||
}
|
||||
|
||||
|
||||
def _prepare_agent_startup(args) -> None:
|
||||
"""Discover plugins/MCP/hooks for commands that can run an agent turn."""
|
||||
_sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
|
||||
if not (
|
||||
args.command in _AGENT_COMMANDS
|
||||
or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set)
|
||||
):
|
||||
return
|
||||
|
||||
_accept_hooks = bool(getattr(args, "accept_hooks", False))
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins
|
||||
|
||||
discover_plugins()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"plugin discovery failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
# MCP tool discovery — no event loop running in CLI/TUI startup,
|
||||
# so inline is safe. Moved here from model_tools.py module scope
|
||||
# to avoid freezing the gateway's event loop on its first message
|
||||
# via the same lazy import path (#16856).
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"MCP tool discovery failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from agent.shell_hooks import register_from_config
|
||||
|
||||
register_from_config(load_config(), accept_hooks=_accept_hooks)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"shell-hook registration failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
|
||||
def _set_chat_arg_defaults(args) -> None:
|
||||
for attr, default in [
|
||||
("query", None),
|
||||
("model", None),
|
||||
("provider", None),
|
||||
("toolsets", None),
|
||||
("verbose", False),
|
||||
("resume", None),
|
||||
("continue_last", None),
|
||||
("worktree", False),
|
||||
]:
|
||||
if not hasattr(args, attr):
|
||||
setattr(args, attr, default)
|
||||
|
||||
|
||||
def _try_termux_fast_cli_launch() -> bool:
|
||||
"""Run obvious Termux non-TUI chat/oneshot/version paths on a light parser."""
|
||||
if not _is_termux_startup_environment():
|
||||
return False
|
||||
if os.environ.get("HERMES_TERMUX_DISABLE_FAST_CLI") == "1":
|
||||
return False
|
||||
|
||||
argv = sys.argv[1:]
|
||||
if "-h" in argv or "--help" in argv:
|
||||
return False
|
||||
if os.environ.get("HERMES_TUI") == "1" or "--tui" in argv:
|
||||
return False
|
||||
|
||||
if _is_termux_fast_version_argv(argv):
|
||||
_print_version_info(check_updates=False)
|
||||
return True
|
||||
|
||||
first = _first_positional_argv()
|
||||
has_oneshot = any(
|
||||
arg == "-z" or arg == "--oneshot" or arg.startswith("--oneshot=")
|
||||
for arg in argv
|
||||
)
|
||||
if not has_oneshot and first not in {None, "chat"}:
|
||||
return False
|
||||
|
||||
from hermes_cli._parser import build_top_level_parser
|
||||
|
||||
parser, _subparsers, chat_parser = build_top_level_parser()
|
||||
chat_parser.set_defaults(func=cmd_chat)
|
||||
args = parser.parse_args(_coalesce_session_name_args(argv))
|
||||
|
||||
if getattr(args, "version", False):
|
||||
_print_version_info(check_updates=False)
|
||||
return True
|
||||
|
||||
if getattr(args, "oneshot", None):
|
||||
_prepare_agent_startup(args)
|
||||
from hermes_cli.oneshot import run_oneshot
|
||||
|
||||
sys.exit(
|
||||
run_oneshot(
|
||||
args.oneshot,
|
||||
model=getattr(args, "model", None),
|
||||
provider=getattr(args, "provider", None),
|
||||
toolsets=getattr(args, "toolsets", None),
|
||||
)
|
||||
)
|
||||
|
||||
if (args.resume or args.continue_last) and args.command is None:
|
||||
args.command = "chat"
|
||||
|
||||
if args.command in {None, "chat"}:
|
||||
_set_chat_arg_defaults(args)
|
||||
interactive_prompt = not getattr(args, "query", None) and not getattr(args, "image", None)
|
||||
if interactive_prompt:
|
||||
# Bare Termux CLI should reach the prompt first and do agent-only
|
||||
# discovery on the first submitted turn instead of before input.
|
||||
setattr(args, "compact", True)
|
||||
os.environ["HERMES_DEFER_AGENT_STARTUP"] = "1"
|
||||
os.environ["HERMES_FAST_STARTUP_BANNER"] = "1"
|
||||
if getattr(args, "accept_hooks", False):
|
||||
os.environ["HERMES_ACCEPT_HOOKS"] = "1"
|
||||
else:
|
||||
_prepare_agent_startup(args)
|
||||
cmd_chat(args)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _try_termux_fast_tui_launch() -> bool:
|
||||
"""Launch obvious Termux TUI invocations before building every subparser.
|
||||
|
||||
@@ -10983,8 +10563,6 @@ def main():
|
||||
|
||||
if _try_termux_fast_tui_launch():
|
||||
return
|
||||
if _try_termux_fast_cli_launch():
|
||||
return
|
||||
|
||||
from hermes_cli._parser import build_top_level_parser
|
||||
|
||||
@@ -11082,42 +10660,6 @@ def main():
|
||||
)
|
||||
fallback_parser.set_defaults(func=cmd_fallback)
|
||||
|
||||
# =========================================================================
|
||||
# secrets command — external secret managers (currently: Bitwarden)
|
||||
# =========================================================================
|
||||
secrets_parser = subparsers.add_parser(
|
||||
"secrets",
|
||||
help="Manage external secret sources (Bitwarden Secrets Manager)",
|
||||
description=(
|
||||
"Pull API keys from an external secret manager at process startup "
|
||||
"instead of storing them in ~/.hermes/.env. Currently supports "
|
||||
"Bitwarden Secrets Manager. See: "
|
||||
"https://hermes-agent.nousresearch.com/docs/user-guide/secrets/bitwarden"
|
||||
),
|
||||
)
|
||||
secrets_subparsers = secrets_parser.add_subparsers(dest="secrets_command")
|
||||
|
||||
secrets_bw = secrets_subparsers.add_parser(
|
||||
"bitwarden",
|
||||
aliases=["bw"],
|
||||
help="Bitwarden Secrets Manager integration",
|
||||
)
|
||||
|
||||
# Lazy import — only pays for itself when this subcommand is actually used.
|
||||
from hermes_cli import secrets_cli as _secrets_cli
|
||||
|
||||
_secrets_cli.register_cli(secrets_bw)
|
||||
|
||||
def _dispatch_secrets(args): # noqa: ANN001
|
||||
sub = getattr(args, "secrets_command", None)
|
||||
bw_sub = getattr(args, "secrets_bw_command", None)
|
||||
if sub in ("bitwarden", "bw") and bw_sub is not None:
|
||||
return args.func(args)
|
||||
secrets_parser.print_help()
|
||||
return 0
|
||||
|
||||
secrets_parser.set_defaults(func=_dispatch_secrets)
|
||||
|
||||
# =========================================================================
|
||||
# migrate command
|
||||
# =========================================================================
|
||||
@@ -11430,13 +10972,6 @@ def main():
|
||||
help="On existing installs: only prompt for items that are missing "
|
||||
"or unset, instead of running the full reconfigure wizard.",
|
||||
)
|
||||
setup_parser.add_argument(
|
||||
"--portal",
|
||||
action="store_true",
|
||||
help="One-shot Nous Portal setup: log in via OAuth, set Nous as the "
|
||||
"inference provider, and opt into the Tool Gateway. Skips the "
|
||||
"rest of the wizard.",
|
||||
)
|
||||
setup_parser.set_defaults(func=cmd_setup)
|
||||
|
||||
# =========================================================================
|
||||
@@ -11912,12 +11447,6 @@ def main():
|
||||
|
||||
webhook_parser.set_defaults(func=cmd_webhook)
|
||||
|
||||
# =========================================================================
|
||||
# portal command — Nous Portal status + Tool Gateway routing
|
||||
# =========================================================================
|
||||
from hermes_cli.portal_cli import add_parser as _add_portal_parser
|
||||
_add_portal_parser(subparsers)
|
||||
|
||||
# =========================================================================
|
||||
# kanban command — multi-profile collaboration board
|
||||
# =========================================================================
|
||||
@@ -12016,58 +11545,6 @@ def main():
|
||||
)
|
||||
doctor_parser.set_defaults(func=cmd_doctor)
|
||||
|
||||
# =========================================================================
|
||||
# security command — on-demand supply-chain audit
|
||||
# =========================================================================
|
||||
security_parser = subparsers.add_parser(
|
||||
"security",
|
||||
help="Supply-chain audit (OSV.dev) for venv, plugins, and MCP servers",
|
||||
description=(
|
||||
"On-demand vulnerability scan against OSV.dev. Covers the Hermes "
|
||||
"venv (installed PyPI dists), Python deps declared by plugins under "
|
||||
"~/.hermes/plugins/, and pinned npx/uvx MCP servers in config.yaml. "
|
||||
"Does NOT scan globally-installed packages or editor/browser extensions."
|
||||
),
|
||||
)
|
||||
security_subparsers = security_parser.add_subparsers(
|
||||
dest="security_command",
|
||||
metavar="<subcommand>",
|
||||
)
|
||||
|
||||
audit_parser = security_subparsers.add_parser(
|
||||
"audit",
|
||||
help="Run a one-shot supply-chain audit",
|
||||
description="Query OSV.dev for known vulnerabilities in installed components.",
|
||||
)
|
||||
audit_parser.add_argument(
|
||||
"--json",
|
||||
action="store_true",
|
||||
help="Emit machine-readable JSON instead of human-readable text",
|
||||
)
|
||||
audit_parser.add_argument(
|
||||
"--fail-on",
|
||||
default="critical",
|
||||
choices=["low", "moderate", "high", "critical"],
|
||||
help="Exit non-zero when any finding meets this severity (default: critical)",
|
||||
)
|
||||
audit_parser.add_argument(
|
||||
"--skip-venv",
|
||||
action="store_true",
|
||||
help="Skip scanning the Hermes Python venv",
|
||||
)
|
||||
audit_parser.add_argument(
|
||||
"--skip-plugins",
|
||||
action="store_true",
|
||||
help="Skip scanning plugin requirements files",
|
||||
)
|
||||
audit_parser.add_argument(
|
||||
"--skip-mcp",
|
||||
action="store_true",
|
||||
help="Skip scanning pinned MCP servers in config.yaml",
|
||||
)
|
||||
audit_parser.set_defaults(func=cmd_security)
|
||||
security_parser.set_defaults(func=cmd_security)
|
||||
|
||||
# =========================================================================
|
||||
# dump command
|
||||
# =========================================================================
|
||||
@@ -12393,11 +11870,6 @@ Examples:
|
||||
skills_audit.add_argument(
|
||||
"name", nargs="?", help="Specific skill to audit (default: all)"
|
||||
)
|
||||
skills_audit.add_argument(
|
||||
"--deep",
|
||||
action="store_true",
|
||||
help="Run AST-level analysis on Python files (opt-in diagnostic)",
|
||||
)
|
||||
|
||||
skills_uninstall = skills_subparsers.add_parser(
|
||||
"uninstall", help="Remove a hub-installed skill"
|
||||
@@ -13853,7 +13325,51 @@ Examples:
|
||||
# so introspection/management commands (hermes hooks list, cron
|
||||
# list, gateway status, mcp add, ...) don't pay discovery cost or
|
||||
# trigger consent prompts for hooks the user is still inspecting.
|
||||
_prepare_agent_startup(args)
|
||||
# Groups with mixed admin/CRUD vs. agent-running entries narrow via
|
||||
# the nested subcommand (dest varies by parser).
|
||||
_AGENT_COMMANDS = {None, "chat", "acp", "rl"}
|
||||
_AGENT_SUBCOMMANDS = {
|
||||
"cron": ("cron_command", {"run", "tick"}),
|
||||
"gateway": ("gateway_command", {"run"}),
|
||||
"mcp": ("mcp_action", {"serve"}),
|
||||
}
|
||||
_sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
|
||||
if args.command in _AGENT_COMMANDS or (
|
||||
_sub_attr and getattr(args, _sub_attr, None) in _sub_set
|
||||
):
|
||||
_accept_hooks = bool(getattr(args, "accept_hooks", False))
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins
|
||||
|
||||
discover_plugins()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"plugin discovery failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
# MCP tool discovery — no event loop running in CLI/TUI startup,
|
||||
# so inline is safe. Moved here from model_tools.py module scope
|
||||
# to avoid freezing the gateway's event loop on its first message
|
||||
# via the same lazy import path (#16856).
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"MCP tool discovery failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from agent.shell_hooks import register_from_config
|
||||
|
||||
register_from_config(load_config(), accept_hooks=_accept_hooks)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"shell-hook registration failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Handle top-level --oneshot / -z: single-shot mode, stdout = final
|
||||
# response only, nothing else. Bypasses cli.py entirely.
|
||||
@@ -13877,7 +13393,7 @@ Examples:
|
||||
("model", None),
|
||||
("provider", None),
|
||||
("toolsets", None),
|
||||
("verbose", None),
|
||||
("verbose", False),
|
||||
("worktree", False),
|
||||
]:
|
||||
if not hasattr(args, attr):
|
||||
@@ -13892,7 +13408,7 @@ Examples:
|
||||
("model", None),
|
||||
("provider", None),
|
||||
("toolsets", None),
|
||||
("verbose", None),
|
||||
("verbose", False),
|
||||
("resume", None),
|
||||
("continue_last", None),
|
||||
("worktree", False),
|
||||
|
||||
+12
-7
@@ -17,6 +17,7 @@ Model / provider selection mirrors `hermes chat`:
|
||||
|
||||
Env var fallbacks (used when the corresponding arg is not passed):
|
||||
- HERMES_INFERENCE_MODEL
|
||||
- HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -27,8 +28,6 @@ import sys
|
||||
from contextlib import redirect_stderr, redirect_stdout
|
||||
from typing import Optional
|
||||
|
||||
from hermes_cli.fallback_config import get_fallback_chain
|
||||
|
||||
|
||||
def _normalize_toolsets(toolsets: object = None) -> list[str] | None:
|
||||
if not toolsets:
|
||||
@@ -134,8 +133,9 @@ def run_oneshot(
|
||||
prompt: The user message to send.
|
||||
model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
|
||||
env var, then config.yaml's model.default / model.model.
|
||||
provider: Optional provider override. Falls back to config.yaml's
|
||||
model.provider, then "auto".
|
||||
provider: Optional provider override. Falls back to
|
||||
HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
|
||||
then "auto".
|
||||
toolsets: Optional comma-separated string or iterable of toolsets.
|
||||
|
||||
Returns the exit code. Caller should sys.exit() with the return.
|
||||
@@ -301,9 +301,14 @@ def _run_agent(
|
||||
toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
|
||||
|
||||
session_db = _create_session_db_for_oneshot()
|
||||
# Read the effective fallback chain from profile config so oneshot workers
|
||||
# honour the same merge semantics as interactive CLI and gateway sessions.
|
||||
_fb = get_fallback_chain(cfg)
|
||||
# Read fallback chain from profile config — supports both the new list
|
||||
# format (fallback_providers) and the legacy single-dict (fallback_model).
|
||||
# Mirrors the same normalization in cli.py so oneshot workers (e.g. kanban
|
||||
# workers spawned via `hermes -p <profile> chat -q ...`) honour the
|
||||
# profile's fallback chain just like interactive sessions do.
|
||||
_fb = cfg.get("fallback_providers") or cfg.get("fallback_model") or []
|
||||
if isinstance(_fb, dict):
|
||||
_fb = [_fb] if _fb.get("provider") and _fb.get("model") else []
|
||||
|
||||
agent = AIAgent(
|
||||
api_key=runtime.get("api_key"),
|
||||
|
||||
@@ -698,119 +698,6 @@ class PluginContext:
|
||||
|
||||
# -- hook registration --------------------------------------------------
|
||||
|
||||
# -- auxiliary task registration ---------------------------------------
|
||||
|
||||
def register_auxiliary_task(
|
||||
self,
|
||||
key: str,
|
||||
*,
|
||||
display_name: str,
|
||||
description: str,
|
||||
defaults: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Register a plugin-defined auxiliary LLM task.
|
||||
|
||||
Auxiliary tasks are LLM-backed side jobs (vision analysis, web extraction,
|
||||
compression, smart-approval, etc.) that route through ``auxiliary_client.py``.
|
||||
Each task has its own ``auxiliary.<key>`` config block where users can
|
||||
pin a provider/model independent of the main chat model.
|
||||
|
||||
Plugins use this to declare their own auxiliary tasks without touching
|
||||
core files. After registration, the task:
|
||||
|
||||
- Appears in the ``hermes model → Configure auxiliary models`` picker
|
||||
- Has its provider/model/base_url/api_key bridged from config.yaml to
|
||||
``AUXILIARY_<KEY_UPPER>_*`` env vars at gateway startup
|
||||
- Gets default routing fields (provider="auto", model="", etc.) merged
|
||||
into loaded configs so ``cfg.get("auxiliary", {}).get(key)`` works
|
||||
|
||||
Args:
|
||||
key: stable task key (snake_case). Used in config ``auxiliary.<key>``
|
||||
and env vars ``AUXILIARY_<KEY_UPPER>_*``. Must not shadow a
|
||||
built-in task key (vision, compression, web_extract, approval,
|
||||
mcp, title_generation, skills_hub, curator).
|
||||
display_name: human-readable name shown in the picker.
|
||||
description: short one-line description shown next to the name.
|
||||
defaults: optional dict of default routing fields. Recognized keys:
|
||||
``provider`` (default "auto"), ``model`` (default ""),
|
||||
``base_url`` (default ""), ``api_key`` (default ""),
|
||||
``timeout`` (default 60), ``extra_body`` (default {}),
|
||||
plus any task-specific extras (e.g. ``download_timeout``).
|
||||
Unknown keys are preserved verbatim — the plugin owns the
|
||||
schema for its own task.
|
||||
|
||||
Raises:
|
||||
ValueError: if *key* is empty, contains invalid characters, or
|
||||
shadows a built-in auxiliary task key.
|
||||
|
||||
Example:
|
||||
ctx.register_auxiliary_task(
|
||||
key="memory_retain_filter",
|
||||
display_name="Memory retain filter",
|
||||
description="hindsight pre-retain dedup/extract",
|
||||
defaults={"provider": "auto", "timeout": 30},
|
||||
)
|
||||
"""
|
||||
# Validate key shape
|
||||
if not key or not isinstance(key, str):
|
||||
raise ValueError(
|
||||
f"Plugin '{self.manifest.name}' tried to register auxiliary task "
|
||||
f"with invalid key {key!r}"
|
||||
)
|
||||
if not all(c.isalnum() or c == "_" for c in key):
|
||||
raise ValueError(
|
||||
f"Plugin '{self.manifest.name}' auxiliary task key {key!r} "
|
||||
f"must contain only alphanumeric characters and underscores"
|
||||
)
|
||||
|
||||
# Lazy import to avoid circular: hermes_cli.main imports plugins indirectly
|
||||
from hermes_cli.main import _AUX_TASKS as _BUILTIN_AUX_TASKS
|
||||
|
||||
builtin_keys = {k for k, _name, _desc in _BUILTIN_AUX_TASKS}
|
||||
if key in builtin_keys:
|
||||
raise ValueError(
|
||||
f"Plugin '{self.manifest.name}' cannot register auxiliary task "
|
||||
f"{key!r} — that key is reserved for a built-in task. "
|
||||
f"Pick a plugin-namespaced key (e.g. '{self.manifest.name}_{key}')."
|
||||
)
|
||||
|
||||
# Reject duplicate registrations across plugins
|
||||
existing = self._manager._aux_tasks.get(key)
|
||||
if existing is not None and existing.get("plugin") != self.manifest.name:
|
||||
raise ValueError(
|
||||
f"Plugin '{self.manifest.name}' cannot register auxiliary task "
|
||||
f"{key!r} — already registered by plugin "
|
||||
f"'{existing.get('plugin')}'"
|
||||
)
|
||||
|
||||
# Normalize defaults — plugin owns the schema, but we ensure routing
|
||||
# fields exist with sensible types so consumers don't crash.
|
||||
merged_defaults: Dict[str, Any] = {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 60,
|
||||
"extra_body": {},
|
||||
}
|
||||
if defaults:
|
||||
for k, v in defaults.items():
|
||||
merged_defaults[k] = v
|
||||
|
||||
self._manager._aux_tasks[key] = {
|
||||
"key": key,
|
||||
"display_name": display_name,
|
||||
"description": description,
|
||||
"defaults": merged_defaults,
|
||||
"plugin": self.manifest.name,
|
||||
}
|
||||
logger.debug(
|
||||
"Plugin %s registered auxiliary task: %s (%s)",
|
||||
self.manifest.name,
|
||||
key,
|
||||
display_name,
|
||||
)
|
||||
|
||||
def register_hook(self, hook_name: str, callback: Callable) -> None:
|
||||
"""Register a lifecycle hook callback.
|
||||
|
||||
@@ -895,9 +782,6 @@ class PluginManager:
|
||||
self._cli_ref = None # Set by CLI after plugin discovery
|
||||
# Plugin skill registry: qualified name → metadata dict.
|
||||
self._plugin_skills: Dict[str, Dict[str, Any]] = {}
|
||||
# Plugin-registered auxiliary tasks: key → {key, display_name,
|
||||
# description, defaults, plugin}. See PluginContext.register_auxiliary_task.
|
||||
self._aux_tasks: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Public
|
||||
@@ -919,7 +803,6 @@ class PluginManager:
|
||||
self._cli_commands.clear()
|
||||
self._plugin_commands.clear()
|
||||
self._plugin_skills.clear()
|
||||
self._aux_tasks.clear()
|
||||
self._context_engine = None
|
||||
self._discovered = True
|
||||
|
||||
@@ -1665,21 +1548,6 @@ def get_plugin_commands() -> Dict[str, dict]:
|
||||
return _ensure_plugins_discovered()._plugin_commands
|
||||
|
||||
|
||||
def get_plugin_auxiliary_tasks() -> List[Dict[str, Any]]:
|
||||
"""Return all plugin-registered auxiliary tasks as a stable-ordered list.
|
||||
|
||||
Each entry is the registration dict from
|
||||
:meth:`PluginContext.register_auxiliary_task`:
|
||||
``{key, display_name, description, defaults, plugin}``.
|
||||
|
||||
Triggers idempotent plugin discovery so callers can read the registry
|
||||
before any explicit ``discover_plugins()`` call. Sorted by ``key`` for
|
||||
deterministic ordering in pickers and tests.
|
||||
"""
|
||||
manager = _ensure_plugins_discovered()
|
||||
return [manager._aux_tasks[k] for k in sorted(manager._aux_tasks)]
|
||||
|
||||
|
||||
def get_plugin_toolsets() -> List[tuple]:
|
||||
"""Return plugin toolsets as ``(key, label, description)`` tuples.
|
||||
|
||||
|
||||
@@ -76,42 +76,22 @@ def _plugins_dir() -> Path:
|
||||
return plugins
|
||||
|
||||
|
||||
def _sanitize_plugin_name(
|
||||
name: str,
|
||||
plugins_dir: Path,
|
||||
*,
|
||||
allow_subdir: bool = False,
|
||||
) -> Path:
|
||||
def _sanitize_plugin_name(name: str, plugins_dir: Path) -> Path:
|
||||
"""Validate a plugin name and return the safe target path inside *plugins_dir*.
|
||||
|
||||
Raises ``ValueError`` if the name contains path-traversal sequences or would
|
||||
resolve outside the plugins directory.
|
||||
|
||||
``allow_subdir=True`` permits a single forward slash inside *name* so
|
||||
category-namespaced plugin keys like ``observability/langfuse`` or
|
||||
``image_gen/openai`` (the registry keys emitted by ``_discover_all_plugins``)
|
||||
can be looked up. ``..`` and backslash are still rejected, leading and
|
||||
trailing slashes are stripped, and the resolved target must still live
|
||||
inside *plugins_dir*. Install paths leave this at the default ``False``
|
||||
because a freshly-cloned plugin always lands top-level under
|
||||
``~/.hermes/plugins/<name>/``.
|
||||
"""
|
||||
if not name:
|
||||
raise ValueError("Plugin name must not be empty.")
|
||||
|
||||
if allow_subdir:
|
||||
name = name.strip("/")
|
||||
if not name:
|
||||
raise ValueError("Plugin name must not be empty.")
|
||||
|
||||
if name in {".", ".."}:
|
||||
raise ValueError(
|
||||
f"Invalid plugin name '{name}': must not reference the plugins directory itself."
|
||||
)
|
||||
|
||||
# Reject obvious traversal characters
|
||||
bad_chars = ("\\", "..") if allow_subdir else ("/", "\\", "..")
|
||||
for bad in bad_chars:
|
||||
for bad in ("/", "\\", ".."):
|
||||
if bad in name:
|
||||
raise ValueError(f"Invalid plugin name '{name}': must not contain '{bad}'.")
|
||||
|
||||
@@ -346,7 +326,7 @@ def _display_removed(name: str, plugins_dir: Path) -> None:
|
||||
|
||||
def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path:
|
||||
"""Return the plugin path if it exists, or exit with an error listing installed plugins."""
|
||||
target = _sanitize_plugin_name(name, plugins_dir, allow_subdir=True)
|
||||
target = _sanitize_plugin_name(name, plugins_dir)
|
||||
if not target.exists():
|
||||
installed = ", ".join(d.name for d in plugins_dir.iterdir() if d.is_dir()) or "(none)"
|
||||
console.print(
|
||||
@@ -1071,7 +1051,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim gray
|
||||
curses.init_pair(4, 8, -1) # dim gray
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
|
||||
@@ -1216,7 +1196,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
|
||||
curses.init_pair(4, 8, -1)
|
||||
curses.curs_set(0)
|
||||
elif key in {curses.KEY_ENTER, 10, 13}:
|
||||
if cursor < n_plugins:
|
||||
@@ -1248,7 +1228,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
|
||||
curses.init_pair(4, 8, -1)
|
||||
curses.curs_set(0)
|
||||
elif key in {27, ord("q")}:
|
||||
# Save plugin changes on exit
|
||||
@@ -1528,7 +1508,7 @@ def _user_installed_plugin_dir(name: str) -> Optional[Path]:
|
||||
"""Resolved path under ``~/.hermes/plugins/<name>`` if it exists."""
|
||||
plugins_dir = _plugins_dir()
|
||||
try:
|
||||
target = _sanitize_plugin_name(name, plugins_dir, allow_subdir=True)
|
||||
target = _sanitize_plugin_name(name, plugins_dir)
|
||||
except ValueError:
|
||||
return None
|
||||
return target if target.is_dir() else None
|
||||
|
||||
@@ -1,219 +0,0 @@
|
||||
"""``hermes portal`` — small CLI surface for Nous Portal users.
|
||||
|
||||
Subcommands:
|
||||
status Show Portal auth state + which Tool Gateway tools are routed.
|
||||
open Open the Portal subscription page in the user's default browser.
|
||||
tools List Tool Gateway tools and which are active in the current config.
|
||||
|
||||
This command is intentionally minimal — it does not duplicate functionality
|
||||
already in ``hermes auth`` or ``hermes tools``. It's a discovery + status
|
||||
surface for the Portal subscription itself.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import webbrowser
|
||||
from typing import Optional
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
DEFAULT_PORTAL_URL = "https://portal.nousresearch.com"
|
||||
SUBSCRIPTION_URL = "https://portal.nousresearch.com/manage-subscription"
|
||||
DOCS_URL = "https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway"
|
||||
|
||||
|
||||
def _nous_portal_base_url() -> str:
    """Resolve the Portal base URL from auth state, falling back to the default.

    Returns:
        The ``portal_base_url`` reported by ``get_nous_auth_status()`` with
        surrounding whitespace and trailing slashes removed, or
        ``DEFAULT_PORTAL_URL`` when the value is missing, blank, not a string,
        or the auth lookup fails for any reason.
    """
    try:
        from hermes_cli.auth import get_nous_auth_status

        status = get_nous_auth_status() or {}
        url = status.get("portal_base_url")
        if isinstance(url, str):
            # Strip whitespace *before* trimming slashes: the emptiness check
            # ignores padding, so the returned value must ignore it too
            # (otherwise " https://x/ " is returned verbatim, slash and all).
            cleaned = url.strip()
            if cleaned:
                return cleaned.rstrip("/")
    except Exception:
        # Deliberately best-effort: any failure reading auth state means we
        # simply use the default URL below.
        pass
    return DEFAULT_PORTAL_URL
|
||||
|
||||
|
||||
def _cmd_status(args) -> int:
    """Print Portal auth state, the configured provider, and Tool Gateway routing.

    ``args`` is accepted for dispatch compatibility and is not read. Returns 0
    on every path that completes; an auth lookup failure is displayed as
    "not logged in" and an unresolved subscription state is reported inline.
    """
    from hermes_cli.auth import get_nous_auth_status
    from hermes_cli.nous_subscription import get_nous_subscription_features

    config = load_config() or {}

    # Best-effort: a failing auth lookup is treated as an empty auth state.
    try:
        auth = get_nous_auth_status() or {}
    except Exception:
        auth = {}

    logged_in = bool(auth.get("logged_in"))

    print()
    print(color(" Nous Portal", Colors.MAGENTA))
    print(color(" ───────────", Colors.MAGENTA))
    if not logged_in:
        print(f" Auth: {color('not logged in', Colors.YELLOW)}")
        print(f" Sign up: {SUBSCRIPTION_URL}")
        print(" Login: hermes auth add nous --type oauth")
    else:
        portal = auth.get("portal_base_url") or DEFAULT_PORTAL_URL
        print(f" Auth: {color('✓ logged in', Colors.GREEN)}")
        print(f" Portal: {portal}")
        inference = auth.get("inference_base_url")
        if inference:
            print(f" API: {inference}")

    # Provider selection (independent of auth)
    model_section = config.get("model")
    if not isinstance(model_section, dict):
        model_section = {}
    provider = str(model_section.get("provider") or "").strip().lower()
    if provider == "nous":
        print(f" Model: {color('✓ using Nous as inference provider', Colors.GREEN)}")
    elif provider:
        print(f" Model: currently {provider} (switch with `hermes model`)")

    # Tool Gateway routing
    print()
    print(color(" Tool Gateway", Colors.MAGENTA))
    print(color(" ────────────", Colors.MAGENTA))
    try:
        features = get_nous_subscription_features(config)
    except Exception:
        features = None

    if features is None:
        print(" (could not resolve subscription state)")
        return 0

    def _routing_state(feat):
        """Return the display string for one feature's routing."""
        if feat.managed_by_nous:
            return color("via Nous Portal", Colors.GREEN)
        if not feat.active:
            return color("not configured", Colors.DIM)
        # Active but not Nous-managed: show the provider when one is set.
        return feat.current_provider or "active"

    rows = [(feat.label, _routing_state(feat)) for feat in features.items()]

    # Pad labels to the longest one so the state column lines up.
    width = max((len(label) for label, _ in rows), default=0)
    for label, state in rows:
        print(f" {label:<{width}} {state}")

    if not logged_in:
        print()
        print(color(f" Docs: {DOCS_URL}", Colors.DIM))
    return 0
|
||||
|
||||
|
||||
def _cmd_open(args) -> int:
    """Launch the default browser on the Portal subscription page.

    ``args`` is accepted for dispatch compatibility and is not read.

    Returns:
        0 when ``webbrowser.open`` reports success, 1 when it returns a falsy
        value or raises (the URL has already been printed for manual use).
    """
    url = SUBSCRIPTION_URL
    print(f"Opening {url}")

    try:
        launched = webbrowser.open(url)
    except Exception:
        # Any launcher failure is handled the same as "no browser available".
        launched = False

    if launched:
        return 0

    print()
    print("Could not launch a browser. Visit the URL above manually.")
    return 1
|
||||
|
||||
|
||||
def _cmd_tools(args) -> int:
    """Print the Tool Gateway catalog with each tool's current routing.

    ``args`` is accepted for dispatch compatibility and is not read.

    Returns:
        0 after printing the catalog, or 1 (with a message on stderr) when the
        subscription feature state cannot be resolved.
    """
    from hermes_cli.nous_subscription import get_nous_subscription_features

    config = load_config() or {}
    try:
        features = get_nous_subscription_features(config)
    except Exception:
        print("Could not resolve Tool Gateway state.", file=sys.stderr)
        return 1

    # Static catalog — the partners Tool Gateway routes to today.
    # Each entry is (feature key, display label, partner name).
    catalog = [
        ("web", "Web search & extract", "Firecrawl"),
        ("image_gen", "Image generation", "FAL"),
        ("tts", "Text-to-speech", "OpenAI TTS"),
        ("browser", "Browser automation", "Browser Use"),
        ("modal", "Cloud terminal", "Modal"),
    ]

    def _routing_state(feat):
        """Return the display string for one catalog entry's feature state."""
        if feat is None:
            # The feature map has no entry for this catalog key.
            return color("unknown", Colors.DIM)
        if feat.managed_by_nous:
            return color("✓ via Nous Portal", Colors.GREEN)
        if not feat.active:
            return color("not configured", Colors.DIM)
        # Active but not Nous-managed: show the provider when one is set.
        return feat.current_provider or "active"

    print()
    print(color(" Tool Gateway catalog", Colors.MAGENTA))
    print(color(" ────────────────────", Colors.MAGENTA))

    if not features.nous_auth_present:
        print(color(" Not logged into Nous Portal — sign in with `hermes auth add nous --type oauth`.", Colors.YELLOW))
        print()

    # Pad labels to the longest one so the partner column lines up.
    label_width = max(len(entry[1]) for entry in catalog)
    for key, label, partner in catalog:
        state = _routing_state(features.features.get(key))
        print(f" {label:<{label_width}} partner: {partner:<14} {state}")

    print()
    print(color(f" Manage your subscription: {SUBSCRIPTION_URL}", Colors.DIM))
    print(color(f" Docs: {DOCS_URL}", Colors.DIM))
    return 0
|
||||
|
||||
|
||||
def portal_command(args) -> int:
    """Dispatch `hermes portal <subcommand>` to its handler.

    Reads ``args.portal_command`` (treated as absent when the attribute is
    missing) and returns the handler's exit code. An unrecognised subcommand
    prints a usage hint to stderr and returns 1.
    """
    sub = getattr(args, "portal_command", None)

    # A missing/empty subcommand falls back to status — matches gh / kubectl
    # conventions where the subcommand-less form gives a useful overview.
    if sub in {None, "", "status"}:
        return _cmd_status(args)
    if sub == "open":
        return _cmd_open(args)
    if sub == "tools":
        return _cmd_tools(args)

    for message in (
        f"Unknown portal subcommand: {sub}",
        "Run `hermes portal -h` for usage.",
    ):
        print(message, file=sys.stderr)
    return 1
|
||||
|
||||
|
||||
def add_parser(subparsers) -> None:
    """Attach the `hermes portal` parser and its subcommands to *subparsers*.

    The chosen subcommand is stored under ``portal_command`` and the parser's
    ``func`` default is set to ``portal_command`` for dispatch.
    """
    portal_parser = subparsers.add_parser(
        "portal",
        help="Nous Portal status, subscription, and Tool Gateway routing",
        description=(
            "Inspect Nous Portal auth, Tool Gateway routing, and open the "
            "Portal subscription page. Subcommands: status (default), "
            "open, tools."
        ),
    )
    portal_sub = portal_parser.add_subparsers(dest="portal_command")

    # (subcommand name, help text), registered in this order.
    subcommand_help = (
        ("status", "Show Portal auth + Tool Gateway routing summary (default)"),
        ("open", "Open the Portal subscription page in your default browser"),
        ("tools", "List Tool Gateway tools and which are routed via Nous"),
    )
    for sub_name, sub_help in subcommand_help:
        portal_sub.add_parser(sub_name, help=sub_help)

    portal_parser.set_defaults(func=portal_command)
|
||||
@@ -35,7 +35,6 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_cli import profiles as profiles_mod
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -110,7 +109,8 @@ def _collect_skills(profile_dir: Path) -> list[str]:
|
||||
return []
|
||||
names: list[str] = []
|
||||
for md in skills_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(md):
|
||||
path_str = str(md)
|
||||
if "/.hub/" in path_str or "/.git/" in path_str:
|
||||
continue
|
||||
try:
|
||||
rel = md.relative_to(skills_dir)
|
||||
@@ -201,7 +201,7 @@ def describe_profile(
|
||||
skill_list = "\n".join(f" - {n}" for n in skill_names) or " (no skills installed)"
|
||||
skill_count = sum(
|
||||
1 for _ in (profile_dir / "skills").rglob("SKILL.md")
|
||||
if not is_excluded_skill_path(_)
|
||||
if "/.hub/" not in str(_) and "/.git/" not in str(_)
|
||||
) if (profile_dir / "skills").is_dir() else 0
|
||||
|
||||
# Read model + provider from the profile's config.
|
||||
|
||||
@@ -70,8 +70,6 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
@@ -465,9 +463,7 @@ def _count_skills(staged: Path) -> int:
|
||||
skills_dir = staged / "skills"
|
||||
if not skills_dir.is_dir():
|
||||
return 0
|
||||
return sum(
|
||||
1 for p in skills_dir.rglob("SKILL.md") if not is_excluded_skill_path(p)
|
||||
)
|
||||
return sum(1 for _ in skills_dir.rglob("SKILL.md"))
|
||||
|
||||
|
||||
def plan_install(
|
||||
|
||||
+3
-115
@@ -30,8 +30,6 @@ from dataclasses import dataclass
|
||||
from pathlib import Path, PurePosixPath, PureWindowsPath
|
||||
from typing import List, Optional
|
||||
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
_PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
|
||||
|
||||
# Directories bootstrapped inside every new profile
|
||||
@@ -487,9 +485,8 @@ def _count_skills(profile_dir: Path) -> int:
|
||||
return 0
|
||||
count = 0
|
||||
for md in skills_dir.rglob("SKILL.md"):
|
||||
if is_excluded_skill_path(md):
|
||||
continue
|
||||
count += 1
|
||||
if "/.hub/" not in str(md) and "/.git/" not in str(md):
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
@@ -777,14 +774,6 @@ def create_profile(
|
||||
except Exception:
|
||||
pass # non-fatal — user can describe later with `hermes profile describe`
|
||||
|
||||
# Phase 4: when running inside a container under s6, register the
|
||||
# new profile's gateway as a runtime s6 service so
|
||||
# `hermes -p <profile> gateway start` can supervise it via
|
||||
# `s6-svc -u` instead of spawning a bare process. On host (systemd
|
||||
# / launchd / windows) this is a no-op — the existing per-profile
|
||||
# unit-generation paths handle gateway lifecycle.
|
||||
_maybe_register_gateway_service(canon)
|
||||
|
||||
return profile_dir
|
||||
|
||||
|
||||
@@ -901,10 +890,6 @@ def delete_profile(name: str, yes: bool = False) -> Path:
|
||||
|
||||
# 1. Disable service (prevents auto-restart)
|
||||
_cleanup_gateway_service(canon, profile_dir)
|
||||
# 1b. Phase 4: unregister the s6 service slot (container path).
|
||||
# On host this is a no-op; on container it removes
|
||||
# /run/service/gateway-<profile>/ so s6-supervise drops it.
|
||||
_maybe_unregister_gateway_service(canon)
|
||||
|
||||
# 2. Stop running gateway
|
||||
if gw_running:
|
||||
@@ -917,49 +902,7 @@ def delete_profile(name: str, yes: bool = False) -> Path:
|
||||
|
||||
# 4. Remove profile directory
|
||||
try:
|
||||
def _make_writable(func, path, exc):
|
||||
"""onexc/onerror handler: add +w on PermissionError so rmtree can proceed.
|
||||
|
||||
Handles two cases on NixOS (and other systems with read-only
|
||||
copies from immutable stores):
|
||||
1. The path itself isn't writable (e.g. a file with mode 0444)
|
||||
2. The *parent* directory isn't writable (e.g. mode 0555)
|
||||
|
||||
Compatible with both the ``onexc`` API (3.12+, receives an
|
||||
exception instance) and the ``onerror`` API (3.11-, receives
|
||||
``sys.exc_info()`` tuple).
|
||||
"""
|
||||
import stat as _stat
|
||||
import sys as _sys
|
||||
|
||||
# Normalise the two callback signatures:
|
||||
# onexc(func, path, exc_instance) — 3.12+
|
||||
# onerror(func, path, exc_info_tuple) — 3.11
|
||||
if isinstance(exc, tuple):
|
||||
exc = exc[1] # exc_info → actual exception object
|
||||
|
||||
if isinstance(exc, PermissionError):
|
||||
# Make the path writable
|
||||
try:
|
||||
os.chmod(path, os.stat(path).st_mode | _stat.S_IWUSR)
|
||||
except OSError:
|
||||
pass
|
||||
# Also make the parent writable (needed for unlink/rmdir)
|
||||
parent = os.path.dirname(path)
|
||||
if parent:
|
||||
try:
|
||||
os.chmod(parent, os.stat(parent).st_mode | _stat.S_IWUSR)
|
||||
except OSError:
|
||||
pass
|
||||
func(path)
|
||||
else:
|
||||
raise
|
||||
|
||||
# ``onexc`` was added in 3.12; fall back to ``onerror`` on 3.11.
|
||||
try:
|
||||
shutil.rmtree(profile_dir, onexc=_make_writable)
|
||||
except TypeError:
|
||||
shutil.rmtree(profile_dir, onerror=_make_writable)
|
||||
shutil.rmtree(profile_dir)
|
||||
print(f"✓ Removed {profile_dir}")
|
||||
except Exception as e:
|
||||
print(f"⚠ Could not remove {profile_dir}: {e}")
|
||||
@@ -977,61 +920,6 @@ def delete_profile(name: str, yes: bool = False) -> Path:
|
||||
return profile_dir
|
||||
|
||||
|
||||
def _maybe_register_gateway_service(profile_name: str) -> None:
    """Best-effort registration of a profile's gateway with the service manager.

    Does nothing when ``get_service_manager()`` raises ``RuntimeError`` (no
    backend available) or when the manager reports that it does not support
    runtime registration (host backends such as systemd/launchd/windows,
    where per-profile unit generation handles gateway lifecycle instead).

    A ``ValueError`` from registration is ignored — it indicates the service
    is already registered, e.g. by the container-boot reconciler. Any other
    exception is printed as a warning and swallowed so that profile creation
    never fails because of the supervision tree.

    Port selection comes from the profile's ``config.yaml``; no port is
    allocated here.
    """
    try:
        from hermes_cli.service_manager import get_service_manager

        manager = get_service_manager()
    except RuntimeError:
        # No backend on this host — nothing to do.
        return

    if manager.supports_runtime_registration():
        try:
            manager.register_profile_gateway(profile_name)
        except ValueError:
            # Already registered; that's fine.
            return
        except Exception as err:
            # Don't fail profile create over a supervision-tree hiccup.
            print(f"⚠ Could not register s6 gateway service: {err}")
|
||||
|
||||
|
||||
def _maybe_unregister_gateway_service(profile_name: str) -> None:
    """Best-effort removal of a profile's gateway from the service manager.

    Does nothing when ``get_service_manager()`` raises ``RuntimeError`` or
    when the manager does not support runtime registration (host backends).
    Any exception raised while unregistering is printed as a warning and
    swallowed so the caller can continue.
    """
    try:
        from hermes_cli.service_manager import get_service_manager

        manager = get_service_manager()
    except RuntimeError:
        # No backend available — nothing to tear down.
        return

    if manager.supports_runtime_registration():
        try:
            manager.unregister_profile_gateway(profile_name)
        except Exception as err:
            print(f"⚠ Could not unregister s6 gateway service: {err}")
|
||||
|
||||
|
||||
def _cleanup_gateway_service(name: str, profile_dir: Path) -> None:
|
||||
"""Disable and remove systemd/launchd service for a profile."""
|
||||
import platform as _platform
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user