Compare commits
41 Commits
v2026.5.28
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 3b6347af15 | |||
| 42612aa350 | |||
| 3c6e70aef1 | |||
| 2f0f03c40d | |||
| 5c2170a7c6 | |||
| d77d877665 | |||
| ac8e238bc8 | |||
| 8d129d013b | |||
| 300140e006 | |||
| e71a2bd11b | |||
| 769ee86cd2 | |||
| 1b1e30510a | |||
| f3acdd94fe | |||
| 78a54d2c00 | |||
| e7c99651fb | |||
| fb51253620 | |||
| ef009a987a | |||
| 130396c658 | |||
| a5c1f925b5 | |||
| 11d93096b3 | |||
| d464d08a5f | |||
| 5a95fb2e14 | |||
| 0acb7f4583 | |||
| a3cd974ee7 | |||
| ea5a6c216b | |||
| 4df62d239e | |||
| 490b3e76b1 | |||
| 321ce94e25 | |||
| c5e496e1c0 | |||
| 7a3c38d0b7 | |||
| 5cbc3fbdcc | |||
| f30db14ced | |||
| 3a9bc9d88a | |||
| 5f66c36470 | |||
| 7a8589e782 | |||
| 7050c052e3 | |||
| 102eb4adc0 | |||
| b1d3ead7fb | |||
| c661fefa08 | |||
| fe5c8ec4ad | |||
| c9e5a9bb08 |
@@ -22,7 +22,12 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
deploy-vercel:
|
||||
if: github.event_name == 'release'
|
||||
# Triggered automatically on release publish (production cuts) and
|
||||
# manually via `gh workflow run deploy-site.yml` when an out-of-band
|
||||
# main commit needs to ship live before the next release tag — e.g.
|
||||
# a skills-index PR that doesn't touch website/** paths and so
|
||||
# doesn't auto-deploy via the deploy-docs path.
|
||||
if: github.event_name == 'release' || github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Trigger Vercel Deploy
|
||||
|
||||
@@ -196,10 +196,26 @@ jobs:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Build once, load into the local daemon for smoke testing. Cached
|
||||
# to gha with a per-arch scope; the push step below reuses every
|
||||
# layer from this build.
|
||||
- name: Build image (arm64, smoke test)
|
||||
# Build once, load into the local daemon for smoke testing. PR arm64
|
||||
# builds deliberately avoid the gha cache: cold-cache arm64 builds can
|
||||
# outlive GitHub's short-lived Azure cache SAS token, then fail while
|
||||
# reading or writing cache blobs before the smoke test can run.
|
||||
- name: Build image (arm64, smoke test, uncached PR)
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.IMAGE_NAME }}:test
|
||||
build-args: |
|
||||
HERMES_GIT_SHA=${{ github.sha }}
|
||||
|
||||
# Main/release builds still use the per-arch gha cache so the digest
|
||||
# push below can reuse layers from this smoke-test build.
|
||||
- name: Build image (arm64, smoke test, cached publish)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
|
||||
+4
-8
@@ -443,7 +443,6 @@
|
||||
|
||||
## 🪟 Native Windows (Beta Continued)
|
||||
|
||||
- Thin desktop installer + first-launch `install.ps1` bootstrap. ([#27822](https://github.com/NousResearch/hermes-agent/pull/27822))
|
||||
- Complete Windows bootstrap — `dep_ensure` + `install.ps1` + detection. (@alt-glitch) ([#27845](https://github.com/NousResearch/hermes-agent/pull/27845))
|
||||
- `install.ps1`: strip BOM, `-Commit`/`-Tag` pin params, harden git ops. (@jquesnelle) ([#28169](https://github.com/NousResearch/hermes-agent/pull/28169))
|
||||
- Consolidate ACP browser bootstrap into `install.{sh,ps1}`. (@alt-glitch) ([#27851](https://github.com/NousResearch/hermes-agent/pull/27851))
|
||||
@@ -453,12 +452,9 @@
|
||||
|
||||
---
|
||||
|
||||
## 🖼️ Hermes Desktop GUI
|
||||
## 🖥️ Web Dashboard
|
||||
|
||||
- `hermes gui` launcher — install + build + launch packaged Electron app. (@OutThisLife) ([#30165](https://github.com/NousResearch/hermes-agent/pull/30165))
|
||||
- Desktop UI lift. ([#27227](https://github.com/NousResearch/hermes-agent/pull/27227))
|
||||
- `nix` package `.#desktop`. (@ethernet8023) ([#28964](https://github.com/NousResearch/hermes-agent/pull/28964))
|
||||
- Hardened Slack socket recovery + Windows desktop restart dedupe. ([#28873](https://github.com/NousResearch/hermes-agent/pull/28873))
|
||||
- Hardened Slack socket recovery + Windows restart dedupe. ([#28873](https://github.com/NousResearch/hermes-agent/pull/28873))
|
||||
- Web dashboard: migrate checkboxes to `@nous-research/ui` + design-system polish. (@austinpickett) ([#28814](https://github.com/NousResearch/hermes-agent/pull/28814))
|
||||
- Web dashboard: collapsible sidebar. (@austinpickett) ([#33421](https://github.com/NousResearch/hermes-agent/pull/33421))
|
||||
- Dashboard typography & contrast pass. (salvage of [#28832](https://github.com/NousResearch/hermes-agent/pull/28832)) ([#30714](https://github.com/NousResearch/hermes-agent/pull/30714))
|
||||
@@ -579,11 +575,11 @@
|
||||
### Notable salvages & cherry-picks
|
||||
|
||||
- **@benbarclay** — s6-overlay container supervision (29 commits salvaged), Node 22 LTS upgrade, build-essential cleanup, `gateway run` auto-redirect in s6, tee supervised stdout to docker logs, `hermes update` Docker guidance, build-time SHA stamping
|
||||
- **@OutThisLife** — `hermes gui` desktop launcher, `mouse_tracking` DEC mode presets
|
||||
- **@OutThisLife** — `mouse_tracking` DEC mode presets
|
||||
- **@jquesnelle** — Windows installer hardening, `--branch` flag for `hermes update`, install.ps1 BOM strip / commit-pin
|
||||
- **@alt-glitch** — Windows `dep_ensure` bootstrap, Nix package variants (`.#messaging`, `.#full`), install-method stamping, ACP browser bootstrap consolidation
|
||||
- **@austinpickett** — `/update` slash command, dashboard checkboxes → `@nous-research/ui`, mobile dashboard polish, collapsible sidebar
|
||||
- **@ethernet8023** — Nix `.#desktop` packaging, CI test slicing across GH Actions jobs, TUI clipboard copy fix
|
||||
- **@ethernet8023** — CI test slicing across GH Actions jobs, TUI clipboard copy fix
|
||||
- **@kshitijk4poor** — doctor section banner + fail-and-issue helpers extraction, post-tag salvage cluster (curator-fallout, kanban SQLite hardening, install world-readable uv dirs, xAI bare-code paste)
|
||||
- **@rewbs** — Nous JWT inference switch + refresh-token replay fix
|
||||
- **@Codename-11** + **@Schwartz10** — session control API (REST + SSE + multimodal followup)
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
# Hermes Agent v0.15.1 (v2026.5.29)
|
||||
|
||||
**Release Date:** May 29, 2026
|
||||
**Since v0.15.0:** 28 commits · 21 merged PRs · hotfix release · 9 contributors
|
||||
|
||||
> **The Patch Release.** A same-day hotfix for v0.15.0. Headline fix: the dashboard infinite-reload loop that hit anyone running v0.15.0 in loopback mode (Docker, hosted Hermes, fresh installs). A handful of other v0.15.0 follow-ups go along for the ride — kanban worker SIGTERM, `/model` picker unification, `/yolo` session bypass, the full 19,932-entry skills.sh catalog, `.md` media delivery restoration, gateway probe-stepdown safety, web-URL redaction passthrough, kanban worker vision on referenced images, hindsight observation-default. Docker users get an explicit `--insecure` opt-in env var (no more bind-host inference), MCP server bare-command PATH resolution, and arm64 PR-build cache fixes.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Dashboard 401 reload loop fixed** — In loopback mode the dashboard's identity probe (`/api/auth/me`) returns 401 by design, but v0.15.0's stale-token reload guard treated every 401 as a rotated session token and full-page-reloaded to pick up a fresh one. Every successful sibling call cleared the one-shot reload guard, so the page reload-looped forever (Firefox: "Navigated to /sessions" storm; Chrome: React re-render storm). Fix adds an `allowUnauthorized` opt-out to `fetchJSON` that skips only the loopback stale-token reload — 401 still throws so `AuthWidget` swallows it, gated-mode `login_url` redirects are unaffected. Closes [#34206](https://github.com/NousResearch/hermes-agent/issues/34206), [#34202](https://github.com/NousResearch/hermes-agent/issues/34202). ([#30698](https://github.com/NousResearch/hermes-agent/pull/30698) — @austinpickett)
|
||||
|
||||
- **Docker dashboard `--insecure` is now an explicit env opt-in, never derived from bind host** — Previously the Docker entrypoint inferred `--insecure` when the dashboard bound to a non-loopback host. That conflated "I want LAN access" with "I want to disable the same-origin guard." The fix splits them: bind host is bind host, and disabling the dashboard's loopback auth requires an explicit `HERMES_DASHBOARD_INSECURE=1`. Existing setups that genuinely wanted insecure binding must now set the env var. ([#34188](https://github.com/NousResearch/hermes-agent/pull/34188), [#34204](https://github.com/NousResearch/hermes-agent/pull/34204) — @benbarclay)
|
||||
|
||||
- **MCP bare command resolution under Docker** — MCP servers configured with bare commands (`npx`, `npm`, `node`) now resolve against `/usr/local/bin` so they actually launch inside the Docker image where those binaries live. v0.15.0 left these failing silently in containers when the agent's effective PATH didn't include the Node toolchain location. ([#34186](https://github.com/NousResearch/hermes-agent/pull/34186) — @benbarclay)
|
||||
|
||||
- **Skills page sidebar / source pills restored** — A stale `useMemo` dependency in the new dashboard skills page collapsed the source pills and category sidebar to "All" only. Fixed; both surfaces now reflect the live catalog state. ([#34194](https://github.com/NousResearch/hermes-agent/pull/34194))
|
||||
|
||||
- **Kanban worker can be killed again** — `SIGTERM` on a kanban worker was being absorbed by an intermediate process and the worker stayed running. Closes [#28181](https://github.com/NousResearch/hermes-agent/issues/28181). ([#34045](https://github.com/NousResearch/hermes-agent/pull/34045))
|
||||
|
||||
- **Full skills.sh catalog (858 → 19,932 entries)** — The skills hub page was pulling a partial paginated catalog. The fetch now walks the sitemap, so all 19,932 skills.sh entries surface in the picker instead of just the first 858. ([#34025](https://github.com/NousResearch/hermes-agent/pull/34025))
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Bug Fixes
|
||||
|
||||
### Dashboard / Web
|
||||
|
||||
- **`/api/auth/me` 401 no longer triggers reload loop** in loopback mode — ([#30698](https://github.com/NousResearch/hermes-agent/pull/30698) — @austinpickett)
|
||||
- **Skills page source pills + category sidebar restored** — stale `useMemo` dep ([#34194](https://github.com/NousResearch/hermes-agent/pull/34194))
|
||||
|
||||
### Docker
|
||||
|
||||
- **`--insecure` is now explicit opt-in via env var**, not derived from bind host ([#34188](https://github.com/NousResearch/hermes-agent/pull/34188) — @benbarclay)
|
||||
- **Dashboard test suite repaired** to match the insecure-opt-in fix ([#34204](https://github.com/NousResearch/hermes-agent/pull/34204) — @benbarclay)
|
||||
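- **arm64 PR builds skip the GHA cache** — cold-cache arm64 builds can outlive GitHub's short-lived cache SAS token and then fail while reading or writing cache blobs before the smoke test runs; main/release builds still use the per-arch cache ([#33704](https://github.com/NousResearch/hermes-agent/pull/33704) — @BROCCOLO1D)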
|
||||
|
||||
### MCP
|
||||
|
||||
- **Bare `npx`/`npm`/`node` resolve against `/usr/local/bin`** for Docker compatibility ([#34186](https://github.com/NousResearch/hermes-agent/pull/34186) — @benbarclay)
|
||||
|
||||
### Kanban
|
||||
|
||||
- **Worker SIGTERM actually terminates the process** ([#34045](https://github.com/NousResearch/hermes-agent/pull/34045))
|
||||
- **Workers receive images referenced in task bodies** for vision-capable models ([#34210](https://github.com/NousResearch/hermes-agent/pull/34210))
|
||||
|
||||
### Gateway
|
||||
|
||||
- **`.md` files deliver again** — media-delivery validation defaults to denylist-only instead of an overly-narrow allowlist ([#34022](https://github.com/NousResearch/hermes-agent/pull/34022))
|
||||
- **Probe stepdown safety** — on a context overflow where the provider does not report its actual context limit, the agent no longer steps `context_length` down through guessed probe tiers (e.g. 1M → 256K → 128K); it keeps the configured window and tries compression instead (salvage of [#33673](https://github.com/NousResearch/hermes-agent/pull/33673)) ([#33826](https://github.com/NousResearch/hermes-agent/pull/33826))
|
||||
|
||||
### CLI
|
||||
|
||||
- **`/yolo` mid-session enables the per-session bypass** instead of just toggling the env var (which the running agent had already snapshotted) ([#33931](https://github.com/NousResearch/hermes-agent/pull/33931) — @kshitijk4poor)
|
||||
- **`/model` and `hermes model` show the same list**, plus disk cache for picker startup ([#33867](https://github.com/NousResearch/hermes-agent/pull/33867))
|
||||
|
||||
### Skills
|
||||
|
||||
- **Full skills.sh catalog via sitemap** — 858 → 19,932 entries ([#34025](https://github.com/NousResearch/hermes-agent/pull/34025))
|
||||
|
||||
### Redaction
|
||||
|
||||
- **Web URLs pass through unchanged** — the redactor was eating query parameters that looked credential-shaped ([#34029](https://github.com/NousResearch/hermes-agent/pull/34029))
|
||||
|
||||
---
|
||||
|
||||
## ✨ Small Features
|
||||
|
||||
- **Hindsight default narrowed to observation-only** for `recall_types` — tool path is also narrowed ([#34079](https://github.com/NousResearch/hermes-agent/pull/34079) — @nicoloboschi, follow-up [#34091](https://github.com/NousResearch/hermes-agent/commit/4df62d239e38bf8c212a595721c9c01e176f6c3a) — @kshitijk4poor)
|
||||
- **Memory providers receive completed-turn message context** — salvage of [#28065](https://github.com/NousResearch/hermes-agent/pull/28065) ([#34097](https://github.com/NousResearch/hermes-agent/pull/34097) — @kshitijk4poor, credit to @devwdave)
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- **`--no-supervise` / `HERMES_GATEWAY_NO_SUPERVISE` documented** in the reference docs (follow-up to [#33583](https://github.com/NousResearch/hermes-agent/pull/33583)) ([#33751](https://github.com/NousResearch/hermes-agent/pull/33751) — @r266-tech)
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ Infrastructure
|
||||
|
||||
- **Vercel deploy workflow accepts `workflow_dispatch`** so docs deploys can be manually triggered ([#34081](https://github.com/NousResearch/hermes-agent/pull/34081))
|
||||
- **`@nous-research/ui` bumped to 0.18.2** (Nix `npmDepsHash` also updated to match) ([#34193](https://github.com/NousResearch/hermes-agent/pull/34193) follow-ups — @austinpickett)
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- @teknium1
|
||||
|
||||
### Community
|
||||
- @austinpickett — dashboard 401 reload-loop fix (the headline), `@nous-research/ui` bump, Nix `npmDepsHash` updates
|
||||
- @benbarclay — Docker `--insecure` opt-in, MCP bare-command resolution, dashboard test repair
|
||||
- @kshitijk4poor — `/yolo` session bypass, completed-turn memory context salvage, hindsight follow-up docs
|
||||
- @nicoloboschi — hindsight `recall_types` observation default
|
||||
- @BROCCOLO1D — arm64 PR build cache fix
|
||||
- @r266-tech — `--no-supervise` reference docs
|
||||
- @yangguangjin — probe stepdown safety (salvage of @yanghd's #33673)
|
||||
- @devwdave — completed-turn memory context (credited via salvage)
|
||||
- @andrewhosf — co-author
|
||||
|
||||
### Issue Reporters (the 401 loop)
|
||||
- @routesmith ([#34206](https://github.com/NousResearch/hermes-agent/issues/34206))
|
||||
- @beeaton ([#34202](https://github.com/NousResearch/hermes-agent/issues/34202))
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.5.28...v2026.5.29](https://github.com/NousResearch/hermes-agent/compare/v2026.5.28...v2026.5.29)
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "hermes-agent",
|
||||
"name": "Hermes Agent",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1",
|
||||
"description": "Self-improving open-source AI agent by Nous Research with ACP editor integration, persistent memory, skills, and rich tool support.",
|
||||
"repository": "https://github.com/NousResearch/hermes-agent",
|
||||
"website": "https://hermes-agent.nousresearch.com/docs/user-guide/features/acp",
|
||||
@@ -9,7 +9,7 @@
|
||||
"license": "MIT",
|
||||
"distribution": {
|
||||
"uvx": {
|
||||
"package": "hermes-agent[acp]==0.15.0",
|
||||
"package": "hermes-agent[acp]==0.15.1",
|
||||
"args": ["hermes-acp"]
|
||||
}
|
||||
}
|
||||
|
||||
+25
-30
@@ -49,9 +49,8 @@ from agent.model_metadata import (
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
estimate_messages_tokens_rough,
|
||||
estimate_request_tokens_rough,
|
||||
get_next_probe_tier,
|
||||
get_context_length_from_provider_error,
|
||||
parse_available_output_tokens_from_error,
|
||||
parse_context_limit_from_error,
|
||||
save_context_length,
|
||||
)
|
||||
from agent.nous_rate_guard import (
|
||||
@@ -2900,9 +2899,13 @@ def run_conversation(
|
||||
restart_with_compressed_messages = True
|
||||
break
|
||||
|
||||
# Error is about the INPUT being too large — reduce context_length.
|
||||
# Try to parse the actual limit from the error message
|
||||
parsed_limit = parse_context_limit_from_error(error_msg)
|
||||
# Error is about the INPUT being too large. Only reduce
|
||||
# context_length when the provider explicitly reports the
|
||||
# real lower limit. If the provider only says "input
|
||||
# exceeds the context window", keep the configured window
|
||||
# and try compression; guessing probe tiers can incorrectly
|
||||
# turn a user-configured 1M window into 256K/128K/64K.
|
||||
new_ctx = get_context_length_from_provider_error(error_msg, old_ctx)
|
||||
_provider_lower = (getattr(agent, "provider", "") or "").lower()
|
||||
_base_lower = (getattr(agent, "base_url", "") or "").rstrip("/").lower()
|
||||
is_minimax_provider = (
|
||||
@@ -2914,23 +2917,12 @@ def run_conversation(
|
||||
)
|
||||
minimax_delta_only_overflow = (
|
||||
is_minimax_provider
|
||||
and parsed_limit is None
|
||||
and new_ctx is None
|
||||
and "context window exceeds limit (" in error_msg
|
||||
)
|
||||
if parsed_limit and parsed_limit < old_ctx:
|
||||
new_ctx = parsed_limit
|
||||
agent._buffer_vprint(f"Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
|
||||
elif minimax_delta_only_overflow:
|
||||
new_ctx = old_ctx
|
||||
agent._buffer_vprint(
|
||||
f"Provider reported overflow amount only; "
|
||||
f"keeping context_length at {old_ctx:,} tokens and compressing."
|
||||
)
|
||||
else:
|
||||
# Step down to the next probe tier
|
||||
new_ctx = get_next_probe_tier(old_ctx)
|
||||
|
||||
if new_ctx and new_ctx < old_ctx:
|
||||
if new_ctx is not None:
|
||||
agent._buffer_vprint(f"Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
|
||||
compressor.update_model(
|
||||
model=agent.model,
|
||||
context_length=new_ctx,
|
||||
@@ -2940,20 +2932,22 @@ def run_conversation(
|
||||
api_mode=agent.api_mode,
|
||||
)
|
||||
# Context probing flags — only set on built-in
|
||||
# compressor (plugin engines manage their own).
|
||||
# compressor (plugin engines manage their own). This
|
||||
# value came from the provider, so it is safe to cache.
|
||||
if hasattr(compressor, "_context_probed"):
|
||||
compressor._context_probed = True
|
||||
# Only persist limits parsed from the provider's
|
||||
# error message (a real number). Guessed fallback
|
||||
# tiers from get_next_probe_tier() should stay
|
||||
# in-memory only — persisting them pollutes the
|
||||
# cache with wrong values.
|
||||
compressor._context_probe_persistable = bool(
|
||||
parsed_limit and parsed_limit == new_ctx
|
||||
)
|
||||
agent._buffer_vprint(f"⚠️ Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens")
|
||||
compressor._context_probe_persistable = True
|
||||
agent._buffer_vprint(f"⚠️ Context length exceeded — using provider limit: {old_ctx:,} → {new_ctx:,} tokens")
|
||||
elif minimax_delta_only_overflow:
|
||||
agent._buffer_vprint(
|
||||
f"Provider reported overflow amount only; "
|
||||
f"keeping context_length at {old_ctx:,} tokens and compressing."
|
||||
)
|
||||
else:
|
||||
agent._buffer_vprint(f"⚠️ Context length exceeded at minimum tier — attempting compression...")
|
||||
agent._buffer_vprint(
|
||||
f"⚠️ Context length exceeded, but provider did not report a max context length; "
|
||||
f"keeping context_length at {old_ctx:,} tokens and compressing."
|
||||
)
|
||||
|
||||
compression_attempts += 1
|
||||
if compression_attempts > max_compression_attempts:
|
||||
@@ -4567,6 +4561,7 @@ def run_conversation(
|
||||
original_user_message=original_user_message,
|
||||
final_response=final_response,
|
||||
interrupted=interrupted,
|
||||
messages=messages,
|
||||
)
|
||||
|
||||
# Background memory/skill review — runs AFTER the response is delivered
|
||||
|
||||
+134
-14
@@ -37,6 +37,8 @@ from __future__ import annotations
|
||||
import base64
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
@@ -46,6 +48,102 @@ logger = logging.getLogger(__name__)
|
||||
_VALID_MODES = frozenset({"auto", "native", "text"})
|
||||
|
||||
|
||||
# Image extensions used by extract_image_refs(). Kept tight on purpose — we
|
||||
# only auto-attach things the model can actually see. Documents/archives are
|
||||
# excluded because the gateway's broader extract_local_files() also routes
|
||||
# them differently (send_document), and we don't want to attach a PDF as a
|
||||
# vision part.
|
||||
_IMAGE_EXTS = (
|
||||
".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".heic",
|
||||
)
|
||||
_IMAGE_EXT_PATTERN = "|".join(e.lstrip(".") for e in _IMAGE_EXTS)
|
||||
|
||||
# Absolute / home-relative local image path. Matches the same shape gateway's
|
||||
# extract_local_files() uses: anchors to ``~/`` or ``/``, ignores matches inside
|
||||
# URLs (the ``(?<![/:\w.])`` lookbehind), and case-insensitive on the extension.
|
||||
_LOCAL_IMAGE_PATH_RE = re.compile(
|
||||
r"(?<![/:\w.])(?:~/|/)(?:[\w.\-]+/)*[\w.\-]+\.(?:" + _IMAGE_EXT_PATTERN + r")\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# http(s) URL ending in an image extension (optionally followed by a
|
||||
# query string). Case-insensitive on the extension. Strict ``http(s)://``
|
||||
# scheme so we don't accidentally grab ``file://`` URLs or other shapes.
|
||||
_IMAGE_URL_RE = re.compile(
|
||||
r"https?://[^\s<>\"']+?\.(?:" + _IMAGE_EXT_PATTERN + r")(?:\?[^\s<>\"']*)?",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def extract_image_refs(text: str) -> Tuple[List[str], List[str]]:
|
||||
"""Scan free-form text for image references the model should see.
|
||||
|
||||
Returns ``(local_paths, urls)``:
|
||||
|
||||
* ``local_paths`` — absolute (``/``) or home-relative (``~/``) paths
|
||||
whose suffix is an image extension AND whose expanded form exists
|
||||
on disk as a file. Order-preserving, deduplicated.
|
||||
* ``urls`` — ``http(s)://…`` URLs whose path ends in an image
|
||||
extension (a ``?query`` is allowed after the extension).
|
||||
Order-preserving, deduplicated.
|
||||
|
||||
Matches inside fenced code blocks (``` ``` ```) and inline backticks
|
||||
(`` `…` ``) are skipped so that snippets pasted into a task body for
|
||||
reference aren't mistaken for live attachments. This mirrors the
|
||||
behaviour of ``gateway.platforms.base.BaseAdapter.extract_local_files``.
|
||||
|
||||
Local paths are validated against the filesystem; URLs are not
|
||||
(the provider fetches them at request time).
|
||||
"""
|
||||
if not isinstance(text, str) or not text:
|
||||
return [], []
|
||||
|
||||
# Build spans covered by fenced code blocks and inline code so we can
|
||||
# ignore references the author embedded purely as example text.
|
||||
code_spans: list[tuple[int, int]] = []
|
||||
for m in re.finditer(r"```[^\n]*\n.*?```", text, re.DOTALL):
|
||||
code_spans.append((m.start(), m.end()))
|
||||
for m in re.finditer(r"`[^`\n]+`", text):
|
||||
code_spans.append((m.start(), m.end()))
|
||||
|
||||
def _in_code(pos: int) -> bool:
|
||||
return any(s <= pos < e for s, e in code_spans)
|
||||
|
||||
local_paths: list[str] = []
|
||||
seen_paths: set[str] = set()
|
||||
for match in _LOCAL_IMAGE_PATH_RE.finditer(text):
|
||||
if _in_code(match.start()):
|
||||
continue
|
||||
raw = match.group(0)
|
||||
expanded = os.path.expanduser(raw)
|
||||
try:
|
||||
if not os.path.isfile(expanded):
|
||||
continue
|
||||
except OSError:
|
||||
# ENAMETOOLONG / EINVAL on pathological inputs — skip rather than crash.
|
||||
continue
|
||||
if expanded in seen_paths:
|
||||
continue
|
||||
seen_paths.add(expanded)
|
||||
local_paths.append(expanded)
|
||||
|
||||
urls: list[str] = []
|
||||
seen_urls: set[str] = set()
|
||||
for match in _IMAGE_URL_RE.finditer(text):
|
||||
if _in_code(match.start()):
|
||||
continue
|
||||
url = match.group(0)
|
||||
# Strip trailing punctuation that's almost certainly prose, not part
|
||||
# of the URL (e.g. "see https://x.com/a.png." or "/a.png)").
|
||||
url = url.rstrip(".,;:!?)]>")
|
||||
if url in seen_urls:
|
||||
continue
|
||||
seen_urls.add(url)
|
||||
urls.append(url)
|
||||
|
||||
return local_paths, urls
|
||||
|
||||
|
||||
# Strict YAML/JSON boolean coercion for capability overrides.
|
||||
#
|
||||
# ``bool("false")`` is True in Python because non-empty strings are truthy, so
|
||||
@@ -320,20 +418,29 @@ def _file_to_data_url(path: Path) -> Optional[str]:
|
||||
def build_native_content_parts(
|
||||
user_text: str,
|
||||
image_paths: List[str],
|
||||
image_urls: Optional[List[str]] = None,
|
||||
) -> Tuple[List[Dict[str, Any]], List[str]]:
|
||||
"""Build an OpenAI-style ``content`` list for a user turn.
|
||||
|
||||
Shape:
|
||||
[{"type": "text", "text": "...\\n\\n[Image attached at: /local/path]"},
|
||||
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
|
||||
{"type": "image_url", "image_url": {"url": "https://example.com/a.png"}},
|
||||
...]
|
||||
|
||||
The local path of each successfully attached image is appended to the
|
||||
text part as ``[Image attached at: <path>]``. The model still sees the
|
||||
pixels via the ``image_url`` part (full native vision); the path note
|
||||
just gives it a string handle so MCP/skill tools that take an image
|
||||
path or URL argument can be invoked on the same image without an
|
||||
extra round-trip. This parallels the text-mode hint produced by
|
||||
Local paths are read from disk and embedded as base64 ``data:`` URLs.
|
||||
Remote URLs (``http(s)://``) are passed through verbatim — the provider
|
||||
fetches them server-side. The model still sees the pixels either way.
|
||||
|
||||
For each successfully attached image, a hint is appended to the text
|
||||
part:
|
||||
|
||||
* local path → ``[Image attached at: <path>]``
|
||||
* URL → ``[Image attached: <url>]``
|
||||
|
||||
The hint gives the model a string handle so MCP/skill tools that take
|
||||
an image path or URL argument can be invoked on the same image without
|
||||
an extra round-trip. This parallels the text-mode hint produced by
|
||||
``Runner._enrich_message_with_vision`` (``vision_analyze using image_url:
|
||||
<path>``) so behaviour is consistent across both image input modes.
|
||||
|
||||
@@ -342,12 +449,14 @@ def build_native_content_parts(
|
||||
ceiling), the agent's retry loop transparently shrinks and retries
|
||||
once — see ``run_agent._try_shrink_image_parts_in_messages``.
|
||||
|
||||
Returns (content_parts, skipped_paths). Skipped paths are files that
|
||||
couldn't be read from disk and are NOT advertised in the path hints.
|
||||
Returns (content_parts, skipped). Skipped entries are local paths
|
||||
that couldn't be read from disk; URLs are never skipped (they're
|
||||
not validated here).
|
||||
"""
|
||||
skipped: List[str] = []
|
||||
image_parts: List[Dict[str, Any]] = []
|
||||
attached_paths: List[str] = []
|
||||
attached_urls: List[str] = []
|
||||
|
||||
for raw_path in image_paths:
|
||||
p = Path(raw_path)
|
||||
@@ -364,16 +473,26 @@ def build_native_content_parts(
|
||||
})
|
||||
attached_paths.append(str(raw_path))
|
||||
|
||||
for url in image_urls or []:
|
||||
url = (url or "").strip()
|
||||
if not url:
|
||||
continue
|
||||
image_parts.append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": url},
|
||||
})
|
||||
attached_urls.append(url)
|
||||
|
||||
text = (user_text or "").strip()
|
||||
|
||||
# If at least one image attached, build a single text part that combines
|
||||
# the user's caption (or a neutral default) with one path hint per image.
|
||||
if attached_paths:
|
||||
# the user's caption (or a neutral default) with one hint per image.
|
||||
if attached_paths or attached_urls:
|
||||
base_text = text or "What do you see in this image?"
|
||||
path_hints = "\n".join(
|
||||
f"[Image attached at: {p}]" for p in attached_paths
|
||||
)
|
||||
combined_text = f"{base_text}\n\n{path_hints}"
|
||||
hint_lines: List[str] = []
|
||||
hint_lines.extend(f"[Image attached at: {p}]" for p in attached_paths)
|
||||
hint_lines.extend(f"[Image attached: {u}]" for u in attached_urls)
|
||||
combined_text = f"{base_text}\n\n" + "\n".join(hint_lines)
|
||||
parts: List[Dict[str, Any]] = [{"type": "text", "text": combined_text}]
|
||||
parts.extend(image_parts)
|
||||
return parts, skipped
|
||||
@@ -388,4 +507,5 @@ def build_native_content_parts(
|
||||
__all__ = [
|
||||
"decide_image_input_mode",
|
||||
"build_native_content_parts",
|
||||
"extract_image_refs",
|
||||
]
|
||||
|
||||
+33
-2
@@ -368,11 +368,42 @@ class MemoryManager:
|
||||
|
||||
# -- Sync ----------------------------------------------------------------
|
||||
|
||||
def sync_all(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
|
||||
@staticmethod
|
||||
def _provider_sync_accepts_messages(provider: MemoryProvider) -> bool:
|
||||
"""Return whether sync_turn accepts a messages keyword."""
|
||||
try:
|
||||
signature = inspect.signature(provider.sync_turn)
|
||||
except (TypeError, ValueError):
|
||||
return True
|
||||
params = list(signature.parameters.values())
|
||||
if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
|
||||
return True
|
||||
return "messages" in signature.parameters
|
||||
|
||||
def sync_all(
|
||||
self,
|
||||
user_content: str,
|
||||
assistant_content: str,
|
||||
*,
|
||||
session_id: str = "",
|
||||
messages: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> None:
|
||||
"""Sync a completed turn to all providers."""
|
||||
for provider in self._providers:
|
||||
try:
|
||||
provider.sync_turn(user_content, assistant_content, session_id=session_id)
|
||||
if messages is not None and self._provider_sync_accepts_messages(provider):
|
||||
provider.sync_turn(
|
||||
user_content,
|
||||
assistant_content,
|
||||
session_id=session_id,
|
||||
messages=messages,
|
||||
)
|
||||
else:
|
||||
provider.sync_turn(
|
||||
user_content,
|
||||
assistant_content,
|
||||
session_id=session_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Memory provider '%s' sync_turn failed: %s",
|
||||
|
||||
@@ -112,11 +112,22 @@ class MemoryProvider(ABC):
|
||||
that do background prefetching should override this.
|
||||
"""
|
||||
|
||||
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
|
||||
def sync_turn(
|
||||
self,
|
||||
user_content: str,
|
||||
assistant_content: str,
|
||||
*,
|
||||
session_id: str = "",
|
||||
messages: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> None:
|
||||
"""Persist a completed turn to the backend.
|
||||
|
||||
Called after each turn. Should be non-blocking — queue for
|
||||
background processing if the backend has latency.
|
||||
|
||||
``messages`` is the OpenAI-style conversation message list as of the
|
||||
completed turn, including any assistant tool calls and tool results.
|
||||
Providers that do not need raw turn context can ignore it.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
|
||||
+22
-1
@@ -913,12 +913,33 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
def get_context_length_from_provider_error(
    error_msg: str,
    current_context_length: int,
) -> Optional[int]:
    """Extract a provider-reported context limit that is lower than the current one.

    Context-overflow recovery must not invent a new model window size. Some
    providers only say that the input exceeds the context window without
    reporting the actual maximum. In that case callers should keep the
    configured context length and try compression only, rather than stepping
    down through guessed probe tiers (1M → 256K → 128K → ...).

    Args:
        error_msg: The provider error text, handed unchanged to
            ``parse_context_limit_from_error``.
        current_context_length: The context length currently in effect.

    Returns:
        The parsed limit when one was found and it is strictly smaller than
        ``current_context_length``; otherwise ``None``.
    """
    reported_limit = parse_context_limit_from_error(error_msg)
    # Only a limit that is both present and strictly lower is actionable;
    # an equal or larger value tells the caller nothing new.
    if reported_limit is not None and reported_limit < current_context_length:
        return reported_limit
    return None
|
||||
|
||||
|
||||
def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
|
||||
"""Detect an "output cap too large" error and return how many output tokens are available.
|
||||
|
||||
Background — two distinct context errors exist:
|
||||
1. "Prompt too long" — the INPUT itself exceeds the context window.
|
||||
Fix: compress history and/or halve context_length.
|
||||
Fix: compress history, and only reduce context_length if the
|
||||
provider explicitly reports the actual lower limit.
|
||||
2. "max_tokens too large" — input is fine, but input + requested_output > window.
|
||||
Fix: reduce max_tokens (the output cap) for this call.
|
||||
Do NOT touch context_length — the window hasn't shrunk.
|
||||
|
||||
+8
-13
@@ -406,19 +406,14 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
|
||||
if "eyJ" in text:
|
||||
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
|
||||
|
||||
# URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
|
||||
# DB schemes are handled above by _DB_CONNSTR_RE.
|
||||
if "://" in text:
|
||||
text = _redact_url_userinfo(text)
|
||||
|
||||
# URL query params containing opaque tokens (?access_token=…&code=…)
|
||||
if "?" in text:
|
||||
text = _redact_url_query_params(text)
|
||||
|
||||
# HTTP access logs can contain relative request targets with query params
|
||||
# and no URL scheme, e.g. `"POST /hook?password=... HTTP/1.1"`.
|
||||
if "?" in text and "=" in text and _has_http_method_substring(text):
|
||||
text = _redact_http_request_target_query_params(text)
|
||||
# NOTE: Web-URL redaction (query params + userinfo + HTTP access-log
|
||||
# request targets) is intentionally OFF. Many legitimate workflows pass
|
||||
# opaque tokens through query strings — magic-link checkouts, OAuth
|
||||
# callbacks the agent is meant to follow, pre-signed share URLs — and
|
||||
# blanket-redacting param values by name breaks those skills mid-flow.
|
||||
# Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still
|
||||
# caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords
|
||||
# are still caught by _DB_CONNSTR_RE.
|
||||
|
||||
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
|
||||
if "&" in text and "=" in text:
|
||||
|
||||
@@ -168,7 +168,7 @@ from hermes_cli.browser_connect import (
|
||||
try_launch_chrome_debug,
|
||||
)
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from utils import base_url_host_matches, is_truthy_value
|
||||
from utils import base_url_host_matches
|
||||
|
||||
_hermes_home = get_hermes_home()
|
||||
_project_env = Path(__file__).parent / '.env'
|
||||
@@ -576,6 +576,8 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"docker_env": "TERMINAL_DOCKER_ENV",
|
||||
"docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
|
||||
"docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
|
||||
"docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
|
||||
"docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER",
|
||||
"sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||||
# Persistent shell (non-local backends)
|
||||
"persistent_shell": "TERMINAL_PERSISTENT_SHELL",
|
||||
@@ -3747,7 +3749,7 @@ class HermesCLI:
|
||||
percent_label = f"{percent}%" if percent is not None else "--"
|
||||
duration_label = snapshot["duration"]
|
||||
|
||||
yolo_active = bool(os.getenv("HERMES_YOLO_MODE"))
|
||||
yolo_active = self._is_session_yolo_active()
|
||||
if width < 52:
|
||||
text = f"⚕ {snapshot['model_short']} · {duration_label}"
|
||||
if yolo_active:
|
||||
@@ -3808,7 +3810,7 @@ class HermesCLI:
|
||||
# line and produce duplicated status bar rows over long sessions.
|
||||
width = self._get_tui_terminal_width()
|
||||
duration_label = snapshot["duration"]
|
||||
yolo_active = bool(os.getenv("HERMES_YOLO_MODE"))
|
||||
yolo_active = self._is_session_yolo_active()
|
||||
|
||||
if width < 52:
|
||||
frags = [
|
||||
@@ -6907,6 +6909,7 @@ class HermesCLI:
|
||||
pass
|
||||
|
||||
# Switch to the new session
|
||||
self._transfer_session_yolo(self.session_id, new_session_id)
|
||||
self.session_id = new_session_id
|
||||
self.session_start = now
|
||||
self._pending_title = None
|
||||
@@ -7586,8 +7589,19 @@ class HermesCLI:
|
||||
parts = cmd_original.split(None, 1) # split off '/model'
|
||||
raw_args = parts[1].strip() if len(parts) > 1 else ""
|
||||
|
||||
# Parse --provider and --global flags
|
||||
model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
|
||||
# Parse --provider, --global, and --refresh flags
|
||||
model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
|
||||
|
||||
# --refresh: wipe the on-disk picker cache before building the
|
||||
# provider list. Forces a live re-fetch of every authed provider's
|
||||
# /v1/models endpoint on this open.
|
||||
if force_refresh:
|
||||
try:
|
||||
from hermes_cli.models import clear_provider_models_cache
|
||||
clear_provider_models_cache()
|
||||
_cprint(" Cleared model picker cache. Refreshing...")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Single inventory context — replaces the inline config-slice the
|
||||
# dashboard / TUI used to duplicate. Overlay live session state
|
||||
@@ -7626,6 +7640,7 @@ class HermesCLI:
|
||||
_cprint("")
|
||||
_cprint(" /model <name> switch model")
|
||||
_cprint(" /model --provider <slug> switch provider")
|
||||
_cprint(" /model --refresh re-fetch live model lists")
|
||||
return
|
||||
|
||||
self._open_model_picker(
|
||||
@@ -9607,20 +9622,92 @@ class HermesCLI:
|
||||
}
|
||||
_cprint(labels.get(self.tool_progress_mode, ""))
|
||||
|
||||
def _toggle_yolo(self):
|
||||
"""Toggle YOLO mode — skip all dangerous command approval prompts."""
|
||||
import os
|
||||
from hermes_cli.colors import Colors as _Colors
|
||||
def _transfer_session_yolo(self, old_session_id: str, new_session_id: str) -> None:
|
||||
"""Move YOLO bypass state from an old session key to a new one.
|
||||
|
||||
current = is_truthy_value(os.environ.get("HERMES_YOLO_MODE"))
|
||||
if current:
|
||||
os.environ.pop("HERMES_YOLO_MODE", None)
|
||||
Called whenever ``self.session_id`` is reassigned mid-run — ``/branch``
|
||||
forks into a new session, and auto-compression rotates the agent's
|
||||
session id into a fresh continuation session. Without this transfer
|
||||
the user's ``/yolo ON`` toggle would silently revert on the very next
|
||||
turn (the same UX failure mode that motivated this entire fix), since
|
||||
``_session_yolo`` is keyed by session id.
|
||||
|
||||
Mirrors ``tui_gateway/server.py`` (~line 1297-1305) which performs the
|
||||
same transfer for the TUI's session-rename path. No-op when YOLO
|
||||
wasn't enabled or when the ids match.
|
||||
"""
|
||||
if not old_session_id or not new_session_id or old_session_id == new_session_id:
|
||||
return
|
||||
try:
|
||||
from tools.approval import (
|
||||
disable_session_yolo,
|
||||
enable_session_yolo,
|
||||
is_session_yolo_enabled,
|
||||
)
|
||||
except Exception:
|
||||
return
|
||||
if is_session_yolo_enabled(old_session_id):
|
||||
enable_session_yolo(new_session_id)
|
||||
disable_session_yolo(old_session_id)
|
||||
|
||||
def _is_session_yolo_active(self) -> bool:
|
||||
"""Whether YOLO bypass is currently enabled for this CLI session.
|
||||
|
||||
Reads from ``tools.approval._session_yolo`` (the same set that
|
||||
``enable_session_yolo`` / ``disable_session_yolo`` write to) so the
|
||||
status bar reflects the actual bypass state instead of a stale env
|
||||
var. Also honors the process-start ``--yolo`` flag, which freezes
|
||||
``HERMES_YOLO_MODE`` into ``_YOLO_MODE_FROZEN`` before tool imports
|
||||
happen.
|
||||
"""
|
||||
try:
|
||||
from tools.approval import (
|
||||
_YOLO_MODE_FROZEN,
|
||||
is_session_yolo_enabled,
|
||||
)
|
||||
except Exception:
|
||||
return False
|
||||
if _YOLO_MODE_FROZEN:
|
||||
return True
|
||||
# Use ``getattr`` so test fixtures that build a CLI via ``__new__``
|
||||
# (skipping ``__init__``) don't trip an AttributeError here; the
|
||||
# status-bar builders swallow exceptions silently but lose every
|
||||
# field after the failure.
|
||||
session_key = getattr(self, "session_id", None) or "default"
|
||||
return is_session_yolo_enabled(session_key)
|
||||
|
||||
def _toggle_yolo(self):
|
||||
"""Toggle YOLO mode — skip all dangerous command approval prompts.
|
||||
|
||||
Per-session toggle that mirrors the gateway and TUI ``/yolo`` handlers
|
||||
(see ``gateway/run.py:_handle_yolo_command`` and
|
||||
``tui_gateway/server.py`` key=="yolo"). We deliberately do NOT mutate
|
||||
``HERMES_YOLO_MODE`` here — that env var is read once at module import
|
||||
time into ``tools.approval._YOLO_MODE_FROZEN`` to keep prompt-injected
|
||||
skills from flipping the bypass mid-session, so setting it after CLI
|
||||
startup is a silent no-op. Routing through ``enable_session_yolo`` /
|
||||
``disable_session_yolo`` gives the same auditable, per-session bypass
|
||||
the other surfaces have. ``run_conversation`` binds
|
||||
``self.session_id`` as the active approval session key via
|
||||
``set_current_session_key`` so the bypass takes effect on the very
|
||||
next dangerous command in this run.
|
||||
"""
|
||||
from hermes_cli.colors import Colors as _Colors
|
||||
from tools.approval import (
|
||||
disable_session_yolo,
|
||||
enable_session_yolo,
|
||||
is_session_yolo_enabled,
|
||||
)
|
||||
|
||||
session_key = self.session_id or "default"
|
||||
if is_session_yolo_enabled(session_key):
|
||||
disable_session_yolo(session_key)
|
||||
_cprint(
|
||||
f" ⚠ YOLO mode {_Colors.BOLD}{_Colors.RED}OFF{_Colors.RESET}"
|
||||
" — dangerous commands will require approval."
|
||||
)
|
||||
else:
|
||||
os.environ["HERMES_YOLO_MODE"] = "1"
|
||||
enable_session_yolo(session_key)
|
||||
_cprint(
|
||||
f" ⚡ YOLO mode {_Colors.BOLD}{_Colors.GREEN}ON{_Colors.RESET}"
|
||||
" — all commands auto-approved. Use with caution."
|
||||
@@ -11757,6 +11844,23 @@ class HermesCLI:
|
||||
set_secret_capture_callback(self._secret_capture_callback)
|
||||
except Exception:
|
||||
pass
|
||||
# Bind this turn's approval session key into the contextvar so
|
||||
# ``tools.approval.is_current_session_yolo_enabled()`` resolves
|
||||
# against the same key that ``/yolo`` toggles under (see
|
||||
# ``_toggle_yolo`` → ``enable_session_yolo(self.session_id)``).
|
||||
# Mirrors ``tui_gateway/server.py`` and ``gateway/run.py`` which
|
||||
# bind the same contextvar before invoking the agent.
|
||||
try:
|
||||
from tools.approval import (
|
||||
reset_current_session_key,
|
||||
set_current_session_key,
|
||||
)
|
||||
_approval_session_token = set_current_session_key(
|
||||
self.session_id or "default"
|
||||
)
|
||||
except Exception:
|
||||
reset_current_session_key = None # type: ignore[assignment]
|
||||
_approval_session_token = None
|
||||
agent_message = _voice_prefix + message if _voice_prefix else message
|
||||
# Prepend pending model switch note so the model knows about the switch
|
||||
_msn = getattr(self, '_pending_model_switch_note', None)
|
||||
@@ -11798,6 +11902,15 @@ class HermesCLI:
|
||||
set_secret_capture_callback(None)
|
||||
except Exception:
|
||||
pass
|
||||
# Release the per-turn approval session key. ``_session_yolo``
|
||||
# state itself is preserved across turns (so /yolo persists
|
||||
# for the whole CLI run); we just unbind the contextvar so a
|
||||
# reused thread doesn't see stale identity on its next run.
|
||||
if _approval_session_token is not None and reset_current_session_key is not None:
|
||||
try:
|
||||
reset_current_session_key(_approval_session_token)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Start agent in background thread (daemon so it cannot keep the
|
||||
# process alive when the user closes the terminal tab — SIGHUP
|
||||
@@ -11928,6 +12041,7 @@ class HermesCLI:
|
||||
and getattr(self.agent, "session_id", None)
|
||||
and self.agent.session_id != self.session_id
|
||||
):
|
||||
self._transfer_session_yolo(self.session_id, self.agent.session_id)
|
||||
self.session_id = self.agent.session_id
|
||||
self._pending_title = None
|
||||
|
||||
@@ -14968,6 +15082,39 @@ def main(
|
||||
time.sleep(_grace)
|
||||
except Exception:
|
||||
pass # never block signal handling
|
||||
# Kanban worker exit path (#28181): SIGTERM hits a dispatcher-spawned
|
||||
# worker that's likely in a non-daemon thread waiting on a child
|
||||
# subprocess in _wait_for_process. Raising KeyboardInterrupt only
|
||||
# unwinds the main thread; the worker thread keeps running, the
|
||||
# process gets reparented to init, and the dispatcher's _pid_alive
|
||||
# check returns True forever — task stuck in 'running' indefinitely.
|
||||
# Skip the controlled-unwind dance and call os._exit(0) so the kernel
|
||||
# reclaims the PID immediately and detect_crashed_workers can reclaim
|
||||
# the stale claim on the next tick. Flush logging + stdout/stderr
|
||||
# first so the final debug trace isn't lost; SIGALRM deadman guards
|
||||
# the flush against any rare blocking-I/O case (the reporter measured
|
||||
# flush in <1ms; the alarm is a failsafe, not the common path).
|
||||
if os.environ.get("HERMES_KANBAN_TASK"):
|
||||
try:
|
||||
import signal as _sig_mod
|
||||
if hasattr(_sig_mod, "SIGALRM"):
|
||||
# Cancel any pre-existing alarm to avoid colliding with
|
||||
# caller-installed timers.
|
||||
_sig_mod.signal(_sig_mod.SIGALRM, lambda *_: os._exit(0))
|
||||
_sig_mod.alarm(2)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
import logging as _lg
|
||||
_lg.shutdown()
|
||||
except Exception:
|
||||
pass
|
||||
for _stream in (sys.stdout, sys.stderr):
|
||||
try:
|
||||
_stream.flush()
|
||||
except Exception:
|
||||
pass
|
||||
os._exit(0)
|
||||
raise KeyboardInterrupt()
|
||||
try:
|
||||
import signal as _signal
|
||||
@@ -14980,13 +15127,50 @@ def main(
|
||||
# Handle single query mode
|
||||
if query or image:
|
||||
query, single_query_images = _collect_query_images(query, image)
|
||||
# Kanban workers spawn with ``hermes chat -q "work kanban task <id>"``;
|
||||
# the actual task description lives in the task body. Mirror the
|
||||
# gateway/CLI behaviour for inbound images by scanning the body for
|
||||
# local image paths and http(s) image URLs and attaching them to the
|
||||
# worker's first turn. Without this, users who paste a screenshot
|
||||
# path or URL into a kanban task body never get it routed to the
|
||||
# model's vision input.
|
||||
single_query_image_urls: list[str] = []
|
||||
_kanban_task_id = os.environ.get("HERMES_KANBAN_TASK", "").strip()
|
||||
if _kanban_task_id:
|
||||
try:
|
||||
from hermes_cli import kanban_db as _kb
|
||||
from agent.image_routing import extract_image_refs as _extract_refs
|
||||
|
||||
_conn = _kb.connect()
|
||||
try:
|
||||
_task = _kb.get_task(_conn, _kanban_task_id)
|
||||
finally:
|
||||
try:
|
||||
_conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
_body = getattr(_task, "body", "") if _task is not None else ""
|
||||
if _body:
|
||||
_kb_paths, _kb_urls = _extract_refs(_body)
|
||||
if _kb_paths:
|
||||
# Dedupe against any --image the user already passed.
|
||||
_seen = {str(p) for p in single_query_images}
|
||||
for _p in _kb_paths:
|
||||
if _p not in _seen:
|
||||
_seen.add(_p)
|
||||
single_query_images.append(Path(_p))
|
||||
if _kb_urls:
|
||||
single_query_image_urls.extend(_kb_urls)
|
||||
except Exception as _exc:
|
||||
# Best-effort enrichment; never block worker startup on it.
|
||||
logger.debug("kanban image-ref extraction failed: %s", _exc)
|
||||
if quiet:
|
||||
# Quiet mode: suppress banner, spinner, tool previews.
|
||||
# Only print the final response and parseable session info.
|
||||
cli.tool_progress_mode = "off"
|
||||
if cli._ensure_runtime_credentials():
|
||||
effective_query: Any = query
|
||||
if single_query_images:
|
||||
if single_query_images or single_query_image_urls:
|
||||
# Honour the same image-routing decision used by the
|
||||
# interactive path. With a vision-capable model (incl.
|
||||
# custom-provider models declared via
|
||||
@@ -15015,19 +15199,26 @@ def main(
|
||||
_parts, _skipped = _build_parts(
|
||||
query if isinstance(query, str) else "",
|
||||
[str(p) for p in single_query_images],
|
||||
image_urls=list(single_query_image_urls) or None,
|
||||
)
|
||||
if any(p.get("type") == "image_url" for p in _parts):
|
||||
effective_query = _parts
|
||||
else:
|
||||
# All images unreadable — text fallback.
|
||||
# ``_preprocess_images_with_vision`` only knows
|
||||
# about local files; URLs would be lost there,
|
||||
# so keep the original query text intact when
|
||||
# only URLs were supplied.
|
||||
if single_query_images:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query, single_query_images, announce=False,
|
||||
)
|
||||
except Exception:
|
||||
if single_query_images:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query, single_query_images, announce=False,
|
||||
)
|
||||
except Exception:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query, single_query_images, announce=False,
|
||||
)
|
||||
else:
|
||||
elif single_query_images:
|
||||
effective_query = cli._preprocess_images_with_vision(
|
||||
query,
|
||||
single_query_images,
|
||||
|
||||
@@ -30,13 +30,21 @@ cd /opt/data
|
||||
dash_host="${HERMES_DASHBOARD_HOST:-0.0.0.0}"
|
||||
dash_port="${HERMES_DASHBOARD_PORT:-9119}"
|
||||
|
||||
# Binding to anything other than localhost requires --insecure — the
|
||||
# dashboard refuses otherwise because it exposes API keys. Inside a
|
||||
# container this is the expected deployment.
|
||||
# `--insecure` is opt-in via HERMES_DASHBOARD_INSECURE. The dashboard's
|
||||
# OAuth auth gate engages automatically on non-loopback binds when a
|
||||
# DashboardAuthProvider is registered (e.g. the bundled dashboard_auth/nous
|
||||
# provider, which auto-registers when HERMES_DASHBOARD_OAUTH_CLIENT_ID is
|
||||
# set). If no provider is registered, start_server fails closed with a
|
||||
# specific operator-facing error.
|
||||
#
|
||||
# This used to derive --insecure from the bind host ("anything non-loopback
|
||||
# implies insecure"), but that predates the OAuth gate and silently
|
||||
# disabled it on every container-deployed dashboard. The gate is now the
|
||||
# authority; operators on trusted LANs / behind a reverse proxy without
|
||||
# the OAuth contract opt in explicitly.
|
||||
insecure=""
|
||||
case "$dash_host" in
|
||||
127.0.0.1|localhost) ;;
|
||||
*) insecure="--insecure" ;;
|
||||
case "${HERMES_DASHBOARD_INSECURE:-}" in
|
||||
1|true|TRUE|True|yes|YES|Yes) insecure="--insecure" ;;
|
||||
esac
|
||||
|
||||
# shellcheck disable=SC2086 # word-splitting of $insecure is intentional
|
||||
|
||||
@@ -829,6 +829,13 @@ _HERMES_HOME = get_hermes_home()
|
||||
MEDIA_DELIVERY_ALLOW_DIRS_ENV = "HERMES_MEDIA_ALLOW_DIRS"
|
||||
MEDIA_DELIVERY_TRUST_RECENT_ENV = "HERMES_MEDIA_TRUST_RECENT_FILES"
|
||||
MEDIA_DELIVERY_TRUST_RECENT_SECONDS_ENV = "HERMES_MEDIA_TRUST_RECENT_SECONDS"
|
||||
# Strict mode toggles the original allowlist+recency path-validation behavior.
|
||||
# Off by default — symmetric with inbound (we accept any document type the
|
||||
# user uploads), and with the denylist still blocking obvious credential /
|
||||
# system paths. Operators running public-facing gateways where prompt
|
||||
# injection from one user could exfiltrate the host's secrets to that same
|
||||
# user should set this to true.
|
||||
MEDIA_DELIVERY_STRICT_ENV = "HERMES_MEDIA_DELIVERY_STRICT"
|
||||
MEDIA_DELIVERY_SAFE_ROOTS = (
|
||||
IMAGE_CACHE_DIR,
|
||||
AUDIO_CACHE_DIR,
|
||||
@@ -918,6 +925,21 @@ def _media_delivery_recency_seconds() -> float:
|
||||
return float(_MEDIA_DELIVERY_TRUST_RECENT_DEFAULT_SECONDS)
|
||||
|
||||
|
||||
def _media_delivery_strict_mode() -> bool:
    """Tell whether media-delivery path validation runs in strict mode.

    Strict mode is off by default. When it is off,
    ``validate_media_delivery_path`` accepts any existing regular file
    that is not under the credential / system-path denylist, restoring
    the pre-#29523 behavior for the single-user case. When it is on, the
    original allowlist + recency-window logic applies — intended for
    operators running public-facing gateways, where prompt injection from
    one user shouldn't be able to exfiltrate the host's secrets to that
    same user.

    Returns:
        ``True`` when the environment variable named by
        ``MEDIA_DELIVERY_STRICT_ENV`` is, after trimming whitespace and
        lower-casing, one of ``1``, ``true``, ``yes`` or ``on``; ``False``
        otherwise, including when the variable is unset.
    """
    setting = os.environ.get(MEDIA_DELIVERY_STRICT_ENV, "0")
    normalized = setting.strip().lower()
    return normalized in {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def _media_delivery_denied_paths() -> List[Path]:
|
||||
"""Return absolute denylist paths under which delivery is never allowed."""
|
||||
denied = [Path(p) for p in _MEDIA_DELIVERY_DENIED_PREFIXES]
|
||||
@@ -972,10 +994,22 @@ def _path_is_within(path: Path, root: Path) -> bool:
|
||||
def validate_media_delivery_path(path: str) -> Optional[str]:
|
||||
"""Return a safe absolute file path for native media delivery, else None.
|
||||
|
||||
MEDIA tags and bare local paths in model output are untrusted text. Only
|
||||
existing regular files under Hermes-managed media caches, or roots the
|
||||
operator explicitly allowlists, may be uploaded as native attachments.
|
||||
Symlinks are resolved before the containment check.
|
||||
Default mode (single-user / private gateway): accept any existing regular
|
||||
file that isn't under the credential / system-path denylist
|
||||
(``_MEDIA_DELIVERY_DENIED_PREFIXES`` + ``~/.ssh``, ``~/.aws``, etc.).
|
||||
This matches the symmetry of inbound delivery — Telegram/Discord/Slack
|
||||
will hand the agent any file the user uploads, and the agent can hand
|
||||
back any file that isn't a credential.
|
||||
|
||||
Strict mode (opt-in via ``gateway.strict`` in ``config.yaml`` or
|
||||
``HERMES_MEDIA_DELIVERY_STRICT=1``): the file MUST live under a
|
||||
Hermes-managed cache, under an operator-allowlisted root
|
||||
(``HERMES_MEDIA_ALLOW_DIRS``), or be freshly produced inside the
|
||||
configured recency window. Suitable for public-facing bots where
|
||||
prompt injection from one user shouldn't be able to exfiltrate the
|
||||
host's secrets to that same user.
|
||||
|
||||
Symlinks are resolved before any containment / denylist check.
|
||||
"""
|
||||
if not path:
|
||||
return None
|
||||
@@ -999,6 +1033,8 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
|
||||
if not resolved.is_file():
|
||||
return None
|
||||
|
||||
# Cache / operator allowlist is always honored — these are unconditionally
|
||||
# trusted regardless of mode.
|
||||
for root in _media_delivery_allowed_roots():
|
||||
try:
|
||||
resolved_root = root.expanduser().resolve(strict=False)
|
||||
@@ -1007,9 +1043,18 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
|
||||
if _path_is_within(resolved, resolved_root):
|
||||
return str(resolved)
|
||||
|
||||
# Outside the cache/operator allowlist: fall back to recency-based trust
|
||||
# for files the agent has just produced (e.g. ``pandoc -o /tmp/report.pdf``
|
||||
# or ``write_file("/home/user/report.pdf", ...)``). System paths and
|
||||
# Non-strict mode (default): accept anything not on the denylist.
|
||||
# The denylist still blocks /etc, /proc, ~/.ssh, ~/.aws, ~/.hermes/.env,
|
||||
# ~/.hermes/auth.json, etc. — so the obvious prompt-injection sites
|
||||
# (``MEDIA:/etc/passwd``, ``MEDIA:~/.ssh/id_rsa``) remain rejected.
|
||||
if not _media_delivery_strict_mode():
|
||||
if _path_under_denied_prefix(resolved):
|
||||
return None
|
||||
return str(resolved)
|
||||
|
||||
# Strict mode: fall back to recency-based trust for freshly-produced
|
||||
# files (e.g. ``pandoc -o /tmp/report.pdf`` or
|
||||
# ``write_file("/home/user/report.pdf", ...)``). System paths and
|
||||
# credential locations remain blocked even when "recent" — see
|
||||
# ``_MEDIA_DELIVERY_DENIED_PREFIXES`` for the denylist.
|
||||
window = _media_delivery_recency_seconds()
|
||||
|
||||
+63
-3
@@ -831,6 +831,8 @@ if _config_path.exists():
|
||||
"docker_env": "TERMINAL_DOCKER_ENV",
|
||||
"docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
|
||||
"docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
|
||||
"docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
|
||||
"docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER",
|
||||
"sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||||
"persistent_shell": "TERMINAL_PERSISTENT_SHELL",
|
||||
}
|
||||
@@ -932,9 +934,14 @@ if _config_path.exists():
|
||||
_redact = _security_cfg.get("redact_secrets")
|
||||
if _redact is not None:
|
||||
os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
|
||||
# Gateway settings (media delivery allowlist + recency trust)
|
||||
# Gateway settings (media delivery allowlist + recency trust + strict mode)
|
||||
_gateway_cfg = _cfg.get("gateway", {})
|
||||
if isinstance(_gateway_cfg, dict):
|
||||
_strict = _gateway_cfg.get("strict")
|
||||
if _strict is not None:
|
||||
os.environ["HERMES_MEDIA_DELIVERY_STRICT"] = (
|
||||
"1" if _strict else "0"
|
||||
)
|
||||
_allow_dirs = _gateway_cfg.get("media_delivery_allow_dirs")
|
||||
if _allow_dirs:
|
||||
if isinstance(_allow_dirs, str):
|
||||
@@ -5413,6 +5420,49 @@ class GatewayRunner:
|
||||
)
|
||||
stale_timeout_seconds = 0
|
||||
|
||||
# Read kanban.default_assignee — fallback profile for tasks
|
||||
# created without an explicit assignee (e.g. via the dashboard).
|
||||
# When set, the dispatcher applies it to unassigned ready tasks
|
||||
# instead of skipping them indefinitely (#27145). Empty string
|
||||
# (the schema default) means "no fallback, keep skipping" —
|
||||
# backward-compatible with existing installs.
|
||||
default_assignee = (kanban_cfg.get("default_assignee") or "").strip() or None
|
||||
if default_assignee:
|
||||
logger.info(
|
||||
"kanban dispatcher: default_assignee=%r (unassigned ready tasks "
|
||||
"will route to this profile)",
|
||||
default_assignee,
|
||||
)
|
||||
|
||||
# Read kanban.max_in_progress_per_profile — per-profile concurrency
|
||||
# cap (#21582). When set, no single profile gets more than N
|
||||
# workers running at once, even if the global max_in_progress
|
||||
# would allow it. Prevents one profile's local model / API quota
|
||||
# / browser pool from being overwhelmed by a fan-out.
|
||||
raw_per_profile = kanban_cfg.get("max_in_progress_per_profile", None)
|
||||
max_in_progress_per_profile = None
|
||||
if raw_per_profile is not None:
|
||||
try:
|
||||
max_in_progress_per_profile = int(raw_per_profile)
|
||||
except (TypeError, ValueError):
|
||||
logger.warning(
|
||||
"kanban dispatcher: invalid kanban.max_in_progress_per_profile=%r; ignoring",
|
||||
raw_per_profile,
|
||||
)
|
||||
max_in_progress_per_profile = None
|
||||
else:
|
||||
if max_in_progress_per_profile < 1:
|
||||
logger.warning(
|
||||
"kanban dispatcher: kanban.max_in_progress_per_profile=%r is below 1; ignoring",
|
||||
raw_per_profile,
|
||||
)
|
||||
max_in_progress_per_profile = None
|
||||
else:
|
||||
logger.info(
|
||||
"kanban dispatcher: max_in_progress_per_profile=%d",
|
||||
max_in_progress_per_profile,
|
||||
)
|
||||
|
||||
# Initial delay so the gateway finishes wiring adapters before the
|
||||
# dispatcher spawns workers (those workers may hit gateway notify
|
||||
# subscriptions etc.). Matches the notifier watcher's delay.
|
||||
@@ -5504,6 +5554,8 @@ class GatewayRunner:
|
||||
max_in_progress=max_in_progress,
|
||||
failure_limit=failure_limit,
|
||||
stale_timeout_seconds=stale_timeout_seconds,
|
||||
default_assignee=default_assignee,
|
||||
max_in_progress_per_profile=max_in_progress_per_profile,
|
||||
)
|
||||
except sqlite3.DatabaseError as exc:
|
||||
if _is_corrupt_board_db_error(exc):
|
||||
@@ -10241,8 +10293,16 @@ class GatewayRunner:
|
||||
|
||||
raw_args = event.get_command_args().strip()
|
||||
|
||||
# Parse --provider and --global flags
|
||||
model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
|
||||
# Parse --provider, --global, and --refresh flags
|
||||
model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
|
||||
|
||||
# --refresh: bust the disk cache so the picker shows live data.
|
||||
if force_refresh:
|
||||
try:
|
||||
from hermes_cli.models import clear_provider_models_cache
|
||||
clear_provider_models_cache()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Read current model/provider from config
|
||||
current_model = ""
|
||||
|
||||
@@ -14,8 +14,8 @@ Provides subcommands for:
|
||||
import os
|
||||
import sys
|
||||
|
||||
__version__ = "0.15.0"
|
||||
__release_date__ = "2026.5.28"
|
||||
__version__ = "0.15.1"
|
||||
__release_date__ = "2026.5.29"
|
||||
|
||||
|
||||
def _ensure_utf8():
|
||||
|
||||
@@ -123,7 +123,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("config", "Show current configuration", "Configuration",
|
||||
cli_only=True),
|
||||
CommandDef("model", "Switch model for this session", "Configuration",
|
||||
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
|
||||
aliases=("provider",), args_hint="[model] [--provider name] [--global] [--refresh]"),
|
||||
CommandDef("codex-runtime", "Toggle codex app-server runtime for OpenAI/Codex models",
|
||||
"Configuration", aliases=("codex_runtime",),
|
||||
args_hint="[auto|codex_app_server]"),
|
||||
|
||||
+30
-2
@@ -1726,6 +1726,15 @@ DEFAULT_CONFIG = {
|
||||
# assignee to any installed profile. When unset, falls back to the
|
||||
# default profile. A task never ends up with assignee=None.
|
||||
"default_assignee": "",
|
||||
# Per-profile concurrency cap (#21582). When set to a positive int,
|
||||
# no single profile can have more than N workers running at once,
|
||||
# even if the global max_in_progress / max_spawn caps would allow
|
||||
# it. Tasks blocked this way defer to the next dispatcher tick.
|
||||
# Unset (None) means "no per-profile cap" — backward-compatible
|
||||
# with existing installs. Useful for fan-out workflows that would
|
||||
# otherwise saturate one profile's local model / API quota /
|
||||
# browser pool while leaving other profiles idle.
|
||||
"max_in_progress_per_profile": None,
|
||||
# When true, the kanban dispatcher auto-runs the decomposer on
|
||||
# tasks that land in Triage (every dispatcher tick). When false,
|
||||
# decomposition is manual via `hermes kanban decompose <id>` or
|
||||
@@ -1806,6 +1815,21 @@ DEFAULT_CONFIG = {
|
||||
# Gateway settings — control how messaging platforms (Telegram, Discord,
|
||||
# Slack, etc.) deliver agent-produced files as native attachments.
|
||||
"gateway": {
|
||||
# When false (default), any file path the agent emits is delivered
|
||||
# as a native attachment as long as it isn't under the credential /
|
||||
# system-path denylist (/etc, /proc, ~/.ssh, ~/.aws, ~/.hermes/.env,
|
||||
# auth.json, etc.). This matches the symmetry of inbound delivery
|
||||
# — we accept any document type the user uploads, and the agent
|
||||
# can hand back any file that isn't a credential.
|
||||
#
|
||||
# When true, fall back to the older allowlist+recency-window
|
||||
# behavior: files must live under the Hermes cache, under
|
||||
# ``media_delivery_allow_dirs``, or be freshly produced inside the
|
||||
# ``trust_recent_files_seconds`` window. Recommended for
|
||||
# public-facing gateways where prompt injection from one user
|
||||
# shouldn't be able to exfiltrate the host's secrets to that same
|
||||
# user. Bridged to HERMES_MEDIA_DELIVERY_STRICT.
|
||||
"strict": False,
|
||||
# Extra directories from which model-emitted bare file paths may be
|
||||
# uploaded as native gateway attachments. Files inside the Hermes
|
||||
# cache (~/.hermes/cache/{documents,images,audio,video,screenshots})
|
||||
@@ -1813,7 +1837,7 @@ DEFAULT_CONFIG = {
|
||||
# (project dirs, scratch dirs, mounted shares). Accepts a list of
|
||||
# absolute paths or a single os.pathsep-separated string. Bridged
|
||||
# to HERMES_MEDIA_ALLOW_DIRS at gateway startup. Tilde paths are
|
||||
# expanded.
|
||||
# expanded. Honored in both default and strict mode.
|
||||
"media_delivery_allow_dirs": [],
|
||||
# When true, files whose mtime is within ``trust_recent_files_seconds``
|
||||
# of "now" are trusted for native delivery even outside the cache /
|
||||
@@ -1821,10 +1845,12 @@ DEFAULT_CONFIG = {
|
||||
# PDFs the agent writes into a working directory. System paths
|
||||
# (/etc, /proc, ~/.ssh, ~/.aws, etc.) remain blocked regardless.
|
||||
# Disable to fall back to pure-allowlist mode. Bridged to
|
||||
# HERMES_MEDIA_TRUST_RECENT_FILES.
|
||||
# HERMES_MEDIA_TRUST_RECENT_FILES. Only consulted when ``strict``
|
||||
# is true; in default mode the denylist alone gates delivery.
|
||||
"trust_recent_files": True,
|
||||
# Recency window in seconds. 600 (10 min) comfortably covers a
|
||||
# multi-tool agent turn. Bridged to HERMES_MEDIA_TRUST_RECENT_SECONDS.
|
||||
# Only consulted when ``strict`` is true.
|
||||
"trust_recent_files_seconds": 600,
|
||||
},
|
||||
|
||||
@@ -5534,6 +5560,8 @@ def set_config_value(key: str, value: str):
|
||||
"terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
|
||||
"terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
|
||||
"terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
|
||||
"terminal.docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
|
||||
"terminal.docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER",
|
||||
"terminal.docker_env": "TERMINAL_DOCKER_ENV",
|
||||
# terminal.cwd intentionally excluded — CLI resolves at runtime,
|
||||
# gateway bridges it in gateway/run.py. Persisting to .env causes
|
||||
|
||||
@@ -2087,12 +2087,35 @@ def _cmd_tail(args: argparse.Namespace) -> int:
|
||||
|
||||
|
||||
def _cmd_dispatch(args: argparse.Namespace) -> int:
|
||||
# Honour kanban.default_assignee as the fallback for unassigned ready
|
||||
# tasks (#27145) and kanban.max_in_progress_per_profile as the
|
||||
# per-profile concurrency cap (#21582). Same semantics as the
|
||||
# gateway dispatch path.
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
_cfg = load_config()
|
||||
_kanban_cfg = _cfg.get("kanban", {}) if isinstance(_cfg, dict) else {}
|
||||
default_assignee = (_kanban_cfg.get("default_assignee") or "").strip() or None
|
||||
_raw_per_profile = _kanban_cfg.get("max_in_progress_per_profile", None)
|
||||
try:
|
||||
max_in_progress_per_profile = (
|
||||
int(_raw_per_profile) if _raw_per_profile is not None else None
|
||||
)
|
||||
if max_in_progress_per_profile is not None and max_in_progress_per_profile < 1:
|
||||
max_in_progress_per_profile = None
|
||||
except (TypeError, ValueError):
|
||||
max_in_progress_per_profile = None
|
||||
except Exception:
|
||||
default_assignee = None
|
||||
max_in_progress_per_profile = None
|
||||
with kb.connect_closing() as conn:
|
||||
res = kb.dispatch_once(
|
||||
conn,
|
||||
dry_run=args.dry_run,
|
||||
max_spawn=args.max,
|
||||
failure_limit=getattr(args, "failure_limit", kb.DEFAULT_SPAWN_FAILURE_LIMIT),
|
||||
default_assignee=default_assignee,
|
||||
max_in_progress_per_profile=max_in_progress_per_profile,
|
||||
)
|
||||
if getattr(args, "json", False):
|
||||
print(json.dumps({
|
||||
@@ -2108,6 +2131,11 @@ def _cmd_dispatch(args: argparse.Namespace) -> int:
|
||||
],
|
||||
"skipped_unassigned": res.skipped_unassigned,
|
||||
"skipped_nonspawnable": res.skipped_nonspawnable,
|
||||
"skipped_per_profile_capped": [
|
||||
{"task_id": tid, "assignee": who, "current": current}
|
||||
for (tid, who, current) in res.skipped_per_profile_capped
|
||||
],
|
||||
"auto_assigned_default": res.auto_assigned_default,
|
||||
}, indent=2))
|
||||
return 0
|
||||
print(f"Reclaimed: {res.reclaimed}")
|
||||
@@ -2128,8 +2156,18 @@ def _cmd_dispatch(args: argparse.Namespace) -> int:
|
||||
for tid, who, ws in res.spawned:
|
||||
tag = " (dry)" if args.dry_run else ""
|
||||
print(f" - {tid} -> {who} @ {ws or '-'}{tag}")
|
||||
if res.auto_assigned_default:
|
||||
print(
|
||||
f"Auto-assigned to kanban.default_assignee={default_assignee!r}: "
|
||||
f"{', '.join(res.auto_assigned_default)}"
|
||||
)
|
||||
if res.skipped_unassigned:
|
||||
print(f"Skipped (unassigned): {', '.join(res.skipped_unassigned)}")
|
||||
if res.skipped_per_profile_capped:
|
||||
for tid, who, current in res.skipped_per_profile_capped:
|
||||
print(
|
||||
f"Deferred ({who} at per-profile cap, {current} running): {tid}"
|
||||
)
|
||||
if res.skipped_nonspawnable:
|
||||
print(
|
||||
f"Skipped (non-spawnable assignee — terminal lane, OK): "
|
||||
|
||||
+126
-5
@@ -4289,6 +4289,12 @@ class DispatchResult:
|
||||
skipped_unassigned: list[str] = field(default_factory=list)
|
||||
"""Ready task ids skipped because they have no assignee at all.
|
||||
Operator-actionable — usually a misfiled task waiting for routing."""
|
||||
auto_assigned_default: list[str] = field(default_factory=list)
|
||||
"""Task ids that were unassigned in the DB and had
|
||||
``kanban.default_assignee`` applied this tick before spawning (#27145).
|
||||
Surfaces the auto-assignment to telemetry / CLI / dashboard so the
|
||||
operator can see when the dispatcher is acting on the fallback rule
|
||||
rather than on explicit per-task assignments."""
|
||||
skipped_nonspawnable: list[str] = field(default_factory=list)
|
||||
"""Ready task ids skipped because their assignee names a control-plane
|
||||
lane (a Claude Code terminal like ``orion-cc``) rather than a Hermes
|
||||
@@ -4296,6 +4302,14 @@ class DispatchResult:
|
||||
operator-actionable failure. Tracked separately so health telemetry
|
||||
can distinguish "real stuck" (nothing spawned but spawnable work
|
||||
available) from "correctly idle" (nothing spawnable in the queue)."""
|
||||
skipped_per_profile_capped: list[tuple[str, str, int]] = field(default_factory=list)
|
||||
"""Tasks deferred this tick because their assignee is already at
|
||||
``kanban.max_in_progress_per_profile`` (#21582). Each entry is
|
||||
``(task_id, assignee, current_running_count)``. NOT an
|
||||
operator-actionable failure — the task will be picked up on a
|
||||
subsequent tick when the assignee has capacity. Separate bucket so
|
||||
telemetry / dashboards can show "this profile is busy" vs
|
||||
"task is genuinely stuck"."""
|
||||
crashed: list[str] = field(default_factory=list)
|
||||
"""Task ids reclaimed because their worker PID disappeared."""
|
||||
auto_blocked: list[str] = field(default_factory=list)
|
||||
@@ -5342,6 +5356,8 @@ def dispatch_once(
|
||||
failure_limit: int = DEFAULT_SPAWN_FAILURE_LIMIT,
|
||||
stale_timeout_seconds: int = 0,
|
||||
board: Optional[str] = None,
|
||||
default_assignee: Optional[str] = None,
|
||||
max_in_progress_per_profile: Optional[int] = None,
|
||||
) -> DispatchResult:
|
||||
"""Run one dispatcher tick.
|
||||
|
||||
@@ -5427,12 +5443,89 @@ def dispatch_once(
|
||||
if max_spawn is None or max_spawn > remaining:
|
||||
max_spawn = remaining
|
||||
spawned = 0
|
||||
# Per-profile concurrency cap (#21582): when set, track how many
|
||||
# workers each assignee already has in flight, and refuse to spawn
|
||||
# when this would push that assignee past the cap. Prevents
|
||||
# fan-out workloads from melting a single profile's local model /
|
||||
# API quota / browser pool while leaving other profiles idle.
|
||||
# Tasks blocked this way go to skipped_per_profile_capped (not
|
||||
# skipped_unassigned — the operator-actionable signal is different:
|
||||
# "this profile is busy, try again later" not "this needs routing").
|
||||
_per_profile_cap = max_in_progress_per_profile if (
|
||||
isinstance(max_in_progress_per_profile, int)
|
||||
and max_in_progress_per_profile > 0
|
||||
) else None
|
||||
_per_profile_running: dict[str, int] = {}
|
||||
if _per_profile_cap is not None:
|
||||
for prow in conn.execute(
|
||||
"SELECT assignee, COUNT(*) AS n FROM tasks "
|
||||
"WHERE status = 'running' AND assignee IS NOT NULL "
|
||||
"GROUP BY assignee"
|
||||
):
|
||||
_per_profile_running[prow["assignee"]] = int(prow["n"])
|
||||
# Normalize default_assignee once: empty/whitespace string → None so the
|
||||
# rest of the loop can use ``if default_assignee:`` as a single check.
|
||||
# We also resolve profile_exists once here for the same reason.
|
||||
_default_assignee = (default_assignee or "").strip() or None
|
||||
_default_assignee_resolved = False
|
||||
if _default_assignee:
|
||||
try:
|
||||
from hermes_cli.profiles import profile_exists as _pe
|
||||
_default_assignee_resolved = bool(_pe(_default_assignee))
|
||||
except Exception:
|
||||
# Profiles module not importable (test stubs, exotic envs).
|
||||
# Trust the operator's config and try the assignment; the
|
||||
# downstream profile_exists check on the assigned row will
|
||||
# bucket it as nonspawnable if the profile genuinely isn't
|
||||
# there, with the existing diagnostic.
|
||||
_default_assignee_resolved = True
|
||||
for row in ready_rows:
|
||||
if max_spawn is not None and running_count + spawned >= max_spawn:
|
||||
break
|
||||
if not row["assignee"]:
|
||||
result.skipped_unassigned.append(row["id"])
|
||||
continue
|
||||
row_assignee = row["assignee"]
|
||||
if not row_assignee:
|
||||
# Honour kanban.default_assignee: when the dispatcher hits an
|
||||
# unassigned ready task and an operator-configured fallback
|
||||
# exists, persist the assignment and proceed. This removes the
|
||||
# dashboard footgun where a task created without an assignee
|
||||
# parks in 'ready' forever even though the operator's intent
|
||||
# ("default") was perfectly clear (#27145). Mutating the row
|
||||
# (not just the in-memory view) keeps diagnostics and the
|
||||
# board state consistent: the task is now legitimately owned
|
||||
# by ``kanban.default_assignee``, not "unassigned but secretly
|
||||
# routed".
|
||||
if _default_assignee and _default_assignee_resolved:
|
||||
# Dry-run: show what WOULD happen (auto-assign + spawn) without
|
||||
# mutating the DB. Real run: mutate the row + emit the
|
||||
# 'assigned' event so the board state matches what just happened.
|
||||
if not dry_run:
|
||||
try:
|
||||
with write_txn(conn):
|
||||
conn.execute(
|
||||
"UPDATE tasks SET assignee = ? WHERE id = ? "
|
||||
"AND (assignee IS NULL OR assignee = '')",
|
||||
(_default_assignee, row["id"]),
|
||||
)
|
||||
_append_event(
|
||||
conn, row["id"], "assigned",
|
||||
{
|
||||
"assignee": _default_assignee,
|
||||
"source": "kanban.default_assignee",
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
_log.debug(
|
||||
"kanban dispatch: failed to apply default_assignee=%r "
|
||||
"to task %s",
|
||||
_default_assignee, row["id"], exc_info=True,
|
||||
)
|
||||
result.skipped_unassigned.append(row["id"])
|
||||
continue
|
||||
row_assignee = _default_assignee
|
||||
result.auto_assigned_default.append(row["id"])
|
||||
else:
|
||||
result.skipped_unassigned.append(row["id"])
|
||||
continue
|
||||
# Skip ready tasks whose assignee is not a real Hermes profile.
|
||||
# `_default_spawn` invokes ``hermes -p <assignee>`` which fails
|
||||
# with "Profile 'X' does not exist" when the assignee names a
|
||||
@@ -5447,7 +5540,7 @@ def dispatch_once(
|
||||
from hermes_cli.profiles import profile_exists # local import: avoids cycle
|
||||
except Exception:
|
||||
profile_exists = None # type: ignore[assignment]
|
||||
if profile_exists is not None and not profile_exists(row["assignee"]):
|
||||
if profile_exists is not None and not profile_exists(row_assignee):
|
||||
# Bucket separately from skipped_unassigned: the operator
|
||||
# cannot fix this by assigning a profile (the assignee IS the
|
||||
# intended owner — a terminal lane). Health telemetry uses
|
||||
@@ -5456,6 +5549,19 @@ def dispatch_once(
|
||||
# of human-pulled work.
|
||||
result.skipped_nonspawnable.append(row["id"])
|
||||
continue
|
||||
# Per-profile concurrency cap (#21582): even if there's global
|
||||
# headroom, refuse to spawn for an assignee that's already at
|
||||
# its in-flight cap. Prevents one profile's local model / API
|
||||
# quota / browser pool from being overwhelmed by a fan-out
|
||||
# while the global max_in_progress / max_spawn caps still allow
|
||||
# work on OTHER profiles.
|
||||
if _per_profile_cap is not None:
|
||||
current = _per_profile_running.get(row_assignee, 0)
|
||||
if current >= _per_profile_cap:
|
||||
result.skipped_per_profile_capped.append(
|
||||
(row["id"], row_assignee, current)
|
||||
)
|
||||
continue
|
||||
# Respawn guard: refuse to re-spawn when useful work is already
|
||||
# in-flight/recent, or when the last failure is a deterministic
|
||||
# blocker (quota / auth). The guard defers the spawn this tick so
|
||||
@@ -5478,7 +5584,15 @@ def dispatch_once(
|
||||
)
|
||||
continue
|
||||
if dry_run:
|
||||
result.spawned.append((row["id"], row["assignee"], ""))
|
||||
result.spawned.append((row["id"], row_assignee, ""))
|
||||
# Increment per-profile counter even in dry_run so the cap
|
||||
# check sees the would-be spawn on subsequent iterations.
|
||||
# Without this, dry_run reports every task as spawnable and
|
||||
# under-reports the capped subset (#21582).
|
||||
if _per_profile_cap is not None and row_assignee:
|
||||
_per_profile_running[row_assignee] = (
|
||||
_per_profile_running.get(row_assignee, 0) + 1
|
||||
)
|
||||
continue
|
||||
claimed = claim_task(conn, row["id"], ttl_seconds=ttl_seconds)
|
||||
if claimed is None:
|
||||
@@ -5521,6 +5635,13 @@ def dispatch_once(
|
||||
# complete_task).
|
||||
result.spawned.append((claimed.id, claimed.assignee or "", str(workspace)))
|
||||
spawned += 1
|
||||
# Track the new in-flight count for this profile so later
|
||||
# iterations in this same tick respect the per-profile cap
|
||||
# (#21582). Subsequent ticks re-query from the DB.
|
||||
if _per_profile_cap is not None and claimed.assignee:
|
||||
_per_profile_running[claimed.assignee] = (
|
||||
_per_profile_running.get(claimed.assignee, 0) + 1
|
||||
)
|
||||
except Exception as exc:
|
||||
auto = _record_spawn_failure(
|
||||
conn, claimed.id, str(exc),
|
||||
|
||||
@@ -2117,6 +2117,13 @@ def cmd_postinstall(args):
|
||||
def cmd_model(args):
    """Run the interactive default-model flow: provider first, then model.

    Requires a TTY. When ``args.refresh`` is truthy the model picker's
    disk cache is cleared before the picker opens, so the provider lists
    are fetched again instead of being served from the cache.
    """
    _require_tty("model")

    refresh_requested = getattr(args, "refresh", False)
    if refresh_requested:
        # Clearing the cache is a convenience, not a precondition: any
        # failure here (import error, I/O error, closed stdout) must not
        # stop the picker from opening.
        try:
            from hermes_cli.models import clear_provider_models_cache as _clear_cache

            _clear_cache()
            print(" Cleared model picker cache.")
        except Exception:
            pass

    select_provider_and_model(args=args)
|
||||
|
||||
|
||||
@@ -11311,6 +11318,11 @@ def main():
|
||||
help="Select default model and provider",
|
||||
description="Interactively select your inference provider and default model",
|
||||
)
|
||||
model_parser.add_argument(
|
||||
"--refresh",
|
||||
action="store_true",
|
||||
help="Wipe the model picker disk cache and re-fetch every provider's live /v1/models list.",
|
||||
)
|
||||
model_parser.add_argument(
|
||||
"--portal-url",
|
||||
help="Portal base URL for Nous login (default: production portal)",
|
||||
|
||||
+47
-33
@@ -294,32 +294,39 @@ class CustomAutoResult:
|
||||
# Flag parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
|
||||
"""Parse --provider and --global flags from /model command args.
|
||||
def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool]:
|
||||
"""Parse --provider, --global, and --refresh flags from /model command args.
|
||||
|
||||
Returns (model_input, explicit_provider, is_global).
|
||||
Returns (model_input, explicit_provider, is_global, force_refresh).
|
||||
|
||||
Examples::
|
||||
|
||||
"sonnet" -> ("sonnet", "", False)
|
||||
"sonnet --global" -> ("sonnet", "", True)
|
||||
"sonnet --provider anthropic" -> ("sonnet", "anthropic", False)
|
||||
"--provider my-ollama" -> ("", "my-ollama", False)
|
||||
"sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True)
|
||||
"sonnet" -> ("sonnet", "", False, False)
|
||||
"sonnet --global" -> ("sonnet", "", True, False)
|
||||
"sonnet --provider anthropic" -> ("sonnet", "anthropic", False, False)
|
||||
"--provider my-ollama" -> ("", "my-ollama", False, False)
|
||||
"--refresh" -> ("", "", False, True)
|
||||
"sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True, False)
|
||||
"""
|
||||
is_global = False
|
||||
explicit_provider = ""
|
||||
force_refresh = False
|
||||
|
||||
# Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
|
||||
# A single Unicode dash before a flag keyword becomes "--"
|
||||
import re as _re
|
||||
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global)', r'--\1', raw_args)
|
||||
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global|refresh)', r'--\1', raw_args)
|
||||
|
||||
# Extract --global
|
||||
if "--global" in raw_args:
|
||||
is_global = True
|
||||
raw_args = raw_args.replace("--global", "").strip()
|
||||
|
||||
# Extract --refresh (bust the model picker disk cache before listing)
|
||||
if "--refresh" in raw_args:
|
||||
force_refresh = True
|
||||
raw_args = raw_args.replace("--refresh", "").strip()
|
||||
|
||||
# Extract --provider <name>
|
||||
parts = raw_args.split()
|
||||
i = 0
|
||||
@@ -333,7 +340,7 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
|
||||
i += 1
|
||||
|
||||
model_input = " ".join(filtered).strip()
|
||||
return (model_input, explicit_provider, is_global)
|
||||
return (model_input, explicit_provider, is_global, force_refresh)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1079,6 +1086,7 @@ def list_authenticated_providers(
|
||||
from hermes_cli.models import (
|
||||
OPENROUTER_MODELS, _PROVIDER_MODELS,
|
||||
_MODELS_DEV_PREFERRED, _merge_with_models_dev, provider_model_ids,
|
||||
cached_provider_model_ids,
|
||||
get_curated_nous_model_ids,
|
||||
)
|
||||
|
||||
@@ -1239,13 +1247,15 @@ def list_authenticated_providers(
|
||||
if not has_creds:
|
||||
continue
|
||||
|
||||
# Use curated list, falling back to models.dev if no curated list.
|
||||
# For preferred providers, merge models.dev entries into the curated
|
||||
# catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go)
|
||||
# show up in the picker without requiring a Hermes release.
|
||||
model_ids = curated.get(hermes_id, [])
|
||||
if hermes_id in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_id, model_ids)
|
||||
# Unified pathway: route through cached_provider_model_ids() so the
|
||||
# /model picker sees the SAME list `hermes model` would build, with
|
||||
# disk caching to keep the picker open snappy. Falls back to the
|
||||
# curated static list when the live fetcher returns nothing.
|
||||
model_ids = cached_provider_model_ids(hermes_id)
|
||||
if not model_ids:
|
||||
model_ids = curated.get(hermes_id, [])
|
||||
if hermes_id in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_id, model_ids)
|
||||
total = len(model_ids)
|
||||
top = model_ids[:max_models]
|
||||
|
||||
@@ -1351,25 +1361,27 @@ def list_authenticated_providers(
|
||||
# matches what the user's authenticated Codex/Copilot backend
|
||||
# actually serves — including ChatGPT-Pro-only Codex slugs
|
||||
# (e.g. gpt-5.3-codex-spark) that aren't in the static curated
|
||||
# catalog. ``provider_model_ids()`` falls back to the curated
|
||||
# list when the live endpoint is unreachable, so this is safe
|
||||
# for unauthenticated and offline cases too.
|
||||
model_ids = provider_model_ids(hermes_slug)
|
||||
# catalog. ``cached_provider_model_ids()`` falls back to the
|
||||
# curated list when the live endpoint is unreachable, so this
|
||||
# is safe for unauthenticated and offline cases too.
|
||||
model_ids = cached_provider_model_ids(hermes_slug)
|
||||
# For aws_sdk providers (bedrock), use live discovery so the list
|
||||
# reflects the active region (eu.*, ap.*) not the static us.* list.
|
||||
elif overlay.auth_type == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import bedrock_model_ids_or_none
|
||||
_ids = bedrock_model_ids_or_none()
|
||||
model_ids = _ids if _ids is not None else (curated.get(hermes_slug, []) or curated.get(pid, []))
|
||||
_ids = cached_provider_model_ids(hermes_slug)
|
||||
model_ids = _ids if _ids else (curated.get(hermes_slug, []) or curated.get(pid, []))
|
||||
except Exception:
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
else:
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
# Merge with models.dev for preferred providers (same rationale as above).
|
||||
if hermes_slug in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
|
||||
# Unified pathway — see Section 1 rationale. Fall back to the
|
||||
# curated dict (with models.dev merge for preferred providers)
|
||||
# when the live fetcher comes up empty.
|
||||
model_ids = cached_provider_model_ids(hermes_slug)
|
||||
if not model_ids:
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
if hermes_slug in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
|
||||
total = len(model_ids)
|
||||
top = model_ids[:max_models]
|
||||
|
||||
@@ -1436,13 +1448,15 @@ def list_authenticated_providers(
|
||||
# region (eu.*, us.*, ap.*) instead of the hardcoded us.* static list.
|
||||
if _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import bedrock_model_ids_or_none
|
||||
_ids = bedrock_model_ids_or_none()
|
||||
_cp_model_ids = _ids if _ids is not None else curated.get(_cp.slug, [])
|
||||
_ids = cached_provider_model_ids(_cp.slug)
|
||||
_cp_model_ids = _ids if _ids else curated.get(_cp.slug, [])
|
||||
except Exception:
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
else:
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
# Unified pathway — same as sections 1 and 2.
|
||||
_cp_model_ids = cached_provider_model_ids(_cp.slug)
|
||||
if not _cp_model_ids:
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
_cp_total = len(_cp_model_ids)
|
||||
_cp_top = _cp_model_ids[:max_models]
|
||||
|
||||
|
||||
@@ -2047,6 +2047,12 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||
return live
|
||||
except Exception:
|
||||
pass
|
||||
# Live failed (or no creds). Fall back to the docs-hosted manifest
|
||||
# — NOT the in-repo _PROVIDER_MODELS["nous"] snapshot — so newly
|
||||
# added Portal models still surface without a Hermes release.
|
||||
manifest_ids = get_curated_nous_model_ids()
|
||||
if manifest_ids:
|
||||
return manifest_ids
|
||||
if normalized == "stepfun":
|
||||
try:
|
||||
from hermes_cli.auth import resolve_api_key_provider_credentials
|
||||
@@ -2150,6 +2156,206 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||
return curated_static
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Generic disk cache for provider_model_ids() — keeps /model picker fast.
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Without this layer, every /model picker open re-fetches every authed
|
||||
# provider's /v1/models endpoint. On a well-configured user (anthropic +
|
||||
# openai + copilot + gemini + huggingface + ...) that's 2+ seconds of cold
|
||||
# HTTP roundtrips just to render the provider list.
|
||||
#
|
||||
# Cache strategy:
|
||||
# - One JSON file at $HERMES_HOME/provider_models_cache.json
|
||||
# - Per-provider entries keyed by (provider, credential fingerprint)
|
||||
# - Credential fingerprint = blake2b digest of the env-var values (plus the mtimes of known credential files) that the provider
|
||||
# normally reads. Swap your OPENAI_API_KEY and the entry invalidates.
|
||||
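# - 1h TTL by default. `force_refresh=True` ignores entry freshness, always
#   calls the live fetcher, and overwrites the entry on success. If that
#   forced fetch returns nothing, a same-fingerprint cached entry is still
#   served as a fallback.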
|
||||
# - Only NON-EMPTY results are cached. An empty/None response from a
|
||||
# transient network error never gets pinned.
|
||||
# - Cache file is best-effort. Any read/write error degrades silently
|
||||
# to a live fetch — the picker keeps working.
|
||||
|
||||
_PROVIDER_MODELS_CACHE_TTL = 3600 # 1h
|
||||
|
||||
|
||||
def _provider_models_cache_path() -> Path:
    """Return the path of the provider-models cache file.

    The file is ``provider_models_cache.json`` directly under the
    directory returned by ``get_hermes_home()``.
    """
    # Imported at call time rather than at module import.
    from hermes_constants import get_hermes_home

    hermes_home = get_hermes_home()
    return hermes_home / "provider_models_cache.json"
|
||||
|
||||
|
||||
def _credential_fingerprint(provider: str) -> str:
|
||||
"""Return a short hash representing the credentials that
|
||||
``provider_model_ids(provider)`` would see right now.
|
||||
|
||||
Rotating any of the relevant env vars invalidates the cached entry
|
||||
for that provider. We hash AT LEAST the api-key + base-url env vars
|
||||
declared in ``PROVIDER_REGISTRY``. For OAuth-backed providers
|
||||
(codex, copilot, anthropic-via-claude-code, nous portal), the
|
||||
relevant tokens live in ``$HERMES_HOME/auth.json`` and external
|
||||
credential files. Rather than parse every shape, we additionally
|
||||
fold the mtime of those files into the fingerprint so refreshes
|
||||
after re-auth bust the cache.
|
||||
"""
|
||||
import hashlib
|
||||
import os as _os
|
||||
|
||||
parts: list[str] = []
|
||||
|
||||
# Env vars from PROVIDER_REGISTRY for this slug
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
pcfg = PROVIDER_REGISTRY.get(provider)
|
||||
if pcfg is not None:
|
||||
for ev in getattr(pcfg, "api_key_env_vars", ()) or ():
|
||||
parts.append(f"{ev}={_os.environ.get(ev, '')}")
|
||||
bev = getattr(pcfg, "base_url_env_var", "") or ""
|
||||
if bev:
|
||||
parts.append(f"{bev}={_os.environ.get(bev, '')}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# OAuth / external-file mtimes that change on re-auth
|
||||
try:
|
||||
from hermes_constants import get_hermes_home
|
||||
for rel in ("auth.json", "credentials.json"):
|
||||
p = get_hermes_home() / rel
|
||||
try:
|
||||
parts.append(f"{rel}@{p.stat().st_mtime_ns}")
|
||||
except FileNotFoundError:
|
||||
parts.append(f"{rel}@missing")
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# External well-known credential file locations
|
||||
for path in (
|
||||
_os.path.expanduser("~/.codex/auth.json"),
|
||||
_os.path.expanduser("~/.claude/.credentials.json"),
|
||||
_os.path.expanduser("~/.config/github-copilot/hosts.json"),
|
||||
_os.path.expanduser("~/.minimax/credentials.json"),
|
||||
):
|
||||
try:
|
||||
mt = _os.stat(path).st_mtime_ns
|
||||
parts.append(f"{path}@{mt}")
|
||||
except FileNotFoundError:
|
||||
parts.append(f"{path}@missing")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
blob = "|".join(parts).encode("utf-8", errors="replace")
|
||||
# blake2b for cache-key fingerprinting only — not for credential storage.
|
||||
# We never reverse this hash; collisions are harmless (worst case: cache
|
||||
# miss → live re-fetch). Use blake2b instead of sha256 here because
|
||||
# CodeQL's `py/weak-sensitive-data-hashing` rule flags sha256 over env
|
||||
# vars whose names contain "API_KEY" / "TOKEN" even when the hash is
|
||||
# used as an identity fingerprint, not for password storage. blake2b
|
||||
# is a keyed-hash primitive and isn't flagged.
|
||||
return hashlib.blake2b(blob, digest_size=8).hexdigest()
|
||||
|
||||
|
||||
def _load_provider_models_cache() -> dict:
|
||||
"""Return the full cache dict, or {} on any error."""
|
||||
try:
|
||||
path = _provider_models_cache_path()
|
||||
if not path.exists():
|
||||
return {}
|
||||
with open(path, encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
return data if isinstance(data, dict) else {}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
def _save_provider_models_cache(data: dict) -> None:
    """Write ``data`` to the cache file, creating parent directories first.

    The write goes through ``utils.atomic_json_write`` with ``indent=None``.
    Persistence is best-effort: every error (import failure, permissions,
    full disk, ...) is swallowed so callers never fail on a cache write.
    """
    try:
        from utils import atomic_json_write

        target = _provider_models_cache_path()
        target.parent.mkdir(parents=True, exist_ok=True)
        atomic_json_write(target, data, indent=None)
    except Exception:
        # Deliberately silent — losing a cache write only costs a re-fetch.
        return None
|
||||
|
||||
|
||||
def cached_provider_model_ids(
    provider: Optional[str],
    *,
    force_refresh: bool = False,
    ttl_seconds: int = _PROVIDER_MODELS_CACHE_TTL,
) -> list[str]:
    """Disk-cached wrapper around :func:`provider_model_ids`.

    Args:
        provider: Provider slug or alias. It is passed through
            ``normalize_provider``; an empty/None provider yields ``[]``.
        force_refresh: When true, the cached entry is never served as
            "fresh"; the live function is always called (and is passed
            ``force_refresh=True`` as well).
        ttl_seconds: Maximum age, in seconds, for a cached entry to be
            served without calling the live function.

    Returns:
        A new list of model ids (never ``None``). In order of preference:
        a fresh cached entry, the live result (which is also persisted
        when non-empty), a stale cached entry with the same credential
        fingerprint, or ``[]``.

    A cached entry is only ever used when its stored fingerprint matches
    the current ``_credential_fingerprint`` and its ``models`` value is a
    non-empty list. An entry whose ``at`` timestamp is unparsable or lies
    in the future is treated as stale instead of raising or staying fresh
    indefinitely, so a damaged cache file degrades to a live fetch.
    """
    normalized = normalize_provider(provider) or (provider or "")
    if not normalized:
        return []

    cache = _load_provider_models_cache()
    fp = _credential_fingerprint(normalized)
    entry = cache.get(normalized)
    now = time.time()

    # Validate the entry once; both the fresh-hit path and the stale
    # fallback below need exactly the same structural checks.
    cached_models: list[str] = []
    if (
        isinstance(entry, dict)
        and entry.get("fp") == fp
        and isinstance(entry.get("models"), list)
        and entry["models"]
    ):
        cached_models = list(entry["models"])

    if cached_models and not force_refresh:
        try:
            age = now - float(entry.get("at", 0))
        except (TypeError, ValueError, OverflowError):
            # Corrupt timestamp (e.g. a string or null in a hand-edited or
            # truncated file): fall through to the live fetch.
            age = None
        # A negative age means the entry claims to come from the future
        # (clock change / bad data); do not let it outlive the TTL.
        if age is not None and 0 <= age < ttl_seconds:
            return cached_models

    # Cache miss / stale / forced refresh — call the live path.
    live = provider_model_ids(normalized, force_refresh=force_refresh)
    if live:
        # Only non-empty results are persisted, so a transient failure
        # never overwrites a good entry with an empty one.
        cache[normalized] = {
            "fp": fp,
            "at": now,
            "models": list(live),
        }
        _save_provider_models_cache(cache)
        return list(live)

    # Live fetch returned nothing. Stale data with the SAME fingerprint
    # beats no data when the network is flaky; otherwise this is [].
    return cached_models
|
||||
|
||||
|
||||
def clear_provider_models_cache(provider: Optional[str] = None) -> None:
    """Drop a single provider's cache entry, or wipe the whole cache.

    ``provider=None`` wipes everything; otherwise only that provider's
    entry is removed. Used by ``/model --refresh`` and
    ``hermes model --refresh``.

    Best-effort: any error while deleting or rewriting the cache is
    swallowed, so this function never raises.
    """
    try:
        if provider is None:
            # missing_ok avoids the exists()/unlink() check-then-act race when
            # another process removes the file between the two calls.
            _provider_models_cache_path().unlink(missing_ok=True)
            return
        cache = _load_provider_models_cache()
        normalized = normalize_provider(provider) or provider or ""
        if normalized in cache:
            del cache[normalized]
            _save_provider_models_cache(cache)
    except Exception:
        # Deliberately silent: clearing a cache must never break the caller.
        pass
|
||||
|
||||
|
||||
def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
|
||||
"""Fetch available models from the Anthropic /v1/models endpoint.
|
||||
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ let
|
||||
src = ../web;
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
hash = "sha256-6qhGuifHVtCeep1SiQdCUxBMr7UGhYpdMTvXhrQu/zA=";
|
||||
hash = "sha256-HV0aISBVjwbGqDj8qQynSxGFrrZDzuYAW3D3lB/x3zo=";
|
||||
};
|
||||
|
||||
npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
|
||||
|
||||
@@ -75,8 +75,17 @@ Config file: `~/.hermes/hindsight/config.json`
|
||||
| `recall_prompt_preamble` | — | Custom preamble for recalled memories in context |
|
||||
| `recall_tags` | — | Tags to filter when searching memories |
|
||||
| `recall_tags_match` | `any` | Tag matching mode: `any` / `all` / `any_strict` / `all_strict` |
|
||||
| `recall_types` | `observation` | Fact types surfaced by recall (both auto-recall and the `hindsight_recall` tool). Comma-separated string or JSON list. **Default narrowed to `observation` only** (see "Behavior change" below). Set to `observation,world,experience` to also include raw facts. |
|
||||
| `auto_recall` | `true` | Automatically recall memories before each turn |
|
||||
|
||||
> **Behavior change — `recall_types` defaults to `observation` only.**
|
||||
>
|
||||
> Previously recall returned all three fact types. It now returns only observations.
|
||||
>
|
||||
> Per [Hindsight's docs](https://hindsight.vectorize.io/developer/observations), observations are the **consolidated** knowledge layer Hindsight builds on top of raw facts: deduplicated beliefs grounded in evidence, refined as new facts arrive, with proof counts and freshness signals. Raw `world` / `experience` facts are the individual supporting evidence that feeds them. For per-turn context injection, observations are denser per token and avoid feeding the model multiple raw facts that one observation already summarizes.
|
||||
>
|
||||
> Restore the broad recall with `"recall_types": "observation,world,experience"` (string or JSON list) in `~/.hermes/hindsight/config.json`. This applies to **both** auto-recall and the `hindsight_recall` tool — both read the same `recall_types` setting (the tool schema has no per-call `types` argument), so narrowing the default narrows both paths.
|
||||
|
||||
### Retain
|
||||
|
||||
| Key | Default | Description |
|
||||
|
||||
@@ -579,7 +579,15 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
# Recall controls
|
||||
self._auto_recall = True
|
||||
self._recall_max_tokens = 4096
|
||||
self._recall_types: list[str] | None = None
|
||||
# Default to observation-only recall. Observations are Hindsight's
|
||||
# consolidated knowledge layer — deduplicated, evidence-grounded
|
||||
# beliefs built from many raw facts, with proof counts and
|
||||
# freshness signals (see hindsight.vectorize.io/developer/observations).
|
||||
# Including raw world/experience facts re-ships the supporting
|
||||
# evidence that observations already summarize, burning the
|
||||
# `recall_max_tokens` budget. Users can restore the broader
|
||||
# recall via the `recall_types` config key.
|
||||
self._recall_types: list[str] = ["observation"]
|
||||
self._recall_prompt_preamble = ""
|
||||
self._recall_max_input_chars = 800
|
||||
|
||||
@@ -856,6 +864,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
{"key": "retain_assistant_prefix", "description": "Label used before assistant turns in retained transcripts", "default": "Assistant"},
|
||||
{"key": "recall_tags", "description": "Tags to filter when searching memories (comma-separated)", "default": ""},
|
||||
{"key": "recall_tags_match", "description": "Tag matching mode for recall", "default": "any", "choices": ["any", "all", "any_strict", "all_strict"]},
|
||||
{"key": "recall_types", "description": "Fact types to surface on recall — applies to both auto-recall and the hindsight_recall tool (comma-separated or list). Defaults to observation-only — observations are Hindsight's consolidated, deduplicated, evidence-grounded knowledge layer; raw world/experience facts are the supporting evidence observations already summarize. Set to e.g. 'observation,world,experience' to also include raw facts.", "default": "observation"},
|
||||
{"key": "auto_recall", "description": "Automatically recall memories before each turn", "default": True},
|
||||
{"key": "auto_retain", "description": "Automatically retain conversation turns", "default": True},
|
||||
{"key": "retain_every_n_turns", "description": "Retain every N turns (1 = every turn)", "default": 1},
|
||||
@@ -1187,7 +1196,17 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
# Recall controls
|
||||
self._auto_recall = self._config.get("auto_recall", True)
|
||||
self._recall_max_tokens = int(self._config.get("recall_max_tokens", 4096))
|
||||
self._recall_types = self._config.get("recall_types") or None
|
||||
# Default narrows recall to observation-only; pass an explicit
|
||||
# `recall_types` list in config.json to broaden (e.g. include
|
||||
# "world" / "experience") or to disable the filter entirely.
|
||||
configured_types = self._config.get("recall_types")
|
||||
if configured_types is None:
|
||||
self._recall_types = ["observation"]
|
||||
elif isinstance(configured_types, str):
|
||||
# Allow comma-separated strings for parity with recall_tags.
|
||||
self._recall_types = [t.strip() for t in configured_types.split(",") if t.strip()]
|
||||
else:
|
||||
self._recall_types = list(configured_types) or ["observation"]
|
||||
self._recall_prompt_preamble = self._config.get("recall_prompt_preamble", "")
|
||||
self._recall_max_input_chars = int(self._config.get("recall_max_input_chars", 800))
|
||||
self._retain_async = self._config.get("retain_async", True)
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "hermes-agent"
|
||||
version = "0.15.0"
|
||||
version = "0.15.1"
|
||||
description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
||||
+7
-2
@@ -2302,6 +2302,7 @@ class AIAgent:
|
||||
original_user_message: Any,
|
||||
final_response: Any,
|
||||
interrupted: bool,
|
||||
messages: list | None = None,
|
||||
) -> None:
|
||||
"""Mirror a completed turn into external memory providers.
|
||||
|
||||
@@ -2334,9 +2335,13 @@ class AIAgent:
|
||||
if not (self._memory_manager and final_response and original_user_message):
|
||||
return
|
||||
try:
|
||||
sync_kwargs = {"session_id": self.session_id or ""}
|
||||
if messages is not None:
|
||||
sync_kwargs["messages"] = messages
|
||||
self._memory_manager.sync_all(
|
||||
original_user_message, final_response,
|
||||
session_id=self.session_id or "",
|
||||
original_user_message,
|
||||
final_response,
|
||||
**sync_kwargs,
|
||||
)
|
||||
self._memory_manager.queue_prefetch_all(
|
||||
original_user_message,
|
||||
|
||||
@@ -80,30 +80,27 @@ def crawl_source(source, source_name: str, limit: int) -> list:
|
||||
|
||||
|
||||
def crawl_skills_sh(source: SkillsShSource) -> list:
|
||||
"""Crawl skills.sh using popular queries for broad coverage."""
|
||||
print(" Crawling skills.sh (popular queries)...", flush=True)
|
||||
"""Crawl skills.sh via its sitemap to enumerate the full catalog (~20k entries).
|
||||
|
||||
Previously walked a hardcoded list of ~28 popular keywords (each capped at
|
||||
50 results) which yielded ~850 unique skills — about 4% of the real catalog.
|
||||
The SkillsShSource.search("") path now hits the sitemap directly, returning
|
||||
the full 20k-entry catalog deduplicated by canonical identifier.
|
||||
"""
|
||||
print(" Crawling skills.sh (sitemap)...", flush=True)
|
||||
start = time.time()
|
||||
|
||||
queries = [
|
||||
"", # featured
|
||||
"react", "python", "web", "api", "database", "docker",
|
||||
"testing", "scraping", "design", "typescript", "git",
|
||||
"aws", "security", "data", "ml", "ai", "devops",
|
||||
"frontend", "backend", "mobile", "cli", "documentation",
|
||||
"kubernetes", "terraform", "rust", "go", "java",
|
||||
]
|
||||
try:
|
||||
results = source.search("", limit=0) # 0 = no cap, return the whole catalog
|
||||
except Exception as e:
|
||||
print(f" Warning: skills.sh sitemap walk failed: {e}", file=sys.stderr)
|
||||
results = []
|
||||
|
||||
all_skills: dict[str, dict] = {}
|
||||
for query in queries:
|
||||
try:
|
||||
results = source.search(query, limit=50)
|
||||
for meta in results:
|
||||
entry = _meta_to_dict(meta)
|
||||
if entry["identifier"] not in all_skills:
|
||||
all_skills[entry["identifier"]] = entry
|
||||
except Exception as e:
|
||||
print(f" Warning: skills.sh search '{query}' failed: {e}",
|
||||
file=sys.stderr)
|
||||
for meta in results:
|
||||
entry = _meta_to_dict(meta)
|
||||
if entry["identifier"] not in all_skills:
|
||||
all_skills[entry["identifier"]] = entry
|
||||
|
||||
elapsed = time.time() - start
|
||||
print(f" skills.sh: {len(all_skills)} unique skills ({elapsed:.1f}s)",
|
||||
@@ -345,7 +342,11 @@ def main():
|
||||
# or rate limiting kicked in. Failing here forces a human look before
|
||||
# the broken index reaches the live docs.
|
||||
EXPECTED_FLOORS = {
|
||||
"skills.sh": 100,
|
||||
# skills.sh now uses the sitemap walker (~20k catalog as of May 2026).
|
||||
# Anything under 10k means the sitemap shape changed or fetches failed
|
||||
# — better to fail loudly than ship a regression to the 858-skill
|
||||
# popular-queries era.
|
||||
"skills.sh": 10000,
|
||||
"lobehub": 100,
|
||||
# ClawHub had 49,698+ skills as of May 2026 — anything under 20k means
|
||||
# pagination broke or the API surface changed. Fail loudly rather
|
||||
|
||||
@@ -101,6 +101,8 @@ AUTHOR_MAP = {
|
||||
"kronexoi13@gmail.com": "kronexoi",
|
||||
"hua.zhong@kingsmith.com": "vgocoder",
|
||||
"hermes@marian.local": "Schrotti77",
|
||||
"david@memorilabs.ai": "devwdave",
|
||||
"dave@devwdave.com": "devwdave",
|
||||
"1920071390@campus.ouj.ac.jp": "zapabob",
|
||||
"gaia@gaia.local": "jfuenmayor",
|
||||
"jiahuigu@users.noreply.github.com": "Jiahui-Gu",
|
||||
@@ -128,6 +130,7 @@ AUTHOR_MAP = {
|
||||
"buraysandro9@gmail.com": "ygd58",
|
||||
"108427749+buntingszn@users.noreply.github.com": "buntingszn",
|
||||
"yanglongwei06@gmail.com": "Alex-yang00",
|
||||
"yanghongda@jackyun.com": "yangguangjin",
|
||||
"teknium@nousresearch.com": "teknium1",
|
||||
"markuscontasul@gmail.com": "Glucksberg",
|
||||
"80581902+Glucksberg@users.noreply.github.com": "Glucksberg",
|
||||
|
||||
@@ -16,6 +16,7 @@ from agent.image_routing import (
|
||||
_supports_vision_override,
|
||||
build_native_content_parts,
|
||||
decide_image_input_mode,
|
||||
extract_image_refs,
|
||||
)
|
||||
|
||||
|
||||
@@ -449,3 +450,190 @@ class TestLargeImageHandling:
|
||||
assert len(parts) == 2
|
||||
assert parts[0]["type"] == "text"
|
||||
assert parts[1]["type"] == "image_url"
|
||||
|
||||
|
||||
# ─── extract_image_refs ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestExtractImageRefs:
    """Scan task body / inbound text for image paths and URLs (kanban worker
    enrichment, issue raised May 2026).

    ``extract_image_refs`` is expected to return a ``(paths, urls)`` pair of
    lists; every test below unpacks and asserts on that pair.
    """

    def test_empty_or_none_returns_empty(self):
        assert extract_image_refs("") == ([], [])
        assert extract_image_refs(None) == ([], [])  # type: ignore[arg-type]

    def test_finds_absolute_path(self, tmp_path: Path):
        img = tmp_path / "screenshot.png"
        img.write_bytes(_png_bytes())
        body = f"Look at {img} and tell me what's wrong."
        paths, urls = extract_image_refs(body)
        assert paths == [str(img)]
        assert urls == []

    def test_finds_home_relative_path(self, tmp_path: Path, monkeypatch):
        # Simulate ~/foo.png by pointing HOME at tmp_path and creating the file
        monkeypatch.setenv("HOME", str(tmp_path))
        img = tmp_path / "foo.png"
        img.write_bytes(_png_bytes())
        paths, urls = extract_image_refs("see ~/foo.png please")
        assert paths == [str(img)]
        assert urls == []

    def test_skips_nonexistent_paths(self, tmp_path: Path):
        # Path-shaped but no file on disk → skipped.
        body = f"What's at {tmp_path}/never_created.png ?"
        paths, urls = extract_image_refs(body)
        assert paths == []
        assert urls == []

    def test_finds_http_image_url(self):
        body = "Check out https://example.com/photos/cat.png — cute right?"
        paths, urls = extract_image_refs(body)
        assert paths == []
        assert urls == ["https://example.com/photos/cat.png"]

    def test_finds_https_url_with_query_string(self):
        body = "Diagram: https://cdn.example.com/img.jpeg?size=large&v=2 here"
        paths, urls = extract_image_refs(body)
        assert urls == ["https://cdn.example.com/img.jpeg?size=large&v=2"]

    def test_url_trailing_punctuation_stripped(self):
        # Prose punctuation right after the URL must not be part of the URL.
        body = "See https://example.com/a.png."
        paths, urls = extract_image_refs(body)
        assert urls == ["https://example.com/a.png"]

    def test_ignores_non_image_urls(self):
        body = "See https://example.com/page.html and https://x.com/y.pdf"
        paths, urls = extract_image_refs(body)
        assert urls == []

    def test_dedupes_paths_and_urls(self, tmp_path: Path):
        img = tmp_path / "dup.png"
        img.write_bytes(_png_bytes())
        body = (
            f"First {img} then again {img}. "
            "Also https://example.com/x.png and https://example.com/x.png again."
        )
        paths, urls = extract_image_refs(body)
        assert paths == [str(img)]
        assert urls == ["https://example.com/x.png"]

    def test_ignores_paths_in_fenced_code_block(self, tmp_path: Path):
        img = tmp_path / "real.png"
        img.write_bytes(_png_bytes())
        # The fenced path must point at a file that really exists; otherwise
        # the "skip nonexistent paths" rule would hide a fence-handling bug
        # and this test would pass for the wrong reason.
        # NOTE(review): assumes fences are stripped before the path scan as
        # well as the URL scan — confirm against extract_image_refs.
        fenced = tmp_path / "example.png"
        fenced.write_bytes(_png_bytes())
        body = (
            "Outside the block, attach this:\n"
            f"{img}\n"
            "But not these examples:\n"
            "```\n"
            f"some_other_image: {fenced}\n"
            "url: https://example.com/example.png\n"
            "```\n"
        )
        paths, urls = extract_image_refs(body)
        assert paths == [str(img)]
        assert urls == []

    def test_ignores_paths_in_inline_code(self, tmp_path: Path):
        img = tmp_path / "real.jpg"
        img.write_bytes(_png_bytes())
        body = (
            f"Attach {img}, but ignore the example "
            "`https://example.com/skip.png` in backticks."
        )
        paths, urls = extract_image_refs(body)
        assert paths == [str(img)]
        assert urls == []

    def test_does_not_match_paths_inside_urls(self):
        # The lookbehind in the regex prevents matching the path-portion of
        # a URL as a local path. Only the URL should be detected.
        body = "Just the URL: https://example.com/some/dir/image.png"
        paths, urls = extract_image_refs(body)
        assert paths == []
        assert urls == ["https://example.com/some/dir/image.png"]

    def test_mixed_paths_and_urls(self, tmp_path: Path):
        img = tmp_path / "local.png"
        img.write_bytes(_png_bytes())
        body = (
            f"Compare local {img} against the design at "
            "https://example.com/design/v2.png — does it match?"
        )
        paths, urls = extract_image_refs(body)
        assert paths == [str(img)]
        assert urls == ["https://example.com/design/v2.png"]

    def test_case_insensitive_extension(self, tmp_path: Path):
        img = tmp_path / "shouty.PNG"
        img.write_bytes(_png_bytes())
        body = f"see {img}"
        paths, urls = extract_image_refs(body)
        assert paths == [str(img)]
|
||||
|
||||
|
||||
# ─── build_native_content_parts with URLs ────────────────────────────────────
|
||||
|
||||
|
||||
class TestBuildNativeContentPartsURLs:
    """Remote image URLs handed to ``build_native_content_parts`` via the
    ``image_urls`` keyword are forwarded as ``image_url`` parts, so kanban
    task bodies (and other inbound surfaces) can reference remote images."""

    def test_url_only_no_local_paths(self):
        url = "https://example.com/diagram.png"
        parts, skipped = build_native_content_parts(
            "what is this?",
            [],
            image_urls=[url],
        )
        text_part, image_part = parts[0], parts[1]
        assert skipped == []
        assert len(parts) == 2
        assert text_part["type"] == "text"
        assert "[Image attached: https://example.com/diagram.png]" in text_part["text"]
        assert text_part["text"].startswith("what is this?")
        assert image_part == {
            "type": "image_url",
            "image_url": {"url": "https://example.com/diagram.png"},
        }

    def test_mixed_path_and_url(self, tmp_path: Path):
        local_image = tmp_path / "local.png"
        local_image.write_bytes(_png_bytes())
        parts, skipped = build_native_content_parts(
            "compare these",
            [str(local_image)],
            image_urls=["https://example.com/remote.jpg"],
        )
        assert skipped == []
        # One text part plus two image parts: the local file (as a data URL)
        # comes first, the remote URL second.
        attached = [part for part in parts if part.get("type") == "image_url"]
        assert len(attached) == 2
        first_url = attached[0]["image_url"]["url"]
        second_url = attached[1]["image_url"]["url"]
        assert first_url.startswith("data:image/png;base64,")
        assert second_url == "https://example.com/remote.jpg"
        caption = parts[0]["text"]
        assert "[Image attached at:" in caption
        assert "[Image attached: https://example.com/remote.jpg]" in caption

    def test_empty_url_list_is_no_op(self, tmp_path: Path):
        local_image = tmp_path / "x.png"
        local_image.write_bytes(_png_bytes())
        local_paths = [str(local_image)]
        # Passing image_urls=[] must be indistinguishable from omitting it.
        without_kwarg, _ = build_native_content_parts("hi", local_paths)
        with_empty_kwarg, _ = build_native_content_parts("hi", local_paths, image_urls=[])
        assert without_kwarg == with_empty_kwarg

    def test_blank_url_strings_are_dropped(self):
        candidates = ["", "   ", "https://example.com/a.png"]
        parts, _ = build_native_content_parts("x", [], image_urls=candidates)
        attached = [part for part in parts if part.get("type") == "image_url"]
        assert len(attached) == 1
        assert attached[0]["image_url"]["url"] == "https://example.com/a.png"

    def test_url_only_inserts_default_prompt_when_text_empty(self):
        parts, _ = build_native_content_parts(
            "", [], image_urls=["https://example.com/a.png"]
        )
        lead = parts[0]
        assert lead["type"] == "text"
        assert lead["text"].startswith("What do you see in this image?")
|
||||
|
||||
@@ -84,6 +84,13 @@ class MetadataMemoryProvider(FakeMemoryProvider):
|
||||
self.memory_writes.append((action, target, content, metadata or {}))
|
||||
|
||||
|
||||
class MessagesMemoryProvider(FakeMemoryProvider):
    """Fake provider whose ``sync_turn`` accepts the optional ``messages``
    keyword, i.e. one that opts into completed-turn message context."""

    def sync_turn(self, user_content, assistant_content, *, session_id="", messages=None):
        # Record all four values so tests can assert exactly what was passed.
        recorded_call = (user_content, assistant_content, session_id, messages)
        self.synced_turns.append(recorded_call)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MemoryProvider ABC tests
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -236,6 +243,28 @@ class TestMemoryManager:
|
||||
assert p1.synced_turns == [("user msg", "assistant msg")]
|
||||
assert p2.synced_turns == [("user msg", "assistant msg")]
|
||||
|
||||
def test_sync_all_passes_messages_to_opted_in_provider(self):
    """A provider whose ``sync_turn`` takes ``messages`` receives the list."""
    turn_messages = [
        {"role": "assistant", "tool_calls": [{"id": "call-1"}]},
        {"role": "tool", "tool_call_id": "call-1", "content": "ok"},
    ]
    provider = MessagesMemoryProvider("external")
    manager = MemoryManager()
    manager.add_provider(provider)

    manager.sync_all(
        "user msg",
        "assistant msg",
        session_id="sess-1",
        messages=turn_messages,
    )

    expected_call = ("user msg", "assistant msg", "sess-1", turn_messages)
    assert provider.synced_turns == [expected_call]
|
||||
|
||||
def test_sync_all_omits_messages_for_legacy_provider(self):
    """A plain ``FakeMemoryProvider`` still records only the two-field turn
    even when ``sync_all`` is given ``messages``."""
    legacy_provider = FakeMemoryProvider("external")
    manager = MemoryManager()
    manager.add_provider(legacy_provider)

    manager.sync_all("user msg", "assistant msg", messages=[{"role": "tool"}])

    expected_call = ("user msg", "assistant msg")
    assert legacy_provider.synced_turns == [expected_call]
|
||||
|
||||
def test_sync_failure_doesnt_block_others(self):
|
||||
"""If one provider's sync fails, others still run."""
|
||||
mgr = MemoryManager()
|
||||
|
||||
+36
-106
@@ -378,127 +378,57 @@ class TestDiscordMentions:
|
||||
assert result.endswith(" said hello")
|
||||
|
||||
|
||||
class TestUrlQueryParamRedaction:
|
||||
"""URL query-string redaction (ported from nearai/ironclaw#2529).
|
||||
|
||||
Catches opaque tokens that don't match vendor prefix regexes by
|
||||
matching on parameter NAME rather than value shape.
|
||||
class TestWebUrlsNotRedacted:
|
||||
"""Web URLs (http/https/wss) pass through unchanged — magic-link
|
||||
checkouts, OAuth callbacks the agent is meant to follow, and pre-signed
|
||||
share URLs must reach the tool intact. Known credential shapes inside
|
||||
URLs (sk-, ghp_, JWTs) are still caught by the prefix and JWT regexes.
|
||||
DB connection-string passwords are still caught by _DB_CONNSTR_RE.
|
||||
"""
|
||||
|
||||
def test_oauth_callback_code(self):
|
||||
def test_oauth_callback_code_passes_through(self):
|
||||
text = "GET https://api.example.com/oauth/cb?code=abc123xyz789&state=csrf_ok"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "abc123xyz789" not in result
|
||||
assert "code=***" in result
|
||||
assert "state=csrf_ok" in result # state is not sensitive
|
||||
|
||||
def test_access_token_query(self):
|
||||
text = "Fetching https://example.com/api?access_token=opaque_value_here_1234&format=json"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "opaque_value_here_1234" not in result
|
||||
assert "access_token=***" in result
|
||||
assert "format=json" in result
|
||||
|
||||
def test_refresh_token_query(self):
|
||||
text = "https://auth.example.com/token?refresh_token=somerefresh&grant_type=refresh"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "somerefresh" not in result
|
||||
assert "grant_type=refresh" in result
|
||||
|
||||
def test_api_key_query(self):
|
||||
text = "https://api.example.com/v1/data?api_key=kABCDEF12345&limit=10"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "kABCDEF12345" not in result
|
||||
assert "limit=10" in result
|
||||
|
||||
def test_presigned_signature(self):
|
||||
text = "https://s3.amazonaws.com/bucket/k?signature=LONG_PRESIGNED_SIG&id=public"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "LONG_PRESIGNED_SIG" not in result
|
||||
assert "id=public" in result
|
||||
|
||||
def test_case_insensitive_param_names(self):
|
||||
"""Lowercase/mixed-case sensitive param names are redacted."""
|
||||
# NOTE: All-caps names like TOKEN= are swallowed by _ENV_ASSIGN_RE
|
||||
# (which matches KEY=value patterns greedily) before URL regex runs.
|
||||
# This test uses lowercase names to isolate URL-query redaction.
|
||||
text = "https://example.com?api_key=abcdef&secret=ghijkl"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "abcdef" not in result
|
||||
assert "ghijkl" not in result
|
||||
assert "api_key=***" in result
|
||||
assert "secret=***" in result
|
||||
|
||||
def test_substring_match_does_not_trigger(self):
|
||||
"""`token_count` and `session_id` must NOT match `token` / `session`."""
|
||||
text = "https://example.com/cb?token_count=42&session_id=xyz&foo=bar"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "token_count=42" in result
|
||||
assert "session_id=xyz" in result
|
||||
|
||||
def test_url_without_query_unchanged(self):
|
||||
text = "https://example.com/path/to/resource"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_url_with_fragment(self):
|
||||
text = "https://example.com/page?token=xyz#section"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "token=xyz" not in result
|
||||
assert "#section" in result
|
||||
def test_access_token_query_passes_through(self):
|
||||
text = "Fetching https://example.com/api?access_token=opaque_value_here_1234&format=json"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_websocket_url_query(self):
|
||||
def test_magic_link_checkout_passes_through(self):
|
||||
text = "Open https://checkout.example.com/resume?magic=ABCDEF123456&customer=42"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_presigned_signature_passes_through(self):
|
||||
text = "https://s3.amazonaws.com/bucket/k?signature=LONG_PRESIGNED_SIG&id=public"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_https_userinfo_passes_through(self):
|
||||
text = "URL: https://user:supersecretpw@host.example.com/path"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_websocket_url_query_passes_through(self):
|
||||
text = "wss://api.example.com/ws?token=opaqueWsToken123"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "opaqueWsToken123" not in result
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_http_access_log_relative_request_target_query(self):
|
||||
def test_http_access_log_request_target_passes_through(self):
|
||||
text = (
|
||||
'INFO aiohttp.access: 127.0.0.1 "POST '
|
||||
'/bluebubbles-webhook?password=webhookSecret123&event=new-message '
|
||||
'HTTP/1.1" 200 173 "-" "test-client"'
|
||||
)
|
||||
result = redact_sensitive_text(text)
|
||||
assert "webhookSecret123" not in result
|
||||
assert "password=***" in result
|
||||
assert "event=new-message" in result
|
||||
|
||||
def test_http_access_log_absolute_request_target_query(self):
|
||||
text = (
|
||||
'INFO aiohttp.access: 127.0.0.1 "GET '
|
||||
'https://example.com/callback?code=oauthCode123&state=csrf-ok '
|
||||
'HTTP/1.1" 200 173 "-" "test-client"'
|
||||
)
|
||||
result = redact_sensitive_text(text)
|
||||
assert "oauthCode123" not in result
|
||||
assert "code=***" in result
|
||||
assert "state=csrf-ok" in result
|
||||
|
||||
|
||||
class TestUrlUserinfoRedaction:
|
||||
"""URL userinfo (`scheme://user:pass@host`) for non-DB schemes."""
|
||||
|
||||
def test_https_userinfo(self):
|
||||
text = "URL: https://user:supersecretpw@host.example.com/path"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "supersecretpw" not in result
|
||||
assert "https://user:***@host.example.com" in result
|
||||
|
||||
def test_http_userinfo(self):
|
||||
text = "http://admin:plaintextpass@internal.example.com/api"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "plaintextpass" not in result
|
||||
|
||||
def test_ftp_userinfo(self):
|
||||
text = "ftp://user:ftppass@ftp.example.com/file.txt"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "ftppass" not in result
|
||||
|
||||
def test_url_without_userinfo_unchanged(self):
|
||||
text = "https://example.com/path"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_db_connstr_still_handled(self):
|
||||
"""DB schemes are handled by _DB_CONNSTR_RE, not _URL_USERINFO_RE."""
|
||||
def test_known_prefix_inside_url_still_redacted(self):
|
||||
"""sk-/ghp_/JWT-shaped values inside a URL are still caught by
|
||||
_PREFIX_RE / _JWT_RE — the carve-out is for opaque tokens only."""
|
||||
text = "https://evil.com/steal?key=sk-" + "a" * 30
|
||||
result = redact_sensitive_text(text)
|
||||
assert "sk-" + "a" * 30 not in result
|
||||
|
||||
def test_db_connstr_password_still_redacted(self):
|
||||
"""DB schemes (postgres/mysql/mongodb/redis/amqp) keep their
|
||||
userinfo redaction via _DB_CONNSTR_RE — connection strings are
|
||||
not web URLs the agent navigates to."""
|
||||
text = "postgres://admin:dbpass@db.internal:5432/app"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "dbpass" not in result
|
||||
|
||||
@@ -275,8 +275,9 @@ class TestRunTurn:
|
||||
def test_turn_start_failure_attaches_redacted_stderr_tail(self):
|
||||
"""When codex stderr has content (non-OAuth), the tail gets attached
|
||||
to the user-facing error so config/provider problems are debuggable
|
||||
instead of just 'Internal error'. Secrets in stderr are redacted
|
||||
via agent.redact(force=True)."""
|
||||
instead of just 'Internal error'. Credential-shaped values in stderr
|
||||
are redacted via agent.redact(force=True); web-URL query params pass
|
||||
through (see fix(redact): pass web URLs through unchanged)."""
|
||||
client = FakeClient()
|
||||
client.set_stderr_tail([
|
||||
"ERROR: provider auth failed",
|
||||
@@ -299,9 +300,8 @@ class TestRunTurn:
|
||||
# Stderr tail attached
|
||||
assert "codex stderr" in r.error
|
||||
assert "provider auth failed" in r.error
|
||||
# Secrets redacted
|
||||
# Credential-shaped values still redacted (sk- prefix + Bearer header)
|
||||
assert "sk-live-deadbeefdeadbeef" not in r.error
|
||||
assert "querysecret12345" not in r.error
|
||||
# Non-OAuth → should NOT retire (subprocess JSON-RPC is still healthy).
|
||||
assert r.should_retire is False
|
||||
|
||||
|
||||
@@ -0,0 +1,244 @@
|
||||
"""Regression tests for the CLI ``/yolo`` in-chat toggle.
|
||||
|
||||
Pre-fix bug (issue #33925): ``cli.HermesCLI._toggle_yolo`` mutated only
|
||||
``os.environ["HERMES_YOLO_MODE"]``. That env var is captured once at
|
||||
module-import time into ``tools.approval._YOLO_MODE_FROZEN`` (security
|
||||
hardening: stops prompt-injected skills from flipping the bypass mid-run),
|
||||
so the post-startup toggle was a silent no-op. ``/yolo`` advertised "YOLO ON"
|
||||
in the status bar while every dangerous command still hit the approval
|
||||
prompt. Only ``hermes --yolo`` (process-start env), ``HERMES_YOLO_MODE=1``,
|
||||
and ``hermes config set approvals.mode off`` actually bypassed.
|
||||
|
||||
The fix routes the CLI toggle through ``enable_session_yolo`` /
|
||||
``disable_session_yolo`` (matching the gateway and TUI ``/yolo`` paths) and
|
||||
binds ``self.session_id`` as the active approval session key around each
|
||||
``run_conversation`` call so ``is_current_session_yolo_enabled()`` resolves
|
||||
against the same key the toggle writes under.
|
||||
|
||||
We test ``_toggle_yolo`` and ``_is_session_yolo_active`` as unbound methods
|
||||
against a minimal stand-in object that exposes only the attribute they
|
||||
read (``session_id``). This avoids the heavy ``HermesCLI`` construction
|
||||
path used in ``test_cli_init.py``, which is incompatible with this test
|
||||
file's path layout — ``HermesCLI.__init__`` imports a lot of optional
|
||||
state we don't need here.
|
||||
"""
|
||||
|
||||
import os
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
import tools.approval as approval_module
|
||||
from cli import HermesCLI
|
||||
|
||||
|
||||
SESSION_KEY = "test-cli-yolo-session"
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clear_approval_state(monkeypatch):
    """Reset the ``HERMES_YOLO_MODE`` env var and the approval state of the
    sessions these tests touch, both before and after each test."""
    touched_sessions = (SESSION_KEY, "default")

    monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
    for key in touched_sessions:
        approval_module.clear_session(key)

    yield

    for key in touched_sessions:
        approval_module.clear_session(key)
|
||||
|
||||
|
||||
def _make_stand_in(session_id: str = SESSION_KEY) -> SimpleNamespace:
    """Build the smallest object the methods under test can run against.

    The returned namespace carries a single attribute, ``session_id``. The
    tests pass it as ``self`` to ``HermesCLI._toggle_yolo`` and
    ``HermesCLI._is_session_yolo_active`` called as plain functions, which
    sidesteps constructing a full ``HermesCLI`` (and the prompt_toolkit /
    config stubbing that construction would require from this test file).
    """
    stand_in = SimpleNamespace()
    stand_in.session_id = session_id
    return stand_in
|
||||
|
||||
|
||||
class TestToggleYoloIsSessionScoped:
    """``/yolo`` in the CLI has to flip the per-session bypass, never the env var.

    ``_YOLO_MODE_FROZEN`` is captured once when the approval module is
    imported, long before a ``/yolo`` command can run, so writing the env var
    afterwards cannot take effect.
    """

    @staticmethod
    def _flip(cli_like):
        """Call the unbound ``_toggle_yolo`` on ``cli_like`` with ``cli._cprint`` patched out."""
        with patch("cli._cprint"):
            HermesCLI._toggle_yolo(cli_like)

    def test_toggle_yolo_enables_session_bypass(self):
        cli_like = _make_stand_in()

        enabled_before = approval_module.is_session_yolo_enabled(SESSION_KEY)
        assert enabled_before is False

        self._flip(cli_like)

        enabled_after = approval_module.is_session_yolo_enabled(SESSION_KEY)
        assert enabled_after is True

    def test_toggle_yolo_disables_session_bypass_on_second_call(self):
        cli_like = _make_stand_in()

        self._flip(cli_like)  # ON
        assert approval_module.is_session_yolo_enabled(SESSION_KEY) is True

        self._flip(cli_like)  # OFF
        assert approval_module.is_session_yolo_enabled(SESSION_KEY) is False

    def test_toggle_yolo_does_not_mutate_env_var(self):
        """The toggle must leave ``HERMES_YOLO_MODE`` unset.

        That variable is frozen at import time; writing it later would only
        mislead whatever reads the environment afterwards (subprocesses,
        status bars wired to the env, the relaunch flag list).
        """
        cli_like = _make_stand_in()
        self._flip(cli_like)

        env_value = os.environ.get("HERMES_YOLO_MODE")
        assert env_value is None

    def test_toggle_yolo_falls_back_to_default_when_session_id_missing(self):
        """Bootstrap edge case: ``/yolo`` issued while ``session_id`` is empty.

        The call must not raise, and the bypass must be recorded under the
        ``default`` session key so code resolving against that key sees it.
        """
        cli_like = _make_stand_in(session_id="")
        self._flip(cli_like)

        assert approval_module.is_session_yolo_enabled("default") is True

    def test_two_independent_sessions_are_isolated(self):
        """Enabling ``/yolo`` in one session must leave another session gated,
        matching the gateway-side invariant."""
        first_id, second_id = "session-yolo-a", "session-yolo-b"
        cli_a = _make_stand_in(session_id=first_id)
        cli_b = _make_stand_in(session_id=second_id)

        try:
            self._flip(cli_a)

            assert approval_module.is_session_yolo_enabled(first_id) is True
            assert approval_module.is_session_yolo_enabled(second_id) is False
        finally:
            # These keys are not covered by the autouse fixture, so clear them here.
            for key in (first_id, second_id):
                approval_module.clear_session(key)
|
||||
|
||||
|
||||
class TestIsSessionYoloActiveHelper:
    """``_is_session_yolo_active`` feeds the status bar and must track the live
    session-yolo state rather than the env var (the bug class this PR fixes)."""

    def test_helper_reflects_toggle(self):
        cli_like = _make_stand_in()

        # Starts off; each toggle then flips the helper's answer: on, then off.
        assert HermesCLI._is_session_yolo_active(cli_like) is False

        for expected_state in (True, False):
            with patch("cli._cprint"):
                HermesCLI._toggle_yolo(cli_like)

            assert HermesCLI._is_session_yolo_active(cli_like) is expected_state

    def test_helper_honors_frozen_yolo_mode(self):
        """With ``_YOLO_MODE_FROZEN`` True (the ``hermes --yolo`` startup path
        sets ``HERMES_YOLO_MODE`` before tool imports), the helper reports YOLO
        as on even though the session toggle was never used."""
        cli_like = _make_stand_in()

        with patch.object(approval_module, "_YOLO_MODE_FROZEN", True):
            reported = HermesCLI._is_session_yolo_active(cli_like)
            assert reported is True
|
||||
|
||||
|
||||
class TestToggleYoloEndToEnd:
    """Full path check: after the toggle, a dangerous command is auto-approved
    by the same ``check_all_command_guards`` entry point the terminal tool uses."""

    def test_toggle_yolo_bypasses_dangerous_command_check(self):
        cli_like = _make_stand_in()
        dangerous_command = "rm -rf /tmp/scratch-xyzzy"

        # Bind the active approval session key so the guard resolves against
        # the same key the toggle writes under.
        token = approval_module.set_current_session_key(SESSION_KEY)
        try:
            with patch("cli._cprint"):
                HermesCLI._toggle_yolo(cli_like)  # YOLO ON

            result = approval_module.check_all_command_guards(
                dangerous_command, "local",
            )
            approved = result["approved"]
            assert approved is True, (
                f"YOLO toggle should auto-approve dangerous commands, got: {result}"
            )
        finally:
            approval_module.reset_current_session_key(token)
|
||||
|
||||
|
||||
class TestIsSessionYoloActiveAttrSafety:
    """The status-bar helper runs against partially-constructed CLI fixtures
    (tests use ``HermesCLI.__new__(HermesCLI)`` to skip ``__init__``). It must
    not raise ``AttributeError`` when ``session_id`` is absent — the
    status-bar builders swallow exceptions silently and lose every field
    after the failure, producing a regression that's hard to track back to
    the helper."""

    def test_helper_survives_missing_session_id_attr(self):
        # SimpleNamespace WITHOUT session_id mimics __new__-built fixtures.
        # (SimpleNamespace is already imported at module level, so no
        # function-scope re-import is needed.)
        no_attr = SimpleNamespace()
        # Must return False, not raise.
        assert HermesCLI._is_session_yolo_active(no_attr) is False
|
||||
|
||||
|
||||
class TestSessionRotationTransfersYolo:
    """When the CLI's ``session_id`` rotates mid-run (``/branch``, auto
    compression continuation), YOLO state keyed under the old id must move
    to the new id. Otherwise the user's ``/yolo ON`` silently reverts on
    the next turn — the same UX failure mode this PR set out to fix.
    Mirrors ``tui_gateway/server.py`` ~line 1297-1305.

    Every test uses session keys that the module's autouse fixture does not
    clear, so each one cleans up its own keys in a ``finally`` block.
    """

    def test_transfer_moves_yolo_to_new_session(self):
        stand_in = _make_stand_in(session_id="old-id")
        try:
            approval_module.enable_session_yolo("old-id")
            assert approval_module.is_session_yolo_enabled("old-id") is True

            HermesCLI._transfer_session_yolo(stand_in, "old-id", "new-id")

            # The bypass follows the rotation: on under the new id, off under the old.
            assert approval_module.is_session_yolo_enabled("new-id") is True
            assert approval_module.is_session_yolo_enabled("old-id") is False
        finally:
            approval_module.clear_session("old-id")
            approval_module.clear_session("new-id")

    def test_transfer_is_noop_when_yolo_was_off(self):
        stand_in = _make_stand_in(session_id="old-id")
        try:
            HermesCLI._transfer_session_yolo(stand_in, "old-id", "new-id")
            assert approval_module.is_session_yolo_enabled("new-id") is False
            assert approval_module.is_session_yolo_enabled("old-id") is False
        finally:
            approval_module.clear_session("old-id")
            approval_module.clear_session("new-id")

    def test_transfer_is_noop_when_ids_match(self):
        stand_in = _make_stand_in(session_id="same-id")
        try:
            approval_module.enable_session_yolo("same-id")
            HermesCLI._transfer_session_yolo(stand_in, "same-id", "same-id")
            # Must NOT have been disabled — same-id == same-id is a no-op,
            # not a "disable then re-enable" round-trip.
            assert approval_module.is_session_yolo_enabled("same-id") is True
        finally:
            approval_module.clear_session("same-id")

    def test_transfer_handles_empty_inputs_safely(self):
        stand_in = _make_stand_in(session_id="x")
        try:
            # Both directions of empty input should be safe no-ops; nothing
            # to transfer from "" / to "".
            HermesCLI._transfer_session_yolo(stand_in, "", "new")
            HermesCLI._transfer_session_yolo(stand_in, "old", "")
            # Neither key should have been touched.
            assert approval_module.is_session_yolo_enabled("new") is False
            assert approval_module.is_session_yolo_enabled("old") is False
        finally:
            # Clean up like the sibling tests do: if a regression ever enables
            # one of these keys, it must not leak into later tests.
            approval_module.clear_session("old")
            approval_module.clear_session("new")
|
||||
@@ -227,6 +227,8 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
|
||||
"TERMINAL_CONTAINER_DISK",
|
||||
"TERMINAL_CONTAINER_MEMORY",
|
||||
"TERMINAL_CONTAINER_PERSISTENT",
|
||||
"TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
|
||||
"TERMINAL_DOCKER_ORPHAN_REAPER",
|
||||
"TERMINAL_DOCKER_RUN_AS_HOST_USER",
|
||||
"BROWSER_CDP_URL",
|
||||
"CAMOFOX_URL",
|
||||
|
||||
@@ -12,6 +12,7 @@ the realistic runtime context. See the conftest module docstring.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
@@ -87,7 +88,15 @@ def test_dashboard_slot_reports_up_when_enabled(
|
||||
"""Symmetry: with HERMES_DASHBOARD=1, s6-svstat reports the slot as up."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1", built_image, "sleep", "120"],
|
||||
"-e", "HERMES_DASHBOARD=1",
|
||||
# The default dashboard host is 0.0.0.0, which now engages the
|
||||
# OAuth auth gate. Without a provider registered (no
|
||||
# HERMES_DASHBOARD_OAUTH_CLIENT_ID in this test env), start_server
|
||||
# would fail closed and the slot would never come up. Pin the
|
||||
# explicit insecure opt-in to keep this test focused on the s6
|
||||
# supervision contract, not the auth gate.
|
||||
"-e", "HERMES_DASHBOARD_INSECURE=1",
|
||||
built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
# uvicorn takes a moment to bind; poll svstat.
|
||||
@@ -112,7 +121,12 @@ def test_dashboard_opt_in_starts(
|
||||
"""With HERMES_DASHBOARD=1, a dashboard process should be visible."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1", built_image, "sleep", "120"],
|
||||
"-e", "HERMES_DASHBOARD=1",
|
||||
# Default bind is 0.0.0.0; pin insecure opt-in so the auth gate
|
||||
# doesn't fail-closed before the process can come up. See
|
||||
# test_dashboard_slot_reports_up_when_enabled for the full rationale.
|
||||
"-e", "HERMES_DASHBOARD_INSECURE=1",
|
||||
built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
# Poll for the dashboard subprocess to appear — the entrypoint
|
||||
@@ -131,6 +145,10 @@ def test_dashboard_port_override(
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1", "-e", "HERMES_DASHBOARD_PORT=9120",
|
||||
# Default bind is 0.0.0.0; pin insecure opt-in so the auth gate
|
||||
# doesn't fail-closed before the port is bound. See
|
||||
# test_dashboard_slot_reports_up_when_enabled for the full rationale.
|
||||
"-e", "HERMES_DASHBOARD_INSECURE=1",
|
||||
built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
@@ -160,7 +178,13 @@ def test_dashboard_restarts_after_crash(
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1", built_image, "sleep", "120"],
|
||||
"-e", "HERMES_DASHBOARD=1",
|
||||
# Default bind is 0.0.0.0; pin insecure opt-in so the auth gate
|
||||
# doesn't fail-closed before the supervised dashboard can come up.
|
||||
# See test_dashboard_slot_reports_up_when_enabled for the full
|
||||
# rationale.
|
||||
"-e", "HERMES_DASHBOARD_INSECURE=1",
|
||||
built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
# Wait for the first dashboard to come up.
|
||||
@@ -201,3 +225,169 @@ def test_dashboard_restarts_after_crash(
|
||||
raise AssertionError(
|
||||
f"Dashboard not restarted after kill (first_pid={first_pid})"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OAuth auth-gate behaviour — regression guard for the dashboard-insecure
|
||||
# auto-injection bug. Pre-fix, the s6 run script appended `--insecure`
|
||||
# whenever `HERMES_DASHBOARD_HOST` was non-loopback, silently disabling
|
||||
# the OAuth gate on every container-deployed dashboard. The matching
|
||||
# static-text guard lives in tests/test_docker_home_override_scripts.py;
|
||||
# this is the behavioural end-to-end check.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _http_probe(
    container: str,
    path: str,
    *,
    deadline_s: float = 60.0,
) -> tuple[int, str]:
    """Fetch ``http://127.0.0.1:9119<path>`` from inside ``container``, retrying until it answers.

    A small Python program is piped over stdin (``python -``) to the venv
    interpreter in the container. It prints the HTTP status on the first
    line and the response body after it. ``HTTPError`` is caught inside that
    program, so 4xx/5xx answers count as real responses (the OAuth gate's
    401 is the expected signal in the gate-engaged test).

    Any attempt that exits non-zero or prints nothing — for example a
    connection error while the server is not listening — is retried every
    0.5 seconds until ``deadline_s`` has elapsed.

    Returns:
        ``(status_code, body)`` from the first attempt whose output parses.

    Raises:
        AssertionError: no attempt produced a parseable response in time.
    """
    # Top-level statements stay at column 0: the text is executed verbatim
    # by the interpreter inside the container.
    script = f"""\
import urllib.request, urllib.error
req = urllib.request.Request("http://127.0.0.1:9119{path}")
try:
    r = urllib.request.urlopen(req, timeout=5)
    print(r.status)
    print(r.read().decode(), end="")
except urllib.error.HTTPError as h:
    print(h.code)
    print(h.read().decode(), end="")
"""
    # Quoting the 'PY' heredoc delimiter turns off shell expansion in the
    # body, so the script reaches the interpreter unchanged.
    command = "/opt/hermes/.venv/bin/python - <<'PY'\n" + script + "PY"

    give_up_at = time.monotonic() + deadline_s
    last_err = ""
    while time.monotonic() < give_up_at:
        outcome = docker_exec_sh(container, command, timeout=10)
        if outcome.returncode != 0 or not outcome.stdout.strip():
            last_err = f"rc={outcome.returncode} stderr={outcome.stderr!r}"
        else:
            # First line is the status code; everything after the first
            # newline (possibly nothing) is the body.
            status_line, _newline, remainder = outcome.stdout.partition("\n")
            try:
                return int(status_line.strip()), remainder
            except (ValueError, IndexError) as exc:
                last_err = f"parse: {exc!r} / stdout={outcome.stdout!r}"
        time.sleep(0.5)

    raise AssertionError(
        f"Probe of {path} never returned HTTP within {deadline_s}s; "
        f"last error: {last_err}"
    )
|
||||
|
||||
|
||||
def test_dashboard_oauth_gate_engages_on_non_loopback_bind(
    built_image: str, container_name: str,
) -> None:
    """A ``0.0.0.0`` bind must leave the OAuth gate on: no implicit ``--insecure``.

    The container is started with ``HERMES_DASHBOARD_HOST=0.0.0.0`` and
    ``HERMES_DASHBOARD_OAUTH_CLIENT_ID`` set, and without
    ``HERMES_DASHBOARD_INSECURE``. This guards the wildcard-subdomain rollout,
    where portal-provisioned agents bind ``0.0.0.0`` and rely on the gate to
    authenticate browser callers. Before the fix, the s6 run script added
    ``--insecure`` for every non-loopback bind, which sent ``start_server``
    back into the legacy ``allow_public=True`` branch and disabled the gate.

    Two separate observations confirm the gate is active:

    1. ``/api/auth/providers`` — reachable through the gate so the login
       page can bootstrap — answers 200 and lists ``nous``, showing the
       bundled provider registered.
    2. ``/api/status`` — public under the legacy ``_SESSION_TOKEN``
       middleware — answers 401, showing the gate runs ahead of the legacy
       public list and rejects unauthenticated callers.
    """
    container_env = {
        "HERMES_DASHBOARD": "1",
        "HERMES_DASHBOARD_HOST": "0.0.0.0",
        "HERMES_DASHBOARD_OAUTH_CLIENT_ID": "agent:test-instance",
    }
    env_flags = [
        token
        for key, value in container_env.items()
        for token in ("-e", f"{key}={value}")
    ]
    docker_argv = [
        "docker", "run", "-d", "--name", container_name,
        *env_flags,
        built_image, "sleep", "120",
    ]
    subprocess.run(docker_argv, check=True, capture_output=True, timeout=30)

    # (1) The public bootstrap endpoint exposes the provider registry.
    status_code, body = _http_probe(container_name, "/api/auth/providers")
    assert status_code == 200, (
        f"/api/auth/providers should return 200 when a provider is "
        f"registered; got {status_code} body={body!r}"
    )
    payload = json.loads(body)
    provider_names = []
    for provider in payload.get("providers", []):
        provider_names.append(provider.get("name"))
    assert "nous" in provider_names, (
        "Bundled dashboard_auth/nous provider should register when "
        f"HERMES_DASHBOARD_OAUTH_CLIENT_ID is set. Got: {payload!r}"
    )

    # (2) Unauthenticated /api/status is stopped by the OAuth middleware
    # with a 401 rather than the legacy public 200 JSON.
    status_code, body = _http_probe(container_name, "/api/status")
    assert status_code == 401, (
        "OAuth gate must intercept /api/status on 0.0.0.0 bind when a "
        "provider is registered and HERMES_DASHBOARD_INSECURE is unset. "
        f"Got: status={status_code} body={body!r}"
    )
|
||||
|
||||
|
||||
def test_dashboard_insecure_env_var_opts_out_of_gate(
    built_image: str, container_name: str,
) -> None:
    """``HERMES_DASHBOARD_INSECURE=1`` is the explicit switch back to no-gate mode.

    It exists for operators on trusted LANs behind a reverse proxy who do not
    use the OAuth contract, and follows the same opt-in shape as the other s6
    boolean envs (``HERMES_DASHBOARD``, ``HERMES_DASHBOARD_TUI``).

    With the gate bypassed, ``/api/status`` (public under the legacy
    ``_SESSION_TOKEN`` middleware) answers 200 and its JSON body carries
    ``auth_required: false``.
    """
    container_env = {
        "HERMES_DASHBOARD": "1",
        "HERMES_DASHBOARD_HOST": "0.0.0.0",
        "HERMES_DASHBOARD_INSECURE": "1",
    }
    env_flags = [
        token
        for key, value in container_env.items()
        for token in ("-e", f"{key}={value}")
    ]
    docker_argv = [
        "docker", "run", "-d", "--name", container_name,
        *env_flags,
        built_image, "sleep", "120",
    ]
    subprocess.run(docker_argv, check=True, capture_output=True, timeout=30)

    status_code, body = _http_probe(container_name, "/api/status")
    assert status_code == 200, (
        f"/api/status should return 200 with the auth gate disabled; "
        f"got {status_code} body={body!r}"
    )

    status = json.loads(body)
    auth_required = status.get("auth_required")
    assert auth_required is False, (
        "HERMES_DASHBOARD_INSECURE=1 must disable the auth gate (explicit "
        f"opt-in for trusted-LAN deployments). Got: {status!r}"
    )
|
||||
|
||||
@@ -368,6 +368,11 @@ class TestMediaDeliveryPathValidation:
|
||||
"gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
|
||||
tuple(roots),
|
||||
)
|
||||
# All tests in this class cover strict-mode behavior (allowlist +
|
||||
# recency window + denylist). Force strict on so they keep
|
||||
# exercising the legacy path even though the public default
|
||||
# flipped to off in 2026-05.
|
||||
monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
|
||||
# Disable recency-based trust by default so the original allowlist
|
||||
# tests continue to exercise the strict-allowlist path. Tests that
|
||||
# specifically cover recency trust re-enable it themselves.
|
||||
@@ -536,6 +541,149 @@ class TestMediaDeliveryPathValidation:
|
||||
assert out == [str(fresh.resolve())]
|
||||
|
||||
|
||||
class TestMediaDeliveryDefaultMode:
    """Behaviour with strict mode off: only the denylist decides delivery.

    This mirrors inbound delivery — Telegram/Discord/Slack take any document
    type a user uploads, and the agent may send back any file that is not a
    credential. Operators of public-facing gateways opt in to strict mode.
    """

    def _patch_roots(self, monkeypatch, *roots):
        """Install ``roots`` as the safe-root allowlist and unset the strict/allow-dir env vars."""
        # With no roots passed the allowlist is empty, so the only way a
        # path is accepted in these tests is the default-mode
        # "anything not denied" branch.
        monkeypatch.setattr(
            "gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
            tuple(roots),
        )
        # Strict stays OFF (the public default); the strict path is covered
        # by TestMediaDeliveryPathValidation.
        for env_name in ("HERMES_MEDIA_DELIVERY_STRICT", "HERMES_MEDIA_ALLOW_DIRS"):
            monkeypatch.delenv(env_name, raising=False)

    def test_accepts_stale_file_outside_allowlist(self, tmp_path, monkeypatch):
        """Motivating case: the agent emits ``MEDIA:/home/user/notes.md`` for
        a markdown file it has been editing for hours. Strict mode would drop
        it (not allowlisted, not recent); default mode delivers it.
        """
        self._patch_roots(monkeypatch)

        target = tmp_path / "notes.md"
        target.write_text("# Old notes\n")
        two_hours_ago = time.time() - 7200  # well outside any recency window
        os.utime(target, (two_hours_ago, two_hours_ago))

        verdict = BasePlatformAdapter.validate_media_delivery_path(str(target))
        assert verdict == str(target.resolve())

    def test_accepts_any_extension_not_on_denylist(self, tmp_path, monkeypatch):
        """There is no extension allowlist: .md, .txt, .json, .py and .bin all pass."""
        self._patch_roots(monkeypatch)

        filenames = ("report.md", "log.txt", "data.json", "script.py", "blob.bin")
        for candidate in (tmp_path / filename for filename in filenames):
            candidate.write_bytes(b"x")
            verdict = BasePlatformAdapter.validate_media_delivery_path(str(candidate))
            assert verdict == str(candidate.resolve())

    def test_denylist_still_blocks_credentials(self, tmp_path, monkeypatch):
        """Permissive does not mean naive: credential paths stay blocked.
        ``$HOME`` is redirected so that ``~/.ssh`` lands inside ``tmp_path``.
        """
        self._patch_roots(monkeypatch)

        fake_home = tmp_path / "home"
        secret = fake_home / ".ssh" / "id_rsa"
        secret.parent.mkdir(parents=True)
        secret.write_bytes(b"-----BEGIN ...")
        monkeypatch.setenv("HOME", str(fake_home))

        verdict = BasePlatformAdapter.validate_media_delivery_path(str(secret))
        assert verdict is None

    def test_denylist_blocks_system_prefixes(self, tmp_path, monkeypatch):
        """Paths under /etc, /proc, /sys, /root, /boot, /var/{log,lib,run}
        are refused. The denied-prefix list is patched to a temp directory
        so the test never has to touch the real /etc.
        """
        self._patch_roots(monkeypatch)

        fake_etc = tmp_path / "fake-etc"
        fake_etc.mkdir()
        secret = fake_etc / "shadow"
        secret.write_bytes(b"root:!:0:0::/root:/bin/sh")

        denied_prefixes = (str(fake_etc),)
        monkeypatch.setattr(
            "gateway.platforms.base._MEDIA_DELIVERY_DENIED_PREFIXES",
            denied_prefixes,
        )

        verdict = BasePlatformAdapter.validate_media_delivery_path(str(secret))
        assert verdict is None

    def test_denylist_blocks_hermes_credentials(self, tmp_path, monkeypatch):
        """``~/.hermes/.env`` and ``~/.hermes/auth.json`` remain blocked in
        default mode. They sit under ``$HOME`` rather than in the system
        prefix list, so this covers the home-relative denied paths.
        """
        self._patch_roots(monkeypatch)

        fake_home = tmp_path / "home"
        hermes_dir = fake_home / ".hermes"
        hermes_dir.mkdir(parents=True)
        env_file = hermes_dir / ".env"
        env_file.write_text("OPENAI_API_KEY=sk-...")

        monkeypatch.setenv("HOME", str(fake_home))
        monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_dir)

        verdict = BasePlatformAdapter.validate_media_delivery_path(str(env_file))
        assert verdict is None

    def test_strict_mode_envvar_restores_legacy_behavior(self, tmp_path, monkeypatch):
        """``HERMES_MEDIA_DELIVERY_STRICT=1`` switches the older
        allowlist+recency logic back on, so a stale file outside the
        allowlist is rejected.
        """
        self._patch_roots(monkeypatch)
        strict_env = (
            ("HERMES_MEDIA_DELIVERY_STRICT", "1"),
            ("HERMES_MEDIA_TRUST_RECENT_FILES", "0"),
        )
        for env_name, env_value in strict_env:
            monkeypatch.setenv(env_name, env_value)

        stale = tmp_path / "old.pdf"
        stale.write_bytes(b"%PDF-1.4")
        two_hours_ago = time.time() - 7200
        os.utime(stale, (two_hours_ago, two_hours_ago))

        verdict = BasePlatformAdapter.validate_media_delivery_path(str(stale))
        assert verdict is None

    def test_strict_mode_truthy_aliases(self, monkeypatch, tmp_path):
        """``HERMES_MEDIA_DELIVERY_STRICT=true|yes|on|1`` all enable strict mode."""
        self._patch_roots(monkeypatch)
        from gateway.platforms.base import _media_delivery_strict_mode

        truthy = ("1", "true", "TRUE", "yes", "on")
        falsy = ("0", "false", "no", "off", "")
        expectations = [(raw, True) for raw in truthy]
        expectations += [(raw, False) for raw in falsy]

        for raw, expected in expectations:
            monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", raw)
            assert _media_delivery_strict_mode() is expected

    def test_filter_passes_default_files_through(self, tmp_path, monkeypatch):
        """End-to-end through ``filter_local_delivery_paths``: a stale .md
        that strict mode would drop is kept in default mode.
        """
        self._patch_roots(monkeypatch)

        notes = tmp_path / "notes.md"
        notes.write_text("# old\n")
        accessed_at = time.time() - 86400
        modified_at = time.time() - 86400
        os.utime(notes, (accessed_at, modified_at))

        delivered = BasePlatformAdapter.filter_local_delivery_paths([str(notes)])
        assert delivered == [str(notes.resolve())]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# should_send_media_as_audio
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -234,9 +234,12 @@ async def test_streaming_delivery_blocks_media_path_outside_allowed_roots(tmp_pa
|
||||
"gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
|
||||
(allowed_root,),
|
||||
)
|
||||
# This test exercises the strict-allowlist path; disable recency trust so
|
||||
# the freshly-written tmp_path file is not auto-accepted by the trust
|
||||
# window. (Recency trust is covered separately in test_platform_base.py.)
|
||||
# This test exercises the strict-allowlist path; force strict mode on
|
||||
# and disable recency trust so the freshly-written tmp_path file is not
|
||||
# auto-accepted by the trust window. (Recency trust is covered separately
|
||||
# in test_platform_base.py. The public default flipped to non-strict in
|
||||
# 2026-05; this test pins strict on explicitly.)
|
||||
monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
|
||||
adapter = SimpleNamespace(
|
||||
name="test",
|
||||
|
||||
@@ -158,8 +158,11 @@ def test_build_models_payload_returns_expected_shape():
|
||||
|
||||
|
||||
def test_build_models_payload_does_not_call_provider_model_ids():
|
||||
"""Curated lists must come from list_authenticated_providers, not
|
||||
provider_model_ids — that would pull TTS/embeddings/etc.
|
||||
"""``build_models_payload`` is a thin shape adapter — it delegates the
|
||||
actual curation to ``list_authenticated_providers`` (which DOES call
|
||||
``cached_provider_model_ids`` internally for live discovery, with disk
|
||||
caching). ``build_models_payload`` itself must not call the live fetcher
|
||||
directly; the test pins that boundary.
|
||||
"""
|
||||
rows = [{"slug": "nous", "name": "Nous", "models": ["hermes-4-405b"],
|
||||
"total_models": 1, "is_current": False, "is_user_defined": False,
|
||||
|
||||
@@ -0,0 +1,154 @@
|
||||
"""Regression tests for #27145 — kanban.default_assignee for unassigned ready tasks.
|
||||
|
||||
When the dispatcher hits an unassigned ready task and ``kanban.default_assignee``
|
||||
is set, the dispatcher applies the assignment and spawns. Without the config,
|
||||
the task is skipped (existing behavior preserved).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture()
def isolated_kanban_home(monkeypatch):
    """Provide a freshly imported ``kanban_db`` bound to a brand-new ``HERMES_HOME``.

    Yields a ``(kanban_db, test_home)`` tuple, where ``test_home`` is the
    path of the temporary directory exported as ``HERMES_HOME``.
    """
    test_home = tempfile.mkdtemp(prefix="kanban_default_assignee_test_")
    monkeypatch.setenv("HERMES_HOME", test_home)

    # Drop cached modules so the import below re-evaluates against the new
    # HERMES_HOME instead of reusing state from an earlier import.
    stale_modules = [
        name
        for name in sys.modules
        if name.startswith(("hermes_cli", "hermes_state")) or name == "hermes_constants"
    ]
    for name in stale_modules:
        del sys.modules[name]

    from hermes_cli import kanban_db

    yield kanban_db, test_home
    # No teardown on purpose: the temp directory is left on disk. Isolation
    # does not depend on removing it, because every test gets its own
    # monkeypatched HERMES_HOME.
|
||||
|
||||
|
||||
def _fake_spawn(*args, **kwargs):
|
||||
"""Stand-in for the real worker spawn — returns a fake PID."""
|
||||
return 12345
|
||||
|
||||
|
||||
def test_unassigned_task_skipped_without_default_assignee(isolated_kanban_home):
    """Baseline behaviour when no ``default_assignee`` is passed.

    The unassigned ready task is reported in ``skipped_unassigned``, nothing
    is auto-assigned or spawned, and the task's ``assignee`` column stays
    NULL.
    """
    kb, _home = isolated_kanban_home

    with kb.connect_closing() as conn:
        kb.create_board(slug="default", name="Test")
        task_id = kb.create_task(conn, title="t1", assignee=None)

    with kb.connect_closing() as conn:
        res = kb.dispatch_once(conn, spawn_fn=_fake_spawn, dry_run=False)

    assert res.skipped_unassigned == [task_id]
    assert not res.auto_assigned_default
    assert not res.spawned

    assignee_query = "SELECT assignee FROM tasks WHERE id = ?"
    with kb.connect_closing() as conn:
        stored = conn.execute(assignee_query, (task_id,)).fetchone()
    assert stored["assignee"] is None
|
||||
|
||||
|
||||
def test_unassigned_task_auto_assigned_with_default_assignee(isolated_kanban_home):
    """Core #27145 contract for ``default_assignee``.

    With a default assignee configured, an unassigned ready task is assigned
    and dispatched within the same tick, and the DB reflects it: the
    ``assignee`` column is set and exactly one 'assigned' event is recorded.
    """
    kanban, _unused_home = isolated_kanban_home

    with kanban.connect_closing() as setup_conn:
        kanban.create_board(slug="default", name="Test")
        new_task = kanban.create_task(setup_conn, title="t1", assignee=None)

    with kanban.connect_closing() as dispatch_conn:
        outcome = kanban.dispatch_once(
            dispatch_conn,
            spawn_fn=_fake_spawn,
            dry_run=False,
            default_assignee="default",
        )
    assert outcome.auto_assigned_default == [new_task]
    assert not outcome.skipped_unassigned
    assert len(outcome.spawned) == 1
    first_spawn = outcome.spawned[0]
    assert first_spawn[0] == new_task
    assert first_spawn[1] == "default"

    # The assignment must have been persisted, not just reported.
    with kanban.connect_closing() as verify_conn:
        stored = verify_conn.execute(
            "SELECT assignee FROM tasks WHERE id = ?", (new_task,)
        ).fetchone()
    assert stored["assignee"] == "default"

    # 'assigned' event emitted for the audit trail
    with kanban.connect_closing() as events_conn:
        assigned_events = list(
            events_conn.execute(
                "SELECT kind, payload FROM task_events WHERE task_id = ? AND kind = 'assigned'",
                (new_task,),
            )
        )
    assert len(assigned_events) == 1
    event_payload = json.loads(assigned_events[0][1])
    assert event_payload["assignee"] == "default"
    assert event_payload["source"] == "kanban.default_assignee"
|
||||
|
||||
|
||||
def test_dry_run_with_default_assignee_reports_without_mutating(isolated_kanban_home):
    """Dry-run reports the routing decision but leaves the DB alone.

    The task shows up in ``auto_assigned_default`` and a spawn entry is
    listed, yet the ``assignee`` column stays NULL — this is what operators
    running ``hermes kanban dispatch --dry-run`` rely on to preview routing
    before committing.
    """
    kanban, _unused_home = isolated_kanban_home

    with kanban.connect_closing() as setup_conn:
        kanban.create_board(slug="default", name="Test")
        new_task = kanban.create_task(setup_conn, title="t1", assignee=None)

    with kanban.connect_closing() as dispatch_conn:
        outcome = kanban.dispatch_once(
            dispatch_conn,
            spawn_fn=_fake_spawn,
            dry_run=True,
            default_assignee="default",
        )
    assert outcome.auto_assigned_default == [new_task]
    assert len(outcome.spawned) == 1

    with kanban.connect_closing() as verify_conn:
        stored = verify_conn.execute(
            "SELECT assignee FROM tasks WHERE id = ?", (new_task,)
        ).fetchone()
    # DB unchanged — dry_run did not commit the assignment.
    assert stored["assignee"] is None
|
||||
|
||||
|
||||
def test_whitespace_default_assignee_treated_as_none(isolated_kanban_home):
    """A blank ``default_assignee`` behaves like no fallback at all.

    A misconfigured ``kanban.default_assignee=' '`` must not silently route
    unassigned tasks: the task is still skipped and nothing is auto-assigned.
    """
    kanban, _unused_home = isolated_kanban_home

    with kanban.connect_closing() as setup_conn:
        kanban.create_board(slug="default", name="Test")
        new_task = kanban.create_task(setup_conn, title="t1", assignee=None)

    blank_assignee = " "
    with kanban.connect_closing() as dispatch_conn:
        outcome = kanban.dispatch_once(
            dispatch_conn,
            spawn_fn=_fake_spawn,
            dry_run=False,
            default_assignee=blank_assignee,
        )
    assert new_task in outcome.skipped_unassigned
    assert not outcome.auto_assigned_default
|
||||
|
||||
|
||||
def test_explicitly_assigned_task_untouched_by_default_assignee(isolated_kanban_home):
    """An explicit assignee always wins over ``default_assignee``.

    The fallback only applies to genuinely unassigned rows, so a task created
    with ``assignee="default"`` is spawned for "default" even though the
    dispatcher is told to fall back to "someother".
    """
    kanban, _unused_home = isolated_kanban_home

    with kanban.connect_closing() as setup_conn:
        kanban.create_board(slug="default", name="Test")
        owned_task = kanban.create_task(setup_conn, title="t1", assignee="default")

    with kanban.connect_closing() as dispatch_conn:
        outcome = kanban.dispatch_once(
            dispatch_conn,
            spawn_fn=_fake_spawn,
            dry_run=False,
            default_assignee="someother",
        )
    assert owned_task not in outcome.auto_assigned_default
    spawned_for_owner = [
        entry
        for entry in outcome.spawned
        if entry[0] == owned_task and entry[1] == "default"
    ]
    assert spawned_for_owner
|
||||
|
||||
|
||||
def test_dispatch_result_has_auto_assigned_default_field():
    """Schema-level invariant on ``DispatchResult``.

    A default-constructed result exposes ``auto_assigned_default`` as an
    empty list, so CLI / dashboard / gateway code can surface the new routing
    decisions without feature-detecting the attribute.
    """
    from hermes_cli.kanban_db import DispatchResult

    blank_result = DispatchResult()
    assert hasattr(blank_result, "auto_assigned_default")
    assert blank_result.auto_assigned_default == []
|
||||
@@ -0,0 +1,167 @@
|
||||
"""Regression tests for #21582 — per-profile concurrency cap in dispatcher.
|
||||
|
||||
When ``kanban.max_in_progress_per_profile`` is set, no single profile
|
||||
gets more than N workers running at once even if the global
|
||||
``max_in_progress`` cap would allow it. Prevents one profile's local
|
||||
model / API quota / browser pool from being overwhelmed by a fan-out.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture()
def isolated_kanban_home_with_profiles(monkeypatch, tmp_path):
    """Spin up a fresh HERMES_HOME with kanban DB + alpha/beta profiles.

    Creates ``profiles/alpha``, ``profiles/beta`` and ``profiles/default``
    under the new home, points ``HERMES_HOME`` at it, and yields a freshly
    imported ``hermes_cli.kanban_db`` module.

    The home directory comes from pytest's ``tmp_path`` rather than
    ``tempfile.mkdtemp``, so it is managed (and eventually pruned) by pytest
    instead of leaking one directory per test.
    """
    test_home = str(tmp_path)
    for prof in ("alpha", "beta", "default"):
        os.makedirs(os.path.join(test_home, "profiles", prof), exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", test_home)
    # Drop cached hermes modules so the import below re-reads HERMES_HOME.
    for mod in list(sys.modules.keys()):
        if mod.startswith("hermes_cli") or mod.startswith("hermes_state") or mod == "hermes_constants":
            del sys.modules[mod]
    from hermes_cli import kanban_db
    yield kanban_db
|
||||
|
||||
|
||||
def _fake_spawn(*args, **kwargs):
|
||||
return 12345
|
||||
|
||||
|
||||
def test_no_cap_all_tasks_dispatched(isolated_kanban_home_with_profiles):
    """Baseline: without a per-profile cap all 8 ready tasks (5 alpha + 3 beta) dispatch."""
    kanban = isolated_kanban_home_with_profiles

    with kanban.connect_closing() as setup_conn:
        kanban.create_board(slug="default", name="Test")
        for idx in range(5):
            kanban.create_task(setup_conn, title=f"a{idx}", assignee="alpha")
        for idx in range(3):
            kanban.create_task(setup_conn, title=f"b{idx}", assignee="beta")

    with kanban.connect_closing() as dispatch_conn:
        outcome = kanban.dispatch_once(
            dispatch_conn, spawn_fn=_fake_spawn, dry_run=True
        )
    assert len(outcome.spawned) == 8
    assert not outcome.skipped_per_profile_capped
|
||||
|
||||
|
||||
def test_cap_2_balances_two_profiles(isolated_kanban_home_with_profiles):
    """With a per-profile cap of 2, each profile gets exactly two workers.

    Out of 5 alpha + 3 beta ready tasks, 2 alpha and 2 beta are spawned; the
    remaining 3 alpha and 1 beta are reported in
    ``skipped_per_profile_capped``.
    """
    kanban = isolated_kanban_home_with_profiles

    with kanban.connect_closing() as setup_conn:
        kanban.create_board(slug="default", name="Test")
        for idx in range(5):
            kanban.create_task(setup_conn, title=f"a{idx}", assignee="alpha")
        for idx in range(3):
            kanban.create_task(setup_conn, title=f"b{idx}", assignee="beta")

    with kanban.connect_closing() as dispatch_conn:
        outcome = kanban.dispatch_once(
            dispatch_conn,
            spawn_fn=_fake_spawn,
            dry_run=True,
            max_in_progress_per_profile=2,
        )

    # Index 1 of each entry is the assignee.
    spawned_for = [entry[1] for entry in outcome.spawned]
    deferred_for = [entry[1] for entry in outcome.skipped_per_profile_capped]
    assert spawned_for.count("alpha") == 2
    assert spawned_for.count("beta") == 2
    assert deferred_for.count("alpha") == 3
    assert deferred_for.count("beta") == 1
|
||||
|
||||
|
||||
def test_pre_existing_running_counts_against_cap(isolated_kanban_home_with_profiles):
    """Tasks already 'running' before the tick consume per-profile capacity.

    One alpha task is forced into 'running' up front. With cap=1 no further
    alpha task may spawn (both ready alpha tasks are capped), while beta is
    counted independently: one beta spawns and the other is capped.
    """
    kanban = isolated_kanban_home_with_profiles

    with kanban.connect_closing() as setup_conn:
        kanban.create_board(slug="default", name="Test")
        already_running = kanban.create_task(
            setup_conn, title="running alpha", assignee="alpha"
        )
        # Flip the row to 'running' directly; no real worker is involved.
        with kanban.write_txn(setup_conn):
            setup_conn.execute(
                "UPDATE tasks SET status = 'running', claim_lock = 'test:1' WHERE id = ?",
                (already_running,),
            )
        for idx in range(2):
            kanban.create_task(setup_conn, title=f"a{idx}", assignee="alpha")
        for idx in range(2):
            kanban.create_task(setup_conn, title=f"b{idx}", assignee="beta")

    with kanban.connect_closing() as dispatch_conn:
        outcome = kanban.dispatch_once(
            dispatch_conn,
            spawn_fn=_fake_spawn,
            dry_run=True,
            max_in_progress_per_profile=1,
        )

    spawned_for = [entry[1] for entry in outcome.spawned]
    deferred_for = [entry[1] for entry in outcome.skipped_per_profile_capped]
    assert spawned_for.count("alpha") == 0
    assert spawned_for.count("beta") == 1
    assert deferred_for.count("alpha") == 2
    assert deferred_for.count("beta") == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cap", [0, -1, "abc", None])
def test_invalid_cap_treated_as_no_cap(isolated_kanban_home_with_profiles, cap):
    """Caps that are not a positive int mean 'no cap'.

    Zero, negative, non-numeric and ``None`` values must fall through
    silently instead of crashing the dispatcher: all three alpha tasks are
    spawned and nothing is reported as capped.
    """
    kanban = isolated_kanban_home_with_profiles

    with kanban.connect_closing() as setup_conn:
        kanban.create_board(slug="default", name="Test")
        for idx in range(3):
            kanban.create_task(setup_conn, title=f"a{idx}", assignee="alpha")

    with kanban.connect_closing() as dispatch_conn:
        outcome = kanban.dispatch_once(
            dispatch_conn,
            spawn_fn=_fake_spawn,
            dry_run=True,
            max_in_progress_per_profile=cap,
        )
    assert not outcome.skipped_per_profile_capped
    assert len(outcome.spawned) == 3
|
||||
|
||||
|
||||
def test_capped_tasks_dispatched_on_subsequent_tick(isolated_kanban_home_with_profiles):
    """The per-profile cap defers tasks for a tick; it does not block them forever.

    Tick one (cap=1) spawns a single alpha task and caps the other two. Once
    the spawned task is marked 'done', tick two spawns a different alpha task
    and caps the one that remains.
    """
    kanban = isolated_kanban_home_with_profiles

    with kanban.connect_closing() as setup_conn:
        kanban.create_board(slug="default", name="Test")
        for idx in range(3):
            kanban.create_task(setup_conn, title=f"a{idx}", assignee="alpha")

    # First tick: cap=1, only 1 alpha dispatched
    with kanban.connect_closing() as tick_conn:
        first_tick = kanban.dispatch_once(
            tick_conn,
            spawn_fn=_fake_spawn,
            dry_run=False,
            max_in_progress_per_profile=1,
        )
    assert len(first_tick.spawned) == 1
    assert len(first_tick.skipped_per_profile_capped) == 2

    # Simulate the running task completing — mark it done so the
    # 'running' count drops
    finished_task = first_tick.spawned[0][0]
    with kanban.connect_closing() as update_conn:
        with kanban.write_txn(update_conn):
            update_conn.execute(
                "UPDATE tasks SET status = 'done', claim_lock = NULL WHERE id = ?",
                (finished_task,),
            )

    # Second tick: 1 more alpha should now dispatch
    with kanban.connect_closing() as tick_conn:
        second_tick = kanban.dispatch_once(
            tick_conn,
            spawn_fn=_fake_spawn,
            dry_run=False,
            max_in_progress_per_profile=1,
        )
    assert len(second_tick.spawned) == 1
    assert len(second_tick.skipped_per_profile_capped) == 1
    assert second_tick.spawned[0][0] != finished_task  # different task this time
|
||||
|
||||
|
||||
def test_dispatch_result_has_skipped_per_profile_capped_field():
    """Schema-level invariant on ``DispatchResult``.

    A default-constructed result exposes ``skipped_per_profile_capped`` as an
    empty list; when populated it holds
    (task_id, assignee, current_running) tuples.
    """
    from hermes_cli.kanban_db import DispatchResult

    blank_result = DispatchResult()
    assert hasattr(blank_result, "skipped_per_profile_capped")
    assert blank_result.skipped_per_profile_capped == []
|
||||
@@ -0,0 +1,238 @@
|
||||
"""Worker-side image enrichment for kanban tasks.
|
||||
|
||||
When a kanban task body contains a local image path or an ``http(s)://``
|
||||
image URL, the worker must surface that image to the model on its first
|
||||
user turn — matching the CLI/gateway behaviour for inbound images.
|
||||
|
||||
The dispatcher spawns the worker as
|
||||
``hermes -p <profile> chat -q "work kanban task <id>"``. The task body
|
||||
itself never appears in argv; the worker has to read it from the kanban
|
||||
DB during startup. These tests cover the round-trip:
|
||||
|
||||
task body → kanban_db.get_task → extract_image_refs →
|
||||
build_native_content_parts → multimodal user turn
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli import kanban_db as kb
|
||||
from agent.image_routing import (
|
||||
build_native_content_parts,
|
||||
extract_image_refs,
|
||||
)
|
||||
|
||||
|
||||
# Raw bytes of a tiny 1×1 transparent PNG, decoded once at import time.
# Tests write these bytes to whatever image path they mention in a task
# body: extract_image_refs validates that the path exists on disk, so the
# file has to be real and readable (any image bytes will do).
_PNG = base64.b64decode(
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGNgYGBgAAAABQABpfZFQAAAAABJRU5ErkJggg=="
)
|
||||
|
||||
|
||||
@pytest.fixture
def kanban_home(tmp_path: Path, monkeypatch):
    """Give each test its own HERMES_HOME and a freshly initialised kanban DB.

    ``HERMES_HOME`` points at ``<tmp_path>/.hermes`` and ``Path.home`` is
    patched to return ``tmp_path``. Returns the ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    kb.init_db()
    return hermes_dir
|
||||
|
||||
|
||||
def _add_task_with_body(body: str, *, title: str = "Look at this") -> str:
    """Create a kanban task carrying ``body`` and return its id.

    The task is assigned to ``worker-a`` with no tenant. The DB connection is
    opened for this call only and closed even if creation fails.
    """
    db = kb.connect()
    try:
        return kb.create_task(
            db,
            title=title,
            body=body,
            assignee="worker-a",
            tenant=None,
        )
    finally:
        db.close()
|
||||
|
||||
|
||||
def _read_body(task_id: str) -> str:
    """Fetch a task's body from the kanban DB.

    Returns an empty string when the task does not exist or its body is
    empty/None. The connection is always closed before returning.
    """
    db = kb.connect()
    try:
        record = kb.get_task(db, task_id)
        if record is None:
            return ""
        return record.body or ""
    finally:
        db.close()
|
||||
|
||||
|
||||
class TestExtractFromTaskBody:
    """Round-trip a body through the kanban DB, then scan it with extract_image_refs."""

    def test_local_path_in_body_round_trips(self, kanban_home, tmp_path):
        """An existing image path mentioned in the body comes back as the only path."""
        screenshot = tmp_path / "screenshot.png"
        screenshot.write_bytes(_PNG)
        task_id = _add_task_with_body(
            f"Please review the screenshot at {screenshot} and confirm "
            "the alignment is right."
        )

        found_paths, found_urls = extract_image_refs(_read_body(task_id))
        assert found_paths == [str(screenshot)]
        assert found_urls == []

    def test_url_in_body_round_trips(self, kanban_home):
        """An https image URL in the body comes back as the only URL."""
        task_id = _add_task_with_body(
            "The design lives at https://example.com/mock/v3.png — "
            "make the implementation match it."
        )

        found_paths, found_urls = extract_image_refs(_read_body(task_id))
        assert found_paths == []
        assert found_urls == ["https://example.com/mock/v3.png"]

    def test_mixed_path_and_url_in_body(self, kanban_home, tmp_path):
        """A body holding both a local path and a URL yields one of each."""
        current_shot = tmp_path / "current.png"
        current_shot.write_bytes(_PNG)
        task_id = _add_task_with_body(
            f"Compare the current screenshot {current_shot} against the design at "
            "https://example.com/target.png and write a diff."
        )

        found_paths, found_urls = extract_image_refs(_read_body(task_id))
        assert found_paths == [str(current_shot)]
        assert found_urls == ["https://example.com/target.png"]

    def test_body_without_images_yields_nothing(self, kanban_home):
        """Plain prose with no image reference produces two empty lists."""
        task_id = _add_task_with_body(
            "Refactor the auth module to use the new session helper."
        )

        found_paths, found_urls = extract_image_refs(_read_body(task_id))
        assert found_paths == []
        assert found_urls == []

    def test_empty_body_is_safe(self, kanban_home):
        """An empty body is handled without error and yields nothing."""
        task_id = _add_task_with_body("")

        found_paths, found_urls = extract_image_refs(_read_body(task_id))
        assert found_paths == []
        assert found_urls == []
|
||||
|
||||
|
||||
class TestBuildPartsFromTaskBody:
    """End-to-end check that a task body turns into a multimodal user turn."""

    @staticmethod
    def _enrich(task_body):
        """Store ``task_body`` as a task, read it back and build the first user turn.

        Mirrors the cli.py wiring: the worker's literal -q argument (the
        dispatcher uses ``"work kanban task <id>"``) plus the refs extracted
        from the stored body go through build_native_content_parts.

        Returns ``(task_id, parts, skipped)``.
        """
        task_id = _add_task_with_body(task_body)
        local_paths, remote_urls = extract_image_refs(_read_body(task_id))
        parts, skipped = build_native_content_parts(
            f"work kanban task {task_id}",
            local_paths,
            image_urls=remote_urls or None,
        )
        return task_id, parts, skipped

    def test_local_path_becomes_native_image_part(self, kanban_home, tmp_path):
        """A local image is embedded as a base64 data URL next to the text part."""
        design = tmp_path / "design.png"
        design.write_bytes(_PNG)

        task_id, parts, skipped = self._enrich(f"Check out {design} — what's broken?")

        assert skipped == []
        # text part + one image_url part
        assert len(parts) == 2
        text_part, image_part = parts[0], parts[1]
        assert text_part["type"] == "text"
        assert text_part["text"].startswith(f"work kanban task {task_id}")
        assert f"[Image attached at: {design}]" in text_part["text"]
        assert image_part["type"] == "image_url"
        assert image_part["image_url"]["url"].startswith("data:image/png;base64,")

    def test_url_becomes_image_url_part(self, kanban_home):
        """A remote image URL is passed through untouched as an image_url part."""
        _task_id, parts, skipped = self._enrich(
            "Reference: https://example.com/target.jpg — match it."
        )

        assert skipped == []
        assert len(parts) == 2
        text_part, image_part = parts[0], parts[1]
        assert text_part["type"] == "text"
        assert "[Image attached: https://example.com/target.jpg]" in text_part["text"]
        assert image_part == {
            "type": "image_url",
            "image_url": {"url": "https://example.com/target.jpg"},
        }

    def test_body_with_both_yields_two_image_parts(self, kanban_home, tmp_path):
        """A body with one local path and one URL produces two image parts, local first."""
        local_image = tmp_path / "local.png"
        local_image.write_bytes(_PNG)

        _task_id, parts, skipped = self._enrich(
            f"Diff {local_image} vs https://example.com/target.png — explain it."
        )

        assert skipped == []
        image_urls = [
            part["image_url"]["url"]
            for part in parts
            if part.get("type") == "image_url"
        ]
        assert len(image_urls) == 2
        # Local file is embedded as a data URL; remote URL passes through.
        assert image_urls[0].startswith("data:image/png;base64,")
        assert image_urls[1] == "https://example.com/target.png"

    def test_body_with_no_images_leaves_query_untouched(self, kanban_home):
        """Without image refs the turn is a single text part equal to the query."""
        task_id, parts, skipped = self._enrich(
            "Rewrite the README intro paragraph to focus on use cases."
        )

        # No images → plain text-only return (single part, no list mutation).
        assert skipped == []
        assert len(parts) == 1
        only_part = parts[0]
        assert only_part["type"] == "text"
        assert only_part["text"] == f"work kanban task {task_id}"

    def test_code_block_example_is_not_attached(self, kanban_home, tmp_path):
        """Image refs inside a fenced code block are ignored; only the real one attaches."""
        real_image = tmp_path / "real.png"
        real_image.write_bytes(_PNG)
        task_id = _add_task_with_body(
            f"Real screenshot:\n{real_image}\n\n"
            "Example we DON'T want attached:\n"
            "```\n"
            "image: /tmp/example_only.png\n"
            "url: https://example.com/example.png\n"
            "```\n"
        )

        found_paths, found_urls = extract_image_refs(_read_body(task_id))

        assert found_paths == [str(real_image)]
        assert found_urls == []
|
||||
@@ -0,0 +1,230 @@
|
||||
"""Regression test for #28181 — kanban worker SIGTERM must terminate the process.
|
||||
|
||||
The single-query signal handler in cli.py (``_signal_handler_q``) raises
|
||||
``KeyboardInterrupt`` to unwind the main thread on SIGTERM/SIGHUP. That works
|
||||
for interactive ``hermes chat -q`` invocations, but kanban workers spawned by
|
||||
the dispatcher are likely to have a non-daemon thread alive (terminal_tool's
|
||||
``_wait_for_process``, custom plugin background workers, etc.). With
|
||||
``KeyboardInterrupt`` only the main thread unwinds; the non-daemon thread
|
||||
keeps the process alive after the gateway has already restarted, the kanban
|
||||
dispatcher's ``_pid_alive`` check returns True forever, and the task stays
|
||||
``running`` indefinitely.
|
||||
|
||||
The fix: when the process is a dispatcher-spawned worker (``HERMES_KANBAN_TASK``
|
||||
env var set), flush logging + stdout/stderr and call ``os._exit(0)`` instead.
|
||||
The kernel reclaims the PID immediately, and ``detect_crashed_workers``
|
||||
reclaims the stale claim on the next dispatcher tick.
|
||||
|
||||
These tests use a synthetic Python script that mirrors the cli.py signal
|
||||
handler shape so we can exercise the exit-path contract without booting the
|
||||
full CLI (which needs a real provider config).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _synthetic_worker_script() -> str:
    """Return the source of a standalone script mirroring cli.py's single-query SIGTERM handler.

    The script starts a non-daemon thread that blocks forever, installs a
    SIGTERM handler, prints ``READY`` and then parks the main thread. The
    handler calls ``os._exit(0)`` when ``HERMES_KANBAN_TASK`` is set and
    raises ``KeyboardInterrupt`` otherwise.

    Keeping the synthetic copy here means the test exercises the exact handler
    shape without needing the full hermes_cli boot path (config, providers,
    skills, etc.). If the production handler in cli.py drifts, the test
    that inspects the real handler
    (test_real_handler_uses_os_exit_for_kanban_workers) will catch it.

    Returns:
        The dedented script text, suitable for ``python -c``.
    """
    # NOTE: everything inside the literal below is runtime text executed by
    # the child interpreter; its comments are part of that script.
    return textwrap.dedent(
        """
        import os, signal, sys, threading, time

        # Non-daemon thread that blocks forever — simulates the worker
        # thread that would prevent orderly Python shutdown after
        # KeyboardInterrupt unwinds main.
        stuck = threading.Event()
        threading.Thread(target=stuck.wait, daemon=False).start()

        def handler(signum, frame):
            # Mirrors cli.py:_signal_handler_q. Real handler sleeps 1.5s; the
            # test uses a short grace so it runs fast.
            try:
                time.sleep(0.05)
            except Exception:
                pass
            if os.environ.get("HERMES_KANBAN_TASK"):
                try:
                    if hasattr(signal, "SIGALRM"):
                        signal.signal(signal.SIGALRM, lambda *_: os._exit(0))
                        signal.alarm(2)
                except Exception:
                    pass
                sys.stdout.flush()
                sys.stderr.flush()
                os._exit(0)
            raise KeyboardInterrupt()

        signal.signal(signal.SIGTERM, handler)
        print("READY", flush=True)
        try:
            threading.Event().wait()
        except KeyboardInterrupt:
            sys.exit(0)
        """
    )
|
||||
|
||||
|
||||
def _is_alive_like_dispatcher(pid: int) -> bool:
|
||||
"""Mirrors hermes_cli/kanban_db.py:_pid_alive on Linux.
|
||||
|
||||
A zombie is treated as dead — the dispatcher's _pid_alive checks
|
||||
/proc/<pid>/status for State: Z. We replicate that here so a clean
|
||||
os._exit followed by zombie-state is correctly counted as dead.
|
||||
"""
|
||||
if pid <= 0:
|
||||
return False
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
except ProcessLookupError:
|
||||
return False
|
||||
except PermissionError:
|
||||
return True
|
||||
if sys.platform == "linux":
|
||||
try:
|
||||
with open(f"/proc/{pid}/status") as f:
|
||||
for line in f:
|
||||
if line.startswith("State:"):
|
||||
if "Z" in line.split(":", 1)[1]:
|
||||
return False
|
||||
break
|
||||
except (FileNotFoundError, PermissionError, OSError):
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
def _spawn_synthetic(env_overrides: dict) -> subprocess.Popen:
    """Start the synthetic worker script and wait until it prints ``READY``.

    Args:
        env_overrides: Variables layered on top of a copy of ``os.environ``
            for the child process.

    Returns:
        The running ``Popen`` handle, started in its own session with stdout
        and stderr piped.

    Raises:
        RuntimeError: If the child never prints ``READY`` — either it exited
            (or closed stdout) first, or 5 seconds of output went by without
            it. The child is killed and reaped before raising, and any stderr
            it produced is appended to the message.
    """
    env = dict(os.environ)
    env.update(env_overrides)
    proc = subprocess.Popen(
        [sys.executable, "-u", "-c", _synthetic_worker_script()],
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        start_new_session=True,
    )
    # Wait for "READY" so we know the signal handler is installed.
    assert proc.stdout is not None
    deadline = time.time() + 5.0
    while time.time() < deadline:
        # NOTE: readline() blocks, so the deadline is only re-checked between
        # lines of child output.
        line = proc.stdout.readline()
        if not line:
            # EOF: the child exited or closed stdout before READY. Every
            # further readline() would return b"" immediately, so stop here
            # instead of busy-spinning until the deadline.
            break
        if line.startswith(b"READY"):
            return proc

    # Startup failed: kill and reap the child so it does not linger as a
    # zombie, and keep its stderr for the error message.
    proc.kill()
    try:
        _out, err = proc.communicate(timeout=2)
    except subprocess.TimeoutExpired:
        err = b""
    detail = err.decode("utf-8", errors="replace").strip() if err else ""
    message = "synthetic worker never signalled READY"
    if detail:
        message = f"{message}; stderr: {detail}"
    raise RuntimeError(message)
|
||||
|
||||
|
||||
def _cleanup(proc: subprocess.Popen) -> None:
|
||||
try:
|
||||
os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
|
||||
except (ProcessLookupError, PermissionError):
|
||||
pass
|
||||
try:
|
||||
proc.communicate(timeout=2)
|
||||
except subprocess.TimeoutExpired:
|
||||
proc.kill()
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    sys.platform == "win32",
    reason="SIGTERM semantics differ on Windows; kanban dispatcher is POSIX-only",
)
def test_sigterm_with_kanban_task_env_terminates_quickly():
    """SIGTERM ends a kanban worker within 2s despite a live non-daemon thread.

    The worker is started with HERMES_KANBAN_TASK set, so its handler takes
    the hard-exit path. Liveness is polled the same way the dispatcher does.
    """
    proc = _spawn_synthetic({"HERMES_KANBAN_TASK": "t_test_28181"})
    try:
        started = time.time()
        os.kill(proc.pid, signal.SIGTERM)

        # Should die in <2s. The handler sleeps ~50ms, then os._exit(0)
        # is immediate. Give generous headroom for slow CI runners.
        cutoff = started + 2.0
        died_after = None
        while time.time() < cutoff:
            if _is_alive_like_dispatcher(proc.pid):
                time.sleep(0.02)
                continue
            died_after = time.time() - started
            break

        if died_after is None:
            pytest.fail(
                "process still alive 2s after SIGTERM with HERMES_KANBAN_TASK set "
                "(dispatcher would keep extending claim) — fix regressed"
            )
        assert died_after < 2.0
    finally:
        _cleanup(proc)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    sys.platform == "win32",
    reason="SIGTERM semantics differ on Windows; kanban dispatcher is POSIX-only",
)
def test_sigterm_without_kanban_task_env_uses_keyboard_interrupt_path(monkeypatch):
    """Without HERMES_KANBAN_TASK, the original KeyboardInterrupt path runs.

    This is the contrast case proving the fix is gated on the env var: in
    interactive ``hermes chat -q`` (no env var), behavior is unchanged.

    In the synthetic script the KeyboardInterrupt path ends in
    ``sys.exit(0)`` on the main thread, after which interpreter shutdown
    blocks on the stuck non-daemon thread. The observable difference from the
    ``os._exit`` path is therefore that the process is still running shortly
    after SIGTERM — which is what this test asserts.
    """
    # The child inherits os.environ. If the suite itself runs with
    # HERMES_KANBAN_TASK set (e.g. from inside a kanban worker), the child
    # would take the os._exit branch and this contrast case would be moot.
    monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False)

    proc = _spawn_synthetic({})
    try:
        os.kill(proc.pid, signal.SIGTERM)
        # Wait a moment for the handler to react.
        time.sleep(0.5)
        # Whether or not the handler has run yet, the process can only have
        # exited by now if the env-gated os._exit path fired.
        assert proc.poll() is None, (
            "process exited after SIGTERM without HERMES_KANBAN_TASK set — "
            "the os._exit path must stay gated on the env var (see #28181)"
        )
    finally:
        # _cleanup SIGKILLs the process group and drains/closes the pipes.
        _cleanup(proc)
|
||||
|
||||
|
||||
def test_real_handler_uses_os_exit_for_kanban_workers():
    """Source-level invariant: cli.py's _signal_handler_q must call
    os._exit(0) when HERMES_KANBAN_TASK is set.

    Catches the case where someone refactors the handler and accidentally
    drops the env-gated exit, restoring the bug. Reading cli.py directly is
    cheap and avoids the heavy CLI import.
    """
    import pathlib

    # cli.py is expected three directory levels above this test file.
    cli_path = (
        pathlib.Path(__file__).resolve().parent.parent.parent / "cli.py"
    )
    # Decode explicitly as UTF-8: the platform default encoding is
    # locale-dependent and can fail on non-ASCII source text (this test is
    # not skipped on Windows).
    src = cli_path.read_text(encoding="utf-8")
    # Locate the handler body.
    start = src.find("def _signal_handler_q(signum, frame):")
    assert start != -1, "cli.py is missing _signal_handler_q"
    # Only inspect the 4000 characters following the ``def`` line so matches
    # far away in unrelated code do not satisfy the checks.
    body = src[start : start + 4000]
    assert "HERMES_KANBAN_TASK" in body, (
        "_signal_handler_q must gate its kanban-worker exit path on "
        "HERMES_KANBAN_TASK — see #28181"
    )
    assert "os._exit(0)" in body, (
        "_signal_handler_q must call os._exit(0) for kanban workers — "
        "raising KeyboardInterrupt orphans the process when non-daemon "
        "threads are alive (see #28181)"
    )
|
||||
@@ -197,10 +197,32 @@ class TestConfig:
|
||||
assert provider._recall_max_input_chars == 800
|
||||
assert provider._tags is None
|
||||
assert provider._recall_tags is None
|
||||
# Default recall narrowed to observation-only; world/experience are
|
||||
# aggregate facts that often crowd out concrete-event signal during
|
||||
# auto-recall. Users opt back in via the recall_types config key.
|
||||
assert provider._recall_types == ["observation"]
|
||||
assert provider._bank_mission == ""
|
||||
assert provider._bank_retain_mission is None
|
||||
assert provider._retain_context == "conversation between Hermes Agent and the User"
|
||||
|
||||
def test_recall_types_default_is_observation_only(self, provider):
|
||||
"""Auto-recall must filter to observation by default."""
|
||||
assert provider._recall_types == ["observation"]
|
||||
|
||||
def test_recall_types_explicit_list_overrides_default(self, provider_with_config):
|
||||
p = provider_with_config(recall_types=["world", "experience", "observation"])
|
||||
assert p._recall_types == ["world", "experience", "observation"]
|
||||
|
||||
def test_recall_types_csv_string_accepted(self, provider_with_config):
|
||||
"""For parity with recall_tags, comma-separated strings work too."""
|
||||
p = provider_with_config(recall_types="observation, world")
|
||||
assert p._recall_types == ["observation", "world"]
|
||||
|
||||
def test_recall_types_empty_list_falls_back_to_default(self, provider_with_config):
|
||||
"""An empty list shouldn't disable the filter (would be wider than default)."""
|
||||
p = provider_with_config(recall_types=[])
|
||||
assert p._recall_types == ["observation"]
|
||||
|
||||
def test_custom_config_values(self, provider_with_config):
|
||||
p = provider_with_config(
|
||||
retain_tags=["tag1", "tag2"],
|
||||
|
||||
@@ -91,6 +91,45 @@ class TestSyncExternalMemoryForTurn:
|
||||
session_id="test_session_001",
|
||||
)
|
||||
|
||||
def test_completed_turn_syncs_messages_when_present(self):
    """A completed turn passes its ``messages`` list through to ``sync_all``.

    The turn consists of an assistant tool call plus the matching tool
    result; with ``interrupted=False`` the memory manager must be called
    exactly once with the user message, the final response and the same
    ``messages`` object.
    """
    agent = _bare_agent()
    # One assistant tool-call message followed by the tool's reply, linked
    # by the shared call id "call-1".
    messages = [
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call-1",
                    "type": "function",
                    "function": {
                        "name": "terminal",
                        "arguments": "{\"command\":\"pytest\"}",
                    },
                }
            ],
        },
        {
            "role": "tool",
            "name": "terminal",
            "tool_call_id": "call-1",
            "content": "final Hermes-processed output",
        }
    ]

    agent._sync_external_memory_for_turn(
        original_user_message="run tests",
        final_response="tests passed",
        interrupted=False,
        messages=messages,
    )

    # NOTE(review): session_id presumably comes from _bare_agent() — confirm.
    agent._memory_manager.sync_all.assert_called_once_with(
        "run tests",
        "tests passed",
        session_id="test_session_001",
        messages=messages,
    )
|
||||
|
||||
# --- Edge cases (pre-existing behaviour preserved) ------------------
|
||||
|
||||
def test_no_final_response_skips(self):
|
||||
|
||||
@@ -3295,8 +3295,13 @@ class TestRunConversation:
|
||||
assert result["final_response"] == "Recovered after compression"
|
||||
assert result["completed"] is True
|
||||
|
||||
def test_non_minimax_delta_overflow_still_probes_down(self, agent):
|
||||
"""Non-MiniMax providers should keep the generic probe-down behavior."""
|
||||
def test_non_minimax_overflow_without_provider_limit_keeps_context(self, agent):
|
||||
"""Generic overflow without a provider-reported max must NOT probe-step down.
|
||||
|
||||
Previously a 200K configured window would silently drop to the 128K probe
|
||||
tier on a generic overflow error. Now we keep the configured window and
|
||||
rely on compression — see #33669 / PR #33826.
|
||||
"""
|
||||
self._setup_agent(agent)
|
||||
agent.provider = "openrouter"
|
||||
agent.model = "some/unknown-model"
|
||||
@@ -3330,7 +3335,8 @@ class TestRunConversation:
|
||||
result = agent.run_conversation("hello", conversation_history=prefill)
|
||||
|
||||
mock_compress.assert_called_once()
|
||||
assert agent.context_compressor.context_length == 128_000
|
||||
# Context length preserved — no guessed probe-tier step-down.
|
||||
assert agent.context_compressor.context_length == 200_000
|
||||
assert result["final_response"] == "Recovered after compression"
|
||||
assert result["completed"] is True
|
||||
|
||||
|
||||
@@ -11,6 +11,9 @@ The fix introduces:
|
||||
error class and returns the available output token budget.
|
||||
* _ephemeral_max_output_tokens on AIAgent — a one-shot override that
|
||||
caps the output for one retry without touching context_length.
|
||||
* get_context_length_from_provider_error() — accepts only concrete
|
||||
provider-reported lower context limits and refuses guessed probe-tier
|
||||
step-downs when the provider gives no maximum.
|
||||
|
||||
Naming note
|
||||
-----------
|
||||
@@ -75,7 +78,7 @@ class TestParseAvailableOutputTokens:
|
||||
# ── Should NOT detect (returns None) ─────────────────────────────────
|
||||
|
||||
def test_prompt_too_long_is_not_output_cap_error(self):
|
||||
"""'prompt is too long' errors must NOT be caught — they need context halving."""
|
||||
"""'prompt is too long' errors must NOT be caught — they need context-overflow recovery."""
|
||||
msg = "prompt is too long: 205000 tokens > 200000 maximum"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
@@ -101,6 +104,49 @@ class TestParseAvailableOutputTokens:
|
||||
assert self._parse(msg) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Context-overflow recovery — only trust provider-reported limits
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestContextOverflowLimitSelection:
|
||||
"""Context-overflow recovery must not invent a lower window size.
|
||||
|
||||
Some providers only say "input exceeds the context window" without telling
|
||||
Hermes what the actual maximum is. In that case we may compress the
|
||||
conversation, but must not silently probe-step from a user-configured 1M
|
||||
window down to 256K/128K/64K/etc.
|
||||
"""
|
||||
|
||||
def test_generic_overflow_without_provider_limit_keeps_context_length(self):
|
||||
from agent.model_metadata import get_context_length_from_provider_error
|
||||
from agent.model_metadata import get_next_probe_tier
|
||||
from agent.model_metadata import parse_context_limit_from_error
|
||||
|
||||
old_ctx = 1_000_000
|
||||
error_msg = (
|
||||
"Your input exceeds the context window of this model. "
|
||||
"Please adjust your input and try again."
|
||||
)
|
||||
|
||||
assert parse_context_limit_from_error(error_msg) is None
|
||||
assert get_next_probe_tier(old_ctx) == 256_000
|
||||
assert get_context_length_from_provider_error(error_msg, old_ctx) is None
|
||||
|
||||
def test_explicit_provider_limit_still_selects_that_limit(self):
|
||||
from agent.model_metadata import get_context_length_from_provider_error
|
||||
|
||||
error_msg = "prompt is too long: 300000 tokens > 272000 maximum"
|
||||
|
||||
assert get_context_length_from_provider_error(error_msg, 1_000_000) == 272_000
|
||||
|
||||
def test_reported_limit_not_lower_than_current_is_ignored(self):
|
||||
from agent.model_metadata import get_context_length_from_provider_error
|
||||
|
||||
error_msg = "maximum context length is 1000000 tokens"
|
||||
|
||||
assert get_context_length_from_provider_error(error_msg, 272_000) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_anthropic_kwargs — output cap clamping
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -282,19 +328,16 @@ class TestContextNotHalvedOnOutputCapError:
|
||||
assert agent.context_compressor.context_length == old_ctx
|
||||
assert agent._ephemeral_max_output_tokens == 19_936
|
||||
|
||||
def test_prompt_too_long_still_triggers_probe_tier(self):
|
||||
"""Genuine prompt-too-long errors must still use get_next_probe_tier."""
|
||||
def test_prompt_too_long_with_explicit_limit_uses_provider_limit(self):
|
||||
"""Prompt-too-long errors only change context_length when they report a concrete limit."""
|
||||
from agent.model_metadata import get_context_length_from_provider_error
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
from agent.model_metadata import get_next_probe_tier
|
||||
|
||||
error_msg = "prompt is too long: 205000 tokens > 200000 maximum"
|
||||
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
assert available_out is None, "prompt-too-long must not be caught by output-cap parser"
|
||||
|
||||
# The old halving path is still used for this class of error
|
||||
new_ctx = get_next_probe_tier(200_000)
|
||||
assert new_ctx == 128_000
|
||||
assert get_context_length_from_provider_error(error_msg, 1_000_000) == 200_000
|
||||
|
||||
def test_output_cap_error_safety_margin(self):
|
||||
"""The ephemeral value includes a 64-token safety margin below available_out."""
|
||||
|
||||
@@ -13,3 +13,36 @@ def test_dashboard_run_resets_home_before_dropping_privileges() -> None:
|
||||
assert "#!/command/with-contenv sh" in text
|
||||
assert "export HOME=/opt/data" in text
|
||||
assert "exec s6-setuidgid hermes hermes dashboard" in text
|
||||
|
||||
|
||||
def test_dashboard_run_does_not_derive_insecure_from_bind_host() -> None:
    """The s6 dashboard run script MUST NOT auto-add ``--insecure`` from
    ``HERMES_DASHBOARD_HOST``.

    Deriving the flag from the bind host disables the OAuth auth gate on every
    non-loopback bind even when an auth provider is registered. The opt-in is
    explicit instead: ``HERMES_DASHBOARD_INSECURE=1`` (or another truthy
    value), and the auth gate decides whether a non-loopback bind is safe.
    """
    script = DASHBOARD_RUN.read_text(encoding="utf-8")

    # Legacy host-derived markers that must be absent, paired with the
    # failure message reported when one of them reappears.
    legacy_markers = (
        (
            '127.0.0.1|localhost',
            "Run script still derives --insecure from the bind host. The gate "
            "is the authority now — opt in via HERMES_DASHBOARD_INSECURE instead.",
        ),
        (
            'case "$dash_host" in',
            "Legacy host-derived --insecure case-statement is back.",
        ),
    )
    for marker, complaint in legacy_markers:
        assert marker not in script, complaint

    # The explicit opt-in variable has to be referenced by the script.
    assert "HERMES_DASHBOARD_INSECURE" in script, (
        "Explicit HERMES_DASHBOARD_INSECURE opt-in is missing."
    )

    # Same truthy spellings as the other s6 scripts (HERMES_DASHBOARD,
    # HERMES_DASHBOARD_TUI).
    # NOTE(review): these are plain substring checks, so a short value such
    # as "1" can match unrelated text in the script — confirm that is intended.
    accepted_spellings = ("1", "true", "TRUE", "True", "yes", "YES", "Yes")
    for truthy in accepted_spellings:
        assert truthy in script, (
            f"HERMES_DASHBOARD_INSECURE should accept truthy value {truthy!r}"
        )
|
||||
|
||||
@@ -203,25 +203,43 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
|
||||
|
||||
|
||||
def test_non_persistent_cleanup_removes_container(monkeypatch):
|
||||
"""When persistent=false, cleanup() must schedule docker stop + rm."""
|
||||
"""When persist_across_processes=false, cleanup() must docker stop AND
|
||||
docker rm so containers don't leak across hermes processes.
|
||||
|
||||
Updated for issue #20561: the previous implementation used fire-and-forget
|
||||
``subprocess.Popen("... &", shell=True)`` which raced with parent exit;
|
||||
the new implementation uses ``subprocess.run`` on a daemon thread with
|
||||
bounded timeouts. See test_cleanup_with_persist_disabled_stops_and_rms
|
||||
for the full behavior contract.
|
||||
"""
|
||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||
calls = _mock_subprocess_run(monkeypatch)
|
||||
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
|
||||
_mock_subprocess_run(monkeypatch)
|
||||
# Run the worker thread synchronously so assertions can observe its work.
|
||||
import threading
|
||||
monkeypatch.setattr(threading, "Thread", _FakeThread)
|
||||
|
||||
popen_cmds = []
|
||||
monkeypatch.setattr(
|
||||
docker_env.subprocess, "Popen",
|
||||
lambda cmd, **kw: (popen_cmds.append(cmd), type("P", (), {"poll": lambda s: 0, "wait": lambda s, **k: None, "returncode": 0, "stdout": iter([]), "stdin": None})())[1],
|
||||
env = docker_env.DockerEnvironment(
|
||||
image="python:3.11", cwd="/root", timeout=60,
|
||||
task_id="ephemeral-task", persistent_filesystem=False,
|
||||
persist_across_processes=False,
|
||||
)
|
||||
|
||||
env = _make_dummy_env(persistent_filesystem=False, task_id="ephemeral-task")
|
||||
assert env._container_id
|
||||
container_id = env._container_id
|
||||
assert container_id
|
||||
|
||||
# Capture cleanup-time docker calls (everything before this was init).
|
||||
cleanup_calls = []
|
||||
real_run = docker_env.subprocess.run
|
||||
|
||||
def _capture(cmd, **kw):
|
||||
cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kw))
|
||||
return real_run(cmd, **kw)
|
||||
|
||||
monkeypatch.setattr(docker_env.subprocess, "run", _capture)
|
||||
env.cleanup()
|
||||
|
||||
# Should have stop and rm calls via Popen
|
||||
stop_cmds = [c for c in popen_cmds if container_id in str(c) and "stop" in str(c)]
|
||||
assert len(stop_cmds) >= 1, f"cleanup() should schedule docker stop for {container_id}"
|
||||
stops = [c for c in cleanup_calls if isinstance(c[0], list) and c[0][1:2] == ["stop"]]
|
||||
assert stops, f"cleanup() should docker stop {container_id}; got {cleanup_calls}"
|
||||
|
||||
|
||||
class _FakePopen:
|
||||
@@ -514,3 +532,839 @@ def test_run_as_host_user_warns_and_skips_when_no_posix_ids(monkeypatch, caplog)
|
||||
"does not expose POSIX uid/gid" in rec.getMessage()
|
||||
for rec in caplog.records
|
||||
), "expected a warning when POSIX ids are unavailable"
|
||||
|
||||
|
||||
# ── Docker labels (issue #20561) ──────────────────────────────────
|
||||
|
||||
|
||||
def _run_args_from_calls(calls):
|
||||
"""Pull the argv list passed to the first ``docker run`` invocation."""
|
||||
run_calls = [
|
||||
c for c in calls
|
||||
if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"
|
||||
]
|
||||
assert run_calls, "docker run should have been called"
|
||||
return run_calls[0][0]
|
||||
|
||||
|
||||
def _labels_in_run_args(run_args):
|
||||
"""Return the set of ``key=value`` strings passed via ``--label``."""
|
||||
return {
|
||||
run_args[i + 1]
|
||||
for i, flag in enumerate(run_args[:-1])
|
||||
if flag == "--label"
|
||||
}
|
||||
|
||||
|
||||
def test_run_command_tags_hermes_agent_label(monkeypatch):
    """Containers started by hermes-agent must carry the ``hermes-agent=1`` label.

    The label lets the orphan reaper (and external operators) list them with a
    single ``docker ps --filter label=hermes-agent=1`` call. Regression test
    for issue #20561 — without the label there is no global sweep target.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(task_id="my-task")

    run_argv = _run_args_from_calls(recorded)
    labels = _labels_in_run_args(run_argv)
    assert "hermes-agent=1" in labels, (
        f"hermes-agent=1 label missing; got labels: {sorted(labels)}"
    )
|
||||
|
||||
|
||||
def test_run_command_tags_task_and_profile_labels(monkeypatch):
    """``task_id`` and the active profile name are both emitted as labels.

    Future cross-process reuse logic can then filter on a specific
    (task, profile) pair without parsing container names. The profile name is
    stubbed here through ``_get_active_profile_name``.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "research-bot")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(task_id="kanban-42")

    run_argv = _run_args_from_calls(recorded)
    labels = _labels_in_run_args(run_argv)
    # Task label first, then profile label — same order as the checks they replace.
    for wanted in ("hermes-task-id=kanban-42", "hermes-profile=research-bot"):
        assert wanted in labels, f"{wanted} missing; got: {sorted(labels)}"
|
||||
|
||||
|
||||
def test_label_sanitizer_rejects_invalid_characters():
    """Label values are reduced to alnum + ``_.-`` and at most 63 characters.

    Profile or task names with slashes, colons or unicode would otherwise
    produce invalid labels that round-trip badly through ``docker ps --filter``.
    """
    sanitize = docker_env._sanitize_label_value

    expectations = [
        ("plain-name_1.0", "plain-name_1.0"),
        ("with/slash", "with_slash"),
        ("with:colon", "with_colon"),
        ("emoji-😀-here", "emoji-_-here"),
        # Empty / non-string inputs must collapse to a queryable token, not "".
        ("", "unknown"),
        (None, "unknown"),
    ]
    for raw, cleaned in expectations:
        assert sanitize(raw) == cleaned

    # >63 chars must truncate, not error.
    oversized = "x" * 100
    assert len(sanitize(oversized)) == 63
|
||||
|
||||
|
||||
def test_run_command_sanitizes_unsafe_task_id(monkeypatch):
    """An unsafe ``task_id`` is sanitized before it reaches ``docker run --label``.

    Otherwise the daemon would refuse the run with an inscrutable error and
    the agent's first command would blow up.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(task_id="task/with:weird*chars")

    run_argv = _run_args_from_calls(recorded)
    labels = _labels_in_run_args(run_argv)
    # "/", ":" and "*" each turn into an underscore; the safe characters survive.
    sanitized_label = "hermes-task-id=task_with_weird_chars"
    assert sanitized_label in labels, (
        f"sanitized task-id label missing; got: {sorted(labels)}"
    )
|
||||
|
||||
|
||||
def test_labels_attribute_populated_after_init(monkeypatch):
    """``self._labels`` must hold the same key/value pairs that went onto docker run.

    Later reuse / reaper paths can then match on them without re-running the
    sanitizer or re-importing the profile module.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)

    env = _make_dummy_env(task_id="abc")

    expected_labels = {
        "hermes-agent": "1",
        "hermes-task-id": "abc",
        "hermes-profile": "default",
    }
    assert env._labels == expected_labels
|
||||
|
||||
|
||||
# ── Cross-process container reuse (issue #20561) ──────────────────
|
||||
|
||||
|
||||
def _mock_subprocess_run_with_reuse(monkeypatch, ps_state: str | None,
                                    start_succeeds: bool = True):
    """Install a reuse-aware replacement for ``docker_env.subprocess.run``.

    ``ps_state`` selects the stdout of the ``ps`` subcommand:
      * ``None`` → empty stdout (no match), which forces a fresh ``docker run``.
      * any string, e.g. ``"running"`` / ``"exited"`` → one line made of
        ``reused-cid``, a tab, the state, and a newline.

    ``start_succeeds=False`` makes the ``start`` subcommand raise
    ``subprocess.CalledProcessError``.

    Returns the list every call is appended to as ``(cmd, kwargs)``, so the
    test can verify which docker commands actually ran.
    """
    recorded = []

    def _completed(cmd, stdout=""):
        # Every successful fake command exits 0 with empty stderr.
        return subprocess.CompletedProcess(cmd, 0, stdout=stdout, stderr="")

    def _fake_run(cmd, **kwargs):
        recorded.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        if not (isinstance(cmd, list) and len(cmd) >= 2):
            return _completed(cmd)

        subcommand = cmd[1]
        if subcommand == "version":
            return _completed(cmd, "Docker version")
        if subcommand == "ps":
            listing = "" if ps_state is None else f"reused-cid\t{ps_state}\n"
            return _completed(cmd, listing)
        if subcommand == "start":
            if start_succeeds:
                return _completed(cmd, "reused-cid\n")
            # Real subprocess.run with check=True raises on non-zero exit;
            # mirror that so the production code's except clause fires.
            raise subprocess.CalledProcessError(1, cmd, output="", stderr="no such container")
        if subcommand == "run":
            return _completed(cmd, "fresh-cid\n")
        return _completed(cmd)

    monkeypatch.setattr(docker_env.subprocess, "run", _fake_run)
    return recorded
|
||||
|
||||
|
||||
def test_reuse_attaches_to_running_container_without_docker_run(monkeypatch):
    """An already-``running`` labeled container is reused; ``docker run`` is skipped.

    Regression for the issue #20561 root cause: every Hermes process spawned a
    new container despite docs claiming "ONE long-lived container shared
    across sessions".
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")

    env = _make_dummy_env(task_id="reuse-test")

    def _invocations(subcommand):
        # Recorded list-form commands whose second argv element matches.
        return [
            c for c in calls
            if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == subcommand
        ]

    # The container id has to come from the ps probe output.
    assert env._container_id == "reused-cid", (
        f"expected reused container id, got {env._container_id!r}"
    )
    # No `docker run` may have been issued.
    run_invocations = _invocations("run")
    assert not run_invocations, (
        f"docker run should be skipped on reuse, got: {run_invocations}"
    )
    # Nor a `docker start`, since the container was already running.
    start_invocations = _invocations("start")
    assert not start_invocations, (
        f"docker start should be skipped when container already running, got: {start_invocations}"
    )
|
||||
|
||||
|
||||
def test_reuse_starts_stopped_container_before_attaching(monkeypatch):
    """A labeled container in ``exited`` state is restarted via ``docker start``.

    Without that step ``docker exec`` against the stopped container errors out
    and the first agent command fails opaquely.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="exited")

    env = _make_dummy_env(task_id="reuse-stopped")

    def _invocations(subcommand):
        # Recorded list-form commands whose second argv element matches.
        return [
            c for c in calls
            if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == subcommand
        ]

    assert env._container_id == "reused-cid"
    start_invocations = _invocations("start")
    assert start_invocations, "expected docker start for exited container"
    run_invocations = _invocations("run")
    assert not run_invocations, "should not docker run when reusing an exited container"
|
||||
|
||||
|
||||
def test_reuse_falls_back_to_fresh_run_when_start_fails(monkeypatch):
    """A failing ``docker start`` on the matched container falls through to ``docker run``.

    The container may have been removed between probe and start, the daemon
    may be paused, etc. The probe is best-effort, not authoritative, so the
    user must not be left with a broken environment.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    calls = _mock_subprocess_run_with_reuse(
        monkeypatch, ps_state="exited", start_succeeds=False,
    )

    env = _make_dummy_env(task_id="reuse-broken-start")

    # The start attempt fails, so the id must come from the fresh run.
    assert env._container_id == "fresh-cid", (
        f"expected fresh container id after fallback, got {env._container_id!r}"
    )
    run_invocations = [
        c for c in calls
        if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"
    ]
    assert run_invocations, "fallback to fresh docker run must happen on start failure"
|
||||
|
||||
|
||||
def test_no_reuse_when_persist_across_processes_disabled(monkeypatch):
    """``persist_across_processes=False`` skips the ps probe and starts fresh.

    This is the opt-out path for users who want hard per-process isolation,
    matching the pre-fix behavior.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    # A "running" match would trigger reuse if the probe ran at all.
    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")

    env = docker_env.DockerEnvironment(
        image="python:3.11",
        cwd="/root",
        timeout=60,
        task_id="no-reuse",
        persist_across_processes=False,
    )

    # The probe is gated by the flag, so no `docker ps` may appear.
    ps_invocations = [
        c for c in calls
        if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "ps"
    ]
    assert not ps_invocations, (
        f"docker ps probe should be skipped when persist_across_processes=False, got: {ps_invocations}"
    )
    # A fresh container must have been started instead.
    assert env._container_id == "fresh-cid"
|
||||
|
||||
|
||||
def test_find_reusable_container_prefers_running_over_stopped(monkeypatch):
    """With several probe matches, a ``running`` container beats a stopped one.

    Duplicates should not normally exist but can after a crash; the leftover
    is meant to be reaped later by the orphan reaper.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")

    def _fake_run(cmd, **kwargs):
        subcommand = cmd[1] if isinstance(cmd, list) and len(cmd) >= 2 else None
        if subcommand == "version":
            return subprocess.CompletedProcess(cmd, 0, stdout="ok", stderr="")
        if subcommand == "ps":
            # Two matches: the stopped one is listed before the running one.
            listing = "stopped-cid\texited\nrunning-cid\trunning\n"
            return subprocess.CompletedProcess(cmd, 0, stdout=listing, stderr="")
        return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="")

    monkeypatch.setattr(docker_env.subprocess, "run", _fake_run)

    env = _make_dummy_env(task_id="dup-match")
    assert env._container_id == "running-cid", (
        f"running container should win over stopped duplicate, got {env._container_id!r}"
    )
|
||||
|
||||
|
||||
# ── Cleanup correctness (issue #20561) ────────────────────────────
|
||||
|
||||
|
||||
class _FakeThread:
|
||||
"""Stand-in for threading.Thread that captures target/args and calls
|
||||
target() synchronously when .start() runs, so cleanup behavior is
|
||||
observable without actually backgrounding subprocess calls."""
|
||||
|
||||
def __init__(self, target=None, daemon=None, name=None):
|
||||
self._target = target
|
||||
self.daemon = daemon
|
||||
self.name = name
|
||||
self._done = False
|
||||
|
||||
def start(self):
|
||||
if self._target is not None:
|
||||
self._target()
|
||||
self._done = True
|
||||
|
||||
def is_alive(self):
|
||||
return not self._done
|
||||
|
||||
def join(self, timeout=None):
|
||||
self._done = True
|
||||
|
||||
|
||||
def _install_fake_thread(monkeypatch):
    """Replace ``threading.Thread`` with ``_FakeThread`` through ``monkeypatch``."""
    import threading as threading_module

    monkeypatch.setattr(threading_module, "Thread", _FakeThread)
|
||||
|
||||
|
||||
def test_cleanup_with_persist_is_noop_for_container(monkeypatch):
    """Default (``persist_across_processes=True``) cleanup neither stops nor removes.

    The docs promise "ONE long-lived container shared across sessions", and a
    docker stop would kill background processes inside the container (npm
    watchers, pytest watchers, etc.). Resource reclamation in this mode is
    left to the orphan reaper on the next Hermes startup, not to graceful
    exit. See issue #20561.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    # persist_across_processes is left at its default (True).
    env = _make_dummy_env(task_id="cleanup-persist", persistent_filesystem=False)
    container_id = env._container_id
    assert container_id

    # Record only what cleanup() issues; init-time calls happened already.
    cleanup_calls = []
    delegate_run = docker_env.subprocess.run

    def _recording_run(cmd, **kwargs):
        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        return delegate_run(cmd, **kwargs)

    monkeypatch.setattr(docker_env.subprocess, "run", _recording_run)

    env.cleanup()

    def _issued(subcommand):
        # Recorded list-form commands whose second argv element matches.
        return [
            c for c in cleanup_calls
            if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == subcommand
        ]

    stops = _issued("stop")
    rms = _issued("rm")
    assert not stops, (
        f"docker stop must NOT be called when persist_across_processes=True; "
        f"container has to stay running so background processes survive. "
        f"Got: {stops}"
    )
    assert not rms, (
        f"docker rm must NOT be called when persist_across_processes=True; "
        f"reuse would be impossible. Got: {rms}"
    )
    # The in-process handle is still cleared so the next __init__ re-probes
    # via labels (and reuses the still-running container).
    assert env._container_id is None, (
        "in-process container_id should be cleared even in no-op cleanup"
    )
|
||||
|
||||
|
||||
def test_cleanup_force_remove_stops_and_rms_even_in_persist_mode(monkeypatch):
    """``cleanup(force_remove=True)`` stops AND removes even in persist mode.

    This is the explicit-teardown path for ``/reset``,
    ``cleanup_vm(task_id, force_remove=True)``, and any caller that wants a
    guaranteed fresh container. Without the kwarg, persist-mode callers could
    only force a fresh container by flipping the global config — too coarse
    for a per-task reset.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    env = _make_dummy_env(task_id="cleanup-force", persistent_filesystem=False)
    assert env._container_id

    # Record only what cleanup() issues; init-time calls happened already.
    cleanup_calls = []
    delegate_run = docker_env.subprocess.run

    def _recording_run(cmd, **kwargs):
        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        return delegate_run(cmd, **kwargs)

    monkeypatch.setattr(docker_env.subprocess, "run", _recording_run)

    env.cleanup(force_remove=True)

    def _issued(subcommand):
        # Recorded list-form commands whose second argv element matches.
        return [
            c for c in cleanup_calls
            if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == subcommand
        ]

    stops = _issued("stop")
    rms = _issued("rm")
    assert stops, f"force_remove must docker stop; got: {cleanup_calls}"
    assert rms, f"force_remove must docker rm; got: {cleanup_calls}"
|
||||
|
||||
|
||||
def test_cleanup_vm_default_honors_persist_mode(monkeypatch):
    """``cleanup_vm(task_id)`` without ``force_remove=True`` must be a no-op
    for a persist-mode container.

    ``AIAgent.close()`` (called from ``tui_gateway/server.py`` on
    session.close, from ``gateway/run.py`` on per-session teardown, and from
    per-turn cleanup) calls ``cleanup_vm(task_id)``. If that defaulted to
    ``force_remove=True`` the container would be torn down on every TUI
    session close, defeating the "ONE long-lived container shared across
    sessions" contract.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    from tools import terminal_tool

    env = _make_dummy_env(task_id="session-close-test")
    # Precondition: a container must exist, otherwise the "no stop / no rm"
    # assertions below would pass vacuously.
    assert env._container_id
    terminal_tool._active_environments["session-close-test"] = env

    # Record only what cleanup_vm() issues; init-time calls happened already.
    cleanup_calls = []
    real_run = docker_env.subprocess.run

    def _capturing_run(cmd, **kwargs):
        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        return real_run(cmd, **kwargs)

    monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)

    try:
        terminal_tool.cleanup_vm("session-close-test")
    finally:
        # Always unregister so a failure here cannot leak into other tests.
        terminal_tool._active_environments.pop("session-close-test", None)

    stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
    rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
    assert not stops, (
        f"cleanup_vm() default must not docker stop a persist-mode container; "
        f"got: {stops}"
    )
    assert not rms, (
        f"cleanup_vm() default must not docker rm a persist-mode container; "
        f"got: {rms}"
    )
|
||||
|
||||
|
||||
def test_cleanup_vm_force_remove_tears_down_persist_container(monkeypatch):
    """``cleanup_vm(task_id, force_remove=True)`` tears down a persist-mode container.

    This is the explicit-teardown path for ``/reset``-style flows. It also
    pins the plumbing: the kwarg must actually flow through ``cleanup_vm``
    into the backend's ``cleanup()``.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    from tools import terminal_tool

    task_key = "explicit-teardown-test"
    env = _make_dummy_env(task_id="explicit-teardown-test")
    terminal_tool._active_environments[task_key] = env

    # Record only what cleanup_vm() issues; init-time calls happened already.
    cleanup_calls = []
    delegate_run = docker_env.subprocess.run

    def _recording_run(cmd, **kwargs):
        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        return delegate_run(cmd, **kwargs)

    monkeypatch.setattr(docker_env.subprocess, "run", _recording_run)

    try:
        terminal_tool.cleanup_vm(task_key, force_remove=True)
    finally:
        # Always unregister so a failure here cannot leak into other tests.
        terminal_tool._active_environments.pop(task_key, None)

    def _issued(subcommand):
        # Recorded list-form commands whose second argv element matches.
        return [
            c for c in cleanup_calls
            if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == subcommand
        ]

    stops = _issued("stop")
    rms = _issued("rm")
    assert stops, f"force_remove must reach docker stop; got: {cleanup_calls}"
    assert rms, f"force_remove must reach docker rm; got: {cleanup_calls}"
|
||||
|
||||
|
||||
def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch):
    """With ``persist_across_processes=False``, ``cleanup()`` must run both
    ``docker stop`` and ``docker rm`` even when ``persistent_filesystem=True``.

    Regression pin for issue #20561: gating ``rm`` on the persistent-filesystem
    flag is what left Exited containers behind.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    # persistent_filesystem=True is the former leak scenario; with the
    # cross-process toggle off the container must still be removed.
    env = docker_env.DockerEnvironment(
        image="python:3.11",
        cwd="/root",
        timeout=60,
        task_id="cleanup-no-persist",
        persistent_filesystem=True,
        persist_across_processes=False,
    )

    recorded = []
    delegate = docker_env.subprocess.run

    def _recording_run(cmd, **kwargs):
        snapshot = list(cmd) if isinstance(cmd, list) else cmd
        recorded.append((snapshot, kwargs))
        return delegate(cmd, **kwargs)

    monkeypatch.setattr(docker_env.subprocess, "run", _recording_run)

    env.cleanup()

    def _with_subcommand(name):
        return [
            entry for entry in recorded
            if isinstance(entry[0], list) and len(entry[0]) >= 2 and entry[0][1] == name
        ]

    stops = _with_subcommand("stop")
    rms = _with_subcommand("rm")
    assert stops, "expected docker stop"
    assert rms, (
        "docker rm MUST run when persist_across_processes=False, even with "
        "persistent_filesystem=True — that gating was the leak source in #20561."
    )
|
||||
|
||||
|
||||
def test_cleanup_uses_subprocess_run_not_detached_shell(monkeypatch):
    """``cleanup`` must never go through ``subprocess.Popen``.

    ``Popen`` is replaced with a stub that raises ``AssertionError``, so any
    use of it during cleanup fails the test. ``force_remove=True`` is passed
    so the docker teardown path is actually exercised instead of the default
    persist-mode path, which would pass without issuing any docker call.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    def _forbidden_popen(*args, **kwargs):
        message = (
            f"cleanup must not use subprocess.Popen anymore (issue #20561); "
            f"got args={args} kwargs={kwargs}"
        )
        raise AssertionError(message)

    monkeypatch.setattr(docker_env.subprocess, "Popen", _forbidden_popen)

    dummy = _make_dummy_env(task_id="no-popen-cleanup")
    # Completing without an exception is the assertion.
    dummy.cleanup(force_remove=True)
|
||||
|
||||
|
||||
def test_wait_for_cleanup_returns_true_when_no_thread_started():
    """``wait_for_cleanup`` reports success on an environment that never had
    ``cleanup`` called on it, so callers may invoke it unconditionally."""
    # Bypass __init__ so no cleanup-thread attribute is ever assigned.
    bare_env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
    outcome = bare_env.wait_for_cleanup(timeout=1.0)
    assert outcome is True
|
||||
|
||||
|
||||
def test_wait_for_cleanup_after_cleanup_returns_true(monkeypatch):
    """After ``cleanup(force_remove=True)``, ``wait_for_cleanup`` must return
    ``True`` within its timeout.

    ``force_remove=True`` is used so cleanup does real work; the "nothing to
    wait for" case is covered by the preceding test.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    dummy = _make_dummy_env(task_id="wait-test")
    dummy.cleanup(force_remove=True)

    finished = dummy.wait_for_cleanup(timeout=5.0)
    assert finished is True
|
||||
|
||||
|
||||
def test_cleanup_on_env_with_no_container_id_does_not_raise(monkeypatch):
    """``cleanup()`` must be safe on an environment that never obtained a
    container id (for example, construction failed early)."""
    # Build the object without running __init__, then set only the attributes
    # this test provides to cleanup().
    half_built = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
    for attribute, value in (
        ("_container_id", None),
        ("_persistent", False),
        ("_workspace_dir", None),
        ("_home_dir", None),
    ):
        setattr(half_built, attribute, value)

    # Reaching the end without an exception is the assertion.
    half_built.cleanup()
|
||||
|
||||
|
||||
# ── Orphan reaper (issue #20561) ──────────────────────────────────
|
||||
|
||||
|
||||
def _now_iso(offset_seconds: int = 0) -> str:
|
||||
"""Return an RFC3339 timestamp ``offset_seconds`` in the past."""
|
||||
import datetime
|
||||
t = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(seconds=offset_seconds)
|
||||
# Format like Docker emits — with nanoseconds-style trailing digits.
|
||||
return t.isoformat().replace("+00:00", ".123456789Z")
|
||||
|
||||
|
||||
def _reaper_run_mock(monkeypatch, ps_ids: list[str], inspect_responses: dict[str, str],
                     rm_succeeds: bool = True):
    """Install a fake ``docker_env.subprocess.run`` for the reaper tests.

    The fake dispatches on the second element of the command list:

    * ``ps`` — stdout is ``ps_ids``, one id per line.
    * ``inspect`` — stdout is ``inspect_responses`` for the last command
      element (the container id), or an empty line when the id is unknown.
    * ``rm`` — exit code 0 when ``rm_succeeds``; otherwise 1 with
      ``"no such container"`` on stderr.

    Anything else, including non-list commands, succeeds with empty output.

    Returns the list every call is appended to as ``(cmd, kwargs)``.
    """
    recorded = []

    def _fake_run(cmd, **kwargs):
        recorded.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))

        returncode, out, err = 0, "", ""
        subcommand = cmd[1] if isinstance(cmd, list) and len(cmd) >= 2 else None

        if subcommand == "ps":
            out = "\n".join(ps_ids) + ("\n" if ps_ids else "")
        elif subcommand == "inspect":
            # cmd is [docker, inspect, --format, '{{.State.FinishedAt}}', cid]
            out = inspect_responses.get(cmd[-1], "") + "\n"
        elif subcommand == "rm" and not rm_succeeds:
            returncode, err = 1, "no such container"

        return subprocess.CompletedProcess(cmd, returncode, stdout=out, stderr=err)

    monkeypatch.setattr(docker_env.subprocess, "run", _fake_run)
    return recorded
|
||||
|
||||
|
||||
def test_reap_orphan_returns_zero_when_no_matches(monkeypatch):
    """When ``docker ps`` lists nothing, the reaper returns 0 and issues no
    ``docker rm`` (baseline for issue #20561)."""
    recorded = _reaper_run_mock(monkeypatch, ps_ids=[], inspect_responses={})

    reaped = docker_env.reap_orphan_containers(
        max_age_seconds=600,
        profile_filter="default",
        docker_exe="/usr/bin/docker",
    )
    assert reaped == 0

    rms = [entry for entry in recorded if isinstance(entry[0], list) and entry[0][1:2] == ["rm"]]
    assert not rms, "no rm calls expected when ps returns empty"
|
||||
|
||||
|
||||
def test_reap_orphan_removes_stale_exited_container(monkeypatch):
    """A container that finished longer ago than ``max_age_seconds`` is removed
    with exactly one ``docker rm`` naming it (core repair path, issue #20561)."""
    finished_at = _now_iso(offset_seconds=900)  # 15 minutes ago
    recorded = _reaper_run_mock(
        monkeypatch,
        ps_ids=["old-cid"],
        inspect_responses={"old-cid": finished_at},
    )

    reaped = docker_env.reap_orphan_containers(
        max_age_seconds=600,
        profile_filter="default",
        docker_exe="/usr/bin/docker",
    )
    assert reaped == 1

    rms = [entry for entry in recorded if isinstance(entry[0], list) and entry[0][1:2] == ["rm"]]
    assert len(rms) == 1
    assert "old-cid" in rms[0][0], f"expected rm of old-cid, got {rms[0][0]}"
|
||||
|
||||
|
||||
def test_reap_orphan_spares_recently_exited_container(monkeypatch):
    """A container that finished inside the ``max_age_seconds`` window must be
    left alone: nothing is reaped and no ``docker rm`` is issued."""
    finished_at = _now_iso(offset_seconds=60)  # 1 minute ago
    recorded = _reaper_run_mock(
        monkeypatch,
        ps_ids=["recent-cid"],
        inspect_responses={"recent-cid": finished_at},
    )

    reaped = docker_env.reap_orphan_containers(
        max_age_seconds=600,
        profile_filter="default",
        docker_exe="/usr/bin/docker",
    )
    assert reaped == 0

    rms = [entry for entry in recorded if isinstance(entry[0], list) and entry[0][1:2] == ["rm"]]
    assert not rms, f"recent container must not be reaped, got rm calls: {rms}"
|
||||
|
||||
|
||||
def test_reap_orphan_scopes_to_profile_filter_via_label(monkeypatch):
    """The first ``docker ps`` call must carry the profile label, the
    ``hermes-agent`` label and the exited-status filter, so the sweep never
    touches other profiles or running containers."""
    recorded = _reaper_run_mock(monkeypatch, ps_ids=[], inspect_responses={})

    docker_env.reap_orphan_containers(
        max_age_seconds=600,
        profile_filter="research-bot",
        docker_exe="/usr/bin/docker",
    )

    ps_calls = [entry for entry in recorded if isinstance(entry[0], list) and entry[0][1:2] == ["ps"]]
    assert ps_calls, "expected at least one docker ps call"

    first_ps_argv = ps_calls[0][0]
    flat = " ".join(first_ps_argv)
    assert "label=hermes-profile=research-bot" in flat, (
        f"profile filter not applied to docker ps; got args: {first_ps_argv}"
    )
    assert "label=hermes-agent=1" in flat, (
        f"hermes-agent label filter must also be applied; got: {first_ps_argv}"
    )
    assert "status=exited" in flat, (
        "must filter to exited containers only — running containers may "
        "belong to a sibling Hermes process and must NEVER be reaped"
    )
|
||||
|
||||
|
||||
def test_reap_orphan_skips_container_with_unparseable_finished_at(monkeypatch):
    """Containers whose ``FinishedAt`` is Docker's zero value
    (``0001-01-01T00:00:00Z``) or not a timestamp at all must not be reaped."""
    undatable = {
        "never-finished": "0001-01-01T00:00:00Z",
        "garbage-ts": "not-a-timestamp",
    }
    recorded = _reaper_run_mock(
        monkeypatch,
        ps_ids=["never-finished", "garbage-ts"],
        inspect_responses=undatable,
    )

    reaped = docker_env.reap_orphan_containers(
        max_age_seconds=600,
        profile_filter="default",
        docker_exe="/usr/bin/docker",
    )
    assert reaped == 0

    rms = [entry for entry in recorded if isinstance(entry[0], list) and entry[0][1:2] == ["rm"]]
    assert not rms, (
        f"reaper must NOT remove containers with unparseable FinishedAt; got: {rms}"
    )
|
||||
|
||||
|
||||
def test_reap_orphan_handles_docker_ps_failure_gracefully(monkeypatch):
    """A non-zero exit from ``docker ps`` must make the reaper return 0
    instead of raising."""

    def _failing_ps(cmd, **kwargs):
        is_ps = isinstance(cmd, list) and len(cmd) >= 2 and cmd[1] == "ps"
        if not is_ps:
            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
        return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="Cannot connect to daemon")

    monkeypatch.setattr(docker_env.subprocess, "run", _failing_ps)

    # The call itself must not raise.
    reaped = docker_env.reap_orphan_containers(
        max_age_seconds=600,
        profile_filter="default",
        docker_exe="/usr/bin/docker",
    )
    assert reaped == 0
|
||||
|
||||
|
||||
def test_reap_orphan_continues_after_individual_rm_failure(monkeypatch):
    """One failing ``docker rm`` must not abort the sweep: all three stale
    candidates are attempted and the two successes are counted."""
    finished_at = _now_iso(offset_seconds=900)
    rm_calls = []

    def _fake_run(cmd, **kwargs):
        returncode, out, err = 0, "", ""
        subcommand = cmd[1] if isinstance(cmd, list) and len(cmd) >= 2 else None

        if subcommand == "ps":
            out = "cid-a\ncid-b\ncid-c\n"
        elif subcommand == "inspect":
            out = finished_at + "\n"
        elif subcommand == "rm":
            target = cmd[-1]
            rm_calls.append(target)
            # Only cid-b fails; cid-a and cid-c succeed.
            if target == "cid-b":
                returncode, err = 1, "no such container"

        return subprocess.CompletedProcess(cmd, returncode, stdout=out, stderr=err)

    monkeypatch.setattr(docker_env.subprocess, "run", _fake_run)

    reaped = docker_env.reap_orphan_containers(
        max_age_seconds=600,
        profile_filter="default",
        docker_exe="/usr/bin/docker",
    )

    # Three attempts, two successes.
    assert reaped == 2
    assert set(rm_calls) == {"cid-a", "cid-b", "cid-c"}, (
        f"reaper must attempt all candidates even when one fails; got: {rm_calls}"
    )
|
||||
|
||||
|
||||
def test_container_finished_at_parses_nanosecond_timestamp(monkeypatch):
    """``_container_finished_at`` must parse an RFC3339 timestamp carrying nine
    fractional digits and return a UTC datetime with the right calendar date."""
    import datetime

    nanosecond_stamp = "2026-05-28T13:45:00.123456789Z\n"

    def _fake_run(cmd, **kwargs):
        return subprocess.CompletedProcess(cmd, 0, stdout=nanosecond_stamp, stderr="")

    monkeypatch.setattr(docker_env.subprocess, "run", _fake_run)

    parsed = docker_env._container_finished_at("/usr/bin/docker", "test-cid")

    assert parsed is not None, "must parse RFC3339 with nanoseconds"
    assert parsed.tzinfo == datetime.timezone.utc
    assert parsed.year == 2026 and parsed.month == 5 and parsed.day == 28
|
||||
|
||||
|
||||
def test_container_finished_at_returns_none_on_zero_value():
    """Docker's zero-value ``0001-01-01T00:00:00Z`` (never finished) must map
    to ``None`` so the reaper treats the container as unreapable."""
    import subprocess as _subprocess
    import unittest.mock

    # Stub ``docker inspect`` with the same ``CompletedProcess`` shape the
    # sibling reaper tests use, instead of a hand-rolled stand-in class.
    zero_value = _subprocess.CompletedProcess(
        ["/usr/bin/docker", "inspect"], 0, stdout="0001-01-01T00:00:00Z\n", stderr="",
    )

    with unittest.mock.patch.object(docker_env.subprocess, "run", return_value=zero_value):
        result = docker_env._container_finished_at("/usr/bin/docker", "never-finished")

    assert result is None
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
"""Integration tests for the docker orphan-reaper wiring in terminal_tool.
|
||||
|
||||
The reaper itself is unit-tested in tests/tools/test_docker_environment.py
|
||||
under the "Orphan reaper" section. These tests cover the terminal_tool-side
|
||||
gates: once-per-process behavior, the disable flag, and the
|
||||
``lifetime_seconds`` doubling that determines the reaper's age threshold.
|
||||
|
||||
Issue #20561 — without these gates, parallel subagents would each fire the
|
||||
reaper on container creation, and the ``terminal.docker_orphan_reaper: false``
|
||||
opt-out would silently do nothing.
|
||||
"""
|
||||
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import tools.terminal_tool as terminal_tool
|
||||
|
||||
|
||||
def _reset_reaper_gate():
    """Reset ``terminal_tool._docker_orphan_reaper_ran`` to ``False`` so each
    test starts with the once-per-process gate open."""
    setattr(terminal_tool, "_docker_orphan_reaper_ran", False)
|
||||
|
||||
|
||||
def test_maybe_reap_runs_once_per_process(monkeypatch):
    """Three consecutive ``_maybe_reap_docker_orphans`` calls with the reaper
    enabled must invoke ``reap_orphan_containers`` exactly once."""
    _reset_reaper_gate()
    invocations = []

    def _fake_reap(**kwargs):
        invocations.append(kwargs)
        return 0

    with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
        config = {"docker_orphan_reaper": True}
        for _ in range(3):
            terminal_tool._maybe_reap_docker_orphans(config)

    assert len(invocations) == 1, (
        f"reaper must run exactly once per process; got {len(invocations)} calls"
    )
|
||||
|
||||
|
||||
def test_maybe_reap_respects_disable_flag(monkeypatch):
    """With ``docker_orphan_reaper`` set to ``False`` the sweep is skipped
    entirely and the once-per-process gate stays open."""
    _reset_reaper_gate()
    invocations = []

    def _fake_reap(**kwargs):
        invocations.append(kwargs)
        return 0

    with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
        terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": False})

    assert len(invocations) == 0, "disabled reaper must not run any docker calls"
    # A disabled run must not trip the gate; otherwise re-enabling the reaper
    # later in the same process would have no effect.
    assert terminal_tool._docker_orphan_reaper_ran is False
|
||||
|
||||
|
||||
def test_maybe_reap_doubles_lifetime_for_max_age(monkeypatch):
    """With ``TERMINAL_LIFETIME_SECONDS=300`` the reaper must be called with
    ``max_age_seconds=600``, i.e. twice the configured lifetime."""
    _reset_reaper_gate()
    seen_kwargs = {}

    def _fake_reap(**kwargs):
        seen_kwargs.update(kwargs)
        return 0

    monkeypatch.setenv("TERMINAL_LIFETIME_SECONDS", "300")
    with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
        terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True})

    max_age = seen_kwargs.get("max_age_seconds")
    assert max_age == 600, (
        f"expected 2 × 300 = 600, got {max_age}"
    )
|
||||
|
||||
|
||||
def test_maybe_reap_floors_at_60_seconds(monkeypatch):
    """With ``TERMINAL_LIFETIME_SECONDS=0`` the reaper must still be called
    with ``max_age_seconds=120`` (60s floor, doubled) rather than zero."""
    _reset_reaper_gate()
    seen_kwargs = {}

    def _fake_reap(**kwargs):
        seen_kwargs.update(kwargs)
        return 0

    monkeypatch.setenv("TERMINAL_LIFETIME_SECONDS", "0")
    with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
        terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True})

    max_age = seen_kwargs.get("max_age_seconds")
    assert max_age == 120, (
        f"expected floored 60 × 2 = 120, got {max_age}"
    )
|
||||
|
||||
|
||||
def test_maybe_reap_passes_current_profile_as_filter(monkeypatch):
    """The active profile name must be forwarded to the reaper as
    ``profile_filter`` so one profile never sweeps another's containers."""
    _reset_reaper_gate()
    seen_kwargs = {}

    def _fake_reap(**kwargs):
        seen_kwargs.update(kwargs)
        return 0

    with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
        with patch("tools.environments.docker._get_active_profile_name", return_value="research-bot"):
            terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True})

    profile = seen_kwargs.get("profile_filter")
    assert profile == "research-bot", (
        f"expected profile_filter='research-bot', got {profile!r}"
    )
|
||||
|
||||
|
||||
def test_maybe_reap_swallows_exceptions(monkeypatch):
    """An exception raised by ``reap_orphan_containers`` must not propagate
    out of ``_maybe_reap_docker_orphans``."""
    _reset_reaper_gate()

    def _exploding_reap(**kwargs):
        raise RuntimeError("docker daemon ate the cat")

    enabled = {"docker_orphan_reaper": True}
    with patch("tools.environments.docker.reap_orphan_containers", _exploding_reap):
        # Returning normally is the assertion.
        terminal_tool._maybe_reap_docker_orphans(enabled)
|
||||
@@ -34,6 +34,39 @@ def test_resolve_stdio_command_falls_back_to_hermes_node_bin(tmp_path):
|
||||
assert env["PATH"].split(os.pathsep)[0] == str(node_bin)
|
||||
|
||||
|
||||
def test_resolve_stdio_command_falls_back_to_usr_local_bin():
    """If ``npx`` is not found via ``which`` and the only existing, executable
    candidate is ``/usr/local/bin/npx``, the resolver must return that path and
    put its directory first on the returned ``PATH``."""
    target = os.path.join(os.sep, "usr", "local", "bin", "npx")

    # Only the /usr/local/bin candidate "exists" and is "executable"; every
    # other candidate path must fail both probes.
    def _only_target_exists(path):
        return path == target

    def _only_target_executable(path, _mode):
        return path == target

    with patch("tools.mcp_tool.shutil.which", return_value=None):
        with patch("tools.mcp_tool.os.path.isfile", side_effect=_only_target_exists):
            with patch("tools.mcp_tool.os.access", side_effect=_only_target_executable):
                command, env = _resolve_stdio_command("npx", {"PATH": "/opt/data/bin:/usr/bin:/bin"})

    assert command == target
    # The directory is prepended so npx's shebang (`/usr/bin/env node`) can
    # find node next to it.
    leading_dir = env["PATH"].split(os.pathsep)[0]
    assert leading_dir == os.path.dirname(target)
|
||||
|
||||
|
||||
def test_resolve_stdio_command_respects_explicit_empty_path():
|
||||
seen_paths = []
|
||||
|
||||
|
||||
@@ -378,9 +378,12 @@ class TestSendMessageTool:
|
||||
)
|
||||
|
||||
def test_media_tag_outside_allowed_roots_is_not_sent(self, tmp_path, monkeypatch):
|
||||
# This test exercises the strict-allowlist path; disable recency trust
|
||||
# so the freshly-written tmp_path file is not auto-accepted by the
|
||||
# trust window. (Recency trust is covered in test_platform_base.py.)
|
||||
# This test exercises the strict-allowlist path; force strict mode on
|
||||
# and disable recency trust so the freshly-written tmp_path file is
|
||||
# not auto-accepted by the trust window. (Recency trust is covered
|
||||
# in test_platform_base.py. The public default flipped to non-strict
|
||||
# in 2026-05; this test pins strict on explicitly.)
|
||||
monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
|
||||
config, telegram_cfg = _make_config()
|
||||
secret = tmp_path / "secret.pdf"
|
||||
|
||||
@@ -472,6 +472,68 @@ class TestSkillsShSource:
|
||||
requested_urls = [call.args[0] for call in mock_get.call_args_list]
|
||||
assert root_url not in requested_urls
|
||||
|
||||
@patch("tools.skills_hub._write_index_cache")
@patch("tools.skills_hub._read_index_cache", return_value=None)
@patch("tools.skills_hub.httpx.get")
def test_empty_query_walks_sitemap_not_homepage(
    self, mock_get, _mock_read_cache, _mock_write_cache,
):
    """An empty query must be answered from the sitemap, not the homepage.

    The mocked sitemap index points at one non-skill sitemap and two skill
    sitemaps that share one duplicate URL. The search must return the four
    unique skills and must never request the bare homepage URL.
    """
    index_xml = """<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap><loc>https://www.skills.sh/sitemap-misc.xml</loc></sitemap>
<sitemap><loc>https://www.skills.sh/sitemap-skills-1.xml</loc></sitemap>
<sitemap><loc>https://www.skills.sh/sitemap-skills-2.xml</loc></sitemap>
</sitemapindex>"""
    skills_1_xml = """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url><loc>https://www.skills.sh/anthropics/skills/frontend-design</loc></url>
<url><loc>https://www.skills.sh/anthropics/skills/pdf</loc></url>
<url><loc>https://www.skills.sh/vercel-labs/agent-skills/react-best-practices</loc></url>
</urlset>"""
    skills_2_xml = """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url><loc>https://www.skills.sh/microsoft/azure-skills/azure-ai</loc></url>
<url><loc>https://www.skills.sh/anthropics/skills/frontend-design</loc></url>
</urlset>"""

    def side_effect(url, *args, **kwargs):
        # Serve a canned document per URL; anything unrecognised is a 404.
        if url.endswith("/sitemap.xml"):
            document = index_xml
        elif "sitemap-skills-1" in url:
            document = skills_1_xml
        elif "sitemap-skills-2" in url:
            document = skills_2_xml
        else:
            document = None

        response = MagicMock(status_code=200)
        if document is None:
            response.status_code = 404
            response.text = ""
        else:
            response.text = document
        return response

    mock_get.side_effect = side_effect

    results = self._source().search("", limit=0)

    # 4 unique skills (the frontend-design dup across sitemaps collapsed).
    assert len(results) == 4
    expected_identifiers = {
        "skills-sh/anthropics/skills/frontend-design",
        "skills-sh/anthropics/skills/pdf",
        "skills-sh/vercel-labs/agent-skills/react-best-practices",
        "skills-sh/microsoft/azure-skills/azure-ai",
    }
    assert {entry.identifier for entry in results} == expected_identifiers

    # Homepage was NOT fetched — the sitemap path is taken on empty query.
    urls_called = [call.args[0] for call in mock_get.call_args_list]
    homepage_urls = ("https://skills.sh", "https://skills.sh/")
    assert not any(requested in homepage_urls for requested in urls_called)
|
||||
|
||||
|
||||
class TestFindSkillInRepoTree:
|
||||
"""Tests for GitHubSource._find_skill_in_repo_tree."""
|
||||
|
||||
@@ -224,3 +224,39 @@ def test_docker_env_is_bridged_everywhere():
|
||||
assert "docker_env" in _gateway_env_map_keys()
|
||||
assert "docker_env" in _save_config_env_sync_keys()
|
||||
assert "TERMINAL_DOCKER_ENV" in _terminal_tool_env_var_names()
|
||||
|
||||
|
||||
def test_docker_persist_across_processes_is_bridged_everywhere():
    """Regression pin for ``terminal.docker_persist_across_processes``
    (issue #20561).

    The key must appear in the CLI env map, the gateway env map and the
    save-config env sync keys, and its ``TERMINAL_*`` variable must be known
    to terminal_tool. If any one is missing, setting the option in config
    would silently do nothing for that entry point.
    """
    config_key = "docker_persist_across_processes"
    assert config_key in _cli_env_map_keys()
    assert config_key in _gateway_env_map_keys()
    assert config_key in _save_config_env_sync_keys()

    env_var = "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES"
    assert env_var in _terminal_tool_env_var_names()
|
||||
|
||||
|
||||
def test_docker_orphan_reaper_is_bridged_everywhere():
    """Regression pin for ``terminal.docker_orphan_reaper`` (issue #20561).

    The key must appear in the CLI env map, the gateway env map and the
    save-config env sync keys, and its ``TERMINAL_*`` variable must be known
    to terminal_tool. If any one is missing, the toggle would be ignored for
    that entry point.
    """
    config_key = "docker_orphan_reaper"
    assert config_key in _cli_env_map_keys()
    assert config_key in _gateway_env_map_keys()
    assert config_key in _save_config_env_sync_keys()

    env_var = "TERMINAL_DOCKER_ORPHAN_REAPER"
    assert env_var in _terminal_tool_env_var_names()
|
||||
|
||||
@@ -44,11 +44,17 @@ def server(hermes_home):
|
||||
):
|
||||
mod = importlib.import_module("tui_gateway.server")
|
||||
yield mod
|
||||
# Reset module-level session state without re-importing. importlib.reload
|
||||
# would re-register the module's atexit hooks (ThreadPoolExecutor
|
||||
# shutdown, _shutdown_sessions); the duplicates race the stderr
|
||||
# buffer at interpreter shutdown and surface as Fatal Python error:
|
||||
# _enter_buffered_busy. Clearing the per-session dicts gives the
|
||||
# next test a clean slate; _methods is NOT cleared because it's
|
||||
# populated at module import time and re-registration only happens
|
||||
# via reload (which we don't do).
|
||||
mod._sessions.clear()
|
||||
mod._pending.clear()
|
||||
mod._answers.clear()
|
||||
mod._methods.clear()
|
||||
importlib.reload(mod)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
|
||||
@@ -30,11 +30,17 @@ def server():
|
||||
import importlib
|
||||
mod = importlib.import_module("tui_gateway.server")
|
||||
yield mod
|
||||
# Reset module-level session state without re-importing. importlib.reload
|
||||
# would re-register the module's atexit hooks (ThreadPoolExecutor
|
||||
# shutdown, _shutdown_sessions); the duplicates race the stderr
|
||||
# buffer at interpreter shutdown and surface as Fatal Python error:
|
||||
# _enter_buffered_busy. Clearing the per-session dicts gives the
|
||||
# next test a clean slate; _methods is NOT cleared because it's
|
||||
# populated at module import time and re-registration only happens
|
||||
# via reload (which we don't do).
|
||||
mod._sessions.clear()
|
||||
mod._pending.clear()
|
||||
mod._answers.clear()
|
||||
mod._methods.clear()
|
||||
importlib.reload(mod)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
|
||||
@@ -34,11 +34,17 @@ def server():
|
||||
|
||||
mod = importlib.import_module("tui_gateway.server")
|
||||
yield mod
|
||||
# Reset module-level session state without re-importing. importlib.reload
|
||||
# would re-register the module's atexit hooks (ThreadPoolExecutor
|
||||
# shutdown, _shutdown_sessions); the duplicates race the stderr
|
||||
# buffer at interpreter shutdown and surface as Fatal Python error:
|
||||
# _enter_buffered_busy. Clearing the per-session dicts gives the
|
||||
# next test a clean slate; _methods is NOT cleared because it's
|
||||
# populated at module import time and re-registration only happens
|
||||
# via reload (which we don't do).
|
||||
mod._sessions.clear()
|
||||
mod._pending.clear()
|
||||
mod._answers.clear()
|
||||
mod._methods.clear()
|
||||
importlib.reload(mod)
|
||||
|
||||
|
||||
def test_init_session_attaches_background_review_callback(server, monkeypatch):
|
||||
|
||||
+427
-40
@@ -98,6 +98,167 @@ def _load_hermes_env_vars() -> dict[str, str]:
|
||||
return {}
|
||||
|
||||
|
||||
# Docker label values must match [a-zA-Z0-9_.-] and stay ≤63 chars to round-trip
|
||||
# safely through `docker ps --filter label=key=value`. Profile and task names
|
||||
# can technically contain other characters; sanitize defensively.
|
||||
_LABEL_VALUE_OK_RE = re.compile(r"[^A-Za-z0-9_.-]")
|
||||
|
||||
|
||||
def _sanitize_label_value(value: str) -> str:
|
||||
"""Coerce *value* into a Docker label-safe form (alnum + ``_.-``, ≤63 chars).
|
||||
|
||||
Empty or all-invalid inputs collapse to ``"unknown"`` so the resulting
|
||||
label is always queryable. Used at container-create time; never round-trip
|
||||
a sanitized value back into application logic.
|
||||
"""
|
||||
if not isinstance(value, str) or not value:
|
||||
return "unknown"
|
||||
cleaned = _LABEL_VALUE_OK_RE.sub("_", value)
|
||||
cleaned = cleaned[:63] or "unknown"
|
||||
return cleaned
|
||||
|
||||
|
||||
def _get_active_profile_name() -> str:
|
||||
"""Return the active Hermes profile name, or ``"default"`` on any error.
|
||||
|
||||
Resolved at container-create time so a single container is permanently
|
||||
tagged with the profile that created it. Profile switches inside the
|
||||
same process don't retroactively relabel running containers.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.profiles import get_active_profile_name
|
||||
|
||||
return get_active_profile_name() or "default"
|
||||
except Exception:
|
||||
return "default"
|
||||
|
||||
|
||||
def reap_orphan_containers(
|
||||
*,
|
||||
max_age_seconds: int = 600,
|
||||
profile_filter: str | None = None,
|
||||
docker_exe: str | None = None,
|
||||
) -> int:
|
||||
"""Remove stale hermes-tagged containers left behind by prior processes.
|
||||
|
||||
Targets containers that match all of:
|
||||
|
||||
* ``label=hermes-agent=1`` (created by this codebase)
|
||||
* ``status=exited`` (running containers are NEVER reaped — they may
|
||||
belong to a sibling Hermes process whose reuse path will pick them
|
||||
up; killing them would crash the sibling mid-command)
|
||||
* (optional) ``label=hermes-profile=<profile_filter>`` (sweep only the
|
||||
caller's profile by default; a hermes process in profile A must not
|
||||
tear down profile B's containers)
|
||||
* ``State.FinishedAt`` older than *max_age_seconds* ago (so a sibling
|
||||
process that just exited and is about to be replaced doesn't get
|
||||
its container yanked out from under it)
|
||||
|
||||
Returns the number of containers removed. Best-effort: any failure
|
||||
(docker daemon unreachable, slow inspect, parse error) is logged at
|
||||
debug level and the function returns whatever it managed before the
|
||||
failure. Safe to call repeatedly; idempotent.
|
||||
|
||||
Issue #20561 — this is the safety net for SIGKILL / OOM / crashed
|
||||
terminal exits that bypass the ``atexit`` cleanup hook. Without it,
|
||||
even with the cleanup-fix in the prior commit, a hard-killed Hermes
|
||||
process leaves its container behind permanently because there's no
|
||||
subsequent Hermes process scheduled to reuse that exact (task, profile)
|
||||
pair.
|
||||
"""
|
||||
docker = docker_exe or find_docker() or "docker"
|
||||
filters = ["--filter", "label=hermes-agent=1", "--filter", "status=exited"]
|
||||
if profile_filter:
|
||||
filters.extend(["--filter", f"label=hermes-profile={_sanitize_label_value(profile_filter)}"])
|
||||
|
||||
try:
|
||||
listing = subprocess.run(
|
||||
[docker, "ps", "-a", *filters, "--format", "{{.ID}}"],
|
||||
capture_output=True, text=True, timeout=15, check=False,
|
||||
)
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.debug("orphan reaper docker ps failed: %s", e)
|
||||
return 0
|
||||
if listing.returncode != 0:
|
||||
logger.debug(
|
||||
"orphan reaper docker ps returned %d: %s",
|
||||
listing.returncode, listing.stderr.strip(),
|
||||
)
|
||||
return 0
|
||||
|
||||
candidate_ids = [ln.strip() for ln in listing.stdout.splitlines() if ln.strip()]
|
||||
if not candidate_ids:
|
||||
return 0
|
||||
|
||||
# Inspect each candidate to get FinishedAt; reap only those exited
|
||||
# long enough ago. Doing this per-container (rather than bulk inspect)
|
||||
# keeps the failure blast radius to one container at a time.
|
||||
import datetime
|
||||
now = datetime.datetime.now(datetime.timezone.utc)
|
||||
removed = 0
|
||||
for cid in candidate_ids:
|
||||
finished_at = _container_finished_at(docker, cid)
|
||||
if finished_at is None:
|
||||
# Couldn't determine age — be conservative and leave it alone.
|
||||
continue
|
||||
age = (now - finished_at).total_seconds()
|
||||
if age < max_age_seconds:
|
||||
continue
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[docker, "rm", "-f", cid],
|
||||
capture_output=True, text=True, timeout=30,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
removed += 1
|
||||
logger.info(
|
||||
"Reaped orphan container %s (exited %d seconds ago)",
|
||||
cid[:12], int(age),
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"docker rm -f %s failed: %s",
|
||||
cid[:12], result.stderr.strip(),
|
||||
)
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.debug("orphan reaper docker rm %s failed: %s", cid[:12], e)
|
||||
return removed
|
||||
|
||||
|
||||
def _container_finished_at(docker_exe: str, container_id: str):
|
||||
"""Parse ``docker inspect`` FinishedAt for *container_id*.
|
||||
|
||||
Returns a timezone-aware datetime, or ``None`` if the field is missing,
|
||||
unparseable, or the zero-value ``0001-01-01T00:00:00Z`` Docker emits
|
||||
for never-finished containers. ``None`` means "don't reap" — the caller
|
||||
leaves the container alone.
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[docker_exe, "inspect", "--format", "{{.State.FinishedAt}}", container_id],
|
||||
capture_output=True, text=True, timeout=10, check=False,
|
||||
)
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.debug("orphan reaper docker inspect %s failed: %s", container_id[:12], e)
|
||||
return None
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
raw = result.stdout.strip()
|
||||
if not raw or raw.startswith("0001-01-01"):
|
||||
return None
|
||||
# Docker emits RFC3339 with nanoseconds (e.g. "2026-05-28T13:45:00.123456789Z").
|
||||
# Python's fromisoformat handles microseconds but not nanoseconds; trim.
|
||||
import re as _re
|
||||
raw = _re.sub(r"(\.\d{6})\d+", r"\1", raw)
|
||||
raw = raw.replace("Z", "+00:00")
|
||||
try:
|
||||
import datetime
|
||||
return datetime.datetime.fromisoformat(raw)
|
||||
except ValueError as e:
|
||||
logger.debug("could not parse FinishedAt %r for %s: %s", raw, container_id[:12], e)
|
||||
return None
|
||||
|
||||
|
||||
def find_docker() -> Optional[str]:
|
||||
"""Locate the docker (or podman) CLI binary.
|
||||
|
||||
@@ -304,15 +465,18 @@ class DockerEnvironment(BaseEnvironment):
|
||||
auto_mount_cwd: bool = False,
|
||||
run_as_host_user: bool = False,
|
||||
extra_args: list = None,
|
||||
persist_across_processes: bool = True,
|
||||
):
|
||||
if cwd == "~":
|
||||
cwd = "/root"
|
||||
super().__init__(cwd=cwd, timeout=timeout)
|
||||
self._persistent = persistent_filesystem
|
||||
self._persist_across_processes = persist_across_processes
|
||||
self._task_id = task_id
|
||||
self._forward_env = _normalize_forward_env_names(forward_env)
|
||||
self._env = _normalize_env_dict(env)
|
||||
self._container_id: Optional[str] = None
|
||||
self._labels: dict[str, str] = {}
|
||||
logger.info(f"DockerEnvironment volumes: {volumes}")
|
||||
# Ensure volumes is a list (config.yaml could be malformed)
|
||||
if volumes is not None and not isinstance(volumes, list):
|
||||
@@ -506,25 +670,88 @@ class DockerEnvironment(BaseEnvironment):
|
||||
|
||||
# Start the container directly via `docker run -d`.
|
||||
container_name = f"hermes-{uuid.uuid4().hex[:8]}"
|
||||
run_cmd = [
|
||||
self._docker_exe, "run", "-d",
|
||||
"--init", # tini/catatonit as PID 1 — reaps zombie children
|
||||
"--name", container_name,
|
||||
"-w", cwd,
|
||||
*all_run_args,
|
||||
image,
|
||||
"sleep", "infinity", # no fixed lifetime — idle reaper handles cleanup
|
||||
# Labels make hermes-created containers identifiable to:
|
||||
# * the orphan reaper (`hermes-agent=1` for the global sweep filter)
|
||||
# * future cross-process reuse (`hermes-task-id`, `hermes-profile`)
|
||||
# * operators running `docker ps --filter label=hermes-agent=1`
|
||||
# Values are limited to the safe character set defined by
|
||||
# _sanitize_label_value(); the active Hermes profile is captured at
|
||||
# container-start time and never changes for the container's lifetime.
|
||||
profile_name = _sanitize_label_value(_get_active_profile_name())
|
||||
task_label = _sanitize_label_value(task_id)
|
||||
label_args = [
|
||||
"--label", "hermes-agent=1",
|
||||
"--label", f"hermes-task-id={task_label}",
|
||||
"--label", f"hermes-profile={profile_name}",
|
||||
]
|
||||
logger.debug(f"Starting container: {' '.join(run_cmd)}")
|
||||
result = subprocess.run(
|
||||
run_cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120, # image pull may take a while
|
||||
check=True,
|
||||
)
|
||||
self._container_id = result.stdout.strip()
|
||||
logger.info(f"Started container {container_name} ({self._container_id[:12]})")
|
||||
self._labels = {
|
||||
"hermes-agent": "1",
|
||||
"hermes-task-id": task_label,
|
||||
"hermes-profile": profile_name,
|
||||
}
|
||||
|
||||
# Cross-process container reuse (issue #20561 — docs claim "ONE long-lived
|
||||
# container shared across sessions"). If a prior Hermes process
|
||||
# already started a container for this (task_id, profile) and it
|
||||
# still exists, attach to it instead of starting a fresh one. This
|
||||
# restores the documented contract; opt out via
|
||||
# ``terminal.docker_persist_across_processes: false``.
|
||||
#
|
||||
# Reuse matches on labels only — we deliberately do NOT compare image
|
||||
# / mounts / resources. Operators who need a fresh container after
|
||||
# changing those settings should set ``docker_persist_across_processes:
|
||||
# false`` (or run ``docker rm -f`` against the labeled container) to
|
||||
# force a clean start.
|
||||
reused = False
|
||||
if persist_across_processes:
|
||||
existing = self._find_reusable_container(task_label, profile_name)
|
||||
if existing is not None:
|
||||
container_id, state = existing
|
||||
self._container_id = container_id
|
||||
if state != "running":
|
||||
try:
|
||||
subprocess.run(
|
||||
[self._docker_exe, "start", container_id],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
check=True,
|
||||
)
|
||||
except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
|
||||
logger.warning(
|
||||
"Failed to start existing container %s (state=%s): "
|
||||
"%s — falling back to a fresh container.",
|
||||
container_id[:12], state, e,
|
||||
)
|
||||
self._container_id = None
|
||||
if self._container_id:
|
||||
logger.info(
|
||||
"Reusing container %s (task=%s, profile=%s, prior state=%s)",
|
||||
container_id[:12], task_label, profile_name, state,
|
||||
)
|
||||
reused = True
|
||||
|
||||
if not reused:
|
||||
run_cmd = [
|
||||
self._docker_exe, "run", "-d",
|
||||
"--init", # tini/catatonit as PID 1 — reaps zombie children
|
||||
"--name", container_name,
|
||||
*label_args,
|
||||
"-w", cwd,
|
||||
*all_run_args,
|
||||
image,
|
||||
"sleep", "infinity", # no fixed lifetime — idle reaper handles cleanup
|
||||
]
|
||||
logger.debug(f"Starting container: {' '.join(run_cmd)}")
|
||||
result = subprocess.run(
|
||||
run_cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120, # image pull may take a while
|
||||
check=True,
|
||||
)
|
||||
self._container_id = result.stdout.strip()
|
||||
logger.info(f"Started container {container_name} ({self._container_id[:12]})")
|
||||
|
||||
# Build the init-time env forwarding args (used only by init_session
|
||||
# to inject host env vars into the snapshot; subsequent commands get
|
||||
@@ -629,31 +856,191 @@ class DockerEnvironment(BaseEnvironment):
|
||||
logger.debug("Docker --storage-opt support: %s", _storage_opt_ok)
|
||||
return _storage_opt_ok
|
||||
|
||||
def cleanup(self):
|
||||
"""Stop and remove the container. Bind-mount dirs persist if persistent=True."""
|
||||
if self._container_id:
|
||||
try:
|
||||
# Stop in background so cleanup doesn't block
|
||||
stop_cmd = (
|
||||
f"(timeout 60 {self._docker_exe} stop {self._container_id} || "
|
||||
f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &"
|
||||
)
|
||||
subprocess.Popen(stop_cmd, shell=True)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to stop container %s: %s", self._container_id, e)
|
||||
def _find_reusable_container(self, task_label: str, profile_label: str) -> Optional[tuple[str, str]]:
|
||||
"""Look for an existing container labeled for this (task, profile).
|
||||
|
||||
Returns ``(container_id, state)`` on hit, ``None`` on miss / on any
|
||||
failure (including ``docker ps`` itself failing). State is one of the
|
||||
values Docker reports via ``{{.State}}`` — e.g. ``running``, ``exited``,
|
||||
``created``, ``paused``, ``restarting``, ``dead``. The caller decides
|
||||
whether the state warrants ``docker start`` before reuse.
|
||||
|
||||
Restricted to the docker-stored label set this class creates; never
|
||||
matches containers that happened to be named ``hermes-*`` but were
|
||||
started by some other tool.
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[
|
||||
self._docker_exe, "ps", "-a",
|
||||
"--filter", "label=hermes-agent=1",
|
||||
"--filter", f"label=hermes-task-id={task_label}",
|
||||
"--filter", f"label=hermes-profile={profile_label}",
|
||||
"--format", "{{.ID}}\t{{.State}}",
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
check=False,
|
||||
)
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.debug("docker ps probe failed: %s — will start a fresh container", e)
|
||||
return None
|
||||
if result.returncode != 0:
|
||||
logger.debug(
|
||||
"docker ps probe returned %d: %s — will start a fresh container",
|
||||
result.returncode, result.stderr.strip(),
|
||||
)
|
||||
return None
|
||||
lines = [ln.strip() for ln in result.stdout.splitlines() if ln.strip()]
|
||||
if not lines:
|
||||
return None
|
||||
# Multiple matches are unusual (one (task, profile) should produce one
|
||||
# container) but can happen if a previous Hermes process crashed
|
||||
# mid-cleanup. Prefer a running one if present; otherwise pick the
|
||||
# first listed. Stale duplicates get reaped by the orphan-reaper in a
|
||||
# follow-up commit; we don't try to be heroic about them here.
|
||||
running = None
|
||||
first = None
|
||||
for ln in lines:
|
||||
parts = ln.split("\t", 1)
|
||||
if len(parts) != 2:
|
||||
continue
|
||||
cid, state = parts[0], parts[1].lower()
|
||||
if first is None:
|
||||
first = (cid, state)
|
||||
if state == "running" and running is None:
|
||||
running = (cid, state)
|
||||
return running or first
|
||||
|
||||
def cleanup(self, *, force_remove: bool = False):
|
||||
"""Tear down the container according to persist mode and *force_remove*.
|
||||
|
||||
Persist-mode (``persist_across_processes=True``, the default) leaves the
|
||||
container **running** untouched. The docs promise "ONE long-lived
|
||||
container shared across sessions" and stopping it on every Hermes exit
|
||||
breaks that promise:
|
||||
|
||||
* Background processes inside the container (``npm run dev``, watchers,
|
||||
long-running pytest) get killed every time the user runs ``/quit``.
|
||||
* Every reuse requires ``docker start`` + waiting for the container to
|
||||
come back up, adding 1–2s to the first tool call of the new session.
|
||||
* The user-visible difference between "ONE long-lived container" and
|
||||
"a new container that happens to share state" is exactly this:
|
||||
processes survive in the former, die in the latter.
|
||||
|
||||
Resource reclamation for the persist-mode case lives in the
|
||||
``reap_orphan_containers()`` path (see issue #20561 commit 3): if no
|
||||
Hermes process touches a labeled container for ``2 × lifetime_seconds``
|
||||
it gets ``docker rm -f``'d at the next Hermes startup. That covers the
|
||||
SIGKILL / OOM / abandoned-laptop cases without us needing to stop the
|
||||
container on every graceful exit.
|
||||
|
||||
Opt-out mode (``persist_across_processes=False``) still does
|
||||
``docker stop`` + ``docker rm -f`` on every cleanup, matching the
|
||||
pre-PR behavior for users who explicitly want per-process isolation.
|
||||
|
||||
``force_remove=True`` overrides persist mode and always tears the
|
||||
container down (``docker stop`` + ``docker rm -f``). This is the
|
||||
explicit-teardown path for ``/reset``, ``cleanup_vm(task_id)``-driven
|
||||
resets, or any caller that wants a guaranteed fresh container on next
|
||||
``DockerEnvironment(task_id=...)``. No current caller passes
|
||||
``force_remove=True``; the parameter is here so the explicit-teardown
|
||||
semantics can be wired up later without changing this method's
|
||||
signature.
|
||||
|
||||
Cleanup runs on a daemon thread with bounded ``subprocess.run`` calls
|
||||
(not the racy ``Popen(... &)`` pattern from before PR #33645). The
|
||||
atexit hook in ``tools/terminal_tool.py`` waits up to 15s for the
|
||||
thread to finish before the interpreter exits, so ``docker stop`` /
|
||||
``docker rm`` actually completes when we do trigger it.
|
||||
"""
|
||||
container_id = self._container_id
|
||||
if not container_id:
|
||||
# Still drop the bind-mount dirs if any were allocated and we're
|
||||
# NOT in persist mode (persist mode preserves them).
|
||||
if not self._persistent:
|
||||
# Also schedule removal (stop only leaves it as stopped)
|
||||
try:
|
||||
subprocess.Popen(
|
||||
f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &",
|
||||
shell=True,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
self._container_id = None
|
||||
for d in (self._workspace_dir, self._home_dir):
|
||||
if d:
|
||||
shutil.rmtree(d, ignore_errors=True)
|
||||
return
|
||||
|
||||
if not self._persistent:
|
||||
# Decide what to actually do. Three cases:
|
||||
#
|
||||
# force_remove=True → stop + rm (explicit teardown)
|
||||
# persist_across_processes=True → no-op (leave container running)
|
||||
# persist_across_processes=False → stop + rm (per-process isolation)
|
||||
#
|
||||
# The persist-mode no-op is the issue-#20561 contract: the container
|
||||
# outlives Hermes processes, processes inside it stay alive, and
|
||||
# reuse on next startup is instant.
|
||||
if force_remove:
|
||||
should_stop = True
|
||||
should_remove = True
|
||||
elif self._persist_across_processes:
|
||||
# No-op for the container. Drop the in-process handle so a fresh
|
||||
# __init__ will re-probe via labels (and find the running
|
||||
# container) instead of trying to reuse a stale Python reference.
|
||||
self._container_id = None
|
||||
return
|
||||
else:
|
||||
should_stop = True
|
||||
should_remove = True
|
||||
|
||||
# Capture state needed by the worker before we null out the attrs —
|
||||
# the worker thread can outlive ``self``.
|
||||
docker_exe = self._docker_exe
|
||||
log_id = container_id[:12]
|
||||
|
||||
def _do_cleanup() -> None:
|
||||
if should_stop:
|
||||
try:
|
||||
subprocess.run(
|
||||
[docker_exe, "stop", "-t", "10", container_id],
|
||||
capture_output=True, timeout=30,
|
||||
)
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.warning("docker stop %s timed out / failed: %s", log_id, e)
|
||||
if should_remove:
|
||||
try:
|
||||
subprocess.run(
|
||||
[docker_exe, "rm", "-f", container_id],
|
||||
capture_output=True, timeout=30,
|
||||
)
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.warning("docker rm -f %s failed: %s", log_id, e)
|
||||
|
||||
# Daemon thread: doesn't block interpreter exit (atexit returns
|
||||
# promptly), but unlike the old ``Popen(... &)`` shell trick the
|
||||
# Python-level join semantics let the thread actually run to
|
||||
# completion if the interpreter is still alive. atexit registers
|
||||
# ``_atexit_cleanup`` in terminal_tool.py which waits up to ~60s for
|
||||
# outstanding cleanups, so most exits complete the work cleanly.
|
||||
import threading
|
||||
t = threading.Thread(target=_do_cleanup, daemon=True, name=f"hermes-cleanup-{log_id}")
|
||||
t.start()
|
||||
self._cleanup_thread = t
|
||||
self._container_id = None
|
||||
|
||||
# Bind-mount dir teardown only runs when we actually removed the
|
||||
# container (the dirs are the container's filesystem state; keeping
|
||||
# them around with no container would orphan the data on disk).
|
||||
if should_remove and not self._persistent:
|
||||
for d in (self._workspace_dir, self._home_dir):
|
||||
if d:
|
||||
shutil.rmtree(d, ignore_errors=True)
|
||||
|
||||
def wait_for_cleanup(self, timeout: float = 30.0) -> bool:
    """Wait at most *timeout* seconds for the cleanup worker thread to end.

    Returns ``True`` when there is nothing left to wait for — no worker was
    ever recorded on this instance, or it has already finished, or it
    finishes within the timeout — and ``False`` when the worker is still
    alive after the wait. Intended to be called before interpreter exit so
    a pending docker stop/rm gets a chance to complete.
    """
    worker = getattr(self, "_cleanup_thread", None)
    if worker is not None and worker.is_alive():
        worker.join(timeout=timeout)
        return not worker.is_alive()
    return True
|
||||
|
||||
@@ -422,6 +422,17 @@ def _resolve_stdio_command(command: str, env: dict) -> tuple[str, dict]:
|
||||
candidates = [
|
||||
os.path.join(hermes_home, "node", "bin", resolved_command),
|
||||
os.path.join(os.path.expanduser("~"), ".local", "bin", resolved_command),
|
||||
# /usr/local/bin is the canonical install location for Node on
|
||||
# Linux from-source builds, the upstream node:bookworm-slim
|
||||
# image (which the Hermes Docker image copies node + npm +
|
||||
# corepack from since #4977), and macOS Homebrew on Intel.
|
||||
# Without this candidate, any MCP server configured with an
|
||||
# env.PATH that omits /usr/local/bin (a common pattern when
|
||||
# users hand-author PATH for sandboxing) fails with ENOENT
|
||||
# at execvp, and a naive symlink workaround into the user's
|
||||
# PATH only fails one layer deeper because npx's shebang
|
||||
# re-execs /usr/bin/env node which needs the same directory.
|
||||
os.path.join(os.sep, "usr", "local", "bin", resolved_command),
|
||||
]
|
||||
for candidate in candidates:
|
||||
if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
|
||||
|
||||
@@ -139,7 +139,7 @@ SEND_MESSAGE_SCHEMA = {
|
||||
},
|
||||
"message": {
|
||||
"type": "string",
|
||||
"description": "The message text to send. To send an image or file, include MEDIA:<local_path> for a file under a Hermes media cache or HERMES_MEDIA_ALLOW_DIRS — the platform will deliver it as a native media attachment."
|
||||
"description": "The message text to send. To send an image or file, include MEDIA:<local_path> (e.g. 'MEDIA:/tmp/report.pdf') in the message — the platform will deliver it as a native media attachment."
|
||||
}
|
||||
},
|
||||
"required": []
|
||||
|
||||
+105
-1
@@ -1217,6 +1217,16 @@ class SkillsShSource(SkillSource):
|
||||
|
||||
BASE_URL = "https://skills.sh"
|
||||
SEARCH_URL = f"{BASE_URL}/api/search"
|
||||
# Sitemap index — the real catalog source. The homepage scrape only
|
||||
# exposes a curated featured strip (~200 entries); the sitemap covers
|
||||
# the full ~20k+ catalog. https://www.skills.sh/sitemap.xml points at
|
||||
# sitemap-skills-1.xml + sitemap-skills-2.xml, each up to 10k URLs.
|
||||
SITEMAP_INDEX_URL = "https://www.skills.sh/sitemap.xml"
|
||||
_SITEMAP_LOC_RE = re.compile(r"<loc>([^<]+)</loc>", re.IGNORECASE)
|
||||
_SITEMAP_SKILL_RE = re.compile(
|
||||
r"^https?://(?:www\.)?skills\.sh/(?P<owner>[^/]+)/(?P<repo>[^/]+)/(?P<skill>[^/]+)/?$",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_SKILL_LINK_RE = re.compile(r'href=["\']/(?P<id>(?!agents/|_next/|api/)[^"\'/]+/[^"\'/]+/[^"\'/]+)["\']')
|
||||
_INSTALL_CMD_RE = re.compile(
|
||||
r'npx\s+skills\s+add\s+(?P<repo>https?://github\.com/[^\s<]+|[^\s<]+)'
|
||||
@@ -1246,7 +1256,10 @@ class SkillsShSource(SkillSource):
|
||||
|
||||
def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
|
||||
if not query.strip():
|
||||
return self._featured_skills(limit)
|
||||
# Empty query = bulk catalog dump (what build_skills_index.py
|
||||
# calls with). The homepage scrape only sees ~200 featured
|
||||
# entries; the sitemap walks the full ~20k+ catalog.
|
||||
return self._sitemap_catalog(limit)
|
||||
|
||||
cache_key = f"skills_sh_search_{hashlib.md5(f'{query}|{limit}'.encode()).hexdigest()}"
|
||||
cached = _read_index_cache(cache_key)
|
||||
@@ -1307,6 +1320,97 @@ class SkillsShSource(SkillSource):
|
||||
return self._finalize_inspect_meta(meta, canonical, detail)
|
||||
return None
|
||||
|
||||
def _sitemap_catalog(self, limit: int) -> List[SkillMeta]:
    """Walk the skills.sh sitemap to enumerate the full catalog.

    The result is read from / written to the index cache under
    ``skills_sh_sitemap_v1`` so the sitemap XML is not refetched on every
    build. Falls back to ``_featured_skills`` if the sitemap index is
    unreachable, returns a non-200 status, lists no per-skill sitemaps, or
    the walk yields no skills.

    The cache is only written when every per-skill sitemap was fetched
    successfully. If some of them fail, the partial list is still returned
    to the caller but is not cached, so a transient failure cannot pin an
    incomplete catalog until the cache expires.

    Args:
        limit: Maximum number of entries to return; ``limit <= 0`` returns
            everything.

    Returns:
        ``SkillMeta`` entries built from the sitemap URLs, de-duplicated on
        ``owner/repo/skill`` and truncated to *limit*.
    """
    cache_key = "skills_sh_sitemap_v1"
    cached = _read_index_cache(cache_key)
    if cached is not None:
        metas = [SkillMeta(**item) for item in cached]
        return metas[:limit] if limit > 0 else metas

    # skills.sh serves the per-skill sitemaps brotli-compressed, and
    # httpx's optional brotlicffi backend has a streaming-decode bug
    # that fails on these specific payloads. Excluding "br" from
    # Accept-Encoding makes the server fall back to gzip (or
    # identity), which works on every httpx install.
    sitemap_headers = {"Accept-Encoding": "gzip"}

    # Step 1: fetch the sitemap index → list of skill-sitemap URLs.
    skill_sitemap_urls: List[str] = []
    try:
        resp = httpx.get(
            self.SITEMAP_INDEX_URL,
            timeout=20,
            follow_redirects=True,
            headers=sitemap_headers,
        )
        if resp.status_code != 200:
            return self._featured_skills(limit)
        for match in self._SITEMAP_LOC_RE.finditer(resp.text):
            loc = match.group(1).strip()
            # Sitemap index entries that point at the per-skill maps.
            if "sitemap-skills" in loc:
                skill_sitemap_urls.append(loc)
    except httpx.HTTPError:
        return self._featured_skills(limit)

    if not skill_sitemap_urls:
        return self._featured_skills(limit)

    # Step 2: fetch each skill sitemap and collect canonical "owner/repo/skill" IDs.
    seen: set[str] = set()
    results: List[SkillMeta] = []
    # Flipped to False as soon as any per-skill sitemap cannot be fetched;
    # an incomplete walk must not be cached as the full catalog.
    walk_complete = True
    for sitemap_url in skill_sitemap_urls:
        try:
            resp = httpx.get(
                sitemap_url,
                timeout=30,
                follow_redirects=True,
                headers=sitemap_headers,
            )
        except httpx.HTTPError:
            walk_complete = False
            continue
        if resp.status_code != 200:
            walk_complete = False
            continue
        for loc_match in self._SITEMAP_LOC_RE.finditer(resp.text):
            url = loc_match.group(1).strip()
            m = self._SITEMAP_SKILL_RE.match(url)
            if not m:
                continue
            owner = m.group("owner")
            repo_name = m.group("repo")
            skill_name = m.group("skill")
            canonical = f"{owner}/{repo_name}/{skill_name}"
            if canonical in seen:
                continue
            seen.add(canonical)
            repo = f"{owner}/{repo_name}"
            results.append(SkillMeta(
                name=skill_name,
                description=f"Indexed by skills.sh from {repo}",
                source="skills.sh",
                identifier=self._wrap_identifier(canonical),
                trust_level=self.github.trust_level_for(canonical),
                repo=repo,
                path=skill_name,
                extra={
                    "detail_url": f"{self.BASE_URL}/{canonical}",
                    "repo_url": f"https://github.com/{repo}",
                },
            ))

    if not results:
        return self._featured_skills(limit)

    if walk_complete:
        _write_index_cache(cache_key, [_skill_meta_to_dict(item) for item in results])
    return results[:limit] if limit > 0 else results
|
||||
|
||||
def _featured_skills(self, limit: int) -> List[SkillMeta]:
|
||||
cache_key = "skills_sh_featured"
|
||||
cached = _read_index_cache(cache_key)
|
||||
|
||||
+143
-3
@@ -861,6 +861,78 @@ _creation_locks_lock = threading.Lock() # Protects _creation_locks dict itself
|
||||
_cleanup_thread = None
|
||||
_cleanup_running = False
|
||||
|
||||
# Once-per-process guard for the docker orphan reaper (issue #20561).
|
||||
# Set when _maybe_reap_docker_orphans first runs; concurrent _create_environment
|
||||
# calls for parallel subagents won't re-trigger the sweep.
|
||||
_docker_orphan_reaper_ran = False
|
||||
_docker_orphan_reaper_lock = threading.Lock()
|
||||
|
||||
|
||||
def _maybe_reap_docker_orphans(container_config: Dict[str, Any]) -> None:
|
||||
"""Run the docker orphan reaper once per process, if enabled.
|
||||
|
||||
Sweeps long-Exited containers labeled ``hermes-agent=1`` for the current
|
||||
profile that match the issue #20561 leak class — containers left behind
|
||||
by Hermes processes that exited without firing ``atexit`` (SIGKILL,
|
||||
OOM, terminal-window-close). The reaper is conservative by default:
|
||||
only Exited containers older than ``2 × lifetime_seconds`` and scoped to
|
||||
the current profile.
|
||||
|
||||
Gates:
|
||||
|
||||
* ``terminal.docker_orphan_reaper: false`` disables it entirely (the
|
||||
operator opted out — usually because they're running multiple
|
||||
Hermes processes in the same profile and don't trust the
|
||||
conservative defaults).
|
||||
* ``_docker_orphan_reaper_ran`` flag — sweep runs once per Python
|
||||
interpreter, not on every subagent / RL-rollout / parallel
|
||||
``terminal()`` call.
|
||||
"""
|
||||
global _docker_orphan_reaper_ran
|
||||
if not container_config.get("docker_orphan_reaper", True):
|
||||
return
|
||||
# Cheap double-checked-locking: read without the lock, take the lock
|
||||
# only on first run, recheck inside.
|
||||
if _docker_orphan_reaper_ran:
|
||||
return
|
||||
with _docker_orphan_reaper_lock:
|
||||
if _docker_orphan_reaper_ran:
|
||||
return
|
||||
_docker_orphan_reaper_ran = True
|
||||
|
||||
# 2 × lifetime_seconds gives sibling Hermes processes a generous grace
|
||||
# window. Floor at 60s so an operator with TERMINAL_LIFETIME_SECONDS=0
|
||||
# doesn't get an instant-reap that races their own setup.
|
||||
# ``container_config`` only carries container_* keys, so read
|
||||
# lifetime_seconds from the env var the rest of the module uses.
|
||||
try:
|
||||
lifetime = int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300"))
|
||||
except (TypeError, ValueError):
|
||||
lifetime = 300
|
||||
lifetime = max(60, lifetime)
|
||||
max_age = lifetime * 2
|
||||
|
||||
try:
|
||||
from tools.environments.docker import (
|
||||
reap_orphan_containers, _get_active_profile_name,
|
||||
)
|
||||
except ImportError:
|
||||
return
|
||||
try:
|
||||
profile = _get_active_profile_name()
|
||||
removed = reap_orphan_containers(
|
||||
max_age_seconds=max_age, profile_filter=profile,
|
||||
)
|
||||
if removed:
|
||||
logger.info(
|
||||
"Docker orphan reaper removed %d stale container(s) for profile %s",
|
||||
removed, profile,
|
||||
)
|
||||
except Exception as e:
|
||||
# Never fail the env-creation path because of a janitor problem.
|
||||
logger.debug("Docker orphan reaper raised: %s", e)
|
||||
|
||||
|
||||
# Per-task environment overrides registry.
|
||||
# Allows environments (e.g., TerminalBench2Env) to specify a custom Docker/Modal
|
||||
# image for a specific task_id BEFORE the agent loop starts. When the terminal or
|
||||
@@ -1024,6 +1096,22 @@ def _get_env_config() -> Dict[str, Any]:
|
||||
"docker_env": _parse_env_var("TERMINAL_DOCKER_ENV", "{}", json.loads, "valid JSON"),
|
||||
"docker_run_as_host_user": os.getenv("TERMINAL_DOCKER_RUN_AS_HOST_USER", "false").lower() in {"true", "1", "yes"},
|
||||
"docker_extra_args": _parse_env_var("TERMINAL_DOCKER_EXTRA_ARGS", "[]", json.loads, "valid JSON"),
|
||||
# Cross-process container reuse (issue #20561). The docs claim
|
||||
# "ONE long-lived container shared across sessions" — this toggle
|
||||
# makes that real by probing for a labeled container at startup and
|
||||
# attaching to it instead of always starting a fresh one. Set to
|
||||
# ``false`` for hard per-process isolation (no reuse, container is
|
||||
# removed on exit).
|
||||
"docker_persist_across_processes": os.getenv(
|
||||
"TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", "true"
|
||||
).lower() in {"true", "1", "yes"},
|
||||
# Startup orphan reaper for hermes-tagged containers left behind by
|
||||
# crashed / SIGKILL'd previous processes that bypassed atexit.
|
||||
# Conservative: only sweeps Exited containers older than 2× the
|
||||
# idle-reap window AND scoped to the current profile. Issue #20561.
|
||||
"docker_orphan_reaper": os.getenv(
|
||||
"TERMINAL_DOCKER_ORPHAN_REAPER", "true"
|
||||
).lower() in {"true", "1", "yes"},
|
||||
}
|
||||
|
||||
|
||||
@@ -1072,6 +1160,13 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
||||
return _LocalEnvironment(cwd=cwd, timeout=timeout)
|
||||
|
||||
elif env_type == "docker":
|
||||
# One-shot orphan reaper: clean up labeled containers left behind by
|
||||
# prior Hermes processes that hit SIGKILL / OOM / a closed terminal
|
||||
# before the atexit cleanup hook could run. Gated to once per
|
||||
# process so concurrent _create_environment calls (parallel
|
||||
# subagents, RL benchmarks) don't run the reaper N times.
|
||||
# Disable via ``terminal.docker_orphan_reaper: false`` (issue #20561).
|
||||
_maybe_reap_docker_orphans(cc)
|
||||
return _DockerEnvironment(
|
||||
image=image, cwd=cwd, timeout=timeout,
|
||||
cpu=cpu, memory=memory, disk=disk,
|
||||
@@ -1083,6 +1178,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
||||
env=docker_env,
|
||||
run_as_host_user=cc.get("docker_run_as_host_user", False),
|
||||
extra_args=docker_extra_args,
|
||||
persist_across_processes=cc.get("docker_persist_across_processes", True),
|
||||
)
|
||||
|
||||
elif env_type == "singularity":
|
||||
@@ -1330,8 +1426,27 @@ def cleanup_all_environments():
|
||||
return cleaned
|
||||
|
||||
|
||||
def cleanup_vm(task_id: str):
|
||||
"""Manually clean up a specific environment by task_id."""
|
||||
def cleanup_vm(task_id: str, *, force_remove: bool = False):
|
||||
"""Manually clean up a specific environment by task_id.
|
||||
|
||||
*force_remove* (default False) is forwarded to backends that accept it
|
||||
— currently only ``DockerEnvironment``. The default of False matches
|
||||
session-lifecycle semantics: this function is called from
|
||||
``AIAgent.close()`` (TUI session close, gateway session teardown) and the
|
||||
per-turn cleanup branch for non-persistent envs, both of which should
|
||||
honor the user's persist-mode preference. Stopping the container here
|
||||
would defeat the "ONE long-lived container shared across sessions"
|
||||
contract — exactly the bug Ben reported when the container was killed
|
||||
on every TUI session close.
|
||||
|
||||
Pass ``force_remove=True`` for actual user-initiated teardown
|
||||
(e.g. ``/reset``-style flows that haven't been wired yet, or future
|
||||
"destroy my sandbox" commands).
|
||||
|
||||
The idle reaper passes the env through ``env.cleanup()`` directly (not
|
||||
via this function), so persist-mode idle envs are similarly no-op'd —
|
||||
only the orphan reaper at next startup reclaims them.
|
||||
"""
|
||||
# Remove from tracking dicts while holding the lock, but defer the
|
||||
# actual (potentially slow) env.cleanup() call to outside the lock
|
||||
# so other tool calls aren't blocked.
|
||||
@@ -1356,7 +1471,14 @@ def cleanup_vm(task_id: str):
|
||||
|
||||
try:
|
||||
if hasattr(env, 'cleanup'):
|
||||
env.cleanup()
|
||||
# Pass force_remove only if the env's cleanup() accepts it
|
||||
# (DockerEnvironment after issue #20561; other backends don't).
|
||||
import inspect
|
||||
sig = inspect.signature(env.cleanup)
|
||||
if "force_remove" in sig.parameters:
|
||||
env.cleanup(force_remove=force_remove)
|
||||
else:
|
||||
env.cleanup()
|
||||
elif hasattr(env, 'stop'):
|
||||
env.stop()
|
||||
elif hasattr(env, 'terminate'):
|
||||
@@ -1378,7 +1500,23 @@ def _atexit_cleanup():
|
||||
if _active_environments:
|
||||
count = len(_active_environments)
|
||||
logger.info("Shutting down %d remaining sandbox(es)...", count)
|
||||
# Snapshot the env objects BEFORE cleanup_all_environments empties
|
||||
# the dict; we need them to wait on docker cleanup threads after the
|
||||
# registry has been cleared.
|
||||
envs_to_wait = list(_active_environments.values())
|
||||
cleanup_all_environments()
|
||||
# Block briefly so docker stop/rm actually completes before the
|
||||
# interpreter exits. Issue #20561 — without this join, the daemon
|
||||
# cleanup threads were getting torn down mid-`docker stop`, leaving
|
||||
# Exited containers piled up on the host.
|
||||
for env in envs_to_wait:
|
||||
wait_fn = getattr(env, "wait_for_cleanup", None)
|
||||
if wait_fn is None:
|
||||
continue
|
||||
try:
|
||||
wait_fn(timeout=15.0)
|
||||
except Exception as e: # never block shutdown on a bad backend
|
||||
logger.debug("wait_for_cleanup raised on exit: %s", e)
|
||||
|
||||
atexit.register(_atexit_cleanup)
|
||||
|
||||
@@ -1746,6 +1884,8 @@ def terminal_tool(
|
||||
"docker_env": config.get("docker_env", {}),
|
||||
"docker_run_as_host_user": config.get("docker_run_as_host_user", False),
|
||||
"docker_extra_args": config.get("docker_extra_args", []),
|
||||
"docker_persist_across_processes": config.get("docker_persist_across_processes", True),
|
||||
"docker_orphan_reaper": config.get("docker_orphan_reaper", True),
|
||||
}
|
||||
|
||||
local_config = None
|
||||
|
||||
@@ -1112,7 +1112,7 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
|
||||
from hermes_cli.model_switch import parse_model_flags, switch_model
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
model_input, explicit_provider, persist_global = parse_model_flags(raw_input)
|
||||
model_input, explicit_provider, persist_global, _force_refresh = parse_model_flags(raw_input)
|
||||
if not model_input:
|
||||
raise ValueError("model value required")
|
||||
|
||||
|
||||
@@ -1589,7 +1589,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "hermes-agent"
|
||||
version = "0.15.0"
|
||||
version = "0.15.1"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "croniter" },
|
||||
|
||||
Generated
+2786
-6
File diff suppressed because it is too large
Load Diff
+1
-1
@@ -10,7 +10,7 @@
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@nous-research/ui": "0.16.0",
|
||||
"@nous-research/ui": "0.18.2",
|
||||
"@observablehq/plot": "^0.6.17",
|
||||
"@react-three/fiber": "^9.6.0",
|
||||
"@tailwindcss/vite": "^4.2.1",
|
||||
|
||||
+2
-2
@@ -50,12 +50,12 @@ import {
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { SelectionSwitcher } from "@nous-research/ui/ui/components/selection-switcher";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { Typography } from "@/components/NouiTypography";
|
||||
import { Typography } from "@nous-research/ui/ui/components/typography/index";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { Backdrop } from "@/components/Backdrop";
|
||||
import { SidebarFooter } from "@/components/SidebarFooter";
|
||||
import { SidebarStatusStrip, gatewayLine } from "@/components/SidebarStatusStrip";
|
||||
import { useBelowBreakpoint } from "@/hooks/useBelowBreakpoint";
|
||||
import { useBelowBreakpoint } from "@nous-research/ui/hooks/use-below-breakpoint";
|
||||
import { useSidebarStatus } from "@/hooks/useSidebarStatus";
|
||||
import { AuthWidget } from "@/components/AuthWidget";
|
||||
import { PageHeaderProvider } from "@/contexts/PageHeaderProvider";
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
|
||||
import { Switch } from "@nous-research/ui/ui/components/switch";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Input } from "@nous-research/ui/ui/components/input";
|
||||
import { Label } from "@nous-research/ui/ui/components/label";
|
||||
|
||||
function FieldHint({ schema, schemaKey }: { schema: Record<string, unknown>; schemaKey: string }) {
|
||||
const keyPath = schemaKey.includes(".") ? schemaKey : "";
|
||||
|
||||
@@ -1,225 +0,0 @@
|
||||
import {
|
||||
type PointerEvent as ReactPointerEvent,
|
||||
type ReactNode,
|
||||
useEffect,
|
||||
useRef,
|
||||
useState,
|
||||
} from "react";
|
||||
import { createPortal } from "react-dom";
|
||||
import { Typography } from "@/components/NouiTypography";
|
||||
import { cn, themedBody } from "@/lib/utils";
|
||||
|
||||
const CLOSE_DRAG_MIN_PX = 72;
|
||||
const CLOSE_DRAG_RATIO = 0.18;
|
||||
const SHEET_TRANSITION_MS = 280;
|
||||
|
||||
/**
|
||||
* Mobile-first picker shell: fixed backdrop + bottom sheet, portaled to `body`
|
||||
* so nested overflow/transform in the sidebar cannot clip menus (theme /
|
||||
* language switchers). Open/close uses slide + fade; teardown is delayed until
|
||||
* the exit animation finishes so animations can complete.
|
||||
*
|
||||
* Drag the header/handle downward to dismiss (skipped when reduced motion is on).
|
||||
*/
|
||||
export function BottomPickSheet({
|
||||
backdropDismissLabel = "Dismiss",
|
||||
children,
|
||||
onClose,
|
||||
open,
|
||||
title,
|
||||
}: BottomPickSheetProps) {
|
||||
const [renderPortal, setRenderPortal] = useState(open);
|
||||
const [entered, setEntered] = useState(false);
|
||||
const [dragOffsetPx, setDragOffsetPx] = useState(0);
|
||||
const [dragActive, setDragActive] = useState(false);
|
||||
|
||||
const closeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||
const sheetRef = useRef<HTMLDivElement>(null);
|
||||
const dragTrackingRef = useRef(false);
|
||||
const dragStartYRef = useRef(0);
|
||||
const dragOffsetRef = useRef(0);
|
||||
|
||||
const reducedMotion =
|
||||
typeof window !== "undefined" &&
|
||||
window.matchMedia("(prefers-reduced-motion: reduce)").matches;
|
||||
|
||||
const syncDragPx = (next: number) => {
|
||||
dragOffsetRef.current = next;
|
||||
setDragOffsetPx(next);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
if (closeTimerRef.current) {
|
||||
clearTimeout(closeTimerRef.current);
|
||||
closeTimerRef.current = null;
|
||||
}
|
||||
|
||||
const ms = reducedMotion ? 0 : SHEET_TRANSITION_MS;
|
||||
|
||||
let openRafId = 0;
|
||||
let exitRafId = 0;
|
||||
|
||||
if (open) {
|
||||
openRafId = requestAnimationFrame(() => {
|
||||
dragTrackingRef.current = false;
|
||||
dragOffsetRef.current = 0;
|
||||
setDragActive(false);
|
||||
setDragOffsetPx(0);
|
||||
setRenderPortal(true);
|
||||
requestAnimationFrame(() => {
|
||||
requestAnimationFrame(() => setEntered(true));
|
||||
});
|
||||
});
|
||||
} else {
|
||||
exitRafId = requestAnimationFrame(() => {
|
||||
dragTrackingRef.current = false;
|
||||
setDragActive(false);
|
||||
setEntered(false);
|
||||
closeTimerRef.current = window.setTimeout(() => {
|
||||
dragOffsetRef.current = 0;
|
||||
setDragOffsetPx(0);
|
||||
setRenderPortal(false);
|
||||
closeTimerRef.current = null;
|
||||
}, ms);
|
||||
});
|
||||
}
|
||||
|
||||
return () => {
|
||||
cancelAnimationFrame(openRafId);
|
||||
cancelAnimationFrame(exitRafId);
|
||||
if (closeTimerRef.current) {
|
||||
clearTimeout(closeTimerRef.current);
|
||||
closeTimerRef.current = null;
|
||||
}
|
||||
};
|
||||
}, [open, reducedMotion]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!renderPortal) return;
|
||||
const prev = document.body.style.overflow;
|
||||
document.body.style.overflow = "hidden";
|
||||
return () => {
|
||||
document.body.style.overflow = prev;
|
||||
};
|
||||
}, [renderPortal]);
|
||||
|
||||
if (!renderPortal || typeof document === "undefined") return null;
|
||||
|
||||
const durationClass = reducedMotion ? "duration-0" : "duration-[280ms]";
|
||||
|
||||
const draggingVisual = dragActive || dragOffsetPx > 0;
|
||||
|
||||
const onDragPointerDown = (e: ReactPointerEvent<HTMLDivElement>) => {
|
||||
if (reducedMotion || !entered) return;
|
||||
if (e.pointerType === "mouse" && e.button !== 0) return;
|
||||
|
||||
dragTrackingRef.current = true;
|
||||
setDragActive(true);
|
||||
dragStartYRef.current = e.clientY;
|
||||
syncDragPx(0);
|
||||
e.currentTarget.setPointerCapture(e.pointerId);
|
||||
};
|
||||
|
||||
const onDragPointerMove = (e: ReactPointerEvent<HTMLDivElement>) => {
|
||||
if (!dragTrackingRef.current) return;
|
||||
const dy = e.clientY - dragStartYRef.current;
|
||||
const next = Math.max(0, dy);
|
||||
const sheetH = sheetRef.current?.offsetHeight ?? 560;
|
||||
syncDragPx(Math.min(next, sheetH));
|
||||
};
|
||||
|
||||
const endDrag = (e: ReactPointerEvent<HTMLDivElement>) => {
|
||||
if (!dragTrackingRef.current) return;
|
||||
dragTrackingRef.current = false;
|
||||
setDragActive(false);
|
||||
try {
|
||||
e.currentTarget.releasePointerCapture(e.pointerId);
|
||||
} catch {
|
||||
/* already released */
|
||||
}
|
||||
|
||||
const sheetH = sheetRef.current?.offsetHeight ?? 560;
|
||||
const threshold = Math.max(CLOSE_DRAG_MIN_PX, sheetH * CLOSE_DRAG_RATIO);
|
||||
const d = dragOffsetRef.current;
|
||||
|
||||
if (d >= threshold) {
|
||||
onClose();
|
||||
return;
|
||||
}
|
||||
syncDragPx(0);
|
||||
};
|
||||
|
||||
return createPortal(
|
||||
<div className="fixed inset-0 z-[200] flex flex-col justify-end">
|
||||
<button
|
||||
type="button"
|
||||
aria-label={backdropDismissLabel}
|
||||
className={cn(
|
||||
"absolute inset-0 bg-black/55 backdrop-blur-[2px]",
|
||||
"transition-opacity ease-out motion-reduce:transition-none",
|
||||
durationClass,
|
||||
entered ? "opacity-100" : "opacity-0",
|
||||
)}
|
||||
onClick={onClose}
|
||||
/>
|
||||
|
||||
<div
|
||||
aria-label={title}
|
||||
aria-modal="true"
|
||||
ref={sheetRef}
|
||||
className={cn(
|
||||
themedBody,
|
||||
"relative flex max-h-[85dvh] min-h-0 flex-col rounded-t-xl border border-current/20",
|
||||
"bg-background-base/98 pb-[max(1rem,env(safe-area-inset-bottom))]",
|
||||
"shadow-[0_-12px_40px_-8px_rgba(0,0,0,0.55)] backdrop-blur-md",
|
||||
"ease-out motion-reduce:transition-none transform-gpu",
|
||||
draggingVisual ? "transition-none" : cn("transition-transform", durationClass),
|
||||
entered ? "translate-y-0" : "translate-y-full",
|
||||
)}
|
||||
role="dialog"
|
||||
style={
|
||||
entered && dragOffsetPx > 0
|
||||
? { transform: `translateY(${dragOffsetPx}px)` }
|
||||
: undefined
|
||||
}
|
||||
>
|
||||
<div
|
||||
className={cn(
|
||||
"flex shrink-0 flex-col gap-2 border-b border-current/15 px-4 pb-3 pt-2",
|
||||
"touch-none select-none",
|
||||
reducedMotion ? "cursor-default" : "cursor-grab active:cursor-grabbing",
|
||||
)}
|
||||
onPointerCancel={endDrag}
|
||||
onPointerDown={onDragPointerDown}
|
||||
onPointerMove={onDragPointerMove}
|
||||
onPointerUp={endDrag}
|
||||
>
|
||||
<div
|
||||
aria-hidden
|
||||
className="mx-auto h-1 w-10 shrink-0 rounded-full bg-current/20"
|
||||
/>
|
||||
|
||||
<Typography
|
||||
mondwest
|
||||
className="text-display text-xs tracking-[0.12em] text-text-tertiary"
|
||||
>
|
||||
{title}
|
||||
</Typography>
|
||||
</div>
|
||||
|
||||
<div className="min-h-0 flex-1 overflow-y-auto overscroll-contain">
|
||||
{children}
|
||||
</div>
|
||||
</div>
|
||||
</div>,
|
||||
document.body,
|
||||
);
|
||||
}
|
||||
|
||||
interface BottomPickSheetProps {
|
||||
backdropDismissLabel?: string;
|
||||
children: ReactNode;
|
||||
onClose: () => void;
|
||||
open: boolean;
|
||||
title: string;
|
||||
}
|
||||
@@ -25,7 +25,7 @@
|
||||
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { Card } from "@/components/ui/card";
|
||||
import { Card } from "@nous-research/ui/ui/components/card";
|
||||
|
||||
import { ModelPickerDialog } from "@/components/ModelPickerDialog";
|
||||
import { ToolCall, type ToolEntry } from "@/components/ToolCall";
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
|
||||
import { ConfirmDialog } from "@nous-research/ui/ui/components/confirm-dialog";
|
||||
import { useI18n } from "@/i18n";
|
||||
|
||||
export function DeleteConfirmDialog({
|
||||
|
||||
@@ -2,9 +2,9 @@ import { useState, useRef, useEffect } from "react";
|
||||
import { createPortal } from "react-dom";
|
||||
import { Check } from "lucide-react";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { BottomPickSheet } from "@/components/BottomPickSheet";
|
||||
import { Typography } from "@/components/NouiTypography";
|
||||
import { useBelowBreakpoint } from "@/hooks/useBelowBreakpoint";
|
||||
import { BottomSheet } from "@nous-research/ui/ui/components/bottom-sheet";
|
||||
import { Typography } from "@nous-research/ui/ui/components/typography/index";
|
||||
import { useBelowBreakpoint } from "@nous-research/ui/hooks/use-below-breakpoint";
|
||||
import { useI18n } from "@/i18n/context";
|
||||
import { LOCALE_META } from "@/i18n";
|
||||
import type { Locale } from "@/i18n";
|
||||
@@ -87,7 +87,7 @@ export function LanguageSwitcher({ collapsed = false, dropUp = false }: Language
|
||||
</Button>
|
||||
|
||||
{useMobileSheet && (
|
||||
<BottomPickSheet
|
||||
<BottomSheet
|
||||
backdropDismissLabel={t.common.close}
|
||||
onClose={() => setOpen(false)}
|
||||
open={open}
|
||||
@@ -101,7 +101,7 @@ export function LanguageSwitcher({ collapsed = false, dropUp = false }: Language
|
||||
setOpen={setOpen}
|
||||
/>
|
||||
</div>
|
||||
</BottomPickSheet>
|
||||
</BottomSheet>
|
||||
)}
|
||||
|
||||
{open && !useMobileSheet && (() => {
|
||||
|
||||
@@ -2,8 +2,8 @@ import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Checkbox } from "@nous-research/ui/ui/components/checkbox";
|
||||
import { ListItem } from "@nous-research/ui/ui/components/list-item";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Input } from "@nous-research/ui/ui/components/input";
|
||||
import { Label } from "@nous-research/ui/ui/components/label";
|
||||
import type { GatewayClient } from "@/lib/gatewayClient";
|
||||
import { Check, Search, X } from "lucide-react";
|
||||
import { useEffect, useMemo, useRef, useState } from "react";
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
import { forwardRef, type ElementType, type HTMLAttributes, type ReactNode } from "react";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
type TypographyProps = HTMLAttributes<HTMLElement> & {
|
||||
as?: ElementType;
|
||||
children?: ReactNode;
|
||||
compressed?: boolean;
|
||||
courier?: boolean;
|
||||
expanded?: boolean;
|
||||
mondwest?: boolean;
|
||||
mono?: boolean;
|
||||
sans?: boolean;
|
||||
variant?: "sm" | "md" | "lg" | "xl";
|
||||
};
|
||||
|
||||
const variantClasses: Record<NonNullable<TypographyProps["variant"]>, string> = {
|
||||
sm: "leading-[1.4] text-[.9375rem] tracking-[0.1875rem]",
|
||||
md: "text-[2.625rem] leading-[1] tracking-[0.0525rem]",
|
||||
lg: "text-[2.625rem] leading-[1] tracking-[0.0525rem]",
|
||||
xl: "text-[4.5rem] leading-[1] tracking-[0.135rem]",
|
||||
};
|
||||
|
||||
export const Typography = forwardRef<HTMLElement, TypographyProps>(function Typography(
|
||||
{
|
||||
as: Component = "span",
|
||||
className,
|
||||
compressed,
|
||||
courier,
|
||||
expanded,
|
||||
mondwest,
|
||||
mono,
|
||||
sans,
|
||||
variant,
|
||||
...props
|
||||
},
|
||||
ref,
|
||||
) {
|
||||
const hasFontVariant = compressed || courier || expanded || mondwest || mono || sans;
|
||||
|
||||
return (
|
||||
<Component
|
||||
className={cn(
|
||||
compressed && "font-compressed",
|
||||
courier && "font-courier",
|
||||
expanded && "font-expanded",
|
||||
mondwest && "font-mondwest tracking-[0.1875rem]",
|
||||
mono && "font-mono",
|
||||
(!hasFontVariant || sans) && "font-sans",
|
||||
variant && variantClasses[variant],
|
||||
className,
|
||||
)}
|
||||
ref={ref}
|
||||
{...props}
|
||||
/>
|
||||
);
|
||||
});
|
||||
|
||||
export const H2 = forwardRef<HTMLHeadingElement, Omit<TypographyProps, "as">>(function H2(
|
||||
{ className, variant = "lg", ...props },
|
||||
ref,
|
||||
) {
|
||||
return <Typography as="h2" className={cn("font-bold", className)} variant={variant} ref={ref} {...props} />;
|
||||
});
|
||||
@@ -3,9 +3,9 @@ import { ExternalLink, X, Check } from "lucide-react";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { CopyButton } from "@nous-research/ui/ui/components/command-block";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { H2 } from "@/components/NouiTypography";
|
||||
import { H2 } from "@nous-research/ui/ui/components/typography/h2";
|
||||
import { api, type OAuthProvider, type OAuthStartResponse } from "@/lib/api";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Input } from "@nous-research/ui/ui/components/input";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { cn, themedBody } from "@/lib/utils";
|
||||
|
||||
|
||||
@@ -16,9 +16,9 @@ import {
|
||||
CardDescription,
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
} from "@/components/ui/card";
|
||||
} from "@nous-research/ui/ui/components/card";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
|
||||
import { ConfirmDialog } from "@nous-research/ui/ui/components/confirm-dialog";
|
||||
import { OAuthLoginModal } from "@/components/OAuthLoginModal";
|
||||
import { useI18n } from "@/i18n";
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ import { AlertTriangle, Radio, Wifi, WifiOff } from "lucide-react";
|
||||
import type { PlatformStatus } from "@/lib/api";
|
||||
import { isoTimeAgo } from "@/lib/utils";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
|
||||
import { useI18n } from "@/i18n";
|
||||
|
||||
export function PlatformsCard({ platforms }: PlatformsCardProps) {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Typography } from "@/components/NouiTypography";
|
||||
import { Typography } from "@nous-research/ui/ui/components/typography/index";
|
||||
import type { StatusResponse } from "@/lib/api";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { useI18n } from "@/i18n";
|
||||
|
||||
@@ -3,9 +3,9 @@ import { createPortal } from "react-dom";
|
||||
import { Palette, Check } from "lucide-react";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { ListItem } from "@nous-research/ui/ui/components/list-item";
|
||||
import { BottomPickSheet } from "@/components/BottomPickSheet";
|
||||
import { Typography } from "@/components/NouiTypography";
|
||||
import { useBelowBreakpoint } from "@/hooks/useBelowBreakpoint";
|
||||
import { BottomSheet } from "@nous-research/ui/ui/components/bottom-sheet";
|
||||
import { Typography } from "@nous-research/ui/ui/components/typography/index";
|
||||
import { useBelowBreakpoint } from "@nous-research/ui/hooks/use-below-breakpoint";
|
||||
import { BUILTIN_THEMES, useTheme } from "@/themes";
|
||||
import type { DashboardTheme, ThemeListEntry } from "@/themes";
|
||||
import { useI18n } from "@/i18n";
|
||||
@@ -91,7 +91,7 @@ export function ThemeSwitcher({ collapsed = false, dropUp = false }: ThemeSwitch
|
||||
</Button>
|
||||
|
||||
{useMobileSheet && (
|
||||
<BottomPickSheet
|
||||
<BottomSheet
|
||||
backdropDismissLabel={t.common.close}
|
||||
onClose={close}
|
||||
open={open}
|
||||
@@ -105,7 +105,7 @@ export function ThemeSwitcher({ collapsed = false, dropUp = false }: ThemeSwitch
|
||||
themeName={themeName}
|
||||
/>
|
||||
</div>
|
||||
</BottomPickSheet>
|
||||
</BottomSheet>
|
||||
)}
|
||||
|
||||
{open && !useMobileSheet && (() => {
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
import { useEffect, useState } from "react";
|
||||
import { createPortal } from "react-dom";
|
||||
|
||||
export function Toast({ toast }: { toast: { message: string; type: "success" | "error" } | null }) {
|
||||
const [visible, setVisible] = useState(false);
|
||||
const [current, setCurrent] = useState(toast);
|
||||
|
||||
useEffect(() => {
|
||||
if (toast) {
|
||||
setCurrent(toast);
|
||||
setVisible(true);
|
||||
} else {
|
||||
setVisible(false);
|
||||
const timer = setTimeout(() => setCurrent(null), 200);
|
||||
return () => clearTimeout(timer);
|
||||
}
|
||||
}, [toast]);
|
||||
|
||||
if (!current) return null;
|
||||
|
||||
// Portal to document.body so the toast escapes any ancestor stacking context
|
||||
// (e.g. <main> has `relative z-2`, which would trap z-50 below the header's z-40).
|
||||
return createPortal(
|
||||
<div
|
||||
role="status"
|
||||
aria-live="polite"
|
||||
className={`fixed top-16 right-4 z-50 border px-4 py-2.5 font-courier text-xs tracking-wider uppercase backdrop-blur-sm ${
|
||||
current.type === "success"
|
||||
? "bg-success/15 text-success border-success/30"
|
||||
: "bg-destructive/15 text-destructive border-destructive/30"
|
||||
}`}
|
||||
style={{
|
||||
animation: visible ? "toast-in 200ms ease-out forwards" : "toast-out 200ms ease-in forwards",
|
||||
}}
|
||||
>
|
||||
{current.message}
|
||||
</div>,
|
||||
document.body,
|
||||
);
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
import { cn, themedBody } from "@/lib/utils";
|
||||
|
||||
/**
|
||||
* Themed card primitive. Themes can restyle every card without touching
|
||||
* call sites by setting CSS vars under the `card` component-style bucket:
|
||||
*
|
||||
* componentStyles:
|
||||
* card:
|
||||
* clipPath: "polygon(10px 0, 100% 0, 100% calc(100% - 10px), calc(100% - 10px) 100%, 0 100%, 0 10px)"
|
||||
* border: "1px solid var(--color-ring)"
|
||||
* background: "linear-gradient(180deg, var(--color-card) 0%, transparent 100%)"
|
||||
* boxShadow: "0 0 0 1px var(--color-ring) inset, 0 0 24px -8px var(--warm-glow)"
|
||||
*
|
||||
* All properties are optional — vars that aren't set compute to their
|
||||
* CSS initial value, so the default shadcn-y card keeps looking normal
|
||||
* for themes that don't override anything.
|
||||
*/
|
||||
const CARD_STYLE: React.CSSProperties = {
|
||||
clipPath: "var(--component-card-clip-path)",
|
||||
borderImage: "var(--component-card-border-image)",
|
||||
background: "var(--component-card-background)",
|
||||
boxShadow: "var(--component-card-box-shadow)",
|
||||
};
|
||||
|
||||
export function Card({ className, style, ...props }: React.HTMLAttributes<HTMLDivElement>) {
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
"border border-border bg-card/80 text-card-foreground w-full",
|
||||
themedBody,
|
||||
className,
|
||||
)}
|
||||
style={{ ...CARD_STYLE, ...style }}
|
||||
{...props}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
export function CardHeader({ className, ...props }: React.HTMLAttributes<HTMLDivElement>) {
|
||||
return <div className={cn("flex flex-col gap-1.5 p-4 border-b border-border", className)} {...props} />;
|
||||
}
|
||||
|
||||
export function CardTitle({ className, ...props }: React.HTMLAttributes<HTMLHeadingElement>) {
|
||||
return (
|
||||
<h3
|
||||
className={cn(
|
||||
"font-mondwest text-display text-sm tracking-[0.12em] text-text-primary",
|
||||
className,
|
||||
)}
|
||||
{...props}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
export function CardDescription({ className, ...props }: React.HTMLAttributes<HTMLParagraphElement>) {
|
||||
return (
|
||||
<p className={cn("font-mondwest normal-case text-xs text-muted-foreground", className)} {...props} />
|
||||
);
|
||||
}
|
||||
|
||||
export function CardContent({ className, ...props }: React.HTMLAttributes<HTMLDivElement>) {
|
||||
return <div className={cn("p-4", className)} {...props} />;
|
||||
}
|
||||
@@ -1,137 +0,0 @@
|
||||
import { useEffect, useRef } from "react";
|
||||
import { createPortal } from "react-dom";
|
||||
import { AlertTriangle } from "lucide-react";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { cn, themedBody } from "@/lib/utils";
|
||||
|
||||
/**
 * Modal confirmation dialog rendered into `document.body` through a portal.
 *
 * While `open` is true the component:
 * - moves focus to the element marked `data-confirm` (the confirm button),
 * - calls `onCancel` when Escape is pressed or the backdrop itself is clicked,
 * - sets `document.body.style.overflow` to `"hidden"`.
 *
 * When the dialog closes (or the effect re-runs) the previous overflow value
 * and the previously focused element are restored. Renders nothing when
 * `open` is false. While `loading` is true both buttons are disabled and the
 * confirm label is replaced by an ellipsis.
 */
export function ConfirmDialog({
  cancelLabel = "Cancel",
  confirmLabel = "Confirm",
  description,
  destructive = false,
  loading = false,
  onCancel,
  onConfirm,
  open,
  title,
}: ConfirmDialogProps) {
  const dialogRef = useRef<HTMLDivElement>(null);

  // Focus management, Escape handling and scroll locking for the open dialog.
  useEffect(() => {
    if (!open) return;

    // Remember what had focus so it can be handed back on close.
    const previouslyFocused = document.activeElement as HTMLElement | null;
    const confirmButton =
      dialogRef.current?.querySelector<HTMLButtonElement>("[data-confirm]");
    confirmButton?.focus();

    const handleKeyDown = (event: KeyboardEvent) => {
      if (event.key !== "Escape") return;
      event.preventDefault();
      onCancel();
    };
    document.addEventListener("keydown", handleKeyDown);

    // Lock page scroll, keeping the prior value so cleanup can restore it.
    const previousOverflow = document.body.style.overflow;
    document.body.style.overflow = "hidden";

    return () => {
      document.removeEventListener("keydown", handleKeyDown);
      document.body.style.overflow = previousOverflow;
      previouslyFocused?.focus?.();
    };
  }, [open, onCancel]);

  if (!open) return null;

  const backdropClasses = cn(
    "fixed inset-0 z-50 flex items-center justify-center",
    "bg-black/60 backdrop-blur-sm",
    "animate-[fade-in_150ms_ease-out]",
  );
  const panelClasses = cn(
    themedBody,
    "relative w-full max-w-md mx-4",
    "border border-border bg-card shadow-lg",
    "animate-[dialog-in_180ms_ease-out]",
  );
  // Only reference the description element when one is actually rendered.
  const describedBy = description ? "confirm-dialog-desc" : undefined;

  const dialog = (
    <div
      role="dialog"
      aria-modal="true"
      aria-labelledby="confirm-dialog-title"
      aria-describedby={describedBy}
      onClick={(e) => {
        // Ignore clicks that originated inside the panel.
        if (e.target === e.currentTarget) onCancel();
      }}
      className={backdropClasses}
    >
      <div ref={dialogRef} className={panelClasses}>
        <div className="flex items-start gap-3 p-4 border-b border-border">
          {destructive ? (
            <div aria-hidden className="mt-0.5 shrink-0 text-destructive">
              <AlertTriangle className="h-4 w-4" />
            </div>
          ) : null}

          <div className="flex-1 min-w-0 flex flex-col gap-1">
            <h2
              id="confirm-dialog-title"
              className="font-mondwest text-display text-sm font-bold tracking-[0.12em] blend-lighter"
            >
              {title}
            </h2>

            {description && (
              <p
                id="confirm-dialog-desc"
                className="font-mondwest normal-case text-xs text-muted-foreground leading-relaxed"
              >
                {description}
              </p>
            )}
          </div>
        </div>

        <div className="flex items-center justify-end gap-2 p-3">
          <Button type="button" outlined onClick={onCancel} disabled={loading}>
            {cancelLabel}
          </Button>
          <Button
            data-confirm
            type="button"
            destructive={destructive}
            onClick={onConfirm}
            disabled={loading}
          >
            {loading ? "…" : confirmLabel}
          </Button>
        </div>
      </div>
    </div>
  );

  return createPortal(dialog, document.body);
}
|
||||
|
||||
/** Props accepted by {@link ConfirmDialog}. */
interface ConfirmDialogProps {
  /** Text of the cancel button; `ConfirmDialog` defaults it to "Cancel". */
  cancelLabel?: string;
  /** Text of the confirm button; `ConfirmDialog` defaults it to "Confirm". */
  confirmLabel?: string;
  /** Optional body text shown under the title. */
  description?: string;
  /** When true, shows the warning icon and marks the confirm button destructive. */
  destructive?: boolean;
  /** When true, both buttons are disabled and the confirm label becomes "…". */
  loading?: boolean;
  /** Called on Escape, on a backdrop click, and by the cancel button. */
  onCancel: () => void;
  /** Called when the confirm button is clicked. */
  onConfirm: () => void;
  /** Whether the dialog is rendered at all. */
  open: boolean;
  /** Heading text of the dialog. */
  title: string;
}
|
||||
@@ -1,16 +0,0 @@
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
/**
 * Styled text input. Base, placeholder, focus-visible and disabled classes are
 * merged with the caller's `className`; all other props are forwarded to the
 * underlying `<input>`.
 */
export function Input({ className, ...props }: React.InputHTMLAttributes<HTMLInputElement>) {
  const classes = cn(
    "flex h-9 w-full border border-border bg-background/40 px-3 py-1 font-courier text-sm transition-colors",
    "placeholder:text-muted-foreground",
    "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-foreground/30 focus-visible:border-foreground/25",
    "disabled:cursor-not-allowed disabled:opacity-50",
    className,
  );

  return <input className={classes} {...props} />;
}
|
||||
@@ -1,13 +0,0 @@
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
/**
 * Styled form label. The base typography and `peer-disabled` classes are
 * merged with the caller's `className`; all other props are forwarded to the
 * underlying `<label>`.
 */
export function Label({ className, ...props }: React.LabelHTMLAttributes<HTMLLabelElement>) {
  const classes = cn(
    "font-mondwest text-xs tracking-[0.1em] uppercase leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70",
    className,
  );

  return <label className={classes} {...props} />;
}
|
||||
@@ -1,19 +0,0 @@
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
/**
 * Thin divider line with `role="separator"`.
 *
 * @param orientation - `"horizontal"` (default) renders a full-width 1px-high
 *   line; `"vertical"` renders a full-height 1px-wide line.
 *
 * The orientation is also exposed through `aria-orientation`. The ARIA
 * `separator` role defaults to horizontal, so without the attribute a vertical
 * separator would be announced with the wrong orientation. The attribute is
 * set before the props spread so a caller can still override it.
 */
export function Separator({
  className,
  orientation = "horizontal",
  ...props
}: React.HTMLAttributes<HTMLDivElement> & { orientation?: "horizontal" | "vertical" }) {
  return (
    <div
      role="separator"
      aria-orientation={orientation}
      className={cn(
        "shrink-0 bg-border",
        orientation === "horizontal" ? "h-px w-full" : "h-full w-px",
        className,
      )}
      {...props}
    />
  );
}
|
||||
@@ -1,7 +1,7 @@
|
||||
import { useCallback, useEffect, useState } from "react";
|
||||
import { api } from "@/lib/api";
|
||||
import type { ActionStatusResponse } from "@/lib/api";
|
||||
import { Toast } from "@/components/Toast";
|
||||
import { Toast } from "@nous-research/ui/ui/components/toast";
|
||||
import { useI18n } from "@/i18n";
|
||||
import {
|
||||
SystemActionsContext,
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
import { useEffect, useState } from "react";
|
||||
|
||||
/**
 * True when the viewport width is strictly below `px`.
 *
 * Implemented as the media query `(max-width: ${px - 1}px)`, i.e. the
 * complement of a `min-width: px` breakpoint. The initial value is `false`
 * when `window` is unavailable; afterwards the hook follows the media query's
 * `change` events.
 */
export function useBelowBreakpoint(px: number) {
  const mediaQuery = `(max-width: ${px - 1}px)`;

  const [isBelow, setIsBelow] = useState(() => {
    if (typeof window === "undefined") return false;
    return window.matchMedia(mediaQuery).matches;
  });

  useEffect(() => {
    const queryList = window.matchMedia(mediaQuery);
    const handleChange = () => {
      setIsBelow(queryList.matches);
    };

    // Re-read once on mount / query change, then follow subsequent changes.
    handleChange();
    queryList.addEventListener("change", handleChange);

    return () => {
      queryList.removeEventListener("change", handleChange);
    };
  }, [mediaQuery]);

  return isBelow;
}
|
||||
@@ -1,41 +0,0 @@
|
||||
import { useCallback, useState } from "react";
|
||||
|
||||
/**
 * State machine for a "confirm before delete" flow.
 *
 * @param onDelete - Async callback that performs the deletion for a given id.
 * @returns
 * - `requestDelete(id)`: stores `id` as the pending target (opens the flow).
 * - `cancel()`: clears the pending id, unless a deletion is in progress.
 * - `confirm()`: awaits `onDelete(pendingId)`; clears the pending id on
 *   success and leaves it set if `onDelete` rejects. The rejection itself is
 *   swallowed here.
 * - `isDeleting`: true while `onDelete` is awaited.
 * - `isOpen`: true whenever a pending id is set.
 * - `pendingId`: the id awaiting confirmation, or `null`.
 */
export function useConfirmDelete<TId>({
  onDelete,
}: {
  onDelete: (id: TId) => Promise<void>;
}) {
  const [targetId, setTargetId] = useState<TId | null>(null);
  const [busy, setBusy] = useState(false);

  const requestDelete = useCallback((id: TId) => {
    setTargetId(id);
  }, []);

  const cancel = useCallback(() => {
    // Cancelling mid-deletion is ignored.
    if (busy) return;
    setTargetId(null);
  }, [busy]);

  const confirm = useCallback(async () => {
    if (targetId === null) return;

    const id = targetId;
    setBusy(true);
    try {
      await onDelete(id);
      setTargetId(null);
    } catch {
      // Dialog stays open; caller can surface errors in onDelete before rethrowing
    } finally {
      setBusy(false);
    }
  }, [targetId, onDelete]);

  const isOpen = targetId !== null;

  return {
    cancel,
    confirm,
    isDeleting: busy,
    isOpen,
    pendingId: targetId,
    requestDelete,
  } as const;
}
|
||||
@@ -1,15 +0,0 @@
|
||||
import { useCallback, useState } from "react";
|
||||
|
||||
/**
 * Minimal toast state hook.
 *
 * @param duration - Milliseconds a toast stays visible before it is cleared
 *   (default 3000).
 * @returns `toast` (the currently shown toast or `null`) and
 *   `showToast(message, type)` which displays a toast and schedules its
 *   dismissal.
 *
 * Each dismissal timer only clears the toast it was started for. Previously
 * the timer cleared state unconditionally, so showing a second toast before
 * the first one expired made the second disappear early when the first
 * toast's timer fired.
 */
export function useToast(duration = 3000) {
  const [toast, setToast] = useState<{ message: string; type: "success" | "error" } | null>(null);

  const showToast = useCallback(
    (message: string, type: "success" | "error") => {
      const shown = { message, type };
      setToast(shown);
      setTimeout(() => {
        // Identity check: leave a newer toast alone if it replaced this one.
        setToast((current) => (current === shown ? null : current));
      }, duration);
    },
    [duration],
  );

  return { toast, showToast };
}
|
||||
+63
-2
@@ -41,7 +41,11 @@ function setSessionHeader(headers: Headers, token: string): void {
|
||||
}
|
||||
}
|
||||
|
||||
export async function fetchJSON<T>(url: string, init?: RequestInit): Promise<T> {
|
||||
export async function fetchJSON<T>(
|
||||
url: string,
|
||||
init?: RequestInit,
|
||||
options?: FetchJSONOptions,
|
||||
): Promise<T> {
|
||||
// Inject the session token into all /api/ requests.
|
||||
const headers = new Headers(init?.headers);
|
||||
const token = window.__HERMES_SESSION_TOKEN__;
|
||||
@@ -91,6 +95,43 @@ export async function fetchJSON<T>(url: string, init?: RequestInit): Promise<T>
|
||||
// Never resolve — the page is about to unload.
|
||||
return new Promise<T>(() => {});
|
||||
}
|
||||
// Loopback mode: ``_SESSION_TOKEN`` rotates on every server restart
|
||||
// (``hermes update``, ``hermes gateway restart``, etc.). A tab kept
|
||||
// open across the restart holds the OLD token in
|
||||
// ``window.__HERMES_SESSION_TOKEN__`` from the previous HTML render,
|
||||
// so every fetch returns 401. The HTML is served ``Cache-Control:
|
||||
// no-store`` so a reload picks up the freshly-injected token. Trigger
|
||||
// that reload once on the first stale-token 401 — gated mode is
|
||||
// handled above, so reaching here in gated mode means a real
|
||||
// middleware failure that should not reload-loop.
|
||||
if (!window.__HERMES_AUTH_REQUIRED__ && !options?.allowUnauthorized) {
|
||||
let alreadyReloaded = false;
|
||||
try {
|
||||
alreadyReloaded =
|
||||
sessionStorage.getItem("hermes.tokenReloadAttempted") === "1";
|
||||
} catch {
|
||||
/* SSR / privacy mode — fall through to throw */
|
||||
}
|
||||
if (!alreadyReloaded) {
|
||||
try {
|
||||
sessionStorage.setItem("hermes.tokenReloadAttempted", "1");
|
||||
} catch {
|
||||
/* SSR / privacy mode — best effort */
|
||||
}
|
||||
window.location.reload();
|
||||
return new Promise<T>(() => {});
|
||||
}
|
||||
}
|
||||
}
|
||||
if (res.ok) {
|
||||
// Clear the stale-token reload guard: a successful 2xx proves the
|
||||
// current ``window.__HERMES_SESSION_TOKEN__`` is valid, so the next
|
||||
// 401 — if any — should be allowed to trigger its own reload cycle.
|
||||
try {
|
||||
sessionStorage.removeItem("hermes.tokenReloadAttempted");
|
||||
} catch {
|
||||
/* SSR / privacy mode — ignore */
|
||||
}
|
||||
}
|
||||
if (!res.ok) {
|
||||
const text = await res.text().catch(() => res.statusText);
|
||||
@@ -161,8 +202,19 @@ export const api = {
|
||||
* still exists but is never useful there (no Session, no cookie). The
|
||||
* AuthWidget component swallows 401s from this call: if the gate isn't
|
||||
* engaged, /api/auth/me returns 401 and the widget renders nothing.
|
||||
*
|
||||
* ``allowUnauthorized`` is load-bearing: in loopback mode this endpoint
|
||||
* 401s by design, and fetchJSON's default loopback behaviour treats a
|
||||
* 401 as a rotated session token and full-page-reloads to pick up a
|
||||
* fresh one. Because every *other* dashboard request succeeds (and so
|
||||
* clears the one-shot reload guard), that turns this expected 401 into
|
||||
* an infinite reload loop. Opting out keeps the 401 a plain throw the
|
||||
* widget can catch.
|
||||
*/
|
||||
getAuthMe: () => fetchJSON<AuthMeResponse>("/api/auth/me"),
|
||||
getAuthMe: () =>
|
||||
fetchJSON<AuthMeResponse>("/api/auth/me", undefined, {
|
||||
allowUnauthorized: true,
|
||||
}),
|
||||
logout: () =>
|
||||
fetch(`${BASE}/auth/logout`, {
|
||||
method: "POST",
|
||||
@@ -477,6 +529,15 @@ export interface ActionResponse {
|
||||
pid: number;
|
||||
}
|
||||
|
||||
/** Per-call overrides for {@link fetchJSON}. */
interface FetchJSONOptions {
  /**
   * Opt out of the loopback stale-token page reload for this call.
   *
   * When true, a 401 response is surfaced as a normal thrown error instead of
   * triggering the reload. Intended for probes whose 401 is an expected
   * signal (e.g. /api/auth/me in non-gated mode) rather than evidence of a
   * rotated session token.
   */
  allowUnauthorized?: boolean;
}
|
||||
|
||||
export interface ActionStatusResponse {
|
||||
exit_code: number | null;
|
||||
lines: string[];
|
||||
|
||||
@@ -20,7 +20,7 @@ import { timeAgo } from "@/lib/utils";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { Stats } from "@nous-research/ui/ui/components/stats";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
import { useI18n } from "@/i18n";
|
||||
|
||||
@@ -23,7 +23,7 @@ import { WebglAddon } from "@xterm/addon-webgl";
|
||||
import { Terminal } from "@xterm/xterm";
|
||||
import "@xterm/xterm/css/xterm.css";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Typography } from "@/components/NouiTypography";
|
||||
import { Typography } from "@nous-research/ui/ui/components/typography/index";
|
||||
import { HERMES_BASE_PATH, buildWsAuthParam } from "@/lib/api";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { Copy, PanelRight, X } from "lucide-react";
|
||||
|
||||
@@ -38,15 +38,15 @@ import {
|
||||
} from "lucide-react";
|
||||
import { api } from "@/lib/api";
|
||||
import { getNestedValue, setNestedValue } from "@/lib/nested";
|
||||
import { useToast } from "@/hooks/useToast";
|
||||
import { Toast } from "@/components/Toast";
|
||||
import { useToast } from "@nous-research/ui/hooks/use-toast";
|
||||
import { Toast } from "@nous-research/ui/ui/components/toast";
|
||||
import { AutoField } from "@/components/AutoField";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { ListItem } from "@nous-research/ui/ui/components/list-item";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
|
||||
import { ConfirmDialog } from "@nous-research/ui/ui/components/confirm-dialog";
|
||||
import { Input } from "@nous-research/ui/ui/components/input";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
|
||||
@@ -4,17 +4,17 @@ import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { H2 } from "@/components/NouiTypography";
|
||||
import { H2 } from "@nous-research/ui/ui/components/typography/h2";
|
||||
import { api } from "@/lib/api";
|
||||
import type { CronJob, ProfileInfo } from "@/lib/api";
|
||||
import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
|
||||
import { useToast } from "@/hooks/useToast";
|
||||
import { useConfirmDelete } from "@/hooks/useConfirmDelete";
|
||||
import { useToast } from "@nous-research/ui/hooks/use-toast";
|
||||
import { useConfirmDelete } from "@nous-research/ui/hooks/use-confirm-delete";
|
||||
import { useModalBehavior } from "@/hooks/useModalBehavior";
|
||||
import { Toast } from "@/components/Toast";
|
||||
import { Card, CardContent } from "@/components/ui/card";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Toast } from "@nous-research/ui/ui/components/toast";
|
||||
import { Card, CardContent } from "@nous-research/ui/ui/components/card";
|
||||
import { Input } from "@nous-research/ui/ui/components/input";
|
||||
import { Label } from "@nous-research/ui/ui/components/label";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
import { PluginSlot } from "@/plugins";
|
||||
|
||||
@@ -17,9 +17,9 @@ import {
|
||||
import { api } from "@/lib/api";
|
||||
import type { EnvVarInfo } from "@/lib/api";
|
||||
import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
|
||||
import { Toast } from "@/components/Toast";
|
||||
import { useConfirmDelete } from "@/hooks/useConfirmDelete";
|
||||
import { useToast } from "@/hooks/useToast";
|
||||
import { Toast } from "@nous-research/ui/ui/components/toast";
|
||||
import { useConfirmDelete } from "@nous-research/ui/hooks/use-confirm-delete";
|
||||
import { useToast } from "@nous-research/ui/hooks/use-toast";
|
||||
import { OAuthProvidersCard } from "@/components/OAuthProvidersCard";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { ListItem } from "@nous-research/ui/ui/components/list-item";
|
||||
@@ -30,10 +30,10 @@ import {
|
||||
CardDescription,
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
} from "@/components/ui/card";
|
||||
} from "@nous-research/ui/ui/components/card";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Input } from "@nous-research/ui/ui/components/input";
|
||||
import { Label } from "@nous-research/ui/ui/components/label";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
import { PluginSlot } from "@/plugins";
|
||||
|
||||
@@ -12,8 +12,8 @@ import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { FilterGroup, Segmented } from "@nous-research/ui/ui/components/segmented";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { Switch } from "@nous-research/ui/ui/components/switch";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
|
||||
import { Label } from "@nous-research/ui/ui/components/label";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
import { PluginSlot } from "@/plugins";
|
||||
|
||||
@@ -24,9 +24,9 @@ import { formatTokenCount } from "@/lib/format";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { Stats } from "@nous-research/ui/ui/components/stats";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
|
||||
import { ConfirmDialog } from "@nous-research/ui/ui/components/confirm-dialog";
|
||||
import { useModalBehavior } from "@/hooks/useModalBehavior";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
import { useI18n } from "@/i18n";
|
||||
|
||||
@@ -10,12 +10,12 @@ import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
|
||||
import { Switch } from "@nous-research/ui/ui/components/switch";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { CommandBlock } from "@nous-research/ui/ui/components/command-block";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { useToast } from "@/hooks/useToast";
|
||||
import { Toast } from "@/components/Toast";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
|
||||
import { ConfirmDialog } from "@nous-research/ui/ui/components/confirm-dialog";
|
||||
import { Input } from "@nous-research/ui/ui/components/input";
|
||||
import { Label } from "@nous-research/ui/ui/components/label";
|
||||
import { useToast } from "@nous-research/ui/hooks/use-toast";
|
||||
import { Toast } from "@nous-research/ui/ui/components/toast";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { PluginSlot } from "@/plugins";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
@@ -14,19 +14,19 @@ import {
|
||||
X,
|
||||
} from "lucide-react";
|
||||
import spinners from "unicode-animations";
|
||||
import { H2 } from "@/components/NouiTypography";
|
||||
import { H2 } from "@nous-research/ui/ui/components/typography/h2";
|
||||
import { api } from "@/lib/api";
|
||||
import type { ProfileInfo } from "@/lib/api";
|
||||
import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
|
||||
import { useToast } from "@/hooks/useToast";
|
||||
import { useConfirmDelete } from "@/hooks/useConfirmDelete";
|
||||
import { useToast } from "@nous-research/ui/hooks/use-toast";
|
||||
import { useConfirmDelete } from "@nous-research/ui/hooks/use-confirm-delete";
|
||||
import { useModalBehavior } from "@/hooks/useModalBehavior";
|
||||
import { Toast } from "@/components/Toast";
|
||||
import { Card, CardContent } from "@/components/ui/card";
|
||||
import { Toast } from "@nous-research/ui/ui/components/toast";
|
||||
import { Card, CardContent } from "@nous-research/ui/ui/components/card";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Input } from "@nous-research/ui/ui/components/input";
|
||||
import { Label } from "@nous-research/ui/ui/components/label";
|
||||
import { Checkbox } from "@nous-research/ui/ui/components/checkbox";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user