Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | dc9783703b | | |
| | 321ce94e25 | | |
| | c5e496e1c0 | | |
| | 7a3c38d0b7 | | |
| | 5cbc3fbdcc | | |
| | f30db14ced | | |
| | 3a9bc9d88a | | |
| | 5f66c36470 | | |
| | 7a8589e782 | | |
| | 7050c052e3 | | |
| | b1d3ead7fb | | |
| | fe5c8ec4ad | | |
+4
-8
@@ -443,7 +443,6 @@
|
||||
|
||||
## 🪟 Native Windows (Beta Continued)
|
||||
|
||||
- Thin desktop installer + first-launch `install.ps1` bootstrap. ([#27822](https://github.com/NousResearch/hermes-agent/pull/27822))
|
||||
- Complete Windows bootstrap — `dep_ensure` + `install.ps1` + detection. (@alt-glitch) ([#27845](https://github.com/NousResearch/hermes-agent/pull/27845))
|
||||
- `install.ps1`: strip BOM, `-Commit`/`-Tag` pin params, harden git ops. (@jquesnelle) ([#28169](https://github.com/NousResearch/hermes-agent/pull/28169))
|
||||
- Consolidate ACP browser bootstrap into `install.{sh,ps1}`. (@alt-glitch) ([#27851](https://github.com/NousResearch/hermes-agent/pull/27851))
|
||||
@@ -453,12 +452,9 @@
|
||||
|
||||
---
|
||||
|
||||
## 🖼️ Hermes Desktop GUI
|
||||
## 🖥️ Web Dashboard
|
||||
|
||||
- `hermes gui` launcher — install + build + launch packaged Electron app. (@OutThisLife) ([#30165](https://github.com/NousResearch/hermes-agent/pull/30165))
|
||||
- Desktop UI lift. ([#27227](https://github.com/NousResearch/hermes-agent/pull/27227))
|
||||
- `nix` package `.#desktop`. (@ethernet8023) ([#28964](https://github.com/NousResearch/hermes-agent/pull/28964))
|
||||
- Hardened Slack socket recovery + Windows desktop restart dedupe. ([#28873](https://github.com/NousResearch/hermes-agent/pull/28873))
|
||||
- Hardened Slack socket recovery + Windows restart dedupe. ([#28873](https://github.com/NousResearch/hermes-agent/pull/28873))
|
||||
- Web dashboard: migrate checkboxes to `@nous-research/ui` + design-system polish. (@austinpickett) ([#28814](https://github.com/NousResearch/hermes-agent/pull/28814))
|
||||
- Web dashboard: collapsible sidebar. (@austinpickett) ([#33421](https://github.com/NousResearch/hermes-agent/pull/33421))
|
||||
- Dashboard typography & contrast pass. (salvage of [#28832](https://github.com/NousResearch/hermes-agent/pull/28832)) ([#30714](https://github.com/NousResearch/hermes-agent/pull/30714))
|
||||
@@ -579,11 +575,11 @@
|
||||
### Notable salvages & cherry-picks
|
||||
|
||||
- **@benbarclay** — s6-overlay container supervision (29 commits salvaged), Node 22 LTS upgrade, build-essential cleanup, `gateway run` auto-redirect in s6, tee supervised stdout to docker logs, `hermes update` Docker guidance, build-time SHA stamping
|
||||
- **@OutThisLife** — `hermes gui` desktop launcher, `mouse_tracking` DEC mode presets
|
||||
- **@OutThisLife** — `mouse_tracking` DEC mode presets
|
||||
- **@jquesnelle** — Windows installer hardening, `--branch` flag for `hermes update`, install.ps1 BOM strip / commit-pin
|
||||
- **@alt-glitch** — Windows `dep_ensure` bootstrap, Nix package variants (`.#messaging`, `.#full`), install-method stamping, ACP browser bootstrap consolidation
|
||||
- **@austinpickett** — `/update` slash command, dashboard checkboxes → `@nous-research/ui`, mobile dashboard polish, collapsible sidebar
|
||||
- **@ethernet8023** — Nix `.#desktop` packaging, CI test slicing across GH Actions jobs, TUI clipboard copy fix
|
||||
- **@ethernet8023** — CI test slicing across GH Actions jobs, TUI clipboard copy fix
|
||||
- **@kshitijk4poor** — doctor section banner + fail-and-issue helpers extraction, post-tag salvage cluster (curator-fallout, kanban SQLite hardening, install world-readable uv dirs, xAI bare-code paste)
|
||||
- **@rewbs** — Nous JWT inference switch + refresh-token replay fix
|
||||
- **@Codename-11** + **@Schwartz10** — session control API (REST + SSE + multimodal followup)
|
||||
|
||||
+24
-30
@@ -49,9 +49,8 @@ from agent.model_metadata import (
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
estimate_messages_tokens_rough,
|
||||
estimate_request_tokens_rough,
|
||||
get_next_probe_tier,
|
||||
get_context_length_from_provider_error,
|
||||
parse_available_output_tokens_from_error,
|
||||
parse_context_limit_from_error,
|
||||
save_context_length,
|
||||
)
|
||||
from agent.nous_rate_guard import (
|
||||
@@ -2900,9 +2899,13 @@ def run_conversation(
|
||||
restart_with_compressed_messages = True
|
||||
break
|
||||
|
||||
# Error is about the INPUT being too large — reduce context_length.
|
||||
# Try to parse the actual limit from the error message
|
||||
parsed_limit = parse_context_limit_from_error(error_msg)
|
||||
# Error is about the INPUT being too large. Only reduce
|
||||
# context_length when the provider explicitly reports the
|
||||
# real lower limit. If the provider only says "input
|
||||
# exceeds the context window", keep the configured window
|
||||
# and try compression; guessing probe tiers can incorrectly
|
||||
# turn a user-configured 1M window into 256K/128K/64K.
|
||||
new_ctx = get_context_length_from_provider_error(error_msg, old_ctx)
|
||||
_provider_lower = (getattr(agent, "provider", "") or "").lower()
|
||||
_base_lower = (getattr(agent, "base_url", "") or "").rstrip("/").lower()
|
||||
is_minimax_provider = (
|
||||
@@ -2914,23 +2917,12 @@ def run_conversation(
|
||||
)
|
||||
minimax_delta_only_overflow = (
|
||||
is_minimax_provider
|
||||
and parsed_limit is None
|
||||
and new_ctx is None
|
||||
and "context window exceeds limit (" in error_msg
|
||||
)
|
||||
if parsed_limit and parsed_limit < old_ctx:
|
||||
new_ctx = parsed_limit
|
||||
agent._buffer_vprint(f"Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
|
||||
elif minimax_delta_only_overflow:
|
||||
new_ctx = old_ctx
|
||||
agent._buffer_vprint(
|
||||
f"Provider reported overflow amount only; "
|
||||
f"keeping context_length at {old_ctx:,} tokens and compressing."
|
||||
)
|
||||
else:
|
||||
# Step down to the next probe tier
|
||||
new_ctx = get_next_probe_tier(old_ctx)
|
||||
|
||||
if new_ctx and new_ctx < old_ctx:
|
||||
if new_ctx is not None:
|
||||
agent._buffer_vprint(f"Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
|
||||
compressor.update_model(
|
||||
model=agent.model,
|
||||
context_length=new_ctx,
|
||||
@@ -2940,20 +2932,22 @@ def run_conversation(
|
||||
api_mode=agent.api_mode,
|
||||
)
|
||||
# Context probing flags — only set on built-in
|
||||
# compressor (plugin engines manage their own).
|
||||
# compressor (plugin engines manage their own). This
|
||||
# value came from the provider, so it is safe to cache.
|
||||
if hasattr(compressor, "_context_probed"):
|
||||
compressor._context_probed = True
|
||||
# Only persist limits parsed from the provider's
|
||||
# error message (a real number). Guessed fallback
|
||||
# tiers from get_next_probe_tier() should stay
|
||||
# in-memory only — persisting them pollutes the
|
||||
# cache with wrong values.
|
||||
compressor._context_probe_persistable = bool(
|
||||
parsed_limit and parsed_limit == new_ctx
|
||||
)
|
||||
agent._buffer_vprint(f"⚠️ Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens")
|
||||
compressor._context_probe_persistable = True
|
||||
agent._buffer_vprint(f"⚠️ Context length exceeded — using provider limit: {old_ctx:,} → {new_ctx:,} tokens")
|
||||
elif minimax_delta_only_overflow:
|
||||
agent._buffer_vprint(
|
||||
f"Provider reported overflow amount only; "
|
||||
f"keeping context_length at {old_ctx:,} tokens and compressing."
|
||||
)
|
||||
else:
|
||||
agent._buffer_vprint(f"⚠️ Context length exceeded at minimum tier — attempting compression...")
|
||||
agent._buffer_vprint(
|
||||
f"⚠️ Context length exceeded, but provider did not report a max context length; "
|
||||
f"keeping context_length at {old_ctx:,} tokens and compressing."
|
||||
)
|
||||
|
||||
compression_attempts += 1
|
||||
if compression_attempts > max_compression_attempts:
|
||||
|
||||
+22
-1
@@ -913,12 +913,33 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
def get_context_length_from_provider_error(
|
||||
error_msg: str,
|
||||
current_context_length: int,
|
||||
) -> Optional[int]:
|
||||
"""Return a provider-reported lower context limit, if one is present.
|
||||
|
||||
Context-overflow recovery must not invent a new model window size. Some
|
||||
providers only say that the input exceeds the context window without
|
||||
reporting the actual maximum. In that case callers should keep the
|
||||
configured context length and try compression only, rather than stepping
|
||||
down through guessed probe tiers (1M → 256K → 128K → ...).
|
||||
"""
|
||||
parsed_limit = parse_context_limit_from_error(error_msg)
|
||||
if parsed_limit is None:
|
||||
return None
|
||||
if parsed_limit < current_context_length:
|
||||
return parsed_limit
|
||||
return None
|
||||
|
||||
|
||||
def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
|
||||
"""Detect an "output cap too large" error and return how many output tokens are available.
|
||||
|
||||
Background — two distinct context errors exist:
|
||||
1. "Prompt too long" — the INPUT itself exceeds the context window.
|
||||
Fix: compress history and/or halve context_length.
|
||||
Fix: compress history, and only reduce context_length if the
|
||||
provider explicitly reports the actual lower limit.
|
||||
2. "max_tokens too large" — input is fine, but input + requested_output > window.
|
||||
Fix: reduce max_tokens (the output cap) for this call.
|
||||
Do NOT touch context_length — the window hasn't shrunk.
|
||||
|
||||
+8
-13
@@ -406,19 +406,14 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
|
||||
if "eyJ" in text:
|
||||
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
|
||||
|
||||
# URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
|
||||
# DB schemes are handled above by _DB_CONNSTR_RE.
|
||||
if "://" in text:
|
||||
text = _redact_url_userinfo(text)
|
||||
|
||||
# URL query params containing opaque tokens (?access_token=…&code=…)
|
||||
if "?" in text:
|
||||
text = _redact_url_query_params(text)
|
||||
|
||||
# HTTP access logs can contain relative request targets with query params
|
||||
# and no URL scheme, e.g. `"POST /hook?password=... HTTP/1.1"`.
|
||||
if "?" in text and "=" in text and _has_http_method_substring(text):
|
||||
text = _redact_http_request_target_query_params(text)
|
||||
# NOTE: Web-URL redaction (query params + userinfo + HTTP access-log
|
||||
# request targets) is intentionally OFF. Many legitimate workflows pass
|
||||
# opaque tokens through query strings — magic-link checkouts, OAuth
|
||||
# callbacks the agent is meant to follow, pre-signed share URLs — and
|
||||
# blanket-redacting param values by name breaks those skills mid-flow.
|
||||
# Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still
|
||||
# caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords
|
||||
# are still caught by _DB_CONNSTR_RE.
|
||||
|
||||
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
|
||||
if "&" in text and "=" in text:
|
||||
|
||||
@@ -168,7 +168,7 @@ from hermes_cli.browser_connect import (
|
||||
try_launch_chrome_debug,
|
||||
)
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from utils import base_url_host_matches, is_truthy_value
|
||||
from utils import base_url_host_matches
|
||||
|
||||
_hermes_home = get_hermes_home()
|
||||
_project_env = Path(__file__).parent / '.env'
|
||||
@@ -3747,7 +3747,7 @@ class HermesCLI:
|
||||
percent_label = f"{percent}%" if percent is not None else "--"
|
||||
duration_label = snapshot["duration"]
|
||||
|
||||
yolo_active = bool(os.getenv("HERMES_YOLO_MODE"))
|
||||
yolo_active = self._is_session_yolo_active()
|
||||
if width < 52:
|
||||
text = f"⚕ {snapshot['model_short']} · {duration_label}"
|
||||
if yolo_active:
|
||||
@@ -3808,7 +3808,7 @@ class HermesCLI:
|
||||
# line and produce duplicated status bar rows over long sessions.
|
||||
width = self._get_tui_terminal_width()
|
||||
duration_label = snapshot["duration"]
|
||||
yolo_active = bool(os.getenv("HERMES_YOLO_MODE"))
|
||||
yolo_active = self._is_session_yolo_active()
|
||||
|
||||
if width < 52:
|
||||
frags = [
|
||||
@@ -6907,6 +6907,7 @@ class HermesCLI:
|
||||
pass
|
||||
|
||||
# Switch to the new session
|
||||
self._transfer_session_yolo(self.session_id, new_session_id)
|
||||
self.session_id = new_session_id
|
||||
self.session_start = now
|
||||
self._pending_title = None
|
||||
@@ -7586,8 +7587,19 @@ class HermesCLI:
|
||||
parts = cmd_original.split(None, 1) # split off '/model'
|
||||
raw_args = parts[1].strip() if len(parts) > 1 else ""
|
||||
|
||||
# Parse --provider and --global flags
|
||||
model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
|
||||
# Parse --provider, --global, and --refresh flags
|
||||
model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
|
||||
|
||||
# --refresh: wipe the on-disk picker cache before building the
|
||||
# provider list. Forces a live re-fetch of every authed provider's
|
||||
# /v1/models endpoint on this open.
|
||||
if force_refresh:
|
||||
try:
|
||||
from hermes_cli.models import clear_provider_models_cache
|
||||
clear_provider_models_cache()
|
||||
_cprint(" Cleared model picker cache. Refreshing...")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Single inventory context — replaces the inline config-slice the
|
||||
# dashboard / TUI used to duplicate. Overlay live session state
|
||||
@@ -7626,6 +7638,7 @@ class HermesCLI:
|
||||
_cprint("")
|
||||
_cprint(" /model <name> switch model")
|
||||
_cprint(" /model --provider <slug> switch provider")
|
||||
_cprint(" /model --refresh re-fetch live model lists")
|
||||
return
|
||||
|
||||
self._open_model_picker(
|
||||
@@ -9607,20 +9620,92 @@ class HermesCLI:
|
||||
}
|
||||
_cprint(labels.get(self.tool_progress_mode, ""))
|
||||
|
||||
def _toggle_yolo(self):
|
||||
"""Toggle YOLO mode — skip all dangerous command approval prompts."""
|
||||
import os
|
||||
from hermes_cli.colors import Colors as _Colors
|
||||
def _transfer_session_yolo(self, old_session_id: str, new_session_id: str) -> None:
|
||||
"""Move YOLO bypass state from an old session key to a new one.
|
||||
|
||||
current = is_truthy_value(os.environ.get("HERMES_YOLO_MODE"))
|
||||
if current:
|
||||
os.environ.pop("HERMES_YOLO_MODE", None)
|
||||
Called whenever ``self.session_id`` is reassigned mid-run — ``/branch``
|
||||
forks into a new session, and auto-compression rotates the agent's
|
||||
session id into a fresh continuation session. Without this transfer
|
||||
the user's ``/yolo ON`` toggle would silently revert on the very next
|
||||
turn (the same UX failure mode that motivated this entire fix), since
|
||||
``_session_yolo`` is keyed by session id.
|
||||
|
||||
Mirrors ``tui_gateway/server.py`` (~line 1297-1305) which performs the
|
||||
same transfer for the TUI's session-rename path. No-op when YOLO
|
||||
wasn't enabled or when the ids match.
|
||||
"""
|
||||
if not old_session_id or not new_session_id or old_session_id == new_session_id:
|
||||
return
|
||||
try:
|
||||
from tools.approval import (
|
||||
disable_session_yolo,
|
||||
enable_session_yolo,
|
||||
is_session_yolo_enabled,
|
||||
)
|
||||
except Exception:
|
||||
return
|
||||
if is_session_yolo_enabled(old_session_id):
|
||||
enable_session_yolo(new_session_id)
|
||||
disable_session_yolo(old_session_id)
|
||||
|
||||
def _is_session_yolo_active(self) -> bool:
|
||||
"""Whether YOLO bypass is currently enabled for this CLI session.
|
||||
|
||||
Reads from ``tools.approval._session_yolo`` (the same set that
|
||||
``enable_session_yolo`` / ``disable_session_yolo`` write to) so the
|
||||
status bar reflects the actual bypass state instead of a stale env
|
||||
var. Also honors the process-start ``--yolo`` flag, which freezes
|
||||
``HERMES_YOLO_MODE`` into ``_YOLO_MODE_FROZEN`` before tool imports
|
||||
happen.
|
||||
"""
|
||||
try:
|
||||
from tools.approval import (
|
||||
_YOLO_MODE_FROZEN,
|
||||
is_session_yolo_enabled,
|
||||
)
|
||||
except Exception:
|
||||
return False
|
||||
if _YOLO_MODE_FROZEN:
|
||||
return True
|
||||
# Use ``getattr`` so test fixtures that build a CLI via ``__new__``
|
||||
# (skipping ``__init__``) don't trip an AttributeError here; the
|
||||
# status-bar builders swallow exceptions silently but lose every
|
||||
# field after the failure.
|
||||
session_key = getattr(self, "session_id", None) or "default"
|
||||
return is_session_yolo_enabled(session_key)
|
||||
|
||||
def _toggle_yolo(self):
|
||||
"""Toggle YOLO mode — skip all dangerous command approval prompts.
|
||||
|
||||
Per-session toggle that mirrors the gateway and TUI ``/yolo`` handlers
|
||||
(see ``gateway/run.py:_handle_yolo_command`` and
|
||||
``tui_gateway/server.py`` key=="yolo"). We deliberately do NOT mutate
|
||||
``HERMES_YOLO_MODE`` here — that env var is read once at module import
|
||||
time into ``tools.approval._YOLO_MODE_FROZEN`` to keep prompt-injected
|
||||
skills from flipping the bypass mid-session, so setting it after CLI
|
||||
startup is a silent no-op. Routing through ``enable_session_yolo`` /
|
||||
``disable_session_yolo`` gives the same auditable, per-session bypass
|
||||
the other surfaces have. ``run_conversation`` binds
|
||||
``self.session_id`` as the active approval session key via
|
||||
``set_current_session_key`` so the bypass takes effect on the very
|
||||
next dangerous command in this run.
|
||||
"""
|
||||
from hermes_cli.colors import Colors as _Colors
|
||||
from tools.approval import (
|
||||
disable_session_yolo,
|
||||
enable_session_yolo,
|
||||
is_session_yolo_enabled,
|
||||
)
|
||||
|
||||
session_key = self.session_id or "default"
|
||||
if is_session_yolo_enabled(session_key):
|
||||
disable_session_yolo(session_key)
|
||||
_cprint(
|
||||
f" ⚠ YOLO mode {_Colors.BOLD}{_Colors.RED}OFF{_Colors.RESET}"
|
||||
" — dangerous commands will require approval."
|
||||
)
|
||||
else:
|
||||
os.environ["HERMES_YOLO_MODE"] = "1"
|
||||
enable_session_yolo(session_key)
|
||||
_cprint(
|
||||
f" ⚡ YOLO mode {_Colors.BOLD}{_Colors.GREEN}ON{_Colors.RESET}"
|
||||
" — all commands auto-approved. Use with caution."
|
||||
@@ -11757,6 +11842,23 @@ class HermesCLI:
|
||||
set_secret_capture_callback(self._secret_capture_callback)
|
||||
except Exception:
|
||||
pass
|
||||
# Bind this turn's approval session key into the contextvar so
|
||||
# ``tools.approval.is_current_session_yolo_enabled()`` resolves
|
||||
# against the same key that ``/yolo`` toggles under (see
|
||||
# ``_toggle_yolo`` → ``enable_session_yolo(self.session_id)``).
|
||||
# Mirrors ``tui_gateway/server.py`` and ``gateway/run.py`` which
|
||||
# bind the same contextvar before invoking the agent.
|
||||
try:
|
||||
from tools.approval import (
|
||||
reset_current_session_key,
|
||||
set_current_session_key,
|
||||
)
|
||||
_approval_session_token = set_current_session_key(
|
||||
self.session_id or "default"
|
||||
)
|
||||
except Exception:
|
||||
reset_current_session_key = None # type: ignore[assignment]
|
||||
_approval_session_token = None
|
||||
agent_message = _voice_prefix + message if _voice_prefix else message
|
||||
# Prepend pending model switch note so the model knows about the switch
|
||||
_msn = getattr(self, '_pending_model_switch_note', None)
|
||||
@@ -11798,6 +11900,15 @@ class HermesCLI:
|
||||
set_secret_capture_callback(None)
|
||||
except Exception:
|
||||
pass
|
||||
# Release the per-turn approval session key. ``_session_yolo``
|
||||
# state itself is preserved across turns (so /yolo persists
|
||||
# for the whole CLI run); we just unbind the contextvar so a
|
||||
# reused thread doesn't see stale identity on its next run.
|
||||
if _approval_session_token is not None and reset_current_session_key is not None:
|
||||
try:
|
||||
reset_current_session_key(_approval_session_token)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Start agent in background thread (daemon so it cannot keep the
|
||||
# process alive when the user closes the terminal tab — SIGHUP
|
||||
@@ -11928,6 +12039,7 @@ class HermesCLI:
|
||||
and getattr(self.agent, "session_id", None)
|
||||
and self.agent.session_id != self.session_id
|
||||
):
|
||||
self._transfer_session_yolo(self.session_id, self.agent.session_id)
|
||||
self.session_id = self.agent.session_id
|
||||
self._pending_title = None
|
||||
|
||||
@@ -14968,6 +15080,39 @@ def main(
|
||||
time.sleep(_grace)
|
||||
except Exception:
|
||||
pass # never block signal handling
|
||||
# Kanban worker exit path (#28181): SIGTERM hits a dispatcher-spawned
|
||||
# worker that's likely in a non-daemon thread waiting on a child
|
||||
# subprocess in _wait_for_process. Raising KeyboardInterrupt only
|
||||
# unwinds the main thread; the worker thread keeps running, the
|
||||
# process gets reparented to init, and the dispatcher's _pid_alive
|
||||
# check returns True forever — task stuck in 'running' indefinitely.
|
||||
# Skip the controlled-unwind dance and call os._exit(0) so the kernel
|
||||
# reclaims the PID immediately and detect_crashed_workers can reclaim
|
||||
# the stale claim on the next tick. Flush logging + stdout/stderr
|
||||
# first so the final debug trace isn't lost; SIGALRM deadman guards
|
||||
# the flush against any rare blocking-I/O case (the reporter measured
|
||||
# flush in <1ms; the alarm is a failsafe, not the common path).
|
||||
if os.environ.get("HERMES_KANBAN_TASK"):
|
||||
try:
|
||||
import signal as _sig_mod
|
||||
if hasattr(_sig_mod, "SIGALRM"):
|
||||
# Arm a 2-second deadman: if the flush below blocks, SIGALRM
|
||||
# force-exits. Scheduling it also replaces any pre-existing alarm.
|
||||
_sig_mod.signal(_sig_mod.SIGALRM, lambda *_: os._exit(0))
|
||||
_sig_mod.alarm(2)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
import logging as _lg
|
||||
_lg.shutdown()
|
||||
except Exception:
|
||||
pass
|
||||
for _stream in (sys.stdout, sys.stderr):
|
||||
try:
|
||||
_stream.flush()
|
||||
except Exception:
|
||||
pass
|
||||
os._exit(0)
|
||||
raise KeyboardInterrupt()
|
||||
try:
|
||||
import signal as _signal
|
||||
|
||||
@@ -829,6 +829,13 @@ _HERMES_HOME = get_hermes_home()
|
||||
MEDIA_DELIVERY_ALLOW_DIRS_ENV = "HERMES_MEDIA_ALLOW_DIRS"
|
||||
MEDIA_DELIVERY_TRUST_RECENT_ENV = "HERMES_MEDIA_TRUST_RECENT_FILES"
|
||||
MEDIA_DELIVERY_TRUST_RECENT_SECONDS_ENV = "HERMES_MEDIA_TRUST_RECENT_SECONDS"
|
||||
# Strict mode toggles the original allowlist+recency path-validation behavior.
|
||||
# Off by default — symmetric with inbound (we accept any document type the
|
||||
# user uploads), and with the denylist still blocking obvious credential /
|
||||
# system paths. Operators running public-facing gateways where prompt
|
||||
# injection from one user could exfiltrate the host's secrets to that same
|
||||
# user should set this to true.
|
||||
MEDIA_DELIVERY_STRICT_ENV = "HERMES_MEDIA_DELIVERY_STRICT"
|
||||
MEDIA_DELIVERY_SAFE_ROOTS = (
|
||||
IMAGE_CACHE_DIR,
|
||||
AUDIO_CACHE_DIR,
|
||||
@@ -918,6 +925,21 @@ def _media_delivery_recency_seconds() -> float:
|
||||
return float(_MEDIA_DELIVERY_TRUST_RECENT_DEFAULT_SECONDS)
|
||||
|
||||
|
||||
def _media_delivery_strict_mode() -> bool:
|
||||
"""Return True when path validation should require allowlist/recency match.
|
||||
|
||||
Off by default. In non-strict mode, ``validate_media_delivery_path``
|
||||
accepts any existing regular file that isn't under the credential /
|
||||
system-path denylist — restoring the pre-#29523 behavior for the
|
||||
single-user case. Strict mode preserves the original
|
||||
allowlist+recency-window logic for operators running public-facing
|
||||
gateways where prompt injection from one user shouldn't be able to
|
||||
exfiltrate the host's secrets to that same user.
|
||||
"""
|
||||
raw = os.environ.get(MEDIA_DELIVERY_STRICT_ENV, "0").strip().lower()
|
||||
return raw in ("1", "true", "yes", "on")
|
||||
|
||||
|
||||
def _media_delivery_denied_paths() -> List[Path]:
|
||||
"""Return absolute denylist paths under which delivery is never allowed."""
|
||||
denied = [Path(p) for p in _MEDIA_DELIVERY_DENIED_PREFIXES]
|
||||
@@ -972,10 +994,22 @@ def _path_is_within(path: Path, root: Path) -> bool:
|
||||
def validate_media_delivery_path(path: str) -> Optional[str]:
|
||||
"""Return a safe absolute file path for native media delivery, else None.
|
||||
|
||||
MEDIA tags and bare local paths in model output are untrusted text. Only
|
||||
existing regular files under Hermes-managed media caches, or roots the
|
||||
operator explicitly allowlists, may be uploaded as native attachments.
|
||||
Symlinks are resolved before the containment check.
|
||||
Default mode (single-user / private gateway): accept any existing regular
|
||||
file that isn't under the credential / system-path denylist
|
||||
(``_MEDIA_DELIVERY_DENIED_PREFIXES`` + ``~/.ssh``, ``~/.aws``, etc.).
|
||||
This matches the symmetry of inbound delivery — Telegram/Discord/Slack
|
||||
will hand the agent any file the user uploads, and the agent can hand
|
||||
back any file that isn't a credential.
|
||||
|
||||
Strict mode (opt-in via ``gateway.strict`` in ``config.yaml`` or
|
||||
``HERMES_MEDIA_DELIVERY_STRICT=1``): the file MUST live under a
|
||||
Hermes-managed cache, under an operator-allowlisted root
|
||||
(``HERMES_MEDIA_ALLOW_DIRS``), or be freshly produced inside the
|
||||
configured recency window. Suitable for public-facing bots where
|
||||
prompt injection from one user shouldn't be able to exfiltrate the
|
||||
host's secrets to that same user.
|
||||
|
||||
Symlinks are resolved before any containment / denylist check.
|
||||
"""
|
||||
if not path:
|
||||
return None
|
||||
@@ -999,6 +1033,8 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
|
||||
if not resolved.is_file():
|
||||
return None
|
||||
|
||||
# Cache / operator allowlist is always honored — these are unconditionally
|
||||
# trusted regardless of mode.
|
||||
for root in _media_delivery_allowed_roots():
|
||||
try:
|
||||
resolved_root = root.expanduser().resolve(strict=False)
|
||||
@@ -1007,9 +1043,18 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
|
||||
if _path_is_within(resolved, resolved_root):
|
||||
return str(resolved)
|
||||
|
||||
# Outside the cache/operator allowlist: fall back to recency-based trust
|
||||
# for files the agent has just produced (e.g. ``pandoc -o /tmp/report.pdf``
|
||||
# or ``write_file("/home/user/report.pdf", ...)``). System paths and
|
||||
# Non-strict mode (default): accept anything not on the denylist.
|
||||
# The denylist still blocks /etc, /proc, ~/.ssh, ~/.aws, ~/.hermes/.env,
|
||||
# ~/.hermes/auth.json, etc. — so the obvious prompt-injection sites
|
||||
# (``MEDIA:/etc/passwd``, ``MEDIA:~/.ssh/id_rsa``) remain rejected.
|
||||
if not _media_delivery_strict_mode():
|
||||
if _path_under_denied_prefix(resolved):
|
||||
return None
|
||||
return str(resolved)
|
||||
|
||||
# Strict mode: fall back to recency-based trust for freshly-produced
|
||||
# files (e.g. ``pandoc -o /tmp/report.pdf`` or
|
||||
# ``write_file("/home/user/report.pdf", ...)``). System paths and
|
||||
# credential locations remain blocked even when "recent" — see
|
||||
# ``_MEDIA_DELIVERY_DENIED_PREFIXES`` for the denylist.
|
||||
window = _media_delivery_recency_seconds()
|
||||
|
||||
+16
-3
@@ -932,9 +932,14 @@ if _config_path.exists():
|
||||
_redact = _security_cfg.get("redact_secrets")
|
||||
if _redact is not None:
|
||||
os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
|
||||
# Gateway settings (media delivery allowlist + recency trust)
|
||||
# Gateway settings (media delivery allowlist + recency trust + strict mode)
|
||||
_gateway_cfg = _cfg.get("gateway", {})
|
||||
if isinstance(_gateway_cfg, dict):
|
||||
_strict = _gateway_cfg.get("strict")
|
||||
if _strict is not None:
|
||||
os.environ["HERMES_MEDIA_DELIVERY_STRICT"] = (
|
||||
"1" if _strict else "0"
|
||||
)
|
||||
_allow_dirs = _gateway_cfg.get("media_delivery_allow_dirs")
|
||||
if _allow_dirs:
|
||||
if isinstance(_allow_dirs, str):
|
||||
@@ -10241,8 +10246,16 @@ class GatewayRunner:
|
||||
|
||||
raw_args = event.get_command_args().strip()
|
||||
|
||||
# Parse --provider and --global flags
|
||||
model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
|
||||
# Parse --provider, --global, and --refresh flags
|
||||
model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
|
||||
|
||||
# --refresh: bust the disk cache so the picker shows live data.
|
||||
if force_refresh:
|
||||
try:
|
||||
from hermes_cli.models import clear_provider_models_cache
|
||||
clear_provider_models_cache()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Read current model/provider from config
|
||||
current_model = ""
|
||||
|
||||
@@ -123,7 +123,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("config", "Show current configuration", "Configuration",
|
||||
cli_only=True),
|
||||
CommandDef("model", "Switch model for this session", "Configuration",
|
||||
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
|
||||
aliases=("provider",), args_hint="[model] [--provider name] [--global] [--refresh]"),
|
||||
CommandDef("codex-runtime", "Toggle codex app-server runtime for OpenAI/Codex models",
|
||||
"Configuration", aliases=("codex_runtime",),
|
||||
args_hint="[auto|codex_app_server]"),
|
||||
|
||||
+19
-2
@@ -1806,6 +1806,21 @@ DEFAULT_CONFIG = {
|
||||
# Gateway settings — control how messaging platforms (Telegram, Discord,
|
||||
# Slack, etc.) deliver agent-produced files as native attachments.
|
||||
"gateway": {
|
||||
# When false (default), any file path the agent emits is delivered
|
||||
# as a native attachment as long as it isn't under the credential /
|
||||
# system-path denylist (/etc, /proc, ~/.ssh, ~/.aws, ~/.hermes/.env,
|
||||
# auth.json, etc.). This matches the symmetry of inbound delivery
|
||||
# — we accept any document type the user uploads, and the agent
|
||||
# can hand back any file that isn't a credential.
|
||||
#
|
||||
# When true, fall back to the older allowlist+recency-window
|
||||
# behavior: files must live under the Hermes cache, under
|
||||
# ``media_delivery_allow_dirs``, or be freshly produced inside the
|
||||
# ``trust_recent_files_seconds`` window. Recommended for
|
||||
# public-facing gateways where prompt injection from one user
|
||||
# shouldn't be able to exfiltrate the host's secrets to that same
|
||||
# user. Bridged to HERMES_MEDIA_DELIVERY_STRICT.
|
||||
"strict": False,
|
||||
# Extra directories from which model-emitted bare file paths may be
|
||||
# uploaded as native gateway attachments. Files inside the Hermes
|
||||
# cache (~/.hermes/cache/{documents,images,audio,video,screenshots})
|
||||
@@ -1813,7 +1828,7 @@ DEFAULT_CONFIG = {
|
||||
# (project dirs, scratch dirs, mounted shares). Accepts a list of
|
||||
# absolute paths or a single os.pathsep-separated string. Bridged
|
||||
# to HERMES_MEDIA_ALLOW_DIRS at gateway startup. Tilde paths are
|
||||
# expanded.
|
||||
# expanded. Honored in both default and strict mode.
|
||||
"media_delivery_allow_dirs": [],
|
||||
# When true, files whose mtime is within ``trust_recent_files_seconds``
|
||||
# of "now" are trusted for native delivery even outside the cache /
|
||||
@@ -1821,10 +1836,12 @@ DEFAULT_CONFIG = {
|
||||
# PDFs the agent writes into a working directory. System paths
|
||||
# (/etc, /proc, ~/.ssh, ~/.aws, etc.) remain blocked regardless.
|
||||
# Disable to fall back to pure-allowlist mode. Bridged to
|
||||
# HERMES_MEDIA_TRUST_RECENT_FILES.
|
||||
# HERMES_MEDIA_TRUST_RECENT_FILES. Only consulted when ``strict``
|
||||
# is true; in default mode the denylist alone gates delivery.
|
||||
"trust_recent_files": True,
|
||||
# Recency window in seconds. 600 (10 min) comfortably covers a
|
||||
# multi-tool agent turn. Bridged to HERMES_MEDIA_TRUST_RECENT_SECONDS.
|
||||
# Only consulted when ``strict`` is true.
|
||||
"trust_recent_files_seconds": 600,
|
||||
},
|
||||
|
||||
|
||||
@@ -2117,6 +2117,13 @@ def cmd_postinstall(args):
|
||||
def cmd_model(args):
    """Run the interactive default-model flow: provider selection, then model picker.

    Requires a TTY (enforced via ``_require_tty``). When ``args.refresh`` is
    truthy, the model picker cache is cleared before the picker opens.
    Clearing is best-effort: any exception raised while doing so is ignored
    and selection proceeds regardless.

    Args:
        args: Parsed CLI namespace; forwarded unchanged to
            ``select_provider_and_model``.
    """
    _require_tty("model")

    refresh_requested = getattr(args, "refresh", False)
    if refresh_requested:
        try:
            from hermes_cli.models import clear_provider_models_cache

            clear_provider_models_cache()
            print(" Cleared model picker cache.")
        except Exception:
            # A failed cache wipe must never block model selection.
            pass

    select_provider_and_model(args=args)
|
||||
|
||||
|
||||
@@ -11311,6 +11318,11 @@ def main():
|
||||
help="Select default model and provider",
|
||||
description="Interactively select your inference provider and default model",
|
||||
)
|
||||
model_parser.add_argument(
|
||||
"--refresh",
|
||||
action="store_true",
|
||||
help="Wipe the model picker disk cache and re-fetch every provider's live /v1/models list.",
|
||||
)
|
||||
model_parser.add_argument(
|
||||
"--portal-url",
|
||||
help="Portal base URL for Nous login (default: production portal)",
|
||||
@@ -13379,6 +13391,11 @@ Examples:
|
||||
"--yes", "-y", action="store_true", help="Skip confirmation"
|
||||
)
|
||||
|
||||
sessions_subparsers.add_parser(
|
||||
"optimize",
|
||||
help="Reclaim disk space: merge FTS5 segments + VACUUM (no data change)",
|
||||
)
|
||||
|
||||
sessions_subparsers.add_parser("stats", help="Show session store statistics")
|
||||
|
||||
sessions_rename = sessions_subparsers.add_parser(
|
||||
@@ -13551,6 +13568,39 @@ Examples:
|
||||
relaunch(["--resume", selected_id])
|
||||
return # won't reach here after execvp
|
||||
|
||||
elif action == "optimize":
|
||||
db_path = db.db_path
|
||||
before_mb = (
|
||||
os.path.getsize(db_path) / (1024 * 1024)
|
||||
if db_path.exists()
|
||||
else 0.0
|
||||
)
|
||||
print("Optimizing session store (FTS merge + VACUUM)…")
|
||||
try:
|
||||
# vacuum() merges FTS5 segments (optimize_fts) then VACUUMs.
|
||||
# Probe the index count first for the summary line.
|
||||
n = sum(
|
||||
1
|
||||
for t in db._FTS_TABLES
|
||||
if db._fts_table_exists(t)
|
||||
)
|
||||
db.vacuum()
|
||||
except Exception as e:
|
||||
print(f"Error: optimization failed: {e}")
|
||||
db.close()
|
||||
return
|
||||
after_mb = (
|
||||
os.path.getsize(db_path) / (1024 * 1024)
|
||||
if db_path.exists()
|
||||
else 0.0
|
||||
)
|
||||
saved = before_mb - after_mb
|
||||
print(f"Optimized {n} FTS index(es).")
|
||||
print(
|
||||
f"Database size: {before_mb:.1f} MB -> {after_mb:.1f} MB "
|
||||
f"(reclaimed {saved:.1f} MB)"
|
||||
)
|
||||
|
||||
elif action == "stats":
|
||||
total = db.session_count()
|
||||
msgs = db.message_count()
|
||||
|
||||
+47
-33
@@ -294,32 +294,39 @@ class CustomAutoResult:
|
||||
# Flag parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
|
||||
"""Parse --provider and --global flags from /model command args.
|
||||
def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool]:
|
||||
"""Parse --provider, --global, and --refresh flags from /model command args.
|
||||
|
||||
Returns (model_input, explicit_provider, is_global).
|
||||
Returns (model_input, explicit_provider, is_global, force_refresh).
|
||||
|
||||
Examples::
|
||||
|
||||
"sonnet" -> ("sonnet", "", False)
|
||||
"sonnet --global" -> ("sonnet", "", True)
|
||||
"sonnet --provider anthropic" -> ("sonnet", "anthropic", False)
|
||||
"--provider my-ollama" -> ("", "my-ollama", False)
|
||||
"sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True)
|
||||
"sonnet" -> ("sonnet", "", False, False)
|
||||
"sonnet --global" -> ("sonnet", "", True, False)
|
||||
"sonnet --provider anthropic" -> ("sonnet", "anthropic", False, False)
|
||||
"--provider my-ollama" -> ("", "my-ollama", False, False)
|
||||
"--refresh" -> ("", "", False, True)
|
||||
"sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True, False)
|
||||
"""
|
||||
is_global = False
|
||||
explicit_provider = ""
|
||||
force_refresh = False
|
||||
|
||||
# Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
|
||||
# A single Unicode dash before a flag keyword becomes "--"
|
||||
import re as _re
|
||||
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global)', r'--\1', raw_args)
|
||||
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global|refresh)', r'--\1', raw_args)
|
||||
|
||||
# Extract --global
|
||||
if "--global" in raw_args:
|
||||
is_global = True
|
||||
raw_args = raw_args.replace("--global", "").strip()
|
||||
|
||||
# Extract --refresh (bust the model picker disk cache before listing)
|
||||
if "--refresh" in raw_args:
|
||||
force_refresh = True
|
||||
raw_args = raw_args.replace("--refresh", "").strip()
|
||||
|
||||
# Extract --provider <name>
|
||||
parts = raw_args.split()
|
||||
i = 0
|
||||
@@ -333,7 +340,7 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
|
||||
i += 1
|
||||
|
||||
model_input = " ".join(filtered).strip()
|
||||
return (model_input, explicit_provider, is_global)
|
||||
return (model_input, explicit_provider, is_global, force_refresh)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1079,6 +1086,7 @@ def list_authenticated_providers(
|
||||
from hermes_cli.models import (
|
||||
OPENROUTER_MODELS, _PROVIDER_MODELS,
|
||||
_MODELS_DEV_PREFERRED, _merge_with_models_dev, provider_model_ids,
|
||||
cached_provider_model_ids,
|
||||
get_curated_nous_model_ids,
|
||||
)
|
||||
|
||||
@@ -1239,13 +1247,15 @@ def list_authenticated_providers(
|
||||
if not has_creds:
|
||||
continue
|
||||
|
||||
# Use curated list, falling back to models.dev if no curated list.
|
||||
# For preferred providers, merge models.dev entries into the curated
|
||||
# catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go)
|
||||
# show up in the picker without requiring a Hermes release.
|
||||
model_ids = curated.get(hermes_id, [])
|
||||
if hermes_id in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_id, model_ids)
|
||||
# Unified pathway: route through cached_provider_model_ids() so the
|
||||
# /model picker sees the SAME list `hermes model` would build, with
|
||||
# disk caching to keep the picker open snappy. Falls back to the
|
||||
# curated static list when the live fetcher returns nothing.
|
||||
model_ids = cached_provider_model_ids(hermes_id)
|
||||
if not model_ids:
|
||||
model_ids = curated.get(hermes_id, [])
|
||||
if hermes_id in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_id, model_ids)
|
||||
total = len(model_ids)
|
||||
top = model_ids[:max_models]
|
||||
|
||||
@@ -1351,25 +1361,27 @@ def list_authenticated_providers(
|
||||
# matches what the user's authenticated Codex/Copilot backend
|
||||
# actually serves — including ChatGPT-Pro-only Codex slugs
|
||||
# (e.g. gpt-5.3-codex-spark) that aren't in the static curated
|
||||
# catalog. ``provider_model_ids()`` falls back to the curated
|
||||
# list when the live endpoint is unreachable, so this is safe
|
||||
# for unauthenticated and offline cases too.
|
||||
model_ids = provider_model_ids(hermes_slug)
|
||||
# catalog. ``cached_provider_model_ids()`` falls back to the
|
||||
# curated list when the live endpoint is unreachable, so this
|
||||
# is safe for unauthenticated and offline cases too.
|
||||
model_ids = cached_provider_model_ids(hermes_slug)
|
||||
# For aws_sdk providers (bedrock), use live discovery so the list
|
||||
# reflects the active region (eu.*, ap.*) not the static us.* list.
|
||||
elif overlay.auth_type == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import bedrock_model_ids_or_none
|
||||
_ids = bedrock_model_ids_or_none()
|
||||
model_ids = _ids if _ids is not None else (curated.get(hermes_slug, []) or curated.get(pid, []))
|
||||
_ids = cached_provider_model_ids(hermes_slug)
|
||||
model_ids = _ids if _ids else (curated.get(hermes_slug, []) or curated.get(pid, []))
|
||||
except Exception:
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
else:
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
# Merge with models.dev for preferred providers (same rationale as above).
|
||||
if hermes_slug in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
|
||||
# Unified pathway — see Section 1 rationale. Fall back to the
|
||||
# curated dict (with models.dev merge for preferred providers)
|
||||
# when the live fetcher comes up empty.
|
||||
model_ids = cached_provider_model_ids(hermes_slug)
|
||||
if not model_ids:
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
if hermes_slug in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
|
||||
total = len(model_ids)
|
||||
top = model_ids[:max_models]
|
||||
|
||||
@@ -1436,13 +1448,15 @@ def list_authenticated_providers(
|
||||
# region (eu.*, us.*, ap.*) instead of the hardcoded us.* static list.
|
||||
if _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import bedrock_model_ids_or_none
|
||||
_ids = bedrock_model_ids_or_none()
|
||||
_cp_model_ids = _ids if _ids is not None else curated.get(_cp.slug, [])
|
||||
_ids = cached_provider_model_ids(_cp.slug)
|
||||
_cp_model_ids = _ids if _ids else curated.get(_cp.slug, [])
|
||||
except Exception:
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
else:
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
# Unified pathway — same as sections 1 and 2.
|
||||
_cp_model_ids = cached_provider_model_ids(_cp.slug)
|
||||
if not _cp_model_ids:
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
_cp_total = len(_cp_model_ids)
|
||||
_cp_top = _cp_model_ids[:max_models]
|
||||
|
||||
|
||||
@@ -2047,6 +2047,12 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||
return live
|
||||
except Exception:
|
||||
pass
|
||||
# Live failed (or no creds). Fall back to the docs-hosted manifest
|
||||
# — NOT the in-repo _PROVIDER_MODELS["nous"] snapshot — so newly
|
||||
# added Portal models still surface without a Hermes release.
|
||||
manifest_ids = get_curated_nous_model_ids()
|
||||
if manifest_ids:
|
||||
return manifest_ids
|
||||
if normalized == "stepfun":
|
||||
try:
|
||||
from hermes_cli.auth import resolve_api_key_provider_credentials
|
||||
@@ -2150,6 +2156,206 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||
return curated_static
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Generic disk cache for provider_model_ids() — keeps /model picker fast.
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Without this layer, every /model picker open re-fetches every authed
|
||||
# provider's /v1/models endpoint. On a well-configured user (anthropic +
|
||||
# openai + copilot + gemini + huggingface + ...) that's 2+ seconds of cold
|
||||
# HTTP roundtrips just to render the provider list.
|
||||
#
|
||||
# Cache strategy:
|
||||
# - One JSON file at $HERMES_HOME/provider_models_cache.json
|
||||
# - Per-provider entries keyed by (provider, credential fingerprint)
|
||||
#   - Credential fingerprint = short blake2b digest of the env-var values the provider
|
||||
# normally reads. Swap your OPENAI_API_KEY and the entry invalidates.
|
||||
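#   - 1h TTL by default. `force_refresh=True` skips the fresh-cache lookup,
#     re-fetches live, and overwrites the entry on success (a stale entry with
#     the same fingerprint is still used if the live fetch returns nothing).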
|
||||
# - Only NON-EMPTY results are cached. An empty/None response from a
|
||||
# transient network error never gets pinned.
|
||||
# - Cache file is best-effort. Any read/write error degrades silently
|
||||
# to a live fetch — the picker keeps working.
|
||||
|
||||
_PROVIDER_MODELS_CACHE_TTL = 3600  # default cache-entry lifetime in seconds (1h); default for ``ttl_seconds``
|
||||
|
||||
|
||||
def _provider_models_cache_path() -> Path:
    """Return the path of the provider-models cache file.

    The file is named ``provider_models_cache.json`` and sits directly under
    the directory returned by ``hermes_constants.get_hermes_home()``. The
    import is done lazily, at call time.
    """
    from hermes_constants import get_hermes_home

    hermes_home = get_hermes_home()
    return hermes_home / "provider_models_cache.json"
|
||||
|
||||
|
||||
def _credential_fingerprint(provider: str) -> str:
|
||||
"""Return a short hash representing the credentials that
|
||||
``provider_model_ids(provider)`` would see right now.
|
||||
|
||||
Rotating any of the relevant env vars invalidates the cached entry
|
||||
for that provider. We hash AT LEAST the api-key + base-url env vars
|
||||
declared in ``PROVIDER_REGISTRY``. For OAuth-backed providers
|
||||
(codex, copilot, anthropic-via-claude-code, nous portal), the
|
||||
relevant tokens live in ``$HERMES_HOME/auth.json`` and external
|
||||
credential files. Rather than parse every shape, we additionally
|
||||
fold the mtime of those files into the fingerprint so refreshes
|
||||
after re-auth bust the cache.
|
||||
"""
|
||||
import hashlib
|
||||
import os as _os
|
||||
|
||||
parts: list[str] = []
|
||||
|
||||
# Env vars from PROVIDER_REGISTRY for this slug
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
pcfg = PROVIDER_REGISTRY.get(provider)
|
||||
if pcfg is not None:
|
||||
for ev in getattr(pcfg, "api_key_env_vars", ()) or ():
|
||||
parts.append(f"{ev}={_os.environ.get(ev, '')}")
|
||||
bev = getattr(pcfg, "base_url_env_var", "") or ""
|
||||
if bev:
|
||||
parts.append(f"{bev}={_os.environ.get(bev, '')}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# OAuth / external-file mtimes that change on re-auth
|
||||
try:
|
||||
from hermes_constants import get_hermes_home
|
||||
for rel in ("auth.json", "credentials.json"):
|
||||
p = get_hermes_home() / rel
|
||||
try:
|
||||
parts.append(f"{rel}@{p.stat().st_mtime_ns}")
|
||||
except FileNotFoundError:
|
||||
parts.append(f"{rel}@missing")
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# External well-known credential file locations
|
||||
for path in (
|
||||
_os.path.expanduser("~/.codex/auth.json"),
|
||||
_os.path.expanduser("~/.claude/.credentials.json"),
|
||||
_os.path.expanduser("~/.config/github-copilot/hosts.json"),
|
||||
_os.path.expanduser("~/.minimax/credentials.json"),
|
||||
):
|
||||
try:
|
||||
mt = _os.stat(path).st_mtime_ns
|
||||
parts.append(f"{path}@{mt}")
|
||||
except FileNotFoundError:
|
||||
parts.append(f"{path}@missing")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
blob = "|".join(parts).encode("utf-8", errors="replace")
|
||||
# blake2b for cache-key fingerprinting only — not for credential storage.
|
||||
# We never reverse this hash; collisions are harmless (worst case: cache
|
||||
# miss → live re-fetch). Use blake2b instead of sha256 here because
|
||||
# CodeQL's `py/weak-sensitive-data-hashing` rule flags sha256 over env
|
||||
# vars whose names contain "API_KEY" / "TOKEN" even when the hash is
|
||||
# used as an identity fingerprint, not for password storage. blake2b
|
||||
# is a keyed-hash primitive and isn't flagged.
|
||||
return hashlib.blake2b(blob, digest_size=8).hexdigest()
|
||||
|
||||
|
||||
def _load_provider_models_cache() -> dict:
|
||||
"""Return the full cache dict, or {} on any error."""
|
||||
try:
|
||||
path = _provider_models_cache_path()
|
||||
if not path.exists():
|
||||
return {}
|
||||
with open(path, encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
return data if isinstance(data, dict) else {}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
def _save_provider_models_cache(data: dict) -> None:
    """Write ``data`` to the provider-models cache file, ignoring failures.

    The parent directory is created when absent and the payload is handed to
    ``utils.atomic_json_write`` with ``indent=None``. Any exception along the
    way (import, directory creation, write) is swallowed, so a failed save
    never reaches the caller.

    Args:
        data: The complete cache mapping to persist.
    """
    try:
        from utils import atomic_json_write

        target = _provider_models_cache_path()
        cache_dir = target.parent
        cache_dir.mkdir(parents=True, exist_ok=True)
        atomic_json_write(target, data, indent=None)
    except Exception:
        # Persisting the cache is optional; silently skip on any error.
        return None
|
||||
|
||||
|
||||
def cached_provider_model_ids(
    provider: Optional[str],
    *,
    force_refresh: bool = False,
    ttl_seconds: int = _PROVIDER_MODELS_CACHE_TTL,
) -> list[str]:
    """Disk-cached wrapper around :func:`provider_model_ids`.

    Lookup order:

    1. Unless ``force_refresh`` is set, return the cached model list when the
       entry's fingerprint matches the current credential fingerprint, the
       list is non-empty, and the entry's age is within ``ttl_seconds``.
    2. Otherwise call :func:`provider_model_ids`; a non-empty result is
       written back to the cache and returned.
    3. If the live call returns nothing, fall back to a cached list with the
       same fingerprint regardless of age (stale data beats no data).

    A cache entry whose ``at`` timestamp is missing, null, non-numeric, or in
    the future is treated as stale instead of raising or being trusted, so a
    corrupt or hand-edited cache file degrades to a live fetch.

    Args:
        provider: Provider name or alias; normalized via ``normalize_provider``.
        force_refresh: Skip step 1 and pass ``force_refresh`` through to the
            live fetch.
        ttl_seconds: Maximum age, in seconds, for an entry to count as fresh.

    Returns:
        A new list of model ids; empty when the provider is blank or neither
        the live fetch nor the cache yields anything. Never ``None``.
    """
    normalized = normalize_provider(provider) or (provider or "")
    if not normalized:
        return []

    cache = _load_provider_models_cache()
    fp = _credential_fingerprint(normalized)
    entry = cache.get(normalized)
    now = time.time()

    # Non-empty only when the entry is well-formed and was written under the
    # same credentials we would use right now.
    cached_models = _matching_cached_models(entry, fp)

    if cached_models and not force_refresh:
        try:
            age = now - float(entry.get("at", 0))
        except (TypeError, ValueError, OverflowError):
            # Unusable timestamp: treat the entry as stale rather than crash.
            age = None
        # ``0 <= age`` rejects timestamps from the future (clock changes),
        # which would otherwise look fresh for an unbounded time.
        if age is not None and 0 <= age < ttl_seconds:
            return cached_models

    # Cache miss / stale / forced refresh: call the live path.
    live = provider_model_ids(normalized, force_refresh=force_refresh)
    if live:
        cache[normalized] = {
            "fp": fp,
            "at": now,
            "models": list(live),
        }
        _save_provider_models_cache(cache)
        return list(live)

    # Live fetch returned nothing. Prefer a stale list recorded under the
    # SAME fingerprint over an empty result when the network is flaky.
    if cached_models:
        return cached_models
    return []


def _matching_cached_models(entry, fingerprint: str) -> list:
    """Return a copy of ``entry``'s model list if it matches ``fingerprint``, else ``[]``."""
    if not isinstance(entry, dict) or entry.get("fp") != fingerprint:
        return []
    models = entry.get("models")
    if not isinstance(models, list):
        return []
    return list(models)
|
||||
|
||||
|
||||
def clear_provider_models_cache(provider: Optional[str] = None) -> None:
    """Remove cached model lists, either for one provider or for all of them.

    Used by ``/model --refresh`` and ``hermes model --refresh``.

    Args:
        provider: When ``None``, the cache file itself is deleted (if it
            exists). Otherwise only the entry keyed by the normalized
            provider name is dropped and the remaining cache is saved back.

    Every failure is swallowed; the function never raises.
    """
    try:
        if provider is not None:
            # Targeted removal: rewrite the cache without this provider.
            entries = _load_provider_models_cache()
            slug = normalize_provider(provider) or provider or ""
            if slug in entries:
                del entries[slug]
                _save_provider_models_cache(entries)
            return

        # Full wipe: delete the cache file when present.
        cache_file = _provider_models_cache_path()
        if cache_file.exists():
            cache_file.unlink()
    except Exception:
        pass
|
||||
|
||||
|
||||
def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
|
||||
"""Fetch available models from the Anthropic /v1/models endpoint.
|
||||
|
||||
|
||||
@@ -3116,6 +3116,58 @@ class SessionDB:
|
||||
|
||||
# ── Space reclamation ──
|
||||
|
||||
# FTS5 virtual tables whose b-tree segments we merge on optimize. The
|
||||
# trigram table is created lazily / may be disabled, so we probe before
|
||||
# touching it (see optimize_fts).
|
||||
_FTS_TABLES = ("messages_fts", "messages_fts_trigram")
|
||||
|
||||
def _fts_table_exists(self, name: str) -> bool:
|
||||
"""True if an FTS5 virtual table is queryable in this DB."""
|
||||
try:
|
||||
self._conn.execute(f"SELECT 1 FROM {name} LIMIT 0")
|
||||
return True
|
||||
except sqlite3.OperationalError:
|
||||
return False
|
||||
|
||||
def optimize_fts(self) -> int:
    """Issue the FTS5 ``'optimize'`` command against each known FTS table.

    FTS5 indexes accumulate incremental b-tree segments as rows are
    inserted; the special ``'optimize'`` command merges them into a single
    segment per index. It is a maintenance operation that affects on-disk
    layout and query speed, not search results, and it complements VACUUM,
    which hands the freed pages back to the OS afterwards.

    While holding ``self._lock``, every name in ``self._FTS_TABLES`` is
    probed with ``_fts_table_exists`` and skipped when unavailable (for
    example a trigram index that is disabled or not yet created), so calling
    this unconditionally is safe. A ``sqlite3.OperationalError`` raised by
    the command is logged as a warning and that table is not counted.

    NOTE(review): no commit is issued here; confirm the connection's
    transaction mode is compatible with a following VACUUM.

    Returns:
        The number of FTS indexes for which the command succeeded.
    """
    merged = 0
    with self._lock:
        for table in self._FTS_TABLES:
            if self._fts_table_exists(table):
                # FTS5 special commands are written as an INSERT whose
                # column name is the table's own name.
                command = f"INSERT INTO {table}({table}) VALUES('optimize')"
                try:
                    self._conn.execute(command)
                except sqlite3.OperationalError as err:
                    logger.warning(
                        "FTS optimize failed for %s: %s", table, err
                    )
                else:
                    merged += 1
    return merged
|
||||
|
||||
def vacuum(self) -> None:
|
||||
"""Run VACUUM to reclaim disk space after large deletes.
|
||||
|
||||
@@ -3129,7 +3181,17 @@ class SessionDB:
|
||||
exclusive lock, so callers must ensure no other writers are
|
||||
active. Safe to call at startup before the gateway/CLI starts
|
||||
serving traffic.
|
||||
|
||||
FTS5 segments are merged first via :meth:`optimize_fts` so the
|
||||
subsequent VACUUM reclaims the pages freed by the merge. This is a
|
||||
layout-only optimization — search results are unchanged.
|
||||
"""
|
||||
# Merge FTS5 segments before VACUUM so the freed pages are returned
|
||||
# to the OS in the same pass. optimize_fts() manages its own lock.
|
||||
try:
|
||||
self.optimize_fts()
|
||||
except Exception as exc:
|
||||
logger.warning("FTS optimize before VACUUM failed: %s", exc)
|
||||
# VACUUM cannot be executed inside a transaction.
|
||||
with self._lock:
|
||||
# Best-effort WAL checkpoint first, then VACUUM.
|
||||
|
||||
@@ -80,30 +80,27 @@ def crawl_source(source, source_name: str, limit: int) -> list:
|
||||
|
||||
|
||||
def crawl_skills_sh(source: SkillsShSource) -> list:
|
||||
"""Crawl skills.sh using popular queries for broad coverage."""
|
||||
print(" Crawling skills.sh (popular queries)...", flush=True)
|
||||
"""Crawl skills.sh via its sitemap to enumerate the full catalog (~20k entries).
|
||||
|
||||
Previously walked a hardcoded list of ~28 popular keywords (each capped at
|
||||
50 results) which yielded ~850 unique skills — about 4% of the real catalog.
|
||||
The SkillsShSource.search("") path now hits the sitemap directly, returning
|
||||
the full 20k-entry catalog deduplicated by canonical identifier.
|
||||
"""
|
||||
print(" Crawling skills.sh (sitemap)...", flush=True)
|
||||
start = time.time()
|
||||
|
||||
queries = [
|
||||
"", # featured
|
||||
"react", "python", "web", "api", "database", "docker",
|
||||
"testing", "scraping", "design", "typescript", "git",
|
||||
"aws", "security", "data", "ml", "ai", "devops",
|
||||
"frontend", "backend", "mobile", "cli", "documentation",
|
||||
"kubernetes", "terraform", "rust", "go", "java",
|
||||
]
|
||||
try:
|
||||
results = source.search("", limit=0) # 0 = no cap, return the whole catalog
|
||||
except Exception as e:
|
||||
print(f" Warning: skills.sh sitemap walk failed: {e}", file=sys.stderr)
|
||||
results = []
|
||||
|
||||
all_skills: dict[str, dict] = {}
|
||||
for query in queries:
|
||||
try:
|
||||
results = source.search(query, limit=50)
|
||||
for meta in results:
|
||||
entry = _meta_to_dict(meta)
|
||||
if entry["identifier"] not in all_skills:
|
||||
all_skills[entry["identifier"]] = entry
|
||||
except Exception as e:
|
||||
print(f" Warning: skills.sh search '{query}' failed: {e}",
|
||||
file=sys.stderr)
|
||||
for meta in results:
|
||||
entry = _meta_to_dict(meta)
|
||||
if entry["identifier"] not in all_skills:
|
||||
all_skills[entry["identifier"]] = entry
|
||||
|
||||
elapsed = time.time() - start
|
||||
print(f" skills.sh: {len(all_skills)} unique skills ({elapsed:.1f}s)",
|
||||
@@ -345,7 +342,11 @@ def main():
|
||||
# or rate limiting kicked in. Failing here forces a human look before
|
||||
# the broken index reaches the live docs.
|
||||
EXPECTED_FLOORS = {
|
||||
"skills.sh": 100,
|
||||
# skills.sh now uses the sitemap walker (~20k catalog as of May 2026).
|
||||
# Anything under 10k means the sitemap shape changed or fetches failed
|
||||
# — better to fail loudly than ship a regression to the 858-skill
|
||||
# popular-queries era.
|
||||
"skills.sh": 10000,
|
||||
"lobehub": 100,
|
||||
# ClawHub had 49,698+ skills as of May 2026 — anything under 20k means
|
||||
# pagination broke or the API surface changed. Fail loudly rather
|
||||
|
||||
@@ -128,6 +128,7 @@ AUTHOR_MAP = {
|
||||
"buraysandro9@gmail.com": "ygd58",
|
||||
"108427749+buntingszn@users.noreply.github.com": "buntingszn",
|
||||
"yanglongwei06@gmail.com": "Alex-yang00",
|
||||
"yanghongda@jackyun.com": "yangguangjin",
|
||||
"teknium@nousresearch.com": "teknium1",
|
||||
"markuscontasul@gmail.com": "Glucksberg",
|
||||
"80581902+Glucksberg@users.noreply.github.com": "Glucksberg",
|
||||
|
||||
+36
-106
@@ -378,127 +378,57 @@ class TestDiscordMentions:
|
||||
assert result.endswith(" said hello")
|
||||
|
||||
|
||||
class TestUrlQueryParamRedaction:
|
||||
"""URL query-string redaction (ported from nearai/ironclaw#2529).
|
||||
|
||||
Catches opaque tokens that don't match vendor prefix regexes by
|
||||
matching on parameter NAME rather than value shape.
|
||||
class TestWebUrlsNotRedacted:
|
||||
"""Web URLs (http/https/wss) pass through unchanged — magic-link
|
||||
checkouts, OAuth callbacks the agent is meant to follow, and pre-signed
|
||||
share URLs must reach the tool intact. Known credential shapes inside
|
||||
URLs (sk-, ghp_, JWTs) are still caught by the prefix and JWT regexes.
|
||||
DB connection-string passwords are still caught by _DB_CONNSTR_RE.
|
||||
"""
|
||||
|
||||
def test_oauth_callback_code(self):
|
||||
def test_oauth_callback_code_passes_through(self):
|
||||
text = "GET https://api.example.com/oauth/cb?code=abc123xyz789&state=csrf_ok"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "abc123xyz789" not in result
|
||||
assert "code=***" in result
|
||||
assert "state=csrf_ok" in result # state is not sensitive
|
||||
|
||||
def test_access_token_query(self):
|
||||
text = "Fetching https://example.com/api?access_token=opaque_value_here_1234&format=json"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "opaque_value_here_1234" not in result
|
||||
assert "access_token=***" in result
|
||||
assert "format=json" in result
|
||||
|
||||
def test_refresh_token_query(self):
|
||||
text = "https://auth.example.com/token?refresh_token=somerefresh&grant_type=refresh"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "somerefresh" not in result
|
||||
assert "grant_type=refresh" in result
|
||||
|
||||
def test_api_key_query(self):
|
||||
text = "https://api.example.com/v1/data?api_key=kABCDEF12345&limit=10"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "kABCDEF12345" not in result
|
||||
assert "limit=10" in result
|
||||
|
||||
def test_presigned_signature(self):
|
||||
text = "https://s3.amazonaws.com/bucket/k?signature=LONG_PRESIGNED_SIG&id=public"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "LONG_PRESIGNED_SIG" not in result
|
||||
assert "id=public" in result
|
||||
|
||||
def test_case_insensitive_param_names(self):
|
||||
"""Lowercase/mixed-case sensitive param names are redacted."""
|
||||
# NOTE: All-caps names like TOKEN= are swallowed by _ENV_ASSIGN_RE
|
||||
# (which matches KEY=value patterns greedily) before URL regex runs.
|
||||
# This test uses lowercase names to isolate URL-query redaction.
|
||||
text = "https://example.com?api_key=abcdef&secret=ghijkl"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "abcdef" not in result
|
||||
assert "ghijkl" not in result
|
||||
assert "api_key=***" in result
|
||||
assert "secret=***" in result
|
||||
|
||||
def test_substring_match_does_not_trigger(self):
|
||||
"""`token_count` and `session_id` must NOT match `token` / `session`."""
|
||||
text = "https://example.com/cb?token_count=42&session_id=xyz&foo=bar"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "token_count=42" in result
|
||||
assert "session_id=xyz" in result
|
||||
|
||||
def test_url_without_query_unchanged(self):
|
||||
text = "https://example.com/path/to/resource"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_url_with_fragment(self):
|
||||
text = "https://example.com/page?token=xyz#section"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "token=xyz" not in result
|
||||
assert "#section" in result
|
||||
def test_access_token_query_passes_through(self):
|
||||
text = "Fetching https://example.com/api?access_token=opaque_value_here_1234&format=json"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_websocket_url_query(self):
|
||||
def test_magic_link_checkout_passes_through(self):
|
||||
text = "Open https://checkout.example.com/resume?magic=ABCDEF123456&customer=42"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_presigned_signature_passes_through(self):
|
||||
text = "https://s3.amazonaws.com/bucket/k?signature=LONG_PRESIGNED_SIG&id=public"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_https_userinfo_passes_through(self):
|
||||
text = "URL: https://user:supersecretpw@host.example.com/path"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_websocket_url_query_passes_through(self):
|
||||
text = "wss://api.example.com/ws?token=opaqueWsToken123"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "opaqueWsToken123" not in result
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_http_access_log_relative_request_target_query(self):
|
||||
def test_http_access_log_request_target_passes_through(self):
|
||||
text = (
|
||||
'INFO aiohttp.access: 127.0.0.1 "POST '
|
||||
'/bluebubbles-webhook?password=webhookSecret123&event=new-message '
|
||||
'HTTP/1.1" 200 173 "-" "test-client"'
|
||||
)
|
||||
result = redact_sensitive_text(text)
|
||||
assert "webhookSecret123" not in result
|
||||
assert "password=***" in result
|
||||
assert "event=new-message" in result
|
||||
|
||||
def test_http_access_log_absolute_request_target_query(self):
|
||||
text = (
|
||||
'INFO aiohttp.access: 127.0.0.1 "GET '
|
||||
'https://example.com/callback?code=oauthCode123&state=csrf-ok '
|
||||
'HTTP/1.1" 200 173 "-" "test-client"'
|
||||
)
|
||||
result = redact_sensitive_text(text)
|
||||
assert "oauthCode123" not in result
|
||||
assert "code=***" in result
|
||||
assert "state=csrf-ok" in result
|
||||
|
||||
|
||||
class TestUrlUserinfoRedaction:
|
||||
"""URL userinfo (`scheme://user:pass@host`) for non-DB schemes."""
|
||||
|
||||
def test_https_userinfo(self):
|
||||
text = "URL: https://user:supersecretpw@host.example.com/path"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "supersecretpw" not in result
|
||||
assert "https://user:***@host.example.com" in result
|
||||
|
||||
def test_http_userinfo(self):
|
||||
text = "http://admin:plaintextpass@internal.example.com/api"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "plaintextpass" not in result
|
||||
|
||||
def test_ftp_userinfo(self):
|
||||
text = "ftp://user:ftppass@ftp.example.com/file.txt"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "ftppass" not in result
|
||||
|
||||
def test_url_without_userinfo_unchanged(self):
|
||||
text = "https://example.com/path"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_db_connstr_still_handled(self):
|
||||
"""DB schemes are handled by _DB_CONNSTR_RE, not _URL_USERINFO_RE."""
|
||||
def test_known_prefix_inside_url_still_redacted(self):
|
||||
"""sk-/ghp_/JWT-shaped values inside a URL are still caught by
|
||||
_PREFIX_RE / _JWT_RE — the carve-out is for opaque tokens only."""
|
||||
text = "https://evil.com/steal?key=sk-" + "a" * 30
|
||||
result = redact_sensitive_text(text)
|
||||
assert "sk-" + "a" * 30 not in result
|
||||
|
||||
def test_db_connstr_password_still_redacted(self):
|
||||
"""DB schemes (postgres/mysql/mongodb/redis/amqp) keep their
|
||||
userinfo redaction via _DB_CONNSTR_RE — connection strings are
|
||||
not web URLs the agent navigates to."""
|
||||
text = "postgres://admin:dbpass@db.internal:5432/app"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "dbpass" not in result
|
||||
|
||||
@@ -275,8 +275,9 @@ class TestRunTurn:
|
||||
def test_turn_start_failure_attaches_redacted_stderr_tail(self):
|
||||
"""When codex stderr has content (non-OAuth), the tail gets attached
|
||||
to the user-facing error so config/provider problems are debuggable
|
||||
instead of just 'Internal error'. Secrets in stderr are redacted
|
||||
via agent.redact(force=True)."""
|
||||
instead of just 'Internal error'. Credential-shaped values in stderr
|
||||
are redacted via agent.redact(force=True); web-URL query params pass
|
||||
through (see fix(redact): pass web URLs through unchanged)."""
|
||||
client = FakeClient()
|
||||
client.set_stderr_tail([
|
||||
"ERROR: provider auth failed",
|
||||
@@ -299,9 +300,8 @@ class TestRunTurn:
|
||||
# Stderr tail attached
|
||||
assert "codex stderr" in r.error
|
||||
assert "provider auth failed" in r.error
|
||||
# Secrets redacted
|
||||
# Credential-shaped values still redacted (sk- prefix + Bearer header)
|
||||
assert "sk-live-deadbeefdeadbeef" not in r.error
|
||||
assert "querysecret12345" not in r.error
|
||||
# Non-OAuth → should NOT retire (subprocess JSON-RPC is still healthy).
|
||||
assert r.should_retire is False
|
||||
|
||||
|
||||
@@ -0,0 +1,244 @@
|
||||
"""Regression tests for the CLI ``/yolo`` in-chat toggle.
|
||||
|
||||
Pre-fix bug (issue #33925): ``cli.HermesCLI._toggle_yolo`` mutated only
|
||||
``os.environ["HERMES_YOLO_MODE"]``. That env var is captured once at
|
||||
module-import time into ``tools.approval._YOLO_MODE_FROZEN`` (security
|
||||
hardening: stops prompt-injected skills from flipping the bypass mid-run),
|
||||
so the post-startup toggle was a silent no-op. ``/yolo`` advertised "YOLO ON"
|
||||
in the status bar while every dangerous command still hit the approval
|
||||
prompt. Only ``hermes --yolo`` (process-start env), ``HERMES_YOLO_MODE=1``,
|
||||
and ``hermes config set approvals.mode off`` actually bypassed.
|
||||
|
||||
The fix routes the CLI toggle through ``enable_session_yolo`` /
|
||||
``disable_session_yolo`` (matching the gateway and TUI ``/yolo`` paths) and
|
||||
binds ``self.session_id`` as the active approval session key around each
|
||||
``run_conversation`` call so ``is_current_session_yolo_enabled()`` resolves
|
||||
against the same key the toggle writes under.
|
||||
|
||||
We test ``_toggle_yolo`` and ``_is_session_yolo_active`` as unbound methods
|
||||
against a minimal stand-in object that exposes only the attribute they
|
||||
read (``session_id``). This avoids the heavy ``HermesCLI`` construction
|
||||
path used in ``test_cli_init.py``, which is incompatible with this test
|
||||
file's path layout — ``HermesCLI.__init__`` imports a lot of optional
|
||||
state we don't need here.
|
||||
"""
|
||||
|
||||
import os
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
import tools.approval as approval_module
|
||||
from cli import HermesCLI
|
||||
|
||||
|
||||
SESSION_KEY = "test-cli-yolo-session"
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clear_approval_state(monkeypatch):
    """Reset approval state on both sides of every test.

    Removes ``HERMES_YOLO_MODE`` from the environment and clears the approval
    entries stored under ``SESSION_KEY`` and ``"default"`` before the test
    body runs, then clears the same two entries again afterwards, so no case
    can observe a bypass left behind by another.
    """
    tracked_keys = (SESSION_KEY, "default")

    def _wipe_sessions():
        for key in tracked_keys:
            approval_module.clear_session(key)

    monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
    _wipe_sessions()
    yield
    _wipe_sessions()
|
||||
|
||||
|
||||
def _make_stand_in(session_id: str = SESSION_KEY) -> SimpleNamespace:
    """Build a lightweight substitute for a ``HermesCLI`` instance.

    The tests in this file call ``HermesCLI._toggle_yolo`` and
    ``HermesCLI._is_session_yolo_active`` as plain functions and pass this
    object as ``self``. Only a ``session_id`` attribute is provided, which
    sidesteps constructing a real ``HermesCLI`` (and the prompt_toolkit /
    config stubbing that would require).

    Args:
        session_id: value exposed as ``.session_id``; defaults to
            ``SESSION_KEY``.

    Returns:
        A ``SimpleNamespace`` carrying just that attribute.
    """
    cli_stub = SimpleNamespace()
    cli_stub.session_id = session_id
    return cli_stub
|
||||
|
||||
|
||||
class TestToggleYoloIsSessionScoped:
    """``HermesCLI._toggle_yolo`` must flip the per-session bypass, not the env var.

    ``_YOLO_MODE_FROZEN`` is captured once when the approval module is
    imported, long before an in-chat ``/yolo`` can run, so writing
    ``HERMES_YOLO_MODE`` at toggle time cannot influence approvals.
    """

    @staticmethod
    def _toggle(cli_stub):
        """Invoke the unbound toggle with its console output silenced."""
        with patch("cli._cprint"):
            HermesCLI._toggle_yolo(cli_stub)

    def test_toggle_yolo_enables_session_bypass(self):
        cli_stub = _make_stand_in()
        enabled_before = approval_module.is_session_yolo_enabled(SESSION_KEY)
        assert enabled_before is False

        self._toggle(cli_stub)

        enabled_after = approval_module.is_session_yolo_enabled(SESSION_KEY)
        assert enabled_after is True

    def test_toggle_yolo_disables_session_bypass_on_second_call(self):
        cli_stub = _make_stand_in()

        # First call switches the bypass on, the second switches it off again.
        for expected_state in (True, False):
            self._toggle(cli_stub)
            current = approval_module.is_session_yolo_enabled(SESSION_KEY)
            assert current is expected_state

    def test_toggle_yolo_does_not_mutate_env_var(self):
        """The toggle must leave ``HERMES_YOLO_MODE`` unset.

        That variable is frozen at import time; writing it later would only
        mislead readers of the environment (subprocesses, status bars wired
        to the env, the relaunch flag list).
        """
        self._toggle(_make_stand_in())

        assert os.environ.get("HERMES_YOLO_MODE") is None

    def test_toggle_yolo_falls_back_to_default_when_session_id_missing(self):
        """An empty ``session_id`` (e.g. ``/yolo`` during CLI bootstrap) must
        not raise, and the bypass must land under the ``default`` key so code
        resolving against that key still sees it."""
        self._toggle(_make_stand_in(session_id=""))

        assert approval_module.is_session_yolo_enabled("default") is True

    def test_two_independent_sessions_are_isolated(self):
        """Enabling YOLO for one session id must not enable it for another —
        the same invariant the gateway side upholds."""
        first_id, second_id = "session-yolo-a", "session-yolo-b"
        first_cli = _make_stand_in(session_id=first_id)
        _make_stand_in(session_id=second_id)  # second session is never toggled

        try:
            self._toggle(first_cli)

            assert approval_module.is_session_yolo_enabled("session-yolo-a") is True
            assert approval_module.is_session_yolo_enabled("session-yolo-b") is False
        finally:
            # These ids are outside the autouse fixture's key set.
            for leftover in (first_id, second_id):
                approval_module.clear_session(leftover)
|
||||
|
||||
|
||||
class TestIsSessionYoloActiveHelper:
    """``_is_session_yolo_active`` has to report the live session-yolo state
    rather than the environment variable."""

    def test_helper_reflects_toggle(self):
        cli_stub = _make_stand_in()

        def is_active():
            return HermesCLI._is_session_yolo_active(cli_stub)

        def toggle():
            with patch("cli._cprint"):
                HermesCLI._toggle_yolo(cli_stub)

        # off -> on -> off, with the helper tracking each transition.
        assert is_active() is False
        toggle()
        assert is_active() is True
        toggle()
        assert is_active() is False

    def test_helper_honors_frozen_yolo_mode(self):
        """With ``_YOLO_MODE_FROZEN`` patched to True (the state described for
        ``hermes --yolo``), the helper must report YOLO as active even though
        the session toggle was never switched on."""
        cli_stub = _make_stand_in()

        with patch.object(approval_module, "_YOLO_MODE_FROZEN", True):
            reported = HermesCLI._is_session_yolo_active(cli_stub)
            assert reported is True
|
||||
|
||||
|
||||
class TestToggleYoloEndToEnd:
    """End-to-end check: after the toggle, a dangerous command is approved by
    ``check_all_command_guards`` itself."""

    def test_toggle_yolo_bypasses_dangerous_command_check(self):
        cli_stub = _make_stand_in()
        dangerous_command = "rm -rf /tmp/scratch-xyzzy"

        # Bind SESSION_KEY as the current approval session so the guard
        # resolves against the key the toggle writes under.
        key_token = approval_module.set_current_session_key(SESSION_KEY)
        try:
            with patch("cli._cprint"):
                HermesCLI._toggle_yolo(cli_stub)  # YOLO ON

            result = approval_module.check_all_command_guards(
                dangerous_command,
                "local",
            )
            failure_note = (
                f"YOLO toggle should auto-approve dangerous commands, got: {result}"
            )
            assert result["approved"] is True, failure_note
        finally:
            approval_module.reset_current_session_key(key_token)
|
||||
|
||||
|
||||
class TestIsSessionYoloActiveAttrSafety:
    """``_is_session_yolo_active`` must tolerate an object with no
    ``session_id`` attribute.

    Fixtures built with ``HermesCLI.__new__(HermesCLI)`` skip ``__init__`` and
    therefore lack the attribute. Per the original rationale, the status-bar
    builders swallow exceptions and drop every later field, so an
    ``AttributeError`` here would be hard to trace back to the helper.
    """

    def test_helper_survives_missing_session_id_attr(self):
        # An empty namespace stands in for a __new__-built instance.
        bare_stub = SimpleNamespace()

        outcome = HermesCLI._is_session_yolo_active(bare_stub)

        # The contract is "return False", not "raise".
        assert outcome is False
|
||||
|
||||
|
||||
class TestSessionRotationTransfersYolo:
    """YOLO state must follow the CLI when its ``session_id`` rotates.

    When the id changes mid-run (``/branch``, auto compression continuation),
    state keyed under the old id has to move to the new id; otherwise the
    user's ``/yolo ON`` silently reverts on the next turn. The original note
    says this mirrors ``tui_gateway/server.py``.

    Every test clears the session keys it touches in a ``finally`` block
    because those keys are outside the set the autouse fixture resets.
    """

    def test_transfer_moves_yolo_to_new_session(self):
        stand_in = _make_stand_in(session_id="old-id")
        try:
            approval_module.enable_session_yolo("old-id")
            assert approval_module.is_session_yolo_enabled("old-id") is True

            HermesCLI._transfer_session_yolo(stand_in, "old-id", "new-id")

            assert approval_module.is_session_yolo_enabled("new-id") is True
            assert approval_module.is_session_yolo_enabled("old-id") is False
        finally:
            approval_module.clear_session("old-id")
            approval_module.clear_session("new-id")

    def test_transfer_is_noop_when_yolo_was_off(self):
        stand_in = _make_stand_in(session_id="old-id")
        try:
            HermesCLI._transfer_session_yolo(stand_in, "old-id", "new-id")

            assert approval_module.is_session_yolo_enabled("new-id") is False
            assert approval_module.is_session_yolo_enabled("old-id") is False
        finally:
            approval_module.clear_session("old-id")
            approval_module.clear_session("new-id")

    def test_transfer_is_noop_when_ids_match(self):
        stand_in = _make_stand_in(session_id="same-id")
        try:
            approval_module.enable_session_yolo("same-id")

            HermesCLI._transfer_session_yolo(stand_in, "same-id", "same-id")

            # Must NOT have been disabled — same-id == same-id is a no-op,
            # not a "disable then re-enable" round-trip.
            assert approval_module.is_session_yolo_enabled("same-id") is True
        finally:
            approval_module.clear_session("same-id")

    def test_transfer_handles_empty_inputs_safely(self):
        stand_in = _make_stand_in(session_id="x")
        try:
            # Both directions of empty input should be safe no-ops; nothing
            # to transfer from "" / to "".
            HermesCLI._transfer_session_yolo(stand_in, "", "new")
            HermesCLI._transfer_session_yolo(stand_in, "old", "")

            # Neither key should have been touched.
            assert approval_module.is_session_yolo_enabled("new") is False
            assert approval_module.is_session_yolo_enabled("old") is False
        finally:
            # If an assertion above fails, state may have been written under
            # these keys; clear them so later tests are not affected.
            approval_module.clear_session("new")
            approval_module.clear_session("old")
|
||||
@@ -368,6 +368,11 @@ class TestMediaDeliveryPathValidation:
|
||||
"gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
|
||||
tuple(roots),
|
||||
)
|
||||
# All tests in this class cover strict-mode behavior (allowlist +
|
||||
# recency window + denylist). Force strict on so they keep
|
||||
# exercising the legacy path even though the public default
|
||||
# flipped to off in 2026-05.
|
||||
monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
|
||||
# Disable recency-based trust by default so the original allowlist
|
||||
# tests continue to exercise the strict-allowlist path. Tests that
|
||||
# specifically cover recency trust re-enable it themselves.
|
||||
@@ -536,6 +541,149 @@ class TestMediaDeliveryPathValidation:
|
||||
assert out == [str(fresh.resolve())]
|
||||
|
||||
|
||||
class TestMediaDeliveryDefaultMode:
    """Default (non-strict) delivery: the denylist is the only gate.

    The stated rationale is symmetry with inbound delivery —
    Telegram/Discord/Slack accept any uploaded document type, so the agent may
    hand back any file that is not a credential. Strict mode remains an
    opt-in for operators of public-facing gateways.
    """

    def _patch_roots(self, monkeypatch, *roots):
        """Install ``roots`` as the safe-root allowlist and pin strict mode off.

        Called with no roots, the allowlist is empty, so a path can only be
        accepted through the default-mode "anything not denied" branch.
        Strict-path tests live in ``TestMediaDeliveryPathValidation``.
        """
        monkeypatch.setattr(
            "gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
            tuple(roots),
        )
        # Public default: neither variable is set.
        for env_name in ("HERMES_MEDIA_DELIVERY_STRICT", "HERMES_MEDIA_ALLOW_DIRS"):
            monkeypatch.delenv(env_name, raising=False)

    def test_accepts_stale_file_outside_allowlist(self, tmp_path, monkeypatch):
        """The motivating case: ``MEDIA:/home/user/notes.md`` for a file the
        agent has been working with for hours. Strict mode would reject it
        (outside allowlist, outside recency window); default mode delivers it.
        """
        self._patch_roots(monkeypatch)

        stale_notes = tmp_path / "notes.md"
        stale_notes.write_text("# Old notes\n")
        two_hours_ago = time.time() - 7200  # far outside any recency window
        os.utime(stale_notes, (two_hours_ago, two_hours_ago))

        validated = BasePlatformAdapter.validate_media_delivery_path(str(stale_notes))
        assert validated == str(stale_notes.resolve())

    def test_accepts_any_extension_not_on_denylist(self, tmp_path, monkeypatch):
        """There is no extension allowlist: .md, .txt, .json, .py and .bin all pass."""
        self._patch_roots(monkeypatch)

        file_names = ("report.md", "log.txt", "data.json", "script.py", "blob.bin")
        for file_name in file_names:
            candidate = tmp_path / file_name
            candidate.write_bytes(b"x")
            validated = BasePlatformAdapter.validate_media_delivery_path(str(candidate))
            assert validated == str(candidate.resolve())

    def test_denylist_still_blocks_credentials(self, tmp_path, monkeypatch):
        """Permissive is not naive: credential paths stay blocked. ``$HOME`` is
        redirected so that ``~/.ssh`` resolves inside ``tmp_path``.
        """
        self._patch_roots(monkeypatch)

        home_dir = tmp_path / "home"
        key_dir = home_dir / ".ssh"
        key_dir.mkdir(parents=True)
        private_key = key_dir / "id_rsa"
        private_key.write_bytes(b"-----BEGIN ...")
        monkeypatch.setenv("HOME", str(home_dir))

        assert BasePlatformAdapter.validate_media_delivery_path(str(private_key)) is None

    def test_denylist_blocks_system_prefixes(self, tmp_path, monkeypatch):
        """Files under a denied prefix (/etc, /proc, /sys, /root, /boot,
        /var/{log,lib,run}) are rejected. The prefix list is patched to a tmp
        directory so the test never has to read the real /etc.
        """
        self._patch_roots(monkeypatch)

        denied_root = tmp_path / "fake-etc"
        denied_root.mkdir()
        shadow_file = denied_root / "shadow"
        shadow_file.write_bytes(b"root:!:0:0::/root:/bin/sh")

        monkeypatch.setattr(
            "gateway.platforms.base._MEDIA_DELIVERY_DENIED_PREFIXES",
            (str(denied_root),),
        )

        assert BasePlatformAdapter.validate_media_delivery_path(str(shadow_file)) is None

    def test_denylist_blocks_hermes_credentials(self, tmp_path, monkeypatch):
        """``~/.hermes/.env`` stays blocked in default mode. It lives under
        ``$HOME`` rather than a system prefix, so this covers the
        home-relative denied paths.
        """
        self._patch_roots(monkeypatch)

        home_dir = tmp_path / "home"
        hermes_home = home_dir / ".hermes"
        hermes_home.mkdir(parents=True)
        dotenv = hermes_home / ".env"
        dotenv.write_text("OPENAI_API_KEY=sk-...")
        monkeypatch.setenv("HOME", str(home_dir))
        monkeypatch.setattr(
            "gateway.platforms.base._HERMES_HOME",
            hermes_home,
        )

        assert BasePlatformAdapter.validate_media_delivery_path(str(dotenv)) is None

    def test_strict_mode_envvar_restores_legacy_behavior(self, tmp_path, monkeypatch):
        """``HERMES_MEDIA_DELIVERY_STRICT=1`` switches the allowlist+recency
        logic back on: a stale file outside the allowlist is rejected.
        """
        self._patch_roots(monkeypatch)
        monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")

        stale_pdf = tmp_path / "old.pdf"
        stale_pdf.write_bytes(b"%PDF-1.4")
        two_hours_ago = time.time() - 7200
        os.utime(stale_pdf, (two_hours_ago, two_hours_ago))

        assert BasePlatformAdapter.validate_media_delivery_path(str(stale_pdf)) is None

    def test_strict_mode_truthy_aliases(self, monkeypatch, tmp_path):
        """``HERMES_MEDIA_DELIVERY_STRICT=true|yes|on|1`` all enable strict mode."""
        self._patch_roots(monkeypatch)
        from gateway.platforms.base import _media_delivery_strict_mode

        truthy_values = ("1", "true", "TRUE", "yes", "on")
        falsy_values = ("0", "false", "no", "off", "")
        expectations = [(raw, True) for raw in truthy_values]
        expectations += [(raw, False) for raw in falsy_values]

        for raw, expected in expectations:
            monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", raw)
            assert _media_delivery_strict_mode() is expected

    def test_filter_passes_default_files_through(self, tmp_path, monkeypatch):
        """End-to-end: ``filter_local_delivery_paths`` keeps a stale .md in
        default mode, where strict mode would drop it.
        """
        self._patch_roots(monkeypatch)

        stale_notes = tmp_path / "notes.md"
        stale_notes.write_text("# old\n")
        a_day_ago = time.time() - 86400
        os.utime(stale_notes, (a_day_ago, a_day_ago))

        delivered = BasePlatformAdapter.filter_local_delivery_paths([str(stale_notes)])
        assert delivered == [str(stale_notes.resolve())]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# should_send_media_as_audio
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -234,9 +234,12 @@ async def test_streaming_delivery_blocks_media_path_outside_allowed_roots(tmp_pa
|
||||
"gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
|
||||
(allowed_root,),
|
||||
)
|
||||
# This test exercises the strict-allowlist path; disable recency trust so
|
||||
# the freshly-written tmp_path file is not auto-accepted by the trust
|
||||
# window. (Recency trust is covered separately in test_platform_base.py.)
|
||||
# This test exercises the strict-allowlist path; force strict mode on
|
||||
# and disable recency trust so the freshly-written tmp_path file is not
|
||||
# auto-accepted by the trust window. (Recency trust is covered separately
|
||||
# in test_platform_base.py. The public default flipped to non-strict in
|
||||
# 2026-05; this test pins strict on explicitly.)
|
||||
monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
|
||||
adapter = SimpleNamespace(
|
||||
name="test",
|
||||
|
||||
@@ -158,8 +158,11 @@ def test_build_models_payload_returns_expected_shape():
|
||||
|
||||
|
||||
def test_build_models_payload_does_not_call_provider_model_ids():
|
||||
"""Curated lists must come from list_authenticated_providers, not
|
||||
provider_model_ids — that would pull TTS/embeddings/etc.
|
||||
"""``build_models_payload`` is a thin shape adapter — it delegates the
|
||||
actual curation to ``list_authenticated_providers`` (which DOES call
|
||||
``cached_provider_model_ids`` internally for live discovery, with disk
|
||||
caching). ``build_models_payload`` itself must not call the live fetcher
|
||||
directly; the test pins that boundary.
|
||||
"""
|
||||
rows = [{"slug": "nous", "name": "Nous", "models": ["hermes-4-405b"],
|
||||
"total_models": 1, "is_current": False, "is_user_defined": False,
|
||||
|
||||
@@ -0,0 +1,230 @@
|
||||
"""Regression test for #28181 — kanban worker SIGTERM must terminate the process.
|
||||
|
||||
The single-query signal handler in cli.py (``_signal_handler_q``) raises
|
||||
``KeyboardInterrupt`` to unwind the main thread on SIGTERM/SIGHUP. That works
|
||||
for interactive ``hermes chat -q`` invocations, but kanban workers spawned by
|
||||
the dispatcher are likely to have a non-daemon thread alive (terminal_tool's
|
||||
``_wait_for_process``, custom plugin background workers, etc.). With
|
||||
``KeyboardInterrupt`` only the main thread unwinds; the non-daemon thread
|
||||
keeps the process alive after the gateway has already restarted, the kanban
|
||||
dispatcher's ``_pid_alive`` check returns True forever, and the task stays
|
||||
``running`` indefinitely.
|
||||
|
||||
The fix: when the process is a dispatcher-spawned worker (``HERMES_KANBAN_TASK``
|
||||
env var set), flush logging + stdout/stderr and call ``os._exit(0)`` instead.
|
||||
The kernel reclaims the PID immediately, and ``detect_crashed_workers``
|
||||
reclaims the stale claim on the next dispatcher tick.
|
||||
|
||||
These tests use a synthetic Python script that mirrors the cli.py signal
|
||||
handler shape so we can exercise the exit-path contract without booting the
|
||||
full CLI (which needs a real provider config).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _synthetic_worker_script() -> str:
    """Return the source of a standalone script mirroring cli.py's SIGTERM handler.

    The script starts a non-daemon thread that blocks forever, installs a
    SIGTERM handler, prints ``READY`` and then waits. The handler calls
    ``os._exit(0)`` when ``HERMES_KANBAN_TASK`` is set to a non-empty value
    and raises ``KeyboardInterrupt`` otherwise.

    Keeping a synthetic copy here lets the tests exercise the handler shape
    without the full hermes_cli boot path (config, providers, skills, etc.).
    If the production handler in cli.py drifts, the source-level check
    ``test_real_handler_uses_os_exit_for_kanban_workers`` is meant to catch it.

    Returns:
        The dedented script text, suitable for ``python -c``.
    """
    return textwrap.dedent(
        """
        import os, signal, sys, threading, time

        # Non-daemon thread that blocks forever — simulates the worker
        # thread that would prevent orderly Python shutdown after
        # KeyboardInterrupt unwinds main.
        stuck = threading.Event()
        threading.Thread(target=stuck.wait, daemon=False).start()

        def handler(signum, frame):
            # Mirrors cli.py:_signal_handler_q. Real handler sleeps 1.5s; the
            # test uses a short grace so it runs fast.
            try:
                time.sleep(0.05)
            except Exception:
                pass
            if os.environ.get("HERMES_KANBAN_TASK"):
                try:
                    if hasattr(signal, "SIGALRM"):
                        signal.signal(signal.SIGALRM, lambda *_: os._exit(0))
                        signal.alarm(2)
                except Exception:
                    pass
                sys.stdout.flush()
                sys.stderr.flush()
                os._exit(0)
            raise KeyboardInterrupt()

        signal.signal(signal.SIGTERM, handler)
        print("READY", flush=True)
        try:
            threading.Event().wait()
        except KeyboardInterrupt:
            sys.exit(0)
        """
    )
|
||||
|
||||
|
||||
def _is_alive_like_dispatcher(pid: int) -> bool:
|
||||
"""Mirrors hermes_cli/kanban_db.py:_pid_alive on Linux.
|
||||
|
||||
A zombie is treated as dead — the dispatcher's _pid_alive checks
|
||||
/proc/<pid>/status for State: Z. We replicate that here so a clean
|
||||
os._exit followed by zombie-state is correctly counted as dead.
|
||||
"""
|
||||
if pid <= 0:
|
||||
return False
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
except ProcessLookupError:
|
||||
return False
|
||||
except PermissionError:
|
||||
return True
|
||||
if sys.platform == "linux":
|
||||
try:
|
||||
with open(f"/proc/{pid}/status") as f:
|
||||
for line in f:
|
||||
if line.startswith("State:"):
|
||||
if "Z" in line.split(":", 1)[1]:
|
||||
return False
|
||||
break
|
||||
except (FileNotFoundError, PermissionError, OSError):
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
def _spawn_synthetic(env_overrides: dict) -> subprocess.Popen:
    """Start the synthetic worker and wait until its SIGTERM handler is installed.

    The child runs in its own session with stdout/stderr piped. Its
    environment is a copy of the current one with ``HERMES_KANBAN_TASK``
    removed and ``env_overrides`` applied on top, so whether the kanban exit
    path is active depends only on what the calling test passes in.

    Args:
        env_overrides: environment variables to set for the child.

    Returns:
        The running ``Popen`` object, once the child has printed ``READY``.

    Raises:
        RuntimeError: the child exited or closed stdout before printing
            ``READY``, or a non-READY line arrived after the 5s deadline.
            Any stderr the child produced is appended to the message.
    """
    env = dict(os.environ)
    # Do not let a variable inherited from the test runner's own environment
    # decide which handler branch the child takes.
    env.pop("HERMES_KANBAN_TASK", None)
    env.update(env_overrides)
    proc = subprocess.Popen(
        [sys.executable, "-u", "-c", _synthetic_worker_script()],
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        start_new_session=True,
    )
    # Wait for "READY" so we know the signal handler is installed.
    assert proc.stdout is not None
    # Monotonic clock: the deadline must not move with wall-clock adjustments.
    # NOTE: readline() itself blocks, so the deadline is only checked between
    # lines; it does not bound a child that hangs without writing anything.
    deadline = time.monotonic() + 5.0
    while time.monotonic() < deadline:
        line = proc.stdout.readline()
        if line.startswith(b"READY"):
            return proc
        if not line:
            # EOF: the child exited or closed stdout. Looping further would
            # just spin on empty reads until the deadline.
            break

    # Failure path: kill and reap the child, keeping its stderr for the error.
    proc.kill()
    try:
        _, stderr_bytes = proc.communicate(timeout=2)
    except subprocess.TimeoutExpired:
        stderr_bytes = b""
    detail = (stderr_bytes or b"").decode("utf-8", "replace").strip()
    message = "synthetic worker never signalled READY"
    if detail:
        message = f"{message}: {detail}"
    raise RuntimeError(message)
|
||||
|
||||
|
||||
def _cleanup(proc: subprocess.Popen) -> None:
|
||||
try:
|
||||
os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
|
||||
except (ProcessLookupError, PermissionError):
|
||||
pass
|
||||
try:
|
||||
proc.communicate(timeout=2)
|
||||
except subprocess.TimeoutExpired:
|
||||
proc.kill()
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    sys.platform == "win32",
    reason="SIGTERM semantics differ on Windows; kanban dispatcher is POSIX-only",
)
def test_sigterm_with_kanban_task_env_terminates_quickly():
    """A worker with ``HERMES_KANBAN_TASK`` set must be dead (by the
    dispatcher's definition) within 2s of SIGTERM, even though a non-daemon
    thread is still running in it."""
    worker = _spawn_synthetic({"HERMES_KANBAN_TASK": "t_test_28181"})
    try:
        signalled_at = time.time()
        os.kill(worker.pid, signal.SIGTERM)

        # The handler sleeps ~50ms and then os._exit(0)s; 2s is generous
        # headroom for slow CI runners.
        give_up_at = signalled_at + 2.0
        while time.time() < give_up_at:
            if _is_alive_like_dispatcher(worker.pid):
                time.sleep(0.02)
                continue
            took = time.time() - signalled_at
            assert took < 2.0
            break
        else:
            # Loop ran out of time without ever seeing the process dead.
            pytest.fail(
                "process still alive 2s after SIGTERM with HERMES_KANBAN_TASK set "
                "(dispatcher would keep extending claim) — fix regressed"
            )
    finally:
        _cleanup(worker)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    sys.platform == "win32",
    reason="SIGTERM semantics differ on Windows; kanban dispatcher is POSIX-only",
)
def test_sigterm_without_kanban_task_env_uses_keyboard_interrupt_path():
    """Without ``HERMES_KANBAN_TASK`` the handler takes the KeyboardInterrupt path.

    This is the contrast case showing the fix is gated on the env var: for
    interactive ``hermes chat -q`` (no env var) behavior is unchanged. In the
    synthetic worker the KeyboardInterrupt path ends in ``sys.exit(0)`` while
    a non-daemon thread is still blocked, so the process stays alive; had the
    env-gated ``os._exit(0)`` fired, it would be gone within ~50ms. The test
    therefore asserts the process is still running shortly after SIGTERM.
    """
    # An empty value is falsy for the handler's ``os.environ.get(...)`` check,
    # so the gate is pinned off even if the test runner's own environment has
    # HERMES_KANBAN_TASK set.
    proc = _spawn_synthetic({"HERMES_KANBAN_TASK": ""})
    try:
        os.kill(proc.pid, signal.SIGTERM)
        # Wait a moment for the handler to react.
        time.sleep(0.5)
        assert proc.poll() is None, (
            "worker exited after SIGTERM without HERMES_KANBAN_TASK — the "
            "os._exit path must only fire for kanban workers"
        )
        # Release our ends of the pipes; output is not inspected.
        try:
            if proc.stdout is not None:
                proc.stdout.close()
            if proc.stderr is not None:
                proc.stderr.close()
        except OSError:
            pass
    finally:
        _cleanup(proc)
|
||||
|
||||
|
||||
def test_real_handler_uses_os_exit_for_kanban_workers():
    """Source-level invariant: cli.py's ``_signal_handler_q`` must call
    ``os._exit(0)`` when ``HERMES_KANBAN_TASK`` is set.

    Catches a refactor that accidentally drops the env-gated exit and
    restores the bug. Reading cli.py as text is cheap and avoids the heavy
    CLI import. The check is textual: it looks for both markers in the 4000
    characters that follow the handler's ``def`` line.
    """
    import pathlib

    cli_path = (
        pathlib.Path(__file__).resolve().parent.parent.parent / "cli.py"
    )
    # Decode explicitly: the locale default (e.g. ASCII or cp1252) can fail
    # on non-ASCII characters in the source.
    src = cli_path.read_text(encoding="utf-8")
    # Locate the handler body.
    start = src.find("def _signal_handler_q(signum, frame):")
    assert start != -1, "cli.py is missing _signal_handler_q"
    # Look ahead a fixed 4000 characters for the env-gated os._exit call.
    body = src[start : start + 4000]
    assert "HERMES_KANBAN_TASK" in body, (
        "_signal_handler_q must gate its kanban-worker exit path on "
        "HERMES_KANBAN_TASK — see #28181"
    )
    assert "os._exit(0)" in body, (
        "_signal_handler_q must call os._exit(0) for kanban workers — "
        "raising KeyboardInterrupt orphans the process when non-daemon "
        "threads are alive (see #28181)"
    )
|
||||
@@ -3295,8 +3295,13 @@ class TestRunConversation:
|
||||
assert result["final_response"] == "Recovered after compression"
|
||||
assert result["completed"] is True
|
||||
|
||||
def test_non_minimax_delta_overflow_still_probes_down(self, agent):
|
||||
"""Non-MiniMax providers should keep the generic probe-down behavior."""
|
||||
def test_non_minimax_overflow_without_provider_limit_keeps_context(self, agent):
|
||||
"""Generic overflow without a provider-reported max must NOT probe-step down.
|
||||
|
||||
Previously a 200K configured window would silently drop to the 128K probe
|
||||
tier on a generic overflow error. Now we keep the configured window and
|
||||
rely on compression — see #33669 / PR #33826.
|
||||
"""
|
||||
self._setup_agent(agent)
|
||||
agent.provider = "openrouter"
|
||||
agent.model = "some/unknown-model"
|
||||
@@ -3330,7 +3335,8 @@ class TestRunConversation:
|
||||
result = agent.run_conversation("hello", conversation_history=prefill)
|
||||
|
||||
mock_compress.assert_called_once()
|
||||
assert agent.context_compressor.context_length == 128_000
|
||||
# Context length preserved — no guessed probe-tier step-down.
|
||||
assert agent.context_compressor.context_length == 200_000
|
||||
assert result["final_response"] == "Recovered after compression"
|
||||
assert result["completed"] is True
|
||||
|
||||
|
||||
@@ -11,6 +11,9 @@ The fix introduces:
|
||||
error class and returns the available output token budget.
|
||||
* _ephemeral_max_output_tokens on AIAgent — a one-shot override that
|
||||
caps the output for one retry without touching context_length.
|
||||
* get_context_length_from_provider_error() — accepts only concrete
|
||||
provider-reported lower context limits and refuses guessed probe-tier
|
||||
step-downs when the provider gives no maximum.
|
||||
|
||||
Naming note
|
||||
-----------
|
||||
@@ -75,7 +78,7 @@ class TestParseAvailableOutputTokens:
|
||||
# ── Should NOT detect (returns None) ─────────────────────────────────
|
||||
|
||||
def test_prompt_too_long_is_not_output_cap_error(self):
|
||||
"""'prompt is too long' errors must NOT be caught — they need context halving."""
|
||||
"""'prompt is too long' errors must NOT be caught — they need context-overflow recovery."""
|
||||
msg = "prompt is too long: 205000 tokens > 200000 maximum"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
@@ -101,6 +104,49 @@ class TestParseAvailableOutputTokens:
|
||||
assert self._parse(msg) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Context-overflow recovery — only trust provider-reported limits
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestContextOverflowLimitSelection:
|
||||
"""Context-overflow recovery must not invent a lower window size.
|
||||
|
||||
Some providers only say "input exceeds the context window" without telling
|
||||
Hermes what the actual maximum is. In that case we may compress the
|
||||
conversation, but must not silently probe-step from a user-configured 1M
|
||||
window down to 256K/128K/64K/etc.
|
||||
"""
|
||||
|
||||
def test_generic_overflow_without_provider_limit_keeps_context_length(self):
|
||||
from agent.model_metadata import get_context_length_from_provider_error
|
||||
from agent.model_metadata import get_next_probe_tier
|
||||
from agent.model_metadata import parse_context_limit_from_error
|
||||
|
||||
old_ctx = 1_000_000
|
||||
error_msg = (
|
||||
"Your input exceeds the context window of this model. "
|
||||
"Please adjust your input and try again."
|
||||
)
|
||||
|
||||
assert parse_context_limit_from_error(error_msg) is None
|
||||
assert get_next_probe_tier(old_ctx) == 256_000
|
||||
assert get_context_length_from_provider_error(error_msg, old_ctx) is None
|
||||
|
||||
def test_explicit_provider_limit_still_selects_that_limit(self):
|
||||
from agent.model_metadata import get_context_length_from_provider_error
|
||||
|
||||
error_msg = "prompt is too long: 300000 tokens > 272000 maximum"
|
||||
|
||||
assert get_context_length_from_provider_error(error_msg, 1_000_000) == 272_000
|
||||
|
||||
def test_reported_limit_not_lower_than_current_is_ignored(self):
|
||||
from agent.model_metadata import get_context_length_from_provider_error
|
||||
|
||||
error_msg = "maximum context length is 1000000 tokens"
|
||||
|
||||
assert get_context_length_from_provider_error(error_msg, 272_000) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_anthropic_kwargs — output cap clamping
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -282,19 +328,16 @@ class TestContextNotHalvedOnOutputCapError:
|
||||
assert agent.context_compressor.context_length == old_ctx
|
||||
assert agent._ephemeral_max_output_tokens == 19_936
|
||||
|
||||
def test_prompt_too_long_still_triggers_probe_tier(self):
|
||||
"""Genuine prompt-too-long errors must still use get_next_probe_tier."""
|
||||
def test_prompt_too_long_with_explicit_limit_uses_provider_limit(self):
|
||||
"""Prompt-too-long errors only change context_length when they report a concrete limit."""
|
||||
from agent.model_metadata import get_context_length_from_provider_error
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
from agent.model_metadata import get_next_probe_tier
|
||||
|
||||
error_msg = "prompt is too long: 205000 tokens > 200000 maximum"
|
||||
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
assert available_out is None, "prompt-too-long must not be caught by output-cap parser"
|
||||
|
||||
# The old halving path is still used for this class of error
|
||||
new_ctx = get_next_probe_tier(200_000)
|
||||
assert new_ctx == 128_000
|
||||
assert get_context_length_from_provider_error(error_msg, 1_000_000) == 200_000
|
||||
|
||||
def test_output_cap_error_safety_margin(self):
|
||||
"""The ephemeral value includes a 64-token safety margin below available_out."""
|
||||
|
||||
@@ -2679,6 +2679,64 @@ class TestVacuum:
|
||||
db.vacuum()
|
||||
|
||||
|
||||
class TestOptimizeFts:
|
||||
def test_optimize_returns_index_count(self, db):
|
||||
"""A fresh DB has both FTS indexes; optimize merges both."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
db.append_message(session_id="s1", role="user", content="hello world")
|
||||
assert db.optimize_fts() == 2
|
||||
|
||||
def test_optimize_preserves_search_and_snippet(self, db):
|
||||
"""Optimize is layout-only: MATCH results + snippets are unchanged."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
for i in range(50):
|
||||
db.append_message(
|
||||
session_id="s1",
|
||||
role="user",
|
||||
content=f"needle alpha bravo charlie message {i}",
|
||||
)
|
||||
before = db.search_messages("needle")
|
||||
n = db.optimize_fts()
|
||||
assert n == 2
|
||||
after = db.search_messages("needle")
|
||||
assert len(after) == len(before)
|
||||
assert len(after) > 0
|
||||
# Snippet must still be populated (would be empty/None if the FTS
|
||||
# content shadow were lost during optimize).
|
||||
assert all(row.get("snippet") for row in after)
|
||||
# IDs and snippets are identical before/after — pure layout change.
|
||||
assert [r["id"] for r in after] == [r["id"] for r in before]
|
||||
assert [r["snippet"] for r in after] == [r["snippet"] for r in before]
|
||||
|
||||
def test_optimize_skips_missing_trigram_table(self, db):
|
||||
"""When the trigram index is absent, optimize handles only the porter
|
||||
index and does not raise."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
db.append_message(session_id="s1", role="user", content="hello")
|
||||
# Drop the trigram table + triggers to simulate a disabled/absent index.
|
||||
with db._lock:
|
||||
for trig in (
|
||||
"messages_fts_trigram_insert",
|
||||
"messages_fts_trigram_delete",
|
||||
"messages_fts_trigram_update",
|
||||
):
|
||||
db._conn.execute(f"DROP TRIGGER IF EXISTS {trig}")
|
||||
db._conn.execute("DROP TABLE IF EXISTS messages_fts_trigram")
|
||||
assert db._fts_table_exists("messages_fts_trigram") is False
|
||||
assert db._fts_table_exists("messages_fts") is True
|
||||
# Only the porter index remains -> 1 optimized, no error.
|
||||
assert db.optimize_fts() == 1
|
||||
|
||||
def test_optimize_idempotent(self, db):
|
||||
"""Running optimize twice is safe (second pass is a no-op merge)."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
db.append_message(session_id="s1", role="user", content="repeat me")
|
||||
assert db.optimize_fts() == 2
|
||||
assert db.optimize_fts() == 2
|
||||
# Search still works after repeated optimization.
|
||||
assert len(db.search_messages("repeat")) == 1
|
||||
|
||||
|
||||
class TestAutoMaintenance:
|
||||
def _make_old_ended(self, db, sid: str, days_old: int = 100):
|
||||
"""Create a session that is ended and was started `days_old` days ago."""
|
||||
|
||||
@@ -378,9 +378,12 @@ class TestSendMessageTool:
|
||||
)
|
||||
|
||||
def test_media_tag_outside_allowed_roots_is_not_sent(self, tmp_path, monkeypatch):
|
||||
# This test exercises the strict-allowlist path; disable recency trust
|
||||
# so the freshly-written tmp_path file is not auto-accepted by the
|
||||
# trust window. (Recency trust is covered in test_platform_base.py.)
|
||||
# This test exercises the strict-allowlist path; force strict mode on
|
||||
# and disable recency trust so the freshly-written tmp_path file is
|
||||
# not auto-accepted by the trust window. (Recency trust is covered
|
||||
# in test_platform_base.py. The public default flipped to non-strict
|
||||
# in 2026-05; this test pins strict on explicitly.)
|
||||
monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
|
||||
config, telegram_cfg = _make_config()
|
||||
secret = tmp_path / "secret.pdf"
|
||||
|
||||
@@ -472,6 +472,68 @@ class TestSkillsShSource:
|
||||
requested_urls = [call.args[0] for call in mock_get.call_args_list]
|
||||
assert root_url not in requested_urls
|
||||
|
||||
@patch("tools.skills_hub._write_index_cache")
|
||||
@patch("tools.skills_hub._read_index_cache", return_value=None)
|
||||
@patch("tools.skills_hub.httpx.get")
|
||||
def test_empty_query_walks_sitemap_not_homepage(
|
||||
self, mock_get, _mock_read_cache, _mock_write_cache,
|
||||
):
|
||||
"""Empty query must walk the full sitemap.
|
||||
|
||||
Regression for skills.sh shipping ~858/20000 skills: the previous
|
||||
empty-query path scraped the homepage's featured strip (~200 entries),
|
||||
and build_skills_index.py supplemented it with 28 popular keyword
|
||||
searches to drag the count to ~850. The sitemap walker hits the
|
||||
full ~20k catalog in one pass.
|
||||
"""
|
||||
index_xml = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<sitemap><loc>https://www.skills.sh/sitemap-misc.xml</loc></sitemap>
|
||||
<sitemap><loc>https://www.skills.sh/sitemap-skills-1.xml</loc></sitemap>
|
||||
<sitemap><loc>https://www.skills.sh/sitemap-skills-2.xml</loc></sitemap>
|
||||
</sitemapindex>"""
|
||||
skills_1_xml = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url><loc>https://www.skills.sh/anthropics/skills/frontend-design</loc></url>
|
||||
<url><loc>https://www.skills.sh/anthropics/skills/pdf</loc></url>
|
||||
<url><loc>https://www.skills.sh/vercel-labs/agent-skills/react-best-practices</loc></url>
|
||||
</urlset>"""
|
||||
skills_2_xml = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url><loc>https://www.skills.sh/microsoft/azure-skills/azure-ai</loc></url>
|
||||
<url><loc>https://www.skills.sh/anthropics/skills/frontend-design</loc></url>
|
||||
</urlset>"""
|
||||
|
||||
def side_effect(url, *args, **kwargs):
|
||||
resp = MagicMock(status_code=200)
|
||||
if url.endswith("/sitemap.xml"):
|
||||
resp.text = index_xml
|
||||
elif "sitemap-skills-1" in url:
|
||||
resp.text = skills_1_xml
|
||||
elif "sitemap-skills-2" in url:
|
||||
resp.text = skills_2_xml
|
||||
else:
|
||||
resp.status_code = 404
|
||||
resp.text = ""
|
||||
return resp
|
||||
|
||||
mock_get.side_effect = side_effect
|
||||
|
||||
results = self._source().search("", limit=0)
|
||||
|
||||
# 4 unique skills (the frontend-design dup across sitemaps collapsed).
|
||||
assert len(results) == 4
|
||||
identifiers = {r.identifier for r in results}
|
||||
assert identifiers == {
|
||||
"skills-sh/anthropics/skills/frontend-design",
|
||||
"skills-sh/anthropics/skills/pdf",
|
||||
"skills-sh/vercel-labs/agent-skills/react-best-practices",
|
||||
"skills-sh/microsoft/azure-skills/azure-ai",
|
||||
}
|
||||
# Homepage was NOT fetched — the sitemap path is taken on empty query.
|
||||
urls_called = [call.args[0] for call in mock_get.call_args_list]
|
||||
assert not any(u == "https://skills.sh" or u == "https://skills.sh/" for u in urls_called)
|
||||
|
||||
|
||||
class TestFindSkillInRepoTree:
|
||||
"""Tests for GitHubSource._find_skill_in_repo_tree."""
|
||||
|
||||
@@ -139,7 +139,7 @@ SEND_MESSAGE_SCHEMA = {
|
||||
},
|
||||
"message": {
|
||||
"type": "string",
|
||||
"description": "The message text to send. To send an image or file, include MEDIA:<local_path> for a file under a Hermes media cache or HERMES_MEDIA_ALLOW_DIRS — the platform will deliver it as a native media attachment."
|
||||
"description": "The message text to send. To send an image or file, include MEDIA:<local_path> (e.g. 'MEDIA:/tmp/report.pdf') in the message — the platform will deliver it as a native media attachment."
|
||||
}
|
||||
},
|
||||
"required": []
|
||||
|
||||
+105
-1
@@ -1217,6 +1217,16 @@ class SkillsShSource(SkillSource):
|
||||
|
||||
BASE_URL = "https://skills.sh"
|
||||
SEARCH_URL = f"{BASE_URL}/api/search"
|
||||
# Sitemap index — the real catalog source. The homepage scrape only
|
||||
# exposes a curated featured strip (~200 entries); the sitemap covers
|
||||
# the full ~20k+ catalog. https://www.skills.sh/sitemap.xml points at
|
||||
# sitemap-skills-1.xml + sitemap-skills-2.xml, each up to 10k URLs.
|
||||
SITEMAP_INDEX_URL = "https://www.skills.sh/sitemap.xml"
|
||||
_SITEMAP_LOC_RE = re.compile(r"<loc>([^<]+)</loc>", re.IGNORECASE)
|
||||
_SITEMAP_SKILL_RE = re.compile(
|
||||
r"^https?://(?:www\.)?skills\.sh/(?P<owner>[^/]+)/(?P<repo>[^/]+)/(?P<skill>[^/]+)/?$",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_SKILL_LINK_RE = re.compile(r'href=["\']/(?P<id>(?!agents/|_next/|api/)[^"\'/]+/[^"\'/]+/[^"\'/]+)["\']')
|
||||
_INSTALL_CMD_RE = re.compile(
|
||||
r'npx\s+skills\s+add\s+(?P<repo>https?://github\.com/[^\s<]+|[^\s<]+)'
|
||||
@@ -1246,7 +1256,10 @@ class SkillsShSource(SkillSource):
|
||||
|
||||
def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
|
||||
if not query.strip():
|
||||
return self._featured_skills(limit)
|
||||
# Empty query = bulk catalog dump (what build_skills_index.py
|
||||
# calls with). The homepage scrape only sees ~200 featured
|
||||
# entries; the sitemap walks the full ~20k+ catalog.
|
||||
return self._sitemap_catalog(limit)
|
||||
|
||||
cache_key = f"skills_sh_search_{hashlib.md5(f'{query}|{limit}'.encode()).hexdigest()}"
|
||||
cached = _read_index_cache(cache_key)
|
||||
@@ -1307,6 +1320,97 @@ class SkillsShSource(SkillSource):
|
||||
return self._finalize_inspect_meta(meta, canonical, detail)
|
||||
return None
|
||||
|
||||
def _sitemap_catalog(self, limit: int) -> List[SkillMeta]:
|
||||
"""Walk the skills.sh sitemap to enumerate the full catalog.
|
||||
|
||||
Cached for the standard index TTL so we don't refetch ~2 MB of
|
||||
sitemap XML per build. Falls back to ``_featured_skills`` if the
|
||||
sitemap is unreachable or empty (network failure, hostname
|
||||
change, etc.).
|
||||
"""
|
||||
cache_key = "skills_sh_sitemap_v1"
|
||||
cached = _read_index_cache(cache_key)
|
||||
if cached is not None:
|
||||
metas = [SkillMeta(**item) for item in cached]
|
||||
return metas[:limit] if limit > 0 else metas
|
||||
|
||||
# skills.sh serves the per-skill sitemaps brotli-compressed, and
|
||||
# httpx's optional brotlicffi backend has a streaming-decode bug
|
||||
# that fails on these specific payloads. Excluding "br" from
|
||||
# Accept-Encoding makes the server fall back to gzip (or
|
||||
# identity), which works on every httpx install.
|
||||
sitemap_headers = {"Accept-Encoding": "gzip"}
|
||||
|
||||
# Step 1: fetch the sitemap index → list of skill-sitemap URLs.
|
||||
skill_sitemap_urls: List[str] = []
|
||||
try:
|
||||
resp = httpx.get(
|
||||
self.SITEMAP_INDEX_URL,
|
||||
timeout=20,
|
||||
follow_redirects=True,
|
||||
headers=sitemap_headers,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
return self._featured_skills(limit)
|
||||
for match in self._SITEMAP_LOC_RE.finditer(resp.text):
|
||||
loc = match.group(1).strip()
|
||||
# Sitemap index entries that point at the per-skill maps.
|
||||
if "sitemap-skills" in loc:
|
||||
skill_sitemap_urls.append(loc)
|
||||
except httpx.HTTPError:
|
||||
return self._featured_skills(limit)
|
||||
|
||||
if not skill_sitemap_urls:
|
||||
return self._featured_skills(limit)
|
||||
|
||||
# Step 2: fetch each skill sitemap and collect canonical "owner/repo/skill" IDs.
|
||||
seen: set[str] = set()
|
||||
results: List[SkillMeta] = []
|
||||
for sitemap_url in skill_sitemap_urls:
|
||||
try:
|
||||
resp = httpx.get(
|
||||
sitemap_url,
|
||||
timeout=30,
|
||||
follow_redirects=True,
|
||||
headers=sitemap_headers,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
continue
|
||||
except httpx.HTTPError:
|
||||
continue
|
||||
for loc_match in self._SITEMAP_LOC_RE.finditer(resp.text):
|
||||
url = loc_match.group(1).strip()
|
||||
m = self._SITEMAP_SKILL_RE.match(url)
|
||||
if not m:
|
||||
continue
|
||||
owner = m.group("owner")
|
||||
repo_name = m.group("repo")
|
||||
skill_name = m.group("skill")
|
||||
canonical = f"{owner}/{repo_name}/{skill_name}"
|
||||
if canonical in seen:
|
||||
continue
|
||||
seen.add(canonical)
|
||||
repo = f"{owner}/{repo_name}"
|
||||
results.append(SkillMeta(
|
||||
name=skill_name,
|
||||
description=f"Indexed by skills.sh from {repo}",
|
||||
source="skills.sh",
|
||||
identifier=self._wrap_identifier(canonical),
|
||||
trust_level=self.github.trust_level_for(canonical),
|
||||
repo=repo,
|
||||
path=skill_name,
|
||||
extra={
|
||||
"detail_url": f"{self.BASE_URL}/{canonical}",
|
||||
"repo_url": f"https://github.com/{repo}",
|
||||
},
|
||||
))
|
||||
|
||||
if not results:
|
||||
return self._featured_skills(limit)
|
||||
|
||||
_write_index_cache(cache_key, [_skill_meta_to_dict(item) for item in results])
|
||||
return results[:limit] if limit > 0 else results
|
||||
|
||||
def _featured_skills(self, limit: int) -> List[SkillMeta]:
|
||||
cache_key = "skills_sh_featured"
|
||||
cached = _read_index_cache(cache_key)
|
||||
|
||||
@@ -1112,7 +1112,7 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
|
||||
from hermes_cli.model_switch import parse_model_flags, switch_model
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
model_input, explicit_provider, persist_global = parse_model_flags(raw_input)
|
||||
model_input, explicit_provider, persist_global, _force_refresh = parse_model_flags(raw_input)
|
||||
if not model_input:
|
||||
raise ValueError("model value required")
|
||||
|
||||
|
||||
@@ -91,6 +91,43 @@ export async function fetchJSON<T>(url: string, init?: RequestInit): Promise<T>
|
||||
// Never resolve — the page is about to unload.
|
||||
return new Promise<T>(() => {});
|
||||
}
|
||||
// Loopback mode: ``_SESSION_TOKEN`` rotates on every server restart
|
||||
// (``hermes update``, ``hermes gateway restart``, etc.). A tab kept
|
||||
// open across the restart holds the OLD token in
|
||||
// ``window.__HERMES_SESSION_TOKEN__`` from the previous HTML render,
|
||||
// so every fetch returns 401. The HTML is served ``Cache-Control:
|
||||
// no-store`` so a reload picks up the freshly-injected token. Trigger
|
||||
// that reload once on the first stale-token 401 — gated mode is
|
||||
// handled above, so reaching here in gated mode means a real
|
||||
// middleware failure that should not reload-loop.
|
||||
if (!window.__HERMES_AUTH_REQUIRED__) {
|
||||
let alreadyReloaded = false;
|
||||
try {
|
||||
alreadyReloaded =
|
||||
sessionStorage.getItem("hermes.tokenReloadAttempted") === "1";
|
||||
} catch {
|
||||
/* SSR / privacy mode — fall through to throw */
|
||||
}
|
||||
if (!alreadyReloaded) {
|
||||
try {
|
||||
sessionStorage.setItem("hermes.tokenReloadAttempted", "1");
|
||||
} catch {
|
||||
/* SSR / privacy mode — best effort */
|
||||
}
|
||||
window.location.reload();
|
||||
return new Promise<T>(() => {});
|
||||
}
|
||||
}
|
||||
}
|
||||
if (res.ok) {
|
||||
// Clear the stale-token reload guard: a successful 2xx proves the
|
||||
// current ``window.__HERMES_SESSION_TOKEN__`` is valid, so the next
|
||||
// 401 — if any — should be allowed to trigger its own reload cycle.
|
||||
try {
|
||||
sessionStorage.removeItem("hermes.tokenReloadAttempted");
|
||||
} catch {
|
||||
/* SSR / privacy mode — ignore */
|
||||
}
|
||||
}
|
||||
if (!res.ok) {
|
||||
const text = await res.text().catch(() => res.statusText);
|
||||
|
||||
Reference in New Issue
Block a user