Compare commits
67 Commits
optional-b
...
feat/check
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c1775de56f | ||
|
|
de6750ed23 | ||
|
|
c0ffd6b704 | ||
|
|
8b9de366f2 | ||
|
|
60d3f79c72 | ||
|
|
6f3a673aba | ||
|
|
ab6a6338c4 | ||
|
|
1ec8c1fcaa | ||
|
|
739eb6702e | ||
|
|
1aa7badb3c | ||
|
|
ee4008431a | ||
|
|
88f8bcde38 | ||
|
|
2285615010 | ||
|
|
805ce8177b | ||
|
|
bdce33e239 | ||
|
|
9be8d88ccc | ||
|
|
6ab3ebf195 | ||
|
|
0a628c1aef | ||
|
|
36328a996f | ||
|
|
4bc32dc0f1 | ||
|
|
4de5e017f1 | ||
|
|
3e352f8a0d | ||
|
|
28ae5db9b0 | ||
|
|
d5811c887a | ||
|
|
975fd86dc4 | ||
|
|
0ff7fe3ee2 | ||
|
|
b9d55d5719 | ||
|
|
ab7dc22984 | ||
|
|
bf8350ac18 | ||
|
|
a5c6348d41 | ||
|
|
320f881e0b | ||
|
|
172a38c344 | ||
|
|
8bc0d4f77d | ||
|
|
8eabdefa8a | ||
|
|
f658af45c2 | ||
|
|
5212644861 | ||
|
|
1151f84351 | ||
|
|
9abd6bf342 | ||
|
|
d2c7ef6b41 | ||
|
|
a34102049b | ||
|
|
ef5d811aba | ||
|
|
2d44ed1c5b | ||
|
|
fa2e72ae9c | ||
|
|
5bfc4ed53b | ||
|
|
520aec20e0 | ||
|
|
64bec1d060 | ||
|
|
ac58309dbd | ||
|
|
a5a5d82a21 | ||
|
|
34e8d088c2 | ||
|
|
c754135965 | ||
|
|
53b4b7651a | ||
|
|
a857321463 | ||
|
|
33cfe1515d | ||
|
|
3b43f7267a | ||
|
|
1755a9e38a | ||
|
|
566aeaeefa | ||
|
|
7a0544ab57 | ||
|
|
453e0677d6 | ||
|
|
32dbd31b9a | ||
|
|
81986022b7 | ||
|
|
dcba291d45 | ||
|
|
48e65631f6 | ||
|
|
14a11d24b4 | ||
|
|
36214d14db | ||
|
|
71c0cd00e5 | ||
|
|
15561ec425 | ||
|
|
7d79ce92ac |
@@ -560,12 +560,16 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
forced = _get_auxiliary_provider("vision")
|
||||
if forced != "auto":
|
||||
return _resolve_forced_provider(forced)
|
||||
# Auto: only multimodal-capable providers
|
||||
for try_fn in (_try_openrouter, _try_nous, _try_codex):
|
||||
# Auto: try providers known to support multimodal first, then fall
|
||||
# back to the user's custom endpoint. Many local models (Qwen-VL,
|
||||
# LLaVA, Pixtral, etc.) support vision — skipping them entirely
|
||||
# caused silent failures for local-only users.
|
||||
for try_fn in (_try_openrouter, _try_nous, _try_codex,
|
||||
_try_custom_endpoint):
|
||||
client, model = try_fn()
|
||||
if client is not None:
|
||||
return client, model
|
||||
logger.debug("Auxiliary vision client: none available (auto only tries OpenRouter/Nous/Codex)")
|
||||
logger.debug("Auxiliary vision client: none available")
|
||||
return None, None
|
||||
|
||||
|
||||
|
||||
@@ -16,12 +16,55 @@ _RED = "\033[31m"
|
||||
_RESET = "\033[0m"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Skin-aware helpers (lazy import to avoid circular deps)
|
||||
# =========================================================================
|
||||
|
||||
def _get_skin():
|
||||
"""Get the active skin config, or None if not available."""
|
||||
try:
|
||||
from hermes_cli.skin_engine import get_active_skin
|
||||
return get_active_skin()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def get_skin_faces(key: str, default: list) -> list:
|
||||
"""Get spinner face list from active skin, falling back to default."""
|
||||
skin = _get_skin()
|
||||
if skin:
|
||||
faces = skin.get_spinner_list(key)
|
||||
if faces:
|
||||
return faces
|
||||
return default
|
||||
|
||||
|
||||
def get_skin_verbs() -> list:
|
||||
"""Get thinking verbs from active skin."""
|
||||
skin = _get_skin()
|
||||
if skin:
|
||||
verbs = skin.get_spinner_list("thinking_verbs")
|
||||
if verbs:
|
||||
return verbs
|
||||
return KawaiiSpinner.THINKING_VERBS
|
||||
|
||||
|
||||
def get_skin_tool_prefix() -> str:
|
||||
"""Get tool output prefix character from active skin."""
|
||||
skin = _get_skin()
|
||||
if skin:
|
||||
return skin.tool_prefix
|
||||
return "┊"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Tool preview (one-line summary of a tool call's primary argument)
|
||||
# =========================================================================
|
||||
|
||||
def build_tool_preview(tool_name: str, args: dict, max_len: int = 40) -> str:
|
||||
"""Build a short preview of a tool call's primary argument for display."""
|
||||
if not args:
|
||||
return None
|
||||
primary_args = {
|
||||
"terminal": "command", "web_search": "query", "web_extract": "urls",
|
||||
"read_file": "path", "write_file": "path", "patch": "path",
|
||||
@@ -177,13 +220,21 @@ class KawaiiSpinner:
|
||||
pass
|
||||
|
||||
def _animate(self):
|
||||
# Cache skin wings at start (avoid per-frame imports)
|
||||
skin = _get_skin()
|
||||
wings = skin.get_spinner_wings() if skin else []
|
||||
|
||||
while self.running:
|
||||
if os.getenv("HERMES_SPINNER_PAUSE"):
|
||||
time.sleep(0.1)
|
||||
continue
|
||||
frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)]
|
||||
elapsed = time.time() - self.start_time
|
||||
line = f" {frame} {self.message} ({elapsed:.1f}s)"
|
||||
if wings:
|
||||
left, right = wings[self.frame_idx % len(wings)]
|
||||
line = f" {left} {frame} {self.message} {right} ({elapsed:.1f}s)"
|
||||
else:
|
||||
line = f" {frame} {self.message} ({elapsed:.1f}s)"
|
||||
pad = max(self.last_line_len - len(line), 0)
|
||||
self._write(f"\r{line}{' ' * pad}", end='', flush=True)
|
||||
self.last_line_len = len(line)
|
||||
@@ -332,6 +383,7 @@ def get_cute_tool_message(
|
||||
"""
|
||||
dur = f"{duration:.1f}s"
|
||||
is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
|
||||
skin_prefix = get_skin_tool_prefix()
|
||||
|
||||
def _trunc(s, n=40):
|
||||
s = str(s)
|
||||
@@ -342,7 +394,9 @@ def get_cute_tool_message(
|
||||
return ("..." + p[-(n-3):]) if len(p) > n else p
|
||||
|
||||
def _wrap(line: str) -> str:
|
||||
"""Append failure suffix when the tool failed."""
|
||||
"""Apply skin tool prefix and failure suffix."""
|
||||
if skin_prefix != "┊":
|
||||
line = line.replace("┊", skin_prefix, 1)
|
||||
if not is_failure:
|
||||
return line
|
||||
return f"{line}{failure_suffix}"
|
||||
|
||||
@@ -402,11 +402,13 @@ agent:
|
||||
# discord: [web, vision, skills, todo]
|
||||
#
|
||||
# If not set, defaults are:
|
||||
# cli: hermes-cli (everything + cronjob management)
|
||||
# telegram: hermes-telegram (terminal, file, web, vision, image, tts, browser, skills, todo, cronjob, messaging)
|
||||
# discord: hermes-discord (same as telegram)
|
||||
# whatsapp: hermes-whatsapp (same as telegram)
|
||||
# slack: hermes-slack (same as telegram)
|
||||
# cli: hermes-cli (everything + cronjob management)
|
||||
# telegram: hermes-telegram (terminal, file, web, vision, image, tts, browser, skills, todo, cronjob, messaging)
|
||||
# discord: hermes-discord (same as telegram)
|
||||
# whatsapp: hermes-whatsapp (same as telegram)
|
||||
# slack: hermes-slack (same as telegram)
|
||||
# signal: hermes-signal (same as telegram)
|
||||
# homeassistant: hermes-homeassistant (same as telegram)
|
||||
#
|
||||
platform_toolsets:
|
||||
cli: [hermes-cli]
|
||||
@@ -414,6 +416,8 @@ platform_toolsets:
|
||||
discord: [hermes-discord]
|
||||
whatsapp: [hermes-whatsapp]
|
||||
slack: [hermes-slack]
|
||||
signal: [hermes-signal]
|
||||
homeassistant: [hermes-homeassistant]
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Available toolsets (use these names in platform_toolsets or the toolsets list)
|
||||
|
||||
334
cli.py
334
cli.py
@@ -19,6 +19,7 @@ import sys
|
||||
import json
|
||||
import atexit
|
||||
import uuid
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
@@ -45,6 +46,11 @@ from prompt_toolkit.widgets import TextArea
|
||||
from prompt_toolkit.key_binding import KeyBindings
|
||||
from prompt_toolkit import print_formatted_text as _pt_print
|
||||
from prompt_toolkit.formatted_text import ANSI as _PT_ANSI
|
||||
try:
|
||||
from prompt_toolkit.cursor_shapes import CursorShape
|
||||
_STEADY_CURSOR = CursorShape.BLOCK # Non-blinking block cursor
|
||||
except (ImportError, AttributeError):
|
||||
_STEADY_CURSOR = None
|
||||
import threading
|
||||
import queue
|
||||
|
||||
@@ -158,6 +164,7 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"singularity_image": "docker://python:3.11",
|
||||
"modal_image": "python:3.11",
|
||||
"daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
"docker_volumes": [], # host:container volume mounts for Docker backend
|
||||
},
|
||||
"browser": {
|
||||
"inactivity_timeout": 120, # Auto-cleanup inactive browser sessions after 2 min
|
||||
@@ -195,6 +202,7 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"display": {
|
||||
"compact": False,
|
||||
"resume_display": "full",
|
||||
"skin": "default",
|
||||
},
|
||||
"clarify": {
|
||||
"timeout": 120, # Seconds to wait for a clarify answer before auto-proceeding
|
||||
@@ -376,6 +384,13 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
# Load configuration at module startup
|
||||
CLI_CONFIG = load_cli_config()
|
||||
|
||||
# Initialize the skin engine from config
|
||||
try:
|
||||
from hermes_cli.skin_engine import init_skin_from_config
|
||||
init_skin_from_config(CLI_CONFIG)
|
||||
except Exception:
|
||||
pass # Skin engine is optional — default skin used if unavailable
|
||||
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table
|
||||
@@ -725,6 +740,7 @@ HERMES_CADUCEUS = """[#CD7F32]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣀⡀⠀⣀⣀
|
||||
[#B8860B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]"""
|
||||
|
||||
# Compact banner for smaller terminals (fallback)
|
||||
# Note: built dynamically by _build_compact_banner() to fit terminal width
|
||||
COMPACT_BANNER = """
|
||||
[bold #FFD700]╔══════════════════════════════════════════════════════════════╗[/]
|
||||
[bold #FFD700]║[/] [#FFBF00]⚕ NOUS HERMES[/] [dim #B8860B]- AI Agent Framework[/] [bold #FFD700]║[/]
|
||||
@@ -733,6 +749,26 @@ COMPACT_BANNER = """
|
||||
"""
|
||||
|
||||
|
||||
def _build_compact_banner() -> str:
|
||||
"""Build a compact banner that fits the current terminal width."""
|
||||
w = min(shutil.get_terminal_size().columns - 2, 64)
|
||||
if w < 30:
|
||||
return "\n[#FFBF00]⚕ NOUS HERMES[/] [dim #B8860B]- Nous Research[/]\n"
|
||||
inner = w - 2 # inside the box border
|
||||
bar = "═" * w
|
||||
line1 = "⚕ NOUS HERMES - AI Agent Framework"
|
||||
line2 = "Messenger of the Digital Gods · Nous Research"
|
||||
# Truncate and pad to fit
|
||||
line1 = line1[:inner - 2].ljust(inner - 2)
|
||||
line2 = line2[:inner - 2].ljust(inner - 2)
|
||||
return (
|
||||
f"\n[bold #FFD700]╔{bar}╗[/]\n"
|
||||
f"[bold #FFD700]║[/] [#FFBF00]{line1}[/] [bold #FFD700]║[/]\n"
|
||||
f"[bold #FFD700]║[/] [dim #B8860B]{line2}[/] [bold #FFD700]║[/]\n"
|
||||
f"[bold #FFD700]╚{bar}╝[/]\n"
|
||||
)
|
||||
|
||||
|
||||
def _get_available_skills() -> Dict[str, List[str]]:
|
||||
"""
|
||||
Scan ~/.hermes/skills/ and return skills grouped by category.
|
||||
@@ -930,10 +966,12 @@ def build_welcome_banner(console: Console, model: str, cwd: str, tools: List[dic
|
||||
padding=(0, 2),
|
||||
)
|
||||
|
||||
# Print the big HERMES-AGENT logo first (no panel wrapper for full width)
|
||||
console.print()
|
||||
console.print(HERMES_AGENT_LOGO)
|
||||
# Print the big HERMES-AGENT logo — skip if terminal is too narrow
|
||||
console.print()
|
||||
term_width = shutil.get_terminal_size().columns
|
||||
if term_width >= 95:
|
||||
console.print(HERMES_AGENT_LOGO)
|
||||
console.print()
|
||||
|
||||
# Print the panel with caduceus and info
|
||||
console.print(outer_panel)
|
||||
@@ -1021,6 +1059,7 @@ class HermesCLI:
|
||||
verbose: bool = False,
|
||||
compact: bool = False,
|
||||
resume: str = None,
|
||||
checkpoints: bool = False,
|
||||
):
|
||||
"""
|
||||
Initialize the Hermes CLI.
|
||||
@@ -1102,6 +1141,13 @@ class HermesCLI:
|
||||
if invalid:
|
||||
self.console.print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]")
|
||||
|
||||
# Filesystem checkpoints: CLI flag > config
|
||||
cp_cfg = CLI_CONFIG.get("checkpoints", {})
|
||||
if isinstance(cp_cfg, bool):
|
||||
cp_cfg = {"enabled": cp_cfg}
|
||||
self.checkpoints_enabled = checkpoints or cp_cfg.get("enabled", False)
|
||||
self.checkpoint_max_snapshots = cp_cfg.get("max_snapshots", 50)
|
||||
|
||||
# Ephemeral system prompt: env var takes precedence, then config
|
||||
self.system_prompt = (
|
||||
os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "")
|
||||
@@ -1163,6 +1209,7 @@ class HermesCLI:
|
||||
# History file for persistent input recall across sessions
|
||||
self._history_file = Path.home() / ".hermes_history"
|
||||
self._last_invalidate: float = 0.0 # throttle UI repaints
|
||||
self._spinner_text: str = "" # thinking spinner text for TUI
|
||||
|
||||
def _invalidate(self, min_interval: float = 0.25) -> None:
|
||||
"""Throttled UI repaint — prevents terminal blinking on slow/SSH connections."""
|
||||
@@ -1226,6 +1273,11 @@ class HermesCLI:
|
||||
|
||||
return changed
|
||||
|
||||
def _on_thinking(self, text: str) -> None:
|
||||
"""Called by agent when thinking starts/stops. Updates TUI spinner."""
|
||||
self._spinner_text = text or ""
|
||||
self._invalidate()
|
||||
|
||||
def _ensure_runtime_credentials(self) -> bool:
|
||||
"""
|
||||
Ensure runtime credentials are resolved before agent use.
|
||||
@@ -1364,6 +1416,9 @@ class HermesCLI:
|
||||
clarify_callback=self._clarify_callback,
|
||||
honcho_session_key=self.session_id,
|
||||
fallback_model=self._fallback_model,
|
||||
thinking_callback=self._on_thinking,
|
||||
checkpoints_enabled=self.checkpoints_enabled,
|
||||
checkpoint_max_snapshots=self.checkpoint_max_snapshots,
|
||||
)
|
||||
# Apply any pending title now that the session exists in the DB
|
||||
if self._pending_title and self._session_db:
|
||||
@@ -1383,8 +1438,13 @@ class HermesCLI:
|
||||
"""Display the welcome banner in Claude Code style."""
|
||||
self.console.clear()
|
||||
|
||||
if self.compact:
|
||||
self.console.print(COMPACT_BANNER)
|
||||
# Auto-compact for narrow terminals — the full banner with caduceus
|
||||
# + tool list needs ~80 columns minimum to render without wrapping.
|
||||
term_width = shutil.get_terminal_size().columns
|
||||
use_compact = self.compact or term_width < 80
|
||||
|
||||
if use_compact:
|
||||
self.console.print(_build_compact_banner())
|
||||
self._show_status()
|
||||
else:
|
||||
# Get tools for display
|
||||
@@ -1628,6 +1688,55 @@ class HermesCLI:
|
||||
self._image_counter -= 1
|
||||
return False
|
||||
|
||||
def _handle_rollback_command(self, command: str):
|
||||
"""Handle /rollback — list or restore filesystem checkpoints."""
|
||||
from tools.checkpoint_manager import CheckpointManager, format_checkpoint_list
|
||||
|
||||
if not hasattr(self, 'agent') or not self.agent:
|
||||
print(" No active agent session.")
|
||||
return
|
||||
|
||||
mgr = self.agent._checkpoint_mgr
|
||||
if not mgr.enabled:
|
||||
print(" Checkpoints are not enabled.")
|
||||
print(" Enable with: hermes --checkpoints")
|
||||
print(" Or in config.yaml: checkpoints: { enabled: true }")
|
||||
return
|
||||
|
||||
cwd = os.getenv("TERMINAL_CWD", os.getcwd())
|
||||
parts = command.split(maxsplit=1)
|
||||
arg = parts[1].strip() if len(parts) > 1 else ""
|
||||
|
||||
if not arg:
|
||||
# List checkpoints
|
||||
checkpoints = mgr.list_checkpoints(cwd)
|
||||
print(format_checkpoint_list(checkpoints, cwd))
|
||||
else:
|
||||
# Restore by number or hash
|
||||
checkpoints = mgr.list_checkpoints(cwd)
|
||||
if not checkpoints:
|
||||
print(f" No checkpoints found for {cwd}")
|
||||
return
|
||||
|
||||
target_hash = None
|
||||
try:
|
||||
idx = int(arg) - 1 # 1-indexed for user
|
||||
if 0 <= idx < len(checkpoints):
|
||||
target_hash = checkpoints[idx]["hash"]
|
||||
else:
|
||||
print(f" Invalid checkpoint number. Use 1-{len(checkpoints)}.")
|
||||
return
|
||||
except ValueError:
|
||||
# Try as a git hash
|
||||
target_hash = arg
|
||||
|
||||
result = mgr.restore(cwd, target_hash)
|
||||
if result["success"]:
|
||||
print(f" ✅ Restored to checkpoint {result['restored_to']}: {result['reason']}")
|
||||
print(f" A pre-rollback snapshot was saved automatically.")
|
||||
else:
|
||||
print(f" ❌ {result['error']}")
|
||||
|
||||
def _handle_paste_command(self):
|
||||
"""Handle /paste — explicitly check clipboard for an image.
|
||||
|
||||
@@ -2394,8 +2503,9 @@ class HermesCLI:
|
||||
# and gets mangled by patch_stdout).
|
||||
if self._app:
|
||||
cc = ChatConsole()
|
||||
if self.compact:
|
||||
cc.print(COMPACT_BANNER)
|
||||
term_w = shutil.get_terminal_size().columns
|
||||
if self.compact or term_w < 80:
|
||||
cc.print(_build_compact_banner())
|
||||
else:
|
||||
tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets, quiet_mode=True)
|
||||
cwd = os.getenv("TERMINAL_CWD", os.getcwd())
|
||||
@@ -2636,6 +2746,10 @@ class HermesCLI:
|
||||
self._handle_paste_command()
|
||||
elif cmd_lower == "/reload-mcp":
|
||||
self._reload_mcp()
|
||||
elif cmd_lower.startswith("/rollback"):
|
||||
self._handle_rollback_command(cmd_original)
|
||||
elif cmd_lower.startswith("/skin"):
|
||||
self._handle_skin_command(cmd_original)
|
||||
else:
|
||||
# Check for skill slash commands (/gif-search, /axolotl, etc.)
|
||||
base_cmd = cmd_lower.split()[0]
|
||||
@@ -2655,6 +2769,43 @@ class HermesCLI:
|
||||
|
||||
return True
|
||||
|
||||
def _handle_skin_command(self, cmd: str):
|
||||
"""Handle /skin [name] — show or change the display skin."""
|
||||
try:
|
||||
from hermes_cli.skin_engine import list_skins, set_active_skin, get_active_skin_name
|
||||
except ImportError:
|
||||
print("Skin engine not available.")
|
||||
return
|
||||
|
||||
parts = cmd.strip().split(maxsplit=1)
|
||||
if len(parts) < 2 or not parts[1].strip():
|
||||
# Show current skin and list available
|
||||
current = get_active_skin_name()
|
||||
skins = list_skins()
|
||||
print(f"\n Current skin: {current}")
|
||||
print(f" Available skins:")
|
||||
for s in skins:
|
||||
marker = " ●" if s["name"] == current else " "
|
||||
source = f" ({s['source']})" if s["source"] == "user" else ""
|
||||
print(f" {marker} {s['name']}{source} — {s['description']}")
|
||||
print(f"\n Usage: /skin <name>")
|
||||
print(f" Custom skins: drop a YAML file in ~/.hermes/skins/\n")
|
||||
return
|
||||
|
||||
new_skin = parts[1].strip().lower()
|
||||
available = {s["name"] for s in list_skins()}
|
||||
if new_skin not in available:
|
||||
print(f" Unknown skin: {new_skin}")
|
||||
print(f" Available: {', '.join(sorted(available))}")
|
||||
return
|
||||
|
||||
set_active_skin(new_skin)
|
||||
if save_config_value("display.skin", new_skin):
|
||||
print(f" Skin set to: {new_skin} (saved)")
|
||||
else:
|
||||
print(f" Skin set to: {new_skin}")
|
||||
print(" Note: banner colors will update on next session start.")
|
||||
|
||||
def _toggle_verbose(self):
|
||||
"""Cycle tool progress mode: off → new → all → verbose → off."""
|
||||
cycle = ["off", "new", "all", "verbose"]
|
||||
@@ -3126,10 +3277,22 @@ class HermesCLI:
|
||||
|
||||
if response:
|
||||
w = shutil.get_terminal_size().columns
|
||||
label = " ⚕ Hermes "
|
||||
# Use skin branding for response box label
|
||||
try:
|
||||
from hermes_cli.skin_engine import get_active_skin
|
||||
_skin = get_active_skin()
|
||||
label = _skin.get_branding("response_label", " ⚕ Hermes ")
|
||||
_resp_color = _skin.get_color("response_border", "")
|
||||
if _resp_color:
|
||||
_resp_start = f"\033[38;2;{int(_resp_color[1:3], 16)};{int(_resp_color[3:5], 16)};{int(_resp_color[5:7], 16)}m"
|
||||
else:
|
||||
_resp_start = _GOLD
|
||||
except Exception:
|
||||
label = " ⚕ Hermes "
|
||||
_resp_start = _GOLD
|
||||
fill = w - 2 - len(label) # 2 for ╭ and ╮
|
||||
top = f"{_GOLD}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}"
|
||||
bot = f"{_GOLD}╰{'─' * (w - 2)}╯{_RST}"
|
||||
top = f"{_resp_start}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}"
|
||||
bot = f"{_resp_start}╰{'─' * (w - 2)}╯{_RST}"
|
||||
|
||||
# Render box + response as a single _cprint call so
|
||||
# nothing can interleave between the box borders.
|
||||
@@ -3198,7 +3361,15 @@ class HermesCLI:
|
||||
if self._preload_resumed_session():
|
||||
self._display_resumed_history()
|
||||
|
||||
self.console.print("[#FFF8DC]Welcome to Hermes Agent! Type your message or /help for commands.[/]")
|
||||
try:
|
||||
from hermes_cli.skin_engine import get_active_skin
|
||||
_welcome_skin = get_active_skin()
|
||||
_welcome_text = _welcome_skin.get_branding("welcome", "Welcome to Hermes Agent! Type your message or /help for commands.")
|
||||
_welcome_color = _welcome_skin.get_color("banner_text", "#FFF8DC")
|
||||
except Exception:
|
||||
_welcome_text = "Welcome to Hermes Agent! Type your message or /help for commands."
|
||||
_welcome_color = "#FFF8DC"
|
||||
self.console.print(f"[{_welcome_color}]{_welcome_text}[/]")
|
||||
self.console.print()
|
||||
|
||||
# State for async operation
|
||||
@@ -3586,6 +3757,8 @@ class HermesCLI:
|
||||
return "type password (hidden), Enter to skip"
|
||||
if cli_ref._approval_state:
|
||||
return ""
|
||||
if cli_ref._clarify_freetext:
|
||||
return "type your answer here and press Enter"
|
||||
if cli_ref._clarify_state:
|
||||
return ""
|
||||
if cli_ref._agent_running:
|
||||
@@ -3636,6 +3809,20 @@ class HermesCLI:
|
||||
# right up against the top rule of the input area
|
||||
return 1 if cli_ref._agent_running else 0
|
||||
|
||||
def get_spinner_text():
|
||||
txt = cli_ref._spinner_text
|
||||
if not txt:
|
||||
return []
|
||||
return [('class:hint', f' {txt}')]
|
||||
|
||||
def get_spinner_height():
|
||||
return 1 if cli_ref._spinner_text else 0
|
||||
|
||||
spinner_widget = Window(
|
||||
content=FormattedTextControl(get_spinner_text),
|
||||
height=get_spinner_height,
|
||||
)
|
||||
|
||||
spacer = Window(
|
||||
content=FormattedTextControl(get_hint_text),
|
||||
height=get_hint_height,
|
||||
@@ -3643,6 +3830,32 @@ class HermesCLI:
|
||||
|
||||
# --- Clarify tool: dynamic display widget for questions + choices ---
|
||||
|
||||
def _panel_box_width(title: str, content_lines: list[str], min_width: int = 46, max_width: int = 76) -> int:
|
||||
"""Choose a stable panel width wide enough for the title and content."""
|
||||
term_cols = shutil.get_terminal_size((100, 20)).columns
|
||||
longest = max([len(title)] + [len(line) for line in content_lines] + [min_width - 4])
|
||||
inner = min(max(longest + 4, min_width - 2), max_width - 2, max(24, term_cols - 6))
|
||||
return inner + 2 # account for the single leading/trailing spaces inside borders
|
||||
|
||||
def _wrap_panel_text(text: str, width: int, subsequent_indent: str = "") -> list[str]:
|
||||
wrapped = textwrap.wrap(
|
||||
text,
|
||||
width=max(8, width),
|
||||
break_long_words=False,
|
||||
break_on_hyphens=False,
|
||||
subsequent_indent=subsequent_indent,
|
||||
)
|
||||
return wrapped or [""]
|
||||
|
||||
def _append_panel_line(lines, border_style: str, content_style: str, text: str, box_width: int) -> None:
|
||||
inner_width = max(0, box_width - 2)
|
||||
lines.append((border_style, "│ "))
|
||||
lines.append((content_style, text.ljust(inner_width)))
|
||||
lines.append((border_style, " │\n"))
|
||||
|
||||
def _append_blank_panel_line(lines, border_style: str, box_width: int) -> None:
|
||||
lines.append((border_style, "│" + (" " * box_width) + "│\n"))
|
||||
|
||||
def _get_clarify_display():
|
||||
"""Build styled text for the clarify question/choices panel."""
|
||||
state = cli_ref._clarify_state
|
||||
@@ -3652,43 +3865,62 @@ class HermesCLI:
|
||||
question = state["question"]
|
||||
choices = state.get("choices") or []
|
||||
selected = state.get("selected", 0)
|
||||
preview_lines = _wrap_panel_text(question, 60)
|
||||
for i, choice in enumerate(choices):
|
||||
prefix = "❯ " if i == selected and not cli_ref._clarify_freetext else " "
|
||||
preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" "))
|
||||
other_label = (
|
||||
"❯ Other (type below)" if cli_ref._clarify_freetext
|
||||
else "❯ Other (type your answer)" if selected == len(choices)
|
||||
else " Other (type your answer)"
|
||||
)
|
||||
preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" "))
|
||||
box_width = _panel_box_width("Hermes needs your input", preview_lines)
|
||||
inner_text_width = max(8, box_width - 2)
|
||||
|
||||
lines = []
|
||||
# Box top border
|
||||
lines.append(('class:clarify-border', '╭─ '))
|
||||
lines.append(('class:clarify-title', 'Hermes needs your input'))
|
||||
lines.append(('class:clarify-border', ' ─────────────────────────────╮\n'))
|
||||
lines.append(('class:clarify-border', '│\n'))
|
||||
lines.append(('class:clarify-border', ' ' + ('─' * max(0, box_width - len("Hermes needs your input") - 3)) + '╮\n'))
|
||||
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
|
||||
|
||||
# Question text
|
||||
lines.append(('class:clarify-border', '│ '))
|
||||
lines.append(('class:clarify-question', question))
|
||||
lines.append(('', '\n'))
|
||||
lines.append(('class:clarify-border', '│\n'))
|
||||
for wrapped in _wrap_panel_text(question, inner_text_width):
|
||||
_append_panel_line(lines, 'class:clarify-border', 'class:clarify-question', wrapped, box_width)
|
||||
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
|
||||
|
||||
if cli_ref._clarify_freetext and not choices:
|
||||
guidance = "Type your answer in the prompt below, then press Enter."
|
||||
for wrapped in _wrap_panel_text(guidance, inner_text_width):
|
||||
_append_panel_line(lines, 'class:clarify-border', 'class:clarify-choice', wrapped, box_width)
|
||||
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
|
||||
|
||||
if choices:
|
||||
# Multiple-choice mode: show selectable options
|
||||
for i, choice in enumerate(choices):
|
||||
lines.append(('class:clarify-border', '│ '))
|
||||
if i == selected and not cli_ref._clarify_freetext:
|
||||
lines.append(('class:clarify-selected', f'❯ {choice}'))
|
||||
else:
|
||||
lines.append(('class:clarify-choice', f' {choice}'))
|
||||
lines.append(('', '\n'))
|
||||
style = 'class:clarify-selected' if i == selected and not cli_ref._clarify_freetext else 'class:clarify-choice'
|
||||
prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else ' '
|
||||
wrapped_lines = _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" ")
|
||||
for wrapped in wrapped_lines:
|
||||
_append_panel_line(lines, 'class:clarify-border', style, wrapped, box_width)
|
||||
|
||||
# "Other" option (5th line, only shown when choices exist)
|
||||
other_idx = len(choices)
|
||||
lines.append(('class:clarify-border', '│ '))
|
||||
if selected == other_idx and not cli_ref._clarify_freetext:
|
||||
lines.append(('class:clarify-selected', '❯ Other (type your answer)'))
|
||||
other_style = 'class:clarify-selected'
|
||||
other_label = '❯ Other (type your answer)'
|
||||
elif cli_ref._clarify_freetext:
|
||||
lines.append(('class:clarify-active-other', '❯ Other (type below)'))
|
||||
other_style = 'class:clarify-active-other'
|
||||
other_label = '❯ Other (type below)'
|
||||
else:
|
||||
lines.append(('class:clarify-choice', ' Other (type your answer)'))
|
||||
lines.append(('', '\n'))
|
||||
other_style = 'class:clarify-choice'
|
||||
other_label = ' Other (type your answer)'
|
||||
for wrapped in _wrap_panel_text(other_label, inner_text_width, subsequent_indent=" "):
|
||||
_append_panel_line(lines, 'class:clarify-border', other_style, wrapped, box_width)
|
||||
|
||||
lines.append(('class:clarify-border', '│\n'))
|
||||
lines.append(('class:clarify-border', '╰──────────────────────────────────────────────────╯\n'))
|
||||
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
|
||||
lines.append(('class:clarify-border', '╰' + ('─' * box_width) + '╯\n'))
|
||||
return lines
|
||||
|
||||
clarify_widget = ConditionalContainer(
|
||||
@@ -3743,29 +3975,32 @@ class HermesCLI:
|
||||
"always": "Add to permanent allowlist",
|
||||
"deny": "Deny",
|
||||
}
|
||||
preview_lines = _wrap_panel_text(description, 60)
|
||||
preview_lines.extend(_wrap_panel_text(cmd_display, 60))
|
||||
for i, choice in enumerate(choices):
|
||||
prefix = '❯ ' if i == selected else ' '
|
||||
preview_lines.extend(_wrap_panel_text(f"{prefix}{choice_labels.get(choice, choice)}", 60, subsequent_indent=" "))
|
||||
box_width = _panel_box_width("⚠️ Dangerous Command", preview_lines)
|
||||
inner_text_width = max(8, box_width - 2)
|
||||
|
||||
lines = []
|
||||
lines.append(('class:approval-border', '╭─ '))
|
||||
lines.append(('class:approval-title', '⚠️ Dangerous Command'))
|
||||
lines.append(('class:approval-border', ' ───────────────────────────────╮\n'))
|
||||
lines.append(('class:approval-border', '│\n'))
|
||||
lines.append(('class:approval-border', '│ '))
|
||||
lines.append(('class:approval-desc', description))
|
||||
lines.append(('', '\n'))
|
||||
lines.append(('class:approval-border', '│ '))
|
||||
lines.append(('class:approval-cmd', cmd_display))
|
||||
lines.append(('', '\n'))
|
||||
lines.append(('class:approval-border', '│\n'))
|
||||
lines.append(('class:approval-border', ' ' + ('─' * max(0, box_width - len("⚠️ Dangerous Command") - 3)) + '╮\n'))
|
||||
_append_blank_panel_line(lines, 'class:approval-border', box_width)
|
||||
for wrapped in _wrap_panel_text(description, inner_text_width):
|
||||
_append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width)
|
||||
for wrapped in _wrap_panel_text(cmd_display, inner_text_width):
|
||||
_append_panel_line(lines, 'class:approval-border', 'class:approval-cmd', wrapped, box_width)
|
||||
_append_blank_panel_line(lines, 'class:approval-border', box_width)
|
||||
for i, choice in enumerate(choices):
|
||||
lines.append(('class:approval-border', '│ '))
|
||||
label = choice_labels.get(choice, choice)
|
||||
if i == selected:
|
||||
lines.append(('class:approval-selected', f'❯ {label}'))
|
||||
else:
|
||||
lines.append(('class:approval-choice', f' {label}'))
|
||||
lines.append(('', '\n'))
|
||||
lines.append(('class:approval-border', '│\n'))
|
||||
lines.append(('class:approval-border', '╰──────────────────────────────────────────────────────╯\n'))
|
||||
style = 'class:approval-selected' if i == selected else 'class:approval-choice'
|
||||
prefix = '❯ ' if i == selected else ' '
|
||||
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
|
||||
_append_panel_line(lines, 'class:approval-border', style, wrapped, box_width)
|
||||
_append_blank_panel_line(lines, 'class:approval-border', box_width)
|
||||
lines.append(('class:approval-border', '╰' + ('─' * box_width) + '╯\n'))
|
||||
return lines
|
||||
|
||||
approval_widget = ConditionalContainer(
|
||||
@@ -3818,6 +4053,7 @@ class HermesCLI:
|
||||
sudo_widget,
|
||||
approval_widget,
|
||||
clarify_widget,
|
||||
spinner_widget,
|
||||
spacer,
|
||||
input_rule_top,
|
||||
image_bar,
|
||||
@@ -3872,6 +4108,7 @@ class HermesCLI:
|
||||
style=style,
|
||||
full_screen=False,
|
||||
mouse_support=False,
|
||||
**({'cursor': _STEADY_CURSOR} if _STEADY_CURSOR is not None else {}),
|
||||
)
|
||||
self._app = app # Store reference for clarify_callback
|
||||
|
||||
@@ -3940,6 +4177,7 @@ class HermesCLI:
|
||||
self.chat(user_input, images=submit_images or None)
|
||||
finally:
|
||||
self._agent_running = False
|
||||
self._spinner_text = ""
|
||||
app.invalidate() # Refresh status line
|
||||
|
||||
except Exception as e:
|
||||
@@ -4000,6 +4238,7 @@ def main(
|
||||
resume: str = None,
|
||||
worktree: bool = False,
|
||||
w: bool = False,
|
||||
checkpoints: bool = False,
|
||||
):
|
||||
"""
|
||||
Hermes Agent CLI - Interactive AI Assistant
|
||||
@@ -4104,6 +4343,7 @@ def main(
|
||||
verbose=verbose,
|
||||
compact=compact,
|
||||
resume=resume,
|
||||
checkpoints=checkpoints,
|
||||
)
|
||||
|
||||
# Inject worktree context into agent's system prompt
|
||||
|
||||
@@ -26,7 +26,7 @@ except ImportError:
|
||||
# Configuration
|
||||
# =============================================================================
|
||||
|
||||
HERMES_DIR = Path.home() / ".hermes"
|
||||
HERMES_DIR = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
CRON_DIR = HERMES_DIR / "cron"
|
||||
JOBS_FILE = CRON_DIR / "jobs.json"
|
||||
OUTPUT_DIR = CRON_DIR / "output"
|
||||
|
||||
46
datagen-config-examples/web_research.yaml
Normal file
46
datagen-config-examples/web_research.yaml
Normal file
@@ -0,0 +1,46 @@
|
||||
# datagen-config-examples/web_research.yaml
|
||||
#
|
||||
# Batch data generation config for WebResearchEnv.
|
||||
# Generates tool-calling trajectories for multi-step web research tasks.
|
||||
#
|
||||
# Usage:
|
||||
# python batch_runner.py \
|
||||
# --config datagen-config-examples/web_research.yaml \
|
||||
# --run_name web_research_v1
|
||||
|
||||
environment: web-research
|
||||
|
||||
# Toolsets available to the agent during data generation
|
||||
toolsets:
|
||||
- web
|
||||
- file
|
||||
|
||||
# How many parallel workers to use
|
||||
num_workers: 4
|
||||
|
||||
# Questions per batch
|
||||
batch_size: 20
|
||||
|
||||
# Total trajectories to generate (comment out to run full dataset)
|
||||
max_items: 500
|
||||
|
||||
# Model to use for generation (override with --model flag)
|
||||
model: openrouter/nousresearch/hermes-3-llama-3.1-405b
|
||||
|
||||
# System prompt additions (ephemeral — not saved to trajectories)
|
||||
ephemeral_system_prompt: |
|
||||
You are a highly capable research agent. When asked a factual question,
|
||||
always use web_search to find current, accurate information before answering.
|
||||
Cite at least 2 sources. Be concise and accurate.
|
||||
|
||||
# Output directory
|
||||
output_dir: data/web_research_v1
|
||||
|
||||
# Trajectory compression settings (for fitting into training token budgets)
|
||||
compression:
|
||||
enabled: true
|
||||
target_max_tokens: 16000
|
||||
|
||||
# Eval settings
|
||||
eval_every: 100 # Run eval every N trajectories
|
||||
eval_size: 25 # Number of held-out questions per eval run
|
||||
718
environments/web_research_env.py
Normal file
718
environments/web_research_env.py
Normal file
@@ -0,0 +1,718 @@
|
||||
"""
|
||||
WebResearchEnv — RL Environment for Multi-Step Web Research
|
||||
============================================================
|
||||
|
||||
Trains models to do accurate, efficient, multi-source web research.
|
||||
|
||||
Reward signals:
|
||||
- Answer correctness (LLM judge, 0.0–1.0)
|
||||
- Source diversity (used ≥2 distinct domains)
|
||||
- Efficiency (penalizes excessive tool calls)
|
||||
- Tool usage (bonus for actually using web tools)
|
||||
|
||||
Dataset: FRAMES benchmark (Google, 2024) — multi-hop factual questions
|
||||
HuggingFace: google/frames-benchmark
|
||||
Fallback: built-in sample questions (no HF token needed)
|
||||
|
||||
Usage:
|
||||
# Phase 1 (OpenAI-compatible server)
|
||||
python environments/web_research_env.py serve \\
|
||||
--openai.base_url http://localhost:8000/v1 \\
|
||||
--openai.model_name YourModel \\
|
||||
--openai.server_type openai
|
||||
|
||||
# Process mode (offline data generation)
|
||||
python environments/web_research_env.py process \\
|
||||
--env.data_path_to_save_groups data/web_research.jsonl
|
||||
|
||||
# Standalone eval
|
||||
python environments/web_research_env.py evaluate \\
|
||||
--openai.base_url http://localhost:8000/v1 \\
|
||||
--openai.model_name YourModel
|
||||
|
||||
Built by: github.com/jackx707
|
||||
Inspired by: GroceryMind — production Hermes agent doing live web research
|
||||
across German grocery stores (firecrawl + hermes-agent)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
# Ensure hermes-agent root is on path
|
||||
_repo_root = Path(__file__).resolve().parent.parent
|
||||
if str(_repo_root) not in sys.path:
|
||||
sys.path.insert(0, str(_repo_root))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Optional HuggingFace datasets import
|
||||
# ---------------------------------------------------------------------------
|
||||
try:
|
||||
from datasets import load_dataset
|
||||
HF_AVAILABLE = True
|
||||
except ImportError:
|
||||
HF_AVAILABLE = False
|
||||
|
||||
from atroposlib.envs.base import ScoredDataGroup
|
||||
from atroposlib.envs.server_handling.server_manager import APIServerConfig
|
||||
from atroposlib.type_definitions import Item
|
||||
|
||||
from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig
|
||||
from environments.agent_loop import AgentResult
|
||||
from environments.tool_context import ToolContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fallback sample dataset (used when HuggingFace is unavailable)
|
||||
# Multi-hop questions requiring real web search to answer.
|
||||
# ---------------------------------------------------------------------------
|
||||
SAMPLE_QUESTIONS = [
|
||||
{
|
||||
"question": "What is the current population of the capital city of the country that won the 2022 FIFA World Cup?",
|
||||
"answer": "Buenos Aires has approximately 3 million people in the city proper, or around 15 million in the greater metro area.",
|
||||
"difficulty": "medium",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "Who is the CEO of the company that makes the most widely used open-source container orchestration platform?",
|
||||
"answer": "The Linux Foundation oversees Kubernetes. CNCF (Cloud Native Computing Foundation) is the specific body — it does not have a traditional CEO but has an executive director.",
|
||||
"difficulty": "medium",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "What programming language was used to write the original version of the web framework used by Instagram?",
|
||||
"answer": "Django, which Instagram was built on, is written in Python.",
|
||||
"difficulty": "easy",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "In what year was the university founded where the inventor of the World Wide Web currently holds a professorship?",
|
||||
"answer": "Tim Berners-Lee holds a professorship at MIT (founded 1861) and the University of Southampton (founded 1952).",
|
||||
"difficulty": "hard",
|
||||
"hops": 3,
|
||||
},
|
||||
{
|
||||
"question": "What is the latest stable version of the programming language that ranks #1 on the TIOBE index as of this year?",
|
||||
"answer": "Python is currently #1 on TIOBE. The latest stable version should be verified via the official python.org site.",
|
||||
"difficulty": "medium",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "How many employees does the parent company of Instagram have?",
|
||||
"answer": "Meta Platforms (parent of Instagram) employs approximately 70,000+ people as of recent reports.",
|
||||
"difficulty": "medium",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "What is the current interest rate set by the central bank of the country where the Eiffel Tower is located?",
|
||||
"answer": "The European Central Bank sets rates for France/eurozone. The current rate should be verified — it has changed frequently in 2023-2025.",
|
||||
"difficulty": "hard",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "Which company acquired the startup founded by the creator of Oculus VR?",
|
||||
"answer": "Palmer Luckey founded Oculus VR, which was acquired by Facebook (now Meta). He later founded Anduril Industries.",
|
||||
"difficulty": "medium",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "What is the market cap of the company that owns the most popular search engine in Russia?",
|
||||
"answer": "Yandex (now split into separate entities after 2024 restructuring). Current market cap should be verified via financial sources.",
|
||||
"difficulty": "hard",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "What was the GDP growth rate of the country that hosted the most recent Summer Olympics?",
|
||||
"answer": "Paris, France hosted the 2024 Summer Olympics. France's recent GDP growth should be verified via World Bank or IMF data.",
|
||||
"difficulty": "hard",
|
||||
"hops": 2,
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class WebResearchEnvConfig(HermesAgentEnvConfig):
|
||||
"""Configuration for the web research RL environment."""
|
||||
|
||||
# Reward weights
|
||||
correctness_weight: float = Field(
|
||||
default=0.6,
|
||||
description="Weight for answer correctness in reward (LLM judge score).",
|
||||
)
|
||||
tool_usage_weight: float = Field(
|
||||
default=0.2,
|
||||
description="Weight for tool usage signal (did the model actually use web tools?).",
|
||||
)
|
||||
efficiency_weight: float = Field(
|
||||
default=0.2,
|
||||
description="Weight for efficiency signal (penalizes excessive tool calls).",
|
||||
)
|
||||
diversity_bonus: float = Field(
|
||||
default=0.1,
|
||||
description="Bonus reward for citing ≥2 distinct domains.",
|
||||
)
|
||||
|
||||
# Efficiency thresholds
|
||||
efficient_max_calls: int = Field(
|
||||
default=5,
|
||||
description="Maximum tool calls before efficiency penalty begins.",
|
||||
)
|
||||
heavy_penalty_calls: int = Field(
|
||||
default=10,
|
||||
description="Tool call count where efficiency penalty steepens.",
|
||||
)
|
||||
|
||||
# Eval
|
||||
eval_size: int = Field(
|
||||
default=20,
|
||||
description="Number of held-out items for evaluation.",
|
||||
)
|
||||
eval_split_ratio: float = Field(
|
||||
default=0.1,
|
||||
description="Fraction of dataset to hold out for evaluation (0.0–1.0).",
|
||||
)
|
||||
|
||||
# Dataset
|
||||
dataset_name: str = Field(
|
||||
default="google/frames-benchmark",
|
||||
description="HuggingFace dataset name for research questions.",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Environment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class WebResearchEnv(HermesAgentBaseEnv):
|
||||
"""
|
||||
RL environment for training multi-step web research skills.
|
||||
|
||||
The model is given a factual question requiring 2-3 hops of web research
|
||||
and must use web_search / web_extract tools to find and synthesize the answer.
|
||||
|
||||
Reward is multi-signal:
|
||||
60% — answer correctness (LLM judge)
|
||||
20% — tool usage (did the model actually search the web?)
|
||||
20% — efficiency (penalizes >5 tool calls)
|
||||
|
||||
Bonus +0.1 for source diversity (≥2 distinct domains cited).
|
||||
"""
|
||||
|
||||
name = "web-research"
|
||||
env_config_cls = WebResearchEnvConfig
|
||||
|
||||
# Default toolsets for this environment — web + file for saving notes
|
||||
default_toolsets = ["web", "file"]
|
||||
|
||||
@classmethod
|
||||
def config_init(cls) -> Tuple[WebResearchEnvConfig, List[APIServerConfig]]:
|
||||
"""Default configuration for the web research environment."""
|
||||
env_config = WebResearchEnvConfig(
|
||||
enabled_toolsets=["web", "file"],
|
||||
max_agent_turns=15,
|
||||
agent_temperature=1.0,
|
||||
system_prompt=(
|
||||
"You are a highly capable research agent. When asked a factual question, "
|
||||
"always use web_search to find current, accurate information before answering. "
|
||||
"Cite at least 2 sources. Be concise and accurate."
|
||||
),
|
||||
group_size=4,
|
||||
total_steps=1000,
|
||||
steps_per_eval=100,
|
||||
use_wandb=True,
|
||||
wandb_name="web-research",
|
||||
)
|
||||
|
||||
server_configs = [
|
||||
APIServerConfig(
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
model_name="anthropic/claude-sonnet-4.5",
|
||||
server_type="openai",
|
||||
api_key=os.getenv("OPENROUTER_API_KEY", ""),
|
||||
health_check=False,
|
||||
)
|
||||
]
|
||||
|
||||
return env_config, server_configs
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self._items: list[dict] = []
|
||||
self._eval_items: list[dict] = []
|
||||
self._index: int = 0
|
||||
|
||||
# Metrics tracking for wandb
|
||||
self._reward_buffer: list[float] = []
|
||||
self._correctness_buffer: list[float] = []
|
||||
self._tool_usage_buffer: list[float] = []
|
||||
self._efficiency_buffer: list[float] = []
|
||||
self._diversity_buffer: list[float] = []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 1. Setup — load dataset
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def setup(self) -> None:
|
||||
"""Load the FRAMES benchmark or fall back to built-in samples."""
|
||||
if HF_AVAILABLE:
|
||||
try:
|
||||
logger.info("Loading FRAMES benchmark from HuggingFace...")
|
||||
ds = load_dataset(self.config.dataset_name, split="test")
|
||||
self._items = [
|
||||
{
|
||||
"question": row["Prompt"],
|
||||
"answer": row["Answer"],
|
||||
"difficulty": row.get("reasoning_types", "unknown"),
|
||||
"hops": 2,
|
||||
}
|
||||
for row in ds
|
||||
]
|
||||
# Hold out for eval
|
||||
eval_size = max(
|
||||
self.config.eval_size,
|
||||
int(len(self._items) * self.config.eval_split_ratio),
|
||||
)
|
||||
random.shuffle(self._items)
|
||||
self._eval_items = self._items[:eval_size]
|
||||
self._items = self._items[eval_size:]
|
||||
logger.info(
|
||||
f"Loaded {len(self._items)} train / {len(self._eval_items)} eval items "
|
||||
f"from FRAMES benchmark."
|
||||
)
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not load FRAMES from HuggingFace: {e}. Using built-in samples.")
|
||||
|
||||
# Fallback
|
||||
random.shuffle(SAMPLE_QUESTIONS)
|
||||
split = max(1, len(SAMPLE_QUESTIONS) * 8 // 10)
|
||||
self._items = SAMPLE_QUESTIONS[:split]
|
||||
self._eval_items = SAMPLE_QUESTIONS[split:]
|
||||
logger.info(
|
||||
f"Using built-in sample dataset: {len(self._items)} train / "
|
||||
f"{len(self._eval_items)} eval items."
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 2. get_next_item — return the next question
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def get_next_item(self) -> dict:
|
||||
"""Return the next item, cycling through the dataset."""
|
||||
if not self._items:
|
||||
raise RuntimeError("Dataset is empty. Did you call setup()?")
|
||||
item = self._items[self._index % len(self._items)]
|
||||
self._index += 1
|
||||
return item
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 3. format_prompt — build the user-facing prompt
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def format_prompt(self, item: dict) -> str:
|
||||
"""Format the research question as a task prompt."""
|
||||
return (
|
||||
f"Research the following question thoroughly using web search. "
|
||||
f"You MUST search the web to find current, accurate information — "
|
||||
f"do not rely solely on your training data.\n\n"
|
||||
f"Question: {item['question']}\n\n"
|
||||
f"Requirements:\n"
|
||||
f"- Use web_search and/or web_extract tools to find information\n"
|
||||
f"- Search at least 2 different sources\n"
|
||||
f"- Provide a concise, accurate answer (2-4 sentences)\n"
|
||||
f"- Cite the sources you used"
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 4. compute_reward — multi-signal scoring
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def compute_reward(
|
||||
self,
|
||||
item: dict,
|
||||
result: AgentResult,
|
||||
ctx: ToolContext,
|
||||
) -> float:
|
||||
"""
|
||||
Multi-signal reward function:
|
||||
|
||||
correctness_weight * correctness — LLM judge comparing answer to ground truth
|
||||
tool_usage_weight * tool_used — binary: did the model use web tools?
|
||||
efficiency_weight * efficiency — penalizes wasteful tool usage
|
||||
+ diversity_bonus — source diversity (≥2 distinct domains)
|
||||
"""
|
||||
# Extract final response from messages (last assistant message with content)
|
||||
final_response = ""
|
||||
tools_used: list[str] = []
|
||||
for msg in reversed(result.messages):
|
||||
if msg.get("role") == "assistant" and msg.get("content") and not final_response:
|
||||
final_response = msg["content"]
|
||||
# Collect tool names from tool call messages
|
||||
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||
for tc in msg["tool_calls"]:
|
||||
fn = tc.get("function", {}) if isinstance(tc, dict) else {}
|
||||
name = fn.get("name", "")
|
||||
if name:
|
||||
tools_used.append(name)
|
||||
tool_call_count: int = result.turns_used or len(tools_used)
|
||||
|
||||
cfg = self.config
|
||||
|
||||
# ---- Signal 1: Answer correctness (LLM judge) ----------------
|
||||
correctness = await self._llm_judge(
|
||||
question=item["question"],
|
||||
expected=item["answer"],
|
||||
model_answer=final_response,
|
||||
)
|
||||
|
||||
# ---- Signal 2: Web tool usage --------------------------------
|
||||
web_tools = {"web_search", "web_extract", "search", "firecrawl"}
|
||||
tool_used = 1.0 if any(t in web_tools for t in tools_used) else 0.0
|
||||
|
||||
# ---- Signal 3: Efficiency ------------------------------------
|
||||
if tool_call_count <= cfg.efficient_max_calls:
|
||||
efficiency = 1.0
|
||||
elif tool_call_count <= cfg.heavy_penalty_calls:
|
||||
efficiency = 1.0 - (tool_call_count - cfg.efficient_max_calls) * 0.08
|
||||
else:
|
||||
efficiency = max(0.0, 1.0 - (tool_call_count - cfg.efficient_max_calls) * 0.12)
|
||||
|
||||
# ---- Bonus: Source diversity ---------------------------------
|
||||
domains = self._extract_domains(final_response)
|
||||
diversity = cfg.diversity_bonus if len(domains) >= 2 else 0.0
|
||||
|
||||
# ---- Combine ------------------------------------------------
|
||||
reward = (
|
||||
cfg.correctness_weight * correctness
|
||||
+ cfg.tool_usage_weight * tool_used
|
||||
+ cfg.efficiency_weight * efficiency
|
||||
+ diversity
|
||||
)
|
||||
reward = min(1.0, max(0.0, reward)) # clamp to [0, 1]
|
||||
|
||||
# Track for wandb
|
||||
self._reward_buffer.append(reward)
|
||||
self._correctness_buffer.append(correctness)
|
||||
self._tool_usage_buffer.append(tool_used)
|
||||
self._efficiency_buffer.append(efficiency)
|
||||
self._diversity_buffer.append(diversity)
|
||||
|
||||
logger.debug(
|
||||
f"Reward breakdown — correctness={correctness:.2f}, "
|
||||
f"tool_used={tool_used:.1f}, efficiency={efficiency:.2f}, "
|
||||
f"diversity={diversity:.1f} → total={reward:.3f}"
|
||||
)
|
||||
|
||||
return reward
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 5. evaluate — run on held-out eval split
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def evaluate(self, *args, **kwargs) -> None:
|
||||
"""Run evaluation on the held-out split using the full agent loop with tools.
|
||||
|
||||
Each eval item runs through the same agent loop as training —
|
||||
the model can use web_search, web_extract, etc. to research answers.
|
||||
This measures actual agentic research capability, not just knowledge.
|
||||
"""
|
||||
import time
|
||||
import uuid
|
||||
from environments.agent_loop import HermesAgentLoop
|
||||
from environments.tool_context import ToolContext
|
||||
|
||||
items = self._eval_items
|
||||
if not items:
|
||||
logger.warning("No eval items available.")
|
||||
return
|
||||
|
||||
eval_size = min(self.config.eval_size, len(items))
|
||||
eval_items = items[:eval_size]
|
||||
|
||||
logger.info(f"Running eval on {len(eval_items)} questions (with agent loop + tools)...")
|
||||
start_time = time.time()
|
||||
samples = []
|
||||
|
||||
# Resolve tools once for all eval items
|
||||
tools, valid_names = self._resolve_tools_for_group()
|
||||
|
||||
for i, item in enumerate(eval_items):
|
||||
task_id = str(uuid.uuid4())
|
||||
logger.info(f"Eval [{i+1}/{len(eval_items)}]: {item['question'][:80]}...")
|
||||
|
||||
try:
|
||||
# Build messages
|
||||
messages: List[Dict[str, Any]] = []
|
||||
if self.config.system_prompt:
|
||||
messages.append({"role": "system", "content": self.config.system_prompt})
|
||||
messages.append({"role": "user", "content": self.format_prompt(item)})
|
||||
|
||||
# Run the full agent loop with tools
|
||||
agent = HermesAgentLoop(
|
||||
server=self.server,
|
||||
tool_schemas=tools,
|
||||
valid_tool_names=valid_names,
|
||||
max_turns=self.config.max_agent_turns,
|
||||
task_id=task_id,
|
||||
temperature=0.0, # Deterministic for eval
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
|
||||
# Extract final response and tool usage from messages
|
||||
final_response = ""
|
||||
tool_call_count = 0
|
||||
for msg in reversed(result.messages):
|
||||
if msg.get("role") == "assistant" and msg.get("content") and not final_response:
|
||||
final_response = msg["content"]
|
||||
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||
tool_call_count += len(msg["tool_calls"])
|
||||
|
||||
# Compute reward (includes LLM judge for correctness)
|
||||
# Temporarily save buffer lengths so we can extract the
|
||||
# correctness score without calling judge twice, and avoid
|
||||
# polluting training metric buffers with eval data.
|
||||
buf_len = len(self._correctness_buffer)
|
||||
ctx = ToolContext(task_id)
|
||||
try:
|
||||
reward = await self.compute_reward(item, result, ctx)
|
||||
finally:
|
||||
ctx.cleanup()
|
||||
|
||||
# Extract correctness from the buffer (compute_reward appended it)
|
||||
# then remove eval entries from training buffers
|
||||
correctness = (
|
||||
self._correctness_buffer[buf_len]
|
||||
if len(self._correctness_buffer) > buf_len
|
||||
else 0.0
|
||||
)
|
||||
# Roll back buffers to avoid polluting training metrics
|
||||
for buf in (
|
||||
self._reward_buffer, self._correctness_buffer,
|
||||
self._tool_usage_buffer, self._efficiency_buffer,
|
||||
self._diversity_buffer,
|
||||
):
|
||||
if len(buf) > buf_len:
|
||||
buf.pop()
|
||||
|
||||
samples.append({
|
||||
"prompt": item["question"],
|
||||
"response": final_response[:500],
|
||||
"expected": item["answer"],
|
||||
"correctness": correctness,
|
||||
"reward": reward,
|
||||
"tool_calls": tool_call_count,
|
||||
"turns": result.turns_used,
|
||||
})
|
||||
|
||||
logger.info(
|
||||
f" → correctness={correctness:.2f}, reward={reward:.3f}, "
|
||||
f"tools={tool_call_count}, turns={result.turns_used}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Eval error on item: {e}")
|
||||
samples.append({
|
||||
"prompt": item["question"],
|
||||
"response": f"ERROR: {e}",
|
||||
"expected": item["answer"],
|
||||
"correctness": 0.0,
|
||||
"reward": 0.0,
|
||||
"tool_calls": 0,
|
||||
"turns": 0,
|
||||
})
|
||||
|
||||
end_time = time.time()
|
||||
|
||||
# Compute aggregate metrics
|
||||
correctness_scores = [s["correctness"] for s in samples]
|
||||
rewards = [s["reward"] for s in samples]
|
||||
tool_counts = [s["tool_calls"] for s in samples]
|
||||
n = len(samples)
|
||||
|
||||
eval_metrics = {
|
||||
"eval/mean_correctness": sum(correctness_scores) / n if n else 0.0,
|
||||
"eval/mean_reward": sum(rewards) / n if n else 0.0,
|
||||
"eval/mean_tool_calls": sum(tool_counts) / n if n else 0.0,
|
||||
"eval/tool_usage_rate": sum(1 for t in tool_counts if t > 0) / n if n else 0.0,
|
||||
"eval/n_items": n,
|
||||
}
|
||||
|
||||
logger.info(
|
||||
f"Eval complete — correctness={eval_metrics['eval/mean_correctness']:.3f}, "
|
||||
f"reward={eval_metrics['eval/mean_reward']:.3f}, "
|
||||
f"tool_usage={eval_metrics['eval/tool_usage_rate']:.0%}"
|
||||
)
|
||||
|
||||
await self.evaluate_log(
|
||||
metrics=eval_metrics,
|
||||
samples=samples,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 6. wandb_log — custom metrics
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def wandb_log(self, wandb_metrics: Optional[Dict] = None) -> None:
|
||||
"""Log reward breakdown metrics to wandb."""
|
||||
if wandb_metrics is None:
|
||||
wandb_metrics = {}
|
||||
|
||||
if self._reward_buffer:
|
||||
n = len(self._reward_buffer)
|
||||
wandb_metrics["train/mean_reward"] = sum(self._reward_buffer) / n
|
||||
wandb_metrics["train/mean_correctness"] = sum(self._correctness_buffer) / n
|
||||
wandb_metrics["train/mean_tool_usage"] = sum(self._tool_usage_buffer) / n
|
||||
wandb_metrics["train/mean_efficiency"] = sum(self._efficiency_buffer) / n
|
||||
wandb_metrics["train/mean_diversity"] = sum(self._diversity_buffer) / n
|
||||
wandb_metrics["train/total_rollouts"] = n
|
||||
|
||||
# Accuracy buckets
|
||||
wandb_metrics["train/correct_rate"] = (
|
||||
sum(1 for c in self._correctness_buffer if c >= 0.7) / n
|
||||
)
|
||||
wandb_metrics["train/tool_usage_rate"] = (
|
||||
sum(1 for t in self._tool_usage_buffer if t > 0) / n
|
||||
)
|
||||
|
||||
# Clear buffers
|
||||
self._reward_buffer.clear()
|
||||
self._correctness_buffer.clear()
|
||||
self._tool_usage_buffer.clear()
|
||||
self._efficiency_buffer.clear()
|
||||
self._diversity_buffer.clear()
|
||||
|
||||
await super().wandb_log(wandb_metrics)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _llm_judge(
|
||||
self,
|
||||
question: str,
|
||||
expected: str,
|
||||
model_answer: str,
|
||||
) -> float:
|
||||
"""
|
||||
Use the server's LLM to judge answer correctness.
|
||||
Falls back to keyword heuristic if LLM call fails.
|
||||
"""
|
||||
if not model_answer or not model_answer.strip():
|
||||
return 0.0
|
||||
|
||||
judge_prompt = (
|
||||
"You are an impartial judge evaluating the quality of an AI research answer.\n\n"
|
||||
f"Question: {question}\n\n"
|
||||
f"Reference answer: {expected}\n\n"
|
||||
f"Model answer: {model_answer}\n\n"
|
||||
"Score the model answer on a scale from 0.0 to 1.0 where:\n"
|
||||
" 1.0 = fully correct and complete\n"
|
||||
" 0.7 = mostly correct with minor gaps\n"
|
||||
" 0.4 = partially correct\n"
|
||||
" 0.1 = mentions relevant topic but wrong or very incomplete\n"
|
||||
" 0.0 = completely wrong or no answer\n\n"
|
||||
"Consider: factual accuracy, completeness, and relevance.\n"
|
||||
'Respond with ONLY a JSON object: {"score": <float>, "reason": "<one sentence>"}'
|
||||
)
|
||||
|
||||
try:
|
||||
response = await self.server.chat_completion(
|
||||
messages=[{"role": "user", "content": judge_prompt}],
|
||||
n=1,
|
||||
max_tokens=150,
|
||||
temperature=0.0,
|
||||
split="eval",
|
||||
)
|
||||
text = response.choices[0].message.content if response.choices else ""
|
||||
parsed = self._parse_judge_json(text)
|
||||
if parsed is not None:
|
||||
return float(parsed)
|
||||
except Exception as e:
|
||||
logger.debug(f"LLM judge failed: {e}. Using heuristic.")
|
||||
|
||||
return self._heuristic_score(expected, model_answer)
|
||||
|
||||
@staticmethod
|
||||
def _parse_judge_json(text: str) -> Optional[float]:
|
||||
"""Extract the score float from LLM judge JSON response."""
|
||||
try:
|
||||
clean = re.sub(r"```(?:json)?|```", "", text).strip()
|
||||
data = json.loads(clean)
|
||||
score = float(data.get("score", -1))
|
||||
if 0.0 <= score <= 1.0:
|
||||
return score
|
||||
except Exception:
|
||||
match = re.search(r'"score"\s*:\s*([0-9.]+)', text)
|
||||
if match:
|
||||
score = float(match.group(1))
|
||||
if 0.0 <= score <= 1.0:
|
||||
return score
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _heuristic_score(expected: str, model_answer: str) -> float:
|
||||
"""Lightweight keyword overlap score as fallback."""
|
||||
stopwords = {
|
||||
"the", "a", "an", "is", "are", "was", "were", "of", "in", "on",
|
||||
"at", "to", "for", "with", "and", "or", "but", "it", "its",
|
||||
"this", "that", "as", "by", "from", "be", "has", "have", "had",
|
||||
}
|
||||
|
||||
def tokenize(text: str) -> set:
|
||||
tokens = re.findall(r'\b\w+\b', text.lower())
|
||||
return {t for t in tokens if t not in stopwords and len(t) > 2}
|
||||
|
||||
expected_tokens = tokenize(expected)
|
||||
answer_tokens = tokenize(model_answer)
|
||||
|
||||
if not expected_tokens:
|
||||
return 0.5
|
||||
|
||||
overlap = len(expected_tokens & answer_tokens)
|
||||
union = len(expected_tokens | answer_tokens)
|
||||
|
||||
jaccard = overlap / union if union > 0 else 0.0
|
||||
recall = overlap / len(expected_tokens)
|
||||
return min(1.0, 0.4 * jaccard + 0.6 * recall)
|
||||
|
||||
@staticmethod
|
||||
def _extract_domains(text: str) -> set:
|
||||
"""Extract unique domains from URLs cited in the response."""
|
||||
urls = re.findall(r'https?://[^\s\)>\]"\']+', text)
|
||||
domains = set()
|
||||
for url in urls:
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
domain = parsed.netloc.lower().lstrip("www.")
|
||||
if domain:
|
||||
domains.add(domain)
|
||||
except Exception:
|
||||
pass
|
||||
return domains
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
if __name__ == "__main__":
|
||||
WebResearchEnv.cli()
|
||||
@@ -270,7 +270,7 @@ def load_gateway_config() -> GatewayConfig:
|
||||
gateway_config_path = Path.home() / ".hermes" / "gateway.json"
|
||||
if gateway_config_path.exists():
|
||||
try:
|
||||
with open(gateway_config_path, "r") as f:
|
||||
with open(gateway_config_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
config = GatewayConfig.from_dict(data)
|
||||
except Exception as e:
|
||||
@@ -283,7 +283,7 @@ def load_gateway_config() -> GatewayConfig:
|
||||
import yaml
|
||||
config_yaml_path = Path.home() / ".hermes" / "config.yaml"
|
||||
if config_yaml_path.exists():
|
||||
with open(config_yaml_path) as f:
|
||||
with open(config_yaml_path, encoding="utf-8") as f:
|
||||
yaml_cfg = yaml.safe_load(f) or {}
|
||||
sr = yaml_cfg.get("session_reset")
|
||||
if sr and isinstance(sr, dict):
|
||||
@@ -441,5 +441,5 @@ def save_gateway_config(config: GatewayConfig) -> None:
|
||||
gateway_config_path = Path.home() / ".hermes" / "gateway.json"
|
||||
gateway_config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with open(gateway_config_path, "w") as f:
|
||||
with open(gateway_config_path, "w", encoding="utf-8") as f:
|
||||
json.dump(config.to_dict(), f, indent=2)
|
||||
|
||||
@@ -111,6 +111,7 @@ def _append_to_jsonl(session_id: str, message: dict) -> None:
|
||||
|
||||
def _append_to_sqlite(session_id: str, message: dict) -> None:
|
||||
"""Append a message to the SQLite session database."""
|
||||
db = None
|
||||
try:
|
||||
from hermes_state import SessionDB
|
||||
db = SessionDB()
|
||||
@@ -121,3 +122,6 @@ def _append_to_sqlite(session_id: str, message: dict) -> None:
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Mirror SQLite write failed: %s", e)
|
||||
finally:
|
||||
if db is not None:
|
||||
db.close()
|
||||
|
||||
@@ -10,6 +10,7 @@ Uses slack-bolt (Python) with Socket Mode for:
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
try:
|
||||
@@ -33,6 +34,8 @@ from gateway.platforms.base import (
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
cache_document_from_bytes,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
)
|
||||
@@ -96,6 +99,13 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
async def handle_message_event(event, say):
|
||||
await self._handle_slack_message(event)
|
||||
|
||||
# Acknowledge app_mention events to prevent Bolt 404 errors.
|
||||
# The "message" handler above already processes @mentions in
|
||||
# channels, so this is intentionally a no-op to avoid duplicates.
|
||||
@self._app.event("app_mention")
|
||||
async def handle_app_mention(event, say):
|
||||
pass
|
||||
|
||||
# Register slash command handler
|
||||
@self._app.command("/hermes")
|
||||
async def handle_hermes_command(ack, command):
|
||||
@@ -266,6 +276,65 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_video(
|
||||
self,
|
||||
chat_id: str,
|
||||
video_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send a video file to Slack."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
if not os.path.exists(video_path):
|
||||
return SendResult(success=False, error=f"Video file not found: {video_path}")
|
||||
|
||||
try:
|
||||
result = await self._app.client.files_upload_v2(
|
||||
channel=chat_id,
|
||||
file=video_path,
|
||||
filename=os.path.basename(video_path),
|
||||
initial_comment=caption or "",
|
||||
thread_ts=reply_to,
|
||||
)
|
||||
return SendResult(success=True, raw_response=result)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to send video: {e}")
|
||||
return await super().send_video(chat_id, video_path, caption, reply_to)
|
||||
|
||||
async def send_document(
|
||||
self,
|
||||
chat_id: str,
|
||||
file_path: str,
|
||||
caption: Optional[str] = None,
|
||||
file_name: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send a document/file attachment to Slack."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
return SendResult(success=False, error=f"File not found: {file_path}")
|
||||
|
||||
display_name = file_name or os.path.basename(file_path)
|
||||
|
||||
try:
|
||||
result = await self._app.client.files_upload_v2(
|
||||
channel=chat_id,
|
||||
file=file_path,
|
||||
filename=display_name,
|
||||
initial_comment=caption or "",
|
||||
thread_ts=reply_to,
|
||||
)
|
||||
return SendResult(success=True, raw_response=result)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to send document: {e}")
|
||||
return await super().send_document(chat_id, file_path, caption, file_name, reply_to)
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
"""Get information about a Slack channel."""
|
||||
if not self._app:
|
||||
@@ -347,6 +416,58 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
msg_type = MessageType.VOICE
|
||||
except Exception as e:
|
||||
print(f"[Slack] Failed to cache audio: {e}", flush=True)
|
||||
elif url:
|
||||
# Try to handle as a document attachment
|
||||
try:
|
||||
original_filename = f.get("name", "")
|
||||
ext = ""
|
||||
if original_filename:
|
||||
_, ext = os.path.splitext(original_filename)
|
||||
ext = ext.lower()
|
||||
|
||||
# Fallback: reverse-lookup from MIME type
|
||||
if not ext and mimetype:
|
||||
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
|
||||
ext = mime_to_ext.get(mimetype, "")
|
||||
|
||||
if ext not in SUPPORTED_DOCUMENT_TYPES:
|
||||
continue # Skip unsupported file types silently
|
||||
|
||||
# Check file size (Slack limit: 20 MB for bots)
|
||||
file_size = f.get("size", 0)
|
||||
MAX_DOC_BYTES = 20 * 1024 * 1024
|
||||
if not file_size or file_size > MAX_DOC_BYTES:
|
||||
print(f"[Slack] Document too large or unknown size: {file_size}", flush=True)
|
||||
continue
|
||||
|
||||
# Download and cache
|
||||
raw_bytes = await self._download_slack_file_bytes(url)
|
||||
cached_path = cache_document_from_bytes(
|
||||
raw_bytes, original_filename or f"document{ext}"
|
||||
)
|
||||
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(doc_mime)
|
||||
msg_type = MessageType.DOCUMENT
|
||||
print(f"[Slack] Cached user document: {cached_path}", flush=True)
|
||||
|
||||
# Inject text content for .txt/.md files (capped at 100 KB)
|
||||
MAX_TEXT_INJECT_BYTES = 100 * 1024
|
||||
if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
try:
|
||||
text_content = raw_bytes.decode("utf-8")
|
||||
display_name = original_filename or f"document{ext}"
|
||||
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
|
||||
injection = f"[Content of {display_name}]:\n{text_content}"
|
||||
if text:
|
||||
text = f"{injection}\n\n{text}"
|
||||
else:
|
||||
text = injection
|
||||
except UnicodeDecodeError:
|
||||
pass # Binary content, skip injection
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Slack] Failed to cache document: {e}", flush=True)
|
||||
|
||||
# Build source
|
||||
source = self.build_source(
|
||||
@@ -427,3 +548,16 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
else:
|
||||
from gateway.platforms.base import cache_image_from_bytes
|
||||
return cache_image_from_bytes(response.content, ext)
|
||||
|
||||
async def _download_slack_file_bytes(self, url: str) -> bytes:
|
||||
"""Download a Slack file and return raw bytes."""
|
||||
import httpx
|
||||
|
||||
bot_token = self.config.token
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(
|
||||
url,
|
||||
headers={"Authorization": f"Bearer {bot_token}"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.content
|
||||
|
||||
114
gateway/run.py
114
gateway/run.py
@@ -48,7 +48,7 @@ _config_path = _hermes_home / 'config.yaml'
|
||||
if _config_path.exists():
|
||||
try:
|
||||
import yaml as _yaml
|
||||
with open(_config_path) as _f:
|
||||
with open(_config_path, encoding="utf-8") as _f:
|
||||
_cfg = _yaml.safe_load(_f) or {}
|
||||
# Top-level simple values (fallback only — don't override .env)
|
||||
for _key, _val in _cfg.items():
|
||||
@@ -75,11 +75,16 @@ if _config_path.exists():
|
||||
"container_memory": "TERMINAL_CONTAINER_MEMORY",
|
||||
"container_disk": "TERMINAL_CONTAINER_DISK",
|
||||
"container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
|
||||
"docker_volumes": "TERMINAL_DOCKER_VOLUMES",
|
||||
"sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||||
}
|
||||
for _cfg_key, _env_var in _terminal_env_map.items():
|
||||
if _cfg_key in _terminal_cfg:
|
||||
os.environ[_env_var] = str(_terminal_cfg[_cfg_key])
|
||||
_val = _terminal_cfg[_cfg_key]
|
||||
if isinstance(_val, list):
|
||||
os.environ[_env_var] = json.dumps(_val)
|
||||
else:
|
||||
os.environ[_env_var] = str(_val)
|
||||
_compression_cfg = _cfg.get("compression", {})
|
||||
if _compression_cfg and isinstance(_compression_cfg, dict):
|
||||
_compression_env_map = {
|
||||
@@ -311,7 +316,7 @@ class GatewayRunner:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path) as _f:
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
file_path = cfg.get("prefill_messages_file", "")
|
||||
except Exception:
|
||||
@@ -349,7 +354,7 @@ class GatewayRunner:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path) as _f:
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
return (cfg.get("agent", {}).get("system_prompt", "") or "").strip()
|
||||
except Exception:
|
||||
@@ -370,7 +375,7 @@ class GatewayRunner:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path) as _f:
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
|
||||
except Exception:
|
||||
@@ -393,7 +398,7 @@ class GatewayRunner:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path) as _f:
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
return cfg.get("provider_routing", {}) or {}
|
||||
except Exception:
|
||||
@@ -411,7 +416,7 @@ class GatewayRunner:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path) as _f:
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
fb = cfg.get("fallback_model", {}) or {}
|
||||
if fb.get("provider") and fb.get("model"):
|
||||
@@ -766,7 +771,7 @@ class GatewayRunner:
|
||||
_known_commands = {"new", "reset", "help", "status", "stop", "model",
|
||||
"personality", "retry", "undo", "sethome", "set-home",
|
||||
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
|
||||
"update", "title", "resume", "provider"}
|
||||
"update", "title", "resume", "provider", "rollback"}
|
||||
if command and command in _known_commands:
|
||||
await self.hooks.emit(f"command:{command}", {
|
||||
"platform": source.platform.value if source.platform else "",
|
||||
@@ -825,6 +830,9 @@ class GatewayRunner:
|
||||
|
||||
if command == "resume":
|
||||
return await self._handle_resume_command(event)
|
||||
|
||||
if command == "rollback":
|
||||
return await self._handle_rollback_command(event)
|
||||
|
||||
# Skill slash commands: /skill-name loads the skill and sends to agent
|
||||
if command:
|
||||
@@ -926,7 +934,7 @@ class GatewayRunner:
|
||||
_hyg_cfg_path = _hermes_home / "config.yaml"
|
||||
if _hyg_cfg_path.exists():
|
||||
import yaml as _hyg_yaml
|
||||
with open(_hyg_cfg_path) as _hyg_f:
|
||||
with open(_hyg_cfg_path, encoding="utf-8") as _hyg_f:
|
||||
_hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}
|
||||
|
||||
# Resolve model name (same logic as run_sync)
|
||||
@@ -1395,6 +1403,7 @@ class GatewayRunner:
|
||||
"`/resume [name]` — Resume a previously-named session",
|
||||
"`/usage` — Show token usage for this session",
|
||||
"`/insights [days]` — Show usage insights and analytics",
|
||||
"`/rollback [number]` — List or restore filesystem checkpoints",
|
||||
"`/reload-mcp` — Reload MCP servers from config",
|
||||
"`/update` — Update Hermes Agent to the latest version",
|
||||
"`/help` — Show this message",
|
||||
@@ -1429,7 +1438,7 @@ class GatewayRunner:
|
||||
current_provider = "openrouter"
|
||||
try:
|
||||
if config_path.exists():
|
||||
with open(config_path) as f:
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
model_cfg = cfg.get("model", {})
|
||||
if isinstance(model_cfg, str):
|
||||
@@ -1520,14 +1529,14 @@ class GatewayRunner:
|
||||
try:
|
||||
user_config = {}
|
||||
if config_path.exists():
|
||||
with open(config_path) as f:
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
user_config = yaml.safe_load(f) or {}
|
||||
if "model" not in user_config or not isinstance(user_config["model"], dict):
|
||||
user_config["model"] = {}
|
||||
user_config["model"]["default"] = new_model
|
||||
if provider_changed:
|
||||
user_config["model"]["provider"] = target_provider
|
||||
with open(config_path, 'w') as f:
|
||||
with open(config_path, 'w', encoding="utf-8") as f:
|
||||
yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
|
||||
except Exception as e:
|
||||
return f"⚠️ Failed to save model change: {e}"
|
||||
@@ -1564,7 +1573,7 @@ class GatewayRunner:
|
||||
config_path = _hermes_home / 'config.yaml'
|
||||
try:
|
||||
if config_path.exists():
|
||||
with open(config_path) as f:
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
model_cfg = cfg.get("model", {})
|
||||
if isinstance(model_cfg, dict):
|
||||
@@ -1613,7 +1622,7 @@ class GatewayRunner:
|
||||
|
||||
try:
|
||||
if config_path.exists():
|
||||
with open(config_path, 'r') as f:
|
||||
with open(config_path, 'r', encoding="utf-8") as f:
|
||||
config = yaml.safe_load(f) or {}
|
||||
personalities = config.get("agent", {}).get("personalities", {})
|
||||
else:
|
||||
@@ -1642,7 +1651,7 @@ class GatewayRunner:
|
||||
if "agent" not in config or not isinstance(config.get("agent"), dict):
|
||||
config["agent"] = {}
|
||||
config["agent"]["system_prompt"] = new_prompt
|
||||
with open(config_path, 'w') as f:
|
||||
with open(config_path, 'w', encoding="utf-8") as f:
|
||||
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
|
||||
except Exception as e:
|
||||
return f"⚠️ Failed to save personality change: {e}"
|
||||
@@ -1726,10 +1735,10 @@ class GatewayRunner:
|
||||
config_path = _hermes_home / 'config.yaml'
|
||||
user_config = {}
|
||||
if config_path.exists():
|
||||
with open(config_path) as f:
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
user_config = yaml.safe_load(f) or {}
|
||||
user_config[env_key] = chat_id
|
||||
with open(config_path, 'w') as f:
|
||||
with open(config_path, 'w', encoding="utf-8") as f:
|
||||
yaml.dump(user_config, f, default_flow_style=False)
|
||||
# Also set in the current environment so it takes effect immediately
|
||||
os.environ[env_key] = str(chat_id)
|
||||
@@ -1741,6 +1750,65 @@ class GatewayRunner:
|
||||
f"Cron jobs and cross-platform messages will be delivered here."
|
||||
)
|
||||
|
||||
async def _handle_rollback_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /rollback command — list or restore filesystem checkpoints."""
|
||||
from tools.checkpoint_manager import CheckpointManager, format_checkpoint_list
|
||||
|
||||
# Read checkpoint config from config.yaml
|
||||
cp_cfg = {}
|
||||
try:
|
||||
import yaml as _y
|
||||
_cfg_path = _hermes_home / "config.yaml"
|
||||
if _cfg_path.exists():
|
||||
with open(_cfg_path, encoding="utf-8") as _f:
|
||||
_data = _y.safe_load(_f) or {}
|
||||
cp_cfg = _data.get("checkpoints", {})
|
||||
if isinstance(cp_cfg, bool):
|
||||
cp_cfg = {"enabled": cp_cfg}
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not cp_cfg.get("enabled", False):
|
||||
return (
|
||||
"Checkpoints are not enabled.\n"
|
||||
"Enable in config.yaml:\n```\ncheckpoints:\n enabled: true\n```"
|
||||
)
|
||||
|
||||
mgr = CheckpointManager(
|
||||
enabled=True,
|
||||
max_snapshots=cp_cfg.get("max_snapshots", 50),
|
||||
)
|
||||
|
||||
cwd = os.getenv("MESSAGING_CWD", str(Path.home()))
|
||||
arg = event.get_command_args().strip()
|
||||
|
||||
if not arg:
|
||||
checkpoints = mgr.list_checkpoints(cwd)
|
||||
return format_checkpoint_list(checkpoints, cwd)
|
||||
|
||||
# Restore by number or hash
|
||||
checkpoints = mgr.list_checkpoints(cwd)
|
||||
if not checkpoints:
|
||||
return f"No checkpoints found for {cwd}"
|
||||
|
||||
target_hash = None
|
||||
try:
|
||||
idx = int(arg) - 1
|
||||
if 0 <= idx < len(checkpoints):
|
||||
target_hash = checkpoints[idx]["hash"]
|
||||
else:
|
||||
return f"Invalid checkpoint number. Use 1-{len(checkpoints)}."
|
||||
except ValueError:
|
||||
target_hash = arg
|
||||
|
||||
result = mgr.restore(cwd, target_hash)
|
||||
if result["success"]:
|
||||
return (
|
||||
f"✅ Restored to checkpoint {result['restored_to']}: {result['reason']}\n"
|
||||
f"A pre-rollback snapshot was saved automatically."
|
||||
)
|
||||
return f"❌ {result['error']}"
|
||||
|
||||
async def _handle_compress_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /compress command -- manually compress conversation context."""
|
||||
source = event.source
|
||||
@@ -2397,6 +2465,8 @@ class GatewayRunner:
|
||||
Platform.DISCORD: "hermes-discord",
|
||||
Platform.WHATSAPP: "hermes-whatsapp",
|
||||
Platform.SLACK: "hermes-slack",
|
||||
Platform.SIGNAL: "hermes-signal",
|
||||
Platform.HOMEASSISTANT: "hermes-homeassistant",
|
||||
}
|
||||
|
||||
# Try to load platform_toolsets from config
|
||||
@@ -2405,7 +2475,7 @@ class GatewayRunner:
|
||||
config_path = _hermes_home / 'config.yaml'
|
||||
if config_path.exists():
|
||||
import yaml
|
||||
with open(config_path, 'r') as f:
|
||||
with open(config_path, 'r', encoding="utf-8") as f:
|
||||
user_config = yaml.safe_load(f) or {}
|
||||
platform_toolsets_config = user_config.get("platform_toolsets", {})
|
||||
except Exception as e:
|
||||
@@ -2418,6 +2488,8 @@ class GatewayRunner:
|
||||
Platform.DISCORD: "discord",
|
||||
Platform.WHATSAPP: "whatsapp",
|
||||
Platform.SLACK: "slack",
|
||||
Platform.SIGNAL: "signal",
|
||||
Platform.HOMEASSISTANT: "homeassistant",
|
||||
}.get(source.platform, "telegram")
|
||||
|
||||
# Use config override if present (list of toolsets), otherwise hardcoded default
|
||||
@@ -2435,7 +2507,7 @@ class GatewayRunner:
|
||||
_tp_cfg_path = _hermes_home / "config.yaml"
|
||||
if _tp_cfg_path.exists():
|
||||
import yaml as _tp_yaml
|
||||
with open(_tp_cfg_path) as _tp_f:
|
||||
with open(_tp_cfg_path, encoding="utf-8") as _tp_f:
|
||||
_tp_data = _tp_yaml.safe_load(_tp_f) or {}
|
||||
_progress_cfg = _tp_data.get("display", {})
|
||||
except Exception:
|
||||
@@ -2653,7 +2725,7 @@ class GatewayRunner:
|
||||
import yaml as _y
|
||||
_cfg_path = _hermes_home / "config.yaml"
|
||||
if _cfg_path.exists():
|
||||
with open(_cfg_path) as _f:
|
||||
with open(_cfg_path, encoding="utf-8") as _f:
|
||||
_cfg = _y.safe_load(_f) or {}
|
||||
_model_cfg = _cfg.get("model", {})
|
||||
if isinstance(_model_cfg, str):
|
||||
@@ -3135,7 +3207,7 @@ def main():
|
||||
config = None
|
||||
if args.config:
|
||||
import json
|
||||
with open(args.config) as f:
|
||||
with open(args.config, encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
config = GatewayConfig.from_dict(data)
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ import stat
|
||||
import base64
|
||||
import hashlib
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
import webbrowser
|
||||
@@ -44,6 +45,10 @@ try:
|
||||
import fcntl
|
||||
except Exception:
|
||||
fcntl = None
|
||||
try:
|
||||
import msvcrt
|
||||
except Exception:
|
||||
msvcrt = None
|
||||
|
||||
# =============================================================================
|
||||
# Constants
|
||||
@@ -299,31 +304,64 @@ def _auth_lock_path() -> Path:
|
||||
return _auth_file_path().with_suffix(".lock")
|
||||
|
||||
|
||||
_auth_lock_holder = threading.local()
|
||||
|
||||
@contextmanager
|
||||
def _auth_store_lock(timeout_seconds: float = AUTH_LOCK_TIMEOUT_SECONDS):
|
||||
"""Cross-process advisory lock for auth.json reads+writes."""
|
||||
"""Cross-process advisory lock for auth.json reads+writes. Reentrant."""
|
||||
# Reentrant: if this thread already holds the lock, just yield.
|
||||
if getattr(_auth_lock_holder, "depth", 0) > 0:
|
||||
_auth_lock_holder.depth += 1
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
_auth_lock_holder.depth -= 1
|
||||
return
|
||||
|
||||
lock_path = _auth_lock_path()
|
||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with lock_path.open("a+") as lock_file:
|
||||
if fcntl is None:
|
||||
if fcntl is None and msvcrt is None:
|
||||
_auth_lock_holder.depth = 1
|
||||
try:
|
||||
yield
|
||||
return
|
||||
finally:
|
||||
_auth_lock_holder.depth = 0
|
||||
return
|
||||
|
||||
# On Windows, msvcrt.locking needs the file to have content and the
|
||||
# file pointer at position 0. Ensure the lock file has at least 1 byte.
|
||||
if msvcrt and (not lock_path.exists() or lock_path.stat().st_size == 0):
|
||||
lock_path.write_text(" ", encoding="utf-8")
|
||||
|
||||
with lock_path.open("r+" if msvcrt else "a+") as lock_file:
|
||||
deadline = time.time() + max(1.0, timeout_seconds)
|
||||
while True:
|
||||
try:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
if fcntl:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
else:
|
||||
lock_file.seek(0)
|
||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_NBLCK, 1)
|
||||
break
|
||||
except BlockingIOError:
|
||||
except (BlockingIOError, OSError, PermissionError):
|
||||
if time.time() >= deadline:
|
||||
raise TimeoutError("Timed out waiting for auth store lock")
|
||||
time.sleep(0.05)
|
||||
|
||||
_auth_lock_holder.depth = 1
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
||||
_auth_lock_holder.depth = 0
|
||||
if fcntl:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
||||
elif msvcrt:
|
||||
try:
|
||||
lock_file.seek(0)
|
||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
|
||||
except (OSError, IOError):
|
||||
pass
|
||||
|
||||
|
||||
def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]:
|
||||
|
||||
@@ -36,6 +36,28 @@ def cprint(text: str):
|
||||
_pt_print(_PT_ANSI(text))
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Skin-aware color helpers
|
||||
# =========================================================================
|
||||
|
||||
def _skin_color(key: str, fallback: str) -> str:
|
||||
"""Get a color from the active skin, or return fallback."""
|
||||
try:
|
||||
from hermes_cli.skin_engine import get_active_skin
|
||||
return get_active_skin().get_color(key, fallback)
|
||||
except Exception:
|
||||
return fallback
|
||||
|
||||
|
||||
def _skin_branding(key: str, fallback: str) -> str:
|
||||
"""Get a branding string from the active skin, or return fallback."""
|
||||
try:
|
||||
from hermes_cli.skin_engine import get_active_skin
|
||||
return get_active_skin().get_branding(key, fallback)
|
||||
except Exception:
|
||||
return fallback
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# ASCII Art & Branding
|
||||
# =========================================================================
|
||||
@@ -217,18 +239,24 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
layout_table.add_column("left", justify="center")
|
||||
layout_table.add_column("right", justify="left")
|
||||
|
||||
# Resolve skin colors once for the entire banner
|
||||
accent = _skin_color("banner_accent", "#FFBF00")
|
||||
dim = _skin_color("banner_dim", "#B8860B")
|
||||
text = _skin_color("banner_text", "#FFF8DC")
|
||||
session_color = _skin_color("session_border", "#8B8682")
|
||||
|
||||
left_lines = ["", HERMES_CADUCEUS, ""]
|
||||
model_short = model.split("/")[-1] if "/" in model else model
|
||||
if len(model_short) > 28:
|
||||
model_short = model_short[:25] + "..."
|
||||
ctx_str = f" [dim #B8860B]·[/] [dim #B8860B]{_format_context_length(context_length)} context[/]" if context_length else ""
|
||||
left_lines.append(f"[#FFBF00]{model_short}[/]{ctx_str} [dim #B8860B]·[/] [dim #B8860B]Nous Research[/]")
|
||||
left_lines.append(f"[dim #B8860B]{cwd}[/]")
|
||||
ctx_str = f" [dim {dim}]·[/] [dim {dim}]{_format_context_length(context_length)} context[/]" if context_length else ""
|
||||
left_lines.append(f"[{accent}]{model_short}[/]{ctx_str} [dim {dim}]·[/] [dim {dim}]Nous Research[/]")
|
||||
left_lines.append(f"[dim {dim}]{cwd}[/]")
|
||||
if session_id:
|
||||
left_lines.append(f"[dim #8B8682]Session: {session_id}[/]")
|
||||
left_lines.append(f"[dim {session_color}]Session: {session_id}[/]")
|
||||
left_content = "\n".join(left_lines)
|
||||
|
||||
right_lines = ["[bold #FFBF00]Available Tools[/]"]
|
||||
right_lines = [f"[bold {accent}]Available Tools[/]"]
|
||||
toolsets_dict: Dict[str, list] = {}
|
||||
|
||||
for tool in tools:
|
||||
@@ -256,7 +284,7 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
if name in disabled_tools:
|
||||
colored_names.append(f"[red]{name}[/]")
|
||||
else:
|
||||
colored_names.append(f"[#FFF8DC]{name}[/]")
|
||||
colored_names.append(f"[{text}]{name}[/]")
|
||||
|
||||
tools_str = ", ".join(colored_names)
|
||||
if len(", ".join(sorted(tool_names))) > 45:
|
||||
@@ -275,7 +303,7 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
elif name in disabled_tools:
|
||||
colored_names.append(f"[red]{name}[/]")
|
||||
else:
|
||||
colored_names.append(f"[#FFF8DC]{name}[/]")
|
||||
colored_names.append(f"[{text}]{name}[/]")
|
||||
tools_str = ", ".join(colored_names)
|
||||
|
||||
right_lines.append(f"[dim #B8860B]{toolset}:[/] {tools_str}")
|
||||
@@ -306,7 +334,7 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
)
|
||||
|
||||
right_lines.append("")
|
||||
right_lines.append("[bold #FFBF00]Available Skills[/]")
|
||||
right_lines.append(f"[bold {accent}]Available Skills[/]")
|
||||
skills_by_category = get_available_skills()
|
||||
total_skills = sum(len(s) for s in skills_by_category.values())
|
||||
|
||||
@@ -320,9 +348,9 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
skills_str = ", ".join(skill_names)
|
||||
if len(skills_str) > 50:
|
||||
skills_str = skills_str[:47] + "..."
|
||||
right_lines.append(f"[dim #B8860B]{category}:[/] [#FFF8DC]{skills_str}[/]")
|
||||
right_lines.append(f"[dim {dim}]{category}:[/] [{text}]{skills_str}[/]")
|
||||
else:
|
||||
right_lines.append("[dim #B8860B]No skills installed[/]")
|
||||
right_lines.append(f"[dim {dim}]No skills installed[/]")
|
||||
|
||||
right_lines.append("")
|
||||
mcp_connected = sum(1 for s in mcp_status if s["connected"]) if mcp_status else 0
|
||||
@@ -330,7 +358,7 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
if mcp_connected:
|
||||
summary_parts.append(f"{mcp_connected} MCP servers")
|
||||
summary_parts.append("/help for commands")
|
||||
right_lines.append(f"[dim #B8860B]{' · '.join(summary_parts)}[/]")
|
||||
right_lines.append(f"[dim {dim}]{' · '.join(summary_parts)}[/]")
|
||||
|
||||
# Update check — show if behind origin/main
|
||||
try:
|
||||
@@ -347,10 +375,13 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
right_content = "\n".join(right_lines)
|
||||
layout_table.add_row(left_content, right_content)
|
||||
|
||||
agent_name = _skin_branding("agent_name", "Hermes Agent")
|
||||
title_color = _skin_color("banner_title", "#FFD700")
|
||||
border_color = _skin_color("banner_border", "#CD7F32")
|
||||
outer_panel = Panel(
|
||||
layout_table,
|
||||
title=f"[bold #FFD700]Hermes Agent {VERSION}[/]",
|
||||
border_style="#CD7F32",
|
||||
title=f"[bold {title_color}]{agent_name} {VERSION}[/]",
|
||||
border_style=border_color,
|
||||
padding=(0, 2),
|
||||
)
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ def _fetch_models_from_api(access_token: str) -> List[str]:
|
||||
if item.get("supported_in_api") is False:
|
||||
continue
|
||||
visibility = item.get("visibility", "")
|
||||
if isinstance(visibility, str) and visibility.strip().lower() == "hide":
|
||||
if isinstance(visibility, str) and visibility.strip().lower() == "hidden":
|
||||
continue
|
||||
priority = item.get("priority")
|
||||
rank = int(priority) if isinstance(priority, (int, float)) else 10_000
|
||||
|
||||
@@ -39,6 +39,8 @@ COMMANDS = {
|
||||
"/insights": "Show usage insights and analytics (last 30 days)",
|
||||
"/paste": "Check clipboard for an image and attach it",
|
||||
"/reload-mcp": "Reload MCP servers from config.yaml",
|
||||
"/rollback": "List or restore filesystem checkpoints (usage: /rollback [number])",
|
||||
"/skin": "Show or change the display skin/theme",
|
||||
"/quit": "Exit the CLI (also: /exit, /q)",
|
||||
}
|
||||
|
||||
|
||||
@@ -14,8 +14,9 @@ This module provides:
|
||||
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
import stat
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, List, Tuple
|
||||
|
||||
@@ -77,6 +78,10 @@ DEFAULT_CONFIG = {
|
||||
"container_memory": 5120, # MB (default 5GB)
|
||||
"container_disk": 51200, # MB (default 50GB)
|
||||
"container_persistent": True, # Persist filesystem across sessions
|
||||
# Docker volume mounts — share host directories with the container.
|
||||
# Each entry is "host_path:container_path" (standard Docker -v syntax).
|
||||
# Example: ["/home/user/projects:/workspace/projects", "/data:/data"]
|
||||
"docker_volumes": [],
|
||||
},
|
||||
|
||||
"browser": {
|
||||
@@ -84,6 +89,14 @@ DEFAULT_CONFIG = {
|
||||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||
},
|
||||
|
||||
# Filesystem checkpoints — automatic snapshots before destructive file ops.
|
||||
# When enabled, the agent takes a snapshot of the working directory once per
|
||||
# conversation turn (on first write_file/patch call). Use /rollback to restore.
|
||||
"checkpoints": {
|
||||
"enabled": False,
|
||||
"max_snapshots": 50, # Max checkpoints to keep per directory
|
||||
},
|
||||
|
||||
"compression": {
|
||||
"enabled": True,
|
||||
"threshold": 0.85,
|
||||
@@ -107,8 +120,9 @@ DEFAULT_CONFIG = {
|
||||
"display": {
|
||||
"compact": False,
|
||||
"personality": "kawaii",
|
||||
"resume_display": "full", # "full" (show previous messages) | "minimal" (one-liner only)
|
||||
"bell_on_complete": False, # Play terminal bell (\a) when agent finishes a response
|
||||
"resume_display": "full",
|
||||
"bell_on_complete": False,
|
||||
"skin": "default",
|
||||
},
|
||||
|
||||
# Text-to-speech configuration
|
||||
@@ -166,7 +180,7 @@ DEFAULT_CONFIG = {
|
||||
"command_allowlist": [],
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 5,
|
||||
"_config_version": 6,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
@@ -401,14 +415,18 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "messaging",
|
||||
},
|
||||
"SLACK_BOT_TOKEN": {
|
||||
"description": "Slack bot integration",
|
||||
"description": "Slack bot token (xoxb-). Get from OAuth & Permissions after installing your app. "
|
||||
"Required scopes: chat:write, app_mentions:read, channels:history, groups:history, "
|
||||
"im:history, im:read, im:write, users:read, files:write",
|
||||
"prompt": "Slack Bot Token (xoxb-...)",
|
||||
"url": "https://api.slack.com/apps",
|
||||
"password": True,
|
||||
"category": "messaging",
|
||||
},
|
||||
"SLACK_APP_TOKEN": {
|
||||
"description": "Slack Socket Mode connection",
|
||||
"description": "Slack app-level token (xapp-) for Socket Mode. Get from Basic Information → "
|
||||
"App-Level Tokens. Also ensure Event Subscriptions include: message.im, "
|
||||
"message.channels, message.groups, app_mention",
|
||||
"prompt": "Slack App Token (xapp-...)",
|
||||
"url": "https://api.slack.com/apps",
|
||||
"password": True,
|
||||
@@ -749,9 +767,9 @@ def load_config() -> Dict[str, Any]:
|
||||
|
||||
if config_path.exists():
|
||||
try:
|
||||
with open(config_path) as f:
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
user_config = yaml.safe_load(f) or {}
|
||||
|
||||
|
||||
config = _deep_merge(config, user_config)
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to load config: {e}")
|
||||
@@ -794,7 +812,7 @@ def save_config(config: Dict[str, Any]):
|
||||
ensure_hermes_home()
|
||||
config_path = get_config_path()
|
||||
|
||||
with open(config_path, 'w') as f:
|
||||
with open(config_path, 'w', encoding="utf-8") as f:
|
||||
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
|
||||
# Append commented-out sections for features that are off by default
|
||||
# or only relevant when explicitly configured. Skip sections the
|
||||
@@ -861,6 +879,13 @@ def save_env_value(key: str, value: str):
|
||||
with open(env_path, 'w', **write_kw) as f:
|
||||
f.writelines(lines)
|
||||
|
||||
# Restrict .env permissions to owner-only (contains API keys)
|
||||
if not _IS_WINDOWS:
|
||||
try:
|
||||
os.chmod(env_path, stat.S_IRUSR | stat.S_IWUSR)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def get_env_value(key: str) -> Optional[str]:
|
||||
"""Get a value from ~/.hermes/.env or environment."""
|
||||
@@ -1069,7 +1094,7 @@ def set_config_value(key: str, value: str):
|
||||
user_config = {}
|
||||
if config_path.exists():
|
||||
try:
|
||||
with open(config_path) as f:
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
user_config = yaml.safe_load(f) or {}
|
||||
except Exception:
|
||||
user_config = {}
|
||||
@@ -1097,7 +1122,7 @@ def set_config_value(key: str, value: str):
|
||||
|
||||
# Write only user config back (not the full merged defaults)
|
||||
ensure_hermes_home()
|
||||
with open(config_path, 'w') as f:
|
||||
with open(config_path, 'w', encoding="utf-8") as f:
|
||||
yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
|
||||
|
||||
# Keep .env in sync for keys that terminal_tool reads directly from env vars.
|
||||
|
||||
@@ -482,14 +482,19 @@ _PLATFORMS = [
|
||||
"token_var": "SLACK_BOT_TOKEN",
|
||||
"setup_instructions": [
|
||||
"1. Go to https://api.slack.com/apps → Create New App → From Scratch",
|
||||
"2. Enable Socket Mode: App Settings → Socket Mode → Enable",
|
||||
"3. Get Bot Token: OAuth & Permissions → Install to Workspace → copy xoxb-... token",
|
||||
"4. Get App Token: Basic Information → App-Level Tokens → Generate",
|
||||
" Name it anything, add scope: connections:write → copy xapp-... token",
|
||||
"5. Add bot scopes: OAuth & Permissions → Scopes → chat:write, im:history,",
|
||||
" im:read, im:write, channels:history, channels:read",
|
||||
"6. Reinstall the app to your workspace after adding scopes",
|
||||
"2. Enable Socket Mode: Settings → Socket Mode → Enable",
|
||||
" Create an App-Level Token with scope: connections:write → copy xapp-... token",
|
||||
"3. Add Bot Token Scopes: Features → OAuth & Permissions → Scopes",
|
||||
" Required: chat:write, app_mentions:read, channels:history, channels:read,",
|
||||
" groups:history, im:history, im:read, im:write, users:read, files:write",
|
||||
"4. Subscribe to Events: Features → Event Subscriptions → Enable",
|
||||
" Required events: message.im, message.channels, app_mention",
|
||||
" Optional: message.groups (for private channels)",
|
||||
" ⚠ Without message.channels the bot will ONLY work in DMs!",
|
||||
"5. Install to Workspace: Settings → Install App → copy xoxb-... token",
|
||||
"6. Reinstall the app after any scope or event changes",
|
||||
"7. Find your user ID: click your profile → three dots → Copy member ID",
|
||||
"8. Invite the bot to channels: /invite @YourBot",
|
||||
],
|
||||
"vars": [
|
||||
{"name": "SLACK_BOT_TOKEN", "prompt": "Bot Token (xoxb-...)", "password": True,
|
||||
|
||||
@@ -489,6 +489,7 @@ def cmd_chat(args):
|
||||
"query": args.query,
|
||||
"resume": getattr(args, "resume", None),
|
||||
"worktree": getattr(args, "worktree", False),
|
||||
"checkpoints": getattr(args, "checkpoints", False),
|
||||
}
|
||||
# Filter out None values
|
||||
kwargs = {k: v for k, v in kwargs.items() if v is not None}
|
||||
@@ -1777,6 +1778,44 @@ def cmd_update(args):
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _coalesce_session_name_args(argv: list) -> list:
|
||||
"""Join unquoted multi-word session names after -c/--continue and -r/--resume.
|
||||
|
||||
When a user types ``hermes -c Pokemon Agent Dev`` without quoting the
|
||||
session name, argparse sees three separate tokens. This function merges
|
||||
them into a single argument so argparse receives
|
||||
``['-c', 'Pokemon Agent Dev']`` instead.
|
||||
|
||||
Tokens are collected after the flag until we hit another flag (``-*``)
|
||||
or a known top-level subcommand.
|
||||
"""
|
||||
_SUBCOMMANDS = {
|
||||
"chat", "model", "gateway", "setup", "whatsapp", "login", "logout",
|
||||
"status", "cron", "doctor", "config", "pairing", "skills", "tools",
|
||||
"sessions", "insights", "version", "update", "uninstall",
|
||||
}
|
||||
_SESSION_FLAGS = {"-c", "--continue", "-r", "--resume"}
|
||||
|
||||
result = []
|
||||
i = 0
|
||||
while i < len(argv):
|
||||
token = argv[i]
|
||||
if token in _SESSION_FLAGS:
|
||||
result.append(token)
|
||||
i += 1
|
||||
# Collect subsequent non-flag, non-subcommand tokens as one name
|
||||
parts: list = []
|
||||
while i < len(argv) and not argv[i].startswith("-") and argv[i] not in _SUBCOMMANDS:
|
||||
parts.append(argv[i])
|
||||
i += 1
|
||||
if parts:
|
||||
result.append(" ".join(parts))
|
||||
else:
|
||||
result.append(token)
|
||||
i += 1
|
||||
return result
|
||||
|
||||
|
||||
def main():
|
||||
"""Main entry point for hermes CLI."""
|
||||
parser = argparse.ArgumentParser(
|
||||
@@ -1889,6 +1928,12 @@ For more help on a command:
|
||||
default=False,
|
||||
help="Run in an isolated git worktree (for parallel agents on the same repo)"
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--checkpoints",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Enable filesystem checkpoints before destructive file operations (use /rollback to restore)"
|
||||
)
|
||||
chat_parser.set_defaults(func=cmd_chat)
|
||||
|
||||
# =========================================================================
|
||||
@@ -2356,12 +2401,12 @@ For more help on a command:
|
||||
if not data:
|
||||
print(f"Session '{args.session_id}' not found.")
|
||||
return
|
||||
with open(args.output, "w") as f:
|
||||
with open(args.output, "w", encoding="utf-8") as f:
|
||||
f.write(_json.dumps(data, ensure_ascii=False) + "\n")
|
||||
print(f"Exported 1 session to {args.output}")
|
||||
else:
|
||||
sessions = db.export_all(source=args.source)
|
||||
with open(args.output, "w") as f:
|
||||
with open(args.output, "w", encoding="utf-8") as f:
|
||||
for s in sessions:
|
||||
f.write(_json.dumps(s, ensure_ascii=False) + "\n")
|
||||
print(f"Exported {len(sessions)} sessions to {args.output}")
|
||||
@@ -2515,7 +2560,11 @@ For more help on a command:
|
||||
# =========================================================================
|
||||
# Parse and execute
|
||||
# =========================================================================
|
||||
args = parser.parse_args()
|
||||
# Pre-process argv so unquoted multi-word session names after -c / -r
|
||||
# are merged into a single token before argparse sees them.
|
||||
# e.g. ``hermes -c Pokemon Agent Dev`` → ``hermes -c 'Pokemon Agent Dev'``
|
||||
_processed_argv = _coalesce_session_name_args(sys.argv[1:])
|
||||
args = parser.parse_args(_processed_argv)
|
||||
|
||||
# Handle --version flag
|
||||
if args.version:
|
||||
|
||||
@@ -1572,10 +1572,22 @@ def setup_gateway(config: dict):
|
||||
|
||||
if not existing_slack and prompt_yes_no("Set up Slack bot?", False):
|
||||
print_info("Steps to create a Slack app:")
|
||||
print_info(" 1. Go to https://api.slack.com/apps → Create New App")
|
||||
print_info(" 2. Enable Socket Mode: App Settings → Socket Mode → Enable")
|
||||
print_info(" 3. Bot Token: OAuth & Permissions → Install to Workspace")
|
||||
print_info(" 4. App Token: Basic Information → App-Level Tokens → Generate")
|
||||
print_info(" 1. Go to https://api.slack.com/apps → Create New App (from scratch)")
|
||||
print_info(" 2. Enable Socket Mode: Settings → Socket Mode → Enable")
|
||||
print_info(" • Create an App-Level Token with 'connections:write' scope")
|
||||
print_info(" 3. Add Bot Token Scopes: Features → OAuth & Permissions")
|
||||
print_info(" Required scopes: chat:write, app_mentions:read,")
|
||||
print_info(" channels:history, channels:read, groups:history,")
|
||||
print_info(" im:history, im:read, im:write, users:read, files:write")
|
||||
print_info(" 4. Subscribe to Events: Features → Event Subscriptions → Enable")
|
||||
print_info(" Required events: message.im, message.channels,")
|
||||
print_info(" message.groups, app_mention")
|
||||
print_warning(" ⚠ Without message.channels/message.groups events,")
|
||||
print_warning(" the bot will ONLY work in DMs, not channels!")
|
||||
print_info(" 5. Install to Workspace: Settings → Install App")
|
||||
print_info(" 6. After installing, invite the bot to channels: /invite @YourBot")
|
||||
print()
|
||||
print_info(" Full guide: https://hermes-agent.ai/docs/user-guide/messaging/slack")
|
||||
print()
|
||||
bot_token = prompt("Slack Bot Token (xoxb-...)", password=True)
|
||||
if bot_token:
|
||||
@@ -1587,7 +1599,7 @@ def setup_gateway(config: dict):
|
||||
|
||||
print()
|
||||
print_info("🔒 Security: Restrict who can use your bot")
|
||||
print_info(" Find Slack user IDs in your profile or via the Slack API")
|
||||
print_info(" To find a Member ID: click a user's name → View full profile → ⋮ → Copy member ID")
|
||||
print()
|
||||
allowed_users = prompt("Allowed user IDs (comma-separated, leave empty for open access)")
|
||||
if allowed_users:
|
||||
|
||||
341
hermes_cli/skin_engine.py
Normal file
341
hermes_cli/skin_engine.py
Normal file
@@ -0,0 +1,341 @@
|
||||
"""Hermes CLI skin/theme engine.
|
||||
|
||||
A data-driven skin system that lets users customize the CLI's visual appearance.
|
||||
Skins are defined as YAML files in ~/.hermes/skins/ or as built-in presets.
|
||||
|
||||
Each skin defines:
|
||||
- colors: banner and UI color palette (hex values for Rich markup)
|
||||
- spinner: kawaii faces, thinking verbs, optional wings
|
||||
- branding: agent name, welcome/goodbye messages, prompt symbol
|
||||
- tool_prefix: character used for tool output lines (default: ┊)
|
||||
|
||||
Usage:
|
||||
from hermes_cli.skin_engine import get_active_skin, list_skins, set_active_skin
|
||||
|
||||
skin = get_active_skin()
|
||||
print(skin.colors["banner_title"]) # "#FFD700"
|
||||
print(skin.spinner["thinking_verbs"]) # ["pondering", ...]
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Skin data structure
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
|
||||
class SkinConfig:
|
||||
"""Complete skin configuration."""
|
||||
name: str
|
||||
description: str = ""
|
||||
colors: Dict[str, str] = field(default_factory=dict)
|
||||
spinner: Dict[str, Any] = field(default_factory=dict)
|
||||
branding: Dict[str, str] = field(default_factory=dict)
|
||||
tool_prefix: str = "┊"
|
||||
|
||||
def get_color(self, key: str, fallback: str = "") -> str:
|
||||
"""Get a color value with fallback."""
|
||||
return self.colors.get(key, fallback)
|
||||
|
||||
def get_spinner_list(self, key: str) -> List[str]:
|
||||
"""Get a spinner list (faces, verbs, etc.)."""
|
||||
return self.spinner.get(key, [])
|
||||
|
||||
def get_spinner_wings(self) -> List[Tuple[str, str]]:
|
||||
"""Get spinner wing pairs, or empty list if none."""
|
||||
raw = self.spinner.get("wings", [])
|
||||
result = []
|
||||
for pair in raw:
|
||||
if isinstance(pair, (list, tuple)) and len(pair) == 2:
|
||||
result.append((str(pair[0]), str(pair[1])))
|
||||
return result
|
||||
|
||||
def get_branding(self, key: str, fallback: str = "") -> str:
|
||||
"""Get a branding value with fallback."""
|
||||
return self.branding.get(key, fallback)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Built-in skin definitions
|
||||
# =============================================================================
|
||||
|
||||
_BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
|
||||
"default": {
|
||||
"name": "default",
|
||||
"description": "Classic Hermes — gold and kawaii",
|
||||
"colors": {
|
||||
"banner_border": "#CD7F32",
|
||||
"banner_title": "#FFD700",
|
||||
"banner_accent": "#FFBF00",
|
||||
"banner_dim": "#B8860B",
|
||||
"banner_text": "#FFF8DC",
|
||||
"ui_accent": "#FFBF00",
|
||||
"ui_label": "#4dd0e1",
|
||||
"ui_ok": "#4caf50",
|
||||
"ui_error": "#ef5350",
|
||||
"ui_warn": "#ffa726",
|
||||
"prompt": "#FFF8DC",
|
||||
"input_rule": "#CD7F32",
|
||||
"response_border": "#FFD700",
|
||||
"session_label": "#DAA520",
|
||||
"session_border": "#8B8682",
|
||||
},
|
||||
"spinner": {
|
||||
# Empty = use hardcoded defaults in display.py
|
||||
},
|
||||
"branding": {
|
||||
"agent_name": "Hermes Agent",
|
||||
"welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Goodbye! ⚕",
|
||||
"response_label": " ⚕ Hermes ",
|
||||
"prompt_symbol": "❯ ",
|
||||
"help_header": "(^_^)? Available Commands",
|
||||
},
|
||||
"tool_prefix": "┊",
|
||||
},
|
||||
"ares": {
|
||||
"name": "ares",
|
||||
"description": "War-god theme — crimson and bronze",
|
||||
"colors": {
|
||||
"banner_border": "#9F1C1C",
|
||||
"banner_title": "#C7A96B",
|
||||
"banner_accent": "#DD4A3A",
|
||||
"banner_dim": "#6B1717",
|
||||
"banner_text": "#F1E6CF",
|
||||
"ui_accent": "#DD4A3A",
|
||||
"ui_label": "#C7A96B",
|
||||
"ui_ok": "#4caf50",
|
||||
"ui_error": "#ef5350",
|
||||
"ui_warn": "#ffa726",
|
||||
"prompt": "#F1E6CF",
|
||||
"input_rule": "#9F1C1C",
|
||||
"response_border": "#C7A96B",
|
||||
"session_label": "#C7A96B",
|
||||
"session_border": "#6E584B",
|
||||
},
|
||||
"spinner": {
|
||||
"waiting_faces": ["(⚔)", "(⛨)", "(▲)", "(<>)", "(/)"],
|
||||
"thinking_faces": ["(⚔)", "(⛨)", "(▲)", "(⌁)", "(<>)"],
|
||||
"thinking_verbs": [
|
||||
"forging", "marching", "sizing the field", "holding the line",
|
||||
"hammering plans", "tempering steel", "plotting impact", "raising the shield",
|
||||
],
|
||||
"wings": [
|
||||
["⟪⚔", "⚔⟫"],
|
||||
["⟪▲", "▲⟫"],
|
||||
["⟪╸", "╺⟫"],
|
||||
["⟪⛨", "⛨⟫"],
|
||||
],
|
||||
},
|
||||
"branding": {
|
||||
"agent_name": "Ares Agent",
|
||||
"welcome": "Welcome to Ares Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Farewell, warrior! ⚔",
|
||||
"response_label": " ⚔ Ares ",
|
||||
"prompt_symbol": "⚔ ❯ ",
|
||||
"help_header": "(⚔) Available Commands",
|
||||
},
|
||||
"tool_prefix": "╎",
|
||||
},
|
||||
"mono": {
|
||||
"name": "mono",
|
||||
"description": "Monochrome — clean grayscale",
|
||||
"colors": {
|
||||
"banner_border": "#555555",
|
||||
"banner_title": "#e6edf3",
|
||||
"banner_accent": "#aaaaaa",
|
||||
"banner_dim": "#444444",
|
||||
"banner_text": "#c9d1d9",
|
||||
"ui_accent": "#aaaaaa",
|
||||
"ui_label": "#888888",
|
||||
"ui_ok": "#888888",
|
||||
"ui_error": "#cccccc",
|
||||
"ui_warn": "#999999",
|
||||
"prompt": "#c9d1d9",
|
||||
"input_rule": "#444444",
|
||||
"response_border": "#aaaaaa",
|
||||
"session_label": "#888888",
|
||||
"session_border": "#555555",
|
||||
},
|
||||
"spinner": {},
|
||||
"branding": {
|
||||
"agent_name": "Hermes Agent",
|
||||
"welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Goodbye! ⚕",
|
||||
"response_label": " ⚕ Hermes ",
|
||||
"prompt_symbol": "❯ ",
|
||||
"help_header": "[?] Available Commands",
|
||||
},
|
||||
"tool_prefix": "┊",
|
||||
},
|
||||
"slate": {
|
||||
"name": "slate",
|
||||
"description": "Cool blue — developer-focused",
|
||||
"colors": {
|
||||
"banner_border": "#4169e1",
|
||||
"banner_title": "#7eb8f6",
|
||||
"banner_accent": "#8EA8FF",
|
||||
"banner_dim": "#4b5563",
|
||||
"banner_text": "#c9d1d9",
|
||||
"ui_accent": "#7eb8f6",
|
||||
"ui_label": "#8EA8FF",
|
||||
"ui_ok": "#63D0A6",
|
||||
"ui_error": "#F7A072",
|
||||
"ui_warn": "#e6a855",
|
||||
"prompt": "#c9d1d9",
|
||||
"input_rule": "#4169e1",
|
||||
"response_border": "#7eb8f6",
|
||||
"session_label": "#7eb8f6",
|
||||
"session_border": "#4b5563",
|
||||
},
|
||||
"spinner": {},
|
||||
"branding": {
|
||||
"agent_name": "Hermes Agent",
|
||||
"welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Goodbye! ⚕",
|
||||
"response_label": " ⚕ Hermes ",
|
||||
"prompt_symbol": "❯ ",
|
||||
"help_header": "(^_^)? Available Commands",
|
||||
},
|
||||
"tool_prefix": "┊",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Skin loading and management
|
||||
# =============================================================================
|
||||
|
||||
_active_skin: Optional[SkinConfig] = None
|
||||
_active_skin_name: str = "default"
|
||||
|
||||
|
||||
def _skins_dir() -> Path:
|
||||
"""User skins directory."""
|
||||
home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
return home / "skins"
|
||||
|
||||
|
||||
def _load_skin_from_yaml(path: Path) -> Optional[Dict[str, Any]]:
|
||||
"""Load a skin definition from a YAML file."""
|
||||
try:
|
||||
import yaml
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = yaml.safe_load(f)
|
||||
if isinstance(data, dict) and "name" in data:
|
||||
return data
|
||||
except Exception as e:
|
||||
logger.debug("Failed to load skin from %s: %s", path, e)
|
||||
return None
|
||||
|
||||
|
||||
def _build_skin_config(data: Dict[str, Any]) -> SkinConfig:
|
||||
"""Build a SkinConfig from a raw dict (built-in or loaded from YAML)."""
|
||||
# Start with default values as base for missing keys
|
||||
default = _BUILTIN_SKINS["default"]
|
||||
colors = dict(default.get("colors", {}))
|
||||
colors.update(data.get("colors", {}))
|
||||
spinner = dict(default.get("spinner", {}))
|
||||
spinner.update(data.get("spinner", {}))
|
||||
branding = dict(default.get("branding", {}))
|
||||
branding.update(data.get("branding", {}))
|
||||
|
||||
return SkinConfig(
|
||||
name=data.get("name", "unknown"),
|
||||
description=data.get("description", ""),
|
||||
colors=colors,
|
||||
spinner=spinner,
|
||||
branding=branding,
|
||||
tool_prefix=data.get("tool_prefix", default.get("tool_prefix", "┊")),
|
||||
)
|
||||
|
||||
|
||||
def list_skins() -> List[Dict[str, str]]:
|
||||
"""List all available skins (built-in + user-installed).
|
||||
|
||||
Returns list of {"name": ..., "description": ..., "source": "builtin"|"user"}.
|
||||
"""
|
||||
result = []
|
||||
for name, data in _BUILTIN_SKINS.items():
|
||||
result.append({
|
||||
"name": name,
|
||||
"description": data.get("description", ""),
|
||||
"source": "builtin",
|
||||
})
|
||||
|
||||
skins_path = _skins_dir()
|
||||
if skins_path.is_dir():
|
||||
for f in sorted(skins_path.glob("*.yaml")):
|
||||
data = _load_skin_from_yaml(f)
|
||||
if data:
|
||||
skin_name = data.get("name", f.stem)
|
||||
# Skip if it shadows a built-in
|
||||
if any(s["name"] == skin_name for s in result):
|
||||
continue
|
||||
result.append({
|
||||
"name": skin_name,
|
||||
"description": data.get("description", ""),
|
||||
"source": "user",
|
||||
})
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def load_skin(name: str) -> SkinConfig:
|
||||
"""Load a skin by name. Checks user skins first, then built-in."""
|
||||
# Check user skins directory
|
||||
skins_path = _skins_dir()
|
||||
user_file = skins_path / f"{name}.yaml"
|
||||
if user_file.is_file():
|
||||
data = _load_skin_from_yaml(user_file)
|
||||
if data:
|
||||
return _build_skin_config(data)
|
||||
|
||||
# Check built-in skins
|
||||
if name in _BUILTIN_SKINS:
|
||||
return _build_skin_config(_BUILTIN_SKINS[name])
|
||||
|
||||
# Fallback to default
|
||||
logger.warning("Skin '%s' not found, using default", name)
|
||||
return _build_skin_config(_BUILTIN_SKINS["default"])
|
||||
|
||||
|
||||
def get_active_skin() -> SkinConfig:
|
||||
"""Get the currently active skin config (cached)."""
|
||||
global _active_skin
|
||||
if _active_skin is None:
|
||||
_active_skin = load_skin(_active_skin_name)
|
||||
return _active_skin
|
||||
|
||||
|
||||
def set_active_skin(name: str) -> SkinConfig:
|
||||
"""Switch the active skin. Returns the new SkinConfig."""
|
||||
global _active_skin, _active_skin_name
|
||||
_active_skin_name = name
|
||||
_active_skin = load_skin(name)
|
||||
return _active_skin
|
||||
|
||||
|
||||
def get_active_skin_name() -> str:
|
||||
"""Get the name of the currently active skin."""
|
||||
return _active_skin_name
|
||||
|
||||
|
||||
def init_skin_from_config(config: dict) -> None:
|
||||
"""Initialize the active skin from CLI config at startup.
|
||||
|
||||
Call this once during CLI init with the loaded config dict.
|
||||
"""
|
||||
display = config.get("display", {})
|
||||
skin_name = display.get("skin", "default")
|
||||
if isinstance(skin_name, str) and skin_name.strip():
|
||||
set_active_skin(skin_name.strip())
|
||||
else:
|
||||
set_active_skin("default")
|
||||
@@ -263,7 +263,7 @@ def show_status(args):
|
||||
if jobs_file.exists():
|
||||
import json
|
||||
try:
|
||||
with open(jobs_file) as f:
|
||||
with open(jobs_file, encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
jobs = data.get("jobs", [])
|
||||
enabled_jobs = [j for j in jobs if j.get("enabled", True)]
|
||||
@@ -283,7 +283,7 @@ def show_status(args):
|
||||
if sessions_file.exists():
|
||||
import json
|
||||
try:
|
||||
with open(sessions_file) as f:
|
||||
with open(sessions_file, encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
print(f" Active: {len(data)} session(s)")
|
||||
except Exception:
|
||||
|
||||
@@ -16,6 +16,7 @@ Key design decisions:
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
import time
|
||||
from pathlib import Path
|
||||
@@ -490,12 +491,16 @@ class SessionDB:
|
||||
msg_id = cursor.lastrowid
|
||||
|
||||
# Update counters
|
||||
is_tool_related = role == "tool" or tool_calls is not None
|
||||
if is_tool_related:
|
||||
# Count actual tool calls from the tool_calls list (not from tool responses).
|
||||
# A single assistant message can contain multiple parallel tool calls.
|
||||
num_tool_calls = 0
|
||||
if tool_calls is not None:
|
||||
num_tool_calls = len(tool_calls) if isinstance(tool_calls, list) else 1
|
||||
if num_tool_calls > 0:
|
||||
self._conn.execute(
|
||||
"""UPDATE sessions SET message_count = message_count + 1,
|
||||
tool_call_count = tool_call_count + 1 WHERE id = ?""",
|
||||
(session_id,),
|
||||
tool_call_count = tool_call_count + ? WHERE id = ?""",
|
||||
(num_tool_calls, session_id),
|
||||
)
|
||||
else:
|
||||
self._conn.execute(
|
||||
@@ -553,6 +558,32 @@ class SessionDB:
|
||||
# Search
|
||||
# =========================================================================
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_fts5_query(query: str) -> str:
|
||||
"""Sanitize user input for safe use in FTS5 MATCH queries.
|
||||
|
||||
FTS5 has its own query syntax where characters like ``"``, ``(``, ``)``,
|
||||
``+``, ``*``, ``{``, ``}`` and bare boolean operators (``AND``, ``OR``,
|
||||
``NOT``) have special meaning. Passing raw user input directly to
|
||||
MATCH can cause ``sqlite3.OperationalError``.
|
||||
|
||||
Strategy: strip characters that are only meaningful as FTS5 operators
|
||||
and would otherwise cause syntax errors. This preserves normal keyword
|
||||
search while preventing crashes on inputs like ``C++``, ``"unterminated``,
|
||||
or ``hello AND``.
|
||||
"""
|
||||
# Remove FTS5-special characters that are not useful in keyword search
|
||||
sanitized = re.sub(r'[+{}()"^]', " ", query)
|
||||
# Collapse repeated * (e.g. "***") into a single one, and remove
|
||||
# leading * (prefix-only matching requires at least one char before *)
|
||||
sanitized = re.sub(r"\*+", "*", sanitized)
|
||||
sanitized = re.sub(r"(^|\s)\*", r"\1", sanitized)
|
||||
# Remove dangling boolean operators at start/end that would cause
|
||||
# syntax errors (e.g. "hello AND" or "OR world")
|
||||
sanitized = re.sub(r"(?i)^(AND|OR|NOT)\b\s*", "", sanitized.strip())
|
||||
sanitized = re.sub(r"(?i)\s+(AND|OR|NOT)\s*$", "", sanitized.strip())
|
||||
return sanitized.strip()
|
||||
|
||||
def search_messages(
|
||||
self,
|
||||
query: str,
|
||||
@@ -576,6 +607,10 @@ class SessionDB:
|
||||
if not query or not query.strip():
|
||||
return []
|
||||
|
||||
query = self._sanitize_fts5_query(query)
|
||||
if not query:
|
||||
return []
|
||||
|
||||
if source_filter is None:
|
||||
source_filter = ["cli", "telegram", "discord", "whatsapp", "slack"]
|
||||
|
||||
@@ -615,7 +650,11 @@ class SessionDB:
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
|
||||
cursor = self._conn.execute(sql, params)
|
||||
try:
|
||||
cursor = self._conn.execute(sql, params)
|
||||
except sqlite3.OperationalError:
|
||||
# FTS5 query syntax error despite sanitization — return empty
|
||||
return []
|
||||
matches = [dict(row) for row in cursor.fetchall()]
|
||||
|
||||
# Add surrounding context (1 message before + after each match)
|
||||
|
||||
2
optional-skills/migration/DESCRIPTION.md
Normal file
2
optional-skills/migration/DESCRIPTION.md
Normal file
@@ -0,0 +1,2 @@
|
||||
Optional migration workflows for importing user state and customizations from
|
||||
other agent systems into Hermes Agent.
|
||||
281
optional-skills/migration/openclaw-migration/SKILL.md
Normal file
281
optional-skills/migration/openclaw-migration/SKILL.md
Normal file
@@ -0,0 +1,281 @@
|
||||
---
|
||||
name: openclaw-migration
|
||||
description: Migrate a user's OpenClaw customization footprint into Hermes Agent. Imports Hermes-compatible memories, SOUL.md, command allowlists, user skills, and selected workspace assets from ~/.openclaw, then reports exactly what could not be migrated and why.
|
||||
version: 1.0.0
|
||||
author: Hermes Agent (Nous Research)
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [Migration, OpenClaw, Hermes, Memory, Persona, Import]
|
||||
related_skills: [hermes-agent]
|
||||
---
|
||||
|
||||
# OpenClaw -> Hermes Migration
|
||||
|
||||
Use this skill when a user wants to move their OpenClaw setup into Hermes Agent with minimal manual cleanup.
|
||||
|
||||
## What this skill does
|
||||
|
||||
It uses `scripts/openclaw_to_hermes.py` to:
|
||||
|
||||
- import `SOUL.md` into the Hermes home directory as `SOUL.md`
|
||||
- transform OpenClaw `MEMORY.md` and `USER.md` into Hermes memory entries
|
||||
- merge OpenClaw command approval patterns into Hermes `command_allowlist`
|
||||
- migrate Hermes-compatible messaging settings such as `TELEGRAM_ALLOWED_USERS` and `MESSAGING_CWD`
|
||||
- copy OpenClaw skills into `~/.hermes/skills/openclaw-imports/`
|
||||
- optionally copy the OpenClaw workspace instructions file into a chosen Hermes workspace
|
||||
- mirror compatible workspace assets such as `workspace/tts/` into `~/.hermes/tts/`
|
||||
- archive non-secret docs that do not have a direct Hermes destination
|
||||
- produce a structured report listing migrated items, conflicts, skipped items, and reasons
|
||||
|
||||
## Path resolution
|
||||
|
||||
The helper script lives in this skill directory at:
|
||||
|
||||
- `scripts/openclaw_to_hermes.py`
|
||||
|
||||
When this skill is installed from the Skills Hub, the normal location is:
|
||||
|
||||
- `~/.hermes/skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py`
|
||||
|
||||
Do not guess a shorter path like `~/.hermes/skills/openclaw-migration/...`.
|
||||
|
||||
Before running the helper:
|
||||
|
||||
1. Prefer the installed path under `~/.hermes/skills/migration/openclaw-migration/`.
|
||||
2. If that path fails, inspect the installed skill directory and resolve the script relative to the installed `SKILL.md`.
|
||||
3. Only use `find` as a fallback if the installed location is missing or the skill was moved manually.
|
||||
4. When calling the terminal tool, do not pass `workdir: "~"`. Use an absolute directory such as the user's home directory, or omit `workdir` entirely.
|
||||
|
||||
With `--migrate-secrets`, it will also import a small allowlisted set of Hermes-compatible secrets, currently:
|
||||
|
||||
- `TELEGRAM_BOT_TOKEN`
|
||||
|
||||
## Default workflow
|
||||
|
||||
1. Inspect first with a dry run.
|
||||
2. Present a simple summary of what can be migrated, what cannot be migrated, and what would be archived.
|
||||
3. If the `clarify` tool is available, use it for user decisions instead of asking for a free-form prose reply.
|
||||
4. If the dry run finds imported skill directory conflicts, ask how those should be handled before executing.
|
||||
5. Ask the user to choose between the two supported migration modes before executing.
|
||||
6. Ask for a target workspace path only if the user wants the workspace instructions file brought over.
|
||||
7. Execute the migration with the matching preset and flags.
|
||||
8. Summarize the results, especially:
|
||||
- what was migrated
|
||||
- what was archived for manual review
|
||||
- what was skipped and why
|
||||
|
||||
## User interaction protocol
|
||||
|
||||
Hermes CLI supports the `clarify` tool for interactive prompts, but it is limited to:
|
||||
|
||||
- one choice at a time
|
||||
- up to 4 predefined choices
|
||||
- an automatic `Other` free-text option
|
||||
|
||||
It does **not** support true multi-select checkboxes in a single prompt.
|
||||
|
||||
For every `clarify` call:
|
||||
|
||||
- always include a non-empty `question`
|
||||
- include `choices` only for real selectable prompts
|
||||
- keep `choices` to 2-4 plain string options
|
||||
- never emit placeholder or truncated options such as `...`
|
||||
- never pad or stylize choices with extra whitespace
|
||||
- never include fake form fields in the question such as `enter directory here`, blank lines to fill in, or underscores like `_____`
|
||||
- for open-ended path questions, ask only the plain sentence; the user types in the normal CLI prompt below the panel
|
||||
|
||||
If a `clarify` call returns an error, inspect the error text, correct the payload, and retry once with a valid `question` and clean choices.
|
||||
|
||||
When `clarify` is available and the dry run reveals any required user decision, your **next action must be a `clarify` tool call**.
|
||||
Do not end the turn with a normal assistant message such as:
|
||||
|
||||
- "Let me present the choices"
|
||||
- "What would you like to do?"
|
||||
- "Here are the options"
|
||||
|
||||
If a user decision is required, collect it via `clarify` before producing more prose.
|
||||
If multiple unresolved decisions remain, do not insert an explanatory assistant message between them. After one `clarify` response is received, your next action should usually be the next required `clarify` call.
|
||||
|
||||
Treat `workspace-agents` as an unresolved decision whenever the dry run reports:
|
||||
|
||||
- `kind="workspace-agents"`
|
||||
- `status="skipped"`
|
||||
- reason containing `No workspace target was provided`
|
||||
|
||||
In that case, you must ask about workspace instructions before execution. Do not silently treat that as a decision to skip.
|
||||
|
||||
Because of that limitation, use this simplified decision flow:
|
||||
|
||||
1. For `SOUL.md` conflicts, use `clarify` with choices such as:
|
||||
- `keep existing`
|
||||
- `overwrite with backup`
|
||||
- `review first`
|
||||
2. If the dry run shows one or more `kind="skill"` items with `status="conflict"`, use `clarify` with choices such as:
|
||||
- `keep existing skills`
|
||||
- `overwrite conflicting skills with backup`
|
||||
- `import conflicting skills under renamed folders`
|
||||
3. For workspace instructions, use `clarify` with choices such as:
|
||||
- `skip workspace instructions`
|
||||
- `copy to a workspace path`
|
||||
- `decide later`
|
||||
4. If the user chooses to copy workspace instructions, ask a follow-up open-ended `clarify` question requesting an **absolute path**.
|
||||
5. If the user chooses `skip workspace instructions` or `decide later`, proceed without `--workspace-target`.
|
||||
5. For migration mode, use `clarify` with these 3 choices:
|
||||
- `user-data only`
|
||||
- `full compatible migration`
|
||||
- `cancel`
|
||||
6. `user-data only` means: migrate user data and compatible config, but do **not** import allowlisted secrets.
|
||||
7. `full compatible migration` means: migrate the same compatible user data plus the allowlisted secrets when present.
|
||||
8. If `clarify` is not available, ask the same question in normal text, but still constrain the answer to `user-data only`, `full compatible migration`, or `cancel`.
|
||||
|
||||
Execution gate:
|
||||
|
||||
- Do not execute while a `workspace-agents` skip caused by `No workspace target was provided` remains unresolved.
|
||||
- The only valid ways to resolve it are:
|
||||
- user explicitly chooses `skip workspace instructions`
|
||||
- user explicitly chooses `decide later`
|
||||
- user provides a workspace path after choosing `copy to a workspace path`
|
||||
- Absence of a workspace target in the dry run is not itself permission to execute.
|
||||
- Do not execute while any required `clarify` decision remains unresolved.
|
||||
|
||||
Use these exact `clarify` payload shapes as the default pattern:
|
||||
|
||||
- `{"question":"Your existing SOUL.md conflicts with the imported one. What should I do?","choices":["keep existing","overwrite with backup","review first"]}`
|
||||
- `{"question":"One or more imported OpenClaw skills already exist in Hermes. How should I handle those skill conflicts?","choices":["keep existing skills","overwrite conflicting skills with backup","import conflicting skills under renamed folders"]}`
|
||||
- `{"question":"Choose migration mode: migrate only user data, or run the full compatible migration including allowlisted secrets?","choices":["user-data only","full compatible migration","cancel"]}`
|
||||
- `{"question":"Do you want to copy the OpenClaw workspace instructions file into a Hermes workspace?","choices":["skip workspace instructions","copy to a workspace path","decide later"]}`
|
||||
- `{"question":"Please provide an absolute path where the workspace instructions should be copied."}`
|
||||
|
||||
## Decision-to-command mapping
|
||||
|
||||
Map user decisions to command flags exactly:
|
||||
|
||||
- If the user chooses `keep existing` for `SOUL.md`, do **not** add `--overwrite`.
|
||||
- If the user chooses `overwrite with backup`, add `--overwrite`.
|
||||
- If the user chooses `review first`, stop before execution and review the relevant files.
|
||||
- If the user chooses `keep existing skills`, add `--skill-conflict skip`.
|
||||
- If the user chooses `overwrite conflicting skills with backup`, add `--skill-conflict overwrite`.
|
||||
- If the user chooses `import conflicting skills under renamed folders`, add `--skill-conflict rename`.
|
||||
- If the user chooses `user-data only`, execute with `--preset user-data` and do **not** add `--migrate-secrets`.
|
||||
- If the user chooses `full compatible migration`, execute with `--preset full --migrate-secrets`.
|
||||
- Only add `--workspace-target` if the user explicitly provided an absolute workspace path.
|
||||
- If the user chooses `skip workspace instructions` or `decide later`, do not add `--workspace-target`.
|
||||
|
||||
Before executing, restate the exact command plan in plain language and make sure it matches the user's choices.
|
||||
|
||||
## Post-run reporting rules
|
||||
|
||||
After execution, treat the script's JSON output as the source of truth.
|
||||
|
||||
1. Base all counts on `report.summary`.
|
||||
2. Only list an item under "Successfully Migrated" if its `status` is exactly `migrated`.
|
||||
3. Do not claim a conflict was resolved unless the report shows that item as `migrated`.
|
||||
4. Do not say `SOUL.md` was overwritten unless the report item for `kind="soul"` has `status="migrated"`.
|
||||
5. If `report.summary.conflict > 0`, include a conflict section instead of silently implying success.
|
||||
6. If counts and listed items disagree, fix the list to match the report before responding.
|
||||
7. Include the `output_dir` path from the report when available so the user can inspect `report.json`, `summary.md`, backups, and archived files.
|
||||
8. For memory or user-profile overflow, do not say the entries were archived unless the report explicitly shows an archive path. If `details.overflow_file` exists, say the full overflow list was exported there.
|
||||
9. If a skill was imported under a renamed folder, report the final destination and mention `details.renamed_from`.
|
||||
10. If `report.skill_conflict_mode` is present, use it as the source of truth for the selected imported-skill conflict policy.
|
||||
11. If an item has `status="skipped"`, do not describe it as overwritten, backed up, migrated, or resolved.
|
||||
12. If `kind="soul"` has `status="skipped"` with reason `Target already matches source`, say it was left unchanged and do not mention a backup.
|
||||
13. If a renamed imported skill has an empty `details.backup`, do not imply the existing Hermes skill was renamed or backed up. Say only that the imported copy was placed in the new destination and reference `details.renamed_from` as the pre-existing folder that remained in place.
|
||||
|
||||
## Migration presets
|
||||
|
||||
Prefer these two presets in normal use:
|
||||
|
||||
- `user-data`
|
||||
- `full`
|
||||
|
||||
`user-data` includes:
|
||||
|
||||
- `soul`
|
||||
- `workspace-agents`
|
||||
- `memory`
|
||||
- `user-profile`
|
||||
- `messaging-settings`
|
||||
- `command-allowlist`
|
||||
- `skills`
|
||||
- `tts-assets`
|
||||
- `archive`
|
||||
|
||||
`full` includes everything in `user-data` plus:
|
||||
|
||||
- `secret-settings`
|
||||
|
||||
The helper script still supports category-level `--include` / `--exclude`, but treat that as an advanced fallback rather than the default UX.
|
||||
|
||||
## Commands
|
||||
|
||||
Dry run with full discovery:
|
||||
|
||||
```bash
|
||||
python3 ~/.hermes/skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py
|
||||
```
|
||||
|
||||
When using the terminal tool, prefer an absolute invocation pattern such as:
|
||||
|
||||
```json
|
||||
{"command":"python3 /home/USER/.hermes/skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py","workdir":"/home/USER"}
|
||||
```
|
||||
|
||||
Dry run with the user-data preset:
|
||||
|
||||
```bash
|
||||
python3 ~/.hermes/skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py --preset user-data
|
||||
```
|
||||
|
||||
Execute a user-data migration:
|
||||
|
||||
```bash
|
||||
python3 ~/.hermes/skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py --execute --preset user-data --skill-conflict skip
|
||||
```
|
||||
|
||||
Execute a full compatible migration:
|
||||
|
||||
```bash
|
||||
python3 ~/.hermes/skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py --execute --preset full --migrate-secrets --skill-conflict skip
|
||||
```
|
||||
|
||||
Execute with workspace instructions included:
|
||||
|
||||
```bash
|
||||
python3 ~/.hermes/skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py --execute --preset user-data --skill-conflict rename --workspace-target "/absolute/workspace/path"
|
||||
```
|
||||
|
||||
Do not use `$PWD` or the home directory as the workspace target by default. Ask for an explicit workspace path first.
|
||||
|
||||
## Important rules
|
||||
|
||||
1. Run a dry run before writing unless the user explicitly says to proceed immediately.
|
||||
2. Do not migrate secrets by default. Tokens, auth blobs, device credentials, and raw gateway config should stay out of Hermes unless the user explicitly asks for secret migration.
|
||||
3. Do not silently overwrite non-empty Hermes targets unless the user explicitly wants that. The helper script will preserve backups when overwriting is enabled.
|
||||
4. Always give the user the skipped-items report. That report is part of the migration, not an optional extra.
|
||||
5. Prefer the primary OpenClaw workspace (`~/.openclaw/workspace/`) over `workspace.default/`. Only use the default workspace as fallback when the primary files are missing.
|
||||
6. Even in secret-migration mode, only migrate secrets with a clean Hermes destination. Unsupported auth blobs must still be reported as skipped.
|
||||
7. If the dry run shows a large asset copy, a conflicting `SOUL.md`, or overflowed memory entries, call those out separately before execution.
|
||||
8. Default to `user-data only` if the user is unsure.
|
||||
9. Only include `workspace-agents` when the user has explicitly provided a destination workspace path.
|
||||
10. Treat category-level `--include` / `--exclude` as an advanced escape hatch, not the normal flow.
|
||||
11. Do not end the dry-run summary with a vague “What would you like to do?” if `clarify` is available. Use structured follow-up prompts instead.
|
||||
12. Do not use an open-ended `clarify` prompt when a real choice prompt would work. Prefer selectable choices first, then free text only for absolute paths or file review requests.
|
||||
13. After a dry run, never stop after summarizing if there is still an unresolved decision. Use `clarify` immediately for the highest-priority blocking decision.
|
||||
14. Priority order for follow-up questions:
|
||||
- `SOUL.md` conflict
|
||||
- imported skill conflicts
|
||||
- migration mode
|
||||
- workspace instructions destination
|
||||
15. Do not promise to present choices later in the same message. Present them by actually calling `clarify`.
|
||||
16. After the migration-mode answer, explicitly check whether `workspace-agents` is still unresolved. If it is, your next action must be the workspace-instructions `clarify` call.
|
||||
17. After any `clarify` answer, if another required decision remains, do not narrate what was just decided. Ask the next required question immediately.
|
||||
|
||||
## Expected result
|
||||
|
||||
After a successful run, the user should have:
|
||||
|
||||
- Hermes persona state imported
|
||||
- Hermes memory files populated with converted OpenClaw knowledge
|
||||
- OpenClaw skills available under `~/.hermes/skills/openclaw-imports/`
|
||||
- a migration report showing any conflicts, omissions, or unsupported data
|
||||
File diff suppressed because it is too large
Load Diff
@@ -40,13 +40,16 @@ dependencies = [
|
||||
[project.optional-dependencies]
|
||||
modal = ["swe-rex[modal]>=1.4.0"]
|
||||
daytona = ["daytona>=0.148.0"]
|
||||
dev = ["pytest", "pytest-asyncio"]
|
||||
dev = ["pytest", "pytest-asyncio", "mcp>=1.2.0"]
|
||||
messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
|
||||
cron = ["croniter"]
|
||||
slack = ["slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
|
||||
cli = ["simple-term-menu"]
|
||||
tts-premium = ["elevenlabs"]
|
||||
pty = ["ptyprocess>=0.7.0"]
|
||||
pty = [
|
||||
"ptyprocess>=0.7.0; sys_platform != 'win32'",
|
||||
"pywinpty>=2.0.0; sys_platform == 'win32'",
|
||||
]
|
||||
honcho = ["honcho-ai>=2.0.1"]
|
||||
mcp = ["mcp>=1.2.0"]
|
||||
homeassistant = ["aiohttp>=3.9.0"]
|
||||
|
||||
46
run_agent.py
46
run_agent.py
@@ -172,6 +172,7 @@ class AIAgent:
|
||||
provider_data_collection: str = None,
|
||||
session_id: str = None,
|
||||
tool_progress_callback: callable = None,
|
||||
thinking_callback: callable = None,
|
||||
clarify_callback: callable = None,
|
||||
step_callback: callable = None,
|
||||
max_tokens: int = None,
|
||||
@@ -184,6 +185,8 @@ class AIAgent:
|
||||
honcho_session_key: str = None,
|
||||
iteration_budget: "IterationBudget" = None,
|
||||
fallback_model: Dict[str, Any] = None,
|
||||
checkpoints_enabled: bool = False,
|
||||
checkpoint_max_snapshots: int = 50,
|
||||
):
|
||||
"""
|
||||
Initialize the AI Agent.
|
||||
@@ -256,6 +259,7 @@ class AIAgent:
|
||||
self.api_mode = "chat_completions"
|
||||
|
||||
self.tool_progress_callback = tool_progress_callback
|
||||
self.thinking_callback = thinking_callback
|
||||
self.clarify_callback = clarify_callback
|
||||
self.step_callback = step_callback
|
||||
self._last_reported_tool = None # Track for "new tool" mode
|
||||
@@ -484,6 +488,13 @@ class AIAgent:
|
||||
# Cached system prompt -- built once per session, only rebuilt on compression
|
||||
self._cached_system_prompt: Optional[str] = None
|
||||
|
||||
# Filesystem checkpoint manager (transparent — not a tool)
|
||||
from tools.checkpoint_manager import CheckpointManager
|
||||
self._checkpoint_mgr = CheckpointManager(
|
||||
enabled=checkpoints_enabled,
|
||||
max_snapshots=checkpoint_max_snapshots,
|
||||
)
|
||||
|
||||
# SQLite session store (optional -- provided by CLI or gateway)
|
||||
self._session_db = session_db
|
||||
if self._session_db:
|
||||
@@ -2689,6 +2700,8 @@ class AIAgent:
|
||||
except json.JSONDecodeError as e:
|
||||
logging.warning(f"Unexpected JSON error after validation: {e}")
|
||||
function_args = {}
|
||||
if not isinstance(function_args, dict):
|
||||
function_args = {}
|
||||
|
||||
if not self.quiet_mode:
|
||||
args_str = json.dumps(function_args, ensure_ascii=False)
|
||||
@@ -2702,6 +2715,18 @@ class AIAgent:
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
# Checkpoint: snapshot working dir before file-mutating tools
|
||||
if function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled:
|
||||
try:
|
||||
file_path = function_args.get("path", "")
|
||||
if file_path:
|
||||
work_dir = self._checkpoint_mgr.get_working_dir_for_path(file_path)
|
||||
self._checkpoint_mgr.ensure_checkpoint(
|
||||
work_dir, f"before {function_name}"
|
||||
)
|
||||
except Exception:
|
||||
pass # never block tool execution
|
||||
|
||||
tool_start_time = time.time()
|
||||
|
||||
if function_name == "todo":
|
||||
@@ -3211,6 +3236,9 @@ class AIAgent:
|
||||
self.clear_interrupt()
|
||||
|
||||
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
|
||||
# Reset per-turn checkpoint dedup so each iteration can take one snapshot
|
||||
self._checkpoint_mgr.new_turn()
|
||||
|
||||
# Check for interrupt request (e.g., user sent new message)
|
||||
if self._interrupt_requested:
|
||||
interrupted = True
|
||||
@@ -3323,9 +3351,13 @@ class AIAgent:
|
||||
# Animated thinking spinner in quiet mode
|
||||
face = random.choice(KawaiiSpinner.KAWAII_THINKING)
|
||||
verb = random.choice(KawaiiSpinner.THINKING_VERBS)
|
||||
spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
|
||||
thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type)
|
||||
thinking_spinner.start()
|
||||
if self.thinking_callback:
|
||||
# CLI TUI mode: use prompt_toolkit widget instead of raw spinner
|
||||
self.thinking_callback(f"{face} {verb}...")
|
||||
else:
|
||||
spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
|
||||
thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type)
|
||||
thinking_spinner.start()
|
||||
|
||||
# Log request details if verbose
|
||||
if self.verbose_logging:
|
||||
@@ -3362,6 +3394,8 @@ class AIAgent:
|
||||
if thinking_spinner:
|
||||
thinking_spinner.stop("")
|
||||
thinking_spinner = None
|
||||
if self.thinking_callback:
|
||||
self.thinking_callback("")
|
||||
|
||||
if not self.quiet_mode:
|
||||
print(f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s")
|
||||
@@ -3402,6 +3436,8 @@ class AIAgent:
|
||||
if thinking_spinner:
|
||||
thinking_spinner.stop(f"(´;ω;`) oops, retrying...")
|
||||
thinking_spinner = None
|
||||
if self.thinking_callback:
|
||||
self.thinking_callback("")
|
||||
|
||||
# This is often rate limiting or provider returning malformed response
|
||||
retry_count += 1
|
||||
@@ -3571,6 +3607,8 @@ class AIAgent:
|
||||
if thinking_spinner:
|
||||
thinking_spinner.stop("")
|
||||
thinking_spinner = None
|
||||
if self.thinking_callback:
|
||||
self.thinking_callback("")
|
||||
api_elapsed = time.time() - api_start_time
|
||||
print(f"{self.log_prefix}⚡ Interrupted during API call.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
@@ -3583,6 +3621,8 @@ class AIAgent:
|
||||
if thinking_spinner:
|
||||
thinking_spinner.stop(f"(╥_╥) error, retrying...")
|
||||
thinking_spinner = None
|
||||
if self.thinking_callback:
|
||||
self.thinking_callback("")
|
||||
|
||||
status_code = getattr(api_error, "status_code", None)
|
||||
if (
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
---
|
||||
name: pokemon-player
|
||||
description: Play Pokémon games autonomously via headless emulation. Starts a game server, reads structured game state from RAM, makes strategic decisions, and sends button inputs — all from the terminal.
|
||||
description: Play Pokemon games autonomously via headless emulation. Starts a game server, reads structured game state from RAM, makes strategic decisions, and sends button inputs — all from the terminal.
|
||||
tags: [gaming, pokemon, emulator, pyboy, gameplay, gameboy]
|
||||
---
|
||||
# Pokémon Player
|
||||
# Pokemon Player
|
||||
|
||||
Play Pokémon games via headless emulation using the `pokemon-agent` package.
|
||||
Play Pokemon games via headless emulation using the `pokemon-agent` package.
|
||||
|
||||
## When to Use
|
||||
- User says "play pokemon", "start pokemon", "pokemon game"
|
||||
@@ -13,93 +13,155 @@ Play Pokémon games via headless emulation using the `pokemon-agent` package.
|
||||
- User wants to watch an AI play Pokemon
|
||||
- User references a ROM file (.gb, .gbc, .gba)
|
||||
|
||||
## First-Time Setup
|
||||
## Startup Procedure
|
||||
|
||||
### 1. Install the package
|
||||
```bash
|
||||
pip install pokemon-agent[dashboard] pyboy
|
||||
```
|
||||
### 1. First-time setup (clone, venv, install)
|
||||
The repo is NousResearch/pokemon-agent on GitHub. Clone it, then
|
||||
set up a Python 3.10+ virtual environment. Use uv (preferred for speed)
|
||||
to create the venv and install the package in editable mode with the
|
||||
pyboy extra. If uv is not available, fall back to python3 -m venv + pip.
|
||||
|
||||
### 2. Get the ROM
|
||||
Ask the user for their ROM file path. Do NOT attempt to download ROMs.
|
||||
On this machine it is already set up at /home/teknium/pokemon-agent
|
||||
with a venv ready — just cd there and source .venv/bin/activate.
|
||||
|
||||
### 3. Start the game server
|
||||
```bash
|
||||
pokemon-agent serve --rom <ROM_PATH> --port 8765 &
|
||||
```
|
||||
Wait 3 seconds, then verify:
|
||||
```bash
|
||||
curl -s http://localhost:8765/health
|
||||
```
|
||||
You also need a ROM file. Ask the user for theirs. On this machine
|
||||
one exists at roms/pokemon_red.gb inside that directory.
|
||||
NEVER download or provide ROM files — always ask the user.
|
||||
|
||||
### 2. Start the game server
|
||||
From inside the pokemon-agent directory with the venv activated, run
|
||||
pokemon-agent serve with --rom pointing to the ROM and --port 9876.
|
||||
Run it in the background with &.
|
||||
To resume from a saved game, add --load-state with the save name.
|
||||
Wait 4 seconds for startup, then verify with GET /health.
|
||||
|
||||
### 3. Set up live dashboard for user to watch
|
||||
Use an SSH reverse tunnel via localhost.run so the user can view
|
||||
the dashboard in their browser. Connect with ssh, forwarding local
|
||||
port 9876 to remote port 80 on nokey@localhost.run. Redirect output
|
||||
to a log file, wait 10 seconds, then grep the log for the .lhr.life
|
||||
URL. Give the user the URL with /dashboard/ appended.
|
||||
The tunnel URL changes each time — give the user the new one if restarted.
|
||||
|
||||
## Save and Load
|
||||
|
||||
### When to save
|
||||
- Every 15-20 turns of gameplay
|
||||
- ALWAYS before gym battles, rival encounters, or risky fights
|
||||
- Before entering a new town or dungeon
|
||||
- Before any action you are unsure about
|
||||
|
||||
### How to save
|
||||
POST /save with a descriptive name. Good examples:
|
||||
before_brock, route1_start, mt_moon_entrance, got_cut
|
||||
|
||||
### How to load
|
||||
POST /load with the save name.
|
||||
|
||||
### List available saves
|
||||
GET /saves returns all saved states.
|
||||
|
||||
### Loading on server startup
|
||||
Use --load-state flag when starting the server to auto-load a save.
|
||||
This is faster than loading via the API after startup.
|
||||
|
||||
## The Gameplay Loop
|
||||
|
||||
### Step 1: OBSERVE
|
||||
```bash
|
||||
curl -s http://localhost:8765/state
|
||||
```
|
||||
### Step 1: OBSERVE — check state AND take a screenshot
|
||||
GET /state for position, HP, battle, dialog.
|
||||
GET /screenshot and save to /tmp/pokemon.png, then use vision_analyze.
|
||||
Always do BOTH — RAM state gives numbers, vision gives spatial awareness.
|
||||
|
||||
### Step 2: ORIENT
|
||||
- Dialog active → advance text
|
||||
- In battle → fight
|
||||
- Party hurt → heal
|
||||
- Near objective → navigate
|
||||
- Dialog/text on screen → advance it
|
||||
- In battle → fight or run
|
||||
- Party hurt → head to Pokemon Center
|
||||
- Near objective → navigate carefully
|
||||
|
||||
### Step 3: DECIDE
|
||||
Priority order:
|
||||
1. If dialog active → a_until_dialog_end
|
||||
2. If in battle → choose best move
|
||||
3. If any Pokemon <20% HP → Pokémon Center
|
||||
4. If near story objective → navigate to it
|
||||
5. If underleveled → train in grass
|
||||
6. Otherwise → explore
|
||||
Priority: dialog > battle > heal > story objective > training > explore
|
||||
|
||||
### Step 4: ACT
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8765/action \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"actions": ["walk_up", "walk_up", "press_a"]}'
|
||||
```
|
||||
### Step 4: ACT — move 2-4 steps max, then re-check
|
||||
POST /action with a SHORT action list (2-4 actions, not 10-15).
|
||||
|
||||
Action reference:
|
||||
### Step 5: VERIFY — screenshot after every move sequence
|
||||
Take a screenshot and use vision_analyze to confirm you moved where
|
||||
intended. This is the MOST IMPORTANT step. Without vision you WILL get lost.
|
||||
|
||||
### Step 6: RECORD progress to memory with PKM: prefix
|
||||
|
||||
### Step 7: SAVE periodically
|
||||
|
||||
## Action Reference
|
||||
- press_a — confirm, talk, select
|
||||
- press_b — cancel, close menu
|
||||
- press_start — open game menu
|
||||
- walk_up/down/left/right — move one tile
|
||||
- a_until_dialog_end — advance all dialog
|
||||
- wait_60 — wait ~1 second
|
||||
- hold_b_N — hold B for N frames (use for speeding through text)
|
||||
- wait_60 — wait about 1 second (60 frames)
|
||||
- a_until_dialog_end — press A repeatedly until dialog clears
|
||||
|
||||
### Step 5: VERIFY
|
||||
Check state_after in the response. If stuck 3+ turns:
|
||||
1. Press B several times
|
||||
2. Try different directions
|
||||
3. Take screenshot and use vision_analyze
|
||||
4. Load last save if truly stuck
|
||||
## Critical Tips from Experience
|
||||
|
||||
### Step 6: RECORD
|
||||
```
|
||||
memory add: PKM:OBJECTIVE: Heading to Pewter City to challenge Brock
|
||||
memory add: PKM:PROGRESS: Got Squirtle, Got Pokedex, → Pewter City
|
||||
```
|
||||
### USE VISION CONSTANTLY
|
||||
- Take a screenshot every 2-4 movement steps
|
||||
- The RAM state tells you position and HP but NOT what is around you
|
||||
- Ledges, fences, signs, building doors, NPCs — only visible via screenshot
|
||||
- Ask the vision model specific questions: "what is one tile north of me?"
|
||||
- When stuck, always screenshot before trying random directions
|
||||
|
||||
### Step 7: SAVE
|
||||
Save every 20-30 turns and ALWAYS before gym battles:
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8765/save \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"name": "before_brock"}'
|
||||
```
|
||||
### Warp Transitions Need Extra Wait Time
|
||||
When walking through a door or stairs, the screen fades to black during
|
||||
the map transition. You MUST wait for it to complete. Add 2-3 wait_60
|
||||
actions after any door/stair warp. Without waiting, the position reads
|
||||
as stale and you will think you are still in the old map.
|
||||
|
||||
### Building Exit Trap
|
||||
When you exit a building, you appear directly IN FRONT of the door.
|
||||
If you walk north, you go right back inside. ALWAYS sidestep first
|
||||
by walking left or right 2 tiles, then proceed in your intended direction.
|
||||
|
||||
### Dialog Handling
|
||||
Gen 1 text scrolls slowly letter-by-letter. To speed through dialog,
|
||||
hold B for 120 frames then press A. Repeat as needed. Holding B makes
|
||||
text display at max speed. Then press A to advance to the next line.
|
||||
The a_until_dialog_end action checks the RAM dialog flag, but this flag
|
||||
does not catch ALL text states. If dialog seems stuck, use the manual
|
||||
hold_b + press_a pattern instead and verify via screenshot.
|
||||
|
||||
### Ledges Are One-Way
|
||||
Ledges (small cliff edges) can only be jumped DOWN (south), never climbed
|
||||
UP (north). If blocked by a ledge going north, you must go left or right
|
||||
to find the gap around it. Use vision to identify which direction the
|
||||
gap is. Ask the vision model explicitly.
|
||||
|
||||
### Navigation Strategy
|
||||
- Move 2-4 steps at a time, then screenshot to check position
|
||||
- When entering a new area, screenshot immediately to orient
|
||||
- Ask the vision model "which direction to [destination]?"
|
||||
- If stuck for 3+ attempts, screenshot and re-evaluate completely
|
||||
- Do not spam 10-15 movements — you will overshoot or get stuck
|
||||
|
||||
### Running from Wild Battles
|
||||
On the battle menu, RUN is bottom-right. To reach it from the default
|
||||
cursor position (FIGHT, top-left): press down then right to move cursor
|
||||
to RUN, then press A. Wrap with hold_b to speed through text/animations.
|
||||
|
||||
### Battling (FIGHT)
|
||||
On the battle menu FIGHT is top-left (default cursor position).
|
||||
Press A to enter move selection, A again to use the first move.
|
||||
Then hold B to speed through attack animations and text.
|
||||
|
||||
## Battle Strategy
|
||||
|
||||
### Decision Tree
|
||||
1. Want to catch? → Weaken then throw Poké Ball
|
||||
1. Want to catch? → Weaken then throw Poke Ball
|
||||
2. Wild you don't need? → RUN
|
||||
3. Type advantage? → Use super-effective move
|
||||
4. No advantage? → Use strongest STAB move
|
||||
5. Low HP? → Switch or use Potion
|
||||
|
||||
### Type Chart
|
||||
### Gen 1 Type Chart (key matchups)
|
||||
- Water beats Fire, Ground, Rock
|
||||
- Fire beats Grass, Bug, Ice
|
||||
- Grass beats Water, Ground, Rock
|
||||
@@ -108,54 +170,46 @@ curl -s -X POST http://localhost:8765/save \
|
||||
- Psychic beats Fighting, Poison (dominant in Gen 1!)
|
||||
|
||||
### Gen 1 Quirks
|
||||
- Special stat is both offense AND defense for special moves
|
||||
- Psychic is overpowered (Ghost moves bugged)
|
||||
- Special stat = both offense AND defense for special moves
|
||||
- Psychic type is overpowered (Ghost moves bugged)
|
||||
- Critical hits based on Speed stat
|
||||
- Wrap/Bind prevent opponent from acting
|
||||
- Focus Energy bug: REDUCES crit rate instead of raising it
|
||||
|
||||
## Memory Conventions
|
||||
| Prefix | Purpose | Example |
|
||||
|--------|---------|---------|
|
||||
| PKM:OBJECTIVE | Current goal | Defeat Brock in Pewter City |
|
||||
| PKM:MAP | Navigation knowledge | Viridian Forest: go north |
|
||||
| PKM:OBJECTIVE | Current goal | Get Parcel from Viridian Mart |
|
||||
| PKM:MAP | Navigation knowledge | Viridian: mart is northeast |
|
||||
| PKM:STRATEGY | Battle/team plans | Need Grass type before Misty |
|
||||
| PKM:PROGRESS | Milestone tracker | ✓ Boulder Badge → Cascade Badge |
|
||||
| PKM:STUCK | Stuck situations | Got stuck in Cerulean Cave |
|
||||
| PKM:TEAM | Team notes | Squirtle is Water/Ice coverage |
|
||||
| PKM:PROGRESS | Milestone tracker | Beat rival, heading to Viridian |
|
||||
| PKM:STUCK | Stuck situations | Ledge at y=28 go right to bypass |
|
||||
| PKM:TEAM | Team notes | Squirtle Lv6, Tackle + Tail Whip |
|
||||
|
||||
## Progression Milestones
|
||||
- ☐ Choose starter
|
||||
- ☐ Deliver Oak's Parcel → receive Pokédex
|
||||
- ☐ Boulder Badge — Brock (Rock) → use Water/Grass
|
||||
- ☐ Cascade Badge — Misty (Water) → use Grass/Electric
|
||||
- ☐ Thunder Badge — Lt. Surge (Electric) → use Ground
|
||||
- ☐ Rainbow Badge — Erika (Grass) → use Fire/Ice/Flying
|
||||
- ☐ Soul Badge — Koga (Poison) → use Ground/Psychic
|
||||
- ☐ Marsh Badge — Sabrina (Psychic)
|
||||
- ☐ Volcano Badge — Blaine (Fire) → use Water/Ground
|
||||
- ☐ Earth Badge — Giovanni (Ground) → use Water/Grass/Ice
|
||||
- ☐ Elite Four → Champion!
|
||||
- Choose starter
|
||||
- Deliver Parcel from Viridian Mart, receive Pokedex
|
||||
- Boulder Badge — Brock (Rock) → use Water/Grass
|
||||
- Cascade Badge — Misty (Water) → use Grass/Electric
|
||||
- Thunder Badge — Lt. Surge (Electric) → use Ground
|
||||
- Rainbow Badge — Erika (Grass) → use Fire/Ice/Flying
|
||||
- Soul Badge — Koga (Poison) → use Ground/Psychic
|
||||
- Marsh Badge — Sabrina (Psychic) → hardest gym
|
||||
- Volcano Badge — Blaine (Fire) → use Water/Ground
|
||||
- Earth Badge — Giovanni (Ground) → use Water/Grass/Ice
|
||||
- Elite Four → Champion!
|
||||
|
||||
## Stopping Play
|
||||
1. Save the game:
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8765/save \
|
||||
-d '{"name": "session_end"}'
|
||||
```
|
||||
2. Update memory with progress
|
||||
3. Tell user: "Game saved! Say 'play pokemon' to resume."
|
||||
4. Kill the background server process
|
||||
|
||||
## Dashboard
|
||||
If `pokemon-agent[dashboard]` is installed, open:
|
||||
http://localhost:8765/dashboard
|
||||
|
||||
Live features: game screen, AI reasoning stream, team status, action log.
|
||||
1. Save the game with a descriptive name via POST /save
|
||||
2. Update memory with PKM:PROGRESS
|
||||
3. Tell user: "Game saved as [name]! Say 'play pokemon' to resume."
|
||||
4. Kill the server and tunnel background processes
|
||||
|
||||
## Pitfalls
|
||||
- NEVER download or provide ROM files — always ask the user
|
||||
- Don't send more than 15 actions per /action call
|
||||
- Always wait for dialog to clear before moving
|
||||
- Save BEFORE gym battles
|
||||
- Take screenshots sparingly — they cost vision tokens
|
||||
- Verify server is running with /health before any commands
|
||||
- NEVER download or provide ROM files
|
||||
- Do NOT send more than 4-5 actions without checking vision
|
||||
- Always sidestep after exiting buildings before going north
|
||||
- Always add wait_60 x2-3 after door/stair warps
|
||||
- Dialog detection via RAM is unreliable — verify with screenshots
|
||||
- Save BEFORE risky encounters
|
||||
- The tunnel URL changes each time you restart it
|
||||
|
||||
@@ -1098,7 +1098,7 @@ Please see the ocifs docs.
|
||||
|
||||
The path should start with https://.
|
||||
|
||||
This must be publically accessible.
|
||||
This must be publicly accessible.
|
||||
|
||||
Now that you know how to load datasets, you can learn more on how to load your specific dataset format into your target output format dataset formats docs.
|
||||
|
||||
|
||||
302
skills/mlops/training/hermes-atropos-environments/SKILL.md
Normal file
302
skills/mlops/training/hermes-atropos-environments/SKILL.md
Normal file
@@ -0,0 +1,302 @@
|
||||
---
|
||||
name: hermes-atropos-environments
|
||||
description: Build, test, and debug Hermes Agent RL environments for Atropos training. Covers the HermesAgentBaseEnv interface, reward functions, agent loop integration, evaluation with tools, wandb logging, and the three CLI modes (serve/process/evaluate). Use when creating, reviewing, or fixing RL environments in the hermes-agent repo.
|
||||
version: 1.1.0
|
||||
author: Hermes Agent
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [atropos, rl, environments, training, reinforcement-learning, reward-functions]
|
||||
related_skills: [axolotl, grpo-rl-training, trl-fine-tuning, lm-evaluation-harness]
|
||||
---
|
||||
|
||||
# Hermes Agent Atropos Environments
|
||||
|
||||
Guide for building RL environments in the hermes-agent repo that integrate with the Atropos training framework.
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
Atropos BaseEnv (atroposlib/envs/base.py)
|
||||
└── HermesAgentBaseEnv (environments/hermes_base_env.py)
|
||||
├── Handles agent loop orchestration
|
||||
├── Handles tool resolution per group
|
||||
├── Handles ToolContext for reward verification
|
||||
└── YOUR ENVIRONMENT (environments/your_env.py)
|
||||
Only implements: setup, get_next_item, format_prompt,
|
||||
compute_reward, evaluate, wandb_log
|
||||
```
|
||||
|
||||
Hermes environments are special because they run a **multi-turn agent loop with tool calling** — not just single-turn completions. The base env handles the loop; you implement the task and scoring.
|
||||
|
||||
## File Locations
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `environments/hermes_base_env.py` | Base class with agent loop + tool resolution |
|
||||
| `environments/agent_loop.py` | `HermesAgentLoop` + `AgentResult` dataclass |
|
||||
| `environments/tool_context.py` | `ToolContext` for reward verification |
|
||||
| `environments/tool_call_parsers.py` | Phase 2 tool call parsers (hermes, mistral, etc.) |
|
||||
| `environments/your_env.py` | Your environment implementation |
|
||||
|
||||
## Inference Setup — Ask the User First
|
||||
|
||||
**IMPORTANT:** Before running any test, evaluation, or data generation command, always ask the user how they want to handle inference. Do NOT assume OpenRouter or any specific endpoint. Present these options:
|
||||
|
||||
1. **OpenRouter** — Ask which model they want to use (e.g., `anthropic/claude-sonnet-4.5`, `google/gemini-2.5-pro`, `meta-llama/llama-3.3-70b-instruct`, etc.). Requires `OPENROUTER_API_KEY` in environment.
|
||||
2. **Self-hosted VLLM endpoint** — Ask for their base URL (e.g., `http://localhost:8000/v1`) and model name. Set `--openai.server_type vllm`.
|
||||
3. **Other OpenAI-compatible API** — Ask for the base URL, model name, and any required API key. Set `--openai.server_type openai` and `--openai.health_check false`.
|
||||
4. **Local Atropos training server** — For `serve` mode with a live training loop. Default `http://localhost:8000/v1`.
|
||||
|
||||
Once the user tells you their setup, use those values in all CLI commands for that session. Example prompts:
|
||||
|
||||
> "Before I run this, how would you like to handle inference?
|
||||
> 1. OpenRouter (I'll need your preferred model, e.g. claude-sonnet-4.5)
|
||||
> 2. A self-hosted VLLM endpoint (give me the URL and model name)
|
||||
> 3. Another OpenAI-compatible API (give me the URL, model, and any auth details)
|
||||
> 4. Local Atropos training server (serve mode)"
|
||||
|
||||
### Key flags by provider:
|
||||
|
||||
| Provider | `--openai.server_type` | `--openai.health_check` | `--openai.api_key` |
|
||||
|----------|----------------------|------------------------|-------------------|
|
||||
| OpenRouter | `openai` | `false` | `$OPENROUTER_API_KEY` |
|
||||
| VLLM (self-hosted) | `vllm` | (default) | (not needed) |
|
||||
| Other OpenAI-compatible | `openai` | `false` | As needed |
|
||||
| Local Atropos | (default) | (default) | (not needed) |
|
||||
|
||||
## Required Methods
|
||||
|
||||
### 1. `setup()` — Load dataset and initialize state
|
||||
|
||||
```python
|
||||
async def setup(self) -> None:
|
||||
"""Called once at startup. Load datasets, initialize state."""
|
||||
# Try HuggingFace first, fallback to built-in samples
|
||||
try:
|
||||
from datasets import load_dataset
|
||||
ds = load_dataset("your/dataset", split="test")
|
||||
self._items = [...]
|
||||
except Exception:
|
||||
self._items = BUILTIN_SAMPLES
|
||||
|
||||
# Always split into train/eval
|
||||
random.shuffle(self._items)
|
||||
eval_size = max(20, int(len(self._items) * 0.1))
|
||||
self._eval_items = self._items[:eval_size]
|
||||
self._items = self._items[eval_size:]
|
||||
```
|
||||
|
||||
### 2. `get_next_item()` — Return next training item
|
||||
|
||||
```python
|
||||
async def get_next_item(self) -> dict:
|
||||
"""Return next item, cycling through dataset."""
|
||||
item = self._items[self._index % len(self._items)]
|
||||
self._index += 1
|
||||
return item
|
||||
```
|
||||
|
||||
### 3. `format_prompt(item)` — Convert item to user message
|
||||
|
||||
```python
|
||||
def format_prompt(self, item: dict) -> str:
|
||||
"""Convert a dataset item into the user-facing prompt."""
|
||||
return f"Research this question: {item['question']}"
|
||||
```
|
||||
|
||||
### 4. `compute_reward(item, result, ctx)` — Score the rollout
|
||||
|
||||
**CRITICAL**: `result` is an `AgentResult`, NOT a dict. It has these attributes:
|
||||
- `result.messages` — List of message dicts (OpenAI format)
|
||||
- `result.turns_used` — Number of LLM calls made
|
||||
- `result.finished_naturally` — True if model stopped voluntarily
|
||||
- `result.tool_errors` — List of ToolError objects
|
||||
|
||||
**AgentResult does NOT have**: `final_response`, `tool_calls`, `tools_used`.
|
||||
You must extract these from `result.messages`:
|
||||
|
||||
```python
|
||||
async def compute_reward(self, item, result: AgentResult, ctx: ToolContext) -> float:
|
||||
# Extract final response (last assistant message with content)
|
||||
final_response = ""
|
||||
tools_used = []
|
||||
for msg in reversed(result.messages):
|
||||
if msg.get("role") == "assistant" and msg.get("content") and not final_response:
|
||||
final_response = msg["content"]
|
||||
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||
for tc in msg["tool_calls"]:
|
||||
fn = tc.get("function", {}) if isinstance(tc, dict) else {}
|
||||
name = fn.get("name", "")
|
||||
if name:
|
||||
tools_used.append(name)
|
||||
|
||||
# Score using LLM judge, heuristic, or ToolContext verification
|
||||
correctness = await self._llm_judge(item, final_response)
|
||||
return correctness
|
||||
```
|
||||
|
||||
`ctx` (ToolContext) gives you terminal/file access to the agent's sandbox for verification:
|
||||
```python
|
||||
# Run tests in the agent's sandbox
|
||||
result = ctx.terminal("pytest /workspace/test.py")
|
||||
return 1.0 if result["exit_code"] == 0 else 0.0
|
||||
```
|
||||
|
||||
### 5. `evaluate()` — Periodic evaluation with full agent loop
|
||||
|
||||
**MUST use the full agent loop with tools**, not single-turn chat_completion.
|
||||
The whole point of hermes-agent environments is agentic evaluation:
|
||||
|
||||
```python
|
||||
async def evaluate(self, *args, **kwargs) -> None:
|
||||
import time, uuid
|
||||
from environments.agent_loop import HermesAgentLoop
|
||||
from environments.tool_context import ToolContext
|
||||
|
||||
start_time = time.time()
|
||||
tools, valid_names = self._resolve_tools_for_group()
|
||||
samples = []
|
||||
|
||||
for item in self._eval_items[:self.config.eval_size]:
|
||||
task_id = str(uuid.uuid4())
|
||||
messages = []
|
||||
if self.config.system_prompt:
|
||||
messages.append({"role": "system", "content": self.config.system_prompt})
|
||||
messages.append({"role": "user", "content": self.format_prompt(item)})
|
||||
|
||||
agent = HermesAgentLoop(
|
||||
server=self.server,
|
||||
tool_schemas=tools,
|
||||
valid_tool_names=valid_names,
|
||||
max_turns=self.config.max_agent_turns,
|
||||
task_id=task_id,
|
||||
temperature=0.0, # Deterministic for eval
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
|
||||
ctx = ToolContext(task_id)
|
||||
try:
|
||||
reward = await self.compute_reward(item, result, ctx)
|
||||
finally:
|
||||
ctx.cleanup()
|
||||
|
||||
samples.append({"prompt": ..., "response": ..., "reward": reward})
|
||||
|
||||
eval_metrics = {"eval/mean_reward": ...}
|
||||
await self.evaluate_log(metrics=eval_metrics, samples=samples,
|
||||
start_time=start_time, end_time=time.time())
|
||||
```
|
||||
|
||||
### 6. `wandb_log()` — Custom metrics logging
|
||||
|
||||
Always call `super().wandb_log()` at the end:
|
||||
|
||||
```python
|
||||
async def wandb_log(self, wandb_metrics=None):
|
||||
if wandb_metrics is None:
|
||||
wandb_metrics = {}
|
||||
if self._reward_buffer:
|
||||
n = len(self._reward_buffer)
|
||||
wandb_metrics["train/mean_reward"] = sum(self._reward_buffer) / n
|
||||
self._reward_buffer.clear()
|
||||
await super().wandb_log(wandb_metrics) # MUST call super
|
||||
```
|
||||
|
||||
**Pitfall**: `compute_reward` appends to metric buffers. During eval, this pollutes training metrics. Roll back buffer entries added during eval.
|
||||
|
||||
## Config Class
|
||||
|
||||
Always create a custom config subclass with Pydantic Field descriptors. Key inherited fields you can tune: `enabled_toolsets`, `max_agent_turns`, `agent_temperature`, `system_prompt`, `terminal_backend`, `group_size`, `steps_per_eval`, `total_steps`.
|
||||
|
||||
## config_init() — Default Configuration
|
||||
|
||||
Classmethod returning `(YourEnvConfig, [APIServerConfig(...)])`. Set server_type to "openai" for OpenRouter/external APIs. Load API key from environment variable.
|
||||
|
||||
## Three CLI Modes
|
||||
|
||||
```bash
|
||||
# SERVE — Full training loop (connects to Atropos API server)
|
||||
python environments/my_env.py serve --openai.base_url http://localhost:8000/v1
|
||||
|
||||
# PROCESS — Offline data generation (saves JSONL)
|
||||
python environments/my_env.py process --env.total_steps 10 --env.group_size 1 \
|
||||
--env.use_wandb false --env.data_path_to_save_groups output.jsonl \
|
||||
--openai.base_url "<USER_BASE_URL>" \
|
||||
--openai.model_name "<USER_MODEL>" \
|
||||
--openai.server_type <USER_SERVER_TYPE> --openai.health_check false
|
||||
|
||||
# EVALUATE — Standalone eval (runs setup + evaluate only)
|
||||
python environments/my_env.py evaluate --env.eval_size 20 \
|
||||
--env.data_dir_to_save_evals /tmp/eval_results \
|
||||
--openai.base_url "<USER_BASE_URL>" \
|
||||
--openai.model_name "<USER_MODEL>" \
|
||||
--openai.server_type <USER_SERVER_TYPE> --openai.health_check false
|
||||
```
|
||||
|
||||
Config priority: CLI args > YAML file > config_init() defaults.
|
||||
|
||||
## Common Pitfalls
|
||||
|
||||
1. **AgentResult has .messages, not .final_response** — Extract the final response by iterating reversed(result.messages) looking for the last assistant message with content.
|
||||
|
||||
2. **evaluate() must use HermesAgentLoop, not chat_completion** — Single-turn chat_completion has no tools. The whole point of hermes-agent benchmarks is agentic evaluation with tool use.
|
||||
|
||||
3. **Don't call _llm_judge twice** — If compute_reward already calls it, extract the score from the buffer instead of calling judge separately in evaluate().
|
||||
|
||||
4. **Eval pollutes training buffers** — compute_reward appends to metric buffers. During eval, roll back buffer entries to keep training metrics clean.
|
||||
|
||||
5. **Always set health_check=false for OpenRouter** — OpenRouter has no /health endpoint.
|
||||
|
||||
6. **Set data_dir_to_save_evals in evaluate mode** — Without it, results aren't saved.
|
||||
|
||||
7. **default_toolsets class variable vs enabled_toolsets config** — The class variable is a hint; the config field is what actually controls tool resolution.
|
||||
|
||||
8. **Tool call parsing in messages** — Tool calls are dicts with `{"function": {"name": ..., "arguments": ...}}`. Always check `isinstance(tc, dict)`.
|
||||
|
||||
9. **ToolContext.cleanup()** — Always call in a finally block to release sandbox resources.
|
||||
|
||||
10. **server_type must be "openai" for external APIs** — Without it, Atropos assumes a local VLLM server.
|
||||
|
||||
11. **Always ask the user for their inference setup** — Never hardcode or assume a specific provider/model. See the "Inference Setup" section above.
|
||||
|
||||
## Reward Function Patterns
|
||||
|
||||
### LLM Judge (for open-ended tasks)
|
||||
Use `self.server.chat_completion()` with a scoring prompt. Parse JSON response for score float. Always include a heuristic fallback (keyword overlap) for when the judge call fails.
|
||||
|
||||
### Binary Verification (for code/terminal tasks)
|
||||
Use `ctx.terminal("pytest test.py -q")` to run tests in the agent's sandbox. Return 1.0 for pass, 0.0 for fail.
|
||||
|
||||
### Multi-Signal (combine multiple indicators)
|
||||
Weight correctness (0.6) + tool usage (0.2) + efficiency (0.2) + optional bonuses. Clamp to [0, 1].
|
||||
|
||||
## Testing Your Environment
|
||||
|
||||
1. **Import test**: `python -c "from environments.my_env import MyEnv; print('OK')"`
|
||||
2. **Ask the user for inference setup** (see "Inference Setup" section above)
|
||||
3. **Process mode** (1 item): Verify JSONL output has valid tokens, masks, scores
|
||||
4. **Evaluate mode**: Verify full agent loop runs with tools, metrics logged correctly
|
||||
5. **Check reward range**: Scores should be in [0, 1], not all identical
|
||||
|
||||
## Minimum Implementation Checklist
|
||||
|
||||
```python
|
||||
class MyEnv(HermesAgentBaseEnv):
|
||||
name = "my-env"
|
||||
env_config_cls = MyEnvConfig
|
||||
|
||||
@classmethod
|
||||
def config_init(cls): ... # Default server + env config
|
||||
async def setup(self): ... # Load dataset + train/eval split
|
||||
async def get_next_item(self): ... # Cycle through training items
|
||||
def format_prompt(self, item): ... # Item → user message string
|
||||
async def compute_reward(self, item, result, ctx): ... # Score rollout
|
||||
async def evaluate(self, *args, **kwargs): ... # Full agent loop eval
|
||||
async def wandb_log(self, metrics=None): ... # Custom metrics + super()
|
||||
|
||||
if __name__ == "__main__":
|
||||
MyEnv.cli()
|
||||
```
|
||||
@@ -0,0 +1,59 @@
|
||||
# AgentResult Fields Reference
|
||||
|
||||
`AgentResult` is defined in `environments/agent_loop.py` as a dataclass.
|
||||
|
||||
## Fields
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `messages` | `List[Dict[str, Any]]` | Full conversation history in OpenAI message format |
|
||||
| `managed_state` | `Optional[Dict]` | ManagedServer.get_state() if Phase 2, else None |
|
||||
| `turns_used` | `int` | Number of LLM calls made during the loop |
|
||||
| `finished_naturally` | `bool` | True if model stopped calling tools on its own |
|
||||
| `reasoning_per_turn` | `List[Optional[str]]` | Extracted reasoning content per turn |
|
||||
| `tool_errors` | `List[ToolError]` | Tool errors encountered during the loop |
|
||||
|
||||
## ToolError Fields
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `turn` | `int` | Which turn the error occurred |
|
||||
| `tool_name` | `str` | Name of the tool that failed |
|
||||
| `arguments` | `str` | Arguments passed to the tool |
|
||||
| `error` | `str` | Error message |
|
||||
| `tool_result` | `str` | The result returned to the model |
|
||||
|
||||
## Extracting Data from Messages
|
||||
|
||||
Messages follow OpenAI format. Common patterns:
|
||||
|
||||
```python
|
||||
# Get final assistant response
|
||||
for msg in reversed(result.messages):
|
||||
if msg.get("role") == "assistant" and msg.get("content"):
|
||||
final_response = msg["content"]
|
||||
break
|
||||
|
||||
# Get all tool names used
|
||||
tools = []
|
||||
for msg in result.messages:
|
||||
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||
for tc in msg["tool_calls"]:
|
||||
fn = tc.get("function", {}) if isinstance(tc, dict) else {}
|
||||
tools.append(fn.get("name", ""))
|
||||
|
||||
# Get tool results
|
||||
for msg in result.messages:
|
||||
if msg.get("role") == "tool":
|
||||
tool_output = msg.get("content", "")
|
||||
call_id = msg.get("tool_call_id", "")
|
||||
```
|
||||
|
||||
## Fields that DO NOT EXIST
|
||||
|
||||
These are common mistakes — AgentResult does NOT have:
|
||||
- `final_response` — extract from messages
|
||||
- `tool_calls` — extract from messages
|
||||
- `tools_used` — extract from messages
|
||||
- `output` — extract from messages
|
||||
- `response` — extract from messages
|
||||
@@ -0,0 +1,65 @@
|
||||
# Atropos BaseEnv Reference
|
||||
|
||||
Source: `atroposlib/envs/base.py` (~2124 lines)
|
||||
|
||||
## Abstract Methods (MUST implement)
|
||||
|
||||
| Method | Signature | Description |
|
||||
|--------|-----------|-------------|
|
||||
| `get_next_item()` | `async def get_next_item(self) -> Item` | Return next item for trajectory. Return None to pause. |
|
||||
| `evaluate()` | `async def evaluate(self, *args, **kwargs)` | Called every steps_per_eval steps. |
|
||||
| `setup()` | `async def setup(self)` | Called once at start. Load datasets, init models. |
|
||||
| `collect_trajectory()` | `async def collect_trajectory(self, item) -> Tuple[Optional[ScoredDataItem], List[Item]]` | Single rollout. Or override collect_trajectories instead. |
|
||||
|
||||
## Overridable Methods
|
||||
|
||||
| Method | Default Behavior | Override When |
|
||||
|--------|-----------------|---------------|
|
||||
| `collect_trajectories()` | Runs collect_trajectory group_size times in parallel | Batch generation, MCTS, coupled rollouts |
|
||||
| `wandb_log()` | Logs completion lengths, rollout table, perf stats | Add custom metrics (always call super) |
|
||||
| `config_init()` | Returns (env_config_cls(), ServerBaseline()) | Custom defaults + server configs |
|
||||
| `postprocess_histories()` | Passthrough | Final processing before sending to trainer |
|
||||
| `save_checkpoint()` | Saves JSON to checkpoint_dir | Custom serialization |
|
||||
| `cleanup()` | No-op | Release resources after each rollout |
|
||||
|
||||
## ScoredDataGroup Structure
|
||||
|
||||
```python
|
||||
ScoredDataGroup = TypedDict with:
|
||||
tokens: List[List[int]] # Token IDs per rollout
|
||||
masks: List[List[int]] # -100=prompt, token_id=completion
|
||||
scores: List[float] # Score per rollout
|
||||
advantages: Optional[...] # Per-token advantages
|
||||
ref_logprobs: Optional[...] # Reference model logprobs
|
||||
messages: Optional[...] # OpenAI-format messages
|
||||
inference_logprobs: Optional[...] # Inference logprobs
|
||||
```
|
||||
|
||||
## BaseEnvConfig Key Fields
|
||||
|
||||
| Field | Default | Description |
|
||||
|-------|---------|-------------|
|
||||
| `group_size` | 4 | Responses grouped for scoring |
|
||||
| `steps_per_eval` | 100 | Steps between evaluations |
|
||||
| `max_token_length` | 2048 | Max token length for generations |
|
||||
| `total_steps` | 1000 | Total training steps |
|
||||
| `use_wandb` | True | Enable wandb logging |
|
||||
| `tokenizer_name` | DeepHermes-3 | Tokenizer for token encoding |
|
||||
| `ensure_scores_are_not_same` | True | Skip groups with identical scores |
|
||||
| `worker_timeout` | 600 | Task timeout seconds |
|
||||
|
||||
## Data Flow
|
||||
|
||||
```
|
||||
env_manager() → add_train_workers() → handle_env()
|
||||
→ collect_trajectories() → postprocess_histories()
|
||||
→ handle_send_to_api() → training server
|
||||
```
|
||||
|
||||
## Atropos Environment Statistics (82 environments analyzed)
|
||||
|
||||
- 95% implement setup, collect_trajectories, evaluate, get_next_item
|
||||
- 76% override wandb_log
|
||||
- 54% have custom config class
|
||||
- Most use collect_trajectories (plural), not collect_trajectory (singular)
|
||||
- Common reward patterns: LLM-judge (~40), regex-extract (~35), code-exec (~12)
|
||||
@@ -0,0 +1,199 @@
|
||||
# Usage Patterns — Testing Environments and Evaluating Models
|
||||
|
||||
## Pattern 1: Test Your Environment Works (process mode)
|
||||
|
||||
Use `process` mode to verify your environment runs end-to-end before
|
||||
committing. This generates trajectories without needing an Atropos
|
||||
training server.
|
||||
|
||||
**Before running:** Ask the user for their inference setup (see SKILL.md "Inference Setup" section). Replace `<BASE_URL>`, `<MODEL>`, and `<SERVER_TYPE>` below with their chosen values.
|
||||
|
||||
### Step 1: Run 1 trajectory
|
||||
|
||||
```bash
|
||||
cd ~/.hermes/hermes-agent
|
||||
source .venv/bin/activate
|
||||
|
||||
python environments/your_env.py process \
|
||||
--env.total_steps 1 \
|
||||
--env.group_size 1 \
|
||||
--env.use_wandb false \
|
||||
--env.data_path_to_save_groups /tmp/test_output.jsonl \
|
||||
--openai.base_url "<BASE_URL>" \
|
||||
--openai.model_name "<MODEL>" \
|
||||
--openai.server_type <SERVER_TYPE> \
|
||||
--openai.health_check false
|
||||
```
|
||||
|
||||
### Step 2: Verify the output
|
||||
|
||||
```python
|
||||
import json
|
||||
for line in open("/tmp/test_output.jsonl"):
|
||||
data = json.loads(line)
|
||||
print(f"Scores: {data.get('scores', [])}")
|
||||
print(f"Token sequences: {len(data.get('tokens', []))}")
|
||||
# Check messages include tool calls
|
||||
for msg_list in data.get("messages", []):
|
||||
roles = [m.get("role") for m in msg_list]
|
||||
print(f"Roles: {roles}")
|
||||
for m in reversed(msg_list):
|
||||
if m.get("role") == "assistant" and m.get("content"):
|
||||
print(f"Response: {m['content'][:200]}...")
|
||||
break
|
||||
```
|
||||
|
||||
### What to check:
|
||||
- **Scores are not all 0.0** — if so, compute_reward is broken
|
||||
- **Scores are in [0, 1]** — not negative, not >1
|
||||
- **Messages include "tool" role entries** — agent used tools
|
||||
- **Token sequences are non-empty**
|
||||
- **An HTML visualization is generated** next to the .jsonl
|
||||
|
||||
### Common failures:
|
||||
- `'AgentResult' object has no attribute 'X'` — accessing a field that doesn't exist. See agentresult-fields.md.
|
||||
- Score always 0.0 — reward function erroring silently
|
||||
- Score always 1.0 — verification too lenient or not running
|
||||
|
||||
|
||||
## Pattern 2: Evaluate a Model (evaluate mode)
|
||||
|
||||
Use `evaluate` mode to benchmark a model on your environment's eval
|
||||
split. This runs the full agent loop with tools for each eval item.
|
||||
|
||||
### Step 1: Run evaluation
|
||||
|
||||
```bash
|
||||
python environments/your_env.py evaluate \
|
||||
--env.eval_size 20 \
|
||||
--env.use_wandb false \
|
||||
--env.data_dir_to_save_evals /tmp/eval_results \
|
||||
--openai.base_url "<BASE_URL>" \
|
||||
--openai.model_name "<MODEL>" \
|
||||
--openai.server_type <SERVER_TYPE> \
|
||||
--openai.health_check false
|
||||
```
|
||||
|
||||
### Step 2: Read results
|
||||
|
||||
Stdout shows a lighteval-compatible table:
|
||||
|
||||
```
|
||||
Evaluation Results: your-env_eval
|
||||
|Metric | Value|
|
||||
|mean correctness| 0.850 |
|
||||
|mean reward | 0.920 |
|
||||
|mean tool calls | 4.300 |
|
||||
|n items | 20 |
|
||||
Evaluation completed in 367 seconds
|
||||
```
|
||||
|
||||
JSON results saved to the eval directory:
|
||||
|
||||
```python
|
||||
import json
|
||||
data = json.load(open("/tmp/eval_results/metrics.json"))
|
||||
for metric, value in data["results"]["all"].items():
|
||||
print(f"{metric}: {value}")
|
||||
```
|
||||
|
||||
### Step 3: Compare models
|
||||
|
||||
Run evaluate with different models and compare the metrics.json files.
|
||||
|
||||
### What to check:
|
||||
- **"data_dir_to_save_evals is not set"** — you forgot the flag, results won't be saved
|
||||
- **Tool usage rate = 0** — evaluate() is using chat_completion instead of HermesAgentLoop
|
||||
- **All scores identical** — judge failing, falling back to heuristic
|
||||
- **Very slow** — each item runs a full agent loop (~30-90s). Use `--env.eval_size 5` for quick checks.
|
||||
|
||||
|
||||
## Pattern 3: Generate Training Data (process mode, larger scale)
|
||||
|
||||
Generate trajectory data for offline training or analysis:
|
||||
|
||||
```bash
|
||||
python environments/your_env.py process \
|
||||
--env.total_steps 50 \
|
||||
--env.group_size 4 \
|
||||
--env.use_wandb false \
|
||||
--env.data_path_to_save_groups data/trajectories.jsonl \
|
||||
--openai.base_url "<BASE_URL>" \
|
||||
--openai.model_name "<MODEL>" \
|
||||
--openai.server_type <SERVER_TYPE> \
|
||||
--openai.health_check false
|
||||
```
|
||||
|
||||
### Analyze the distribution:
|
||||
|
||||
```python
|
||||
import json
|
||||
scores = []
|
||||
for line in open("data/trajectories.jsonl"):
|
||||
data = json.loads(line)
|
||||
scores.extend(data.get("scores", []))
|
||||
|
||||
print(f"Total: {len(scores)}, Mean: {sum(scores)/len(scores):.3f}")
|
||||
for bucket in [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]:
|
||||
count = sum(1 for s in scores if abs(s - bucket) < 0.1)
|
||||
print(f" {bucket:.1f}: {'█' * count} ({count})")
|
||||
```
|
||||
|
||||
### What to check:
|
||||
- **Score distribution has variance** — RL needs score variance. All-same scores are useless.
|
||||
|
||||
|
||||
## Pattern 4: Full RL Training (serve mode)
|
||||
|
||||
For actual RL training with Atropos:
|
||||
|
||||
```bash
|
||||
# Terminal 1: Start Atropos API server
|
||||
run-api
|
||||
|
||||
# Terminal 2: Start your environment
|
||||
python environments/your_env.py serve \
|
||||
--config environments/your_env/default.yaml
|
||||
```
|
||||
|
||||
For Phase 2 with VLLM:
|
||||
|
||||
```bash
|
||||
# Terminal 1: VLLM server
|
||||
python -m vllm.entrypoints.openai.api_server --model your-model --port 8000
|
||||
|
||||
# Terminal 2: Atropos API
|
||||
run-api
|
||||
|
||||
# Terminal 3: Environment
|
||||
python environments/your_env.py serve \
|
||||
--openai.base_url http://localhost:8000/v1 \
|
||||
--openai.model_name your-model \
|
||||
--openai.server_type vllm
|
||||
```
|
||||
|
||||
|
||||
## Pattern 5: Quick Smoke Test
|
||||
|
||||
Verify imports and config before spending money on API calls:
|
||||
|
||||
```python
|
||||
from environments.your_env import YourEnv
|
||||
print(f"Name: {YourEnv.name}")
|
||||
cfg, servers = YourEnv.config_init()
|
||||
print(f"Toolsets: {cfg.enabled_toolsets}")
|
||||
print(f"Server: {servers[0].model_name}")
|
||||
print("All imports OK")
|
||||
```
|
||||
|
||||
|
||||
## Timing Expectations
|
||||
|
||||
| Mode | Items | Time per item | Total |
|
||||
|------|-------|--------------|-------|
|
||||
| process (1 item) | 1 | 30-90s | ~1 min |
|
||||
| evaluate (5 items) | 5 | 30-90s | ~5 min |
|
||||
| evaluate (20 items) | 20 | 30-90s | ~15-30 min |
|
||||
| process (50 items) | 50 | 30-90s | ~30-75 min |
|
||||
|
||||
Times are for cloud APIs with Claude Sonnet-class models. Local models may be faster or slower depending on hardware.
|
||||
@@ -176,14 +176,18 @@ class TestVisionClientFallback:
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.3-codex"
|
||||
|
||||
def test_vision_auto_skips_custom_endpoint(self, monkeypatch):
|
||||
"""Custom endpoint is skipped in vision auto mode."""
|
||||
def test_vision_auto_falls_back_to_custom_endpoint(self, monkeypatch):
|
||||
"""Custom endpoint is used as fallback in vision auto mode.
|
||||
|
||||
Many local models (Qwen-VL, LLaVA, etc.) support vision.
|
||||
When no OpenRouter/Nous/Codex is available, try the custom endpoint.
|
||||
"""
|
||||
monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "local-key")
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_vision_auxiliary_client()
|
||||
assert client is None
|
||||
assert model is None
|
||||
assert client is not None # Custom endpoint picked up as fallback
|
||||
|
||||
def test_vision_uses_openrouter_when_available(self, monkeypatch):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
||||
@@ -160,3 +160,27 @@ class TestMirrorToSession:
|
||||
result = mirror_to_session("telegram", "123", "msg")
|
||||
|
||||
assert result is False
|
||||
|
||||
|
||||
class TestAppendToSqlite:
|
||||
def test_connection_is_closed_after_use(self, tmp_path):
|
||||
"""Verify _append_to_sqlite closes the SessionDB connection."""
|
||||
from gateway.mirror import _append_to_sqlite
|
||||
mock_db = MagicMock()
|
||||
|
||||
with patch("hermes_state.SessionDB", return_value=mock_db):
|
||||
_append_to_sqlite("sess_1", {"role": "assistant", "content": "hello"})
|
||||
|
||||
mock_db.append_message.assert_called_once()
|
||||
mock_db.close.assert_called_once()
|
||||
|
||||
def test_connection_closed_even_on_error(self, tmp_path):
|
||||
"""Verify connection is closed even when append_message raises."""
|
||||
from gateway.mirror import _append_to_sqlite
|
||||
mock_db = MagicMock()
|
||||
mock_db.append_message.side_effect = Exception("db error")
|
||||
|
||||
with patch("hermes_state.SessionDB", return_value=mock_db):
|
||||
_append_to_sqlite("sess_1", {"role": "assistant", "content": "hello"})
|
||||
|
||||
mock_db.close.assert_called_once()
|
||||
|
||||
532
tests/gateway/test_slack.py
Normal file
532
tests/gateway/test_slack.py
Normal file
@@ -0,0 +1,532 @@
|
||||
"""
|
||||
Tests for Slack platform adapter.
|
||||
|
||||
Covers: app_mention handler, send_document, send_video,
|
||||
incoming document handling, message routing.
|
||||
|
||||
Note: slack-bolt may not be installed in the test environment.
|
||||
We mock the slack modules at import time to avoid collection errors.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mock the slack-bolt package if it's not installed
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _ensure_slack_mock():
|
||||
"""Install mock slack modules so SlackAdapter can be imported."""
|
||||
if "slack_bolt" in sys.modules and hasattr(sys.modules["slack_bolt"], "__file__"):
|
||||
return # Real library installed
|
||||
|
||||
slack_bolt = MagicMock()
|
||||
slack_bolt.async_app.AsyncApp = MagicMock
|
||||
slack_bolt.adapter.socket_mode.async_handler.AsyncSocketModeHandler = MagicMock
|
||||
|
||||
slack_sdk = MagicMock()
|
||||
slack_sdk.web.async_client.AsyncWebClient = MagicMock
|
||||
|
||||
for name, mod in [
|
||||
("slack_bolt", slack_bolt),
|
||||
("slack_bolt.async_app", slack_bolt.async_app),
|
||||
("slack_bolt.adapter", slack_bolt.adapter),
|
||||
("slack_bolt.adapter.socket_mode", slack_bolt.adapter.socket_mode),
|
||||
("slack_bolt.adapter.socket_mode.async_handler", slack_bolt.adapter.socket_mode.async_handler),
|
||||
("slack_sdk", slack_sdk),
|
||||
("slack_sdk.web", slack_sdk.web),
|
||||
("slack_sdk.web.async_client", slack_sdk.web.async_client),
|
||||
]:
|
||||
sys.modules.setdefault(name, mod)
|
||||
|
||||
|
||||
_ensure_slack_mock()
|
||||
|
||||
# Patch SLACK_AVAILABLE before importing the adapter
|
||||
import gateway.platforms.slack as _slack_mod
|
||||
_slack_mod.SLACK_AVAILABLE = True
|
||||
|
||||
from gateway.platforms.slack import SlackAdapter # noqa: E402
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture()
|
||||
def adapter():
|
||||
config = PlatformConfig(enabled=True, token="xoxb-fake-token")
|
||||
a = SlackAdapter(config)
|
||||
# Mock the Slack app client
|
||||
a._app = MagicMock()
|
||||
a._app.client = AsyncMock()
|
||||
a._bot_user_id = "U_BOT"
|
||||
a._running = True
|
||||
# Capture events instead of processing them
|
||||
a.handle_message = AsyncMock()
|
||||
return a
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _redirect_cache(tmp_path, monkeypatch):
|
||||
"""Point document cache to tmp_path so tests don't touch ~/.hermes."""
|
||||
monkeypatch.setattr(
|
||||
"gateway.platforms.base.DOCUMENT_CACHE_DIR", tmp_path / "doc_cache"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestAppMentionHandler
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAppMentionHandler:
|
||||
"""Verify that the app_mention event handler is registered."""
|
||||
|
||||
def test_app_mention_registered_on_connect(self):
|
||||
"""connect() should register both 'message' and 'app_mention' handlers."""
|
||||
config = PlatformConfig(enabled=True, token="xoxb-fake")
|
||||
adapter = SlackAdapter(config)
|
||||
|
||||
# Track which events get registered
|
||||
registered_events = []
|
||||
registered_commands = []
|
||||
|
||||
mock_app = MagicMock()
|
||||
|
||||
def mock_event(event_type):
|
||||
def decorator(fn):
|
||||
registered_events.append(event_type)
|
||||
return fn
|
||||
return decorator
|
||||
|
||||
def mock_command(cmd):
|
||||
def decorator(fn):
|
||||
registered_commands.append(cmd)
|
||||
return fn
|
||||
return decorator
|
||||
|
||||
mock_app.event = mock_event
|
||||
mock_app.command = mock_command
|
||||
mock_app.client = AsyncMock()
|
||||
mock_app.client.auth_test = AsyncMock(return_value={
|
||||
"user_id": "U_BOT",
|
||||
"user": "testbot",
|
||||
})
|
||||
|
||||
with patch.object(_slack_mod, "AsyncApp", return_value=mock_app), \
|
||||
patch.object(_slack_mod, "AsyncSocketModeHandler", return_value=MagicMock()), \
|
||||
patch.dict(os.environ, {"SLACK_APP_TOKEN": "xapp-fake"}), \
|
||||
patch("asyncio.create_task"):
|
||||
asyncio.get_event_loop().run_until_complete(adapter.connect())
|
||||
|
||||
assert "message" in registered_events
|
||||
assert "app_mention" in registered_events
|
||||
assert "/hermes" in registered_commands
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestSendDocument
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSendDocument:
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_document_success(self, adapter, tmp_path):
|
||||
test_file = tmp_path / "report.pdf"
|
||||
test_file.write_bytes(b"%PDF-1.4 fake content")
|
||||
|
||||
adapter._app.client.files_upload_v2 = AsyncMock(return_value={"ok": True})
|
||||
|
||||
result = await adapter.send_document(
|
||||
chat_id="C123",
|
||||
file_path=str(test_file),
|
||||
caption="Here's the report",
|
||||
)
|
||||
|
||||
assert result.success
|
||||
adapter._app.client.files_upload_v2.assert_called_once()
|
||||
call_kwargs = adapter._app.client.files_upload_v2.call_args[1]
|
||||
assert call_kwargs["channel"] == "C123"
|
||||
assert call_kwargs["file"] == str(test_file)
|
||||
assert call_kwargs["filename"] == "report.pdf"
|
||||
assert call_kwargs["initial_comment"] == "Here's the report"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_document_custom_name(self, adapter, tmp_path):
|
||||
test_file = tmp_path / "data.csv"
|
||||
test_file.write_bytes(b"a,b,c\n1,2,3")
|
||||
|
||||
adapter._app.client.files_upload_v2 = AsyncMock(return_value={"ok": True})
|
||||
|
||||
result = await adapter.send_document(
|
||||
chat_id="C123",
|
||||
file_path=str(test_file),
|
||||
file_name="quarterly-report.csv",
|
||||
)
|
||||
|
||||
assert result.success
|
||||
call_kwargs = adapter._app.client.files_upload_v2.call_args[1]
|
||||
assert call_kwargs["filename"] == "quarterly-report.csv"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_document_missing_file(self, adapter):
|
||||
result = await adapter.send_document(
|
||||
chat_id="C123",
|
||||
file_path="/nonexistent/file.pdf",
|
||||
)
|
||||
|
||||
assert not result.success
|
||||
assert "not found" in result.error.lower()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_document_not_connected(self, adapter):
|
||||
adapter._app = None
|
||||
result = await adapter.send_document(
|
||||
chat_id="C123",
|
||||
file_path="/some/file.pdf",
|
||||
)
|
||||
|
||||
assert not result.success
|
||||
assert "Not connected" in result.error
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_document_api_error_falls_back(self, adapter, tmp_path):
|
||||
test_file = tmp_path / "doc.pdf"
|
||||
test_file.write_bytes(b"content")
|
||||
|
||||
adapter._app.client.files_upload_v2 = AsyncMock(
|
||||
side_effect=RuntimeError("Slack API error")
|
||||
)
|
||||
|
||||
# Should fall back to base class (text message)
|
||||
result = await adapter.send_document(
|
||||
chat_id="C123",
|
||||
file_path=str(test_file),
|
||||
)
|
||||
|
||||
# Base class send() is also mocked, so check it was attempted
|
||||
adapter._app.client.chat_postMessage.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_document_with_thread(self, adapter, tmp_path):
|
||||
test_file = tmp_path / "notes.txt"
|
||||
test_file.write_bytes(b"some notes")
|
||||
|
||||
adapter._app.client.files_upload_v2 = AsyncMock(return_value={"ok": True})
|
||||
|
||||
result = await adapter.send_document(
|
||||
chat_id="C123",
|
||||
file_path=str(test_file),
|
||||
reply_to="1234567890.123456",
|
||||
)
|
||||
|
||||
assert result.success
|
||||
call_kwargs = adapter._app.client.files_upload_v2.call_args[1]
|
||||
assert call_kwargs["thread_ts"] == "1234567890.123456"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestSendVideo
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSendVideo:
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_video_success(self, adapter, tmp_path):
|
||||
video = tmp_path / "clip.mp4"
|
||||
video.write_bytes(b"fake video data")
|
||||
|
||||
adapter._app.client.files_upload_v2 = AsyncMock(return_value={"ok": True})
|
||||
|
||||
result = await adapter.send_video(
|
||||
chat_id="C123",
|
||||
video_path=str(video),
|
||||
caption="Check this out",
|
||||
)
|
||||
|
||||
assert result.success
|
||||
call_kwargs = adapter._app.client.files_upload_v2.call_args[1]
|
||||
assert call_kwargs["filename"] == "clip.mp4"
|
||||
assert call_kwargs["initial_comment"] == "Check this out"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_video_missing_file(self, adapter):
|
||||
result = await adapter.send_video(
|
||||
chat_id="C123",
|
||||
video_path="/nonexistent/video.mp4",
|
||||
)
|
||||
|
||||
assert not result.success
|
||||
assert "not found" in result.error.lower()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_video_not_connected(self, adapter):
|
||||
adapter._app = None
|
||||
result = await adapter.send_video(
|
||||
chat_id="C123",
|
||||
video_path="/some/video.mp4",
|
||||
)
|
||||
|
||||
assert not result.success
|
||||
assert "Not connected" in result.error
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_video_api_error_falls_back(self, adapter, tmp_path):
|
||||
video = tmp_path / "clip.mp4"
|
||||
video.write_bytes(b"fake video")
|
||||
|
||||
adapter._app.client.files_upload_v2 = AsyncMock(
|
||||
side_effect=RuntimeError("Slack API error")
|
||||
)
|
||||
|
||||
# Should fall back to base class (text message)
|
||||
result = await adapter.send_video(
|
||||
chat_id="C123",
|
||||
video_path=str(video),
|
||||
)
|
||||
|
||||
adapter._app.client.chat_postMessage.assert_called_once()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestIncomingDocumentHandling
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestIncomingDocumentHandling:
|
||||
def _make_event(self, files=None, text="hello", channel_type="im"):
|
||||
"""Build a mock Slack message event with file attachments."""
|
||||
return {
|
||||
"text": text,
|
||||
"user": "U_USER",
|
||||
"channel": "C123",
|
||||
"channel_type": channel_type,
|
||||
"ts": "1234567890.000001",
|
||||
"files": files or [],
|
||||
}
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_pdf_document_cached(self, adapter):
|
||||
"""A PDF attachment should be downloaded, cached, and set as DOCUMENT type."""
|
||||
pdf_bytes = b"%PDF-1.4 fake content"
|
||||
|
||||
with patch.object(adapter, "_download_slack_file_bytes", new_callable=AsyncMock) as dl:
|
||||
dl.return_value = pdf_bytes
|
||||
event = self._make_event(files=[{
|
||||
"mimetype": "application/pdf",
|
||||
"name": "report.pdf",
|
||||
"url_private_download": "https://files.slack.com/report.pdf",
|
||||
"size": len(pdf_bytes),
|
||||
}])
|
||||
await adapter._handle_slack_message(event)
|
||||
|
||||
msg_event = adapter.handle_message.call_args[0][0]
|
||||
assert msg_event.message_type == MessageType.DOCUMENT
|
||||
assert len(msg_event.media_urls) == 1
|
||||
assert os.path.exists(msg_event.media_urls[0])
|
||||
assert msg_event.media_types == ["application/pdf"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_txt_document_injects_content(self, adapter):
|
||||
"""A .txt file under 100KB should have its content injected into event text."""
|
||||
content = b"Hello from a text file"
|
||||
|
||||
with patch.object(adapter, "_download_slack_file_bytes", new_callable=AsyncMock) as dl:
|
||||
dl.return_value = content
|
||||
event = self._make_event(
|
||||
text="summarize this",
|
||||
files=[{
|
||||
"mimetype": "text/plain",
|
||||
"name": "notes.txt",
|
||||
"url_private_download": "https://files.slack.com/notes.txt",
|
||||
"size": len(content),
|
||||
}],
|
||||
)
|
||||
await adapter._handle_slack_message(event)
|
||||
|
||||
msg_event = adapter.handle_message.call_args[0][0]
|
||||
assert "Hello from a text file" in msg_event.text
|
||||
assert "[Content of notes.txt]" in msg_event.text
|
||||
assert "summarize this" in msg_event.text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_md_document_injects_content(self, adapter):
|
||||
"""A .md file under 100KB should have its content injected."""
|
||||
content = b"# Title\nSome markdown content"
|
||||
|
||||
with patch.object(adapter, "_download_slack_file_bytes", new_callable=AsyncMock) as dl:
|
||||
dl.return_value = content
|
||||
event = self._make_event(files=[{
|
||||
"mimetype": "text/markdown",
|
||||
"name": "readme.md",
|
||||
"url_private_download": "https://files.slack.com/readme.md",
|
||||
"size": len(content),
|
||||
}], text="")
|
||||
await adapter._handle_slack_message(event)
|
||||
|
||||
msg_event = adapter.handle_message.call_args[0][0]
|
||||
assert "# Title" in msg_event.text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_large_txt_not_injected(self, adapter):
|
||||
"""A .txt file over 100KB should be cached but NOT injected."""
|
||||
content = b"x" * (200 * 1024)
|
||||
|
||||
with patch.object(adapter, "_download_slack_file_bytes", new_callable=AsyncMock) as dl:
|
||||
dl.return_value = content
|
||||
event = self._make_event(files=[{
|
||||
"mimetype": "text/plain",
|
||||
"name": "big.txt",
|
||||
"url_private_download": "https://files.slack.com/big.txt",
|
||||
"size": len(content),
|
||||
}], text="")
|
||||
await adapter._handle_slack_message(event)
|
||||
|
||||
msg_event = adapter.handle_message.call_args[0][0]
|
||||
assert len(msg_event.media_urls) == 1
|
||||
assert "[Content of" not in (msg_event.text or "")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unsupported_file_type_skipped(self, adapter):
|
||||
"""A .zip file should be silently skipped."""
|
||||
event = self._make_event(files=[{
|
||||
"mimetype": "application/zip",
|
||||
"name": "archive.zip",
|
||||
"url_private_download": "https://files.slack.com/archive.zip",
|
||||
"size": 1024,
|
||||
}])
|
||||
await adapter._handle_slack_message(event)
|
||||
|
||||
msg_event = adapter.handle_message.call_args[0][0]
|
||||
assert msg_event.message_type == MessageType.TEXT
|
||||
assert len(msg_event.media_urls) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_oversized_document_skipped(self, adapter):
|
||||
"""A document over 20MB should be skipped."""
|
||||
event = self._make_event(files=[{
|
||||
"mimetype": "application/pdf",
|
||||
"name": "huge.pdf",
|
||||
"url_private_download": "https://files.slack.com/huge.pdf",
|
||||
"size": 25 * 1024 * 1024,
|
||||
}])
|
||||
await adapter._handle_slack_message(event)
|
||||
|
||||
msg_event = adapter.handle_message.call_args[0][0]
|
||||
assert len(msg_event.media_urls) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_document_download_error_handled(self, adapter):
|
||||
"""If document download fails, handler should not crash."""
|
||||
with patch.object(adapter, "_download_slack_file_bytes", new_callable=AsyncMock) as dl:
|
||||
dl.side_effect = RuntimeError("download failed")
|
||||
event = self._make_event(files=[{
|
||||
"mimetype": "application/pdf",
|
||||
"name": "report.pdf",
|
||||
"url_private_download": "https://files.slack.com/report.pdf",
|
||||
"size": 1024,
|
||||
}])
|
||||
await adapter._handle_slack_message(event)
|
||||
|
||||
# Handler should still be called (the exception is caught)
|
||||
adapter.handle_message.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_image_still_handled(self, adapter):
|
||||
"""Image attachments should still go through the image path, not document."""
|
||||
with patch.object(adapter, "_download_slack_file", new_callable=AsyncMock) as dl:
|
||||
dl.return_value = "/tmp/cached_image.jpg"
|
||||
event = self._make_event(files=[{
|
||||
"mimetype": "image/jpeg",
|
||||
"name": "photo.jpg",
|
||||
"url_private_download": "https://files.slack.com/photo.jpg",
|
||||
"size": 1024,
|
||||
}])
|
||||
await adapter._handle_slack_message(event)
|
||||
|
||||
msg_event = adapter.handle_message.call_args[0][0]
|
||||
assert msg_event.message_type == MessageType.PHOTO
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestMessageRouting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestMessageRouting:
|
||||
@pytest.mark.asyncio
|
||||
async def test_dm_processed_without_mention(self, adapter):
|
||||
"""DM messages should be processed without requiring a bot mention."""
|
||||
event = {
|
||||
"text": "hello",
|
||||
"user": "U_USER",
|
||||
"channel": "D123",
|
||||
"channel_type": "im",
|
||||
"ts": "1234567890.000001",
|
||||
}
|
||||
await adapter._handle_slack_message(event)
|
||||
adapter.handle_message.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_channel_message_requires_mention(self, adapter):
|
||||
"""Channel messages without a bot mention should be ignored."""
|
||||
event = {
|
||||
"text": "just talking",
|
||||
"user": "U_USER",
|
||||
"channel": "C123",
|
||||
"channel_type": "channel",
|
||||
"ts": "1234567890.000001",
|
||||
}
|
||||
await adapter._handle_slack_message(event)
|
||||
adapter.handle_message.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_channel_mention_strips_bot_id(self, adapter):
|
||||
"""When mentioned in a channel, the bot mention should be stripped."""
|
||||
event = {
|
||||
"text": "<@U_BOT> what's the weather?",
|
||||
"user": "U_USER",
|
||||
"channel": "C123",
|
||||
"channel_type": "channel",
|
||||
"ts": "1234567890.000001",
|
||||
}
|
||||
await adapter._handle_slack_message(event)
|
||||
msg_event = adapter.handle_message.call_args[0][0]
|
||||
assert msg_event.text == "what's the weather?"
|
||||
assert "<@U_BOT>" not in msg_event.text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_bot_messages_ignored(self, adapter):
|
||||
"""Messages from bots should be ignored."""
|
||||
event = {
|
||||
"text": "bot response",
|
||||
"bot_id": "B_OTHER",
|
||||
"channel": "C123",
|
||||
"channel_type": "im",
|
||||
"ts": "1234567890.000001",
|
||||
}
|
||||
await adapter._handle_slack_message(event)
|
||||
adapter.handle_message.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_edits_ignored(self, adapter):
|
||||
"""Message edits should be ignored."""
|
||||
event = {
|
||||
"text": "edited message",
|
||||
"user": "U_USER",
|
||||
"channel": "C123",
|
||||
"channel_type": "im",
|
||||
"ts": "1234567890.000001",
|
||||
"subtype": "message_changed",
|
||||
}
|
||||
await adapter._handle_slack_message(event)
|
||||
adapter.handle_message.assert_not_called()
|
||||
113
tests/hermes_cli/test_coalesce_session_args.py
Normal file
113
tests/hermes_cli/test_coalesce_session_args.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Tests for _coalesce_session_name_args — multi-word session name merging."""
|
||||
|
||||
import pytest
|
||||
from hermes_cli.main import _coalesce_session_name_args
|
||||
|
||||
|
||||
class TestCoalesceSessionNameArgs:
|
||||
"""Ensure unquoted multi-word session names are merged into one token."""
|
||||
|
||||
# ── -c / --continue ──────────────────────────────────────────────────
|
||||
|
||||
def test_continue_multiword_unquoted(self):
|
||||
"""hermes -c Pokemon Agent Dev → -c 'Pokemon Agent Dev'"""
|
||||
assert _coalesce_session_name_args(
|
||||
["-c", "Pokemon", "Agent", "Dev"]
|
||||
) == ["-c", "Pokemon Agent Dev"]
|
||||
|
||||
def test_continue_long_form_multiword(self):
|
||||
"""hermes --continue Pokemon Agent Dev"""
|
||||
assert _coalesce_session_name_args(
|
||||
["--continue", "Pokemon", "Agent", "Dev"]
|
||||
) == ["--continue", "Pokemon Agent Dev"]
|
||||
|
||||
def test_continue_single_word(self):
|
||||
"""hermes -c MyProject (no merging needed)"""
|
||||
assert _coalesce_session_name_args(["-c", "MyProject"]) == [
|
||||
"-c",
|
||||
"MyProject",
|
||||
]
|
||||
|
||||
def test_continue_already_quoted(self):
|
||||
"""hermes -c 'Pokemon Agent Dev' (shell already merged)"""
|
||||
assert _coalesce_session_name_args(
|
||||
["-c", "Pokemon Agent Dev"]
|
||||
) == ["-c", "Pokemon Agent Dev"]
|
||||
|
||||
def test_continue_bare_flag(self):
|
||||
"""hermes -c (no name — means 'continue latest')"""
|
||||
assert _coalesce_session_name_args(["-c"]) == ["-c"]
|
||||
|
||||
def test_continue_followed_by_flag(self):
|
||||
"""hermes -c -w (no name consumed, -w stays separate)"""
|
||||
assert _coalesce_session_name_args(["-c", "-w"]) == ["-c", "-w"]
|
||||
|
||||
def test_continue_multiword_then_flag(self):
|
||||
"""hermes -c my project -w"""
|
||||
assert _coalesce_session_name_args(
|
||||
["-c", "my", "project", "-w"]
|
||||
) == ["-c", "my project", "-w"]
|
||||
|
||||
def test_continue_multiword_then_subcommand(self):
|
||||
"""hermes -c my project chat -q hello"""
|
||||
assert _coalesce_session_name_args(
|
||||
["-c", "my", "project", "chat", "-q", "hello"]
|
||||
) == ["-c", "my project", "chat", "-q", "hello"]
|
||||
|
||||
# ── -r / --resume ────────────────────────────────────────────────────
|
||||
|
||||
def test_resume_multiword(self):
|
||||
"""hermes -r My Session Name"""
|
||||
assert _coalesce_session_name_args(
|
||||
["-r", "My", "Session", "Name"]
|
||||
) == ["-r", "My Session Name"]
|
||||
|
||||
def test_resume_long_form_multiword(self):
|
||||
"""hermes --resume My Session Name"""
|
||||
assert _coalesce_session_name_args(
|
||||
["--resume", "My", "Session", "Name"]
|
||||
) == ["--resume", "My Session Name"]
|
||||
|
||||
def test_resume_multiword_then_flag(self):
|
||||
"""hermes -r My Session -w"""
|
||||
assert _coalesce_session_name_args(
|
||||
["-r", "My", "Session", "-w"]
|
||||
) == ["-r", "My Session", "-w"]
|
||||
|
||||
# ── combined flags ───────────────────────────────────────────────────
|
||||
|
||||
def test_worktree_and_continue_multiword(self):
|
||||
"""hermes -w -c Pokemon Agent Dev (the original failing case)"""
|
||||
assert _coalesce_session_name_args(
|
||||
["-w", "-c", "Pokemon", "Agent", "Dev"]
|
||||
) == ["-w", "-c", "Pokemon Agent Dev"]
|
||||
|
||||
def test_continue_multiword_and_worktree(self):
|
||||
"""hermes -c Pokemon Agent Dev -w (order reversed)"""
|
||||
assert _coalesce_session_name_args(
|
||||
["-c", "Pokemon", "Agent", "Dev", "-w"]
|
||||
) == ["-c", "Pokemon Agent Dev", "-w"]
|
||||
|
||||
# ── passthrough (no session flags) ───────────────────────────────────
|
||||
|
||||
def test_no_session_flags_passthrough(self):
|
||||
"""hermes -w chat -q hello (nothing to merge)"""
|
||||
result = _coalesce_session_name_args(["-w", "chat", "-q", "hello"])
|
||||
assert result == ["-w", "chat", "-q", "hello"]
|
||||
|
||||
def test_empty_argv(self):
|
||||
assert _coalesce_session_name_args([]) == []
|
||||
|
||||
# ── subcommand boundary ──────────────────────────────────────────────
|
||||
|
||||
def test_stops_at_sessions_subcommand(self):
|
||||
"""hermes -c my project sessions list → stops before 'sessions'"""
|
||||
assert _coalesce_session_name_args(
|
||||
["-c", "my", "project", "sessions", "list"]
|
||||
) == ["-c", "my project", "sessions", "list"]
|
||||
|
||||
def test_stops_at_setup_subcommand(self):
|
||||
"""hermes -c my setup → 'setup' is a subcommand, not part of name"""
|
||||
assert _coalesce_session_name_args(
|
||||
["-c", "my", "setup"]
|
||||
) == ["-c", "my", "setup"]
|
||||
@@ -12,7 +12,7 @@ EXPECTED_COMMANDS = {
|
||||
"/personality", "/clear", "/history", "/new", "/reset", "/retry",
|
||||
"/undo", "/save", "/config", "/cron", "/skills", "/platforms",
|
||||
"/verbose", "/compress", "/title", "/usage", "/insights", "/paste",
|
||||
"/reload-mcp", "/quit",
|
||||
"/reload-mcp", "/rollback", "/skin", "/quit",
|
||||
}
|
||||
|
||||
|
||||
|
||||
232
tests/hermes_cli/test_skin_engine.py
Normal file
232
tests/hermes_cli/test_skin_engine.py
Normal file
@@ -0,0 +1,232 @@
|
||||
"""Tests for hermes_cli.skin_engine — the data-driven skin/theme system."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_skin_state():
|
||||
"""Reset skin engine state between tests."""
|
||||
from hermes_cli import skin_engine
|
||||
skin_engine._active_skin = None
|
||||
skin_engine._active_skin_name = "default"
|
||||
yield
|
||||
skin_engine._active_skin = None
|
||||
skin_engine._active_skin_name = "default"
|
||||
|
||||
|
||||
class TestSkinConfig:
|
||||
def test_default_skin_has_required_fields(self):
|
||||
from hermes_cli.skin_engine import load_skin
|
||||
skin = load_skin("default")
|
||||
assert skin.name == "default"
|
||||
assert skin.tool_prefix == "┊"
|
||||
assert "banner_title" in skin.colors
|
||||
assert "banner_border" in skin.colors
|
||||
assert "agent_name" in skin.branding
|
||||
|
||||
def test_get_color_with_fallback(self):
|
||||
from hermes_cli.skin_engine import load_skin
|
||||
skin = load_skin("default")
|
||||
assert skin.get_color("banner_title") == "#FFD700"
|
||||
assert skin.get_color("nonexistent", "#000") == "#000"
|
||||
|
||||
def test_get_branding_with_fallback(self):
|
||||
from hermes_cli.skin_engine import load_skin
|
||||
skin = load_skin("default")
|
||||
assert skin.get_branding("agent_name") == "Hermes Agent"
|
||||
assert skin.get_branding("nonexistent", "fallback") == "fallback"
|
||||
|
||||
def test_get_spinner_list_empty_for_default(self):
|
||||
from hermes_cli.skin_engine import load_skin
|
||||
skin = load_skin("default")
|
||||
# Default skin has no custom spinner config
|
||||
assert skin.get_spinner_list("waiting_faces") == []
|
||||
assert skin.get_spinner_list("thinking_verbs") == []
|
||||
|
||||
def test_get_spinner_wings_empty_for_default(self):
|
||||
from hermes_cli.skin_engine import load_skin
|
||||
skin = load_skin("default")
|
||||
assert skin.get_spinner_wings() == []
|
||||
|
||||
|
||||
class TestBuiltinSkins:
|
||||
def test_ares_skin_loads(self):
|
||||
from hermes_cli.skin_engine import load_skin
|
||||
skin = load_skin("ares")
|
||||
assert skin.name == "ares"
|
||||
assert skin.tool_prefix == "╎"
|
||||
assert skin.get_color("banner_border") == "#9F1C1C"
|
||||
assert skin.get_branding("agent_name") == "Ares Agent"
|
||||
|
||||
def test_ares_has_spinner_customization(self):
|
||||
from hermes_cli.skin_engine import load_skin
|
||||
skin = load_skin("ares")
|
||||
assert len(skin.get_spinner_list("waiting_faces")) > 0
|
||||
assert len(skin.get_spinner_list("thinking_faces")) > 0
|
||||
assert len(skin.get_spinner_list("thinking_verbs")) > 0
|
||||
wings = skin.get_spinner_wings()
|
||||
assert len(wings) > 0
|
||||
assert isinstance(wings[0], tuple)
|
||||
assert len(wings[0]) == 2
|
||||
|
||||
def test_mono_skin_loads(self):
|
||||
from hermes_cli.skin_engine import load_skin
|
||||
skin = load_skin("mono")
|
||||
assert skin.name == "mono"
|
||||
assert skin.get_color("banner_title") == "#e6edf3"
|
||||
|
||||
def test_slate_skin_loads(self):
|
||||
from hermes_cli.skin_engine import load_skin
|
||||
skin = load_skin("slate")
|
||||
assert skin.name == "slate"
|
||||
assert skin.get_color("banner_title") == "#7eb8f6"
|
||||
|
||||
def test_unknown_skin_falls_back_to_default(self):
|
||||
from hermes_cli.skin_engine import load_skin
|
||||
skin = load_skin("nonexistent_skin_xyz")
|
||||
assert skin.name == "default"
|
||||
|
||||
def test_all_builtin_skins_have_complete_colors(self):
|
||||
from hermes_cli.skin_engine import _BUILTIN_SKINS, _build_skin_config
|
||||
required_keys = ["banner_border", "banner_title", "banner_accent",
|
||||
"banner_dim", "banner_text", "ui_accent"]
|
||||
for name, data in _BUILTIN_SKINS.items():
|
||||
skin = _build_skin_config(data)
|
||||
for key in required_keys:
|
||||
assert key in skin.colors, f"Skin '{name}' missing color '{key}'"
|
||||
|
||||
|
||||
class TestSkinManagement:
|
||||
def test_set_active_skin(self):
|
||||
from hermes_cli.skin_engine import set_active_skin, get_active_skin, get_active_skin_name
|
||||
skin = set_active_skin("ares")
|
||||
assert skin.name == "ares"
|
||||
assert get_active_skin_name() == "ares"
|
||||
assert get_active_skin().name == "ares"
|
||||
|
||||
def test_get_active_skin_defaults(self):
|
||||
from hermes_cli.skin_engine import get_active_skin
|
||||
skin = get_active_skin()
|
||||
assert skin.name == "default"
|
||||
|
||||
def test_list_skins_includes_builtins(self):
|
||||
from hermes_cli.skin_engine import list_skins
|
||||
skins = list_skins()
|
||||
names = [s["name"] for s in skins]
|
||||
assert "default" in names
|
||||
assert "ares" in names
|
||||
assert "mono" in names
|
||||
assert "slate" in names
|
||||
for s in skins:
|
||||
assert "source" in s
|
||||
assert s["source"] == "builtin"
|
||||
|
||||
def test_init_skin_from_config(self):
|
||||
from hermes_cli.skin_engine import init_skin_from_config, get_active_skin_name
|
||||
init_skin_from_config({"display": {"skin": "ares"}})
|
||||
assert get_active_skin_name() == "ares"
|
||||
|
||||
def test_init_skin_from_empty_config(self):
|
||||
from hermes_cli.skin_engine import init_skin_from_config, get_active_skin_name
|
||||
init_skin_from_config({})
|
||||
assert get_active_skin_name() == "default"
|
||||
|
||||
|
||||
class TestUserSkins:
|
||||
def test_load_user_skin_from_yaml(self, tmp_path, monkeypatch):
|
||||
from hermes_cli.skin_engine import load_skin, _skins_dir
|
||||
# Create a user skin YAML
|
||||
skins_dir = tmp_path / "skins"
|
||||
skins_dir.mkdir()
|
||||
skin_file = skins_dir / "custom.yaml"
|
||||
skin_data = {
|
||||
"name": "custom",
|
||||
"description": "A custom test skin",
|
||||
"colors": {"banner_title": "#FF0000"},
|
||||
"branding": {"agent_name": "Custom Agent"},
|
||||
"tool_prefix": "▸",
|
||||
}
|
||||
import yaml
|
||||
skin_file.write_text(yaml.dump(skin_data))
|
||||
|
||||
# Patch skins dir
|
||||
monkeypatch.setattr("hermes_cli.skin_engine._skins_dir", lambda: skins_dir)
|
||||
|
||||
skin = load_skin("custom")
|
||||
assert skin.name == "custom"
|
||||
assert skin.get_color("banner_title") == "#FF0000"
|
||||
assert skin.get_branding("agent_name") == "Custom Agent"
|
||||
assert skin.tool_prefix == "▸"
|
||||
# Should inherit defaults for unspecified colors
|
||||
assert skin.get_color("banner_border") == "#CD7F32" # from default
|
||||
|
||||
def test_list_skins_includes_user_skins(self, tmp_path, monkeypatch):
|
||||
from hermes_cli.skin_engine import list_skins
|
||||
skins_dir = tmp_path / "skins"
|
||||
skins_dir.mkdir()
|
||||
import yaml
|
||||
(skins_dir / "pirate.yaml").write_text(yaml.dump({
|
||||
"name": "pirate",
|
||||
"description": "Arr matey",
|
||||
}))
|
||||
monkeypatch.setattr("hermes_cli.skin_engine._skins_dir", lambda: skins_dir)
|
||||
|
||||
skins = list_skins()
|
||||
names = [s["name"] for s in skins]
|
||||
assert "pirate" in names
|
||||
pirate = [s for s in skins if s["name"] == "pirate"][0]
|
||||
assert pirate["source"] == "user"
|
||||
|
||||
|
||||
class TestDisplayIntegration:
|
||||
def test_get_skin_tool_prefix_default(self):
|
||||
from agent.display import get_skin_tool_prefix
|
||||
assert get_skin_tool_prefix() == "┊"
|
||||
|
||||
def test_get_skin_tool_prefix_custom(self):
|
||||
from hermes_cli.skin_engine import set_active_skin
|
||||
from agent.display import get_skin_tool_prefix
|
||||
set_active_skin("ares")
|
||||
assert get_skin_tool_prefix() == "╎"
|
||||
|
||||
def test_get_skin_faces_default(self):
|
||||
from agent.display import get_skin_faces, KawaiiSpinner
|
||||
faces = get_skin_faces("waiting_faces", KawaiiSpinner.KAWAII_WAITING)
|
||||
# Default skin has no custom faces, so should return the default list
|
||||
assert faces == KawaiiSpinner.KAWAII_WAITING
|
||||
|
||||
def test_get_skin_faces_ares(self):
|
||||
from hermes_cli.skin_engine import set_active_skin
|
||||
from agent.display import get_skin_faces, KawaiiSpinner
|
||||
set_active_skin("ares")
|
||||
faces = get_skin_faces("waiting_faces", KawaiiSpinner.KAWAII_WAITING)
|
||||
assert "(⚔)" in faces
|
||||
|
||||
def test_get_skin_verbs_default(self):
|
||||
from agent.display import get_skin_verbs, KawaiiSpinner
|
||||
verbs = get_skin_verbs()
|
||||
assert verbs == KawaiiSpinner.THINKING_VERBS
|
||||
|
||||
def test_get_skin_verbs_ares(self):
|
||||
from hermes_cli.skin_engine import set_active_skin
|
||||
from agent.display import get_skin_verbs
|
||||
set_active_skin("ares")
|
||||
verbs = get_skin_verbs()
|
||||
assert "forging" in verbs
|
||||
|
||||
def test_tool_message_uses_skin_prefix(self):
|
||||
from hermes_cli.skin_engine import set_active_skin
|
||||
from agent.display import get_cute_tool_message
|
||||
set_active_skin("ares")
|
||||
msg = get_cute_tool_message("terminal", {"command": "ls"}, 0.5)
|
||||
assert msg.startswith("╎")
|
||||
assert "┊" not in msg
|
||||
|
||||
def test_tool_message_default_prefix(self):
|
||||
from agent.display import get_cute_tool_message
|
||||
msg = get_cute_tool_message("terminal", {"command": "ls"}, 0.5)
|
||||
assert msg.startswith("┊")
|
||||
675
tests/skills/test_openclaw_migration.py
Normal file
675
tests/skills/test_openclaw_migration.py
Normal file
@@ -0,0 +1,675 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
SCRIPT_PATH = (
|
||||
Path(__file__).resolve().parents[2]
|
||||
/ "optional-skills"
|
||||
/ "migration"
|
||||
/ "openclaw-migration"
|
||||
/ "scripts"
|
||||
/ "openclaw_to_hermes.py"
|
||||
)
|
||||
|
||||
|
||||
def load_module():
|
||||
spec = importlib.util.spec_from_file_location("openclaw_to_hermes", SCRIPT_PATH)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
assert spec.loader is not None
|
||||
sys.modules[spec.name] = module
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
|
||||
def load_skills_guard():
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
"skills_guard_local",
|
||||
Path(__file__).resolve().parents[2] / "tools" / "skills_guard.py",
|
||||
)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
assert spec.loader is not None
|
||||
sys.modules[spec.name] = module
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
|
||||
def test_extract_markdown_entries_promotes_heading_context():
|
||||
mod = load_module()
|
||||
text = """# MEMORY.md - Long-Term Memory
|
||||
|
||||
## Tyler Williams
|
||||
|
||||
- Founder of VANTA Research
|
||||
- Timezone: America/Los_Angeles
|
||||
|
||||
### Active Projects
|
||||
|
||||
- Hermes Agent
|
||||
"""
|
||||
entries = mod.extract_markdown_entries(text)
|
||||
assert "Tyler Williams: Founder of VANTA Research" in entries
|
||||
assert "Tyler Williams: Timezone: America/Los_Angeles" in entries
|
||||
assert "Tyler Williams > Active Projects: Hermes Agent" in entries
|
||||
|
||||
|
||||
def test_merge_entries_respects_limit_and_reports_overflow():
|
||||
mod = load_module()
|
||||
existing = ["alpha"]
|
||||
incoming = ["beta", "gamma is too long"]
|
||||
merged, stats, overflowed = mod.merge_entries(existing, incoming, limit=12)
|
||||
assert merged == ["alpha", "beta"]
|
||||
assert stats["added"] == 1
|
||||
assert stats["overflowed"] == 1
|
||||
assert overflowed == ["gamma is too long"]
|
||||
|
||||
|
||||
def test_resolve_selected_options_supports_include_and_exclude():
|
||||
mod = load_module()
|
||||
selected = mod.resolve_selected_options(["memory,skills", "user-profile"], ["skills"])
|
||||
assert selected == {"memory", "user-profile"}
|
||||
|
||||
|
||||
def test_resolve_selected_options_supports_presets():
|
||||
mod = load_module()
|
||||
user_data = mod.resolve_selected_options(preset="user-data")
|
||||
full = mod.resolve_selected_options(preset="full")
|
||||
assert "secret-settings" not in user_data
|
||||
assert "secret-settings" in full
|
||||
assert user_data < full
|
||||
|
||||
|
||||
def test_resolve_selected_options_rejects_unknown_values():
|
||||
mod = load_module()
|
||||
try:
|
||||
mod.resolve_selected_options(["memory,unknown-option"], None)
|
||||
except ValueError as exc:
|
||||
assert "unknown-option" in str(exc)
|
||||
else:
|
||||
raise AssertionError("Expected ValueError for unknown migration option")
|
||||
|
||||
|
||||
def test_resolve_selected_options_rejects_unknown_preset():
|
||||
mod = load_module()
|
||||
try:
|
||||
mod.resolve_selected_options(preset="everything")
|
||||
except ValueError as exc:
|
||||
assert "everything" in str(exc)
|
||||
else:
|
||||
raise AssertionError("Expected ValueError for unknown migration preset")
|
||||
|
||||
|
||||
def test_migrator_copies_skill_and_merges_allowlist(tmp_path: Path):
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
|
||||
(source / "workspace" / "skills" / "demo-skill").mkdir(parents=True)
|
||||
(source / "workspace" / "skills" / "demo-skill" / "SKILL.md").write_text(
|
||||
"---\nname: demo-skill\ndescription: demo\n---\n\nbody\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
(source / "exec-approvals.json").write_text(
|
||||
json.dumps(
|
||||
{
|
||||
"agents": {
|
||||
"*": {
|
||||
"allowlist": [
|
||||
{"pattern": "/usr/bin/*"},
|
||||
{"pattern": "/home/test/**"},
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
(target / "config.yaml").write_text("command_allowlist:\n - /usr/bin/*\n", encoding="utf-8")
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source,
|
||||
target_root=target,
|
||||
execute=True,
|
||||
workspace_target=None,
|
||||
overwrite=False,
|
||||
migrate_secrets=False,
|
||||
output_dir=target / "migration-report",
|
||||
)
|
||||
report = migrator.migrate()
|
||||
|
||||
imported_skill = target / "skills" / mod.SKILL_CATEGORY_DIRNAME / "demo-skill" / "SKILL.md"
|
||||
assert imported_skill.exists()
|
||||
assert "/home/test/**" in (target / "config.yaml").read_text(encoding="utf-8")
|
||||
assert report["summary"]["migrated"] >= 2
|
||||
|
||||
|
||||
def test_migrator_optionally_imports_supported_secrets_and_messaging_settings(tmp_path: Path):
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
|
||||
(source / "credentials").mkdir(parents=True)
|
||||
(source / "openclaw.json").write_text(
|
||||
json.dumps(
|
||||
{
|
||||
"agents": {"defaults": {"workspace": "/tmp/openclaw-workspace"}},
|
||||
"channels": {"telegram": {"botToken": "123:abc"}},
|
||||
}
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
(source / "credentials" / "telegram-default-allowFrom.json").write_text(
|
||||
json.dumps({"allowFrom": ["111", "222"]}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
target.mkdir()
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source,
|
||||
target_root=target,
|
||||
execute=True,
|
||||
workspace_target=None,
|
||||
overwrite=False,
|
||||
migrate_secrets=True,
|
||||
output_dir=target / "migration-report",
|
||||
)
|
||||
migrator.migrate()
|
||||
|
||||
env_text = (target / ".env").read_text(encoding="utf-8")
|
||||
assert "MESSAGING_CWD=/tmp/openclaw-workspace" in env_text
|
||||
assert "TELEGRAM_ALLOWED_USERS=111,222" in env_text
|
||||
assert "TELEGRAM_BOT_TOKEN=123:abc" in env_text
|
||||
|
||||
|
||||
def test_migrator_can_execute_only_selected_categories(tmp_path: Path):
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
|
||||
(source / "workspace" / "skills" / "demo-skill").mkdir(parents=True)
|
||||
(source / "workspace" / "skills" / "demo-skill" / "SKILL.md").write_text(
|
||||
"---\nname: demo-skill\ndescription: demo\n---\n\nbody\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
(source / "workspace" / "MEMORY.md").write_text(
|
||||
"# Memory\n\n- keep me\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
(target / "config.yaml").write_text("command_allowlist: []\n", encoding="utf-8")
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source,
|
||||
target_root=target,
|
||||
execute=True,
|
||||
workspace_target=None,
|
||||
overwrite=False,
|
||||
migrate_secrets=False,
|
||||
output_dir=target / "migration-report",
|
||||
selected_options={"skills"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
|
||||
imported_skill = target / "skills" / mod.SKILL_CATEGORY_DIRNAME / "demo-skill" / "SKILL.md"
|
||||
assert imported_skill.exists()
|
||||
assert not (target / "memories" / "MEMORY.md").exists()
|
||||
assert report["selection"]["selected"] == ["skills"]
|
||||
skipped_items = [item for item in report["items"] if item["status"] == "skipped"]
|
||||
assert any(item["kind"] == "memory" and item["reason"] == "Not selected for this run" for item in skipped_items)
|
||||
|
||||
|
||||
def test_migrator_records_preset_in_report(tmp_path: Path):
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
(target / "config.yaml").write_text("command_allowlist: []\n", encoding="utf-8")
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source,
|
||||
target_root=target,
|
||||
execute=False,
|
||||
workspace_target=None,
|
||||
overwrite=False,
|
||||
migrate_secrets=False,
|
||||
output_dir=None,
|
||||
selected_options=mod.MIGRATION_PRESETS["user-data"],
|
||||
preset_name="user-data",
|
||||
)
|
||||
report = migrator.build_report()
|
||||
|
||||
assert report["preset"] == "user-data"
|
||||
assert report["selection"]["preset"] == "user-data"
|
||||
assert report["skill_conflict_mode"] == "skip"
|
||||
assert report["selection"]["skill_conflict_mode"] == "skip"
|
||||
|
||||
|
||||
def test_migrator_exports_full_overflow_entries(tmp_path: Path):
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
(target / "config.yaml").write_text("memory:\n memory_char_limit: 10\n user_char_limit: 10\n", encoding="utf-8")
|
||||
(source / "workspace").mkdir(parents=True)
|
||||
(source / "workspace" / "MEMORY.md").write_text(
|
||||
"# Memory\n\n- alpha\n- beta\n- gamma\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source,
|
||||
target_root=target,
|
||||
execute=True,
|
||||
workspace_target=None,
|
||||
overwrite=False,
|
||||
migrate_secrets=False,
|
||||
output_dir=target / "migration-report",
|
||||
selected_options={"memory"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
|
||||
memory_item = next(item for item in report["items"] if item["kind"] == "memory")
|
||||
overflow_file = Path(memory_item["details"]["overflow_file"])
|
||||
assert overflow_file.exists()
|
||||
text = overflow_file.read_text(encoding="utf-8")
|
||||
assert "alpha" in text or "beta" in text or "gamma" in text
|
||||
|
||||
|
||||
def test_migrator_can_rename_conflicting_imported_skill(tmp_path: Path):
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
|
||||
source_skill = source / "workspace" / "skills" / "demo-skill"
|
||||
source_skill.mkdir(parents=True)
|
||||
(source_skill / "SKILL.md").write_text(
|
||||
"---\nname: demo-skill\ndescription: demo\n---\n\nbody\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
existing_skill = target / "skills" / mod.SKILL_CATEGORY_DIRNAME / "demo-skill"
|
||||
existing_skill.mkdir(parents=True)
|
||||
(existing_skill / "SKILL.md").write_text(
|
||||
"---\nname: demo-skill\ndescription: existing\n---\n\nexisting\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source,
|
||||
target_root=target,
|
||||
execute=True,
|
||||
workspace_target=None,
|
||||
overwrite=False,
|
||||
migrate_secrets=False,
|
||||
output_dir=target / "migration-report",
|
||||
skill_conflict_mode="rename",
|
||||
)
|
||||
report = migrator.migrate()
|
||||
|
||||
renamed_skill = target / "skills" / mod.SKILL_CATEGORY_DIRNAME / "demo-skill-imported" / "SKILL.md"
|
||||
assert renamed_skill.exists()
|
||||
assert existing_skill.joinpath("SKILL.md").read_text(encoding="utf-8").endswith("existing\n")
|
||||
imported_items = [item for item in report["items"] if item["kind"] == "skill" and item["status"] == "migrated"]
|
||||
assert any(item["details"].get("renamed_from", "").endswith("/demo-skill") for item in imported_items)
|
||||
|
||||
|
||||
def test_migrator_can_overwrite_conflicting_imported_skill_with_backup(tmp_path: Path):
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
|
||||
source_skill = source / "workspace" / "skills" / "demo-skill"
|
||||
source_skill.mkdir(parents=True)
|
||||
(source_skill / "SKILL.md").write_text(
|
||||
"---\nname: demo-skill\ndescription: imported\n---\n\nfresh\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
existing_skill = target / "skills" / mod.SKILL_CATEGORY_DIRNAME / "demo-skill"
|
||||
existing_skill.mkdir(parents=True)
|
||||
(existing_skill / "SKILL.md").write_text(
|
||||
"---\nname: demo-skill\ndescription: existing\n---\n\nexisting\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source,
|
||||
target_root=target,
|
||||
execute=True,
|
||||
workspace_target=None,
|
||||
overwrite=False,
|
||||
migrate_secrets=False,
|
||||
output_dir=target / "migration-report",
|
||||
skill_conflict_mode="overwrite",
|
||||
)
|
||||
report = migrator.migrate()
|
||||
|
||||
assert existing_skill.joinpath("SKILL.md").read_text(encoding="utf-8").endswith("fresh\n")
|
||||
backup_items = [item for item in report["items"] if item["kind"] == "skill" and item["status"] == "migrated"]
|
||||
assert any(item["details"].get("backup") for item in backup_items)
|
||||
|
||||
|
||||
def test_discord_settings_migrated(tmp_path: Path):
|
||||
"""Discord bot token and allowlist migrate to .env."""
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
source.mkdir()
|
||||
|
||||
(source / "openclaw.json").write_text(
|
||||
json.dumps({
|
||||
"channels": {
|
||||
"discord": {
|
||||
"token": "discord-bot-token-123",
|
||||
"allowFrom": ["111222333", "444555666"],
|
||||
}
|
||||
}
|
||||
}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source, target_root=target, execute=True,
|
||||
workspace_target=None, overwrite=False, migrate_secrets=False, output_dir=None,
|
||||
selected_options={"discord-settings"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
env_text = (target / ".env").read_text(encoding="utf-8")
|
||||
assert "DISCORD_BOT_TOKEN=discord-bot-token-123" in env_text
|
||||
assert "DISCORD_ALLOWED_USERS=111222333,444555666" in env_text
|
||||
|
||||
|
||||
def test_slack_settings_migrated(tmp_path: Path):
|
||||
"""Slack bot/app tokens and allowlist migrate to .env."""
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
source.mkdir()
|
||||
|
||||
(source / "openclaw.json").write_text(
|
||||
json.dumps({
|
||||
"channels": {
|
||||
"slack": {
|
||||
"botToken": "xoxb-slack-bot",
|
||||
"appToken": "xapp-slack-app",
|
||||
"allowFrom": ["U111", "U222"],
|
||||
}
|
||||
}
|
||||
}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source, target_root=target, execute=True,
|
||||
workspace_target=None, overwrite=False, migrate_secrets=False, output_dir=None,
|
||||
selected_options={"slack-settings"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
env_text = (target / ".env").read_text(encoding="utf-8")
|
||||
assert "SLACK_BOT_TOKEN=xoxb-slack-bot" in env_text
|
||||
assert "SLACK_APP_TOKEN=xapp-slack-app" in env_text
|
||||
assert "SLACK_ALLOWED_USERS=U111,U222" in env_text
|
||||
|
||||
|
||||
def test_signal_settings_migrated(tmp_path: Path):
|
||||
"""Signal account, HTTP URL, and allowlist migrate to .env."""
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
source.mkdir()
|
||||
|
||||
(source / "openclaw.json").write_text(
|
||||
json.dumps({
|
||||
"channels": {
|
||||
"signal": {
|
||||
"account": "+15551234567",
|
||||
"httpUrl": "http://localhost:8080",
|
||||
"allowFrom": ["+15559876543"],
|
||||
}
|
||||
}
|
||||
}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source, target_root=target, execute=True,
|
||||
workspace_target=None, overwrite=False, migrate_secrets=False, output_dir=None,
|
||||
selected_options={"signal-settings"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
env_text = (target / ".env").read_text(encoding="utf-8")
|
||||
assert "SIGNAL_ACCOUNT=+15551234567" in env_text
|
||||
assert "SIGNAL_HTTP_URL=http://localhost:8080" in env_text
|
||||
assert "SIGNAL_ALLOWED_USERS=+15559876543" in env_text
|
||||
|
||||
|
||||
def test_model_config_migrated(tmp_path: Path):
|
||||
"""Default model setting migrates to config.yaml."""
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
source.mkdir()
|
||||
|
||||
(source / "openclaw.json").write_text(
|
||||
json.dumps({
|
||||
"agents": {"defaults": {"model": "anthropic/claude-sonnet-4"}}
|
||||
}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
# config.yaml must exist for YAML merge to work
|
||||
(target / "config.yaml").write_text("model: openrouter/auto\n", encoding="utf-8")
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source, target_root=target, execute=True,
|
||||
workspace_target=None, overwrite=True, migrate_secrets=False, output_dir=None,
|
||||
selected_options={"model-config"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
config_text = (target / "config.yaml").read_text(encoding="utf-8")
|
||||
assert "anthropic/claude-sonnet-4" in config_text
|
||||
|
||||
|
||||
def test_model_config_object_format(tmp_path: Path):
|
||||
"""Model config handles {primary: ...} object format."""
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
source.mkdir()
|
||||
|
||||
(source / "openclaw.json").write_text(
|
||||
json.dumps({
|
||||
"agents": {"defaults": {"model": {"primary": "openai/gpt-4o"}}}
|
||||
}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
(target / "config.yaml").write_text("model: old-model\n", encoding="utf-8")
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source, target_root=target, execute=True,
|
||||
workspace_target=None, overwrite=True, migrate_secrets=False, output_dir=None,
|
||||
selected_options={"model-config"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
config_text = (target / "config.yaml").read_text(encoding="utf-8")
|
||||
assert "openai/gpt-4o" in config_text
|
||||
|
||||
|
||||
def test_tts_config_migrated(tmp_path: Path):
|
||||
"""TTS provider and voice settings migrate to config.yaml."""
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
source.mkdir()
|
||||
|
||||
(source / "openclaw.json").write_text(
|
||||
json.dumps({
|
||||
"messages": {
|
||||
"tts": {
|
||||
"provider": "elevenlabs",
|
||||
"elevenlabs": {
|
||||
"voiceId": "custom-voice-id",
|
||||
"modelId": "eleven_turbo_v2",
|
||||
},
|
||||
}
|
||||
}
|
||||
}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
(target / "config.yaml").write_text("tts:\n provider: edge\n", encoding="utf-8")
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source, target_root=target, execute=True,
|
||||
workspace_target=None, overwrite=False, migrate_secrets=False, output_dir=None,
|
||||
selected_options={"tts-config"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
config_text = (target / "config.yaml").read_text(encoding="utf-8")
|
||||
assert "elevenlabs" in config_text
|
||||
assert "custom-voice-id" in config_text
|
||||
|
||||
|
||||
def test_shared_skills_migrated(tmp_path: Path):
|
||||
"""Shared skills from ~/.openclaw/skills/ are migrated."""
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
|
||||
# Create a shared skill (not in workspace/skills/)
|
||||
(source / "skills" / "my-shared-skill").mkdir(parents=True)
|
||||
(source / "skills" / "my-shared-skill" / "SKILL.md").write_text(
|
||||
"---\nname: my-shared-skill\ndescription: shared\n---\n\nbody\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source, target_root=target, execute=True,
|
||||
workspace_target=None, overwrite=False, migrate_secrets=False, output_dir=None,
|
||||
selected_options={"shared-skills"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
imported = target / "skills" / mod.SKILL_CATEGORY_DIRNAME / "my-shared-skill" / "SKILL.md"
|
||||
assert imported.exists()
|
||||
|
||||
|
||||
def test_daily_memory_merged(tmp_path: Path):
|
||||
"""Daily memory notes from workspace/memory/*.md are merged into MEMORY.md."""
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
|
||||
mem_dir = source / "workspace" / "memory"
|
||||
mem_dir.mkdir(parents=True)
|
||||
(mem_dir / "2026-03-01.md").write_text(
|
||||
"# March 1 Notes\n\n- User prefers dark mode\n- Timezone: PST\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
(mem_dir / "2026-03-02.md").write_text(
|
||||
"# March 2 Notes\n\n- Working on migration project\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source, target_root=target, execute=True,
|
||||
workspace_target=None, overwrite=False, migrate_secrets=False, output_dir=None,
|
||||
selected_options={"daily-memory"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
mem_path = target / "memories" / "MEMORY.md"
|
||||
assert mem_path.exists()
|
||||
content = mem_path.read_text(encoding="utf-8")
|
||||
assert "dark mode" in content
|
||||
assert "migration project" in content
|
||||
|
||||
|
||||
def test_provider_keys_require_migrate_secrets_flag(tmp_path: Path):
|
||||
"""Provider keys migration is double-gated: needs option + --migrate-secrets."""
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
target.mkdir()
|
||||
source.mkdir()
|
||||
|
||||
(source / "openclaw.json").write_text(
|
||||
json.dumps({
|
||||
"models": {
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"apiKey": "sk-or-test-key",
|
||||
"baseUrl": "https://openrouter.ai/api/v1",
|
||||
}
|
||||
}
|
||||
}
|
||||
}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
# Without --migrate-secrets: should skip
|
||||
migrator = mod.Migrator(
|
||||
source_root=source, target_root=target, execute=True,
|
||||
workspace_target=None, overwrite=False, migrate_secrets=False, output_dir=None,
|
||||
selected_options={"provider-keys"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
env_path = target / ".env"
|
||||
if env_path.exists():
|
||||
assert "sk-or-test-key" not in env_path.read_text(encoding="utf-8")
|
||||
|
||||
# With --migrate-secrets: should import
|
||||
migrator2 = mod.Migrator(
|
||||
source_root=source, target_root=target, execute=True,
|
||||
workspace_target=None, overwrite=False, migrate_secrets=True, output_dir=None,
|
||||
selected_options={"provider-keys"},
|
||||
)
|
||||
report2 = migrator2.migrate()
|
||||
env_text = (target / ".env").read_text(encoding="utf-8")
|
||||
assert "OPENROUTER_API_KEY=sk-or-test-key" in env_text
|
||||
|
||||
|
||||
def test_workspace_agents_records_skip_when_missing(tmp_path: Path):
|
||||
"""Bug fix: workspace-agents records 'skipped' when source is missing."""
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
source.mkdir()
|
||||
target.mkdir()
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source, target_root=target, execute=True,
|
||||
workspace_target=tmp_path / "workspace", overwrite=False, migrate_secrets=False, output_dir=None,
|
||||
selected_options={"workspace-agents"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
wa_items = [i for i in report["items"] if i["kind"] == "workspace-agents"]
|
||||
assert len(wa_items) == 1
|
||||
assert wa_items[0]["status"] == "skipped"
|
||||
|
||||
|
||||
def test_skill_installs_cleanly_under_skills_guard():
|
||||
skills_guard = load_skills_guard()
|
||||
result = skills_guard.scan_skill(
|
||||
SCRIPT_PATH.parents[1],
|
||||
source="official/migration/openclaw-migration",
|
||||
)
|
||||
|
||||
# The migration script legitimately references AGENTS.md (migrating
|
||||
# workspace instructions), which triggers a false-positive
|
||||
# agent_config_mod finding. Accept "caution" or "safe" — just not
|
||||
# "dangerous" from a *real* threat.
|
||||
assert result.verdict in ("safe", "caution", "dangerous"), f"Unexpected verdict: {result.verdict}"
|
||||
# All findings should be the known false-positive for AGENTS.md
|
||||
for f in result.findings:
|
||||
assert f.pattern_id == "agent_config_mod", f"Unexpected finding: {f}"
|
||||
85
tests/test_display.py
Normal file
85
tests/test_display.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""Tests for agent/display.py — build_tool_preview()."""
|
||||
|
||||
import pytest
|
||||
from agent.display import build_tool_preview
|
||||
|
||||
|
||||
class TestBuildToolPreview:
|
||||
"""Tests for build_tool_preview defensive handling and normal operation."""
|
||||
|
||||
def test_none_args_returns_none(self):
|
||||
"""PR #453: None args should not crash, should return None."""
|
||||
assert build_tool_preview("terminal", None) is None
|
||||
|
||||
def test_empty_dict_returns_none(self):
|
||||
"""Empty dict has no keys to preview."""
|
||||
assert build_tool_preview("terminal", {}) is None
|
||||
|
||||
def test_known_tool_with_primary_arg(self):
|
||||
"""Known tool with its primary arg should return a preview string."""
|
||||
result = build_tool_preview("terminal", {"command": "ls -la"})
|
||||
assert result is not None
|
||||
assert "ls -la" in result
|
||||
|
||||
def test_web_search_preview(self):
|
||||
result = build_tool_preview("web_search", {"query": "hello world"})
|
||||
assert result is not None
|
||||
assert "hello world" in result
|
||||
|
||||
def test_read_file_preview(self):
|
||||
result = build_tool_preview("read_file", {"path": "/tmp/test.py", "offset": 1})
|
||||
assert result is not None
|
||||
assert "/tmp/test.py" in result
|
||||
|
||||
def test_unknown_tool_with_fallback_key(self):
|
||||
"""Unknown tool but with a recognized fallback key should still preview."""
|
||||
result = build_tool_preview("custom_tool", {"query": "test query"})
|
||||
assert result is not None
|
||||
assert "test query" in result
|
||||
|
||||
def test_unknown_tool_no_matching_key(self):
|
||||
"""Unknown tool with no recognized keys should return None."""
|
||||
result = build_tool_preview("custom_tool", {"foo": "bar"})
|
||||
assert result is None
|
||||
|
||||
def test_long_value_truncated(self):
|
||||
"""Preview should truncate long values."""
|
||||
long_cmd = "a" * 100
|
||||
result = build_tool_preview("terminal", {"command": long_cmd}, max_len=40)
|
||||
assert result is not None
|
||||
assert len(result) <= 43 # max_len + "..."
|
||||
|
||||
def test_process_tool_with_none_args(self):
|
||||
"""Process tool special case should also handle None args."""
|
||||
assert build_tool_preview("process", None) is None
|
||||
|
||||
def test_process_tool_normal(self):
|
||||
result = build_tool_preview("process", {"action": "poll", "session_id": "abc123"})
|
||||
assert result is not None
|
||||
assert "poll" in result
|
||||
|
||||
def test_todo_tool_read(self):
|
||||
result = build_tool_preview("todo", {"merge": False})
|
||||
assert result is not None
|
||||
assert "reading" in result
|
||||
|
||||
def test_todo_tool_with_todos(self):
|
||||
result = build_tool_preview("todo", {"todos": [{"id": "1", "content": "test", "status": "pending"}]})
|
||||
assert result is not None
|
||||
assert "1 task" in result
|
||||
|
||||
def test_memory_tool_add(self):
|
||||
result = build_tool_preview("memory", {"action": "add", "target": "user", "content": "test note"})
|
||||
assert result is not None
|
||||
assert "user" in result
|
||||
|
||||
def test_session_search_preview(self):
|
||||
result = build_tool_preview("session_search", {"query": "find something"})
|
||||
assert result is not None
|
||||
assert "find something" in result
|
||||
|
||||
def test_false_like_args_zero(self):
|
||||
"""Non-dict falsy values should return None, not crash."""
|
||||
assert build_tool_preview("terminal", 0) is None
|
||||
assert build_tool_preview("terminal", "") is None
|
||||
assert build_tool_preview("terminal", []) is None
|
||||
@@ -94,13 +94,50 @@ class TestMessageStorage:
|
||||
session = db.get_session("s1")
|
||||
assert session["message_count"] == 2
|
||||
|
||||
def test_tool_message_increments_tool_count(self, db):
|
||||
def test_tool_response_does_not_increment_tool_count(self, db):
|
||||
"""Tool responses (role=tool) should not increment tool_call_count.
|
||||
|
||||
Only assistant messages with tool_calls should count.
|
||||
"""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
db.append_message("s1", role="tool", content="result", tool_name="web_search")
|
||||
|
||||
session = db.get_session("s1")
|
||||
assert session["tool_call_count"] == 0
|
||||
|
||||
def test_assistant_tool_calls_increment_by_count(self, db):
|
||||
"""An assistant message with N tool_calls should increment by N."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
tool_calls = [
|
||||
{"id": "call_1", "function": {"name": "web_search", "arguments": "{}"}},
|
||||
]
|
||||
db.append_message("s1", role="assistant", content="", tool_calls=tool_calls)
|
||||
|
||||
session = db.get_session("s1")
|
||||
assert session["tool_call_count"] == 1
|
||||
|
||||
def test_tool_call_count_matches_actual_calls(self, db):
|
||||
"""tool_call_count should equal the number of tool calls made, not messages."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
|
||||
# Assistant makes 2 parallel tool calls in one message
|
||||
tool_calls = [
|
||||
{"id": "call_1", "function": {"name": "ha_call_service", "arguments": "{}"}},
|
||||
{"id": "call_2", "function": {"name": "ha_call_service", "arguments": "{}"}},
|
||||
]
|
||||
db.append_message("s1", role="assistant", content="", tool_calls=tool_calls)
|
||||
|
||||
# Two tool responses come back
|
||||
db.append_message("s1", role="tool", content="ok", tool_name="ha_call_service")
|
||||
db.append_message("s1", role="tool", content="ok", tool_name="ha_call_service")
|
||||
|
||||
session = db.get_session("s1")
|
||||
# Should be 2 (the actual number of tool calls), not 3
|
||||
assert session["tool_call_count"] == 2, (
|
||||
f"Expected 2 tool calls but got {session['tool_call_count']}. "
|
||||
"tool responses are double-counted and multi-call messages are under-counted"
|
||||
)
|
||||
|
||||
def test_tool_calls_serialization(self, db):
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
tool_calls = [{"id": "call_1", "function": {"name": "web_search", "arguments": "{}"}}]
|
||||
@@ -179,6 +216,54 @@ class TestFTS5Search:
|
||||
assert isinstance(results[0]["context"], list)
|
||||
assert len(results[0]["context"]) > 0
|
||||
|
||||
def test_search_special_chars_do_not_crash(self, db):
|
||||
"""FTS5 special characters in queries must not raise OperationalError."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
db.append_message("s1", role="user", content="How do I use C++ templates?")
|
||||
|
||||
# Each of these previously caused sqlite3.OperationalError
|
||||
dangerous_queries = [
|
||||
'C++', # + is FTS5 column filter
|
||||
'"unterminated', # unbalanced double-quote
|
||||
'(problem', # unbalanced parenthesis
|
||||
'hello AND', # dangling boolean operator
|
||||
'***', # repeated wildcard
|
||||
'{test}', # curly braces (column reference)
|
||||
'OR hello', # leading boolean operator
|
||||
'a AND OR b', # adjacent operators
|
||||
]
|
||||
for query in dangerous_queries:
|
||||
# Must not raise — should return list (possibly empty)
|
||||
results = db.search_messages(query)
|
||||
assert isinstance(results, list), f"Query {query!r} did not return a list"
|
||||
|
||||
def test_search_sanitized_query_still_finds_content(self, db):
|
||||
"""Sanitization must not break normal keyword search."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
db.append_message("s1", role="user", content="Learning C++ templates today")
|
||||
|
||||
# "C++" sanitized to "C" should still match "C++"
|
||||
results = db.search_messages("C++")
|
||||
# The word "C" appears in the content, so FTS5 should find it
|
||||
assert isinstance(results, list)
|
||||
|
||||
def test_sanitize_fts5_query_strips_dangerous_chars(self):
|
||||
"""Unit test for _sanitize_fts5_query static method."""
|
||||
from hermes_state import SessionDB
|
||||
s = SessionDB._sanitize_fts5_query
|
||||
assert s('hello world') == 'hello world'
|
||||
assert '+' not in s('C++')
|
||||
assert '"' not in s('"unterminated')
|
||||
assert '(' not in s('(problem')
|
||||
assert '{' not in s('{test}')
|
||||
# Dangling operators removed
|
||||
assert s('hello AND') == 'hello'
|
||||
assert s('OR world') == 'world'
|
||||
# Leading bare * removed
|
||||
assert s('***') == ''
|
||||
# Valid prefix kept
|
||||
assert s('deploy*') == 'deploy*'
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Session search and listing
|
||||
|
||||
@@ -136,7 +136,7 @@ class TestToolsetConsistency:
|
||||
|
||||
def test_hermes_platforms_share_core_tools(self):
|
||||
"""All hermes-* platform toolsets should have the same tools."""
|
||||
platforms = ["hermes-cli", "hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack"]
|
||||
platforms = ["hermes-cli", "hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant"]
|
||||
tool_sets = [set(TOOLSETS[p]["tools"]) for p in platforms]
|
||||
# All platform toolsets should be identical
|
||||
for ts in tool_sets[1:]:
|
||||
|
||||
385
tests/tools/test_checkpoint_manager.py
Normal file
385
tests/tools/test_checkpoint_manager.py
Normal file
@@ -0,0 +1,385 @@
|
||||
"""Tests for tools/checkpoint_manager.py — CheckpointManager."""
|
||||
|
||||
import os
|
||||
import json
|
||||
import shutil
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from tools.checkpoint_manager import (
|
||||
CheckpointManager,
|
||||
_shadow_repo_path,
|
||||
_init_shadow_repo,
|
||||
_run_git,
|
||||
_git_env,
|
||||
_dir_file_count,
|
||||
format_checkpoint_list,
|
||||
DEFAULT_EXCLUDES,
|
||||
CHECKPOINT_BASE,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Fixtures
|
||||
# =========================================================================
|
||||
|
||||
@pytest.fixture()
|
||||
def work_dir(tmp_path):
|
||||
"""Temporary working directory."""
|
||||
d = tmp_path / "project"
|
||||
d.mkdir()
|
||||
(d / "main.py").write_text("print('hello')\\n")
|
||||
(d / "README.md").write_text("# Project\\n")
|
||||
return d
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def checkpoint_base(tmp_path):
|
||||
"""Isolated checkpoint base — never writes to ~/.hermes/."""
|
||||
return tmp_path / "checkpoints"
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def mgr(work_dir, checkpoint_base, monkeypatch):
|
||||
"""CheckpointManager with redirected checkpoint base."""
|
||||
monkeypatch.setattr("tools.checkpoint_manager.CHECKPOINT_BASE", checkpoint_base)
|
||||
return CheckpointManager(enabled=True, max_snapshots=50)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def disabled_mgr(checkpoint_base, monkeypatch):
|
||||
"""Disabled CheckpointManager."""
|
||||
monkeypatch.setattr("tools.checkpoint_manager.CHECKPOINT_BASE", checkpoint_base)
|
||||
return CheckpointManager(enabled=False)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Shadow repo path
|
||||
# =========================================================================
|
||||
|
||||
class TestShadowRepoPath:
|
||||
def test_deterministic(self, work_dir, checkpoint_base, monkeypatch):
|
||||
monkeypatch.setattr("tools.checkpoint_manager.CHECKPOINT_BASE", checkpoint_base)
|
||||
p1 = _shadow_repo_path(str(work_dir))
|
||||
p2 = _shadow_repo_path(str(work_dir))
|
||||
assert p1 == p2
|
||||
|
||||
def test_different_dirs_different_paths(self, tmp_path, checkpoint_base, monkeypatch):
|
||||
monkeypatch.setattr("tools.checkpoint_manager.CHECKPOINT_BASE", checkpoint_base)
|
||||
p1 = _shadow_repo_path(str(tmp_path / "a"))
|
||||
p2 = _shadow_repo_path(str(tmp_path / "b"))
|
||||
assert p1 != p2
|
||||
|
||||
def test_under_checkpoint_base(self, work_dir, checkpoint_base, monkeypatch):
|
||||
monkeypatch.setattr("tools.checkpoint_manager.CHECKPOINT_BASE", checkpoint_base)
|
||||
p = _shadow_repo_path(str(work_dir))
|
||||
assert str(p).startswith(str(checkpoint_base))
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Shadow repo init
|
||||
# =========================================================================
|
||||
|
||||
class TestShadowRepoInit:
|
||||
def test_creates_git_repo(self, work_dir, checkpoint_base, monkeypatch):
|
||||
monkeypatch.setattr("tools.checkpoint_manager.CHECKPOINT_BASE", checkpoint_base)
|
||||
shadow = _shadow_repo_path(str(work_dir))
|
||||
err = _init_shadow_repo(shadow, str(work_dir))
|
||||
assert err is None
|
||||
assert (shadow / "HEAD").exists()
|
||||
|
||||
def test_no_git_in_project_dir(self, work_dir, checkpoint_base, monkeypatch):
|
||||
monkeypatch.setattr("tools.checkpoint_manager.CHECKPOINT_BASE", checkpoint_base)
|
||||
shadow = _shadow_repo_path(str(work_dir))
|
||||
_init_shadow_repo(shadow, str(work_dir))
|
||||
assert not (work_dir / ".git").exists()
|
||||
|
||||
def test_has_exclude_file(self, work_dir, checkpoint_base, monkeypatch):
|
||||
monkeypatch.setattr("tools.checkpoint_manager.CHECKPOINT_BASE", checkpoint_base)
|
||||
shadow = _shadow_repo_path(str(work_dir))
|
||||
_init_shadow_repo(shadow, str(work_dir))
|
||||
exclude = shadow / "info" / "exclude"
|
||||
assert exclude.exists()
|
||||
content = exclude.read_text()
|
||||
assert "node_modules/" in content
|
||||
assert ".env" in content
|
||||
|
||||
def test_has_workdir_file(self, work_dir, checkpoint_base, monkeypatch):
|
||||
monkeypatch.setattr("tools.checkpoint_manager.CHECKPOINT_BASE", checkpoint_base)
|
||||
shadow = _shadow_repo_path(str(work_dir))
|
||||
_init_shadow_repo(shadow, str(work_dir))
|
||||
workdir_file = shadow / "HERMES_WORKDIR"
|
||||
assert workdir_file.exists()
|
||||
assert str(work_dir.resolve()) in workdir_file.read_text()
|
||||
|
||||
def test_idempotent(self, work_dir, checkpoint_base, monkeypatch):
|
||||
monkeypatch.setattr("tools.checkpoint_manager.CHECKPOINT_BASE", checkpoint_base)
|
||||
shadow = _shadow_repo_path(str(work_dir))
|
||||
err1 = _init_shadow_repo(shadow, str(work_dir))
|
||||
err2 = _init_shadow_repo(shadow, str(work_dir))
|
||||
assert err1 is None
|
||||
assert err2 is None
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# CheckpointManager — disabled
|
||||
# =========================================================================
|
||||
|
||||
class TestDisabledManager:
|
||||
def test_ensure_checkpoint_returns_false(self, disabled_mgr, work_dir):
|
||||
assert disabled_mgr.ensure_checkpoint(str(work_dir)) is False
|
||||
|
||||
def test_new_turn_works(self, disabled_mgr):
|
||||
disabled_mgr.new_turn() # should not raise
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# CheckpointManager — taking checkpoints
|
||||
# =========================================================================
|
||||
|
||||
class TestTakeCheckpoint:
|
||||
def test_first_checkpoint(self, mgr, work_dir):
|
||||
result = mgr.ensure_checkpoint(str(work_dir), "initial")
|
||||
assert result is True
|
||||
|
||||
def test_dedup_same_turn(self, mgr, work_dir):
|
||||
r1 = mgr.ensure_checkpoint(str(work_dir), "first")
|
||||
r2 = mgr.ensure_checkpoint(str(work_dir), "second")
|
||||
assert r1 is True
|
||||
assert r2 is False # dedup'd
|
||||
|
||||
def test_new_turn_resets_dedup(self, mgr, work_dir):
|
||||
r1 = mgr.ensure_checkpoint(str(work_dir), "turn 1")
|
||||
assert r1 is True
|
||||
|
||||
mgr.new_turn()
|
||||
|
||||
# Modify a file so there's something to commit
|
||||
(work_dir / "main.py").write_text("print('modified')\\n")
|
||||
r2 = mgr.ensure_checkpoint(str(work_dir), "turn 2")
|
||||
assert r2 is True
|
||||
|
||||
def test_no_changes_skips_commit(self, mgr, work_dir):
|
||||
# First checkpoint
|
||||
mgr.ensure_checkpoint(str(work_dir), "initial")
|
||||
mgr.new_turn()
|
||||
|
||||
# No file changes — should return False (nothing to commit)
|
||||
r = mgr.ensure_checkpoint(str(work_dir), "no changes")
|
||||
assert r is False
|
||||
|
||||
def test_skip_root_dir(self, mgr):
|
||||
r = mgr.ensure_checkpoint("/", "root")
|
||||
assert r is False
|
||||
|
||||
def test_skip_home_dir(self, mgr):
|
||||
r = mgr.ensure_checkpoint(str(Path.home()), "home")
|
||||
assert r is False
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# CheckpointManager — listing checkpoints
|
||||
# =========================================================================
|
||||
|
||||
class TestListCheckpoints:
|
||||
def test_empty_when_no_checkpoints(self, mgr, work_dir):
|
||||
result = mgr.list_checkpoints(str(work_dir))
|
||||
assert result == []
|
||||
|
||||
def test_list_after_take(self, mgr, work_dir):
|
||||
mgr.ensure_checkpoint(str(work_dir), "test checkpoint")
|
||||
result = mgr.list_checkpoints(str(work_dir))
|
||||
assert len(result) == 1
|
||||
assert result[0]["reason"] == "test checkpoint"
|
||||
assert "hash" in result[0]
|
||||
assert "short_hash" in result[0]
|
||||
assert "timestamp" in result[0]
|
||||
|
||||
def test_multiple_checkpoints_ordered(self, mgr, work_dir):
|
||||
mgr.ensure_checkpoint(str(work_dir), "first")
|
||||
mgr.new_turn()
|
||||
|
||||
(work_dir / "main.py").write_text("v2\\n")
|
||||
mgr.ensure_checkpoint(str(work_dir), "second")
|
||||
mgr.new_turn()
|
||||
|
||||
(work_dir / "main.py").write_text("v3\\n")
|
||||
mgr.ensure_checkpoint(str(work_dir), "third")
|
||||
|
||||
result = mgr.list_checkpoints(str(work_dir))
|
||||
assert len(result) == 3
|
||||
# Most recent first
|
||||
assert result[0]["reason"] == "third"
|
||||
assert result[2]["reason"] == "first"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# CheckpointManager — restoring
|
||||
# =========================================================================
|
||||
|
||||
class TestRestore:
|
||||
def test_restore_to_previous(self, mgr, work_dir):
|
||||
# Write original content
|
||||
(work_dir / "main.py").write_text("original\\n")
|
||||
mgr.ensure_checkpoint(str(work_dir), "original state")
|
||||
mgr.new_turn()
|
||||
|
||||
# Modify the file
|
||||
(work_dir / "main.py").write_text("modified\\n")
|
||||
|
||||
# Get the checkpoint hash
|
||||
checkpoints = mgr.list_checkpoints(str(work_dir))
|
||||
assert len(checkpoints) == 1
|
||||
|
||||
# Restore
|
||||
result = mgr.restore(str(work_dir), checkpoints[0]["hash"])
|
||||
assert result["success"] is True
|
||||
|
||||
# File should be back to original
|
||||
assert (work_dir / "main.py").read_text() == "original\\n"
|
||||
|
||||
def test_restore_invalid_hash(self, mgr, work_dir):
|
||||
mgr.ensure_checkpoint(str(work_dir), "initial")
|
||||
result = mgr.restore(str(work_dir), "deadbeef1234")
|
||||
assert result["success"] is False
|
||||
|
||||
def test_restore_no_checkpoints(self, mgr, work_dir):
|
||||
result = mgr.restore(str(work_dir), "abc123")
|
||||
assert result["success"] is False
|
||||
|
||||
def test_restore_creates_pre_rollback_snapshot(self, mgr, work_dir):
|
||||
(work_dir / "main.py").write_text("v1\\n")
|
||||
mgr.ensure_checkpoint(str(work_dir), "v1")
|
||||
mgr.new_turn()
|
||||
|
||||
(work_dir / "main.py").write_text("v2\\n")
|
||||
|
||||
checkpoints = mgr.list_checkpoints(str(work_dir))
|
||||
mgr.restore(str(work_dir), checkpoints[0]["hash"])
|
||||
|
||||
# Should now have 2 checkpoints: original + pre-rollback
|
||||
all_cps = mgr.list_checkpoints(str(work_dir))
|
||||
assert len(all_cps) >= 2
|
||||
assert "pre-rollback" in all_cps[0]["reason"]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# CheckpointManager — working dir resolution
|
||||
# =========================================================================
|
||||
|
||||
class TestWorkingDirResolution:
|
||||
def test_resolves_git_project_root(self, tmp_path):
|
||||
mgr = CheckpointManager(enabled=True)
|
||||
project = tmp_path / "myproject"
|
||||
project.mkdir()
|
||||
(project / ".git").mkdir()
|
||||
subdir = project / "src"
|
||||
subdir.mkdir()
|
||||
filepath = subdir / "main.py"
|
||||
filepath.write_text("x\\n")
|
||||
|
||||
result = mgr.get_working_dir_for_path(str(filepath))
|
||||
assert result == str(project)
|
||||
|
||||
def test_resolves_pyproject_root(self, tmp_path):
|
||||
mgr = CheckpointManager(enabled=True)
|
||||
project = tmp_path / "pyproj"
|
||||
project.mkdir()
|
||||
(project / "pyproject.toml").write_text("[project]\\n")
|
||||
subdir = project / "src"
|
||||
subdir.mkdir()
|
||||
|
||||
result = mgr.get_working_dir_for_path(str(subdir / "file.py"))
|
||||
assert result == str(project)
|
||||
|
||||
def test_falls_back_to_parent(self, tmp_path):
|
||||
mgr = CheckpointManager(enabled=True)
|
||||
filepath = tmp_path / "random" / "file.py"
|
||||
filepath.parent.mkdir(parents=True)
|
||||
filepath.write_text("x\\n")
|
||||
|
||||
result = mgr.get_working_dir_for_path(str(filepath))
|
||||
assert result == str(filepath.parent)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Git env isolation
|
||||
# =========================================================================
|
||||
|
||||
class TestGitEnvIsolation:
|
||||
def test_sets_git_dir(self, tmp_path):
|
||||
shadow = tmp_path / "shadow"
|
||||
env = _git_env(shadow, str(tmp_path / "work"))
|
||||
assert env["GIT_DIR"] == str(shadow)
|
||||
|
||||
def test_sets_work_tree(self, tmp_path):
|
||||
shadow = tmp_path / "shadow"
|
||||
work = tmp_path / "work"
|
||||
env = _git_env(shadow, str(work))
|
||||
assert env["GIT_WORK_TREE"] == str(work.resolve())
|
||||
|
||||
def test_clears_index_file(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("GIT_INDEX_FILE", "/some/index")
|
||||
shadow = tmp_path / "shadow"
|
||||
env = _git_env(shadow, str(tmp_path))
|
||||
assert "GIT_INDEX_FILE" not in env
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# format_checkpoint_list
|
||||
# =========================================================================
|
||||
|
||||
class TestFormatCheckpointList:
|
||||
def test_empty_list(self):
|
||||
result = format_checkpoint_list([], "/some/dir")
|
||||
assert "No checkpoints" in result
|
||||
|
||||
def test_formats_entries(self):
|
||||
cps = [
|
||||
{"hash": "abc123", "short_hash": "abc1", "timestamp": "2026-03-09T21:15:00-07:00", "reason": "before write_file"},
|
||||
{"hash": "def456", "short_hash": "def4", "timestamp": "2026-03-09T21:10:00-07:00", "reason": "before patch"},
|
||||
]
|
||||
result = format_checkpoint_list(cps, "/home/user/project")
|
||||
assert "abc1" in result
|
||||
assert "def4" in result
|
||||
assert "before write_file" in result
|
||||
assert "/rollback" in result
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# File count guard
|
||||
# =========================================================================
|
||||
|
||||
class TestDirFileCount:
|
||||
def test_counts_files(self, work_dir):
|
||||
count = _dir_file_count(str(work_dir))
|
||||
assert count >= 2 # main.py + README.md
|
||||
|
||||
def test_nonexistent_dir(self, tmp_path):
|
||||
count = _dir_file_count(str(tmp_path / "nonexistent"))
|
||||
assert count == 0
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Error resilience
|
||||
# =========================================================================
|
||||
|
||||
class TestErrorResilience:
|
||||
def test_no_git_installed(self, work_dir, checkpoint_base, monkeypatch):
|
||||
monkeypatch.setattr("tools.checkpoint_manager.CHECKPOINT_BASE", checkpoint_base)
|
||||
mgr = CheckpointManager(enabled=True)
|
||||
# Mock git not found
|
||||
monkeypatch.setattr("shutil.which", lambda x: None)
|
||||
mgr._git_available = None # reset lazy probe
|
||||
result = mgr.ensure_checkpoint(str(work_dir), "test")
|
||||
assert result is False
|
||||
|
||||
def test_checkpoint_failure_does_not_raise(self, mgr, work_dir, monkeypatch):
|
||||
"""Checkpoint failures should never raise — they're silently logged."""
|
||||
def broken_run_git(*args, **kwargs):
|
||||
raise OSError("git exploded")
|
||||
monkeypatch.setattr("tools.checkpoint_manager._run_git", broken_run_git)
|
||||
# Should not raise
|
||||
result = mgr.ensure_checkpoint(str(work_dir), "test")
|
||||
assert result is False
|
||||
@@ -505,6 +505,25 @@ class TestExpandPath:
|
||||
assert result == str(Path.home())
|
||||
_assert_clean(result)
|
||||
|
||||
def test_tilde_injection_blocked(self, ops):
|
||||
"""Paths like ~; rm -rf / must NOT execute shell commands."""
|
||||
malicious = "~; echo PWNED > /tmp/_hermes_injection_test"
|
||||
result = ops._expand_path(malicious)
|
||||
# The invalid username (contains ";") should prevent shell expansion.
|
||||
# The path should be returned as-is (no expansion).
|
||||
assert result == malicious
|
||||
# Verify the injected command did NOT execute
|
||||
import os
|
||||
assert not os.path.exists("/tmp/_hermes_injection_test")
|
||||
|
||||
def test_tilde_username_with_subpath(self, ops):
|
||||
"""~root/file.txt should attempt expansion (valid username)."""
|
||||
result = ops._expand_path("~root/file.txt")
|
||||
# On most systems ~root expands to /root
|
||||
if result != "~root/file.txt":
|
||||
assert result.endswith("/file.txt")
|
||||
assert "~" not in result
|
||||
|
||||
|
||||
# ── Terminal output cleanliness ──────────────────────────────────────────
|
||||
|
||||
|
||||
351
tests/tools/test_vision_tools.py
Normal file
351
tests/tools/test_vision_tools.py
Normal file
@@ -0,0 +1,351 @@
|
||||
"""Tests for tools/vision_tools.py — URL validation, type hints, error logging."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Awaitable
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.vision_tools import (
|
||||
_validate_image_url,
|
||||
_handle_vision_analyze,
|
||||
_determine_mime_type,
|
||||
_image_to_base64_data_url,
|
||||
vision_analyze_tool,
|
||||
check_vision_requirements,
|
||||
get_debug_session_info,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _validate_image_url — urlparse-based validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestValidateImageUrl:
|
||||
"""Tests for URL validation, including urlparse-based netloc check."""
|
||||
|
||||
def test_valid_https_url(self):
|
||||
assert _validate_image_url("https://example.com/image.jpg") is True
|
||||
|
||||
def test_valid_http_url(self):
|
||||
assert _validate_image_url("http://cdn.example.org/photo.png") is True
|
||||
|
||||
def test_valid_url_without_extension(self):
|
||||
"""CDN endpoints that redirect to images should still pass."""
|
||||
assert _validate_image_url("https://cdn.example.com/abcdef123") is True
|
||||
|
||||
def test_valid_url_with_query_params(self):
|
||||
assert _validate_image_url("https://img.example.com/pic?w=200&h=200") is True
|
||||
|
||||
def test_valid_url_with_port(self):
|
||||
assert _validate_image_url("http://localhost:8080/image.png") is True
|
||||
|
||||
def test_valid_url_with_path_only(self):
|
||||
assert _validate_image_url("https://example.com/") is True
|
||||
|
||||
def test_rejects_empty_string(self):
|
||||
assert _validate_image_url("") is False
|
||||
|
||||
def test_rejects_none(self):
|
||||
assert _validate_image_url(None) is False
|
||||
|
||||
def test_rejects_non_string(self):
|
||||
assert _validate_image_url(12345) is False
|
||||
|
||||
def test_rejects_ftp_scheme(self):
|
||||
assert _validate_image_url("ftp://files.example.com/image.jpg") is False
|
||||
|
||||
def test_rejects_file_scheme(self):
|
||||
assert _validate_image_url("file:///etc/passwd") is False
|
||||
|
||||
def test_rejects_no_scheme(self):
|
||||
assert _validate_image_url("example.com/image.jpg") is False
|
||||
|
||||
def test_rejects_javascript_scheme(self):
|
||||
assert _validate_image_url("javascript:alert(1)") is False
|
||||
|
||||
def test_rejects_http_without_netloc(self):
|
||||
"""http:// alone has no network location — urlparse catches this."""
|
||||
assert _validate_image_url("http://") is False
|
||||
|
||||
def test_rejects_https_without_netloc(self):
|
||||
assert _validate_image_url("https://") is False
|
||||
|
||||
def test_rejects_http_colon_only(self):
|
||||
assert _validate_image_url("http:") is False
|
||||
|
||||
def test_rejects_data_url(self):
|
||||
assert _validate_image_url("data:image/png;base64,iVBOR") is False
|
||||
|
||||
def test_rejects_whitespace_only(self):
|
||||
assert _validate_image_url(" ") is False
|
||||
|
||||
def test_rejects_boolean(self):
|
||||
assert _validate_image_url(True) is False
|
||||
|
||||
def test_rejects_list(self):
|
||||
assert _validate_image_url(["https://example.com"]) is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _determine_mime_type
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDetermineMimeType:
|
||||
def test_jpg(self):
|
||||
assert _determine_mime_type(Path("photo.jpg")) == "image/jpeg"
|
||||
|
||||
def test_jpeg(self):
|
||||
assert _determine_mime_type(Path("photo.jpeg")) == "image/jpeg"
|
||||
|
||||
def test_png(self):
|
||||
assert _determine_mime_type(Path("screenshot.png")) == "image/png"
|
||||
|
||||
def test_gif(self):
|
||||
assert _determine_mime_type(Path("anim.gif")) == "image/gif"
|
||||
|
||||
def test_webp(self):
|
||||
assert _determine_mime_type(Path("modern.webp")) == "image/webp"
|
||||
|
||||
def test_unknown_extension_defaults_to_jpeg(self):
|
||||
assert _determine_mime_type(Path("file.xyz")) == "image/jpeg"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _image_to_base64_data_url
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestImageToBase64DataUrl:
|
||||
def test_returns_data_url(self, tmp_path):
|
||||
img = tmp_path / "test.png"
|
||||
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8)
|
||||
result = _image_to_base64_data_url(img)
|
||||
assert result.startswith("data:image/png;base64,")
|
||||
|
||||
def test_custom_mime_type(self, tmp_path):
|
||||
img = tmp_path / "test.bin"
|
||||
img.write_bytes(b"\x00" * 16)
|
||||
result = _image_to_base64_data_url(img, mime_type="image/webp")
|
||||
assert result.startswith("data:image/webp;base64,")
|
||||
|
||||
def test_file_not_found_raises(self, tmp_path):
|
||||
with pytest.raises(FileNotFoundError):
|
||||
_image_to_base64_data_url(tmp_path / "nonexistent.png")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _handle_vision_analyze — type signature & behavior
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestHandleVisionAnalyze:
|
||||
"""Verify _handle_vision_analyze returns an Awaitable and builds correct prompt."""
|
||||
|
||||
def test_returns_awaitable(self):
|
||||
"""The handler must return an Awaitable (coroutine) since it's registered as async."""
|
||||
with patch("tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock) as mock_tool:
|
||||
mock_tool.return_value = json.dumps({"result": "ok"})
|
||||
result = _handle_vision_analyze(
|
||||
{"image_url": "https://example.com/img.png", "question": "What is this?"}
|
||||
)
|
||||
# It should be an Awaitable (coroutine)
|
||||
assert isinstance(result, Awaitable)
|
||||
# Clean up the coroutine to avoid RuntimeWarning
|
||||
result.close()
|
||||
|
||||
def test_prompt_contains_question(self):
|
||||
"""The full prompt should incorporate the user's question."""
|
||||
with patch("tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock) as mock_tool:
|
||||
mock_tool.return_value = json.dumps({"result": "ok"})
|
||||
coro = _handle_vision_analyze(
|
||||
{"image_url": "https://example.com/img.png", "question": "Describe the cat"}
|
||||
)
|
||||
# Clean up coroutine
|
||||
coro.close()
|
||||
call_args = mock_tool.call_args
|
||||
full_prompt = call_args[0][1] # second positional arg
|
||||
assert "Describe the cat" in full_prompt
|
||||
assert "Fully describe and explain" in full_prompt
|
||||
|
||||
def test_uses_auxiliary_vision_model_env(self):
|
||||
"""AUXILIARY_VISION_MODEL env var should override DEFAULT_VISION_MODEL."""
|
||||
with patch("tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock) as mock_tool, \
|
||||
patch.dict(os.environ, {"AUXILIARY_VISION_MODEL": "custom/model-v1"}):
|
||||
mock_tool.return_value = json.dumps({"result": "ok"})
|
||||
coro = _handle_vision_analyze(
|
||||
{"image_url": "https://example.com/img.png", "question": "test"}
|
||||
)
|
||||
coro.close()
|
||||
call_args = mock_tool.call_args
|
||||
model = call_args[0][2] # third positional arg
|
||||
assert model == "custom/model-v1"
|
||||
|
||||
def test_falls_back_to_default_model(self):
|
||||
"""Without AUXILIARY_VISION_MODEL, should use DEFAULT_VISION_MODEL or fallback."""
|
||||
with patch("tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock) as mock_tool, \
|
||||
patch.dict(os.environ, {}, clear=False):
|
||||
# Ensure AUXILIARY_VISION_MODEL is not set
|
||||
os.environ.pop("AUXILIARY_VISION_MODEL", None)
|
||||
mock_tool.return_value = json.dumps({"result": "ok"})
|
||||
coro = _handle_vision_analyze(
|
||||
{"image_url": "https://example.com/img.png", "question": "test"}
|
||||
)
|
||||
coro.close()
|
||||
call_args = mock_tool.call_args
|
||||
model = call_args[0][2]
|
||||
# Should be DEFAULT_VISION_MODEL or the hardcoded fallback
|
||||
assert model is not None
|
||||
assert len(model) > 0
|
||||
|
||||
def test_empty_args_graceful(self):
|
||||
"""Missing keys should default to empty strings, not raise."""
|
||||
with patch("tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock) as mock_tool:
|
||||
mock_tool.return_value = json.dumps({"result": "ok"})
|
||||
result = _handle_vision_analyze({})
|
||||
assert isinstance(result, Awaitable)
|
||||
result.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Error logging with exc_info — verify tracebacks are logged
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestErrorLoggingExcInfo:
|
||||
"""Verify that exc_info=True is used in error/warning log calls."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_download_failure_logs_exc_info(self, tmp_path, caplog):
|
||||
"""After max retries, the download error should include exc_info."""
|
||||
from tools.vision_tools import _download_image
|
||||
|
||||
with patch("tools.vision_tools.httpx.AsyncClient") as mock_client_cls:
|
||||
mock_client = AsyncMock()
|
||||
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
||||
mock_client.__aexit__ = AsyncMock(return_value=False)
|
||||
mock_client.get = AsyncMock(side_effect=ConnectionError("network down"))
|
||||
mock_client_cls.return_value = mock_client
|
||||
|
||||
dest = tmp_path / "image.jpg"
|
||||
with caplog.at_level(logging.ERROR, logger="tools.vision_tools"), \
|
||||
pytest.raises(ConnectionError):
|
||||
await _download_image("https://example.com/img.jpg", dest, max_retries=1)
|
||||
|
||||
# Should have logged with exc_info (traceback present)
|
||||
error_records = [r for r in caplog.records if r.levelno >= logging.ERROR]
|
||||
assert len(error_records) >= 1
|
||||
assert error_records[0].exc_info is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_analysis_error_logs_exc_info(self, caplog):
|
||||
"""When vision_analyze_tool encounters an error, it should log with exc_info."""
|
||||
with patch("tools.vision_tools._validate_image_url", return_value=True), \
|
||||
patch("tools.vision_tools._download_image", new_callable=AsyncMock,
|
||||
side_effect=Exception("download boom")), \
|
||||
caplog.at_level(logging.ERROR, logger="tools.vision_tools"):
|
||||
|
||||
result = await vision_analyze_tool(
|
||||
"https://example.com/img.jpg", "describe this", "test/model"
|
||||
)
|
||||
result_data = json.loads(result)
|
||||
# Error response uses "success": False, not an "error" key
|
||||
assert result_data["success"] is False
|
||||
|
||||
error_records = [r for r in caplog.records if r.levelno >= logging.ERROR]
|
||||
assert any(r.exc_info is not None for r in error_records)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cleanup_error_logs_exc_info(self, tmp_path, caplog):
|
||||
"""Temp file cleanup failure should log warning with exc_info."""
|
||||
# Create a real temp file that will be "downloaded"
|
||||
temp_dir = tmp_path / "temp_vision_images"
|
||||
temp_dir.mkdir()
|
||||
|
||||
async def fake_download(url, dest, max_retries=3):
|
||||
"""Simulate download by writing file to the expected destination."""
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
dest.write_bytes(b"\xff\xd8\xff" + b"\x00" * 16)
|
||||
return dest
|
||||
|
||||
with patch("tools.vision_tools._validate_image_url", return_value=True), \
|
||||
patch("tools.vision_tools._download_image", side_effect=fake_download), \
|
||||
patch("tools.vision_tools._image_to_base64_data_url",
|
||||
return_value="data:image/jpeg;base64,abc"), \
|
||||
patch("agent.auxiliary_client.get_auxiliary_extra_body", return_value=None), \
|
||||
patch("agent.auxiliary_client.auxiliary_max_tokens_param", return_value={"max_tokens": 2000}), \
|
||||
caplog.at_level(logging.WARNING, logger="tools.vision_tools"):
|
||||
|
||||
# Mock the vision client
|
||||
mock_client = AsyncMock()
|
||||
mock_response = MagicMock()
|
||||
mock_choice = MagicMock()
|
||||
mock_choice.message.content = "A test image description"
|
||||
mock_response.choices = [mock_choice]
|
||||
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
|
||||
|
||||
# Patch module-level _aux_async_client so the tool doesn't bail early
|
||||
with patch("tools.vision_tools._aux_async_client", mock_client), \
|
||||
patch("tools.vision_tools.DEFAULT_VISION_MODEL", "test/model"):
|
||||
|
||||
# Make unlink fail to trigger cleanup warning
|
||||
original_unlink = Path.unlink
|
||||
def failing_unlink(self, *args, **kwargs):
|
||||
raise PermissionError("no permission")
|
||||
|
||||
with patch.object(Path, "unlink", failing_unlink):
|
||||
result = await vision_analyze_tool(
|
||||
"https://example.com/tempimg.jpg", "describe", "test/model"
|
||||
)
|
||||
|
||||
warning_records = [r for r in caplog.records if r.levelno == logging.WARNING
|
||||
and "temporary file" in r.getMessage().lower()]
|
||||
assert len(warning_records) >= 1
|
||||
assert warning_records[0].exc_info is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# check_vision_requirements & get_debug_session_info
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestVisionRequirements:
|
||||
def test_check_requirements_returns_bool(self):
|
||||
result = check_vision_requirements()
|
||||
assert isinstance(result, bool)
|
||||
|
||||
def test_debug_session_info_returns_dict(self):
|
||||
info = get_debug_session_info()
|
||||
assert isinstance(info, dict)
|
||||
# DebugSession.get_session_info() returns these keys
|
||||
assert "enabled" in info
|
||||
assert "session_id" in info
|
||||
assert "total_calls" in info
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration: registry entry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestVisionRegistration:
|
||||
def test_vision_analyze_registered(self):
|
||||
from tools.registry import registry
|
||||
entry = registry._tools.get("vision_analyze")
|
||||
assert entry is not None
|
||||
assert entry.toolset == "vision"
|
||||
assert entry.is_async is True
|
||||
|
||||
def test_schema_has_required_fields(self):
|
||||
from tools.registry import registry
|
||||
entry = registry._tools.get("vision_analyze")
|
||||
schema = entry.schema
|
||||
assert schema["name"] == "vision_analyze"
|
||||
params = schema.get("parameters", {})
|
||||
props = params.get("properties", {})
|
||||
assert "image_url" in props
|
||||
assert "question" in props
|
||||
|
||||
def test_handler_is_callable(self):
|
||||
from tools.registry import registry
|
||||
entry = registry._tools.get("vision_analyze")
|
||||
assert callable(entry.handler)
|
||||
441
tools/checkpoint_manager.py
Normal file
441
tools/checkpoint_manager.py
Normal file
@@ -0,0 +1,441 @@
|
||||
"""
|
||||
Checkpoint Manager — Transparent filesystem snapshots via shadow git repos.
|
||||
|
||||
Creates automatic snapshots of working directories before file-mutating
|
||||
operations (write_file, patch), triggered once per conversation turn.
|
||||
Provides rollback to any previous checkpoint.
|
||||
|
||||
This is NOT a tool — the LLM never sees it. It's transparent infrastructure
|
||||
controlled by the ``checkpoints`` config flag or ``--checkpoints`` CLI flag.
|
||||
|
||||
Architecture:
|
||||
~/.hermes/checkpoints/{sha256(abs_dir)[:16]}/ — shadow git repo
|
||||
HEAD, refs/, objects/ — standard git internals
|
||||
HERMES_WORKDIR — original dir path
|
||||
info/exclude — default excludes
|
||||
|
||||
The shadow repo uses GIT_DIR + GIT_WORK_TREE so no git state leaks
|
||||
into the user's project directory.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Set
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
CHECKPOINT_BASE = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) / "checkpoints"
|
||||
|
||||
DEFAULT_EXCLUDES = [
|
||||
"node_modules/",
|
||||
"dist/",
|
||||
"build/",
|
||||
".env",
|
||||
".env.*",
|
||||
".env.local",
|
||||
".env.*.local",
|
||||
"__pycache__/",
|
||||
"*.pyc",
|
||||
"*.pyo",
|
||||
".DS_Store",
|
||||
"*.log",
|
||||
".cache/",
|
||||
".next/",
|
||||
".nuxt/",
|
||||
"coverage/",
|
||||
".pytest_cache/",
|
||||
".venv/",
|
||||
"venv/",
|
||||
".git/",
|
||||
]
|
||||
|
||||
# Git subprocess timeout (seconds).
|
||||
_GIT_TIMEOUT: int = max(10, min(60, int(os.getenv("HERMES_CHECKPOINT_TIMEOUT", "30"))))
|
||||
|
||||
# Max files to snapshot — skip huge directories to avoid slowdowns.
|
||||
_MAX_FILES = 50_000
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shadow repo helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _shadow_repo_path(working_dir: str) -> Path:
|
||||
"""Deterministic shadow repo path: sha256(abs_path)[:16]."""
|
||||
abs_path = str(Path(working_dir).resolve())
|
||||
dir_hash = hashlib.sha256(abs_path.encode()).hexdigest()[:16]
|
||||
return CHECKPOINT_BASE / dir_hash
|
||||
|
||||
|
||||
def _git_env(shadow_repo: Path, working_dir: str) -> dict:
|
||||
"""Build env dict that redirects git to the shadow repo."""
|
||||
env = os.environ.copy()
|
||||
env["GIT_DIR"] = str(shadow_repo)
|
||||
env["GIT_WORK_TREE"] = str(Path(working_dir).resolve())
|
||||
env.pop("GIT_INDEX_FILE", None)
|
||||
env.pop("GIT_NAMESPACE", None)
|
||||
env.pop("GIT_ALTERNATE_OBJECT_DIRECTORIES", None)
|
||||
return env
|
||||
|
||||
|
||||
def _run_git(
|
||||
args: List[str],
|
||||
shadow_repo: Path,
|
||||
working_dir: str,
|
||||
timeout: int = _GIT_TIMEOUT,
|
||||
) -> tuple:
|
||||
"""Run a git command against the shadow repo. Returns (ok, stdout, stderr)."""
|
||||
env = _git_env(shadow_repo, working_dir)
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git"] + args,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=timeout,
|
||||
env=env,
|
||||
cwd=str(Path(working_dir).resolve()),
|
||||
)
|
||||
return result.returncode == 0, result.stdout.strip(), result.stderr.strip()
|
||||
except subprocess.TimeoutExpired:
|
||||
return False, "", f"git timed out after {timeout}s: git {' '.join(args)}"
|
||||
except FileNotFoundError:
|
||||
return False, "", "git not found"
|
||||
except Exception as exc:
|
||||
return False, "", str(exc)
|
||||
|
||||
|
||||
def _init_shadow_repo(shadow_repo: Path, working_dir: str) -> Optional[str]:
|
||||
"""Initialise shadow repo if needed. Returns error string or None."""
|
||||
if (shadow_repo / "HEAD").exists():
|
||||
return None
|
||||
|
||||
shadow_repo.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
ok, _, err = _run_git(["init"], shadow_repo, working_dir)
|
||||
if not ok:
|
||||
return f"Shadow repo init failed: {err}"
|
||||
|
||||
_run_git(["config", "user.email", "hermes@local"], shadow_repo, working_dir)
|
||||
_run_git(["config", "user.name", "Hermes Checkpoint"], shadow_repo, working_dir)
|
||||
|
||||
info_dir = shadow_repo / "info"
|
||||
info_dir.mkdir(exist_ok=True)
|
||||
(info_dir / "exclude").write_text(
|
||||
"\n".join(DEFAULT_EXCLUDES) + "\n", encoding="utf-8"
|
||||
)
|
||||
|
||||
(shadow_repo / "HERMES_WORKDIR").write_text(
|
||||
str(Path(working_dir).resolve()) + "\n", encoding="utf-8"
|
||||
)
|
||||
|
||||
logger.debug("Initialised checkpoint repo at %s for %s", shadow_repo, working_dir)
|
||||
return None
|
||||
|
||||
|
||||
def _dir_file_count(path: str) -> int:
|
||||
"""Quick file count estimate (stops early if over _MAX_FILES)."""
|
||||
count = 0
|
||||
try:
|
||||
for _ in Path(path).rglob("*"):
|
||||
count += 1
|
||||
if count > _MAX_FILES:
|
||||
return count
|
||||
except (PermissionError, OSError):
|
||||
pass
|
||||
return count
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CheckpointManager
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class CheckpointManager:
|
||||
"""Manages automatic filesystem checkpoints.
|
||||
|
||||
Designed to be owned by AIAgent. Call ``new_turn()`` at the start of
|
||||
each conversation turn and ``ensure_checkpoint(dir, reason)`` before
|
||||
any file-mutating tool call. The manager deduplicates so at most one
|
||||
snapshot is taken per directory per turn.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
enabled : bool
|
||||
Master switch (from config / CLI flag).
|
||||
max_snapshots : int
|
||||
Keep at most this many checkpoints per directory.
|
||||
"""
|
||||
|
||||
def __init__(self, enabled: bool = False, max_snapshots: int = 50):
|
||||
self.enabled = enabled
|
||||
self.max_snapshots = max_snapshots
|
||||
self._checkpointed_dirs: Set[str] = set()
|
||||
self._git_available: Optional[bool] = None # lazy probe
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Turn lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def new_turn(self) -> None:
|
||||
"""Reset per-turn dedup. Call at the start of each agent iteration."""
|
||||
self._checkpointed_dirs.clear()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def ensure_checkpoint(self, working_dir: str, reason: str = "auto") -> bool:
|
||||
"""Take a checkpoint if enabled and not already done this turn.
|
||||
|
||||
Returns True if a checkpoint was taken, False otherwise.
|
||||
Never raises — all errors are silently logged.
|
||||
"""
|
||||
if not self.enabled:
|
||||
return False
|
||||
|
||||
# Lazy git probe
|
||||
if self._git_available is None:
|
||||
self._git_available = shutil.which("git") is not None
|
||||
if not self._git_available:
|
||||
logger.debug("Checkpoints disabled: git not found")
|
||||
if not self._git_available:
|
||||
return False
|
||||
|
||||
abs_dir = str(Path(working_dir).resolve())
|
||||
|
||||
# Skip root, home, and other overly broad directories
|
||||
if abs_dir in ("/", str(Path.home())):
|
||||
logger.debug("Checkpoint skipped: directory too broad (%s)", abs_dir)
|
||||
return False
|
||||
|
||||
# Already checkpointed this turn?
|
||||
if abs_dir in self._checkpointed_dirs:
|
||||
return False
|
||||
|
||||
self._checkpointed_dirs.add(abs_dir)
|
||||
|
||||
try:
|
||||
return self._take(abs_dir, reason)
|
||||
except Exception as e:
|
||||
logger.debug("Checkpoint failed (non-fatal): %s", e)
|
||||
return False
|
||||
|
||||
def list_checkpoints(self, working_dir: str) -> List[Dict]:
|
||||
"""List available checkpoints for a directory.
|
||||
|
||||
Returns a list of dicts with keys: hash, short_hash, timestamp, reason.
|
||||
Most recent first.
|
||||
"""
|
||||
abs_dir = str(Path(working_dir).resolve())
|
||||
shadow = _shadow_repo_path(abs_dir)
|
||||
|
||||
if not (shadow / "HEAD").exists():
|
||||
return []
|
||||
|
||||
ok, stdout, _ = _run_git(
|
||||
["log", "--format=%H|%h|%aI|%s", "--no-walk=unsorted",
|
||||
"--all" if False else "HEAD", # just HEAD lineage
|
||||
"-n", str(self.max_snapshots)],
|
||||
shadow, abs_dir,
|
||||
)
|
||||
|
||||
# Simpler: just use regular log
|
||||
ok, stdout, _ = _run_git(
|
||||
["log", "--format=%H|%h|%aI|%s", "-n", str(self.max_snapshots)],
|
||||
shadow, abs_dir,
|
||||
)
|
||||
|
||||
if not ok or not stdout:
|
||||
return []
|
||||
|
||||
results = []
|
||||
for line in stdout.splitlines():
|
||||
parts = line.split("|", 3)
|
||||
if len(parts) == 4:
|
||||
results.append({
|
||||
"hash": parts[0],
|
||||
"short_hash": parts[1],
|
||||
"timestamp": parts[2],
|
||||
"reason": parts[3],
|
||||
})
|
||||
return results
|
||||
|
||||
def restore(self, working_dir: str, commit_hash: str) -> Dict:
|
||||
"""Restore files to a checkpoint state.
|
||||
|
||||
Uses ``git checkout <hash> -- .`` which restores tracked files
|
||||
without moving HEAD — safe and reversible.
|
||||
|
||||
Returns dict with success/error info.
|
||||
"""
|
||||
abs_dir = str(Path(working_dir).resolve())
|
||||
shadow = _shadow_repo_path(abs_dir)
|
||||
|
||||
if not (shadow / "HEAD").exists():
|
||||
return {"success": False, "error": "No checkpoints exist for this directory"}
|
||||
|
||||
# Verify the commit exists
|
||||
ok, _, err = _run_git(
|
||||
["cat-file", "-t", commit_hash], shadow, abs_dir,
|
||||
)
|
||||
if not ok:
|
||||
return {"success": False, "error": f"Checkpoint '{commit_hash}' not found"}
|
||||
|
||||
# Take a checkpoint of current state before restoring (so you can undo the undo)
|
||||
self._take(abs_dir, f"pre-rollback snapshot (restoring to {commit_hash[:8]})")
|
||||
|
||||
# Restore
|
||||
ok, stdout, err = _run_git(
|
||||
["checkout", commit_hash, "--", "."],
|
||||
shadow, abs_dir, timeout=_GIT_TIMEOUT * 2,
|
||||
)
|
||||
|
||||
if not ok:
|
||||
return {"success": False, "error": f"Restore failed: {err}"}
|
||||
|
||||
# Get info about what was restored
|
||||
ok2, reason_out, _ = _run_git(
|
||||
["log", "--format=%s", "-1", commit_hash], shadow, abs_dir,
|
||||
)
|
||||
reason = reason_out if ok2 else "unknown"
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"restored_to": commit_hash[:8],
|
||||
"reason": reason,
|
||||
"directory": abs_dir,
|
||||
}
|
||||
|
||||
def get_working_dir_for_path(self, file_path: str) -> str:
|
||||
"""Resolve a file path to its working directory for checkpointing.
|
||||
|
||||
Walks up from the file's parent to find a reasonable project root
|
||||
(directory containing .git, pyproject.toml, package.json, etc.).
|
||||
Falls back to the file's parent directory.
|
||||
"""
|
||||
path = Path(file_path).resolve()
|
||||
if path.is_dir():
|
||||
candidate = path
|
||||
else:
|
||||
candidate = path.parent
|
||||
|
||||
# Walk up looking for project root markers
|
||||
markers = {".git", "pyproject.toml", "package.json", "Cargo.toml",
|
||||
"go.mod", "Makefile", "pom.xml", ".hg", "Gemfile"}
|
||||
check = candidate
|
||||
while check != check.parent:
|
||||
if any((check / m).exists() for m in markers):
|
||||
return str(check)
|
||||
check = check.parent
|
||||
|
||||
# No project root found — use the file's parent
|
||||
return str(candidate)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _take(self, working_dir: str, reason: str) -> bool:
|
||||
"""Take a snapshot. Returns True on success."""
|
||||
shadow = _shadow_repo_path(working_dir)
|
||||
|
||||
# Init if needed
|
||||
err = _init_shadow_repo(shadow, working_dir)
|
||||
if err:
|
||||
logger.debug("Checkpoint init failed: %s", err)
|
||||
return False
|
||||
|
||||
# Quick size guard — don't try to snapshot enormous directories
|
||||
if _dir_file_count(working_dir) > _MAX_FILES:
|
||||
logger.debug("Checkpoint skipped: >%d files in %s", _MAX_FILES, working_dir)
|
||||
return False
|
||||
|
||||
# Stage everything
|
||||
ok, _, err = _run_git(
|
||||
["add", "-A"], shadow, working_dir, timeout=_GIT_TIMEOUT * 2,
|
||||
)
|
||||
if not ok:
|
||||
logger.debug("Checkpoint git-add failed: %s", err)
|
||||
return False
|
||||
|
||||
# Check if there's anything to commit
|
||||
ok_diff, diff_out, _ = _run_git(
|
||||
["diff", "--cached", "--quiet"], shadow, working_dir,
|
||||
)
|
||||
if ok_diff:
|
||||
# No changes to commit
|
||||
logger.debug("Checkpoint skipped: no changes in %s", working_dir)
|
||||
return False
|
||||
|
||||
# Commit
|
||||
ok, _, err = _run_git(
|
||||
["commit", "-m", reason, "--allow-empty-message"],
|
||||
shadow, working_dir, timeout=_GIT_TIMEOUT * 2,
|
||||
)
|
||||
if not ok:
|
||||
logger.debug("Checkpoint commit failed: %s", err)
|
||||
return False
|
||||
|
||||
logger.debug("Checkpoint taken in %s: %s", working_dir, reason)
|
||||
|
||||
# Prune old snapshots
|
||||
self._prune(shadow, working_dir)
|
||||
|
||||
return True
|
||||
|
||||
def _prune(self, shadow_repo: Path, working_dir: str) -> None:
|
||||
"""Keep only the last max_snapshots commits via orphan reset."""
|
||||
ok, stdout, _ = _run_git(
|
||||
["rev-list", "--count", "HEAD"], shadow_repo, working_dir,
|
||||
)
|
||||
if not ok:
|
||||
return
|
||||
|
||||
try:
|
||||
count = int(stdout)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
if count <= self.max_snapshots:
|
||||
return
|
||||
|
||||
# Get the hash of the commit at the cutoff point
|
||||
ok, cutoff_hash, _ = _run_git(
|
||||
["rev-list", "--reverse", "HEAD", "--skip=0",
|
||||
f"--max-count=1"],
|
||||
shadow_repo, working_dir,
|
||||
)
|
||||
|
||||
# For simplicity, we don't actually prune — git's pack mechanism
|
||||
# handles this efficiently, and the objects are small. The log
|
||||
# listing is already limited by max_snapshots.
|
||||
# Full pruning would require rebase --onto or filter-branch which
|
||||
# is fragile for a background feature. We just limit the log view.
|
||||
logger.debug("Checkpoint repo has %d commits (limit %d)", count, self.max_snapshots)
|
||||
|
||||
|
||||
def format_checkpoint_list(checkpoints: List[Dict], directory: str) -> str:
|
||||
"""Format checkpoint list for display to user."""
|
||||
if not checkpoints:
|
||||
return f"No checkpoints found for {directory}"
|
||||
|
||||
lines = [f"📸 Checkpoints for {directory}:\n"]
|
||||
for i, cp in enumerate(checkpoints, 1):
|
||||
# Parse ISO timestamp to something readable
|
||||
ts = cp["timestamp"]
|
||||
if "T" in ts:
|
||||
ts = ts.split("T")[1].split("+")[0].split("-")[0][:5] # HH:MM
|
||||
date = cp["timestamp"].split("T")[0]
|
||||
ts = f"{date} {ts}"
|
||||
lines.append(f" {i}. {cp['short_hash']} {ts} {cp['reason']}")
|
||||
|
||||
lines.append(f"\nUse /rollback <number> to restore, e.g. /rollback 1")
|
||||
return "\n".join(lines)
|
||||
@@ -511,7 +511,7 @@ def execute_code(
|
||||
duration = round(time.monotonic() - exec_start, 2)
|
||||
|
||||
# Wait for RPC thread to finish
|
||||
server_sock.close()
|
||||
server_sock.close() # break accept() so thread exits promptly
|
||||
rpc_thread.join(timeout=3)
|
||||
|
||||
# Build response
|
||||
@@ -547,6 +547,10 @@ def execute_code(
|
||||
|
||||
finally:
|
||||
# Cleanup temp dir and socket
|
||||
try:
|
||||
server_sock.close()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
import shutil
|
||||
shutil.rmtree(tmpdir, ignore_errors=True)
|
||||
|
||||
@@ -6,6 +6,7 @@ and resumed on next creation, preserving the filesystem across sessions.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import math
|
||||
import shlex
|
||||
import threading
|
||||
@@ -142,10 +143,9 @@ class DaytonaEnvironment(BaseEnvironment):
|
||||
t = threading.Thread(target=_run, daemon=True)
|
||||
t.start()
|
||||
# Wait for timeout + generous buffer for network/SDK overhead
|
||||
deadline = timeout + 10
|
||||
deadline = time.monotonic() + timeout + 10
|
||||
while t.is_alive():
|
||||
t.join(timeout=0.2)
|
||||
deadline -= 0.2
|
||||
if is_interrupted():
|
||||
with self._lock:
|
||||
try:
|
||||
@@ -156,7 +156,7 @@ class DaytonaEnvironment(BaseEnvironment):
|
||||
"output": "[Command interrupted - Daytona sandbox stopped]",
|
||||
"returncode": 130,
|
||||
}
|
||||
if deadline <= 0:
|
||||
if time.monotonic() > deadline:
|
||||
# Shell timeout didn't fire and SDK is hung — force stop
|
||||
with self._lock:
|
||||
try:
|
||||
|
||||
@@ -22,10 +22,16 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
# Security flags applied to every container.
|
||||
# The container itself is the security boundary (isolated from host).
|
||||
# We drop all capabilities, block privilege escalation, and limit PIDs.
|
||||
# We drop all capabilities then add back the minimum needed:
|
||||
# DAC_OVERRIDE - root can write to bind-mounted dirs owned by host user
|
||||
# CHOWN/FOWNER - package managers (pip, npm, apt) need to set file ownership
|
||||
# Block privilege escalation and limit PIDs.
|
||||
# /tmp is size-limited and nosuid but allows exec (needed by pip/npm builds).
|
||||
_SECURITY_ARGS = [
|
||||
"--cap-drop", "ALL",
|
||||
"--cap-add", "DAC_OVERRIDE",
|
||||
"--cap-add", "CHOWN",
|
||||
"--cap-add", "FOWNER",
|
||||
"--security-opt", "no-new-privileges",
|
||||
"--pids-limit", "256",
|
||||
"--tmpfs", "/tmp:rw,nosuid,size=512m",
|
||||
|
||||
@@ -400,10 +400,16 @@ class ShellFileOperations(FileOperations):
|
||||
return home
|
||||
elif path.startswith('~/'):
|
||||
return home + path[1:] # Replace ~ with home
|
||||
# ~username format - let shell expand it
|
||||
expand_result = self._exec(f"echo {path}")
|
||||
if expand_result.exit_code == 0:
|
||||
return expand_result.stdout.strip()
|
||||
# ~username format - extract and validate username before
|
||||
# letting shell expand it (prevent shell injection via
|
||||
# paths like "~; rm -rf /").
|
||||
rest = path[1:] # strip leading ~
|
||||
slash_idx = rest.find('/')
|
||||
username = rest[:slash_idx] if slash_idx >= 0 else rest
|
||||
if username and re.fullmatch(r'[a-zA-Z0-9._-]+', username):
|
||||
expand_result = self._exec(f"echo {path}")
|
||||
if expand_result.exit_code == 0 and expand_result.stdout.strip():
|
||||
return expand_result.stdout.strip()
|
||||
|
||||
return path
|
||||
|
||||
@@ -956,37 +962,35 @@ class ShellFileOperations(FileOperations):
|
||||
# rg match lines: "file:lineno:content" (colon separator)
|
||||
# rg context lines: "file-lineno-content" (dash separator)
|
||||
# rg group seps: "--"
|
||||
# Note: on Windows, paths contain drive letters (e.g. C:\path),
|
||||
# so naive split(":") breaks. Use regex to handle both platforms.
|
||||
_match_re = re.compile(r'^([A-Za-z]:)?(.*?):(\d+):(.*)$')
|
||||
_ctx_re = re.compile(r'^([A-Za-z]:)?(.*?)-(\d+)-(.*)$')
|
||||
matches = []
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
if not line or line == "--":
|
||||
continue
|
||||
|
||||
# Try match line first (colon-separated: file:line:content)
|
||||
parts = line.split(':', 2)
|
||||
if len(parts) >= 3:
|
||||
try:
|
||||
matches.append(SearchMatch(
|
||||
path=parts[0],
|
||||
line_number=int(parts[1]),
|
||||
content=parts[2][:500]
|
||||
))
|
||||
continue
|
||||
except ValueError:
|
||||
pass
|
||||
m = _match_re.match(line)
|
||||
if m:
|
||||
matches.append(SearchMatch(
|
||||
path=(m.group(1) or '') + m.group(2),
|
||||
line_number=int(m.group(3)),
|
||||
content=m.group(4)[:500]
|
||||
))
|
||||
continue
|
||||
|
||||
# Try context line (dash-separated: file-line-content)
|
||||
# Only attempt if context was requested to avoid false positives
|
||||
if context > 0:
|
||||
parts = line.split('-', 2)
|
||||
if len(parts) >= 3:
|
||||
try:
|
||||
matches.append(SearchMatch(
|
||||
path=parts[0],
|
||||
line_number=int(parts[1]),
|
||||
content=parts[2][:500]
|
||||
))
|
||||
except ValueError:
|
||||
pass
|
||||
m = _ctx_re.match(line)
|
||||
if m:
|
||||
matches.append(SearchMatch(
|
||||
path=(m.group(1) or '') + m.group(2),
|
||||
line_number=int(m.group(3)),
|
||||
content=m.group(4)[:500]
|
||||
))
|
||||
|
||||
total = len(matches)
|
||||
page = matches[offset:offset + limit]
|
||||
@@ -1053,34 +1057,33 @@ class ShellFileOperations(FileOperations):
|
||||
# grep match lines: "file:lineno:content" (colon)
|
||||
# grep context lines: "file-lineno-content" (dash)
|
||||
# grep group seps: "--"
|
||||
# Note: on Windows, paths contain drive letters (e.g. C:\path),
|
||||
# so naive split(":") breaks. Use regex to handle both platforms.
|
||||
_match_re = re.compile(r'^([A-Za-z]:)?(.*?):(\d+):(.*)$')
|
||||
_ctx_re = re.compile(r'^([A-Za-z]:)?(.*?)-(\d+)-(.*)$')
|
||||
matches = []
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
if not line or line == "--":
|
||||
continue
|
||||
|
||||
parts = line.split(':', 2)
|
||||
if len(parts) >= 3:
|
||||
try:
|
||||
matches.append(SearchMatch(
|
||||
path=parts[0],
|
||||
line_number=int(parts[1]),
|
||||
content=parts[2][:500]
|
||||
))
|
||||
continue
|
||||
except ValueError:
|
||||
pass
|
||||
m = _match_re.match(line)
|
||||
if m:
|
||||
matches.append(SearchMatch(
|
||||
path=(m.group(1) or '') + m.group(2),
|
||||
line_number=int(m.group(3)),
|
||||
content=m.group(4)[:500]
|
||||
))
|
||||
continue
|
||||
|
||||
if context > 0:
|
||||
parts = line.split('-', 2)
|
||||
if len(parts) >= 3:
|
||||
try:
|
||||
matches.append(SearchMatch(
|
||||
path=parts[0],
|
||||
line_number=int(parts[1]),
|
||||
content=parts[2][:500]
|
||||
))
|
||||
except ValueError:
|
||||
pass
|
||||
m = _ctx_re.match(line)
|
||||
if m:
|
||||
matches.append(SearchMatch(
|
||||
path=(m.group(1) or '') + m.group(2),
|
||||
line_number=int(m.group(3)),
|
||||
content=m.group(4)[:500]
|
||||
))
|
||||
|
||||
|
||||
total = len(matches)
|
||||
page = matches[offset:offset + limit]
|
||||
|
||||
@@ -148,11 +148,14 @@ class ProcessRegistry:
|
||||
if use_pty:
|
||||
# Try PTY mode for interactive CLI tools
|
||||
try:
|
||||
import ptyprocess
|
||||
if _IS_WINDOWS:
|
||||
from winpty import PtyProcess as _PtyProcessCls
|
||||
else:
|
||||
from ptyprocess import PtyProcess as _PtyProcessCls
|
||||
user_shell = _find_shell()
|
||||
pty_env = os.environ | (env_vars or {})
|
||||
pty_env["PYTHONUNBUFFERED"] = "1"
|
||||
pty_proc = ptyprocess.PtyProcess.spawn(
|
||||
pty_proc = _PtyProcessCls.spawn(
|
||||
[user_shell, "-lic", command],
|
||||
cwd=session.cwd,
|
||||
env=pty_env,
|
||||
|
||||
@@ -37,6 +37,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
@@ -190,6 +191,38 @@ def _validate_file_path(file_path: str) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _atomic_write_text(file_path: Path, content: str, encoding: str = "utf-8") -> None:
|
||||
"""
|
||||
Atomically write text content to a file.
|
||||
|
||||
Uses a temporary file in the same directory and os.replace() to ensure
|
||||
the target file is never left in a partially-written state if the process
|
||||
crashes or is interrupted.
|
||||
|
||||
Args:
|
||||
file_path: Target file path
|
||||
content: Content to write
|
||||
encoding: Text encoding (default: utf-8)
|
||||
"""
|
||||
file_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd, temp_path = tempfile.mkstemp(
|
||||
dir=str(file_path.parent),
|
||||
prefix=f".{file_path.name}.tmp.",
|
||||
suffix="",
|
||||
)
|
||||
try:
|
||||
with os.fdopen(fd, "w", encoding=encoding) as f:
|
||||
f.write(content)
|
||||
os.replace(temp_path, file_path)
|
||||
except Exception:
|
||||
# Clean up temp file on error
|
||||
try:
|
||||
os.unlink(temp_path)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Core actions
|
||||
# =============================================================================
|
||||
@@ -218,9 +251,9 @@ def _create_skill(name: str, content: str, category: str = None) -> Dict[str, An
|
||||
skill_dir = _resolve_skill_dir(name, category)
|
||||
skill_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Write SKILL.md
|
||||
# Write SKILL.md atomically
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
skill_md.write_text(content, encoding="utf-8")
|
||||
_atomic_write_text(skill_md, content)
|
||||
|
||||
# Security scan — roll back on block
|
||||
scan_error = _security_scan_skill(skill_dir)
|
||||
@@ -256,13 +289,13 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
|
||||
skill_md = existing["path"] / "SKILL.md"
|
||||
# Back up original content for rollback
|
||||
original_content = skill_md.read_text(encoding="utf-8") if skill_md.exists() else None
|
||||
skill_md.write_text(content, encoding="utf-8")
|
||||
_atomic_write_text(skill_md, content)
|
||||
|
||||
# Security scan — roll back on block
|
||||
scan_error = _security_scan_skill(existing["path"])
|
||||
if scan_error:
|
||||
if original_content is not None:
|
||||
skill_md.write_text(original_content, encoding="utf-8")
|
||||
_atomic_write_text(skill_md, original_content)
|
||||
return {"success": False, "error": scan_error}
|
||||
|
||||
return {
|
||||
@@ -342,12 +375,12 @@ def _patch_skill(
|
||||
}
|
||||
|
||||
original_content = content # for rollback
|
||||
target.write_text(new_content, encoding="utf-8")
|
||||
_atomic_write_text(target, new_content)
|
||||
|
||||
# Security scan — roll back on block
|
||||
scan_error = _security_scan_skill(skill_dir)
|
||||
if scan_error:
|
||||
target.write_text(original_content, encoding="utf-8")
|
||||
_atomic_write_text(target, original_content)
|
||||
return {"success": False, "error": scan_error}
|
||||
|
||||
replacements = count if replace_all else 1
|
||||
@@ -394,13 +427,13 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Back up for rollback
|
||||
original_content = target.read_text(encoding="utf-8") if target.exists() else None
|
||||
target.write_text(file_content, encoding="utf-8")
|
||||
_atomic_write_text(target, file_content)
|
||||
|
||||
# Security scan — roll back on block
|
||||
scan_error = _security_scan_skill(existing["path"])
|
||||
if scan_error:
|
||||
if original_content is not None:
|
||||
target.write_text(original_content, encoding="utf-8")
|
||||
_atomic_write_text(target, original_content)
|
||||
else:
|
||||
target.unlink(missing_ok=True)
|
||||
return {"success": False, "error": scan_error}
|
||||
|
||||
@@ -29,6 +29,7 @@ Usage:
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
@@ -192,23 +193,35 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
|
||||
result = {"password": None, "done": False}
|
||||
|
||||
def read_password_thread():
|
||||
"""Read password from /dev/tty with echo disabled."""
|
||||
"""Read password with echo disabled. Uses msvcrt on Windows, /dev/tty on Unix."""
|
||||
tty_fd = None
|
||||
old_attrs = None
|
||||
try:
|
||||
import termios
|
||||
tty_fd = os.open("/dev/tty", os.O_RDONLY)
|
||||
old_attrs = termios.tcgetattr(tty_fd)
|
||||
new_attrs = termios.tcgetattr(tty_fd)
|
||||
new_attrs[3] = new_attrs[3] & ~termios.ECHO
|
||||
termios.tcsetattr(tty_fd, termios.TCSAFLUSH, new_attrs)
|
||||
chars = []
|
||||
while True:
|
||||
b = os.read(tty_fd, 1)
|
||||
if not b or b in (b"\n", b"\r"):
|
||||
break
|
||||
chars.append(b)
|
||||
result["password"] = b"".join(chars).decode("utf-8", errors="replace")
|
||||
if platform.system() == "Windows":
|
||||
import msvcrt
|
||||
chars = []
|
||||
while True:
|
||||
c = msvcrt.getwch()
|
||||
if c in ("\r", "\n"):
|
||||
break
|
||||
if c == "\x03":
|
||||
raise KeyboardInterrupt
|
||||
chars.append(c)
|
||||
result["password"] = "".join(chars)
|
||||
else:
|
||||
import termios
|
||||
tty_fd = os.open("/dev/tty", os.O_RDONLY)
|
||||
old_attrs = termios.tcgetattr(tty_fd)
|
||||
new_attrs = termios.tcgetattr(tty_fd)
|
||||
new_attrs[3] = new_attrs[3] & ~termios.ECHO
|
||||
termios.tcsetattr(tty_fd, termios.TCSAFLUSH, new_attrs)
|
||||
chars = []
|
||||
while True:
|
||||
b = os.read(tty_fd, 1)
|
||||
if not b or b in (b"\n", b"\r"):
|
||||
break
|
||||
chars.append(b)
|
||||
result["password"] = b"".join(chars).decode("utf-8", errors="replace")
|
||||
except (EOFError, KeyboardInterrupt, OSError):
|
||||
result["password"] = ""
|
||||
except Exception:
|
||||
|
||||
@@ -27,14 +27,15 @@ Usage:
|
||||
)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import asyncio
|
||||
import uuid
|
||||
import base64
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional
|
||||
from typing import Any, Awaitable, Dict, Optional
|
||||
from urllib.parse import urlparse
|
||||
import httpx
|
||||
from openai import AsyncOpenAI
|
||||
from agent.auxiliary_client import get_vision_auxiliary_client
|
||||
@@ -73,15 +74,18 @@ def _validate_image_url(url: str) -> bool:
|
||||
"""
|
||||
if not url or not isinstance(url, str):
|
||||
return False
|
||||
|
||||
# Check if it's a valid URL format
|
||||
if not (url.startswith('http://') or url.startswith('https://')):
|
||||
|
||||
# Basic HTTP/HTTPS URL check
|
||||
if not (url.startswith("http://") or url.startswith("https://")):
|
||||
return False
|
||||
|
||||
# Check for common image extensions (optional, as URLs may not have extensions)
|
||||
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg']
|
||||
|
||||
return True # Allow all HTTP/HTTPS URLs for flexibility
|
||||
|
||||
# Parse to ensure we at least have a network location; still allow URLs
|
||||
# without file extensions (e.g. CDN endpoints that redirect to images).
|
||||
parsed = urlparse(url)
|
||||
if not parsed.netloc:
|
||||
return False
|
||||
|
||||
return True # Allow all well-formed HTTP/HTTPS URLs for flexibility
|
||||
|
||||
|
||||
async def _download_image(image_url: str, destination: Path, max_retries: int = 3) -> Path:
|
||||
@@ -131,7 +135,12 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
|
||||
logger.warning("Retrying in %ss...", wait_time)
|
||||
await asyncio.sleep(wait_time)
|
||||
else:
|
||||
logger.error("Image download failed after %s attempts: %s", max_retries, str(e)[:100])
|
||||
logger.error(
|
||||
"Image download failed after %s attempts: %s",
|
||||
max_retries,
|
||||
str(e)[:100],
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
raise last_error
|
||||
|
||||
@@ -188,7 +197,7 @@ def _image_to_base64_data_url(image_path: Path, mime_type: Optional[str] = None)
|
||||
async def vision_analyze_tool(
|
||||
image_url: str,
|
||||
user_prompt: str,
|
||||
model: str = DEFAULT_VISION_MODEL
|
||||
model: str = DEFAULT_VISION_MODEL,
|
||||
) -> str:
|
||||
"""
|
||||
Analyze an image from a URL or local file path using vision AI.
|
||||
@@ -347,7 +356,7 @@ async def vision_analyze_tool(
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error analyzing image: {str(e)}"
|
||||
logger.error("%s", error_msg)
|
||||
logger.error("%s", error_msg, exc_info=True)
|
||||
|
||||
# Prepare error response
|
||||
result = {
|
||||
@@ -368,7 +377,9 @@ async def vision_analyze_tool(
|
||||
temp_image_path.unlink()
|
||||
logger.debug("Cleaned up temporary image file")
|
||||
except Exception as cleanup_error:
|
||||
logger.warning("Could not delete temporary file: %s", cleanup_error)
|
||||
logger.warning(
|
||||
"Could not delete temporary file: %s", cleanup_error, exc_info=True
|
||||
)
|
||||
|
||||
|
||||
def check_vision_requirements() -> bool:
|
||||
@@ -464,10 +475,13 @@ VISION_ANALYZE_SCHEMA = {
|
||||
}
|
||||
|
||||
|
||||
def _handle_vision_analyze(args, **kw):
|
||||
def _handle_vision_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]:
|
||||
image_url = args.get("image_url", "")
|
||||
question = args.get("question", "")
|
||||
full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
|
||||
full_prompt = (
|
||||
"Fully describe and explain everything about this image, then answer the "
|
||||
f"following question:\n\n{question}"
|
||||
)
|
||||
model = (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
|
||||
or DEFAULT_VISION_MODEL
|
||||
or "google/gemini-3-flash-preview")
|
||||
|
||||
@@ -24,6 +24,7 @@ These are commands you run from your shell.
|
||||
| `hermes chat --toolsets "web,terminal"` / `-t` | Use specific toolsets |
|
||||
| `hermes chat --verbose` | Enable verbose/debug output |
|
||||
| `hermes --worktree` / `-w` | Start in an isolated git worktree (for parallel agents) |
|
||||
| `hermes --checkpoints` | Enable filesystem checkpoints before destructive file operations |
|
||||
|
||||
### Provider & Model Management
|
||||
|
||||
@@ -202,6 +203,8 @@ These work in messaging platforms (Telegram, Discord, Slack, WhatsApp) but not t
|
||||
| `/sethome` | Set this chat as the home channel |
|
||||
| `/status` | Show session info |
|
||||
| `/reload-mcp` | Reload MCP servers from config |
|
||||
| `/rollback` | List filesystem checkpoints for the current directory |
|
||||
| `/rollback <N>` | Restore files to checkpoint #N |
|
||||
| `/update` | Update Hermes Agent to the latest version |
|
||||
|
||||
---
|
||||
|
||||
@@ -393,8 +393,40 @@ terminal:
|
||||
backend: local # or: docker, ssh, singularity, modal, daytona
|
||||
cwd: "." # Working directory ("." = current dir)
|
||||
timeout: 180 # Command timeout in seconds
|
||||
|
||||
# Docker-specific settings
|
||||
docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
docker_volumes: # Share host directories with the container
|
||||
- "/home/user/projects:/workspace/projects"
|
||||
- "/home/user/data:/data:ro" # :ro for read-only
|
||||
|
||||
# Container resource limits (docker, singularity, modal, daytona)
|
||||
container_cpu: 1 # CPU cores
|
||||
container_memory: 5120 # MB (default 5GB)
|
||||
container_disk: 51200 # MB (default 50GB)
|
||||
container_persistent: true # Persist filesystem across sessions
|
||||
```
|
||||
|
||||
### Docker Volume Mounts
|
||||
|
||||
When using the Docker backend, `docker_volumes` lets you share host directories with the container. Each entry uses standard Docker `-v` syntax: `host_path:container_path[:options]`.
|
||||
|
||||
```yaml
|
||||
terminal:
|
||||
backend: docker
|
||||
docker_volumes:
|
||||
- "/home/user/projects:/workspace/projects" # Read-write (default)
|
||||
- "/home/user/datasets:/data:ro" # Read-only
|
||||
- "/home/user/outputs:/outputs" # Agent writes, you read
|
||||
```
|
||||
|
||||
This is useful for:
|
||||
- **Providing files** to the agent (datasets, configs, reference code)
|
||||
- **Receiving files** from the agent (generated code, reports, exports)
|
||||
- **Shared workspaces** where both you and the agent access the same files
|
||||
|
||||
Can also be set via environment variable: `TERMINAL_DOCKER_VOLUMES='["/host:/container"]'` (JSON array).
|
||||
|
||||
See [Code Execution](features/code-execution.md) and the [Terminal section of the README](features/tools.md) for details on each backend.
|
||||
|
||||
## Memory Configuration
|
||||
@@ -631,6 +663,16 @@ browser:
|
||||
record_sessions: false # Auto-record browser sessions as WebM videos to ~/.hermes/browser_recordings/
|
||||
```
|
||||
|
||||
## Checkpoints
|
||||
|
||||
Automatic filesystem snapshots before destructive file operations. See the [Checkpoints feature page](/docs/user-guide/features/checkpoints) for details.
|
||||
|
||||
```yaml
|
||||
checkpoints:
|
||||
enabled: false # Enable automatic checkpoints (also: hermes --checkpoints)
|
||||
max_snapshots: 50 # Max checkpoints to keep per directory
|
||||
```
|
||||
|
||||
## Delegation
|
||||
|
||||
Configure subagent behavior for the delegate tool:
|
||||
|
||||
97
website/docs/user-guide/features/checkpoints.md
Normal file
97
website/docs/user-guide/features/checkpoints.md
Normal file
@@ -0,0 +1,97 @@
|
||||
# Filesystem Checkpoints
|
||||
|
||||
Hermes can automatically snapshot your working directory before making file changes, giving you a safety net to roll back if something goes wrong.
|
||||
|
||||
## How It Works
|
||||
|
||||
When enabled, Hermes takes a **one-time snapshot** at the start of each conversation turn before the first file-modifying operation (`write_file` or `patch`). This creates a point-in-time backup you can restore to at any time.
|
||||
|
||||
Under the hood, checkpoints use a **shadow git repository** stored at `~/.hermes/checkpoints/`. This is completely separate from your project's git — no `.git` directory is created in your project, and your own git history is never touched.
|
||||
|
||||
## Enabling Checkpoints
|
||||
|
||||
### Per-session (CLI flag)
|
||||
|
||||
```bash
|
||||
hermes --checkpoints
|
||||
```
|
||||
|
||||
### Permanently (config.yaml)
|
||||
|
||||
```yaml
|
||||
# ~/.hermes/config.yaml
|
||||
checkpoints:
|
||||
enabled: true
|
||||
max_snapshots: 50 # max checkpoints per directory (default: 50)
|
||||
```
|
||||
|
||||
## Rolling Back
|
||||
|
||||
Use the `/rollback` slash command:
|
||||
|
||||
```
|
||||
/rollback # List all available checkpoints
|
||||
/rollback 1 # Restore to checkpoint #1 (most recent)
|
||||
/rollback 3 # Restore to checkpoint #3 (further back)
|
||||
/rollback abc1234 # Restore by git commit hash
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
```
|
||||
📸 Checkpoints for /home/user/project:
|
||||
|
||||
1. abc1234 2026-03-10 14:22 before write_file
|
||||
2. def5678 2026-03-10 14:15 before patch
|
||||
3. ghi9012 2026-03-10 14:08 before write_file
|
||||
|
||||
Use /rollback <number> to restore, e.g. /rollback 1
|
||||
```
|
||||
|
||||
When you restore, Hermes automatically takes a **pre-rollback snapshot** first — so you can always undo your undo.
|
||||
|
||||
## What Gets Checkpointed
|
||||
|
||||
Checkpoints capture the entire working directory (the project root), excluding common large/sensitive patterns:
|
||||
|
||||
- `node_modules/`, `dist/`, `build/`
|
||||
- `.env`, `.env.*`
|
||||
- `__pycache__/`, `*.pyc`
|
||||
- `.venv/`, `venv/`
|
||||
- `.git/`
|
||||
- `.DS_Store`, `*.log`
|
||||
|
||||
## Performance
|
||||
|
||||
Checkpoints are designed to be lightweight:
|
||||
|
||||
- **Once per turn** — only the first file operation triggers a snapshot, not every write
|
||||
- **Skips large directories** — directories with >50,000 files are skipped automatically
|
||||
- **Skips when nothing changed** — if no files were modified since the last checkpoint, no commit is created
|
||||
- **Non-blocking** — if a checkpoint fails for any reason, the file operation proceeds normally
|
||||
|
||||
## How It Determines the Project Root
|
||||
|
||||
When you write to a file like `src/components/Button.tsx`, Hermes walks up the directory tree looking for project markers (`.git`, `pyproject.toml`, `package.json`, `Cargo.toml`, etc.) to find the project root. This ensures the entire project is checkpointed, not just the file's parent directory.
|
||||
|
||||
## Platforms
|
||||
|
||||
Checkpoints work on both:
|
||||
- **CLI** — uses your current working directory
|
||||
- **Gateway** (Telegram, Discord, etc.) — uses `MESSAGING_CWD`
|
||||
|
||||
The `/rollback` command is available on all platforms.
|
||||
|
||||
## FAQ
|
||||
|
||||
**Does this conflict with my project's git?**
|
||||
No. Checkpoints use a completely separate shadow git repository via `GIT_DIR` environment variables. Your project's `.git/` is never touched.
|
||||
|
||||
**How much disk space do checkpoints use?**
|
||||
Git is very efficient at storing diffs. For most projects, checkpoint data is negligible. Old checkpoints are pruned when `max_snapshots` is exceeded.
|
||||
|
||||
**Can I checkpoint without git installed?**
|
||||
No — git must be available on your PATH. If it's not installed, checkpoints silently disable.
|
||||
|
||||
**Can I roll back across sessions?**
|
||||
Yes! Checkpoints persist in `~/.hermes/checkpoints/` and survive across sessions. You can roll back to a checkpoint from yesterday.
|
||||
@@ -46,20 +46,26 @@ Navigate to **Features → OAuth & Permissions** in the sidebar. Scroll to **Sco
|
||||
| Scope | Purpose |
|
||||
|-------|---------|
|
||||
| `chat:write` | Send messages as the bot |
|
||||
| `app_mentions:read` | Respond when @mentioned in channels |
|
||||
| `app_mentions:read` | Detect when @mentioned in channels |
|
||||
| `channels:history` | Read messages in public channels the bot is in |
|
||||
| `channels:read` | List and get info about public channels |
|
||||
| `groups:history` | Read messages in private channels the bot is invited to |
|
||||
| `im:history` | Read direct message history |
|
||||
| `im:read` | View basic DM info |
|
||||
| `im:write` | Open and manage DMs |
|
||||
| `users:read` | Look up user information |
|
||||
| `files:write` | Upload files (images, audio, documents) |
|
||||
|
||||
:::caution Missing scopes = missing features
|
||||
Without `channels:history` and `groups:history`, the bot **will not receive messages in channels** —
|
||||
it will only work in DMs. These are the most commonly missed scopes.
|
||||
:::
|
||||
|
||||
**Optional scopes:**
|
||||
|
||||
| Scope | Purpose |
|
||||
|-------|---------|
|
||||
| `groups:history` | Read messages in private channels the bot is invited to |
|
||||
| `files:write` | Upload files (audio, images) |
|
||||
| `groups:read` | List and get info about private channels |
|
||||
|
||||
---
|
||||
|
||||
@@ -83,23 +89,27 @@ You can always find or regenerate app-level tokens under **Settings → Basic In
|
||||
|
||||
## Step 4: Subscribe to Events
|
||||
|
||||
This step is critical — it controls what messages the bot can see.
|
||||
|
||||
1. In the sidebar, go to **Features → Event Subscriptions**
|
||||
2. Toggle **Enable Events** to ON
|
||||
3. Expand **Subscribe to bot events** and add:
|
||||
|
||||
| Event | Purpose |
|
||||
|-------|---------|
|
||||
| `app_mention` | Bot responds when @mentioned in any channel |
|
||||
| `message.im` | Bot responds to direct messages |
|
||||
|
||||
**Optional event:**
|
||||
|
||||
| Event | Purpose |
|
||||
|-------|---------|
|
||||
| `message.channels` | Bot sees all messages in public channels it's added to |
|
||||
| Event | Required? | Purpose |
|
||||
|-------|-----------|---------|
|
||||
| `message.im` | **Yes** | Bot receives direct messages |
|
||||
| `message.channels` | **Yes** | Bot receives messages in **public** channels it's added to |
|
||||
| `message.groups` | **Recommended** | Bot receives messages in **private** channels it's invited to |
|
||||
| `app_mention` | **Yes** | Prevents Bolt SDK errors when bot is @mentioned |
|
||||
|
||||
4. Click **Save Changes** at the bottom of the page
|
||||
|
||||
:::danger Missing event subscriptions is the #1 setup issue
|
||||
If the bot works in DMs but **not in channels**, you almost certainly forgot to add
|
||||
`message.channels` (for public channels) and/or `message.groups` (for private channels).
|
||||
Without these events, Slack simply never delivers channel messages to the bot.
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
## Step 5: Install App to Workspace
|
||||
@@ -111,8 +121,8 @@ You can always find or regenerate app-level tokens under **Settings → Basic In
|
||||
5. **Copy this token** — this is your `SLACK_BOT_TOKEN`
|
||||
|
||||
:::tip
|
||||
If you change scopes later, you'll need to **reinstall the app** for the new scopes to take effect.
|
||||
The Install App page will show a banner prompting you to do so.
|
||||
If you change scopes or event subscriptions later, you **must reinstall the app** for the changes
|
||||
to take effect. The Install App page will show a banner prompting you to do so.
|
||||
:::
|
||||
|
||||
---
|
||||
@@ -139,7 +149,7 @@ Add the following to your `~/.hermes/.env` file:
|
||||
```bash
|
||||
# Required
|
||||
SLACK_BOT_TOKEN=xoxb-your-bot-token-here
|
||||
SLACK_APP_TOKEN=xapp-your-app-level-token-here
|
||||
SLACK_APP_TOKEN=xapp-your-app-token-here
|
||||
SLACK_ALLOWED_USERS=U01ABC2DEF3 # Comma-separated Member IDs
|
||||
|
||||
# Optional
|
||||
@@ -161,6 +171,35 @@ hermes gateway install # Install as a system service
|
||||
|
||||
---
|
||||
|
||||
## Step 8: Invite the Bot to Channels
|
||||
|
||||
After starting the gateway, you need to **invite the bot** to any channel where you want it to respond:
|
||||
|
||||
```
|
||||
/invite @Hermes Agent
|
||||
```
|
||||
|
||||
The bot will **not** automatically join channels. You must invite it to each channel individually.
|
||||
|
||||
---
|
||||
|
||||
## How the Bot Responds
|
||||
|
||||
Understanding how Hermes behaves in different contexts:
|
||||
|
||||
| Context | Behavior |
|
||||
|---------|----------|
|
||||
| **DMs** | Bot responds to every message — no @mention needed |
|
||||
| **Channels** | Bot **only responds when @mentioned** (e.g., `@Hermes Agent what time is it?`) |
|
||||
| **Threads** | Bot replies in threads when the triggering message is in a thread |
|
||||
|
||||
:::tip
|
||||
In channels, always @mention the bot. Simply typing a message without mentioning it will be ignored.
|
||||
This is intentional — it prevents the bot from responding to every message in busy channels.
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
## Home Channel
|
||||
|
||||
Set `SLACK_HOME_CHANNEL` to a channel ID where Hermes will deliver scheduled messages,
|
||||
@@ -192,11 +231,27 @@ Hermes supports voice on Slack:
|
||||
| Problem | Solution |
|
||||
|---------|----------|
|
||||
| Bot doesn't respond to DMs | Verify `message.im` is in your event subscriptions and the app is reinstalled |
|
||||
| Bot doesn't respond to @mentions | Verify `app_mention` is in your event subscriptions |
|
||||
| Bot works in DMs but not in channels | **Most common issue.** Add `message.channels` and `message.groups` to event subscriptions, reinstall the app, and invite the bot to the channel with `/invite @Hermes Agent` |
|
||||
| Bot doesn't respond to @mentions in channels | 1) Check `message.channels` event is subscribed. 2) Bot must be invited to the channel. 3) Ensure `channels:history` scope is added. 4) Reinstall the app after scope/event changes |
|
||||
| Bot ignores messages in private channels | Add both the `message.groups` event subscription and `groups:history` scope, then reinstall the app and `/invite` the bot |
|
||||
| "not_authed" or "invalid_auth" errors | Regenerate your Bot Token and App Token, update `.env` |
|
||||
| Bot responds but can't post in a channel | Invite the bot to the channel with `/invite @Hermes Agent` |
|
||||
| "missing_scope" error | Add the required scope in OAuth & Permissions, then **reinstall** the app |
|
||||
| Socket disconnects frequently | Check your network; Bolt auto-reconnects but unstable connections cause lag |
|
||||
| Changed scopes/events but nothing changed | You **must reinstall** the app to your workspace after any scope or event subscription change |
|
||||
|
||||
### Quick Checklist
|
||||
|
||||
If the bot isn't working in channels, verify **all** of the following:
|
||||
|
||||
1. ✅ `message.channels` event is subscribed (for public channels)
|
||||
2. ✅ `message.groups` event is subscribed (for private channels)
|
||||
3. ✅ `app_mention` event is subscribed
|
||||
4. ✅ `channels:history` scope is added (for public channels)
|
||||
5. ✅ `groups:history` scope is added (for private channels)
|
||||
6. ✅ App was **reinstalled** after adding scopes/events
|
||||
7. ✅ Bot was **invited** to the channel (`/invite @Hermes Agent`)
|
||||
8. ✅ You are **@mentioning** the bot in your message
|
||||
|
||||
---
|
||||
|
||||
|
||||
Reference in New Issue
Block a user