Compare commits

..

1 Commits

Author SHA1 Message Date
Teknium
08b97660c5 feat: /context command + /compress focus — inspired by Claude Code
Two features inspired by Claude Code's recent releases (v2.1.89–v2.1.101):

1. /context command (alias: /ctx)
   Shows a live breakdown of context window usage by component:
   - System prompt (identity, memory, skills index, context files, guidance)
   - Tool schemas (count and token estimate)
   - Conversation messages (by role: user, assistant, tool results)
   - Compaction summaries
   - Auto-compress threshold and remaining tokens
   - Visual progress bar

   This gives users visibility into what is consuming their context window,
   matching Claude Code's /context feature.

2. /compress <focus> — guided compression
   The existing /compress command now accepts an optional focus topic:
   /compress database schema
   When provided, the summariser prioritises preserving information related
   to the focus topic (60-70% of summary budget) while being more aggressive
   about compressing everything else.

   Inspired by Claude Code's /compact <focus> feature.

Implementation details:
- /context: new _show_context_breakdown() method in cli.py
- /compress focus: focus_topic flows through _manual_compress → _compress_context
  → ContextCompressor.compress → _generate_summary, where it's appended to the
  LLM summarisation prompt
- 15 new tests covering both features
- No changes to prompt caching, message flow, or system prompt assembly
2026-04-10 17:17:16 -07:00
14 changed files with 610 additions and 486 deletions

View File

@@ -267,13 +267,19 @@ class ContextCompressor:
return "\n\n".join(parts)
def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]:
def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topic: str = None) -> Optional[str]:
"""Generate a structured summary of conversation turns.
Uses a structured template (Goal, Progress, Decisions, Files, Next Steps)
inspired by Pi-mono and OpenCode. When a previous summary exists,
generates an iterative update instead of summarizing from scratch.
Args:
focus_topic: Optional focus string for guided compression. When
provided, the summariser prioritises preserving information
related to this topic and is more aggressive about compressing
everything else. Inspired by Claude Code's ``/compact``.
Returns None if all attempts fail — the caller should drop
the middle turns without a summary rather than inject a useless
placeholder.
@@ -375,6 +381,14 @@ Target ~{summary_budget} tokens. Be specific — include file paths, command out
Write only the summary body. Do not include any preamble or prefix."""
# Inject focus topic guidance when the user provides one via /compress <focus>.
# This goes at the end of the prompt so it takes precedence.
if focus_topic:
prompt += f"""
FOCUS TOPIC: "{focus_topic}"
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
try:
call_kwargs = {
"task": "compression",
@@ -592,7 +606,7 @@ Write only the summary body. Do not include any preamble or prefix."""
# Main compression entry point
# ------------------------------------------------------------------
def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, focus_topic: str = None) -> List[Dict[str, Any]]:
"""Compress conversation messages by summarizing middle turns.
Algorithm:
@@ -604,6 +618,12 @@ Write only the summary body. Do not include any preamble or prefix."""
After compression, orphaned tool_call / tool_result pairs are cleaned
up so the API never receives mismatched IDs.
Args:
focus_topic: Optional focus string for guided compression. When
provided, the summariser will prioritise preserving information
related to this topic and be more aggressive about compressing
everything else. Inspired by Claude Code's ``/compact``.
"""
n_messages = len(messages)
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
@@ -661,7 +681,7 @@ Write only the summary body. Do not include any preamble or prefix."""
)
# Phase 3: Generate structured summary
summary = self._generate_summary(turns_to_summarize)
summary = self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
# Phase 4: Assemble compressed message list
compressed = []

220
cli.py
View File

@@ -4962,7 +4962,9 @@ class HermesCLI:
elif canonical == "fast":
self._handle_fast_command(cmd_original)
elif canonical == "compress":
self._manual_compress()
self._manual_compress(cmd_original)
elif canonical == "context":
self._show_context_breakdown()
elif canonical == "usage":
self._show_usage()
elif canonical == "insights":
@@ -5818,8 +5820,14 @@ class HermesCLI:
self._reasoning_preview_buf = getattr(self, "_reasoning_preview_buf", "") + reasoning_text
self._flush_reasoning_preview(force=False)
def _manual_compress(self):
"""Manually trigger context compression on the current conversation."""
def _manual_compress(self, cmd_original: str = ""):
"""Manually trigger context compression on the current conversation.
Accepts an optional focus topic: ``/compress <focus>`` guides the
summariser to preserve information related to *focus* while being
more aggressive about discarding everything else. Inspired by
Claude Code's ``/compact <focus>`` feature.
"""
if not self.conversation_history or len(self.conversation_history) < 4:
print("(._.) Not enough conversation to compress (need at least 4 messages).")
return
@@ -5832,16 +5840,28 @@ class HermesCLI:
print("(._.) Compression is disabled in config.")
return
# Extract optional focus topic from the command (e.g. "/compress database schema")
focus_topic = ""
if cmd_original:
parts = cmd_original.strip().split(None, 1)
if len(parts) > 1:
focus_topic = parts[1].strip()
original_count = len(self.conversation_history)
try:
from agent.model_metadata import estimate_messages_tokens_rough
approx_tokens = estimate_messages_tokens_rough(self.conversation_history)
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
if focus_topic:
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
f"focus: \"{focus_topic}\"...")
else:
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
compressed, _new_system = self.agent._compress_context(
self.conversation_history,
self.agent._cached_system_prompt or "",
approx_tokens=approx_tokens,
focus_topic=focus_topic or None,
)
self.conversation_history = compressed
new_count = len(self.conversation_history)
@@ -5854,6 +5874,198 @@ class HermesCLI:
except Exception as e:
print(f" ❌ Compression failed: {e}")
def _show_context_breakdown(self):
    """Print a live breakdown of context window usage by component.

    Inspired by Claude Code's ``/context`` command. All figures are rough
    estimates from ``estimate_tokens_rough`` /
    ``estimate_messages_tokens_rough``.

    The report covers:

    * the cached system prompt, split into the sections that can be
      recognised by content signature (identity, memory, user profile,
      skills index, context files) plus an "Other" remainder;
    * the cached tool schemas, when present;
    * the conversation history, per role, plus tool-call payloads and
      messages that carry a compaction-summary prefix;
    * the compression count and the auto-compress threshold.

    Output goes to stdout; the method returns ``None``. When no agent is
    active a hint is printed and nothing else happens.
    """
    if not self.agent:
        print("(._.) No active agent — send a message first.")
        return

    from agent.model_metadata import (
        estimate_tokens_rough,
        estimate_messages_tokens_rough,
    )

    def _section_end(text, search_from, end_markers):
        """Return the earliest index of any marker at/after *search_from*, else len(text)."""
        end = len(text)
        for marker in end_markers:
            idx = text.find(marker, search_from)
            if idx != -1:
                end = min(end, idx)
        return end

    def _bar(tokens, total, width=20):
        """Render a fixed-width usage bar; empty string when *total* is unknown."""
        if total <= 0:
            return ""
        filled = max(0, min(width, round((tokens / total) * width)))
        return "█" * filled + "░" * (width - filled)

    def _fmt(tokens):
        """Format a token count, abbreviating thousands as e.g. ``12.3K``."""
        if tokens >= 1000:
            return f"{tokens / 1000:.1f}K"
        return str(tokens)

    agent = self.agent
    compressor = getattr(agent, "context_compressor", None)
    context_length = getattr(compressor, "context_length", 0) or 0
    if not context_length:
        # No compressor (or it does not know the window) — ask model metadata.
        from agent.model_metadata import get_model_context_length
        context_length = get_model_context_length(agent.model or "")

    # ── System prompt breakdown ────────────────────────────────
    system_prompt = getattr(agent, "_cached_system_prompt", "") or ""
    system_total = estimate_tokens_rough(system_prompt)

    # The prompt is assembled by joining parts with "\n\n", so known
    # sections are identified by their content signatures.
    components = []
    if system_prompt:
        from agent.prompt_builder import load_soul_md, DEFAULT_AGENT_IDENTITY

        # Identity block: a custom SOUL.md wins over the built-in identity.
        soul = load_soul_md()
        if soul and soul[:60] in system_prompt:
            components.append((" Identity (SOUL.md)", estimate_tokens_rough(soul)))
        elif DEFAULT_AGENT_IDENTITY[:40] in system_prompt:
            components.append(
                (" Identity (built-in)", estimate_tokens_rough(DEFAULT_AGENT_IDENTITY))
            )

        # Memory and user profile blocks, matched by their first 30 chars.
        mem_store = getattr(agent, "_memory_store", None)
        if mem_store:
            for target, label in (("memory", " Memory"), ("user", " User profile")):
                block = mem_store.format_for_system_prompt(target)
                if block and block[:30] in system_prompt:
                    components.append((label, estimate_tokens_rough(block)))

        # Skills index: from its heading up to the next known section.
        skills_start = system_prompt.find("## Skills (mandatory)")
        if skills_start != -1:
            skills_end = _section_end(
                system_prompt,
                skills_start + 10,
                ("\nConversation started:", "\nYou are running as"),
            )
            components.append(
                (" Skills index", estimate_tokens_rough(system_prompt[skills_start:skills_end]))
            )

        # Context files (AGENTS.md, .cursorrules, etc.). The section ends at
        # the EARLIEST following marker; stopping at the first marker in list
        # order could swallow the Skills section and count it twice.
        ctx_start = system_prompt.find("# Project Context")
        if ctx_start != -1:
            ctx_end = _section_end(
                system_prompt,
                ctx_start + 10,
                ("\nConversation started:", "\n## Skills"),
            )
            components.append(
                (" Context files", estimate_tokens_rough(system_prompt[ctx_start:ctx_end]))
            )

        # Tool-use guidance, platform hints, timestamps — whatever is left.
        accounted = sum(t for _, t in components)
        remainder = max(0, system_total - accounted)
        if remainder > 50:
            components.append((" Other (guidance, hints, timestamp)", remainder))

    # ── Conversation breakdown ─────────────────────────────────
    msgs = self.conversation_history or []
    msg_counts = {"user": 0, "assistant": 0, "tool": 0, "system": 0}
    msg_tokens = {"user": 0, "assistant": 0, "tool": 0, "system": 0}
    tool_call_tokens = 0
    compaction_summary_tokens = 0

    from agent.context_compressor import SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX

    for msg in msgs:
        role = msg.get("role", "unknown")
        content = msg.get("content", "")
        content_str = str(content) if content else ""
        tokens = estimate_tokens_rough(content_str)

        # Tool-call payloads live beside the content on assistant messages.
        tool_calls = msg.get("tool_calls")
        if tool_calls:
            tool_call_tokens += estimate_tokens_rough(str(tool_calls))

        # Unknown roles get their own bucket instead of being dropped.
        msg_counts[role] = msg_counts.get(role, 0) + 1
        msg_tokens[role] = msg_tokens.get(role, 0) + tokens

        # Detect compaction summaries by their prefix marker.
        if content_str and (
            SUMMARY_PREFIX in content_str or LEGACY_SUMMARY_PREFIX in content_str
        ):
            compaction_summary_tokens += tokens

    tool_result_tokens = msg_tokens["tool"]
    conversation_total = estimate_messages_tokens_rough(msgs)

    # ── Tool schemas ───────────────────────────────────────────
    # Bound before the best-effort lookup so the render step below can
    # never hit an unbound name.
    tool_schemas = None
    tool_schemas_tokens = 0
    try:
        tool_schemas = getattr(agent, "_cached_tool_schemas", None)
        if tool_schemas:
            tool_schemas_tokens = estimate_tokens_rough(str(tool_schemas))
    except Exception:
        pass

    # ── Grand total ────────────────────────────────────────────
    grand_total = system_total + conversation_total + tool_schemas_tokens
    percent = round((grand_total / context_length) * 100) if context_length else 0

    # ── Render ─────────────────────────────────────────────────
    print()
    model_short = (agent.model or "unknown").split("/")[-1]
    print(f"◎ Context Window — {model_short}")
    print(f" {_bar(grand_total, context_length, 30)} {_fmt(grand_total)} / {_fmt(context_length)} tokens ({percent}%)")
    print()

    # System prompt
    print(f" ◆ System Prompt {_fmt(system_total):>8}")
    for label, toks in components:
        print(f" {label:<28} {_fmt(toks):>8}")

    # Tool schemas
    if tool_schemas_tokens:
        n_tools = len(tool_schemas) if tool_schemas else 0
        print(f" ◆ Tool Schemas ({n_tools} tools) {_fmt(tool_schemas_tokens):>8}")

    # Conversation
    total_msgs = sum(msg_counts.values())
    print(f" ◆ Conversation ({total_msgs} msgs) {_fmt(conversation_total):>8}")
    if msg_counts.get("user", 0):
        print(f" User messages ({msg_counts['user']}) {_fmt(msg_tokens['user']):>8}")
    if msg_counts.get("assistant", 0):
        print(f" Assistant messages ({msg_counts['assistant']}) {_fmt(msg_tokens['assistant']):>8}")
    if msg_counts.get("tool", 0):
        print(f" Tool results ({msg_counts['tool']}) {_fmt(tool_result_tokens):>8}")
    if tool_call_tokens:
        print(f" Tool calls {_fmt(tool_call_tokens):>8}")
    if compaction_summary_tokens:
        print(f" Compaction summaries {_fmt(compaction_summary_tokens):>8}")

    # Compression info
    compressions = getattr(compressor, "compression_count", 0) or 0
    if compressions:
        print(f"\n ⚙ Compressions this session: {compressions}")

    # Threshold info
    if compressor:
        threshold = getattr(compressor, "threshold_tokens", 0) or 0
        if threshold:
            remaining = max(0, threshold - grand_total)
            print(f" ⚙ Auto-compress at: ~{_fmt(threshold)} tokens ({_fmt(remaining)} remaining)")
    print()
def _show_usage(self):
"""Show rate limits (if available) and session token usage."""
if not self.agent:

View File

@@ -76,15 +76,10 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
except Exception as e:
logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
# Platforms that don't support direct channel enumeration get session-based
# discovery automatically. Skip infrastructure entries that aren't messaging
# platforms — everything else falls through to _build_from_sessions().
_SKIP_SESSION_DISCOVERY = frozenset({"local", "api_server", "webhook"})
for plat in Platform:
plat_name = plat.value
if plat_name in _SKIP_SESSION_DISCOVERY or plat_name in platforms:
continue
platforms[plat_name] = _build_from_sessions(plat_name)
# Telegram, WhatsApp & Signal can't enumerate chats -- pull from session history
for plat_name in ("telegram", "whatsapp", "signal", "weixin", "email", "sms", "bluebubbles"):
if plat_name not in platforms:
platforms[plat_name] = _build_from_sessions(plat_name)
directory = {
"updated_at": datetime.now().isoformat(),

View File

@@ -69,7 +69,10 @@ COMMAND_REGISTRY: list[CommandDef] = [
args_hint="[name]"),
CommandDef("branch", "Branch the current session (explore a different path)", "Session",
aliases=("fork",), args_hint="[name]"),
CommandDef("compress", "Manually compress conversation context", "Session"),
CommandDef("compress", "Manually compress conversation context", "Session",
args_hint="[focus topic]"),
CommandDef("context", "Show live context window breakdown (token usage per component)",
"Info", aliases=("ctx",)),
CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
args_hint="[number]"),
CommandDef("stop", "Kill all running background processes", "Session"),

View File

@@ -141,68 +141,6 @@ def managed_error(action: str = "modify configuration"):
print(format_managed_message(action), file=sys.stderr)
# =============================================================================
# Container-aware CLI (NixOS container mode)
# =============================================================================
def _is_inside_container() -> bool:
    """Detect if we're already running inside a Docker/Podman container.

    Returns True when either runtime marker file exists (``/.dockerenv``
    for Docker, ``/run/.containerenv`` for Podman) or when PID 1's cgroup
    file mentions ``docker``, ``podman`` or ``/lxc/``. An unreadable
    cgroup file counts as "not in a container".
    """
    # Marker files dropped by the container runtimes.
    marker_files = ("/.dockerenv", "/run/.containerenv")
    if any(os.path.exists(path) for path in marker_files):
        return True

    # Fall back to cgroup evidence (works for both Docker & Podman).
    try:
        with open("/proc/1/cgroup", "r") as f:
            cgroup = f.read()
    except OSError:  # IOError is an alias of OSError on Python 3
        return False
    return any(tag in cgroup for tag in ("docker", "podman", "/lxc/"))
def get_container_exec_info() -> Optional[dict]:
    """Read container mode metadata from HERMES_HOME/.container-mode.

    Returns a dict with keys: backend, container_name, hermes_bin
    or None if container mode is not active or we're already inside the container.

    The .container-mode file is written by the NixOS activation script when
    container.enable = true. It tells the host CLI to exec into the container
    instead of running locally.

    The file is parsed as ``key=value`` lines; blank lines, ``#`` comments
    and lines without ``=`` are ignored. Missing or empty values fall back
    to the defaults (``docker``, ``hermes-agent``,
    ``/data/current-package/bin/hermes``). A missing, unreadable or
    undecodable file is treated as "container mode not active".
    """
    if _is_inside_container():
        return None

    container_mode_file = get_hermes_home() / ".container-mode"

    info = {}
    try:
        # EAFP: a missing file raises FileNotFoundError (an OSError), so no
        # separate exists() probe is needed.
        with open(container_mode_file, "r") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue
                key, sep, value = line.partition("=")
                if sep:
                    info[key.strip()] = value.strip()
    except (OSError, UnicodeDecodeError):
        return None

    # ``or`` rather than a .get() default so an empty value such as
    # "backend=" cannot override the fallback with an unusable empty string.
    return {
        "backend": info.get("backend") or "docker",
        "container_name": info.get("container_name") or "hermes-agent",
        "hermes_bin": info.get("hermes_bin") or "/data/current-package/bin/hermes",
    }
# =============================================================================
# Config paths
# =============================================================================

View File

@@ -528,56 +528,6 @@ def _resolve_last_cli_session() -> Optional[str]:
return None
def _exec_in_container(container_info: dict, cli_args: list):
    """Replace the current process with a command inside the managed container.

    Uses os.execvp to hand off to docker/podman exec, preserving the TTY
    so the interactive CLI works seamlessly inside the container.

    Args:
        container_info: dict with backend, container_name, hermes_bin
        cli_args: the original CLI arguments (everything after 'hermes')

    Returns:
        None when the hand-off is not possible (runtime not on PATH,
        container not running, or the inspect call failed); a warning is
        printed to stderr in each case. On success this function does not
        return because the process image is replaced.

    Raises:
        OSError: if ``os.execvp`` itself fails.
    """
    import shutil
    import subprocess

    backend = container_info["backend"]
    container_name = container_info["container_name"]
    hermes_bin = container_info["hermes_bin"]

    # Find the container runtime on PATH
    runtime = shutil.which(backend)
    if not runtime:
        print(f"Warning: {backend} not found on PATH, falling back to host CLI.",
              file=sys.stderr)
        return  # Fall through to normal CLI

    # Check if the container is actually running
    try:
        result = subprocess.run(
            [runtime, "inspect", "--format", "{{.State.Running}}", container_name],
            capture_output=True, text=True, timeout=5
        )
    except (subprocess.TimeoutExpired, OSError) as exc:
        # Warn like the other fallback paths instead of returning silently.
        print(f"Warning: could not inspect container '{container_name}' ({exc}), "
              f"falling back to host CLI.", file=sys.stderr)
        return
    if result.returncode != 0 or result.stdout.strip().lower() != "true":
        print(f"Warning: container '{container_name}' is not running, falling back to host CLI.",
              file=sys.stderr)
        return

    # Filter out --host flag from forwarded args (it's not meaningful inside)
    forwarded_args = [a for a in cli_args if a != "--host"]

    # Only request a pseudo-TTY when we actually have one; "-t" without a
    # terminal makes the runtime refuse piped / non-interactive invocations.
    interactive = all(
        stream is not None and stream.isatty() for stream in (sys.stdin, sys.stdout)
    )
    exec_flags = "-it" if interactive else "-i"

    # Build the exec command
    exec_cmd = [runtime, "exec", exec_flags, container_name, hermes_bin] + forwarded_args

    print(f"Routing to container '{container_name}' via {backend}...",
          file=sys.stderr)

    # exec* replaces the process without flushing Python-level buffers, so
    # flush explicitly or pending output is lost.
    for stream in (sys.stdout, sys.stderr):
        if stream is not None:
            stream.flush()

    # Replace the current process — this never returns on success
    os.execvp(runtime, exec_cmd)
def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]:
"""Resolve a session name (title) or ID to a session ID.
@@ -606,21 +556,6 @@ def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]:
def cmd_chat(args):
"""Run interactive chat CLI."""
# ── Container-aware routing ──────────────────────────────────────────
# When NixOS container mode is active and we're on the host, exec into
# the managed container instead of running locally. --host bypasses this.
if not getattr(args, "host", False):
try:
from hermes_cli.config import get_container_exec_info
container_info = get_container_exec_info()
if container_info:
_exec_in_container(container_info, sys.argv[1:])
# _exec_in_container calls os.execvp which replaces the process.
# If we get here, the exec failed.
sys.exit(1)
except Exception:
pass # Fall through to normal CLI on any detection error
# Resolve --continue into --resume with the latest CLI session or by name
continue_val = getattr(args, "continue_last", None)
if continue_val and not getattr(args, "resume", None):
@@ -4451,12 +4386,6 @@ For more help on a command:
default=None,
help="Session source tag for filtering (default: cli). Use 'tool' for third-party integrations that should not appear in user session lists."
)
chat_parser.add_argument(
"--host",
action="store_true",
default=False,
help="Run on the host even when NixOS container mode is active (bypass container exec)"
)
chat_parser.set_defaults(func=cmd_chat)
# =========================================================================

View File

@@ -611,22 +611,6 @@
chown ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/.managed
chmod 0644 ${cfg.stateDir}/.hermes/.managed
# Container mode metadata tells the host CLI to exec into the
# container instead of running locally. Removed when container mode
# is disabled so the host CLI falls back to native execution.
${if cfg.container.enable then ''
cat > ${cfg.stateDir}/.hermes/.container-mode <<'HERMES_CONTAINER_MODE_EOF'
# Written by NixOS activation script. Do not edit manually.
backend=${cfg.container.backend}
container_name=${containerName}
hermes_bin=${containerDataDir}/current-package/bin/hermes
HERMES_CONTAINER_MODE_EOF
chown ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/.container-mode
chmod 0644 ${cfg.stateDir}/.hermes/.container-mode
'' else ''
rm -f ${cfg.stateDir}/.hermes/.container-mode
''}
# Seed auth file if provided
${lib.optionalString (cfg.authFile != null) ''
${if cfg.authFileForceOverwrite then ''

View File

@@ -88,10 +88,10 @@ all = [
"hermes-agent[modal]",
"hermes-agent[daytona]",
"hermes-agent[messaging]",
# matrix: python-olm (required by matrix-nio[e2e]) is upstream-broken on
# modern macOS (archived libolm, C++ errors with Clang 21+). On Linux the
# [matrix] extra's own marker pulls in the [e2e] variant automatically.
"hermes-agent[matrix]; sys_platform == 'linux'",
# matrix excluded: python-olm (required by matrix-nio[e2e]) is upstream-broken
# on modern macOS (archived libolm, C++ errors with Clang 21+). Including it
# here causes the entire [all] install to fail, dropping all other extras.
# Users who need Matrix can install manually: pip install 'hermes-agent[matrix]'
"hermes-agent[cron]",
"hermes-agent[cli]",
"hermes-agent[dev]",

View File

@@ -6281,17 +6281,23 @@ class AIAgent:
if messages and messages[-1].get("_flush_sentinel") == _sentinel:
messages.pop()
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default") -> tuple:
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
"""Compress conversation context and split the session in SQLite.
Args:
focus_topic: Optional focus string for guided compression — the
summariser will prioritise preserving information related to
this topic. Inspired by Claude Code's ``/compact <focus>``.
Returns:
(compressed_messages, new_system_prompt) tuple
"""
_pre_msg_count = len(messages)
logger.info(
"context compression started: session=%s messages=%d tokens=~%s model=%s",
"context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
self.session_id or "none", _pre_msg_count,
f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
focus_topic,
)
# Pre-compression memory flush: let the model save memories before they're lost
self.flush_memories(messages, min_turns=0)
@@ -6303,7 +6309,7 @@ class AIAgent:
except Exception:
pass
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
todo_snapshot = self._todo_store.format_for_injection()
if todo_snapshot:

View File

@@ -0,0 +1,345 @@
"""Tests for /context command — live context window breakdown.
Inspired by Claude Code's /context feature.
"""
import os
from unittest.mock import MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_cli(tmp_path):
    """Return a bare ``HermesCLI`` for exercising ``_show_context_breakdown``.

    The instance is allocated without running ``__init__``; only the two
    attributes the tests rely on are set, both to their "empty" values.
    """
    from cli import HermesCLI

    stub = object.__new__(HermesCLI)
    # Start with no agent and an empty history; tests override as needed.
    stub.agent, stub.conversation_history = None, []
    return stub
def _make_agent_stub(model="anthropic/claude-sonnet-4.6", system_prompt="You are Hermes.",
                     context_length=200000, compression_count=0, threshold_tokens=160000,
                     last_prompt_tokens=50000):
    """Build a ``MagicMock`` agent carrying the attributes _show_context_breakdown reads.

    The nested ``context_compressor`` mock receives the window/threshold
    numbers; the memory store and cached tool schemas default to ``None``.
    """
    compressor_attrs = {
        "context_length": context_length,
        "compression_count": compression_count,
        "threshold_tokens": threshold_tokens,
        "last_prompt_tokens": last_prompt_tokens,
    }
    compressor_stub = MagicMock()
    for attr_name, attr_value in compressor_attrs.items():
        setattr(compressor_stub, attr_name, attr_value)

    agent_attrs = {
        "model": model,
        "_cached_system_prompt": system_prompt,
        "session_input_tokens": 1000,
        "session_output_tokens": 500,
        "context_compressor": compressor_stub,
        "_memory_store": None,
        "_cached_tool_schemas": None,
    }
    agent_stub = MagicMock()
    for attr_name, attr_value in agent_attrs.items():
        setattr(agent_stub, attr_name, attr_value)
    return agent_stub
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestContextBreakdown:
    """Tests for _show_context_breakdown method."""

    @staticmethod
    def _render(tmp_path, capsys, agent=None, history=None):
        """Run ``_show_context_breakdown`` on a stub CLI and return captured stdout."""
        cli_obj = _make_cli(tmp_path)
        cli_obj.agent = agent
        cli_obj.conversation_history = [] if history is None else history
        cli_obj._show_context_breakdown()
        return capsys.readouterr().out

    def test_no_agent(self, tmp_path, capsys):
        """When no agent is active, prints a helpful message."""
        out = self._render(tmp_path, capsys)
        assert "No active agent" in out

    def test_basic_breakdown(self, tmp_path, capsys):
        """Basic breakdown shows model, context bar, and section headers."""
        out = self._render(tmp_path, capsys, _make_agent_stub(), [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there!"},
        ])
        # Model name should appear
        assert "claude-sonnet-4.6" in out
        # Section headers
        assert "System Prompt" in out
        assert "Conversation" in out
        # Token counts appear
        assert "tokens" in out

    def test_shows_context_percentage(self, tmp_path, capsys):
        """The context usage percentage is displayed."""
        out = self._render(tmp_path, capsys, _make_agent_stub())
        assert "%" in out

    def test_shows_tool_schemas_when_present(self, tmp_path, capsys):
        """When tool schemas are cached, their token count is shown."""
        agent = _make_agent_stub()
        agent._cached_tool_schemas = [
            {"name": "tool1", "description": "Does something", "parameters": {}},
            {"name": "tool2", "description": "Does another thing", "parameters": {}},
        ]
        out = self._render(tmp_path, capsys, agent)
        assert "Tool Schemas" in out
        assert "2 tools" in out

    def test_shows_message_role_breakdown(self, tmp_path, capsys):
        """Individual message role counts are shown."""
        out = self._render(tmp_path, capsys, _make_agent_stub(), [
            {"role": "user", "content": "Do something"},
            {"role": "assistant", "content": "OK", "tool_calls": [
                {"id": "call_1", "function": {"name": "terminal", "arguments": '{"command":"ls"}'}}
            ]},
            {"role": "tool", "content": '{"output": "file1.py\\nfile2.py"}', "tool_call_id": "call_1"},
            {"role": "assistant", "content": "Found 2 files."},
            {"role": "user", "content": "Good"},
        ])
        assert "User messages (2)" in out
        assert "Assistant messages (2)" in out
        assert "Tool results (1)" in out

    def test_shows_compression_info(self, tmp_path, capsys):
        """When compressions have occurred, that info is shown."""
        out = self._render(tmp_path, capsys, _make_agent_stub(compression_count=2))
        assert "Compressions this session: 2" in out

    def test_shows_auto_compress_threshold(self, tmp_path, capsys):
        """Auto-compress threshold and remaining tokens are shown."""
        out = self._render(tmp_path, capsys, _make_agent_stub(threshold_tokens=160000))
        assert "Auto-compress at" in out
        assert "remaining" in out

    def test_detects_compaction_summaries(self, tmp_path, capsys):
        """Messages containing compaction summary markers are identified."""
        from agent.context_compressor import SUMMARY_PREFIX

        out = self._render(tmp_path, capsys, _make_agent_stub(), [
            {"role": "assistant", "content": f"{SUMMARY_PREFIX}\n## Goal\nBuild a feature."},
            {"role": "user", "content": "Continue from the summary."},
        ])
        assert "Compaction summaries" in out

    def test_bar_rendering(self, tmp_path, capsys):
        """The usage line starts with a full-width progress bar."""
        out = self._render(tmp_path, capsys, _make_agent_stub(), [
            {"role": "user", "content": "x" * 1000},
        ])
        # The first line mentioning "tokens (" is the usage line; its first
        # field is the bar. Checking width and glyph class keeps the test
        # meaningful (an empty-string membership check always passes)
        # without hard-coding which block characters are used.
        usage_line = next(line for line in out.splitlines() if " tokens (" in line)
        bar = usage_line.split()[0]
        assert len(bar) == 30
        assert not any(ch.isalnum() for ch in bar)

    def test_identifies_skills_section(self, tmp_path, capsys):
        """When system prompt contains skills marker, it's broken out."""
        system_prompt = (
            "You are Hermes.\n\n"
            "## Skills (mandatory)\n"
            "Before replying, scan the skills below.\n"
            "<available_skills>\n skill1: does something\n</available_skills>\n\n"
            "Conversation started: Friday, April 10, 2026"
        )
        out = self._render(tmp_path, capsys, _make_agent_stub(system_prompt=system_prompt))
        assert "Skills index" in out

    def test_identifies_context_files_section(self, tmp_path, capsys):
        """When system prompt contains context files marker, it's broken out."""
        system_prompt = (
            "You are Hermes.\n\n"
            "# Project Context\n\n"
            "## AGENTS.md\nDevelopment guide content here...\n\n"
            "Conversation started: Friday, April 10, 2026"
        )
        out = self._render(tmp_path, capsys, _make_agent_stub(system_prompt=system_prompt))
        assert "Context files" in out
class TestCompressFocusTopic:
    """Tests for /compress <focus> — guided compression."""

    @staticmethod
    def _stub_agent():
        """Return an agent stub whose ``_compress_context`` is a recording mock.

        The mock hands back a fixed (messages, system prompt) pair so the
        tests can focus on the arguments it was called with.
        """
        agent = _make_agent_stub()
        agent.compression_enabled = True
        agent._cached_system_prompt = "You are Hermes."
        agent._compress_context = MagicMock(
            return_value=(
                [{"role": "user", "content": "test"}],
                "system prompt",
            )
        )
        return agent

    @staticmethod
    def _four_turn_history():
        """Return a fresh user/assistant history with four messages."""
        return [
            {"role": "user", "content": "a"},
            {"role": "assistant", "content": "b"},
            {"role": "user", "content": "c"},
            {"role": "assistant", "content": "d"},
        ]

    @staticmethod
    def _bare_compressor():
        """Build a ContextCompressor without running ``__init__``.

        Attributes are assigned by hand with the fixed values these tests use.
        """
        from agent.context_compressor import ContextCompressor

        compressor = ContextCompressor.__new__(ContextCompressor)
        settings = {
            "protect_first_n": 2,
            "protect_last_n": 5,
            "tail_token_budget": 20000,
            "context_length": 200000,
            "threshold_percent": 0.80,
            "threshold_tokens": 160000,
            "max_summary_tokens": 10000,
            "quiet_mode": True,
            "compression_count": 0,
            "last_prompt_tokens": 0,
            "_previous_summary": None,
            "_summary_failure_cooldown_until": 0.0,
            "summary_model": None,
        }
        for attr_name, attr_value in settings.items():
            setattr(compressor, attr_name, attr_value)
        return compressor

    @staticmethod
    def _recording_llm(summary_text):
        """Return ``(fake_call_llm, recorded)``.

        ``fake_call_llm`` stores the ``messages`` keyword it receives in
        ``recorded["messages"]`` and returns a mock response whose first
        choice carries ``summary_text`` as its message content.
        """
        recorded = {}

        def fake_call_llm(**kwargs):
            recorded["messages"] = kwargs["messages"]
            response = MagicMock()
            response.choices = [MagicMock()]
            response.choices[0].message.content = summary_text
            return response

        return fake_call_llm, recorded

    def test_focus_topic_extracted(self, tmp_path, capsys):
        """Focus topic is extracted from the command string."""
        cli_obj = _make_cli(tmp_path)
        agent = self._stub_agent()
        cli_obj.agent = agent
        cli_obj.conversation_history = self._four_turn_history()

        cli_obj._manual_compress("/compress database schema")

        assert 'focus: "database schema"' in capsys.readouterr().out
        # The topic must also reach the agent as a keyword argument.
        agent._compress_context.assert_called_once()
        passed_kwargs = agent._compress_context.call_args.kwargs
        assert passed_kwargs.get("focus_topic") == "database schema"

    def test_no_focus_topic_when_bare_command(self, tmp_path, capsys):
        """When no focus topic is provided, None is passed."""
        cli_obj = _make_cli(tmp_path)
        agent = self._stub_agent()
        cli_obj.agent = agent
        cli_obj.conversation_history = self._four_turn_history()

        cli_obj._manual_compress("/compress")

        agent._compress_context.assert_called_once()
        passed_kwargs = agent._compress_context.call_args.kwargs
        assert passed_kwargs.get("focus_topic") is None

    def test_focus_topic_in_generate_summary_prompt(self):
        """Focus topic is injected into the LLM prompt for summarization."""
        compressor = self._bare_compressor()
        turns = [
            {"role": "user", "content": "Tell me about the database schema"},
            {"role": "assistant", "content": "The schema has tables: users, orders, products."},
        ]
        fake_call_llm, recorded = self._recording_llm("## Goal\nUnderstand DB schema.")

        with patch("agent.context_compressor.call_llm", fake_call_llm):
            result = compressor._generate_summary(turns, focus_topic="database schema")

        assert result is not None
        prompt_text = recorded["messages"][0]["content"]
        assert 'FOCUS TOPIC: "database schema"' in prompt_text
        assert "PRIORITISE" in prompt_text

    def test_no_focus_topic_no_injection(self):
        """Without focus_topic, the prompt doesn't contain focus guidance."""
        compressor = self._bare_compressor()
        turns = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi"},
        ]
        fake_call_llm, recorded = self._recording_llm("## Goal\nGreeting.")

        with patch("agent.context_compressor.call_llm", fake_call_llm):
            compressor._generate_summary(turns)

        prompt_text = recorded["messages"][0]["content"]
        assert "FOCUS TOPIC" not in prompt_text

View File

@@ -1,275 +0,0 @@
"""Tests for container-aware CLI routing (NixOS container mode).
When container.enable = true in the NixOS module, the activation script
writes a .container-mode metadata file. The host CLI detects this and
execs into the container instead of running locally.
"""
import os
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from hermes_cli.config import (
_is_inside_container,
get_container_exec_info,
)
# =============================================================================
# _is_inside_container
# =============================================================================
def test_is_inside_container_dockerenv(tmp_path):
    """Detection succeeds when only the /.dockerenv path is reported present."""

    def only_dockerenv(path):
        return path == "/.dockerenv"

    with patch("os.path.exists", side_effect=only_dockerenv):
        assert _is_inside_container() is True
def test_is_inside_container_containerenv(tmp_path):
    """Detection succeeds when only /run/.containerenv is reported present."""

    def only_containerenv(path):
        return path == "/run/.containerenv"

    with patch("os.path.exists", side_effect=only_containerenv):
        assert _is_inside_container() is True
def test_is_inside_container_cgroup_docker():
    """Detects 'docker' in /proc/1/cgroup.

    ``os.path.exists`` is forced to report False for every path, and
    ``builtins.open`` is replaced so that any file opened yields a single
    cgroup-style line containing ``/docker/``.
    """
    # Imported locally so this test does not depend on the module-level
    # import list, which only brings in MagicMock and patch.
    from unittest.mock import mock_open

    # mock_open supplies a handle that works as a context manager and
    # supports read(), readline(), readlines() and iteration, so the test
    # does not depend on how the implementation consumes the file.
    fake_cgroup = mock_open(read_data="12:memory:/docker/abc123\n")

    with patch("os.path.exists", return_value=False), \
            patch("builtins.open", fake_cgroup):
        assert _is_inside_container() is True
def test_is_inside_container_false_on_host():
    """No path exists and every open() fails, so the result must be False."""
    nothing_exists = patch("os.path.exists", return_value=False)
    open_fails = patch("builtins.open", side_effect=OSError("no such file"))

    with nothing_exists, open_fails:
        assert _is_inside_container() is False
# =============================================================================
# get_container_exec_info
# =============================================================================
@pytest.fixture
def container_env(tmp_path, monkeypatch):
    """Create a temporary HERMES_HOME holding a podman ``.container-mode`` file.

    Returns the path of the created home directory; the HERMES_HOME
    environment variable points at it for the duration of the test.
    """
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    metadata = "".join(
        [
            "# Written by NixOS activation script. Do not edit manually.\n",
            "backend=podman\n",
            "container_name=hermes-agent\n",
            "hermes_bin=/data/current-package/bin/hermes\n",
        ]
    )
    (hermes_home / ".container-mode").write_text(metadata)
    return hermes_home
def test_get_container_exec_info_returns_metadata(container_env):
    """The values written by the fixture come back as backend/name/bin."""
    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info is not None
    expected = {
        "backend": "podman",
        "container_name": "hermes-agent",
        "hermes_bin": "/data/current-package/bin/hermes",
    }
    for key, value in expected.items():
        assert info[key] == value
def test_get_container_exec_info_none_inside_container(container_env):
    """With the inside-container check forced to True, no exec info is returned."""
    inside = patch("hermes_cli.config._is_inside_container", return_value=True)
    with inside:
        result = get_container_exec_info()
    assert result is None
def test_get_container_exec_info_none_without_file(tmp_path, monkeypatch):
    """An empty HERMES_HOME (no .container-mode file) yields None."""
    empty_home = tmp_path / ".hermes"
    empty_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(empty_home))

    outside = patch("hermes_cli.config._is_inside_container", return_value=False)
    with outside:
        result = get_container_exec_info()

    assert result is None
def test_get_container_exec_info_defaults():
    """A .container-mode file with no keys produces the default values."""
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        hermes_home = Path(scratch_dir) / ".hermes"
        hermes_home.mkdir()
        marker_file = hermes_home / ".container-mode"
        marker_file.write_text("# minimal file with no keys\n")

        outside = patch("hermes_cli.config._is_inside_container", return_value=False)
        home_lookup = patch("hermes_cli.config.get_hermes_home", return_value=hermes_home)
        # Call while the temporary directory still exists.
        with outside, home_lookup:
            info = get_container_exec_info()

    assert info is not None
    expected_defaults = {
        "backend": "docker",
        "container_name": "hermes-agent",
        "hermes_bin": "/data/current-package/bin/hermes",
    }
    for key, value in expected_defaults.items():
        assert info[key] == value
def test_get_container_exec_info_docker_backend(container_env):
    """Values from a rewritten .container-mode file (docker backend) are returned."""
    replacement = "".join(
        [
            "backend=docker\n",
            "container_name=hermes-custom\n",
            "hermes_bin=/opt/hermes/bin/hermes\n",
        ]
    )
    # Overwrite the podman metadata written by the fixture.
    container_env.joinpath(".container-mode").write_text(replacement)

    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    expected = {
        "backend": "docker",
        "container_name": "hermes-custom",
        "hermes_bin": "/opt/hermes/bin/hermes",
    }
    for key, value in expected.items():
        assert info[key] == value
# =============================================================================
# _exec_in_container
# =============================================================================
def test_exec_in_container_calls_execvp():
    """os.execvp receives the runtime path and the full exec argv exactly once."""
    from hermes_cli.main import _exec_in_container

    container_info = {
        "backend": "podman",
        "container_name": "hermes-agent",
        "hermes_bin": "/data/current-package/bin/hermes",
    }
    # subprocess.run result used for the "running container" case.
    run_result = MagicMock()
    run_result.returncode = 0
    run_result.stdout = "true\n"

    which_patch = patch("shutil.which", return_value="/usr/bin/podman")
    run_patch = patch("subprocess.run", return_value=run_result)
    with which_patch, run_patch, patch("os.execvp") as mock_exec:
        _exec_in_container(container_info, ["chat", "-m", "claude-sonnet-4"])

    expected_argv = [
        "/usr/bin/podman", "exec", "-it", "hermes-agent",
        "/data/current-package/bin/hermes", "chat", "-m", "claude-sonnet-4",
    ]
    mock_exec.assert_called_once_with("/usr/bin/podman", expected_argv)
def test_exec_in_container_strips_host_flag():
    """--host is absent from the argv given to os.execvp; other args remain."""
    from hermes_cli.main import _exec_in_container

    container_info = {
        "backend": "podman",
        "container_name": "hermes-agent",
        "hermes_bin": "/data/current-package/bin/hermes",
    }
    run_result = MagicMock()
    run_result.returncode = 0
    run_result.stdout = "true\n"

    which_patch = patch("shutil.which", return_value="/usr/bin/podman")
    run_patch = patch("subprocess.run", return_value=run_result)
    with which_patch, run_patch, patch("os.execvp") as mock_exec:
        _exec_in_container(container_info, ["chat", "--host", "-q", "hello"])

    # Second positional argument of execvp is the argv list.
    forwarded_argv = mock_exec.call_args[0][1]
    assert "--host" not in forwarded_argv
    assert "-q" in forwarded_argv
    assert "hello" in forwarded_argv
def test_exec_in_container_fallback_no_runtime(capsys):
    """When shutil.which finds nothing, execvp is skipped and stderr explains why."""
    from hermes_cli.main import _exec_in_container

    container_info = {
        "backend": "podman",
        "container_name": "hermes-agent",
        "hermes_bin": "/data/current-package/bin/hermes",
    }
    runtime_missing = patch("shutil.which", return_value=None)
    with runtime_missing, patch("os.execvp") as mock_exec:
        _exec_in_container(container_info, ["chat"])

    # No exec must happen; the function is expected to return instead.
    mock_exec.assert_not_called()
    assert "not found on PATH" in capsys.readouterr().err
def test_exec_in_container_fallback_container_not_running(capsys):
    """A successful subprocess.run whose stdout is "false" prevents the exec."""
    from hermes_cli.main import _exec_in_container

    container_info = {
        "backend": "docker",
        "container_name": "hermes-agent",
        "hermes_bin": "/data/current-package/bin/hermes",
    }
    run_result = MagicMock()
    run_result.returncode = 0
    run_result.stdout = "false\n"

    which_patch = patch("shutil.which", return_value="/usr/bin/docker")
    run_patch = patch("subprocess.run", return_value=run_result)
    with which_patch, run_patch, patch("os.execvp") as mock_exec:
        _exec_in_container(container_info, ["chat"])

    mock_exec.assert_not_called()
    assert "not running" in capsys.readouterr().err
def test_exec_in_container_fallback_inspect_fails():
    """A subprocess.run result with returncode 1 and empty stdout prevents the exec."""
    from hermes_cli.main import _exec_in_container

    container_info = {
        "backend": "docker",
        "container_name": "hermes-agent",
        "hermes_bin": "/data/current-package/bin/hermes",
    }
    run_result = MagicMock()
    run_result.returncode = 1
    run_result.stdout = ""

    which_patch = patch("shutil.which", return_value="/usr/bin/docker")
    run_patch = patch("subprocess.run", return_value=run_result)
    with which_patch, run_patch, patch("os.execvp") as mock_exec:
        _exec_in_container(container_info, ["chat"])

    mock_exec.assert_not_called()

View File

@@ -11,19 +11,12 @@ def _load_optional_dependencies():
return project["optional-dependencies"]
def test_matrix_extra_linux_only_in_all():
def test_matrix_extra_exists_but_excluded_from_all():
"""matrix-nio[e2e] depends on python-olm which is upstream-broken on modern
macOS (archived libolm, C++ errors with Clang 21+). The [matrix] extra is
included in [all] but gated to Linux via a platform marker so that
``hermes update`` doesn't fail on macOS."""
kept for opt-in install but deliberately excluded from [all] so one broken
upstream dep doesn't nuke every other extra during ``hermes update``."""
optional_dependencies = _load_optional_dependencies()
assert "matrix" in optional_dependencies
# Must NOT be unconditional — python-olm has no macOS wheels.
assert "hermes-agent[matrix]" not in optional_dependencies["all"]
# Must be present with a Linux platform marker.
linux_gated = [
dep for dep in optional_dependencies["all"]
if "matrix" in dep and "linux" in dep
]
assert linux_gated, "expected hermes-agent[matrix] with sys_platform=='linux' marker in [all]"

19
uv.lock generated
View File

@@ -1661,7 +1661,7 @@ dependencies = [
{ name = "fal-client" },
{ name = "fire" },
{ name = "firecrawl-py" },
{ name = "httpx", extra = ["socks"] },
{ name = "httpx" },
{ name = "jinja2" },
{ name = "openai" },
{ name = "parallel-web" },
@@ -1691,8 +1691,6 @@ all = [
{ name = "faster-whisper" },
{ name = "honcho-ai" },
{ name = "lark-oapi" },
{ name = "markdown", marker = "sys_platform == 'linux'" },
{ name = "matrix-nio", extra = ["e2e"], marker = "sys_platform == 'linux'" },
{ name = "mcp" },
{ name = "mistralai" },
{ name = "modal" },
@@ -1829,7 +1827,6 @@ requires-dist = [
{ name = "hermes-agent", extras = ["homeassistant"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["honcho"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["honcho"], marker = "extra == 'termux'" },
{ name = "hermes-agent", extras = ["matrix"], marker = "sys_platform == 'linux' and extra == 'all'" },
{ name = "hermes-agent", extras = ["mcp"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["mcp"], marker = "extra == 'termux'" },
{ name = "hermes-agent", extras = ["messaging"], marker = "extra == 'all'" },
@@ -1842,7 +1839,7 @@ requires-dist = [
{ name = "hermes-agent", extras = ["tts-premium"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["voice"], marker = "extra == 'all'" },
{ name = "honcho-ai", marker = "extra == 'honcho'", specifier = ">=2.0.1,<3" },
{ name = "httpx", extras = ["socks"], specifier = ">=0.28.1,<1" },
{ name = "httpx", specifier = ">=0.28.1,<1" },
{ name = "jinja2", specifier = ">=3.1.5,<4" },
{ name = "lark-oapi", marker = "extra == 'feishu'", specifier = ">=1.5.3,<2" },
{ name = "markdown", marker = "extra == 'matrix'", specifier = ">=3.6,<4" },
@@ -2036,9 +2033,6 @@ wheels = [
http2 = [
{ name = "h2" },
]
socks = [
{ name = "socksio" },
]
[[package]]
name = "httpx-sse"
@@ -4506,15 +4500,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
]
[[package]]
name = "socksio"
version = "1.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f8/5c/48a7d9495be3d1c651198fd99dbb6ce190e2274d0f28b9051307bdec6b85/socksio-1.0.0.tar.gz", hash = "sha256:f88beb3da5b5c38b9890469de67d0cb0f9d494b78b106ca1845f96c10b91c4ac", size = 19055, upload-time = "2020-04-17T15:50:34.664Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/37/c3/6eeb6034408dac0fa653d126c9204ade96b819c936e136c5e8a6897eee9c/socksio-1.0.0-py3-none-any.whl", hash = "sha256:95dc1f15f9b34e8d7b16f06d74b8ccf48f609af32ab33c608d08761c5dcbb1f3", size = 12763, upload-time = "2020-04-17T15:50:31.878Z" },
]
[[package]]
name = "sounddevice"
version = "0.5.5"

View File

@@ -122,17 +122,6 @@ services.hermes-agent.environmentFiles = [ "/var/lib/hermes/env" ];
Setting `addToSystemPackages = true` does two things: puts the `hermes` CLI on your system PATH **and** sets `HERMES_HOME` system-wide so the interactive CLI shares state (sessions, skills, cron) with the gateway service. Without it, running `hermes` in your shell creates a separate `~/.hermes/` directory.
:::
:::info Container-aware CLI
When `container.enable = true` and `addToSystemPackages = true`, running `hermes chat` on the host **automatically routes into the managed container**. This means your interactive CLI session runs inside the same environment as the gateway service — with access to all container-installed packages and tools.
- The routing is transparent: `hermes chat` detects container mode and does `podman exec` / `docker exec` under the hood
- All CLI flags are forwarded: `-m`, `--resume`, `--query`, etc. work as normal
- Use `hermes chat --host` to bypass container routing and run directly on the host
- If the container isn't running, the CLI falls back to host execution automatically
Other `hermes` subcommands (`version`, `config`, `sessions`, `setup`) always run on the host since they only need access to shared state files.
:::
### Verify It Works
After `nixos-rebuild switch`, check that the service is running: