Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 205891c9c8 | |||
| f24db23458 | |||
| d132e344d7 | |||
| 22f41daded | |||
| 7c7feaa033 | |||
| 2f80bd9f87 | |||
| 23e5e8dde9 | |||
| e99aca98ab | |||
| 7e30e97a59 | |||
| db4dfea7ec | |||
| 17254a7692 | |||
| adf188c439 | |||
| 21958a55d1 | |||
| 947827bba0 | |||
| e4a3ffa9c1 | |||
| 1fa3737134 |
@@ -253,18 +253,24 @@ Write only the summary body. Do not include any preamble or prefix; the system w
|
||||
"""Pull a compress-end boundary backward to avoid splitting a
|
||||
tool_call / result group.
|
||||
|
||||
If the message just before ``idx`` is an assistant message with
|
||||
tool_calls, those tool results will start at ``idx`` and would be
|
||||
separated from their parent. Move backwards to include the whole
|
||||
group in the summarised region.
|
||||
If the boundary falls in the middle of a tool-result group (i.e.
|
||||
there are consecutive tool messages before ``idx``), walk backward
|
||||
past all of them to find the parent assistant message. If found,
|
||||
move the boundary before the assistant so the entire
|
||||
assistant + tool_results group is included in the summarised region
|
||||
rather than being split (which causes silent data loss when
|
||||
``_sanitize_tool_pairs`` removes the orphaned tail results).
|
||||
"""
|
||||
if idx <= 0 or idx >= len(messages):
|
||||
return idx
|
||||
prev = messages[idx - 1]
|
||||
if prev.get("role") == "assistant" and prev.get("tool_calls"):
|
||||
# The results for this assistant turn sit at idx..idx+k.
|
||||
# Include the assistant message in the summarised region too.
|
||||
idx -= 1
|
||||
# Walk backward past consecutive tool results
|
||||
check = idx - 1
|
||||
while check >= 0 and messages[check].get("role") == "tool":
|
||||
check -= 1
|
||||
# If we landed on the parent assistant with tool_calls, pull the
|
||||
# boundary before it so the whole group gets summarised together.
|
||||
if check >= 0 and messages[check].get("role") == "assistant" and messages[check].get("tool_calls"):
|
||||
idx = check
|
||||
return idx
|
||||
|
||||
def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
|
||||
|
||||
+37
-18
@@ -429,11 +429,42 @@ def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE
|
||||
return head + marker + tail
|
||||
|
||||
|
||||
def build_context_files_prompt(cwd: Optional[str] = None) -> str:
|
||||
def load_soul_md() -> Optional[str]:
|
||||
"""Load SOUL.md from HERMES_HOME and return its content, or None.
|
||||
|
||||
Used as the agent identity (slot #1 in the system prompt). When this
|
||||
returns content, ``build_context_files_prompt`` should be called with
|
||||
``skip_soul=True`` so SOUL.md isn't injected twice.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import ensure_hermes_home
|
||||
ensure_hermes_home()
|
||||
except Exception as e:
|
||||
logger.debug("Could not ensure HERMES_HOME before loading SOUL.md: %s", e)
|
||||
|
||||
soul_path = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) / "SOUL.md"
|
||||
if not soul_path.exists():
|
||||
return None
|
||||
try:
|
||||
content = soul_path.read_text(encoding="utf-8").strip()
|
||||
if not content:
|
||||
return None
|
||||
content = _scan_context_content(content, "SOUL.md")
|
||||
content = _truncate_content(content, "SOUL.md")
|
||||
return content
|
||||
except Exception as e:
|
||||
logger.debug("Could not read SOUL.md from %s: %s", soul_path, e)
|
||||
return None
|
||||
|
||||
|
||||
def build_context_files_prompt(cwd: Optional[str] = None, skip_soul: bool = False) -> str:
|
||||
"""Discover and load context files for the system prompt.
|
||||
|
||||
Discovery: AGENTS.md (recursive), .cursorrules / .cursor/rules/*.mdc,
|
||||
and SOUL.md from HERMES_HOME only. Each capped at 20,000 chars.
|
||||
|
||||
When *skip_soul* is True, SOUL.md is not included here (it was already
|
||||
loaded via ``load_soul_md()`` for the identity slot).
|
||||
"""
|
||||
if cwd is None:
|
||||
cwd = os.getcwd()
|
||||
@@ -523,23 +554,11 @@ def build_context_files_prompt(cwd: Optional[str] = None) -> str:
|
||||
hermes_md_content = _truncate_content(hermes_md_content, ".hermes.md")
|
||||
sections.append(hermes_md_content)
|
||||
|
||||
# SOUL.md from HERMES_HOME only
|
||||
try:
|
||||
from hermes_cli.config import ensure_hermes_home
|
||||
ensure_hermes_home()
|
||||
except Exception as e:
|
||||
logger.debug("Could not ensure HERMES_HOME before loading SOUL.md: %s", e)
|
||||
|
||||
soul_path = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) / "SOUL.md"
|
||||
if soul_path.exists():
|
||||
try:
|
||||
content = soul_path.read_text(encoding="utf-8").strip()
|
||||
if content:
|
||||
content = _scan_context_content(content, "SOUL.md")
|
||||
content = _truncate_content(content, "SOUL.md")
|
||||
sections.append(content)
|
||||
except Exception as e:
|
||||
logger.debug("Could not read SOUL.md from %s: %s", soul_path, e)
|
||||
# SOUL.md from HERMES_HOME only — skip when already loaded as identity
|
||||
if not skip_soul:
|
||||
soul_content = load_soul_md()
|
||||
if soul_content:
|
||||
sections.append(soul_content)
|
||||
|
||||
if not sections:
|
||||
return ""
|
||||
|
||||
+12
-13
@@ -451,7 +451,7 @@ def load_gateway_config() -> GatewayConfig:
|
||||
"pair",
|
||||
)
|
||||
|
||||
# Bridge per-platform unauthorized_dm_behavior from config.yaml
|
||||
# Bridge per-platform settings from config.yaml into gw_data
|
||||
platforms_data = gw_data.setdefault("platforms", {})
|
||||
if not isinstance(platforms_data, dict):
|
||||
platforms_data = {}
|
||||
@@ -462,7 +462,16 @@ def load_gateway_config() -> GatewayConfig:
|
||||
platform_cfg = yaml_cfg.get(plat.value)
|
||||
if not isinstance(platform_cfg, dict):
|
||||
continue
|
||||
if "unauthorized_dm_behavior" not in platform_cfg:
|
||||
# Collect bridgeable keys from this platform section
|
||||
bridged = {}
|
||||
if "unauthorized_dm_behavior" in platform_cfg:
|
||||
bridged["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior(
|
||||
platform_cfg.get("unauthorized_dm_behavior"),
|
||||
gw_data.get("unauthorized_dm_behavior", "pair"),
|
||||
)
|
||||
if "reply_prefix" in platform_cfg:
|
||||
bridged["reply_prefix"] = platform_cfg["reply_prefix"]
|
||||
if not bridged:
|
||||
continue
|
||||
plat_data = platforms_data.setdefault(plat.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
@@ -472,10 +481,7 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if not isinstance(extra, dict):
|
||||
extra = {}
|
||||
plat_data["extra"] = extra
|
||||
extra["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior(
|
||||
platform_cfg.get("unauthorized_dm_behavior"),
|
||||
gw_data.get("unauthorized_dm_behavior", "pair"),
|
||||
)
|
||||
extra.update(bridged)
|
||||
|
||||
# Discord settings → env vars (env vars take precedence)
|
||||
discord_cfg = yaml_cfg.get("discord", {})
|
||||
@@ -489,13 +495,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["DISCORD_FREE_RESPONSE_CHANNELS"] = str(frc)
|
||||
if "auto_thread" in discord_cfg and not os.getenv("DISCORD_AUTO_THREAD"):
|
||||
os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
|
||||
|
||||
# Bridge whatsapp settings from config.yaml into platform config
|
||||
whatsapp_cfg = yaml_cfg.get("whatsapp", {})
|
||||
if isinstance(whatsapp_cfg, dict) and "reply_prefix" in whatsapp_cfg:
|
||||
if Platform.WHATSAPP not in config.platforms:
|
||||
config.platforms[Platform.WHATSAPP] = PlatformConfig()
|
||||
config.platforms[Platform.WHATSAPP].extra["reply_prefix"] = whatsapp_cfg["reply_prefix"]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -1099,6 +1099,22 @@ class BasePlatformAdapter(ABC):
|
||||
print(f"[{self.name}] Error handling message: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Send the error to the user so they aren't left with radio silence
|
||||
try:
|
||||
error_type = type(e).__name__
|
||||
error_detail = str(e)[:300] if str(e) else "no details available"
|
||||
_thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
await self.send(
|
||||
chat_id=event.source.chat_id,
|
||||
content=(
|
||||
f"Sorry, I encountered an error ({error_type}).\n"
|
||||
f"{error_detail}\n"
|
||||
"Try again or use /reset to start a fresh session."
|
||||
),
|
||||
metadata=_thread_metadata,
|
||||
)
|
||||
except Exception:
|
||||
pass # Last resort — don't let error reporting crash the handler
|
||||
finally:
|
||||
# Stop typing indicator
|
||||
typing_task.cancel()
|
||||
|
||||
@@ -171,6 +171,12 @@ def _resolve_openrouter_runtime(
|
||||
model_cfg = _get_model_config()
|
||||
cfg_base_url = model_cfg.get("base_url") if isinstance(model_cfg.get("base_url"), str) else ""
|
||||
cfg_provider = model_cfg.get("provider") if isinstance(model_cfg.get("provider"), str) else ""
|
||||
cfg_api_key = ""
|
||||
for k in ("api_key", "api"):
|
||||
v = model_cfg.get(k)
|
||||
if isinstance(v, str) and v.strip():
|
||||
cfg_api_key = v.strip()
|
||||
break
|
||||
requested_norm = (requested_provider or "").strip().lower()
|
||||
cfg_provider = cfg_provider.strip().lower()
|
||||
|
||||
@@ -178,26 +184,24 @@ def _resolve_openrouter_runtime(
|
||||
env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
|
||||
|
||||
use_config_base_url = False
|
||||
if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url:
|
||||
if cfg_base_url.strip() and not explicit_base_url:
|
||||
if requested_norm == "auto":
|
||||
if not cfg_provider or cfg_provider == "auto":
|
||||
use_config_base_url = True
|
||||
elif requested_norm == "custom":
|
||||
# Persisted custom endpoints store their base URL in config.yaml.
|
||||
# If OPENAI_BASE_URL is not currently set in the environment, keep
|
||||
# honoring that saved endpoint instead of falling back to OpenRouter.
|
||||
if cfg_provider == "custom":
|
||||
if (not cfg_provider or cfg_provider == "auto") and not env_openai_base_url:
|
||||
use_config_base_url = True
|
||||
elif requested_norm == "custom" and cfg_provider == "custom":
|
||||
# provider: custom — use base_url from config (Fixes #1760).
|
||||
use_config_base_url = True
|
||||
|
||||
# When the user explicitly requested the openrouter provider, skip
|
||||
# OPENAI_BASE_URL — it typically points to a custom / non-OpenRouter
|
||||
# endpoint and would prevent switching back to OpenRouter (#874).
|
||||
skip_openai_base = requested_norm == "openrouter"
|
||||
|
||||
# For custom, prefer config base_url over env so config.yaml is honored (#1760).
|
||||
base_url = (
|
||||
(explicit_base_url or "").strip()
|
||||
or ("" if skip_openai_base else env_openai_base_url)
|
||||
or (cfg_base_url.strip() if use_config_base_url else "")
|
||||
or ("" if skip_openai_base else env_openai_base_url)
|
||||
or env_openrouter_base_url
|
||||
or OPENROUTER_BASE_URL
|
||||
).rstrip("/")
|
||||
@@ -216,8 +220,10 @@ def _resolve_openrouter_runtime(
|
||||
or ""
|
||||
)
|
||||
else:
|
||||
# Custom endpoint: use api_key from config when using config base_url (#1760).
|
||||
api_key = (
|
||||
explicit_api_key
|
||||
or (cfg_api_key if use_config_base_url else "")
|
||||
or os.getenv("OPENAI_API_KEY")
|
||||
or os.getenv("OPENROUTER_API_KEY")
|
||||
or ""
|
||||
@@ -334,13 +340,23 @@ def resolve_runtime_provider(
|
||||
if pconfig and pconfig.auth_type == "api_key":
|
||||
creds = resolve_api_key_provider_credentials(provider)
|
||||
model_cfg = _get_model_config()
|
||||
base_url = creds.get("base_url", "").rstrip("/")
|
||||
api_mode = "chat_completions"
|
||||
if provider == "copilot":
|
||||
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
|
||||
else:
|
||||
# Check explicit api_mode from model config first
|
||||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||
if configured_mode:
|
||||
api_mode = configured_mode
|
||||
# Auto-detect Anthropic-compatible endpoints by URL convention
|
||||
# (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
|
||||
elif base_url.rstrip("/").endswith("/anthropic"):
|
||||
api_mode = "anthropic_messages"
|
||||
return {
|
||||
"provider": provider,
|
||||
"api_mode": api_mode,
|
||||
"base_url": creds.get("base_url", "").rstrip("/"),
|
||||
"base_url": base_url,
|
||||
"api_key": creds.get("api_key", ""),
|
||||
"source": creds.get("source", "env"),
|
||||
"requested_provider": requested_provider,
|
||||
|
||||
+39
-20
@@ -85,7 +85,7 @@ from agent.model_metadata import (
|
||||
)
|
||||
from agent.context_compressor import ContextCompressor
|
||||
from agent.prompt_caching import apply_anthropic_cache_control
|
||||
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt
|
||||
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md
|
||||
from agent.usage_pricing import estimate_usage_cost, normalize_usage
|
||||
from agent.display import (
|
||||
KawaiiSpinner, build_tool_preview as _build_tool_preview,
|
||||
@@ -493,6 +493,11 @@ class AIAgent:
|
||||
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
|
||||
self.api_mode = "anthropic_messages"
|
||||
self.provider = "anthropic"
|
||||
elif self._base_url_lower.rstrip("/").endswith("/anthropic"):
|
||||
# Third-party Anthropic-compatible endpoints (e.g. MiniMax, DashScope)
|
||||
# use a URL convention ending in /anthropic. Auto-detect these so the
|
||||
# Anthropic Messages API adapter is used instead of chat completions.
|
||||
self.api_mode = "anthropic_messages"
|
||||
else:
|
||||
self.api_mode = "chat_completions"
|
||||
|
||||
@@ -1948,28 +1953,38 @@ class AIAgent:
|
||||
is stable across all turns in a session, maximizing prefix cache hits.
|
||||
"""
|
||||
# Layers (in order):
|
||||
# 1. Default agent identity (always present)
|
||||
# 1. Agent identity — SOUL.md when available, else DEFAULT_AGENT_IDENTITY
|
||||
# 2. User / gateway system prompt (if provided)
|
||||
# 3. Persistent memory (frozen snapshot)
|
||||
# 4. Skills guidance (if skills tools are loaded)
|
||||
# 5. Context files (SOUL.md, AGENTS.md, .cursorrules)
|
||||
# 5. Context files (AGENTS.md, .cursorrules — SOUL.md excluded here when used as identity)
|
||||
# 6. Current date & time (frozen at build time)
|
||||
# 7. Platform-specific formatting hint
|
||||
# If an AI peer name is configured in Honcho, personalise the identity line.
|
||||
_ai_peer_name = (
|
||||
self._honcho_config.ai_peer
|
||||
if self._honcho_config and self._honcho_config.ai_peer != "hermes"
|
||||
else None
|
||||
)
|
||||
if _ai_peer_name:
|
||||
_identity = DEFAULT_AGENT_IDENTITY.replace(
|
||||
"You are Hermes Agent",
|
||||
f"You are {_ai_peer_name}",
|
||||
1,
|
||||
|
||||
# Try SOUL.md as primary identity (unless context files are skipped)
|
||||
_soul_loaded = False
|
||||
if not self.skip_context_files:
|
||||
_soul_content = load_soul_md()
|
||||
if _soul_content:
|
||||
prompt_parts = [_soul_content]
|
||||
_soul_loaded = True
|
||||
|
||||
if not _soul_loaded:
|
||||
# Fallback to hardcoded identity
|
||||
_ai_peer_name = (
|
||||
self._honcho_config.ai_peer
|
||||
if self._honcho_config and self._honcho_config.ai_peer != "hermes"
|
||||
else None
|
||||
)
|
||||
else:
|
||||
_identity = DEFAULT_AGENT_IDENTITY
|
||||
prompt_parts = [_identity]
|
||||
if _ai_peer_name:
|
||||
_identity = DEFAULT_AGENT_IDENTITY.replace(
|
||||
"You are Hermes Agent",
|
||||
f"You are {_ai_peer_name}",
|
||||
1,
|
||||
)
|
||||
else:
|
||||
_identity = DEFAULT_AGENT_IDENTITY
|
||||
prompt_parts = [_identity]
|
||||
|
||||
# Tool-aware behavioral guidance: only inject when the tools are loaded
|
||||
tool_guidance = []
|
||||
@@ -2065,7 +2080,7 @@ class AIAgent:
|
||||
prompt_parts.append(skills_prompt)
|
||||
|
||||
if not self.skip_context_files:
|
||||
context_files_prompt = build_context_files_prompt()
|
||||
context_files_prompt = build_context_files_prompt(skip_soul=_soul_loaded)
|
||||
if context_files_prompt:
|
||||
prompt_parts.append(context_files_prompt)
|
||||
|
||||
@@ -2074,6 +2089,10 @@ class AIAgent:
|
||||
timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
|
||||
if self.pass_session_id and self.session_id:
|
||||
timestamp_line += f"\nSession ID: {self.session_id}"
|
||||
if self.model:
|
||||
timestamp_line += f"\nModel: {self.model}"
|
||||
if self.provider:
|
||||
timestamp_line += f"\nProvider: {self.provider}"
|
||||
prompt_parts.append(timestamp_line)
|
||||
|
||||
platform_key = (self.platform or "").lower().strip()
|
||||
@@ -3460,11 +3479,11 @@ class AIAgent:
|
||||
|
||||
# Determine api_mode from provider
|
||||
fb_api_mode = "chat_completions"
|
||||
fb_base_url = str(fb_client.base_url)
|
||||
if fb_provider == "openai-codex":
|
||||
fb_api_mode = "codex_responses"
|
||||
elif fb_provider == "anthropic":
|
||||
elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
|
||||
fb_api_mode = "anthropic_messages"
|
||||
fb_base_url = str(fb_client.base_url)
|
||||
|
||||
old_model = self.model
|
||||
self.model = fb_model
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: huggingface-hub
|
||||
description: Hugging Face Hub CLI (hf) — download/upload models and datasets, manage repos, run SQL on datasets, deploy inference endpoints, manage Spaces, and more. Use when working with HuggingFace models, datasets, or infrastructure.
|
||||
description: Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Spaces and buckets.
|
||||
version: 1.0.0
|
||||
author: Hugging Face
|
||||
license: MIT
|
||||
|
||||
@@ -0,0 +1,199 @@
|
||||
"""Tests for context compression boundary alignment.
|
||||
|
||||
Verifies that _align_boundary_backward correctly handles tool result groups
|
||||
so that parallel tool calls are never split during compression.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from agent.context_compressor import ContextCompressor
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _tc(call_id: str) -> dict:
|
||||
"""Create a minimal tool_call dict."""
|
||||
return {"id": call_id, "type": "function", "function": {"name": "test", "arguments": "{}"}}
|
||||
|
||||
|
||||
def _tool_result(call_id: str, content: str = "result") -> dict:
|
||||
"""Create a tool result message."""
|
||||
return {"role": "tool", "tool_call_id": call_id, "content": content}
|
||||
|
||||
|
||||
def _assistant_with_tools(*call_ids: str) -> dict:
|
||||
"""Create an assistant message with tool_calls."""
|
||||
return {"role": "assistant", "tool_calls": [_tc(cid) for cid in call_ids], "content": None}
|
||||
|
||||
|
||||
def _make_compressor(**kwargs) -> ContextCompressor:
|
||||
defaults = dict(
|
||||
model="test-model",
|
||||
threshold_percent=0.75,
|
||||
protect_first_n=3,
|
||||
protect_last_n=4,
|
||||
quiet_mode=True,
|
||||
)
|
||||
defaults.update(kwargs)
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=8000):
|
||||
return ContextCompressor(**defaults)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _align_boundary_backward tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAlignBoundaryBackward:
|
||||
"""Test that compress-end boundary never splits a tool_call/result group."""
|
||||
|
||||
def test_boundary_at_clean_position(self):
|
||||
"""Boundary after a user message — no adjustment needed."""
|
||||
comp = _make_compressor()
|
||||
messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi"},
|
||||
{"role": "user", "content": "do something"},
|
||||
_assistant_with_tools("tc_1"),
|
||||
_tool_result("tc_1", "done"),
|
||||
{"role": "user", "content": "thanks"}, # idx=6
|
||||
{"role": "assistant", "content": "np"},
|
||||
]
|
||||
# Boundary at 7, messages[6] = user — no adjustment
|
||||
assert comp._align_boundary_backward(messages, 7) == 7
|
||||
|
||||
def test_boundary_after_assistant_with_tools(self):
|
||||
"""Original case: boundary right after assistant with tool_calls."""
|
||||
comp = _make_compressor()
|
||||
messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi"},
|
||||
_assistant_with_tools("tc_1", "tc_2"), # idx=3
|
||||
_tool_result("tc_1"), # idx=4
|
||||
_tool_result("tc_2"), # idx=5
|
||||
{"role": "user", "content": "next"},
|
||||
]
|
||||
# Boundary at 4, messages[3] = assistant with tool_calls → pull back to 3
|
||||
assert comp._align_boundary_backward(messages, 4) == 3
|
||||
|
||||
def test_boundary_in_middle_of_tool_results(self):
|
||||
"""THE BUG: boundary falls between tool results of the same group."""
|
||||
comp = _make_compressor()
|
||||
messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi"},
|
||||
{"role": "user", "content": "do 5 things"},
|
||||
_assistant_with_tools("tc_A", "tc_B", "tc_C", "tc_D", "tc_E"), # idx=4
|
||||
_tool_result("tc_A", "result A"), # idx=5
|
||||
_tool_result("tc_B", "result B"), # idx=6
|
||||
_tool_result("tc_C", "result C"), # idx=7
|
||||
_tool_result("tc_D", "result D"), # idx=8
|
||||
_tool_result("tc_E", "result E"), # idx=9
|
||||
{"role": "user", "content": "ok"},
|
||||
{"role": "assistant", "content": "done"},
|
||||
]
|
||||
# Boundary at 8 — in middle of tool results. messages[7] = tool result.
|
||||
# Must walk back to idx=4 (the parent assistant).
|
||||
assert comp._align_boundary_backward(messages, 8) == 4
|
||||
|
||||
def test_boundary_at_last_tool_result(self):
|
||||
"""Boundary right after last tool result — messages[idx-1] is tool."""
|
||||
comp = _make_compressor()
|
||||
messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi"},
|
||||
_assistant_with_tools("tc_1", "tc_2", "tc_3"), # idx=3
|
||||
_tool_result("tc_1"), # idx=4
|
||||
_tool_result("tc_2"), # idx=5
|
||||
_tool_result("tc_3"), # idx=6
|
||||
{"role": "user", "content": "next"},
|
||||
]
|
||||
# Boundary at 7 — messages[6] is last tool result.
|
||||
# Walk back: [6]=tool, [5]=tool, [4]=tool, [3]=assistant with tools → idx=3
|
||||
assert comp._align_boundary_backward(messages, 7) == 3
|
||||
|
||||
def test_boundary_with_consecutive_tool_groups(self):
|
||||
"""Two consecutive tool groups — only walk back to the nearest parent."""
|
||||
comp = _make_compressor()
|
||||
messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "hello"},
|
||||
_assistant_with_tools("tc_1"), # idx=2
|
||||
_tool_result("tc_1"), # idx=3
|
||||
{"role": "user", "content": "more"},
|
||||
_assistant_with_tools("tc_2", "tc_3"), # idx=5
|
||||
_tool_result("tc_2"), # idx=6
|
||||
_tool_result("tc_3"), # idx=7
|
||||
{"role": "user", "content": "done"},
|
||||
]
|
||||
# Boundary at 7 — messages[6] = tool result for tc_2 group
|
||||
# Walk back: [6]=tool, [5]=assistant with tools → idx=5
|
||||
assert comp._align_boundary_backward(messages, 7) == 5
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# End-to-end: compression must not lose tool results
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCompressionToolResultPreservation:
|
||||
"""Verify that compress() never silently drops tool results."""
|
||||
|
||||
def test_parallel_tool_results_not_lost(self):
|
||||
"""The exact scenario that triggered silent data loss before the fix."""
|
||||
comp = _make_compressor(protect_first_n=3, protect_last_n=4)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": "You are helpful."}, # 0
|
||||
{"role": "user", "content": "Hello"}, # 1
|
||||
{"role": "assistant", "content": "Hi there!"}, # 2 (end of head)
|
||||
{"role": "user", "content": "Read 7 files for me"}, # 3
|
||||
_assistant_with_tools("tc_A", "tc_B", "tc_C", "tc_D", "tc_E", "tc_F", "tc_G"), # 4
|
||||
_tool_result("tc_A", "content of file A"), # 5
|
||||
_tool_result("tc_B", "content of file B"), # 6
|
||||
_tool_result("tc_C", "content of file C"), # 7
|
||||
_tool_result("tc_D", "content of file D"), # 8
|
||||
_tool_result("tc_E", "content of file E"), # 9
|
||||
_tool_result("tc_F", "content of file F"), # 10
|
||||
_tool_result("tc_G", "CRITICAL DATA in file G"), # 11 ← compress_end=15-4=11
|
||||
{"role": "user", "content": "Now summarize them"}, # 12
|
||||
{"role": "assistant", "content": "Here is the summary..."}, # 13
|
||||
{"role": "user", "content": "Thanks"}, # 14
|
||||
]
|
||||
# 15 messages. compress_end = 15 - 4 = 11 (before fix: splits tool group)
|
||||
|
||||
fake_summary = "[Summary of earlier conversation]"
|
||||
with patch.object(comp, "_generate_summary", return_value=fake_summary):
|
||||
result = comp.compress(messages, current_tokens=7000)
|
||||
|
||||
# After compression, no tool results should be orphaned/lost.
|
||||
# All tool results in the result must have a matching assistant tool_call.
|
||||
assistant_call_ids = set()
|
||||
for msg in result:
|
||||
if msg.get("role") == "assistant":
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
cid = tc.get("id", "")
|
||||
if cid:
|
||||
assistant_call_ids.add(cid)
|
||||
|
||||
tool_result_ids = set()
|
||||
for msg in result:
|
||||
if msg.get("role") == "tool":
|
||||
cid = msg.get("tool_call_id")
|
||||
if cid:
|
||||
tool_result_ids.add(cid)
|
||||
|
||||
# Every tool result must have a parent — no orphans
|
||||
orphaned = tool_result_ids - assistant_call_ids
|
||||
assert not orphaned, f"Orphaned tool results found (data loss!): {orphaned}"
|
||||
|
||||
# Every assistant tool_call must have a real result (not a stub)
|
||||
for msg in result:
|
||||
if msg.get("role") == "tool":
|
||||
assert msg["content"] != "[Result from earlier conversation — see context summary above]", \
|
||||
f"Stub result found for {msg.get('tool_call_id')} — real result was lost"
|
||||
@@ -177,6 +177,50 @@ def test_custom_endpoint_uses_saved_config_base_url_when_env_missing(monkeypatch
|
||||
assert resolved["api_key"] == "local-key"
|
||||
|
||||
|
||||
def test_custom_endpoint_uses_config_api_key_over_env(monkeypatch):
|
||||
"""provider: custom with base_url and api_key in config uses them (#1760)."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"_get_model_config",
|
||||
lambda: {
|
||||
"provider": "custom",
|
||||
"base_url": "https://my-api.example.com/v1",
|
||||
"api_key": "config-api-key",
|
||||
},
|
||||
)
|
||||
monkeypatch.setenv("OPENAI_BASE_URL", "https://other.example.com/v1")
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "env-key")
|
||||
monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
assert resolved["base_url"] == "https://my-api.example.com/v1"
|
||||
assert resolved["api_key"] == "config-api-key"
|
||||
|
||||
|
||||
def test_custom_endpoint_uses_config_api_field_when_no_api_key(monkeypatch):
|
||||
"""provider: custom with 'api' in config uses it as api_key (#1760)."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"_get_model_config",
|
||||
lambda: {
|
||||
"provider": "custom",
|
||||
"base_url": "https://custom.example.com/v1",
|
||||
"api": "config-api-field",
|
||||
},
|
||||
)
|
||||
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
assert resolved["base_url"] == "https://custom.example.com/v1"
|
||||
assert resolved["api_key"] == "config-api-field"
|
||||
|
||||
|
||||
def test_custom_endpoint_auto_provider_prefers_openai_key(monkeypatch):
|
||||
"""Auto provider with non-OpenRouter base_url should prefer OPENAI_API_KEY.
|
||||
|
||||
@@ -394,10 +438,75 @@ def test_named_custom_provider_without_api_mode_defaults(monkeypatch):
|
||||
lambda p: {
|
||||
"name": "my-server",
|
||||
"base_url": "http://localhost:8000/v1",
|
||||
"api_key": "sk-test",
|
||||
"api_key": "***",
|
||||
},
|
||||
)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="my-server")
|
||||
|
||||
assert resolved["api_mode"] == "chat_completions"
|
||||
|
||||
|
||||
def test_anthropic_messages_in_valid_api_modes():
|
||||
"""anthropic_messages should be accepted by _parse_api_mode."""
|
||||
assert rp._parse_api_mode("anthropic_messages") == "anthropic_messages"
|
||||
|
||||
|
||||
def test_api_key_provider_anthropic_url_auto_detection(monkeypatch):
|
||||
"""API-key providers with /anthropic base URL should auto-detect anthropic_messages mode."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
|
||||
monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key")
|
||||
monkeypatch.setenv("MINIMAX_BASE_URL", "https://api.minimax.io/anthropic")
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="minimax")
|
||||
|
||||
assert resolved["provider"] == "minimax"
|
||||
assert resolved["api_mode"] == "anthropic_messages"
|
||||
assert resolved["base_url"] == "https://api.minimax.io/anthropic"
|
||||
|
||||
|
||||
def test_api_key_provider_explicit_api_mode_config(monkeypatch):
|
||||
"""API-key providers should respect api_mode from model config."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {"api_mode": "anthropic_messages"})
|
||||
monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key")
|
||||
monkeypatch.delenv("MINIMAX_BASE_URL", raising=False)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="minimax")
|
||||
|
||||
assert resolved["provider"] == "minimax"
|
||||
assert resolved["api_mode"] == "anthropic_messages"
|
||||
|
||||
|
||||
def test_api_key_provider_default_url_stays_chat_completions(monkeypatch):
|
||||
"""API-key providers with default /v1 URL should stay on chat_completions."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
|
||||
monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key")
|
||||
monkeypatch.delenv("MINIMAX_BASE_URL", raising=False)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="minimax")
|
||||
|
||||
assert resolved["provider"] == "minimax"
|
||||
assert resolved["api_mode"] == "chat_completions"
|
||||
assert resolved["base_url"] == "https://api.minimax.io/v1"
|
||||
|
||||
|
||||
def test_named_custom_provider_anthropic_api_mode(monkeypatch):
|
||||
"""Custom providers should accept api_mode: anthropic_messages."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "my-anthropic-proxy")
|
||||
monkeypatch.setattr(
|
||||
rp, "_get_named_custom_provider",
|
||||
lambda p: {
|
||||
"name": "my-anthropic-proxy",
|
||||
"base_url": "https://proxy.example.com/anthropic",
|
||||
"api_key": "test-key",
|
||||
"api_mode": "anthropic_messages",
|
||||
},
|
||||
)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="my-anthropic-proxy")
|
||||
|
||||
assert resolved["api_mode"] == "anthropic_messages"
|
||||
assert resolved["base_url"] == "https://proxy.example.com/anthropic"
|
||||
|
||||
@@ -28,17 +28,19 @@ Primary files:
|
||||
|
||||
The cached system prompt is assembled in roughly this order:
|
||||
|
||||
1. default agent identity
|
||||
1. agent identity — `SOUL.md` from `HERMES_HOME` when available, otherwise falls back to `DEFAULT_AGENT_IDENTITY` in `prompt_builder.py`
|
||||
2. tool-aware behavior guidance
|
||||
3. Honcho static block (when active)
|
||||
4. optional system message
|
||||
5. frozen MEMORY snapshot
|
||||
6. frozen USER profile snapshot
|
||||
7. skills index
|
||||
8. context files (`AGENTS.md`, `SOUL.md`, `.cursorrules`, `.cursor/rules/*.mdc`)
|
||||
8. context files (`AGENTS.md`, `.cursorrules`, `.cursor/rules/*.mdc`) — SOUL.md is **not** included here when it was already loaded as the identity in step 1
|
||||
9. timestamp / optional session ID
|
||||
10. platform hint
|
||||
|
||||
When `skip_context_files` is set (e.g., subagent delegation), SOUL.md is not loaded and the hardcoded `DEFAULT_AGENT_IDENTITY` is used instead.
|
||||
|
||||
## API-call-time-only layers
|
||||
|
||||
These are intentionally *not* persisted as part of the cached system prompt:
|
||||
@@ -59,10 +61,11 @@ Local memory and user profile data are injected as frozen snapshots at session s
|
||||
`agent/prompt_builder.py` scans and sanitizes:
|
||||
|
||||
- `AGENTS.md`
|
||||
- `SOUL.md`
|
||||
- `.cursorrules`
|
||||
- `.cursor/rules/*.mdc`
|
||||
|
||||
`SOUL.md` is loaded separately via `load_soul_md()` for the identity slot. When it loads successfully, `build_context_files_prompt(skip_soul=True)` prevents it from appearing twice.
|
||||
|
||||
Long files are truncated before injection.
|
||||
|
||||
## Skills index
|
||||
|
||||
@@ -6,9 +6,9 @@ description: "How to use SOUL.md to shape Hermes Agent's default voice, what bel
|
||||
|
||||
# Use SOUL.md with Hermes
|
||||
|
||||
`SOUL.md` is the easiest way to give Hermes a stable, default voice.
|
||||
`SOUL.md` is the **primary identity** for your Hermes instance. It's the first thing in the system prompt — it defines who the agent is, how it speaks, and what it avoids.
|
||||
|
||||
If you want Hermes to feel like the same assistant every time you talk to it — without repeating instructions in every session — this is the file to use.
|
||||
If you want Hermes to feel like the same assistant every time you talk to it — or if you want to replace the Hermes persona entirely with your own — this is the file to use.
|
||||
|
||||
## What SOUL.md is for
|
||||
|
||||
@@ -65,11 +65,11 @@ Important:
|
||||
|
||||
## How Hermes uses it
|
||||
|
||||
When Hermes starts a session, it reads `SOUL.md` from `HERMES_HOME`, scans it for prompt-injection patterns, truncates it if needed, and injects the content directly into the prompt.
|
||||
When Hermes starts a session, it reads `SOUL.md` from `HERMES_HOME`, scans it for prompt-injection patterns, truncates it if needed, and uses it as the **agent identity** — slot #1 in the system prompt. This means SOUL.md completely replaces the built-in default identity text.
|
||||
|
||||
No wrapper language is added around the file.
|
||||
If SOUL.md is missing, empty, or cannot be loaded, Hermes falls back to a built-in default identity.
|
||||
|
||||
So the content itself matters. Write the way you want Hermes to think and speak.
|
||||
No wrapper language is added around the file. The content itself matters — write the way you want your agent to think and speak.
|
||||
|
||||
## A good first edit
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ All settings are stored in the `~/.hermes/` directory for easy access.
|
||||
├── config.yaml # Settings (model, terminal, TTS, compression, etc.)
|
||||
├── .env # API keys and secrets
|
||||
├── auth.json # OAuth provider credentials (Nous Portal, etc.)
|
||||
├── SOUL.md # Optional: global persona (agent embodies this personality)
|
||||
├── SOUL.md # Primary agent identity (slot #1 in system prompt)
|
||||
├── memories/ # Persistent memory (MEMORY.md, USER.md)
|
||||
├── skills/ # Agent-created skills (managed via skill_manage tool)
|
||||
├── cron/ # Scheduled jobs
|
||||
@@ -1318,15 +1318,15 @@ Hermes uses two different context scopes:
|
||||
|
||||
| File | Purpose | Scope |
|
||||
|------|---------|-------|
|
||||
| `SOUL.md` | **Primary agent identity** — defines who the agent is (slot #1 in the system prompt) | `~/.hermes/SOUL.md` or `$HERMES_HOME/SOUL.md` |
|
||||
| `AGENTS.md` | Project-specific instructions, coding conventions | Working directory / project tree |
|
||||
| `SOUL.md` | Default persona for this Hermes instance | `~/.hermes/SOUL.md` or `$HERMES_HOME/SOUL.md` |
|
||||
| `.cursorrules` | Cursor IDE rules (also detected) | Working directory |
|
||||
| `.cursor/rules/*.mdc` | Cursor rule files (also detected) | Working directory |
|
||||
|
||||
- **SOUL.md** is the agent's primary identity. It occupies slot #1 in the system prompt, completely replacing the built-in default identity. Edit it to fully customize who the agent is.
|
||||
- If SOUL.md is missing, empty, or cannot be loaded, Hermes falls back to a built-in default identity.
|
||||
- **AGENTS.md** is hierarchical: if subdirectories also have AGENTS.md, all are combined.
|
||||
- **SOUL.md** is now global to the Hermes instance and is loaded only from `HERMES_HOME`.
|
||||
- Hermes automatically seeds a default `SOUL.md` if one does not already exist.
|
||||
- An empty `SOUL.md` contributes nothing to the system prompt.
|
||||
- All loaded context files are capped at 20,000 characters with smart truncation.
|
||||
|
||||
See also:
|
||||
|
||||
@@ -6,12 +6,12 @@ description: "Customize Hermes Agent's personality with a global SOUL.md, built-
|
||||
|
||||
# Personality & SOUL.md
|
||||
|
||||
Hermes Agent's personality is customizable, but there are two different layers that matter:
|
||||
Hermes Agent's personality is fully customizable. `SOUL.md` is the **primary identity** — it's the first thing in the system prompt and defines who the agent is.
|
||||
|
||||
- `SOUL.md` — a durable persona file that lives in `HERMES_HOME` and is loaded automatically for that Hermes instance
|
||||
- `SOUL.md` — a durable persona file that lives in `HERMES_HOME` and serves as the agent's identity (slot #1 in the system prompt)
|
||||
- built-in or custom `/personality` presets — session-level system-prompt overlays
|
||||
|
||||
If you want a stable default voice that follows you across sessions, `SOUL.md` is the right tool.
|
||||
If you want to change who Hermes is — or replace it with an entirely different agent persona — edit `SOUL.md`.
|
||||
|
||||
## How SOUL.md works now
|
||||
|
||||
@@ -29,15 +29,16 @@ $HERMES_HOME/SOUL.md
|
||||
|
||||
### Important behavior
|
||||
|
||||
- **SOUL.md is the agent's primary identity.** It occupies slot #1 in the system prompt, replacing the hardcoded default identity.
|
||||
- Hermes creates a starter `SOUL.md` automatically if one does not exist yet
|
||||
- Existing user `SOUL.md` files are never overwritten
|
||||
- Hermes loads `SOUL.md` only from `HERMES_HOME`
|
||||
- Hermes does not look in the current working directory for `SOUL.md`
|
||||
- If `SOUL.md` exists but is empty, Hermes adds nothing from it to the prompt
|
||||
- If `SOUL.md` exists but is empty, or cannot be loaded, Hermes falls back to a built-in default identity
|
||||
- If `SOUL.md` has content, that content is injected verbatim after security scanning and truncation
|
||||
- Hermes does not add wrapper language like "If SOUL.md is present..." around the file anymore
|
||||
- SOUL.md is **not** duplicated in the context files section — it appears only once, as the identity
|
||||
|
||||
That makes `SOUL.md` a true per-user or per-instance default personality, not a repo-local trick.
|
||||
That makes `SOUL.md` a true per-user or per-instance identity, not just an additive layer.
|
||||
|
||||
## Why this design
|
||||
|
||||
@@ -117,13 +118,13 @@ You optimize for truth, clarity, and usefulness over politeness theater.
|
||||
|
||||
## What Hermes injects into the prompt
|
||||
|
||||
If `SOUL.md` contains text, Hermes injects the file's text itself — not a wrapper explanation.
|
||||
`SOUL.md` content goes directly into slot #1 of the system prompt — the agent identity position. No wrapper language is added around it.
|
||||
|
||||
So the system prompt gets the content directly, after:
|
||||
The content goes through:
|
||||
- prompt-injection scanning
|
||||
- truncation if it is too large
|
||||
|
||||
If the file is empty or whitespace-only, nothing from `SOUL.md` is added.
|
||||
If the file is empty, whitespace-only, or cannot be read, Hermes falls back to a built-in default identity ("You are Hermes Agent, an intelligent AI assistant created by Nous Research..."). This fallback also applies when `skip_context_files` is set (e.g., in subagent/delegation contexts).
|
||||
|
||||
## Security scanning
|
||||
|
||||
@@ -242,14 +243,16 @@ That gives you:
|
||||
## How personality interacts with the full prompt
|
||||
|
||||
At a high level, the prompt stack includes:
|
||||
1. default Hermes identity
|
||||
2. memory/user context
|
||||
3. skills guidance
|
||||
4. context files such as `AGENTS.md`, `.cursorrules`, and global `SOUL.md`
|
||||
5. platform-specific formatting hints
|
||||
6. optional system-prompt overlays such as `/personality`
|
||||
1. **SOUL.md** (agent identity — or built-in fallback if SOUL.md is unavailable)
|
||||
2. tool-aware behavior guidance
|
||||
3. memory/user context
|
||||
4. skills guidance
|
||||
5. context files (`AGENTS.md`, `.cursorrules`)
|
||||
6. timestamp
|
||||
7. platform-specific formatting hints
|
||||
8. optional system-prompt overlays such as `/personality`
|
||||
|
||||
So `SOUL.md` is important, but it is one layer in a broader system.
|
||||
`SOUL.md` is the foundation — everything else builds on top of it.
|
||||
|
||||
## Related docs
|
||||
|
||||
|
||||
Reference in New Issue
Block a user