Compare commits

..

1 Commits

Author SHA1 Message Date
Teknium 29c850058f fix(moonshot): strip $ref siblings and collapse tuple items in tool schemas
Port from anomalyco/opencode#24730: Moonshot's JSON Schema validator rejects
two shapes that the rest of the JSON Schema ecosystem accepts:

1. $ref nodes with sibling keywords. Moonshot expands the reference before
   validation and then rejects the node if keys like `description`, `type`,
   or `default` appear alongside $ref. MCP-sourced tool schemas commonly
   put a `description` on $ref-typed properties so the model sees the
   field hint — which worked on every provider except Moonshot.

2. Tuple-style `items` arrays (positional element schemas). Moonshot's
   engine requires ONE schema applied to every array element. This shape
   is common in tool schemas generated from Go/Protobuf, which model
   fixed-length arrays as `[{type:number}, {type:number}]`.

Repairs applied in `agent/moonshot_schema.py`:

- Rule 3: when a node has `$ref`, return `{"$ref": <value>}` only
  (strip every sibling). The referenced definition still carries its own
  description on the target node, which Moonshot accepts.
- Rule 4: when `items` is a list, collapse to the first element schema
  (falling back to `{}` which is then filled by the generic missing-type
  rule). Preserves `minItems` / `maxItems` / other siblings.

Tests: 10 new cases across TestRefSiblingStripping + TestTupleItems. The
existing TestMissingTypeFilled::test_ref_node_is_not_given_synthetic_type
still passes: it asserted that a plain $ref passes through, and the node now
passes through as exactly `{"$ref": "..."}`, which is strictly compatible.

All 35 tests in test_moonshot_schema.py pass.
2026-04-30 17:07:46 -07:00
153 changed files with 1801 additions and 14703 deletions
-6
View File
@@ -9,12 +9,6 @@ node_modules
.venv
**/.venv
# Built artifacts that are regenerated inside the image. Excluded so local
# rebuilds on the developer's machine don't invalidate the npm-install layer
# that now depends on the full ui-tui/packages/hermes-ink/ tree being present.
ui-tui/dist/
ui-tui/packages/hermes-ink/dist/
# CI/CD
.github
-10
View File
@@ -76,16 +76,6 @@ jobs:
run: |
mkdir -p _site/docs
cp -r website/build/* _site/docs/
# llms.txt / llms-full.txt are also published at the site root
# (https://hermes-agent.nousresearch.com/llms.txt) because some
# agents and IDE plugins probe the classic root-level path rather
# than /docs/llms.txt. Same file, two URLs, one source of truth.
if [ -f website/build/llms.txt ]; then
cp website/build/llms.txt _site/llms.txt
fi
if [ -f website/build/llms-full.txt ]; then
cp website/build/llms-full.txt _site/llms-full.txt
fi
- name: Upload artifact
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3
+8 -18
View File
@@ -28,26 +28,10 @@ WORKDIR /opt/hermes
# ---------- Layer-cached dependency install ----------
# Copy only package manifests first so npm install + Playwright are cached
# unless the lockfiles themselves change.
#
# ui-tui/packages/hermes-ink/ is copied IN FULL (not just its manifests)
# because it is referenced as a `file:` workspace dependency from
# ui-tui/package.json. Copying the tree up front lets npm resolve the
# workspace to real content instead of stopping at a bare package.json.
COPY package.json package-lock.json ./
COPY web/package.json web/package-lock.json web/
COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/
# `npm_config_install_links=false` forces npm to install `file:` deps as
# symlinks (the npm 10+ default) even on Debian's older bundled npm 9.x,
# which defaults to `install-links=true` and installs file deps as *copies*.
# The host-side package-lock.json is generated with a newer npm that uses
# symlinks, so an install-as-copy produces a hidden node_modules/.package-lock.json
# that permanently disagrees with the root lock on the @hermes/ink entry.
# That disagreement trips the TUI launcher's `_tui_need_npm_install()`
# check on every startup and triggers a runtime `npm install` that then
# fails with EACCES (node_modules/ is root-owned from build time).
ENV npm_config_install_links=false
COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/
RUN npm install --prefer-offline --no-audit && \
npx playwright install --with-deps chromium --only-shell && \
@@ -61,7 +45,13 @@ COPY --chown=hermes:hermes . .
# Build browser dashboard and terminal UI assets.
RUN cd web && npm run build && \
cd ../ui-tui && npm run build
cd ../ui-tui && npm run build && \
rm -rf node_modules/@hermes/ink && \
rm -rf packages/hermes-ink/node_modules && \
cp -R packages/hermes-ink node_modules/@hermes/ink && \
npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
rm -rf node_modules/@hermes/ink/node_modules/react && \
node --input-type=module -e "await import('@hermes/ink')"
# ---------- Permissions ----------
# Make install dir world-readable so any HERMES_UID can read it at runtime.
+3 -136
View File
@@ -164,8 +164,6 @@ class HermesACPAgent(acp.Agent):
"context": "Show conversation context info",
"reset": "Clear conversation history",
"compact": "Compress conversation context",
"steer": "Inject guidance into the currently running agent turn",
"queue": "Queue a prompt to run after the current turn finishes",
"version": "Show Hermes version",
}
@@ -195,16 +193,6 @@ class HermesACPAgent(acp.Agent):
"name": "compact",
"description": "Compress conversation context",
},
{
"name": "steer",
"description": "Inject guidance into the currently running agent turn",
"input_hint": "guidance for the active turn",
},
{
"name": "queue",
"description": "Queue a prompt to run after the current turn finishes",
"input_hint": "prompt to run next",
},
{
"name": "version",
"description": "Show Hermes version",
@@ -569,9 +557,6 @@ class HermesACPAgent(acp.Agent):
async def cancel(self, session_id: str, **kwargs: Any) -> None:
state = self.session_manager.get_session(session_id)
if state and state.cancel_event:
with state.runtime_lock:
if state.is_running and state.current_prompt_text:
state.interrupted_prompt_text = state.current_prompt_text
state.cancel_event.set()
try:
if getattr(state, "agent", None) and hasattr(state.agent, "interrupt"):
@@ -669,39 +654,6 @@ class HermesACPAgent(acp.Agent):
if not has_content:
return PromptResponse(stop_reason="end_turn")
# /steer on an idle session has no in-flight tool call to inject into.
# Rewrite it so the payload runs as a normal user prompt, matching the
# gateway's behavior (gateway/run.py ~L4898). Two sub-cases:
# 1. Zed-interrupt salvage — a prior prompt was cancelled by the
# client right before /steer arrived; replay it with the steer
# text attached as explicit correction/guidance so the user's
# in-flight work isn't lost.
# 2. Plain idle — no prior work to salvage; just run the steer
# payload as a regular prompt. Without this, _cmd_steer would
# silently append to state.queued_prompts and respond with
# "No active turn — queued for the next turn", which looks like
# /queue even though the user never typed /queue.
if isinstance(user_content, str) and user_text.startswith("/steer"):
steer_text = user_text.split(maxsplit=1)[1].strip() if len(user_text.split(maxsplit=1)) > 1 else ""
interrupted_prompt = ""
rewrite_idle = False
with state.runtime_lock:
if not state.is_running and steer_text:
if state.interrupted_prompt_text:
interrupted_prompt = state.interrupted_prompt_text
state.interrupted_prompt_text = ""
else:
rewrite_idle = True
if interrupted_prompt:
user_text = (
f"{interrupted_prompt}\n\n"
f"User correction/guidance after interrupt: {steer_text}"
)
user_content = user_text
elif rewrite_idle:
user_text = steer_text
user_content = steer_text
# Intercept slash commands — handle locally without calling the LLM.
# Slash commands are text-only; if the client included images/resources,
# send the whole multimodal prompt to the agent instead of treating it as
@@ -714,24 +666,6 @@ class HermesACPAgent(acp.Agent):
await self._conn.session_update(session_id, update)
return PromptResponse(stop_reason="end_turn")
# If Zed sends another regular prompt while the same ACP session is
# still running, queue it instead of racing two AIAgent loops against
# the same state.history. /steer and /queue are handled above and can
# land immediately.
with state.runtime_lock:
if state.is_running:
queued_text = user_text or "[Image attachment]"
state.queued_prompts.append(queued_text)
depth = len(state.queued_prompts)
if self._conn:
update = acp.update_agent_message_text(
f"Queued for the next turn. ({depth} queued)"
)
await self._conn.session_update(session_id, update)
return PromptResponse(stop_reason="end_turn")
state.is_running = True
state.current_prompt_text = user_text or "[Image attachment]"
logger.info("Prompt on session %s: %s", session_id, user_text[:100])
conn = self._conn
@@ -843,9 +777,6 @@ class HermesACPAgent(acp.Agent):
result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
except Exception:
logger.exception("Executor error for session %s", session_id)
with state.runtime_lock:
state.is_running = False
state.current_prompt_text = ""
return PromptResponse(stop_reason="end_turn")
if result.get("messages"):
@@ -871,28 +802,6 @@ class HermesACPAgent(acp.Agent):
update = acp.update_agent_message_text(final_response)
await conn.session_update(session_id, update)
# Mark this turn idle before draining queued work so recursive prompt()
# calls can acquire the session. Queued turns are intentionally run as
# normal follow-up user prompts, preserving role alternation and history.
with state.runtime_lock:
state.is_running = False
state.current_prompt_text = ""
while True:
with state.runtime_lock:
if not state.queued_prompts:
break
next_prompt = state.queued_prompts.pop(0)
if conn:
await conn.session_update(
session_id,
acp.update_user_message_text(next_prompt),
)
await self.prompt(
prompt=[TextContentBlock(type="text", text=next_prompt)],
session_id=session_id,
)
usage = None
if any(result.get(key) is not None for key in ("prompt_tokens", "completion_tokens", "total_tokens")):
usage = Usage(
@@ -970,8 +879,6 @@ class HermesACPAgent(acp.Agent):
"context": self._cmd_context,
"reset": self._cmd_reset,
"compact": self._cmd_compact,
"steer": self._cmd_steer,
"queue": self._cmd_queue,
"version": self._cmd_version,
}.get(cmd)
@@ -1068,16 +975,10 @@ class HermesACPAgent(acp.Agent):
if not hasattr(agent, "_compress_context"):
return "Context compression not available for this agent."
from agent.model_metadata import estimate_request_tokens_rough
from agent.model_metadata import estimate_messages_tokens_rough
original_count = len(state.history)
# Include system prompt + tool schemas so the figure reflects real
# request pressure, not a transcript-only underestimate (#6217).
_sys_prompt = getattr(agent, "_cached_system_prompt", "") or ""
_tools = getattr(agent, "tools", None) or None
approx_tokens = estimate_request_tokens_rough(
state.history, system_prompt=_sys_prompt, tools=_tools
)
approx_tokens = estimate_messages_tokens_rough(state.history)
original_session_db = getattr(agent, "_session_db", None)
try:
@@ -1097,13 +998,7 @@ class HermesACPAgent(acp.Agent):
self.session_manager.save_session(state.session_id)
new_count = len(state.history)
_sys_prompt_after = getattr(agent, "_cached_system_prompt", "") or _sys_prompt
_tools_after = getattr(agent, "tools", None) or _tools
new_tokens = estimate_request_tokens_rough(
state.history,
system_prompt=_sys_prompt_after,
tools=_tools_after,
)
new_tokens = estimate_messages_tokens_rough(state.history)
return (
f"Context compressed: {original_count} -> {new_count} messages\n"
f"~{approx_tokens:,} -> ~{new_tokens:,} tokens"
@@ -1111,34 +1006,6 @@ class HermesACPAgent(acp.Agent):
except Exception as e:
return f"Compression failed: {e}"
def _cmd_steer(self, args: str, state: SessionState) -> str:
    """Handle the ``/steer <guidance>`` slash command.

    Args:
        args: Raw text following the command name; surrounding whitespace
            is stripped before use.
        state: Session the command was issued against.

    Returns:
        A user-facing status line:

        * the usage hint when no guidance text was supplied;
        * a confirmation (with the guidance truncated to 80 characters)
          when ``state.is_running`` is set, the agent exposes ``steer`` and
          that call returns a truthy value;
        * a failure message when ``state.agent.steer`` raises;
        * otherwise a note that the text was appended to
          ``state.queued_prompts`` together with the new queue depth.
    """
    steer_text = args.strip()
    if not steer_text:
        return "Usage: /steer <guidance>"

    if state.is_running and hasattr(state.agent, "steer"):
        try:
            if state.agent.steer(steer_text):
                suffix = "..." if len(steer_text) > 80 else ""
                preview = steer_text[:80] + suffix
                return f"⏩ Steer queued for the active turn: {preview}"
        except Exception as exc:
            logger.warning("ACP steer failed for session %s: %s", state.session_id, exc)
            return f"⚠️ Steer failed: {exc}"

    # Either nothing is running, the agent has no ``steer`` hook, or the hook
    # returned a falsy value: fall back to queueing the text.
    with state.runtime_lock:
        state.queued_prompts.append(steer_text)
        depth = len(state.queued_prompts)
    return f"No active turn — queued for the next turn. ({depth} queued)"
def _cmd_queue(self, args: str, state: SessionState) -> str:
    """Handle the ``/queue <prompt>`` slash command.

    Args:
        args: Raw text following the command name; surrounding whitespace
            is stripped before use.
        state: Session whose ``queued_prompts`` list receives the prompt.

    Returns:
        The usage hint when no prompt text was supplied, otherwise a
        confirmation that includes the queue depth after the append.
    """
    queued_text = args.strip()
    if not queued_text:
        return "Usage: /queue <prompt>"

    # Append and measure under the session lock so the reported depth
    # matches the list state produced by this call.
    with state.runtime_lock:
        state.queued_prompts.append(queued_text)
        depth = len(state.queued_prompts)
    return f"Queued for the next turn. ({depth} queued)"
def _cmd_version(self, args: str, state: SessionState) -> str:
    """Return the Hermes version banner; *args* and *state* are not used."""
    return f"Hermes Agent v{HERMES_VERSION}"
+7 -46
View File
@@ -26,33 +26,6 @@ from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
def _win_path_to_wsl(path: str) -> str | None:
"""Convert a Windows drive path to its WSL /mnt/<drive>/... equivalent."""
match = re.match(r"^([A-Za-z]):[\\/](.*)$", path)
if not match:
return None
drive = match.group(1).lower()
tail = match.group(2).replace("\\", "/")
return f"/mnt/{drive}/{tail}"
def _translate_acp_cwd(cwd: str) -> str:
    """Translate Windows ACP cwd values when Hermes itself is running in WSL.

    Windows ACP clients can launch ``hermes acp`` inside WSL while still
    sending editor workspaces as Windows drive paths such as ``E:\\Projects``.
    Store and execute against the WSL mount path so agents, tools, and
    persisted ACP sessions all agree on the usable workspace.

    Args:
        cwd: Working directory as received from the ACP client.

    Returns:
        The ``/mnt/<drive>/...`` equivalent when ``is_wsl()`` is true and
        *cwd* is a Windows drive path; otherwise *cwd* unchanged.
    """
    # Imported at call time, exactly as the original block does.
    from hermes_constants import is_wsl

    if is_wsl():
        wsl_path = _win_path_to_wsl(str(cwd))
        if wsl_path is not None:
            return wsl_path
    return cwd
def _normalize_cwd_for_compare(cwd: str | None) -> str:
raw = str(cwd or ".").strip()
if not raw:
@@ -61,9 +34,11 @@ def _normalize_cwd_for_compare(cwd: str | None) -> str:
# Normalize Windows drive paths into the equivalent WSL mount form so
# ACP history filters match the same workspace across Windows and WSL.
translated = _win_path_to_wsl(expanded)
if translated is not None:
expanded = translated
match = re.match(r"^([A-Za-z]):[\\/](.*)$", expanded)
if match:
drive = match.group(1).lower()
tail = match.group(2).replace("\\", "/")
expanded = f"/mnt/{drive}/{tail}"
elif re.match(r"^/mnt/[A-Za-z]/", expanded):
expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}"
@@ -121,18 +96,12 @@ def _acp_stderr_print(*args, **kwargs) -> None:
def _register_task_cwd(task_id: str, cwd: str) -> None:
"""Bind a task/session id to the editor's working directory for tools.
Zed can launch Hermes from a Windows workspace while the ACP process runs
inside WSL. In that case ACP sends cwd as e.g. ``E:\\Projects\\POTI``;
local tools need the WSL mount equivalent or subprocess creation fails
before the command can run.
"""
"""Bind a task/session id to the editor's working directory for tools."""
if not task_id:
return
try:
from tools.terminal_tool import register_task_env_overrides
register_task_env_overrides(task_id, {"cwd": _translate_acp_cwd(cwd)})
register_task_env_overrides(task_id, {"cwd": cwd})
except Exception:
logger.debug("Failed to register ACP task cwd override", exc_info=True)
@@ -176,11 +145,6 @@ class SessionState:
model: str = ""
history: List[Dict[str, Any]] = field(default_factory=list)
cancel_event: Any = None # threading.Event
is_running: bool = False
queued_prompts: List[str] = field(default_factory=list)
runtime_lock: Any = field(default_factory=Lock)
current_prompt_text: str = ""
interrupted_prompt_text: str = ""
class SessionManager:
@@ -211,7 +175,6 @@ class SessionManager:
"""Create a new session with a unique ID and a fresh AIAgent."""
import threading
cwd = _translate_acp_cwd(cwd)
session_id = str(uuid.uuid4())
agent = self._make_agent(session_id=session_id, cwd=cwd)
state = SessionState(
@@ -254,7 +217,6 @@ class SessionManager:
"""Deep-copy a session's history into a new session."""
import threading
cwd = _translate_acp_cwd(cwd)
original = self.get_session(session_id) # checks DB too
if original is None:
return None
@@ -356,7 +318,6 @@ class SessionManager:
def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:
"""Update the working directory for a session and its tool overrides."""
cwd = _translate_acp_cwd(cwd)
state = self.get_session(session_id) # checks DB too
if state is None:
return None
+1 -29
View File
@@ -1977,12 +1977,6 @@ def resolve_provider_client(
(client, resolved_model) or (None, None) if auth is unavailable.
"""
_validate_proxy_env_urls()
# Preserve the original provider name before alias normalization so a
# user-declared ``custom_providers`` entry whose name coincidentally
# matches a built-in alias (e.g. user names their custom provider "kimi"
# which aliases to "kimi-coding") is still reachable via the named-custom
# branch below.
original_provider = (provider or "").strip().lower()
# Normalise aliases
provider = _normalize_aux_provider(provider)
@@ -2169,18 +2163,7 @@ def resolve_provider_client(
# ── Named custom providers (config.yaml providers dict / custom_providers list) ───
try:
from hermes_cli.runtime_provider import _get_named_custom_provider
# When the raw requested name is an alias (``kimi`` → ``kimi-coding``)
# and the user defined a ``custom_providers`` entry under that alias
# name, the custom entry is the intended target — the built-in alias
# rewriting would otherwise hijack the request. Only preferred when
# the raw name is an alias (not a canonical provider name) so custom
# entries that coincidentally match a canonical provider (e.g. ``nous``)
# still defer to the built-in per `_get_named_custom_provider`'s guard.
custom_entry = None
if original_provider and original_provider != provider:
custom_entry = _get_named_custom_provider(original_provider)
if custom_entry is None:
custom_entry = _get_named_custom_provider(provider)
custom_entry = _get_named_custom_provider(provider)
if custom_entry:
custom_base = custom_entry.get("base_url", "").strip()
custom_key = custom_entry.get("api_key", "").strip()
@@ -2290,12 +2273,6 @@ def resolve_provider_client(
creds = resolve_api_key_provider_credentials(provider)
api_key = str(creds.get("api_key", "")).strip()
# Honour an explicit api_key override (e.g. from a fallback_model entry
# or a custom_providers entry) so callers that pass an explicit
# credential can authenticate against endpoints where no built-in
# credential is registered for this provider alias.
if explicit_api_key:
api_key = explicit_api_key.strip() or api_key
if not api_key:
tried_sources = list(pconfig.api_key_env_vars)
if provider == "copilot":
@@ -2307,11 +2284,6 @@ def resolve_provider_client(
raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
base_url = _to_openai_base_url(raw_base_url)
# Honour an explicit base_url override from the caller — used when a
# fallback_model entry (or custom_providers lookup) routes through a
# built-in provider name but targets a user-specified endpoint.
if explicit_base_url:
base_url = _to_openai_base_url(explicit_base_url.strip().rstrip("/"))
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
final_model = _normalize_resolved_model(model or default_model, provider)
+3 -3
View File
@@ -538,7 +538,7 @@ class ContextCompressor(ContextEngine):
# Token-budget approach: walk backward accumulating tokens
accumulated = 0
boundary = len(result)
min_protect = min(protect_tail_count, len(result))
min_protect = min(protect_tail_count, len(result) - 1)
for i in range(len(result) - 1, -1, -1):
msg = result[i]
raw_content = msg.get("content") or ""
@@ -992,8 +992,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
def _get_tool_call_id(tc) -> str:
"""Extract the call ID from a tool_call entry (dict or SimpleNamespace)."""
if isinstance(tc, dict):
return tc.get("call_id", "") or tc.get("id", "") or ""
return getattr(tc, "call_id", "") or getattr(tc, "id", "") or ""
return tc.get("id", "")
return getattr(tc, "id", "") or ""
def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Fix orphaned tool_call / tool_result pairs after compression.
-80
View File
@@ -55,7 +55,6 @@ def _default_state() -> Dict[str, Any]:
"last_run_at": None,
"last_run_duration_seconds": None,
"last_run_summary": None,
"last_report_path": None,
"paused": False,
"run_count": 0,
}
@@ -767,39 +766,6 @@ def _write_run_report(
consolidated = classification["consolidated"]
pruned = classification["pruned"]
# Rewrite cron job skill references. When the curator consolidates
# skill X into umbrella Y, any cron job that lists X fails to load
# it at run time — the scheduler skips it and the job runs without
# the instructions it was scheduled to follow. Rewriting the
# references in-place keeps scheduled jobs working across
# consolidation passes. Best-effort: never let a cron-module issue
# break the curator.
cron_rewrites: Dict[str, Any] = {"rewrites": [], "jobs_updated": 0, "jobs_scanned": 0}
try:
consolidated_map = {
e["name"]: e["into"]
for e in consolidated
if isinstance(e, dict) and e.get("name") and e.get("into")
}
pruned_names = [
e["name"] for e in pruned
if isinstance(e, dict) and e.get("name")
]
if consolidated_map or pruned_names:
from cron.jobs import rewrite_skill_refs as _rewrite_cron_refs
cron_rewrites = _rewrite_cron_refs(
consolidated=consolidated_map,
pruned=pruned_names,
)
except Exception as e:
logger.debug("Curator cron skill rewrite failed: %s", e, exc_info=True)
cron_rewrites = {
"rewrites": [],
"jobs_updated": 0,
"jobs_scanned": 0,
"error": str(e),
}
payload = {
"started_at": started_at.isoformat(),
"duration_seconds": round(elapsed_seconds, 2),
@@ -815,7 +781,6 @@ def _write_run_report(
"consolidated_this_run": len(consolidated),
"pruned_this_run": len(pruned),
"state_transitions": len(transitions),
"cron_jobs_rewritten": int(cron_rewrites.get("jobs_updated", 0)),
"tool_calls_total": sum(tc_counts.values()),
},
"tool_call_counts": tc_counts,
@@ -825,7 +790,6 @@ def _write_run_report(
"pruned_names": [p["name"] for p in pruned],
"added": added,
"state_transitions": transitions,
"cron_rewrites": cron_rewrites,
"llm_final": llm_meta.get("final", ""),
"llm_summary": llm_meta.get("summary", ""),
"llm_error": llm_meta.get("error"),
@@ -848,17 +812,6 @@ def _write_run_report(
except Exception as e:
logger.debug("Curator REPORT.md write failed: %s", e)
# cron_rewrites.json — only when at least one job was touched, to
# keep run dirs uncluttered for the common no-op case.
try:
if int(cron_rewrites.get("jobs_updated", 0)) > 0:
(run_dir / "cron_rewrites.json").write_text(
json.dumps(cron_rewrites, indent=2, ensure_ascii=False) + "\n",
encoding="utf-8",
)
except Exception as e:
logger.debug("Curator cron_rewrites.json write failed: %s", e)
return run_dir
@@ -989,39 +942,6 @@ def _render_report_markdown(p: Dict[str, Any]) -> str:
lines.append(f"- `{t.get('name')}`: {t.get('from')}{t.get('to')}")
lines.append("")
# Cron job rewrites — show which scheduled jobs had their skill
# references updated so users can audit that the auto-rewrite did
# the right thing. Only present when at least one job changed.
cron_rw = p.get("cron_rewrites") or {}
cron_rewrites_list = cron_rw.get("rewrites") or []
if cron_rewrites_list:
lines.append(f"### Cron job skill references rewritten ({len(cron_rewrites_list)})\n")
lines.append(
"_Cron jobs that referenced a consolidated or pruned skill were "
"updated in-place so they keep loading the right instructions "
"on their next run. See `cron_rewrites.json` for the full record._\n"
)
SHOW = 25
for entry in cron_rewrites_list[:SHOW]:
job_name = entry.get("job_name") or entry.get("job_id") or "?"
before = entry.get("before") or []
after = entry.get("after") or []
mapped = entry.get("mapped") or {}
dropped = entry.get("dropped") or []
lines.append(
f"- `{job_name}`: `{', '.join(before)}` → `{', '.join(after) or '(none)'}`"
)
for old, new in mapped.items():
lines.append(f" - `{old}` → `{new}` (consolidated)")
for name in dropped:
lines.append(f" - `{name}` dropped (pruned)")
if len(cron_rewrites_list) > SHOW:
lines.append(
f"- … and {len(cron_rewrites_list) - SHOW} more "
"(see `cron_rewrites.json`)"
)
lines.append("")
# Full LLM final response
final = (p.get("llm_final") or "").strip()
if final:
+5 -5
View File
@@ -20,25 +20,25 @@ def summarize_manual_compression(
headline = f"No changes from compression: {before_count} messages"
if after_tokens == before_tokens:
token_line = (
f"Approx request size: ~{before_tokens:,} tokens (unchanged)"
f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)"
)
else:
token_line = (
f"Approx request size: ~{before_tokens:,}"
f"Rough transcript estimate: ~{before_tokens:,}"
f"~{after_tokens:,} tokens"
)
else:
headline = f"Compressed: {before_count}{after_count} messages"
token_line = (
f"Approx request size: ~{before_tokens:,}"
f"Rough transcript estimate: ~{before_tokens:,}"
f"~{after_tokens:,} tokens"
)
note = None
if not noop and after_count < before_count and after_tokens > before_tokens:
note = (
"Note: fewer messages can still raise this estimate when "
"compression rewrites the transcript into denser summaries."
"Note: fewer messages can still raise this rough transcript estimate "
"when compression rewrites the transcript into denser summaries."
)
return {
+28 -47
View File
@@ -15,6 +15,16 @@ and MoonshotAI/kimi-cli#1595:
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
the parent. Presence of both causes "type should be defined in anyOf
items instead of the parent schema".
3. ``$ref`` nodes may not carry sibling keywords. Moonshot expands the
reference before validation and then rejects the node if sibling keys
like ``description`` remain on the same node as ``$ref``. Strip every
sibling from ``$ref`` nodes so only ``{"$ref": "..."}`` survives.
(Ported from anomalyco/opencode#24730.)
4. ``items`` may not be a tuple-style array (``items: [schemaA, schemaB]``
for positional element schemas). Moonshot's schema engine requires a
single object schema applied to every array element. Collapse tuple
``items`` to the first element schema (or ``{}`` if the tuple is empty).
(Ported from anomalyco/opencode#24730.)
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
@@ -66,6 +76,16 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
}
elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
elif key == "items" and isinstance(value, list):
# Rule 4: tuple-style ``items`` arrays (positional element
# schemas) are not accepted by Moonshot. Collapse to the
# first element schema if present, else to ``{}``. This
# matches opencode's behaviour for moonshotai / kimi models.
first = value[0] if value else {}
if isinstance(first, dict):
repaired[key] = _repair_schema(first, is_schema=True)
else:
repaired[key] = first
elif key in _SCHEMA_NODE_KEYS:
# items / not / additionalProperties: single nested schema.
# additionalProperties can also be a bool — leave those alone.
@@ -81,56 +101,17 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
return repaired
# Rule 2: when anyOf is present, type belongs only on the children.
# Additionally, Moonshot rejects null-type branches inside anyOf
# (enum value (<nil>) does not match any type in [string]).
# Collapse the anyOf to the first non-null branch and infer its type.
if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
repaired.pop("type", None)
non_null = [b for b in repaired["anyOf"]
if isinstance(b, dict) and b.get("type") != "null"]
if non_null and len(non_null) < len(repaired["anyOf"]):
# Drop the anyOf wrapper — keep only the non-null branch.
# If there's a single non-null branch, promote it and fall
# through to Rules 1/3 so nullable/enum cleanup still applies
# to the merged node.
if len(non_null) == 1:
merge = {k: v for k, v in repaired.items() if k != "anyOf"}
merge.update(non_null[0])
repaired = merge
else:
repaired["anyOf"] = non_null
return repaired
else:
# Nothing to collapse — parent type stripped, children already
# repaired by the recursive walk above.
return repaired
return repaired
# Moonshot also rejects non-standard keywords like ``nullable`` on
# parameter schemas — strip it.
repaired.pop("nullable", None)
# Rule 1: property schemas without type need one. $ref nodes are exempt
# — their type comes from the referenced definition.
# Fill missing type BEFORE Rule 3 so enum cleanup can check the type.
if "$ref" not in repaired:
repaired = _fill_missing_type(repaired)
# Rule 3: Moonshot rejects null/empty-string values inside enum arrays
# when the parent type is a scalar (string, integer, etc.). The error:
# "enum value (<nil>) does not match any type in [string]"
# Strip null and empty-string from enum values, and if the enum becomes
# empty, drop it entirely.
if "enum" in repaired and isinstance(repaired["enum"], list):
node_type = repaired.get("type")
if node_type in ("string", "integer", "number", "boolean"):
cleaned = [v for v in repaired["enum"]
if v is not None and v != ""]
if cleaned:
repaired["enum"] = cleaned
else:
repaired.pop("enum")
return repaired
# Rule 3: $ref nodes must not have sibling keywords. Strip everything
# except $ref itself so Moonshot's validator (which expands the ref
# before checking) doesn't reject the node for redundant keys like
# ``description`` / ``type`` / ``default`` appearing alongside $ref.
if "$ref" in repaired:
return {"$ref": repaired["$ref"]}
return _fill_missing_type(repaired)
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
-455
View File
@@ -1,455 +0,0 @@
"""Pure tool-call loop guardrail primitives.
The controller in this module is intentionally side-effect free: it tracks
per-turn tool-call observations and returns decisions. Runtime code owns whether
those decisions become warning guidance, synthetic tool results, or controlled
turn halts.
"""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass, field
from typing import Any, Mapping
from utils import safe_json_loads
# Default value for ``ToolCallGuardrailConfig.idempotent_tools``.
IDEMPOTENT_TOOL_NAMES = frozenset(
    (
        # Built-in tools
        "read_file",
        "search_files",
        "web_search",
        "web_extract",
        "session_search",
        # Browser tools
        "browser_snapshot",
        "browser_console",
        "browser_get_images",
        # MCP filesystem tools
        "mcp_filesystem_read_file",
        "mcp_filesystem_read_text_file",
        "mcp_filesystem_read_multiple_files",
        "mcp_filesystem_list_directory",
        "mcp_filesystem_list_directory_with_sizes",
        "mcp_filesystem_directory_tree",
        "mcp_filesystem_get_file_info",
        "mcp_filesystem_search_files",
    )
)

# Default value for ``ToolCallGuardrailConfig.mutating_tools``.
MUTATING_TOOL_NAMES = frozenset(
    (
        # Built-in tools
        "terminal",
        "execute_code",
        "write_file",
        "patch",
        "todo",
        "memory",
        "skill_manage",
        # Browser tools
        "browser_click",
        "browser_type",
        "browser_press",
        "browser_scroll",
        "browser_navigate",
        # Messaging, scheduling and delegation
        "send_message",
        "cronjob",
        "delegate_task",
        "process",
    )
)
@dataclass(frozen=True)
class ToolCallGuardrailConfig:
    """Thresholds for per-turn tool-call loop detection.

    Warnings are enabled by default and never prevent tool execution. Hard stops
    are explicit opt-in so interactive CLI/TUI sessions get a gentle nudge unless
    the user enables circuit-breaker behavior in config.yaml.
    """

    warnings_enabled: bool = True
    hard_stop_enabled: bool = False
    exact_failure_warn_after: int = 2
    exact_failure_block_after: int = 5
    same_tool_failure_warn_after: int = 3
    same_tool_failure_halt_after: int = 8
    no_progress_warn_after: int = 2
    no_progress_block_after: int = 5
    idempotent_tools: frozenset[str] = field(default_factory=lambda: IDEMPOTENT_TOOL_NAMES)
    mutating_tools: frozenset[str] = field(default_factory=lambda: MUTATING_TOOL_NAMES)

    @classmethod
    def from_mapping(cls, data: Mapping[str, Any] | None) -> "ToolCallGuardrailConfig":
        """Build config from the `tool_loop_guardrails` config.yaml section.

        Each threshold is looked up first in the nested ``warn_after`` /
        ``hard_stop_after`` mapping and, when the nested key is absent, under
        its flat ``<name>_warn_after`` / ``<name>_block_after`` /
        ``<name>_halt_after`` key on *data*. The looked-up value is passed
        to ``_positive_int`` (or ``_as_bool`` for the two flags) together
        with the class default.

        Args:
            data: The raw config section. Anything that is not a mapping
                (including ``None``) yields an all-defaults instance.

        Returns:
            A new ``ToolCallGuardrailConfig``. ``idempotent_tools`` and
            ``mutating_tools`` are not read from *data* and keep their
            defaults.
        """
        if not isinstance(data, Mapping):
            return cls()

        def _section(name: str) -> Mapping[str, Any]:
            # A missing or non-mapping sub-section behaves like an empty one.
            value = data.get(name)
            return value if isinstance(value, Mapping) else {}

        warn_after = _section("warn_after")
        hard_stop_after = _section("hard_stop_after")
        defaults = cls()

        def _threshold(nested: Mapping[str, Any], nested_key: str, flat_key: str, fallback: int) -> int:
            # Nested key wins; the flat key on ``data`` is the fallback source.
            return _positive_int(nested.get(nested_key, data.get(flat_key)), fallback)

        return cls(
            warnings_enabled=_as_bool(data.get("warnings_enabled"), defaults.warnings_enabled),
            hard_stop_enabled=_as_bool(data.get("hard_stop_enabled"), defaults.hard_stop_enabled),
            exact_failure_warn_after=_threshold(
                warn_after, "exact_failure", "exact_failure_warn_after",
                defaults.exact_failure_warn_after,
            ),
            same_tool_failure_warn_after=_threshold(
                warn_after, "same_tool_failure", "same_tool_failure_warn_after",
                defaults.same_tool_failure_warn_after,
            ),
            no_progress_warn_after=_threshold(
                warn_after, "idempotent_no_progress", "no_progress_warn_after",
                defaults.no_progress_warn_after,
            ),
            exact_failure_block_after=_threshold(
                hard_stop_after, "exact_failure", "exact_failure_block_after",
                defaults.exact_failure_block_after,
            ),
            same_tool_failure_halt_after=_threshold(
                hard_stop_after, "same_tool_failure", "same_tool_failure_halt_after",
                defaults.same_tool_failure_halt_after,
            ),
            no_progress_block_after=_threshold(
                hard_stop_after, "idempotent_no_progress", "no_progress_block_after",
                defaults.no_progress_block_after,
            ),
        )
@dataclass(frozen=True)
class ToolCallSignature:
    """Hashable identity of a tool call: the tool name plus a digest of its args."""

    tool_name: str
    args_hash: str

    @classmethod
    def from_call(cls, tool_name: str, args: Mapping[str, Any] | None) -> "ToolCallSignature":
        """Create a signature by hashing the canonical JSON form of ``args``."""
        serialized = canonical_tool_args(args if args else {})
        digest = _sha256(serialized)
        return cls(tool_name=tool_name, args_hash=digest)

    def to_metadata(self) -> dict[str, str]:
        """Return the name and args digest only; raw argument values are not included."""
        return dict(tool_name=self.tool_name, args_hash=self.args_hash)
@dataclass(frozen=True)
class ToolGuardrailDecision:
    """Outcome produced by the guardrail controller for a single tool call."""

    action: str = "allow"  # allow | warn | block | halt
    code: str = "allow"
    message: str = ""
    tool_name: str = ""
    count: int = 0
    signature: ToolCallSignature | None = None

    @property
    def allows_execution(self) -> bool:
        """True when the action is ``allow`` or ``warn``."""
        return self.action in ("allow", "warn")

    @property
    def should_halt(self) -> bool:
        """True when the action is ``block`` or ``halt``."""
        return self.action in ("block", "halt")

    def to_metadata(self) -> dict[str, Any]:
        """Return the decision as a plain dict, adding ``signature`` only when set."""
        payload: dict[str, Any] = {
            name: getattr(self, name)
            for name in ("action", "code", "message", "tool_name", "count")
        }
        if self.signature is None:
            return payload
        payload["signature"] = self.signature.to_metadata()
        return payload
def canonical_tool_args(args: Mapping[str, Any]) -> str:
    """Return sorted, compact JSON for parsed tool arguments.

    Args:
        args: The parsed tool arguments. Any ``Mapping`` is accepted.

    Returns:
        A JSON object string with keys sorted and no insignificant
        whitespace. Values that are not JSON-serializable are rendered via
        ``str``.

    Raises:
        TypeError: If ``args`` is not a mapping.
    """
    if not isinstance(args, Mapping):
        raise TypeError(f"tool args must be a mapping, got {type(args).__name__}")
    # json.dumps only encodes real dicts as JSON objects. Any other Mapping
    # (e.g. MappingProxyType) would be handed to ``default=str`` and come out
    # as an unsorted repr string, so equal arguments could hash differently.
    return json.dumps(
        dict(args),
        ensure_ascii=False,
        sort_keys=True,
        separators=(",", ":"),
        default=str,
    )
def classify_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
"""Safety-fallback classifier used only when callers don't pass ``failed``.
Mirrors ``agent.display._detect_tool_failure`` exactly so the guardrail
never disagrees with the CLI's user-visible ``[error]`` tag. Production
callers in ``run_agent.py`` always pass an explicit ``failed=`` derived
from ``_detect_tool_failure``; this function exists so standalone callers
(tests, tooling) still get consistent behavior.
"""
if result is None:
return False, ""
if tool_name == "terminal":
data = safe_json_loads(result)
if isinstance(data, dict):
exit_code = data.get("exit_code")
if exit_code is not None and exit_code != 0:
return True, f" [exit {exit_code}]"
return False, ""
if tool_name == "memory":
data = safe_json_loads(result)
if isinstance(data, dict):
if data.get("success") is False and "exceed the limit" in data.get("error", ""):
return True, " [full]"
lower = result[:500].lower()
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
return True, " [error]"
return False, ""
class ToolCallGuardrailController:
    """Tracks repeated failing / non-progressing tool calls within one turn.

    ``before_call`` may block a call (only when hard stops are enabled);
    ``after_call`` records the outcome and may return a warn or halt decision.
    """

    def __init__(self, config: ToolCallGuardrailConfig | None = None):
        if not config:
            config = ToolCallGuardrailConfig()
        self.config = config
        self.reset_for_turn()

    def reset_for_turn(self) -> None:
        """Drop all counters and any recorded halt decision."""
        self._exact_failure_counts: dict[ToolCallSignature, int] = {}
        self._same_tool_failure_counts: dict[str, int] = {}
        # signature -> (hash of last result, consecutive identical results)
        self._no_progress: dict[ToolCallSignature, tuple[str, int]] = {}
        self._halt_decision: ToolGuardrailDecision | None = None

    @property
    def halt_decision(self) -> ToolGuardrailDecision | None:
        """The most recent block/halt decision recorded since the last reset, if any."""
        return self._halt_decision

    def before_call(self, tool_name: str, args: Mapping[str, Any] | None) -> ToolGuardrailDecision:
        """Decide whether a call may run; blocks only when hard stops are enabled."""
        signature = ToolCallSignature.from_call(tool_name, _coerce_args(args))
        cfg = self.config
        if not cfg.hard_stop_enabled:
            return ToolGuardrailDecision(tool_name=tool_name, signature=signature)

        failures = self._exact_failure_counts.get(signature, 0)
        if failures >= cfg.exact_failure_block_after:
            blocked = ToolGuardrailDecision(
                action="block",
                code="repeated_exact_failure_block",
                message=(
                    f"Blocked {tool_name}: the same tool call failed {failures} "
                    "times with identical arguments. Stop retrying it unchanged; "
                    "change strategy or explain the blocker."
                ),
                tool_name=tool_name,
                count=failures,
                signature=signature,
            )
            self._halt_decision = blocked
            return blocked

        seen = self._no_progress.get(signature) if self._is_idempotent(tool_name) else None
        if seen is not None:
            repeats = seen[1]
            if repeats >= cfg.no_progress_block_after:
                blocked = ToolGuardrailDecision(
                    action="block",
                    code="idempotent_no_progress_block",
                    message=(
                        f"Blocked {tool_name}: this read-only call returned the same "
                        f"result {repeats} times. Stop repeating it unchanged; "
                        "use the result already provided or try a different query."
                    ),
                    tool_name=tool_name,
                    count=repeats,
                    signature=signature,
                )
                self._halt_decision = blocked
                return blocked

        return ToolGuardrailDecision(tool_name=tool_name, signature=signature)

    def after_call(
        self,
        tool_name: str,
        args: Mapping[str, Any] | None,
        result: str | None,
        *,
        failed: bool | None = None,
    ) -> ToolGuardrailDecision:
        """Record a call's outcome and return allow / warn / halt accordingly.

        When ``failed`` is ``None`` the outcome is derived from ``result`` via
        ``classify_tool_failure``.
        """
        args = _coerce_args(args)
        signature = ToolCallSignature.from_call(tool_name, args)
        if failed is None:
            failed, _ = classify_tool_failure(tool_name, result)
        if failed:
            return self._record_failure(tool_name, signature)
        return self._record_success(tool_name, signature, result)

    def _record_failure(
        self, tool_name: str, signature: ToolCallSignature
    ) -> ToolGuardrailDecision:
        """Bump the failure counters and pick the matching decision."""
        cfg = self.config

        exact_total = self._exact_failure_counts.get(signature, 0) + 1
        self._exact_failure_counts[signature] = exact_total
        # A failure breaks any "same result" streak for this signature.
        self._no_progress.pop(signature, None)
        tool_total = self._same_tool_failure_counts.get(tool_name, 0) + 1
        self._same_tool_failure_counts[tool_name] = tool_total

        if cfg.hard_stop_enabled and tool_total >= cfg.same_tool_failure_halt_after:
            halted = ToolGuardrailDecision(
                action="halt",
                code="same_tool_failure_halt",
                message=(
                    f"Stopped {tool_name}: it failed {tool_total} times this turn. "
                    "Stop retrying the same failing tool path and choose a different approach."
                ),
                tool_name=tool_name,
                count=tool_total,
                signature=signature,
            )
            self._halt_decision = halted
            return halted

        if cfg.warnings_enabled:
            if exact_total >= cfg.exact_failure_warn_after:
                return ToolGuardrailDecision(
                    action="warn",
                    code="repeated_exact_failure_warning",
                    message=(
                        f"{tool_name} has failed {exact_total} times with identical arguments. "
                        "This looks like a loop; inspect the error and change strategy "
                        "instead of retrying it unchanged."
                    ),
                    tool_name=tool_name,
                    count=exact_total,
                    signature=signature,
                )
            if tool_total >= cfg.same_tool_failure_warn_after:
                return ToolGuardrailDecision(
                    action="warn",
                    code="same_tool_failure_warning",
                    message=(
                        f"{tool_name} has failed {tool_total} times this turn. "
                        "This looks like a loop; change approach before retrying."
                    ),
                    tool_name=tool_name,
                    count=tool_total,
                    signature=signature,
                )

        return ToolGuardrailDecision(tool_name=tool_name, count=exact_total, signature=signature)

    def _record_success(
        self, tool_name: str, signature: ToolCallSignature, result: str | None
    ) -> ToolGuardrailDecision:
        """Clear failure counters and track identical results for idempotent tools."""
        self._exact_failure_counts.pop(signature, None)
        self._same_tool_failure_counts.pop(tool_name, None)

        if not self._is_idempotent(tool_name):
            self._no_progress.pop(signature, None)
            return ToolGuardrailDecision(tool_name=tool_name, signature=signature)

        digest = _result_hash(result)
        earlier = self._no_progress.get(signature)
        if earlier is not None and earlier[0] == digest:
            streak = earlier[1] + 1
        else:
            streak = 1
        self._no_progress[signature] = (digest, streak)

        if self.config.warnings_enabled and streak >= self.config.no_progress_warn_after:
            return ToolGuardrailDecision(
                action="warn",
                code="idempotent_no_progress_warning",
                message=(
                    f"{tool_name} returned the same result {streak} times. "
                    "Use the result already provided or change the query instead of "
                    "repeating it unchanged."
                ),
                tool_name=tool_name,
                count=streak,
                signature=signature,
            )
        return ToolGuardrailDecision(tool_name=tool_name, count=streak, signature=signature)

    def _is_idempotent(self, tool_name: str) -> bool:
        """Mutating tools are never idempotent, even if also listed as idempotent."""
        cfg = self.config
        return tool_name not in cfg.mutating_tools and tool_name in cfg.idempotent_tools
def toolguard_synthetic_result(decision: ToolGuardrailDecision) -> str:
    """Serialize a decision into the JSON string used as a synthetic tool result.

    The payload carries the decision message under ``error`` and the full
    decision metadata under ``guardrail``.
    """
    payload = {
        "error": decision.message,
        "guardrail": decision.to_metadata(),
    }
    return json.dumps(payload, ensure_ascii=False)
def append_toolguard_guidance(result: str, decision: ToolGuardrailDecision) -> str:
    """Return ``result`` with a bracketed guardrail note appended.

    Only ``warn`` and ``halt`` decisions that carry a message add a note; for
    anything else ``result`` is returned untouched.
    """
    actionable = decision.action in {"warn", "halt"}
    if not (actionable and decision.message):
        return result

    if decision.action == "halt":
        label = "Tool loop hard stop"
    else:
        label = "Tool loop warning"
    note = f"\n\n[{label}: {decision.code}; count={decision.count}; {decision.message}]"
    return (result or "") + note
def _coerce_args(args: Mapping[str, Any] | None) -> Mapping[str, Any]:
return args if isinstance(args, Mapping) else {}
def _result_hash(result: str | None) -> str:
    """Hash a tool result, canonicalizing it first when it parses as JSON.

    When ``safe_json_loads`` yields a value, that value is re-serialized with
    sorted keys and compact separators before hashing; otherwise the raw text
    (or the empty string for ``None``) is hashed.
    """
    raw = result or ""
    parsed = safe_json_loads(raw)
    if parsed is None:
        return _sha256(raw)
    try:
        canonical = json.dumps(
            parsed,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            default=str,
        )
    except TypeError:
        canonical = str(parsed)
    return _sha256(canonical)
def _as_bool(value: Any, default: bool) -> bool:
if value is None:
return default
if isinstance(value, bool):
return value
if isinstance(value, (int, float)):
return bool(value)
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in {"1", "true", "yes", "on", "enabled"}:
return True
if lowered in {"0", "false", "no", "off", "disabled"}:
return False
return default
def _positive_int(value: Any, default: int) -> int:
if value is None:
return default
try:
parsed = int(value)
except (TypeError, ValueError):
return default
return parsed if parsed >= 1 else default
def _sha256(value: str) -> str:
return hashlib.sha256(value.encode("utf-8")).hexdigest()
-19
View File
@@ -289,25 +289,6 @@ browser:
# after this period of no activity between agent loops (default: 120 = 2 minutes)
inactivity_timeout: 120
# =============================================================================
# Tool Loop Guardrails
# =============================================================================
# Soft warnings are enabled by default. They append guidance to repeated failed
# or non-progressing tool results but still let the tool execute. Hard stops are
# opt-in circuit breakers for autonomous/cron sessions where stopping a loop is
# preferable to spending the full iteration budget.
tool_loop_guardrails:
warnings_enabled: true
hard_stop_enabled: false
warn_after:
exact_failure: 2
same_tool_failure: 3
idempotent_no_progress: 2
hard_stop_after:
exact_failure: 5
same_tool_failure: 8
idempotent_no_progress: 5
# =============================================================================
# Context Compression (Auto-shrinks long conversations)
# =============================================================================
+14 -229
View File
@@ -15,6 +15,7 @@ Usage:
import logging
import os
import re
import shutil
import sys
import json
@@ -85,7 +86,7 @@ from hermes_cli.browser_connect import (
try_launch_chrome_debug,
)
from hermes_cli.env_loader import load_hermes_dotenv
from utils import base_url_host_matches, is_truthy_value
from utils import base_url_host_matches
_hermes_home = get_hermes_home()
_project_env = Path(__file__).parent / '.env'
@@ -599,7 +600,6 @@ def load_cli_config() -> Dict[str, Any]:
# Load configuration at module startup
CLI_CONFIG = load_cli_config()
# Initialize centralized logging early — agent.log + errors.log in ~/.hermes/logs/.
# This ensures CLI sessions produce a log trail even before AIAgent is instantiated.
try:
@@ -934,20 +934,6 @@ def _run_state_db_auto_maintenance(session_db) -> None:
try:
from hermes_cli.config import load_config as _load_full_config
from hermes_constants import get_hermes_home as _get_hermes_home
_hermes_home_maint = _get_hermes_home()
# One-time prune of empty TUI ghost sessions.
try:
if not session_db.get_meta("ghost_session_prune_v1"):
pruned = session_db.prune_empty_ghost_sessions(
sessions_dir=_hermes_home_maint / "sessions"
)
session_db.set_meta("ghost_session_prune_v1", "1")
if pruned:
logger.info("Pruned %d empty TUI ghost sessions", pruned)
except Exception as _prune_exc:
logger.debug("Ghost session prune skipped: %s", _prune_exc)
cfg = (_load_full_config().get("sessions") or {})
if not cfg.get("auto_prune", False):
return
@@ -955,7 +941,7 @@ def _run_state_db_auto_maintenance(session_db) -> None:
retention_days=int(cfg.get("retention_days", 90)),
min_interval_hours=int(cfg.get("min_interval_hours", 24)),
vacuum=bool(cfg.get("vacuum_after_prune", True)),
sessions_dir=_hermes_home_maint / "sessions",
sessions_dir=_get_hermes_home() / "sessions",
)
except Exception as exc:
logger.debug("state.db auto-maintenance skipped: %s", exc)
@@ -2132,8 +2118,6 @@ class HermesCLI:
# Parse and validate toolsets
self.enabled_toolsets = toolsets
self.disabled_toolsets = CLI_CONFIG["agent"].get("disabled_toolsets") or []
if toolsets and "all" not in toolsets and "*" not in toolsets:
# Validate each toolset — MCP server names are resolved via
# live registry aliases (registered during discover_mcp_tools),
@@ -3584,7 +3568,6 @@ class HermesCLI:
credential_pool=runtime.get("credential_pool"),
max_iterations=self.max_turns,
enabled_toolsets=self.enabled_toolsets,
disabled_toolsets=self.disabled_toolsets,
verbose_logging=self.verbose,
quiet_mode=not self.verbose,
ephemeral_system_prompt=self.system_prompt if self.system_prompt else None,
@@ -3632,18 +3615,14 @@ class HermesCLI:
tuple(runtime.get("args") or ()),
)
# Force-create DB row on /title intent, then apply title.
if self._pending_title and self._session_db and self.agent:
if self._pending_title and self._session_db:
try:
self.agent._ensure_db_session()
if self.agent._session_db_created:
self._session_db.set_session_title(self.session_id, self._pending_title)
_cprint(f" Session title applied: {self._pending_title}")
self._pending_title = None
# else: row creation failed transiently — keep _pending_title for retry
self._session_db.set_session_title(self.session_id, self._pending_title)
_cprint(f" Session title applied: {self._pending_title}")
self._pending_title = None
except (ValueError, Exception) as e:
_cprint(f" Could not apply pending title: {e}")
# Keep _pending_title so it can be retried after row creation succeeds
self._pending_title = None
return True
except Exception as e:
ChatConsole().print(f"[bold red]Failed to initialize agent: {e}[/]")
@@ -4971,7 +4950,6 @@ class HermesCLI:
if self._session_db:
try:
self.agent._session_db_created = False
self._session_db.create_session(
session_id=self.session_id,
source=os.environ.get("HERMES_SESSION_SOURCE", "cli"),
@@ -4981,7 +4959,6 @@ class HermesCLI:
"reasoning_config": self.reasoning_config,
},
)
self.agent._session_db_created = True
except Exception:
pass
# Notify memory providers that session_id rotated to a fresh
@@ -6560,8 +6537,6 @@ class HermesCLI:
# No active run — treat as a normal next-turn message.
self._pending_input.put(payload)
_cprint(f" No agent running; queued as next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}")
elif canonical == "goal":
self._handle_goal_command(cmd_original)
elif canonical == "skin":
self._handle_skin_command(cmd_original)
elif canonical == "voice":
@@ -6607,17 +6582,12 @@ class HermesCLI:
self._console_print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]")
# Check for plugin-registered slash commands
elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names():
from hermes_cli.plugins import (
get_plugin_command_handler,
resolve_plugin_command_result,
)
from hermes_cli.plugins import get_plugin_command_handler
plugin_handler = get_plugin_command_handler(base_cmd.lstrip("/"))
if plugin_handler:
user_args = cmd_original[len(base_cmd):].strip()
try:
result = resolve_plugin_command_result(
plugin_handler(user_args)
)
result = plugin_handler(user_args)
if result:
_cprint(str(result))
except Exception as e:
@@ -7042,166 +7012,6 @@ class HermesCLI:
print(" status Show current browser mode")
print()
# ────────────────────────────────────────────────────────────────
# /goal — persistent cross-turn goals (Ralph-style loop)
# ────────────────────────────────────────────────────────────────
def _get_goal_manager(self):
"""Return the GoalManager bound to the current session_id.
Cached on ``self._goal_manager`` and rebound lazily when
``session_id`` changes (e.g. after /new or a compression-driven
session split).
"""
try:
from hermes_cli.goals import GoalManager
from hermes_cli.config import load_config
except Exception as exc:
logging.debug("goal manager unavailable: %s", exc)
return None
sid = getattr(self, "session_id", None) or ""
if not sid:
return None
existing = getattr(self, "_goal_manager", None)
if existing is not None and getattr(existing, "session_id", None) == sid:
return existing
try:
cfg = load_config() or {}
goals_cfg = cfg.get("goals") or {}
max_turns = int(goals_cfg.get("max_turns", 20) or 20)
except Exception:
max_turns = 20
mgr = GoalManager(session_id=sid, default_max_turns=max_turns)
self._goal_manager = mgr
return mgr
def _handle_goal_command(self, cmd: str) -> None:
    """Dispatch /goal subcommands: set / status / pause / resume / clear.

    Args:
        cmd: The full command text as typed. Everything after the first
            whitespace-separated token is the argument: empty or ``status``
            shows the current state, ``pause`` / ``resume`` toggle the goal,
            ``clear`` / ``stop`` / ``done`` remove it, and any other text is
            stored as the new goal and queued as the next input.
    """
    parts = (cmd or "").strip().split(None, 1)
    arg = parts[1].strip() if len(parts) > 1 else ""

    mgr = self._get_goal_manager()
    if mgr is None:
        _cprint(f" {_DIM}Goals unavailable (no active session).{_RST}")
        return

    lower = arg.lower()

    # Bare /goal or /goal status → show current state
    if not arg or lower == "status":
        _cprint(f" {mgr.status_line()}")
        return

    if lower == "pause":
        state = mgr.pause(reason="user-paused")
        if state is None:
            _cprint(f" {_DIM}No goal set.{_RST}")
        else:
            _cprint(f" ⏸ Goal paused: {state.goal}")
        return

    if lower == "resume":
        state = mgr.resume()
        if state is None:
            _cprint(f" {_DIM}No goal to resume.{_RST}")
        else:
            _cprint(f" ▶ Goal resumed: {state.goal}")
            # The previous hint was a garbled sentence with an unbalanced
            # parenthetical; reworded so the instruction is readable.
            _cprint(
                f" {_DIM}Send any message to continue (pressing Enter on an empty "
                f"prompt is a no-op; type 'continue' to kick it off).{_RST}"
            )
        return

    if lower in ("clear", "stop", "done"):
        had = mgr.has_goal()
        mgr.clear()
        if had:
            _cprint(" ✓ Goal cleared.")
        else:
            _cprint(f" {_DIM}No active goal.{_RST}")
        return

    # Otherwise treat the arg as the goal text.
    try:
        state = mgr.set(arg)
    except ValueError as exc:
        _cprint(f" Invalid goal: {exc}")
        return

    _cprint(f" ⊙ Goal set ({state.max_turns}-turn budget): {state.goal}")
    _cprint(
        f" {_DIM}After each turn, a judge model will check if the goal is done. "
        f"Hermes keeps working until it is, you pause/clear it, or the budget is "
        f"exhausted. Use /goal status, /goal pause, /goal resume, /goal clear.{_RST}"
    )
    # Kick the loop off immediately so the user doesn't have to send a
    # separate message after setting the goal.
    try:
        self._pending_input.put(state.goal)
    except Exception as exc:
        # Still best-effort, but leave a trace instead of failing silently.
        logging.debug("goal kickoff enqueue failed: %s", exc)
def _maybe_continue_goal_after_turn(self) -> None:
"""Hook run after every CLI turn. Judges + maybe re-queues.
Safe to call when no goal is set returns quickly.
Preemption is automatic: if a real user message is already in
``_pending_input`` we skip judging (the user's new input takes
priority and we'll re-judge after that turn). If judge says done,
mark it done and tell the user. If judge says continue and we're
under budget, push the continuation prompt onto the queue.
"""
mgr = self._get_goal_manager()
if mgr is None or not mgr.is_active():
return
# If a real user message is already queued, don't inject a
# continuation prompt on top — let the user's turn go first.
try:
if getattr(self, "_pending_input", None) is not None \
and not self._pending_input.empty():
return
except Exception:
pass
# Extract the agent's final response for this turn.
last_response = ""
try:
hist = self.conversation_history or []
for msg in reversed(hist):
if msg.get("role") == "assistant":
content = msg.get("content", "")
if isinstance(content, list):
# Multimodal content — flatten text parts.
parts = [
p.get("text", "")
for p in content
if isinstance(p, dict) and p.get("type") in ("text", "output_text")
]
last_response = "\n".join(t for t in parts if t)
else:
last_response = str(content or "")
break
except Exception:
last_response = ""
decision = mgr.evaluate_after_turn(last_response, user_initiated=True)
msg = decision.get("message") or ""
if msg:
_cprint(f" {msg}")
if decision.get("should_continue"):
prompt = decision.get("continuation_prompt")
if prompt:
try:
self._pending_input.put(prompt)
except Exception as exc:
logging.debug("goal continuation enqueue failed: %s", exc)
def _handle_skin_command(self, cmd: str):
"""Handle /skin [name] — show or change the display skin."""
try:
@@ -7328,7 +7138,7 @@ class HermesCLI:
import os
from hermes_cli.colors import Colors as _Colors
current = is_truthy_value(os.environ.get("HERMES_YOLO_MODE"))
current = bool(os.environ.get("HERMES_YOLO_MODE"))
if current:
os.environ.pop("HERMES_YOLO_MODE", None)
_cprint(
@@ -7525,20 +7335,10 @@ class HermesCLI:
original_count = len(self.conversation_history)
with self._busy_command("Compressing context..."):
try:
from agent.model_metadata import estimate_request_tokens_rough
from agent.model_metadata import estimate_messages_tokens_rough
from agent.manual_compression_feedback import summarize_manual_compression
original_history = list(self.conversation_history)
# Include system prompt + tool schemas in the estimate —
# a transcript-only number understates real request pressure
# and can even appear to grow after compression because a
# dense handoff summary replaces many short turns (#6217).
_sys_prompt = getattr(self.agent, "_cached_system_prompt", "") or ""
_tools = getattr(self.agent, "tools", None) or None
approx_tokens = estimate_request_tokens_rough(
original_history,
system_prompt=_sys_prompt,
tools=_tools,
)
approx_tokens = estimate_messages_tokens_rough(original_history)
if focus_topic:
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
f"focus: \"{focus_topic}\"...")
@@ -7570,11 +7370,7 @@ class HermesCLI:
):
self.session_id = self.agent.session_id
self._pending_title = None
new_tokens = estimate_request_tokens_rough(
self.conversation_history,
system_prompt=_sys_prompt,
tools=_tools,
)
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
summary = summarize_manual_compression(
original_history,
self.conversation_history,
@@ -11540,17 +11336,6 @@ class HermesCLI:
app.invalidate() # Refresh status line
# Goal continuation: if a standing goal is active, ask
# the judge whether the turn satisfied it. If not, and
# there's no real user message already queued, push the
# continuation prompt back into _pending_input so the
# next loop iteration picks it up naturally (and any
# user input that arrives in between still preempts).
try:
self._maybe_continue_goal_after_turn()
except Exception as _goal_exc:
logging.debug("goal continuation hook failed: %s", _goal_exc)
# Continuous voice: auto-restart recording after agent responds.
# Dispatch to a daemon thread so play_beep (sd.wait) and
# AudioRecorder.start (lock acquire) never block process_loop —
-118
View File
@@ -882,121 +882,3 @@ def save_job_output(job_id: str, output: str):
raise
return output_file
# =============================================================================
# Skill reference rewriting (curator integration)
# =============================================================================
def rewrite_skill_refs(
    consolidated: Optional[Dict[str, str]] = None,
    pruned: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Rewrite cron job skill references after skills were consolidated or pruned.

    Every stored job's skill list is repaired in place:

    - A skill named in ``consolidated`` is replaced by its target. A target
      already present in the rebuilt list is not added twice, and an empty
      target simply removes the skill.
    - A skill named in ``pruned`` is removed.
    - All other skills are kept in their original order, de-duplicated.
    - ``job["skill"]`` is set to the first remaining skill, or ``None``.

    A name present in both arguments is treated as consolidated.

    Args:
        consolidated: mapping of ``old_skill_name -> umbrella_skill_name``.
        pruned: skill names removed with no forwarding target.

    Returns a report dict::

        {
            "rewrites": [
                {
                    "job_id": ...,
                    "job_name": ...,
                    "before": [...],
                    "after": [...],
                    "mapped": {"old": "new", ...},
                    "dropped": ["old", ...],
                },
                ...
            ],
            "jobs_updated": N,
            "jobs_scanned": M,
        }

    Exceptions raised while loading or saving jobs are not caught here.
    """
    forward = dict(consolidated or {})
    # Consolidation wins over pruning when a name appears in both.
    archived = {name for name in (pruned or []) if name not in forward}

    if not (forward or archived):
        return {"rewrites": [], "jobs_updated": 0, "jobs_scanned": 0}

    with _jobs_file_lock:
        jobs = load_jobs()
        rewrites: List[Dict[str, Any]] = []

        for job in jobs:
            current = _normalize_skill_list(job.get("skill"), job.get("skills"))
            if not current:
                continue

            mapped: Dict[str, str] = {}
            dropped: List[str] = []
            kept: List[str] = []

            for name in current:
                if name in forward:
                    candidate = forward[name]
                    mapped[name] = candidate
                    if not candidate:
                        # Mapped to an empty target: nothing to carry over.
                        continue
                elif name in archived:
                    dropped.append(name)
                    continue
                else:
                    candidate = name
                if candidate not in kept:
                    kept.append(candidate)

            if not (mapped or dropped):
                continue

            job["skills"] = kept
            job["skill"] = kept[0] if kept else None
            rewrites.append(
                {
                    "job_id": job.get("id"),
                    "job_name": job.get("name") or job.get("id"),
                    "before": list(current),
                    "after": list(kept),
                    "mapped": mapped,
                    "dropped": dropped,
                }
            )

        # An entry is recorded exactly when a job was modified.
        if rewrites:
            save_jobs(jobs)
            logger.info(
                "Curator rewrote skill references in %d cron job(s)", len(rewrites)
            )

    return {
        "rewrites": rewrites,
        "jobs_updated": len(rewrites),
        "jobs_scanned": len(jobs),
    }
+1 -1
View File
@@ -40,7 +40,7 @@ services:
# - TEAMS_CLIENT_SECRET=${TEAMS_CLIENT_SECRET}
# - TEAMS_TENANT_ID=${TEAMS_TENANT_ID}
# - TEAMS_ALLOWED_USERS=${TEAMS_ALLOWED_USERS}
# - TEAMS_PORT=${TEAMS_PORT:-3978}
# - TEAMS_PORT=3978
command: ["gateway", "run"]
dashboard:
+6 -39
View File
@@ -36,26 +36,6 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
return is_truthy_value(value, default=default)
def _coerce_float(value: Any, default: float) -> float:
"""Coerce numeric config values, falling back on malformed input."""
if value is None:
return default
try:
return float(value)
except (TypeError, ValueError):
return default
def _coerce_int(value: Any, default: int) -> int:
"""Coerce integer config values, falling back on malformed input."""
if value is None:
return default
try:
return int(value)
except (TypeError, ValueError):
return default
def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str:
"""Normalize unauthorized DM behavior to a supported value."""
if isinstance(value, str):
@@ -321,13 +301,13 @@ class StreamingConfig:
if not data:
return cls()
return cls(
enabled=_coerce_bool(data.get("enabled"), False),
enabled=data.get("enabled", False),
transport=data.get("transport", "edit"),
edit_interval=_coerce_float(data.get("edit_interval"), 1.0),
buffer_threshold=_coerce_int(data.get("buffer_threshold"), 40),
edit_interval=float(data.get("edit_interval", 1.0)),
buffer_threshold=int(data.get("buffer_threshold", 40)),
cursor=data.get("cursor", ""),
fresh_final_after_seconds=_coerce_float(
data.get("fresh_final_after_seconds"), 60.0
fresh_final_after_seconds=float(
data.get("fresh_final_after_seconds", 60.0)
),
)
@@ -920,12 +900,6 @@ def load_gateway_config() -> GatewayConfig:
if "dm_mention_threads" in matrix_cfg and not os.getenv("MATRIX_DM_MENTION_THREADS"):
os.environ["MATRIX_DM_MENTION_THREADS"] = str(matrix_cfg["dm_mention_threads"]).lower()
# Feishu settings → env vars (env vars take precedence)
feishu_cfg = yaml_cfg.get("feishu", {})
if isinstance(feishu_cfg, dict):
if "allow_bots" in feishu_cfg and not os.getenv("FEISHU_ALLOW_BOTS"):
os.environ["FEISHU_ALLOW_BOTS"] = str(feishu_cfg["allow_bots"]).lower()
except Exception as e:
logger.warning(
"Failed to process config.yaml — falling back to .env / gateway.json values. "
@@ -1077,14 +1051,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
if Platform.WHATSAPP not in config.platforms:
config.platforms[Platform.WHATSAPP] = PlatformConfig()
config.platforms[Platform.WHATSAPP].enabled = True
whatsapp_home = os.getenv("WHATSAPP_HOME_CHANNEL")
if whatsapp_home and Platform.WHATSAPP in config.platforms:
config.platforms[Platform.WHATSAPP].home_channel = HomeChannel(
platform=Platform.WHATSAPP,
chat_id=whatsapp_home,
name=os.getenv("WHATSAPP_HOME_CHANNEL_NAME", "Home"),
)
# Slack
slack_token = os.getenv("SLACK_BOT_TOKEN")
if slack_token:
+2 -4
View File
@@ -2351,11 +2351,10 @@ class APIServerAdapter(BasePlatformAdapter):
)
if agent_ref is not None:
agent_ref[0] = agent
effective_task_id = session_id or str(uuid.uuid4())
result = agent.run_conversation(
user_message=user_message,
conversation_history=conversation_history,
task_id=effective_task_id,
task_id="default",
)
usage = {
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
@@ -2552,11 +2551,10 @@ class APIServerAdapter(BasePlatformAdapter):
)
self._active_run_agents[run_id] = agent
def _run_sync():
effective_task_id = session_id or run_id
r = agent.run_conversation(
user_message=user_message,
conversation_history=conversation_history,
task_id=effective_task_id,
task_id="default",
)
u = {
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
+13 -189
View File
@@ -416,7 +416,7 @@ def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = Non
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple, Union
from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple
from enum import Enum
from pathlib import Path as _Path
@@ -981,7 +981,7 @@ def coerce_plaintext_gateway_command(event: "MessageEvent") -> None:
return
@dataclass
@dataclass
class SendResult:
"""Result of sending a message."""
success: bool
@@ -991,45 +991,6 @@ class SendResult:
retryable: bool = False # True for transient connection errors — base will retry automatically
class EphemeralReply(str):
    """System-notice reply that auto-deletes after a TTL.

    Slash-command handlers in ``gateway/run.py`` can return this wrapper
    instead of a plain string to request that the reply message be deleted
    after ``ttl_seconds`` on platforms that support ``delete_message``.

    Subclassing ``str`` keeps the wrapper transparent to anything that
    treats handler return values as text (existing tests use ``in`` /
    ``startswith`` / equality; the ``_process_message_background`` pipeline
    extracts attachments from the string content). ``isinstance(r,
    EphemeralReply)`` still distinguishes ephemeral replies from plain
    strings so the send path can schedule deletion.

    Platforms that don't override :meth:`BasePlatformAdapter.delete_message`
    silently ignore the TTL: the message is sent normally and left in
    place. When ``ttl_seconds`` is ``None``, the pipeline uses the
    configured ``display.ephemeral_system_ttl`` default. A default of ``0``
    disables auto-deletion globally, preserving prior behavior.
    """

    # Seconds before the sent reply should be deleted; ``None`` defers to
    # the configured default.
    ttl_seconds: Optional[int]

    def __new__(cls, text: str, ttl_seconds: Optional[int] = None):
        # ``str`` is immutable, so the payload must be supplied in __new__;
        # the TTL rides along as an ordinary instance attribute.
        instance = super().__new__(cls, text)
        instance.ttl_seconds = ttl_seconds
        return instance

    @property
    def text(self) -> str:
        """Return the underlying text.

        Provided for call sites that want an explicit string conversion,
        though ``str(reply)`` and using ``reply`` directly where a string
        is expected both work identically.
        """
        return str.__str__(self)
def merge_pending_message_event(
pending_messages: Dict[str, MessageEvent],
session_key: str,
@@ -1073,11 +1034,6 @@ def merge_pending_message_event(
existing.text = event.text
if existing_is_photo or incoming_is_photo:
existing.message_type = MessageType.PHOTO
elif (
getattr(existing, "message_type", None) == MessageType.TEXT
and event.message_type != MessageType.TEXT
):
existing.message_type = event.message_type
return
if (
@@ -1112,10 +1068,8 @@ _RETRYABLE_ERROR_PATTERNS = (
)
# Type for message handlers. Handlers may return a plain string (normal
# reply), an ``EphemeralReply`` to opt the reply into auto-deletion, or
# ``None`` when the response was already delivered (e.g. via streaming).
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[Union[str, "EphemeralReply"]]]]
# Type for message handlers
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
def resolve_channel_prompt(
@@ -1500,64 +1454,6 @@ class BasePlatformAdapter(ABC):
"""
return False
def _get_ephemeral_system_ttl_default(self) -> int:
"""Read ``display.ephemeral_system_ttl`` from config.
Returns the TTL in seconds to use when an :class:`EphemeralReply`
does not specify one explicitly. ``0`` (the default) disables
auto-deletion. Non-fatal if config is unreadable.
"""
try:
from hermes_cli.config import load_config as _load_config
except Exception:
return 0
try:
cfg = _load_config()
except Exception:
return 0
display = cfg.get("display", {}) if isinstance(cfg, dict) else {}
if not isinstance(display, dict):
return 0
raw = display.get("ephemeral_system_ttl", 0)
try:
return int(raw)
except (TypeError, ValueError):
return 0
def _schedule_ephemeral_delete(
self,
chat_id: str,
message_id: str,
ttl_seconds: int,
) -> None:
"""Spawn a detached task that deletes ``message_id`` after ``ttl_seconds``.
Best-effort failures (gateway restart, permission denied, message
too old for Telegram's 48h window) are swallowed at debug level.
Does not block the caller.
"""
async def _run_delete() -> None:
try:
await asyncio.sleep(max(1, int(ttl_seconds)))
await self.delete_message(chat_id=chat_id, message_id=message_id)
except asyncio.CancelledError:
raise
except Exception as e:
logger.debug(
"[%s] Ephemeral delete failed for %s/%s: %s",
self.name, chat_id, message_id, e,
)
coro = _run_delete()
try:
asyncio.create_task(coro)
except RuntimeError:
# No running loop (e.g. unit tests that never reach the async
# path). Close the coroutine cleanly so Python doesn't warn
# about it never being awaited, then drop silently.
coro.close()
async def send_slash_confirm(
self,
chat_id: str,
@@ -2147,28 +2043,6 @@ class BasePlatformAdapter(ABC):
lowered = error.lower()
return "timed out" in lowered or "readtimeout" in lowered or "writetimeout" in lowered
def _unwrap_ephemeral(self, response: Any) -> Tuple[Optional[str], int]:
    """Unwrap a handler response into (text, ttl_seconds).

    Accepts a plain string, ``None``, or an :class:`EphemeralReply`.
    Returns ``(text, ttl)`` where ``ttl > 0`` means the caller should
    schedule a deletion via :meth:`_schedule_ephemeral_delete` after
    the send succeeds. ``ttl`` is forced to 0 when the adapter
    doesn't override :meth:`delete_message` so non-supporting
    platforms silently degrade to normal sends.
    """
    if not isinstance(response, EphemeralReply):
        # Plain string or None: pass through untouched, nothing to delete.
        return response, 0

    ttl = response.ttl_seconds
    if ttl is None:
        # No explicit TTL on the reply: fall back to the configured default.
        try:
            ttl = int(self._get_ephemeral_system_ttl_default())
        except Exception:
            ttl = 0

    # An adapter that inherits the base delete_message cannot delete, so
    # scheduling a deletion for it would be pointless.
    if ttl and ttl > 0 and type(self).delete_message is BasePlatformAdapter.delete_message:
        ttl = 0
    return response.text, int(ttl or 0)
async def _send_with_retry(
self,
chat_id: str,
@@ -2476,20 +2350,13 @@ class BasePlatformAdapter(ABC):
release_guard=False,
discard_pending=False,
)
_text, _eph_ttl = self._unwrap_ephemeral(response)
if _text:
_r = await self._send_with_retry(
if response:
await self._send_with_retry(
chat_id=event.source.chat_id,
content=_text,
content=response,
reply_to=event.message_id,
metadata=thread_meta,
)
if _eph_ttl > 0 and _r.success and _r.message_id:
self._schedule_ephemeral_delete(
chat_id=event.source.chat_id,
message_id=_r.message_id,
ttl_seconds=_eph_ttl,
)
except Exception:
# On failure, restore the original guard if one still exists so
# we don't leave the session in a half-reset state.
@@ -2569,20 +2436,13 @@ class BasePlatformAdapter(ABC):
try:
_thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
response = await self._message_handler(event)
_text, _eph_ttl = self._unwrap_ephemeral(response)
if _text:
_r = await self._send_with_retry(
if response:
await self._send_with_retry(
chat_id=event.source.chat_id,
content=_text,
content=response,
reply_to=event.message_id,
metadata=_thread_meta,
)
if _eph_ttl > 0 and _r.success and _r.message_id:
self._schedule_ephemeral_delete(
chat_id=event.source.chat_id,
message_id=_r.message_id,
ttl_seconds=_eph_ttl,
)
except Exception as e:
logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
return
@@ -2656,6 +2516,7 @@ class BasePlatformAdapter(ABC):
# Fall back to a new Event only if the entry was removed externally.
interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
self._active_sessions[session_key] = interrupt_event
callback_generation = getattr(interrupt_event, "_hermes_run_generation", None)
# Start continuous typing indicator (refreshes every 2 seconds)
_thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
@@ -2688,16 +2549,7 @@ class BasePlatformAdapter(ABC):
# Call the handler (this can take a while with tool calls)
response = await self._message_handler(event)
# Slash-command handlers may return an EphemeralReply sentinel to
# request that their reply message auto-delete after a TTL (used
# for system notices like "✨ New session started!" that the user
# doesn't need to keep in the thread). Unwrap here so all the
# downstream extract_media / text-processing logic sees a plain
# string, and remember the TTL + platform capability so the
# post-send block can schedule the deletion.
response, _ephemeral_ttl = self._unwrap_ephemeral(response)
# Send response if any. A None/empty response is normal when
# streaming already delivered the text (already_sent=True) or
# when the message was queued behind an active agent. Log at
@@ -2786,21 +2638,6 @@ class BasePlatformAdapter(ABC):
)
_record_delivery(result)
# Schedule auto-deletion of system-notice replies.
# Detached so the handler returns immediately; errors
# (permission denied, message too old) are swallowed.
if (
_ephemeral_ttl
and _ephemeral_ttl > 0
and result.success
and result.message_id
):
self._schedule_ephemeral_delete(
chat_id=event.source.chat_id,
message_id=result.message_id,
ttl_seconds=_ephemeral_ttl,
)
# Human-like pacing delay between text and media
human_delay = self._get_human_delay()
@@ -2978,20 +2815,7 @@ class BasePlatformAdapter(ABC):
finally:
# Fire any one-shot post-delivery callback registered for this
# session (e.g. deferred background-review notifications).
#
# Snapshot the callback generation HERE (after the agent has run),
# not at the top of this task. _hermes_run_generation is set on
# the interrupt event by GatewayRunner._bind_adapter_run_generation
# during _handle_message_with_agent — which happens DURING the
# self._message_handler(event) await above. Snapshotting earlier
# always captured None, which bypassed the generation-ownership
# check in pop_post_delivery_callback and let stale runs fire a
# fresher run's callbacks.
_callback_generation = getattr(
interrupt_event,
"_hermes_run_generation",
None,
)
_callback_generation = callback_generation
if hasattr(self, "pop_post_delivery_callback"):
_post_cb = self.pop_post_delivery_callback(
session_key,
+2 -4
View File
@@ -3078,7 +3078,6 @@ class DiscordAdapter(BasePlatformAdapter):
async def send_update_prompt(
self, chat_id: str, prompt: str, default: str = "",
session_key: str = "",
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
"""Send an interactive button-based update prompt (Yes / No).
@@ -3088,10 +3087,9 @@ class DiscordAdapter(BasePlatformAdapter):
if not self._client or not DISCORD_AVAILABLE:
return SendResult(success=False, error="Not connected")
try:
target_id = metadata.get("thread_id") if metadata and metadata.get("thread_id") else chat_id
channel = self._client.get_channel(int(target_id))
channel = self._client.get_channel(int(chat_id))
if not channel:
channel = await self._client.fetch_channel(int(target_id))
channel = await self._client.fetch_channel(int(chat_id))
default_hint = f" (default: {default})" if default else ""
embed = discord.Embed(
+51 -207
View File
@@ -64,7 +64,7 @@ from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from types import SimpleNamespace
from typing import Any, Dict, List, Literal, Optional, Sequence
from typing import Any, Dict, List, Optional, Sequence
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import Request, urlopen
@@ -141,7 +141,6 @@ from gateway.platforms.base import (
)
from gateway.status import acquire_scoped_lock, release_scoped_lock
from hermes_constants import get_hermes_home
from utils import atomic_json_write
logger = logging.getLogger(__name__)
@@ -388,8 +387,6 @@ class FeishuAdapterSettings:
admins: frozenset[str] = frozenset()
default_group_policy: str = ""
group_rules: Dict[str, FeishuGroupRule] = field(default_factory=dict)
allow_bots: str = "none" # "none" | "mentions" | "all"
require_mention: bool = True
@dataclass
@@ -399,7 +396,6 @@ class FeishuGroupRule:
policy: str # "open" | "allowlist" | "blacklist" | "admin_only" | "disabled"
allowlist: set[str] = field(default_factory=set)
blacklist: set[str] = field(default_factory=set)
require_mention: Optional[bool] = None # None = inherit global
@dataclass
@@ -409,40 +405,6 @@ class FeishuBatchState:
counts: Dict[str, int] = field(default_factory=dict)
# ---------------------------------------------------------------------------
# Admission: policy types
# ---------------------------------------------------------------------------
# Reason codes returned by FeishuAdapter._admit when an inbound event is
# dropped (``None`` from _admit means the event is admitted).
RejectReason = Literal[
"self_echo",  # sender's ids overlap this bot's own ids
"self_ids_unknown",  # bot sender, but our own ids or the sender's ids are empty
"bots_disabled",  # bot sender while allow_bots is neither "mentions" nor "all"
"bot_not_mentioned",  # bot sender in "mentions" mode that did not mention us
"group_policy_rejected",  # group policy gate failed, or a required @mention is missing
]
def _is_bot_sender(sender: Any) -> bool:
# receive_v1 docs say {user, bot}; accept "app" defensively.
return getattr(sender, "sender_type", "") in ("bot", "app")
def _sender_identity(sender: Any) -> frozenset:
# Take any non-empty id variant — tenant sender_id_type decides which are populated.
sid = getattr(sender, "sender_id", None)
if sid is None:
return frozenset()
return frozenset(
v for v in (
getattr(sid, "open_id", None),
getattr(sid, "user_id", None),
getattr(sid, "union_id", None),
)
if v
)
# ---------------------------------------------------------------------------
# Markdown rendering helpers
# ---------------------------------------------------------------------------
@@ -1415,16 +1377,10 @@ class FeishuAdapter(BasePlatformAdapter):
for chat_id, rule_cfg in raw_group_rules.items():
if not isinstance(rule_cfg, dict):
continue
# Only override when the key is explicitly set — missing vs false
# must not collapse.
per_chat_require_mention: Optional[bool] = None
if "require_mention" in rule_cfg:
per_chat_require_mention = _to_boolean(rule_cfg.get("require_mention"))
group_rules[str(chat_id)] = FeishuGroupRule(
policy=str(rule_cfg.get("policy", "open")).strip().lower(),
allowlist=set(str(u).strip() for u in rule_cfg.get("allowlist", []) if str(u).strip()),
blacklist=set(str(u).strip() for u in rule_cfg.get("blacklist", []) if str(u).strip()),
require_mention=per_chat_require_mention,
)
# Bot-level admins
@@ -1434,16 +1390,6 @@ class FeishuAdapter(BasePlatformAdapter):
# Default group policy (for groups not in group_rules)
default_group_policy = str(extra.get("default_group_policy", "")).strip().lower()
# Env-only so adapter and gateway auth bypass share one source; yaml
# feishu.allow_bots is bridged to this env var at config load.
allow_bots = os.getenv("FEISHU_ALLOW_BOTS", "none").strip().lower()
if allow_bots not in ("none", "mentions", "all"):
logger.warning(
"[Feishu] Unknown allow_bots=%r, falling back to 'none'. Valid: none, mentions, all.",
allow_bots,
)
allow_bots = "none"
return FeishuAdapterSettings(
app_id=str(extra.get("app_id") or os.getenv("FEISHU_APP_ID", "")).strip(),
app_secret=str(extra.get("app_secret") or os.getenv("FEISHU_APP_SECRET", "")).strip(),
@@ -1500,10 +1446,6 @@ class FeishuAdapter(BasePlatformAdapter):
admins=admins,
default_group_policy=default_group_policy,
group_rules=group_rules,
allow_bots=allow_bots,
require_mention=_to_boolean(
extra.get("require_mention", os.getenv("FEISHU_REQUIRE_MENTION", "true"))
),
)
def _apply_settings(self, settings: FeishuAdapterSettings) -> None:
@@ -1534,8 +1476,6 @@ class FeishuAdapter(BasePlatformAdapter):
self._ws_reconnect_interval = settings.ws_reconnect_interval
self._ws_ping_interval = settings.ws_ping_interval
self._ws_ping_timeout = settings.ws_ping_timeout
self._allow_bots = settings.allow_bots
self._require_mention = settings.require_mention
def _build_event_handler(self) -> Any:
if EventDispatcherHandler is None:
@@ -2249,28 +2189,30 @@ class FeishuAdapter(BasePlatformAdapter):
event = getattr(data, "event", None)
message = getattr(event, "message", None)
sender = getattr(event, "sender", None)
if not message or not sender or not getattr(sender, "sender_id", None):
logger.debug("[Feishu] Dropping malformed inbound event: missing message/sender")
sender_id = getattr(sender, "sender_id", None)
if not message or not sender_id:
logger.debug("[Feishu] Dropping malformed inbound event: missing message or sender_id")
return
message_id = getattr(message, "message_id", None)
if not message_id or self._is_duplicate(message_id):
logger.debug("[Feishu] Dropping duplicate/missing message_id: %s", message_id)
return
reason = self._admit(sender, message)
if reason is not None:
logger.debug("[Feishu] dropping inbound event: %s", reason)
if self._is_self_sent_bot_message(event):
logger.debug("[Feishu] Dropping self-sent bot event: %s", message_id)
return
chat_type = getattr(message, "chat_type", "p2p")
chat_id = getattr(message, "chat_id", "") or ""
if chat_type != "p2p" and not self._should_accept_group_message(message, sender_id, chat_id):
logger.debug("[Feishu] Dropping group message that failed mention/policy gate: %s", message_id)
return
await self._process_inbound_message(
data=data,
message=message,
sender_id=getattr(sender, "sender_id", None),
sender_id=sender_id,
chat_type=chat_type,
message_id=message_id,
is_bot=_is_bot_sender(sender),
)
def _on_message_read_event(self, data: P2ImMessageMessageReadV1) -> None:
@@ -2447,11 +2389,10 @@ class FeishuAdapter(BasePlatformAdapter):
msg = items[0] if items else None
if not msg:
return
# GET im/v1/messages returns sender.id=app_id for bot messages —
# peer bots and us share sender_type="app" but differ on app_id.
sender = getattr(msg, "sender", None)
if str(getattr(sender, "id", "") or "") != self._app_id:
return # only route reactions on this bot's own messages
sender_type = str(getattr(sender, "sender_type", "") or "").lower()
if sender_type != "app":
return # only route reactions on our own bot messages
chat_id = str(getattr(msg, "chat_id", "") or "")
chat_type_raw = str(getattr(msg, "chat_type", "p2p") or "p2p")
if not chat_id:
@@ -2738,7 +2679,6 @@ class FeishuAdapter(BasePlatformAdapter):
sender_id: Any,
chat_type: str,
message_id: str,
is_bot: bool = False,
) -> None:
text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
@@ -2764,27 +2704,19 @@ class FeishuAdapter(BasePlatformAdapter):
)
reply_to_text = await self._fetch_message_text(reply_to_message_id) if reply_to_message_id else None
sender_primary = (
getattr(sender_id, "open_id", None)
or getattr(sender_id, "user_id", None)
or getattr(sender_id, "union_id", None)
or "<unknown>"
)
logger.info(
"[Feishu] Inbound %s message received: id=%s type=%s chat_id=%s sender=%s:%s text=%r media=%d",
"[Feishu] Inbound %s message received: id=%s type=%s chat_id=%s text=%r media=%d",
"dm" if chat_type == "p2p" else "group",
message_id,
inbound_type.value,
getattr(message, "chat_id", "") or "",
"bot" if is_bot else "user",
sender_primary,
text[:120],
len(media_urls),
)
chat_id = getattr(message, "chat_id", "") or ""
chat_info = await self.get_chat_info(chat_id)
sender_profile = await self._resolve_sender_profile(sender_id, is_bot=is_bot)
sender_profile = await self._resolve_sender_profile(sender_id)
source = self.build_source(
chat_id=chat_id,
chat_name=chat_info.get("name") or chat_id or "Feishu Chat",
@@ -2793,7 +2725,6 @@ class FeishuAdapter(BasePlatformAdapter):
user_name=sender_profile["user_name"],
thread_id=getattr(message, "thread_id", None) or None,
user_id_alt=sender_profile["user_id_alt"],
is_bot=is_bot,
)
normalized = MessageEvent(
text=text,
@@ -3516,12 +3447,7 @@ class FeishuAdapter(BasePlatformAdapter):
return "dm"
return "group"
async def _resolve_sender_profile(
self,
sender_id: Any,
*,
is_bot: bool = False,
) -> Dict[str, Optional[str]]:
async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
"""Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.
Preference order for the primary ``user_id`` field:
@@ -3538,11 +3464,7 @@ class FeishuAdapter(BasePlatformAdapter):
union_id = getattr(sender_id, "union_id", None) or None
# Prefer tenant-scoped user_id; fall back to app-scoped open_id.
primary_id = user_id or open_id
# bot/v3/bots/basic_batch only accepts open_id.
name_lookup_id = open_id if is_bot else (primary_id or union_id)
display_name = await self._resolve_sender_name_from_api(
name_lookup_id, is_bot=is_bot,
)
display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
return {
"user_id": primary_id,
"user_name": display_name,
@@ -3562,14 +3484,11 @@ class FeishuAdapter(BasePlatformAdapter):
self._sender_name_cache.pop(sender_id, None)
return None
async def _resolve_sender_name_from_api(
self,
sender_id: Optional[str],
*,
is_bot: bool = False,
) -> Optional[str]:
"""Bots divert to bot/basic_batch — contact API doesn't return bot names.
Failures are silent so the pipeline never blocks on name resolution.
async def _resolve_sender_name_from_api(self, sender_id: Optional[str]) -> Optional[str]:
"""Fetch the sender's display name from the Feishu contact API with a 10-minute cache.
ID-type detection mirrors openclaw: ou_ open_id, on_ union_id, else user_id.
Failures are silently suppressed; the message pipeline must not block on name resolution.
"""
if not sender_id or not self._client:
return None
@@ -3579,16 +3498,7 @@ class FeishuAdapter(BasePlatformAdapter):
now = time.time()
cached_name = self._get_cached_sender_name(trimmed)
if cached_name is not None:
return cached_name or None # "" cached means "known nameless"
if is_bot:
names = await self._fetch_bot_names([trimmed])
if names is None:
return None
expire_at = now + _FEISHU_SENDER_NAME_TTL_SECONDS
for oid, name in names.items():
self._sender_name_cache[oid] = (name, expire_at)
hit = self._sender_name_cache.get(trimmed)
return (hit[0] or None) if hit else None
return cached_name
try:
from lark_oapi.api.contact.v3 import GetUserRequest # lazy import
if trimmed.startswith("ou_"):
@@ -3617,35 +3527,6 @@ class FeishuAdapter(BasePlatformAdapter):
logger.debug("[Feishu] Failed to resolve sender name for %s", sender_id, exc_info=True)
return None
async def _fetch_bot_names(self, bot_ids: List[str]) -> Optional[Dict[str, str]]:
if not self._client or not bot_ids:
return None
try:
req = (
BaseRequest.builder()
.http_method(HttpMethod.GET)
.uri("/open-apis/bot/v3/bots/basic_batch")
.queries([("bot_ids", oid) for oid in bot_ids])
.token_types({AccessTokenType.TENANT})
.build()
)
resp = await asyncio.to_thread(self._client.request, req)
content = getattr(getattr(resp, "raw", None), "content", None)
if not content:
return None
payload = json.loads(content)
if payload.get("code") != 0:
return None
bots = (payload.get("data") or {}).get("bots") or {}
return {
oid: str(info.get("name") or "").strip()
for oid, info in bots.items()
if oid
}
except Exception:
logger.debug("[Feishu] Failed to fetch bot names for %s", bot_ids, exc_info=True)
return None
async def _fetch_message_text(self, message_id: str) -> Optional[str]:
if not self._client or not message_id:
return None
@@ -3709,60 +3590,10 @@ class FeishuAdapter(BasePlatformAdapter):
logger.exception("[Feishu] Background inbound processing failed")
# =========================================================================
# Inbound admission
# Group policy and mention gating
# =========================================================================
def _admit(self, sender: Any, message: Any) -> Optional[RejectReason]:
    """Decide whether an inbound event may enter the pipeline.

    Returns ``None`` to admit the event, otherwise the reason it is
    dropped. Checks run in order: self-echo, bot admission
    (``self._allow_bots``), then for non-p2p chats the per-group policy and
    the mention requirement.
    """
    sender_ids = _sender_identity(sender)
    self_ids = frozenset(v for v in (self._bot_open_id, self._bot_user_id) if v)
    is_bot = _is_bot_sender(sender)
    is_group = getattr(message, "chat_type", "p2p") != "p2p"
    chat_id = getattr(message, "chat_id", "") or ""
    require_mention = is_group and self._require_mention_for(chat_id)

    # Defensive only — Feishu doesn't echo our outbound back as inbound,
    # and open_id is always populated on both sides.
    if self_ids and sender_ids & self_ids:
        return "self_echo"

    if is_bot:
        mode = self._allow_bots
        if mode not in ("mentions", "all"):
            return "bots_disabled"
        # Defensive: pre-hydration or malformed payloads.
        if not self_ids or not sender_ids:
            return "self_ids_unknown"
        # The group mention check at the end of this method already
        # enforces mentions when require_mention is on; check here only on
        # paths that final check won't reach.
        if mode == "mentions" and not require_mention and not self._mentions_self(message):
            return "bot_not_mentioned"

    if not is_group:
        return None

    if not self._allow_group_message(
        getattr(sender, "sender_id", None), chat_id, is_bot=is_bot,
    ):
        return "group_policy_rejected"
    if require_mention and not self._mentions_self(message):
        return "group_policy_rejected"
    return None
def _require_mention_for(self, chat_id: str) -> bool:
rule = self._group_rules.get(chat_id) if chat_id else None
if rule and rule.require_mention is not None:
return rule.require_mention
return self._require_mention
# --- Group policy ---------------------------------------------------------
def _allow_group_message(
self,
sender_id: Any,
chat_id: str = "",
*,
is_bot: bool = False,
) -> bool:
def _allow_group_message(self, sender_id: Any, chat_id: str = "") -> bool:
"""Per-group policy gate for non-DM traffic."""
sender_open_id = getattr(sender_id, "open_id", None)
sender_user_id = getattr(sender_id, "user_id", None)
@@ -3781,17 +3612,12 @@ class FeishuAdapter(BasePlatformAdapter):
allowlist = self._allowed_group_users
blacklist = set()
# Channel locks apply to everyone; allowlist/blacklist only gate humans
# (bots were already cleared upstream by FEISHU_ALLOW_BOTS).
if policy == "disabled":
return False
if policy == "open":
return True
if policy == "admin_only":
return False
if is_bot:
return True
if policy == "allowlist":
return bool(sender_ids and (sender_ids & allowlist))
if policy == "blacklist":
@@ -3799,16 +3625,17 @@ class FeishuAdapter(BasePlatformAdapter):
return bool(sender_ids and (sender_ids & self._allowed_group_users))
# --- Mention detection ----------------------------------------------------
def _mentions_self(self, message: Any) -> bool:
# @_all is Feishu's @everyone placeholder.
def _should_accept_group_message(self, message: Any, sender_id: Any, chat_id: str = "") -> bool:
"""Require an explicit @mention before group messages enter the agent."""
if not self._allow_group_message(sender_id, chat_id):
return False
# @_all is Feishu's @everyone placeholder — always route to the bot.
raw_content = getattr(message, "content", "") or ""
if "@_all" in raw_content:
return True
mentions = getattr(message, "mentions", None) or []
if mentions and self._message_mentions_bot(mentions):
return True
if mentions:
return self._message_mentions_bot(mentions)
normalized = normalize_feishu_message(
message_type=getattr(message, "message_type", "") or "",
raw_content=raw_content,
@@ -3817,6 +3644,23 @@ class FeishuAdapter(BasePlatformAdapter):
)
return self._post_mentions_bot(normalized.mentions)
def _is_self_sent_bot_message(self, event: Any) -> bool:
"""Return True only for Feishu events emitted by this Hermes bot."""
sender = getattr(event, "sender", None)
sender_type = str(getattr(sender, "sender_type", "") or "").strip().lower()
if sender_type not in {"bot", "app"}:
return False
sender_id = getattr(sender, "sender_id", None)
sender_open_id = str(getattr(sender_id, "open_id", "") or "").strip()
sender_user_id = str(getattr(sender_id, "user_id", "") or "").strip()
if self._bot_open_id and sender_open_id == self._bot_open_id:
return True
if self._bot_user_id and sender_user_id == self._bot_user_id:
return True
return False
def _message_mentions_bot(self, mentions: List[Any]) -> bool:
# IDs trump names: when both sides have open_id (or both user_id),
# match requires equal IDs. Name fallback only when either side
@@ -3960,7 +3804,7 @@ class FeishuAdapter(BasePlatformAdapter):
recent = self._seen_message_order[-self._dedup_cache_size:]
# Save as {msg_id: timestamp} so TTL filtering works across restarts.
payload = {"message_ids": {k: self._seen_message_ids[k] for k in recent if k in self._seen_message_ids}}
atomic_json_write(self._dedup_state_path, payload, indent=None)
self._dedup_state_path.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")
except OSError:
logger.warning("[Feishu] Failed to persist dedup state to %s", self._dedup_state_path, exc_info=True)
+2 -3
View File
@@ -13,8 +13,6 @@ import time
from pathlib import Path
from typing import TYPE_CHECKING, Dict
from utils import atomic_json_write
if TYPE_CHECKING:
from gateway.platforms.base import MessageEvent
@@ -239,11 +237,12 @@ class ThreadParticipationTracker:
def _save(self) -> None:
path = self._state_path()
path.parent.mkdir(parents=True, exist_ok=True)
thread_list = list(self._threads)
if len(thread_list) > self._max_tracked:
thread_list = thread_list[-self._max_tracked:]
self._threads = set(thread_list)
atomic_json_write(path, thread_list, indent=None)
path.write_text(json.dumps(thread_list), encoding="utf-8")
def mark(self, thread_id: str) -> None:
"""Mark *thread_id* as participated and persist."""
-12
View File
@@ -534,18 +534,6 @@ class SignalAdapter(BasePlatformAdapter):
except Exception:
logger.exception("Signal: failed to fetch attachment %s", att_id)
# Skip envelopes with no meaningful content (no text, no attachments).
# Catches profile key updates, empty messages, and other metadata-only
# envelopes that still carry a dataMessage wrapper but have nothing
# worth processing. See issue: signal-cli logs "Profile key update" +
# Hermes receives msg='' triggering a full agent turn for nothing.
if (not text or not text.strip()) and not media_urls:
logger.debug(
"Signal: skipping contentless envelope from %s (%d attachments)",
redact_phone(sender), len(media_urls) if media_urls else 0,
)
return
# Build session source
source = self.build_source(
chat_id=chat_id,
+7 -74
View File
@@ -290,53 +290,14 @@ class TelegramAdapter(BasePlatformAdapter):
# and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
self._slash_confirm_state: Dict[str, str] = {}
def _is_callback_user_authorized(
self,
user_id: str,
*,
chat_id: Optional[str] = None,
chat_type: Optional[str] = None,
thread_id: Optional[str] = None,
user_name: Optional[str] = None,
) -> bool:
@staticmethod
def _is_callback_user_authorized(user_id: str) -> bool:
"""Return whether a Telegram inline-button caller may perform gated actions."""
normalized_user_id = str(user_id or "").strip()
if not normalized_user_id:
return False
runner = getattr(getattr(self, "_message_handler", None), "__self__", None)
auth_fn = getattr(runner, "_is_user_authorized", None)
if callable(auth_fn):
try:
from gateway.session import SessionSource
normalized_chat_type = str(chat_type or "dm").strip().lower() or "dm"
if normalized_chat_type == "private":
normalized_chat_type = "dm"
elif normalized_chat_type == "supergroup":
normalized_chat_type = "forum" if thread_id is not None else "group"
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id=str(chat_id or normalized_user_id),
chat_type=normalized_chat_type,
user_id=normalized_user_id,
user_name=str(user_name).strip() if user_name else None,
thread_id=str(thread_id) if thread_id is not None else None,
)
return bool(auth_fn(source))
except Exception:
logger.debug(
"[Telegram] Falling back to env-only callback auth for user %s",
normalized_user_id,
exc_info=True,
)
allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
if not allowed_csv:
return True
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
return "*" in allowed_ids or normalized_user_id in allowed_ids
return "*" in allowed_ids or user_id in allowed_ids
@classmethod
def _metadata_thread_id(cls, metadata: Optional[Dict[str, Any]]) -> Optional[str]:
@@ -1360,7 +1321,6 @@ class TelegramAdapter(BasePlatformAdapter):
async def send_update_prompt(
self, chat_id: str, prompt: str, default: str = "",
session_key: str = "",
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
"""Send an inline-keyboard update prompt (Yes / No buttons).
@@ -1378,14 +1338,11 @@ class TelegramAdapter(BasePlatformAdapter):
InlineKeyboardButton("✗ No", callback_data="update_prompt:n"),
]
])
thread_id = self._metadata_thread_id(metadata)
message_thread_id = self._message_thread_id_for_send(thread_id)
msg = await self._bot.send_message(
chat_id=int(chat_id),
text=text,
parse_mode=ParseMode.MARKDOWN,
reply_markup=keyboard,
message_thread_id=message_thread_id,
**self._link_preview_kwargs(),
)
return SendResult(success=True, message_id=str(msg.message_id))
@@ -1803,12 +1760,6 @@ class TelegramAdapter(BasePlatformAdapter):
if not query or not query.data:
return
data = query.data
query_message = getattr(query, "message", None)
query_chat_id = getattr(query_message, "chat_id", None)
query_chat = getattr(query_message, "chat", None)
query_chat_type = getattr(query_chat, "type", None)
query_thread_id = getattr(query_message, "message_thread_id", None)
query_user_name = getattr(query.from_user, "first_name", None)
# --- Model picker callbacks ---
if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")):
@@ -1830,13 +1781,7 @@ class TelegramAdapter(BasePlatformAdapter):
# Only authorized users may click approval buttons.
caller_id = str(getattr(query.from_user, "id", ""))
if not self._is_callback_user_authorized(
caller_id,
chat_id=query_chat_id,
chat_type=str(query_chat_type) if query_chat_type is not None else None,
thread_id=str(query_thread_id) if query_thread_id is not None else None,
user_name=query_user_name,
):
if not self._is_callback_user_authorized(caller_id):
await query.answer(text="⛔ You are not authorized to approve commands.")
return
@@ -1886,14 +1831,8 @@ class TelegramAdapter(BasePlatformAdapter):
choice = parts[1] # once, always, cancel
confirm_id = parts[2]
caller_id = str(getattr(query.from_user, "id", ""))
if not self._is_callback_user_authorized(
caller_id,
chat_id=query_chat_id,
chat_type=str(query_chat_type) if query_chat_type is not None else None,
thread_id=str(query_thread_id) if query_thread_id is not None else None,
user_name=query_user_name,
):
caller_id = str(getattr(query.from_user, "id", ""))
if not self._is_callback_user_authorized(caller_id):
await query.answer(text="⛔ You are not authorized to answer this prompt.")
return
@@ -1952,13 +1891,7 @@ class TelegramAdapter(BasePlatformAdapter):
return
answer = data.split(":", 1)[1] # "y" or "n"
caller_id = str(getattr(query.from_user, "id", ""))
if not self._is_callback_user_authorized(
caller_id,
chat_id=query_chat_id,
chat_type=str(query_chat_type) if query_chat_type is not None else None,
thread_id=str(query_thread_id) if query_thread_id is not None else None,
user_name=query_user_name,
):
if not self._is_callback_user_authorized(caller_id):
await query.answer(text="⛔ You are not authorized to answer update prompts.")
return
await query.answer(text=f"Sent '{answer}' to the update process.")
+4 -6
View File
@@ -1896,12 +1896,10 @@ class OwnerCommandMiddleware(InboundMiddleware):
if cmd not in cls.ALLOWLIST:
return None, None, False
# Sender identity check: bot owner <-> push.from_account == push.bot_owner_id.
# The allowlisted commands (/approve, /deny, /stop, /reset, ...) are
# privileged — leaking them to non-owners lets any group member approve
# a dangerous tool call, kill the owner's task, or wipe session state.
owner_id = str((push or {}).get("bot_owner_id") or "").strip()
is_owner = bool(owner_id) and owner_id == from_account
# Sender identity check: bot owner <-> push.from_account == push.bot_owner_id
# owner_id = (push or {}).get("bot_owner_id") or ""
# is_owner = bool(owner_id) and owner_id == from_account
is_owner = True
return cmd, cmd_line, is_owner
async def handle(self, ctx: InboundContext, next_fn) -> None:
+62 -414
View File
@@ -29,7 +29,7 @@ from collections import OrderedDict
from contextvars import copy_context
from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List, Union
from typing import Dict, Optional, Any, List
# account_usage imports the OpenAI SDK chain (~230 ms). Only needed by
# /usage; we still import it at module top in the gateway because test
@@ -232,16 +232,6 @@ def _ensure_ssl_certs() -> None:
os.environ["SSL_CERT_FILE"] = candidate
return
def _home_target_env_var(platform_name: str) -> str:
    """Return the name of the env var that holds a platform's home target.

    The lowercased platform name is looked up in ``cron.scheduler``'s
    ``_HOME_TARGET_ENV_VARS`` mapping. Platforms without an entry fall back
    to the conventional ``<PLATFORM>_HOME_CHANNEL`` name.
    """
    # Imported at call time rather than at module import, as the original did.
    from cron.scheduler import _HOME_TARGET_ENV_VARS

    conventional_name = f"{platform_name.upper()}_HOME_CHANNEL"
    return _HOME_TARGET_ENV_VARS.get(platform_name.lower(), conventional_name)
_ensure_ssl_certs()
# Add parent directory to path
@@ -249,7 +239,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
# Resolve Hermes home directory (respects HERMES_HOME override)
from hermes_constants import get_hermes_home
from utils import atomic_json_write, atomic_yaml_write, base_url_host_matches, is_truthy_value
from utils import atomic_yaml_write, base_url_host_matches, is_truthy_value
_hermes_home = get_hermes_home()
# Load environment variables from ~/.hermes/.env first.
@@ -391,8 +381,6 @@ if _config_path.exists():
if _display_cfg and isinstance(_display_cfg, dict):
if "busy_input_mode" in _display_cfg and "HERMES_GATEWAY_BUSY_INPUT_MODE" not in os.environ:
os.environ["HERMES_GATEWAY_BUSY_INPUT_MODE"] = str(_display_cfg["busy_input_mode"])
if "busy_ack_enabled" in _display_cfg and "HERMES_GATEWAY_BUSY_ACK_ENABLED" not in os.environ:
os.environ["HERMES_GATEWAY_BUSY_ACK_ENABLED"] = str(_display_cfg["busy_ack_enabled"])
# Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
# HERMES_TIMEZONE from .env takes precedence (already in os.environ).
_tz_cfg = _cfg.get("timezone", "")
@@ -464,7 +452,6 @@ from gateway.session import (
from gateway.delivery import DeliveryRouter
from gateway.platforms.base import (
BasePlatformAdapter,
EphemeralReply,
MessageEvent,
MessageType,
merge_pending_message_event,
@@ -895,7 +882,6 @@ class GatewayRunner:
# /new and /reset. /model and other mid-session operations
# preserve the queue.
self._queued_events: Dict[str, List[MessageEvent]] = {}
self._pending_native_image_paths_by_session: Dict[str, List[str]] = {}
self._busy_ack_ts: Dict[str, float] = {} # last busy-ack timestamp per session (debounce)
self._session_run_generation: Dict[str, int] = {}
@@ -1753,10 +1739,7 @@ class GatewayRunner:
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
return is_truthy_value(
cfg_get(cfg, "display", "show_reasoning"),
default=False,
)
return bool(cfg_get(cfg, "display", "show_reasoning", default=False))
except Exception:
pass
return False
@@ -1978,14 +1961,6 @@ class GatewayRunner:
except Exception:
pass # don't let interrupt failure block the ack
# Check if busy ack is disabled — skip sending but still process the input.
# Placed before debounce so we don't stamp a "last ack" timestamp that was
# never actually delivered.
busy_ack_enabled = os.environ.get("HERMES_GATEWAY_BUSY_ACK_ENABLED", "true").lower() == "true"
if not busy_ack_enabled:
logger.debug("Busy ack suppressed for session %s", session_key)
return True # input still processed, just no ack sent
# Debounce: only send an acknowledgment once every 30 seconds per session
# to avoid spamming the user when they send multiple messages quickly
_BUSY_ACK_COOLDOWN = 30
@@ -2270,7 +2245,7 @@ class GatewayRunner:
# (they might become active again next restart)
try:
atomic_json_write(path, new_counts, indent=None)
path.write_text(json.dumps(new_counts))
except Exception:
pass
@@ -2338,7 +2313,7 @@ class GatewayRunner:
if session_key in counts:
del counts[session_key]
if counts:
atomic_json_write(path, counts, indent=None)
path.write_text(json.dumps(counts))
else:
path.unlink(missing_ok=True)
except Exception:
@@ -3972,11 +3947,6 @@ class GatewayRunner:
Platform.QQBOT: "QQ_ALLOW_ALL_USERS",
Platform.YUANBAO: "YUANBAO_ALLOW_ALL_USERS",
}
# Bots admitted by {PLATFORM}_ALLOW_BOTS bypass the human allowlist (#4466).
platform_allow_bots_map = {
Platform.DISCORD: "DISCORD_ALLOW_BOTS",
Platform.FEISHU: "FEISHU_ALLOW_BOTS",
}
# Plugin platforms: check the registry for auth env var names
if source.platform not in platform_env_map:
@@ -3996,9 +3966,14 @@ class GatewayRunner:
if platform_allow_all_var and os.getenv(platform_allow_all_var, "").lower() in ("true", "1", "yes"):
return True
if getattr(source, "is_bot", False):
allow_bots_var = platform_allow_bots_map.get(source.platform)
if allow_bots_var and os.getenv(allow_bots_var, "none").lower().strip() in ("mentions", "all"):
# Discord bot senders that passed the DISCORD_ALLOW_BOTS platform
# filter are already authorized at the platform level — skip the
# user allowlist. Without this, bot messages allowed by
# DISCORD_ALLOW_BOTS=mentions/all would be rejected here with
# "Unauthorized user" (fixes #4466).
if source.platform == Platform.DISCORD and getattr(source, "is_bot", False):
allow_bots = os.getenv("DISCORD_ALLOW_BOTS", "none").lower().strip()
if allow_bots in ("mentions", "all"):
return True
# Discord role-based access (DISCORD_ALLOWED_ROLES): the adapter's
@@ -4483,7 +4458,7 @@ class GatewayRunner:
invalidation_reason="stop_command",
)
logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key)
return EphemeralReply("⚡ Stopped. You can continue this session.")
return "⚡ Stopped. You can continue this session."
# /reset and /new must bypass the running-agent guard so they
# actually dispatch as commands instead of being queued as user
@@ -4605,17 +4580,6 @@ class GatewayRunner:
if _cmd_def_inner and _cmd_def_inner.name == "kanban":
return await self._handle_kanban_command(event)
# /goal is safe mid-run for status/pause/clear (inspection and
# control-plane only — doesn't interrupt the running turn).
# Setting a new goal text mid-run is rejected with the same
# "wait or /stop" message as /model so we don't race a second
# continuation prompt against the current turn.
if _cmd_def_inner and _cmd_def_inner.name == "goal":
_goal_arg = (event.get_command_args() or "").strip().lower()
if not _goal_arg or _goal_arg in ("status", "pause", "resume", "clear", "stop", "done"):
return await self._handle_goal_command(event)
return "Agent is running — use /goal status / pause / clear mid-run, or /stop before setting a new goal."
# Session-level toggles that are safe to run mid-agent —
# /yolo can unblock a pending approval prompt, /verbose cycles
# the tool-progress display mode for the ongoing stream.
@@ -4699,7 +4663,7 @@ class GatewayRunner:
# Force-clean the sentinel so the session is unlocked.
self._release_running_agent_state(_quick_key)
logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key)
return EphemeralReply("⚡ Force-stopped. The agent was still starting — session unlocked.")
return "⚡ Force-stopped. The agent was still starting — session unlocked."
# Queue the message so it will be picked up after the
# agent starts.
adapter = self.adapters.get(source.platform)
@@ -4932,9 +4896,6 @@ class GatewayRunner:
# at the end of this function so the rewritten text is sent
# to the agent as a regular user turn.
if canonical == "goal":
return await self._handle_goal_command(event)
if canonical == "voice":
return await self._handle_voice_command(event)
@@ -5080,36 +5041,7 @@ class GatewayRunner:
_run_generation = self._begin_session_run_generation(_quick_key)
try:
_agent_result = await self._handle_message_with_agent(event, source, _quick_key, _run_generation)
# Goal continuation: after the agent returns a final response
# for this turn, check any standing /goal — the judge will
# either mark it done, pause it (budget), or enqueue a
# continuation prompt back through the adapter FIFO so the
# next turn makes more progress. Wrapped in try/except so a
# broken judge never breaks normal message handling.
try:
_final_text = ""
if isinstance(_agent_result, dict):
_final_text = str(_agent_result.get("final_response") or "")
elif isinstance(_agent_result, str):
_final_text = _agent_result
# Skip for empty responses (interrupted / errored) — the
# judge would almost always say "continue" and we'd loop
# on error. Let the user drive the next turn.
if _final_text.strip():
try:
session_entry = self.session_store.get_or_create_session(source)
except Exception:
session_entry = None
if session_entry is not None:
self._post_turn_goal_continuation(
session_entry=session_entry,
source=source,
final_response=_final_text,
)
except Exception as _goal_exc:
logger.debug("goal continuation hook failed: %s", _goal_exc)
return _agent_result
return await self._handle_message_with_agent(event, source, _quick_key, _run_generation)
finally:
# If _run_agent replaced the sentinel with a real agent and
# then cleaned it up, this is a no-op. If we exited early
@@ -5137,29 +5069,22 @@ class GatewayRunner:
preprocessing pipeline so sender attribution, image enrichment, STT,
document notes, reply context, and @ references all behave the same.
Side effect: buffers per-session native image paths when the active
model supports native vision AND the user has images attached. The
caller consumes and clears that session-scoped buffer at the
``run_conversation`` site to build a multimodal user turn. When the
list is empty, the ``_enrich_message_with_vision`` text path has
already run and images are represented in-text.
Side effect: writes ``self._pending_native_image_paths`` to a list of
local image paths when the active model supports native vision AND
the user has images attached. The caller consumes and clears this
attribute at the ``run_conversation`` site to build a multimodal user
turn. When the list is empty, the ``_enrich_message_with_vision``
text path has already run and images are represented in-text.
"""
history = history or []
message_text = event.text or ""
_group_sessions_per_user = getattr(self.config, "group_sessions_per_user", True)
_thread_sessions_per_user = getattr(self.config, "thread_sessions_per_user", False)
# Use the same helper every other call site uses so the write key here
# matches the consume key at the run_conversation site — even if the
# session store overrides build_session_key's default behavior.
session_key = self._session_key_for_source(source)
# Reset only this session's per-call buffer; other sessions may be
# concurrently preparing multimodal turns on the same runner.
self._consume_pending_native_image_paths(session_key)
# Reset per-call buffer; set only when native routing is chosen.
self._pending_native_image_paths = []
_is_shared_multi_user = is_shared_multi_user_session(
source,
group_sessions_per_user=_group_sessions_per_user,
thread_sessions_per_user=_thread_sessions_per_user,
group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False),
)
if _is_shared_multi_user and source.user_name:
message_text = f"[{source.user_name}] {message_text}"
@@ -5180,11 +5105,7 @@ class GatewayRunner:
_img_mode = self._decide_image_input_mode()
if _img_mode == "native":
# Defer attachment to the run_conversation call site.
pending_native = getattr(self, "_pending_native_image_paths_by_session", None)
if pending_native is None:
pending_native = {}
self._pending_native_image_paths_by_session = pending_native
pending_native[session_key] = list(image_paths)
self._pending_native_image_paths = list(image_paths)
logger.info(
"Image routing: native (model supports vision). %d image(s) will be attached inline.",
len(image_paths),
@@ -5323,12 +5244,6 @@ class GatewayRunner:
return message_text
def _consume_pending_native_image_paths(self, session_key: str) -> List[str]:
pending_native = getattr(self, "_pending_native_image_paths_by_session", None)
if not pending_native:
return []
return list(pending_native.pop(session_key, []) or [])
async def _handle_message_with_agent(self, event, source, _quick_key: str, run_generation: int):
"""Inner handler that runs under the _running_agents sentinel guard."""
_msg_start_time = time.time()
@@ -5357,12 +5272,7 @@ class GatewayRunner:
_is_new_session = (
session_entry.created_at == session_entry.updated_at
or getattr(session_entry, "was_auto_reset", False)
or getattr(session_entry, "is_fresh_reset", False)
)
# Consume the is_fresh_reset flag immediately so it doesn't leak
# onto subsequent messages in the same session (issue #6508).
if getattr(session_entry, "is_fresh_reset", False):
session_entry.is_fresh_reset = False
if _is_new_session:
await self.hooks.emit("session:start", {
"platform": source.platform.value if source.platform else "",
@@ -5811,7 +5721,7 @@ class GatewayRunner:
# Skip for webhooks - they deliver directly to configured targets (github_comment, etc.)
if not history and source.platform and source.platform != Platform.LOCAL and source.platform != Platform.WEBHOOK:
platform_name = source.platform.value
env_key = _home_target_env_var(platform_name)
env_key = f"{platform_name.upper()}_HOME_CHANNEL"
if not os.getenv(env_key):
adapter = self.adapters.get(source.platform)
if adapter:
@@ -6407,7 +6317,7 @@ class GatewayRunner:
return "\n".join(lines)
async def _handle_reset_command(self, event: MessageEvent) -> Union[str, EphemeralReply]:
async def _handle_reset_command(self, event: MessageEvent) -> str:
"""Handle /new or /reset command."""
source = event.source
@@ -6518,8 +6428,8 @@ class GatewayRunner:
_tip_line = ""
if session_info:
return EphemeralReply(f"{header}\n\n{session_info}{_tip_line}")
return EphemeralReply(f"{header}{_tip_line}")
return f"{header}\n\n{session_info}{_tip_line}"
return f"{header}{_tip_line}"
async def _handle_profile_command(self, event: MessageEvent) -> str:
"""Handle /profile — show active profile name and home directory."""
@@ -6630,30 +6540,11 @@ class GatewayRunner:
queue_depth = self._queue_depth(session_key, adapter=adapter)
title = None
# Pull token totals from the SQLite session DB rather than the
# in-memory SessionStore. The agent's per-turn token deltas are
# persisted into sessions_db (run_agent.py), not into SessionEntry,
# so session_entry.total_tokens is always 0. SessionDB is the
# single source of truth; reading it here keeps /status accurate
# without duplicating token writes into two stores.
db_total_tokens = 0
if self._session_db:
try:
title = self._session_db.get_session_title(session_entry.session_id)
except Exception:
title = None
try:
row = self._session_db.get_session(session_entry.session_id)
if row:
db_total_tokens = (
(row.get("input_tokens") or 0)
+ (row.get("output_tokens") or 0)
+ (row.get("cache_read_tokens") or 0)
+ (row.get("cache_write_tokens") or 0)
+ (row.get("reasoning_tokens") or 0)
)
except Exception:
db_total_tokens = 0
lines = [
"📊 **Hermes Gateway Status**",
@@ -6665,7 +6556,7 @@ class GatewayRunner:
lines.extend([
f"**Created:** {session_entry.created_at.strftime('%Y-%m-%d %H:%M')}",
f"**Last Activity:** {session_entry.updated_at.strftime('%Y-%m-%d %H:%M')}",
f"**Tokens:** {db_total_tokens:,}",
f"**Tokens:** {session_entry.total_tokens:,}",
f"**Agent Running:** {'Yes ⚡' if is_running else 'No'}",
])
if queue_depth:
@@ -6767,7 +6658,7 @@ class GatewayRunner:
return "\n".join(lines)
async def _handle_stop_command(self, event: MessageEvent) -> Union[str, EphemeralReply]:
async def _handle_stop_command(self, event: MessageEvent) -> str:
"""Handle /stop command - interrupt a running agent.
When an agent is truly hung (blocked thread that never checks
@@ -6792,7 +6683,7 @@ class GatewayRunner:
invalidation_reason="stop_command_pending",
)
logger.info("STOP (pending) for session %s — sentinel cleared", session_key)
return EphemeralReply("⚡ Stopped. The agent hadn't started yet — you can continue this session.")
return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
if agent:
# Force-clean the session lock so a truly hung agent doesn't
# keep it locked forever.
@@ -6802,11 +6693,11 @@ class GatewayRunner:
interrupt_reason=_INTERRUPT_REASON_STOP,
invalidation_reason="stop_command_handler",
)
return EphemeralReply("⚡ Stopped. You can continue this session.")
return "⚡ Stopped. You can continue this session."
else:
return "No active task to stop."
async def _handle_restart_command(self, event: MessageEvent) -> Union[str, EphemeralReply]:
async def _handle_restart_command(self, event: MessageEvent) -> str:
"""Handle /restart command - drain active work, then restart the gateway."""
# Defensive idempotency check: if the previous gateway process
# recorded this same /restart (same platform + update_id) and the new
@@ -6832,7 +6723,7 @@ class GatewayRunner:
count = self._running_agent_count()
if count:
return f"⏳ Draining {count} active agent(s) before restart..."
return EphemeralReply("⏳ Gateway restart already in progress...")
return "⏳ Gateway restart already in progress..."
# Save the requester's routing info so the new gateway process can
# notify them once it comes back online.
@@ -6843,10 +6734,8 @@ class GatewayRunner:
}
if event.source.thread_id:
notify_data["thread_id"] = event.source.thread_id
atomic_json_write(
_hermes_home / ".restart_notify.json",
notify_data,
indent=None,
(_hermes_home / ".restart_notify.json").write_text(
json.dumps(notify_data)
)
except Exception as e:
logger.debug("Failed to write restart notify file: %s", e)
@@ -6863,10 +6752,8 @@ class GatewayRunner:
}
if event.platform_update_id is not None:
dedup_data["update_id"] = event.platform_update_id
atomic_json_write(
_hermes_home / ".restart_last_processed.json",
dedup_data,
indent=None,
(_hermes_home / ".restart_last_processed.json").write_text(
json.dumps(dedup_data)
)
except Exception as e:
logger.debug("Failed to write restart dedup marker: %s", e)
@@ -6884,7 +6771,7 @@ class GatewayRunner:
self.request_restart(detached=True, via_service=False)
if active_agents:
return f"⏳ Draining {active_agents} active agent(s) before restart..."
return EphemeralReply("♻ Restarting gateway. If you aren't notified within 60 seconds, restart from the console with `hermes gateway restart`.")
return "♻ Restarting gateway. If you aren't notified within 60 seconds, restart from the console with `hermes gateway restart`."
def _is_stale_restart_redelivery(self, event: MessageEvent) -> bool:
"""Return True if this /restart is a Telegram re-delivery we already handled.
@@ -7475,201 +7362,6 @@ class GatewayRunner:
# Let the normal message handler process it
return await self._handle_message(retry_event)
# ────────────────────────────────────────────────────────────────
# /goal — persistent cross-turn goals (Ralph-style loop)
# ────────────────────────────────────────────────────────────────
def _get_goal_manager_for_event(self, event: "MessageEvent"):
    """Build a GoalManager for the session that ``event`` belongs to.

    Returns:
        ``(manager, session_entry)`` on success. ``(None, None)`` when the
        ``hermes_cli.goals`` module cannot be imported, the session lookup
        raises, or the session entry has no usable ``session_id``.
    """
    # The goals module is optional here: any import failure disables /goal.
    try:
        from hermes_cli.goals import GoalManager
    except Exception as exc:
        logger.debug("goal manager unavailable: %s", exc)
        return None, None

    try:
        session_entry = self.session_store.get_or_create_session(event.source)
    except Exception as exc:
        logger.debug("goal manager: session lookup failed: %s", exc)
        return None, None

    session_id = getattr(session_entry, "session_id", None) or ""
    if not session_id:
        return None, None

    # ``self.config`` is handled both as a plain dict and as an object with
    # a ``goals`` attribute. Anything unexpected falls back to 20 turns.
    try:
        if isinstance(self.config, dict):
            goals_section = (self.config or {}).get("goals", {})
        else:
            goals_section = getattr(self.config, "goals", {}) or {}
        turn_budget = int(goals_section.get("max_turns", 20) or 20)
    except Exception:
        turn_budget = 20

    manager = GoalManager(session_id=session_id, default_max_turns=turn_budget)
    return manager, session_entry
async def _handle_goal_command(self, event: "MessageEvent") -> str:
"""Handle /goal for gateway platforms.
Subcommands: ``/goal`` / ``/goal status`` / ``/goal pause`` /
``/goal resume`` / ``/goal clear``. Any other text becomes the
new goal.
Setting a new goal queues the goal text as the next turn so the
agent starts working on it immediately the post-turn
continuation hook then takes over from there.
"""
args = (event.get_command_args() or "").strip()
lower = args.lower()
mgr, session_entry = self._get_goal_manager_for_event(event)
if mgr is None:
return "Goals unavailable on this session."
if not args or lower == "status":
return mgr.status_line()
if lower == "pause":
state = mgr.pause(reason="user-paused")
if state is None:
return "No goal set."
return f"⏸ Goal paused: {state.goal}"
if lower == "resume":
state = mgr.resume()
if state is None:
return "No goal to resume."
return (
f"▶ Goal resumed: {state.goal}\n"
"Send any message to continue, or wait — I'll take the next step on the next turn."
)
if lower in ("clear", "stop", "done"):
had = mgr.has_goal()
mgr.clear()
return "✓ Goal cleared." if had else "No active goal."
# Otherwise — treat the remaining text as the new goal.
try:
state = mgr.set(args)
except ValueError as exc:
return f"Invalid goal: {exc}"
# Queue the goal text as an immediate first turn so the agent
# starts making progress. The post-turn hook takes over after.
adapter = self.adapters.get(event.source.platform) if event.source else None
_quick_key = self._session_key_for_source(event.source) if event.source else None
if adapter and _quick_key:
try:
kickoff_event = MessageEvent(
text=state.goal,
message_type=MessageType.TEXT,
source=event.source,
message_id=event.message_id,
channel_prompt=event.channel_prompt,
)
self._enqueue_fifo(_quick_key, kickoff_event, adapter)
except Exception as exc:
logger.debug("goal kickoff enqueue failed: %s", exc)
return (
f"⊙ Goal set ({state.max_turns}-turn budget): {state.goal}\n"
"I'll keep working until the goal is done, you pause/clear it, or the budget is exhausted.\n"
"Controls: /goal status · /goal pause · /goal resume · /goal clear"
)
def _post_turn_goal_continuation(
    self,
    *,
    session_entry: Any,
    source: Any,
    final_response: str,
) -> None:
    """Judge the standing goal after a turn and queue a follow-up if needed.

    In the call site visible in this file, this runs right after
    ``_handle_message_with_agent`` returns, inside that caller's own
    ``try``/``except``. It is a no-op when the goals module cannot be
    imported, the session has no ``session_id``, or no goal is active.

    Steps:

    1. Ask the goal manager to evaluate ``final_response``.
    2. If the decision carries a ``message``, send it to the user
       fire-and-forget through the platform adapter.
    3. If the decision says ``should_continue`` and supplies a
       ``continuation_prompt``, enqueue that prompt as a new message for
       the same session through ``_enqueue_fifo``. The intent is that real
       user messages share the same queue.

    Args:
        session_entry: Session record; only ``session_id`` is read.
        source: Origin of the turn; ``source.platform`` selects the adapter.
        final_response: The agent's final text for the turn just finished.

    Every failure is logged at debug level and swallowed: goal handling is
    best-effort and must not break normal message handling.
    """
    try:
        from hermes_cli.goals import GoalManager
    except Exception as exc:
        logger.debug("goal continuation: goals module unavailable: %s", exc)
        return

    sid = getattr(session_entry, "session_id", None) or ""
    if not sid:
        return

    # ``self.config`` may be a dict or an object with a ``goals`` attribute.
    # Anything unexpected falls back to a 20-turn budget.
    try:
        goals_cfg = (
            (self.config or {}).get("goals", {})
            if isinstance(self.config, dict)
            else getattr(self.config, "goals", {}) or {}
        )
        max_turns = int(goals_cfg.get("max_turns", 20) or 20)
    except Exception:
        max_turns = 20

    mgr = GoalManager(session_id=sid, default_max_turns=max_turns)
    if not mgr.is_active():
        return

    decision = mgr.evaluate_after_turn(final_response or "", user_initiated=True)
    msg = decision.get("message") or ""

    # Show the judge's verdict to the user, fire-and-forget.
    if msg and source is not None:
        try:
            adapter = self.adapters.get(source.platform)
            # NOTE(review): other call sites in this file use
            # ``adapter.send(chat_id, ...)``. Confirm adapters really expose
            # ``send_message``; if not, this status line is silently skipped.
            if adapter and hasattr(adapter, "send_message"):
                import asyncio as _asyncio

                coro = adapter.send_message(source, msg)
                if _asyncio.iscoroutine(coro):
                    # get_running_loop() replaces the deprecated
                    # get_event_loop(): either we are on the loop thread, or
                    # we hand the coroutine to the gateway's own loop.
                    try:
                        running_loop = _asyncio.get_running_loop()
                    except RuntimeError:
                        running_loop = None

                    if running_loop is not None:
                        task = running_loop.create_task(coro)
                        # The loop keeps only a weak reference to tasks, so
                        # hold a strong one until the send completes.
                        pending_tasks = getattr(self, "_goal_status_tasks", None)
                        if pending_tasks is None:
                            pending_tasks = set()
                            self._goal_status_tasks = pending_tasks
                        pending_tasks.add(task)
                        task.add_done_callback(pending_tasks.discard)
                    else:
                        # No loop in this thread: schedule on the main one.
                        try:
                            _asyncio.run_coroutine_threadsafe(coro, self._loop)
                        except Exception as exc:
                            # Close the coroutine so it is not reported as
                            # "never awaited".
                            coro.close()
                            logger.debug(
                                "goal continuation: status hand-off failed: %s", exc
                            )
        except Exception as exc:
            logger.debug("goal continuation: status send failed: %s", exc)

    if not decision.get("should_continue"):
        return

    prompt = decision.get("continuation_prompt") or ""
    if not prompt or source is None:
        return

    # Enqueue through the session FIFO rather than running the next turn
    # directly, so it shares the queue with incoming user messages.
    try:
        adapter = self.adapters.get(source.platform)
        _quick_key = self._session_key_for_source(source)
        if adapter and _quick_key:
            cont_event = MessageEvent(
                text=prompt,
                message_type=MessageType.TEXT,
                source=source,
                message_id=None,
                channel_prompt=None,
            )
            self._enqueue_fifo(_quick_key, cont_event, adapter)
    except Exception as exc:
        logger.debug("goal continuation: enqueue failed: %s", exc)
async def _handle_undo_command(self, event: MessageEvent) -> str:
"""Handle /undo command - remove the last user/assistant exchange."""
source = event.source
@@ -7701,16 +7393,16 @@ class GatewayRunner:
platform_name = source.platform.value if source.platform else "unknown"
chat_id = source.chat_id
chat_name = source.chat_name or chat_id
env_key = _home_target_env_var(platform_name)
env_key = f"{platform_name.upper()}_HOME_CHANNEL"
# Save to .env so it persists across restarts
try:
from hermes_cli.config import save_env_value
save_env_value(env_key, str(chat_id))
except Exception as e:
return f"Failed to save home channel: {e}"
return (
f"✅ Home channel set to **{chat_name}** (ID: {chat_id}).\n"
f"Cron jobs and cross-platform messages will be delivered here."
@@ -8282,8 +7974,6 @@ class GatewayRunner:
from hermes_cli.tools_config import _get_platform_tools
enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))
agent_cfg = user_config.get("agent") or {}
disabled_toolsets = agent_cfg.get("disabled_toolsets") or None
pr = self._provider_routing
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
@@ -8300,7 +7990,6 @@ class GatewayRunner:
quiet_mode=True,
verbose_logging=False,
enabled_toolsets=enabled_toolsets,
disabled_toolsets=disabled_toolsets,
reasoning_config=reasoning_config,
service_tier=self._service_tier,
request_overrides=turn_route.get("request_overrides"),
@@ -8570,7 +8259,7 @@ class GatewayRunner:
return f"⚡ ✓ Priority Processing: **{label}** (saved to config)\n_(takes effect on next message)_"
return f"⚡ ✓ Priority Processing: **{label}** (this session only)"
async def _handle_yolo_command(self, event: MessageEvent) -> Union[str, EphemeralReply]:
async def _handle_yolo_command(self, event: MessageEvent) -> str:
"""Handle /yolo — toggle dangerous command approval bypass for this session only."""
from tools.approval import (
disable_session_yolo,
@@ -8582,10 +8271,10 @@ class GatewayRunner:
current = is_session_yolo_enabled(session_key)
if current:
disable_session_yolo(session_key)
return EphemeralReply("⚠️ YOLO mode **OFF** for this session — dangerous commands will require approval.")
return "⚠️ YOLO mode **OFF** for this session — dangerous commands will require approval."
else:
enable_session_yolo(session_key)
return EphemeralReply("⚡ YOLO mode **ON** for this session — all commands auto-approved. Use with caution.")
return "⚡ YOLO mode **ON** for this session — all commands auto-approved. Use with caution."
async def _handle_verbose_command(self, event: MessageEvent) -> str:
"""Handle /verbose command — cycle tool progress display mode.
@@ -8603,10 +8292,7 @@ class GatewayRunner:
# --- check config gate ------------------------------------------------
try:
user_config = _load_gateway_config()
gate_enabled = is_truthy_value(
cfg_get(user_config, "display", "tool_progress_command"),
default=False,
)
gate_enabled = cfg_get(user_config, "display", "tool_progress_command", default=False)
except Exception:
gate_enabled = False
@@ -8761,7 +8447,7 @@ class GatewayRunner:
try:
from run_agent import AIAgent
from agent.manual_compression_feedback import summarize_manual_compression
from agent.model_metadata import estimate_request_tokens_rough
from agent.model_metadata import estimate_messages_tokens_rough
session_key = self._session_key_for_source(source)
model, runtime_kwargs = self._resolve_session_agent_runtime(
@@ -8776,6 +8462,7 @@ class GatewayRunner:
for m in history
if m.get("role") in ("user", "assistant") and m.get("content")
]
approx_tokens = estimate_messages_tokens_rough(msgs)
tmp_agent = AIAgent(
**runtime_kwargs,
@@ -8789,16 +8476,6 @@ class GatewayRunner:
try:
tmp_agent._print_fn = lambda *a, **kw: None
# Estimate with system prompt + tool schemas included so the
# figure reflects real request pressure, not a transcript-only
# underestimate (#6217). Must be computed after tmp_agent is
# built so _cached_system_prompt/tools are populated.
_sys_prompt = getattr(tmp_agent, "_cached_system_prompt", "") or ""
_tools = getattr(tmp_agent, "tools", None) or None
approx_tokens = estimate_request_tokens_rough(
msgs, system_prompt=_sys_prompt, tools=_tools
)
compressor = tmp_agent.context_compressor
if not compressor.has_content_to_compress(msgs):
return "Nothing to compress yet (the transcript is still all protected context)."
@@ -8823,9 +8500,7 @@ class GatewayRunner:
self.session_store.update_session(
session_entry.session_key, last_prompt_tokens=0
)
new_tokens = estimate_request_tokens_rough(
compressed, system_prompt=_sys_prompt, tools=_tools
)
new_tokens = estimate_messages_tokens_rough(compressed)
summary = summarize_manual_compression(
msgs,
compressed,
@@ -9060,12 +8735,8 @@ class GatewayRunner:
tool_name=msg.get("tool_name") or msg.get("name"),
tool_calls=msg.get("tool_calls"),
tool_call_id=msg.get("tool_call_id"),
finish_reason=msg.get("finish_reason"),
reasoning=msg.get("reasoning"),
reasoning_content=msg.get("reasoning_content"),
reasoning_details=msg.get("reasoning_details"),
codex_reasoning_items=msg.get("codex_reasoning_items"),
codex_message_items=msg.get("codex_message_items"),
)
except Exception:
pass # Best-effort copy
@@ -9815,8 +9486,6 @@ class GatewayRunner:
"session_key": session_key,
"timestamp": datetime.now().isoformat(),
}
if event.source.thread_id:
pending["thread_id"] = event.source.thread_id
_tmp_pending = pending_path.with_suffix(".tmp")
_tmp_pending.write_text(json.dumps(pending))
_tmp_pending.replace(pending_path)
@@ -9902,7 +9571,6 @@ class GatewayRunner:
adapter = None
chat_id = None
session_key = None
metadata = None
for path in (claimed_path, pending_path):
if path.exists():
try:
@@ -9910,8 +9578,6 @@ class GatewayRunner:
platform_str = pending.get("platform")
chat_id = pending.get("chat_id")
session_key = pending.get("session_key")
thread_id = pending.get("thread_id")
metadata = {"thread_id": thread_id} if thread_id else None
if platform_str and chat_id:
platform = Platform(platform_str)
adapter = self.adapters.get(platform)
@@ -9959,7 +9625,7 @@ class GatewayRunner:
chunks = [clean[i:i + max_chunk] for i in range(0, len(clean), max_chunk)]
for chunk in chunks:
try:
await adapter.send(chat_id, f"```\n{chunk}\n```", metadata=metadata)
await adapter.send(chat_id, f"```\n{chunk}\n```")
except Exception as e:
logger.debug("Update stream send failed: %s", e)
@@ -9982,13 +9648,9 @@ class GatewayRunner:
exit_code_raw = exit_code_path.read_text().strip() or "1"
exit_code = int(exit_code_raw)
if exit_code == 0:
await adapter.send(chat_id, "✅ Hermes update finished.", metadata=metadata)
await adapter.send(chat_id, "✅ Hermes update finished.")
else:
await adapter.send(
chat_id,
"❌ Hermes update failed (exit code {}).".format(exit_code),
metadata=metadata,
)
await adapter.send(chat_id, "❌ Hermes update failed (exit code {}).".format(exit_code))
logger.info("Update finished (exit=%s), notified %s", exit_code, session_key)
except Exception as e:
logger.warning("Update final notification failed: %s", e)
@@ -10038,7 +9700,6 @@ class GatewayRunner:
prompt=prompt_text,
default=default,
session_key=session_key,
metadata=metadata,
)
sent_buttons = True
except Exception as btn_err:
@@ -10050,8 +9711,7 @@ class GatewayRunner:
f"⚕ **Update needs your input:**\n\n"
f"{prompt_text}{default_hint}\n\n"
f"Reply `/approve` (yes) or `/deny` (no), "
f"or type your answer directly.",
metadata=metadata,
f"or type your answer directly."
)
self._update_prompt_pending[session_key] = True
# Remove the prompt file so it isn't re-read on the
@@ -10071,11 +9731,7 @@ class GatewayRunner:
exit_code_path.write_text("124")
await _flush_buffer()
try:
await adapter.send(
chat_id,
"❌ Hermes update timed out after 30 minutes.",
metadata=metadata,
)
await adapter.send(chat_id, "❌ Hermes update timed out after 30 minutes.")
except Exception:
pass
for p in (pending_path, claimed_path, output_path,
@@ -10117,7 +9773,6 @@ class GatewayRunner:
pending = json.loads(claimed_path.read_text())
platform_str = pending.get("platform")
chat_id = pending.get("chat_id")
thread_id = pending.get("thread_id")
if not exit_code_path.exists():
logger.info("Update notification deferred: update still running")
@@ -10139,7 +9794,6 @@ class GatewayRunner:
adapter = self.adapters.get(platform)
if adapter and chat_id:
metadata = {"thread_id": thread_id} if thread_id else None
# Strip ANSI escape codes for clean display
output = re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
if output:
@@ -10154,7 +9808,7 @@ class GatewayRunner:
msg = "✅ Hermes update finished successfully."
else:
msg = "❌ Hermes update failed. Check the gateway logs or run `hermes update` manually for details."
await adapter.send(chat_id, msg, metadata=metadata)
await adapter.send(chat_id, msg)
logger.info(
"Sent post-update notification to %s:%s (exit=%s)",
platform_str,
@@ -10694,7 +10348,6 @@ class GatewayRunner:
("compression", "threshold"),
("compression", "target_ratio"),
("compression", "protect_last_n"),
("agent", "disabled_toolsets"),
)
@classmethod
@@ -11478,8 +11131,6 @@ class GatewayRunner:
from hermes_cli.tools_config import _get_platform_tools
enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))
agent_cfg_local = user_config.get("agent") or {}
disabled_toolsets = agent_cfg_local.get("disabled_toolsets") or None
display_config = user_config.get("display", {})
if not isinstance(display_config, dict):
@@ -11568,10 +11219,7 @@ class GatewayRunner:
tool_progress_hint_gateway,
)
_cfg = _load_gateway_config()
gate_on = is_truthy_value(
cfg_get(_cfg, "display", "tool_progress_command"),
default=False,
)
gate_on = bool(cfg_get(_cfg, "display", "tool_progress_command", default=False))
if gate_on and not is_seen(_cfg, TOOL_PROGRESS_FLAG):
long_tool_hint_fired[0] = True
progress_queue.put(tool_progress_hint_gateway())
@@ -12111,7 +11759,6 @@ class GatewayRunner:
quiet_mode=True,
verbose_logging=False,
enabled_toolsets=enabled_toolsets,
disabled_toolsets=disabled_toolsets,
ephemeral_system_prompt=combined_ephemeral or None,
prefill_messages=self._prefill_messages or None,
reasoning_config=reasoning_config,
@@ -12451,7 +12098,8 @@ class GatewayRunner:
# attachment, wrap the user turn as an OpenAI-style multimodal
# content list. Consume-and-clear so subsequent turns on the same
# runner instance don't re-attach stale images.
_native_imgs = self._consume_pending_native_image_paths(session_key)
_native_imgs = list(getattr(self, "_pending_native_image_paths", []) or [])
self._pending_native_image_paths = []
if _native_imgs:
try:
from agent.image_routing import build_native_content_parts
-12
View File
@@ -458,15 +458,6 @@ class SessionEntry:
was_auto_reset: bool = False
auto_reset_reason: Optional[str] = None # "idle" or "daily"
reset_had_activity: bool = False # whether the expired session had any messages
# Set by reset_session() when the user explicitly sends /new or /reset.
# Consumed once by _handle_message_with_agent to trigger topic/channel
# skill re-injection on the first message of the new session. We can't
# reuse was_auto_reset for this because that flag fires the "session
# expired due to inactivity" user-facing notice and a misleading
# context-note prepend — both wrong for an explicit manual reset.
# See issue #6508.
is_fresh_reset: bool = False
# Set by the background expiry watcher after it finalizes an expired
# session (invoking on_session_finalize hooks and evicting the cached
@@ -517,7 +508,6 @@ class SessionEntry:
if self.last_resume_marked_at
else None
),
"is_fresh_reset": self.is_fresh_reset,
}
if self.origin:
result["origin"] = self.origin.to_dict()
@@ -566,7 +556,6 @@ class SessionEntry:
resume_pending=data.get("resume_pending", False),
resume_reason=data.get("resume_reason"),
last_resume_marked_at=last_resume_marked_at,
is_fresh_reset=data.get("is_fresh_reset", False),
)
@@ -1143,7 +1132,6 @@ class SessionStore:
display_name=old_entry.display_name,
platform=old_entry.platform,
chat_type=old_entry.chat_type,
is_fresh_reset=True,
)
self._entries[session_key] = new_entry
+4 -8
View File
@@ -21,7 +21,6 @@ from datetime import datetime, timezone
from pathlib import Path
from hermes_constants import get_hermes_home
from typing import Any, Optional
from utils import atomic_json_write
if sys.platform == "win32":
import msvcrt
@@ -35,10 +34,6 @@ _IS_WINDOWS = sys.platform == "win32"
_UNSET = object()
_GATEWAY_LOCK_FILENAME = "gateway.lock"
_gateway_lock_handle = None
# Windows byte-range locks are mandatory for other readers. Lock a byte well
# past the JSON payload so runtime status / PID readers can still read the file
# while another process holds the mutual-exclusion lock.
_WINDOWS_LOCK_OFFSET = 1024 * 1024
def _get_pid_path() -> Path:
@@ -210,7 +205,8 @@ def _read_json_file(path: Path) -> Optional[dict[str, Any]]:
def _write_json_file(path: Path, payload: dict[str, Any]) -> None:
atomic_json_write(path, payload, indent=None, separators=(",", ":"))
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(payload))
def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
@@ -290,7 +286,7 @@ def _try_acquire_file_lock(handle) -> bool:
if handle.tell() == 0:
handle.write("\n")
handle.flush()
handle.seek(_WINDOWS_LOCK_OFFSET)
handle.seek(0)
msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
else:
fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
@@ -302,7 +298,7 @@ def _try_acquire_file_lock(handle) -> bool:
def _release_file_lock(handle) -> None:
try:
if _IS_WINDOWS:
handle.seek(_WINDOWS_LOCK_OFFSET)
handle.seek(0)
msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
else:
fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
+5 -5
View File
@@ -43,7 +43,7 @@ import yaml
from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
from hermes_constants import OPENROUTER_BASE_URL
from utils import atomic_replace, atomic_yaml_write, is_truthy_value
from utils import atomic_replace
logger = logging.getLogger(__name__)
@@ -2480,8 +2480,8 @@ def _resolve_verify(
tls_state = tls_state if isinstance(tls_state, dict) else {}
effective_insecure = (
is_truthy_value(insecure, default=False) if insecure is not None
else is_truthy_value(tls_state.get("insecure", False), default=False)
bool(insecure) if insecure is not None
else bool(tls_state.get("insecure", False))
)
effective_ca = (
ca_bundle
@@ -3653,7 +3653,7 @@ def _update_config_for_provider(
config["model"] = model_cfg
atomic_yaml_write(config_path, config, sort_keys=False)
config_path.write_text(yaml.safe_dump(config, sort_keys=False))
return config_path
@@ -3712,7 +3712,7 @@ def _reset_config_provider() -> Path:
model["provider"] = "auto"
if "base_url" in model:
model["base_url"] = OPENROUTER_BASE_URL
atomic_yaml_write(config_path, config, sort_keys=False)
config_path.write_text(yaml.safe_dump(config, sort_keys=False))
return config_path
+1 -5
View File
@@ -19,8 +19,6 @@ from collections.abc import Callable, Mapping
from dataclasses import dataclass
from typing import Any
from utils import is_truthy_value
# prompt_toolkit is an optional CLI dependency — only needed for
# SlashCommandCompleter and SlashCommandAutoSuggest. Gateway and test
# environments that lack it must still be able to import this module
@@ -95,8 +93,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
aliases=("q",), args_hint="<prompt>"),
CommandDef("steer", "Inject a message after the next tool call without interrupting", "Session",
args_hint="<prompt>"),
CommandDef("goal", "Set a standing goal Hermes works on across turns until achieved", "Session",
args_hint="[text | pause | resume | clear | status]"),
CommandDef("status", "Show session info", "Session"),
CommandDef("profile", "Show active profile name and home directory", "Info"),
CommandDef("sethome", "Set this chat as the home channel", "Session",
@@ -375,7 +371,7 @@ def _resolve_config_gates() -> set[str]:
else:
val = None
break
if is_truthy_value(val, default=False):
if val:
result.add(cmd.name)
return result
+2 -55
View File
@@ -457,7 +457,6 @@ DEFAULT_CONFIG = {
# remains available as a tool regardless of this setting — the routing
# only controls how inbound user images are presented.
"image_input_mode": "auto",
"disabled_toolsets": [],
},
"terminal": {
@@ -607,24 +606,6 @@ DEFAULT_CONFIG = {
"max_line_length": 2000,
},
# Tool loop guardrails nudge models when they repeat failed or
# non-progressing tool calls. Soft warnings are always-on by default;
# hard stops are opt-in so interactive CLI/TUI sessions keep flowing.
"tool_loop_guardrails": {
"warnings_enabled": True,
"hard_stop_enabled": False,
"warn_after": {
"exact_failure": 2,
"same_tool_failure": 3,
"idempotent_no_progress": 2,
},
"hard_stop_after": {
"exact_failure": 5,
"same_tool_failure": 8,
"idempotent_no_progress": 5,
},
},
"compression": {
"enabled": True,
"threshold": 0.50, # compress when context usage exceeds this ratio
@@ -775,14 +756,6 @@ DEFAULT_CONFIG = {
"tool_progress_command": False, # Enable /verbose command in messaging gateway
"tool_progress_overrides": {}, # DEPRECATED — use display.platforms instead
"tool_preview_length": 0, # Max chars for tool call previews (0 = no limit, show full paths/commands)
# Auto-delete system-notice replies (e.g. "✨ New session started!",
# "♻ Restarting gateway…", "⚡ Stopped…") after N seconds on platforms
# that support message deletion (currently Telegram; other platforms
# ignore and leave the message in place). Only affects slash-command
# replies wrapped with gateway.platforms.base.EphemeralReply — agent
# responses and content messages are never touched. Default 0
# (disabled) preserves prior behavior.
"ephemeral_system_ttl": 0,
"platforms": {}, # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
# Gateway runtime-metadata footer appended to the FINAL message of a turn
# (disabled by default to keep replies minimal). When enabled, renders
@@ -952,23 +925,7 @@ DEFAULT_CONFIG = {
# injected at the start of every API call for few-shot priming.
# Never saved to sessions, logs, or trajectories.
"prefill_messages_file": "",
# Goals — persistent cross-turn goals (Ralph-style loop).
# After every turn, a lightweight judge call asks the auxiliary model
# whether the active /goal is satisfied by the assistant's last
# response. If not, Hermes feeds a continuation prompt back into the
# same session and keeps working until the goal is done, the turn
# budget is exhausted, or the user pauses/clears it. Judge failures
# fail OPEN (continue) so a flaky judge never wedges progress — the
# turn budget is the real backstop.
"goals": {
# Max continuation turns before Hermes auto-pauses the goal and
# asks the user to /goal resume. Protects against judge false
# negatives (goal actually done but judge says continue) and
# unbounded model spend on fuzzy / unachievable goals.
"max_turns": 20,
},
# Skills — external skill directories for sharing skills across tools/agents.
# Each path is expanded (~, ${VAR}) and resolved. Read-only — skill creation
# always goes to ~/.hermes/skills/.
@@ -2461,17 +2418,7 @@ def get_missing_skill_config_vars() -> List[Dict[str, Any]]:
except Exception:
return []
try:
all_vars = discover_all_skill_config_vars()
except Exception as e:
# A malformed SKILL.md, unreadable external skill dir, or similar
# should never break `hermes update`. Skill-config prompting is a
# post-migration nicety, not a blocker.
import logging
logging.getLogger(__name__).debug(
"discover_all_skill_config_vars failed: %s", e
)
return []
all_vars = discover_all_skill_config_vars()
if not all_vars:
return []
+1 -86
View File
@@ -10,7 +10,6 @@ import shutil
import signal
import subprocess
import sys
import textwrap
from dataclasses import dataclass
from pathlib import Path
@@ -60,13 +59,6 @@ class GatewayRuntimeSnapshot:
def has_process_service_mismatch(self) -> bool:
return self.service_installed and self.running and not self.service_running
@dataclass(frozen=True)
class ProfileGatewayProcess:
    """Immutable record tying one running gateway PID to a Hermes profile."""

    # Profile name (``profile.name`` of the entry the PID file belongs to).
    profile: str
    # Profile directory (``profile.path``); the PID file is read from
    # ``path / "gateway.pid"``.
    path: Path
    # PID reported for that profile's gateway.
    pid: int
def _get_service_pids() -> set:
"""Return PIDs currently managed by systemd or launchd gateway services.
@@ -379,83 +371,6 @@ def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = Fals
return pids
def find_profile_gateway_processes(
    exclude_pids: set | None = None,
) -> list[ProfileGatewayProcess]:
    """Map running gateway PIDs to Hermes profiles via each profile's PID file.

    Args:
        exclude_pids: PIDs to leave out of the result (the caller's set is
            copied, never mutated).

    Returns:
        One ``ProfileGatewayProcess`` per distinct positive PID found, in
        ``list_profiles()`` order. Empty when the gateway/profile helpers
        cannot be imported.
    """
    # Working copy: starts as the caller's exclusions and also collects every
    # PID already reported, so one membership test covers both cases.
    skip = set(exclude_pids or set())
    found: list[ProfileGatewayProcess] = []

    try:
        from gateway.status import get_running_pid
        from hermes_cli.profiles import list_profiles
    except Exception:
        return found

    for entry in list_profiles():
        try:
            live_pid = get_running_pid(entry.path / "gateway.pid", cleanup_stale=False)
        except Exception:
            # A profile whose PID lookup fails is skipped, not fatal.
            continue
        if live_pid is None or live_pid <= 0 or live_pid in skip:
            continue
        skip.add(live_pid)
        found.append(
            ProfileGatewayProcess(profile=entry.name, path=entry.path, pid=live_pid)
        )
    return found
def _gateway_run_args_for_profile(profile: str) -> list[str]:
    """Build the argv that runs ``gateway run --replace`` for *profile*.

    The ``--profile <name>`` pair is omitted for the ``"default"`` profile.
    """
    profile_flag = [] if profile == "default" else ["--profile", profile]
    return [
        get_python_path(),
        "-m",
        "hermes_cli.main",
        *profile_flag,
        "gateway",
        "run",
        "--replace",
    ]
def launch_detached_profile_gateway_restart(profile: str, old_pid: int) -> bool:
    """Spawn a detached watcher that relaunches *profile*'s gateway.

    The watcher is a small inline Python script. It polls ``os.kill(pid, 0)``
    every 0.2 s until ``old_pid`` is gone or 120 s have elapsed, then starts
    the gateway command in a new session with its output discarded.

    Args:
        profile: Profile whose gateway should be relaunched.
        old_pid: PID of the gateway that is expected to exit.

    Returns:
        ``True`` when the watcher process was spawned; ``False`` for a
        non-positive ``old_pid`` or when spawning raised ``OSError``.
    """
    if old_pid <= 0:
        return False

    watcher_source = textwrap.dedent(
        """
        import os
        import subprocess
        import sys
        import time
        pid = int(sys.argv[1])
        cmd = sys.argv[2:]
        deadline = time.monotonic() + 120
        while time.monotonic() < deadline:
            try:
                os.kill(pid, 0)
            except ProcessLookupError:
                break
            except PermissionError:
                pass
            time.sleep(0.2)
        subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            start_new_session=True,
        )
        """
    ).strip()

    try:
        # argv is assembled inside the try so an OSError raised while building
        # it is reported as a failed launch, same as a failed spawn.
        argv = [sys.executable, "-c", watcher_source, str(old_pid)]
        argv.extend(_gateway_run_args_for_profile(profile))
        subprocess.Popen(
            argv,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            start_new_session=True,
        )
    except OSError:
        return False
    else:
        return True
def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
selected_system = _select_systemd_scope(system)
unit_exists = get_systemd_unit_path(system=selected_system).exists()
@@ -4462,4 +4377,4 @@ def _gateway_command_inner(args):
if not supports_systemd_services() and not is_macos():
print("Legacy unit migration only applies to systemd-based Linux hosts.")
return
remove_legacy_hermes_units(interactive=not yes, dry_run=dry_run)
remove_legacy_hermes_units(interactive=not yes, dry_run=dry_run)
-535
View File
@@ -1,535 +0,0 @@
"""Persistent session goals — the Ralph loop for Hermes.
A goal is a free-form user objective that stays active across turns. After
each turn completes, a small judge call asks an auxiliary model "is this
goal satisfied by the assistant's last response?". If not, Hermes feeds a
continuation prompt back into the same session and keeps working until the
goal is done, turn budget is exhausted, the user pauses/clears it, or the
user sends a new message (which takes priority and pauses the goal loop).
State is persisted in SessionDB's ``state_meta`` table keyed by
``goal:<session_id>`` so ``/resume`` picks it up.
Design notes / invariants:
- The continuation prompt is just a normal user message appended to the
session via ``run_conversation``. No system-prompt mutation, no toolset
swap, so prompt caching stays intact.
- Judge failures are fail-OPEN: ``continue``. A broken judge must not wedge
progress; the turn budget is the backstop.
- When a real user message arrives mid-loop it preempts the continuation
prompt and also pauses the goal loop for that turn (we still re-judge
after, so if the user's message happens to complete the goal the judge
will say ``done``).
- This module has zero hard dependency on ``cli.HermesCLI`` or the gateway
runner; both wire the same ``GoalManager`` in.
Nothing in this module touches the agent's system prompt or toolset.
"""
from __future__ import annotations
import json
import logging
import re
import time
from dataclasses import dataclass, asdict
from typing import Any, Dict, Optional, Tuple
logger = logging.getLogger(__name__)
# ──────────────────────────────────────────────────────────────────────
# Constants & defaults
# ──────────────────────────────────────────────────────────────────────
# Default turn budget: used as the GoalState.max_turns default and as the
# GoalManager fallback when no explicit budget is supplied.
DEFAULT_MAX_TURNS = 20
# Default ``timeout`` forwarded to the judge's chat-completion call
# (presumably seconds; confirm against the auxiliary client).
DEFAULT_JUDGE_TIMEOUT = 30.0
# Cap on how many characters of the agent's last response are sent to the
# judge; longer responses are truncated before being put in the prompt.
_JUDGE_RESPONSE_SNIPPET_CHARS = 4000
# User-role message fed back into the session to drive the next turn.
# ``{goal}`` is filled with the stored goal text.
CONTINUATION_PROMPT_TEMPLATE = (
    "[Continuing toward your standing goal]\n"
    "Goal: {goal}\n\n"
    "Continue working toward this goal. Take the next concrete step. "
    "If you believe the goal is complete, state so explicitly and stop. "
    "If you are blocked and need input from the user, say so clearly and stop."
)
# System prompt for the judge call. It asks for a single-line JSON object
# with ``done`` and ``reason`` keys, which _parse_judge_response reads.
JUDGE_SYSTEM_PROMPT = (
    "You are a strict judge evaluating whether an autonomous agent has "
    "achieved a user's stated goal. You receive the goal text and the "
    "agent's most recent response. Your only job is to decide whether "
    "the goal is fully satisfied based on that response.\n\n"
    "A goal is DONE only when:\n"
    "- The response explicitly confirms the goal was completed, OR\n"
    "- The response clearly shows the final deliverable was produced, OR\n"
    "- The response explains the goal is unachievable / blocked / needs "
    "user input (treat this as DONE with reason describing the block).\n\n"
    "Otherwise the goal is NOT done — CONTINUE.\n\n"
    "Reply ONLY with a single JSON object on one line:\n"
    '{\"done\": <true|false>, \"reason\": \"<one-sentence rationale>\"}'
)
# User-role message for the judge call. ``{goal}`` and ``{response}`` are
# filled with the (truncated) goal text and last assistant response.
JUDGE_USER_PROMPT_TEMPLATE = (
    "Goal:\n{goal}\n\n"
    "Agent's most recent response:\n{response}\n\n"
    "Is the goal satisfied?"
)
# ──────────────────────────────────────────────────────────────────────
# Dataclass
# ──────────────────────────────────────────────────────────────────────
@dataclass
class GoalState:
    """Per-session goal record that round-trips through JSON."""

    goal: str
    status: str = "active"  # active | paused | done | cleared
    turns_used: int = 0
    max_turns: int = DEFAULT_MAX_TURNS
    created_at: float = 0.0
    last_turn_at: float = 0.0
    last_verdict: Optional[str] = None  # "done" | "continue" | "skipped"
    last_reason: Optional[str] = None
    paused_reason: Optional[str] = None  # why the goal was paused (budget, user, ...)

    def to_json(self) -> str:
        """Serialize every field to a JSON object string (non-ASCII kept as-is)."""
        payload = asdict(self)
        return json.dumps(payload, ensure_ascii=False)

    @classmethod
    def from_json(cls, raw: str) -> "GoalState":
        """Rebuild a state from ``to_json`` output.

        Missing keys fall back to the field defaults; falsy numeric values
        (``None``, ``0``, ``""``) are coerced to the default as well.
        """
        record = json.loads(raw)
        field = record.get
        turns_used = int(field("turns_used", 0) or 0)
        max_turns = int(field("max_turns", DEFAULT_MAX_TURNS) or DEFAULT_MAX_TURNS)
        created_at = float(field("created_at", 0.0) or 0.0)
        last_turn_at = float(field("last_turn_at", 0.0) or 0.0)
        return cls(
            goal=field("goal", ""),
            status=field("status", "active"),
            turns_used=turns_used,
            max_turns=max_turns,
            created_at=created_at,
            last_turn_at=last_turn_at,
            last_verdict=field("last_verdict"),
            last_reason=field("last_reason"),
            paused_reason=field("paused_reason"),
        )
# ──────────────────────────────────────────────────────────────────────
# Persistence (SessionDB state_meta)
# ──────────────────────────────────────────────────────────────────────
def _meta_key(session_id: str) -> str:
return f"goal:{session_id}"
# One SessionDB instance per resolved HERMES_HOME path (as a string).
_DB_CACHE: Dict[str, Any] = {}


def _get_session_db() -> Optional[Any]:
    """Return the cached SessionDB for the current HERMES_HOME, creating it once.

    Instances are cached per ``get_hermes_home()`` path so repeated goal
    operations reuse one object instead of constructing a new one each call.
    Any import or construction failure is logged at debug level and reported
    as ``None`` so callers can degrade to a no-op.
    """
    try:
        from hermes_constants import get_hermes_home
        from hermes_state import SessionDB

        home_key = str(get_hermes_home())
    except Exception as exc:  # pragma: no cover
        logger.debug("GoalManager: SessionDB bootstrap failed (%s)", exc)
        return None

    instance = _DB_CACHE.get(home_key)
    if instance is None:
        try:
            instance = SessionDB()
        except Exception as exc:  # pragma: no cover
            logger.debug("GoalManager: SessionDB() raised (%s)", exc)
            return None
        _DB_CACHE[home_key] = instance
    return instance
def load_goal(session_id: str) -> Optional[GoalState]:
    """Fetch the stored goal for *session_id*.

    Returns ``None`` when the id is empty, the DB is unavailable, nothing is
    stored, the lookup raises, or the stored value cannot be parsed.
    """
    db = _get_session_db() if session_id else None
    if db is None:
        return None

    try:
        stored = db.get_meta(_meta_key(session_id))
    except Exception as exc:
        logger.debug("GoalManager: get_meta failed: %s", exc)
        return None

    if not stored:
        return None

    try:
        parsed = GoalState.from_json(stored)
    except Exception as exc:
        logger.warning("GoalManager: could not parse stored goal for %s: %s", session_id, exc)
        return None
    return parsed
def save_goal(session_id: str, state: GoalState) -> None:
    """Write *state* to the session DB under the session's goal key.

    Silently does nothing when the id is empty or the DB is unavailable;
    a failing write is logged at debug level and swallowed.
    """
    db = _get_session_db() if session_id else None
    if db is not None:
        try:
            db.set_meta(_meta_key(session_id), state.to_json())
        except Exception as exc:
            logger.debug("GoalManager: set_meta failed: %s", exc)
def clear_goal(session_id: str) -> None:
    """Flag the session's stored goal as ``cleared`` (the record is kept, not deleted)."""
    existing = load_goal(session_id)
    if existing is not None:
        existing.status = "cleared"
        save_goal(session_id, existing)
# ──────────────────────────────────────────────────────────────────────
# Judge
# ──────────────────────────────────────────────────────────────────────
def _truncate(text: str, limit: int) -> str:
if not text:
return ""
if len(text) <= limit:
return text
return text[:limit] + "… [truncated]"
_JSON_OBJECT_RE = re.compile(r"\{.*?\}", re.DOTALL)
def _parse_judge_response(raw: str) -> Tuple[bool, str]:
"""Parse the judge's reply. Fail-open to ``(False, "<reason>")``.
Returns ``(done, reason)``.
"""
if not raw:
return False, "judge returned empty response"
text = raw.strip()
# Strip markdown code fences the model may wrap JSON in.
if text.startswith("```"):
text = text.strip("`")
# Peel off leading json/JSON/etc tag
nl = text.find("\n")
if nl != -1:
text = text[nl + 1:]
# First try: parse the whole blob.
data: Optional[Dict[str, Any]] = None
try:
data = json.loads(text)
except Exception:
# Second try: pull the first JSON object out.
match = _JSON_OBJECT_RE.search(text)
if match:
try:
data = json.loads(match.group(0))
except Exception:
data = None
if not isinstance(data, dict):
return False, f"judge reply was not JSON: {_truncate(raw, 200)!r}"
done_val = data.get("done")
if isinstance(done_val, str):
done = done_val.strip().lower() in ("true", "yes", "1", "done")
else:
done = bool(done_val)
reason = str(data.get("reason") or "").strip()
if not reason:
reason = "no reason provided"
return done, reason
def judge_goal(
    goal: str,
    last_response: str,
    *,
    timeout: float = DEFAULT_JUDGE_TIMEOUT,
) -> Tuple[str, str]:
    """Ask the auxiliary model whether the goal is satisfied.

    Args:
        goal: The standing goal text.
        last_response: The agent's most recent response. ``None`` is treated
            as an empty response.
        timeout: Forwarded as ``timeout`` to the chat-completion call.

    Returns:
        ``(verdict, reason)`` where verdict is ``"done"``, ``"continue"``, or
        ``"skipped"`` (only when the goal text is empty).

    This is deliberately fail-open: a missing auxiliary client, an API error
    or an unparseable reply all return ``("continue", "...")`` so a broken
    judge doesn't wedge progress — the turn budget is the backstop.
    """
    # Normalize None so the fail-open contract also holds for callers that
    # pass through a turn which produced no assistant text.
    goal = goal or ""
    last_response = last_response or ""

    if not goal.strip():
        return "skipped", "empty goal"
    if not last_response.strip():
        # No substantive reply this turn — almost certainly not done yet.
        return "continue", "empty response (nothing to evaluate)"

    try:
        from agent.auxiliary_client import get_text_auxiliary_client
    except Exception as exc:
        logger.debug("goal judge: auxiliary client import failed: %s", exc)
        return "continue", "auxiliary client unavailable"

    try:
        client, model = get_text_auxiliary_client("goal_judge")
    except Exception as exc:
        logger.debug("goal judge: get_text_auxiliary_client failed: %s", exc)
        return "continue", "auxiliary client unavailable"
    if client is None or not model:
        return "continue", "no auxiliary client configured"

    # Both inputs are clipped so the judge prompt stays bounded.
    prompt = JUDGE_USER_PROMPT_TEMPLATE.format(
        goal=_truncate(goal, 2000),
        response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
    )

    try:
        resp = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": JUDGE_SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            temperature=0,
            max_tokens=200,
            timeout=timeout,
        )
    except Exception as exc:
        logger.info("goal judge: API call failed (%s) — falling through to continue", exc)
        return "continue", f"judge error: {type(exc).__name__}"

    try:
        raw = resp.choices[0].message.content or ""
    except Exception:
        # A malformed response object is handled like an empty reply, which
        # the parser maps to "continue".
        raw = ""

    done, reason = _parse_judge_response(raw)
    verdict = "done" if done else "continue"
    logger.info("goal judge: verdict=%s reason=%s", verdict, _truncate(reason, 120))
    return verdict, reason
# ──────────────────────────────────────────────────────────────────────
# GoalManager — the orchestration surface CLI + gateway talk to
# ──────────────────────────────────────────────────────────────────────
class GoalManager:
"""Per-session goal state + continuation decisions.
The CLI and gateway each hold one ``GoalManager`` per live session.
Methods:
- ``set(goal)`` start a new standing goal.
- ``clear()`` remove the active goal.
- ``pause()`` / ``resume()`` explicit user controls.
- ``status()`` printable one-liner.
- ``evaluate_after_turn(last_response)`` call the judge, update state,
and return a decision dict the caller uses to drive the next turn.
- ``next_continuation_prompt()`` the canonical user-role message to
feed back into ``run_conversation``.
"""
def __init__(self, session_id: str, *, default_max_turns: int = DEFAULT_MAX_TURNS):
self.session_id = session_id
self.default_max_turns = int(default_max_turns or DEFAULT_MAX_TURNS)
self._state: Optional[GoalState] = load_goal(session_id)
# --- introspection ------------------------------------------------
@property
def state(self) -> Optional[GoalState]:
return self._state
def is_active(self) -> bool:
return self._state is not None and self._state.status == "active"
def has_goal(self) -> bool:
return self._state is not None and self._state.status in ("active", "paused")
def status_line(self) -> str:
s = self._state
if s is None or s.status in ("cleared",):
return "No active goal. Set one with /goal <text>."
turns = f"{s.turns_used}/{s.max_turns} turns"
if s.status == "active":
return f"⊙ Goal (active, {turns}): {s.goal}"
if s.status == "paused":
extra = f"{s.paused_reason}" if s.paused_reason else ""
return f"⏸ Goal (paused, {turns}{extra}): {s.goal}"
if s.status == "done":
return f"✓ Goal done ({turns}): {s.goal}"
return f"Goal ({s.status}, {turns}): {s.goal}"
# --- mutation -----------------------------------------------------
def set(self, goal: str, *, max_turns: Optional[int] = None) -> GoalState:
goal = (goal or "").strip()
if not goal:
raise ValueError("goal text is empty")
state = GoalState(
goal=goal,
status="active",
turns_used=0,
max_turns=int(max_turns) if max_turns else self.default_max_turns,
created_at=time.time(),
last_turn_at=0.0,
)
self._state = state
save_goal(self.session_id, state)
return state
def pause(self, reason: str = "user-paused") -> Optional[GoalState]:
if not self._state:
return None
self._state.status = "paused"
self._state.paused_reason = reason
save_goal(self.session_id, self._state)
return self._state
def resume(self, *, reset_budget: bool = True) -> Optional[GoalState]:
if not self._state:
return None
self._state.status = "active"
self._state.paused_reason = None
if reset_budget:
self._state.turns_used = 0
save_goal(self.session_id, self._state)
return self._state
def clear(self) -> None:
if self._state is None:
return
self._state.status = "cleared"
save_goal(self.session_id, self._state)
self._state = None
def mark_done(self, reason: str) -> None:
if not self._state:
return
self._state.status = "done"
self._state.last_verdict = "done"
self._state.last_reason = reason
save_goal(self.session_id, self._state)
# --- the main entry point called after every turn -----------------
def evaluate_after_turn(
self,
last_response: str,
*,
user_initiated: bool = True,
) -> Dict[str, Any]:
"""Run the judge and update state. Return a decision dict.
``user_initiated`` distinguishes a real user prompt (True) from a
continuation prompt we fed ourselves (False). Both increment
``turns_used`` because both consume model budget.
Decision keys:
- ``status``: current goal status after update
- ``should_continue``: bool caller should fire another turn
- ``continuation_prompt``: str or None
- ``verdict``: "done" | "continue" | "skipped" | "inactive"
- ``reason``: str
- ``message``: user-visible one-liner to print/send
"""
state = self._state
if state is None or state.status != "active":
return {
"status": state.status if state else None,
"should_continue": False,
"continuation_prompt": None,
"verdict": "inactive",
"reason": "no active goal",
"message": "",
}
# Count the turn that just finished.
state.turns_used += 1
state.last_turn_at = time.time()
verdict, reason = judge_goal(state.goal, last_response)
state.last_verdict = verdict
state.last_reason = reason
if verdict == "done":
state.status = "done"
save_goal(self.session_id, state)
return {
"status": "done",
"should_continue": False,
"continuation_prompt": None,
"verdict": "done",
"reason": reason,
"message": f"✓ Goal achieved: {reason}",
}
if state.turns_used >= state.max_turns:
state.status = "paused"
state.paused_reason = f"turn budget exhausted ({state.turns_used}/{state.max_turns})"
save_goal(self.session_id, state)
return {
"status": "paused",
"should_continue": False,
"continuation_prompt": None,
"verdict": "continue",
"reason": reason,
"message": (
f"⏸ Goal paused — {state.turns_used}/{state.max_turns} turns used. "
"Use /goal resume to keep going, or /goal clear to stop."
),
}
save_goal(self.session_id, state)
return {
"status": "active",
"should_continue": True,
"continuation_prompt": self.next_continuation_prompt(),
"verdict": "continue",
"reason": reason,
"message": (
f"↻ Continuing toward goal ({state.turns_used}/{state.max_turns}): {reason}"
),
}
def next_continuation_prompt(self) -> Optional[str]:
    """Build the prompt used to drive the next self-initiated turn.

    Returns:
        ``CONTINUATION_PROMPT_TEMPLATE`` formatted with the current goal
        text when a goal state exists and its status is ``"active"``;
        ``None`` in every other case.
    """
    current = self._state
    if current and current.status == "active":
        return CONTINUATION_PROMPT_TEMPLATE.format(goal=current.goal)
    return None
# Names exported by a star-import of this module.
__all__ = [
"GoalState",
"GoalManager",
"CONTINUATION_PROMPT_TEMPLATE",
"DEFAULT_MAX_TURNS",
"load_goal",
"save_goal",
"clear_goal",
"judge_goal",
]
+11 -61
View File
@@ -800,8 +800,6 @@ def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Opti
title = db.get_session_title(target)
message_count = int(session.get("message_count") or 0)
if message_count == 0:
return # No real conversation — don't show resume info
input_tokens = int(session.get("input_tokens") or 0)
output_tokens = int(session.get("output_tokens") or 0)
cache_read_tokens = int(session.get("cache_read_tokens") or 0)
@@ -6675,7 +6673,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
if gateway_mode
else None
)
assume_yes = bool(getattr(args, "yes", False))
print("⚕ Updating Hermes Agent...")
print()
@@ -6795,10 +6792,8 @@ def _cmd_update_impl(args, gateway_mode: bool):
else:
auto_stash_ref = _stash_local_changes_if_needed(git_cmd, PROJECT_ROOT)
prompt_for_restore = (
auto_stash_ref is not None
and not assume_yes
and (gateway_mode or (sys.stdin.isatty() and sys.stdout.isatty()))
prompt_for_restore = auto_stash_ref is not None and (
gateway_mode or (sys.stdin.isatty() and sys.stdout.isatty())
)
# Check if there are updates
@@ -7059,10 +7054,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
print(f" {len(missing_config)} new config option(s) available")
print()
if assume_yes:
print(" --yes: auto-applying config migration (skipping API-key prompts).")
response = "y"
elif gateway_mode:
if gateway_mode:
response = (
_gateway_prompt(
"Would you like to configure new options now? [Y/n]", "n"
@@ -7088,17 +7080,14 @@ def _cmd_update_impl(args, gateway_mode: bool):
if response in ("", "y", "yes"):
print()
# In gateway mode OR under --yes, run auto-migrations only (no
# input() prompts for API keys which would hang the detached
# process / defeat the point of --yes).
results = migrate_config(
interactive=not (gateway_mode or assume_yes), quiet=False
)
# In gateway mode, run auto-migrations only (no input() prompts
# for API keys which would hang the detached process).
results = migrate_config(interactive=not gateway_mode, quiet=False)
if results["env_added"] or results["config_added"]:
print()
print("✓ Configuration updated!")
if (gateway_mode or assume_yes) and missing_env:
if gateway_mode and missing_env:
print(" API keys require manual entry: hermes config migrate")
else:
print()
@@ -7148,8 +7137,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
supports_systemd_services,
_ensure_user_systemd_env,
find_gateway_pids,
find_profile_gateway_processes,
launch_detached_profile_gateway_restart,
_get_service_pids,
_graceful_restart_via_sigusr1,
)
@@ -7253,7 +7240,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
restarted_services = []
killed_pids = set()
relaunched_profiles = []
# --- Systemd services (Linux) ---
# Discover all hermes-gateway* units (default + profiles)
@@ -7443,33 +7429,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
manual_pids = find_gateway_pids(
exclude_pids=service_pids, all_profiles=True
)
profile_processes = {
proc.pid: proc
for proc in find_profile_gateway_processes(exclude_pids=service_pids)
if proc.pid in manual_pids
}
for pid, proc in profile_processes.items():
if not launch_detached_profile_gateway_restart(proc.profile, pid):
continue
# Prefer a graceful SIGUSR1 drain so in-flight agent runs
# finish before the watcher respawns the gateway. If the
# gateway doesn't support SIGUSR1 or doesn't exit within
# the drain budget, fall back to SIGTERM — the watcher
# still sees the exit and relaunches either way.
drained = _graceful_restart_via_sigusr1(
pid, drain_timeout=_drain_budget,
)
if not drained:
try:
os.kill(pid, _signal.SIGTERM)
except (ProcessLookupError, PermissionError):
pass
killed_pids.add(pid)
relaunched_profiles.append(proc.profile)
for pid in manual_pids:
if pid in profile_processes:
continue
try:
os.kill(pid, _signal.SIGTERM)
killed_pids.add(pid)
@@ -7480,14 +7440,11 @@ def _cmd_update_impl(args, gateway_mode: bool):
print()
for svc in restarted_services:
print(f" ✓ Restarted {svc}")
if relaunched_profiles:
names = ", ".join(relaunched_profiles)
print(f" ✓ Restarting manual gateway profile(s): {names}")
unmapped_count = len(killed_pids) - len(relaunched_profiles)
if unmapped_count:
print(f" → Stopped {unmapped_count} manual gateway process(es)")
if killed_pids:
print(f" → Stopped {len(killed_pids)} manual gateway process(es)")
print(" Restart manually: hermes gateway run")
if unmapped_count > 1:
# Also restart for each profile if needed
if len(killed_pids) > 1:
print(
" (or: hermes -p <profile> gateway run for each profile)"
)
@@ -9904,13 +9861,6 @@ Examples:
default=False,
help="Force a pre-update backup for this run (off by default; overrides updates.pre_update_backup)",
)
update_parser.add_argument(
"--yes",
"-y",
action="store_true",
default=False,
help="Assume yes for interactive prompts (config migration, stash restore). API-key entry is skipped; run 'hermes config migrate' separately for those.",
)
update_parser.set_defaults(func=cmd_update)
# =========================================================================
+10 -18
View File
@@ -891,19 +891,14 @@ def switch_model(
if not validation.get("accepted"):
override = False
if user_providers:
# user_providers is a dict: {provider_slug: config_dict}
for slug, cfg in user_providers.items():
if slug == target_provider:
cfg_models = cfg.get("models", {})
# Direct membership works for dict (keys) and list (strings)
if new_model in cfg_models:
for up in user_providers:
if isinstance(up, dict) and up.get("provider") == target_provider:
cfg_models = up.get("models", [])
if new_model in cfg_models or any(
m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
):
override = True
break
# Also accept if models is a list of dicts with 'name' field
if isinstance(cfg_models, list):
if any(m.get("name") == new_model for m in cfg_models if isinstance(m, dict)):
override = True
break
if override:
validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")}
else:
@@ -1417,17 +1412,14 @@ def list_authenticated_providers(
models_list = list(fb)
# Prefer the endpoint's live /models list when credentials are
# available, unless the provider explicitly opts out via
# discover_models: false (e.g. dedicated endpoints that expose
# the entire aggregator catalog via /models).
# available. This keeps OpenAI-compatible relays (for example CRS)
# in sync when the server catalog changes without requiring the
# user to mirror every model into config.yaml.
api_key = str(ep_cfg.get("api_key", "") or "").strip()
if not api_key:
key_env = str(ep_cfg.get("key_env", "") or "").strip()
api_key = os.environ.get(key_env, "").strip() if key_env else ""
discover = ep_cfg.get("discover_models", True)
if isinstance(discover, str):
discover = discover.lower() not in ("false", "no", "0")
if api_url and api_key and discover:
if api_url and api_key:
try:
from hermes_cli.models import fetch_api_models
live_models = fetch_api_models(api_key, api_url)
+1 -1
View File
@@ -774,6 +774,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
ProviderEntry("lmstudio", "LM Studio", "LM Studio (local desktop app with built-in model server)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
@@ -803,7 +804,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway"),
]
# Derived dicts — used throughout the codebase
-52
View File
@@ -33,15 +33,12 @@ so plugin-defined tools appear alongside the built-in tools.
from __future__ import annotations
import asyncio
import importlib
import importlib.metadata
import importlib.util
import inspect
import logging
import os
import sys
import threading
import types
from dataclasses import dataclass, field
from pathlib import Path
@@ -1229,55 +1226,6 @@ def get_plugin_command_handler(name: str) -> Optional[Callable]:
return entry["handler"] if entry else None
_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS = 30.0


def resolve_plugin_command_result(result: Any) -> Any:
    """Resolve a plugin command return value, awaiting async handlers when needed.

    Sync CLI/TUI dispatch sites call plugin handlers from plain functions.
    If the handler returned an awaitable, it is driven to completion here so
    the caller still gets a concrete result synchronously:

    * no event loop running in this thread: run it with ``asyncio.run``;
    * a loop is already running: run it in a helper thread with its own
      loop, bounded by ``_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS`` so a hung
      async handler cannot wedge the terminal indefinitely.

    Args:
        result: Whatever the plugin handler returned.

    Returns:
        ``result`` unchanged when it is not awaitable, otherwise the value
        the awaitable resolves to.

    Raises:
        TimeoutError: The threaded path did not finish within the timeout.
        BaseException: Anything raised by the awaitable is re-raised as-is.
    """
    if not inspect.isawaitable(result):
        return result

    async def _drive() -> Any:
        # ``inspect.isawaitable`` accepts any object with ``__await__``, but
        # ``asyncio.run`` rejects non-coroutines with ValueError before
        # Python 3.14. Wrapping in a coroutine makes every awaitable work.
        # NOTE: awaitables bound to a different event loop (e.g. Tasks
        # created on the caller's loop) still cannot be driven this way.
        return await result

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread: safe to own one for the duration.
        return asyncio.run(_drive())

    outcome: Dict[str, Any] = {}
    failure: Dict[str, BaseException] = {}
    done = threading.Event()

    def _runner() -> None:
        try:
            outcome["value"] = asyncio.run(_drive())
        except BaseException as exc:  # pragma: no cover - re-raised below
            failure["exc"] = exc
        finally:
            done.set()

    # Daemon thread: a handler that never finishes must not block exit.
    thread = threading.Thread(
        target=_runner,
        name="hermes-plugin-command-await",
        daemon=True,
    )
    thread.start()
    if not done.wait(timeout=_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS):
        raise TimeoutError(
            "Plugin command async handler did not complete within "
            f"{_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS:.0f}s"
        )
    if "exc" in failure:
        raise failure["exc"]
    return outcome.get("value")
def get_plugin_commands() -> Dict[str, dict]:
"""Return the full plugin commands dict (name → {handler, description, plugin}).
+2 -11
View File
@@ -358,20 +358,11 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
return None
if not requested_norm.startswith("custom:"):
try:
canonical = auth_mod.resolve_provider(requested_norm)
auth_mod.resolve_provider(requested_norm)
except AuthError:
pass
else:
# A user-declared ``custom_providers`` entry whose name matches
# only an *alias* (``kimi`` → built-in ``kimi-coding``) is the
# user's intended target — alias rewriting would otherwise hijack
# the request. We only defer to the built-in when the raw name is
# the canonical provider itself (``nous``, ``openrouter``, …) so
# accidentally shadowing a canonical provider still resolves to
# the built-in. See tests/hermes_cli/test_runtime_provider_resolution.py
# ``test_named_custom_provider_does_not_shadow_builtin_provider``.
if (canonical or "").strip().lower() == requested_norm:
return None
return None
config = load_config()
+1 -2
View File
@@ -18,7 +18,6 @@ for reinstall when scopes/commands change.
from __future__ import annotations
import json
import os
import sys
from pathlib import Path
@@ -129,7 +128,7 @@ def slack_manifest_command(args) -> int:
target = Path(get_hermes_home()) / "slack-manifest.json"
except Exception:
target = Path(os.environ.get("HERMES_HOME") or str(Path.home() / ".hermes")) / "slack-manifest.json"
target = Path.home() / ".hermes" / "slack-manifest.json"
else:
target = Path(write_target).expanduser()
target.parent.mkdir(parents=True, exist_ok=True)
-1
View File
@@ -125,7 +125,6 @@ def show_status(args):
keys = {
"OpenRouter": "OPENROUTER_API_KEY",
"OpenAI": "OPENAI_API_KEY",
"NVIDIA": "NVIDIA_API_KEY",
"Z.AI/GLM": "GLM_API_KEY",
"Kimi": "KIMI_API_KEY",
"StepFun Step Plan": "STEPFUN_API_KEY",
-1
View File
@@ -345,7 +345,6 @@ _CATEGORY_MERGE: Dict[str, str] = {
"dashboard": "display",
"code_execution": "agent",
"prompt_caching": "agent",
"goals": "agent",
# Only `telegram.reactions` currently lives under telegram — fold it in
# with the other messaging-platform config (discord) so it isn't an
# orphan tab of one field.
+46 -199
View File
@@ -514,7 +514,7 @@ class SessionDB:
# Session lifecycle
# =========================================================================
def _insert_session_row(
def create_session(
self,
session_id: str,
source: str,
@@ -523,8 +523,8 @@ class SessionDB:
system_prompt: str = None,
user_id: str = None,
parent_session_id: str = None,
) -> None:
"""Shared INSERT OR IGNORE for session rows."""
) -> str:
"""Create a new session record. Returns the session_id."""
def _do(conn):
conn.execute(
"""INSERT OR IGNORE INTO sessions (id, source, user_id, model, model_config,
@@ -542,11 +542,8 @@ class SessionDB:
),
)
self._execute_write(_do)
def create_session(self, session_id: str, source: str, **kwargs) -> str:
"""Create a new session record. Returns the session_id."""
self._insert_session_row(session_id, source, **kwargs)
return session_id
def end_session(self, session_id: str, end_reason: str) -> None:
"""Mark a session as ended.
@@ -682,41 +679,21 @@ class SessionDB:
session_id: str,
source: str = "unknown",
model: str = None,
**kwargs,
) -> str:
"""Ensure a session row exists (INSERT OR IGNORE). Accepts optional kwargs."""
self._insert_session_row(session_id, source, model=model, **kwargs)
return session_id
def prune_empty_ghost_sessions(self, sessions_dir: "Optional[Path]" = None) -> int:
"""Remove closed empty TUI ghost sessions (no messages, no title, >24hr old)."""
cutoff = time.time() - 86400 # Only sessions older than 24 hours
) -> None:
"""Ensure a session row exists, creating it with minimal metadata if absent.
Used by _flush_messages_to_session_db to recover from a failed
create_session() call (e.g. transient SQLite lock at agent startup).
INSERT OR IGNORE is safe to call even when the row already exists.
"""
def _do(conn):
rows = conn.execute("""
SELECT id FROM sessions
WHERE source = 'tui'
AND title IS NULL
AND ended_at IS NOT NULL
AND started_at < ?
AND NOT EXISTS (
SELECT 1 FROM messages WHERE messages.session_id = sessions.id
)
""", (cutoff,)).fetchall()
ids = [r[0] if isinstance(r, (tuple, list)) else r["id"] for r in rows]
if ids:
placeholders = ",".join("?" * len(ids))
conn.execute(
f"DELETE FROM sessions WHERE id IN ({placeholders})", ids
)
return ids
removed_ids = self._execute_write(_do) or []
# Clean up any on-disk session files (belt-and-suspenders)
if sessions_dir and removed_ids:
for sid in removed_ids:
self._remove_session_files(sessions_dir, sid)
return len(removed_ids)
conn.execute(
"""INSERT OR IGNORE INTO sessions
(id, source, model, started_at)
VALUES (?, ?, ?, ?)""",
(session_id, source, model, time.time()),
)
self._execute_write(_do)
def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
"""Get a session by ID."""
@@ -956,7 +933,6 @@ class SessionDB:
offset: int = 0,
include_children: bool = False,
project_compression_tips: bool = True,
order_by_last_active: bool = False,
) -> List[Dict[str, Any]]:
"""List sessions with preview (first user message) and last active timestamp.
@@ -976,14 +952,6 @@ class SessionDB:
compressed continuations from being invisible to users while keeping
delegate subagents and branches hidden. Pass ``False`` to return the
raw root rows (useful for admin/debug UIs).
Pass ``order_by_last_active=True`` to sort by most-recent activity
instead of original conversation start time. For compression chains,
the "most-recent activity" is taken from the live tip (not the root),
so an old conversation that was compressed and continued recently
surfaces in the correct slot. Ordering is computed at SQL level via
a recursive CTE that walks compression-continuation edges, so LIMIT
and OFFSET still apply efficiently.
"""
where_clauses = []
params = []
@@ -1011,80 +979,25 @@ class SessionDB:
params.extend(exclude_sources)
where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
if order_by_last_active:
# Compute effective_last_active by walking each surfaced session's
# compression-continuation chain forward in SQL and taking the MAX
# timestamp across the chain. This lets us ORDER BY + LIMIT at SQL
# level instead of fetching every row and sorting in Python, while
# still surfacing old compression roots whose live tip is fresh.
#
# The CTE seeds from rows the outer WHERE admits (roots + branch
# children), then recursively joins forward through
# compression-continuation edges using the same criteria as
# get_compression_tip (parent.end_reason='compression' AND
# child.started_at >= parent.ended_at).
query = f"""
WITH RECURSIVE chain(root_id, cur_id) AS (
SELECT s.id, s.id FROM sessions s {where_sql}
UNION ALL
SELECT c.root_id, child.id
FROM chain c
JOIN sessions parent ON parent.id = c.cur_id
JOIN sessions child ON child.parent_session_id = c.cur_id
WHERE parent.end_reason = 'compression'
AND child.started_at >= parent.ended_at
),
chain_max AS (
SELECT
root_id,
MAX(COALESCE(
(SELECT MAX(m.timestamp) FROM messages m WHERE m.session_id = cur_id),
(SELECT started_at FROM sessions ss WHERE ss.id = cur_id)
)) AS effective_last_active
FROM chain
GROUP BY root_id
)
SELECT s.*,
COALESCE(
(SELECT SUBSTR(REPLACE(REPLACE(m.content, X'0A', ' '), X'0D', ' '), 1, 63)
FROM messages m
WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL
ORDER BY m.timestamp, m.id LIMIT 1),
''
) AS _preview_raw,
COALESCE(
(SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id),
s.started_at
) AS last_active,
COALESCE(cm.effective_last_active, s.started_at) AS _effective_last_active
FROM sessions s
LEFT JOIN chain_max cm ON cm.root_id = s.id
{where_sql}
ORDER BY _effective_last_active DESC, s.started_at DESC, s.id DESC
LIMIT ? OFFSET ?
"""
# WHERE params apply twice (CTE seed + outer select).
params = params + params + [limit, offset]
else:
query = f"""
SELECT s.*,
COALESCE(
(SELECT SUBSTR(REPLACE(REPLACE(m.content, X'0A', ' '), X'0D', ' '), 1, 63)
FROM messages m
WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL
ORDER BY m.timestamp, m.id LIMIT 1),
''
) AS _preview_raw,
COALESCE(
(SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id),
s.started_at
) AS last_active
FROM sessions s
{where_sql}
ORDER BY s.started_at DESC
LIMIT ? OFFSET ?
"""
params.extend([limit, offset])
query = f"""
SELECT s.*,
COALESCE(
(SELECT SUBSTR(REPLACE(REPLACE(m.content, X'0A', ' '), X'0D', ' '), 1, 63)
FROM messages m
WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL
ORDER BY m.timestamp, m.id LIMIT 1),
''
) AS _preview_raw,
COALESCE(
(SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id),
s.started_at
) AS last_active
FROM sessions s
{where_sql}
ORDER BY s.started_at DESC
LIMIT ? OFFSET ?
"""
params.extend([limit, offset])
with self._lock:
cursor = self._conn.execute(query, params)
rows = cursor.fetchall()
@@ -1098,8 +1011,6 @@ class SessionDB:
s["preview"] = text + ("..." if len(raw) > 60 else "")
else:
s["preview"] = ""
# Drop the internal ordering column so callers see a clean dict.
s.pop("_effective_last_active", None)
sessions.append(s)
# Project compression roots forward to their tips. Each row whose
@@ -1177,48 +1088,6 @@ class SessionDB:
# Message storage
# =========================================================================
# Sentinel prefix used to distinguish JSON-encoded structured content
# (multimodal messages: lists of parts like text + image_url) from plain
# string content. The NUL byte is not legal in normal text, so this
# cannot collide with real user content.
_CONTENT_JSON_PREFIX = "\x00json:"
@classmethod
def _encode_content(cls, content: Any) -> Any:
    """Turn message content into a value sqlite3 can bind as a parameter.

    sqlite3 binds only ``str``, ``bytes``, ``int``, ``float`` and ``None``.
    Multimodal messages carry ``content`` as a list of parts
    (``[{"type": "text", ...}, {"type": "image_url", ...}]``); binding that
    directly raises ``ProgrammingError: Error binding parameter N: type
    'list' is not supported``.

    Returns:
        ``content`` itself when it is already a bindable scalar; otherwise
        ``_CONTENT_JSON_PREFIX`` followed by the JSON dump of ``content``,
        or ``str(content)`` if it cannot be serialized. The inverse is
        :meth:`_decode_content`.
    """
    already_bindable = content is None or isinstance(
        content, (str, bytes, int, float)
    )
    if already_bindable:
        return content
    try:
        serialized = cls._CONTENT_JSON_PREFIX + json.dumps(content)
    except (TypeError, ValueError):
        # Last resort: stringify so that persistence never fails.
        return str(content)
    return serialized
@classmethod
def _decode_content(cls, content: Any) -> Any:
    """Undo :meth:`_encode_content`.

    Strings that start with ``_CONTENT_JSON_PREFIX`` are parsed back from
    JSON; every other value is returned untouched. If the JSON payload is
    malformed a warning is logged and the raw stored string is returned.
    """
    if not isinstance(content, str):
        return content
    marker = cls._CONTENT_JSON_PREFIX
    if not content.startswith(marker):
        return content
    payload = content[len(marker):]
    try:
        return json.loads(payload)
    except (json.JSONDecodeError, TypeError):
        logger.warning(
            "Failed to decode JSON-encoded message content; "
            "returning raw string"
        )
    return content
def append_message(
self,
session_id: str,
@@ -1255,9 +1124,6 @@ class SessionDB:
if codex_message_items else None
)
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
# Multimodal content (list of parts) must be JSON-encoded: sqlite3
# cannot bind list/dict parameters directly.
stored_content = self._encode_content(content)
# Pre-compute tool call count
num_tool_calls = 0
@@ -1274,7 +1140,7 @@ class SessionDB:
(
session_id,
role,
stored_content,
content,
tool_call_id,
tool_calls_json,
tool_name,
@@ -1357,7 +1223,7 @@ class SessionDB:
(
session_id,
role,
self._encode_content(msg.get("content")),
msg.get("content"),
msg.get("tool_call_id"),
tool_calls_json,
msg.get("tool_name"),
@@ -1396,8 +1262,6 @@ class SessionDB:
result = []
for row in rows:
msg = dict(row)
if "content" in msg:
msg["content"] = self._decode_content(msg["content"])
if msg.get("tool_calls"):
try:
msg["tool_calls"] = json.loads(msg["tool_calls"])
@@ -1487,15 +1351,15 @@ class SessionDB:
placeholders = ",".join("?" for _ in session_ids)
rows = self._conn.execute(
"SELECT role, content, tool_call_id, tool_calls, tool_name, "
"finish_reason, reasoning, reasoning_content, reasoning_details, "
"codex_reasoning_items, codex_message_items "
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items, "
"codex_message_items "
f"FROM messages WHERE session_id IN ({placeholders}) ORDER BY timestamp, id",
tuple(session_ids),
).fetchall()
messages = []
for row in rows:
content = self._decode_content(row["content"])
content = row["content"]
if row["role"] in {"user", "assistant"} and isinstance(content, str):
content = sanitize_context(content).strip()
msg = {"role": row["role"], "content": content}
@@ -1513,8 +1377,6 @@ class SessionDB:
# that replay reasoning (OpenRouter, OpenAI, Nous) receive
# coherent multi-turn reasoning context.
if row["role"] == "assistant":
if row["finish_reason"]:
msg["finish_reason"] = row["finish_reason"]
if row["reasoning"]:
msg["reasoning"] = row["reasoning"]
if row["reasoning_content"] is not None:
@@ -1882,26 +1744,10 @@ class SessionDB:
)""",
(match["id"], match["id"]),
)
context_msgs = []
for r in ctx_cursor.fetchall():
raw = r["content"]
decoded = self._decode_content(raw)
# Multimodal context: render a compact text-only
# summary for search previews.
if isinstance(decoded, list):
text_parts = [
p.get("text", "") for p in decoded
if isinstance(p, dict) and p.get("type") == "text"
]
text = " ".join(t for t in text_parts if t).strip()
preview = text or "[multimodal content]"
elif isinstance(decoded, str):
preview = decoded
else:
preview = ""
context_msgs.append(
{"role": r["role"], "content": preview[:200]}
)
context_msgs = [
{"role": r["role"], "content": (r["content"] or "")[:200]}
for r in ctx_cursor.fetchall()
]
match["context"] = context_msgs
except Exception:
match["context"] = []
@@ -2245,3 +2091,4 @@ class SessionDB:
result["error"] = str(exc)
return result
+6 -7
View File
@@ -356,17 +356,12 @@ def _compute_tool_definitions(
else:
if not quiet_mode:
print(f"⚠️ Unknown toolset: {toolset_name}")
else:
# Default: start with everything
elif disabled_toolsets:
from toolsets import get_all_toolsets
for ts_name in get_all_toolsets():
tools_to_include.update(resolve_toolset(ts_name))
# Always apply disabled toolsets as a subtraction step at the end.
# This ensures that even if a composite toolset (like hermes-cli)
# is enabled, any tools belonging to a disabled toolset are strictly
# stripped out. See issue #17309.
if disabled_toolsets:
for toolset_name in disabled_toolsets:
if validate_toolset(toolset_name):
resolved = resolve_toolset(toolset_name)
@@ -381,6 +376,10 @@ def _compute_tool_definitions(
else:
if not quiet_mode:
print(f"⚠️ Unknown toolset: {toolset_name}")
else:
from toolsets import get_all_toolsets
for ts_name in get_all_toolsets():
tools_to_include.update(resolve_toolset(ts_name))
# Plugin-registered tools are now resolved through the normal toolset
# path — validate_toolset() / resolve_toolset() / get_all_toolsets()
@@ -2960,7 +2960,7 @@ class Migrator:
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Migrate OpenClaw user state into Hermes Agent.")
parser.add_argument("--source", default=str(Path.home() / ".openclaw"), help="OpenClaw home directory")
parser.add_argument("--target", default=os.environ.get("HERMES_HOME") or str(Path.home() / ".hermes"), help="Hermes home directory")
parser.add_argument("--target", default=str(Path.home() / ".hermes"), help="Hermes home directory")
parser.add_argument(
"--workspace-target",
help="Optional workspace root where the workspace instructions file should be copied",
@@ -1,217 +0,0 @@
---
name: here.now
description: Publish static sites to {slug}.here.now and store private files in cloud Drives for agent-to-agent handoff.
version: 1.15.3
author: here.now
license: MIT
prerequisites:
commands: [curl, file, jq]
platforms: [macos, linux]
metadata:
hermes:
tags: [here.now, herenow, publish, deploy, hosting, static-site, web, share, URL, drive, storage]
homepage: https://here.now
requires_toolsets: [terminal]
---
# here.now
here.now lets agents publish websites and store private files in cloud Drives.
Use here.now for two jobs:
- **Sites**: publish websites and files at `{slug}.here.now`.
- **Drives**: store private agent files in cloud folders.
## Current docs
**Before answering questions about here.now capabilities, features, or workflows, read the current docs:**
→ **https://here.now/docs**
Read the docs:
- at the first here.now-related interaction in a conversation
- any time the user asks how to do something
- any time the user asks what is possible, supported, or recommended
- before telling the user a feature is unsupported
Topics that require current docs (do not rely on local skill text alone):
- Drives and Drive sharing
- custom domains
- payments and payment gating
- forking
- proxy routes and service variables
- handles and links
- limits and quotas
- SPA routing
- error handling and remediation
- feature availability
**If docs and live API behavior disagree, trust the live API behavior.**
If the docs fetch fails or times out, continue with the local skill and live API/script output. Prefer live API behavior for active operations.
## Requirements
- Required binaries: `curl`, `file`, `jq`
- Optional environment variable: `$HERENOW_API_KEY`
- Optional Drive token variable: `$HERENOW_DRIVE_TOKEN`
- Optional credentials file: `~/.herenow/credentials`
- Skill helper paths:
- `${HERMES_SKILL_DIR}/scripts/publish.sh` for publishing sites
- `${HERMES_SKILL_DIR}/scripts/drive.sh` for private Drive storage
## Create a site
```bash
PUBLISH="${HERMES_SKILL_DIR}/scripts/publish.sh"
bash "$PUBLISH" {file-or-dir} --client hermes
```
Outputs the live URL (e.g. `https://bright-canvas-a7k2.here.now/`).
Under the hood this is a three-step flow: create/update -> upload files -> finalize. A site is not live until finalize succeeds.
Without an API key this creates an **anonymous site** that expires in 24 hours.
With a saved API key, the site is permanent.
**File structure:** For HTML sites, place `index.html` at the root of the directory you publish, not inside a subdirectory. The directory's contents become the site root. For example, publish `my-site/` where `my-site/index.html` exists — don't publish a parent folder that contains `my-site/`.
You can also publish raw files without any HTML. Single files get a rich auto-viewer (images, PDF, video, audio). Multiple files get an auto-generated directory listing with folder navigation and an image gallery.
## Update an existing site
```bash
PUBLISH="${HERMES_SKILL_DIR}/scripts/publish.sh"
bash "$PUBLISH" {file-or-dir} --slug {slug} --client hermes
```
The script auto-loads the `claimToken` from `.herenow/state.json` when updating anonymous sites. Pass `--claim-token {token}` to override.
Authenticated updates require a saved API key.
## Use a Drive
Use a Drive when the user wants private cloud storage for agent files: documents, context, memory, plans, assets, media, research, code, and anything else that should persist without being published as a website.
Every signed-in account has a default Drive named `My Drive`.
```bash
DRIVE="${HERMES_SKILL_DIR}/scripts/drive.sh"
bash "$DRIVE" default
bash "$DRIVE" ls "My Drive"
bash "$DRIVE" put "My Drive" notes/today.md --from ./notes/today.md
bash "$DRIVE" cat "My Drive" notes/today.md
bash "$DRIVE" share "My Drive" --perms write --prefix notes/ --ttl 7d
```
Use scoped Drive tokens for agent-to-agent handoff. If you receive a `herenow_drive` share block, use its `token` as `Authorization: Bearer <token>` against `api_base`, respect `pathPrefix` when present, and preserve ETags on writes. A `pathPrefix` of `null` means full-Drive access. If the skill is available, prefer `drive.sh`; otherwise call the listed API operations directly.
## API key storage
The publish script reads the API key from these sources (first match wins):
1. `--api-key {key}` flag (CI/scripting only — avoid in interactive use)
2. `$HERENOW_API_KEY` environment variable
3. `~/.herenow/credentials` file (recommended for agents)
To store a key, write it to the credentials file:
```bash
mkdir -p ~/.herenow && echo "{API_KEY}" > ~/.herenow/credentials && chmod 600 ~/.herenow/credentials
```
**IMPORTANT**: After receiving an API key, save it immediately — run the command above yourself. Do not ask the user to run it manually. Avoid passing the key via CLI flags (e.g. `--api-key`) in interactive sessions; the credentials file is the preferred storage method.
Never commit credentials or local state files (`~/.herenow/credentials`, `.herenow/state.json`) to source control.
## Getting an API key
To upgrade from anonymous (24h) to permanent sites:
1. Ask the user for their email address.
2. Request a one-time sign-in code:
```bash
curl -sS https://here.now/api/auth/agent/request-code \
-H "content-type: application/json" \
-d '{"email": "user@example.com"}'
```
3. Tell the user: "Check your inbox for a sign-in code from here.now and paste it here."
4. Verify the code and get the API key:
```bash
curl -sS https://here.now/api/auth/agent/verify-code \
-H "content-type: application/json" \
-d '{"email":"user@example.com","code":"ABCD-2345"}'
```
5. Save the returned `apiKey` yourself (do not ask the user to do this):
```bash
mkdir -p ~/.herenow && echo "{API_KEY}" > ~/.herenow/credentials && chmod 600 ~/.herenow/credentials
```
## State file
After every site create/update, the script writes to `.herenow/state.json` in the working directory:
```json
{
"publishes": {
"bright-canvas-a7k2": {
"siteUrl": "https://bright-canvas-a7k2.here.now/",
"claimToken": "abc123",
"claimUrl": "https://here.now/claim?slug=bright-canvas-a7k2&token=abc123",
"expiresAt": "2026-02-18T01:00:00.000Z"
}
}
}
```
Before creating or updating sites, you may check this file to find prior slugs.
Treat `.herenow/state.json` as internal cache only.
Never present this local file path as a URL, and never use it as source of truth for auth mode, expiry, or claim URL.
## What to tell the user
For published sites:
- Always share the `siteUrl` from the current script run.
- Read and follow `publish_result.*` lines from script stderr to determine auth mode.
- When `publish_result.auth_mode=authenticated`: tell the user the site is **permanent** and saved to their account. No claim URL is needed.
- When `publish_result.auth_mode=anonymous`: tell the user the site **expires in 24 hours**. Share the claim URL (if `publish_result.claim_url` is non-empty and starts with `https://`) so they can keep it permanently. Warn that claim tokens are only returned once and cannot be recovered.
- Never tell the user to inspect `.herenow/state.json` for claim URLs or auth status.
For Drives:
- Do not describe Drive files as public URLs.
- Tell the user Drive contents are private unless shared with a scoped token.
- When sharing access with another agent, prefer a scoped token with a narrow `pathPrefix` and short TTL.
## publish.sh options
| Flag | Description |
| ---------------------- | -------------------------------------------- |
| `--slug {slug}` | Update an existing site instead of creating |
| `--claim-token {token}`| Override claim token for anonymous updates |
| `--title {text}` | Viewer title (non-HTML sites) |
| `--description {text}` | Viewer description |
| `--ttl {seconds}` | Set expiry (authenticated only) |
| `--client {name}` | Agent name for attribution (e.g. `hermes`) |
| `--base-url {url}` | API base URL (default: `https://here.now`) |
| `--allow-nonherenow-base-url` | Allow sending auth to non-default `--base-url` |
| `--api-key {key}` | API key override (prefer credentials file) |
| `--spa` | Enable SPA routing (serve index.html for unknown paths) |
| `--forkable` | Allow others to fork this site |
## Beyond publish.sh
For Drive operations, use `drive.sh` or the Drive API. For broader account and site management — delete, metadata, passwords, payments, domains, handles, links, variables, proxy routes, forking, duplication, and more — see the current docs:
→ **https://here.now/docs**
Full docs: https://here.now/docs
@@ -1,406 +0,0 @@
#!/usr/bin/env bash
# Fail fast: abort on command errors, unset variables, and failed pipeline stages.
set -euo pipefail
# Default API endpoint; can be replaced with --base-url.
BASE_URL="https://here.now"
# File read for the account API key when neither a key nor a Drive token was supplied.
CREDENTIALS_FILE="$HOME/.herenow/credentials"
# Credentials default to the environment; --api-key / --token override them.
API_KEY="${HERENOW_API_KEY:-}"
DRIVE_TOKEN="${HERENOW_DRIVE_TOKEN:-}"
# Set to 1 by --allow-nonherenow-base-url to permit sending credentials to a custom BASE_URL.
ALLOW_NON_HERENOW_BASE_URL=0
# Per-file size cap (500 MiB) checked before uploads.
MAX_FILE_BYTES=$((500 * 1024 * 1024))
# Print the command reference to stdout and exit with status 1.
# Also used for --help, so --help exits non-zero as well.
usage() {
cat <<'USAGE'
Usage: drive.sh [global options] <command> [args]
Global options:
--api-key <key> Account API key (or $HERENOW_API_KEY / ~/.herenow/credentials)
--token <drv_live_...> Drive token (or $HERENOW_DRIVE_TOKEN)
--base-url <url> API base (default: https://here.now)
--allow-nonherenow-base-url
Commands:
create [name] [--default]
default
ls
ls <drive> [prefix]
cat <drive> <path>
put <drive> <path> --from <local-file>
import <drive> <prefix> --from <local-folder> [--dry-run]
export <drive> <prefix> --to <local-folder> [--dry-run]
rm <drive> <path> [--recursive --confirm <path>]
share <drive> --perms read|write [--prefix notes/] [--ttl 30d] [--label text] [--manage-tokens]
tokens <drive>
revoke <drive> <tokenId>
delete <drive> --confirm "<drive name>"
USAGE
exit 1
}
# Report a fatal error: write "error: <message>" to stderr, then exit with status 1.
die() {
  printf 'error: %s\n' "$1" >&2
  exit 1
}
# Locate the script and its parent (skill) directory so a bundled jq can be found.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
BUNDLED_JQ="${SKILL_DIR}/bin/jq"
# Prefer the bundled jq when it is executable; otherwise fall back to jq on PATH.
if [[ -x "$BUNDLED_JQ" ]]; then
JQ_BIN="$BUNDLED_JQ"
elif command -v jq >/dev/null 2>&1; then
JQ_BIN="$(command -v jq)"
else
die "requires jq"
fi
# curl performs all HTTP calls; file is the content-type fallback.
for cmd in curl file; do
command -v "$cmd" >/dev/null 2>&1 || die "requires $cmd"
done
# Consume global options; stop at the first non-option word, which is the command.
while [[ $# -gt 0 ]]; do
case "$1" in
--api-key) API_KEY="$2"; shift 2 ;;
--token) DRIVE_TOKEN="$2"; shift 2 ;;
--base-url) BASE_URL="$2"; shift 2 ;;
--allow-nonherenow-base-url) ALLOW_NON_HERENOW_BASE_URL=1; shift ;;
--help|-h) usage ;;
--*) die "unknown global option: $1" ;;
*) break ;;
esac
done
# First remaining word is the command; with none given, print usage and exit.
CMD="${1:-}"
[[ -n "$CMD" ]] || usage
# Drop the command word so "$@" holds only the command's own arguments.
shift || true
# Fall back to the credentials file only when neither an API key nor a Drive token is set.
# All whitespace (including the trailing newline) is stripped from the file contents.
if [[ -z "$API_KEY" && -z "$DRIVE_TOKEN" && -f "$CREDENTIALS_FILE" ]]; then
API_KEY=$(tr -d '[:space:]' < "$CREDENTIALS_FILE")
fi
# Drop one trailing slash so URL concatenation below does not produce "//".
BASE_URL="${BASE_URL%/}"
# Safety guard: refuse to send any credential to a non-default host unless explicitly allowed.
if [[ "$BASE_URL" != "https://here.now" && "$ALLOW_NON_HERENOW_BASE_URL" -ne 1 ]]; then
if [[ -n "$API_KEY" || -n "$DRIVE_TOKEN" ]]; then
die "refusing to send credentials to non-default base URL; pass --allow-nonherenow-base-url to override"
fi
fi
# Build the Authorization header as curl arguments. A Drive token takes precedence
# over the API key; with neither available the script cannot proceed.
auth_header=()
if [[ -n "$DRIVE_TOKEN" ]]; then
auth_header=(-H "authorization: Bearer $DRIVE_TOKEN")
elif [[ -n "$API_KEY" ]]; then
auth_header=(-H "authorization: Bearer $API_KEY")
else
die "missing credentials; set HERENOW_API_KEY, HERENOW_DRIVE_TOKEN, or ~/.herenow/credentials"
fi
# Print the hex SHA-256 digest of the file given as $1.
# Uses sha256sum when it is on PATH, otherwise `shasum -a 256`.
compute_sha256() {
  local target="$1"
  local -a hasher=(shasum -a 256)
  if command -v sha256sum >/dev/null 2>&1; then
    hasher=(sha256sum)
  fi
  # Both tools print "<digest>  <name>"; keep only the digest field.
  "${hasher[@]}" "$target" | cut -d' ' -f1
}
# Print a Content-Type for the file path in $1.
# Known extensions map to fixed types; the match is case-insensitive so that
# e.g. INDEX.HTML or STYLE.CSS get the same type (and charset) as their
# lowercase forms instead of falling through to content sniffing.
# Anything else is sniffed with `file`, falling back to application/octet-stream.
guess_content_type() {
  local f="$1"
  local ext
  # tr (rather than ${var,,}) keeps this working on bash 3.2.
  ext=$(printf '%s' "${f##*.}" | tr '[:upper:]' '[:lower:]')
  case "$ext" in
    html|htm) echo "text/html; charset=utf-8" ;;
    css) echo "text/css; charset=utf-8" ;;
    js|mjs) echo "text/javascript; charset=utf-8" ;;
    json) echo "application/json; charset=utf-8" ;;
    md|txt) echo "text/plain; charset=utf-8" ;;
    svg) echo "image/svg+xml" ;;
    png) echo "image/png" ;;
    jpg|jpeg) echo "image/jpeg" ;;
    gif) echo "image/gif" ;;
    webp) echo "image/webp" ;;
    pdf) echo "application/pdf" ;;
    *) file --brief --mime-type "$f" 2>/dev/null || echo "application/octet-stream" ;;
  esac
}
# Perform an authenticated JSON API request and print the response body.
#   $1  HTTP method
#   $2  full URL
#   $3  optional JSON request body (sent with content-type: application/json)
# Dies with "HTTP <code>: <error>" on a non-2xx response, using the response's
# .error field when present and the raw body otherwise. Dies with
# "request failed: ..." when curl itself fails (DNS, TLS, connection refused).
# The temp file holding the response is removed on every exit path.
api_json() {
  local method="$1"; shift
  local url="$1"; shift
  local body="${1:-}"
  local tmp code err
  tmp=$(mktemp)
  local -a curl_args=(-sS -o "$tmp" -w "%{http_code}" -X "$method" "$url" "${auth_header[@]}")
  if [[ -n "$body" ]]; then
    curl_args+=(-H "content-type: application/json" -d "$body")
  fi
  # Handle transport failures explicitly so the temp file is not leaked and the
  # caller sees a clear message instead of an empty status code.
  if ! code=$(curl "${curl_args[@]}"); then
    rm -f "$tmp"
    die "request failed: $method $url"
  fi
  if [[ "$code" -lt 200 || "$code" -ge 300 ]]; then
    err=$("$JQ_BIN" -r '.error // empty' "$tmp" 2>/dev/null || true)
    [[ -n "$err" ]] || err="$(cat "$tmp")"
    rm -f "$tmp"
    die "HTTP $code: $err"
  fi
  cat "$tmp"
  rm -f "$tmp"
}
# Percent-encode the string in $1 using jq's @uri filter and print the result.
urlenc() {
  local raw="$1"
  "$JQ_BIN" --null-input --raw-output --arg v "$raw" '$v|@uri'
}
# Percent-encode a slash-separated path segment by segment, keeping the "/"
# separators literal, and print the result.
# As before, leading empty segments (a leading "/") are dropped and a trailing
# "/" is not preserved.
urlenc_path() {
  local path="$1"
  local out=""
  local part
  # Keep the segment array local; it previously leaked into the global scope.
  local -a parts=()
  IFS='/' read -r -a parts <<< "$path"
  # ${arr[@]+...} avoids an "unbound variable" error for an empty array under
  # `set -u` on bash versions before 4.4.
  for part in ${parts[@]+"${parts[@]}"}; do
    if [[ -n "$out" ]]; then
      out="$out/"
    fi
    out="$out$(urlenc "$part")"
  done
  echo "$out"
}
# Turn a user-supplied drive reference ($1) into a drive id and print it.
#   - drv_* values are printed unchanged.
#   - With a Drive token, anything else is rejected.
#   - "default", "my-drive" and "My Drive" resolve via the default-drive endpoint.
#   - Any other value is matched against drive names and must match exactly one.
resolve_drive() {
  local ref="$1"
  case "$ref" in
    drv_*)
      echo "$ref"
      return
      ;;
  esac
  [[ -z "$DRIVE_TOKEN" ]] || die "drive tokens must reference drives by drv_ id; use account credentials to resolve drive names"
  case "$ref" in
    default|my-drive|"My Drive")
      api_json GET "$BASE_URL/api/v1/drives/default" | "$JQ_BIN" -r '.drive.id'
      return
      ;;
  esac
  local matches match_count
  matches=$(api_json GET "$BASE_URL/api/v1/drives" | "$JQ_BIN" --arg n "$ref" '[.drives[] | select(.name == $n)]')
  match_count=$("$JQ_BIN" 'length' <<< "$matches")
  [[ "$match_count" -eq 1 ]] || die "drive name '$ref' matched $match_count drives; use a drv_ id"
  "$JQ_BIN" -r '.[0].id' <<< "$matches"
}
# Print the head version id of drive $1, read from .drive.headVersionId or,
# failing that, .headVersionId. Prints nothing when neither is present.
drive_head() {
  local drive_id="$1"
  local endpoint="$BASE_URL/api/v1/drives/$drive_id"
  api_json GET "$endpoint" | "$JQ_BIN" --raw-output '.drive.headVersionId // .headVersionId // empty'
}
# Print, as one compact JSON line, the listing entry whose path equals $2 in
# drive $1. Prints nothing when no entry in the listing matches exactly.
# The listing is requested with the path as prefix and limit=200.
file_meta() {
  local drive_id="$1"
  local wanted="$2"
  local encoded
  encoded=$(urlenc "$wanted")
  api_json GET "$BASE_URL/api/v1/drives/$drive_id/files?prefix=$encoded&limit=200" \
    | "$JQ_BIN" --compact-output --arg p "$wanted" '.files[]? | select(.path == $p)' \
    | head -n 1
}
# Upload one local file to a Drive path.
#   $1            drive name or drv_ id
#   $2            destination path inside the drive
#   --from FILE   local file to upload (required)
# Flow: request an upload slot (with ifMatch on the existing etag, or
# ifNoneMatch "*" for a new file), PUT the bytes to the returned URL, then
# finalize. Prints the finalize response as JSON. Dies on any failure.
put_file() {
  local drive="$1"; shift
  local path="$1"; shift
  local local_file=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --from)
        # Give a clear message instead of an "unbound variable" abort.
        [[ $# -ge 2 ]] || die "--from requires a file path"
        local_file="$2"; shift 2 ;;
      *) die "unexpected put argument: $1" ;;
    esac
  done
  [[ -f "$local_file" ]] || die "--from must be a file"
  # etag is declared here too so it no longer leaks into the global scope.
  local id sz ct sha meta body upload upload_url upload_id http_code etag
  id=$(resolve_drive "$drive")
  sz=$(wc -c < "$local_file" | tr -d ' ')
  [[ "$sz" -le "$MAX_FILE_BYTES" ]] || die "$path exceeds the $MAX_FILE_BYTES byte Drive file limit"
  ct=$(guess_content_type "$local_file")
  sha=$(compute_sha256 "$local_file")
  # A failed lookup is treated the same as "file does not exist yet".
  meta=$(file_meta "$id" "$path" || true)
  body=$("$JQ_BIN" -n --arg p "$path" --argjson s "$sz" --arg c "$ct" --arg sha "$sha" \
    '{path:$p,size:$s,contentType:$c,sha256:$sha}')
  if [[ -n "$meta" ]]; then
    # Existing file: only overwrite the version we just looked at.
    etag=$(echo "$meta" | "$JQ_BIN" -r '.etag')
    body=$(echo "$body" | "$JQ_BIN" --arg e "$etag" '.ifMatch = $e')
  else
    # New file: fail if something appears at this path in the meantime.
    body=$(echo "$body" | "$JQ_BIN" '.ifNoneMatch = "*"')
  fi
  upload=$(api_json POST "$BASE_URL/api/v1/drives/$id/files/uploads" "$body")
  upload_url=$(echo "$upload" | "$JQ_BIN" -r '.uploadUrl')
  upload_id=$(echo "$upload" | "$JQ_BIN" -r '.uploadId')
  # The upload URL is used without the Authorization header.
  http_code=$(curl -sS -o /dev/null -w "%{http_code}" -X PUT "$upload_url" -H "Content-Type: $ct" --data-binary "@$local_file")
  [[ "$http_code" -ge 200 && "$http_code" -lt 300 ]] || die "upload failed for $path (HTTP $http_code)"
  api_json POST "$BASE_URL/api/v1/drives/$id/files/finalize" "$("$JQ_BIN" -n --arg u "$upload_id" '{uploadId:$u}')" | "$JQ_BIN" .
}
case "$CMD" in
# create [name] [--default]: create a Drive. The name is optional and is only
# sent when non-empty; isDefault is always sent.
create)
name=""
is_default="false"
while [[ $# -gt 0 ]]; do
case "$1" in
--default) is_default="true"; shift ;;
# First positional word is the name; any further positional word is an error.
*) [[ -z "$name" ]] && name="$1" || die "unexpected argument: $1"; shift ;;
esac
done
body=$("$JQ_BIN" -n --arg n "$name" --argjson d "$is_default" '{isDefault:$d} + (if $n == "" then {} else {name:$n} end)')
api_json POST "$BASE_URL/api/v1/drives" "$body" | "$JQ_BIN" .
;;
# default: print the default-drive endpoint's JSON response.
default)
api_json GET "$BASE_URL/api/v1/drives/default" | "$JQ_BIN" .
;;
# ls: with no arguments, list drives (not allowed with a Drive token);
# with <drive> [prefix], list files in that drive under the optional prefix.
ls)
if [[ $# -eq 0 ]]; then
[[ -z "$DRIVE_TOKEN" ]] || die "drive tokens cannot list drives; pass a drv_ id"
api_json GET "$BASE_URL/api/v1/drives" | "$JQ_BIN" .
else
id=$(resolve_drive "$1")
prefix="${2:-}"
api_json GET "$BASE_URL/api/v1/drives/$id/files?prefix=$(urlenc "$prefix")" | "$JQ_BIN" .
fi
;;
# cat <drive> <path>: stream the file's raw contents to stdout.
# Uses curl directly (not api_json) so the body is passed through unmodified;
# -f makes curl exit non-zero on HTTP errors.
cat)
[[ $# -eq 2 ]] || die "usage: drive.sh cat <drive> <path>"
id=$(resolve_drive "$1")
curl -fsS "$BASE_URL/api/v1/drives/$id/files/$(urlenc_path "$2")" "${auth_header[@]}"
;;
# put <drive> <path> --from <local-file>: upload a single file via put_file.
put)
[[ $# -ge 2 ]] || die "usage: drive.sh put <drive> <path> --from <local-file>"
put_file "$@"
;;
# import <drive> <prefix> --from <local-folder> [--dry-run]
# Upload every regular file under the local folder to <prefix>/<relative path>.
# Files under top-level .git/ or node_modules/, and any .DS_Store, are ignored.
# Oversized files are skipped; dry-run entries are also counted as skipped.
# Prints a planned/uploaded/skipped/failed summary and exits 1 if any upload failed.
import)
  [[ $# -ge 2 ]] || die "usage: drive.sh import <drive> <prefix> --from <local-folder> [--dry-run]"
  drive="$1"; prefix="${2%/}"; shift 2
  from=""; dry=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --from) from="$2"; shift 2 ;;
      --dry-run) dry=1; shift ;;
      *) die "unexpected import argument: $1" ;;
    esac
  done
  [[ -d "$from" ]] || die "--from must be a folder"
  uploaded=0
  skipped=0
  failed=0
  planned=0
  while IFS= read -r -d '' f; do
    # Strip the source folder literally (quoted, so glob characters in the
    # folder name are not treated as a pattern), then drop leading slashes.
    # This gives the same relative path whether --from was "dir" or "dir/",
    # and whether find prints "dir/x" or "dir//x".
    rel=${f#"$from"}
    while [[ "$rel" == /* ]]; do
      rel=${rel#/}
    done
    [[ "$rel" == .git/* || "$rel" == node_modules/* || "$rel" == ".DS_Store" || "$rel" == */.DS_Store ]] && continue
    planned=$((planned + 1))
    sz=$(wc -c < "$f" | tr -d ' ')
    if [[ "$sz" -gt "$MAX_FILE_BYTES" ]]; then
      echo "skip oversized $f ($sz bytes > $MAX_FILE_BYTES)" >&2
      skipped=$((skipped + 1))
      continue
    fi
    dest="$rel"
    [[ -n "$prefix" ]] && dest="$prefix/$rel"
    if [[ "$dry" -eq 1 ]]; then
      echo "upload $f -> $dest"
      skipped=$((skipped + 1))
    else
      # Run in a subshell so a die inside put_file fails only this file.
      if (put_file "$drive" "$dest" --from "$f" >/dev/null); then
        uploaded=$((uploaded + 1))
      else
        failed=$((failed + 1))
      fi
    fi
  done < <(find "$from" -type f -print0 | sort -z)
  echo "planned=$planned uploaded=$uploaded skipped=$skipped failed=$failed"
  [[ "$failed" -eq 0 ]] || exit 1
  ;;
# export <drive> <prefix> --to <local-folder> [--dry-run]
# Download every file under <prefix> into the local folder, following the
# listing's nextCursor until it is exhausted. Prints "files=<count>" at the end.
export)
  [[ $# -ge 2 ]] || die "usage: drive.sh export <drive> <prefix> --to <local-folder> [--dry-run]"
  id=$(resolve_drive "$1"); prefix="${2%/}"; shift 2
  to=""; dry=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --to) to="$2"; shift 2 ;;
      --dry-run) dry=1; shift ;;
      *) die "unexpected export argument: $1" ;;
    esac
  done
  [[ -n "$to" ]] || die "--to is required"
  cursor=""
  total=0
  while true; do
    url="$BASE_URL/api/v1/drives/$id/files?prefix=$(urlenc "$prefix")&limit=200"
    [[ -n "$cursor" ]] && url="$url&cursor=$(urlenc "$cursor")"
    files=$(api_json GET "$url")
    while IFS= read -r p; do
      [[ -n "$p" ]] || continue
      rel="$p"
      # Quote the prefix so glob characters in it are matched literally.
      [[ -n "$prefix" ]] && rel=${p#"$prefix"/}
      # Paths come from the remote listing; never let a ".." segment place a
      # download outside the --to folder.
      case "/$rel/" in
        */../*)
          echo "skip unsafe path: $p" >&2
          continue
          ;;
      esac
      out="$to/$rel"
      if [[ "$dry" -eq 1 ]]; then
        echo "download $p -> $out"
      else
        mkdir -p "$(dirname "$out")"
        curl -fsS "$BASE_URL/api/v1/drives/$id/files/$(urlenc_path "$p")" "${auth_header[@]}" -o "$out"
      fi
      total=$((total + 1))
    done < <(echo "$files" | "$JQ_BIN" -r '.files[].path')
    cursor=$(echo "$files" | "$JQ_BIN" -r '.nextCursor // empty')
    [[ -n "$cursor" ]] || break
  done
  echo "files=$total"
  ;;
# rm <drive> <path> [--recursive --confirm <path>]
# Non-recursive: delete one file, conditional on its current etag (If-Match).
# Recursive: requires --confirm to repeat the path exactly; the delete is tied
# to the drive's current head version via baseVersionId.
rm)
  [[ $# -ge 2 ]] || die "usage: drive.sh rm <drive> <path> [--recursive --confirm <path>]"
  id=$(resolve_drive "$1"); path="$2"; shift 2
  recursive=0; confirm=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --recursive) recursive=1; shift ;;
      --confirm) confirm="$2"; shift 2 ;;
      *) die "unexpected rm argument: $1" ;;
    esac
  done
  if [[ "$recursive" -eq 1 ]]; then
    [[ "$confirm" == "$path" ]] || die "recursive delete requires --confirm '$path'"
    head=$(drive_head "$id")
    api_json DELETE "$BASE_URL/api/v1/drives/$id/files/$(urlenc_path "$path")?recursive=true&baseVersionId=$(urlenc "$head")" | "$JQ_BIN" .
  else
    meta=$(file_meta "$id" "$path")
    # Without this check a missing file produced an empty etag and a request
    # with a blank If-Match header.
    [[ -n "$meta" ]] || die "file not found: $path"
    etag=$(echo "$meta" | "$JQ_BIN" -r '.etag')
    curl -fsS -X DELETE "$BASE_URL/api/v1/drives/$id/files/$(urlenc_path "$path")" "${auth_header[@]}" -H "If-Match: $etag" | "$JQ_BIN" .
  fi
  ;;
# share <drive> [--perms ...] [--prefix ...] [--ttl ...] [--label ...] [--manage-tokens]
# Create a scoped token for the drive and print the response's shareBlock.
# NOTE: perms defaults to "write" when --perms is omitted.
share)
[[ $# -ge 1 ]] || die "usage: drive.sh share <drive> --perms read|write [--prefix notes/] [--ttl 30d] [--label text] [--manage-tokens]"
id=$(resolve_drive "$1"); shift
perms="write"; prefix=""; ttl=""; label=""; manage_tokens="false"
while [[ $# -gt 0 ]]; do
case "$1" in
--perms) perms="$2"; shift 2 ;;
--prefix) prefix="$2"; shift 2 ;;
--ttl) ttl="$2"; shift 2 ;;
--label) label="$2"; shift 2 ;;
--manage-tokens) manage_tokens="true"; shift ;;
*) die "unexpected share argument: $1" ;;
esac
done
# Optional fields (manageTokens, ttl, pathPrefix, label) are only included when set.
body=$("$JQ_BIN" -n --arg p "$perms" --arg pp "$prefix" --arg ttl "$ttl" --arg label "$label" --argjson mt "$manage_tokens" \
'{perms:$p} + (if $mt then {manageTokens:true} else {} end) + (if $ttl == "" then {} else {ttl:$ttl} end) + (if $pp == "" then {} else {pathPrefix:$pp} end) + (if $label == "" then {} else {label:$label} end)')
api_json POST "$BASE_URL/api/v1/drives/$id/tokens" "$body" | "$JQ_BIN" -r '.shareBlock'
;;
# tokens <drive>: print the drive's tokens endpoint response as JSON.
tokens)
[[ $# -eq 1 ]] || die "usage: drive.sh tokens <drive>"
id=$(resolve_drive "$1")
api_json GET "$BASE_URL/api/v1/drives/$id/tokens" | "$JQ_BIN" .
;;
# revoke <drive> <tokenId>: delete one token of the drive.
# NOTE(review): the token id is placed in the URL without percent-encoding.
revoke)
[[ $# -eq 2 ]] || die "usage: drive.sh revoke <drive> <tokenId>"
id=$(resolve_drive "$1")
api_json DELETE "$BASE_URL/api/v1/drives/$id/tokens/$2" | "$JQ_BIN" .
;;
# delete <drive> --confirm "<drive name>": delete a whole drive.
# The drive's current name is fetched and --confirm must match it exactly.
delete)
[[ $# -ge 1 ]] || die "usage: drive.sh delete <drive> --confirm <drive name>"
id=$(resolve_drive "$1"); shift
confirm=""
while [[ $# -gt 0 ]]; do
case "$1" in
--confirm) confirm="$2"; shift 2 ;;
*) die "unexpected delete argument: $1" ;;
esac
done
drive=$(api_json GET "$BASE_URL/api/v1/drives/$id")
name=$(echo "$drive" | "$JQ_BIN" -r '.drive.name')
[[ "$confirm" == "$name" ]] || die "delete requires --confirm '$name'"
api_json DELETE "$BASE_URL/api/v1/drives/$id" | "$JQ_BIN" .
;;
# Any other command word is an error.
*)
die "unknown command: $CMD"
;;
esac
@@ -1,445 +0,0 @@
#!/usr/bin/env bash
# Fail fast: abort on command errors, unset variables, and failed pipeline stages.
set -euo pipefail
# Default API endpoint; can be replaced with --base-url.
BASE_URL="https://here.now"
CREDENTIALS_FILE="$HOME/.herenow/credentials"
API_KEY="${HERENOW_API_KEY:-}"
# Where the API key came from: none | env | flag | credentials.
# Reported on stderr as publish_result.api_key_source.
API_KEY_SOURCE="none"
if [[ -n "${HERENOW_API_KEY:-}" ]]; then
API_KEY_SOURCE="env"
fi
# Set to 1 by --allow-nonherenow-base-url.
ALLOW_NON_HERENOW_BASE_URL=0
# Option values; empty means "not given".
SLUG=""
CLAIM_TOKEN=""
TITLE=""
DESCRIPTION=""
TTL=""
CLIENT=""
# Positional file-or-directory argument.
TARGET=""
# "true" when the corresponding flag is given; FORKABLE may also be filled from fork-meta.json.
FORKABLE=""
SPA_MODE=""
# Drive id and optional version for --from-drive publishing.
FROM_DRIVE=""
DRIVE_VERSION=""
# Print the option reference to stdout and exit with status 1.
# Also used for --help, so --help exits non-zero as well.
usage() {
cat <<'USAGE'
Usage: publish.sh <file-or-dir> [options]
Options:
--api-key <key> API key (or set $HERENOW_API_KEY)
--slug <slug> Update existing publish
--claim-token <token> Claim token for anonymous updates
--title <text> Viewer title
--description <text> Viewer description
--ttl <seconds> Expiry (authenticated only)
--client <name> Agent name for attribution (e.g. cursor, claude-code)
--forkable Allow others to fork this site
--spa Enable SPA routing
--from-drive <drv_...> Publish a Drive snapshot instead of local files
--version <dv_...> Drive version for --from-drive (default: current head)
--base-url <url> API base (default: https://here.now)
--allow-nonherenow-base-url
Allow auth requests to non-default API base URL
USAGE
exit 1
}
# Abort the script: emit "error: <message>" on stderr and exit with status 1.
die() {
  local message="$1"
  echo "error: $message" >&2
  exit 1
}
# Locate the script and its parent (skill) directory so a bundled jq can be found.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
BUNDLED_JQ="${SKILL_DIR}/bin/jq"
# Prefer the bundled jq when it is executable; otherwise fall back to jq on PATH.
if [[ -x "$BUNDLED_JQ" ]]; then
JQ_BIN="$BUNDLED_JQ"
elif command -v jq >/dev/null 2>&1; then
JQ_BIN="$(command -v jq)"
else
die "requires jq"
fi
# curl performs all HTTP calls; file is the content-type fallback.
for cmd in curl file; do
command -v "$cmd" >/dev/null 2>&1 || die "requires $cmd"
done
# Parse options in any order. Exactly one positional argument (the target
# file or directory) is accepted; a second one is an error.
while [[ $# -gt 0 ]]; do
case "$1" in
--api-key) API_KEY="$2"; API_KEY_SOURCE="flag"; shift 2 ;;
--slug) SLUG="$2"; shift 2 ;;
--claim-token) CLAIM_TOKEN="$2"; shift 2 ;;
--title) TITLE="$2"; shift 2 ;;
--description) DESCRIPTION="$2"; shift 2 ;;
--ttl) TTL="$2"; shift 2 ;;
--client) CLIENT="$2"; shift 2 ;;
--base-url) BASE_URL="$2"; shift 2 ;;
--allow-nonherenow-base-url) ALLOW_NON_HERENOW_BASE_URL=1; shift ;;
--forkable) FORKABLE="true"; shift ;;
--spa) SPA_MODE="true"; shift ;;
--from-drive) FROM_DRIVE="$2"; shift 2 ;;
--version) DRIVE_VERSION="$2"; shift 2 ;;
--help|-h) usage ;;
-*) die "unknown option: $1" ;;
*) [[ -z "$TARGET" ]] && TARGET="$1" || die "unexpected argument: $1"; shift ;;
esac
done
# --from-drive and a local target are mutually exclusive; otherwise a target
# is required and must exist.
if [[ -n "$FROM_DRIVE" ]]; then
[[ -z "$TARGET" ]] || die "--from-drive does not accept a local file-or-dir argument"
else
[[ -n "$TARGET" ]] || usage
[[ -e "$TARGET" ]] || die "path does not exist: $TARGET"
fi
# Load API key from credentials file if not provided via flag or env
# (all whitespace is stripped from the file contents).
if [[ -z "$API_KEY" && -f "$CREDENTIALS_FILE" ]]; then
API_KEY=$(cat "$CREDENTIALS_FILE" | tr -d '[:space:]')
[[ -n "$API_KEY" ]] && API_KEY_SOURCE="credentials"
fi
# Drop one trailing slash so URL concatenation does not produce "//".
BASE_URL="${BASE_URL%/}"
# Publish state is kept relative to the current working directory.
STATE_DIR=".herenow"
STATE_FILE="$STATE_DIR/state.json"
# Safety guard: avoid accidentally sending bearer auth to arbitrary endpoints.
if [[ -n "$API_KEY" && "$BASE_URL" != "https://here.now" && "$ALLOW_NON_HERENOW_BASE_URL" -ne 1 ]]; then
die "refusing to send API key to non-default base URL; pass --allow-nonherenow-base-url to override"
fi
# Auto-load claim token from state file for anonymous updates
# (only when updating a slug without an API key or an explicit --claim-token).
if [[ -n "$SLUG" && -z "$CLAIM_TOKEN" && -z "$API_KEY" && -f "$STATE_FILE" ]]; then
CLAIM_TOKEN=$("$JQ_BIN" -r --arg s "$SLUG" '.publishes[$s].claimToken // empty' "$STATE_FILE" 2>/dev/null || true)
fi
# --from-drive path: publish a Drive snapshot with one API call, print the
# site URL on stdout and publish_result.* lines on stderr, then exit 0.
# Nothing below this block runs in that mode.
if [[ -n "$FROM_DRIVE" ]]; then
[[ -n "$API_KEY" ]] || die "--from-drive requires an account API key"
# Build the request body incrementally; optional fields are added only when set.
BODY=$("$JQ_BIN" -n --arg d "$FROM_DRIVE" '{driveId:$d}')
[[ -n "$DRIVE_VERSION" ]] && BODY=$(echo "$BODY" | "$JQ_BIN" --arg v "$DRIVE_VERSION" '.versionId = $v')
[[ -n "$SLUG" ]] && BODY=$(echo "$BODY" | "$JQ_BIN" --arg s "$SLUG" '.slug = $s')
if [[ -n "$TITLE" || -n "$DESCRIPTION" ]]; then
viewer="{}"
[[ -n "$TITLE" ]] && viewer=$(echo "$viewer" | "$JQ_BIN" --arg t "$TITLE" '.title = $t')
[[ -n "$DESCRIPTION" ]] && viewer=$(echo "$viewer" | "$JQ_BIN" --arg d "$DESCRIPTION" '.description = $d')
BODY=$(echo "$BODY" | "$JQ_BIN" --argjson v "$viewer" '.viewer = $v')
fi
[[ "$FORKABLE" == "true" ]] && BODY=$(echo "$BODY" | "$JQ_BIN" '.forkable = true')
[[ "$SPA_MODE" == "true" ]] && BODY=$(echo "$BODY" | "$JQ_BIN" '.spaMode = true')
# Attribution header: "<client>/publish-sh" when --client normalizes to a
# non-empty value (lowercased, runs of other characters collapsed to "-",
# one leading/trailing "-" trimmed), else the default below.
CLIENT_HEADER_VALUE="here-now-publish-sh"
if [[ -n "$CLIENT" ]]; then
normalized_client=$(echo "$CLIENT" | tr '[:upper:]' '[:lower:]' | tr -cs 'a-z0-9._-' '-')
normalized_client="${normalized_client#-}"
normalized_client="${normalized_client%-}"
if [[ -n "$normalized_client" ]]; then
CLIENT_HEADER_VALUE="${normalized_client}/publish-sh"
fi
fi
echo "publishing from Drive..." >&2
RESPONSE=$(curl -sS -X POST "$BASE_URL/api/v1/publish/from-drive" \
-H "authorization: Bearer $API_KEY" \
-H "x-herenow-client: $CLIENT_HEADER_VALUE" \
-H "content-type: application/json" \
-d "$BODY")
# Errors are detected from an .error field in the response body; the HTTP
# status code is not inspected here.
if echo "$RESPONSE" | "$JQ_BIN" -e '.error' >/dev/null 2>&1; then
err=$(echo "$RESPONSE" | "$JQ_BIN" -r '.error')
die "$err"
fi
SITE_URL=$(echo "$RESPONSE" | "$JQ_BIN" -r '.siteUrl')
OUT_SLUG=$(echo "$RESPONSE" | "$JQ_BIN" -r '.slug')
CURRENT_VERSION=$(echo "$RESPONSE" | "$JQ_BIN" -r '.currentVersionId')
DRIVE_VERSION_OUT=$(echo "$RESPONSE" | "$JQ_BIN" -r '.driveVersionId')
# stdout carries only the site URL; the key=value report goes to stderr.
echo "$SITE_URL"
echo "" >&2
echo "publish_result.site_url=$SITE_URL" >&2
echo "publish_result.slug=$OUT_SLUG" >&2
echo "publish_result.action=from_drive" >&2
echo "publish_result.auth_mode=authenticated" >&2
echo "publish_result.api_key_source=$API_KEY_SOURCE" >&2
echo "publish_result.persistence=permanent" >&2
echo "publish_result.drive_id=$FROM_DRIVE" >&2
echo "publish_result.drive_version_id=$DRIVE_VERSION_OUT" >&2
echo "publish_result.current_version_id=$CURRENT_VERSION" >&2
exit 0
fi
# Print the hex SHA-256 digest of the file named by $1.
# Falls back to `shasum -a 256` when sha256sum is not installed.
compute_sha256() {
  local path="$1"
  if ! command -v sha256sum >/dev/null 2>&1; then
    shasum -a 256 "$path" | cut -d' ' -f1
    return
  fi
  # Output is "<digest>  <name>"; keep only the first field.
  sha256sum "$path" | cut -d' ' -f1
}
# Print a Content-Type for the file path in $1.
# Known extensions map to fixed types; the match is case-insensitive so that
# e.g. INDEX.HTML or STYLE.CSS get the same type (and charset) as their
# lowercase forms instead of falling through to content sniffing.
# Anything else is sniffed with `file`, falling back to application/octet-stream.
guess_content_type() {
  local f="$1"
  local ext
  # tr (rather than ${var,,}) keeps this working on bash 3.2.
  ext=$(printf '%s' "${f##*.}" | tr '[:upper:]' '[:lower:]')
  case "$ext" in
    html|htm) echo "text/html; charset=utf-8" ;;
    css) echo "text/css; charset=utf-8" ;;
    js|mjs) echo "text/javascript; charset=utf-8" ;;
    json) echo "application/json; charset=utf-8" ;;
    md|txt) echo "text/plain; charset=utf-8" ;;
    svg) echo "image/svg+xml" ;;
    png) echo "image/png" ;;
    jpg|jpeg) echo "image/jpeg" ;;
    gif) echo "image/gif" ;;
    webp) echo "image/webp" ;;
    pdf) echo "application/pdf" ;;
    mp4) echo "video/mp4" ;;
    mov) echo "video/quicktime" ;;
    mp3) echo "audio/mpeg" ;;
    wav) echo "audio/wav" ;;
    xml) echo "application/xml" ;;
    woff2) echo "font/woff2" ;;
    woff) echo "font/woff" ;;
    ttf) echo "font/ttf" ;;
    ico) echo "image/x-icon" ;;
    *)
      local detected
      detected=$(file --brief --mime-type "$f" 2>/dev/null || echo "application/octet-stream")
      echo "$detected"
      ;;
  esac
}
# Build file manifest as JSON array.
# FILES_JSON: [{path,size,contentType,hash}, ...] sent to the API.
# FILE_MAP:   {manifest path: absolute local path}, used later to find the
#             bytes for each upload the API asks for.
FILES_JSON="[]"
if [[ -f "$TARGET" ]]; then
  # Single file: published under its basename.
  sz=$(wc -c < "$TARGET" | tr -d ' ')
  ct=$(guess_content_type "$TARGET")
  bn=$(basename "$TARGET")
  h=$(compute_sha256 "$TARGET")
  FILES_JSON=$("$JQ_BIN" -n --arg p "$bn" --argjson s "$sz" --arg c "$ct" --arg h "$h" \
    '[{"path":$p,"size":$s,"contentType":$c,"hash":$h}]')
  FILE_MAP=$("$JQ_BIN" -n --arg p "$bn" --arg a "$(cd "$(dirname "$TARGET")" && pwd)/$(basename "$TARGET")" \
    '{($p):$a}')
elif [[ -d "$TARGET" ]]; then
  FILE_MAP="{}"
  while IFS= read -r -d '' f; do
    # Strip the target directory literally (quoted, so glob characters in the
    # name are not treated as a pattern), then drop leading slashes. This
    # gives the same site-relative path for "dist" and "dist/", and whether
    # find prints "dist/x" or "dist//x".
    rel=${f#"$TARGET"}
    while [[ "$rel" == /* ]]; do
      rel=${rel#/}
    done
    [[ "$rel" == ".DS_Store" ]] && continue
    [[ "$(basename "$rel")" == ".DS_Store" ]] && continue
    # fork-meta.json is local configuration, not site content.
    [[ "$rel" == ".herenow/fork-meta.json" ]] && continue
    sz=$(wc -c < "$f" | tr -d ' ')
    ct=$(guess_content_type "$f")
    h=$(compute_sha256 "$f")
    abs=$(cd "$(dirname "$f")" && pwd)/$(basename "$f")
    FILES_JSON=$(echo "$FILES_JSON" | "$JQ_BIN" --arg p "$rel" --argjson s "$sz" --arg c "$ct" --arg h "$h" \
      '. + [{"path":$p,"size":$s,"contentType":$c,"hash":$h}]')
    FILE_MAP=$(echo "$FILE_MAP" | "$JQ_BIN" --arg p "$rel" --arg a "$abs" '. + {($p):$a}')
  done < <(find "$TARGET" -type f -print0 | sort -z)
else
  die "not a file or directory: $TARGET"
fi
file_count=$(echo "$FILES_JSON" | "$JQ_BIN" 'length')
[[ "$file_count" -gt 0 ]] || die "no files found"
# Read fork-meta.json defaults if present and no explicit flags given.
# Only .forkable is consulted, and only when --forkable was not passed;
# a missing or unparsable value leaves FORKABLE empty.
FORK_META=""
if [[ -d "$TARGET" ]]; then
FORK_META_PATH="$TARGET/.herenow/fork-meta.json"
if [[ -f "$FORK_META_PATH" ]]; then
FORK_META=$(cat "$FORK_META_PATH")
if [[ -z "$FORKABLE" ]]; then
FORKABLE=$("$JQ_BIN" -r '.forkable // empty' <<< "$FORK_META" 2>/dev/null || true)
fi
fi
fi
# Build request body: start from the file manifest and add optional fields
# only when the corresponding option was given.
BODY=$(echo "$FILES_JSON" | "$JQ_BIN" '{files: .}')
if [[ -n "$TTL" ]]; then
# --argjson inserts TTL as a JSON value, so it must be valid JSON (e.g. a number).
BODY=$(echo "$BODY" | "$JQ_BIN" --argjson t "$TTL" '.ttlSeconds = $t')
fi
if [[ -n "$TITLE" || -n "$DESCRIPTION" ]]; then
viewer="{}"
[[ -n "$TITLE" ]] && viewer=$(echo "$viewer" | "$JQ_BIN" --arg t "$TITLE" '.title = $t')
[[ -n "$DESCRIPTION" ]] && viewer=$(echo "$viewer" | "$JQ_BIN" --arg d "$DESCRIPTION" '.description = $d')
BODY=$(echo "$BODY" | "$JQ_BIN" --argjson v "$viewer" '.viewer = $v')
fi
# The claim token is only sent for anonymous updates of an existing slug.
if [[ -n "$CLAIM_TOKEN" && -n "$SLUG" && -z "$API_KEY" ]]; then
BODY=$(echo "$BODY" | "$JQ_BIN" --arg ct "$CLAIM_TOKEN" '.claimToken = $ct')
fi
if [[ "$FORKABLE" == "true" ]]; then
BODY=$(echo "$BODY" | "$JQ_BIN" '.forkable = true')
fi
if [[ "$SPA_MODE" == "true" ]]; then
BODY=$(echo "$BODY" | "$JQ_BIN" '.spaMode = true')
fi
# Determine endpoint and method: PUT to the slug's URL for updates, POST for new publishes.
if [[ -n "$SLUG" ]]; then
URL="$BASE_URL/api/v1/publish/$SLUG"
METHOD="PUT"
else
URL="$BASE_URL/api/v1/publish"
METHOD="POST"
fi
# Build auth header (left empty for anonymous publishes).
AUTH_ARGS=()
if [[ -n "$API_KEY" ]]; then
AUTH_ARGS=(-H "authorization: Bearer $API_KEY")
fi
# Reported on stderr as publish_result.auth_mode.
AUTH_MODE="anonymous"
if [[ -n "$API_KEY" ]]; then
AUTH_MODE="authenticated"
fi
# Attribution header: "<client>/publish-sh" when --client normalizes to a
# non-empty value (lowercased, runs of other characters collapsed to "-",
# one leading/trailing "-" trimmed), else the default below.
CLIENT_HEADER_VALUE="here-now-publish-sh"
if [[ -n "$CLIENT" ]]; then
normalized_client=$(echo "$CLIENT" | tr '[:upper:]' '[:lower:]' | tr -cs 'a-z0-9._-' '-')
normalized_client="${normalized_client#-}"
normalized_client="${normalized_client%-}"
if [[ -n "$normalized_client" ]]; then
CLIENT_HEADER_VALUE="${normalized_client}/publish-sh"
fi
fi
CLIENT_ARGS=(-H "x-herenow-client: $CLIENT_HEADER_VALUE")
# Step 1: Create/update publish
echo "creating publish ($file_count files)..." >&2
# The ${arr[@]+"${arr[@]}"} form guards the array expansions against `set -u`
# when an array is empty.
RESPONSE=$(curl -sS -X "$METHOD" "$URL" \
"${AUTH_ARGS[@]+"${AUTH_ARGS[@]}"}" \
"${CLIENT_ARGS[@]+"${CLIENT_ARGS[@]}"}" \
-H "content-type: application/json" \
-d "$BODY")
# Check for errors (detected from an .error field in the body; .details is
# appended in parentheses when present).
if echo "$RESPONSE" | "$JQ_BIN" -e '.error' >/dev/null 2>&1; then
err=$(echo "$RESPONSE" | "$JQ_BIN" -r '.error')
details=$(echo "$RESPONSE" | "$JQ_BIN" -r '.details // empty')
die "$err${details:+ ($details)}"
fi
OUT_SLUG=$(echo "$RESPONSE" | "$JQ_BIN" -r '.slug')
VERSION_ID=$(echo "$RESPONSE" | "$JQ_BIN" -r '.upload.versionId')
FINALIZE_URL=$(echo "$RESPONSE" | "$JQ_BIN" -r '.upload.finalizeUrl')
SITE_URL=$(echo "$RESPONSE" | "$JQ_BIN" -r '.siteUrl')
UPLOAD_COUNT=$(echo "$RESPONSE" | "$JQ_BIN" '.upload.uploads | length')
SKIPPED_COUNT=$(echo "$RESPONSE" | "$JQ_BIN" '.upload.skipped // [] | length')
# jq -r prints the string "null" for a missing field; treat that as a malformed response.
[[ "$OUT_SLUG" != "null" ]] || die "unexpected response: $RESPONSE"
# Step 2: Upload files (skipped files are unchanged from previous version)
if [[ "$SKIPPED_COUNT" -gt 0 ]]; then
  echo "uploading $UPLOAD_COUNT files ($SKIPPED_COUNT unchanged, skipped)..." >&2
else
  echo "uploading $UPLOAD_COUNT files..." >&2
fi
upload_errors=0
# Arithmetic loop rather than `seq 0 $((UPLOAD_COUNT - 1))`: with zero uploads
# BSD/macOS seq counts down (0, -1) and would run the body on bogus indexes.
for ((i = 0; i < UPLOAD_COUNT; i++)); do
  upload_path=$(echo "$RESPONSE" | "$JQ_BIN" -r ".upload.uploads[$i].path")
  upload_url=$(echo "$RESPONSE" | "$JQ_BIN" -r ".upload.uploads[$i].url")
  upload_ct=$(echo "$RESPONSE" | "$JQ_BIN" -r ".upload.uploads[$i].headers[\"Content-Type\"] // empty")
  # Single-file publishes upload the target itself; directory publishes look
  # the manifest path up in FILE_MAP.
  if [[ -f "$TARGET" && ! -d "$TARGET" ]]; then
    local_file="$TARGET"
  else
    local_file=$(echo "$FILE_MAP" | "$JQ_BIN" -r --arg p "$upload_path" '.[$p]')
  fi
  if [[ ! -f "$local_file" ]]; then
    echo "warning: missing local file for $upload_path" >&2
    upload_errors=$((upload_errors + 1))
    continue
  fi
  # Send a Content-Type header only when the API specified one for this upload.
  ct_args=()
  [[ -n "$upload_ct" ]] && ct_args=(-H "Content-Type: $upload_ct")
  http_code=$(curl -sS -o /dev/null -w "%{http_code}" -X PUT "$upload_url" \
    "${ct_args[@]+"${ct_args[@]}"}" \
    --data-binary "@$local_file")
  if [[ "$http_code" -lt 200 || "$http_code" -ge 300 ]]; then
    echo "warning: upload failed for $upload_path (HTTP $http_code)" >&2
    upload_errors=$((upload_errors + 1))
  fi
done
# All uploads are attempted before failing, so every problem is reported.
[[ "$upload_errors" -eq 0 ]] || die "$upload_errors file(s) failed to upload"
# Step 3: Finalize
echo "finalizing..." >&2
# Build the body with jq so the version id is always correctly JSON-escaped,
# matching how every other request body in this script is built.
FIN_BODY=$("$JQ_BIN" -n -c --arg v "$VERSION_ID" '{versionId: $v}')
FIN_RESPONSE=$(curl -sS -X POST "$FINALIZE_URL" \
  "${AUTH_ARGS[@]+"${AUTH_ARGS[@]}"}" \
  "${CLIENT_ARGS[@]+"${CLIENT_ARGS[@]}"}" \
  -H "content-type: application/json" \
  -d "$FIN_BODY")
# Errors are detected from an .error field in the response body.
if echo "$FIN_RESPONSE" | "$JQ_BIN" -e '.error' >/dev/null 2>&1; then
  err=$(echo "$FIN_RESPONSE" | "$JQ_BIN" -r '.error')
  die "finalize failed: $err"
fi
# Save state: record this publish under .publishes[<slug>] in the state file,
# creating the directory and an empty structure on first use.
mkdir -p "$STATE_DIR"
if [[ -f "$STATE_FILE" ]]; then
STATE=$(cat "$STATE_FILE")
else
STATE='{"publishes":{}}'
fi
# The entry always has siteUrl; claimToken, claimUrl and expiresAt are added
# only when the create response contained them. The new entry replaces any
# previous entry for the same slug.
entry=$("$JQ_BIN" -n --arg s "$SITE_URL" '{siteUrl: $s}')
RESPONSE_CLAIM_TOKEN=$(echo "$RESPONSE" | "$JQ_BIN" -r '.claimToken // empty')
RESPONSE_CLAIM_URL=$(echo "$RESPONSE" | "$JQ_BIN" -r '.claimUrl // empty')
RESPONSE_EXPIRES=$(echo "$RESPONSE" | "$JQ_BIN" -r '.expiresAt // empty')
[[ -n "$RESPONSE_CLAIM_TOKEN" ]] && entry=$(echo "$entry" | "$JQ_BIN" --arg v "$RESPONSE_CLAIM_TOKEN" '.claimToken = $v')
[[ -n "$RESPONSE_CLAIM_URL" ]] && entry=$(echo "$entry" | "$JQ_BIN" --arg v "$RESPONSE_CLAIM_URL" '.claimUrl = $v')
[[ -n "$RESPONSE_EXPIRES" ]] && entry=$(echo "$entry" | "$JQ_BIN" --arg v "$RESPONSE_EXPIRES" '.expiresAt = $v')
STATE=$(echo "$STATE" | "$JQ_BIN" --arg slug "$OUT_SLUG" --argjson e "$entry" '.publishes[$slug] = $e')
echo "$STATE" | "$JQ_BIN" '.' > "$STATE_FILE"
# Output: stdout carries only the site URL; everything else goes to stderr.
echo "$SITE_URL"
# persistence: expires_24h for anonymous publishes, expires_at for
# authenticated publishes whose response carried an expiry, else permanent.
PERSISTENCE="permanent"
if [[ "$AUTH_MODE" == "anonymous" ]]; then
PERSISTENCE="expires_24h"
elif [[ -n "$RESPONSE_EXPIRES" ]]; then
PERSISTENCE="expires_at"
fi
# Only surface a claim URL that starts with https://.
SAFE_CLAIM_URL=""
if [[ -n "$RESPONSE_CLAIM_URL" && "$RESPONSE_CLAIM_URL" == https://* ]]; then
SAFE_CLAIM_URL="$RESPONSE_CLAIM_URL"
fi
ACTION="create"
if [[ -n "$SLUG" ]]; then
ACTION="update"
fi
# key=value report lines on stderr.
echo "" >&2
echo "publish_result.site_url=$SITE_URL" >&2
echo "publish_result.slug=$OUT_SLUG" >&2
echo "publish_result.action=$ACTION" >&2
echo "publish_result.auth_mode=$AUTH_MODE" >&2
echo "publish_result.api_key_source=$API_KEY_SOURCE" >&2
echo "publish_result.persistence=$PERSISTENCE" >&2
echo "publish_result.expires_at=$RESPONSE_EXPIRES" >&2
echo "publish_result.claim_url=$SAFE_CLAIM_URL" >&2
# Human-readable summary.
if [[ "$AUTH_MODE" == "authenticated" ]]; then
echo "authenticated publish (permanent, saved to your account)" >&2
else
echo "anonymous publish (expires in 24h)" >&2
if [[ -n "$SAFE_CLAIM_URL" ]]; then
echo "claim URL: $SAFE_CLAIM_URL" >&2
fi
if [[ -n "$RESPONSE_CLAIM_TOKEN" ]]; then
echo "claim token saved to $STATE_FILE" >&2
fi
fi
@@ -12,14 +12,6 @@ import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
try:
from hermes_constants import get_hermes_home
except ImportError:
import os as _os
def get_hermes_home() -> Path: # type: ignore[misc]
val = (_os.environ.get("HERMES_HOME") or "").strip()
return Path(val) if val else Path.home() / ".hermes"
try:
from fastapi import APIRouter
except Exception: # Allows local unit tests without dashboard dependencies.
@@ -143,15 +135,15 @@ ACHIEVEMENTS: List[Dict[str, Any]] = [
def state_path() -> Path:
return get_hermes_home() / "plugins" / "hermes-achievements" / "state.json"
return Path.home() / ".hermes" / "plugins" / "hermes-achievements" / "state.json"
def snapshot_path() -> Path:
return get_hermes_home() / "plugins" / "hermes-achievements" / "scan_snapshot.json"
return Path.home() / ".hermes" / "plugins" / "hermes-achievements" / "scan_snapshot.json"
def checkpoint_path() -> Path:
return get_hermes_home() / "plugins" / "hermes-achievements" / "scan_checkpoint.json"
return Path.home() / ".hermes" / "plugins" / "hermes-achievements" / "scan_checkpoint.json"
def load_state() -> Dict[str, Any]:
+12 -23
View File
@@ -110,17 +110,6 @@ def _parse_context_tokens(host_val, root_val) -> int | None:
return None
def _parse_int_config(host_val, root_val, default: int) -> int:
"""Parse an integer config: host wins, then root, then default."""
for val in (host_val, root_val):
if val is not None:
try:
return int(val)
except (ValueError, TypeError):
pass
return default
def _parse_dialectic_depth(host_val, root_val) -> int:
"""Parse dialecticDepth: host wins, then root, then 1. Clamped to 1-3."""
for val in (host_val, root_val):
@@ -474,10 +463,10 @@ class HonchoClientConfig:
raw.get("dialecticDynamic"),
default=True,
),
dialectic_max_chars=_parse_int_config(
host_block.get("dialecticMaxChars"),
raw.get("dialecticMaxChars"),
default=600,
dialectic_max_chars=int(
host_block.get("dialecticMaxChars")
or raw.get("dialecticMaxChars")
or 600
),
dialectic_depth=_parse_dialectic_depth(
host_block.get("dialecticDepth"),
@@ -498,15 +487,15 @@ class HonchoClientConfig:
or raw.get("reasoningLevelCap")
or "high"
),
message_max_chars=_parse_int_config(
host_block.get("messageMaxChars"),
raw.get("messageMaxChars"),
default=25000,
message_max_chars=int(
host_block.get("messageMaxChars")
or raw.get("messageMaxChars")
or 25000
),
dialectic_max_input_chars=_parse_int_config(
host_block.get("dialecticMaxInputChars"),
raw.get("dialecticMaxInputChars"),
default=10000,
dialectic_max_input_chars=int(
host_block.get("dialecticMaxInputChars")
or raw.get("dialecticMaxInputChars")
or 10000
),
recall_mode=_normalize_recall_mode(
host_block.get("recallMode")
+6 -9
View File
@@ -160,13 +160,11 @@ class HonchoSessionManager:
Peers are lazy -- no API call until first use.
Observation settings are controlled per-session via SessionPeerConfig.
"""
with self._cache_lock:
if peer_id in self._peers_cache:
return self._peers_cache[peer_id]
if peer_id in self._peers_cache:
return self._peers_cache[peer_id]
peer = self.honcho.peer(peer_id)
with self._cache_lock:
self._peers_cache[peer_id] = peer
self._peers_cache[peer_id] = peer
return peer
def _get_or_create_honcho_session(
@@ -178,10 +176,9 @@ class HonchoSessionManager:
Returns:
Tuple of (honcho_session, existing_messages).
"""
with self._cache_lock:
if session_id in self._sessions_cache:
logger.debug("Honcho session '%s' retrieved from cache", session_id)
return self._sessions_cache[session_id], []
if session_id in self._sessions_cache:
logger.debug("Honcho session '%s' retrieved from cache", session_id)
return self._sessions_cache[session_id], []
session = self.honcho.session(session_id)
-3
View File
@@ -38,7 +38,6 @@ except ImportError:
try:
from microsoft_teams.apps import App, ActivityContext
from microsoft_teams.common.http.client import ClientOptions
from microsoft_teams.api import MessageActivity, ConversationReference
from microsoft_teams.api.activities.typing import TypingActivityInput
from microsoft_teams.api.activities.invoke.adaptive_card import AdaptiveCardInvokeActivity
@@ -58,7 +57,6 @@ try:
TEAMS_SDK_AVAILABLE = True
except ImportError:
TEAMS_SDK_AVAILABLE = False
ClientOptions = None # type: ignore[assignment,misc]
App = None # type: ignore[assignment,misc]
ActivityContext = None # type: ignore[assignment,misc]
MessageActivity = None # type: ignore[assignment,misc]
@@ -210,7 +208,6 @@ class TeamsAdapter(BasePlatformAdapter):
client_secret=self._client_secret,
tenant_id=self._tenant_id,
http_server_adapter=_AiohttpBridgeAdapter(aiohttp_app),
client=ClientOptions(headers={"User-Agent": "Hermes"}),
)
# Register message handler before initialize()
+135 -317
View File
@@ -162,13 +162,6 @@ from agent.display import (
_detect_tool_failure,
get_tool_emoji as _get_tool_emoji,
)
from agent.tool_guardrails import (
ToolCallGuardrailConfig,
ToolCallGuardrailController,
ToolGuardrailDecision,
append_toolguard_guidance,
toolguard_synthetic_result,
)
from agent.trajectory import (
convert_scratchpad_to_think, has_incomplete_scratchpad,
save_trajectory as _save_trajectory_to_file,
@@ -1157,8 +1150,6 @@ class AIAgent:
# Tool execution state — allows _vprint during tool execution
# even when stream consumers are registered (no tokens streaming then)
self._executing_tools = False
self._tool_guardrails = ToolCallGuardrailController()
self._tool_guardrail_halt_decision: ToolGuardrailDecision | None = None
# Interrupt mechanism for breaking out of tool loops
self._interrupt_requested = False
@@ -1632,12 +1623,30 @@ class AIAgent:
self._session_db = session_db
self._parent_session_id = parent_session_id
self._last_flushed_db_idx = 0 # tracks DB-write cursor to prevent duplicate writes
self._session_db_created = False # DB row deferred to run_conversation()
self._session_init_model_config = {
"max_iterations": self.max_iterations,
"reasoning_config": reasoning_config,
"max_tokens": max_tokens,
}
if self._session_db:
try:
self._session_db.create_session(
session_id=self.session_id,
source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
model=self.model,
model_config={
"max_iterations": self.max_iterations,
"reasoning_config": reasoning_config,
"max_tokens": max_tokens,
},
user_id=None,
parent_session_id=self._parent_session_id,
)
except Exception as e:
# Transient SQLite lock contention (e.g. CLI and gateway writing
# concurrently) must NOT permanently disable session_search for
# this agent. Keep _session_db alive — subsequent message
# flushes and session_search calls will still work once the
# lock clears. The session row may be missing from the index
# for this run, but that is recoverable (flushes upsert rows).
logger.warning(
"Session DB create_session failed (session_search still available): %s", e
)
# In-memory todo list for task planning (one per agent/session)
from tools.todo_tool import TodoStore
@@ -1649,14 +1658,6 @@ class AIAgent:
_agent_cfg = _load_agent_config()
except Exception:
_agent_cfg = {}
try:
self._tool_guardrails = ToolCallGuardrailController(
ToolCallGuardrailConfig.from_mapping(
_agent_cfg.get("tool_loop_guardrails", {})
)
)
except Exception as _tlg_err:
logger.warning("Tool loop guardrail config ignored: %s", _tlg_err)
# Cache only the derived auxiliary compression context override that is
# needed later by the startup feasibility check. Avoid exposing a
# broad pseudo-public config object on the agent instance.
@@ -2152,28 +2153,6 @@ class AIAgent:
"is_anthropic_oauth": self._is_anthropic_oauth,
})
def _ensure_db_session(self) -> None:
"""Create session DB row on first use. Disables _session_db on failure."""
if self._session_db_created or not self._session_db:
return
try:
self._session_db.create_session(
session_id=self.session_id,
source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
model=self.model,
model_config=self._session_init_model_config,
system_prompt=self._cached_system_prompt,
user_id=None,
parent_session_id=self._parent_session_id,
)
self._session_db_created = True
except Exception as e:
# Transient failure (e.g. SQLite lock). Keep _session_db alive —
# _session_db_created stays False so next run_conversation() retries.
logger.warning(
"Session DB creation failed (will retry next turn): %s", e
)
def reset_session_state(self):
"""Reset all session-scoped token counters to 0 for a fresh session.
@@ -3723,9 +3702,14 @@ class AIAgent:
return
self._apply_persist_user_message_override(messages)
try:
# Retry row creation if the earlier attempt failed transiently.
if not self._session_db_created:
self._ensure_db_session()
# If create_session() failed at startup (e.g. transient lock), the
# session row may not exist yet. ensure_session() uses INSERT OR
# IGNORE so it is a no-op when the row is already there.
self._session_db.ensure_session(
self.session_id,
source=self.platform or "cli",
model=self.model,
)
start_idx = len(conversation_history) if conversation_history else 0
flush_from = max(start_idx, self._last_flushed_db_idx)
for msg in messages[flush_from:]:
@@ -4998,8 +4982,8 @@ class AIAgent:
def _get_tool_call_id_static(tc) -> str:
"""Extract call ID from a tool_call entry (dict or object)."""
if isinstance(tc, dict):
return tc.get("call_id", "") or tc.get("id", "") or ""
return getattr(tc, "call_id", "") or getattr(tc, "id", "") or ""
return tc.get("id", "") or ""
return getattr(tc, "id", "") or ""
_VALID_API_ROLES = frozenset({"system", "user", "assistant", "tool", "function", "developer"})
@@ -8602,13 +8586,9 @@ class AIAgent:
# message. Without it, replaying the persisted message causes
# HTTP 400 ("The reasoning_content in the thinking mode must
# be passed back to the API"). Include streamed reasoning
# text when captured; otherwise pad with a single space —
# DeepSeek V4 Pro tightened validation and rejects empty
# string ("The reasoning content in the thinking mode must
# be passed back to the API"). A space satisfies non-empty
# checks everywhere without leaking fabricated reasoning.
# Refs #15250, #17400, #17341.
msg["reasoning_content"] = reasoning_text or " "
# text when captured; otherwise pad with empty string.
# Refs #15250, #17400.
msg["reasoning_content"] = reasoning_text or ""
# Additive fallback (refs #16844, #16884). Streaming-only providers
# (glm, MiniMax, gpt-5.x via aigw, Anthropic via openai-compat shims)
@@ -8763,20 +8743,11 @@ class AIAgent:
return
# 1. Explicit reasoning_content already set — preserve it verbatim
# (includes DeepSeek/Kimi's own space-placeholder written at creation
# time, and any valid reasoning content from the same provider).
#
# Exception: sessions persisted BEFORE #17341 have empty-string
# placeholders pinned at creation time. DeepSeek V4 Pro rejects
# those with HTTP 400. When the active provider enforces the
# thinking-mode echo, upgrade "" → " " on replay so stale history
# doesn't 400 the user on the next turn.
# (includes DeepSeek/Kimi's own empty-string placeholder written at
# creation time, and any valid reasoning content from the same provider).
existing = source_msg.get("reasoning_content")
if isinstance(existing, str):
if existing == "" and self._needs_thinking_reasoning_pad():
api_msg["reasoning_content"] = " "
else:
api_msg["reasoning_content"] = existing
api_msg["reasoning_content"] = existing
return
needs_thinking_pad = self._needs_thinking_reasoning_pad()
@@ -8788,10 +8759,8 @@ class AIAgent:
# pins reasoning_content at creation time for tool-call turns, so the
# shape (reasoning set, reasoning_content absent, tool_calls present)
# is unreachable from same-provider DeepSeek history after this fix.
# Inject a single space to satisfy the API without leaking another
# provider's chain of thought to DeepSeek/Kimi. Space (not "")
# because DeepSeek V4 Pro rejects empty-string reasoning_content
# in thinking mode (refs #17341).
# Inject "" to satisfy the API without leaking another provider's
# chain of thought to DeepSeek/Kimi.
normalized_reasoning = source_msg.get("reasoning")
if (
needs_thinking_pad
@@ -8799,7 +8768,7 @@ class AIAgent:
and isinstance(normalized_reasoning, str)
and normalized_reasoning
):
api_msg["reasoning_content"] = " "
api_msg["reasoning_content"] = ""
return
# 3. Healthy session: promote 'reasoning' field to 'reasoning_content'
@@ -8812,15 +8781,12 @@ class AIAgent:
return
# 4. DeepSeek / Kimi thinking mode: all assistant messages need
# reasoning_content. Inject a single space to satisfy the provider's
# requirement when no explicit reasoning content is present. Covers
# both tool-call turns (already-poisoned history with no reasoning
# at all) and plain text turns. Space (not "") because DeepSeek V4
# Pro tightened validation and rejects empty string with HTTP 400
# ("The reasoning content in the thinking mode must be passed back
# to the API"). Refs #17341.
# reasoning_content. Inject "" to satisfy the provider's requirement
# when no explicit reasoning content is present. Covers both
# tool-call turns (already-poisoned history with no reasoning at all)
# and plain text turns.
if needs_thinking_pad:
api_msg["reasoning_content"] = " "
api_msg["reasoning_content"] = ""
return
# 5. reasoning_content was present but not a string (e.g. None after
@@ -9055,15 +9021,12 @@ class AIAgent:
self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
# Update session_log_file to point to the new session's JSON file
self.session_log_file = self.logs_dir / f"session_{self.session_id}.json"
self._session_db_created = False
self._session_db.create_session(
session_id=self.session_id,
source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
model=self.model,
model_config=self._session_init_model_config,
parent_session_id=old_session_id,
)
self._session_db_created = True
# Auto-number the title for the continuation session
if old_title:
try:
@@ -9121,14 +9084,9 @@ class AIAgent:
# Update token estimate after compaction so pressure calculations
# use the post-compression count, not the stale pre-compression one.
# Use estimate_request_tokens_rough() so tool schemas are included —
# with 50+ tools enabled, schemas alone can add 20-30K tokens, and
# omitting them delays the next compression cycle far past the
# configured threshold (issue #14695).
_compressed_est = estimate_request_tokens_rough(
compressed,
system_prompt=new_system_prompt or "",
tools=self.tools or None,
_compressed_est = (
estimate_tokens_rough(new_system_prompt)
+ estimate_messages_tokens_rough(compressed)
)
self.context_compressor.last_prompt_tokens = _compressed_est
self.context_compressor.last_completion_tokens = 0
@@ -9149,44 +9107,6 @@ class AIAgent:
)
return compressed, new_system_prompt
def _set_tool_guardrail_halt(self, decision: ToolGuardrailDecision) -> None:
"""Record the first guardrail decision that should stop this turn."""
if decision.should_halt and self._tool_guardrail_halt_decision is None:
self._tool_guardrail_halt_decision = decision
def _toolguard_controlled_halt_response(self, decision: ToolGuardrailDecision) -> str:
tool = decision.tool_name or "a tool"
return (
f"I stopped retrying {tool} because it hit the tool-call guardrail "
f"({decision.code}) after {decision.count} repeated non-progressing "
"attempts. The last tool result explains the blocker; the next step is "
"to change strategy instead of repeating the same call."
)
def _append_guardrail_observation(
self,
tool_name: str,
function_args: dict,
function_result: str,
*,
failed: bool,
) -> str:
decision = self._tool_guardrails.after_call(
tool_name,
function_args,
function_result,
failed=failed,
)
if decision.action in {"warn", "halt"}:
function_result = append_toolguard_guidance(function_result, decision)
if decision.should_halt:
self._set_tool_guardrail_halt(decision)
return function_result
def _guardrail_block_result(self, decision: ToolGuardrailDecision) -> str:
    """Register a blocking guardrail decision and return its stand-in result.

    The decision is first handed to ``_set_tool_guardrail_halt``; the
    returned string is whatever ``toolguard_synthetic_result`` produces
    for that decision.
    """
    self._set_tool_guardrail_halt(decision)
    synthetic = toolguard_synthetic_result(decision)
    return synthetic
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
"""Execute tool calls from the assistant message and append results to messages.
@@ -9230,8 +9150,7 @@ class AIAgent:
)
def _invoke_tool(self, function_name: str, function_args: dict, effective_task_id: str,
tool_call_id: Optional[str] = None, messages: list = None,
pre_tool_block_checked: bool = False) -> str:
tool_call_id: Optional[str] = None, messages: list = None) -> str:
"""Invoke a single tool and return the result string. No display logic.
Handles both agent-level tools (todo, memory, etc.) and registry-dispatched
@@ -9240,14 +9159,13 @@ class AIAgent:
"""
# Check plugin hooks for a block directive before executing anything.
block_message: Optional[str] = None
if not pre_tool_block_checked:
try:
from hermes_cli.plugins import get_pre_tool_call_block_message
block_message = get_pre_tool_call_block_message(
function_name, function_args, task_id=effective_task_id or "",
)
except Exception:
pass
try:
from hermes_cli.plugins import get_pre_tool_call_block_message
block_message = get_pre_tool_call_block_message(
function_name, function_args, task_id=effective_task_id or "",
)
except Exception:
pass
if block_message is not None:
return json.dumps({"error": block_message}, ensure_ascii=False)
@@ -9399,31 +9317,13 @@ class AIAgent:
except Exception:
pass
block_result = None
blocked_by_guardrail = False
try:
from hermes_cli.plugins import get_pre_tool_call_block_message
block_message = get_pre_tool_call_block_message(
function_name, function_args, task_id=effective_task_id or "",
)
except Exception:
block_message = None
if block_message is not None:
block_result = json.dumps({"error": block_message}, ensure_ascii=False)
else:
guardrail_decision = self._tool_guardrails.before_call(function_name, function_args)
if not guardrail_decision.allows_execution:
block_result = self._guardrail_block_result(guardrail_decision)
blocked_by_guardrail = True
parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))
parsed_calls.append((tool_call, function_name, function_args))
# ── Logging / callbacks ──────────────────────────────────────────
tool_names_str = ", ".join(name for _, name, _, _, _ in parsed_calls)
tool_names_str = ", ".join(name for _, name, _ in parsed_calls)
if not self.quiet_mode:
print(f" ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
for i, (tc, name, args) in enumerate(parsed_calls, 1):
args_str = json.dumps(args, ensure_ascii=False)
if self.verbose_logging:
print(f" 📞 Tool {i}: {name}({list(args.keys())})")
@@ -9432,9 +9332,7 @@ class AIAgent:
args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
if block_result is not None:
continue
for tc, name, args in parsed_calls:
if self.tool_progress_callback:
try:
preview = _build_tool_preview(name, args)
@@ -9442,9 +9340,7 @@ class AIAgent:
except Exception as cb_err:
logging.debug(f"Tool progress callback error: {cb_err}")
for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
if block_result is not None:
continue
for tc, name, args in parsed_calls:
if self.tool_start_callback:
try:
self.tool_start_callback(tc.id, name, args)
@@ -9452,11 +9348,8 @@ class AIAgent:
logging.debug(f"Tool start callback error: {cb_err}")
# ── Concurrent execution ─────────────────────────────────────────
# Each slot holds (function_name, function_args, function_result, duration, error_flag, blocked_flag)
# Each slot holds (function_name, function_args, function_result, duration, error_flag)
results = [None] * num_tools
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
if block_result is not None:
results[i] = (name, args, block_result, 0.0, True, True)
# Touch activity before launching workers so the gateway knows
# we're executing tools (not stuck).
@@ -9511,14 +9404,7 @@ class AIAgent:
pass
start = time.time()
try:
result = self._invoke_tool(
function_name,
function_args,
effective_task_id,
tool_call.id,
messages=messages,
pre_tool_block_checked=True,
)
result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id, messages=messages)
except Exception as tool_error:
result = f"Error executing tool '{function_name}': {tool_error}"
logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
@@ -9528,7 +9414,7 @@ class AIAgent:
logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
else:
logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
results[index] = (function_name, function_args, result, duration, is_error, False)
results[index] = (function_name, function_args, result, duration, is_error)
# Tear down worker-tid tracking. Clear any interrupt bit we may
# have set so the next task scheduled onto this recycled tid
# starts with a clean slate.
@@ -9554,67 +9440,61 @@ class AIAgent:
spinner.start()
try:
runnable_calls = [
(i, tc, name, args)
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls)
if block_result is None
]
futures = []
if runnable_calls:
max_workers = min(len(runnable_calls), _MAX_TOOL_WORKERS)
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
for i, tc, name, args in runnable_calls:
# Propagate ContextVars (e.g. _approval_session_key); mirrors asyncio.to_thread.
ctx = contextvars.copy_context()
f = executor.submit(ctx.run, _run_tool, i, tc, name, args)
futures.append(f)
max_workers = min(num_tools, _MAX_TOOL_WORKERS)
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = []
for i, (tc, name, args) in enumerate(parsed_calls):
# Propagate ContextVars (e.g. _approval_session_key); mirrors asyncio.to_thread.
ctx = contextvars.copy_context()
f = executor.submit(ctx.run, _run_tool, i, tc, name, args)
futures.append(f)
# Wait for all to complete with periodic heartbeats so the
# gateway's inactivity monitor doesn't kill us during long
# concurrent tool batches. Also check for user interrupts
# so we don't block indefinitely when the user sends /stop
# or a new message during concurrent tool execution.
_conc_start = time.time()
_interrupt_logged = False
while True:
done, not_done = concurrent.futures.wait(
futures, timeout=5.0,
)
if not not_done:
break
# Wait for all to complete with periodic heartbeats so the
# gateway's inactivity monitor doesn't kill us during long
# concurrent tool batches. Also check for user interrupts
# so we don't block indefinitely when the user sends /stop
# or a new message during concurrent tool execution.
_conc_start = time.time()
_interrupt_logged = False
while True:
done, not_done = concurrent.futures.wait(
futures, timeout=5.0,
)
if not not_done:
break
# Check for interrupt — the per-thread interrupt signal
# already causes individual tools (terminal, execute_code)
# to abort, but tools without interrupt checks (web_search,
# read_file) will run to completion. Cancel any futures
# that haven't started yet so we don't block on them.
if self._interrupt_requested:
if not _interrupt_logged:
_interrupt_logged = True
self._vprint(
f"{self.log_prefix}⚡ Interrupt: cancelling "
f"{len(not_done)} pending concurrent tool(s)",
force=True,
)
for f in not_done:
f.cancel()
# Give already-running tools a moment to notice the
# per-thread interrupt signal and exit gracefully.
concurrent.futures.wait(not_done, timeout=3.0)
break
_conc_elapsed = int(time.time() - _conc_start)
# Heartbeat every ~30s (6 × 5s poll intervals)
if _conc_elapsed > 0 and _conc_elapsed % 30 < 6:
_still_running = [
parsed_calls[futures.index(f)][1]
for f in not_done
if f in futures
]
self._touch_activity(
f"concurrent tools running ({_conc_elapsed}s, "
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
# Check for interrupt — the per-thread interrupt signal
# already causes individual tools (terminal, execute_code)
# to abort, but tools without interrupt checks (web_search,
# read_file) will run to completion. Cancel any futures
# that haven't started yet so we don't block on them.
if self._interrupt_requested:
if not _interrupt_logged:
_interrupt_logged = True
self._vprint(
f"{self.log_prefix}⚡ Interrupt: cancelling "
f"{len(not_done)} pending concurrent tool(s)",
force=True,
)
for f in not_done:
f.cancel()
# Give already-running tools a moment to notice the
# per-thread interrupt signal and exit gracefully.
concurrent.futures.wait(not_done, timeout=3.0)
break
_conc_elapsed = int(time.time() - _conc_start)
# Heartbeat every ~30s (6 × 5s poll intervals)
if _conc_elapsed > 0 and _conc_elapsed % 30 < 6:
_still_running = [
parsed_calls[futures.index(f)][1]
for f in not_done
if f in futures
]
self._touch_activity(
f"concurrent tools running ({_conc_elapsed}s, "
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
)
finally:
if spinner:
# Build a summary message for the spinner stop
@@ -9623,9 +9503,8 @@ class AIAgent:
spinner.stop(f"{completed}/{num_tools} tools completed in {total_dur:.1f}s total")
# ── Post-execution: display per-tool results ─────────────────────
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
for i, (tc, name, args) in enumerate(parsed_calls):
r = results[i]
blocked = False
if r is None:
# Tool was cancelled (interrupt) or thread didn't return
if self._interrupt_requested:
@@ -9634,21 +9513,13 @@ class AIAgent:
function_result = f"Error executing tool '{name}': thread did not return a result"
tool_duration = 0.0
else:
function_name, function_args, function_result, tool_duration, is_error, blocked = r
if not blocked:
function_result = self._append_guardrail_observation(
function_name,
function_args,
function_result,
failed=is_error,
)
function_name, function_args, function_result, tool_duration, is_error = r
if is_error:
result_preview = function_result[:200] if len(function_result) > 200 else function_result
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
if not blocked and self.tool_progress_callback:
if self.tool_progress_callback:
try:
self.tool_progress_callback(
"tool.completed", function_name, None, None,
@@ -9676,7 +9547,7 @@ class AIAgent:
self._current_tool = None
self._touch_activity(f"tool completed: {name} ({tool_duration:.1f}s)")
if not blocked and self.tool_complete_callback:
if self.tool_complete_callback:
try:
self.tool_complete_callback(tc.id, name, args, function_result)
except Exception as cb_err:
@@ -9758,17 +9629,9 @@ class AIAgent:
except Exception:
pass
_guardrail_block_decision: ToolGuardrailDecision | None = None
if _block_msg is None:
guardrail_decision = self._tool_guardrails.before_call(function_name, function_args)
if not guardrail_decision.allows_execution:
_guardrail_block_decision = guardrail_decision
_execution_blocked = _block_msg is not None or _guardrail_block_decision is not None
if _execution_blocked:
# Tool blocked by plugin or guardrail policy — skip counters,
# callbacks, checkpointing, activity mutation, and real execution.
if _block_msg is not None:
# Tool blocked by plugin policy — skip counter resets.
# Execution is handled below in the tool dispatch chain.
pass
else:
# Reset nudge counters when the relevant tool is actually used
@@ -9786,35 +9649,35 @@ class AIAgent:
args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
if not _execution_blocked:
if _block_msg is None:
self._current_tool = function_name
self._touch_activity(f"executing tool: {function_name}")
# Set activity callback for long-running tool execution (terminal
# commands, etc.) so the gateway's inactivity monitor doesn't kill
# the agent while a command is running.
if not _execution_blocked:
if _block_msg is None:
try:
from tools.environments.base import set_activity_callback
set_activity_callback(self._touch_activity)
except Exception:
pass
if not _execution_blocked and self.tool_progress_callback:
if _block_msg is None and self.tool_progress_callback:
try:
preview = _build_tool_preview(function_name, function_args)
self.tool_progress_callback("tool.started", function_name, preview, function_args)
except Exception as cb_err:
logging.debug(f"Tool progress callback error: {cb_err}")
if not _execution_blocked and self.tool_start_callback:
if _block_msg is None and self.tool_start_callback:
try:
self.tool_start_callback(tool_call.id, function_name, function_args)
except Exception as cb_err:
logging.debug(f"Tool start callback error: {cb_err}")
# Checkpoint: snapshot working dir before file-mutating tools
if not _execution_blocked and function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled:
if _block_msg is None and function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled:
try:
file_path = function_args.get("path", "")
if file_path:
@@ -9826,7 +9689,7 @@ class AIAgent:
pass # never block tool execution
# Checkpoint before destructive terminal commands
if not _execution_blocked and function_name == "terminal" and self._checkpoint_mgr.enabled:
if _block_msg is None and function_name == "terminal" and self._checkpoint_mgr.enabled:
try:
cmd = function_args.get("command", "")
if _is_destructive_command(cmd):
@@ -9843,11 +9706,6 @@ class AIAgent:
# Tool blocked by plugin policy — return error without executing.
function_result = json.dumps({"error": _block_msg}, ensure_ascii=False)
tool_duration = 0.0
elif _guardrail_block_decision is not None:
# Tool blocked by tool-loop guardrail — synthesize exactly one
# tool result for the original tool_call_id without executing.
function_result = self._guardrail_block_result(_guardrail_block_decision)
tool_duration = 0.0
elif function_name == "todo":
from tools.todo_tool import todo_tool as _todo_tool
function_result = _todo_tool(
@@ -10031,22 +9889,12 @@ class AIAgent:
# Log tool errors to the persistent error log so [error] tags
# in the UI always have a corresponding detailed entry on disk.
_is_error_result, _ = _detect_tool_failure(function_name, function_result)
if not _execution_blocked:
function_result = self._append_guardrail_observation(
function_name,
function_args,
function_result,
failed=_is_error_result,
)
result_preview = function_result if self.verbose_logging else (
function_result[:200] if len(function_result) > 200 else function_result
)
if _is_error_result:
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
else:
logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, len(function_result))
if not _execution_blocked and self.tool_progress_callback:
if self.tool_progress_callback:
try:
self.tool_progress_callback(
"tool.completed", function_name, None, None,
@@ -10062,7 +9910,7 @@ class AIAgent:
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")
if not _execution_blocked and self.tool_complete_callback:
if self.tool_complete_callback:
try:
self.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
except Exception as cb_err:
@@ -10165,13 +10013,6 @@ class AIAgent:
for idx, pfm in enumerate(self.prefill_messages):
api_messages.insert(sys_offset + idx, pfm.copy())
# Same safety net as the main loop: repair tool-call/result
# pairing before asking for a final summary. Compression and
# session resume can leave a tool result whose parent assistant
# tool_call was summarized away; Responses API rejects that as
# "No tool call found for function call output".
api_messages = self._sanitize_api_messages(api_messages)
# Same safety net as the main loop: drop thinking-only assistant
# turns so Anthropic-family providers don't 400 the summary call.
api_messages = self._drop_thinking_only_and_merge_users(api_messages)
@@ -10353,8 +10194,6 @@ class AIAgent:
# Installed once, transparent when streams are healthy, prevents crash on write.
_install_safe_stdio()
self._ensure_db_session()
# Tag all log records on this thread with the session ID so
# ``hermes logs --session <id>`` can filter a single conversation.
from hermes_logging import set_session_context
@@ -10398,8 +10237,6 @@ class AIAgent:
self._last_content_tools_all_housekeeping = False
self._mute_post_response = False
self._unicode_sanitization_passes = 0
self._tool_guardrails.reset_for_turn()
self._tool_guardrail_halt_decision = None
# Pre-turn connection health check: detect and clean up dead TCP
# connections left over from provider outages or dropped streams.
@@ -13197,16 +13034,6 @@ class AIAgent:
self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
if self._tool_guardrail_halt_decision is not None:
decision = self._tool_guardrail_halt_decision
_turn_exit_reason = "guardrail_halt"
final_response = self._toolguard_controlled_halt_response(decision)
self._emit_status(
f"⚠️ Tool guardrail halted {decision.tool_name}: {decision.code}"
)
messages.append({"role": "assistant", "content": final_response})
break
# Reset per-turn retry counters after successful tool
# execution so a single truncation doesn't poison the
# entire conversation.
@@ -13250,13 +13077,7 @@ class AIAgent:
# causing premature compression. (#12026)
_real_tokens = _compressor.last_prompt_tokens
else:
# Include tool schemas — with 50+ tools enabled
# these add 20-30K tokens the messages-only
# estimate misses, which can skip compression
# past the configured threshold (#14695).
_real_tokens = estimate_request_tokens_rough(
messages, tools=self.tools or None
)
_real_tokens = estimate_messages_tokens_rough(messages)
if self.compression_enabled and _compressor.should_compress(_real_tokens):
self._safe_print(" ⟳ compacting context…")
@@ -13739,7 +13560,6 @@ class AIAgent:
"messages": messages,
"api_calls": api_call_count,
"completed": completed,
"turn_exit_reason": _turn_exit_reason,
"partial": False, # True only when stopped due to invalid tool calls
"interrupted": interrupted,
"response_previewed": getattr(self, "_response_was_previewed", False),
@@ -13759,8 +13579,6 @@ class AIAgent:
"cost_status": self.session_cost_status,
"cost_source": self.session_cost_source,
}
if self._tool_guardrail_halt_decision is not None:
result["guardrail"] = self._tool_guardrail_halt_decision.to_metadata()
# If a /steer landed after the final assistant turn (no more tool
# batches to drain into), hand it back to the caller so it can be
# delivered as the next user turn instead of being silently lost.
+2 -10
View File
@@ -35,18 +35,10 @@ import time
from pathlib import Path
from typing import Any
_PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(_PROJECT_ROOT))
try:
from hermes_constants import get_hermes_home
except ImportError:
def get_hermes_home() -> Path: # type: ignore[misc]
val = (os.environ.get("HERMES_HOME") or "").strip()
return Path(val) if val else Path.home() / ".hermes"
DEFAULT_TUI_DIR = Path(os.environ.get("HERMES_TUI_DIR", "/home/bb/hermes-agent/ui-tui"))
DEFAULT_LOG = Path(os.environ.get("HERMES_PERF_LOG", str(get_hermes_home() / "perf.log")))
DEFAULT_STATE_DB = get_hermes_home() / "state.db"
DEFAULT_LOG = Path(os.environ.get("HERMES_PERF_LOG", str(Path.home() / ".hermes" / "perf.log")))
DEFAULT_STATE_DB = Path.home() / ".hermes" / "state.db"
# Keystroke escape sequences. Matches what xterm/VT220 send when the
# terminal has bracketed-paste disabled and the key-repeat handler fires.
-17
View File
@@ -41,17 +41,13 @@ PYPROJECT_FILE = REPO_ROOT / "pyproject.toml"
AUTHOR_MAP = {
# teknium (multiple emails)
"teknium1@gmail.com": "teknium1",
"m@mobrienv.dev": "mikeyobrien",
"qiyin.zuo@pcitc.com": "qiyin-code",
"leone.parise@gmail.com": "leoneparise",
"teknium@nousresearch.com": "teknium1",
"127238744+teknium1@users.noreply.github.com": "teknium1",
"aludwin+gh@gmail.com": "adamludwin",
"2093036+exiao@users.noreply.github.com": "exiao",
"rylen.anil@gmail.com": "rylena",
"godnanijatin@gmail.com": "jatingodnani",
"14046872+tmimmanuel@users.noreply.github.com": "tmimmanuel",
"657290301@qq.com": "IMHaoyan",
"revar@users.noreply.github.com": "revaraver",
# Matrix parity salvage batch (April 2026)
"sr@samirusani": "samrusani",
@@ -96,8 +92,6 @@ AUTHOR_MAP = {
"130918800+devorun@users.noreply.github.com": "devorun",
"surat.s@itm.kmutnb.ac.th": "beesrsj2500",
"beesr@bee.localdomain": "beesrsj2500",
"mind-dragon@nous.research": "Mind-Dragon",
"juntingpublic@gmail.com": "JustinUssuri",
"mtf201013@gmail.com": "ma-pony",
"sonoyuncudmr@gmail.com": "Sonoyunchu",
"43525405+yatesjalex@users.noreply.github.com": "yatesjalex",
@@ -106,8 +100,6 @@ AUTHOR_MAP = {
"web3blind@users.noreply.github.com": "web3blind",
"julia@alexland.us": "alexg0bot",
"christian@scheid.tech": "scheidti",
# Moonshot schema anyOf+enum salvage (May 2026)
"git@local.invalid": "hendrixfreire",
"1060770+benjaminsehl@users.noreply.github.com": "benjaminsehl",
"nerijusn76@gmail.com": "Nerijusas",
"itonov@proton.me": "Ito-69",
@@ -120,7 +112,6 @@ AUTHOR_MAP = {
"foxion37@gmail.com": "foxion37",
"bloodcarter@gmail.com": "bloodcarter",
"scott@scotttrinh.com": "scotttrinh",
"quocanh261997@gmail.com": "quocanh261997",
# contributors (from noreply pattern)
"david.vv@icloud.com": "davidvv",
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
@@ -176,7 +167,6 @@ AUTHOR_MAP = {
"sir_even@icloud.com": "sirEven",
"36056348+sirEven@users.noreply.github.com": "sirEven",
"70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
"jezzahehn@gmail.com": "JezzaHehn",
"254021826+dodo-reach@users.noreply.github.com": "dodo-reach",
"259807879+Bartok9@users.noreply.github.com": "Bartok9",
"270082434+crayfish-ai@users.noreply.github.com": "crayfish-ai",
@@ -302,7 +292,6 @@ AUTHOR_MAP = {
"154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
"12250313+Kailigithub@users.noreply.github.com": "Kailigithub",
"mgparkprint@gmail.com": "vlwkaos",
"1317078257maroon@gmail.com": "Oxidane-bot",
"tranquil_flow@protonmail.com": "Tranquil-Flow",
"LyleLengyel@gmail.com": "mcndjxlefnd",
"wangshengyang2004@163.com": "Wangshengyang2004",
@@ -341,7 +330,6 @@ AUTHOR_MAP = {
"stefan@dimagents.ai": "dimitrovi",
"hermes@noushq.ai": "benbarclay",
"chinmingcock@gmail.com": "ChimingLiu",
"allard.quek@singtel.com": "AllardQuek",
"openclaw@sparklab.ai": "openclaw",
"semihcvlk53@gmail.com": "Himess",
"erenkar950@gmail.com": "erenkarakus",
@@ -434,8 +422,6 @@ AUTHOR_MAP = {
"ogzerber@users.noreply.github.com": "ogzerber",
"cola-runner@users.noreply.github.com": "cola-runner",
"ygd58@users.noreply.github.com": "ygd58",
"45554392+warabe1122@users.noreply.github.com": "warabe1122",
"187001140+willy-scr@users.noreply.github.com": "willy-scr",
"vominh1919@users.noreply.github.com": "vominh1919",
"iamagenius00@users.noreply.github.com": "iamagenius00",
"9219265+cresslank@users.noreply.github.com": "cresslank",
@@ -460,7 +446,6 @@ AUTHOR_MAP = {
"taosiyuan163@153.com": "taosiyuan163",
"tesseracttars@gmail.com": "tesseracttars-creator",
"tianliangjay@gmail.com": "xingkongliang",
"1317078257maroon@gmail.com": "Oxidane-bot",
"tranquil_flow@protonmail.com": "Tranquil-Flow",
"LyleLengyel@gmail.com": "mcndjxlefnd",
"unayung@gmail.com": "Unayung",
@@ -506,11 +491,9 @@ AUTHOR_MAP = {
"hubin_ll@qq.com": "LLQWQ",
"memosr_email@gmail.com": "memosr",
"jperlow@gmail.com": "perlowja",
"jasonpette1783@gmail.com": "web-dev0521",
"tangyuanjc@JCdeAIfenshendeMac-mini.local": "tangyuanjc",
"harryplusplus@gmail.com": "harryplusplus",
"anthhub@163.com": "anthhub",
"allard.quek@singtel.com": "AllardQuek",
"shenuu@gmail.com": "shenuu",
"xiayh17@gmail.com": "xiayh0107",
"zhujianxyz@gmail.com": "opriz",
+1 -2
View File
@@ -730,7 +730,6 @@ class TestSlashCommands:
]
state.agent.compression_enabled = True
state.agent._cached_system_prompt = "system"
state.agent.tools = None
original_session_db = object()
state.agent._session_db = original_session_db
@@ -747,7 +746,7 @@ class TestSlashCommands:
with (
patch.object(agent.session_manager, "save_session") as mock_save,
patch(
"agent.model_metadata.estimate_request_tokens_rough",
"agent.model_metadata.estimate_messages_tokens_rough",
side_effect=[40, 12],
),
):
-75
View File
@@ -8,7 +8,6 @@ from types import SimpleNamespace
import pytest
from unittest.mock import MagicMock, patch
from acp_adapter import session as acp_session
from acp_adapter.session import SessionManager, SessionState
from hermes_state import SessionDB
@@ -43,27 +42,6 @@ class TestCreateSession:
state = manager.create_session(cwd="/tmp/work")
assert calls == [(state.session_id, "/tmp/work")]
def test_register_task_cwd_translates_windows_drive_for_wsl_tools(self, monkeypatch):
    """With WSL flagged as detected, a Windows drive cwd is registered as a
    ``/mnt/<drive>/...`` path in the task env overrides."""
    captured = {}

    def fake_register_task_env_overrides(task_id, overrides):
        # Record what _register_task_cwd passes to the terminal tool.
        captured["task_id"] = task_id
        captured["overrides"] = overrides

    monkeypatch.setattr("hermes_constants._wsl_detected", True)
    monkeypatch.setattr(
        "tools.terminal_tool.register_task_env_overrides",
        fake_register_task_env_overrides,
    )
    acp_session._register_task_cwd("session-1", r"E:\Projects\AI\paperclip")
    assert captured == {
        "task_id": "session-1",
        "overrides": {"cwd": "/mnt/e/Projects/AI/paperclip"},
    }
def test_session_ids_are_unique(self, manager):
s1 = manager.create_session()
s2 = manager.create_session()
@@ -78,59 +56,6 @@ class TestCreateSession:
assert manager.get_session("does-not-exist") is None
# ---------------------------------------------------------------------------
# WSL cwd translation
# ---------------------------------------------------------------------------
class TestWslCwdTranslation:
    """Windows drive-letter cwds become ``/mnt/<drive>/...`` only when WSL is detected."""

    def test_translate_acp_cwd_converts_windows_drive_path_when_wsl(self, monkeypatch):
        """Backslash drive paths are converted under WSL."""
        monkeypatch.setattr("hermes_constants._wsl_detected", True)
        assert acp_session._translate_acp_cwd(r"E:\Projects\AI\paperclip") == "/mnt/e/Projects/AI/paperclip"

    def test_translate_acp_cwd_handles_forward_slashes_when_wsl(self, monkeypatch):
        """Forward-slash drive paths are converted under WSL."""
        monkeypatch.setattr("hermes_constants._wsl_detected", True)
        assert acp_session._translate_acp_cwd("D:/work/project") == "/mnt/d/work/project"

    def test_translate_acp_cwd_leaves_windows_drive_path_unchanged_off_wsl(self, monkeypatch):
        """Without WSL the drive path is returned untouched."""
        monkeypatch.setattr("hermes_constants._wsl_detected", False)
        assert acp_session._translate_acp_cwd(r"E:\Projects\AI\paperclip") == r"E:\Projects\AI\paperclip"

    def test_translate_acp_cwd_leaves_posix_path_unchanged_on_wsl(self, monkeypatch):
        """POSIX paths are returned untouched even under WSL."""
        monkeypatch.setattr("hermes_constants._wsl_detected", True)
        assert acp_session._translate_acp_cwd("/mnt/e/Projects/AI/paperclip") == "/mnt/e/Projects/AI/paperclip"

    def test_create_session_stores_translated_cwd_on_wsl(self, manager, monkeypatch):
        """``create_session`` stores the translated cwd on the session state."""
        monkeypatch.setattr("hermes_constants._wsl_detected", True)
        state = manager.create_session(cwd=r"E:\Projects\AI\paperclip")
        assert state.cwd == "/mnt/e/Projects/AI/paperclip"

    def test_fork_session_stores_translated_cwd_on_wsl(self, manager, monkeypatch):
        """``fork_session`` stores the translated cwd on the forked state."""
        monkeypatch.setattr("hermes_constants._wsl_detected", True)
        original = manager.create_session(cwd="/tmp/base")
        forked = manager.fork_session(original.session_id, cwd=r"D:\work\project")
        assert forked is not None
        assert forked.cwd == "/mnt/d/work/project"

    def test_update_cwd_stores_translated_cwd_on_wsl(self, manager, monkeypatch):
        """``update_cwd`` stores the translated cwd on the updated state."""
        monkeypatch.setattr("hermes_constants._wsl_detected", True)
        state = manager.create_session(cwd="/tmp/old")
        updated = manager.update_cwd(state.session_id, cwd=r"C:\Users\foo\project")
        assert updated is not None
        assert updated.cwd == "/mnt/c/Users/foo/project"
# ---------------------------------------------------------------------------
# fork
# ---------------------------------------------------------------------------
-150
View File
@@ -1,150 +0,0 @@
from types import SimpleNamespace
import pytest
from acp.schema import TextContentBlock
from acp_adapter.server import HermesACPAgent
from acp_adapter.session import SessionManager
class FakeAgent:
    """Minimal stand-in for the real agent that records steer and run calls."""

    def __init__(self):
        self.model = "fake-model"
        self.provider = "fake-provider"
        self.enabled_toolsets = ["hermes-acp"]
        self.disabled_toolsets = []
        self.tools = []
        self.valid_tool_names = set()
        # Call logs inspected by the tests.
        self.steers = []
        self.runs = []

    def steer(self, text):
        """Record *text* as a steer request and report success."""
        self.steers.append(text)
        return True

    def run_conversation(self, *, user_message, conversation_history, task_id, **kwargs):
        """Record the prompt and return a canned ``ran: <prompt>`` reply.

        Returns a dict with ``final_response`` and ``messages``; the latter is
        a copy of *conversation_history* (empty if falsy) plus the new user
        and assistant messages.
        """
        self.runs.append(user_message)
        messages = list(conversation_history or [])
        messages.append({"role": "user", "content": user_message})
        final = f"ran: {user_message}"
        messages.append({"role": "assistant", "content": final})
        return {"final_response": final, "messages": messages}
class CaptureConn:
    """Fake ACP connection that records session updates and allows every permission request."""

    def __init__(self):
        # List of (session_id, update) tuples in arrival order.
        self.updates = []

    async def session_update(self, *args, **kwargs):
        """Record one update, accepting either keyword or positional arguments."""
        if kwargs:
            self.updates.append((kwargs.get("session_id"), kwargs.get("update")))
        else:
            self.updates.append((args[0], args[1]))

    async def request_permission(self, *args, **kwargs):
        """Return an object whose ``outcome`` is ``"allow"``."""
        return SimpleNamespace(outcome="allow")
class NoopDb:
    """Session DB stub whose methods accept any arguments and return ``None``."""

    def get_session(self, *_args, **_kwargs):
        """Always report that no session is stored."""
        return None

    def create_session(self, *_args, **_kwargs):
        """Accept and discard a create request."""
        return None

    def update_session(self, *_args, **_kwargs):
        """Accept and discard an update request."""
        return None
def make_agent_and_state():
    """Build a connected ``HermesACPAgent`` backed by fakes.

    Returns ``(acp_agent, state, fake, conn)``: the ACP agent, a freshly
    created session state, the ``FakeAgent`` the session manager hands out,
    and the ``CaptureConn`` registered through ``on_connect``.
    """
    fake = FakeAgent()
    manager = SessionManager(agent_factory=lambda **kwargs: fake, db=NoopDb())
    acp_agent = HermesACPAgent(session_manager=manager)
    state = manager.create_session(cwd=".")
    conn = CaptureConn()
    acp_agent.on_connect(conn)
    return acp_agent, state, fake, conn
@pytest.mark.asyncio
async def test_acp_steer_slash_command_injects_into_running_agent():
    """``/steer`` on a running session calls ``steer`` and starts no new run."""
    acp_agent, state, fake, _conn = make_agent_and_state()
    state.is_running = True
    response = await acp_agent.prompt(
        session_id=state.session_id,
        prompt=[TextContentBlock(type="text", text="/steer prefer the simpler fix")],
    )
    assert response.stop_reason == "end_turn"
    assert fake.steers == ["prefer the simpler fix"]
    assert fake.runs == []
@pytest.mark.asyncio
async def test_acp_steer_after_zed_interrupt_replays_interrupted_prompt_with_guidance():
    """``/steer`` after an interrupt re-runs the interrupted prompt with the
    guidance appended, then clears ``interrupted_prompt_text``."""
    acp_agent, state, fake, _conn = make_agent_and_state()
    state.interrupted_prompt_text = "write hi to a text file"
    response = await acp_agent.prompt(
        session_id=state.session_id,
        prompt=[TextContentBlock(type="text", text="/steer write HELLO instead")],
    )
    assert response.stop_reason == "end_turn"
    assert fake.steers == []
    assert fake.runs == [
        "write hi to a text file\n\nUser correction/guidance after interrupt: write HELLO instead"
    ]
    assert state.interrupted_prompt_text == ""
@pytest.mark.asyncio
async def test_acp_steer_on_idle_session_runs_as_regular_prompt():
    """``/steer`` on an idle session runs its payload as a normal prompt."""
    # /steer on an idle session (no running turn, nothing to salvage) should
    # run the steer payload as a normal user prompt — NOT silently append it
    # to state.queued_prompts. Without this, users on Zed / other ACP clients
    # see their /steer turn into "queued for the next turn" when they never
    # typed /queue. Matches gateway/run.py ~L4898 idle-/steer behavior.
    acp_agent, state, fake, _conn = make_agent_and_state()
    response = await acp_agent.prompt(
        session_id=state.session_id,
        prompt=[TextContentBlock(type="text", text="/steer summarize the README")],
    )
    assert response.stop_reason == "end_turn"
    assert fake.steers == []
    assert fake.runs == ["summarize the README"]
    assert state.queued_prompts == []
@pytest.mark.asyncio
async def test_acp_queue_slash_command_adds_next_turn_without_running_now():
    """``/queue`` stores its payload in ``queued_prompts`` and runs nothing."""
    acp_agent, state, fake, _conn = make_agent_and_state()
    response = await acp_agent.prompt(
        session_id=state.session_id,
        prompt=[TextContentBlock(type="text", text="/queue run the tests after this")],
    )
    assert response.stop_reason == "end_turn"
    assert state.queued_prompts == ["run the tests after this"]
    assert fake.runs == []
@pytest.mark.asyncio
async def test_acp_prompt_drains_queued_turns_after_current_run():
    """A prompt runs first, then every queued prompt runs and the queue is emptied."""
    acp_agent, state, fake, conn = make_agent_and_state()
    state.queued_prompts.append("then run tests")
    response = await acp_agent.prompt(
        session_id=state.session_id,
        prompt=[TextContentBlock(type="text", text="make the change")],
    )
    assert response.stop_reason == "end_turn"
    assert fake.runs == ["make the change", "then run tests"]
    assert state.queued_prompts == []
    # Expect at least one agent_message_chunk update per run.
    agent_messages = [u for _sid, u in conn.updates if getattr(u, "session_update", None) == "agent_message_chunk"]
    assert len(agent_messages) >= 2
@@ -427,68 +427,3 @@ class TestProvidersDictApiModeAnthropicMessages:
assert isinstance(sync_client, OpenAI)
async_client, _ = resolve_provider_client("localchat", async_mode=True)
assert isinstance(async_client, AsyncOpenAI)
class TestCustomProviderAliasCollision:
    """A user-declared custom_providers entry whose name matches a built-in
    *alias* (not a canonical provider) must win over the built-in.

    Regression guard for #15743: users who defined fallback_model pointing at
    a custom_providers entry named ``kimi`` were having requests routed to
    the built-in kimi-coding endpoint because ``_normalize_aux_provider``
    rewrote ``kimi`` → ``kimi-coding`` before the named-custom lookup.
    """

    def test_custom_named_kimi_wins_over_builtin_alias(self, tmp_path):
        """A custom provider named ``kimi`` supplies the base URL, key and model."""
        _write_config(tmp_path, {
            "model": {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"},
            "custom_providers": [
                {
                    "name": "kimi",
                    "base_url": "https://my-custom-kimi.example.com/v1",
                    "api_key": "my-kimi-key",
                    "models": {"my-kimi-model": {"context_length": 200000}},
                },
            ],
        })
        from agent.auxiliary_client import resolve_provider_client
        from openai import OpenAI
        client, model = resolve_provider_client("kimi", model="my-kimi-model", raw_codex=True)
        assert isinstance(client, OpenAI)
        assert "my-custom-kimi.example.com" in str(client.base_url)
        assert client.api_key == "my-kimi-key"
        assert model == "my-kimi-model"

    def test_bare_kimi_without_custom_still_routes_to_builtin(self, tmp_path, monkeypatch):
        """Regression guard: bare 'kimi' with no custom entry must still
        reach the built-in kimi-coding provider."""
        _write_config(tmp_path, {
            "model": {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"},
        })
        monkeypatch.setenv("KIMI_API_KEY", "builtin-kimi-key")
        from agent.auxiliary_client import resolve_provider_client
        client, _ = resolve_provider_client("kimi", model="kimi-k2-0905-preview", raw_codex=True)
        assert client is not None
        base_url = str(client.base_url)
        # Built-in kimi-coding points at api.moonshot.ai
        assert "moonshot" in base_url or "kimi" in base_url, f"unexpected base_url {base_url!r}"

    def test_explicit_overrides_applied_on_api_key_branch(self, tmp_path, monkeypatch):
        """Explicit base_url/api_key from the caller must override the
        registered provider's defaults on the API-key branch. Used by
        _try_activate_fallback to route a fallback through a built-in
        provider name but targeting a user-supplied endpoint."""
        _write_config(tmp_path, {
            "model": {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"},
        })
        monkeypatch.setenv("KIMI_API_KEY", "builtin-kimi-key")
        from agent.auxiliary_client import resolve_provider_client
        from openai import OpenAI
        client, _ = resolve_provider_client(
            "kimi-coding", model="kimi-k2", raw_codex=True,
            explicit_base_url="https://override.example.com",
            explicit_api_key="override-key",
        )
        assert isinstance(client, OpenAI)
        assert "override.example.com" in str(client.base_url)
        assert client.api_key == "override-key"
-52
View File
@@ -640,30 +640,6 @@ class TestCompressWithClient:
for tc in msg["tool_calls"]:
assert tc["id"] in answered_ids
def test_sanitizer_matches_responses_call_id_when_id_differs(self, compressor):
    """A tool result keyed by the tool call's ``call_id`` is kept even when
    the call's ``id`` is a different value."""
    msgs = [
        {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": "fc_123",
                    "call_id": "call_123",
                    "response_item_id": "fc_123",
                    "type": "function",
                    "function": {"name": "search_files", "arguments": "{}"},
                }
            ],
        },
        {"role": "tool", "tool_call_id": "call_123", "content": "result"},
    ]
    sanitized = compressor._sanitize_tool_pairs(msgs)
    assert [m.get("tool_call_id") for m in sanitized if m.get("role") == "tool"] == [
        "call_123"
    ]
def test_summary_role_avoids_consecutive_user_messages(self):
"""Summary role should alternate with the last head message to avoid consecutive same-role messages."""
mock_client = MagicMock()
@@ -1143,34 +1119,6 @@ class TestTokenBudgetTailProtection:
# At least one old tool result should have been pruned
assert pruned >= 1
def test_prune_short_conv_protects_entire_tail(self, budget_compressor):
    """Regression guard for PR #17025.

    When ``len(messages) <= protect_tail_count`` and a token budget is
    also set, every message must be protected. The previous code used
    ``min(protect_tail_count, len(result) - 1)`` which capped the floor
    one below the full length, leaving the oldest message eligible for
    pruning.
    """
    c = budget_compressor
    # 4 messages, protect_tail_count=4 -- nothing should be pruned.
    # Oldest message is a large tool result; on the buggy path it falls
    # outside the protected window and gets summarized.
    messages = [
        {"role": "tool", "content": "x" * 5000, "tool_call_id": "c0"},
        {"role": "assistant", "content": "ack"},
        {"role": "user", "content": "recent"},
        {"role": "assistant", "content": "reply"},
    ]
    result, pruned = c._prune_old_tool_results(
        messages,
        protect_tail_count=4,
        protect_tail_tokens=1_000_000,  # budget large enough to protect all
    )
    assert pruned == 0
    # Tool result at index 0 must be preserved verbatim
    assert result[0]["content"] == "x" * 5000
def test_prune_without_token_budget_uses_message_count(self, budget_compressor):
"""Without protect_tail_tokens, falls back to message-count behavior."""
c = budget_compressor
-13
View File
@@ -363,19 +363,6 @@ def test_state_atomic_write_no_tmp_leftovers(curator_env):
assert not p.name.startswith(".curator_state_"), f"tmp leftover: {p.name}"
def test_state_preserves_last_report_path(curator_env):
    """``last_report_path`` written by ``save_state`` is returned by ``load_state``."""
    c = curator_env["curator"]
    c.save_state({
        "last_run_at": "2026-04-30T12:00:00+00:00",
        "last_run_summary": "ok",
        "last_report_path": "/tmp/curator-report",
        "paused": False,
        "run_count": 1,
    })
    state = c.load_state()
    assert state["last_report_path"] == "/tmp/curator-report"
def test_curator_review_prompt_has_invariants():
"""Core invariants must be in the review prompt text."""
from agent.curator import CURATOR_REVIEW_PROMPT
-164
View File
@@ -270,167 +270,3 @@ def test_state_transitions_captured_in_report(curator_env):
assert "State transitions" in md
assert "getting-old" in md
assert "active → stale" in md
# ---------------------------------------------------------------------------
# Cron job skill reference rewriting (curator ↔ cron integration)
# ---------------------------------------------------------------------------
#
# When the curator consolidates skill X into umbrella Y during a run, any
# cron job that listed X in its ``skills`` field would fail to load X at
# run time — the scheduler logs a warning and skips it, so the scheduled
# job runs without the instructions it was scheduled to follow. These
# tests verify that _write_run_report calls into cron.jobs to repair
# those references and records what it did in both run.json and
# cron_rewrites.json.
@pytest.fixture
def curator_env_with_cron(curator_env, monkeypatch):
    """Extend curator_env with an initialized + repointed cron.jobs module."""
    home = curator_env["home"]
    (home / "cron").mkdir(exist_ok=True)
    (home / "cron" / "output").mkdir(exist_ok=True)
    import importlib
    import cron.jobs as jobs_mod
    importlib.reload(jobs_mod)
    # Point every cron path constant at the test home directory.
    monkeypatch.setattr(jobs_mod, "HERMES_DIR", home)
    monkeypatch.setattr(jobs_mod, "CRON_DIR", home / "cron")
    monkeypatch.setattr(jobs_mod, "JOBS_FILE", home / "cron" / "jobs.json")
    monkeypatch.setattr(jobs_mod, "OUTPUT_DIR", home / "cron" / "output")
    return {**curator_env, "jobs": jobs_mod}
def test_curator_rewrites_cron_skills_when_skill_consolidated(curator_env_with_cron):
    """A skill consolidated into an umbrella should be rewritten in any
    cron job's skills list; the rewrite should be visible in run.json
    and cron_rewrites.json."""
    curator = curator_env_with_cron["curator"]
    jobs = curator_env_with_cron["jobs"]
    # Create a cron job that depends on a soon-to-be-consolidated skill
    job = jobs.create_job(
        prompt="",
        schedule="every 1h",
        skills=["foo"],
        name="foo-watcher",
    )
    # Simulate a curator pass that consolidated `foo` → `foo-umbrella`
    before = [{"name": "foo", "state": "active", "pinned": False}]
    after = [{"name": "foo-umbrella", "state": "active", "pinned": False}]
    run_dir = curator._write_run_report(
        started_at=datetime.now(timezone.utc),
        elapsed_seconds=3.0,
        auto_counts={"checked": 1, "marked_stale": 0, "archived": 0, "reactivated": 0},
        auto_summary="no changes",
        before_report=before,
        before_names={"foo"},
        after_report=after,
        llm_meta=_make_llm_meta(
            final="Consolidated foo into foo-umbrella.",
            tool_calls=[
                {
                    "name": "skill_manage",
                    "arguments": json.dumps({
                        "action": "write_file",
                        "name": "foo-umbrella",
                        "file_path": "references/foo.md",
                        "file_content": "from foo",
                    }),
                },
            ],
        ),
    )
    # Cron job is rewritten on disk
    loaded = jobs.get_job(job["id"])
    assert loaded["skills"] == ["foo-umbrella"]
    assert loaded["skill"] == "foo-umbrella"
    # Rewrite is recorded in run.json
    payload = json.loads((run_dir / "run.json").read_text())
    assert payload["cron_rewrites"]["jobs_updated"] == 1
    assert payload["counts"]["cron_jobs_rewritten"] == 1
    rewrites = payload["cron_rewrites"]["rewrites"]
    assert len(rewrites) == 1
    assert rewrites[0]["mapped"] == {"foo": "foo-umbrella"}
    # Separate cron_rewrites.json is written for convenience
    cron_file = run_dir / "cron_rewrites.json"
    assert cron_file.exists()
    detail = json.loads(cron_file.read_text())
    assert detail["jobs_updated"] == 1
    # Markdown surfaces the change
    md = (run_dir / "REPORT.md").read_text()
    assert "Cron job skill references rewritten" in md
    assert "foo-watcher" in md
    assert "foo-umbrella" in md
def test_curator_drops_pruned_skill_from_cron_job(curator_env_with_cron):
    """A pruned (no-umbrella) skill should be dropped from the cron
    job's skill list entirely — there's no forwarding target."""
    curator = curator_env_with_cron["curator"]
    jobs = curator_env_with_cron["jobs"]
    job = jobs.create_job(
        prompt="",
        schedule="every 1h",
        skills=["keep", "stale-one"],
    )
    before = [{"name": "stale-one", "state": "active", "pinned": False}]
    after: list = []  # stale-one was archived with no target
    run_dir = curator._write_run_report(
        started_at=datetime.now(timezone.utc),
        elapsed_seconds=1.0,
        auto_counts={"checked": 1, "marked_stale": 0, "archived": 1, "reactivated": 0},
        auto_summary="1 archived",
        before_report=before,
        before_names={"stale-one"},
        after_report=after,
        llm_meta=_make_llm_meta(),  # no tool calls → classifier marks it pruned
    )
    loaded = jobs.get_job(job["id"])
    assert loaded["skills"] == ["keep"]
    payload = json.loads((run_dir / "run.json").read_text())
    assert payload["cron_rewrites"]["jobs_updated"] == 1
    rewrites = payload["cron_rewrites"]["rewrites"]
    assert rewrites[0]["dropped"] == ["stale-one"]
def test_curator_report_has_no_cron_section_when_nothing_changes(curator_env_with_cron):
    """When the curator run doesn't touch any skills, cron jobs are
    untouched and cron_rewrites.json is not even written."""
    curator = curator_env_with_cron["curator"]
    jobs = curator_env_with_cron["jobs"]
    jobs.create_job(prompt="", schedule="every 1h", skills=["foo"])
    run_dir = curator._write_run_report(
        started_at=datetime.now(timezone.utc),
        elapsed_seconds=1.0,
        auto_counts={"checked": 0, "marked_stale": 0, "archived": 0, "reactivated": 0},
        auto_summary="no changes",
        before_report=[{"name": "foo", "state": "active", "pinned": False}],
        before_names={"foo"},
        after_report=[{"name": "foo", "state": "active", "pinned": False}],
        llm_meta=_make_llm_meta(),
    )
    # No rewrites → no separate file, no section in md
    assert not (run_dir / "cron_rewrites.json").exists()
    md = (run_dir / "REPORT.md").read_text()
    assert "Cron job skill references rewritten" not in md
    payload = json.loads((run_dir / "run.json").read_text())
    assert payload["cron_rewrites"]["jobs_updated"] == 0
    assert payload["counts"]["cron_jobs_rewritten"] == 0
+161 -147
View File
@@ -6,6 +6,11 @@ the JSON Schema ecosystem accepts:
1. Properties without ``type`` — Moonshot requires ``type`` on every node.
2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside
``anyOf`` children.
3. ``$ref`` with sibling keywords — Moonshot expands the ref first and then
rejects ``description``/``type`` siblings on the same node.
(Ported from anomalyco/opencode#24730.)
4. Tuple-style ``items`` arrays — Moonshot requires a single item schema,
not positional ones. (Ported from anomalyco/opencode#24730.)
These tests cover the repairs applied by ``agent/moonshot_schema.py``.
"""
@@ -115,15 +120,9 @@ class TestMissingTypeFilled:
class TestAnyOfParentType:
"""Rule 2: type must not appear at the anyOf parent level.
"""Rule 2: type must not appear at the anyOf parent level."""
When an anyOf contains a null-type branch, Moonshot rejects it.
The sanitizer collapses the anyOf: single non-null branch is promoted,
multiple non-null branches have null removed from the list.
"""
def test_anyof_null_branch_collapsed_to_single_type(self):
"""anyOf [string, null] → plain string (anyOf removed)."""
def test_parent_type_stripped_when_anyof_present(self):
params = {
"type": "object",
"properties": {
@@ -138,46 +137,179 @@ class TestAnyOfParentType:
}
out = sanitize_moonshot_tool_parameters(params)
from_format = out["properties"]["from_format"]
# null branch removed, anyOf collapsed to the single non-null type
assert "anyOf" not in from_format
assert from_format["type"] == "string"
assert "type" not in from_format
assert "anyOf" in from_format
def test_anyof_multiple_non_null_preserved(self):
"""anyOf [string, integer] (no null) → kept as-is with parent type stripped."""
def test_anyof_children_missing_type_get_filled(self):
params = {
"type": "object",
"properties": {
"mode": {
"value": {
"anyOf": [
{"type": "string"},
{"type": "integer"},
{"description": "A typeless option"},
],
},
},
}
out = sanitize_moonshot_tool_parameters(params)
mode = out["properties"]["mode"]
assert "anyOf" in mode
assert "type" not in mode # parent type stripped
children = out["properties"]["value"]["anyOf"]
assert children[0]["type"] == "string"
assert "type" in children[1]
def test_anyof_enum_with_null_collapsed(self):
"""anyOf [{enum: [...], type: string}, {type: null}] → enum + type only."""
class TestRefSiblingStripping:
"""Rule 3: ``$ref`` nodes may not carry sibling keywords on Moonshot.
Ported from anomalyco/opencode#24730. The real-world failure was MCP tools
whose generated schemas put a ``description`` on a ``$ref`` property so the
model would see the field's human-readable hint. The reference stays — the
referenced definition still owns the description (on the target node itself)
and still serves the model's context.
"""
def test_description_sibling_stripped_from_ref(self):
params = {
"type": "object",
"properties": {
"db_type": {
"variantOptions": {
"$ref": "#/$defs/VariantOptions",
"description": "Required. The variant options for generation.",
},
},
"$defs": {
"VariantOptions": {
"type": "object",
"properties": {},
"description": "Configuration options.",
},
},
}
out = sanitize_moonshot_tool_parameters(params)
# Sibling stripped.
assert out["properties"]["variantOptions"] == {"$ref": "#/$defs/VariantOptions"}
# The target definition's own description is preserved — we only strip
# siblings ON the $ref node, not on the thing it points at.
assert out["$defs"]["VariantOptions"]["description"] == "Configuration options."
def test_multiple_siblings_all_stripped(self):
    """Every keyword next to ``$ref`` is removed, leaving only the reference."""
    params = {
        "type": "object",
        "properties": {
            "p": {
                "$ref": "#/$defs/T",
                "type": "object",
                "description": "x",
                "default": {},
                "title": "P",
            },
        },
        "$defs": {"T": {"type": "object"}},
    }
    out = sanitize_moonshot_tool_parameters(params)
    assert out["properties"]["p"] == {"$ref": "#/$defs/T"}
def test_ref_without_siblings_unchanged(self):
    """A bare ``$ref`` node passes through the sanitizer as-is."""
    params = {
        "type": "object",
        "properties": {"p": {"$ref": "#/$defs/T"}},
        "$defs": {"T": {"type": "object"}},
    }
    out = sanitize_moonshot_tool_parameters(params)
    assert out["properties"]["p"] == {"$ref": "#/$defs/T"}
def test_ref_inside_anyof_children(self):
params = {
"type": "object",
"properties": {
"v": {
"anyOf": [
{"enum": ["mysql", "postgresql", ""]},
{"$ref": "#/$defs/A", "description": "variant A"},
{"type": "null"},
],
},
},
"$defs": {"A": {"type": "object"}},
}
out = sanitize_moonshot_tool_parameters(params)
db_type = out["properties"]["db_type"]
assert "anyOf" not in db_type
assert db_type["type"] == "string"
assert db_type["enum"] == ["mysql", "postgresql"] # "" stripped by enum cleanup
children = out["properties"]["v"]["anyOf"]
assert children[0] == {"$ref": "#/$defs/A"}
assert children[1] == {"type": "null"}
class TestTupleItems:
    """Rule 4: tuple-style ``items`` arrays collapse to a single schema.

    Ported from anomalyco/opencode#24730. Moonshot's schema engine requires
    ``items`` to be ONE schema object applied to every array element; tuple-
    style positional item schemas are rejected. We collapse to the first
    element's schema (which is the "closest" interpretation of positional →
    single) and drop the rest.
    """

    def test_tuple_items_collapsed_to_first(self):
        """A two-element tuple ``items`` becomes its first schema; siblings stay."""
        params = {
            "type": "object",
            "properties": {
                "renderedSize": {
                    "type": "array",
                    "items": [{"type": "number"}, {"type": "number"}],
                    "minItems": 2,
                    "maxItems": 2,
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        assert out["properties"]["renderedSize"]["items"] == {"type": "number"}
        # Sibling constraints are preserved — only the tuple shape is repaired.
        assert out["properties"]["renderedSize"]["minItems"] == 2

    def test_empty_tuple_items_becomes_empty_schema(self):
        """An empty tuple ``items`` ends up as a dict schema carrying a ``type``."""
        # Empty tuple collapses to ``{}``; the generic repair then fills a
        # synthetic ``type`` because Moonshot requires ``type`` on every
        # schema node. Either ``{}`` or ``{"type": "string"}`` is a valid
        # final shape for Moonshot — both accept any string element — but we
        # always go through ``_fill_missing_type`` so the result is fully
        # well-formed without needing the consumer to patch it later.
        params = {
            "type": "object",
            "properties": {
                "things": {"type": "array", "items": []},
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        items = out["properties"]["things"]["items"]
        # Must be a dict and must carry a ``type`` (the whole point of Rule 1).
        assert isinstance(items, dict)
        assert items.get("type")

    def test_tuple_items_first_element_is_repaired(self):
        """The surviving first element also gets its missing ``type`` filled."""
        # The first element itself has a missing type — it should be filled.
        params = {
            "type": "object",
            "properties": {
                "pair": {
                    "type": "array",
                    "items": [{"description": "first"}, {"description": "second"}],
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        # Repaired to a single schema with a synthetic type.
        assert out["properties"]["pair"]["items"] == {
            "description": "first",
            "type": "string",
        }

    def test_single_schema_items_unchanged(self):
        """A single-schema ``items`` is left as it was."""
        params = {
            "type": "object",
            "properties": {
                "tags": {"type": "array", "items": {"type": "string"}},
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        assert out["properties"]["tags"]["items"] == {"type": "string"}
class TestTopLevelGuarantees:
@@ -253,7 +385,7 @@ class TestRealWorldMCPShape:
"""End-to-end: a realistic MCP-style schema that used to 400 on Moonshot."""
def test_combined_rewrites(self):
# Shape: missing type on a property, anyOf with parent type + null, array
# Shape: missing type on a property, anyOf with parent type, array
# items without type — all in one tool.
params = {
"type": "object",
@@ -275,125 +407,7 @@ class TestRealWorldMCPShape:
}
out = sanitize_moonshot_tool_parameters(params)
assert out["properties"]["query"]["type"] == "string"
# anyOf with null collapsed to plain type
assert "anyOf" not in out["properties"]["filter"]
assert out["properties"]["filter"]["type"] == "string"
assert "type" not in out["properties"]["filter"]
assert out["properties"]["filter"]["anyOf"][0]["type"] == "string"
assert out["properties"]["tags"]["items"]["type"] == "string"
assert out["required"] == ["query"]
class TestEnumNullStripping:
"""Rule 3: Moonshot rejects null/empty-string inside enum arrays."""
def test_enum_null_value_stripped(self):
    """enum containing Python None must have it removed for Moonshot."""
    params = {
        "type": "object",
        "properties": {
            "db_type": {
                "type": "string",
                "enum": ["mysql", "postgresql", None],
            },
        },
    }
    out = sanitize_moonshot_tool_parameters(params)
    db_type = out["properties"]["db_type"]
    assert None not in db_type["enum"]
    assert "mysql" in db_type["enum"]
    assert "postgresql" in db_type["enum"]
def test_enum_empty_string_stripped(self):
    """enum containing empty string '' must have it removed for Moonshot."""
    params = {
        "type": "object",
        "properties": {
            "db_type": {
                "type": "string",
                "enum": ["mysql", "postgresql", ""],
            },
        },
    }
    out = sanitize_moonshot_tool_parameters(params)
    db_type = out["properties"]["db_type"]
    assert "" not in db_type["enum"]
    assert db_type["enum"] == ["mysql", "postgresql"]
def test_enum_all_null_becomes_no_enum(self):
"""enum that only had null/empty values is dropped entirely."""
params = {
"type": "object",
"properties": {
"val": {
"type": "string",
"enum": [None, ""],
},
},
}
out = sanitize_moonshot_tool_parameters(params)
assert "enum" not in out["properties"]["val"]
def test_dataslayer_db_type_after_mcp_normalize(self):
"""Real-world: dataslayer db_type anyOf+enum after MCP normalization."""
# This is the exact shape after _normalize_mcp_input_schema runs:
# anyOf collapsed, but enum still has null + empty string
params = {
"type": "object",
"properties": {
"datasource": {"type": "string"},
"db_type": {
"enum": ["mysql", "mariadb", "postgresql", "sqlserver", "oracle", "", None],
"type": "string",
"nullable": True,
"default": None,
},
},
"required": ["datasource"],
}
out = sanitize_moonshot_tool_parameters(params)
db_type = out["properties"]["db_type"]
assert "nullable" not in db_type, "nullable keyword must be stripped"
assert None not in db_type["enum"]
assert "" not in db_type["enum"]
assert db_type["enum"] == ["mysql", "mariadb", "postgresql", "sqlserver", "oracle"]
assert db_type["type"] == "string"
def test_enum_on_object_type_not_stripped(self):
"""enum on non-scalar types (object) should NOT be touched."""
params = {
"type": "object",
"properties": {
"config": {
"type": "object",
"properties": {},
"enum": [{}, None],
},
},
}
out = sanitize_moonshot_tool_parameters(params)
# object-typed enum should pass through unchanged
assert "enum" in out["properties"]["config"]
def test_anyof_collapse_still_runs_nullable_and_enum_cleanup(self):
"""After anyOf collapses to a single non-null branch, the merged
node must still have ``nullable`` stripped and null/empty-string
values removed from enum not skipped by the early anyOf return.
"""
params = {
"type": "object",
"properties": {
"db_type": {
"anyOf": [
{"enum": ["mysql", "postgresql", "", None]},
{"type": "null"},
],
"nullable": True,
},
},
}
out = sanitize_moonshot_tool_parameters(params)
db_type = out["properties"]["db_type"]
assert "anyOf" not in db_type
assert "nullable" not in db_type, "nullable must be stripped after anyOf collapse"
assert db_type["type"] == "string"
assert db_type["enum"] == ["mysql", "postgresql"], \
"null/empty enum values must be stripped after anyOf collapse"
-58
View File
@@ -1,58 +0,0 @@
"""Tests for agent/skill_utils.py — extract_skill_conditions metadata handling."""
from agent.skill_utils import extract_skill_conditions
def test_metadata_as_dict_with_hermes():
    """Normal case: ``metadata`` is a dict whose ``hermes`` entry holds all four keys."""
    hermes_section = {
        "fallback_for_toolsets": ["toolset_a"],
        "requires_toolsets": ["toolset_b"],
        "fallback_for_tools": ["tool_x"],
        "requires_tools": ["tool_y"],
    }
    conditions = extract_skill_conditions({"metadata": {"hermes": hermes_section}})

    # Expected values are spelled out separately from the input mapping.
    expected_pairs = (
        ("fallback_for_toolsets", ["toolset_a"]),
        ("requires_toolsets", ["toolset_b"]),
        ("fallback_for_tools", ["tool_x"]),
        ("requires_tools", ["tool_y"]),
    )
    for key, expected in expected_pairs:
        assert conditions[key] == expected
def test_metadata_as_string_does_not_crash():
    """Bug case: ``metadata`` is a truthy non-dict value (e.g. a YAML string)."""
    condition_keys = (
        "fallback_for_toolsets",
        "requires_toolsets",
        "fallback_for_tools",
        "requires_tools",
    )
    all_empty = {key: [] for key in condition_keys}
    conditions = extract_skill_conditions({"metadata": "some text"})
    assert conditions == all_empty
def test_metadata_as_none():
    """The ``metadata`` key exists but its value is null/None."""
    condition_keys = (
        "fallback_for_toolsets",
        "requires_toolsets",
        "fallback_for_tools",
        "requires_tools",
    )
    all_empty = {key: [] for key in condition_keys}
    conditions = extract_skill_conditions({"metadata": None})
    assert conditions == all_empty
def test_metadata_missing_entirely():
    """The frontmatter has no ``metadata`` key at all."""
    condition_keys = (
        "fallback_for_toolsets",
        "requires_toolsets",
        "fallback_for_tools",
        "requires_tools",
    )
    all_empty = {key: [] for key in condition_keys}
    conditions = extract_skill_conditions(
        {"name": "my-skill", "description": "Does stuff."}
    )
    assert conditions == all_empty
-238
View File
@@ -1,238 +0,0 @@
"""Pure tool-call guardrail primitive tests."""
import json
from agent.tool_guardrails import (
ToolCallGuardrailConfig,
ToolCallGuardrailController,
ToolCallSignature,
canonical_tool_args,
)
def test_tool_call_signature_hashes_canonical_nested_unicode_args_without_exposing_raw_args():
    """Signatures hash canonicalized args and never expose raw argument values.

    Two argument mappings that differ only in key order must canonicalize
    identically and produce equal signatures, and the metadata view must
    contain nothing but the tool name and the args hash.
    """
    # A non-empty, non-ASCII value. An empty string is a substring of every
    # string, so asserting it is absent from the dumped metadata can never
    # pass; a real value makes the leak check meaningful.
    unicode_value = "\u96ea\u2603"
    args_a = {
        "z": [{"β": unicode_value, "a": 1}],
        "a": {"y": 2, "x": "secret-token-value"},
    }
    args_b = {
        "a": {"x": "secret-token-value", "y": 2},
        "z": [{"a": 1, "β": unicode_value}],
    }

    assert canonical_tool_args(args_a) == canonical_tool_args(args_b)

    sig_a = ToolCallSignature.from_call("web_search", args_a)
    sig_b = ToolCallSignature.from_call("web_search", args_b)
    assert sig_a == sig_b
    assert len(sig_a.args_hash) == 64

    metadata = sig_a.to_metadata()
    assert metadata == {"tool_name": "web_search", "args_hash": sig_a.args_hash}
    assert "secret-token-value" not in json.dumps(metadata)
    # ensure_ascii=False keeps non-ASCII text literal in the dump, so a leaked
    # value would actually be visible to the substring check.
    assert unicode_value not in json.dumps(metadata, ensure_ascii=False)
def test_default_config_is_soft_warning_only_with_hard_stop_disabled():
    """A default config enables warnings, disables hard stops, and pins thresholds."""
    defaults = ToolCallGuardrailConfig()

    assert defaults.warnings_enabled is True
    assert defaults.hard_stop_enabled is False

    expected_thresholds = (
        ("exact_failure_warn_after", 2),
        ("same_tool_failure_warn_after", 3),
        ("no_progress_warn_after", 2),
        ("exact_failure_block_after", 5),
        ("same_tool_failure_halt_after", 8),
        ("no_progress_block_after", 5),
    )
    for attribute, expected in expected_thresholds:
        assert getattr(defaults, attribute) == expected
def test_config_parses_nested_warn_and_hard_stop_thresholds():
    """``from_mapping`` reads the nested ``warn_after`` / ``hard_stop_after`` tables."""
    warn_table = {
        "exact_failure": 3,
        "same_tool_failure": 4,
        "idempotent_no_progress": 5,
    }
    hard_stop_table = {
        "exact_failure": 6,
        "same_tool_failure": 7,
        "idempotent_no_progress": 8,
    }
    parsed = ToolCallGuardrailConfig.from_mapping(
        {
            "warnings_enabled": False,
            "hard_stop_enabled": True,
            "warn_after": warn_table,
            "hard_stop_after": hard_stop_table,
        }
    )

    assert parsed.warnings_enabled is False
    assert parsed.hard_stop_enabled is True

    expected_thresholds = (
        ("exact_failure_warn_after", 3),
        ("same_tool_failure_warn_after", 4),
        ("no_progress_warn_after", 5),
        ("exact_failure_block_after", 6),
        ("same_tool_failure_halt_after", 7),
        ("no_progress_block_after", 8),
    )
    for attribute, expected in expected_thresholds:
        assert getattr(parsed, attribute) == expected
def test_default_repeated_identical_failed_call_warns_without_blocking():
    """With defaults, five identical failures warn from the second on and never block."""
    guard = ToolCallGuardrailController()
    call_args = {"query": "same"}
    outcomes = []

    attempt = 0
    while attempt < 5:
        assert guard.before_call("web_search", call_args).action == "allow"
        outcome = guard.after_call("web_search", call_args, '{"error":"boom"}', failed=True)
        outcomes.append(outcome)
        attempt += 1

    first, later = outcomes[0], outcomes[1:]
    assert first.action == "allow"
    assert [item.action for item in later] == ["warn", "warn", "warn", "warn"]
    assert {item.code for item in later} == {"repeated_exact_failure_warning"}
    assert guard.before_call("web_search", call_args).action == "allow"
    assert guard.halt_decision is None
def test_hard_stop_enabled_blocks_repeated_exact_failure_before_next_execution():
    """With hard stop on, the call after the second identical failure is blocked."""
    config = ToolCallGuardrailConfig(
        hard_stop_enabled=True,
        exact_failure_warn_after=2,
        exact_failure_block_after=2,
        same_tool_failure_halt_after=99,
    )
    guard = ToolCallGuardrailController(config)
    call_args = {"query": "same"}

    # First failure: still allowed before and after.
    assert guard.before_call("web_search", call_args).action == "allow"
    after_first = guard.after_call("web_search", call_args, '{"error":"boom"}', failed=True)
    assert after_first.action == "allow"

    # Second failure: the after-call decision becomes a warning.
    assert guard.before_call("web_search", call_args).action == "allow"
    after_second = guard.after_call("web_search", call_args, '{"error":"boom"}', failed=True)
    assert after_second.action == "warn"
    assert after_second.code == "repeated_exact_failure_warning"

    # Third attempt is refused before it runs.
    refusal = guard.before_call("web_search", call_args)
    assert refusal.action == "block"
    assert refusal.code == "repeated_exact_failure_block"
    assert refusal.count == 2
def test_success_resets_exact_signature_failure_streak():
    """A successful call between two failures keeps the next call allowed."""
    config = ToolCallGuardrailConfig(
        hard_stop_enabled=True,
        exact_failure_block_after=2,
        same_tool_failure_halt_after=99,
    )
    guard = ToolCallGuardrailController(config)
    call_args = {"query": "same"}

    guard.after_call("web_search", call_args, '{"error":"boom"}', failed=True)
    guard.after_call("web_search", call_args, '{"ok":true}', failed=False)
    assert guard.before_call("web_search", call_args).action == "allow"

    guard.after_call("web_search", call_args, '{"error":"boom"}', failed=True)
    assert guard.before_call("web_search", call_args).action == "allow"
def test_same_tool_varying_args_warns_by_default_without_halting():
    """Without hard stop, failures of one tool with varying args warn but never halt."""
    config = ToolCallGuardrailConfig(
        same_tool_failure_warn_after=2,
        same_tool_failure_halt_after=3,
    )
    guard = ToolCallGuardrailController(config)

    outcomes = [
        guard.after_call("terminal", {"command": command}, '{"exit_code":1}', failed=True)
        for command in ("cmd-1", "cmd-2", "cmd-3", "cmd-4")
    ]
    first, later = outcomes[0], outcomes[1:]

    assert first.action == "allow"
    assert [item.action for item in later] == ["warn", "warn", "warn"]
    assert {item.code for item in later} == {"same_tool_failure_warning"}
    assert guard.halt_decision is None
def test_hard_stop_enabled_halts_same_tool_varying_args_failure_streak():
    """With hard stop on, the third failure of one tool (varying args) halts."""
    config = ToolCallGuardrailConfig(
        hard_stop_enabled=True,
        exact_failure_block_after=99,
        same_tool_failure_warn_after=2,
        same_tool_failure_halt_after=3,
    )
    guard = ToolCallGuardrailController(config)

    after_first = guard.after_call("terminal", {"command": "cmd-1"}, '{"exit_code":1}', failed=True)
    assert after_first.action == "allow"

    after_second = guard.after_call("terminal", {"command": "cmd-2"}, '{"exit_code":1}', failed=True)
    assert after_second.action == "warn"
    assert after_second.code == "same_tool_failure_warning"

    after_third = guard.after_call("terminal", {"command": "cmd-3"}, '{"exit_code":1}', failed=True)
    assert after_third.action == "halt"
    assert after_third.code == "same_tool_failure_halt"
    assert after_third.count == 3
def test_idempotent_no_progress_repeated_result_warns_without_blocking_by_default():
    """Without hard stop, repeated identical ``read_file`` results warn but stay allowed."""
    config = ToolCallGuardrailConfig(no_progress_warn_after=2, no_progress_block_after=2)
    guard = ToolCallGuardrailController(config)
    call_args = {"path": "/tmp/same.txt"}
    payload = "same file contents"

    remaining = 4
    while remaining:
        assert guard.before_call("read_file", call_args).action == "allow"
        latest = guard.after_call("read_file", call_args, payload, failed=False)
        remaining -= 1

    # Only the decision from the final iteration is inspected.
    assert latest.action == "warn"
    assert latest.code == "idempotent_no_progress_warning"
    assert guard.before_call("read_file", call_args).action == "allow"
    assert guard.halt_decision is None
def test_hard_stop_enabled_blocks_idempotent_no_progress_future_repeat():
    """With hard stop on, a third identical ``read_file`` call is blocked up front."""
    config = ToolCallGuardrailConfig(
        hard_stop_enabled=True,
        no_progress_warn_after=2,
        no_progress_block_after=2,
    )
    guard = ToolCallGuardrailController(config)
    call_args = {"path": "/tmp/same.txt"}
    payload = "same file contents"

    # First read: allowed before and after.
    assert guard.before_call("read_file", call_args).action == "allow"
    assert guard.after_call("read_file", call_args, payload, failed=False).action == "allow"

    # Second identical read: allowed to run, then warned about.
    assert guard.before_call("read_file", call_args).action == "allow"
    warning = guard.after_call("read_file", call_args, payload, failed=False)
    assert warning.action == "warn"
    assert warning.code == "idempotent_no_progress_warning"

    # Third attempt is refused before execution.
    refusal = guard.before_call("read_file", call_args)
    assert refusal.action == "block"
    assert refusal.code == "idempotent_no_progress_block"
def test_mutating_or_unknown_tools_are_not_blocked_for_repeated_identical_success_output_by_default():
    """``write_file`` and ``custom_tool`` repeats with identical output stay allowed."""
    config = ToolCallGuardrailConfig(no_progress_warn_after=2, no_progress_block_after=2)
    guard = ToolCallGuardrailController(config)

    for _round in (1, 2, 3):
        before_write = guard.before_call("write_file", {"path": "/tmp/x", "content": "x"})
        assert before_write.action == "allow"
        after_write = guard.after_call(
            "write_file", {"path": "/tmp/x", "content": "x"}, "ok", failed=False
        )
        assert after_write.action == "allow"

        before_custom = guard.before_call("custom_tool", {"x": 1})
        assert before_custom.action == "allow"
        after_custom = guard.after_call("custom_tool", {"x": 1}, "ok", failed=False)
        assert after_custom.action == "allow"
def test_reset_for_turn_clears_bounded_guardrail_state():
    """``reset_for_turn`` turns previously blocked calls back into allowed ones."""
    config = ToolCallGuardrailConfig(
        hard_stop_enabled=True,
        exact_failure_block_after=2,
        no_progress_block_after=2,
    )
    guard = ToolCallGuardrailController(config)

    # Build up a failure streak and a no-progress streak.
    guard.after_call("web_search", {"query": "same"}, '{"error":"boom"}', failed=True)
    guard.after_call("web_search", {"query": "same"}, '{"error":"boom"}', failed=True)
    guard.after_call("read_file", {"path": "/tmp/x"}, "same", failed=False)
    guard.after_call("read_file", {"path": "/tmp/x"}, "same", failed=False)

    search_before_reset = guard.before_call("web_search", {"query": "same"})
    assert search_before_reset.action == "block"
    read_before_reset = guard.before_call("read_file", {"path": "/tmp/x"})
    assert read_before_reset.action == "block"

    guard.reset_for_turn()

    search_after_reset = guard.before_call("web_search", {"query": "same"})
    assert search_after_reset.action == "allow"
    read_after_reset = guard.before_call("read_file", {"path": "/tmp/x"})
    assert read_after_reset.action == "allow"
+8 -12
View File
@@ -21,21 +21,20 @@ def test_manual_compress_reports_noop_without_success_banner(capsys):
shell.agent = MagicMock()
shell.agent.compression_enabled = True
shell.agent._cached_system_prompt = ""
shell.agent.tools = None
shell.agent.session_id = shell.session_id # no-op compression: no split
shell.agent._compress_context.return_value = (list(history), "")
def _estimate(messages, **_kwargs):
def _estimate(messages):
assert messages == history
return 100
with patch("agent.model_metadata.estimate_request_tokens_rough", side_effect=_estimate):
with patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate):
shell._manual_compress()
output = capsys.readouterr().out
assert "No changes from compression" in output
assert "✅ Compressed" not in output
assert "Approx request size: ~100 tokens (unchanged)" in output
assert "Rough transcript estimate: ~100 tokens (unchanged)" in output
def test_manual_compress_explains_when_token_estimate_rises(capsys):
@@ -50,23 +49,22 @@ def test_manual_compress_explains_when_token_estimate_rises(capsys):
shell.agent = MagicMock()
shell.agent.compression_enabled = True
shell.agent._cached_system_prompt = ""
shell.agent.tools = None
shell.agent.session_id = shell.session_id # no-op: no split
shell.agent._compress_context.return_value = (compressed, "")
def _estimate(messages, **_kwargs):
def _estimate(messages):
if messages == history:
return 100
if messages == compressed:
return 120
raise AssertionError(f"unexpected transcript: {messages!r}")
with patch("agent.model_metadata.estimate_request_tokens_rough", side_effect=_estimate):
with patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate):
shell._manual_compress()
output = capsys.readouterr().out
assert "✅ Compressed: 4 → 3 messages" in output
assert "Approx request size: ~100 → ~120 tokens" in output
assert "Rough transcript estimate: ~100 → ~120 tokens" in output
assert "denser summaries" in output
@@ -91,7 +89,6 @@ def test_manual_compress_syncs_session_id_after_split():
shell.agent = MagicMock()
shell.agent.compression_enabled = True
shell.agent._cached_system_prompt = ""
shell.agent.tools = None
# Simulate _compress_context mutating agent.session_id as a side effect.
def _fake_compress(*args, **kwargs):
shell.agent.session_id = new_child_id
@@ -100,7 +97,7 @@ def test_manual_compress_syncs_session_id_after_split():
shell.agent.session_id = old_id # starts in sync
shell._pending_title = "stale title"
with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100):
shell._manual_compress()
# CLI session_id must now point at the continuation child, not the parent.
@@ -121,12 +118,11 @@ def test_manual_compress_no_sync_when_session_id_unchanged():
shell.agent = MagicMock()
shell.agent.compression_enabled = True
shell.agent._cached_system_prompt = ""
shell.agent.tools = None
shell.agent.session_id = shell.session_id
shell.agent._compress_context.return_value = (list(history), "")
shell._pending_title = "keep me"
with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100):
shell._manual_compress()
# No split → pending title untouched.
-289
View File
@@ -1,289 +0,0 @@
"""Tests for cron.jobs.rewrite_skill_refs — the curator integration that
keeps scheduled cron jobs pointing at the right skill names after a
consolidation / pruning pass.
Bug this fixes: when the curator consolidates skill X into umbrella Y,
any cron job whose ``skills`` list contains X would silently fail to
load X at run time (the scheduler logs a warning and skips it), so the
job runs without the instructions it was scheduled to follow.
"""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
# Ensure project root is importable
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
@pytest.fixture
def cron_env(tmp_path, monkeypatch):
    """Provide an isolated cron environment rooted at a temporary HERMES_HOME.

    Creates ``.hermes/cron/output`` under ``tmp_path``, exports ``HERMES_HOME``,
    and repoints the path constants of ``cron.jobs`` at the temporary tree.
    Returns the temporary home directory.
    """
    home = tmp_path / ".hermes"
    cron_dir = home / "cron"
    output_dir = cron_dir / "output"
    for directory in (home, cron_dir, output_dir):
        directory.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))

    import cron.jobs as jobs_mod

    overrides = (
        ("HERMES_DIR", home),
        ("CRON_DIR", cron_dir),
        ("JOBS_FILE", cron_dir / "jobs.json"),
        ("OUTPUT_DIR", output_dir),
    )
    for constant, path in overrides:
        monkeypatch.setattr(jobs_mod, constant, path)

    return home
class TestRewriteSkillRefsNoop:
    """Empty-input combinations: no jobs, an empty map, or no matching skill."""

    def test_empty_map_and_no_jobs(self, cron_env):
        """No jobs and nothing to apply yields an all-zero report."""
        from cron.jobs import rewrite_skill_refs

        outcome = rewrite_skill_refs(consolidated={}, pruned=[])
        assert outcome == {"rewrites": [], "jobs_updated": 0, "jobs_scanned": 0}

    def test_jobs_exist_but_map_empty(self, cron_env):
        """With nothing to apply, existing jobs are neither updated nor scanned."""
        from cron.jobs import create_job, rewrite_skill_refs

        create_job(prompt="", schedule="every 1h", skills=["foo"])
        outcome = rewrite_skill_refs(consolidated={}, pruned=[])
        assert outcome["jobs_updated"] == 0
        # Early return: no scan happens when there is nothing to apply.
        assert outcome["jobs_scanned"] == 0

    def test_jobs_exist_but_no_match(self, cron_env):
        """A job whose skills are not in either input is scanned but left alone."""
        from cron.jobs import create_job, get_job, rewrite_skill_refs

        created = create_job(prompt="", schedule="every 1h", skills=["foo"])
        outcome = rewrite_skill_refs(
            consolidated={"unrelated": "umbrella"},
            pruned=["other"],
        )
        assert outcome["jobs_updated"] == 0
        assert outcome["jobs_scanned"] == 1
        # The stored job still lists its original skill.
        stored = get_job(created["id"])
        assert stored["skills"] == ["foo"]
class TestRewriteSkillRefsConsolidation:
    """Consolidated skills are swapped for their umbrella target."""

    def test_single_skill_replaced(self, cron_env):
        """A job's only skill is replaced, and the legacy ``skill`` field follows."""
        from cron.jobs import create_job, get_job, rewrite_skill_refs

        created = create_job(prompt="", schedule="every 1h", skills=["legacy-skill"])
        outcome = rewrite_skill_refs(
            consolidated={"legacy-skill": "umbrella-skill"},
            pruned=[],
        )
        assert outcome["jobs_updated"] == 1
        stored = get_job(created["id"])
        assert stored["skills"] == ["umbrella-skill"]
        # The legacy single-skill field is realigned as well.
        assert stored["skill"] == "umbrella-skill"

    def test_multiple_skills_one_consolidated(self, cron_env):
        """Only the consolidated entry changes; list order is preserved."""
        from cron.jobs import create_job, get_job, rewrite_skill_refs

        created = create_job(
            prompt="",
            schedule="every 1h",
            skills=["keep-a", "legacy", "keep-b"],
        )
        rewrite_skill_refs(consolidated={"legacy": "umbrella"}, pruned=[])
        stored = get_job(created["id"])
        # The legacy entry is replaced in place.
        assert stored["skills"] == ["keep-a", "umbrella", "keep-b"]

    def test_umbrella_already_in_list_dedupes(self, cron_env):
        """A job already listing the umbrella does not end up with it twice."""
        from cron.jobs import create_job, get_job, rewrite_skill_refs

        # The job loads both the umbrella and the legacy sub-skill.
        created = create_job(
            prompt="",
            schedule="every 1h",
            skills=["umbrella", "legacy"],
        )
        rewrite_skill_refs(consolidated={"legacy": "umbrella"}, pruned=[])
        stored = get_job(created["id"])
        # Exactly one umbrella entry remains.
        assert stored["skills"] == ["umbrella"]

    def test_rewrite_report_records_mapping(self, cron_env):
        """The report entry records job identity, before/after lists, and the mapping."""
        from cron.jobs import create_job, rewrite_skill_refs

        created = create_job(
            prompt="",
            schedule="every 1h",
            skills=["a", "b"],
            name="my-job",
        )
        outcome = rewrite_skill_refs(
            consolidated={"a": "umbrella-a", "b": "umbrella-b"},
            pruned=[],
        )
        assert len(outcome["rewrites"]) == 1
        record = outcome["rewrites"][0]
        assert record["job_id"] == created["id"]
        assert record["job_name"] == "my-job"
        assert record["before"] == ["a", "b"]
        assert record["after"] == ["umbrella-a", "umbrella-b"]
        assert record["mapped"] == {"a": "umbrella-a", "b": "umbrella-b"}
        assert record["dropped"] == []
class TestRewriteSkillRefsPruning:
    """Pruned skills are removed outright; they have no forwarding target."""

    def test_pruned_skill_dropped(self, cron_env):
        """The pruned skill disappears and the legacy field points at what is left."""
        from cron.jobs import create_job, get_job, rewrite_skill_refs

        created = create_job(
            prompt="",
            schedule="every 1h",
            skills=["keep", "stale"],
        )
        outcome = rewrite_skill_refs(consolidated={}, pruned=["stale"])
        assert outcome["jobs_updated"] == 1
        stored = get_job(created["id"])
        assert stored["skills"] == ["keep"]
        assert stored["skill"] == "keep"

    def test_all_skills_pruned_leaves_empty_list(self, cron_env):
        """Pruning a job's only skill leaves an empty list and a null legacy field."""
        from cron.jobs import create_job, get_job, rewrite_skill_refs

        created = create_job(prompt="", schedule="every 1h", skills=["gone"])
        rewrite_skill_refs(consolidated={}, pruned=["gone"])
        stored = get_job(created["id"])
        assert stored["skills"] == []
        assert stored["skill"] is None

    def test_pruned_report_records_drops(self, cron_env):
        """The report entry lists the dropped skill and an empty mapping."""
        from cron.jobs import create_job, rewrite_skill_refs

        create_job(prompt="", schedule="every 1h", skills=["keep", "stale"])
        outcome = rewrite_skill_refs(consolidated={}, pruned=["stale"])
        record = outcome["rewrites"][0]
        assert record["dropped"] == ["stale"]
        assert record["mapped"] == {}
class TestRewriteSkillRefsMixed:
    """Consolidation and pruning applied in a single pass."""

    def test_mixed_consolidation_and_pruning(self, cron_env):
        """One skill is kept, one is forwarded to its umbrella, one is dropped."""
        from cron.jobs import create_job, get_job, rewrite_skill_refs

        created = create_job(
            prompt="",
            schedule="every 1h",
            skills=["keep", "legacy", "stale"],
        )
        rewrite_skill_refs(
            consolidated={"legacy": "umbrella"},
            pruned=["stale"],
        )
        stored = get_job(created["id"])
        assert stored["skills"] == ["keep", "umbrella"]

    def test_skill_in_both_maps_wins_as_consolidated(self, cron_env):
        """Defensive: if a skill appears in both lists (shouldn't happen
        in practice), prefer consolidation - it has a forwarding target,
        which is the more useful outcome."""
        from cron.jobs import create_job, get_job, rewrite_skill_refs

        created = create_job(prompt="", schedule="every 1h", skills=["ambiguous"])
        rewrite_skill_refs(
            consolidated={"ambiguous": "umbrella"},
            pruned=["ambiguous"],
        )
        stored = get_job(created["id"])
        assert stored["skills"] == ["umbrella"]
class TestRewriteSkillRefsMultipleJobs:
    """Several jobs at once, only some of which reference an affected skill."""

    def test_only_affected_jobs_reported(self, cron_env):
        """All three jobs are scanned, but only the affected one is reported."""
        from cron.jobs import create_job, get_job, rewrite_skill_refs

        affected = create_job(prompt="", schedule="every 1h", skills=["legacy"])
        unrelated = create_job(prompt="", schedule="every 1h", skills=["untouched"])
        skill_less = create_job(prompt="", schedule="every 1h", skills=[])

        outcome = rewrite_skill_refs(
            consolidated={"legacy": "umbrella"},
            pruned=[],
        )
        assert outcome["jobs_updated"] == 1
        assert outcome["jobs_scanned"] == 3
        assert len(outcome["rewrites"]) == 1
        assert outcome["rewrites"][0]["job_id"] == affected["id"]

        # The other two jobs keep their skill lists.
        assert get_job(unrelated["id"])["skills"] == ["untouched"]
        assert get_job(skill_less["id"])["skills"] == []

    def test_legacy_skill_field_also_rewritten(self, cron_env):
        """Old jobs may have the legacy single-skill ``skill`` field
        set instead of ``skills``. Both paths should be rewritten."""
        from cron.jobs import create_job, get_job, rewrite_skill_refs

        # Create the job through the legacy ``skill`` argument.
        created = create_job(
            prompt="",
            schedule="every 1h",
            skill="legacy",
        )
        rewrite_skill_refs(consolidated={"legacy": "umbrella"}, pruned=[])
        stored = get_job(created["id"])
        assert stored["skills"] == ["umbrella"]
        assert stored["skill"] == "umbrella"
class TestRewriteSkillRefsPersistence:
    """Rewrites are written to the jobs file; no-ops leave the file alone."""

    def test_changes_persist_across_reload(self, cron_env):
        """The rewritten skill names are present in the raw jobs file."""
        import json
        from cron.jobs import create_job, rewrite_skill_refs, JOBS_FILE

        create_job(prompt="", schedule="every 1h", skills=["legacy"])
        rewrite_skill_refs(consolidated={"legacy": "umbrella"}, pruned=[])

        # Inspect the file contents directly rather than going through get_job.
        raw = json.loads(JOBS_FILE.read_text())
        first_job = raw["jobs"][0]
        assert first_job["skills"] == ["umbrella"]
        assert first_job["skill"] == "umbrella"

    def test_noop_does_not_rewrite_file(self, cron_env):
        """When nothing matches, the jobs file's mtime is unchanged."""
        from cron.jobs import create_job, rewrite_skill_refs, JOBS_FILE

        create_job(prompt="", schedule="every 1h", skills=["keep"])
        stamp_before = JOBS_FILE.stat().st_mtime_ns

        # Neither input names the job's skill.
        outcome = rewrite_skill_refs(
            consolidated={"unrelated": "umbrella"},
            pruned=["other"],
        )
        assert outcome["jobs_updated"] == 0
        # An unchanged timestamp means no disk write happened.
        stamp_after = JOBS_FILE.stat().st_mtime_ns
        assert stamp_after == stamp_before
-65
View File
@@ -1,65 +0,0 @@
"""Shared fixtures for Feishu adapter tests (admission, group policy, dispatch)."""
from __future__ import annotations
import threading
from types import SimpleNamespace
from typing import Any, Optional
def make_sender(sender_type: str = "user", open_id: str = "ou_human",
                user_id: Optional[str] = None, union_id: Optional[str] = None) -> Any:
    """Build a stand-in sender object for tests.

    The result exposes ``sender_type`` and a nested ``sender_id`` namespace
    carrying ``open_id``, ``user_id`` and ``union_id``.
    """
    identity = SimpleNamespace(open_id=open_id, user_id=user_id, union_id=union_id)
    return SimpleNamespace(sender_type=sender_type, sender_id=identity)
def make_message(message_id: str = "om_xxx", chat_type: str = "p2p",
                 chat_id: str = "oc_1", mentions: Optional[list] = None) -> Any:
    """Build a stand-in text message object for tests.

    ``content`` is always the empty string and ``message_type`` is always
    ``"text"``; the remaining attributes come from the arguments.
    """
    attributes = {
        "message_id": message_id,
        "chat_type": chat_type,
        "chat_id": chat_id,
        "mentions": mentions,
        "content": "",
        "message_type": "text",
    }
    return SimpleNamespace(**attributes)
def make_adapter_skeleton(
    *,
    bot_open_id: str = "ou_me",
    bot_user_id: str = "",
    allow_bots: str = "none",
    require_mention: bool = True,
    group_policy: str = "allowlist",
) -> Any:
    """Create a ``FeishuAdapter`` without running ``__init__`` and seed its fields.

    The instance is allocated with ``object.__new__`` and then given the
    private attributes listed below, some fixed and some taken from the
    keyword arguments. ``group_policy`` fills both ``_group_policy`` and
    ``_default_group_policy``.
    """
    from gateway.platforms.feishu import FeishuAdapter

    skeleton = object.__new__(FeishuAdapter)
    # Assigned through setattr in the same order as a plain attribute-by-
    # attribute setup would use.
    seeded_fields = (
        ("_bot_open_id", bot_open_id),
        ("_bot_user_id", bot_user_id),
        ("_bot_name", ""),
        ("_app_id", ""),
        ("_admins", set()),
        ("_group_rules", {}),
        ("_group_policy", group_policy),
        ("_default_group_policy", group_policy),
        ("_allowed_group_users", frozenset()),
        ("_allow_bots", allow_bots),
        ("_require_mention", require_mention),
    )
    for field_name, field_value in seeded_fields:
        setattr(skeleton, field_name, field_value)
    return skeleton
def install_dedup_state(adapter: Any, seen: Optional[dict] = None) -> None:
    """Attach message de-duplication state to ``adapter``.

    ``seen`` (if non-empty) is copied into ``_seen_message_ids`` and its keys
    become ``_seen_message_order``. The cache size is set to 100, a fresh
    lock is installed, the state path is ``None``, and persistence is
    replaced by a callable that does nothing.
    """
    snapshot = dict(seen) if seen else {}

    def _skip_persist():
        return None

    adapter._seen_message_ids = snapshot
    adapter._seen_message_order = list(snapshot)
    adapter._dedup_cache_size = 100
    adapter._dedup_lock = threading.Lock()
    adapter._dedup_state_path = None
    adapter._persist_seen_message_ids = _skip_persist
def stub_mention(adapter: Any, mentions_self: bool) -> None:
    """Replace ``adapter._mentions_self`` with a stub returning ``mentions_self``."""

    def _fixed_answer(_message):
        # The message is ignored; the canned answer is always returned.
        return mentions_self

    adapter._mentions_self = _fixed_answer
-30
View File
@@ -332,36 +332,6 @@ def auth_adapter():
return _make_adapter(api_key="sk-secret")
# ---------------------------------------------------------------------------
# Adapter internals
# ---------------------------------------------------------------------------
class TestAgentExecution:
    """Checks how ``_run_agent`` drives the agent it creates."""

    @pytest.mark.asyncio
    async def test_run_agent_uses_session_id_as_task_id(self, adapter):
        """``_run_agent`` passes the session id through as ``task_id`` and reports usage."""
        agent_stub = MagicMock()
        agent_stub.run_conversation.return_value = {"final_response": "ok"}
        agent_stub.session_prompt_tokens = 1
        agent_stub.session_completion_tokens = 2
        agent_stub.session_total_tokens = 3

        with patch.object(adapter, "_create_agent", return_value=agent_stub):
            outcome = await adapter._run_agent(
                user_message="hello",
                conversation_history=[],
                session_id="session-123",
            )
        result, usage = outcome

        assert result == {"final_response": "ok"}
        assert usage == {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}
        agent_stub.run_conversation.assert_called_once_with(
            user_message="hello",
            conversation_history=[],
            task_id="session-123",
        )
# ---------------------------------------------------------------------------
# /health endpoint
# ---------------------------------------------------------------------------
+4 -1
View File
@@ -253,7 +253,10 @@ class TestRunStatus:
await asyncio.sleep(0.05)
mock_agent.run_conversation.assert_called_once()
assert mock_agent.run_conversation.call_args.kwargs["task_id"] == "space-session"
# task_id stays "default" so the Runs API shares one sandbox
# container with CLI/gateway; session_id is surfaced in status
# for external UIs to correlate runs with their own session IDs.
assert mock_agent.run_conversation.call_args.kwargs["task_id"] == "default"
assert status["session_id"] == "space-session"
@pytest.mark.asyncio
@@ -173,23 +173,6 @@ class TestBlockingGatewayApproval:
assert e1.event.is_set()
assert e2.event.is_set()
def test_clear_session_denies_and_signals_all_entries(self):
    """clear_session must wake blocked entries during boundary cleanup."""
    from tools.approval import clear_session, _ApprovalEntry, _gateway_queues

    session_key = "test-boundary-cleanup"
    first_entry = _ApprovalEntry({"command": "cmd1"})
    second_entry = _ApprovalEntry({"command": "cmd2"})
    _gateway_queues[session_key] = [first_entry, second_entry]

    clear_session(session_key)

    # Every queued entry is signalled ...
    assert first_entry.event.is_set()
    assert second_entry.event.is_set()
    # ... resolved as a denial ...
    assert first_entry.result == "deny"
    assert second_entry.result == "deny"
    # ... and the session's queue is removed.
    assert session_key not in _gateway_queues
# ------------------------------------------------------------------
# /approve command
+10 -18
View File
@@ -64,13 +64,11 @@ async def test_compress_command_reports_noop_without_success_banner():
agent_instance = MagicMock()
agent_instance.shutdown_memory_provider = MagicMock()
agent_instance.close = MagicMock()
agent_instance._cached_system_prompt = ""
agent_instance.tools = None
agent_instance.context_compressor.has_content_to_compress.return_value = True
agent_instance.session_id = "sess-1"
agent_instance._compress_context.return_value = (list(history), "")
def _estimate(messages, **_kwargs):
def _estimate(messages):
assert messages == history
return 100
@@ -78,13 +76,13 @@ async def test_compress_command_reports_noop_without_success_banner():
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=agent_instance),
patch("agent.model_metadata.estimate_request_tokens_rough", side_effect=_estimate),
patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
):
result = await runner._handle_compress_command(_make_event())
assert "No changes from compression" in result
assert "Compressed:" not in result
assert "Approx request size: ~100 tokens (unchanged)" in result
assert "Rough transcript estimate: ~100 tokens (unchanged)" in result
agent_instance.shutdown_memory_provider.assert_called_once()
agent_instance.close.assert_called_once()
@@ -101,13 +99,11 @@ async def test_compress_command_explains_when_token_estimate_rises():
agent_instance = MagicMock()
agent_instance.shutdown_memory_provider = MagicMock()
agent_instance.close = MagicMock()
agent_instance._cached_system_prompt = ""
agent_instance.tools = None
agent_instance.context_compressor.has_content_to_compress.return_value = True
agent_instance.session_id = "sess-1"
agent_instance._compress_context.return_value = (compressed, "")
def _estimate(messages, **_kwargs):
def _estimate(messages):
if messages == history:
return 100
if messages == compressed:
@@ -118,12 +114,12 @@ async def test_compress_command_explains_when_token_estimate_rises():
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=agent_instance),
patch("agent.model_metadata.estimate_request_tokens_rough", side_effect=_estimate),
patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
):
result = await runner._handle_compress_command(_make_event())
assert "Compressed: 4 → 3 messages" in result
assert "Approx request size: ~100 → ~120 tokens" in result
assert "Rough transcript estimate: ~100 → ~120 tokens" in result
assert "denser summaries" in result
agent_instance.shutdown_memory_provider.assert_called_once()
agent_instance.close.assert_called_once()
@@ -147,8 +143,6 @@ async def test_compress_command_appends_warning_when_summary_generation_fails():
agent_instance = MagicMock()
agent_instance.shutdown_memory_provider = MagicMock()
agent_instance.close = MagicMock()
agent_instance._cached_system_prompt = ""
agent_instance.tools = None
agent_instance.context_compressor.has_content_to_compress.return_value = True
# Simulate summary-generation failure: fallback flag set, dropped count
# populated, error string captured.
@@ -160,7 +154,7 @@ async def test_compress_command_appends_warning_when_summary_generation_fails():
agent_instance.session_id = "sess-1"
agent_instance._compress_context.return_value = (compressed, "")
def _estimate(messages, **_kwargs):
def _estimate(messages):
if messages == history:
return 100
if messages == compressed:
@@ -171,7 +165,7 @@ async def test_compress_command_appends_warning_when_summary_generation_fails():
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=agent_instance),
patch("agent.model_metadata.estimate_request_tokens_rough", side_effect=_estimate),
patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
):
result = await runner._handle_compress_command(_make_event())
@@ -206,8 +200,6 @@ async def test_compress_command_surfaces_aux_model_failure_even_when_recovered()
agent_instance = MagicMock()
agent_instance.shutdown_memory_provider = MagicMock()
agent_instance.close = MagicMock()
agent_instance._cached_system_prompt = ""
agent_instance.tools = None
agent_instance.context_compressor.has_content_to_compress.return_value = True
# Fallback placeholder was NOT used — recovery succeeded.
agent_instance.context_compressor._last_summary_fallback_used = False
@@ -223,7 +215,7 @@ async def test_compress_command_surfaces_aux_model_failure_even_when_recovered()
agent_instance.session_id = "sess-1"
agent_instance._compress_context.return_value = (compressed, "")
def _estimate(messages, **_kwargs):
def _estimate(messages):
if messages == history:
return 100
if messages == compressed:
@@ -234,7 +226,7 @@ async def test_compress_command_surfaces_aux_model_failure_even_when_recovered()
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=agent_instance),
patch("agent.model_metadata.estimate_request_tokens_rough", side_effect=_estimate),
patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
):
result = await runner._handle_compress_command(_make_event())
-60
View File
@@ -9,7 +9,6 @@ from gateway.config import (
Platform,
PlatformConfig,
SessionResetPolicy,
StreamingConfig,
_apply_env_overrides,
load_gateway_config,
)
@@ -150,24 +149,6 @@ class TestSessionResetPolicy:
assert restored.notify is False
class TestStreamingConfig:
    """Checks how ``StreamingConfig.from_dict`` treats loosely-typed input."""

    def test_from_dict_coerces_quoted_false_enabled(self):
        """The string ``"false"`` for ``enabled`` must load as boolean False."""
        config = StreamingConfig.from_dict({"enabled": "false"})
        assert config.enabled is False

    def test_from_dict_malformed_numeric_values_fall_back_to_defaults(self):
        """Non-numeric strings in the numeric fields load as 1.0 / 40 / 60.0."""
        numeric_fields = (
            "edit_interval",
            "buffer_threshold",
            "fresh_final_after_seconds",
        )
        # Every numeric field receives the same unparseable value.
        config = StreamingConfig.from_dict(dict.fromkeys(numeric_fields, "oops"))
        assert config.edit_interval == 1.0
        assert config.buffer_threshold == 40
        assert config.fresh_final_after_seconds == 60.0
class TestGatewayConfigRoundtrip:
def test_full_roundtrip(self):
config = GatewayConfig(
@@ -379,38 +360,6 @@ class TestLoadGatewayConfig:
"C01ABC": "Code review mode",
}
def test_bridges_feishu_allow_bots_from_config_yaml_to_env(self, tmp_path, monkeypatch):
    """``feishu.allow_bots`` from config.yaml lands in ``FEISHU_ALLOW_BOTS`` when the env var is unset."""
    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()
    (home_dir / "config.yaml").write_text(
        "feishu:\n allow_bots: mentions\n",
        encoding="utf-8",
    )

    # Point the loader at the temp home and make sure the variable starts unset.
    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    monkeypatch.delenv("FEISHU_ALLOW_BOTS", raising=False)

    load_gateway_config()

    bridged_value = os.environ.get("FEISHU_ALLOW_BOTS")
    assert bridged_value == "mentions"
def test_feishu_allow_bots_env_takes_precedence_over_config_yaml(self, tmp_path, monkeypatch):
    """A pre-set ``FEISHU_ALLOW_BOTS`` is left untouched by the config.yaml value."""
    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()
    (home_dir / "config.yaml").write_text(
        "feishu:\n allow_bots: all\n",
        encoding="utf-8",
    )

    # The environment says "none" while the YAML says "all".
    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    monkeypatch.setenv("FEISHU_ALLOW_BOTS", "none")

    load_gateway_config()

    surviving_value = os.environ.get("FEISHU_ALLOW_BOTS")
    assert surviving_value == "none"
def test_invalid_quick_commands_in_config_yaml_are_ignored(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
@@ -506,15 +455,6 @@ class TestHomeChannelEnvOverrides:
{"SLACK_HOME_CHANNEL": "C123", "SLACK_HOME_CHANNEL_NAME": "Ops"},
("C123", "Ops"),
),
(
Platform.WHATSAPP,
PlatformConfig(enabled=True),
{
"WHATSAPP_HOME_CHANNEL": "1234567890@lid",
"WHATSAPP_HOME_CHANNEL_NAME": "Owner DM",
},
("1234567890@lid", "Owner DM"),
),
(
Platform.SIGNAL,
PlatformConfig(
-336
View File
@@ -1,336 +0,0 @@
"""Tests for EphemeralReply — system-notice auto-delete in gateway adapters.
Slash-command handlers in ``gateway/run.py`` can return an
``EphemeralReply`` wrapper to request auto-deletion of the reply message
after a TTL. The base adapter unwraps the sentinel before sending and
schedules a detached delete task when the platform supports
``delete_message``.
Covered:
1. ``_unwrap_ephemeral`` returns text + ttl for EphemeralReply, and
passes plain strings through unchanged.
2. TTL is zeroed on platforms that don't override ``delete_message``
(silent degrade message stays in place).
3. TTL is honored on platforms that DO override ``delete_message``.
4. ``_schedule_ephemeral_delete`` invokes ``delete_message`` after the
configured delay with the correct chat_id / message_id.
5. ``_process_message_background`` sends the unwrapped text (not the
sentinel object) and schedules deletion when appropriate.
6. The two busy-session bypass paths also unwrap + schedule.
"""
import asyncio
from unittest.mock import AsyncMock, patch
import pytest
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
BasePlatformAdapter,
EphemeralReply,
MessageEvent,
MessageType,
SendResult,
)
from gateway.session import SessionSource
class _NoDeleteAdapter(BasePlatformAdapter):
    """Test adapter that keeps the inherited ``delete_message`` (silent-degrade case)."""

    async def connect(self):
        """No-op connect."""
        return None

    async def disconnect(self):
        """No-op disconnect."""
        return None

    async def send(self, chat_id, content="", **kwargs):
        """Report a successful send with the fixed message id ``m-1``."""
        outcome = SendResult(success=True, message_id="m-1")
        return outcome

    async def get_chat_info(self, chat_id):
        """Return an empty chat-info mapping."""
        return {}
class _DeleteCapableAdapter(BasePlatformAdapter):
    """Test adapter that overrides ``delete_message`` and records every call."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # (chat_id, message_id) pairs, in the order delete_message received them.
        self.deleted: list[tuple[str, str]] = []

    async def connect(self):
        """No-op connect."""
        return None

    async def disconnect(self):
        """No-op disconnect."""
        return None

    async def send(self, chat_id, content="", **kwargs):
        """Report a successful send with the fixed message id ``m-2``."""
        outcome = SendResult(success=True, message_id="m-2")
        return outcome

    async def get_chat_info(self, chat_id):
        """Return an empty chat-info mapping."""
        return {}

    async def delete_message(self, chat_id: str, message_id: str) -> bool:
        """Record the requested deletion and report success."""
        record = (chat_id, message_id)
        self.deleted.append(record)
        return True
def _no_delete_adapter():
    """Build a Telegram-flavoured ``_NoDeleteAdapter`` with an enabled dummy config."""
    config = PlatformConfig(enabled=True, token="t")
    return _NoDeleteAdapter(config, Platform.TELEGRAM)
def _delete_adapter():
    """Build a Telegram-flavoured ``_DeleteCapableAdapter`` with an enabled dummy config."""
    config = PlatformConfig(enabled=True, token="t")
    return _DeleteCapableAdapter(config, Platform.TELEGRAM)
def _make_event(text="/stop", chat_id="42"):
    """Create a Telegram text ``MessageEvent`` from user ``u-1`` in *chat_id*."""
    origin = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id=chat_id,
        user_id="u-1",
    )
    event = MessageEvent(
        text=text,
        message_id="msg-1",
        source=origin,
        message_type=MessageType.TEXT,
    )
    return event
# ---------------------------------------------------------------------------
# _unwrap_ephemeral
# ---------------------------------------------------------------------------
def test_unwrap_plain_string_is_passthrough():
    """A plain string is returned unchanged together with a TTL of 0."""
    body, delay = _delete_adapter()._unwrap_ephemeral("hello")
    assert body == "hello"
    assert delay == 0
def test_unwrap_none_is_passthrough():
    """``None`` is returned as ``None`` together with a TTL of 0."""
    body, delay = _delete_adapter()._unwrap_ephemeral(None)
    assert body is None
    assert delay == 0
def test_unwrap_ephemeral_explicit_ttl_on_capable_adapter():
    """An explicit TTL is kept when the adapter overrides ``delete_message``."""
    reply = EphemeralReply("bye", ttl_seconds=60)
    body, delay = _delete_adapter()._unwrap_ephemeral(reply)
    assert body == "bye"
    assert delay == 60
def test_unwrap_ephemeral_zeros_ttl_on_incapable_adapter():
    """Platforms without delete_message should silently degrade to normal send."""
    reply = EphemeralReply("bye", ttl_seconds=60)
    body, delay = _no_delete_adapter()._unwrap_ephemeral(reply)
    assert body == "bye"
    # The requested 60s is dropped: the message will simply stay in place.
    assert delay == 0
def test_unwrap_ephemeral_default_ttl_from_config():
    """Without an explicit TTL, the value from ``_get_ephemeral_system_ttl_default`` is used."""
    adapter = _delete_adapter()
    configured_default = patch.object(
        adapter, "_get_ephemeral_system_ttl_default", return_value=120
    )
    with configured_default:
        body, delay = adapter._unwrap_ephemeral(EphemeralReply("bye"))
    assert body == "bye"
    assert delay == 120
def test_unwrap_ephemeral_default_ttl_zero_disables():
    """Config default of 0 (the shipped default) means the feature is off."""
    adapter = _delete_adapter()
    configured_default = patch.object(
        adapter, "_get_ephemeral_system_ttl_default", return_value=0
    )
    with configured_default:
        body, delay = adapter._unwrap_ephemeral(EphemeralReply("bye"))
    assert body == "bye"
    assert delay == 0
def test_unwrap_ephemeral_handles_unreadable_config():
    """A default-TTL lookup that raises must yield TTL 0 instead of propagating."""
    adapter = _delete_adapter()
    failing_lookup = patch.object(
        adapter,
        "_get_ephemeral_system_ttl_default",
        side_effect=RuntimeError("boom"),
    )
    with failing_lookup:
        body, delay = adapter._unwrap_ephemeral(EphemeralReply("bye"))
    # Fall back to 0 rather than crashing the handler pipeline.
    assert body == "bye"
    assert delay == 0
# ---------------------------------------------------------------------------
# _schedule_ephemeral_delete
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_schedule_ephemeral_delete_calls_delete_after_ttl():
# Purpose: scheduling an ephemeral delete with ttl_seconds=5 must sleep for 5
# and then call ``delete_message`` with the same chat_id / message_id.
# NOTE(review): indentation is not visible in this view, so the extent of the
# ``with`` block below (in particular whether the pump loop runs while
# ``asyncio.sleep`` is still patched) cannot be confirmed here — verify
# against the original file before restructuring.
adapter = _delete_adapter()
# Use a very short TTL to keep the test fast — the implementation
# floors sleeps at 1s via ``max(1, int(ttl_seconds))``. Patch asyncio.sleep
# inside the module under test; the test body uses the real one for
# scheduler pumping.
import gateway.platforms.base as base_module
# Durations passed to the patched sleep, in call order.
sleeps: list[float] = []
# Captured before patching so the pump can still yield to the event loop.
_real_sleep = base_module.asyncio.sleep
async def _fake_sleep(duration):
sleeps.append(duration)
# Yield control so the rest of the task body can run.
await _real_sleep(0)
with patch.object(base_module.asyncio, "sleep", _fake_sleep):
adapter._schedule_ephemeral_delete(
chat_id="42", message_id="m-2", ttl_seconds=5
)
# Let the spawned task run.
for _ in range(5):
await _real_sleep(0)
# Only the ttl sleep shows up — the test pump uses the real sleep.
assert 5 in sleeps
assert adapter.deleted == [("42", "m-2")]
@pytest.mark.asyncio
async def test_schedule_ephemeral_delete_swallows_errors():
# Purpose: a ``delete_message`` that raises must not surface an exception
# from the scheduled delete. The test has no explicit assertion; it passes
# as long as nothing propagates.
# NOTE(review): indentation is not visible in this view, so whether the pump
# loop runs inside or after the ``with`` block cannot be confirmed here.
adapter = _delete_adapter()
async def _boom(*a, **kw):
raise RuntimeError("permission denied")
# Instance-level override: every delete attempt now fails.
adapter.delete_message = _boom # type: ignore[assignment]
with patch("gateway.platforms.base.asyncio.sleep", AsyncMock()):
adapter._schedule_ephemeral_delete(
chat_id="42", message_id="m-2", ttl_seconds=1
)
# No exception should propagate even though delete_message raised.
for _ in range(5):
await asyncio.sleep(0)
def test_schedule_ephemeral_delete_outside_event_loop_is_noop():
    """No running loop → no crash, silently drops the request."""
    adapter = _delete_adapter()
    request = {"chat_id": "42", "message_id": "m-2", "ttl_seconds": 1}
    # This is a plain sync test (no pytest.mark.asyncio), so no loop is running.
    # The call must not raise.
    adapter._schedule_ephemeral_delete(**request)
    assert adapter.deleted == []
# ---------------------------------------------------------------------------
# _process_message_background unwraps EphemeralReply before send
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_process_message_unwraps_ephemeral_before_send():
"""The adapter must send the wrapper's .text, never the wrapper object."""
# NOTE(review): indentation is not visible in this view, so whether the pump
# loop below runs inside or after the ``with`` block cannot be confirmed here.
adapter = _delete_adapter()
# Replace the real send path; the returned message_id is what the delete
# assertion at the end expects to see.
adapter._send_with_retry = AsyncMock(
return_value=SendResult(success=True, message_id="sent-1")
)
async def _handler(evt):
return EphemeralReply("⚡ Stopped.", ttl_seconds=5)
adapter.set_message_handler(_handler)
# Durations passed to the patched sleep (recorded, never asserted on).
sleeps: list[float] = []
async def _fake_sleep(duration):
sleeps.append(duration)
event = _make_event()
session_key = "agent:main:telegram:private:42"
with patch("gateway.platforms.base.asyncio.sleep", _fake_sleep), patch.object(
adapter, "_keep_typing", new=AsyncMock()
):
await adapter._process_message_background(event, session_key)
# Pump until the detached delete task completes.
for _ in range(10):
await asyncio.sleep(0)
# Sent text is the unwrapped string, NOT repr(EphemeralReply(...))
adapter._send_with_retry.assert_called_once()
sent_text = adapter._send_with_retry.call_args.kwargs["content"]
assert sent_text == "⚡ Stopped."
# Auto-delete scheduled using the returned message_id
assert ("42", "sent-1") in adapter.deleted
@pytest.mark.asyncio
async def test_process_message_incapable_platform_does_not_schedule_delete():
# Purpose: on an adapter whose class does not override ``delete_message``,
# an EphemeralReply is still sent as plain text but no delete is attempted.
# NOTE(review): indentation is not visible in this view, so whether the pump
# loop below runs inside or after the ``with`` block cannot be confirmed here.
adapter = _no_delete_adapter()
adapter._send_with_retry = AsyncMock(
return_value=SendResult(success=True, message_id="sent-1")
)
async def _handler(evt):
return EphemeralReply("⚡ Stopped.", ttl_seconds=5)
adapter.set_message_handler(_handler)
# Spy on delete_message to confirm it is NOT invoked.
delete_calls: list = []
async def _spy_delete(chat_id, message_id):
delete_calls.append((chat_id, message_id))
return False
adapter.delete_message = _spy_delete # type: ignore[assignment]
event = _make_event()
session_key = "agent:main:telegram:private:42"
with patch("gateway.platforms.base.asyncio.sleep", AsyncMock()), patch.object(
adapter, "_keep_typing", new=AsyncMock()
):
await adapter._process_message_background(event, session_key)
for _ in range(10):
await asyncio.sleep(0)
# Send happened with the unwrapped text...
adapter._send_with_retry.assert_called_once()
assert adapter._send_with_retry.call_args.kwargs["content"] == "⚡ Stopped."
# ...but delete was never scheduled because the capability check skipped
# the schedule call (TTL was zeroed in _unwrap_ephemeral).
# Note: the capability gate on _unwrap_ephemeral checks for
# ``type(adapter).delete_message is BasePlatformAdapter.delete_message``.
# Monkeypatching the instance does NOT change the class, so this test
# verifies the gate uses the class method to detect capability.
assert delete_calls == []
@pytest.mark.asyncio
async def test_process_message_plain_string_behaves_unchanged():
# Purpose: a handler returning a plain string is sent as-is and never
# triggers an auto-delete, even on a delete-capable adapter.
# NOTE(review): indentation is not visible in this view, so whether the pump
# loop below runs inside or after the ``with`` block cannot be confirmed here.
adapter = _delete_adapter()
adapter._send_with_retry = AsyncMock(
return_value=SendResult(success=True, message_id="sent-1")
)
async def _handler(evt):
return "plain reply"
adapter.set_message_handler(_handler)
event = _make_event()
session_key = "agent:main:telegram:private:42"
with patch("gateway.platforms.base.asyncio.sleep", AsyncMock()), patch.object(
adapter, "_keep_typing", new=AsyncMock()
):
await adapter._process_message_background(event, session_key)
for _ in range(5):
await asyncio.sleep(0)
adapter._send_with_retry.assert_called_once()
assert adapter._send_with_retry.call_args.kwargs["content"] == "plain reply"
assert adapter.deleted == [] # no auto-delete for plain replies
+115 -258
View File
@@ -8,7 +8,6 @@ import time
import unittest
from pathlib import Path
from types import SimpleNamespace
from typing import Dict
from unittest.mock import AsyncMock, Mock, patch
from gateway.platforms.base import ProcessingOutcome
@@ -558,16 +557,6 @@ class TestAdapterModule(unittest.TestCase):
self.assertEqual(fake_client._ping_interval, 4)
def _admits_group(adapter, message, sender_id, chat_id=""):
    """Report whether ``adapter._admit`` lets a group message through.

    The message is labelled ``chat_type="group"`` when it carries no
    ``chat_type`` attribute, and its ``chat_id`` is overwritten when a truthy
    *chat_id* is given. The sender is always presented as a ``"user"``.

    Returns True exactly when ``adapter._admit(sender, message)`` is ``None``.
    """
    lacks_chat_type = not hasattr(message, "chat_type")
    if lacks_chat_type:
        message.chat_type = "group"
    if chat_id:
        message.chat_id = chat_id

    human_sender = SimpleNamespace(sender_type="user", sender_id=sender_id)
    verdict = adapter._admit(human_sender, message)
    return verdict is None
class TestAdapterBehavior(unittest.TestCase):
@patch.dict(os.environ, {}, clear=True)
def test_build_event_handler_registers_reaction_and_card_processors(self):
@@ -700,67 +689,6 @@ class TestAdapterBehavior(unittest.TestCase):
adapter._on_reaction_event("im.message.reaction.created_v1", data)
run_threadsafe.assert_called_once()
def _build_reaction_adapter(self, *, msg_sender_id: str):
    """Build a FeishuAdapter whose GET-message call returns one app-sent message.

    *msg_sender_id* becomes the ``id`` of that message's sender. The adapter's
    own identity is fixed to ``cli_self_app`` / ``ou_self_bot`` / ``u_self_bot``,
    and the downstream handlers are replaced with mocks.
    """
    from gateway.config import PlatformConfig
    from gateway.platforms.feishu import FeishuAdapter

    # The single message the fake GET im/v1/messages endpoint hands back.
    fetched_message = SimpleNamespace(
        sender=SimpleNamespace(sender_type="app", id=msg_sender_id, id_type="app_id"),
        chat_id="oc_chat",
        chat_type="group",
    )
    get_response = SimpleNamespace(
        success=lambda: True,
        data=SimpleNamespace(items=[fetched_message]),
    )
    message_api = SimpleNamespace(get=Mock(return_value=get_response))

    adapter = FeishuAdapter(PlatformConfig())
    adapter._app_id = "cli_self_app"
    adapter._bot_open_id = "ou_self_bot"
    adapter._bot_user_id = "u_self_bot"
    adapter._client = SimpleNamespace(
        im=SimpleNamespace(v1=SimpleNamespace(message=message_api))
    )

    # Stub out everything downstream of the sender lookup.
    adapter._build_get_message_request = Mock(return_value=object())
    adapter._handle_message_with_guards = AsyncMock()
    adapter._resolve_sender_profile = AsyncMock(
        return_value={"user_id": "u_human", "user_name": "Human", "user_id_alt": None}
    )
    adapter.get_chat_info = AsyncMock(return_value={"name": "Test Chat"})
    return adapter
@patch.dict(os.environ, {}, clear=True)
def test_reaction_on_peer_bot_message_is_not_routed(self):
    """A reaction on a message sent by a different app must not reach the handler."""
    # GET im/v1/messages sender for bot messages carries id=app_id; a peer
    # bot's message has a different app_id than ours, so it must be dropped.
    adapter = self._build_reaction_adapter(msg_sender_id="cli_peer_app")

    reactor = SimpleNamespace(open_id="ou_human", user_id=None, union_id=None)
    reaction = SimpleNamespace(
        message_id="om_peer_msg",
        user_id=reactor,
        reaction_type=SimpleNamespace(emoji_type="THUMBSUP"),
    )
    payload = SimpleNamespace(event=reaction)

    asyncio.run(
        adapter._handle_reaction_event("im.message.reaction.created_v1", payload)
    )

    adapter._handle_message_with_guards.assert_not_awaited()
@patch.dict(os.environ, {}, clear=True)
def test_reaction_on_our_own_bot_message_is_routed(self):
    """A reaction on a message sent by our own app is passed to the handler once."""
    adapter = self._build_reaction_adapter(msg_sender_id="cli_self_app")

    reactor = SimpleNamespace(open_id="ou_human", user_id=None, union_id=None)
    reaction = SimpleNamespace(
        message_id="om_self_msg",
        user_id=reactor,
        reaction_type=SimpleNamespace(emoji_type="THUMBSUP"),
    )
    payload = SimpleNamespace(event=reaction)

    asyncio.run(
        adapter._handle_reaction_event("im.message.reaction.created_v1", payload)
    )

    adapter._handle_message_with_guards.assert_awaited_once()
@patch.dict(os.environ, {"FEISHU_GROUP_POLICY": "open"}, clear=True)
def test_group_message_requires_mentions_even_when_policy_open(self):
from gateway.config import PlatformConfig
@@ -769,10 +697,10 @@ class TestAdapterBehavior(unittest.TestCase):
adapter = FeishuAdapter(PlatformConfig())
message = SimpleNamespace(mentions=[])
sender_id = SimpleNamespace(open_id="ou_any", user_id=None)
self.assertFalse(_admits_group(adapter, message, sender_id, ""))
self.assertFalse(adapter._should_accept_group_message(message, sender_id, ""))
message_with_mention = SimpleNamespace(mentions=[SimpleNamespace(key="@_user_1")])
self.assertFalse(_admits_group(adapter, message_with_mention, sender_id, ""))
self.assertFalse(adapter._should_accept_group_message(message_with_mention, sender_id, ""))
@patch.dict(os.environ, {"FEISHU_GROUP_POLICY": "open"}, clear=True)
def test_group_message_with_other_user_mention_is_rejected_when_bot_identity_unknown(self):
@@ -786,10 +714,59 @@ class TestAdapterBehavior(unittest.TestCase):
id=SimpleNamespace(open_id="ou_other", user_id="u_other"),
)
self.assertFalse(
_admits_group(adapter, SimpleNamespace(mentions=[other_mention]), sender_id, "")
self.assertFalse(adapter._should_accept_group_message(SimpleNamespace(mentions=[other_mention]), sender_id, ""))
@patch.dict(
    os.environ,
    {"FEISHU_BOT_OPEN_ID": "ou_hermes", "FEISHU_BOT_USER_ID": "u_hermes"},
    clear=True,
)
def test_other_bot_sender_is_not_treated_as_self_sent_message(self):
    """A bot sender whose ids differ from the configured ones is not "self"."""
    from gateway.config import PlatformConfig
    from gateway.platforms.feishu import FeishuAdapter

    foreign_ids = SimpleNamespace(open_id="ou_other_bot", user_id="u_other_bot")
    foreign_event = SimpleNamespace(
        sender=SimpleNamespace(sender_type="bot", sender_id=foreign_ids)
    )

    adapter = FeishuAdapter(PlatformConfig())

    self.assertFalse(adapter._is_self_sent_bot_message(foreign_event))
@patch.dict(
    os.environ,
    {"FEISHU_BOT_OPEN_ID": "ou_hermes", "FEISHU_BOT_USER_ID": "u_hermes"},
    clear=True,
)
def test_self_bot_sender_is_treated_as_self_sent_message(self):
    """Matching either the configured open_id or the configured user_id counts as "self"."""
    from gateway.config import PlatformConfig
    from gateway.platforms.feishu import FeishuAdapter

    def _event(sender_type, open_id, user_id):
        ids = SimpleNamespace(open_id=open_id, user_id=user_id)
        return SimpleNamespace(
            sender=SimpleNamespace(sender_type=sender_type, sender_id=ids)
        )

    adapter = FeishuAdapter(PlatformConfig())

    # Only the open_id matches, sender_type "bot".
    by_open_id = _event("bot", "ou_hermes", "u_other")
    # Only the user_id matches, sender_type "app".
    by_user_id = _event("app", "ou_other", "u_hermes")

    self.assertTrue(adapter._is_self_sent_bot_message(by_open_id))
    self.assertTrue(adapter._is_self_sent_bot_message(by_user_id))
@patch.dict(
os.environ,
{
@@ -815,14 +792,14 @@ class TestAdapterBehavior(unittest.TestCase):
)
self.assertTrue(
_admits_group(adapter,
adapter._should_accept_group_message(
mentioned,
SimpleNamespace(open_id="ou_allowed", user_id=None),
"",
)
)
self.assertFalse(
_admits_group(adapter,
adapter._should_accept_group_message(
mentioned,
SimpleNamespace(open_id="ou_blocked", user_id=None),
"",
@@ -851,14 +828,14 @@ class TestAdapterBehavior(unittest.TestCase):
)
self.assertTrue(
_admits_group(adapter,
adapter._should_accept_group_message(
message,
SimpleNamespace(open_id="ou_alice", user_id=None),
"oc_chat_a",
)
)
self.assertFalse(
_admits_group(adapter,
adapter._should_accept_group_message(
message,
SimpleNamespace(open_id="ou_charlie", user_id=None),
"oc_chat_a",
@@ -887,14 +864,14 @@ class TestAdapterBehavior(unittest.TestCase):
)
self.assertTrue(
_admits_group(adapter,
adapter._should_accept_group_message(
message,
SimpleNamespace(open_id="ou_alice", user_id=None),
"oc_chat_b",
)
)
self.assertFalse(
_admits_group(adapter,
adapter._should_accept_group_message(
message,
SimpleNamespace(open_id="ou_blocked", user_id=None),
"oc_chat_b",
@@ -923,14 +900,14 @@ class TestAdapterBehavior(unittest.TestCase):
)
self.assertTrue(
_admits_group(adapter,
adapter._should_accept_group_message(
message,
SimpleNamespace(open_id="ou_admin", user_id=None),
"oc_chat_c",
)
)
self.assertFalse(
_admits_group(adapter,
adapter._should_accept_group_message(
message,
SimpleNamespace(open_id="ou_regular", user_id=None),
"oc_chat_c",
@@ -959,14 +936,14 @@ class TestAdapterBehavior(unittest.TestCase):
)
self.assertTrue(
_admits_group(adapter,
adapter._should_accept_group_message(
message,
SimpleNamespace(open_id="ou_admin", user_id=None),
"oc_chat_d",
)
)
self.assertFalse(
_admits_group(adapter,
adapter._should_accept_group_message(
message,
SimpleNamespace(open_id="ou_regular", user_id=None),
"oc_chat_d",
@@ -996,7 +973,7 @@ class TestAdapterBehavior(unittest.TestCase):
)
self.assertTrue(
_admits_group(adapter,
adapter._should_accept_group_message(
message,
SimpleNamespace(open_id="ou_admin", user_id=None),
"oc_chat_e",
@@ -1020,7 +997,7 @@ class TestAdapterBehavior(unittest.TestCase):
)
self.assertTrue(
_admits_group(adapter,
adapter._should_accept_group_message(
message,
SimpleNamespace(open_id="ou_anyone", user_id=None),
"oc_chat_unknown",
@@ -1045,12 +1022,8 @@ class TestAdapterBehavior(unittest.TestCase):
id=SimpleNamespace(open_id="ou_other", user_id="u_other"),
)
self.assertTrue(
_admits_group(adapter, SimpleNamespace(mentions=[bot_mention]), sender_id, "")
)
self.assertFalse(
_admits_group(adapter, SimpleNamespace(mentions=[other_mention]), sender_id, "")
)
self.assertTrue(adapter._should_accept_group_message(SimpleNamespace(mentions=[bot_mention]), sender_id, ""))
self.assertFalse(adapter._should_accept_group_message(SimpleNamespace(mentions=[other_mention]), sender_id, ""))
@patch.dict(os.environ, {"FEISHU_GROUP_POLICY": "open"}, clear=True)
def test_group_message_matches_bot_name_when_only_name_available(self):
@@ -1075,12 +1048,8 @@ class TestAdapterBehavior(unittest.TestCase):
id=SimpleNamespace(open_id=None, user_id=None),
)
self.assertTrue(
_admits_group(adapter, SimpleNamespace(mentions=[name_only_mention]), sender_id, "")
)
self.assertFalse(
_admits_group(adapter, SimpleNamespace(mentions=[different_mention]), sender_id, "")
)
self.assertTrue(adapter._should_accept_group_message(SimpleNamespace(mentions=[name_only_mention]), sender_id, ""))
self.assertFalse(adapter._should_accept_group_message(SimpleNamespace(mentions=[different_mention]), sender_id, ""))
# Case 2: bot's open_id IS known — a same-name human with different
# open_id must NOT admit (IDs override names).
@@ -1097,17 +1066,8 @@ class TestAdapterBehavior(unittest.TestCase):
id=SimpleNamespace(open_id="ou_bot", user_id=None),
)
self.assertFalse(
_admits_group(
adapter2,
SimpleNamespace(mentions=[same_name_other_id_mention]),
sender_id,
"",
)
)
self.assertTrue(
_admits_group(adapter2, SimpleNamespace(mentions=[bot_mention]), sender_id, "")
)
self.assertFalse(adapter2._should_accept_group_message(SimpleNamespace(mentions=[same_name_other_id_mention]), sender_id, ""))
self.assertTrue(adapter2._should_accept_group_message(SimpleNamespace(mentions=[bot_mention]), sender_id, ""))
@patch.dict(os.environ, {}, clear=True)
def test_extract_post_message_as_text(self):
@@ -1451,7 +1411,6 @@ class TestAdapterBehavior(unittest.TestCase):
data=SimpleNamespace(event=SimpleNamespace(message=message)),
message=message,
sender_id=SimpleNamespace(open_id="ou_user", user_id=None, union_id=None),
is_bot=False,
chat_type="p2p",
message_id="om_command",
)
@@ -1563,14 +1522,13 @@ class TestAdapterBehavior(unittest.TestCase):
user_id="u_user",
union_id="on_union",
)
sender = SimpleNamespace(sender_type="user", sender_id=sender_id)
data = SimpleNamespace(event=SimpleNamespace(message=message, sender=sender))
data = SimpleNamespace(event=SimpleNamespace(message=message, sender=SimpleNamespace(sender_id=sender_id)))
asyncio.run(
adapter._process_inbound_message(
data=data,
message=message,
sender_id=sender.sender_id,
sender_id=sender_id,
chat_type="p2p",
message_id="om_text",
)
@@ -1803,14 +1761,13 @@ class TestAdapterBehavior(unittest.TestCase):
message_id="om_group_text",
)
sender_id = SimpleNamespace(open_id="ou_user", user_id=None, union_id=None)
sender = SimpleNamespace(sender_type="user", sender_id=sender_id)
data = SimpleNamespace(event=SimpleNamespace(message=message))
asyncio.run(
adapter._process_inbound_message(
data=data,
message=message,
sender_id=sender.sender_id,
sender_id=sender_id,
chat_type="group",
message_id="om_group_text",
)
@@ -1848,7 +1805,6 @@ class TestAdapterBehavior(unittest.TestCase):
data=SimpleNamespace(event=SimpleNamespace(message=message)),
message=message,
sender_id=SimpleNamespace(open_id="ou_user", user_id=None, union_id=None),
is_bot=False,
chat_type="p2p",
message_id="om_reply",
)
@@ -2711,12 +2667,11 @@ class TestAdapterBehavior(unittest.TestCase):
@unittest.skipUnless(_HAS_LARK_OAPI, "lark-oapi not installed")
class TestHydrateBotIdentity(unittest.TestCase):
"""Hydration of bot identity via ``/open-apis/bot/v3/info``.
"""Hydration of bot identity via /open-apis/bot/v3/info and application info.
Covers the manual-setup path where ``FEISHU_BOT_OPEN_ID`` /
``FEISHU_BOT_NAME`` are not configured — hydration populates them so
self-echo protection and group @mention gating both have something to
match against.
Covers the manual-setup path where FEISHU_BOT_OPEN_ID / FEISHU_BOT_USER_ID
are not configured. Hydration must populate _bot_open_id so that
_is_self_sent_bot_message() can filter the adapter's own outbound echoes.
"""
def _make_adapter(self):
@@ -2745,6 +2700,11 @@ class TestHydrateBotIdentity(unittest.TestCase):
self.assertEqual(adapter._bot_open_id, "ou_hermes_hydrated")
self.assertEqual(adapter._bot_name, "Hermes Bot")
# Application-info fallback must NOT run when bot_name is already set.
self.assertFalse(
adapter._client.application.v6.application.get.called
if hasattr(adapter._client, "application") else False
)
@patch.dict(
os.environ,
@@ -2761,6 +2721,7 @@ class TestHydrateBotIdentity(unittest.TestCase):
asyncio.run(adapter._hydrate_bot_identity())
# Neither probe should run — both fields are already populated.
adapter._client.request.assert_not_called()
self.assertEqual(adapter._bot_open_id, "ou_env")
self.assertEqual(adapter._bot_name, "Env Hermes")
@@ -2805,6 +2766,33 @@ class TestHydrateBotIdentity(unittest.TestCase):
self.assertEqual(adapter._bot_open_id, "")
self.assertEqual(adapter._bot_name, "Fallback Bot")
@patch.dict(os.environ, {}, clear=True)
def test_hydrated_open_id_enables_self_send_filter(self):
    """E2E: after hydration, _is_self_sent_bot_message() rejects adapter's own id."""

    def _bot_event(open_id):
        ids = SimpleNamespace(open_id=open_id, user_id="")
        return SimpleNamespace(
            sender=SimpleNamespace(sender_type="bot", sender_id=ids)
        )

    adapter = self._make_adapter()
    adapter._client = Mock()

    # Canned bot-info response carrying the open_id the adapter should adopt.
    bot_info = {"code": 0, "bot": {"bot_name": "Hermes", "open_id": "ou_hermes"}}
    raw_body = json.dumps(bot_info).encode("utf-8")
    canned_response = SimpleNamespace(raw=SimpleNamespace(content=raw_body))
    adapter._client.request = Mock(return_value=canned_response)

    asyncio.run(adapter._hydrate_bot_identity())

    self_event = _bot_event("ou_hermes")
    peer_event = _bot_event("ou_peer_bot")
    self.assertTrue(adapter._is_self_sent_bot_message(self_event))
    self.assertFalse(adapter._is_self_sent_bot_message(peer_event))
@unittest.skipUnless(_HAS_LARK_OAPI, "lark-oapi not installed")
class TestPendingInboundQueue(unittest.TestCase):
@@ -3149,7 +3137,7 @@ class TestGroupMentionAtAll(unittest.TestCase):
mentions=[],
)
sender_id = SimpleNamespace(open_id="ou_any", user_id=None)
self.assertTrue(_admits_group(adapter, message, sender_id, ""))
self.assertTrue(adapter._should_accept_group_message(message, sender_id, ""))
@patch.dict(os.environ, {"FEISHU_GROUP_POLICY": "allowlist", "FEISHU_ALLOWED_USERS": "ou_allowed"}, clear=True)
def test_at_all_still_requires_policy_gate(self):
@@ -3161,15 +3149,15 @@ class TestGroupMentionAtAll(unittest.TestCase):
message = SimpleNamespace(content='{"text":"@_all attention"}', mentions=[])
# Non-allowlisted user — should be blocked even with @_all.
blocked_sender = SimpleNamespace(open_id="ou_blocked", user_id=None)
self.assertFalse(_admits_group(adapter, message, blocked_sender, ""))
self.assertFalse(adapter._should_accept_group_message(message, blocked_sender, ""))
# Allowlisted user — should pass.
allowed_sender = SimpleNamespace(open_id="ou_allowed", user_id=None)
self.assertTrue(_admits_group(adapter, message, allowed_sender, ""))
self.assertTrue(adapter._should_accept_group_message(message, allowed_sender, ""))
@unittest.skipUnless(_HAS_LARK_OAPI, "lark-oapi not installed")
class TestSenderNameResolution(unittest.TestCase):
"""Tests for _resolve_sender_name_from_api (contact API + cache)."""
"""Tests for _resolve_sender_name_from_api."""
@patch.dict(os.environ, {}, clear=True)
def test_returns_none_when_client_is_none(self):
@@ -3273,137 +3261,6 @@ class TestSenderNameResolution(unittest.TestCase):
self.assertIsNone(result)
@unittest.skipUnless(_HAS_LARK_OAPI, "lark-oapi not installed")
class TestBotNameResolution(unittest.TestCase):
"""Tests for the bot branch of _resolve_sender_name_from_api (basic_batch API + shared cache)."""
@staticmethod
def _batch_payload(bots: Dict[str, str]):
import json as _json
body = {
oid: {"bot_id": oid, "name": name, "i18n_names": {"en_us": name}}
for oid, name in bots.items()
}
return _json.dumps({"code": 0, "msg": "", "data": {"bots": body, "failed_bots": {}}}).encode()
def _build_adapter_with_bots(self, bots: Dict[str, str]):
    """Return ``(adapter, calls)`` where the adapter's client answers with *bots*.

    Every request passed to the fake client is appended to ``calls`` and
    answered with ``_batch_payload(bots)`` wrapped as ``response.raw.content``.
    """
    from gateway.config import PlatformConfig
    from gateway.platforms.feishu import FeishuAdapter

    recorded_requests = []

    def _fake_request(request):
        recorded_requests.append(request)
        # The payload is built per call, from whatever *bots* holds at that moment.
        body = self._batch_payload(bots)
        return SimpleNamespace(raw=SimpleNamespace(content=body))

    adapter = FeishuAdapter(PlatformConfig())
    adapter._client = SimpleNamespace(request=_fake_request)
    return adapter, recorded_requests
@patch.dict(os.environ, {}, clear=True)
def test_returns_cached_bot_name_without_api_call(self):
from gateway.config import PlatformConfig
from gateway.platforms.feishu import FeishuAdapter
adapter = FeishuAdapter(PlatformConfig())
adapter._sender_name_cache["ou_peer"] = ("Peer Bot", time.time() + 600)
adapter._client = SimpleNamespace(
request=lambda _r: (_ for _ in ()).throw(RuntimeError("should not fetch"))
)
result = asyncio.run(adapter._resolve_sender_name_from_api("ou_peer", is_bot=True))
self.assertEqual(result, "Peer Bot")
@patch.dict(os.environ, {}, clear=True)
def test_fetches_and_caches_bot_name(self):
adapter, calls = self._build_adapter_with_bots({"ou_peer": "Peer Bot"})
async def _direct(func, *args, **kwargs):
return func(*args, **kwargs)
with patch("gateway.platforms.feishu.asyncio.to_thread", side_effect=_direct):
result = asyncio.run(adapter._resolve_sender_name_from_api("ou_peer", is_bot=True))
self.assertEqual(result, "Peer Bot")
self.assertEqual(adapter._sender_name_cache["ou_peer"][0], "Peer Bot")
self.assertEqual(len(calls), 1)
self.assertIn("/open-apis/bot/v3/bots/basic_batch", calls[0].uri)
# Feishu expects repeated ?bot_ids= params, not comma-joined.
self.assertEqual(calls[0].queries, [("bot_ids", "ou_peer")])
@patch.dict(os.environ, {}, clear=True)
def test_api_failure_returns_none_and_does_not_poison_cache(self):
from gateway.config import PlatformConfig
from gateway.platforms.feishu import FeishuAdapter
adapter = FeishuAdapter(PlatformConfig())
def _broken_request(_req):
raise RuntimeError("API down")
adapter._client = SimpleNamespace(request=_broken_request)
async def _direct(func, *args, **kwargs):
return func(*args, **kwargs)
with patch("gateway.platforms.feishu.asyncio.to_thread", side_effect=_direct):
result = asyncio.run(adapter._resolve_sender_name_from_api("ou_peer", is_bot=True))
self.assertIsNone(result)
self.assertNotIn("ou_peer", adapter._sender_name_cache)
@patch.dict(os.environ, {}, clear=True)
def test_bot_absent_from_response_is_not_cached(self):
"""Bot not in ``data.bots`` (e.g. landed in ``failed_bots``) → no
cache entry, next lookup re-fetches."""
adapter, _ = self._build_adapter_with_bots({"ou_other": "Other Bot"})
async def _direct(func, *args, **kwargs):
return func(*args, **kwargs)
with patch("gateway.platforms.feishu.asyncio.to_thread", side_effect=_direct):
result = asyncio.run(adapter._resolve_sender_name_from_api("ou_ghost", is_bot=True))
self.assertIsNone(result)
self.assertNotIn("ou_ghost", adapter._sender_name_cache)
@patch.dict(os.environ, {}, clear=True)
def test_empty_name_in_response_is_negative_cached(self):
"""API returns name="" → cache "" so repeat lookups short-circuit."""
adapter, calls = self._build_adapter_with_bots({"ou_nameless": ""})
async def _direct(func, *args, **kwargs):
return func(*args, **kwargs)
with patch("gateway.platforms.feishu.asyncio.to_thread", side_effect=_direct):
first = asyncio.run(adapter._resolve_sender_name_from_api("ou_nameless", is_bot=True))
second = asyncio.run(adapter._resolve_sender_name_from_api("ou_nameless", is_bot=True))
self.assertIsNone(first)
self.assertIsNone(second)
self.assertEqual(adapter._sender_name_cache["ou_nameless"][0], "")
self.assertEqual(len(calls), 1)
@patch.dict(os.environ, {}, clear=True)
def test_non_zero_code_returns_none(self):
from gateway.config import PlatformConfig
from gateway.platforms.feishu import FeishuAdapter
adapter = FeishuAdapter(PlatformConfig())
error_payload = b'{"code":99991663,"msg":"permission denied"}'
adapter._client = SimpleNamespace(
request=lambda _r: SimpleNamespace(raw=SimpleNamespace(content=error_payload))
)
async def _direct(func, *args, **kwargs):
return func(*args, **kwargs)
with patch("gateway.platforms.feishu.asyncio.to_thread", side_effect=_direct):
result = asyncio.run(adapter._resolve_sender_name_from_api("ou_peer", is_bot=True))
self.assertIsNone(result)
self.assertNotIn("ou_peer", adapter._sender_name_cache)
@unittest.skipUnless(_HAS_LARK_OAPI, "lark-oapi not installed")
class TestProcessingReactions(unittest.TestCase):
"""Typing on start → removed on SUCCESS, swapped for CrossMark on FAILURE,
-745
View File
@@ -1,745 +0,0 @@
"""Adapter-layer tests for Feishu bot-sender admission (``FeishuAdapter._admit``)."""
from __future__ import annotations
from types import SimpleNamespace
from typing import Any
import pytest
from tests.gateway.feishu_helpers import (
install_dedup_state,
make_adapter_skeleton,
make_message,
make_sender,
stub_mention,
)
# --- FeishuAdapterSettings wiring ------------------------------------------
@pytest.mark.parametrize(
    "env_value, expected",
    [
        ("none", "none"),
        ("mentions", "mentions"),
        ("all", "all"),
        (" Mentions ", "mentions"),
    ],
)
def test_feishu_load_settings_populates_allow_bots(monkeypatch, env_value, expected):
    """``FEISHU_ALLOW_BOTS`` from the environment ends up in ``settings.allow_bots``.

    The last row expects a padded, mixed-case value to come back as
    ``"mentions"``.
    """
    from gateway.platforms.feishu import FeishuAdapter

    environment = (
        ("FEISHU_APP_ID", "cli_test"),
        ("FEISHU_APP_SECRET", "secret_test"),
        ("FEISHU_ALLOW_BOTS", env_value),
    )
    for key, value in environment:
        monkeypatch.setenv(key, value)

    loaded = FeishuAdapter._load_settings(extra={})

    assert loaded.allow_bots == expected
def test_feishu_load_settings_allow_bots_defaults_to_none(monkeypatch):
    """With ``FEISHU_ALLOW_BOTS`` unset, ``allow_bots`` is expected to be ``"none"``."""
    from gateway.platforms.feishu import FeishuAdapter

    for key, value in (("FEISHU_APP_ID", "cli_test"), ("FEISHU_APP_SECRET", "secret_test")):
        monkeypatch.setenv(key, value)
    monkeypatch.delenv("FEISHU_ALLOW_BOTS", raising=False)

    loaded = FeishuAdapter._load_settings(extra={})

    assert loaded.allow_bots == "none"
def test_feishu_load_settings_ignores_extra_allow_bots(monkeypatch):
    """``extra["allow_bots"]`` must not override the (unset) environment value."""
    # extra is ignored — env is single source of truth (yaml is bridged to env).
    from gateway.platforms.feishu import FeishuAdapter

    for key, value in (("FEISHU_APP_ID", "cli_test"), ("FEISHU_APP_SECRET", "secret_test")):
        monkeypatch.setenv(key, value)
    monkeypatch.delenv("FEISHU_ALLOW_BOTS", raising=False)

    loaded = FeishuAdapter._load_settings(extra={"allow_bots": "all"})

    assert loaded.allow_bots == "none"
def test_feishu_load_settings_falls_back_to_env_when_extra_missing(monkeypatch):
    """An empty ``extra`` leaves ``FEISHU_ALLOW_BOTS=mentions`` in effect."""
    from gateway.platforms.feishu import FeishuAdapter

    environment = (
        ("FEISHU_APP_ID", "cli_test"),
        ("FEISHU_APP_SECRET", "secret_test"),
        ("FEISHU_ALLOW_BOTS", "mentions"),
    )
    for key, value in environment:
        monkeypatch.setenv(key, value)

    loaded = FeishuAdapter._load_settings(extra={})

    assert loaded.allow_bots == "mentions"
def test_feishu_load_settings_warns_on_unknown_allow_bots(monkeypatch, caplog):
    """A misspelt ``FEISHU_ALLOW_BOTS`` yields ``"none"`` plus a warning naming the value."""
    import logging

    from gateway.platforms.feishu import FeishuAdapter

    monkeypatch.setenv("FEISHU_APP_ID", "cli_test")
    monkeypatch.setenv("FEISHU_APP_SECRET", "secret_test")
    monkeypatch.setenv("FEISHU_ALLOW_BOTS", "menton")  # typo

    with caplog.at_level(logging.WARNING, logger="gateway.platforms.feishu"):
        loaded = FeishuAdapter._load_settings(extra={})

    assert loaded.allow_bots == "none"
    # At least one captured record must mention both the setting and the bad value.
    mentioning = [
        record
        for record in caplog.records
        if "allow_bots" in record.message and "menton" in record.message
    ]
    assert mentioning
@pytest.mark.parametrize(
    "env_value, extra, expected",
    [
        (None, {}, True),
        ("false", {}, False),
        ("true", {}, True),
        ("true", {"require_mention": False}, False),
    ],
)
def test_feishu_load_settings_require_mention(monkeypatch, env_value, extra, expected):
    """``require_mention`` for each env/``extra`` combination in the table.

    ``env_value=None`` means ``FEISHU_REQUIRE_MENTION`` is removed from the
    environment rather than set.
    """
    from gateway.platforms.feishu import FeishuAdapter

    monkeypatch.setenv("FEISHU_APP_ID", "cli_test")
    monkeypatch.setenv("FEISHU_APP_SECRET", "secret_test")
    if env_value is not None:
        monkeypatch.setenv("FEISHU_REQUIRE_MENTION", env_value)
    else:
        monkeypatch.delenv("FEISHU_REQUIRE_MENTION", raising=False)

    resolved = FeishuAdapter._load_settings(extra=extra).require_mention

    assert resolved is expected
def test_feishu_load_settings_parses_per_group_require_mention(monkeypatch):
    """Per-group ``require_mention`` is parsed; a rule without the key yields ``None``."""
    from gateway.platforms.feishu import FeishuAdapter

    monkeypatch.setenv("FEISHU_APP_ID", "cli_test")
    monkeypatch.setenv("FEISHU_APP_SECRET", "secret_test")

    group_rules = {
        "oc_free": {"policy": "open", "require_mention": False},
        "oc_strict": {"policy": "open", "require_mention": True},
        "oc_inherit": {"policy": "open"},
    }
    parsed = FeishuAdapter._load_settings(extra={"group_rules": group_rules}).group_rules

    assert parsed["oc_free"].require_mention is False
    assert parsed["oc_strict"].require_mention is True
    assert parsed["oc_inherit"].require_mention is None
# --- Module-level helpers --------------------------------------------------
def test_sender_identity_collects_every_non_empty_id_variant():
    """The empty ``user_id`` is dropped; ``open_id`` and ``union_id`` are kept."""
    from gateway.platforms.feishu import _sender_identity

    ids = SimpleNamespace(open_id="ou_x", user_id="", union_id="un_x")
    identity = _sender_identity(SimpleNamespace(sender_id=ids))

    assert identity == frozenset({"ou_x", "un_x"})
def test_sender_identity_handles_missing_sender_id():
    """A sender object with no ``sender_id`` attribute yields an empty frozenset."""
    from gateway.platforms.feishu import _sender_identity

    bare_sender = SimpleNamespace()

    assert _sender_identity(bare_sender) == frozenset()
@pytest.mark.parametrize("sender_type", ["bot", "app"])
def test_is_bot_sender_treats_bot_and_app_as_bot_origin(sender_type):
    """Both ``"bot"`` and ``"app"`` sender types count as bot-originated."""
    from gateway.platforms.feishu import _is_bot_sender

    verdict = _is_bot_sender(SimpleNamespace(sender_type=sender_type))

    assert verdict is True
@pytest.mark.parametrize("sender_type", ["user", "", None])
def test_is_bot_sender_rejects_non_bot_origin(sender_type):
    """``"user"``, an empty string and ``None`` are all treated as non-bot senders."""
    from gateway.platforms.feishu import _is_bot_sender

    verdict = _is_bot_sender(SimpleNamespace(sender_type=sender_type))

    assert verdict is False
# --- _admit pipeline matrix ------------------------------------------------
#
# Covers the four-step admission pipeline (self_echo → bot_policy →
# DM bypass → group_policy + mention) as a single result-only matrix.
# Each row pins one decision in the pipeline; tests asserting call-count
# semantics live below in their own functions.
def _admit_case(
*,
adapter: dict | None = None,
sender: dict | None = None,
message: dict | None = None,
mentions_self: bool | None = None,
expected: str | None = None,
):
return {
"adapter": adapter or {},
"sender": sender or {},
"message": message or {},
"mentions_self": mentions_self,
"expected": expected,
}
# One (pytest id, _admit_case keyword arguments) row per pinned decision.
# ``expected`` is the value ``_admit`` must return; ``None`` means admitted.
_ADMIT_CASES = [
    pytest.param(_admit_case(**case_kwargs), id=case_id)
    for case_id, case_kwargs in (
        (
            "self_echo:open_id_under_all_mode",
            dict(
                adapter={"bot_open_id": "ou_me", "allow_bots": "all"},
                sender={"sender_type": "bot", "open_id": "ou_me"},
                expected="self_echo",
            ),
        ),
        (
            "self_echo:user_id_only",
            dict(
                adapter={"bot_open_id": "", "bot_user_id": "u_me", "allow_bots": "all"},
                sender={"sender_type": "bot", "open_id": None, "user_id": "u_me"},
                expected="self_echo",
            ),
        ),
        (
            "self_echo:mixed_ids",
            dict(
                adapter={"bot_open_id": "ou_me", "allow_bots": "all"},
                sender={
                    "sender_type": "bot",
                    "open_id": "ou_me",
                    "user_id": "u_me",
                    "union_id": "un_me",
                },
                expected="self_echo",
            ),
        ),
        (
            "self_echo:user_id_when_bot_user_id_set",
            dict(
                adapter={"bot_open_id": "ou_self", "bot_user_id": "u_self", "allow_bots": "all"},
                sender={"sender_type": "bot", "open_id": None, "user_id": "u_self"},
                expected="self_echo",
            ),
        ),
        (
            "bots_disabled:mode_none",
            dict(
                adapter={"bot_open_id": "ou_self", "allow_bots": "none"},
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                expected="bots_disabled",
            ),
        ),
        (
            "bots_disabled:mode_empty",
            dict(
                adapter={"bot_open_id": "ou_self", "allow_bots": ""},
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                expected="bots_disabled",
            ),
        ),
        (
            "bots_disabled:mode_unknown_value",
            dict(
                adapter={"bot_open_id": "ou_self", "allow_bots": "loose"},
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                expected="bots_disabled",
            ),
        ),
        (
            "bots_disabled:wins_over_self_ids_unknown",
            dict(
                adapter={"bot_open_id": "", "allow_bots": "none"},
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                expected="bots_disabled",
            ),
        ),
        (
            "self_ids_unknown:bot_sender_no_self_ids",
            dict(
                adapter={"bot_open_id": "", "allow_bots": "all"},
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                expected="self_ids_unknown",
            ),
        ),
        (
            "self_ids_unknown:app_sender_no_self_ids",
            dict(
                adapter={"bot_open_id": "", "allow_bots": "all"},
                sender={"sender_type": "app", "open_id": "ou_peer"},
                expected="self_ids_unknown",
            ),
        ),
        (
            "self_ids_unknown:no_sender_ids",
            dict(
                adapter={"bot_open_id": "ou_self", "allow_bots": "all"},
                sender={"sender_type": "app", "open_id": None},
                expected="self_ids_unknown",
            ),
        ),
        (
            "mentions_mode:not_mentioned_dm",
            dict(
                adapter={"bot_open_id": "ou_self", "allow_bots": "mentions"},
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                mentions_self=False,
                expected="bot_not_mentioned",
            ),
        ),
        (
            "mentions_mode:mentioned_dm",
            dict(
                adapter={"bot_open_id": "ou_self", "allow_bots": "mentions"},
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                mentions_self=True,
                expected=None,
            ),
        ),
        (
            "all_mode:not_mentioned_dm",
            dict(
                adapter={"bot_open_id": "ou_self", "allow_bots": "all"},
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                mentions_self=False,
                expected=None,
            ),
        ),
        (
            "all_mode:mentioned_dm",
            dict(
                adapter={"bot_open_id": "ou_self", "allow_bots": "all"},
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                mentions_self=True,
                expected=None,
            ),
        ),
        (
            "human:dm_admitted_regardless_of_allow_bots",
            dict(
                adapter={"bot_open_id": "", "allow_bots": "none"},
                sender={"sender_type": "user", "open_id": "ou_human"},
                expected=None,
            ),
        ),
        (
            "human:p2p_admitted",
            dict(
                adapter={"allow_bots": "all"},
                sender={"sender_type": "user", "open_id": "ou_human"},
                message={"message_id": "om_ok", "chat_type": "p2p"},
                expected=None,
            ),
        ),
        (
            "require_mention_false:group_human_no_mention_admitted",
            dict(
                adapter={
                    "bot_open_id": "ou_self",
                    "require_mention": False,
                    "group_policy": "open",
                },
                sender={"sender_type": "user", "open_id": "ou_human"},
                message={"chat_type": "group"},
                mentions_self=False,
                expected=None,
            ),
        ),
        (
            "require_mention_false:group_bot_all_mode_admitted",
            dict(
                adapter={
                    "bot_open_id": "ou_self",
                    "allow_bots": "all",
                    "require_mention": False,
                    "group_policy": "open",
                },
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                message={"chat_type": "group"},
                mentions_self=False,
                expected=None,
            ),
        ),
        (
            "require_mention_false:group_bot_mentions_mode_still_gated",
            dict(
                adapter={
                    "bot_open_id": "ou_self",
                    "allow_bots": "mentions",
                    "require_mention": False,
                    "group_policy": "open",
                },
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                message={"chat_type": "group"},
                mentions_self=False,
                expected="bot_not_mentioned",
            ),
        ),
    )
]
@pytest.mark.parametrize("case", _ADMIT_CASES)
def test_admit_pipeline(case):
    """Build adapter, sender and message from *case* and compare ``_admit``'s result.

    The mention check is stubbed only when the case supplies a
    ``mentions_self`` value; otherwise the skeleton adapter is left as built.
    """
    adapter = make_adapter_skeleton(**case["adapter"])
    mention_answer = case["mentions_self"]
    if mention_answer is not None:
        stub_mention(adapter, mention_answer)

    sender = make_sender(**case["sender"])
    message = make_message(**case["message"])
    outcome = adapter._admit(sender, message)

    assert outcome == case["expected"]
# --- Mention call-count semantics ------------------------------------------
def test_admit_skips_mention_check_under_all_mode():
    """With ``allow_bots="all"`` a peer bot is admitted without probing mentions."""
    # Tripwire: under allow_bots=all the mention path must not be probed.
    adapter = make_adapter_skeleton(bot_open_id="ou_self", allow_bots="all")
    probes = []

    def _tripwire(_message):
        probes.append(_message)
        return False

    adapter._mentions_self = _tripwire
    peer_bot = make_sender(sender_type="bot", open_id="ou_peer")

    assert adapter._admit(peer_bot, make_message()) is None
    assert len(probes) == 0
def test_admit_group_mention_checked_once_per_call():
    """A single ``_admit`` call must evaluate ``_mentions_self`` exactly once."""
    # Stage 2 (mentions mode) and stage 4 (group require_mention) must not
    # double-evaluate _mentions_self for the same admit call.
    adapter = make_adapter_skeleton(
        bot_open_id="ou_self",
        allow_bots="mentions",
        require_mention=True,
        group_policy="open",
    )
    probes = []

    def _counting(_message):
        probes.append(_message)
        return True

    adapter._mentions_self = _counting
    peer_bot = make_sender(sender_type="bot", open_id="ou_peer")
    group_message = make_message(chat_type="group")

    assert adapter._admit(peer_bot, group_message) is None
    assert len(probes) == 1
# --- Per-group require_mention override ------------------------------------
def test_admit_per_group_require_mention_overrides_global():
    """A group rule with ``require_mention=False`` beats the global ``True``.

    The same un-mentioned human message is admitted in ``oc_free`` (which has
    the override) and rejected with ``"group_policy_rejected"`` in ``oc_other``.
    """
    from gateway.platforms.feishu import FeishuGroupRule

    adapter = make_adapter_skeleton(
        bot_open_id="ou_self",
        require_mention=True,
        group_policy="open",
    )
    relaxed_rule = FeishuGroupRule(policy="open", require_mention=False)
    adapter._group_rules = {"oc_free": relaxed_rule}
    stub_mention(adapter, False)
    human = make_sender(sender_type="user", open_id="ou_human")

    in_relaxed_group = adapter._admit(human, make_message(chat_id="oc_free", chat_type="group"))
    assert in_relaxed_group is None

    in_other_group = adapter._admit(human, make_message(chat_id="oc_other", chat_type="group"))
    assert in_other_group == "group_policy_rejected"
# --- Hydration -------------------------------------------------------------
def test_hydrate_bot_identity_populates_self_ids_from_bot_v3_info(monkeypatch):
    """``_hydrate_bot_identity`` issues a GET to ``/open-apis/bot/v3/info`` and
    copies ``open_id`` / ``app_name`` from the response onto the adapter."""
    import asyncio

    from gateway.platforms.feishu import FeishuAdapter

    # Bypass __init__: only the attributes assigned below are set up.
    adapter = object.__new__(FeishuAdapter)
    for attribute in ("_bot_open_id", "_bot_user_id", "_bot_name"):
        setattr(adapter, attribute, "")
    adapter._allow_bots = "all"

    seen = {}
    canned_body = b'{"code":0,"bot":{"app_name":"Hermes","open_id":"ou_hydrated"}}'

    def _fake_request(request):
        seen["uri"] = getattr(request, "uri", None)
        seen["http_method"] = getattr(request, "http_method", None)
        return SimpleNamespace(raw=SimpleNamespace(content=canned_body))

    adapter._client = SimpleNamespace(request=_fake_request)

    asyncio.run(adapter._hydrate_bot_identity())

    assert seen["uri"] == "/open-apis/bot/v3/info"
    assert str(seen["http_method"]).endswith("GET")
    assert adapter._bot_open_id == "ou_hydrated"
    assert adapter._bot_name == "Hermes"
    # /bot/v3/info doesn't surface user_id, so _bot_user_id stays empty.
    assert adapter._bot_user_id == ""
def test_resolve_sender_profile_uses_open_id_for_bot_name_lookup():
    """For a bot sender, only the ``open_id`` is handed to ``_fetch_bot_names``;
    the returned profile still reports the sender's ``user_id``."""
    import asyncio

    from gateway.platforms.feishu import FeishuAdapter

    adapter = object.__new__(FeishuAdapter)
    adapter._client = object()
    adapter._sender_name_cache = {}
    requested_ids = []

    async def _fake_fetch_bot_names(bot_ids):
        requested_ids.extend(bot_ids)
        return {"ou_peer": "Peer Bot"}

    adapter._fetch_bot_names = _fake_fetch_bot_names
    sender_ids = SimpleNamespace(open_id="ou_peer", user_id="u_peer", union_id="on_peer")

    profile = asyncio.run(adapter._resolve_sender_profile(sender_ids, is_bot=True))

    assert requested_ids == ["ou_peer"]
    assert profile["user_id"] == "u_peer"
    assert profile["user_name"] == "Peer Bot"
# --- _allow_group_message matrix -------------------------------------------
#
# Bot-bypass semantics: admitted bots skip allowlist/blacklist (parallel
# human-scope filters), but channel-level locks (disabled, admin_only) and
# admin short-circuits still apply.
def _group_case(
*,
adapter: dict | None = None,
admins: set | None = None,
group_rules: dict | None = None,
sender: dict | None = None,
chat_id: str = "oc_1",
is_bot: bool = False,
expected: bool = False,
):
return {
"adapter": adapter or {},
"admins": admins or set(),
"group_rules": group_rules or {},
"sender": sender or {},
"chat_id": chat_id,
"is_bot": is_bot,
"expected": expected,
}
def _group_rule(policy: str, **kwargs):
    """Construct a ``FeishuGroupRule`` for *policy*, forwarding any extra keywords.

    The import is done inside the function, so the module is only loaded
    when a rule is actually built.
    """
    from gateway.platforms.feishu import FeishuGroupRule

    rule = FeishuGroupRule(policy=policy, **kwargs)
    return rule
# One (pytest id, _group_case keyword arguments) row per scenario.
# NOTE(review): "bot:bypasses_default_allowlist" and "bot:allowlist_skipped"
# carry identical arguments — confirm whether one was meant to add a rule.
_GROUP_CASES = [
    pytest.param(_group_case(**case_kwargs), id=case_id)
    for case_id, case_kwargs in (
        (
            "bot:bypasses_default_allowlist",
            dict(
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                is_bot=True,
                expected=True,
            ),
        ),
        (
            "human:gated_by_default_allowlist",
            dict(
                sender={"sender_type": "user", "open_id": "ou_stranger"},
                is_bot=False,
                expected=False,
            ),
        ),
        (
            "bot:admin_short_circuit",
            dict(
                admins={"ou_peer"},
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                is_bot=True,
                expected=True,
            ),
        ),
        (
            "human:admin_via_user_id",
            dict(
                admins={"u_admin"},
                sender={"sender_type": "user", "open_id": None, "user_id": "u_admin"},
                is_bot=False,
                expected=True,
            ),
        ),
        (
            "bot:allowlist_skipped",
            dict(
                sender={"sender_type": "bot", "open_id": "ou_peer"},
                is_bot=True,
                expected=True,
            ),
        ),
        (
            "app:allowlist_skipped",
            dict(
                sender={"sender_type": "app", "open_id": "ou_peer"},
                is_bot=True,
                expected=True,
            ),
        ),
    )
]
# Channel-lock cases need group_rules construction; keep them in a separate
# parametrize so we can use _group_rule() (FeishuGroupRule import).
# (policy, sender_type, expected) rows: each locking policy is paired with
# both bot-origin sender types, and every row expects rejection (False).
_GROUP_RULE_CASES = [
    pytest.param(policy, sender_type, False, id=f"{sender_type}:{label}")
    for policy, label in (
        ("disabled", "disabled_policy_blocks_even_with_bypass"),
        ("admin_only", "admin_only_policy_blocks_non_admin"),
    )
    for sender_type in ("bot", "app")
]
@pytest.mark.parametrize("case", _GROUP_CASES)
def test_allow_group_message_matrix(case):
    """Apply the case's admins and group rules, then compare ``_allow_group_message``."""
    adapter = make_adapter_skeleton(**case["adapter"])
    adapter._admins = case["admins"]
    adapter._group_rules = case["group_rules"]
    sender = make_sender(**case["sender"])

    verdict = adapter._allow_group_message(
        sender_id=sender.sender_id,
        chat_id=case["chat_id"],
        is_bot=case["is_bot"],
    )

    assert verdict is case["expected"]
@pytest.mark.parametrize("policy, sender_type, expected", _GROUP_RULE_CASES)
def test_allow_group_message_channel_locks_apply_to_bots(policy, sender_type, expected):
    """A group rule with the given *policy* on ``oc_locked`` decides a bot-origin sender."""
    adapter = make_adapter_skeleton()
    adapter._group_rules = {"oc_locked": _group_rule(policy)}
    peer = make_sender(sender_type=sender_type, open_id="ou_peer")

    verdict = adapter._allow_group_message(
        sender_id=peer.sender_id,
        chat_id="oc_locked",
        is_bot=True,
    )

    assert verdict is expected
@pytest.mark.parametrize("sender_type", ["bot", "app"])
def test_allow_group_message_blacklist_is_human_scope_only(sender_type):
    """A bot-origin sender listed in a group's blacklist is still allowed."""
    # blacklist is parallel to allowlist (human-scope); admitted bots bypass
    # it. To block a specific bot, gate upstream via FEISHU_ALLOW_BOTS.
    adapter = make_adapter_skeleton()
    blacklist_rule = _group_rule("blacklist", blacklist={"ou_peer"})
    adapter._group_rules = {"oc_1": blacklist_rule}
    peer = make_sender(sender_type=sender_type, open_id="ou_peer")

    verdict = adapter._allow_group_message(
        sender_id=peer.sender_id,
        chat_id="oc_1",
        is_bot=True,
    )

    assert verdict is True
# --- Realistic payload smoke -----------------------------------------------
def test_admit_accepts_realistic_bot_at_bot_group_event():
    """A peer bot that @-mentions this bot in a group is admitted under ``mentions`` mode."""
    # Locks in the real im.message.receive_v1 payload shape under mode=mentions.
    adapter = make_adapter_skeleton(bot_open_id="ou_self", allow_bots="mentions")

    # The mention targets the adapter's own open_id ("ou_self").
    mentioned_ids = SimpleNamespace(union_id="on_mentionUnion", user_id="", open_id="ou_self")
    mention = SimpleNamespace(
        key="@_user_1",
        id=mentioned_ids,
        name="Hermes",
        mentioned_type="bot",
        tenant_key="tenant_ab",
    )
    message = SimpleNamespace(
        message_id="om_realistic_bot_at_bot",
        chat_id="oc_real",
        chat_type="group",
        message_type="text",
        content='{"text":"@_user_1 hello"}',
        mentions=[mention],
    )
    peer_ids = SimpleNamespace(union_id="on_peerUnion", user_id="u_peer", open_id="ou_peer_bot")
    sender = SimpleNamespace(
        sender_type="bot",
        sender_id=peer_ids,
        tenant_key="tenant_ab",
    )

    outcome = adapter._admit(sender, message)

    assert outcome is None
# --- Event-dispatch plumbing -----------------------------------------------
def test_handle_message_event_data_drops_bot_sender_by_default():
    """With a default skeleton adapter, a bot-sent event never reaches
    ``_process_inbound_message``."""
    import asyncio

    adapter = make_adapter_skeleton()
    install_dedup_state(adapter)
    forwarded = []

    async def _fake_process_inbound_message(**kwargs):
        forwarded.append(kwargs)

    adapter._process_inbound_message = _fake_process_inbound_message

    event = SimpleNamespace(
        sender=make_sender(sender_type="bot", open_id="ou_peer"),
        message=make_message(message_id="om_bot_default", chat_type="p2p"),
    )
    asyncio.run(adapter._handle_message_event_data(SimpleNamespace(event=event)))

    assert forwarded == []
def test_handle_message_event_data_forwards_sender_when_admitted():
    """Under ``allow_bots="all"`` the event is forwarded with the original
    ``sender_id`` object, ``is_bot=True`` and the message id."""
    import asyncio

    adapter = make_adapter_skeleton(allow_bots="all")
    install_dedup_state(adapter)
    forwarded = {}

    async def _fake_process_inbound_message(**kwargs):
        forwarded.update(kwargs)

    adapter._process_inbound_message = _fake_process_inbound_message

    peer_bot = make_sender(sender_type="bot", open_id="ou_peer")
    event = SimpleNamespace(
        sender=peer_bot,
        message=make_message(message_id="om_bot_ok", chat_type="p2p"),
    )
    asyncio.run(adapter._handle_message_event_data(SimpleNamespace(event=event)))

    # Identity check: the very same sender_id object must be passed along.
    assert forwarded.get("sender_id") is peer_bot.sender_id
    assert forwarded.get("is_bot") is True
    assert forwarded.get("message_id") == "om_bot_ok"
@@ -1,113 +0,0 @@
"""Regression guard for Feishu bot-sender authorization bypass.
Mirrors tests/gateway/test_discord_bot_auth_bypass.py for Platform.FEISHU.
Without the bypass in gateway/run.py, Feishu bot senders admitted by the
adapter would be rejected at _is_user_authorized with "Unauthorized user" —
the same class of bug as Discord #4466.
"""
from __future__ import annotations
from types import SimpleNamespace
import pytest
from gateway.session import Platform, SessionSource
@pytest.fixture(autouse=True)
def _isolate_feishu_env(monkeypatch):
    """Remove the Feishu/gateway allow-list variables before every test in this module."""
    authorization_vars = [
        "FEISHU_ALLOW_BOTS",
        "FEISHU_ALLOWED_USERS",
        "FEISHU_ALLOW_ALL_USERS",
        "GATEWAY_ALLOW_ALL_USERS",
        "GATEWAY_ALLOWED_USERS",
    ]
    for name in authorization_vars:
        monkeypatch.delenv(name, raising=False)
def _make_bare_runner():
    """Return a ``GatewayRunner`` created without ``__init__``.

    Only ``pairing_store`` is attached, and its ``is_approved`` always
    answers ``False``.
    """
    from gateway.run import GatewayRunner

    def _never_approved(*_a, **_kw):
        return False

    bare = object.__new__(GatewayRunner)
    bare.pairing_store = SimpleNamespace(is_approved=_never_approved)
    return bare
def _make_feishu_bot_source(open_id: str = "ou_peer"):
    """Build a Feishu group ``SessionSource`` flagged ``is_bot=True`` for *open_id*."""
    fields = {
        "platform": Platform.FEISHU,
        "chat_id": "oc_1",
        "chat_type": "group",
        "user_id": open_id,
        "user_name": "PeerBot",
        "is_bot": True,
    }
    return SessionSource(**fields)
def _make_feishu_human_source(open_id: str = "ou_human"):
    """Build a Feishu group ``SessionSource`` flagged ``is_bot=False`` for *open_id*."""
    fields = {
        "platform": Platform.FEISHU,
        "chat_id": "oc_1",
        "chat_type": "group",
        "user_id": open_id,
        "user_name": "Human",
        "is_bot": False,
    }
    return SessionSource(**fields)
def test_feishu_bot_authorized_when_allow_bots_mentions(monkeypatch):
    """A bot absent from ``FEISHU_ALLOWED_USERS`` is authorized under ``mentions``."""
    runner = _make_bare_runner()
    for key, value in (("FEISHU_ALLOW_BOTS", "mentions"), ("FEISHU_ALLOWED_USERS", "ou_human")):
        monkeypatch.setenv(key, value)

    bot_source = _make_feishu_bot_source("ou_peer")

    assert runner._is_user_authorized(bot_source) is True
def test_feishu_bot_authorized_when_allow_bots_all(monkeypatch):
    """A bot absent from ``FEISHU_ALLOWED_USERS`` is authorized under ``all``."""
    runner = _make_bare_runner()
    for key, value in (("FEISHU_ALLOW_BOTS", "all"), ("FEISHU_ALLOWED_USERS", "ou_human")):
        monkeypatch.setenv(key, value)

    bot_source = _make_feishu_bot_source()

    assert runner._is_user_authorized(bot_source) is True
def test_feishu_bot_NOT_authorized_when_allow_bots_none(monkeypatch):
    """``FEISHU_ALLOW_BOTS=none`` leaves a non-allowlisted bot unauthorized."""
    runner = _make_bare_runner()
    for key, value in (("FEISHU_ALLOW_BOTS", "none"), ("FEISHU_ALLOWED_USERS", "ou_human")):
        monkeypatch.setenv(key, value)

    bot_source = _make_feishu_bot_source("ou_peer")

    assert runner._is_user_authorized(bot_source) is False
def test_feishu_bot_NOT_authorized_when_allow_bots_unset(monkeypatch):
    """Only ``FEISHU_ALLOWED_USERS`` is set here; a bot not on that list is rejected."""
    runner = _make_bare_runner()
    monkeypatch.setenv("FEISHU_ALLOWED_USERS", "ou_human")

    bot_source = _make_feishu_bot_source("ou_peer")

    assert runner._is_user_authorized(bot_source) is False
def test_feishu_human_still_checked_against_allowlist_when_bot_policy_set(monkeypatch):
    """FEISHU_ALLOW_BOTS=all must NOT open the gate for humans."""
    runner = _make_bare_runner()
    for key, value in (("FEISHU_ALLOW_BOTS", "all"), ("FEISHU_ALLOWED_USERS", "ou_human")):
        monkeypatch.setenv(key, value)

    stranger = _make_feishu_human_source("ou_stranger")
    assert runner._is_user_authorized(stranger) is False

    allowlisted = _make_feishu_human_source("ou_human")
    assert runner._is_user_authorized(allowlisted) is True
def test_feishu_bot_bypass_does_not_leak_to_other_platforms(monkeypatch):
    """FEISHU_ALLOW_BOTS=all must not authorize Telegram/Discord bot sources."""
    runner = _make_bare_runner()
    monkeypatch.setenv("FEISHU_ALLOW_BOTS", "all")

    telegram_fields = {
        "platform": Platform.TELEGRAM,
        "chat_id": "123",
        "chat_type": "channel",
        "user_id": "999",
        "is_bot": True,
    }
    telegram_bot = SessionSource(**telegram_fields)

    assert runner._is_user_authorized(telegram_bot) is False
@@ -1,201 +0,0 @@
"""Regression tests for topic/channel skill auto-injection after /new or /reset.
Covers the fix for issue #6508.
Before the fix:
1. User sends ``/new`` → ``reset_session`` creates a fresh SessionEntry
with ``created_at == updated_at``.
2. User sends the next message.
3. ``get_or_create_session`` finds the entry and bumps
``entry.updated_at = now`` (microseconds after ``created_at``).
4. ``_handle_message_with_agent`` checks
``_is_new_session = (created_at == updated_at) or was_auto_reset``.
Both are False → ``_is_new_session = False`` → topic/channel skills
are silently skipped for the first message of a manually reset session.
After the fix:
``reset_session`` stamps the new entry with ``is_fresh_reset=True``.
``_handle_message_with_agent`` ORs this into ``_is_new_session`` and
consumes the flag immediately after the check, so subsequent messages
are treated as continuing the session and the flag does not leak.
We use ``was_auto_reset`` for surprise resets (idle/daily/suspended) and
``is_fresh_reset`` for user-initiated resets because the former also drives
a "Session automatically reset due to inactivity" user-facing notice and
a context-note prepend into the agent's prompt — both wrong for an explicit
/new or /reset.
"""
import pytest
from gateway.config import GatewayConfig, Platform
from gateway.session import SessionEntry, SessionSource, SessionStore
def _make_store(tmp_path):
    """Create a ``SessionStore`` rooted at *tmp_path* with a default ``GatewayConfig``."""
    default_config = GatewayConfig()
    return SessionStore(sessions_dir=tmp_path, config=default_config)
def _make_source(chat_id="123", user_id="u1"):
    """Build a Telegram ``SessionSource`` for the given chat and user ids."""
    fields = {
        "platform": Platform.TELEGRAM,
        "chat_id": chat_id,
        "user_id": user_id,
    }
    return SessionSource(**fields)
def _is_new_session(entry) -> bool:
"""Mirror of the predicate in ``_handle_message_with_agent``.
Kept in-sync with the production check so this test fails loudly if the
upstream logic regresses.
"""
return (
entry.created_at == entry.updated_at
or getattr(entry, "was_auto_reset", False)
or getattr(entry, "is_fresh_reset", False)
)
# ---------------------------------------------------------------------------
# reset_session stamps is_fresh_reset=True
# ---------------------------------------------------------------------------
class TestResetSessionStampsFreshReset:
    """``reset_session`` marks the entry it returns with ``is_fresh_reset=True``."""

    def test_reset_session_sets_is_fresh_reset_true(self, tmp_path):
        store = _make_store(tmp_path)
        source = _make_source()
        store.get_or_create_session(source)

        key = store._generate_session_key(source)
        replacement = store.reset_session(key)

        assert replacement is not None
        assert replacement.is_fresh_reset is True

    def test_reset_session_unknown_key_returns_none(self, tmp_path):
        store = _make_store(tmp_path)

        outcome = store.reset_session("unknown:key")

        assert outcome is None

    def test_fresh_session_does_not_have_is_fresh_reset(self, tmp_path):
        """A vanilla first-time session should not carry the flag."""
        store = _make_store(tmp_path)

        first_entry = store.get_or_create_session(_make_source())

        assert first_entry.is_fresh_reset is False
# ---------------------------------------------------------------------------
# Core regression: _is_new_session stays True after updated_at bump
# ---------------------------------------------------------------------------
class TestIsNewSessionSurvivesUpdatedAtBump:
    """The new-session predicate stays true for the first message after a reset."""

    def test_is_new_session_true_after_reset_then_next_message(self, tmp_path):
        """The actual bug: _is_new_session was False on message after /reset."""
        store = _make_store(tmp_path)
        source = _make_source()
        store.get_or_create_session(source)
        key = store._generate_session_key(source)

        # User sends /reset
        store.reset_session(key)

        # Next inbound message — get_or_create_session bumps updated_at
        after_reset = store.get_or_create_session(source)

        # Before the fix: created_at != updated_at, was_auto_reset=False → False
        # After the fix: is_fresh_reset=True carries the signal through the bump
        assert _is_new_session(after_reset) is True

    def test_flag_consumed_after_first_read(self, tmp_path):
        """Once the handler clears ``is_fresh_reset``, the following message
        must no longer be treated as a new session (skill re-injection must
        not fire a second time).
        """
        store = _make_store(tmp_path)
        source = _make_source()
        store.get_or_create_session(source)
        key = store._generate_session_key(source)
        store.reset_session(key)

        # First message — handler consumes the flag
        first = store.get_or_create_session(source)
        assert _is_new_session(first) is True
        first.is_fresh_reset = False  # what _handle_message_with_agent does

        # Second message — must not be treated as new
        second = store.get_or_create_session(source)
        assert _is_new_session(second) is False
# ---------------------------------------------------------------------------
# Vanilla-session behavior is unchanged
# ---------------------------------------------------------------------------
class TestVanillaBehaviorUnaffected:
    """Sessions that never saw ``/reset`` behave exactly as before."""

    def test_ongoing_session_not_flagged_as_new(self, tmp_path):
        """A second message on an existing session is neither fresh-reset nor new."""
        sessions = _make_store(tmp_path)
        origin = _make_source()
        sessions.get_or_create_session(origin)

        # Second lookup on the same session: updated_at bumps while
        # is_fresh_reset was never set.
        follow_up = sessions.get_or_create_session(origin)
        assert follow_up.is_fresh_reset is False
        assert _is_new_session(follow_up) is False

    def test_idle_auto_reset_does_not_set_is_fresh_reset(self, tmp_path):
        """An entry built with ``was_auto_reset=True`` leaves ``is_fresh_reset`` False.

        Idle/daily auto-resets are signalled through ``was_auto_reset``; they
        must not also raise ``is_fresh_reset``, which would fire the skill
        path a second time.
        """
        sessions = _make_store(tmp_path)
        origin = _make_source()
        existing = sessions.get_or_create_session(origin)

        # Drop the stored entry and hand-build a replacement with the fields
        # the auto-reset branch is described (by the test author) as using.
        sessions._entries.pop(sessions._generate_session_key(origin))
        auto_reset_fields = {
            "session_key": existing.session_key,
            "session_id": "new_id",
            "created_at": existing.created_at,
            "updated_at": existing.created_at,
            "origin": origin,
            "was_auto_reset": True,
            "auto_reset_reason": "idle",
        }
        rebuilt = SessionEntry(**auto_reset_fields)

        assert rebuilt.is_fresh_reset is False
        assert rebuilt.was_auto_reset is True
# ---------------------------------------------------------------------------
# Persistence through sessions.json round-trip
# ---------------------------------------------------------------------------
class TestPersistence:
    """``is_fresh_reset`` round-trips through ``to_dict`` / ``from_dict``."""

    def test_is_fresh_reset_survives_to_dict_from_dict(self, tmp_path):
        """The flag set by ``reset_session`` is still True after serialisation.

        This guards against the gateway restarting between /reset and the
        next message: the flag has to be persisted in sessions.json.
        """
        sessions = _make_store(tmp_path)
        origin = _make_source()
        sessions.get_or_create_session(origin)

        reset_entry = sessions.reset_session(sessions._generate_session_key(origin))
        assert reset_entry.is_fresh_reset is True

        serialised = reset_entry.to_dict()
        round_tripped = SessionEntry.from_dict(serialised)
        assert round_tripped.is_fresh_reset is True

    def test_default_false_when_missing_from_dict(self, tmp_path):
        """A dict without ``is_fresh_reset`` (older sessions.json) loads as False."""
        legacy_timestamp = "2026-01-01T00:00:00"
        legacy_record = {
            "session_key": "telegram:1:123",
            "session_id": "sess1",
            "created_at": legacy_timestamp,
            "updated_at": legacy_timestamp,
        }
        loaded = SessionEntry.from_dict(legacy_record)
        assert loaded.is_fresh_reset is False
-36
View File
@@ -1,36 +0,0 @@
"""Regression tests for /sethome env-var resolution.
The `/sethome` command writes to a platform's home-target env var. Two platforms
don't follow the `{PLATFORM}_HOME_CHANNEL` convention: matrix uses
`MATRIX_HOME_ROOM` and email uses `EMAIL_HOME_ADDRESS`. Before PR #12698
`/sethome` hardcoded the `_HOME_CHANNEL` suffix, so Matrix and Email saves went
to env vars that nothing read on startup: the home channel appeared to be set
successfully but was lost on every new gateway session.
"""
from gateway.run import _home_target_env_var
def test_matrix_home_target_env_var_uses_home_room():
    """Matrix resolves to ``MATRIX_HOME_ROOM`` rather than the ``_HOME_CHANNEL`` form."""
    resolved = _home_target_env_var("matrix")
    assert resolved == "MATRIX_HOME_ROOM"
def test_email_home_target_env_var_uses_home_address():
    """Email resolves to ``EMAIL_HOME_ADDRESS`` rather than the ``_HOME_CHANNEL`` form."""
    resolved = _home_target_env_var("email")
    assert resolved == "EMAIL_HOME_ADDRESS"
def test_telegram_home_target_env_var_uses_home_channel():
    """Telegram follows the ``{PLATFORM}_HOME_CHANNEL`` convention."""
    resolved = _home_target_env_var("telegram")
    assert resolved == "TELEGRAM_HOME_CHANNEL"
def test_discord_home_target_env_var_uses_home_channel():
    """Discord follows the ``{PLATFORM}_HOME_CHANNEL`` convention."""
    resolved = _home_target_env_var("discord")
    assert resolved == "DISCORD_HOME_CHANNEL"
def test_unknown_platform_home_target_env_var_falls_back_to_home_channel():
    """A platform with no special case gets the upper-cased ``_HOME_CHANNEL`` name."""
    resolved = _home_target_env_var("custom")
    assert resolved == "CUSTOM_HOME_CHANNEL"
def test_case_insensitive_platform_name():
    """Platform names are matched regardless of letter case."""
    cases = (
        ("MATRIX", "MATRIX_HOME_ROOM"),
        ("Email", "EMAIL_HOME_ADDRESS"),
    )
    for platform_name, expected_var in cases:
        assert _home_target_env_var(platform_name) == expected_var
@@ -1,79 +0,0 @@
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent, MessageType
from gateway.run import GatewayRunner
from gateway.session import SessionSource, build_session_key
def _make_runner() -> GatewayRunner:
    """Return a ``GatewayRunner`` created without ``__init__`` and set up for native images.

    Only the attributes assigned below are populated: a Telegram-only config,
    an empty adapter map, a model name, no base URL, and an image-mode
    decider that always answers ``"native"``.
    """
    gateway = GatewayRunner.__new__(GatewayRunner)
    telegram_cfg = PlatformConfig(enabled=True, token="fake")
    gateway.config = GatewayConfig(platforms={Platform.TELEGRAM: telegram_cfg})
    gateway.adapters = {}
    gateway._model = "openai/gpt-4.1-mini"
    gateway._base_url = None
    gateway._decide_image_input_mode = lambda: "native"
    return gateway
def _source(chat_id: str) -> SessionSource:
    """Build a private-chat Telegram ``SessionSource`` for ``chat_id``."""
    display_name = f"user-{chat_id}"
    origin = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id=chat_id,
        chat_type="private",
        user_name=display_name,
    )
    return origin
def _image_event(source: SessionSource, path: str) -> MessageEvent:
    """Build a photo ``MessageEvent`` from ``source`` carrying one PNG at ``path``."""
    photo = MessageEvent(
        text="see image",
        message_type=MessageType.PHOTO,
        source=source,
        media_urls=[path],
        media_types=["image/png"],
    )
    return photo
@pytest.mark.asyncio
async def test_native_image_buffer_isolated_per_session():
    """Each session's pending native image paths are stored under its own key."""
    runner = _make_runner()
    uploads = (
        (_source("chat-a"), "/tmp/a.png"),
        (_source("chat-b"), "/tmp/b.png"),
    )

    # Feed one image message per chat through the inbound preparation step.
    for origin, image_path in uploads:
        await runner._prepare_inbound_message_text(
            event=_image_event(origin, image_path),
            source=origin,
            history=[],
        )

    # Consuming by session key yields only that chat's own image.
    for origin, image_path in uploads:
        pending = runner._consume_pending_native_image_paths(build_session_key(origin))
        assert pending == [image_path]
@pytest.mark.asyncio
async def test_native_image_buffer_not_cleared_by_other_sessions_without_images():
    """A text-only message in another chat leaves the first chat's image buffered."""
    runner = _make_runner()
    image_chat = _source("chat-a")
    text_chat = _source("chat-b")

    # Chat A sends an image ...
    await runner._prepare_inbound_message_text(
        event=_image_event(image_chat, "/tmp/a.png"),
        source=image_chat,
        history=[],
    )
    # ... then chat B sends plain text with no media attached.
    text_only = MessageEvent(text="plain text", source=text_chat)
    await runner._prepare_inbound_message_text(
        event=text_only,
        source=text_chat,
        history=[],
    )

    image_paths = runner._consume_pending_native_image_paths(build_session_key(image_chat))
    assert image_paths == ["/tmp/a.png"]
    text_paths = runner._consume_pending_native_image_paths(build_session_key(text_chat))
    assert text_paths == []
-41
View File
@@ -407,44 +407,3 @@ class TestReasoningCommand:
assert result["final_response"] == "ok"
assert _CapturingAgent.last_init is not None
assert "homeassistant" in set(_CapturingAgent.last_init["enabled_toolsets"])
class TestLoadShowReasoningCoercion:
    """Regression: ``display.show_reasoning`` has to be coerced, not passed to ``bool()``."""

    def _load_with_config(self, tmp_path, monkeypatch, yaml_body: str) -> bool:
        """Write ``yaml_body`` as config.yaml in a temp home and load the setting."""
        config_home = tmp_path / "hermes"
        config_home.mkdir()
        config_file = config_home / "config.yaml"
        config_file.write_text(yaml_body, encoding="utf-8")
        monkeypatch.setattr(gateway_run, "_hermes_home", config_home)
        return gateway_run.GatewayRunner._load_show_reasoning()

    def test_quoted_false_is_false(self, tmp_path, monkeypatch):
        """The quoted string ``"false"`` loads as False."""
        loaded = self._load_with_config(
            tmp_path,
            monkeypatch,
            'display:\n show_reasoning: "false"\n',
        )
        assert loaded is False

    def test_quoted_off_is_false(self, tmp_path, monkeypatch):
        """The quoted string ``"off"`` loads as False."""
        loaded = self._load_with_config(
            tmp_path,
            monkeypatch,
            'display:\n show_reasoning: "off"\n',
        )
        assert loaded is False

    def test_quoted_true_is_true(self, tmp_path, monkeypatch):
        """The quoted string ``"true"`` loads as True."""
        loaded = self._load_with_config(
            tmp_path,
            monkeypatch,
            'display:\n show_reasoning: "true"\n',
        )
        assert loaded is True

    def test_bare_true_is_true(self, tmp_path, monkeypatch):
        """A bare YAML ``true`` loads as True."""
        loaded = self._load_with_config(
            tmp_path,
            monkeypatch,
            'display:\n show_reasoning: true\n',
        )
        assert loaded is True

    def test_missing_is_false(self, tmp_path, monkeypatch):
        """An empty ``display`` mapping loads as False."""
        loaded = self._load_with_config(
            tmp_path,
            monkeypatch,
            'display: {}\n',
        )
        assert loaded is False
@@ -113,36 +113,6 @@ async def test_restart_command_preserves_thread_id(tmp_path, monkeypatch):
assert data["thread_id"] == "topic_7"
@pytest.mark.asyncio
async def test_restart_command_uses_atomic_json_writes_for_marker_files(tmp_path, monkeypatch):
    """``/restart`` writes both marker files through ``atomic_json_write``."""
    recorded = []

    def _record_write(path, payload, **kwargs):
        # Keep only the file name so the assertion is independent of tmp_path.
        recorded.append((Path(path).name, payload, kwargs))

    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    monkeypatch.setattr(gateway_run, "atomic_json_write", _record_write)

    runner, _adapter = make_restart_runner()
    runner.request_restart = MagicMock(return_value=True)

    restart_event = MessageEvent(
        text="/restart",
        message_type=MessageType.TEXT,
        source=make_restart_source(chat_id="42"),
        message_id="m1",
    )
    await runner._handle_restart_command(restart_event)

    written_files = [entry[0] for entry in recorded]
    assert written_files == [".restart_notify.json", ".restart_last_processed.json"]

    notify_payload = recorded[0][1]
    last_processed_payload = recorded[1][1]
    assert notify_payload["chat_id"] == "42"
    assert last_processed_payload["platform"] == "telegram"
# ── _send_restart_notification ───────────────────────────────────────────
@@ -999,65 +999,3 @@ class TestStuckLoopEscalation:
assert store._entries[entry.session_key].resume_pending is False
assert not counts_file.exists()
def test_increment_restart_failure_counts_uses_atomic_json_write(
    self, tmp_path, monkeypatch
):
    """Incrementing a failure count writes ``{key: 1}`` via ``atomic_json_write``."""
    from gateway.run import GatewayRunner

    origin = _make_source()
    tracked_key = _make_store(tmp_path).get_or_create_session(origin).session_key

    monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
    writes = []

    def _record_write(path, payload, **kwargs):
        writes.append((path, payload, kwargs))

    monkeypatch.setattr("gateway.run.atomic_json_write", _record_write)

    # Bypass __init__: only the method under test is exercised.
    bare_runner = object.__new__(GatewayRunner)
    bare_runner._increment_restart_failure_counts({tracked_key})

    expected_write = (
        tmp_path / ".restart_failure_counts",
        {tracked_key: 1},
        {"indent": None},
    )
    assert writes == [expected_write]
def test_clear_restart_failure_count_uses_atomic_json_write_when_entries_remain(
    self, tmp_path, monkeypatch
):
    """Clearing one key rewrites the remaining counts via ``atomic_json_write``."""
    import json

    from gateway.run import GatewayRunner

    origin = _make_source()
    cleared_key = _make_store(tmp_path).get_or_create_session(origin).session_key
    surviving_key = "agent:main:telegram:dm:other"

    # Seed the counts file with two sessions so one entry remains afterwards.
    seeded_counts = {cleared_key: 2, surviving_key: 1}
    counts_path = tmp_path / ".restart_failure_counts"
    counts_path.write_text(json.dumps(seeded_counts), encoding="utf-8")

    monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
    writes = []

    def _record_write(path, payload, **kwargs):
        writes.append((path, payload, kwargs))

    monkeypatch.setattr("gateway.run.atomic_json_write", _record_write)

    # Bypass __init__: only the method under test is exercised.
    bare_runner = object.__new__(GatewayRunner)
    bare_runner._clear_restart_failure_count(cleared_key)

    expected_write = (
        tmp_path / ".restart_failure_counts",
        {surviving_key: 1},
        {"indent": None},
    )
    assert writes == [expected_write]
+5 -16
View File
@@ -1243,7 +1243,7 @@ class TestRewriteTranscriptPreservesReasoning:
assert after[0].get("reasoning_details") == [{"type": "summary", "text": "step by step"}]
assert after[0].get("codex_reasoning_items") == [{"id": "r1", "type": "reasoning"}]
def test_db_rewrite_is_atomic_on_insert_failure(self, tmp_path, monkeypatch):
def test_db_rewrite_is_atomic_on_insert_failure(self, tmp_path):
from hermes_state import SessionDB
db = SessionDB(db_path=tmp_path / "test.db")
@@ -1258,27 +1258,16 @@ class TestRewriteTranscriptPreservesReasoning:
store._db = db
store._loaded = True
# Force the second insert inside replace_messages to fail, simulating
# any storage-layer error that might abort a multi-row rewrite.
real_encode = SessionDB._encode_content
calls = {"n": 0}
def flaky_encode(cls, content):
calls["n"] += 1
if calls["n"] == 2:
raise RuntimeError("simulated storage failure")
return real_encode.__func__(cls, content)
monkeypatch.setattr(SessionDB, "_encode_content", classmethod(flaky_encode))
replacement = [
{"role": "user", "content": "after user"},
{"role": "assistant", "content": "after assistant"},
{
"role": "assistant",
"content": {"not": "sqlite-bindable but JSONL-safe"},
},
]
store.rewrite_transcript(session_id, replacement)
# The rewrite must roll back atomically — original messages preserved.
after = db.get_messages_as_conversation(session_id)
assert [msg["content"] for msg in after] == [
"before user",
@@ -10,7 +10,6 @@ from gateway.platforms.base import MessageEvent
from gateway.session import SessionEntry, SessionSource, build_session_key
from tools import approval as approval_mod
from tools.approval import (
_ApprovalEntry,
approve_session,
enable_session_yolo,
is_approved,
@@ -173,38 +172,6 @@ async def test_branch_clears_session_scoped_approval_and_yolo_state():
assert other_key in runner._update_prompt_pending
@pytest.mark.asyncio
async def test_branch_preserves_persisted_assistant_metadata():
    """``/branch`` forwards the assistant message's reasoning metadata to the new session."""
    runner, _session_key = _make_branch_runner()
    user_turn = {"role": "user", "content": "hello"}
    assistant_turn = {
        "role": "assistant",
        "content": "world",
        "finish_reason": "stop",
        "reasoning": "thinking",
        "reasoning_content": "provider scratchpad",
        "reasoning_details": [{"type": "summary", "text": "step"}],
        "codex_reasoning_items": [{"id": "r1", "type": "reasoning"}],
        "codex_message_items": [{"id": "m1", "type": "message"}],
    }
    runner.session_store.load_transcript.return_value = [user_turn, assistant_turn]

    reply = await runner._handle_branch_command(_make_event("/branch"))
    assert "Branched to" in reply

    appended = runner._session_db.append_message.call_args_list
    assert len(appended) == 2

    # The second append is the assistant turn; each field below must arrive
    # unchanged.  Expected values are fresh literals, not the input objects.
    forwarded = appended[1].kwargs
    expected_fields = {
        "role": "assistant",
        "finish_reason": "stop",
        "reasoning": "thinking",
        "reasoning_content": "provider scratchpad",
        "reasoning_details": [{"type": "summary", "text": "step"}],
        "codex_reasoning_items": [{"id": "r1", "type": "reasoning"}],
        "codex_message_items": [{"id": "m1", "type": "message"}],
    }
    for field_name, expected_value in expected_fields.items():
        assert forwarded[field_name] == expected_value
def test_clear_session_boundary_security_state_is_scoped():
"""The helper must wipe only the target session's approval/yolo state.
@@ -247,30 +214,3 @@ def test_clear_session_boundary_security_state_is_scoped():
runner._clear_session_boundary_security_state("")
assert is_approved(other_key, "recursive delete") is True
assert other_key in runner._update_prompt_pending
def test_clear_session_boundary_security_state_wakes_blocked_approvals():
    """Boundary cleanup denies and wakes the target session's queued approvals only."""
    from gateway.run import GatewayRunner

    # Bypass __init__ and supply just the two maps assigned here.
    bare_runner = object.__new__(GatewayRunner)
    bare_runner._pending_approvals = {}
    bare_runner._update_prompt_pending = {}

    target_key = build_session_key(_make_source())
    bystander_key = "agent:main:telegram:dm:other-chat"

    target_waiter = _ApprovalEntry({"command": "rm -rf /tmp/demo"})
    bystander_waiter = _ApprovalEntry({"command": "rm -rf /tmp/other"})
    approval_mod._gateway_queues[target_key] = [target_waiter]
    approval_mod._gateway_queues[bystander_key] = [bystander_waiter]

    bare_runner._clear_session_boundary_security_state(target_key)

    # The target's waiter is released with a "deny" verdict ...
    assert target_waiter.event.is_set()
    assert target_waiter.result == "deny"
    # ... while the other session's waiter is still blocked and undecided.
    assert bystander_waiter.event.is_set() is False
    assert bystander_waiter.result is None
    # Only the target's queue is removed.
    assert target_key not in approval_mod._gateway_queues
    assert bystander_key in approval_mod._gateway_queues
-33
View File
@@ -226,39 +226,6 @@ def test_merge_pending_message_event_merges_text_and_photo_followups():
assert merged.media_types == ["image/png"]
def test_merge_pending_message_event_promotes_document_followups_over_text():
    """Merging a document after a text yields a DOCUMENT event that keeps the text."""
    origin = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="12345",
        chat_type="dm",
        user_id="u1",
    )
    key = build_session_key(origin)
    queue = {}

    typed_message = MessageEvent(
        text="please review this",
        message_type=MessageType.TEXT,
        source=origin,
    )
    attached_file = MessageEvent(
        text="",
        message_type=MessageType.DOCUMENT,
        source=origin,
        media_urls=["/tmp/report.pdf"],
        media_types=["application/pdf"],
    )

    # Text first, document second: the order is what the test is about.
    for incoming in (typed_message, attached_file):
        merge_pending_message_event(queue, key, incoming, merge_text=True)

    combined = queue[key]
    assert combined.message_type == MessageType.DOCUMENT
    assert combined.text == "please review this"
    assert combined.media_urls == ["/tmp/report.pdf"]
    assert combined.media_types == ["application/pdf"]
@pytest.mark.asyncio
async def test_recent_telegram_text_followup_is_queued_without_interrupt():
runner = _make_runner()
-145
View File
@@ -1649,148 +1649,3 @@ class TestSignalSendTimeout:
# 32 attachments × 5s = 160s; ought to comfortably outlast a
# serial upload of an attachment-heavy batch.
assert _signal_send_timeout(32) == 160.0
# ---------------------------------------------------------------------------
# Contentless Envelope Filtering (profile key updates, empty messages)
# ---------------------------------------------------------------------------
class TestSignalContentlessEnvelope:
    """Profile key updates and empty Signal messages are skipped; real content is not."""

    @staticmethod
    def _envelope(data_message):
        """Wrap ``data_message`` in the sender/timestamp envelope every test here uses."""
        return {
            "envelope": {
                "sourceNumber": "+155****9999",
                "sourceUuid": "05668cf3-8ffa-467e-9b24-f5eefa5cf475",
                "sourceName": "Elliott McManis",
                "timestamp": 1777600696077,
                "dataMessage": data_message,
            }
        }

    @staticmethod
    def _capture_events(adapter):
        """Replace ``adapter.handle_message`` with a recorder; return its dict."""
        seen = {}

        async def _record(event):
            seen["event"] = event

        adapter.handle_message = _record
        return seen

    @pytest.mark.asyncio
    async def test_skips_profile_key_update_no_message_field(self, monkeypatch):
        """A dataMessage that has no 'message' field must not reach the handler.

        Profile key updates may arrive in this shape; dispatching them would
        trigger an agent turn for pure metadata.
        """
        adapter = _make_signal_adapter(monkeypatch)
        seen = self._capture_events(adapter)

        # Only profile-key metadata: the "message" key is absent entirely.
        metadata_only = {
            "profileKey": "some-profile-key-data",
        }
        await adapter._handle_envelope(self._envelope(metadata_only))

        assert "event" not in seen, "Profile key update should be skipped"

    @pytest.mark.asyncio
    async def test_skips_empty_message(self, monkeypatch):
        """An empty text message (message='') must not reach the handler."""
        adapter = _make_signal_adapter(monkeypatch)
        seen = self._capture_events(adapter)

        await adapter._handle_envelope(self._envelope({"message": ""}))

        assert "event" not in seen, "Empty message should be skipped"

    @pytest.mark.asyncio
    async def test_skips_whitespace_only_message(self, monkeypatch):
        """A message made only of whitespace must not reach the handler."""
        adapter = _make_signal_adapter(monkeypatch)
        seen = self._capture_events(adapter)

        await adapter._handle_envelope(self._envelope({"message": " \n\t "}))

        assert "event" not in seen, "Whitespace-only message should be skipped"

    @pytest.mark.asyncio
    async def test_allows_message_with_attachment_no_text(self, monkeypatch):
        """An attachment with empty text is still dispatched to the handler."""
        adapter = _make_signal_adapter(monkeypatch)
        seen = self._capture_events(adapter)

        # Stub the RPC so the attachment fetch returns base64 PNG-like bytes.
        fake_png = b"\x89PNG\r\n\x1a\n" + b"\x00" * 100
        encoded_png = base64.b64encode(fake_png).decode()
        adapter._rpc, _ = _stub_rpc({"data": encoded_png})

        attachment_only = {
            "message": "",  # No text
            "attachments": [{"id": "att-123", "size": 200}],
        }
        with patch("gateway.platforms.signal.cache_image_from_bytes", return_value="/tmp/img.png"):
            await adapter._handle_envelope(self._envelope(attachment_only))

        assert "event" in seen, "Message with attachment should NOT be skipped"
        assert seen["event"].media_urls == ["/tmp/img.png"]

    @pytest.mark.asyncio
    async def test_allows_normal_text_message(self, monkeypatch):
        """An ordinary text message is dispatched with its text intact."""
        adapter = _make_signal_adapter(monkeypatch)
        seen = self._capture_events(adapter)

        await adapter._handle_envelope(self._envelope({"message": "hello world"}))

        assert "event" in seen, "Normal message should NOT be skipped"
        assert seen["event"].text == "hello world"
-51
View File
@@ -2,7 +2,6 @@
import json
import os
from pathlib import Path
from types import SimpleNamespace
from gateway import status
@@ -246,27 +245,6 @@ class TestGatewayPidState:
class TestGatewayRuntimeStatus:
def test_write_json_file_uses_atomic_json_write(self, tmp_path, monkeypatch):
    """``status._write_json_file`` delegates to ``atomic_json_write`` with compact options."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    recorded = []

    def _record_write(path, payload, **kwargs):
        recorded.append((Path(path), payload, kwargs))

    monkeypatch.setattr(status, "atomic_json_write", _record_write)

    state_file = tmp_path / "gateway_state.json"
    state = {"gateway_state": "running"}
    status._write_json_file(state_file, state)

    # Exactly one write: no indentation and the tightest separators.
    compact_options = {"indent": None, "separators": (",", ":")}
    assert recorded == [(state_file, state, compact_options)]
def test_write_runtime_status_overwrites_stale_pid_on_restart(self, tmp_path, monkeypatch):
"""Regression: setdefault() preserved stale PID from previous process (#1631)."""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
@@ -371,35 +349,6 @@ class TestTerminatePid:
class TestScopedLocks:
def test_windows_file_lock_uses_high_offset(self, tmp_path, monkeypatch):
    """On the Windows path, lock and unlock both happen at ``_WINDOWS_LOCK_OFFSET``.

    ``msvcrt`` is replaced by a stub whose ``locking`` records the file
    position at call time, so the test runs on any platform.
    """
    lock_path = tmp_path / "gateway.lock"
    handle = open(lock_path, "a+", encoding="utf-8")
    descriptor = handle.fileno()
    observed = []

    def fake_locking(fd, mode, size):
        # handle.tell() shows where the file position was when locking ran.
        observed.append((fd, mode, size, handle.tell()))

    fake_msvcrt = SimpleNamespace(LK_NBLCK=1, LK_UNLCK=2, locking=fake_locking)
    monkeypatch.setattr(status, "_IS_WINDOWS", True)
    # raising=False: the attribute may not exist on the module being patched.
    monkeypatch.setattr(status, "msvcrt", fake_msvcrt, raising=False)

    try:
        acquired = status._try_acquire_file_lock(handle)
        assert acquired is True
        status._release_file_lock(handle)
    finally:
        handle.close()

    # Mode 1 is the stub's LK_NBLCK (acquire), mode 2 its LK_UNLCK (release).
    expected = [
        (descriptor, mode, 1, status._WINDOWS_LOCK_OFFSET)
        for mode in (1, 2)
    ]
    assert observed == expected
    assert lock_path.read_text(encoding="utf-8") == "\n"
def test_acquire_scoped_lock_rejects_live_other_process(self, tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
lock_path = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock"
-126
View File
@@ -55,9 +55,6 @@ def _make_runner(session_entry: SessionEntry, *, platform: Platform = Platform.T
runner._pending_approvals = {}
runner._session_db = MagicMock()
runner._session_db.get_session_title.return_value = None
# Default: no DB row → /status reports 0 tokens. Tests that exercise
# the populated path override this.
runner._session_db.get_session.return_value = None
runner._reasoning_config = None
runner._provider_routing = {}
runner._fallback_model = None
@@ -83,14 +80,6 @@ async def test_status_command_reports_running_agent_without_interrupt(monkeypatc
total_tokens=321,
)
runner = _make_runner(session_entry)
# Token total comes from the SQLite SessionDB, not SessionEntry.
runner._session_db.get_session.return_value = {
"input_tokens": 200,
"output_tokens": 121,
"cache_read_tokens": 0,
"cache_write_tokens": 0,
"reasoning_tokens": 0,
}
running_agent = MagicMock()
runner._running_agents[build_session_key(_make_source())] = running_agent
@@ -124,56 +113,6 @@ async def test_status_command_includes_session_title_when_present():
assert "**Title:** My titled session" in result
@pytest.mark.asyncio
async def test_status_command_reads_token_totals_from_session_db():
    """Regression for #17158: ``/status`` sums token counts from the session DB row.

    The ``SessionEntry`` here carries ``total_tokens=0``, so the expected
    figure can only come from the mocked ``get_session`` row.
    """
    entry = SessionEntry(
        session_key=build_session_key(_make_source()),
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
        total_tokens=0,  # Deliberately zero: must not be the source of the total.
    )
    runner = _make_runner(entry)

    db_row = {
        "input_tokens": 1000,
        "output_tokens": 250,
        "cache_read_tokens": 500,
        "cache_write_tokens": 100,
        "reasoning_tokens": 50,
    }
    runner._session_db.get_session.return_value = db_row

    status_text = await runner._handle_message(_make_event("/status"))

    # 1000 + 250 + 500 + 100 + 50 = 1,900
    assert "**Tokens:** 1,900" in status_text
@pytest.mark.asyncio
async def test_status_command_tokens_zero_when_session_db_row_missing():
    """With no session DB row, ``/status`` reports 0 tokens and does not raise."""
    entry = SessionEntry(
        session_key=build_session_key(_make_source()),
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
        total_tokens=999,  # Must be ignored by /status.
    )
    runner = _make_runner(entry)
    # No row for this session (e.g. a fresh session with no agent calls yet).
    runner._session_db.get_session.return_value = None

    status_text = await runner._handle_message(_make_event("/status"))

    assert "**Tokens:** 0" in status_text
@pytest.mark.asyncio
async def test_agents_command_reports_active_agents_and_processes(monkeypatch):
session_key = build_session_key(_make_source())
@@ -568,68 +507,3 @@ async def test_profile_command_reports_custom_root_profile(monkeypatch, tmp_path
assert "**Profile:** `coder`" in result
assert f"**Home:** `{profile_home}`" in result
@pytest.mark.asyncio
async def test_post_delivery_callback_generation_snapshot_happens_after_bind():
    """Regression: the callback generation must be read AFTER the handler has run.

    ``_hermes_run_generation`` is attached to the interrupt event while the
    handler executes (the handler below does so directly, standing in for
    ``GatewayRunner._bind_adapter_run_generation``).  A snapshot taken at task
    start would see ``None``, skip the ownership check in
    ``pop_post_delivery_callback`` and let a stale run fire a fresher run's
    callback.
    """
    import asyncio

    from gateway.platforms.base import BasePlatformAdapter

    origin = _make_source()
    key = build_session_key(origin)
    fired = []

    class _ConcreteAdapter(BasePlatformAdapter):
        """Smallest concrete adapter: every abstract hook is a no-op."""

        platform = Platform.TELEGRAM

        async def connect(self):
            pass

        async def disconnect(self):
            pass

        async def send(self, chat_id, content, **kwargs):
            pass

        async def get_chat_info(self, chat_id):
            return {}

    adapter = _ConcreteAdapter(
        PlatformConfig(enabled=True, token="***"), Platform.TELEGRAM
    )

    def _older_callback():
        fired.append("older")

    def _newer_callback():
        fired.append("newer")

    async def fake_handler(event):
        # Tag this run as generation 1 mid-run.
        active_event = adapter._active_sessions.get(key)
        setattr(active_event, "_hermes_run_generation", 1)
        # The stale run registers its callback at generation=1 ...
        adapter.register_post_delivery_callback(
            key,
            _older_callback,
            generation=1,
        )
        # ... and a fresher run then registers at generation=2 for the same key.
        adapter.register_post_delivery_callback(
            key,
            _newer_callback,
            generation=2,
        )
        return None

    adapter.set_message_handler(fake_handler)
    await adapter.handle_message(
        MessageEvent(text="hello", source=origin, message_id="m1")
    )

    pending_tasks = list(adapter._background_tasks)
    assert pending_tasks, "expected background task to be created"
    await asyncio.gather(*pending_tasks)

    # The generation-1 run must not fire the generation-2 callback: nothing
    # fired, and the generation-2 registration is still in place.
    assert fired == []
    assert key in adapter._post_delivery_callbacks
    assert adapter._post_delivery_callbacks[key][0] == 2
@@ -59,21 +59,6 @@ def _make_adapter(extra=None):
return adapter
class _AuthRunner:
    """Stand-in runner for callback auth tests.

    It answers every authorization query with a fixed verdict and remembers
    the last source it was asked about.
    """

    def __init__(self, authorized: bool):
        self.last_source = None
        self.authorized = authorized

    async def _handle_message(self, event):
        """Accept any event and produce no reply."""
        return None

    def _is_user_authorized(self, source):
        """Record ``source`` and return the preconfigured verdict."""
        verdict = self.authorized
        self.last_source = source
        return verdict
# ===========================================================================
# send_exec_approval — inline keyboard buttons
# ===========================================================================
@@ -245,41 +230,6 @@ class TestTelegramApprovalCallback:
edit_kwargs = query.edit_message_text.call_args[1]
assert "Denied" in edit_kwargs["text"]
@pytest.mark.asyncio
async def test_approval_callback_rejects_user_blocked_by_global_allowlist(self):
    """An unauthorized user's approval button press is refused and changes nothing."""
    pending_session = "agent:main:telegram:group:12345:99"
    adapter = _make_adapter()
    adapter._approval_state[7] = pending_session

    # The handler is a bound method of the runner that denies every user.
    gatekeeper = _AuthRunner(authorized=False)
    adapter._message_handler = gatekeeper._handle_message

    # Button press "approve once" for approval id 7, sent by user 222.
    presser = MagicMock()
    presser.id = 222
    presser.first_name = "Mallory"

    query = AsyncMock()
    query.data = "ea:once:7"
    query.message = MagicMock()
    query.message.chat_id = 12345
    query.message.chat.type = "private"
    query.from_user = presser
    query.answer = AsyncMock()
    query.edit_message_text = AsyncMock()

    update = MagicMock()
    update.callback_query = query

    with patch("tools.approval.resolve_gateway_approval") as resolve_mock:
        await adapter._handle_callback_query(update, MagicMock())

    # Nothing was resolved; the user only received a "not authorized" answer.
    resolve_mock.assert_not_called()
    query.answer.assert_called_once()
    answer_text = query.answer.call_args[1]["text"]
    assert "not authorized" in answer_text.lower()
    query.edit_message_text.assert_not_called()
    assert adapter._approval_state[7] == pending_session

    # The authorization check was given the presser's identity.
    checked = gatekeeper.last_source
    assert checked is not None
    assert checked.platform == Platform.TELEGRAM
    assert checked.user_id == "222"
    assert checked.chat_id == "12345"
@pytest.mark.asyncio
async def test_already_resolved(self):
adapter = _make_adapter()
@@ -383,39 +333,6 @@ class TestTelegramApprovalCallback:
query.edit_message_text.assert_not_called()
assert not (tmp_path / ".update_response").exists()
@pytest.mark.asyncio
async def test_update_prompt_callback_rejects_user_blocked_by_global_allowlist(self, tmp_path):
    """An unauthorized user's update-prompt answer is refused and writes no response file."""
    adapter = _make_adapter()

    # The handler is a bound method of the runner that denies every user.
    gatekeeper = _AuthRunner(authorized=False)
    adapter._message_handler = gatekeeper._handle_message

    presser = MagicMock()
    presser.id = 222
    presser.first_name = "Mallory"

    query = AsyncMock()
    query.data = "update_prompt:y"
    query.message = MagicMock()
    query.message.chat_id = 12345
    query.message.chat.type = "private"
    query.from_user = presser
    query.answer = AsyncMock()
    query.edit_message_text = AsyncMock()

    update = MagicMock()
    update.callback_query = query

    # Point the Hermes home at tmp_path and blank the Telegram allow-list variable.
    with patch("hermes_constants.get_hermes_home", return_value=tmp_path), \
            patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": ""}):
        await adapter._handle_callback_query(update, MagicMock())

    query.answer.assert_called_once()
    answer_text = query.answer.call_args[1]["text"]
    assert "not authorized" in answer_text.lower()
    query.edit_message_text.assert_not_called()
    assert not (tmp_path / ".update_response").exists()

    # The authorization check was given the presser's identity.
    checked = gatekeeper.last_source
    assert checked is not None
    assert checked.platform == Platform.TELEGRAM
    assert checked.user_id == "222"
@pytest.mark.asyncio
async def test_update_prompt_callback_allows_authorized_user(self, tmp_path):
"""Allowed Telegram users can still answer update prompt buttons."""
+1 -55
View File
@@ -17,14 +17,13 @@ from gateway.session import SessionSource
def _make_event(text="/update", platform=Platform.TELEGRAM,
user_id="12345", chat_id="67890", thread_id=None):
user_id="12345", chat_id="67890"):
"""Build a MessageEvent for testing."""
source = SessionSource(
platform=platform,
user_id=user_id,
chat_id=chat_id,
user_name="testuser",
thread_id=thread_id,
)
return MessageEvent(text=text, source=source)
@@ -215,34 +214,6 @@ class TestHandleUpdateCommand:
assert "timestamp" in data
assert not (hermes_home / ".update_exit_code").exists()
@pytest.mark.asyncio
async def test_writes_pending_marker_with_thread_id(self, tmp_path):
    """The pending-update marker stores the event's ``thread_id``."""
    runner = _make_runner()
    threaded_event = _make_event(
        platform=Platform.TELEGRAM,
        chat_id="99999",
        thread_id="777",
    )

    # Fake checkout: a .git directory plus gateway/run.py to stand in for __file__.
    project_root = tmp_path / "project"
    project_root.mkdir()
    (project_root / ".git").mkdir()
    gateway_dir = project_root / "gateway"
    gateway_dir.mkdir()
    run_module = gateway_dir / "run.py"
    run_module.touch()

    home_dir = tmp_path / "hermes"
    home_dir.mkdir()

    def _fake_which(x):
        # Both lookups succeed so the command has its executables available.
        if x == "hermes":
            return "/usr/bin/hermes"
        return "/usr/bin/setsid"

    with patch("gateway.run._hermes_home", home_dir), \
            patch("gateway.run.__file__", str(run_module)), \
            patch("shutil.which", side_effect=_fake_which), \
            patch("subprocess.Popen"):
        await runner._handle_update_command(threaded_event)

    marker = json.loads((home_dir / ".update_pending.json").read_text())
    assert marker["thread_id"] == "777"
@pytest.mark.asyncio
async def test_spawns_setsid(self, tmp_path):
"""Uses setsid when available."""
@@ -461,31 +432,6 @@ class TestSendUpdateNotification:
assert call_args[0][0] == "67890" # chat_id
assert "Update complete" in call_args[0][1] or "update finished" in call_args[0][1].lower()
@pytest.mark.asyncio
async def test_sends_notification_with_thread_metadata(self, tmp_path):
"""Final update notification preserves thread metadata when present."""
runner = _make_runner()
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
pending = {
"platform": "telegram",
"chat_id": "67890",
"thread_id": "777",
"user_id": "12345",
}
(hermes_home / ".update_pending.json").write_text(json.dumps(pending))
(hermes_home / ".update_output.txt").write_text("done")
(hermes_home / ".update_exit_code").write_text("0")
mock_adapter = AsyncMock()
runner.adapters = {Platform.TELEGRAM: mock_adapter}
with patch("gateway.run._hermes_home", hermes_home):
await runner._send_update_notification()
assert mock_adapter.send.call_args.kwargs["metadata"] == {"thread_id": "777"}
@pytest.mark.asyncio
async def test_strips_ansi_codes(self, tmp_path):
"""ANSI escape codes are removed from output."""
-52
View File
@@ -321,58 +321,6 @@ class TestWatchUpdateProgress:
# Check session was marked as having pending prompt
# (may be cleared by the time we check since update finished)
@pytest.mark.asyncio
async def test_prompt_forwarding_preserves_thread_metadata(self, tmp_path):
"""Forwarded update prompts keep the originating thread/topic metadata."""
runner = _make_runner()
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
pending = {
"platform": "telegram",
"chat_id": "111",
"thread_id": "777",
"user_id": "222",
"session_key": "agent:main:telegram:group:111:777",
}
(hermes_home / ".update_pending.json").write_text(json.dumps(pending))
(hermes_home / ".update_output.txt").write_text("")
(hermes_home / ".update_prompt.json").write_text(json.dumps({
"prompt": "Restore local changes? [Y/n]",
"default": "y",
"id": "threaded-prompt",
}))
class _PromptCapableAdapter:
def __init__(self):
self.send = AsyncMock()
self.prompt_calls = AsyncMock()
async def send_update_prompt(self, **kwargs):
return await self.prompt_calls(**kwargs)
mock_adapter = _PromptCapableAdapter()
runner.adapters = {Platform.TELEGRAM: mock_adapter}
async def finish_after_prompt():
await asyncio.sleep(0.3)
(hermes_home / ".update_response").write_text("y")
await asyncio.sleep(0.2)
(hermes_home / ".update_exit_code").write_text("0")
with patch("gateway.run._hermes_home", hermes_home):
task = asyncio.create_task(finish_after_prompt())
await runner._watch_update_progress(
poll_interval=0.1,
stream_interval=0.2,
timeout=5.0,
)
await task
assert mock_adapter.prompt_calls.call_args.kwargs["metadata"] == {
"thread_id": "777"
}
@pytest.mark.asyncio
async def test_cleans_up_on_completion(self, tmp_path):
"""All marker files are cleaned up when update finishes."""
-19
View File
@@ -85,25 +85,6 @@ class TestVerboseCommand:
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert saved["display"]["platforms"]["telegram"]["tool_progress"] == "verbose"
@pytest.mark.asyncio
async def test_quoted_false_keeps_command_disabled(self, tmp_path, monkeypatch):
"""Quoted false must not enable the /verbose gateway command."""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text(
'display:\n tool_progress_command: "false"\n tool_progress: all\n',
encoding="utf-8",
)
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
runner = _make_runner()
result = await runner._handle_verbose_command(_make_event())
assert "not enabled" in result.lower()
assert "tool_progress_command" in result
@pytest.mark.asyncio
async def test_cycles_through_all_modes(self, tmp_path, monkeypatch):
"""Calling /verbose repeatedly cycles through all four modes."""
-35
View File
@@ -5,10 +5,8 @@ from __future__ import annotations
import base64
import json
from datetime import datetime, timezone
from unittest.mock import patch
import pytest
import yaml
def _write_auth_store(tmp_path, payload: dict) -> None:
@@ -591,39 +589,6 @@ def test_logout_clears_stale_active_codex_without_provider_credentials(tmp_path,
assert "provider: auto" in config_text
def test_reset_config_provider_uses_atomic_yaml_write(tmp_path, monkeypatch):
"""Logout config reset should delegate the YAML write atomically."""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
config_path = hermes_home / "config.yaml"
original = {
"model": {
"default": "gpt-5.3-codex",
"provider": "openai-codex",
"base_url": "https://chatgpt.com/backend-api/codex",
}
}
config_path.write_text(yaml.safe_dump(original, sort_keys=False), encoding="utf-8")
original_text = config_path.read_text(encoding="utf-8")
from hermes_cli.auth import _reset_config_provider
def _boom(path, data, **kwargs):
assert path == config_path
assert data["model"]["provider"] == "auto"
assert data["model"]["base_url"] == "https://openrouter.ai/api/v1"
assert kwargs["sort_keys"] is False
raise OSError("simulated atomic write failure")
with patch("hermes_cli.auth.atomic_yaml_write", side_effect=_boom) as mock_write:
with pytest.raises(OSError, match="simulated atomic write failure"):
_reset_config_provider()
assert mock_write.call_count == 1
assert config_path.read_text(encoding="utf-8") == original_text
def test_auth_list_does_not_call_mutating_select(monkeypatch, capsys):
from hermes_cli.auth_commands import auth_list_command
@@ -76,20 +76,6 @@ class TestResolveVerifyFallback:
)
assert result is False
def test_string_false_in_auth_state_does_not_disable_tls_verify(self):
import ssl
from hermes_cli.auth import _resolve_verify
result = _resolve_verify(auth_state={"tls": {"insecure": "false"}})
assert result is not False
assert result is True or isinstance(result, ssl.SSLContext)
def test_string_true_in_auth_state_disables_tls_verify(self):
from hermes_cli.auth import _resolve_verify
result = _resolve_verify(auth_state={"tls": {"insecure": "true"}})
assert result is False
def test_no_ca_bundle_returns_true(self, monkeypatch):
from hermes_cli.auth import _resolve_verify
-15
View File
@@ -405,21 +405,6 @@ class TestGatewayConfigGate:
joined = "\n".join(lines)
assert "`/verbose" in joined
def test_config_gate_quoted_false_stays_disabled_everywhere(self, tmp_path, monkeypatch):
"""Quoted false must not enable config-gated gateway commands."""
config_file = tmp_path / "config.yaml"
config_file.write_text('display:\n tool_progress_command: "false"\n')
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
lines = gateway_help_lines()
joined = "\n".join(lines)
names = {name for name, _ in telegram_bot_commands()}
mapping = slack_subcommand_map()
assert "`/verbose" not in joined
assert "verbose" not in names
assert "verbose" not in mapping
def test_config_gate_excluded_from_telegram_when_off(self, tmp_path, monkeypatch):
config_file = tmp_path / "config.yaml"
config_file.write_text("display:\n tool_progress_command: false\n")
-358
View File
@@ -1,358 +0,0 @@
"""Tests for hermes_cli/goals.py — persistent cross-turn goals."""
from __future__ import annotations
import json
from unittest.mock import patch, MagicMock
import pytest
# ──────────────────────────────────────────────────────────────────────
# Fixtures
# ──────────────────────────────────────────────────────────────────────
@pytest.fixture
def hermes_home(tmp_path, monkeypatch):
"""Isolated HERMES_HOME so SessionDB.state_meta writes don't clobber the real one."""
from pathlib import Path
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setattr(Path, "home", lambda: tmp_path)
monkeypatch.setenv("HERMES_HOME", str(home))
# Bust the goal-module's DB cache for each test so it re-resolves HERMES_HOME.
from hermes_cli import goals
goals._DB_CACHE.clear()
yield home
goals._DB_CACHE.clear()
# ──────────────────────────────────────────────────────────────────────
# _parse_judge_response
# ──────────────────────────────────────────────────────────────────────
class TestParseJudgeResponse:
def test_clean_json_done(self):
from hermes_cli.goals import _parse_judge_response
done, reason = _parse_judge_response('{"done": true, "reason": "all good"}')
assert done is True
assert reason == "all good"
def test_clean_json_continue(self):
from hermes_cli.goals import _parse_judge_response
done, reason = _parse_judge_response('{"done": false, "reason": "more work needed"}')
assert done is False
assert reason == "more work needed"
def test_json_in_markdown_fence(self):
from hermes_cli.goals import _parse_judge_response
raw = '```json\n{"done": true, "reason": "done"}\n```'
done, reason = _parse_judge_response(raw)
assert done is True
assert "done" in reason
def test_json_embedded_in_prose(self):
"""Some models prefix reasoning before emitting JSON — we extract it."""
from hermes_cli.goals import _parse_judge_response
raw = 'Looking at this... the agent says X. Verdict: {"done": false, "reason": "partial"}'
done, reason = _parse_judge_response(raw)
assert done is False
assert reason == "partial"
def test_string_done_values(self):
from hermes_cli.goals import _parse_judge_response
for s in ("true", "yes", "done", "1"):
done, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
assert done is True
for s in ("false", "no", "not yet"):
done, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
assert done is False
def test_malformed_json_fails_open(self):
"""Non-JSON → not done, with error-ish reason (so judge_goal can map to continue)."""
from hermes_cli.goals import _parse_judge_response
done, reason = _parse_judge_response("this is not json at all")
assert done is False
assert reason # non-empty
def test_empty_response(self):
from hermes_cli.goals import _parse_judge_response
done, reason = _parse_judge_response("")
assert done is False
assert reason
# ──────────────────────────────────────────────────────────────────────
# judge_goal — fail-open semantics
# ──────────────────────────────────────────────────────────────────────
class TestJudgeGoal:
def test_empty_goal_skipped(self):
from hermes_cli.goals import judge_goal
verdict, _ = judge_goal("", "some response")
assert verdict == "skipped"
def test_empty_response_continues(self):
from hermes_cli.goals import judge_goal
verdict, _ = judge_goal("ship the thing", "")
assert verdict == "continue"
def test_no_aux_client_continues(self):
"""Fail-open: if no aux client, we must return continue, not skipped/done."""
from hermes_cli import goals
with patch(
"agent.auxiliary_client.get_text_auxiliary_client",
return_value=(None, None),
):
verdict, _ = goals.judge_goal("my goal", "my response")
assert verdict == "continue"
def test_api_error_continues(self):
"""Judge exception → fail-open continue (don't wedge progress on judge bugs)."""
from hermes_cli import goals
fake_client = MagicMock()
fake_client.chat.completions.create.side_effect = RuntimeError("boom")
with patch(
"agent.auxiliary_client.get_text_auxiliary_client",
return_value=(fake_client, "judge-model"),
):
verdict, reason = goals.judge_goal("goal", "response")
assert verdict == "continue"
assert "judge error" in reason.lower()
def test_judge_says_done(self):
from hermes_cli import goals
fake_client = MagicMock()
fake_client.chat.completions.create.return_value = MagicMock(
choices=[
MagicMock(
message=MagicMock(content='{"done": true, "reason": "achieved"}')
)
]
)
with patch(
"agent.auxiliary_client.get_text_auxiliary_client",
return_value=(fake_client, "judge-model"),
):
verdict, reason = goals.judge_goal("goal", "agent response")
assert verdict == "done"
assert reason == "achieved"
def test_judge_says_continue(self):
from hermes_cli import goals
fake_client = MagicMock()
fake_client.chat.completions.create.return_value = MagicMock(
choices=[
MagicMock(
message=MagicMock(content='{"done": false, "reason": "not yet"}')
)
]
)
with patch(
"agent.auxiliary_client.get_text_auxiliary_client",
return_value=(fake_client, "judge-model"),
):
verdict, reason = goals.judge_goal("goal", "agent response")
assert verdict == "continue"
assert reason == "not yet"
# ──────────────────────────────────────────────────────────────────────
# GoalManager lifecycle + persistence
# ──────────────────────────────────────────────────────────────────────
class TestGoalManager:
def test_no_goal_initial(self, hermes_home):
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="test-sid-1")
assert mgr.state is None
assert not mgr.is_active()
assert not mgr.has_goal()
assert "No active goal" in mgr.status_line()
def test_set_then_status(self, hermes_home):
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="test-sid-2", default_max_turns=5)
state = mgr.set("port the thing")
assert state.goal == "port the thing"
assert state.status == "active"
assert state.max_turns == 5
assert state.turns_used == 0
assert mgr.is_active()
assert "active" in mgr.status_line().lower()
assert "port the thing" in mgr.status_line()
def test_set_rejects_empty(self, hermes_home):
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="test-sid-3")
with pytest.raises(ValueError):
mgr.set("")
with pytest.raises(ValueError):
mgr.set(" ")
def test_pause_and_resume(self, hermes_home):
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="test-sid-4")
mgr.set("goal text")
mgr.pause(reason="user-paused")
assert mgr.state.status == "paused"
assert not mgr.is_active()
assert mgr.has_goal()
mgr.resume()
assert mgr.state.status == "active"
assert mgr.is_active()
def test_clear(self, hermes_home):
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="test-sid-5")
mgr.set("goal")
mgr.clear()
assert mgr.state is None
assert not mgr.is_active()
def test_persistence_across_managers(self, hermes_home):
"""Key invariant: a second manager on the same session sees the goal.
This is what makes /resume work each session rebinds its
GoalManager and picks up the saved state.
"""
from hermes_cli.goals import GoalManager
mgr1 = GoalManager(session_id="persist-sid")
mgr1.set("do the thing")
mgr2 = GoalManager(session_id="persist-sid")
assert mgr2.state is not None
assert mgr2.state.goal == "do the thing"
assert mgr2.is_active()
def test_evaluate_after_turn_done(self, hermes_home):
"""Judge says done → status=done, no continuation."""
from hermes_cli import goals
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="eval-sid-1")
mgr.set("ship it")
with patch.object(goals, "judge_goal", return_value=("done", "shipped")):
decision = mgr.evaluate_after_turn("I shipped the feature.")
assert decision["verdict"] == "done"
assert decision["should_continue"] is False
assert decision["continuation_prompt"] is None
assert mgr.state.status == "done"
assert mgr.state.turns_used == 1
def test_evaluate_after_turn_continue_under_budget(self, hermes_home):
from hermes_cli import goals
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="eval-sid-2", default_max_turns=5)
mgr.set("a long goal")
with patch.object(goals, "judge_goal", return_value=("continue", "more work")):
decision = mgr.evaluate_after_turn("made some progress")
assert decision["verdict"] == "continue"
assert decision["should_continue"] is True
assert decision["continuation_prompt"] is not None
assert "a long goal" in decision["continuation_prompt"]
assert mgr.state.status == "active"
assert mgr.state.turns_used == 1
def test_evaluate_after_turn_budget_exhausted(self, hermes_home):
"""When turn budget hits ceiling, auto-pause instead of continuing."""
from hermes_cli import goals
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="eval-sid-3", default_max_turns=2)
mgr.set("hard goal")
with patch.object(goals, "judge_goal", return_value=("continue", "not yet")):
d1 = mgr.evaluate_after_turn("step 1")
assert d1["should_continue"] is True
assert mgr.state.turns_used == 1
assert mgr.state.status == "active"
d2 = mgr.evaluate_after_turn("step 2")
# turns_used is now 2 which equals max_turns → paused
assert d2["should_continue"] is False
assert mgr.state.status == "paused"
assert mgr.state.turns_used == 2
assert "budget" in (mgr.state.paused_reason or "").lower()
def test_evaluate_after_turn_inactive(self, hermes_home):
"""evaluate_after_turn is a no-op when goal isn't active."""
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="eval-sid-4")
d = mgr.evaluate_after_turn("anything")
assert d["verdict"] == "inactive"
assert d["should_continue"] is False
mgr.set("a goal")
mgr.pause()
d2 = mgr.evaluate_after_turn("anything")
assert d2["verdict"] == "inactive"
assert d2["should_continue"] is False
def test_continuation_prompt_shape(self, hermes_home):
"""The continuation prompt must include the goal text verbatim —
and must be safe to inject as a user-role message (prompt-cache
invariants: no system-prompt mutation)."""
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="cont-sid")
mgr.set("port goal command to hermes")
prompt = mgr.next_continuation_prompt()
assert prompt is not None
assert "port goal command to hermes" in prompt
assert prompt.strip() # non-empty
# ──────────────────────────────────────────────────────────────────────
# Smoke: CommandDef is wired
# ──────────────────────────────────────────────────────────────────────
def test_goal_command_in_registry():
from hermes_cli.commands import resolve_command
cmd = resolve_command("goal")
assert cmd is not None
assert cmd.name == "goal"
def test_goal_command_dispatches_in_cli_registry_helpers():
"""goal shows up in autocomplete / help categories alongside other Session cmds."""
from hermes_cli.commands import COMMANDS, COMMANDS_BY_CATEGORY
assert "/goal" in COMMANDS
session_cmds = COMMANDS_BY_CATEGORY.get("Session", {})
assert "/goal" in session_cmds
@@ -71,32 +71,6 @@ class TestSaveModelChoiceAlwaysDict:
class TestProviderPersistsAfterModelSave:
def test_update_config_for_provider_uses_atomic_yaml_write(self, config_home):
"""Provider switches should delegate config writes to atomic_yaml_write."""
from hermes_cli.auth import _update_config_for_provider
config_path = config_home / "config.yaml"
original_text = config_path.read_text(encoding="utf-8")
def _boom(path, data, **kwargs):
assert path == config_path
assert data["model"]["provider"] == "nous"
assert data["model"]["base_url"] == "https://inference.example.com/v1"
assert data["model"]["default"] == "some-old-model"
assert kwargs["sort_keys"] is False
raise OSError("simulated atomic write failure")
with patch("hermes_cli.auth.atomic_yaml_write", side_effect=_boom) as mock_write:
with pytest.raises(OSError, match="simulated atomic write failure"):
_update_config_for_provider(
"nous",
"https://inference.example.com/v1/",
default_model="llama-3.3",
)
assert mock_write.call_count == 1
assert config_path.read_text(encoding="utf-8") == original_text
def test_api_key_provider_saved_when_model_was_string(self, config_home, monkeypatch):
"""_model_flow_api_key_provider must persist the provider even when
config.model started as a plain string."""
-40
View File
@@ -21,7 +21,6 @@ from hermes_cli.plugins import (
get_plugin_command_handler,
get_plugin_commands,
get_pre_tool_call_block_message,
resolve_plugin_command_result,
discover_plugins,
invoke_hook,
)
@@ -1062,45 +1061,6 @@ class TestPluginCommands:
assert mgr._plugin_commands["cmd-b"]["plugin"] == "plugin-b"
class TestPluginCommandResultResolution:
def test_returns_sync_values_unchanged(self):
assert resolve_plugin_command_result("ok") == "ok"
def test_awaits_async_result_without_running_loop(self):
async def _handler():
return "async-ok"
assert resolve_plugin_command_result(_handler()) == "async-ok"
def test_awaits_async_result_with_running_loop(self, monkeypatch):
class _Loop:
pass
async def _handler():
return "threaded-ok"
monkeypatch.setattr("hermes_cli.plugins.asyncio.get_running_loop", lambda: _Loop())
assert resolve_plugin_command_result(_handler()) == "threaded-ok"
def test_running_loop_timeout_does_not_hang_forever(self, monkeypatch):
"""Threaded path must abort a hung async handler instead of blocking the caller."""
import asyncio as _asyncio
class _Loop:
pass
async def _slow_handler():
await _asyncio.sleep(10)
return "should-not-reach"
monkeypatch.setattr("hermes_cli.plugins.asyncio.get_running_loop", lambda: _Loop())
monkeypatch.setattr("hermes_cli.plugins._PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS", 0.1)
import pytest
with pytest.raises(TimeoutError):
resolve_plugin_command_result(_slow_handler())
# ── TestPluginDispatchTool ────────────────────────────────────────────────
@@ -897,58 +897,6 @@ def test_named_custom_provider_does_not_shadow_builtin_provider(monkeypatch):
assert resolved["requested_provider"] == "nous"
def test_named_custom_provider_wins_over_builtin_alias(monkeypatch):
"""A custom_providers entry named after a built-in *alias* (not a canonical
provider name) must win over the built-in. Regression guard for #15743:
when users define ``custom_providers: [{name: kimi, ...}]`` and reference
``provider: kimi``, the built-in alias rewriting (``kimi`` ``kimi-coding``)
would otherwise hijack the request and send it to the wrong endpoint.
"""
monkeypatch.setattr(
rp,
"load_config",
lambda: {
"custom_providers": [
{
"name": "kimi",
"base_url": "https://my-custom-kimi.example.com/v1",
"api_key": "my-kimi-key",
}
]
},
)
entry = rp._get_named_custom_provider("kimi")
assert entry is not None
assert entry["base_url"] == "https://my-custom-kimi.example.com/v1"
assert entry["api_key"] == "my-kimi-key"
def test_named_custom_provider_skipped_for_canonical_built_in(monkeypatch):
"""Companion to the test above: ``nous`` is a canonical provider name
(``resolve_provider('nous') == 'nous'``), so a custom entry with that name
should NOT be returned the built-in wins as before.
"""
monkeypatch.setattr(
rp,
"load_config",
lambda: {
"custom_providers": [
{
"name": "nous",
"base_url": "http://localhost:1234/v1",
"api_key": "shadow-key",
}
]
},
)
entry = rp._get_named_custom_provider("nous")
assert entry is None
def test_explicit_openrouter_skips_openai_base_url(monkeypatch):
"""When the user explicitly requests openrouter, OPENAI_BASE_URL
(which may point to a custom endpoint) must not override the
+1 -10
View File
@@ -120,16 +120,7 @@ def test_get_platform_tools_preserves_explicit_empty_selection():
enabled = _get_platform_tools(config, "cli")
# An explicit empty list disables every CONFIGURABLE toolset (web,
# terminal, memory, …). Non-configurable platform toolsets that ride
# along on the platform's default composite (e.g. `kanban`, whose tools
# live in _HERMES_CORE_TOOLS but aren't user-toggleable) are still
# auto-recovered by _get_platform_tools so saving via `hermes tools`
# doesn't silently drop them. The contract this test guards is the
# configurable side: nothing the user could have checked in the TUI
# checklist should reappear here.
configurable = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
assert enabled.isdisjoint(configurable)
assert enabled == set()
def test_apply_toolset_change_from_default_does_not_enable_default_off_toolsets():
@@ -392,81 +392,6 @@ class TestCmdUpdateLaunchdRestart:
captured = capsys.readouterr().out
assert "Restart manually: hermes gateway run" in captured
@patch("shutil.which", return_value=None)
@patch("subprocess.run")
def test_update_restarts_profile_manual_gateways(
self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
):
"""Profile-mapped manual gateways are relaunched automatically after update."""
monkeypatch.setattr(gateway_cli, "is_macos", lambda: True)
monkeypatch.setattr(
gateway_cli,
"get_launchd_plist_path",
lambda: tmp_path / "ai.hermes.gateway.plist",
)
mock_run.side_effect = _make_run_side_effect(
commit_count="3",
launchctl_loaded=False,
)
process = gateway_cli.ProfileGatewayProcess(
profile="coder",
path=tmp_path / ".hermes" / "profiles" / "coder",
pid=12345,
)
with patch.object(gateway_cli, "find_gateway_pids", return_value=[12345]), \
patch.object(gateway_cli, "find_profile_gateway_processes", return_value=[process]), \
patch.object(gateway_cli, "launch_detached_profile_gateway_restart", return_value=True) as restart, \
patch.object(gateway_cli, "_graceful_restart_via_sigusr1", return_value=True) as graceful, \
patch("os.kill") as kill:
cmd_update(mock_args)
captured = capsys.readouterr().out
restart.assert_called_once_with("coder", 12345)
graceful.assert_called_once()
# Graceful drain succeeded — no SIGTERM fallback needed.
kill.assert_not_called()
assert "Restarting manual gateway profile(s): coder" in captured
assert "Restart manually: hermes gateway run" not in captured
@patch("shutil.which", return_value=None)
@patch("subprocess.run")
def test_update_profile_manual_gateway_falls_back_to_sigterm(
self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
):
"""When graceful SIGUSR1 drain fails, manual profile restart falls back to SIGTERM."""
monkeypatch.setattr(gateway_cli, "is_macos", lambda: True)
monkeypatch.setattr(
gateway_cli,
"get_launchd_plist_path",
lambda: tmp_path / "ai.hermes.gateway.plist",
)
mock_run.side_effect = _make_run_side_effect(
commit_count="3",
launchctl_loaded=False,
)
process = gateway_cli.ProfileGatewayProcess(
profile="coder",
path=tmp_path / ".hermes" / "profiles" / "coder",
pid=12345,
)
with patch.object(gateway_cli, "find_gateway_pids", return_value=[12345]), \
patch.object(gateway_cli, "find_profile_gateway_processes", return_value=[process]), \
patch.object(gateway_cli, "launch_detached_profile_gateway_restart", return_value=True) as restart, \
patch.object(gateway_cli, "_graceful_restart_via_sigusr1", return_value=False) as graceful, \
patch("os.kill") as kill:
cmd_update(mock_args)
captured = capsys.readouterr().out
restart.assert_called_once_with("coder", 12345)
graceful.assert_called_once()
# Graceful drain returned False → SIGTERM fallback.
kill.assert_called_once()
assert "Restarting manual gateway profile(s): coder" in captured
@patch("shutil.which", return_value=None)
@patch("subprocess.run")
def test_update_with_systemd_still_restarts_via_systemd(

Some files were not shown because too many files have changed in this diff Show More