Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b49bd7b93d | |||
| 420c4d02e2 | |||
| 193f3b8339 | |||
| ce089169d5 | |||
| e3c0084140 | |||
| 5651a73331 | |||
| 81d925f2a5 | |||
| ec02d905c9 | |||
| b7bdf32d4e | |||
| d72985b7ce | |||
| 5a26938aa5 |
@@ -69,7 +69,7 @@ hermes-agent/
|
||||
│ ├── server.py # RPC handlers and session logic
|
||||
│ ├── render.py # Optional rich/ANSI bridge
|
||||
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
|
||||
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
├── hermes_agent/acp/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
├── cron/ # Scheduler (jobs.py, scheduler.py)
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── tests/ # Pytest suite (~3000 tests)
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
"""Allow running the ACP adapter as ``python -m acp_adapter``."""
|
||||
|
||||
from .entry import main
|
||||
|
||||
main()
|
||||
@@ -8454,7 +8454,7 @@ class HermesCLI:
|
||||
# in terminal_tool is populated for this thread. The main thread
|
||||
# registration (run() line ~9046) is invisible here because
|
||||
# _callback_tls is threading.local(). Matches the pattern used
|
||||
# by acp_adapter/server.py for ACP sessions.
|
||||
# by hermes_agent/acp/server.py for ACP sessions.
|
||||
set_sudo_password_callback(self._sudo_password_callback)
|
||||
set_approval_callback(self._approval_callback)
|
||||
try:
|
||||
|
||||
+265
-19
@@ -900,10 +900,16 @@ class BasePlatformAdapter(ABC):
|
||||
self._fatal_error_retryable = True
|
||||
self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
|
||||
|
||||
# Track active message handlers per session for interrupt support
|
||||
# Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
|
||||
# Track active message handlers per session for interrupt support.
|
||||
# _active_sessions stores the per-session interrupt Event; _session_tasks
|
||||
# maps session → the specific Task currently processing it so that
|
||||
# session-terminating commands (/stop, /new, /reset) can cancel the
|
||||
# right task and release the adapter-level guard deterministically.
|
||||
# Without the owner-task map, an old task's finally block could delete
|
||||
# a newer task's guard, leaving stale busy state.
|
||||
self._active_sessions: Dict[str, asyncio.Event] = {}
|
||||
self._pending_messages: Dict[str, MessageEvent] = {}
|
||||
self._session_tasks: Dict[str, asyncio.Task] = {}
|
||||
# Background message-processing tasks spawned by handle_message().
|
||||
# Gateway shutdown cancels these so an old gateway instance doesn't keep
|
||||
# working on a task after --replace or manual restarts.
|
||||
@@ -1680,6 +1686,222 @@ class BasePlatformAdapter(ABC):
|
||||
return f"{existing_text}\n\n{new_text}".strip()
|
||||
return existing_text
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Session task + guard ownership helpers
|
||||
# ------------------------------------------------------------------
|
||||
# These were introduced together with the _session_tasks owner map to
|
||||
# make session lifecycle reconciliation deterministic across (a) the
|
||||
# normal completion path, (b) /stop/ /new/ /reset bypass commands,
|
||||
# and (c) stale-lock self-heal on the next inbound message.
|
||||
|
||||
def _release_session_guard(
|
||||
self,
|
||||
session_key: str,
|
||||
*,
|
||||
guard: Optional[asyncio.Event] = None,
|
||||
) -> None:
|
||||
"""Release the adapter-level guard for a session.
|
||||
|
||||
When ``guard`` is provided, only release the entry if it still points
|
||||
at that exact Event. This lets reset-like commands swap in a temporary
|
||||
guard while the old processing task unwinds, without having the old
|
||||
task's cleanup accidentally clear the replacement guard.
|
||||
"""
|
||||
current_guard = self._active_sessions.get(session_key)
|
||||
if current_guard is None:
|
||||
return
|
||||
if guard is not None and current_guard is not guard:
|
||||
return
|
||||
del self._active_sessions[session_key]
|
||||
|
||||
def _session_task_is_stale(self, session_key: str) -> bool:
|
||||
"""Return True if the owner task for ``session_key`` is done/cancelled.
|
||||
|
||||
A lock is "stale" when the adapter still has ``_active_sessions[key]``
|
||||
AND a known owner task in ``_session_tasks`` that has already exited.
|
||||
When there is no owner task at all, that usually means the guard was
|
||||
installed by some path other than handle_message() (tests sometimes
|
||||
install guards directly) — don't treat that as stale. The on-entry
|
||||
self-heal only needs to handle the production split-brain case where
|
||||
an owner task was recorded, then exited without clearing its guard.
|
||||
"""
|
||||
task = self._session_tasks.get(session_key)
|
||||
if task is None:
|
||||
return False
|
||||
done = getattr(task, "done", None)
|
||||
return bool(done and done())
|
||||
|
||||
def _heal_stale_session_lock(self, session_key: str) -> bool:
|
||||
"""Clear a stale session lock if the owner task is already gone.
|
||||
|
||||
Returns True if a stale lock was healed. Returns False if there is
|
||||
no lock, or the owner task is still alive (the normal busy case).
|
||||
|
||||
This is the on-entry safety net sidbin's issue #11016 analysis calls
|
||||
for: without it, a split-brain — adapter still thinks the session is
|
||||
active, but nothing is actually processing — traps the chat in
|
||||
infinite "Interrupting current task..." until the gateway is
|
||||
restarted.
|
||||
"""
|
||||
if session_key not in self._active_sessions:
|
||||
return False
|
||||
if not self._session_task_is_stale(session_key):
|
||||
return False
|
||||
logger.warning(
|
||||
"[%s] Healing stale session lock for %s (owner task is done/absent)",
|
||||
self.name,
|
||||
session_key,
|
||||
)
|
||||
self._active_sessions.pop(session_key, None)
|
||||
self._pending_messages.pop(session_key, None)
|
||||
self._session_tasks.pop(session_key, None)
|
||||
return True
|
||||
|
||||
def _start_session_processing(
|
||||
self,
|
||||
event: MessageEvent,
|
||||
session_key: str,
|
||||
*,
|
||||
interrupt_event: Optional[asyncio.Event] = None,
|
||||
) -> bool:
|
||||
"""Spawn a background processing task under the given session guard.
|
||||
|
||||
Returns True on success. If the runtime stubs ``create_task`` with a
|
||||
non-Task sentinel (some tests do this), the guard is rolled back and
|
||||
False is returned so the caller isn't left holding a half-installed
|
||||
session lock.
|
||||
"""
|
||||
guard = interrupt_event or asyncio.Event()
|
||||
self._active_sessions[session_key] = guard
|
||||
|
||||
task = asyncio.create_task(self._process_message_background(event, session_key))
|
||||
self._session_tasks[session_key] = task
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
# Tests stub create_task() with lightweight sentinels that are not
|
||||
# hashable and do not support lifecycle callbacks.
|
||||
self._session_tasks.pop(session_key, None)
|
||||
self._release_session_guard(session_key, guard=guard)
|
||||
return False
|
||||
if hasattr(task, "add_done_callback"):
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
task.add_done_callback(self._expected_cancelled_tasks.discard)
|
||||
return True
|
||||
|
||||
async def cancel_session_processing(
|
||||
self,
|
||||
session_key: str,
|
||||
*,
|
||||
release_guard: bool = True,
|
||||
discard_pending: bool = True,
|
||||
) -> None:
|
||||
"""Cancel in-flight processing for a single session.
|
||||
|
||||
``release_guard=False`` keeps the adapter-level session guard in place
|
||||
so reset-like commands can finish atomically before follow-up messages
|
||||
are allowed to start a fresh background task.
|
||||
"""
|
||||
task = self._session_tasks.pop(session_key, None)
|
||||
if task is not None and not task.done():
|
||||
logger.debug(
|
||||
"[%s] Cancelling active processing for session %s",
|
||||
self.name,
|
||||
session_key,
|
||||
)
|
||||
self._expected_cancelled_tasks.add(task)
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[%s] Session cancellation raised while unwinding %s",
|
||||
self.name,
|
||||
session_key,
|
||||
exc_info=True,
|
||||
)
|
||||
if discard_pending:
|
||||
self._pending_messages.pop(session_key, None)
|
||||
if release_guard:
|
||||
self._release_session_guard(session_key)
|
||||
|
||||
async def _drain_pending_after_session_command(
|
||||
self,
|
||||
session_key: str,
|
||||
command_guard: asyncio.Event,
|
||||
) -> None:
|
||||
"""Resume the latest queued follow-up once a session command completes.
|
||||
|
||||
Called at the tail of /stop, /new, and /reset dispatch. Releases the
|
||||
command-scoped guard, then — if a follow-up message landed while the
|
||||
command was running — spawns a fresh processing task for it.
|
||||
"""
|
||||
pending_event = self._pending_messages.pop(session_key, None)
|
||||
self._release_session_guard(session_key, guard=command_guard)
|
||||
if pending_event is None:
|
||||
return
|
||||
self._start_session_processing(pending_event, session_key)
|
||||
|
||||
async def _dispatch_active_session_command(
|
||||
self,
|
||||
event: MessageEvent,
|
||||
session_key: str,
|
||||
cmd: str,
|
||||
) -> None:
|
||||
"""Dispatch a reset-like bypass command while preserving guard ordering.
|
||||
|
||||
/stop, /new, and /reset must:
|
||||
1. Keep the session guard installed while the runner processes the
|
||||
command (so a racing follow-up message stays queued, not
|
||||
dispatched as a second parallel run).
|
||||
2. Cancel the old in-flight adapter task only AFTER the runner has
|
||||
finished handling the command (so the runner sees consistent
|
||||
state and its response is sent in order).
|
||||
3. Release the command-scoped guard and drain the latest queued
|
||||
follow-up exactly once, after 1 and 2 complete.
|
||||
"""
|
||||
logger.debug(
|
||||
"[%s] Command '/%s' bypassing active-session guard for %s",
|
||||
self.name,
|
||||
cmd,
|
||||
session_key,
|
||||
)
|
||||
|
||||
current_guard = self._active_sessions.get(session_key)
|
||||
command_guard = asyncio.Event()
|
||||
self._active_sessions[session_key] = command_guard
|
||||
thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
|
||||
try:
|
||||
response = await self._message_handler(event)
|
||||
# Old adapter task (if any) is cancelled AFTER the runner has
|
||||
# fully handled the command — keeps ordering deterministic.
|
||||
await self.cancel_session_processing(
|
||||
session_key,
|
||||
release_guard=False,
|
||||
discard_pending=False,
|
||||
)
|
||||
if response:
|
||||
await self._send_with_retry(
|
||||
chat_id=event.source.chat_id,
|
||||
content=response,
|
||||
reply_to=event.message_id,
|
||||
metadata=thread_meta,
|
||||
)
|
||||
except Exception:
|
||||
# On failure, restore the original guard if one still exists so
|
||||
# we don't leave the session in a half-reset state.
|
||||
if self._active_sessions.get(session_key) is command_guard:
|
||||
if session_key in self._session_tasks and current_guard is not None:
|
||||
self._active_sessions[session_key] = current_guard
|
||||
else:
|
||||
self._release_session_guard(session_key, guard=command_guard)
|
||||
raise
|
||||
|
||||
await self._drain_pending_after_session_command(session_key, command_guard)
|
||||
|
||||
async def handle_message(self, event: MessageEvent) -> None:
|
||||
"""
|
||||
Process an incoming message.
|
||||
@@ -1696,7 +1918,15 @@ class BasePlatformAdapter(ABC):
|
||||
group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
|
||||
thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
|
||||
)
|
||||
|
||||
|
||||
# On-entry self-heal: if the adapter still has an _active_sessions
|
||||
# entry for this key but the owner task has already exited (done or
|
||||
# cancelled), the lock is stale. Clear it and fall through to
|
||||
# normal dispatch so the user isn't trapped behind a dead guard —
|
||||
# this is the split-brain tail described in issue #11016.
|
||||
if session_key in self._active_sessions:
|
||||
self._heal_stale_session_lock(session_key)
|
||||
|
||||
# Check if there's already an active handler for this session
|
||||
if session_key in self._active_sessions:
|
||||
# Certain commands must bypass the active-session guard and be
|
||||
@@ -1713,6 +1943,23 @@ class BasePlatformAdapter(ABC):
|
||||
from hermes_cli.commands import should_bypass_active_session
|
||||
|
||||
if should_bypass_active_session(cmd):
|
||||
# /stop, /new, /reset must cancel the in-flight adapter task
|
||||
# and preserve ordering of queued follow-ups. Route those
|
||||
# through the dedicated handoff path that serializes
|
||||
# cancellation + runner response + pending drain.
|
||||
if cmd in ("stop", "new", "reset"):
|
||||
try:
|
||||
await self._dispatch_active_session_command(event, session_key, cmd)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"[%s] Command '/%s' dispatch failed: %s",
|
||||
self.name, cmd, e, exc_info=True,
|
||||
)
|
||||
return
|
||||
|
||||
# Other bypass commands (/approve, /deny, /status,
|
||||
# /background, /restart) just need direct dispatch — they
|
||||
# don't cancel the running task.
|
||||
logger.debug(
|
||||
"[%s] Command '/%s' bypassing active-session guard for %s",
|
||||
self.name, cmd, session_key,
|
||||
@@ -1758,19 +2005,9 @@ class BasePlatformAdapter(ABC):
|
||||
# starts would also pass the _active_sessions check and spawn a
|
||||
# duplicate task. (grammY sequentialize / aiogram EventIsolation
|
||||
# pattern — set the guard synchronously, not inside the task.)
|
||||
self._active_sessions[session_key] = asyncio.Event()
|
||||
|
||||
# Spawn background task to process this message
|
||||
task = asyncio.create_task(self._process_message_background(event, session_key))
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
# Some tests stub create_task() with lightweight sentinels that are not
|
||||
# hashable and do not support lifecycle callbacks.
|
||||
return
|
||||
if hasattr(task, "add_done_callback"):
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
task.add_done_callback(self._expected_cancelled_tasks.discard)
|
||||
# _start_session_processing installs the guard AND the owner-task
|
||||
# mapping atomically so stale-lock detection works.
|
||||
self._start_session_processing(event, session_key)
|
||||
|
||||
@staticmethod
|
||||
def _get_human_delay() -> float:
|
||||
@@ -2130,6 +2367,9 @@ class BasePlatformAdapter(ABC):
|
||||
drain_task = asyncio.create_task(
|
||||
self._process_message_background(late_pending, session_key)
|
||||
)
|
||||
# Hand ownership of the session to the drain task so stale-lock
|
||||
# detection keeps working while it runs.
|
||||
self._session_tasks[session_key] = drain_task
|
||||
try:
|
||||
self._background_tasks.add(drain_task)
|
||||
drain_task.add_done_callback(self._background_tasks.discard)
|
||||
@@ -2139,9 +2379,14 @@ class BasePlatformAdapter(ABC):
|
||||
# Leave _active_sessions[session_key] populated — the drain
|
||||
# task's own lifecycle will clean it up.
|
||||
else:
|
||||
# Clean up session tracking
|
||||
if session_key in self._active_sessions:
|
||||
del self._active_sessions[session_key]
|
||||
# Clean up session tracking. Guard-match both deletes so a
|
||||
# reset-like command that already swapped in its own
|
||||
# command_guard (and cancelled us) can't be accidentally
|
||||
# cleared by our unwind. The command owns the session now.
|
||||
current_task = asyncio.current_task()
|
||||
if current_task is not None and self._session_tasks.get(session_key) is current_task:
|
||||
del self._session_tasks[session_key]
|
||||
self._release_session_guard(session_key, guard=interrupt_event)
|
||||
|
||||
async def cancel_background_tasks(self) -> None:
|
||||
"""Cancel any in-flight background message-processing tasks.
|
||||
@@ -2171,6 +2416,7 @@ class BasePlatformAdapter(ABC):
|
||||
# will be in self._background_tasks now. Re-check.
|
||||
self._background_tasks.clear()
|
||||
self._expected_cancelled_tasks.clear()
|
||||
self._session_tasks.clear()
|
||||
self._pending_messages.clear()
|
||||
self._active_sessions.clear()
|
||||
|
||||
|
||||
+45
-7
@@ -8665,7 +8665,12 @@ class GatewayRunner:
|
||||
override = self._session_model_overrides.get(session_key)
|
||||
return override is not None and override.get("model") == agent_model
|
||||
|
||||
def _release_running_agent_state(self, session_key: str) -> None:
|
||||
def _release_running_agent_state(
|
||||
self,
|
||||
session_key: str,
|
||||
*,
|
||||
run_generation: Optional[int] = None,
|
||||
) -> bool:
|
||||
"""Pop ALL per-running-agent state entries for ``session_key``.
|
||||
|
||||
Replaces ad-hoc ``del self._running_agents[key]`` calls scattered
|
||||
@@ -8681,13 +8686,25 @@ class GatewayRunner:
|
||||
across turns (``_session_model_overrides``, ``_voice_mode``,
|
||||
``_pending_approvals``, ``_update_prompt_pending``) is NOT
|
||||
touched here — those have their own lifecycles.
|
||||
|
||||
When ``run_generation`` is provided, only clear the slot if that
|
||||
generation is still current for the session. This prevents an
|
||||
older async run whose generation was bumped by /stop or /new from
|
||||
clobbering a newer run's state during its own unwind. Returns
|
||||
True when the slot was cleared, False when an ownership guard
|
||||
blocked it.
|
||||
"""
|
||||
if not session_key:
|
||||
return
|
||||
return False
|
||||
if run_generation is not None and not self._is_session_run_current(
|
||||
session_key, run_generation
|
||||
):
|
||||
return False
|
||||
self._running_agents.pop(session_key, None)
|
||||
self._running_agents_ts.pop(session_key, None)
|
||||
if hasattr(self, "_busy_ack_ts"):
|
||||
self._busy_ack_ts.pop(session_key, None)
|
||||
return True
|
||||
|
||||
def _clear_session_boundary_security_state(self, session_key: str) -> None:
|
||||
"""Clear approval state that must not survive a real conversation switch."""
|
||||
@@ -10249,10 +10266,24 @@ class GatewayRunner:
|
||||
# Wait for agent to be created
|
||||
while agent_holder[0] is None:
|
||||
await asyncio.sleep(0.05)
|
||||
if session_key:
|
||||
self._running_agents[session_key] = agent_holder[0]
|
||||
if self._draining:
|
||||
self._update_runtime_status("draining")
|
||||
if not session_key:
|
||||
return
|
||||
# Only promote the sentinel to the real agent if this run is still
|
||||
# current. If /stop or /new bumped the generation while we were
|
||||
# spinning up, leave the newer run's slot alone — we'll be
|
||||
# discarded by the stale-result check in _handle_message_with_agent.
|
||||
if run_generation is not None and not self._is_session_run_current(
|
||||
session_key, run_generation
|
||||
):
|
||||
logger.info(
|
||||
"Skipping stale agent promotion for %s — generation %s is no longer current",
|
||||
(session_key or "")[:20],
|
||||
run_generation,
|
||||
)
|
||||
return
|
||||
self._running_agents[session_key] = agent_holder[0]
|
||||
if self._draining:
|
||||
self._update_runtime_status("draining")
|
||||
|
||||
tracking_task = asyncio.create_task(track_agent())
|
||||
|
||||
@@ -10758,7 +10789,14 @@ class GatewayRunner:
|
||||
# Clean up tracking
|
||||
tracking_task.cancel()
|
||||
if session_key:
|
||||
self._release_running_agent_state(session_key)
|
||||
# Only release the slot if this run's generation still owns
|
||||
# it. A /stop or /new that bumped the generation while we
|
||||
# were unwinding has already installed its own state; this
|
||||
# guard prevents an old run from clobbering it on the way
|
||||
# out.
|
||||
self._release_running_agent_state(
|
||||
session_key, run_generation=run_generation
|
||||
)
|
||||
if self._draining:
|
||||
self._update_runtime_status("draining")
|
||||
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
"""Hermes Agent — The self-improving AI agent."""
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Allow running the ACP adapter as ``python -m hermes_agent.acp``."""
|
||||
|
||||
from hermes_agent.acp.entry import main
|
||||
|
||||
main()
|
||||
@@ -6,7 +6,7 @@ and starts the ACP agent server.
|
||||
|
||||
Usage::
|
||||
|
||||
python -m acp_adapter.entry
|
||||
python -m hermes_agent.acp.entry
|
||||
# or
|
||||
hermes acp
|
||||
# or
|
||||
@@ -16,7 +16,6 @@ Usage::
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
@@ -104,13 +103,8 @@ def main() -> None:
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info("Starting hermes-agent ACP adapter")
|
||||
|
||||
# Ensure the project root is on sys.path so ``from run_agent import AIAgent`` works
|
||||
project_root = str(Path(__file__).resolve().parent.parent)
|
||||
if project_root not in sys.path:
|
||||
sys.path.insert(0, project_root)
|
||||
|
||||
import acp
|
||||
from .server import HermesACPAgent
|
||||
from hermes_agent.acp.server import HermesACPAgent
|
||||
|
||||
agent = HermesACPAgent()
|
||||
try:
|
||||
@@ -15,7 +15,7 @@ from typing import Any, Callable, Deque, Dict
|
||||
|
||||
import acp
|
||||
|
||||
from .tools import (
|
||||
from hermes_agent.acp.tools import (
|
||||
build_tool_complete,
|
||||
build_tool_start,
|
||||
make_tool_call_id,
|
||||
@@ -52,15 +52,15 @@ try:
|
||||
except ImportError:
|
||||
from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined]
|
||||
|
||||
from acp_adapter.auth import detect_provider
|
||||
from acp_adapter.events import (
|
||||
from hermes_agent.acp.auth import detect_provider
|
||||
from hermes_agent.acp.events import (
|
||||
make_message_cb,
|
||||
make_step_cb,
|
||||
make_thinking_cb,
|
||||
make_tool_progress_cb,
|
||||
)
|
||||
from acp_adapter.permissions import make_approval_callback
|
||||
from acp_adapter.session import SessionManager, SessionState
|
||||
from hermes_agent.acp.permissions import make_approval_callback
|
||||
from hermes_agent.acp.session import SessionManager, SessionState
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
+24
-7
@@ -394,17 +394,23 @@ DEFAULT_CONFIG = {
|
||||
# (bash doesn't source bashrc in non-interactive login mode) or
|
||||
# zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
|
||||
# Paths support ``~`` / ``${VAR}``. Missing files are silently
|
||||
# skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
|
||||
# skipped. When empty, Hermes auto-sources ``~/.profile``,
|
||||
# ``~/.bash_profile``, and ``~/.bashrc`` (in that order) if the
|
||||
# snapshot shell is bash (this is the ``auto_source_bashrc``
|
||||
# behaviour — disable with that key if you want strict login-only
|
||||
# semantics).
|
||||
"shell_init_files": [],
|
||||
# When true (default), Hermes sources ``~/.bashrc`` in the login
|
||||
# shell used to build the environment snapshot. This captures
|
||||
# PATH additions, shell functions, and aliases defined in the
|
||||
# user's bashrc — which a plain ``bash -l -c`` would otherwise
|
||||
# miss because bash skips bashrc in non-interactive login mode.
|
||||
# Turn this off if you have a bashrc that misbehaves when sourced
|
||||
# When true (default), Hermes sources the user's shell rc files
|
||||
# (``~/.profile``, ``~/.bash_profile``, ``~/.bashrc``) in the
|
||||
# login shell used to build the environment snapshot. This
|
||||
# captures PATH additions, shell functions, and aliases — which a
|
||||
# plain ``bash -l -c`` would otherwise miss because bash skips
|
||||
# bashrc in non-interactive login mode, and because a default
|
||||
# Debian/Ubuntu ``~/.bashrc`` short-circuits on non-interactive
|
||||
# sources. ``~/.profile`` and ``~/.bash_profile`` are tried first
|
||||
# because ``n`` / ``nvm`` / ``asdf`` installers typically write
|
||||
# their PATH exports there without an interactivity guard. Turn
|
||||
# this off if your rc files misbehave when sourced
|
||||
# non-interactively (e.g. one that hard-exits on TTY checks).
|
||||
"auto_source_bashrc": True,
|
||||
"docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
@@ -754,6 +760,17 @@ DEFAULT_CONFIG = {
|
||||
"inline_shell": False,
|
||||
# Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
|
||||
"inline_shell_timeout": 10,
|
||||
# Run the keyword/pattern security scanner on skills the agent
|
||||
# writes via skill_manage (create/edit/patch). Off by default
|
||||
# because the agent can already execute the same code paths via
|
||||
# terminal() with no gate, so the scan adds friction (blocks
|
||||
# skills that mention risky keywords in prose) without meaningful
|
||||
# security. Turn on if you want the belt-and-suspenders — a
|
||||
# dangerous verdict will then surface as a tool error to the
|
||||
# agent, which can retry with the flagged content removed.
|
||||
# External hub installs (trusted/community sources) are always
|
||||
# scanned regardless of this setting.
|
||||
"guard_agent_created": False,
|
||||
},
|
||||
|
||||
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
|
||||
|
||||
+1
-1
@@ -8516,7 +8516,7 @@ Examples:
|
||||
def cmd_acp(args):
|
||||
"""Launch Hermes Agent as an ACP server."""
|
||||
try:
|
||||
from acp_adapter.entry import main as acp_main
|
||||
from hermes_agent.acp.entry import main as acp_main
|
||||
|
||||
acp_main()
|
||||
except ImportError:
|
||||
|
||||
+2
-2
@@ -117,7 +117,7 @@ all = [
|
||||
[project.scripts]
|
||||
hermes = "hermes_cli.main:main"
|
||||
hermes-agent = "run_agent:main"
|
||||
hermes-acp = "acp_adapter.entry:main"
|
||||
hermes-acp = "hermes_agent.acp.entry:main"
|
||||
|
||||
[tool.setuptools]
|
||||
py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "rl_cli", "utils"]
|
||||
@@ -126,7 +126,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector
|
||||
hermes_cli = ["web_dist/**/*"]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]
|
||||
include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "hermes_agent", "hermes_agent.*", "plugins", "plugins.*"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
|
||||
@@ -112,6 +112,8 @@ AUTHOR_MAP = {
|
||||
"josephzcan@gmail.com": "j0sephz",
|
||||
# contributors (manual mapping from git names)
|
||||
"ahmedsherif95@gmail.com": "asheriif",
|
||||
"dyxushuai@gmail.com": "dyxushuai",
|
||||
"33860762+etcircle@users.noreply.github.com": "etcircle",
|
||||
"liujinkun@bytedance.com": "liujinkun2025",
|
||||
"dmayhem93@gmail.com": "dmahan93",
|
||||
"fr@tecompanytea.com": "ifrederico",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Tests for acp_adapter.auth — provider detection."""
|
||||
"""Tests for hermes_agent.acp.auth — provider detection."""
|
||||
|
||||
from acp_adapter.auth import has_provider, detect_provider
|
||||
from hermes_agent.acp.auth import has_provider, detect_provider
|
||||
|
||||
|
||||
class TestHasProvider:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""Tests for acp_adapter.entry startup wiring."""
|
||||
"""Tests for hermes_agent.acp.entry startup wiring."""
|
||||
|
||||
import acp
|
||||
|
||||
from acp_adapter import entry
|
||||
from hermes_agent.acp import entry
|
||||
|
||||
|
||||
def test_main_enables_unstable_protocol(monkeypatch):
|
||||
|
||||
+21
-21
@@ -1,4 +1,4 @@
|
||||
"""Tests for acp_adapter.events — callback factories for ACP notifications."""
|
||||
"""Tests for hermes_agent.acp.events — callback factories for ACP notifications."""
|
||||
|
||||
import asyncio
|
||||
from concurrent.futures import Future
|
||||
@@ -9,7 +9,7 @@ import pytest
|
||||
import acp
|
||||
from acp.schema import ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk
|
||||
|
||||
from acp_adapter.events import (
|
||||
from hermes_agent.acp.events import (
|
||||
make_message_cb,
|
||||
make_step_cb,
|
||||
make_thinking_cb,
|
||||
@@ -48,7 +48,7 @@ class TestToolProgressCallback:
|
||||
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
|
||||
# Run callback in the event loop context
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
@@ -72,7 +72,7 @@ class TestToolProgressCallback:
|
||||
|
||||
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
@@ -89,7 +89,7 @@ class TestToolProgressCallback:
|
||||
|
||||
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
@@ -107,7 +107,7 @@ class TestToolProgressCallback:
|
||||
progress_cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
step_cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
@@ -135,7 +135,7 @@ class TestThinkingCallback:
|
||||
|
||||
cb = make_thinking_cb(mock_conn, "session-1", loop)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
@@ -150,7 +150,7 @@ class TestThinkingCallback:
|
||||
|
||||
cb = make_thinking_cb(mock_conn, "session-1", loop)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
cb("")
|
||||
|
||||
mock_rcts.assert_not_called()
|
||||
@@ -169,7 +169,7 @@ class TestStepCallback:
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
@@ -187,7 +187,7 @@ class TestStepCallback:
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
cb(1, [{"name": "unknown_tool", "result": "ok"}])
|
||||
|
||||
mock_rcts.assert_not_called()
|
||||
@@ -199,7 +199,7 @@ class TestStepCallback:
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
@@ -218,8 +218,8 @@ class TestStepCallback:
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
|
||||
patch("acp_adapter.events.build_tool_complete") as mock_btc:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
|
||||
patch("hermes_agent.acp.events.build_tool_complete") as mock_btc:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
@@ -240,8 +240,8 @@ class TestStepCallback:
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, {})
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
|
||||
patch("acp_adapter.events.build_tool_complete") as mock_btc:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
|
||||
patch("hermes_agent.acp.events.build_tool_complete") as mock_btc:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
@@ -259,8 +259,8 @@ class TestStepCallback:
|
||||
|
||||
cb = make_step_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
|
||||
patch("acp_adapter.events.build_tool_complete") as mock_btc:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts, \
|
||||
patch("hermes_agent.acp.events.build_tool_complete") as mock_btc:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
@@ -280,8 +280,8 @@ class TestStepCallback:
|
||||
tool_call_meta = {}
|
||||
loop = event_loop_fixture
|
||||
|
||||
with patch("acp_adapter.events.make_tool_call_id", return_value="tc-meta"), \
|
||||
patch("acp_adapter.events._send_update") as mock_send, \
|
||||
with patch("hermes_agent.acp.events.make_tool_call_id", return_value="tc-meta"), \
|
||||
patch("hermes_agent.acp.events._send_update") as mock_send, \
|
||||
patch("agent.display.capture_local_edit_snapshot", return_value="snapshot"):
|
||||
cb = make_tool_progress_cb(mock_conn, "session-1", loop, tool_call_ids, tool_call_meta)
|
||||
cb("tool.started", "write_file", None, {"path": "diff-test.txt", "content": "hello"})
|
||||
@@ -306,7 +306,7 @@ class TestMessageCallback:
|
||||
|
||||
cb = make_message_cb(mock_conn, "session-1", loop)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
mock_rcts.return_value = future
|
||||
@@ -321,7 +321,7 @@ class TestMessageCallback:
|
||||
|
||||
cb = make_message_cb(mock_conn, "session-1", loop)
|
||||
|
||||
with patch("acp_adapter.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
with patch("hermes_agent.acp.events.asyncio.run_coroutine_threadsafe") as mock_rcts:
|
||||
cb("")
|
||||
|
||||
mock_rcts.assert_not_called()
|
||||
|
||||
@@ -27,9 +27,9 @@ from acp.schema import (
|
||||
ToolCallStart,
|
||||
)
|
||||
|
||||
from acp_adapter.server import HermesACPAgent
|
||||
from acp_adapter.session import SessionManager
|
||||
from acp_adapter.tools import build_tool_start
|
||||
from hermes_agent.acp.server import HermesACPAgent
|
||||
from hermes_agent.acp.session import SessionManager
|
||||
from hermes_agent.acp.tools import build_tool_start
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Tests for acp_adapter.permissions — ACP approval bridging."""
|
||||
"""Tests for hermes_agent.acp.permissions — ACP approval bridging."""
|
||||
|
||||
import asyncio
|
||||
from concurrent.futures import Future
|
||||
@@ -11,7 +11,7 @@ from acp.schema import (
|
||||
DeniedOutcome,
|
||||
RequestPermissionResponse,
|
||||
)
|
||||
from acp_adapter.permissions import make_approval_callback
|
||||
from hermes_agent.acp.permissions import make_approval_callback
|
||||
|
||||
|
||||
def _make_response(outcome):
|
||||
@@ -37,7 +37,7 @@ def _setup_callback(outcome, timeout=60.0):
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = response
|
||||
|
||||
with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
|
||||
with patch("hermes_agent.acp.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
|
||||
cb = make_approval_callback(mock_rp, loop, session_id="s1", timeout=timeout)
|
||||
result = cb("rm -rf /", "dangerous command")
|
||||
|
||||
@@ -68,7 +68,7 @@ class TestApprovalMapping:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.side_effect = TimeoutError("timed out")
|
||||
|
||||
with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
|
||||
with patch("hermes_agent.acp.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
|
||||
cb = make_approval_callback(mock_rp, loop, session_id="s1", timeout=0.01)
|
||||
result = cb("rm -rf /", "dangerous")
|
||||
|
||||
@@ -82,7 +82,7 @@ class TestApprovalMapping:
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
|
||||
with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
|
||||
with patch("hermes_agent.acp.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
|
||||
cb = make_approval_callback(mock_rp, loop, session_id="s1", timeout=1.0)
|
||||
result = cb("echo hi", "demo")
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Tests for acp_adapter.entry._BenignProbeMethodFilter.
|
||||
"""Tests for hermes_agent.acp.entry._BenignProbeMethodFilter.
|
||||
|
||||
Covers both the isolated filter logic and the full end-to-end path where a
|
||||
client sends a bare JSON-RPC ``ping`` request over stdio and the acp runtime
|
||||
@@ -18,7 +18,7 @@ import pytest
|
||||
|
||||
from acp.exceptions import RequestError
|
||||
|
||||
from acp_adapter.entry import _BenignProbeMethodFilter
|
||||
from hermes_agent.acp.entry import _BenignProbeMethodFilter
|
||||
|
||||
|
||||
# -- Unit tests on the filter itself ----------------------------------------
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Tests for acp_adapter.server — HermesACPAgent ACP server."""
|
||||
"""Tests for hermes_agent.acp.server — HermesACPAgent ACP server."""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
@@ -28,8 +28,8 @@ from acp.schema import (
|
||||
TextContentBlock,
|
||||
Usage,
|
||||
)
|
||||
from acp_adapter.server import HermesACPAgent, HERMES_VERSION
|
||||
from acp_adapter.session import SessionManager
|
||||
from hermes_agent.acp.server import HermesACPAgent, HERMES_VERSION
|
||||
from hermes_agent.acp.session import SessionManager
|
||||
from hermes_state import SessionDB
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ class TestAuthenticate:
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticate_with_matching_method_id(self, agent, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"acp_adapter.server.detect_provider",
|
||||
"hermes_agent.acp.server.detect_provider",
|
||||
lambda: "openrouter",
|
||||
)
|
||||
resp = await agent.authenticate(method_id="openrouter")
|
||||
@@ -106,7 +106,7 @@ class TestAuthenticate:
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticate_is_case_insensitive(self, agent, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"acp_adapter.server.detect_provider",
|
||||
"hermes_agent.acp.server.detect_provider",
|
||||
lambda: "openrouter",
|
||||
)
|
||||
resp = await agent.authenticate(method_id="OpenRouter")
|
||||
@@ -115,7 +115,7 @@ class TestAuthenticate:
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticate_rejects_mismatched_method_id(self, agent, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"acp_adapter.server.detect_provider",
|
||||
"hermes_agent.acp.server.detect_provider",
|
||||
lambda: "openrouter",
|
||||
)
|
||||
resp = await agent.authenticate(method_id="totally-invalid-method")
|
||||
@@ -124,7 +124,7 @@ class TestAuthenticate:
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticate_without_provider(self, agent, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"acp_adapter.server.detect_provider",
|
||||
"hermes_agent.acp.server.detect_provider",
|
||||
lambda: None,
|
||||
)
|
||||
resp = await agent.authenticate(method_id="openrouter")
|
||||
@@ -272,7 +272,7 @@ class TestListAndFork:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_sessions_pagination_first_page(self, agent):
|
||||
from acp_adapter import server as acp_server
|
||||
from hermes_agent.acp import server as acp_server
|
||||
|
||||
infos = [
|
||||
{"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Tests for acp_adapter.session — SessionManager and SessionState."""
|
||||
"""Tests for hermes_agent.acp.session — SessionManager and SessionState."""
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
@@ -8,7 +8,7 @@ from types import SimpleNamespace
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from acp_adapter.session import SessionManager, SessionState
|
||||
from hermes_agent.acp.session import SessionManager, SessionState
|
||||
from hermes_state import SessionDB
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ class TestCreateSession:
|
||||
|
||||
def test_create_session_registers_task_cwd(self, manager, monkeypatch):
|
||||
calls = []
|
||||
monkeypatch.setattr("acp_adapter.session._register_task_cwd", lambda task_id, cwd: calls.append((task_id, cwd)))
|
||||
monkeypatch.setattr("hermes_agent.acp.session._register_task_cwd", lambda task_id, cwd: calls.append((task_id, cwd)))
|
||||
state = manager.create_session(cwd="/tmp/work")
|
||||
assert calls == [(state.session_id, "/tmp/work")]
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""Tests for acp_adapter.tools — tool kind mapping and ACP content building."""
|
||||
"""Tests for hermes_agent.acp.tools — tool kind mapping and ACP content building."""
|
||||
|
||||
import pytest
|
||||
|
||||
from acp_adapter.tools import (
|
||||
from hermes_agent.acp.tools import (
|
||||
TOOL_KIND_MAP,
|
||||
build_tool_complete,
|
||||
build_tool_start,
|
||||
|
||||
@@ -0,0 +1,399 @@
|
||||
"""Regression tests for issue #11016 — Telegram sessions trapped in
|
||||
repeated 'Interrupting current task...' while /stop reports no active task.
|
||||
|
||||
Covers three layers of the fix:
|
||||
|
||||
1. Adapter-side task ownership (_session_tasks map): /stop, /new, /reset
|
||||
actually cancel the in-flight adapter task and release the guard in
|
||||
order, so follow-up messages reach the new session.
|
||||
|
||||
2. Adapter-side on-entry self-heal: if _active_sessions still has an
|
||||
entry but the recorded owner task is already done/cancelled, clear it
|
||||
on the next inbound message rather than trapping the user.
|
||||
|
||||
3. Runner-side generation guard: a stale async run can't promote itself
|
||||
into _running_agents after /stop/ /new bumped the generation, and
|
||||
can't clear a newer run's slot on the way out.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
)
|
||||
from gateway.run import GatewayRunner, _AGENT_PENDING_SENTINEL
|
||||
from gateway.session import SessionSource, build_session_key
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Adapter helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _StubAdapter(BasePlatformAdapter):
|
||||
async def connect(self):
|
||||
pass
|
||||
|
||||
async def disconnect(self):
|
||||
pass
|
||||
|
||||
async def send(self, chat_id, text, **kwargs):
|
||||
pass
|
||||
|
||||
async def get_chat_info(self, chat_id):
|
||||
return {}
|
||||
|
||||
|
||||
def _make_adapter():
|
||||
config = PlatformConfig(enabled=True, token="test-token")
|
||||
adapter = _StubAdapter(config, Platform.TELEGRAM)
|
||||
adapter.sent_responses = []
|
||||
|
||||
async def _mock_send_retry(chat_id, content, **kwargs):
|
||||
adapter.sent_responses.append(content)
|
||||
|
||||
adapter._send_with_retry = _mock_send_retry
|
||||
return adapter
|
||||
|
||||
|
||||
def _make_event(text="hello", chat_id="12345"):
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"
|
||||
)
|
||||
return MessageEvent(text=text, message_type=MessageType.TEXT, source=source)
|
||||
|
||||
|
||||
def _session_key(chat_id="12345"):
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"
|
||||
)
|
||||
return build_session_key(source)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Runner helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_runner():
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.config = GatewayConfig(
|
||||
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
|
||||
)
|
||||
runner.adapters = {}
|
||||
runner._running_agents = {}
|
||||
runner._running_agents_ts = {}
|
||||
runner._session_run_generation = {}
|
||||
runner._pending_messages = {}
|
||||
runner._draining = False
|
||||
runner._update_runtime_status = MagicMock()
|
||||
return runner
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Layer 1: Adapter-side session cancellation on /stop /new /reset
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestAdapterSessionCancellation:
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("command_text", ["/stop", "/new", "/reset"])
|
||||
async def test_command_cancels_active_task_and_unblocks_follow_up(
|
||||
self, command_text
|
||||
):
|
||||
"""/stop /new /reset must cancel the adapter task and let follow-ups through."""
|
||||
adapter = _make_adapter()
|
||||
sk = _session_key()
|
||||
processing_started = asyncio.Event()
|
||||
processing_cancelled = asyncio.Event()
|
||||
blocked_first_message = True
|
||||
|
||||
async def _handler(event):
|
||||
nonlocal blocked_first_message
|
||||
cmd = event.get_command()
|
||||
if cmd in {"stop", "new", "reset", "model"}:
|
||||
return f"handled:{cmd}"
|
||||
|
||||
if blocked_first_message:
|
||||
blocked_first_message = False
|
||||
processing_started.set()
|
||||
try:
|
||||
await asyncio.Event().wait()
|
||||
except asyncio.CancelledError:
|
||||
processing_cancelled.set()
|
||||
raise
|
||||
return f"handled:text:{event.text}"
|
||||
|
||||
adapter._message_handler = _handler
|
||||
|
||||
await adapter.handle_message(_make_event("hello world"))
|
||||
await processing_started.wait()
|
||||
await asyncio.sleep(0)
|
||||
|
||||
assert sk in adapter._active_sessions
|
||||
assert sk in adapter._session_tasks
|
||||
|
||||
await adapter.handle_message(_make_event(command_text))
|
||||
|
||||
assert processing_cancelled.is_set(), (
|
||||
f"{command_text} did not cancel the active processing task"
|
||||
)
|
||||
assert sk not in adapter._active_sessions
|
||||
assert sk not in adapter._pending_messages
|
||||
assert sk not in adapter._session_tasks
|
||||
expected = command_text.lstrip("/")
|
||||
assert any(f"handled:{expected}" in r for r in adapter.sent_responses)
|
||||
|
||||
# Follow-up must go through normally now that the session is clean.
|
||||
await adapter.handle_message(
|
||||
_make_event("/model xiaomi/mimo-v2-pro --provider nous")
|
||||
)
|
||||
await asyncio.sleep(0)
|
||||
await asyncio.sleep(0)
|
||||
|
||||
assert any("handled:model" in r for r in adapter.sent_responses), (
|
||||
f"follow-up /model stayed blocked after {command_text}"
|
||||
)
|
||||
assert sk not in adapter._pending_messages
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_new_keeps_guard_until_command_finishes_then_runs_follow_up(self):
|
||||
"""/new must finish runner logic before cancelling old work or releasing the guard."""
|
||||
adapter = _make_adapter()
|
||||
sk = _session_key()
|
||||
processing_started = asyncio.Event()
|
||||
command_started = asyncio.Event()
|
||||
allow_command_finish = asyncio.Event()
|
||||
follow_up_processed = asyncio.Event()
|
||||
call_order = []
|
||||
|
||||
async def _handler(event):
|
||||
cmd = event.get_command()
|
||||
if cmd == "new":
|
||||
call_order.append("command:start")
|
||||
command_started.set()
|
||||
await allow_command_finish.wait()
|
||||
call_order.append("command:end")
|
||||
return "handled:new"
|
||||
|
||||
if event.text == "hello world":
|
||||
processing_started.set()
|
||||
try:
|
||||
await asyncio.Event().wait()
|
||||
except asyncio.CancelledError:
|
||||
call_order.append("original:cancelled")
|
||||
raise
|
||||
|
||||
if event.text == "after reset":
|
||||
call_order.append("followup:processed")
|
||||
follow_up_processed.set()
|
||||
return f"handled:text:{event.text}"
|
||||
|
||||
adapter._message_handler = _handler
|
||||
|
||||
await adapter.handle_message(_make_event("hello world"))
|
||||
await processing_started.wait()
|
||||
|
||||
command_task = asyncio.create_task(adapter.handle_message(_make_event("/new")))
|
||||
await command_started.wait()
|
||||
await asyncio.sleep(0)
|
||||
|
||||
assert sk in adapter._active_sessions
|
||||
|
||||
await adapter.handle_message(_make_event("after reset"))
|
||||
await asyncio.sleep(0)
|
||||
await asyncio.sleep(0)
|
||||
|
||||
assert sk in adapter._active_sessions, "guard must stay active while /new is still running"
|
||||
assert sk in adapter._pending_messages, "follow-up should stay queued until /new finishes"
|
||||
assert not follow_up_processed.is_set(), "follow-up ran before /new completed"
|
||||
assert "original:cancelled" not in call_order, "old task was cancelled before runner completed /new"
|
||||
|
||||
allow_command_finish.set()
|
||||
await command_task
|
||||
await asyncio.wait_for(follow_up_processed.wait(), timeout=1.0)
|
||||
|
||||
assert any("handled:new" in r for r in adapter.sent_responses)
|
||||
assert call_order.index("command:end") < call_order.index("original:cancelled")
|
||||
assert call_order.index("original:cancelled") < call_order.index("followup:processed")
|
||||
assert sk not in adapter._pending_messages
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Layer 2: Adapter-side on-entry self-heal for stale session locks
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestStaleSessionLockSelfHeal:
|
||||
@pytest.mark.asyncio
|
||||
async def test_stale_lock_with_done_task_is_healed_on_next_message(self):
|
||||
"""A split-brain guard (owner task done but entry still live) heals on next inbound."""
|
||||
adapter = _make_adapter()
|
||||
sk = _session_key()
|
||||
|
||||
# Simulate the production split-brain: an _active_sessions entry
|
||||
# remains AND a recorded owner task, but that task is already done.
|
||||
async def _done():
|
||||
return None
|
||||
|
||||
done_task = asyncio.create_task(_done())
|
||||
await done_task
|
||||
assert done_task.done()
|
||||
|
||||
adapter._active_sessions[sk] = asyncio.Event()
|
||||
adapter._session_tasks[sk] = done_task
|
||||
|
||||
assert adapter._session_task_is_stale(sk)
|
||||
|
||||
async def _handler(event):
|
||||
return f"handled:{event.get_command() or 'text'}"
|
||||
|
||||
adapter._message_handler = _handler
|
||||
|
||||
# An ordinary message should heal the stale lock, then fall through
|
||||
# to normal dispatch. User gets a reply instead of a busy ack.
|
||||
await adapter.handle_message(_make_event("hello"))
|
||||
# Drain any spawned background tasks.
|
||||
for _ in range(5):
|
||||
await asyncio.sleep(0)
|
||||
|
||||
assert any("handled:text" in r for r in adapter.sent_responses), (
|
||||
"stale lock trapped a normal message — split-brain not healed"
|
||||
)
|
||||
|
||||
def test_no_owner_task_is_not_treated_as_stale(self):
|
||||
"""If _session_tasks has no entry at all, the guard isn't stale.
|
||||
|
||||
Tests and rare legitimate code paths install _active_sessions
|
||||
entries directly. Auto-healing those would break real fixtures.
|
||||
"""
|
||||
adapter = _make_adapter()
|
||||
sk = _session_key()
|
||||
|
||||
adapter._active_sessions[sk] = asyncio.Event()
|
||||
# No _session_tasks entry.
|
||||
|
||||
assert adapter._session_task_is_stale(sk) is False
|
||||
assert adapter._heal_stale_session_lock(sk) is False
|
||||
|
||||
def test_live_owner_task_is_not_stale(self):
|
||||
"""When the owner task is alive, do NOT heal — agent is really busy."""
|
||||
adapter = _make_adapter()
|
||||
sk = _session_key()
|
||||
|
||||
fake_task = MagicMock()
|
||||
fake_task.done.return_value = False
|
||||
adapter._active_sessions[sk] = asyncio.Event()
|
||||
adapter._session_tasks[sk] = fake_task
|
||||
|
||||
assert adapter._session_task_is_stale(sk) is False
|
||||
assert adapter._heal_stale_session_lock(sk) is False
|
||||
# Lock still in place.
|
||||
assert sk in adapter._active_sessions
|
||||
assert sk in adapter._session_tasks
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Layer 3: Runner-side generation guard on slot promotion + release
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestRunnerSessionGenerationGuard:
|
||||
def test_release_without_generation_behaves_as_before(self):
|
||||
runner = _make_runner()
|
||||
sk = "agent:main:telegram:dm:12345"
|
||||
runner._running_agents[sk] = "agent"
|
||||
runner._running_agents_ts[sk] = 1.0
|
||||
assert runner._release_running_agent_state(sk) is True
|
||||
assert sk not in runner._running_agents
|
||||
assert sk not in runner._running_agents_ts
|
||||
|
||||
def test_release_with_current_generation_clears_slot(self):
|
||||
runner = _make_runner()
|
||||
sk = "agent:main:telegram:dm:12345"
|
||||
gen = runner._begin_session_run_generation(sk)
|
||||
runner._running_agents[sk] = "agent"
|
||||
runner._running_agents_ts[sk] = 1.0
|
||||
|
||||
assert runner._release_running_agent_state(sk, run_generation=gen) is True
|
||||
assert sk not in runner._running_agents
|
||||
|
||||
def test_release_with_stale_generation_blocks(self):
|
||||
runner = _make_runner()
|
||||
sk = "agent:main:telegram:dm:12345"
|
||||
stale_gen = runner._begin_session_run_generation(sk)
|
||||
# /stop bumps the generation — stale run's generation is no longer current.
|
||||
runner._invalidate_session_run_generation(sk, reason="stop")
|
||||
# The fresh run lands next; imagine it has its own state installed.
|
||||
runner._running_agents[sk] = "fresh_agent"
|
||||
runner._running_agents_ts[sk] = 2.0
|
||||
|
||||
# Stale run's unwind MUST NOT clobber the fresh run's state.
|
||||
released = runner._release_running_agent_state(sk, run_generation=stale_gen)
|
||||
|
||||
assert released is False
|
||||
assert runner._running_agents[sk] == "fresh_agent"
|
||||
assert runner._running_agents_ts[sk] == 2.0
|
||||
|
||||
def test_is_session_run_current_tracks_bumps(self):
|
||||
runner = _make_runner()
|
||||
sk = "agent:main:telegram:dm:12345"
|
||||
gen1 = runner._begin_session_run_generation(sk)
|
||||
assert runner._is_session_run_current(sk, gen1) is True
|
||||
|
||||
runner._invalidate_session_run_generation(sk, reason="test")
|
||||
assert runner._is_session_run_current(sk, gen1) is False
|
||||
|
||||
gen2 = runner._begin_session_run_generation(sk)
|
||||
assert gen2 > gen1
|
||||
assert runner._is_session_run_current(sk, gen2) is True
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Layer 1 (regression): old task's finally must NOT delete a newer guard
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestOldTaskCannotClobberNewerGuard:
|
||||
"""Direct regression for the unconditional-delete bug.
|
||||
|
||||
Before the guard-match fix, a task in its finally would delete
|
||||
``_active_sessions[session_key]`` unconditionally — even if a
|
||||
/stop/ /new command had already swapped in its own command_guard
|
||||
(which then gets clobbered, opening a race for follow-up messages).
|
||||
"""
|
||||
|
||||
def test_release_session_guard_matches_on_event_identity(self):
|
||||
adapter = _make_adapter()
|
||||
sk = _session_key()
|
||||
|
||||
old_guard = asyncio.Event()
|
||||
new_guard = asyncio.Event()
|
||||
# Command swapped in a newer guard.
|
||||
adapter._active_sessions[sk] = new_guard
|
||||
|
||||
# Old task tries to release using its captured (stale) guard.
|
||||
adapter._release_session_guard(sk, guard=old_guard)
|
||||
|
||||
# The newer guard survives.
|
||||
assert adapter._active_sessions.get(sk) is new_guard
|
||||
|
||||
# Now the command itself releases using the matching guard.
|
||||
adapter._release_session_guard(sk, guard=new_guard)
|
||||
assert sk not in adapter._active_sessions
|
||||
|
||||
def test_release_session_guard_without_guard_releases_unconditionally(self):
|
||||
adapter = _make_adapter()
|
||||
sk = _session_key()
|
||||
adapter._active_sessions[sk] = asyncio.Event()
|
||||
# Callers that don't know the guard (e.g. cancel_session_processing's
|
||||
# default path) still work.
|
||||
adapter._release_session_guard(sk)
|
||||
assert sk not in adapter._active_sessions
|
||||
|
||||
@@ -34,8 +34,59 @@ class TestResolveShellInitFiles:
|
||||
|
||||
assert resolved == [str(bashrc)]
|
||||
|
||||
def test_auto_sources_profile_when_present(self, tmp_path, monkeypatch):
|
||||
"""~/.profile is where ``n`` / ``nvm`` installers typically write
|
||||
their PATH export on Debian/Ubuntu, and it has no interactivity
|
||||
guard so a non-interactive source actually runs it.
|
||||
"""
|
||||
profile = tmp_path / ".profile"
|
||||
profile.write_text('export PATH="$HOME/n/bin:$PATH"\n')
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
|
||||
with patch(
|
||||
"tools.environments.local._read_terminal_shell_init_config",
|
||||
return_value=([], True),
|
||||
):
|
||||
resolved = _resolve_shell_init_files()
|
||||
|
||||
assert resolved == [str(profile)]
|
||||
|
||||
def test_auto_sources_bash_profile_when_present(self, tmp_path, monkeypatch):
|
||||
bash_profile = tmp_path / ".bash_profile"
|
||||
bash_profile.write_text('export MARKER=bp\n')
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
|
||||
with patch(
|
||||
"tools.environments.local._read_terminal_shell_init_config",
|
||||
return_value=([], True),
|
||||
):
|
||||
resolved = _resolve_shell_init_files()
|
||||
|
||||
assert resolved == [str(bash_profile)]
|
||||
|
||||
def test_auto_sources_profile_before_bashrc(self, tmp_path, monkeypatch):
|
||||
"""Both files present: profile runs first so PATH exports in
|
||||
profile take effect even if bashrc short-circuits on the
|
||||
non-interactive ``case $- in *i*) ;; *) return;; esac`` guard.
|
||||
"""
|
||||
profile = tmp_path / ".profile"
|
||||
profile.write_text('export FROM_PROFILE=1\n')
|
||||
bash_profile = tmp_path / ".bash_profile"
|
||||
bash_profile.write_text('export FROM_BASH_PROFILE=1\n')
|
||||
bashrc = tmp_path / ".bashrc"
|
||||
bashrc.write_text('export FROM_BASHRC=1\n')
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
|
||||
with patch(
|
||||
"tools.environments.local._read_terminal_shell_init_config",
|
||||
return_value=([], True),
|
||||
):
|
||||
resolved = _resolve_shell_init_files()
|
||||
|
||||
assert resolved == [str(profile), str(bash_profile), str(bashrc)]
|
||||
|
||||
def test_skips_bashrc_when_missing(self, tmp_path, monkeypatch):
|
||||
# No bashrc written.
|
||||
# No rc files written.
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
|
||||
with patch(
|
||||
@@ -49,6 +100,8 @@ class TestResolveShellInitFiles:
|
||||
def test_auto_source_bashrc_off_suppresses_default(self, tmp_path, monkeypatch):
|
||||
bashrc = tmp_path / ".bashrc"
|
||||
bashrc.write_text('export MARKER=seen\n')
|
||||
profile = tmp_path / ".profile"
|
||||
profile.write_text('export MARKER=p\n')
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
|
||||
with patch(
|
||||
@@ -160,3 +213,58 @@ class TestSnapshotEndToEnd:
|
||||
output = result.get("output", "")
|
||||
assert "PROBE=probe-ok" in output
|
||||
assert "/opt/shell-init-probe/bin" in output
|
||||
|
||||
def test_profile_path_export_survives_bashrc_interactive_guard(
|
||||
self, tmp_path, monkeypatch
|
||||
):
|
||||
"""Reproduces the Debian/Ubuntu + ``n``/``nvm`` case.
|
||||
|
||||
Setup:
|
||||
- ``~/.bashrc`` starts with ``case $- in *i*) ;; *) return;; esac``
|
||||
(the default on Debian/Ubuntu) and would happily export a PATH
|
||||
entry below that guard — but never gets there because a
|
||||
non-interactive source short-circuits.
|
||||
- ``~/.profile`` exports ``$HOME/fake-n/bin`` onto PATH, no guard.
|
||||
|
||||
Expectation: auto-sourced rc list picks up ``~/.profile`` before
|
||||
``~/.bashrc``, so the snapshot ends up with ``fake-n/bin`` on PATH
|
||||
even though the bashrc export is silently skipped.
|
||||
"""
|
||||
fake_n_bin = tmp_path / "fake-n" / "bin"
|
||||
fake_n_bin.mkdir(parents=True)
|
||||
|
||||
profile = tmp_path / ".profile"
|
||||
profile.write_text(
|
||||
f'export PATH="{fake_n_bin}:$PATH"\n'
|
||||
'export FROM_PROFILE=profile-ok\n'
|
||||
)
|
||||
bashrc = tmp_path / ".bashrc"
|
||||
bashrc.write_text(
|
||||
'case $- in\n'
|
||||
' *i*) ;;\n'
|
||||
' *) return;;\n'
|
||||
'esac\n'
|
||||
'export FROM_BASHRC=bashrc-should-not-appear\n'
|
||||
)
|
||||
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
|
||||
with patch(
|
||||
"tools.environments.local._read_terminal_shell_init_config",
|
||||
return_value=([], True),
|
||||
):
|
||||
env = LocalEnvironment(cwd=str(tmp_path), timeout=15)
|
||||
try:
|
||||
result = env.execute(
|
||||
'echo "PATH=$PATH"; '
|
||||
'echo "FROM_PROFILE=$FROM_PROFILE"; '
|
||||
'echo "FROM_BASHRC=$FROM_BASHRC"'
|
||||
)
|
||||
finally:
|
||||
env.cleanup()
|
||||
|
||||
output = result.get("output", "")
|
||||
assert "FROM_PROFILE=profile-ok" in output
|
||||
assert str(fake_n_bin) in output
|
||||
# bashrc short-circuited on the interactive guard — its export never ran
|
||||
assert "FROM_BASHRC=bashrc-should-not-appear" not in output
|
||||
|
||||
@@ -484,3 +484,85 @@ class TestSkillManageDispatcher:
|
||||
raw = skill_manage(action="create", name="test-skill", content=VALID_SKILL_CONTENT)
|
||||
result = json.loads(raw)
|
||||
assert result["success"] is True
|
||||
|
||||
|
||||
class TestSecurityScanGate:
|
||||
"""_security_scan_skill is gated by skills.guard_agent_created config flag."""
|
||||
|
||||
def test_scan_noop_when_flag_off(self, tmp_path):
|
||||
"""Default config (flag off) short-circuits before running scan_skill."""
|
||||
from tools.skill_manager_tool import _security_scan_skill
|
||||
|
||||
with patch("tools.skill_manager_tool._guard_agent_created_enabled", return_value=False), \
|
||||
patch("tools.skill_manager_tool.scan_skill") as mock_scan:
|
||||
result = _security_scan_skill(tmp_path)
|
||||
|
||||
assert result is None
|
||||
mock_scan.assert_not_called() # scan never ran
|
||||
|
||||
def test_scan_runs_when_flag_on(self, tmp_path):
|
||||
"""When flag is on, scan_skill is invoked and its verdict is honored."""
|
||||
from tools.skill_manager_tool import _security_scan_skill
|
||||
from tools.skills_guard import ScanResult
|
||||
|
||||
# Fake a safe scan result — caller should return None (allow)
|
||||
fake_result = ScanResult(
|
||||
skill_name="test",
|
||||
source="agent-created",
|
||||
trust_level="agent-created",
|
||||
verdict="safe",
|
||||
findings=[],
|
||||
summary="ok",
|
||||
)
|
||||
with patch("tools.skill_manager_tool._guard_agent_created_enabled", return_value=True), \
|
||||
patch("tools.skill_manager_tool.scan_skill", return_value=fake_result) as mock_scan:
|
||||
result = _security_scan_skill(tmp_path)
|
||||
|
||||
assert result is None
|
||||
mock_scan.assert_called_once()
|
||||
|
||||
def test_scan_blocks_dangerous_when_flag_on(self, tmp_path):
|
||||
"""Dangerous verdict + flag on → returns an error string for the agent."""
|
||||
from tools.skill_manager_tool import _security_scan_skill
|
||||
from tools.skills_guard import ScanResult, Finding
|
||||
|
||||
finding = Finding(
|
||||
pattern_id="test", severity="critical", category="exfiltration",
|
||||
file="SKILL.md", line=1, match="curl $TOKEN", description="test",
|
||||
)
|
||||
fake_result = ScanResult(
|
||||
skill_name="test",
|
||||
source="agent-created",
|
||||
trust_level="agent-created",
|
||||
verdict="dangerous",
|
||||
findings=[finding],
|
||||
summary="dangerous",
|
||||
)
|
||||
with patch("tools.skill_manager_tool._guard_agent_created_enabled", return_value=True), \
|
||||
patch("tools.skill_manager_tool.scan_skill", return_value=fake_result):
|
||||
result = _security_scan_skill(tmp_path)
|
||||
|
||||
assert result is not None
|
||||
assert "Security scan blocked" in result
|
||||
|
||||
def test_guard_flag_reads_config_default_false(self):
|
||||
"""_guard_agent_created_enabled returns False when config doesn't set it."""
|
||||
from tools.skill_manager_tool import _guard_agent_created_enabled
|
||||
|
||||
with patch("hermes_cli.config.load_config", return_value={"skills": {}}):
|
||||
assert _guard_agent_created_enabled() is False
|
||||
|
||||
def test_guard_flag_reads_config_when_set(self):
|
||||
"""_guard_agent_created_enabled returns True when user explicitly enables."""
|
||||
from tools.skill_manager_tool import _guard_agent_created_enabled
|
||||
|
||||
with patch("hermes_cli.config.load_config",
|
||||
return_value={"skills": {"guard_agent_created": True}}):
|
||||
assert _guard_agent_created_enabled() is True
|
||||
|
||||
def test_guard_flag_handles_config_error(self):
|
||||
"""If load_config raises, _guard_agent_created_enabled defaults to False (fail-safe off)."""
|
||||
from tools.skill_manager_tool import _guard_agent_created_enabled
|
||||
|
||||
with patch("hermes_cli.config.load_config", side_effect=RuntimeError("boom")):
|
||||
assert _guard_agent_created_enabled() is False
|
||||
|
||||
@@ -175,7 +175,11 @@ class TestShouldAllowInstall:
|
||||
assert "agent-created" in reason
|
||||
|
||||
def test_dangerous_agent_created_asks(self):
|
||||
"""Agent-created skills with dangerous verdict return None (ask for confirmation)."""
|
||||
"""Agent-created skills with dangerous verdict return None (ask for confirmation)
|
||||
when the scan runs. The caller (_security_scan_skill) surfaces this as an error
|
||||
to the agent, who can retry without the flagged content.
|
||||
|
||||
This gate only runs when skills.guard_agent_created is enabled (off by default)."""
|
||||
f = [Finding("env_exfil_curl", "critical", "exfiltration", "SKILL.md", 1, "curl $TOKEN", "exfiltration")]
|
||||
allowed, reason = should_allow_install(self._result("agent-created", "dangerous", f))
|
||||
assert allowed is None
|
||||
|
||||
@@ -247,10 +247,22 @@ def _resolve_shell_init_files() -> list[str]:
|
||||
if explicit:
|
||||
candidates.extend(explicit)
|
||||
elif auto_bashrc and not _IS_WINDOWS:
|
||||
# Bash's login-shell invocation does NOT source ~/.bashrc by default,
|
||||
# so tools like nvm / asdf / pyenv that self-install there stay
|
||||
# invisible to the snapshot without this nudge.
|
||||
candidates.append("~/.bashrc")
|
||||
# Build a login-shell-ish source list so tools like n / nvm / asdf /
|
||||
# pyenv that self-install into the user's shell rc land on PATH in
|
||||
# the captured snapshot.
|
||||
#
|
||||
# ~/.profile and ~/.bash_profile run first because they have no
|
||||
# interactivity guard — installers like ``n`` and ``nvm`` append
|
||||
# their PATH export there on most distros, and a non-interactive
|
||||
# ``. ~/.profile`` picks that up.
|
||||
#
|
||||
# ~/.bashrc runs last. On Debian/Ubuntu the default bashrc starts
|
||||
# with ``case $- in *i*) ;; *) return;; esac`` and exits early
|
||||
# when sourced non-interactively, which is why sourcing bashrc
|
||||
# alone misses nvm/n PATH additions placed below that guard. We
|
||||
# still include it so users who put PATH logic in bashrc (and
|
||||
# stripped the guard, or never had one) keep working.
|
||||
candidates.extend(["~/.profile", "~/.bash_profile", "~/.bashrc"])
|
||||
|
||||
resolved: list[str] = []
|
||||
for raw in candidates:
|
||||
|
||||
@@ -44,8 +44,8 @@ from typing import Dict, Any, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import security scanner — agent-created skills get the same scrutiny as
|
||||
# community hub installs.
|
||||
# Import security scanner — external hub installs always get scanned;
|
||||
# agent-created skills only get scanned when skills.guard_agent_created is on.
|
||||
try:
|
||||
from tools.skills_guard import scan_skill, should_allow_install, format_scan_report
|
||||
_GUARD_AVAILABLE = True
|
||||
@@ -53,10 +53,31 @@ except ImportError:
|
||||
_GUARD_AVAILABLE = False
|
||||
|
||||
|
||||
def _guard_agent_created_enabled() -> bool:
|
||||
"""Read skills.guard_agent_created from config (default False).
|
||||
|
||||
Off by default because the agent can already execute the same code
|
||||
paths via terminal() with no gate, so the scan adds friction without
|
||||
meaningful security. Users who want belt-and-suspenders can turn it
|
||||
on via `hermes config set skills.guard_agent_created true`.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
return bool(cfg.get("skills", {}).get("guard_agent_created", False))
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _security_scan_skill(skill_dir: Path) -> Optional[str]:
|
||||
"""Scan a skill directory after write. Returns error string if blocked, else None."""
|
||||
"""Scan a skill directory after write. Returns error string if blocked, else None.
|
||||
|
||||
No-op when skills.guard_agent_created is disabled (the default).
|
||||
"""
|
||||
if not _GUARD_AVAILABLE:
|
||||
return None
|
||||
if not _guard_agent_created_enabled():
|
||||
return None
|
||||
try:
|
||||
result = scan_skill(skill_dir, source="agent-created")
|
||||
allowed, reason = should_allow_install(result)
|
||||
@@ -65,7 +86,8 @@ def _security_scan_skill(skill_dir: Path) -> Optional[str]:
|
||||
return f"Security scan blocked this skill ({reason}):\n{report}"
|
||||
if allowed is None:
|
||||
# "ask" verdict — for agent-created skills this means dangerous
|
||||
# findings were detected. Block the skill and include the report.
|
||||
# findings were detected. Surface as an error so the agent can
|
||||
# retry with the flagged content removed.
|
||||
report = format_scan_report(result)
|
||||
logger.warning("Agent-created skill blocked (dangerous findings): %s", reason)
|
||||
return f"Security scan blocked this skill ({reason}):\n{report}"
|
||||
|
||||
@@ -43,6 +43,10 @@ INSTALL_POLICY = {
|
||||
"builtin": ("allow", "allow", "allow"),
|
||||
"trusted": ("allow", "allow", "block"),
|
||||
"community": ("allow", "block", "block"),
|
||||
# Agent-created: "ask" on dangerous surfaces as an error to the agent,
|
||||
# which can retry without the flagged content. This gate only runs when
|
||||
# skills.guard_agent_created is enabled (off by default) — see
|
||||
# tools/skill_manager_tool.py::_guard_agent_created_enabled.
|
||||
"agent-created": ("allow", "allow", "ask"),
|
||||
}
|
||||
|
||||
|
||||
@@ -10,20 +10,20 @@ The ACP adapter wraps Hermes' synchronous `AIAgent` in an async JSON-RPC stdio s
|
||||
|
||||
Key implementation files:
|
||||
|
||||
- `acp_adapter/entry.py`
|
||||
- `acp_adapter/server.py`
|
||||
- `acp_adapter/session.py`
|
||||
- `acp_adapter/events.py`
|
||||
- `acp_adapter/permissions.py`
|
||||
- `acp_adapter/tools.py`
|
||||
- `acp_adapter/auth.py`
|
||||
- `hermes_agent/acp/entry.py`
|
||||
- `hermes_agent/acp/server.py`
|
||||
- `hermes_agent/acp/session.py`
|
||||
- `hermes_agent/acp/events.py`
|
||||
- `hermes_agent/acp/permissions.py`
|
||||
- `hermes_agent/acp/tools.py`
|
||||
- `hermes_agent/acp/auth.py`
|
||||
- `acp_registry/agent.json`
|
||||
|
||||
## Boot flow
|
||||
|
||||
```text
|
||||
hermes acp / hermes-acp / python -m acp_adapter
|
||||
-> acp_adapter.entry.main()
|
||||
hermes acp / hermes-acp / python -m hermes_agent.acp
|
||||
-> hermes_agent.acp.entry.main()
|
||||
-> load ~/.hermes/.env
|
||||
-> configure stderr logging
|
||||
-> construct HermesACPAgent
|
||||
@@ -36,7 +36,7 @@ Stdout is reserved for ACP JSON-RPC transport. Human-readable logs go to stderr.
|
||||
|
||||
### `HermesACPAgent`
|
||||
|
||||
`acp_adapter/server.py` implements the ACP agent protocol.
|
||||
`hermes_agent/acp/server.py` implements the ACP agent protocol.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
@@ -48,7 +48,7 @@ Responsibilities:
|
||||
|
||||
### `SessionManager`
|
||||
|
||||
`acp_adapter/session.py` tracks live ACP sessions.
|
||||
`hermes_agent/acp/session.py` tracks live ACP sessions.
|
||||
|
||||
Each session stores:
|
||||
|
||||
@@ -71,7 +71,7 @@ The manager is thread-safe and supports:
|
||||
|
||||
### Event bridge
|
||||
|
||||
`acp_adapter/events.py` converts AIAgent callbacks into ACP `session_update` events.
|
||||
`hermes_agent/acp/events.py` converts AIAgent callbacks into ACP `session_update` events.
|
||||
|
||||
Bridged callbacks:
|
||||
|
||||
@@ -88,7 +88,7 @@ asyncio.run_coroutine_threadsafe(...)
|
||||
|
||||
### Permission bridge
|
||||
|
||||
`acp_adapter/permissions.py` adapts dangerous terminal approval prompts into ACP permission requests.
|
||||
`hermes_agent/acp/permissions.py` adapts dangerous terminal approval prompts into ACP permission requests.
|
||||
|
||||
Mapping:
|
||||
|
||||
@@ -100,7 +100,7 @@ Timeouts and bridge failures deny by default.
|
||||
|
||||
### Tool rendering helpers
|
||||
|
||||
`acp_adapter/tools.py` maps Hermes tools to ACP tool kinds and builds editor-facing content.
|
||||
`hermes_agent/acp/tools.py` maps Hermes tools to ACP tool kinds and builds editor-facing content.
|
||||
|
||||
Examples:
|
||||
|
||||
@@ -144,7 +144,7 @@ ACP does not implement its own auth store.
|
||||
|
||||
Instead it reuses Hermes' runtime resolver:
|
||||
|
||||
- `acp_adapter/auth.py`
|
||||
- `hermes_agent/acp/auth.py`
|
||||
- `hermes_cli/runtime_provider.py`
|
||||
|
||||
So ACP advertises and uses the currently configured Hermes provider/credentials.
|
||||
|
||||
@@ -14,7 +14,7 @@ This page is the top-level map of Hermes Agent internals. Use it to orient yours
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ Entry Points │
|
||||
│ │
|
||||
│ CLI (cli.py) Gateway (gateway/run.py) ACP (acp_adapter/) │
|
||||
│ CLI (cli.py) Gateway (gateway/run.py) ACP (hermes_agent/acp/)│
|
||||
│ Batch Runner API Server Python Library │
|
||||
└──────────┬──────────────┬───────────────────────┬───────────────────┘
|
||||
│ │ │
|
||||
@@ -122,7 +122,7 @@ hermes-agent/
|
||||
│ # dingtalk, feishu, wecom, wecom_callback, weixin,
|
||||
│ # bluebubbles, qqbot, homeassistant, webhook, api_server
|
||||
│
|
||||
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains)
|
||||
├── hermes_agent/acp/ # ACP server (VS Code / Zed / JetBrains)
|
||||
├── cron/ # Scheduler (jobs.py, scheduler.py)
|
||||
├── plugins/memory/ # Memory provider plugins
|
||||
├── plugins/context_engine/ # Context engine plugins
|
||||
|
||||
@@ -650,7 +650,7 @@ Related entrypoints:
|
||||
|
||||
```bash
|
||||
hermes-acp
|
||||
python -m acp_adapter
|
||||
python -m hermes_agent.acp
|
||||
```
|
||||
|
||||
Install support first:
|
||||
|
||||
@@ -43,7 +43,7 @@ This installs the `agent-client-protocol` dependency and enables:
|
||||
|
||||
- `hermes acp`
|
||||
- `hermes-acp`
|
||||
- `python -m acp_adapter`
|
||||
- `python -m hermes_agent.acp`
|
||||
|
||||
## Launching the ACP server
|
||||
|
||||
@@ -58,7 +58,7 @@ hermes-acp
|
||||
```
|
||||
|
||||
```bash
|
||||
python -m acp_adapter
|
||||
python -m hermes_agent.acp
|
||||
```
|
||||
|
||||
Hermes logs to stderr so stdout remains reserved for ACP JSON-RPC traffic.
|
||||
|
||||
Reference in New Issue
Block a user