Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 854206e59e | |||
| dd83173621 | |||
| c65c1ddf21 | |||
| 1970bcf5a5 | |||
| 832ecde4b0 | |||
| be184aa5fa | |||
| 63b7b6d5bd | |||
| 123f8d0fed | |||
| a24c6e191f | |||
| 7206eed319 | |||
| 1619c0e503 | |||
| e27c819de3 | |||
| 1c78f6627a | |||
| 8ef2ae6502 | |||
| 0146cb2bd2 | |||
| da7d09c3b6 | |||
| af8d43dbbb | |||
| 27fc6c1086 | |||
| 45806629c5 | |||
| 4093201c47 | |||
| 9f610aa8f3 | |||
| e1c5e741ad |
@@ -14,7 +14,6 @@ from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from hermes_cli.config import get_env_value
|
||||
import hermes_cli.auth as auth_mod
|
||||
from hermes_cli.auth import (
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
@@ -1274,8 +1273,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
def _is_source_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
if provider == "openrouter":
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
|
||||
token = os.getenv("OPENROUTER_API_KEY", "").strip()
|
||||
if token:
|
||||
source = "env:OPENROUTER_API_KEY"
|
||||
if _is_source_suppressed(provider, source):
|
||||
@@ -1301,7 +1299,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
|
||||
env_url = ""
|
||||
if pconfig.base_url_env_var:
|
||||
env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
|
||||
env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
|
||||
|
||||
env_vars = list(pconfig.api_key_env_vars)
|
||||
if provider == "anthropic":
|
||||
@@ -1312,8 +1310,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
]
|
||||
|
||||
for env_var in env_vars:
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
token = (get_env_value(env_var) or "").strip()
|
||||
token = os.getenv(env_var, "").strip()
|
||||
if not token:
|
||||
continue
|
||||
source = f"env:{env_var}"
|
||||
|
||||
@@ -176,6 +176,64 @@ SKILLS_GUIDANCE = (
|
||||
"Skills that aren't maintained become liabilities."
|
||||
)
|
||||
|
||||
KANBAN_GUIDANCE = (
|
||||
"# You are a Kanban worker\n"
|
||||
"You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
|
||||
"the shared board at `~/.hermes/kanban.db`. Your task id is in "
|
||||
"`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
|
||||
"The `kanban_*` tools in your schema are your primary coordination surface — "
|
||||
"they write directly to the shared SQLite DB and work regardless of terminal "
|
||||
"backend (local/docker/modal/ssh).\n"
|
||||
"\n"
|
||||
"## Lifecycle\n"
|
||||
"\n"
|
||||
"1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
|
||||
"task). The response includes title, body, parent-task handoffs (summary + "
|
||||
"metadata), any prior attempts on this task if you're a retry, the full "
|
||||
"comment thread, and a pre-formatted `worker_context` you can treat as "
|
||||
"ground truth.\n"
|
||||
"2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
|
||||
"any file operations. The workspace is yours for this run. Don't modify "
|
||||
"files outside it unless the task explicitly asks.\n"
|
||||
"3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
|
||||
"every few minutes during long subprocesses (training, encoding, crawling). "
|
||||
"Skip heartbeats for short tasks.\n"
|
||||
"4. **Block on genuine ambiguity.** If you need a human decision you cannot "
|
||||
"infer (missing credentials, UX choice, paywalled source, peer output you "
|
||||
"need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
|
||||
"The user will unblock with context and the dispatcher will respawn you.\n"
|
||||
"5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
|
||||
"metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
|
||||
"artifacts. `metadata` is machine-readable facts "
|
||||
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
|
||||
"workers read both via their own `kanban_show`. Never put secrets / "
|
||||
"tokens / raw PII in either field — run rows are durable forever.\n"
|
||||
"6. **If follow-up work appears, create it; don't do it.** Use "
|
||||
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
|
||||
"to spawn a child task for the appropriate specialist profile instead of "
|
||||
"scope-creeping into the next thing.\n"
|
||||
"\n"
|
||||
"## Orchestrator mode\n"
|
||||
"\n"
|
||||
"If your task is itself a decomposition task (e.g. a planner profile given "
|
||||
"a high-level goal), use `kanban_create` to fan out into child tasks — one "
|
||||
"per specialist, each with an explicit `assignee` and `parents=[...]` to "
|
||||
"express dependencies. Then `kanban_complete` your own task with a summary "
|
||||
"of the decomposition. Do NOT execute the work yourself; your job is "
|
||||
"routing, not implementation.\n"
|
||||
"\n"
|
||||
"## Do NOT\n"
|
||||
"\n"
|
||||
"- Do not shell out to `hermes kanban <verb>` for board operations. Use "
|
||||
"the `kanban_*` tools — they work across all terminal backends.\n"
|
||||
"- Do not complete a task you didn't actually finish. Block it.\n"
|
||||
"- Do not assign follow-up work to yourself. Assign it to the right "
|
||||
"specialist profile.\n"
|
||||
"- Do not call `delegate_task` as a board substitute. `delegate_task` is "
|
||||
"for short reasoning subtasks inside your own run; board tasks are for "
|
||||
"cross-agent handoffs that outlive one API loop."
|
||||
)
|
||||
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
"# Tool-use enforcement\n"
|
||||
"You MUST use your tools to take action — do not describe what you would do "
|
||||
|
||||
@@ -329,7 +329,7 @@ def build_skill_invocation_message(
|
||||
|
||||
loaded_skill, skill_dir, skill_name = loaded
|
||||
activation_note = (
|
||||
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
|
||||
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
|
||||
"you to follow its instructions. The full skill content is loaded below.]"
|
||||
)
|
||||
return _build_skill_message(
|
||||
@@ -368,7 +368,7 @@ def build_preloaded_skills_prompt(
|
||||
|
||||
loaded_skill, skill_dir, skill_name = loaded
|
||||
activation_note = (
|
||||
f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
|
||||
f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
|
||||
"preloaded. Treat its instructions as active guidance for the duration of this "
|
||||
"session unless the user overrides them.]"
|
||||
)
|
||||
|
||||
@@ -1378,7 +1378,7 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:
|
||||
|
||||
|
||||
def _format_process_notification(evt: dict) -> "str | None":
|
||||
"""Format a process notification event into a [IMPORTANT: ...] message.
|
||||
"""Format a process notification event into a [SYSTEM: ...] message.
|
||||
|
||||
Handles both completion events (notify_on_complete) and watch pattern
|
||||
match events from the unified completion_queue.
|
||||
@@ -1388,14 +1388,14 @@ def _format_process_notification(evt: dict) -> "str | None":
|
||||
_cmd = evt.get("command", "unknown")
|
||||
|
||||
if evt_type == "watch_disabled":
|
||||
return f"[IMPORTANT: {evt.get('message', '')}]"
|
||||
return f"[SYSTEM: {evt.get('message', '')}]"
|
||||
|
||||
if evt_type == "watch_match":
|
||||
_pat = evt.get("pattern", "?")
|
||||
_out = evt.get("output", "")
|
||||
_sup = evt.get("suppressed", 0)
|
||||
text = (
|
||||
f"[IMPORTANT: Background process {_sid} matched "
|
||||
f"[SYSTEM: Background process {_sid} matched "
|
||||
f"watch pattern \"{_pat}\".\n"
|
||||
f"Command: {_cmd}\n"
|
||||
f"Matched output:\n{_out}"
|
||||
@@ -1409,7 +1409,7 @@ def _format_process_notification(evt: dict) -> "str | None":
|
||||
_exit = evt.get("exit_code", "?")
|
||||
_out = evt.get("output", "")
|
||||
return (
|
||||
f"[IMPORTANT: Background process {_sid} completed "
|
||||
f"[SYSTEM: Background process {_sid} completed "
|
||||
f"(exit code {_exit}).\n"
|
||||
f"Command: {_cmd}\n"
|
||||
f"Output:\n{_out}]"
|
||||
@@ -4915,12 +4915,6 @@ class HermesCLI:
|
||||
if self.agent:
|
||||
self.agent.session_id = new_session_id
|
||||
self.agent.session_start = now
|
||||
# Redirect the JSON session log to the new branch session file so
|
||||
# messages written after branching land in the correct file.
|
||||
if hasattr(self.agent, "session_log_file") and hasattr(self.agent, "logs_dir"):
|
||||
self.agent.session_log_file = (
|
||||
self.agent.logs_dir / f"session_{new_session_id}.json"
|
||||
)
|
||||
self.agent.reset_session_state()
|
||||
if hasattr(self.agent, "_last_flushed_db_idx"):
|
||||
self.agent._last_flushed_db_idx = len(self.conversation_history)
|
||||
@@ -5824,7 +5818,28 @@ class HermesCLI:
|
||||
|
||||
print(f"(._.) Unknown cron command: {subcommand}")
|
||||
print(" Available: list, add, edit, pause, resume, run, remove")
|
||||
|
||||
|
||||
def _handle_kanban_command(self, cmd: str):
|
||||
"""Handle the /kanban command — delegate to the shared kanban CLI.
|
||||
|
||||
The string form passed here is the user's full ``/kanban ...``
|
||||
including the leading slash; we strip it and hand the remainder
|
||||
to ``kanban.run_slash`` which returns a single formatted string.
|
||||
"""
|
||||
from hermes_cli.kanban import run_slash
|
||||
|
||||
rest = cmd.strip()
|
||||
if rest.startswith("/"):
|
||||
rest = rest.lstrip("/")
|
||||
if rest.startswith("kanban"):
|
||||
rest = rest[len("kanban"):].lstrip()
|
||||
try:
|
||||
output = run_slash(rest)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
output = f"(._.) kanban error: {exc}"
|
||||
if output:
|
||||
print(output)
|
||||
|
||||
def _handle_skills_command(self, cmd: str):
|
||||
"""Handle /skills slash command — delegates to hermes_cli.skills_hub."""
|
||||
from hermes_cli.skills_hub import handle_skills_slash
|
||||
@@ -6061,6 +6076,8 @@ class HermesCLI:
|
||||
self.save_conversation()
|
||||
elif canonical == "cron":
|
||||
self._handle_cron_command(cmd_original)
|
||||
elif canonical == "kanban":
|
||||
self._handle_kanban_command(cmd_original)
|
||||
elif canonical == "skills":
|
||||
with self._busy_command(self._slow_command_status(cmd_original)):
|
||||
self._handle_skills_command(cmd_original)
|
||||
@@ -6313,12 +6330,6 @@ class HermesCLI:
|
||||
turn_route = self._resolve_turn_agent_config(prompt)
|
||||
|
||||
def run_background():
|
||||
set_sudo_password_callback(self._sudo_password_callback)
|
||||
set_approval_callback(self._approval_callback)
|
||||
try:
|
||||
set_secret_capture_callback(self._secret_capture_callback)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
bg_agent = AIAgent(
|
||||
model=turn_route["model"],
|
||||
@@ -6416,12 +6427,6 @@ class HermesCLI:
|
||||
print()
|
||||
_cprint(f" ❌ Background task #{task_num} failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
set_sudo_password_callback(None)
|
||||
set_approval_callback(None)
|
||||
set_secret_capture_callback(None)
|
||||
except Exception:
|
||||
pass
|
||||
self._background_tasks.pop(task_id, None)
|
||||
# Clear spinner only if no foreground agent owns it
|
||||
if not self._agent_running:
|
||||
@@ -7235,7 +7240,7 @@ class HermesCLI:
|
||||
change_detail = ". ".join(change_parts) + ". " if change_parts else ""
|
||||
self.conversation_history.append({
|
||||
"role": "user",
|
||||
"content": f"[IMPORTANT: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
|
||||
"content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
|
||||
})
|
||||
|
||||
# Persist session immediately so the session log reflects the
|
||||
@@ -9859,7 +9864,7 @@ class HermesCLI:
|
||||
status = cli_ref._command_status or "Processing command..."
|
||||
return f"{frame} {status}"
|
||||
if cli_ref._agent_running:
|
||||
return "msg=interrupt · /queue · /bg · /steer · Ctrl+C cancel"
|
||||
return "type a message + Enter to interrupt, Ctrl+C to cancel"
|
||||
if cli_ref._voice_mode:
|
||||
return "type or Ctrl+B to record"
|
||||
return ""
|
||||
|
||||
+3
-3
@@ -715,7 +715,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
# Always prepend cron execution guidance so the agent knows how
|
||||
# delivery works and can suppress delivery when appropriate.
|
||||
cron_hint = (
|
||||
"[IMPORTANT: You are running as a scheduled cron job. "
|
||||
"[SYSTEM: You are running as a scheduled cron job. "
|
||||
"DELIVERY: Your final response will be automatically delivered "
|
||||
"to the user — do NOT use send_message or try to deliver "
|
||||
"the output yourself. Just produce your report/output as your "
|
||||
@@ -751,7 +751,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
parts.append("")
|
||||
parts.extend(
|
||||
[
|
||||
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
|
||||
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
|
||||
"",
|
||||
content,
|
||||
]
|
||||
@@ -759,7 +759,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
|
||||
if skipped:
|
||||
notice = (
|
||||
f"[IMPORTANT: The following skill(s) were listed for this job but could not be found "
|
||||
f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
|
||||
f"and were skipped: {', '.join(skipped)}. "
|
||||
f"Start your response with a brief notice so the user is aware, e.g.: "
|
||||
f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
|
||||
|
||||
Binary file not shown.
@@ -57,7 +57,7 @@ def _session_entry_name(origin: Dict[str, Any]) -> str:
|
||||
# Build / refresh
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Build a channel directory from connected platform adapters and session data.
|
||||
|
||||
@@ -72,7 +72,7 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
if platform == Platform.DISCORD:
|
||||
platforms["discord"] = _build_discord(adapter)
|
||||
elif platform == Platform.SLACK:
|
||||
platforms["slack"] = await _build_slack(adapter)
|
||||
platforms["slack"] = _build_slack(adapter)
|
||||
except Exception as e:
|
||||
logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
|
||||
|
||||
@@ -136,66 +136,21 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
|
||||
return channels
|
||||
|
||||
|
||||
async def _build_slack(adapter) -> List[Dict[str, Any]]:
|
||||
"""List Slack channels the bot has joined across all workspaces.
|
||||
|
||||
Uses ``users.conversations`` against each workspace's web client. Pulls
|
||||
public + private channels the bot is a member of, then merges in DMs
|
||||
discovered from session history (IMs aren't useful to enumerate
|
||||
proactively).
|
||||
"""
|
||||
team_clients = getattr(adapter, "_team_clients", None) or {}
|
||||
if not team_clients:
|
||||
def _build_slack(adapter) -> List[Dict[str, str]]:
|
||||
"""List Slack channels the bot has joined."""
|
||||
# Slack adapter may expose a web client
|
||||
client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
|
||||
if not client:
|
||||
return _build_from_sessions("slack")
|
||||
|
||||
channels: List[Dict[str, Any]] = []
|
||||
seen_ids: set = set()
|
||||
try:
|
||||
from tools.send_message_tool import _send_slack # noqa: F401
|
||||
# Use the Slack Web API directly if available
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
for team_id, client in team_clients.items():
|
||||
try:
|
||||
cursor: Optional[str] = None
|
||||
for _page in range(20): # safety cap on pagination
|
||||
response = await client.users_conversations(
|
||||
types="public_channel,private_channel",
|
||||
exclude_archived=True,
|
||||
limit=200,
|
||||
cursor=cursor,
|
||||
)
|
||||
if not response.get("ok"):
|
||||
logger.warning(
|
||||
"Channel directory: users.conversations not ok for team %s: %s",
|
||||
team_id,
|
||||
response.get("error", "unknown"),
|
||||
)
|
||||
break
|
||||
for ch in response.get("channels", []):
|
||||
cid = ch.get("id")
|
||||
name = ch.get("name")
|
||||
if not cid or not name or cid in seen_ids:
|
||||
continue
|
||||
seen_ids.add(cid)
|
||||
channels.append({
|
||||
"id": cid,
|
||||
"name": name,
|
||||
"type": "private" if ch.get("is_private") else "channel",
|
||||
})
|
||||
cursor = (response.get("response_metadata") or {}).get("next_cursor")
|
||||
if not cursor:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Channel directory: failed to list Slack channels for team %s: %s",
|
||||
team_id, e,
|
||||
)
|
||||
continue
|
||||
|
||||
# Merge in DM/group entries discovered from session history.
|
||||
for entry in _build_from_sessions("slack"):
|
||||
if entry.get("id") not in seen_ids:
|
||||
channels.append(entry)
|
||||
seen_ids.add(entry.get("id"))
|
||||
|
||||
return channels
|
||||
# Fallback to session data
|
||||
return _build_from_sessions("slack")
|
||||
|
||||
|
||||
def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
|
||||
@@ -268,14 +223,6 @@ def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
|
||||
if not channels:
|
||||
return None
|
||||
|
||||
# 0. Exact ID match — case-sensitive, no normalization. Lets callers pass
|
||||
# raw platform IDs (e.g. Slack "C0B0QV5434G") even when the format guard
|
||||
# in _parse_target_ref hasn't recognized them as explicit.
|
||||
raw = name.strip()
|
||||
for ch in channels:
|
||||
if ch.get("id") == raw:
|
||||
return ch["id"]
|
||||
|
||||
query = _normalize_channel_query(name)
|
||||
|
||||
# 1. Exact name match, including the display labels shown by send_message(action="list")
|
||||
|
||||
@@ -570,8 +570,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
)
|
||||
if "reply_prefix" in platform_cfg:
|
||||
bridged["reply_prefix"] = platform_cfg["reply_prefix"]
|
||||
if "reply_in_thread" in platform_cfg:
|
||||
bridged["reply_in_thread"] = platform_cfg["reply_in_thread"]
|
||||
if "require_mention" in platform_cfg:
|
||||
bridged["require_mention"] = platform_cfg["require_mention"]
|
||||
if "free_response_channels" in platform_cfg:
|
||||
@@ -611,8 +609,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if isinstance(slack_cfg, dict):
|
||||
if "require_mention" in slack_cfg and not os.getenv("SLACK_REQUIRE_MENTION"):
|
||||
os.environ["SLACK_REQUIRE_MENTION"] = str(slack_cfg["require_mention"]).lower()
|
||||
if "strict_mention" in slack_cfg and not os.getenv("SLACK_STRICT_MENTION"):
|
||||
os.environ["SLACK_STRICT_MENTION"] = str(slack_cfg["strict_mention"]).lower()
|
||||
if "allow_bots" in slack_cfg and not os.getenv("SLACK_ALLOW_BOTS"):
|
||||
os.environ["SLACK_ALLOW_BOTS"] = str(slack_cfg["allow_bots"]).lower()
|
||||
frc = slack_cfg.get("free_response_channels")
|
||||
|
||||
+16
-3
@@ -21,6 +21,7 @@ Errors in hooks are caught and logged but never block the main pipeline.
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import sys
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
@@ -103,16 +104,28 @@ class HookRegistry:
|
||||
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
|
||||
continue
|
||||
|
||||
# Dynamically load the handler module
|
||||
# Dynamically load the handler module.
|
||||
# Register in sys.modules BEFORE exec_module so Pydantic /
|
||||
# dataclasses / typing introspection can resolve forward
|
||||
# references (triggered by `from __future__ import annotations`
|
||||
# in the handler). Without this, a handler that declares a
|
||||
# Pydantic BaseModel for webhook/event payloads fails at first
|
||||
# dispatch with "TypeAdapter ... is not fully defined".
|
||||
module_name = f"hermes_hook_{hook_name}"
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"hermes_hook_{hook_name}", handler_path
|
||||
module_name, handler_path
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
|
||||
continue
|
||||
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
sys.modules[module_name] = module
|
||||
try:
|
||||
spec.loader.exec_module(module)
|
||||
except Exception:
|
||||
sys.modules.pop(module_name, None)
|
||||
raise
|
||||
|
||||
handle_fn = getattr(module, "handle", None)
|
||||
if handle_fn is None:
|
||||
|
||||
+29
-295
@@ -15,7 +15,7 @@ import os
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, Optional, Any, Tuple, List
|
||||
from typing import Dict, Optional, Any, Tuple
|
||||
|
||||
try:
|
||||
from slack_bolt.async_app import AsyncApp
|
||||
@@ -55,7 +55,6 @@ class _ThreadContextCache:
|
||||
content: str
|
||||
fetched_at: float = field(default_factory=time.monotonic)
|
||||
message_count: int = 0
|
||||
parent_text: str = "" # Raw text of the thread parent (for reply_to_text injection)
|
||||
|
||||
|
||||
def check_slack_requirements() -> bool:
|
||||
@@ -121,63 +120,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# clear them (chat_id → thread_ts).
|
||||
self._active_status_threads: Dict[str, str] = {}
|
||||
|
||||
def _describe_slack_api_error(self, response: Any, *, file_obj: Optional[Dict[str, Any]] = None) -> Optional[str]:
|
||||
"""Convert Slack API auth/permission failures into actionable user-facing text."""
|
||||
if response is None or not hasattr(response, "get"):
|
||||
return None
|
||||
|
||||
error = str(response.get("error", "") or "").strip()
|
||||
if not error:
|
||||
return None
|
||||
|
||||
file_label = str((file_obj or {}).get("name") or (file_obj or {}).get("id") or "this attachment")
|
||||
needed = str(response.get("needed", "") or "").strip()
|
||||
provided = str(response.get("provided", "") or "").strip()
|
||||
reinstall_hint = " Update the Slack app scopes/settings and reinstall the app to the workspace."
|
||||
provided_hint = f" Current bot scopes: {provided}." if provided else ""
|
||||
|
||||
if error == "missing_scope":
|
||||
needed_hint = f"Missing scope: {needed}." if needed else "Missing required Slack scope."
|
||||
return f"Slack attachment access failed for {file_label}. {needed_hint}{provided_hint}{reinstall_hint}"
|
||||
if error in {"not_authed", "invalid_auth", "account_inactive", "token_revoked"}:
|
||||
return f"Slack attachment access failed for {file_label} because the bot token is not authorized ({error}). Refresh the token/reinstall the app."
|
||||
if error in {"file_not_found", "file_deleted"}:
|
||||
return f"Slack attachment {file_label} is no longer available ({error})."
|
||||
if error in {"access_denied", "file_access_denied", "no_permission", "not_allowed_token_type", "restricted_action"}:
|
||||
return f"Slack attachment access failed for {file_label} because the bot does not have permission ({error}). Check workspace permissions/scopes and reinstall if needed."
|
||||
return None
|
||||
|
||||
def _describe_slack_download_failure(self, exc: Exception, *, file_obj: Optional[Dict[str, Any]] = None) -> Optional[str]:
|
||||
"""Translate Slack download exceptions into user-facing attachment diagnostics."""
|
||||
file_label = str((file_obj or {}).get("name") or (file_obj or {}).get("id") or "this attachment")
|
||||
|
||||
response = getattr(exc, "response", None)
|
||||
api_detail = self._describe_slack_api_error(response, file_obj=file_obj)
|
||||
if api_detail:
|
||||
return api_detail
|
||||
|
||||
try:
|
||||
import httpx
|
||||
except Exception: # pragma: no cover
|
||||
httpx = None
|
||||
|
||||
if httpx is not None and isinstance(exc, httpx.HTTPStatusError):
|
||||
status = exc.response.status_code
|
||||
if status == 401:
|
||||
return f"Slack attachment access failed for {file_label} with HTTP 401. The bot token is not authorized for this file."
|
||||
if status == 403:
|
||||
return f"Slack attachment access failed for {file_label} with HTTP 403. The bot likely lacks permission or scope to read this file."
|
||||
if status == 404:
|
||||
return f"Slack attachment {file_label} returned HTTP 404 and is no longer reachable."
|
||||
|
||||
message = str(exc)
|
||||
if "Slack returned HTML instead of media" in message or "non-image data" in message:
|
||||
return (
|
||||
f"Slack attachment access failed for {file_label}: Slack returned an HTML/login or non-media response. "
|
||||
"This usually means a scope, auth, or file-permission problem."
|
||||
)
|
||||
return None
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Slack via Socket Mode."""
|
||||
if not SLACK_AVAILABLE:
|
||||
@@ -265,31 +207,8 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
async def handle_assistant_thread_context_changed(event, say):
|
||||
await self._handle_assistant_thread_lifecycle_event(event)
|
||||
|
||||
# Register slash command handler(s)
|
||||
#
|
||||
# Every gateway command from COMMAND_REGISTRY is a native Slack
|
||||
# slash, matching Discord and Telegram's model (e.g. /btw, /stop,
|
||||
# /model work directly without /hermes prefix). A single regex
|
||||
# matcher dispatches all of them to one handler so we don't need
|
||||
# N identical @app.command() decorators.
|
||||
#
|
||||
# The slash commands must ALSO be declared in the Slack app
|
||||
# manifest (see `hermes slack manifest`). In Socket Mode, Slack
|
||||
# routes the command event through the socket regardless of the
|
||||
# manifest's request URL, but it will not deliver an event for
|
||||
# a slash command the manifest doesn't declare.
|
||||
from hermes_cli.commands import slack_native_slashes
|
||||
import re as _re
|
||||
|
||||
_slash_names = [name for name, _d, _h in slack_native_slashes()]
|
||||
if _slash_names:
|
||||
_slash_pattern = _re.compile(
|
||||
r"^/(?:" + "|".join(_re.escape(n) for n in _slash_names) + r")$"
|
||||
)
|
||||
else: # pragma: no cover - registry always non-empty
|
||||
_slash_pattern = _re.compile(r"^/hermes$")
|
||||
|
||||
@self._app.command(_slash_pattern)
|
||||
# Register slash command handler
|
||||
@self._app.command("/hermes")
|
||||
async def handle_hermes_command(ack, command):
|
||||
await ack()
|
||||
await self._handle_slash_command(command)
|
||||
@@ -508,18 +427,8 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
# When reply_in_thread is disabled (default: True for backward compat),
|
||||
# only thread messages that are already part of an existing thread.
|
||||
# For top-level channel messages, the inbound handler sets
|
||||
# metadata.thread_id to the message's own ts as a session-keying
|
||||
# fallback (see the `thread_ts = event.get("thread_ts") or ts` branch),
|
||||
# so metadata alone can't distinguish a real thread reply from a
|
||||
# top-level message. reply_to is the incoming message's own id, so
|
||||
# when thread_id == reply_to the "thread" is synthetic and we reply
|
||||
# directly in the channel instead.
|
||||
if not self.config.extra.get("reply_in_thread", True):
|
||||
md = metadata or {}
|
||||
existing_thread = md.get("thread_id") or md.get("thread_ts")
|
||||
if existing_thread and reply_to and existing_thread == reply_to:
|
||||
existing_thread = None
|
||||
existing_thread = (metadata or {}).get("thread_id") or (metadata or {}).get("thread_ts")
|
||||
return existing_thread or None
|
||||
|
||||
if metadata:
|
||||
@@ -1191,8 +1100,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
pass # Free-response channel — always process
|
||||
elif not self._slack_require_mention():
|
||||
pass # Mention requirement disabled globally for Slack
|
||||
elif self._slack_strict_mention() and not is_mentioned:
|
||||
return # Strict mode: ignore until @-mentioned again
|
||||
elif not is_mentioned:
|
||||
reply_to_bot_thread = (
|
||||
is_thread_reply and event_thread_ts in self._bot_message_ts
|
||||
@@ -1215,11 +1122,8 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
if is_mentioned:
|
||||
# Strip the bot mention from the text
|
||||
text = text.replace(f"<@{bot_uid}>", "").strip()
|
||||
# Register this thread so all future messages auto-trigger the bot.
|
||||
# Skipped in strict mode: strict_mention=true bots must be
|
||||
# re-mentioned every turn, so remembering the thread would
|
||||
# defeat the feature (and re-enable agent-to-agent ack loops).
|
||||
if event_thread_ts and not self._slack_strict_mention():
|
||||
# Register this thread so all future messages auto-trigger the bot
|
||||
if event_thread_ts:
|
||||
self._mentioned_threads.add(event_thread_ts)
|
||||
if len(self._mentioned_threads) > self._MENTIONED_THREADS_MAX:
|
||||
to_remove = list(self._mentioned_threads)[:self._MENTIONED_THREADS_MAX // 2]
|
||||
@@ -1250,43 +1154,8 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# Handle file attachments
|
||||
media_urls = []
|
||||
media_types = []
|
||||
attachment_notices: List[str] = []
|
||||
files = event.get("files", [])
|
||||
for f in files:
|
||||
# Slack Connect channels return stub file objects with
|
||||
# file_access="check_file_info" and no URL fields. We must
|
||||
# call files.info to retrieve the full object (including url_private_download)
|
||||
# before we can download it.
|
||||
# https://docs.slack.dev/reference/objects/file-object/#slack_connect_files
|
||||
if f.get("file_access") == "check_file_info":
|
||||
file_id = f.get("id")
|
||||
if not file_id:
|
||||
continue
|
||||
try:
|
||||
info_resp = await self._get_client(channel_id).files_info(file=file_id)
|
||||
if info_resp.get("ok"):
|
||||
f = info_resp["file"]
|
||||
else:
|
||||
detail = self._describe_slack_api_error(info_resp, file_obj=f)
|
||||
if detail:
|
||||
attachment_notices.append(detail)
|
||||
logger.warning("[Slack] %s", detail)
|
||||
else:
|
||||
logger.warning(
|
||||
"[Slack] files.info failed for %s: %s",
|
||||
file_id, info_resp.get("error"),
|
||||
)
|
||||
continue
|
||||
except Exception as e:
|
||||
response = getattr(e, "response", None)
|
||||
detail = self._describe_slack_api_error(response, file_obj=f)
|
||||
if detail:
|
||||
attachment_notices.append(detail)
|
||||
logger.warning("[Slack] %s", detail)
|
||||
else:
|
||||
logger.warning("[Slack] files.info error for %s: %s", file_id, e, exc_info=True)
|
||||
continue
|
||||
|
||||
mimetype = f.get("mimetype", "unknown")
|
||||
url = f.get("url_private_download") or f.get("url_private", "")
|
||||
if mimetype.startswith("image/") and url:
|
||||
@@ -1300,12 +1169,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
media_types.append(mimetype)
|
||||
msg_type = MessageType.PHOTO
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
detail = self._describe_slack_download_failure(e, file_obj=f)
|
||||
if detail:
|
||||
attachment_notices.append(detail)
|
||||
logger.warning("[Slack] %s", detail)
|
||||
else:
|
||||
logger.warning("[Slack] Failed to cache image from %s: %s", url, e, exc_info=True)
|
||||
logger.warning("[Slack] Failed to cache image from %s: %s", url, e, exc_info=True)
|
||||
elif mimetype.startswith("audio/") and url:
|
||||
try:
|
||||
ext = "." + mimetype.split("/")[-1].split(";")[0]
|
||||
@@ -1316,12 +1180,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
media_types.append(mimetype)
|
||||
msg_type = MessageType.VOICE
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
detail = self._describe_slack_download_failure(e, file_obj=f)
|
||||
if detail:
|
||||
attachment_notices.append(detail)
|
||||
logger.warning("[Slack] %s", detail)
|
||||
else:
|
||||
logger.warning("[Slack] Failed to cache audio from %s: %s", url, e, exc_info=True)
|
||||
logger.warning("[Slack] Failed to cache audio from %s: %s", url, e, exc_info=True)
|
||||
elif url:
|
||||
# Try to handle as a document attachment
|
||||
try:
|
||||
@@ -1373,16 +1232,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
pass # Binary content, skip injection
|
||||
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
detail = self._describe_slack_download_failure(e, file_obj=f)
|
||||
if detail:
|
||||
attachment_notices.append(detail)
|
||||
logger.warning("[Slack] %s", detail)
|
||||
else:
|
||||
logger.warning("[Slack] Failed to cache document from %s: %s", url, e, exc_info=True)
|
||||
|
||||
if attachment_notices:
|
||||
notice_block = "[Slack attachment notice]\n" + "\n".join(f"- {n}" for n in attachment_notices)
|
||||
text = f"{notice_block}\n\n{text}" if text else notice_block
|
||||
logger.warning("[Slack] Failed to cache document from %s: %s", url, e, exc_info=True)
|
||||
|
||||
# Resolve user display name (cached after first lookup)
|
||||
user_name = await self._resolve_user_name(user_id, chat_id=channel_id)
|
||||
@@ -1403,22 +1253,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
self.config.extra, channel_id, None,
|
||||
)
|
||||
|
||||
# Extract reply context if this message is a thread reply.
|
||||
# Mirrors the Telegram/Discord implementations so that gateway.run
|
||||
# can inject a `[Replying to: "..."]` prefix when the parent is not
|
||||
# already in the session history. Uses the thread-context cache when
|
||||
# available to avoid redundant conversations.replies calls.
|
||||
reply_to_text = None
|
||||
if thread_ts and thread_ts != ts:
|
||||
try:
|
||||
reply_to_text = await self._fetch_thread_parent_text(
|
||||
channel_id=channel_id,
|
||||
thread_ts=thread_ts,
|
||||
team_id=team_id,
|
||||
) or None
|
||||
except Exception: # pragma: no cover - defensive
|
||||
reply_to_text = None
|
||||
|
||||
msg_event = MessageEvent(
|
||||
text=text,
|
||||
message_type=msg_type,
|
||||
@@ -1429,7 +1263,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
media_types=media_types,
|
||||
reply_to_message_id=thread_ts if thread_ts != ts else None,
|
||||
channel_prompt=_channel_prompt,
|
||||
reply_to_text=reply_to_text,
|
||||
)
|
||||
|
||||
# Only react when bot is directly addressed (DM or @mention).
|
||||
@@ -1637,7 +1470,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
Returns a formatted string with prior thread history, or empty string
|
||||
on failure or if the thread has no prior messages.
|
||||
"""
|
||||
cache_key = f"{channel_id}:{thread_ts}:{team_id}"
|
||||
cache_key = f"{channel_id}:{thread_ts}"
|
||||
now = time.monotonic()
|
||||
cached = self._thread_context_cache.get(cache_key)
|
||||
if cached and (now - cached.fetched_at) < self._THREAD_CACHE_TTL:
|
||||
@@ -1684,37 +1517,14 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
|
||||
bot_uid = self._team_bot_user_ids.get(team_id, self._bot_user_id)
|
||||
context_parts = []
|
||||
parent_text = ""
|
||||
for msg in messages:
|
||||
msg_ts = msg.get("ts", "")
|
||||
# Exclude the current triggering message — it will be delivered
|
||||
# as the user message itself, so including it here would duplicate it.
|
||||
if msg_ts == current_ts:
|
||||
continue
|
||||
|
||||
is_parent = msg_ts == thread_ts
|
||||
is_bot = bool(msg.get("bot_id")) or msg.get("subtype") == "bot_message"
|
||||
msg_user = msg.get("user", "")
|
||||
|
||||
# Identify "our own" bot for this workspace (multi-workspace safe).
|
||||
msg_team = msg.get("team") or team_id
|
||||
self_bot_uid = (
|
||||
self._team_bot_user_ids.get(msg_team)
|
||||
if msg_team
|
||||
else None
|
||||
) or self._bot_user_id
|
||||
|
||||
# Exclude only our own prior bot replies (circular context).
|
||||
# Keep:
|
||||
# - the thread parent even if it was posted by a bot
|
||||
# (e.g. a cron job summary we are now replying to);
|
||||
# - other bots' child messages (useful third-party context).
|
||||
if (
|
||||
is_bot
|
||||
and not is_parent
|
||||
and self_bot_uid
|
||||
and msg_user == self_bot_uid
|
||||
):
|
||||
# Exclude our own bot messages to avoid circular context.
|
||||
if msg.get("bot_id") or msg.get("subtype") == "bot_message":
|
||||
continue
|
||||
|
||||
msg_text = msg.get("text", "").strip()
|
||||
@@ -1725,15 +1535,11 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
if bot_uid:
|
||||
msg_text = msg_text.replace(f"<@{bot_uid}>", "").strip()
|
||||
|
||||
msg_user = msg.get("user", "unknown")
|
||||
is_parent = msg_ts == thread_ts
|
||||
prefix = "[thread parent] " if is_parent else ""
|
||||
display_user = msg_user or "unknown"
|
||||
# Prefer the bot's own name when the message is a bot post.
|
||||
if is_bot and not display_user:
|
||||
display_user = msg.get("username") or "bot"
|
||||
name = await self._resolve_user_name(display_user, chat_id=channel_id)
|
||||
name = await self._resolve_user_name(msg_user, chat_id=channel_id)
|
||||
context_parts.append(f"{prefix}{name}: {msg_text}")
|
||||
if is_parent:
|
||||
parent_text = msg_text
|
||||
|
||||
content = ""
|
||||
if context_parts:
|
||||
@@ -1747,7 +1553,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
content=content,
|
||||
fetched_at=now,
|
||||
message_count=len(context_parts),
|
||||
parent_text=parent_text,
|
||||
)
|
||||
return content
|
||||
|
||||
@@ -1755,62 +1560,8 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
logger.warning("[Slack] Failed to fetch thread context: %s", e)
|
||||
return ""
|
||||
|
||||
async def _fetch_thread_parent_text(
|
||||
self, channel_id: str, thread_ts: str, team_id: str = "",
|
||||
) -> str:
|
||||
"""Return the raw text of the thread parent message (for reply_to_text).
|
||||
|
||||
Uses the same per-thread cache as :meth:`_fetch_thread_context` to avoid
|
||||
hitting ``conversations.replies`` twice. Falls back to a cheap single-
|
||||
message fetch (``limit=1, inclusive=True``) when the cache is cold.
|
||||
|
||||
Returns empty string on any failure — callers should treat an empty
|
||||
return as "no parent context to inject".
|
||||
"""
|
||||
cache_key = f"{channel_id}:{thread_ts}:{team_id}"
|
||||
now = time.monotonic()
|
||||
cached = self._thread_context_cache.get(cache_key)
|
||||
if cached and (now - cached.fetched_at) < self._THREAD_CACHE_TTL:
|
||||
return cached.parent_text
|
||||
|
||||
try:
|
||||
client = self._get_client(channel_id)
|
||||
result = await client.conversations_replies(
|
||||
channel=channel_id,
|
||||
ts=thread_ts,
|
||||
limit=1,
|
||||
inclusive=True,
|
||||
)
|
||||
messages = result.get("messages", []) if result else []
|
||||
if not messages:
|
||||
return ""
|
||||
parent = messages[0]
|
||||
if parent.get("ts", "") != thread_ts:
|
||||
return ""
|
||||
bot_uid = self._team_bot_user_ids.get(team_id, self._bot_user_id)
|
||||
text = (parent.get("text") or "").strip()
|
||||
if bot_uid:
|
||||
text = text.replace(f"<@{bot_uid}>", "").strip()
|
||||
return text
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug("[Slack] Failed to fetch thread parent text: %s", exc)
|
||||
return ""
|
||||
|
||||
async def _handle_slash_command(self, command: dict) -> None:
|
||||
"""Handle Slack slash commands.
|
||||
|
||||
Every gateway command in COMMAND_REGISTRY is registered as a native
|
||||
Slack slash (``/btw``, ``/stop``, ``/model``, etc.), matching the
|
||||
Discord and Telegram model. The slash name itself is the command;
|
||||
any text after it is the argument list.
|
||||
|
||||
The legacy ``/hermes <subcommand> [args]`` form is preserved for
|
||||
backward compatibility with older workspace manifests and for users
|
||||
who want a single entry point for free-form questions (``/hermes
|
||||
what's the weather`` — non-slash text is treated as a regular
|
||||
message).
|
||||
"""
|
||||
slash_name = (command.get("command") or "").lstrip("/").strip()
|
||||
"""Handle /hermes slash command."""
|
||||
text = command.get("text", "").strip()
|
||||
user_id = command.get("user_id", "")
|
||||
channel_id = command.get("channel_id", "")
|
||||
@@ -1820,25 +1571,20 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
if team_id and channel_id:
|
||||
self._channel_team[channel_id] = team_id
|
||||
|
||||
if slash_name in ("hermes", ""):
|
||||
# Legacy /hermes <subcommand> [args] routing + free-form questions.
|
||||
# Empty slash_name falls into this branch for backward compat
|
||||
# with any caller that didn't populate command["command"].
|
||||
from hermes_cli.commands import slack_subcommand_map
|
||||
subcommand_map = slack_subcommand_map()
|
||||
subcommand_map["compact"] = "/compress"
|
||||
first_word = text.split()[0] if text else ""
|
||||
if first_word in subcommand_map:
|
||||
rest = text[len(first_word):].strip()
|
||||
text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word]
|
||||
elif text:
|
||||
pass # Treat as a regular question
|
||||
else:
|
||||
text = "/help"
|
||||
# Map subcommands to gateway commands — derived from central registry.
|
||||
# Also keep "compact" as a Slack-specific alias for /compress.
|
||||
from hermes_cli.commands import slack_subcommand_map
|
||||
subcommand_map = slack_subcommand_map()
|
||||
subcommand_map["compact"] = "/compress"
|
||||
first_word = text.split()[0] if text else ""
|
||||
if first_word in subcommand_map:
|
||||
# Preserve arguments after the subcommand
|
||||
rest = text[len(first_word):].strip()
|
||||
text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word]
|
||||
elif text:
|
||||
pass # Treat as a regular question
|
||||
else:
|
||||
# Native slash — /<slash_name> [args]. Route directly through the
|
||||
# gateway command dispatcher by prepending the slash.
|
||||
text = f"/{slash_name} {text}".strip()
|
||||
text = "/help"
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=channel_id,
|
||||
@@ -1986,18 +1732,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
return bool(configured)
|
||||
return os.getenv("SLACK_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")
|
||||
|
||||
def _slack_strict_mention(self) -> bool:
|
||||
"""When true, channel threads require an explicit @-mention on every
|
||||
message. Disables all auto-triggers (mentioned-thread memory,
|
||||
bot-message follow-up, session-presence). Defaults to False.
|
||||
"""
|
||||
configured = self.config.extra.get("strict_mention")
|
||||
if configured is not None:
|
||||
if isinstance(configured, str):
|
||||
return configured.lower() in ("true", "1", "yes", "on")
|
||||
return bool(configured)
|
||||
return os.getenv("SLACK_STRICT_MENTION", "false").lower() in ("true", "1", "yes", "on")
|
||||
|
||||
def _slack_free_response_channels(self) -> set:
|
||||
"""Return channel IDs where no @mention is required."""
|
||||
raw = self.config.extra.get("free_response_channels")
|
||||
|
||||
+348
-157
@@ -591,20 +591,20 @@ def _parse_session_key(session_key: str) -> "dict | None":
|
||||
|
||||
|
||||
def _format_gateway_process_notification(evt: dict) -> "str | None":
|
||||
"""Format a watch pattern event from completion_queue into a [IMPORTANT:] message."""
|
||||
"""Format a watch pattern event from completion_queue into a [SYSTEM:] message."""
|
||||
evt_type = evt.get("type", "completion")
|
||||
_sid = evt.get("session_id", "unknown")
|
||||
_cmd = evt.get("command", "unknown")
|
||||
|
||||
if evt_type == "watch_disabled":
|
||||
return f"[IMPORTANT: {evt.get('message', '')}]"
|
||||
return f"[SYSTEM: {evt.get('message', '')}]"
|
||||
|
||||
if evt_type == "watch_match":
|
||||
_pat = evt.get("pattern", "?")
|
||||
_out = evt.get("output", "")
|
||||
_sup = evt.get("suppressed", 0)
|
||||
text = (
|
||||
f"[IMPORTANT: Background process {_sid} matched "
|
||||
f"[SYSTEM: Background process {_sid} matched "
|
||||
f"watch pattern \"{_pat}\".\n"
|
||||
f"Command: {_cmd}\n"
|
||||
f"Matched output:\n{_out}"
|
||||
@@ -682,16 +682,6 @@ class GatewayRunner:
|
||||
self._running_agents: Dict[str, Any] = {}
|
||||
self._running_agents_ts: Dict[str, float] = {} # start timestamp per session
|
||||
self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt
|
||||
# Overflow buffer for explicit /queue commands. The adapter-level
|
||||
# _pending_messages dict is a single slot per session (designed for
|
||||
# "next-turn" follow-ups where repeated sends collapse into one
|
||||
# event). /queue has different semantics: each invocation must
|
||||
# produce its own full agent turn, in FIFO order, with no merging.
|
||||
# When the slot is occupied, additional /queue items land here and
|
||||
# are promoted one-at-a-time after each run's drain. Cleared on
|
||||
# /new and /reset. /model and other mid-session operations
|
||||
# preserve the queue.
|
||||
self._queued_events: Dict[str, List[MessageEvent]] = {}
|
||||
self._busy_ack_ts: Dict[str, float] = {} # last busy-ack timestamp per session (debounce)
|
||||
self._session_run_generation: Dict[str, int] = {}
|
||||
|
||||
@@ -1214,76 +1204,6 @@ class GatewayRunner:
|
||||
def _queue_during_drain_enabled(self) -> bool:
|
||||
return self._restart_requested and self._busy_input_mode == "queue"
|
||||
|
||||
# -------- /queue FIFO helpers --------------------------------------
|
||||
# /queue must produce one full agent turn per invocation, in FIFO
|
||||
# order, with no merging. The adapter's _pending_messages dict is a
|
||||
# single "next-up" slot (shared with photo-burst follow-ups), so we
|
||||
# use it for the head of the queue and an overflow list for the
|
||||
# tail. Enqueue puts new items in the slot when free, otherwise in
|
||||
# the overflow. Promotion (called after each run's drain) moves the
|
||||
# next overflow item into the slot so the following recursion picks
|
||||
# it up. Clearing happens on /new and /reset via
|
||||
# _handle_reset_command.
|
||||
|
||||
def _enqueue_fifo(self, session_key: str, queued_event: "MessageEvent", adapter: Any) -> None:
|
||||
"""Append a /queue event to the FIFO chain for a session."""
|
||||
if adapter is None:
|
||||
return
|
||||
pending_slot = getattr(adapter, "_pending_messages", None)
|
||||
if pending_slot is None:
|
||||
return
|
||||
queued_events = getattr(self, "_queued_events", None)
|
||||
if queued_events is None:
|
||||
queued_events = {}
|
||||
self._queued_events = queued_events
|
||||
if session_key in pending_slot:
|
||||
queued_events.setdefault(session_key, []).append(queued_event)
|
||||
else:
|
||||
pending_slot[session_key] = queued_event
|
||||
|
||||
def _promote_queued_event(
|
||||
self,
|
||||
session_key: str,
|
||||
adapter: Any,
|
||||
pending_event: Optional["MessageEvent"],
|
||||
) -> Optional["MessageEvent"]:
|
||||
"""Promote the next overflow item after the slot was drained.
|
||||
|
||||
Called at the drain site after _dequeue_pending_event consumed
|
||||
(or failed to consume) the slot. If there's an overflow item:
|
||||
- When pending_event is None (slot was empty), return the
|
||||
overflow head as the new pending_event.
|
||||
- When pending_event already exists (slot was populated by an
|
||||
interrupt follow-up or similar), stage the overflow head in
|
||||
the slot so the NEXT recursion picks it up.
|
||||
Returns the (possibly updated) pending_event for drain to use.
|
||||
"""
|
||||
queued_events = getattr(self, "_queued_events", None)
|
||||
if not queued_events:
|
||||
return pending_event
|
||||
overflow = queued_events.get(session_key)
|
||||
if not overflow:
|
||||
return pending_event
|
||||
next_queued = overflow.pop(0)
|
||||
if not overflow:
|
||||
queued_events.pop(session_key, None)
|
||||
if pending_event is None:
|
||||
return next_queued
|
||||
if adapter is not None and hasattr(adapter, "_pending_messages"):
|
||||
adapter._pending_messages[session_key] = next_queued
|
||||
else:
|
||||
# No adapter — push back so we don't silently drop the item.
|
||||
queued_events.setdefault(session_key, []).insert(0, next_queued)
|
||||
return pending_event
|
||||
|
||||
def _queue_depth(self, session_key: str, *, adapter: Any = None) -> int:
|
||||
"""Total pending /queue items for a session — slot + overflow."""
|
||||
queued_events = getattr(self, "_queued_events", None) or {}
|
||||
depth = len(queued_events.get(session_key, []))
|
||||
if adapter is not None and session_key in getattr(adapter, "_pending_messages", {}):
|
||||
depth += 1
|
||||
return depth
|
||||
|
||||
def _update_runtime_status(self, gateway_state: Optional[str] = None, exit_reason: Optional[str] = None) -> None:
|
||||
try:
|
||||
from gateway.status import write_runtime_status
|
||||
@@ -2334,7 +2254,7 @@ class GatewayRunner:
|
||||
# Build initial channel directory for send_message name resolution
|
||||
try:
|
||||
from gateway.channel_directory import build_channel_directory
|
||||
directory = await build_channel_directory(self.adapters)
|
||||
directory = build_channel_directory(self.adapters)
|
||||
ch_count = sum(len(chs) for chs in directory.get("platforms", {}).values())
|
||||
logger.info("Channel directory built: %d target(s)", ch_count)
|
||||
except Exception as e:
|
||||
@@ -2368,6 +2288,11 @@ class GatewayRunner:
|
||||
# Start background session expiry watcher to finalize expired sessions
|
||||
asyncio.create_task(self._session_expiry_watcher())
|
||||
|
||||
# Start background kanban notifier — delivers `completed`, `blocked`,
|
||||
# `spawn_auto_blocked`, and `crashed` events to gateway subscribers
|
||||
# so human-in-the-loop workflows hear back without polling.
|
||||
asyncio.create_task(self._kanban_notifier_watcher())
|
||||
|
||||
# Start background reconnection watcher for platforms that failed at startup
|
||||
if self._failed_platforms:
|
||||
logger.info(
|
||||
@@ -2543,6 +2468,241 @@ class GatewayRunner:
|
||||
break
|
||||
await asyncio.sleep(1)
|
||||
|
||||
async def _kanban_notifier_watcher(self, interval: float = 5.0) -> None:
|
||||
"""Poll ``kanban_notify_subs`` and deliver terminal events to users.
|
||||
|
||||
For each subscription row, fetches ``task_events`` newer than the
|
||||
stored cursor with kind in the terminal set (``completed``,
|
||||
``blocked``, ``gave_up``, ``crashed``, ``timed_out``). Sends one
|
||||
message per new event to ``(platform, chat_id, thread_id)``,
|
||||
then advances the cursor. When a task reaches a terminal state
|
||||
(``completed`` / ``archived``), the subscription is removed.
|
||||
|
||||
Runs in the gateway event loop; all SQLite work is pushed to a
|
||||
thread via ``asyncio.to_thread`` so the loop never blocks on the
|
||||
WAL lock. Failures in one tick don't stop subsequent ticks.
|
||||
"""
|
||||
from gateway.config import Platform as _Platform
|
||||
try:
|
||||
from hermes_cli import kanban_db as _kb
|
||||
except Exception:
|
||||
logger.warning("kanban notifier: kanban_db not importable; notifier disabled")
|
||||
return
|
||||
|
||||
TERMINAL_KINDS = ("completed", "blocked", "gave_up", "crashed", "timed_out")
|
||||
# Terminal event kinds trigger automatic unsubscription — the task
|
||||
# is done, blocked, or in a retry-needed state that the human
|
||||
# shouldn't keep pinging a stale chat for. Previously we only
|
||||
# unsubbed when task.status in ('done', 'archived'), which left
|
||||
# subscriptions on 'blocked' / 'gave_up' / 'crashed' / 'timed_out'
|
||||
# tasks stranded forever.
|
||||
TERMINAL_EVENT_KINDS = TERMINAL_KINDS
|
||||
# Per-subscription send-failure counter. Adapter.send raising
|
||||
# means the chat is dead (deleted, bot kicked, etc.) — after N
|
||||
# consecutive send failures the sub is dropped so we don't spin
|
||||
# against a dead chat every 5 seconds forever.
|
||||
MAX_SEND_FAILURES = 3
|
||||
sub_fail_counts: dict[tuple, int] = getattr(
|
||||
self, "_kanban_sub_fail_counts", {}
|
||||
)
|
||||
self._kanban_sub_fail_counts = sub_fail_counts
|
||||
|
||||
# Initial delay so the gateway can finish wiring adapters.
|
||||
await asyncio.sleep(5)
|
||||
|
||||
while self._running:
|
||||
try:
|
||||
def _collect():
|
||||
conn = _kb.connect()
|
||||
try:
|
||||
_kb.init_db() # idempotent; handles first-run
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
subs = _kb.list_notify_subs(conn)
|
||||
deliveries: list[dict] = []
|
||||
for sub in subs:
|
||||
cursor, events = _kb.unseen_events_for_sub(
|
||||
conn,
|
||||
task_id=sub["task_id"],
|
||||
platform=sub["platform"],
|
||||
chat_id=sub["chat_id"],
|
||||
thread_id=sub.get("thread_id") or "",
|
||||
kinds=TERMINAL_KINDS,
|
||||
)
|
||||
if not events:
|
||||
continue
|
||||
task = _kb.get_task(conn, sub["task_id"])
|
||||
deliveries.append({
|
||||
"sub": sub,
|
||||
"cursor": cursor,
|
||||
"events": events,
|
||||
"task": task,
|
||||
})
|
||||
return deliveries
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
deliveries = await asyncio.to_thread(_collect)
|
||||
for d in deliveries:
|
||||
sub = d["sub"]
|
||||
task = d["task"]
|
||||
platform_str = (sub["platform"] or "").lower()
|
||||
try:
|
||||
plat = _Platform(platform_str)
|
||||
except ValueError:
|
||||
# Unknown platform string; skip and advance cursor so
|
||||
# we don't replay forever.
|
||||
await asyncio.to_thread(
|
||||
self._kanban_advance, sub, d["cursor"],
|
||||
)
|
||||
continue
|
||||
adapter = self.adapters.get(plat)
|
||||
if adapter is None:
|
||||
continue # platform not currently connected
|
||||
title = (task.title if task else sub["task_id"])[:120]
|
||||
for ev in d["events"]:
|
||||
kind = ev.kind
|
||||
# Identity prefix: attribute terminal pings to the
|
||||
# worker that did the work. Makes fleets (where one
|
||||
# chat subscribes to many tasks) legible at a glance.
|
||||
who = (task.assignee if task and task.assignee else None)
|
||||
tag = f"@{who} " if who else ""
|
||||
if kind == "completed":
|
||||
# Prefer the run's summary (the worker's
|
||||
# intentional human-facing handoff, carried
|
||||
# in the event payload), then fall back to
|
||||
# task.result for legacy rows written before
|
||||
# runs shipped.
|
||||
handoff = ""
|
||||
payload_summary = None
|
||||
if ev.payload and ev.payload.get("summary"):
|
||||
payload_summary = str(ev.payload["summary"])
|
||||
if payload_summary:
|
||||
h = payload_summary.strip().splitlines()[0][:200]
|
||||
handoff = f"\n{h}"
|
||||
elif task and task.result:
|
||||
r = task.result.strip().splitlines()[0][:160]
|
||||
handoff = f"\n{r}"
|
||||
msg = (
|
||||
f"✔ {tag}Kanban {sub['task_id']} done"
|
||||
f" — {title}{handoff}"
|
||||
)
|
||||
elif kind == "blocked":
|
||||
reason = ""
|
||||
if ev.payload and ev.payload.get("reason"):
|
||||
reason = f": {str(ev.payload['reason'])[:160]}"
|
||||
msg = f"⏸ {tag}Kanban {sub['task_id']} blocked{reason}"
|
||||
elif kind == "gave_up":
|
||||
err = ""
|
||||
if ev.payload and ev.payload.get("error"):
|
||||
err = f"\n{str(ev.payload['error'])[:200]}"
|
||||
msg = (
|
||||
f"✖ {tag}Kanban {sub['task_id']} gave up "
|
||||
f"after repeated spawn failures{err}"
|
||||
)
|
||||
elif kind == "crashed":
|
||||
msg = (
|
||||
f"✖ {tag}Kanban {sub['task_id']} worker crashed "
|
||||
f"(pid gone); dispatcher will retry"
|
||||
)
|
||||
elif kind == "timed_out":
|
||||
limit = 0
|
||||
if ev.payload and ev.payload.get("limit_seconds"):
|
||||
limit = int(ev.payload["limit_seconds"])
|
||||
msg = (
|
||||
f"⏱ {tag}Kanban {sub['task_id']} timed out "
|
||||
f"(max_runtime={limit}s); will retry"
|
||||
)
|
||||
else:
|
||||
continue
|
||||
metadata: dict[str, Any] = {}
|
||||
if sub.get("thread_id"):
|
||||
metadata["thread_id"] = sub["thread_id"]
|
||||
sub_key = (
|
||||
sub["task_id"], sub["platform"],
|
||||
sub["chat_id"], sub.get("thread_id") or "",
|
||||
)
|
||||
try:
|
||||
await adapter.send(
|
||||
sub["chat_id"], msg, metadata=metadata,
|
||||
)
|
||||
# Reset the failure counter on success.
|
||||
sub_fail_counts.pop(sub_key, None)
|
||||
except Exception as exc:
|
||||
fails = sub_fail_counts.get(sub_key, 0) + 1
|
||||
sub_fail_counts[sub_key] = fails
|
||||
logger.warning(
|
||||
"kanban notifier: send failed for %s on %s "
|
||||
"(attempt %d/%d): %s",
|
||||
sub["task_id"], platform_str, fails,
|
||||
MAX_SEND_FAILURES, exc,
|
||||
)
|
||||
if fails >= MAX_SEND_FAILURES:
|
||||
logger.warning(
|
||||
"kanban notifier: dropping subscription "
|
||||
"%s on %s after %d consecutive send failures",
|
||||
sub["task_id"], platform_str, fails,
|
||||
)
|
||||
await asyncio.to_thread(self._kanban_unsub, sub)
|
||||
sub_fail_counts.pop(sub_key, None)
|
||||
# Don't advance cursor on send failure — retry next tick.
|
||||
break
|
||||
else:
|
||||
# All events delivered; advance cursor + maybe unsub.
|
||||
await asyncio.to_thread(
|
||||
self._kanban_advance, sub, d["cursor"],
|
||||
)
|
||||
# Unsubscribe when the LAST delivered event is a
|
||||
# terminal kind (the task hit a "no further updates"
|
||||
# state), not just on task.status in {done, archived}.
|
||||
# Covers blocked / gave_up / crashed / timed_out which
|
||||
# used to leak subs forever.
|
||||
last_kind = d["events"][-1].kind if d["events"] else None
|
||||
task_terminal = task and task.status in ("done", "archived")
|
||||
event_terminal = last_kind in TERMINAL_EVENT_KINDS
|
||||
if task_terminal or event_terminal:
|
||||
await asyncio.to_thread(
|
||||
self._kanban_unsub, sub,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("kanban notifier tick failed: %s", exc)
|
||||
# Sleep with cancellation checks.
|
||||
for _ in range(int(max(1, interval))):
|
||||
if not self._running:
|
||||
return
|
||||
await asyncio.sleep(1)
|
||||
|
||||
def _kanban_advance(self, sub: dict, cursor: int) -> None:
|
||||
"""Sync helper: advance a subscription's cursor. Runs in to_thread."""
|
||||
from hermes_cli import kanban_db as _kb
|
||||
conn = _kb.connect()
|
||||
try:
|
||||
_kb.advance_notify_cursor(
|
||||
conn,
|
||||
task_id=sub["task_id"],
|
||||
platform=sub["platform"],
|
||||
chat_id=sub["chat_id"],
|
||||
thread_id=sub.get("thread_id") or "",
|
||||
new_cursor=cursor,
|
||||
)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def _kanban_unsub(self, sub: dict) -> None:
|
||||
from hermes_cli import kanban_db as _kb
|
||||
conn = _kb.connect()
|
||||
try:
|
||||
_kb.remove_notify_sub(
|
||||
conn,
|
||||
task_id=sub["task_id"],
|
||||
platform=sub["platform"],
|
||||
chat_id=sub["chat_id"],
|
||||
thread_id=sub.get("thread_id") or "",
|
||||
)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
async def _platform_reconnect_watcher(self) -> None:
|
||||
"""Background task that periodically retries connecting failed platforms.
|
||||
|
||||
@@ -2618,7 +2778,7 @@ class GatewayRunner:
|
||||
# Rebuild channel directory with the new adapter
|
||||
try:
|
||||
from gateway.channel_directory import build_channel_directory
|
||||
await build_channel_directory(self.adapters)
|
||||
build_channel_directory(self.adapters)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
@@ -3496,10 +3656,7 @@ class GatewayRunner:
|
||||
# doesn't think an agent is still active.
|
||||
return await self._handle_reset_command(event)
|
||||
|
||||
# /queue <prompt> — queue without interrupting.
|
||||
# Semantics: each /queue invocation produces its own full agent
|
||||
# turn, processed in FIFO order after the current run (and any
|
||||
# earlier /queue items) finishes. Messages are NOT merged.
|
||||
# /queue <prompt> — queue without interrupting
|
||||
if event.get_command() in ("queue", "q"):
|
||||
queued_text = event.get_command_args().strip()
|
||||
if not queued_text:
|
||||
@@ -3513,11 +3670,8 @@ class GatewayRunner:
|
||||
message_id=event.message_id,
|
||||
channel_prompt=event.channel_prompt,
|
||||
)
|
||||
self._enqueue_fifo(_quick_key, queued_event, adapter)
|
||||
depth = self._queue_depth(_quick_key, adapter=self.adapters.get(source.platform))
|
||||
if depth <= 1:
|
||||
return "Queued for the next turn."
|
||||
return f"Queued for the next turn. ({depth} queued)"
|
||||
adapter._pending_messages[_quick_key] = queued_event
|
||||
return "Queued for the next turn."
|
||||
|
||||
# /steer <prompt> — inject mid-run after the next tool call.
|
||||
# Unlike /queue (turn boundary), /steer lands BETWEEN tool-call
|
||||
@@ -3589,6 +3743,14 @@ class GatewayRunner:
|
||||
if _cmd_def_inner and _cmd_def_inner.name == "background":
|
||||
return await self._handle_background_command(event)
|
||||
|
||||
# /kanban must bypass the guard. It writes to a profile-agnostic
|
||||
# DB (kanban.db), not to the running agent's state. In fact
|
||||
# /kanban unblock is often the only way to free a worker that
|
||||
# has blocked waiting for a peer — letting that be dispatched
|
||||
# mid-run is the whole point of the board.
|
||||
if _cmd_def_inner and _cmd_def_inner.name == "kanban":
|
||||
return await self._handle_kanban_command(event)
|
||||
|
||||
# Session-level toggles that are safe to run mid-agent —
|
||||
# /yolo can unblock a pending approval prompt, /verbose cycles
|
||||
# the tool-progress display mode for the ongoing stream.
|
||||
@@ -3813,6 +3975,9 @@ class GatewayRunner:
|
||||
if canonical == "personality":
|
||||
return await self._handle_personality_command(event)
|
||||
|
||||
if canonical == "kanban":
|
||||
return await self._handle_kanban_command(event)
|
||||
|
||||
if canonical == "retry":
|
||||
return await self._handle_retry_command(event)
|
||||
|
||||
@@ -4318,7 +4483,7 @@ class GatewayRunner:
|
||||
if _loaded:
|
||||
_loaded_skill, _skill_dir, _display_name = _loaded
|
||||
_note = (
|
||||
f'[IMPORTANT: The "{_display_name}" skill is auto-loaded. '
|
||||
f'[SYSTEM: The "{_display_name}" skill is auto-loaded. '
|
||||
f"Follow its instructions for this session.]"
|
||||
)
|
||||
_part = _build_skill_message(_loaded_skill, _skill_dir, _note)
|
||||
@@ -4606,20 +4771,12 @@ class GatewayRunner:
|
||||
if not os.getenv(env_key):
|
||||
adapter = self.adapters.get(source.platform)
|
||||
if adapter:
|
||||
# Slack dispatches all Hermes commands through a single
|
||||
# parent slash command `/hermes`; bare `/sethome` is not
|
||||
# registered and would fail with "app did not respond".
|
||||
sethome_cmd = (
|
||||
"/hermes sethome"
|
||||
if source.platform == Platform.SLACK
|
||||
else "/sethome"
|
||||
)
|
||||
await adapter.send(
|
||||
source.chat_id,
|
||||
f"📬 No home channel is set for {platform_name.title()}. "
|
||||
f"A home channel is where Hermes delivers cron job results "
|
||||
f"and cross-platform messages.\n\n"
|
||||
f"Type {sethome_cmd} to make this chat your home channel, "
|
||||
f"Type /sethome to make this chat your home channel, "
|
||||
f"or ignore to skip."
|
||||
)
|
||||
|
||||
@@ -5152,13 +5309,6 @@ class GatewayRunner:
|
||||
self._cleanup_agent_resources(_old_agent)
|
||||
self._evict_cached_agent(session_key)
|
||||
|
||||
# Discard any /queue overflow for this session — /new is a
|
||||
# conversation-boundary operation, queued follow-ups from the
|
||||
# previous conversation must not bleed into the new one.
|
||||
_qe = getattr(self, "_queued_events", None)
|
||||
if _qe is not None:
|
||||
_qe.pop(session_key, None)
|
||||
|
||||
try:
|
||||
from tools.env_passthrough import clear_env_passthrough
|
||||
clear_env_passthrough()
|
||||
@@ -5255,6 +5405,84 @@ class GatewayRunner:
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
async def _handle_kanban_command(self, event: MessageEvent) -> str:
    """Handle /kanban by delegating to the shared kanban CLI.

    The command text (minus the leading ``/kanban`` token) is passed to
    ``hermes_cli.kanban.run_slash`` in a worker thread so the blocking
    work does not run on the gateway event loop.

    For ``/kanban create`` invocations whose output contains the CLI's
    ``Created t_<hex>`` success line, the originating gateway source
    (platform + chat + optional thread/user) is also subscribed to the
    new task via ``kanban_db.add_notify_sub``. A failure to subscribe is
    logged and never fails the command itself.

    Args:
        event: Incoming message event; ``event.text`` carries the raw
            command and ``event.source`` identifies the chat.

    Returns:
        The CLI output (truncated to 3800 characters plus a notice when
        longer), ``"(no output)"`` when the CLI produced nothing, or a
        ``"⚠ kanban error: ..."`` string when the CLI raised.
    """
    import asyncio
    import re
    from hermes_cli.kanban import run_slash

    text = (event.text or "").strip()
    # Strip the leading "/kanban" (with or without slash), leaving args.
    if text.startswith("/"):
        text = text.lstrip("/")
    # Only drop an exact "kanban" token; a plain prefix test would also
    # eat the start of an unrelated word such as "kanbanx".
    tokens = text.split(None, 1)
    if tokens and tokens[0] == "kanban":
        text = tokens[1] if len(tokens) > 1 else ""

    is_create = text.split(None, 1)[:1] == ["create"]

    try:
        output = await asyncio.to_thread(run_slash, text)
    except Exception as exc:  # pragma: no cover - defensive
        return f"⚠ kanban error: {exc}"
    # Normalise a falsy result up-front so the length check below cannot
    # raise on None before the "(no output)" fallback is reached.
    output = output or ""

    # Auto-subscribe on create. Parse the task id from the CLI's standard
    # success line ("Created t_abcd (ready, assignee=...)"). Output that
    # lacks that line (e.g. --json) is left alone; such callers can use
    # /kanban notify-subscribe explicitly.
    if is_create and output:
        m = re.search(r"Created\s+(t_[0-9a-f]+)\b", output)
        if m:
            task_id = m.group(1)
            try:
                source = event.source
                platform = getattr(source, "platform", None)
                platform_str = (
                    platform.value if hasattr(platform, "value") else str(platform or "")
                ).lower()
                chat_id = str(getattr(source, "chat_id", "") or "")
                thread_id = str(getattr(source, "thread_id", "") or "")
                user_id = str(getattr(source, "user_id", "") or "") or None
                if platform_str and chat_id:
                    def _sub():
                        # Runs in a worker thread: open a connection,
                        # record the subscription, always close.
                        from hermes_cli import kanban_db as _kb
                        conn = _kb.connect()
                        try:
                            _kb.add_notify_sub(
                                conn, task_id=task_id,
                                platform=platform_str, chat_id=chat_id,
                                thread_id=thread_id or None,
                                user_id=user_id,
                            )
                        finally:
                            conn.close()
                    await asyncio.to_thread(_sub)
                    output = (
                        output.rstrip()
                        + f"\n(subscribed — you'll be notified when {task_id} "
                        f"completes or blocks)"
                    )
            except Exception as exc:
                logger.warning("kanban create auto-subscribe failed: %s", exc)

    # Gateway messages have practical length caps; truncate long
    # listings to keep the UX reasonable.
    if len(output) > 3800:
        output = output[:3800] + "\n… (truncated; use `hermes kanban …` in your terminal for full output)"
    return output or "(no output)"
|
||||
|
||||
async def _handle_status_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /status command."""
|
||||
source = event.source
|
||||
@@ -5266,10 +5494,6 @@ class GatewayRunner:
|
||||
session_key = session_entry.session_key
|
||||
is_running = session_key in self._running_agents
|
||||
|
||||
# Count pending /queue follow-ups (slot + overflow).
|
||||
adapter = self.adapters.get(source.platform) if source else None
|
||||
queue_depth = self._queue_depth(session_key, adapter=adapter)
|
||||
|
||||
title = None
|
||||
if self._session_db:
|
||||
try:
|
||||
@@ -5289,10 +5513,6 @@ class GatewayRunner:
|
||||
f"**Last Activity:** {session_entry.updated_at.strftime('%Y-%m-%d %H:%M')}",
|
||||
f"**Tokens:** {session_entry.total_tokens:,}",
|
||||
f"**Agent Running:** {'Yes ⚡' if is_running else 'No'}",
|
||||
])
|
||||
if queue_depth:
|
||||
lines.append(f"**Queued follow-ups:** {queue_depth}")
|
||||
lines.extend([
|
||||
"",
|
||||
f"**Connected Platforms:** {', '.join(connected_platforms)}",
|
||||
])
|
||||
@@ -7582,7 +7802,7 @@ class GatewayRunner:
|
||||
change_detail = ". ".join(change_parts) + ". " if change_parts else ""
|
||||
reload_msg = {
|
||||
"role": "user",
|
||||
"content": f"[IMPORTANT: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
|
||||
"content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
|
||||
}
|
||||
try:
|
||||
session_entry = self.session_store.get_or_create_session(event.source)
|
||||
@@ -8521,7 +8741,7 @@ class GatewayRunner:
|
||||
from tools.ansi_strip import strip_ansi
|
||||
_out = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else ""
|
||||
synth_text = (
|
||||
f"[IMPORTANT: Background process {session_id} completed "
|
||||
f"[SYSTEM: Background process {session_id} completed "
|
||||
f"(exit code {session.exit_code}).\n"
|
||||
f"Command: {session.command}\n"
|
||||
f"Output:\n{_out}]"
|
||||
@@ -8831,25 +9051,6 @@ class GatewayRunner:
|
||||
with _lock:
|
||||
self._agent_cache.pop(session_key, None)
|
||||
|
||||
@staticmethod
|
||||
def _init_cached_agent_for_turn(agent: Any, interrupt_depth: int) -> None:
|
||||
"""Reset per-turn state on a cached agent before a new turn starts.
|
||||
|
||||
Both _last_activity_ts and _last_activity_desc are only reset for
|
||||
fresh external turns (depth 0); they are semantically paired —
|
||||
desc describes the activity *at* ts, so updating one without the
|
||||
other would make get_activity_summary() misleading.
|
||||
For interrupt-recursive turns both are preserved so the inactivity
|
||||
watchdog can accumulate stuck-turn idle time and fire the 30-min
|
||||
timeout (#15654). The depth-0 reset is still needed: a session
|
||||
idle for 29 min would otherwise trip the watchdog before the new
|
||||
turn makes its first API call (#9051).
|
||||
"""
|
||||
if interrupt_depth == 0:
|
||||
agent._last_activity_ts = time.time()
|
||||
agent._last_activity_desc = "starting new turn (cached)"
|
||||
agent._api_call_count = 0
|
||||
|
||||
def _release_evicted_agent_soft(self, agent: Any) -> None:
|
||||
"""Soft cleanup for cache-evicted agents — preserves session tool state.
|
||||
|
||||
@@ -9894,7 +10095,12 @@ class GatewayRunner:
|
||||
_cache.move_to_end(session_key)
|
||||
except KeyError:
|
||||
pass
|
||||
self._init_cached_agent_for_turn(agent, _interrupt_depth)
|
||||
# Reset activity timestamp so the inactivity timeout
|
||||
# handler doesn't see stale idle time from the previous
|
||||
# turn and immediately kill this agent. (#9051)
|
||||
agent._last_activity_ts = time.time()
|
||||
agent._last_activity_desc = "starting new turn (cached)"
|
||||
agent._api_call_count = 0
|
||||
logger.debug("Reusing cached agent for session %s", session_key)
|
||||
|
||||
if agent is None:
|
||||
@@ -10677,13 +10883,6 @@ class GatewayRunner:
|
||||
pending = None
|
||||
if result and adapter and session_key:
|
||||
pending_event = _dequeue_pending_event(adapter, session_key)
|
||||
# /queue overflow: after consuming the adapter's "next-up"
|
||||
# slot, promote the next queued event into it so the
|
||||
# recursive run's drain will see it. This keeps the slot
|
||||
# occupied for the full FIFO chain, which (a) preserves
|
||||
# order, and (b) causes any mid-chain /queue to correctly
|
||||
# route to overflow rather than jumping the queue.
|
||||
pending_event = self._promote_queued_event(session_key, adapter, pending_event)
|
||||
if result.get("interrupted") and not pending_event and result.get("interrupt_message"):
|
||||
interrupt_message = result.get("interrupt_message")
|
||||
if _is_control_interrupt_message(interrupt_message):
|
||||
@@ -10978,15 +11177,7 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
|
||||
if tick_count % CHANNEL_DIR_EVERY == 0 and adapters:
|
||||
try:
|
||||
from gateway.channel_directory import build_channel_directory
|
||||
if loop is not None:
|
||||
# build_channel_directory is async (Slack web calls), and
|
||||
# this ticker runs in a background thread. Schedule onto
|
||||
# the gateway event loop and wait briefly for completion
|
||||
# so refresh failures are still logged via the except.
|
||||
fut = asyncio.run_coroutine_threadsafe(
|
||||
build_channel_directory(adapters), loop
|
||||
)
|
||||
fut.result(timeout=30)
|
||||
build_channel_directory(adapters)
|
||||
except Exception as e:
|
||||
logger.debug("Channel directory refresh error: %s", e)
|
||||
|
||||
|
||||
+1
-17
@@ -467,27 +467,11 @@ def _resolve_api_key_provider_secret(
|
||||
pass
|
||||
return "", ""
|
||||
|
||||
from hermes_cli.config import get_env_value
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
val = (get_env_value(env_var) or "").strip()
|
||||
val = os.getenv(env_var, "").strip()
|
||||
if has_usable_secret(val):
|
||||
return val, env_var
|
||||
|
||||
# Fallback: try credential pool (e.g. zai key stored via auth.json)
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool(provider_id)
|
||||
if pool and pool.has_credentials():
|
||||
entry = pool.peek()
|
||||
if entry:
|
||||
key = getattr(entry, "access_token", "") or getattr(entry, "runtime_api_key", "")
|
||||
key = str(key).strip()
|
||||
if has_usable_secret(key):
|
||||
return key, f"credential_pool:{provider_id}"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return "", ""
|
||||
|
||||
|
||||
|
||||
+5
-108
@@ -140,6 +140,11 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
|
||||
cli_only=True, args_hint="[subcommand]",
|
||||
subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
|
||||
CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
|
||||
"Tools & Skills", args_hint="[subcommand]",
|
||||
subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
|
||||
"claim", "comment", "complete", "block", "unblock", "archive",
|
||||
"tail", "dispatch", "context", "init", "gc")),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
|
||||
cli_only=True),
|
||||
CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
|
||||
@@ -806,114 +811,6 @@ def discord_skill_commands_by_category(
|
||||
return trimmed_categories, uncategorized, hidden
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Slack native slash commands
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Slack slash command name constraints: lowercase a-z, 0-9, hyphens,
|
||||
# underscores. Max 32 chars. Slack app manifest accepts up to 50 slash
|
||||
# commands per app.
|
||||
_SLACK_MAX_SLASH_COMMANDS = 50
|
||||
_SLACK_NAME_LIMIT = 32
|
||||
_SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]")
|
||||
|
||||
|
||||
def _sanitize_slack_name(raw: str) -> str:
|
||||
"""Convert a command name to a valid Slack slash command name.
|
||||
|
||||
Slack allows lowercase a-z, digits, hyphens, and underscores. Max 32
|
||||
chars. Uppercase is lowercased; invalid chars are stripped.
|
||||
"""
|
||||
name = raw.lower()
|
||||
name = _SLACK_INVALID_CHARS.sub("", name)
|
||||
name = name.strip("-_")
|
||||
return name[:_SLACK_NAME_LIMIT]
|
||||
|
||||
|
||||
def slack_native_slashes() -> list[tuple[str, str, str]]:
    """Return (slash_name, description, usage_hint) triples for Slack.

    Every command in ``COMMAND_REGISTRY`` that passes
    ``_is_gateway_available`` is surfaced as a standalone slash command,
    followed by its aliases and then by plugin-registered commands.

    ``hermes`` is always the first entry. Names are sanitized with
    ``_sanitize_slack_name``; empty or already-seen names are dropped,
    and the list stops growing at ``_SLACK_MAX_SLASH_COMMANDS`` entries.
    Canonical names are added before aliases so they win slots when the
    cap is hit.

    Returns:
        List of ``(name, description, usage_hint)`` with the description
        cut to 140 characters and the hint to 100.
    """
    overrides = _resolve_config_gates()

    # Reserve /hermes as the catch-all top-level command.
    entries: list[tuple[str, str, str]] = [
        ("hermes", "Talk to Hermes or run a subcommand", "[subcommand] [args]"),
    ]
    seen: set[str] = {"hermes"}

    def _add(name: str, desc: str, hint: str) -> None:
        slack_name = _sanitize_slack_name(name)
        if not slack_name or slack_name in seen:
            return
        if len(entries) >= _SLACK_MAX_SLASH_COMMANDS:
            return
        # Keep descriptions short. Tolerate a missing description or hint
        # (e.g. from a plugin) instead of failing on None.
        entries.append((slack_name, (desc or "")[:140], (hint or "")[:100]))
        seen.add(slack_name)

    gateway_cmds = [
        cmd for cmd in COMMAND_REGISTRY if _is_gateway_available(cmd, overrides)
    ]

    # First pass: canonical names (so they win slots if we hit the cap).
    for cmd in gateway_cmds:
        _add(cmd.name, cmd.description, cmd.args_hint or "")

    # Second pass: aliases. An alias that sanitizes to a name already
    # taken is dropped by the dedup check inside _add.
    for cmd in gateway_cmds:
        for alias in cmd.aliases:
            _add(alias, f"Alias for /{cmd.name} — {cmd.description}", cmd.args_hint or "")

    # Third pass: plugin commands.
    for name, description, args_hint in _iter_plugin_command_entries():
        _add(name, description, args_hint or "")

    return entries
|
||||
|
||||
|
||||
def slack_app_manifest(request_url: str = "https://hermes-agent.local/slack/commands") -> dict[str, Any]:
    """Generate the slash-command portion of a Slack app manifest.

    One manifest entry is produced per triple returned by
    ``slack_native_slashes()``.

    Args:
        request_url: Value written to every entry's ``url`` field. The
            original author notes that Slack's schema requires it but
            that it is ignored in Socket Mode, so a placeholder is fine.

    Returns:
        ``{"features": {"slash_commands": [...]}}`` — only that portion;
        callers compose it into a full manifest. Each entry has
        ``command``, ``description``, ``should_escape`` and ``url``, plus
        ``usage_hint`` when the command has a non-empty hint.
    """
    slashes = []
    for name, desc, usage in slack_native_slashes():
        entry = {
            "command": f"/{name}",
            # Fall back to a generic description when none is given.
            "description": desc or f"Run /{name}",
            "should_escape": False,
            "url": request_url,
        }
        if usage:
            entry["usage_hint"] = usage
        slashes.append(entry)
    return {"features": {"slash_commands": slashes}}
|
||||
|
||||
|
||||
def slack_subcommand_map() -> dict[str, str]:
|
||||
"""Return subcommand -> /command mapping for Slack /hermes handler.
|
||||
|
||||
|
||||
@@ -465,7 +465,6 @@ DEFAULT_CONFIG = {
|
||||
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
|
||||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
|
||||
"auto_local_for_private_urls": True, # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud
|
||||
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
|
||||
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
|
||||
# Active only when a CDP-capable backend is attached (Browserbase or
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+11
-171
@@ -4780,35 +4780,11 @@ def cmd_webhook(args):
|
||||
webhook_command(args)
|
||||
|
||||
|
||||
def cmd_slack(args):
|
||||
"""Slack integration helpers.
|
||||
def cmd_kanban(args):
|
||||
"""Multi-profile collaboration board."""
|
||||
from hermes_cli.kanban import kanban_command
|
||||
|
||||
Dispatches ``hermes slack <subcommand>``. Currently supports:
|
||||
manifest — print or write a Slack app manifest with every gateway
|
||||
command registered as a first-class slash.
|
||||
"""
|
||||
sub = getattr(args, "slack_command", None)
|
||||
if sub in (None, ""):
|
||||
# No subcommand — print usage hint.
|
||||
print(
|
||||
"usage: hermes slack <subcommand>\n"
|
||||
"\n"
|
||||
"subcommands:\n"
|
||||
" manifest Generate a Slack app manifest with every gateway\n"
|
||||
" command registered as a native slash\n"
|
||||
"\n"
|
||||
"Run `hermes slack manifest -h` for details.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 1
|
||||
|
||||
if sub == "manifest":
|
||||
from hermes_cli.slack_cli import slack_manifest_command
|
||||
|
||||
return slack_manifest_command(args)
|
||||
|
||||
print(f"Unknown slack subcommand: {sub}", file=sys.stderr)
|
||||
return 1
|
||||
return kanban_command(args)
|
||||
|
||||
|
||||
def cmd_hooks(args):
|
||||
@@ -5956,88 +5932,6 @@ def _cmd_update_check():
|
||||
print(f" Run '{recommended_update_command()}' to install.")
|
||||
|
||||
|
||||
def _ensure_fhs_path_guard() -> None:
|
||||
"""Ensure /usr/local/bin is on PATH for RHEL-family root non-login shells.
|
||||
|
||||
Mirrors the post-symlink probe added to ``scripts/install.sh`` so that
|
||||
existing FHS-layout root installs on RHEL/CentOS/Rocky/Alma 8+ get
|
||||
repaired on ``hermes update`` without requiring a reinstall. The
|
||||
installer's assumption that ``/usr/local/bin`` is on PATH for every
|
||||
standard shell breaks on those distros in non-login interactive shells
|
||||
(su, sudo -s, tmux panes, some web terminals): /etc/bashrc doesn't
|
||||
add /usr/local/bin and /root/.bash_profile doesn't either. Symptom:
|
||||
``hermes`` prints ``command not found`` even though the symlink lives
|
||||
at /usr/local/bin/hermes.
|
||||
|
||||
Silent no-op on: non-Linux, non-root, non-FHS installs, and any system
|
||||
where ``bash -i -c 'command -v hermes'`` already resolves. Idempotent.
|
||||
"""
|
||||
if sys.platform != "linux":
|
||||
return
|
||||
try:
|
||||
if os.geteuid() != 0:
|
||||
return
|
||||
except AttributeError:
|
||||
return
|
||||
# Only act when this is actually an FHS-layout install (command link at
|
||||
# /usr/local/bin/hermes, code at /usr/local/lib/hermes-agent).
|
||||
fhs_link = Path("/usr/local/bin/hermes")
|
||||
if not fhs_link.is_symlink() and not fhs_link.exists():
|
||||
return
|
||||
|
||||
# Probe a fresh non-login interactive bash the way the user will use it.
|
||||
# ``bash -i -c`` sources ~/.bashrc but NOT ~/.bash_profile or /etc/profile,
|
||||
# which is the exact scenario where RHEL root loses /usr/local/bin.
|
||||
home = os.environ.get("HOME") or "/root"
|
||||
try:
|
||||
probe = subprocess.run(
|
||||
["env", "-i",
|
||||
f"HOME={home}",
|
||||
f"TERM={os.environ.get('TERM', 'dumb')}",
|
||||
"bash", "-i", "-c", "command -v hermes"],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
return # no bash or probe hung — don't block update on this
|
||||
if probe.returncode == 0:
|
||||
return # already on PATH, nothing to do
|
||||
|
||||
path_line = 'export PATH="/usr/local/bin:$PATH"'
|
||||
path_comment = (
|
||||
"# Hermes Agent — ensure /usr/local/bin is on PATH "
|
||||
"(RHEL non-login shells)"
|
||||
)
|
||||
wrote_any = False
|
||||
for candidate in (".bashrc", ".bash_profile"):
|
||||
cfg = Path(home) / candidate
|
||||
if not cfg.is_file():
|
||||
continue
|
||||
try:
|
||||
existing = cfg.read_text(errors="replace")
|
||||
except OSError:
|
||||
continue
|
||||
# Idempotency: skip if any uncommented PATH= line already references
|
||||
# /usr/local/bin. Mirrors the grep pattern used by install.sh.
|
||||
already_guarded = any(
|
||||
"/usr/local/bin" in line
|
||||
and "PATH" in line
|
||||
and not line.lstrip().startswith("#")
|
||||
for line in existing.splitlines()
|
||||
)
|
||||
if already_guarded:
|
||||
continue
|
||||
try:
|
||||
with cfg.open("a", encoding="utf-8") as f:
|
||||
f.write("\n" + path_comment + "\n" + path_line + "\n")
|
||||
except OSError as e:
|
||||
print(f" ⚠ Could not update {cfg}: {e}")
|
||||
continue
|
||||
print(f" ✓ Added /usr/local/bin to PATH in {cfg}")
|
||||
wrote_any = True
|
||||
if wrote_any:
|
||||
print(" (reload your shell or run 'source ~/.bashrc' to pick it up)")
|
||||
|
||||
|
||||
def cmd_update(args):
|
||||
"""Update Hermes Agent to the latest version.
|
||||
|
||||
@@ -6481,13 +6375,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
print()
|
||||
print("✓ Update complete!")
|
||||
|
||||
# Repair RHEL-family root installs where /usr/local/bin isn't on PATH
|
||||
# for non-login interactive shells. No-op on every other platform.
|
||||
try:
|
||||
_ensure_fhs_path_guard()
|
||||
except Exception as e:
|
||||
logger.debug("FHS PATH guard check failed: %s", e)
|
||||
|
||||
# Write exit code *before* the gateway restart attempt.
|
||||
# When running as ``hermes update --gateway`` (spawned by the gateway's
|
||||
# /update command), this process lives inside the gateway's systemd
|
||||
@@ -7918,54 +7805,6 @@ For more help on a command:
|
||||
)
|
||||
whatsapp_parser.set_defaults(func=cmd_whatsapp)
|
||||
|
||||
# =========================================================================
|
||||
# slack command
|
||||
# =========================================================================
|
||||
slack_parser = subparsers.add_parser(
|
||||
"slack",
|
||||
help="Slack integration helpers (manifest generation, etc.)",
|
||||
description="Slack integration helpers for Hermes.",
|
||||
)
|
||||
slack_sub = slack_parser.add_subparsers(dest="slack_command")
|
||||
slack_manifest = slack_sub.add_parser(
|
||||
"manifest",
|
||||
help="Print or write a Slack app manifest with every gateway command "
|
||||
"registered as a native slash (/btw, /stop, /model, ...)",
|
||||
description=(
|
||||
"Generate a Slack app manifest that registers every gateway "
|
||||
"command in COMMAND_REGISTRY as a first-class Slack slash "
|
||||
"command (matching Discord and Telegram parity). Paste the "
|
||||
"output into Slack app config → Features → App Manifest → "
|
||||
"Edit, then Save. Reinstall the app if Slack prompts for it."
|
||||
),
|
||||
)
|
||||
slack_manifest.add_argument(
|
||||
"--write",
|
||||
nargs="?",
|
||||
const=True,
|
||||
default=None,
|
||||
metavar="PATH",
|
||||
help="Write manifest to a file instead of stdout. With no PATH "
|
||||
"writes to $HERMES_HOME/slack-manifest.json.",
|
||||
)
|
||||
slack_manifest.add_argument(
|
||||
"--name",
|
||||
default=None,
|
||||
help='Bot display name (default: "Hermes")',
|
||||
)
|
||||
slack_manifest.add_argument(
|
||||
"--description",
|
||||
default=None,
|
||||
help="Bot description shown in Slack's app directory.",
|
||||
)
|
||||
slack_manifest.add_argument(
|
||||
"--slashes-only",
|
||||
action="store_true",
|
||||
help="Emit only the features.slash_commands array (for merging "
|
||||
"into an existing manifest manually).",
|
||||
)
|
||||
slack_parser.set_defaults(func=cmd_slack)
|
||||
|
||||
# =========================================================================
|
||||
# login command
|
||||
# =========================================================================
|
||||
@@ -8284,6 +8123,13 @@ For more help on a command:
|
||||
|
||||
webhook_parser.set_defaults(func=cmd_webhook)
|
||||
|
||||
# =========================================================================
|
||||
# kanban command — multi-profile collaboration board
|
||||
# =========================================================================
|
||||
from hermes_cli.kanban import build_parser as _build_kanban_parser
|
||||
kanban_parser = _build_kanban_parser(subparsers)
|
||||
kanban_parser.set_defaults(func=cmd_kanban)
|
||||
|
||||
# =========================================================================
|
||||
# hooks command — shell-hook inspection and management
|
||||
# =========================================================================
|
||||
@@ -8621,12 +8467,6 @@ Examples:
|
||||
skills_list.add_argument(
|
||||
"--source", default="all", choices=["all", "hub", "builtin", "local"]
|
||||
)
|
||||
skills_list.add_argument(
|
||||
"--enabled-only",
|
||||
action="store_true",
|
||||
help="Hide disabled skills. Use with -p <profile> to see exactly "
|
||||
"which skills will load for that profile.",
|
||||
)
|
||||
|
||||
skills_check = skills_subparsers.add_parser(
|
||||
"check", help="Check installed hub skills for updates"
|
||||
|
||||
@@ -33,6 +33,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
|
||||
# (model_id, display description shown in menus)
|
||||
OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("moonshotai/kimi-k2.6", "recommended"),
|
||||
("deepseek/deepseek-v4-pro", ""),
|
||||
("deepseek/deepseek-v4-flash", ""),
|
||||
("anthropic/claude-opus-4.7", ""),
|
||||
("anthropic/claude-opus-4.6", ""),
|
||||
("anthropic/claude-sonnet-4.6", ""),
|
||||
@@ -109,6 +111,8 @@ def _codex_curated_models() -> list[str]:
|
||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"nous": [
|
||||
"moonshotai/kimi-k2.6",
|
||||
"deepseek/deepseek-v4-pro",
|
||||
"deepseek/deepseek-v4-flash",
|
||||
"xiaomi/mimo-v2.5-pro",
|
||||
"xiaomi/mimo-v2.5",
|
||||
"anthropic/claude-opus-4.7",
|
||||
|
||||
+14
-62
@@ -1856,32 +1856,27 @@ def _setup_slack():
|
||||
if existing:
|
||||
print_info("Slack: already configured")
|
||||
if not prompt_yes_no("Reconfigure Slack?", False):
|
||||
# Even without reconfiguring, offer to refresh the manifest so
|
||||
# new commands (e.g. /btw, /stop, ...) get registered in Slack.
|
||||
if prompt_yes_no(
|
||||
"Regenerate the Slack app manifest with the latest command "
|
||||
"list? (recommended after `hermes update`)",
|
||||
True,
|
||||
):
|
||||
_write_slack_manifest_and_instruct()
|
||||
return
|
||||
|
||||
print_info("Steps to create a Slack app:")
|
||||
print_info(" 1. Go to https://api.slack.com/apps → Create New App")
|
||||
print_info(" Pick 'From an app manifest' — we'll generate one for you below.")
|
||||
print_info(" 1. Go to https://api.slack.com/apps → Create New App (from scratch)")
|
||||
print_info(" 2. Enable Socket Mode: Settings → Socket Mode → Enable")
|
||||
print_info(" • Create an App-Level Token with 'connections:write' scope")
|
||||
print_info(" 3. Install to Workspace: Settings → Install App")
|
||||
print_info(" 4. After installing, invite the bot to channels: /invite @YourBot")
|
||||
print_info(" 3. Add Bot Token Scopes: Features → OAuth & Permissions")
|
||||
print_info(" Required scopes: chat:write, app_mentions:read,")
|
||||
print_info(" channels:history, channels:read, im:history,")
|
||||
print_info(" im:read, im:write, users:read, files:read, files:write")
|
||||
print_info(" Optional for private channels: groups:history")
|
||||
print_info(" 4. Subscribe to Events: Features → Event Subscriptions → Enable")
|
||||
print_info(" Required events: message.im, message.channels, app_mention")
|
||||
print_info(" Optional for private channels: message.groups")
|
||||
print_warning(" ⚠ Without message.channels the bot will ONLY work in DMs,")
|
||||
print_warning(" not public channels.")
|
||||
print_info(" 5. Install to Workspace: Settings → Install App")
|
||||
print_info(" 6. Reinstall the app after any scope or event changes")
|
||||
print_info(" 7. After installing, invite the bot to channels: /invite @YourBot")
|
||||
print()
|
||||
print_info(" Full guide: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/slack/")
|
||||
print()
|
||||
|
||||
# Generate and write manifest up-front so the user can paste it into
|
||||
# the "Create from manifest" flow instead of clicking through scopes /
|
||||
# events / slash commands one at a time.
|
||||
_write_slack_manifest_and_instruct()
|
||||
|
||||
print()
|
||||
bot_token = prompt("Slack Bot Token (xoxb-...)", password=True)
|
||||
if not bot_token:
|
||||
@@ -1907,49 +1902,6 @@ def _setup_slack():
|
||||
print_info(" Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access.")
|
||||
|
||||
|
||||
def _write_slack_manifest_and_instruct():
    """Write the Slack app manifest under HERMES_HOME and print instructions.

    Builds the manifest with ``hermes_cli.slack_cli._build_full_manifest``,
    writes it as indented UTF-8 JSON to ``<hermes home>/slack-manifest.json``
    (creating the directory if needed), and prints where to paste it in
    the Slack app configuration.

    Best-effort: any exception while importing, building, or writing is
    caught, reported as a warning, and followed by a hint for generating
    the manifest manually. Nothing is raised to the caller.
    """
    try:
        import json as _json

        from hermes_cli.slack_cli import _build_full_manifest
        from hermes_constants import get_hermes_home

        manifest = _build_full_manifest(
            bot_name="Hermes",
            bot_description="Your Hermes agent on Slack",
        )
        target = Path(get_hermes_home()) / "slack-manifest.json"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(
            _json.dumps(manifest, indent=2, ensure_ascii=False) + "\n",
            encoding="utf-8",
        )
        print_success(f"Slack app manifest written to: {target}")
        print_info(
            "   Paste it into https://api.slack.com/apps → your app → Features "
            "→ App Manifest → Edit, then Save. Slack will prompt to "
            "reinstall if scopes or slash commands changed."
        )
        print_info(
            "   Re-run `hermes slack manifest --write` anytime to refresh after "
            "Hermes adds new commands."
        )
    except Exception as exc:  # pragma: no cover - best-effort UX helper
        print_warning(f"Couldn't write Slack manifest: {exc}")
        print_info(
            "   You can generate it manually later with: "
            "hermes slack manifest --write"
        )
|
||||
|
||||
|
||||
def _setup_matrix():
|
||||
"""Configure Matrix credentials."""
|
||||
print_header("Matrix")
|
||||
|
||||
+14
-60
@@ -599,24 +599,11 @@ def inspect_skill(identifier: str) -> Optional[dict]:
|
||||
return out
|
||||
|
||||
|
||||
def do_list(source_filter: str = "all",
|
||||
enabled_only: bool = False,
|
||||
console: Optional[Console] = None) -> None:
|
||||
"""List installed skills, distinguishing hub, builtin, and local skills.
|
||||
|
||||
Args:
|
||||
source_filter: ``all`` | ``hub`` | ``builtin`` | ``local``.
|
||||
enabled_only: If True, hide disabled skills from the output.
|
||||
|
||||
Enabled/disabled state is resolved against the currently active profile's
|
||||
config — ``hermes -p <profile> skills list`` reads that profile's
|
||||
``skills.disabled`` list because ``-p`` swaps ``HERMES_HOME`` at process
|
||||
start. No explicit profile flag needed here.
|
||||
"""
|
||||
def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None:
|
||||
"""List installed skills, distinguishing hub, builtin, and local skills."""
|
||||
from tools.skills_hub import HubLockFile, ensure_hub_dirs
|
||||
from tools.skills_sync import _read_manifest
|
||||
from tools.skills_tool import _find_all_skills
|
||||
from agent.skill_utils import get_disabled_skill_names
|
||||
|
||||
c = console or _console
|
||||
ensure_hub_dirs()
|
||||
@@ -624,26 +611,17 @@ def do_list(source_filter: str = "all",
|
||||
hub_installed = {e["name"]: e for e in lock.list_installed()}
|
||||
builtin_names = set(_read_manifest())
|
||||
|
||||
# Pull ALL skills (including disabled ones) so we can annotate status.
|
||||
all_skills = _find_all_skills(skip_disabled=True)
|
||||
disabled_names = get_disabled_skill_names()
|
||||
all_skills = _find_all_skills()
|
||||
|
||||
title = "Installed Skills"
|
||||
if enabled_only:
|
||||
title += " (enabled only)"
|
||||
|
||||
table = Table(title=title)
|
||||
table = Table(title="Installed Skills")
|
||||
table.add_column("Name", style="bold cyan")
|
||||
table.add_column("Category", style="dim")
|
||||
table.add_column("Source", style="dim")
|
||||
table.add_column("Trust", style="dim")
|
||||
table.add_column("Status", style="dim")
|
||||
|
||||
hub_count = 0
|
||||
builtin_count = 0
|
||||
local_count = 0
|
||||
enabled_count = 0
|
||||
disabled_count = 0
|
||||
|
||||
for skill in sorted(all_skills, key=lambda s: (s.get("category") or "", s["name"])):
|
||||
name = skill["name"]
|
||||
@@ -654,48 +632,29 @@ def do_list(source_filter: str = "all",
|
||||
source_type = "hub"
|
||||
source_display = hub_entry.get("source", "hub")
|
||||
trust = hub_entry.get("trust_level", "community")
|
||||
hub_count += 1
|
||||
elif name in builtin_names:
|
||||
source_type = "builtin"
|
||||
source_display = "builtin"
|
||||
trust = "builtin"
|
||||
builtin_count += 1
|
||||
else:
|
||||
source_type = "local"
|
||||
source_display = "local"
|
||||
trust = "local"
|
||||
local_count += 1
|
||||
|
||||
if source_filter != "all" and source_filter != source_type:
|
||||
continue
|
||||
|
||||
is_enabled = name not in disabled_names
|
||||
if enabled_only and not is_enabled:
|
||||
continue
|
||||
|
||||
if source_type == "hub":
|
||||
hub_count += 1
|
||||
elif source_type == "builtin":
|
||||
builtin_count += 1
|
||||
else:
|
||||
local_count += 1
|
||||
|
||||
if is_enabled:
|
||||
enabled_count += 1
|
||||
status_cell = "[bold green]enabled[/]"
|
||||
else:
|
||||
disabled_count += 1
|
||||
status_cell = "[dim red]disabled[/]"
|
||||
|
||||
trust_style = {"builtin": "bright_cyan", "trusted": "green", "community": "yellow", "local": "dim"}.get(trust, "dim")
|
||||
trust_label = "official" if source_display == "official" else trust
|
||||
table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]", status_cell)
|
||||
table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]")
|
||||
|
||||
c.print(table)
|
||||
summary = f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local"
|
||||
if enabled_only:
|
||||
summary += f" — {enabled_count} enabled shown"
|
||||
else:
|
||||
summary += f" — {enabled_count} enabled, {disabled_count} disabled"
|
||||
summary += "[/]\n"
|
||||
c.print(summary)
|
||||
c.print(
|
||||
f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local[/]\n"
|
||||
)
|
||||
|
||||
|
||||
def do_check(name: Optional[str] = None, console: Optional[Console] = None) -> None:
|
||||
@@ -1168,10 +1127,7 @@ def skills_command(args) -> None:
|
||||
elif action == "inspect":
|
||||
do_inspect(args.identifier)
|
||||
elif action == "list":
|
||||
do_list(
|
||||
source_filter=args.source,
|
||||
enabled_only=getattr(args, "enabled_only", False),
|
||||
)
|
||||
do_list(source_filter=args.source)
|
||||
elif action == "check":
|
||||
do_check(name=getattr(args, "name", None))
|
||||
elif action == "update":
|
||||
@@ -1323,12 +1279,11 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
|
||||
|
||||
elif action == "list":
|
||||
source_filter = "all"
|
||||
enabled_only = "--enabled-only" in args or "--enabled" in args
|
||||
if "--source" in args:
|
||||
idx = args.index("--source")
|
||||
if idx + 1 < len(args):
|
||||
source_filter = args[idx + 1]
|
||||
do_list(source_filter=source_filter, enabled_only=enabled_only, console=c)
|
||||
do_list(source_filter=source_filter, console=c)
|
||||
|
||||
elif action == "check":
|
||||
name = args[0] if args else None
|
||||
@@ -1416,8 +1371,7 @@ def _print_skills_help(console: Console) -> None:
|
||||
" [cyan]search[/] <query> Search registries for skills\n"
|
||||
" [cyan]install[/] <identifier> Install a skill (with security scan)\n"
|
||||
" [cyan]inspect[/] <identifier> Preview a skill without installing\n"
|
||||
" [cyan]list[/] [--source hub|builtin|local] [--enabled-only]\n"
|
||||
" List installed skills; --enabled-only filters to the active profile's live set\n"
|
||||
" [cyan]list[/] [--source hub|builtin|local] List installed skills\n"
|
||||
" [cyan]check[/] [name] Check hub skills for upstream updates\n"
|
||||
" [cyan]update[/] [name] Update hub skills with upstream changes\n"
|
||||
" [cyan]audit[/] [name] Re-scan hub skills for security\n"
|
||||
|
||||
@@ -1,152 +0,0 @@
|
||||
"""``hermes slack ...`` CLI subcommands.
|
||||
|
||||
Today only ``hermes slack manifest`` is implemented — it generates the
|
||||
Slack app manifest JSON for registering every gateway command as a native
|
||||
Slack slash (``/btw``, ``/stop``, ``/model``, …) so users get the same
|
||||
first-class slash UX Discord and Telegram already have.
|
||||
|
||||
Typical workflow::
|
||||
|
||||
$ hermes slack manifest > slack-manifest.json
|
||||
# or:
|
||||
$ hermes slack manifest --write
|
||||
|
||||
Then paste the printed JSON into the Slack app config (Features → App
|
||||
Manifest → Edit) and click Save. Slack diffs the manifest and prompts
|
||||
for reinstall when scopes/commands change.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _build_full_manifest(bot_name: str, bot_description: str) -> dict:
|
||||
"""Build a full Slack manifest merging display info + our slash list.
|
||||
|
||||
The slash-command list is always generated from ``COMMAND_REGISTRY`` so
|
||||
it stays in sync with the rest of Hermes. Other manifest sections
|
||||
(display info, OAuth scopes, socket mode) are set to sensible defaults
|
||||
for a Hermes deployment — users can tweak them in the Slack UI after
|
||||
pasting.
|
||||
"""
|
||||
from hermes_cli.commands import slack_app_manifest
|
||||
|
||||
partial = slack_app_manifest()
|
||||
slashes = partial["features"]["slash_commands"]
|
||||
|
||||
return {
|
||||
"_metadata": {
|
||||
"major_version": 1,
|
||||
"minor_version": 1,
|
||||
},
|
||||
"display_information": {
|
||||
"name": bot_name[:35],
|
||||
"description": (bot_description or "Your Hermes agent on Slack")[:140],
|
||||
"background_color": "#1a1a2e",
|
||||
},
|
||||
"features": {
|
||||
"bot_user": {
|
||||
"display_name": bot_name[:80],
|
||||
"always_online": True,
|
||||
},
|
||||
"slash_commands": slashes,
|
||||
"assistant_view": {
|
||||
"assistant_description": "Chat with Hermes in threads and DMs.",
|
||||
},
|
||||
},
|
||||
"oauth_config": {
|
||||
"scopes": {
|
||||
"bot": [
|
||||
"app_mentions:read",
|
||||
"assistant:write",
|
||||
"channels:history",
|
||||
"channels:read",
|
||||
"chat:write",
|
||||
"commands",
|
||||
"files:read",
|
||||
"files:write",
|
||||
"groups:history",
|
||||
"im:history",
|
||||
"im:read",
|
||||
"im:write",
|
||||
"users:read",
|
||||
],
|
||||
},
|
||||
},
|
||||
"settings": {
|
||||
"event_subscriptions": {
|
||||
"bot_events": [
|
||||
"app_mention",
|
||||
"assistant_thread_context_changed",
|
||||
"assistant_thread_started",
|
||||
"message.channels",
|
||||
"message.groups",
|
||||
"message.im",
|
||||
],
|
||||
},
|
||||
"interactivity": {
|
||||
"is_enabled": True,
|
||||
},
|
||||
"org_deploy_enabled": False,
|
||||
"socket_mode_enabled": True,
|
||||
"token_rotation_enabled": False,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def slack_manifest_command(args) -> int:
    """Print or write a Slack app manifest as JSON.

    Flags (all parsed in ``hermes_cli/main.py``) are read from ``args`` with
    ``getattr``, so a missing attribute behaves like an unset flag:

      --write [PATH]      Write to file instead of stdout. ``True`` (flag
                          given without a value) selects
                          ``<hermes home>/slack-manifest.json``; any other
                          value is used as a path with ``~`` expanded.
                          ``None`` or ``False`` prints to stdout.
      --name NAME         Override the bot display name (default: "Hermes")
      --description DESC  Override the bot description
      --slashes-only      Emit only the ``features.slash_commands`` array (for
                          merging into an existing manifest manually)

    Returns:
        Always ``0``.
    """
    name = getattr(args, "name", None) or "Hermes"
    description = getattr(args, "description", None) or "Your Hermes agent on Slack"

    if getattr(args, "slashes_only", False):
        from hermes_cli.commands import slack_app_manifest

        manifest = slack_app_manifest()["features"]["slash_commands"]
    else:
        manifest = _build_full_manifest(name, description)

    payload = json.dumps(manifest, indent=2, ensure_ascii=False) + "\n"

    write_target = getattr(args, "write", None)
    # ``False`` means "not requested", exactly like ``None``. It used to fall
    # through to ``Path(False)`` and crash with TypeError.
    if write_target is None or write_target is False:
        sys.stdout.write(payload)
        return 0

    if write_target is True:
        # --write with no value → default location
        try:
            from hermes_constants import get_hermes_home

            target = Path(get_hermes_home()) / "slack-manifest.json"
        except Exception:
            # Deliberately broad: any failure resolving the Hermes home
            # falls back to the conventional ~/.hermes directory.
            target = Path.home() / ".hermes" / "slack-manifest.json"
    else:
        target = Path(write_target).expanduser()

    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(payload, encoding="utf-8")

    # Status text goes to stderr so stdout stays reserved for the JSON payload.
    print(f"Slack manifest written to: {target}", file=sys.stderr)
    print(
        "\nNext steps:\n"
        "  1. Open https://api.slack.com/apps and pick your Hermes app\n"
        "     (or create a new one: Create New App → From an app manifest).\n"
        "  2. Features → App Manifest → paste the contents of\n"
        f"     {target}\n"
        "  3. Save; Slack will prompt to reinstall the app if scopes or\n"
        "     slash commands changed.\n"
        "  4. Make sure Socket Mode is enabled and you have a bot token\n"
        "     (xoxb-...) and app token (xapp-...) configured via\n"
        "     `hermes setup`.\n",
        file=sys.stderr,
    )
    return 0
|
||||
@@ -3103,13 +3103,23 @@ def _mount_plugin_api_routes():
|
||||
_log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file_name)
|
||||
continue
|
||||
try:
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"hermes_dashboard_plugin_{plugin['name']}", api_path,
|
||||
)
|
||||
module_name = f"hermes_dashboard_plugin_{plugin['name']}"
|
||||
spec = importlib.util.spec_from_file_location(module_name, api_path)
|
||||
if spec is None or spec.loader is None:
|
||||
continue
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
# Register in sys.modules BEFORE exec_module so pydantic/FastAPI
|
||||
# can resolve forward references (e.g. models defined in a file
|
||||
# that uses `from __future__ import annotations`). Without this,
|
||||
# TypeAdapter lazy-build fails at first request with
|
||||
# "is not fully defined" because the module namespace isn't
|
||||
# reachable by name for string-annotation resolution.
|
||||
sys.modules[module_name] = mod
|
||||
try:
|
||||
spec.loader.exec_module(mod)
|
||||
except Exception:
|
||||
sys.modules.pop(module_name, None)
|
||||
raise
|
||||
router = getattr(mod, "router", None)
|
||||
if router is None:
|
||||
_log.warning("Plugin %s api file has no 'router' attribute", plugin["name"])
|
||||
|
||||
+1
-12
@@ -832,18 +832,7 @@ class SessionDB:
|
||||
params = []
|
||||
|
||||
if not include_children:
|
||||
# Show root sessions and branch sessions (whose parent ended with
|
||||
# end_reason='branched' before the child was created), while still
|
||||
# hiding sub-agent runs and compression continuations (which also
|
||||
# carry a parent_session_id but were spawned while the parent was
|
||||
# still live — i.e., started_at < parent.ended_at).
|
||||
where_clauses.append(
|
||||
"(s.parent_session_id IS NULL"
|
||||
" OR EXISTS (SELECT 1 FROM sessions p"
|
||||
" WHERE p.id = s.parent_session_id"
|
||||
" AND p.end_reason = 'branched'"
|
||||
" AND s.started_at >= p.ended_at))"
|
||||
)
|
||||
where_clauses.append("s.parent_session_id IS NULL")
|
||||
|
||||
if source:
|
||||
where_clauses.append("s.source = ?")
|
||||
|
||||
+1591
File diff suppressed because it is too large
Load Diff
+752
@@ -0,0 +1,752 @@
|
||||
/*
|
||||
* Hermes Kanban — dashboard plugin styles.
|
||||
*
|
||||
* Surface, border, and text colors reference theme CSS vars so the board
|
||||
* reskins with the active dashboard theme. Status dots, the progress pill, and the amber staleness ring use fixed accent hex values.
|
||||
*/
|
||||
|
||||
.hermes-kanban {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
/* ---- Columns layout -------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-columns {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
|
||||
gap: 0.75rem;
|
||||
align-items: start;
|
||||
}
|
||||
|
||||
.hermes-kanban-column {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
background: color-mix(in srgb, var(--color-card) 85%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius);
|
||||
padding: 0.5rem;
|
||||
min-height: 200px;
|
||||
max-height: calc(100vh - 220px);
|
||||
transition: border-color 120ms ease, background-color 120ms ease;
|
||||
}
|
||||
|
||||
.hermes-kanban-column--drop {
|
||||
border-color: var(--color-ring);
|
||||
background: color-mix(in srgb, var(--color-ring) 8%, var(--color-card));
|
||||
}
|
||||
|
||||
.hermes-kanban-column-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.25rem 0.25rem 0.35rem;
|
||||
font-weight: 600;
|
||||
font-size: 0.85rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-column-label {
|
||||
flex: 1;
|
||||
letter-spacing: 0.01em;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-count {
|
||||
font-variant-numeric: tabular-nums;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-add {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 1px solid var(--color-border);
|
||||
color: var(--color-foreground);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
width: 22px;
|
||||
height: 22px;
|
||||
line-height: 1;
|
||||
font-size: 1rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-column-add:hover {
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-column-sub {
|
||||
padding: 0 0.25rem 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
border-bottom: 1px solid color-mix(in srgb, var(--color-border) 60%, transparent);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-body {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.45rem;
|
||||
overflow-y: auto;
|
||||
padding-right: 0.1rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-empty {
|
||||
padding: 1.5rem 0.5rem;
|
||||
text-align: center;
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-muted-foreground);
|
||||
border: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
/* ---- Status dots ----------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-dot {
|
||||
display: inline-block;
|
||||
width: 0.5rem;
|
||||
height: 0.5rem;
|
||||
border-radius: 999px;
|
||||
background: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-dot-triage { background: #b47dd6; } /* lilac — fresh/unspecified */
|
||||
.hermes-kanban-dot-todo { background: var(--color-muted-foreground); }
|
||||
.hermes-kanban-dot-ready { background: #d4b348; } /* amber */
|
||||
.hermes-kanban-dot-running { background: #3fb97d; } /* green */
|
||||
.hermes-kanban-dot-blocked { background: var(--color-destructive, #d14a4a); }
|
||||
.hermes-kanban-dot-done { background: #4a8cd1; } /* blue */
|
||||
.hermes-kanban-dot-archived { background: var(--color-border); }
|
||||
|
||||
/* ---- Progress pill (N/M child tasks done) --------------------------- */
|
||||
|
||||
.hermes-kanban-progress {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.62rem;
|
||||
padding: 0.05rem 0.35rem;
|
||||
border-radius: 999px;
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
border: 1px solid color-mix(in srgb, var(--color-border) 80%, transparent);
|
||||
color: var(--color-muted-foreground);
|
||||
letter-spacing: 0.02em;
|
||||
}
|
||||
.hermes-kanban-progress--full {
|
||||
background: color-mix(in srgb, #3fb97d 22%, transparent);
|
||||
border-color: color-mix(in srgb, #3fb97d 45%, transparent);
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
/* ---- Lanes (per-profile sub-grouping inside Running) ---------------- */
|
||||
|
||||
.hermes-kanban-lane {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
padding: 0.25rem 0 0.35rem;
|
||||
border-top: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
}
|
||||
.hermes-kanban-lane:first-child {
|
||||
border-top: 0;
|
||||
padding-top: 0;
|
||||
}
|
||||
.hermes-kanban-lane-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.4rem;
|
||||
font-size: 0.65rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: var(--color-muted-foreground);
|
||||
padding: 0 0.1rem;
|
||||
}
|
||||
.hermes-kanban-lane-name {
|
||||
font-weight: 600;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
.hermes-kanban-lane-count {
|
||||
margin-left: auto;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
/* ---- Card ------------------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-card {
|
||||
cursor: grab;
|
||||
transition: transform 100ms ease, box-shadow 100ms ease;
|
||||
}
|
||||
.hermes-kanban-card:hover {
|
||||
box-shadow: 0 1px 0 0 var(--color-ring) inset, 0 0 0 1px var(--color-ring) inset;
|
||||
}
|
||||
.hermes-kanban-card:active {
|
||||
cursor: grabbing;
|
||||
transform: scale(0.995);
|
||||
}
|
||||
|
||||
.hermes-kanban-card-content {
|
||||
padding: 0.5rem 0.6rem !important;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.3rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-id {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.65rem;
|
||||
color: var(--color-muted-foreground);
|
||||
letter-spacing: 0.03em;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-title {
|
||||
font-size: 0.85rem;
|
||||
font-weight: 500;
|
||||
line-height: 1.3;
|
||||
color: var(--color-foreground);
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-meta {
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
gap: 0.55rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-priority {
|
||||
font-size: 0.6rem !important;
|
||||
padding: 0.05rem 0.3rem !important;
|
||||
background: color-mix(in srgb, var(--color-ring) 18%, transparent);
|
||||
color: var(--color-foreground);
|
||||
border: 1px solid color-mix(in srgb, var(--color-ring) 40%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-tag {
|
||||
font-size: 0.6rem !important;
|
||||
padding: 0.05rem 0.3rem !important;
|
||||
}
|
||||
|
||||
.hermes-kanban-assignee {
|
||||
font-weight: 500;
|
||||
color: color-mix(in srgb, var(--color-foreground) 80%, var(--color-muted-foreground));
|
||||
}
|
||||
.hermes-kanban-unassigned {
|
||||
font-style: italic;
|
||||
}
|
||||
.hermes-kanban-ago {
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
/* ---- Inline create --------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-inline-create {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
padding: 0.5rem;
|
||||
margin-bottom: 0.5rem;
|
||||
background: color-mix(in srgb, var(--color-card) 70%, transparent);
|
||||
border: 1px dashed var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
/* ---- Drawer (task detail side panel) --------------------------------- */
|
||||
|
||||
.hermes-kanban-drawer-shade {
|
||||
position: fixed;
|
||||
inset: 0;
|
||||
background: rgba(0, 0, 0, 0.45);
|
||||
z-index: 60;
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
}
|
||||
|
||||
/* Task-detail side panel: a full-height column pinned to the right edge by
   the flex-end shade that wraps it. */
.hermes-kanban-drawer {
  width: min(480px, 92vw);
  /* 100vh is the fallback. Browsers that understand dvh use the dynamic
     viewport height, so the bottom of the drawer stays on screen when
     mobile browser toolbars are showing. */
  height: 100vh;
  height: 100dvh;
  background: var(--color-card);
  border-left: 1px solid var(--color-border);
  display: flex;
  flex-direction: column;
  box-shadow: -4px 0 18px rgba(0, 0, 0, 0.35);
  animation: hermes-kanban-drawer-in 180ms ease-out;
}
|
||||
|
||||
@keyframes hermes-kanban-drawer-in {
|
||||
from { transform: translateX(100%); opacity: 0.3; }
|
||||
to { transform: translateX(0); opacity: 1; }
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 0.6rem 0.8rem;
|
||||
border-bottom: 1px solid var(--color-border);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-close {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 1.25rem;
|
||||
line-height: 1;
|
||||
cursor: pointer;
|
||||
padding: 0 0.25rem;
|
||||
}
|
||||
.hermes-kanban-drawer-close:hover { color: var(--color-foreground); }
|
||||
|
||||
.hermes-kanban-drawer-body {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
padding: 0.9rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.85rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-title {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
font-size: 1rem;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-meta {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.15rem;
|
||||
padding: 0.5rem 0.6rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
.hermes-kanban-meta-row {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.72rem;
|
||||
}
|
||||
.hermes-kanban-meta-label {
|
||||
width: 92px;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-meta-value {
|
||||
color: var(--color-foreground);
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.hermes-kanban-actions {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.3rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-section {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-section-head {
|
||||
font-size: 0.72rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.07em;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-pre {
|
||||
margin: 0;
|
||||
padding: 0.45rem 0.55rem;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.72rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-comment {
|
||||
border-left: 2px solid color-mix(in srgb, var(--color-ring) 35%, transparent);
|
||||
padding-left: 0.5rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.2rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-comment-head {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
}
|
||||
.hermes-kanban-comment-author {
|
||||
font-weight: 600;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-comment-ago {
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-event {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
.hermes-kanban-event-kind {
|
||||
color: var(--color-foreground);
|
||||
min-width: 6rem;
|
||||
}
|
||||
.hermes-kanban-event-payload {
|
||||
color: var(--color-muted-foreground);
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
max-width: 280px;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-comment-row {
|
||||
display: flex;
|
||||
gap: 0.4rem;
|
||||
padding: 0.55rem 0.75rem;
|
||||
border-top: 1px solid var(--color-border);
|
||||
background: color-mix(in srgb, var(--color-card) 90%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-count {
|
||||
display: inline-flex;
|
||||
gap: 0.2rem;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
/* ---- Selection chrome ----------------------------------------------- */
|
||||
|
||||
.hermes-kanban-card--selected :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 2px var(--color-ring) inset,
|
||||
0 0 0 1px var(--color-ring) inset;
|
||||
background: color-mix(in srgb, var(--color-ring) 6%, var(--color-card));
|
||||
}
|
||||
|
||||
.hermes-kanban-card-check {
|
||||
width: 0.85rem;
|
||||
height: 0.85rem;
|
||||
margin: 0;
|
||||
cursor: pointer;
|
||||
accent-color: var(--color-ring);
|
||||
}
|
||||
|
||||
/* ---- Bulk action bar ------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-bulk {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.4rem 0.75rem;
|
||||
background: color-mix(in srgb, var(--color-ring) 10%, var(--color-card));
|
||||
border: 1px solid color-mix(in srgb, var(--color-ring) 40%, var(--color-border));
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.hermes-kanban-bulk-count {
|
||||
font-weight: 600;
|
||||
font-size: 0.75rem;
|
||||
padding-right: 0.25rem;
|
||||
}
|
||||
.hermes-kanban-bulk-btn {
|
||||
height: 1.7rem !important;
|
||||
padding: 0 0.5rem !important;
|
||||
font-size: 0.7rem !important;
|
||||
border: 1px solid var(--color-border);
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-bulk-btn:hover {
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
}
|
||||
.hermes-kanban-bulk-reassign {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
padding-left: 0.5rem;
|
||||
border-left: 1px solid color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
}
|
||||
|
||||
/* ---- Dependency editor chips --------------------------------------- */
|
||||
|
||||
.hermes-kanban-deps-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 0.4rem;
|
||||
}
|
||||
.hermes-kanban-deps-label {
|
||||
font-size: 0.68rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: var(--color-muted-foreground);
|
||||
min-width: 4rem;
|
||||
}
|
||||
.hermes-kanban-deps-chips {
|
||||
display: flex;
|
||||
gap: 0.3rem;
|
||||
flex-wrap: wrap;
|
||||
flex: 1;
|
||||
}
|
||||
.hermes-kanban-deps-empty {
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
font-style: italic;
|
||||
}
|
||||
.hermes-kanban-dep-chip {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.15rem;
|
||||
padding: 0.1rem 0.35rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 6%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.68rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-dep-chip-x {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
cursor: pointer;
|
||||
font-size: 0.85rem;
|
||||
line-height: 1;
|
||||
padding: 0 0.15rem;
|
||||
}
|
||||
.hermes-kanban-dep-chip-x:hover { color: var(--color-destructive, #d14a4a); }
|
||||
|
||||
/* ---- Inline edit affordances --------------------------------------- */
|
||||
|
||||
.hermes-kanban-editable {
|
||||
cursor: pointer;
|
||||
border-bottom: 1px dotted color-mix(in srgb, var(--color-border) 80%, transparent);
|
||||
}
|
||||
.hermes-kanban-editable:hover {
|
||||
color: var(--color-foreground);
|
||||
border-bottom-color: var(--color-ring);
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-title-text {
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-drawer-title-text:hover {
|
||||
text-decoration: underline;
|
||||
text-decoration-color: var(--color-ring);
|
||||
text-decoration-style: dotted;
|
||||
text-underline-offset: 3px;
|
||||
}
|
||||
|
||||
.hermes-kanban-edit-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.hermes-kanban-section-head-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.hermes-kanban-edit-link {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 0.7rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
cursor: pointer;
|
||||
padding: 0;
|
||||
}
|
||||
.hermes-kanban-edit-link:hover { color: var(--color-ring); }
|
||||
|
||||
.hermes-kanban-textarea {
|
||||
width: 100%;
|
||||
min-height: 8rem;
|
||||
background: var(--color-card);
|
||||
color: var(--color-foreground);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
padding: 0.5rem 0.6rem;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.8rem;
|
||||
line-height: 1.5;
|
||||
resize: vertical;
|
||||
}
|
||||
/* Focus ring for the description editor. */
.hermes-kanban-textarea:focus {
  /* Transparent instead of `none`: forced-colors mode discards box-shadow
     but repaints outlines, so keyboard focus stays visible there. In
     normal rendering the outline is invisible and the shadow is the ring. */
  outline: 2px solid transparent;
  border-color: var(--color-ring);
  box-shadow: 0 0 0 2px color-mix(in srgb, var(--color-ring) 30%, transparent);
}
|
||||
|
||||
/* ---- Markdown rendering -------------------------------------------- */
|
||||
|
||||
/* Container for rendered markdown: compact text in the foreground colour. */
.hermes-kanban-md {
  color: var(--color-foreground);
  font-size: 0.8rem;
  line-height: 1.55;
}

.hermes-kanban-md p {
  margin: 0.25rem 0;
}

/* Headings share spacing and line-height; only the size steps down h1 -> h4. */
.hermes-kanban-md h1,
.hermes-kanban-md h2,
.hermes-kanban-md h3,
.hermes-kanban-md h4 {
  line-height: 1.25;
  margin: 0.6rem 0 0.2rem;
}

.hermes-kanban-md h1 {
  font-size: 1.05rem;
}

.hermes-kanban-md h2 {
  font-size: 0.95rem;
}

.hermes-kanban-md h3 {
  font-size: 0.88rem;
}

.hermes-kanban-md h4 {
  font-size: 0.82rem;
}

/* Lists: indentation comes from the left margin, padding is zeroed. */
.hermes-kanban-md ul {
  padding: 0;
  margin: 0.25rem 0 0.25rem 1.1rem;
}

.hermes-kanban-md li {
  margin: 0.1rem 0;
}

.hermes-kanban-md a {
  text-decoration: underline;
  color: var(--color-ring);
}

/* Inline code: small monospace on a faint foreground-tinted chip. */
.hermes-kanban-md code {
  padding: 0.05rem 0.3rem;
  border-radius: 3px;
  background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
  font-family: var(--font-mono, ui-monospace, monospace);
  font-size: 0.75rem;
}

/* Code block wrapper: bordered box that scrolls horizontally on overflow. */
.hermes-kanban-md-code {
  margin: 0.35rem 0;
  padding: 0.5rem 0.6rem;
  border: 1px solid var(--color-border);
  border-radius: var(--radius-sm, 0.25rem);
  background: color-mix(in srgb, var(--color-foreground) 5%, transparent);
  overflow-x: auto;
}

/* Code inside a code block drops the inline-chip background and padding. */
.hermes-kanban-md-code code {
  padding: 0;
  background: transparent;
  font-size: 0.75rem;
  white-space: pre;
}

.hermes-kanban-md strong {
  font-weight: 600;
}
|
||||
|
||||
/* ---- Touch-drag proxy ---------------------------------------------- */
|
||||
|
||||
/* Touch-drag proxy element: slightly enlarged, translucent, with a drop
   shadow. It ignores pointer events and has transitions disabled. */
.hermes-kanban-touch-proxy {
  opacity: 0.85;
  transform: scale(1.02);
  box-shadow: 0 8px 20px rgba(0, 0, 0, 0.35);
  transition: none;
  pointer-events: none;
}
|
||||
|
||||
|
||||
/* ---- Staleness tiers ------------------------------------------------ */
|
||||
|
||||
/* Amber tier: a thin inset ring on the card content, thicker and fully
   opaque on hover. `:where()` adds no specificity for the content part. */
.hermes-kanban-card--stale-amber :where(.hermes-kanban-card-content) {
  box-shadow: 0 0 0 1px #d4b34888 inset;
}

.hermes-kanban-card--stale-amber:hover :where(.hermes-kanban-card-content) {
  box-shadow: 0 0 0 2px #d4b348 inset;
}

/* Red tier: inset ring in the destructive colour plus an outer glow; both
   get stronger on hover. */
.hermes-kanban-card--stale-red :where(.hermes-kanban-card-content) {
  box-shadow:
    0 0 0 1px var(--color-destructive, #d14a4a) inset,
    0 0 8px color-mix(in srgb, var(--color-destructive, #d14a4a) 30%, transparent);
}

.hermes-kanban-card--stale-red:hover :where(.hermes-kanban-card-content) {
  box-shadow:
    0 0 0 2px var(--color-destructive, #d14a4a) inset,
    0 0 10px color-mix(in srgb, var(--color-destructive, #d14a4a) 45%, transparent);
}
|
||||
|
||||
/* ---- Worker log pane ------------------------------------------------ */
|
||||
|
||||
/* Log pane: capped at 340px tall, scrolls in both directions, and keeps the
   text's own whitespace and line breaks (no wrapping). */
.hermes-kanban-log {
  max-height: 340px;
  overflow: auto;
  font-size: 0.7rem;
  line-height: 1.45;
  white-space: pre;
}
|
||||
|
||||
|
||||
/* ---- Run history (per-attempt log in the drawer) ------------------- */
|
||||
|
||||
/* One run entry: a tinted box whose left border colour encodes the outcome. */
.hermes-kanban-run {
  margin-bottom: 0.4rem;
  padding: 0.35rem 0.5rem;
  border-left: 2px solid var(--color-border);
  border-radius: var(--radius-sm, 0.25rem);
  background: color-mix(in srgb, var(--color-foreground) 3%, transparent);
}

/* Outcome modifiers: each one overrides the left border colour. */
.hermes-kanban-run--active {
  border-left-color: #3fb97d;
}

.hermes-kanban-run--completed {
  border-left-color: #4a8cd1;
}

/* generic fallback when outcome is unset */
.hermes-kanban-run--ended {
  border-left-color: #6b7280;
}

.hermes-kanban-run--blocked {
  border-left-color: var(--color-destructive, #d14a4a);
}

/* Failure outcomes also get a faint destructive-coloured background. */
.hermes-kanban-run--crashed,
.hermes-kanban-run--timed_out,
.hermes-kanban-run--gave_up,
.hermes-kanban-run--spawn_failed {
  background: color-mix(in srgb, var(--color-destructive, #d14a4a) 6%, transparent);
  border-left-color: var(--color-destructive, #d14a4a);
}

.hermes-kanban-run--reclaimed {
  border-left-color: #d4b348;
}

/* Header row: outcome, profile, elapsed and "ago" laid out on one flex line. */
.hermes-kanban-run-head {
  display: flex;
  gap: 0.6rem;
  align-items: center;
  font-size: 0.7rem;
}

.hermes-kanban-run-outcome {
  color: var(--color-foreground);
  font-family: var(--font-mono, ui-monospace, monospace);
  font-weight: 600;
  letter-spacing: 0.05em;
  text-transform: uppercase;
}

.hermes-kanban-run-profile {
  color: var(--color-muted-foreground);
}

/* Tabular figures keep the digits of the elapsed time the same width. */
.hermes-kanban-run-elapsed {
  color: var(--color-muted-foreground);
  font-variant-numeric: tabular-nums;
}

/* The auto left margin pushes this item to the far end of the flex row. */
.hermes-kanban-run-ago {
  color: var(--color-muted-foreground);
  margin-left: auto;
}

.hermes-kanban-run-summary {
  padding: 0.2rem 0 0;
  color: var(--color-foreground);
  font-size: 0.75rem;
}

.hermes-kanban-run-error {
  padding: 0.15rem 0 0;
  color: var(--color-destructive, #d14a4a);
  font-family: var(--font-mono, ui-monospace, monospace);
  font-size: 0.7rem;
}
|
||||
/* Run metadata: small muted monospace text that keeps its own line breaks
   and may break inside long unbroken tokens instead of overflowing. */
.hermes-kanban-run-meta {
  display: block;
  padding: 0.15rem 0 0;
  color: var(--color-muted-foreground);
  font-family: var(--font-mono, ui-monospace, monospace);
  font-size: 0.65rem;
  white-space: pre-wrap;
  /* `word-break: break-word` is a deprecated value; `overflow-wrap: anywhere`
     is the standard way to get the same wrapping. The old declaration is
     kept as a fallback for engines that lack `anywhere`. */
  overflow-wrap: anywhere;
  word-break: break-word;
}
|
||||
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "kanban",
|
||||
"label": "Kanban",
|
||||
"description": "Multi-agent collaboration board — drag-drop cards across columns, read comment threads, see which profile is running what",
|
||||
"icon": "Package",
|
||||
"version": "1.0.0",
|
||||
"tab": {
|
||||
"path": "/kanban",
|
||||
"position": "after:skills"
|
||||
},
|
||||
"entry": "dist/index.js",
|
||||
"css": "dist/style.css",
|
||||
"api": "plugin_api.py"
|
||||
}
|
||||
@@ -0,0 +1,830 @@
|
||||
"""Kanban dashboard plugin — backend API routes.
|
||||
|
||||
Mounted at /api/plugins/kanban/ by the dashboard plugin system.
|
||||
|
||||
This layer is intentionally thin: every handler is a small wrapper around
|
||||
``hermes_cli.kanban_db`` or a direct SQL query. Writes use the same code
|
||||
paths the CLI and gateway ``/kanban`` command use, so the three surfaces
|
||||
cannot drift.
|
||||
|
||||
Live updates arrive via the ``/events`` WebSocket, which tails the
|
||||
append-only ``task_events`` table on a short poll interval (WAL mode lets
|
||||
reads run alongside the dispatcher's IMMEDIATE write transactions).
|
||||
|
||||
Security note
|
||||
-------------
|
||||
The dashboard's HTTP auth middleware (``web_server.auth_middleware``)
|
||||
explicitly skips ``/api/plugins/`` — plugin routes are unauthenticated by
|
||||
design because the dashboard binds to localhost by default. For the
|
||||
WebSocket we still require the session token as a ``?token=`` query
|
||||
parameter (browsers cannot set the ``Authorization`` header on an upgrade
|
||||
request), matching the established pattern used by the in-browser PTY
|
||||
bridge in ``hermes_cli/web_server.py``. If you run the dashboard with
|
||||
``--host 0.0.0.0``, every plugin route — kanban included — becomes
|
||||
reachable from the network. Don't do that on a shared host.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
import time
|
||||
from dataclasses import asdict
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect, status as http_status
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from hermes_cli import kanban_db
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth helper — WebSocket only (HTTP routes live behind the dashboard's
|
||||
# existing plugin-bypass; this is documented above).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _check_ws_token(provided: Optional[str]) -> bool:
|
||||
"""Constant-time compare against the dashboard session token.
|
||||
|
||||
Imported lazily so the plugin still loads in test contexts where the
|
||||
dashboard web_server module isn't importable (e.g. the bare-FastAPI
|
||||
test harness).
|
||||
"""
|
||||
if not provided:
|
||||
return False
|
||||
try:
|
||||
from hermes_cli import web_server as _ws
|
||||
except Exception:
|
||||
# No dashboard context (tests). Accept so the tail loop is still
|
||||
# testable; in production the dashboard module always imports
|
||||
# cleanly because it's the caller.
|
||||
return True
|
||||
expected = getattr(_ws, "_SESSION_TOKEN", None)
|
||||
if not expected:
|
||||
return True
|
||||
return hmac.compare_digest(str(provided), str(expected))
|
||||
|
||||
|
||||
def _conn():
    """Return a new ``kanban_db`` connection after attempting schema setup.

    ``kanban_db.init_db()`` is called on every invocation (the original
    notes describe it as idempotent) so a fresh install does not surface
    a "no such table" error when a write route is hit first.  A failure
    there is logged as a warning and not re-raised; ``kanban_db.connect()``
    is attempted regardless.
    """
    try:
        kanban_db.init_db()
    except Exception as init_error:
        log.warning("kanban init_db failed: %s", init_error)

    connection = kanban_db.connect()
    return connection
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Serialization helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Columns shown by the dashboard, in left-to-right order. "archived" is
|
||||
# available via a filter toggle rather than a visible column.
|
||||
BOARD_COLUMNS: list[str] = [
    "triage",
    "todo",
    "ready",
    "running",
    "blocked",
    "done",
]
|
||||
|
||||
|
||||
def _task_dict(task: kanban_db.Task) -> dict[str, Any]:
    """Serialise a task dataclass to a dict and attach a derived ``age``.

    ``age`` comes from ``kanban_db.task_age`` so the UI can colour stale
    cards without computing deltas client-side.  Every dataclass field is
    included as-is; no truncation of ``body`` happens here.
    """
    return {**asdict(task), "age": kanban_db.task_age(task)}
|
||||
|
||||
|
||||
def _event_dict(event: kanban_db.Event) -> dict[str, Any]:
|
||||
return {
|
||||
"id": event.id,
|
||||
"task_id": event.task_id,
|
||||
"kind": event.kind,
|
||||
"payload": event.payload,
|
||||
"created_at": event.created_at,
|
||||
"run_id": event.run_id,
|
||||
}
|
||||
|
||||
|
||||
def _comment_dict(c: kanban_db.Comment) -> dict[str, Any]:
|
||||
return {
|
||||
"id": c.id,
|
||||
"task_id": c.task_id,
|
||||
"author": c.author,
|
||||
"body": c.body,
|
||||
"created_at": c.created_at,
|
||||
}
|
||||
|
||||
|
||||
def _run_dict(r: kanban_db.Run) -> dict[str, Any]:
|
||||
"""Serialise a Run for the drawer's Run history section."""
|
||||
return {
|
||||
"id": r.id,
|
||||
"task_id": r.task_id,
|
||||
"profile": r.profile,
|
||||
"step_key": r.step_key,
|
||||
"status": r.status,
|
||||
"claim_lock": r.claim_lock,
|
||||
"claim_expires": r.claim_expires,
|
||||
"worker_pid": r.worker_pid,
|
||||
"max_runtime_seconds": r.max_runtime_seconds,
|
||||
"last_heartbeat_at": r.last_heartbeat_at,
|
||||
"started_at": r.started_at,
|
||||
"ended_at": r.ended_at,
|
||||
"outcome": r.outcome,
|
||||
"summary": r.summary,
|
||||
"metadata": r.metadata,
|
||||
"error": r.error,
|
||||
}
|
||||
|
||||
|
||||
def _links_for(conn: sqlite3.Connection, task_id: str) -> dict[str, list[str]]:
|
||||
"""Return {'parents': [...], 'children': [...]} for a task."""
|
||||
parents = [
|
||||
r["parent_id"]
|
||||
for r in conn.execute(
|
||||
"SELECT parent_id FROM task_links WHERE child_id = ? ORDER BY parent_id",
|
||||
(task_id,),
|
||||
)
|
||||
]
|
||||
children = [
|
||||
r["child_id"]
|
||||
for r in conn.execute(
|
||||
"SELECT child_id FROM task_links WHERE parent_id = ? ORDER BY child_id",
|
||||
(task_id,),
|
||||
)
|
||||
]
|
||||
return {"parents": parents, "children": children}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /board
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/board")
def get_board(
    tenant: Optional[str] = Query(None, description="Filter to a single tenant"),
    include_archived: bool = Query(False),
):
    """Return the board grouped by status column, plus filter metadata.

    The connection comes from ``_conn()``, which attempts schema setup
    first so a fresh install doesn't surface a "failed to load" error on
    the plugin tab.

    Every task dict is decorated with ``link_counts``, ``comment_count``
    and ``progress`` (``None`` when the task has no children).  The
    response also carries the distinct tenants, the distinct assignees of
    non-archived tasks, the highest ``task_events`` id, and the server's
    current Unix time.
    """
    conn = _conn()
    try:
        tasks = kanban_db.list_tasks(
            conn, tenant=tenant, include_archived=include_archived,
        )

        # Parent/child tallies per task id, from one scan of task_links.
        link_counts: dict[str, dict[str, int]] = {}
        link_rows = conn.execute(
            "SELECT parent_id, child_id FROM task_links"
        ).fetchall()
        for link in link_rows:
            as_parent = link_counts.setdefault(
                link["parent_id"], {"parents": 0, "children": 0},
            )
            as_parent["children"] += 1
            as_child = link_counts.setdefault(
                link["child_id"], {"parents": 0, "children": 0},
            )
            as_child["parents"] += 1

        # Comment count per task id (single aggregate query).
        comment_counts: dict[str, int] = {}
        for tally in conn.execute(
            "SELECT task_id, COUNT(*) AS n FROM task_comments GROUP BY task_id"
        ):
            comment_counts[tally["task_id"]] = tally["n"]

        # Progress rollup per parent: children done vs. total, computed in
        # one pass over task_links joined to the child's status.
        progress: dict[str, dict[str, int]] = {}
        child_rows = conn.execute(
            "SELECT l.parent_id AS pid, t.status AS cstatus "
            "FROM task_links l JOIN tasks t ON t.id = l.child_id"
        ).fetchall()
        for child in child_rows:
            rollup = progress.setdefault(child["pid"], {"done": 0, "total": 0})
            rollup["total"] += 1
            if child["cstatus"] == "done":
                rollup["done"] += 1

        newest_event = conn.execute(
            "SELECT COALESCE(MAX(id), 0) AS m FROM task_events"
        ).fetchone()
        latest_event_id = newest_event["m"]

        # Visible columns in display order; "archived" only on request.
        columns: dict[str, list[dict]] = {}
        for column_name in BOARD_COLUMNS:
            columns[column_name] = []
        if include_archived:
            columns["archived"] = []

        # Tasks are appended in the order list_tasks returned them, so any
        # ordering applied there carries through to each column.
        for t in tasks:
            card = _task_dict(t)
            card["link_counts"] = link_counts.get(t.id, {"parents": 0, "children": 0})
            card["comment_count"] = comment_counts.get(t.id, 0)
            card["progress"] = progress.get(t.id)  # None when the task has no children
            # A status without a visible column falls back to "todo".
            target = columns.get(t.status)
            if target is None:
                target = columns["todo"]
            target.append(card)

        # Known tenants for the UI filter dropdown.
        tenant_rows = conn.execute(
            "SELECT DISTINCT tenant FROM tasks WHERE tenant IS NOT NULL ORDER BY tenant"
        )
        tenants = [row["tenant"] for row in tenant_rows]

        # Distinct assignees for the lane-by-profile sub-grouping.
        assignee_rows = conn.execute(
            "SELECT DISTINCT assignee FROM tasks WHERE assignee IS NOT NULL "
            "AND status != 'archived' ORDER BY assignee"
        )
        assignees = [row["assignee"] for row in assignee_rows]

        return {
            "columns": [
                {"name": name, "tasks": cards} for name, cards in columns.items()
            ],
            "tenants": tenants,
            "assignees": assignees,
            "latest_event_id": int(latest_event_id),
            "now": int(time.time()),
        }
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /tasks/:id
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/tasks/{task_id}")
def get_task(task_id: str):
    # Returns the task together with its comments, events, parent/child
    # links and runs.  Responds 404 when the id is unknown.  The
    # connection is closed on every path.
    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
        if task is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")

        task_payload = _task_dict(task)
        comments = [_comment_dict(c) for c in kanban_db.list_comments(conn, task_id)]
        events = [_event_dict(e) for e in kanban_db.list_events(conn, task_id)]
        links = _links_for(conn, task_id)
        runs = [_run_dict(r) for r in kanban_db.list_runs(conn, task_id)]
        return {
            "task": task_payload,
            "comments": comments,
            "events": events,
            "links": links,
            "runs": runs,
        }
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /tasks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class CreateTaskBody(BaseModel):
    # Request body for POST /tasks.  Only ``title`` is required; every
    # field is forwarded to ``kanban_db.create_task`` by the route handler.

    # -- what the task is --------------------------------------------------
    title: str
    body: Optional[str] = None

    # -- routing -----------------------------------------------------------
    assignee: Optional[str] = None
    tenant: Optional[str] = None
    priority: int = 0

    # -- workspace ---------------------------------------------------------
    workspace_kind: str = "scratch"
    workspace_path: Optional[str] = None

    # -- relationships / intake --------------------------------------------
    parents: list[str] = Field(default_factory=list)  # fresh list per request
    triage: bool = False
    idempotency_key: Optional[str] = None

    # -- execution ---------------------------------------------------------
    max_runtime_seconds: Optional[int] = None
    skills: Optional[list[str]] = None
|
||||
|
||||
|
||||
@router.post("/tasks")
def create_task(payload: CreateTaskBody):
    # Creates a task through ``kanban_db.create_task`` (attributed to
    # "dashboard") and returns it re-read from the database.  A
    # ``ValueError`` raised inside the try block is reported as HTTP 400.
    conn = _conn()
    try:
        new_id = kanban_db.create_task(
            conn,
            title=payload.title,
            body=payload.body,
            assignee=payload.assignee,
            created_by="dashboard",
            workspace_kind=payload.workspace_kind,
            workspace_path=payload.workspace_path,
            tenant=payload.tenant,
            priority=payload.priority,
            parents=payload.parents,
            triage=payload.triage,
            idempotency_key=payload.idempotency_key,
            max_runtime_seconds=payload.max_runtime_seconds,
            skills=payload.skills,
        )
        created = kanban_db.get_task(conn, new_id)
        if created:
            return {"task": _task_dict(created)}
        return {"task": None}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PATCH /tasks/:id (status / assignee / priority / title / body)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class UpdateTaskBody(BaseModel):
    # Request body for PATCH /tasks/{task_id}.  Every field is optional;
    # the handler skips any field left as None.

    # -- plain field edits ---------------------------------------------------
    status: Optional[str] = None
    assignee: Optional[str] = None
    priority: Optional[int] = None
    title: Optional[str] = None
    body: Optional[str] = None

    # -- transition details --------------------------------------------------
    # ``result`` is passed to complete_task, ``block_reason`` to block_task.
    result: Optional[str] = None
    block_reason: Optional[str] = None

    # -- structured handoff --------------------------------------------------
    # Forwarded to complete_task when status transitions to 'done'.
    # Dashboard parity with ``hermes kanban complete --summary ...
    # --metadata ...``.
    summary: Optional[str] = None
    metadata: Optional[dict] = None
|
||||
|
||||
|
||||
@router.patch("/tasks/{task_id}")
def update_task(task_id: str, payload: UpdateTaskBody):
    """Apply a partial update to one task and return the refreshed task.

    Fields left as ``None`` are skipped.  Request validation (task exists,
    status is a known value, title is not blank) happens before anything
    is written, so a request rejected with 400 cannot leave a half-applied
    patch behind.  Sections are then applied in order: assignee, status,
    priority, title/body.

    A 409 raised by a later section does not undo earlier sections.
    Priority and title/body each use their own ``write_txn`` block; the
    ``kanban_db`` verbs presumably commit on their own as well.

    Raises:
        HTTPException: 404 if the task does not exist; 400 for an unknown
            status or a blank title; 409 if assignment raises
            ``RuntimeError`` or the status transition is refused.
    """
    conn = _conn()
    try:
        if kanban_db.get_task(conn, task_id) is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")

        # --- validate everything before the first write -------------------
        s = payload.status
        if s is not None and s not in (
            "done", "blocked", "ready", "archived", "todo", "running", "triage",
        ):
            raise HTTPException(status_code=400, detail=f"unknown status: {s}")

        new_title: Optional[str] = None
        if payload.title is not None:
            new_title = payload.title.strip()
            if not new_title:
                raise HTTPException(status_code=400, detail="title cannot be empty")

        # --- assignee -----------------------------------------------------
        if payload.assignee is not None:
            try:
                # "" means unassign: it is passed on as None.
                assigned = kanban_db.assign_task(
                    conn, task_id, payload.assignee or None,
                )
            except RuntimeError as e:
                raise HTTPException(status_code=409, detail=str(e)) from e
            if not assigned:
                raise HTTPException(status_code=404, detail="task not found")

        # --- status -------------------------------------------------------
        if s is not None:
            if s == "done":
                ok = kanban_db.complete_task(
                    conn, task_id,
                    result=payload.result,
                    summary=payload.summary,
                    metadata=payload.metadata,
                )
            elif s == "blocked":
                ok = kanban_db.block_task(conn, task_id, reason=payload.block_reason)
            elif s == "ready":
                # Re-open a blocked task through the structured verb; any
                # other origin (e.g. todo -> ready drag-drop) is a direct
                # status write.
                current = kanban_db.get_task(conn, task_id)
                if current and current.status == "blocked":
                    ok = kanban_db.unblock_task(conn, task_id)
                else:
                    ok = _set_status_direct(conn, task_id, "ready")
            elif s == "archived":
                ok = kanban_db.archive_task(conn, task_id)
            else:
                # "todo", "running" or "triage" (validated above).
                ok = _set_status_direct(conn, task_id, s)
            if not ok:
                raise HTTPException(
                    status_code=409,
                    detail=f"status transition to {s!r} not valid from current state",
                )

        # --- priority -----------------------------------------------------
        if payload.priority is not None:
            priority = int(payload.priority)
            with kanban_db.write_txn(conn):
                conn.execute(
                    "UPDATE tasks SET priority = ? WHERE id = ?",
                    (priority, task_id),
                )
                conn.execute(
                    "INSERT INTO task_events (task_id, kind, payload, created_at) "
                    "VALUES (?, 'reprioritized', ?, ?)",
                    (task_id, json.dumps({"priority": priority}), int(time.time())),
                )

        # --- title / body -------------------------------------------------
        if new_title is not None or payload.body is not None:
            sets, vals = [], []
            if new_title is not None:
                sets.append("title = ?")
                vals.append(new_title)
            if payload.body is not None:
                sets.append("body = ?")
                vals.append(payload.body)
            vals.append(task_id)
            with kanban_db.write_txn(conn):
                # Only the fixed "title = ?" / "body = ?" fragments are
                # interpolated; all values are bound parameters.
                conn.execute(
                    f"UPDATE tasks SET {', '.join(sets)} WHERE id = ?", vals,
                )
                conn.execute(
                    "INSERT INTO task_events (task_id, kind, payload, created_at) "
                    "VALUES (?, 'edited', NULL, ?)",
                    (task_id, int(time.time())),
                )

        updated = kanban_db.get_task(conn, task_id)
        return {"task": _task_dict(updated) if updated else None}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _set_status_direct(
|
||||
conn: sqlite3.Connection, task_id: str, new_status: str,
|
||||
) -> bool:
|
||||
"""Direct status write for drag-drop moves that aren't covered by the
|
||||
structured complete/block/unblock/archive verbs (e.g. todo<->ready,
|
||||
running<->ready). Appends a ``status`` event row for the live feed.
|
||||
|
||||
When this transitions OFF ``running`` to anything other than the
|
||||
terminal verbs above (which own their own run closing), we close the
|
||||
active run with outcome='reclaimed' so attempt history isn't
|
||||
orphaned. ``running -> ready`` via drag-drop is the common case
|
||||
(user yanking a stuck worker back to the queue).
|
||||
"""
|
||||
# Returns False when the task id is unknown or the UPDATE did not touch
# exactly one row; returns True otherwise.
with kanban_db.write_txn(conn):
|
||||
# Snapshot current state so we know whether to close a run.
|
||||
prev = conn.execute(
|
||||
"SELECT status, current_run_id FROM tasks WHERE id = ?",
|
||||
(task_id,),
|
||||
).fetchone()
|
||||
# Unknown task id: nothing to update.
if prev is None:
|
||||
return False
|
||||
was_running = prev["status"] == "running"
|
||||
|
||||
# The claim fields (claim_lock, claim_expires, worker_pid) keep their
# values only when the new status is 'running'; otherwise they are
# reset to NULL.
cur = conn.execute(
|
||||
"UPDATE tasks SET status = ?, "
|
||||
" claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, "
|
||||
" claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, "
|
||||
" worker_pid = CASE WHEN ? = 'running' THEN worker_pid ELSE NULL END "
|
||||
"WHERE id = ?",
|
||||
(new_status, new_status, new_status, new_status, task_id),
|
||||
)
|
||||
if cur.rowcount != 1:
|
||||
return False
|
||||
run_id = None
|
||||
# Close the run only when the task was 'running', is leaving 'running',
# and has a current_run_id recorded.
if was_running and new_status != "running" and prev["current_run_id"]:
|
||||
run_id = kanban_db._end_run(
|
||||
conn, task_id,
|
||||
outcome="reclaimed", status="reclaimed",
|
||||
summary=f"status changed to {new_status} (dashboard/direct)",
|
||||
)
|
||||
# The event row is written on every successful call; run_id stays None
# unless a run was closed just above.
conn.execute(
|
||||
"INSERT INTO task_events (task_id, run_id, kind, payload, created_at) "
|
||||
"VALUES (?, ?, 'status', ?, ?)",
|
||||
(task_id, run_id, json.dumps({"status": new_status}), int(time.time())),
|
||||
)
|
||||
# If we re-opened something, children may have gone stale.
|
||||
# NOTE(review): the callers visible in this file only pass 'ready', 'todo',
# 'running' or 'triage', so the 'done' case looks unreachable from here —
# confirm before relying on it.
if new_status in ("done", "ready"):
|
||||
kanban_db.recompute_ready(conn)
|
||||
return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Comments
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class CommentBody(BaseModel):
    # Request body for POST /tasks/{task_id}/comments.
    body: str  # required; the handler rejects whitespace-only text with 400
    author: Optional[str] = "dashboard"  # the handler also maps None/"" to "dashboard"
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/comments")
def add_comment(task_id: str, payload: CommentBody):
    # Appends a comment to an existing task.  A whitespace-only body is
    # rejected with 400 before any connection is opened; an unknown task id
    # yields 404.
    if payload.body.strip() == "":
        raise HTTPException(status_code=400, detail="body is required")

    conn = _conn()
    try:
        existing = kanban_db.get_task(conn, task_id)
        if existing is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")

        # The body is stored exactly as sent (not stripped).
        author = payload.author or "dashboard"
        kanban_db.add_comment(conn, task_id, author=author, body=payload.body)
        return {"ok": True}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class LinkBody(BaseModel):
    # Request body for POST /links: the two task ids to connect.
    parent_id: str
    child_id: str
|
||||
|
||||
|
||||
@router.post("/links")
def add_link(payload: LinkBody):
    # Creates a parent -> child link through ``kanban_db.link_tasks``.  A
    # ``ValueError`` from that call is reported as HTTP 400.
    conn = _conn()
    try:
        kanban_db.link_tasks(conn, payload.parent_id, payload.child_id)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    else:
        return {"ok": True}
    finally:
        conn.close()
|
||||
|
||||
|
||||
@router.delete("/links")
def delete_link(parent_id: str = Query(...), child_id: str = Query(...)):
    # Removes a parent -> child link.  ``ok`` is the truthiness of whatever
    # ``kanban_db.unlink_tasks`` returned.
    conn = _conn()
    try:
        removed = kanban_db.unlink_tasks(conn, parent_id, child_id)
        return {"ok": bool(removed)}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk actions (multi-select on the board)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class BulkTaskBody(BaseModel):
    # Request body for POST /tasks/bulk: one patch applied to many task ids.
    ids: list[str]

    # Fields left as None are skipped by the handler.
    status: Optional[str] = None
    assignee: Optional[str] = None  # "" = unassign; None = leave unchanged
    priority: Optional[int] = None

    # When true the handler archives each task and ignores ``status``.
    archive: bool = False
|
||||
|
||||
|
||||
@router.post("/tasks/bulk")
def bulk_update(payload: BulkTaskBody):
    """Apply the same patch to every id in ``payload.ids``.

    This is an *independent* iteration — per-task failures don't abort
    siblings.  Returns a per-id outcome so the UI can surface partials.

    For each id the steps run in order: archive (when ``payload.archive``
    is set) or else the status change, then assignee, then priority.  A
    refused step marks the entry failed, but later steps for that id still
    run.  An unknown id or an unknown status skips the remaining steps for
    that id.  When several steps fail, ``error`` holds the last message.

    ``status="archived"`` is accepted and routed through
    ``kanban_db.archive_task``, matching ``PATCH /tasks/{task_id}``.

    Raises:
        HTTPException: 400 when ``ids`` is empty after dropping blank ids.
    """
    ids = [task_id for task_id in (payload.ids or []) if task_id]
    if not ids:
        raise HTTPException(status_code=400, detail="ids is required")

    results: list[dict] = []
    conn = _conn()
    try:
        for tid in ids:
            entry: dict[str, Any] = {"id": tid, "ok": True}
            try:
                if kanban_db.get_task(conn, tid) is None:
                    entry.update(ok=False, error="not found")
                    results.append(entry)
                    continue

                if payload.archive:
                    # The archive flag takes precedence over any status.
                    if not kanban_db.archive_task(conn, tid):
                        entry.update(ok=False, error="archive refused")
                elif payload.status is not None:
                    s = payload.status
                    if s == "done":
                        ok = kanban_db.complete_task(conn, tid)
                    elif s == "blocked":
                        ok = kanban_db.block_task(conn, tid)
                    elif s == "ready":
                        # Blocked tasks are re-opened through the structured
                        # verb; anything else is a direct status write.
                        current = kanban_db.get_task(conn, tid)
                        if current and current.status == "blocked":
                            ok = kanban_db.unblock_task(conn, tid)
                        else:
                            ok = _set_status_direct(conn, tid, "ready")
                    elif s == "archived":
                        # Parity with the single-task PATCH route.
                        ok = kanban_db.archive_task(conn, tid)
                    elif s in ("todo", "running", "triage"):
                        ok = _set_status_direct(conn, tid, s)
                    else:
                        entry.update(ok=False, error=f"unknown status {s!r}")
                        results.append(entry)
                        continue
                    if not ok:
                        entry.update(ok=False, error=f"transition to {s!r} refused")

                if payload.assignee is not None:
                    try:
                        # "" means unassign: it is passed on as None.
                        if not kanban_db.assign_task(
                            conn, tid, payload.assignee or None,
                        ):
                            entry.update(ok=False, error="assign refused")
                    except RuntimeError as e:
                        entry.update(ok=False, error=str(e))

                if payload.priority is not None:
                    priority = int(payload.priority)
                    with kanban_db.write_txn(conn):
                        conn.execute(
                            "UPDATE tasks SET priority = ? WHERE id = ?",
                            (priority, tid),
                        )
                        conn.execute(
                            "INSERT INTO task_events (task_id, kind, payload, created_at) "
                            "VALUES (?, 'reprioritized', ?, ?)",
                            (tid, json.dumps({"priority": priority}), int(time.time())),
                        )
            except Exception as e:  # defensive — one bad id shouldn't kill the batch
                entry.update(ok=False, error=str(e))
            results.append(entry)
        return {"results": results}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin config (read dashboard.kanban.* defaults from config.yaml)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/config")
def get_config():
    """Return kanban dashboard preferences from the Hermes config.

    Reads the ``dashboard.kanban`` section via
    ``hermes_cli.config.load_config``; defaults are used for anything
    missing.  Used by the UI to pre-select tenant filters, toggle markdown
    rendering, or set column-width preferences without a round-trip per
    page load.

    This endpoint never fails on bad configuration: a load error is logged
    and treated as an empty config, and a ``dashboard`` or
    ``dashboard.kanban`` value that is not a mapping (e.g. ``kanban: true``)
    is ignored instead of raising ``AttributeError``.
    """
    try:
        from hermes_cli.config import load_config

        cfg = load_config() or {}
    except Exception as exc:
        # Best-effort: fall back to defaults, but leave a trace in the log.
        log.warning("kanban: could not load config, using defaults: %s", exc)
        cfg = {}

    dash_cfg = cfg.get("dashboard") if isinstance(cfg, dict) else None
    k_cfg = dash_cfg.get("kanban") if isinstance(dash_cfg, dict) else None
    if not isinstance(k_cfg, dict):
        k_cfg = {}

    return {
        "default_tenant": k_cfg.get("default_tenant") or "",
        "lane_by_profile": bool(k_cfg.get("lane_by_profile", True)),
        "include_archived_by_default": bool(k_cfg.get("include_archived_by_default", False)),
        "render_markdown": bool(k_cfg.get("render_markdown", True)),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stats (per-profile / per-status counts + oldest-ready age)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/stats")
def get_stats():
    """Return board statistics as computed by ``kanban_db.board_stats``.

    Per the section header these are per-status and per-assignee counts
    plus the oldest-ready age, meant for the dashboard HUD and for router
    profiles asking "is this specialist overloaded?" without scanning the
    whole board.
    """
    conn = _conn()
    try:
        stats = kanban_db.board_stats(conn)
    finally:
        conn.close()
    return stats
|
||||
|
||||
|
||||
@router.get("/assignees")
def get_assignees():
    """Return the known assignees under the ``assignees`` key.

    The list comes from ``kanban_db.known_assignees``.  It is described as
    the union of the profiles under ``~/.hermes/profiles/*`` and every
    assignee currently on the board, with per-profile task counts, so a
    freshly created profile shows up in the picker before it has any task.
    """
    conn = _conn()
    try:
        known = kanban_db.known_assignees(conn)
    finally:
        conn.close()
    return {"assignees": known}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Worker log (read-only; file written by _default_spawn)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/tasks/{task_id}/log")
def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_000)):
    """Return the worker's stdout/stderr log.

    ``tail`` caps the response size (bytes) so the dashboard drawer
    doesn't paginate megabytes into the browser.  The on-disk log is
    rotated at 2 MiB per ``_rotate_worker_log`` — a single ``.log.1`` is
    kept, no further generations, so disk usage per task is bounded at
    ~4 MiB.

    A task with no log yet is not an error: the response is 200 with
    ``exists`` false and empty ``content``.

    Raises:
        HTTPException: 404 when the task itself does not exist.
    """
    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
    finally:
        conn.close()
    if task is None:
        raise HTTPException(status_code=404, detail=f"task {task_id} not found")

    content = kanban_db.read_worker_log(task_id, tail_bytes=tail)
    log_path = kanban_db.worker_log_path(task_id)
    # Stat directly instead of exists()-then-stat(): the file can be rotated
    # away between the two calls.  The size is informational, so any OS
    # error falls back to 0.
    try:
        size = log_path.stat().st_size
    except OSError:
        size = 0

    return {
        "task_id": task_id,
        "path": str(log_path),
        "exists": content is not None,
        "size_bytes": size,
        "content": content or "",
        # Truncated when the on-disk file was larger than the tail cap.
        "truncated": bool(tail and size > tail),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch nudge (optional quick-path so the UI doesn't wait 60 s)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/dispatch")
def dispatch(dry_run: bool = Query(False), max_n: int = Query(8, alias="max")):
    """Run one dispatcher pass on demand.

    Quick-path nudge so the UI does not have to wait for the dispatcher
    to come around on its own.

    Args:
        dry_run: Forwarded to ``kanban_db.dispatch_once`` as ``dry_run``.
        max_n: Exposed as the ``max`` query parameter; forwarded to
            ``kanban_db.dispatch_once`` as ``max_spawn``.

    Returns:
        The dispatch result converted with ``dataclasses.asdict``. If the
        result is not a dataclass instance (``asdict`` raises
        ``TypeError``), ``{"result": str(result)}`` is returned instead.
    """
    db = _conn()
    try:
        outcome = kanban_db.dispatch_once(
            db, dry_run=dry_run, max_spawn=max_n,
        )
        # DispatchResult is a dataclass; fall back to its string form if
        # asdict() rejects whatever came back.
        try:
            payload = asdict(outcome)
        except TypeError:
            payload = {"result": str(outcome)}
    finally:
        db.close()
    return payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WebSocket: /events?since=<event_id>
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Poll interval for the event tail loop. SQLite WAL + 300 ms polling is
|
||||
# the simplest and most robust approach; it adds a fraction of a percent
|
||||
# of CPU and has no shared state to synchronize across workers.
|
||||
_EVENT_POLL_SECONDS = 0.3
|
||||
|
||||
|
||||
@router.websocket("/events")
async def stream_events(ws: WebSocket):
    """Stream new ``task_events`` rows to a dashboard client.

    Query parameters:
        token: Dashboard session token, checked with ``_check_ws_token``.
            The socket is closed with a policy-violation code when the
            check fails.
        since: Id of the last event the client has already seen. Missing
            means ``0``; a non-integer value also falls back to ``0``.

    After the handshake the handler polls the database every
    ``_EVENT_POLL_SECONDS`` seconds. Whenever rows with a larger id exist
    it sends ``{"events": [...], "cursor": <id of the last event>}``,
    with at most 200 events per message. The loop runs until the client
    disconnects or an unexpected error occurs; such errors are logged
    rather than propagated.
    """
    # Browsers can't set an Authorization header on a WS upgrade, so the
    # dashboard session token is enforced as a query param instead. This
    # matches how the PTY bridge authenticates in hermes_cli/web_server.py.
    if not _check_ws_token(ws.query_params.get("token")):
        await ws.close(code=http_status.WS_1008_POLICY_VIOLATION)
        return
    await ws.accept()
    try:
        try:
            cursor = int(ws.query_params.get("since", "0"))
        except ValueError:
            cursor = 0

        def _fetch_new(cursor_val: int) -> tuple[int, list[dict]]:
            """Read up to 200 events newer than ``cursor_val``.

            Returns the id of the newest row read (or ``cursor_val``
            unchanged when nothing is new) together with the decoded
            events. Runs in a worker thread, so it opens and closes its
            own connection.
            """
            db = kanban_db.connect()
            try:
                fetched = db.execute(
                    "SELECT id, task_id, run_id, kind, payload, created_at "
                    "FROM task_events WHERE id > ? ORDER BY id ASC LIMIT 200",
                    (cursor_val,),
                ).fetchall()
                batch: list[dict] = []
                for row in fetched:
                    # A payload that cannot be decoded is delivered as
                    # None rather than aborting the whole batch.
                    try:
                        raw = row["payload"]
                        decoded = json.loads(raw) if raw else None
                    except Exception:
                        decoded = None
                    batch.append({
                        "id": row["id"],
                        "task_id": row["task_id"],
                        "run_id": row["run_id"],
                        "kind": row["kind"],
                        "payload": decoded,
                        "created_at": row["created_at"],
                    })
                # Rows are ordered by id ascending, so the last one read
                # carries the new cursor position.
                latest = fetched[-1]["id"] if fetched else cursor_val
                return latest, batch
            finally:
                db.close()

        while True:
            # The blocking SQLite read is pushed to a thread so the event
            # loop stays responsive.
            cursor, pending = await asyncio.to_thread(_fetch_new, cursor)
            if pending:
                await ws.send_json({"events": pending, "cursor": cursor})
            await asyncio.sleep(_EVENT_POLL_SECONDS)
    except WebSocketDisconnect:
        return
    except Exception as exc:  # defensive: never crash the dashboard worker
        log.warning("Kanban event stream error: %s", exc)
        try:
            await ws.close()
        except Exception:
            # Best effort: the socket may already be gone.
            pass
|
||||
@@ -0,0 +1,17 @@
|
||||
[Unit]
|
||||
Description=Hermes Kanban dispatcher (hermes kanban daemon)
|
||||
Documentation=https://hermes-agent.nousresearch.com/docs/user-guide/features/kanban
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/usr/bin/env hermes kanban daemon --interval 60 --pidfile %t/hermes-kanban-dispatcher.pid
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
# Log to the journal via stdout/stderr; the dispatcher also writes per-task
|
||||
# worker output to $HERMES_HOME/kanban/logs/<task>.log.
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
+10
-12
@@ -86,6 +86,7 @@ from agent.error_classifier import classify_api_error, FailoverReason
|
||||
from agent.prompt_builder import (
|
||||
DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
|
||||
MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
|
||||
KANBAN_GUIDANCE,
|
||||
build_nous_subscription_prompt,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
@@ -3304,19 +3305,10 @@ class AIAgent:
|
||||
logger.warning("Background memory/skill review failed: %s", e)
|
||||
self._emit_auxiliary_failure("background review", e)
|
||||
finally:
|
||||
# Background review agents can initialize memory providers
|
||||
# (for example Hindsight) that own their own network clients.
|
||||
# Explicitly stop those providers before closing the agent so
|
||||
# their aiohttp sessions do not leak until GC/process exit.
|
||||
# Then close all remaining resources (httpx client,
|
||||
# subprocesses, etc.) so GC doesn't try to clean them up on a
|
||||
# dead asyncio event loop (which produces "Event loop is
|
||||
# closed" errors).
|
||||
# Close all resources (httpx client, subprocesses, etc.) so
|
||||
# GC doesn't try to clean them up on a dead asyncio event
|
||||
# loop (which produces "Event loop is closed" errors).
|
||||
if review_agent is not None:
|
||||
try:
|
||||
review_agent.shutdown_memory_provider()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
review_agent.close()
|
||||
except Exception:
|
||||
@@ -4506,6 +4498,12 @@ class AIAgent:
|
||||
tool_guidance.append(SESSION_SEARCH_GUIDANCE)
|
||||
if "skill_manage" in self.valid_tool_names:
|
||||
tool_guidance.append(SKILLS_GUIDANCE)
|
||||
# Kanban worker/orchestrator lifecycle — only present when the
|
||||
# dispatcher spawned this process (kanban_show check_fn gates on
|
||||
# HERMES_KANBAN_TASK env var). Normal chat sessions never see
|
||||
# this block.
|
||||
if "kanban_show" in self.valid_tool_names:
|
||||
tool_guidance.append(KANBAN_GUIDANCE)
|
||||
if tool_guidance:
|
||||
prompt_parts.append(" ".join(tool_guidance))
|
||||
|
||||
|
||||
+2
-29
@@ -1055,37 +1055,10 @@ setup_path() {
|
||||
return 0
|
||||
fi
|
||||
|
||||
# FHS layout: /usr/local/bin is normally on PATH for login shells (via
|
||||
# /etc/profile pathmunge), but on RHEL/CentOS/Rocky/Alma 8+ non-login
|
||||
# interactive root shells (su, sudo -s, tmux panes, some web terminals)
|
||||
# only source /etc/bashrc, which does NOT add /usr/local/bin — and
|
||||
# /root/.bash_profile doesn't either. So verify with `command -v` and
|
||||
# fall back to writing a PATH guard into /root/.bashrc when needed.
|
||||
# FHS layout: /usr/local/bin is on PATH for every standard shell, nothing to inject.
|
||||
if [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
export PATH="$command_link_dir:$PATH"
|
||||
# Probe a fresh non-login interactive bash the way the user will use it.
|
||||
# `bash -i -c` sources ~/.bashrc but NOT ~/.bash_profile or /etc/profile,
|
||||
# which is the exact scenario where RHEL root loses /usr/local/bin.
|
||||
if env -i HOME="$HOME" TERM="${TERM:-dumb}" bash -i -c 'command -v hermes' \
|
||||
>/dev/null 2>&1; then
|
||||
log_info "/usr/local/bin is already on PATH for all shells"
|
||||
log_success "hermes command ready"
|
||||
return 0
|
||||
fi
|
||||
|
||||
log_info "hermes not on PATH in non-login shells (common on RHEL-family)"
|
||||
PATH_LINE='export PATH="/usr/local/bin:$PATH"'
|
||||
PATH_COMMENT='# Hermes Agent — ensure /usr/local/bin is on PATH (RHEL non-login shells)'
|
||||
for SHELL_CONFIG in "$HOME/.bashrc" "$HOME/.bash_profile"; do
|
||||
[ -f "$SHELL_CONFIG" ] || continue
|
||||
if ! grep -v '^[[:space:]]*#' "$SHELL_CONFIG" 2>/dev/null \
|
||||
| grep -qE 'PATH=.*(/usr/local/bin|\$command_link_dir)'; then
|
||||
echo "" >> "$SHELL_CONFIG"
|
||||
echo "$PATH_COMMENT" >> "$SHELL_CONFIG"
|
||||
echo "$PATH_LINE" >> "$SHELL_CONFIG"
|
||||
log_success "Added /usr/local/bin to PATH in $SHELL_CONFIG"
|
||||
fi
|
||||
done
|
||||
log_info "/usr/local/bin is already on PATH for all shells"
|
||||
log_success "hermes command ready"
|
||||
return 0
|
||||
fi
|
||||
|
||||
@@ -70,8 +70,6 @@ AUTHOR_MAP = {
|
||||
"keira.voss94@gmail.com": "keiravoss94",
|
||||
"16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"fqsy1416@gmail.com": "EKKOLearnAI",
|
||||
"octo-patch@github.com": "octo-patch",
|
||||
"math0r-be@github.com": "math0r-be",
|
||||
"simbamax99@gmail.com": "simbam99",
|
||||
"iris@growthpillars.co": "irispillars",
|
||||
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
@@ -118,15 +116,9 @@ AUTHOR_MAP = {
|
||||
"Mibayy@users.noreply.github.com": "Mibayy",
|
||||
"mibayy@users.noreply.github.com": "Mibayy",
|
||||
"135070653+sgaofen@users.noreply.github.com": "sgaofen",
|
||||
"lzy.dev@gmail.com": "zhiyanliu",
|
||||
"me@janstepanovsky.cz": "hhhonzik",
|
||||
"139848623+hhuang91@users.noreply.github.com": "hhuang91",
|
||||
"s.ozaki@ebinou.net": "Satoshi-agi",
|
||||
"10774721+kunlabs@users.noreply.github.com": "kunlabs",
|
||||
"nocoo@users.noreply.github.com": "nocoo",
|
||||
"30841158+n-WN@users.noreply.github.com": "n-WN",
|
||||
"tsuijinglei@gmail.com": "hiddenpuppy",
|
||||
"buraysandro9@gmail.com": "ygd58",
|
||||
"jerome@clawwork.ai": "HiddenPuppy",
|
||||
"jerome.benoit@sap.com": "jerome-benoit",
|
||||
"wysie@users.noreply.github.com": "Wysie",
|
||||
@@ -199,7 +191,6 @@ AUTHOR_MAP = {
|
||||
"satelerd@gmail.com": "satelerd",
|
||||
"dan@danlynn.com": "danklynn",
|
||||
"mattmaximo@hotmail.com": "MattMaximo",
|
||||
"MatthewRHardwick@gmail.com": "mrhwick",
|
||||
"149063006+j3ffffff@users.noreply.github.com": "j3ffffff",
|
||||
"A-FdL-Prog@users.noreply.github.com": "A-FdL-Prog",
|
||||
"l0hde@users.noreply.github.com": "l0hde",
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
---
|
||||
name: kanban-orchestrator
|
||||
description: Decomposition playbook + specialist-roster conventions + anti-temptation rules for an orchestrator profile routing work through Kanban. The "don't do the work yourself" rule and the basic lifecycle are auto-injected into every kanban worker's system prompt; this skill is the deeper playbook when you're specifically playing the orchestrator role.
|
||||
version: 2.0.0
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [kanban, multi-agent, orchestration, routing]
|
||||
related_skills: [kanban-worker]
|
||||
---
|
||||
|
||||
# Kanban Orchestrator — Decomposition Playbook
|
||||
|
||||
> The **core worker lifecycle** (including the `kanban_create` fan-out pattern and the "decompose, don't execute" rule) is auto-injected into every kanban process via the `KANBAN_GUIDANCE` system-prompt block. This skill is the deeper playbook when you're an orchestrator profile whose whole job is routing.
|
||||
|
||||
## When to use the board (vs. just doing the work)
|
||||
|
||||
Create Kanban tasks when any of these are true:
|
||||
|
||||
1. **Multiple specialists are needed.** Research + analysis + writing is three profiles.
|
||||
2. **The work should survive a crash or restart.** Long-running, recurring, or important.
|
||||
3. **The user might want to interject.** Human-in-the-loop at any step.
|
||||
4. **Multiple subtasks can run in parallel.** Fan-out for speed.
|
||||
5. **Review / iteration is expected.** A reviewer profile loops on drafter output.
|
||||
6. **The audit trail matters.** Board rows persist in SQLite forever.
|
||||
|
||||
If *none* of those apply — it's a small one-shot reasoning task — use `delegate_task` instead or answer the user directly.
|
||||
|
||||
## The anti-temptation rules
|
||||
|
||||
Your job description says "route, don't execute." The rules that enforce that:
|
||||
|
||||
- **Do not execute the work yourself.** Your restricted toolset usually doesn't even include terminal/file/code/web for implementation. If you find yourself "just fixing this quickly" — stop and create a task for the right specialist.
|
||||
- **For any concrete task, create a Kanban task and assign it.** Every single time.
|
||||
- **If no specialist fits, ask the user which profile to create.** Do not default to doing it yourself under "close enough."
|
||||
- **Decompose, route, and summarize — that's the whole job.**
|
||||
|
||||
## The standard specialist roster (convention)
|
||||
|
||||
Unless the user's setup has customized profiles, assume these exist. Adjust to whatever the user actually has — ask if you're unsure.
|
||||
|
||||
| Profile | Does | Typical workspace |
|
||||
|---|---|---|
|
||||
| `researcher` | Reads sources, gathers facts, writes findings | `scratch` |
|
||||
| `analyst` | Synthesizes, ranks, de-dupes. Consumes multiple `researcher` outputs | `scratch` |
|
||||
| `writer` | Drafts prose in the user's voice | `scratch` or `dir:` into their Obsidian vault |
|
||||
| `reviewer` | Reads output, leaves findings, gates approval | `scratch` |
|
||||
| `backend-eng` | Writes server-side code | `worktree` |
|
||||
| `frontend-eng` | Writes client-side code | `worktree` |
|
||||
| `ops` | Runs scripts, manages services, handles deployments | `dir:` into ops scripts repo |
|
||||
| `pm` | Writes specs, acceptance criteria | `scratch` |
|
||||
|
||||
## Decomposition playbook
|
||||
|
||||
### Step 1 — Understand the goal
|
||||
|
||||
Ask clarifying questions if the goal is ambiguous. Cheap to ask; expensive to spawn the wrong fleet.
|
||||
|
||||
### Step 2 — Sketch the task graph
|
||||
|
||||
Before creating anything, draft the graph out loud (in your response to the user). Example for "Analyze whether we should migrate to Postgres":
|
||||
|
||||
```
|
||||
T1 researcher research: Postgres cost vs current
|
||||
T2 researcher research: Postgres performance vs current
|
||||
T3 analyst synthesize migration recommendation parents: T1, T2
|
||||
T4 writer draft decision memo parents: T3
|
||||
```
|
||||
|
||||
Show this to the user. Let them correct it before you create anything.
|
||||
|
||||
### Step 3 — Create tasks and link
|
||||
|
||||
```python
|
||||
t1 = kanban_create(
|
||||
title="research: Postgres cost vs current",
|
||||
assignee="researcher",
|
||||
body="Compare estimated infrastructure costs, migration costs, and ongoing ops costs over a 3-year window. Sources: AWS/GCP pricing, team time estimates, current Postgres bills from peers.",
|
||||
tenant=os.environ.get("HERMES_TENANT"),
|
||||
)["task_id"]
|
||||
|
||||
t2 = kanban_create(
|
||||
title="research: Postgres performance vs current",
|
||||
assignee="researcher",
|
||||
body="Compare query latency, throughput, and scaling characteristics at our expected data volume (~500GB, 10k QPS peak). Sources: benchmark papers, public case studies, pgbench results if easy.",
|
||||
)["task_id"]
|
||||
|
||||
t3 = kanban_create(
|
||||
title="synthesize migration recommendation",
|
||||
assignee="analyst",
|
||||
body="Read the findings from T1 (cost) and T2 (performance). Produce a 1-page recommendation with explicit trade-offs and a go/no-go call.",
|
||||
parents=[t1, t2],
|
||||
)["task_id"]
|
||||
|
||||
t4 = kanban_create(
|
||||
title="draft decision memo",
|
||||
assignee="writer",
|
||||
body="Turn the analyst's recommendation into a 2-page memo for the CTO. Match the tone of previous decision memos in the team's knowledge base.",
|
||||
parents=[t3],
|
||||
)["task_id"]
|
||||
```
|
||||
|
||||
`parents=[...]` gates promotion — children stay in `todo` until every parent reaches `done`, then auto-promote to `ready`. No manual coordination needed; the dispatcher and dependency engine handle it.
|
||||
|
||||
### Step 4 — Complete your own task
|
||||
|
||||
If you were spawned as a task yourself (e.g. `planner` profile was assigned `T0: "investigate Postgres migration"`), mark it done with a summary of what you created:
|
||||
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="decomposed into T1-T4: 2 researchers parallel, 1 analyst on their outputs, 1 writer on the recommendation",
|
||||
metadata={
|
||||
"task_graph": {
|
||||
"T1": {"assignee": "researcher", "parents": []},
|
||||
"T2": {"assignee": "researcher", "parents": []},
|
||||
"T3": {"assignee": "analyst", "parents": ["T1", "T2"]},
|
||||
"T4": {"assignee": "writer", "parents": ["T3"]},
|
||||
},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
### Step 5 — Report back to the user
|
||||
|
||||
Tell them what you created in plain prose:
|
||||
|
||||
> I've queued 4 tasks:
|
||||
> - **T1** (researcher): cost comparison
|
||||
> - **T2** (researcher): performance comparison, in parallel with T1
|
||||
> - **T3** (analyst): synthesizes T1 + T2 into a recommendation
|
||||
> - **T4** (writer): turns T3 into a CTO memo
|
||||
>
|
||||
> The dispatcher will pick up T1 and T2 now. T3 starts when both finish. You'll get a gateway ping when T4 completes. Use the dashboard or `hermes kanban tail <id>` to follow along.
|
||||
|
||||
## Common patterns
|
||||
|
||||
**Fan-out + fan-in (research → synthesize):** N `researcher` tasks with no parents, one `analyst` task with all of them as parents.
|
||||
|
||||
**Pipeline with gates:** `pm → backend-eng → reviewer`. Each stage's `parents=[previous_task]`. Reviewer blocks or completes; if reviewer blocks, the operator unblocks with feedback and respawns.
|
||||
|
||||
**Same-profile queue:** 50 tasks, all assigned to `translator`, no dependencies between them. Dispatcher serializes — translator processes them in priority order, accumulating experience in their own memory.
|
||||
|
||||
**Human-in-the-loop:** Any task can `kanban_block()` to wait for input. Dispatcher respawns after `/unblock`. The comment thread carries the full context.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
**Reassignment vs. new task.** If a reviewer blocks with "needs changes," create a NEW task linked from the reviewer's task — don't re-run the same task with a stern look. The new task is assigned to the original implementer profile.
|
||||
|
||||
**Argument order for links.** `kanban_link(parent_id=..., child_id=...)` — parent first. Mixing them up demotes the wrong task to `todo`.
|
||||
|
||||
**Don't pre-create the whole graph if the shape depends on intermediate findings.** If T3's structure depends on what T1 and T2 find, let T3 exist as a "synthesize findings" task whose own first step is to read parent handoffs and plan the rest. Orchestrators can spawn orchestrators.
|
||||
|
||||
**Tenant inheritance.** If `HERMES_TENANT` is set in your env, pass `tenant=os.environ.get("HERMES_TENANT")` on every `kanban_create` call so child tasks stay in the same namespace.
|
||||
@@ -0,0 +1,134 @@
|
||||
---
|
||||
name: kanban-worker
|
||||
description: Pitfalls, examples, and edge cases for Hermes Kanban workers. The lifecycle itself is auto-injected into every worker's system prompt as KANBAN_GUIDANCE (from agent/prompt_builder.py); this skill is what you load when you want deeper detail on specific scenarios.
|
||||
version: 2.0.0
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [kanban, multi-agent, collaboration, workflow, pitfalls]
|
||||
related_skills: [kanban-orchestrator]
|
||||
---
|
||||
|
||||
# Kanban Worker — Pitfalls and Examples
|
||||
|
||||
> You're seeing this skill because the Hermes Kanban dispatcher spawned you as a worker with `--skills kanban-worker` — it's loaded automatically for every dispatched worker. The **lifecycle** (6 steps: orient → work → heartbeat → block/complete) also lives in the `KANBAN_GUIDANCE` block that's auto-injected into your system prompt. This skill is the deeper detail: good handoff shapes, retry diagnostics, edge cases.
|
||||
|
||||
## Workspace handling
|
||||
|
||||
Your workspace kind determines how you should behave inside `$HERMES_KANBAN_WORKSPACE`:
|
||||
|
||||
| Kind | What it is | How to work |
|
||||
|---|---|---|
|
||||
| `scratch` | Fresh tmp dir, yours alone | Read/write freely; it gets GC'd when the task is archived. |
|
||||
| `dir:<path>` | Shared persistent directory | Other runs will read what you write. Treat it like long-lived state. Path is guaranteed absolute (the kernel rejects relative paths). |
|
||||
| `worktree` | Git worktree at the resolved path | If `.git` doesn't exist, run `git worktree add <path> <branch>` from the main repo first, then cd and work normally. Commit work here. |
|
||||
|
||||
## Tenant isolation
|
||||
|
||||
If `$HERMES_TENANT` is set, the task belongs to a tenant namespace. When reading or writing persistent memory, prefix memory entries with the tenant so context doesn't leak across tenants:
|
||||
|
||||
- Good: `business-a: Acme is our biggest customer`
|
||||
- Bad (leaks): `Acme is our biggest customer`
|
||||
|
||||
## Good summary + metadata shapes
|
||||
|
||||
The `kanban_complete(summary=..., metadata=...)` handoff is how downstream workers read what you did. Patterns that work:
|
||||
|
||||
**Coding task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="shipped rate limiter — token bucket, keys on user_id with IP fallback, 14 tests pass",
|
||||
metadata={
|
||||
"changed_files": ["rate_limiter.py", "tests/test_rate_limiter.py"],
|
||||
"tests_run": 14,
|
||||
"tests_passed": 14,
|
||||
"decisions": ["user_id primary, IP fallback for unauthenticated requests"],
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
**Research task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="3 competing libraries reviewed; vLLM wins on throughput, SGLang on latency, Tensorrt-LLM on memory efficiency",
|
||||
metadata={
|
||||
"sources_read": 12,
|
||||
"recommendation": "vLLM",
|
||||
"benchmarks": {"vllm": 1.0, "sglang": 0.87, "trtllm": 0.72},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
**Review task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="reviewed PR #123; 2 blocking issues found (SQL injection in /search, missing CSRF on /settings)",
|
||||
metadata={
|
||||
"pr_number": 123,
|
||||
"findings": [
|
||||
{"severity": "critical", "file": "api/search.py", "line": 42, "issue": "raw SQL concat"},
|
||||
{"severity": "high", "file": "api/settings.py", "issue": "missing CSRF middleware"},
|
||||
],
|
||||
"approved": False,
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
Shape `metadata` so downstream parsers (reviewers, aggregators, schedulers) can use it without re-reading your prose.
|
||||
|
||||
## Block reasons that get answered fast
|
||||
|
||||
Bad: `"stuck"` — the human has no context.
|
||||
|
||||
Good: one sentence naming the specific decision you need. Leave longer context as a comment instead.
|
||||
|
||||
```python
|
||||
kanban_comment(
|
||||
task_id=os.environ["HERMES_KANBAN_TASK"],
|
||||
body="Full context: I have user IPs from Cloudflare headers but some users are behind NATs with thousands of peers. Keying on IP alone causes false positives.",
|
||||
)
|
||||
kanban_block(reason="Rate limit key choice: IP (simple, NAT-unsafe) or user_id (requires auth, skips anonymous endpoints)?")
|
||||
```
|
||||
|
||||
The block message is what appears in the dashboard / gateway notifier. The comment is the deeper context a human reads when they open the task.
|
||||
|
||||
## Heartbeats worth sending
|
||||
|
||||
Good heartbeats name progress: `"epoch 12/50, loss 0.31"`, `"scanned 1.2M/2.4M rows"`, `"uploaded 47/120 videos"`.
|
||||
|
||||
Bad heartbeats: `"still working"`, empty notes, sub-second intervals. Send one every few minutes at most; skip them entirely for tasks under ~2 minutes.
|
||||
|
||||
## Retry scenarios
|
||||
|
||||
If you open the task and `kanban_show` returns `runs: [...]` with one or more closed runs, you're a retry. The prior runs' `outcome` / `summary` / `error` tell you what didn't work. Don't repeat that path. Typical retry diagnostics:
|
||||
|
||||
- `outcome: "timed_out"` — the previous attempt hit `max_runtime_seconds`. You may need to chunk the work or shorten it.
|
||||
- `outcome: "crashed"` — OOM or segfault. Reduce memory footprint.
|
||||
- `outcome: "spawn_failed"` + `error: "..."` — usually a profile config issue (missing credential, bad PATH). Ask the human via `kanban_block` instead of retrying blindly.
|
||||
- `outcome: "reclaimed"` + `summary: "task archived..."` — operator archived the task out from under the previous run; you probably shouldn't be running at all, check status carefully.
|
||||
- `outcome: "blocked"` — a previous attempt blocked; the unblock comment should be in the thread by now.
|
||||
|
||||
## Do NOT
|
||||
|
||||
- Call `delegate_task` as a substitute for `kanban_create`. `delegate_task` is for short reasoning subtasks inside YOUR run; `kanban_create` is for cross-agent handoffs that outlive one API loop.
|
||||
- Modify files outside `$HERMES_KANBAN_WORKSPACE` unless the task body says to.
|
||||
- Create follow-up tasks assigned to yourself — assign to the right specialist.
|
||||
- Complete a task you didn't actually finish. Block it instead.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
**Task state can change between dispatch and your startup.** Between the moment the dispatcher claimed the task and the moment your process actually booted, the task may have been blocked, reassigned, or archived. Always call `kanban_show` first. If it reports `blocked` or `archived`, stop — you shouldn't be running.
|
||||
|
||||
**Workspace may have stale artifacts.** Especially `dir:` and `worktree` workspaces can have files from previous runs. Read the comment thread — it usually explains why you're running again and what state the workspace is in.
|
||||
|
||||
**Don't rely on the CLI when the `kanban_*` tools are available.** The `kanban_*` tools work across all terminal backends (Docker, Modal, SSH). Running `hermes kanban <verb>` from your terminal tool will fail in containerized backends because the CLI isn't installed there. When in doubt, use the tool.
|
||||
|
||||
## CLI fallback (for scripting)
|
||||
|
||||
Every tool has a CLI equivalent for human operators and scripts:
|
||||
- `kanban_show` ↔ `hermes kanban show <id> --json`
|
||||
- `kanban_complete` ↔ `hermes kanban complete <id> --summary "..." --metadata '{...}'`
|
||||
- `kanban_block` ↔ `hermes kanban block <id> "reason"`
|
||||
- `kanban_create` ↔ `hermes kanban create "title" --assignee <profile> [--parent <id>]`
|
||||
- etc.
|
||||
|
||||
Use the tools from inside an agent; the CLI exists for the human at the terminal.
|
||||
@@ -0,0 +1,3 @@
|
||||
---
|
||||
description: Skills for monitoring, aggregating, and processing RSS feeds, blogs, and web content sources.
|
||||
---
|
||||
@@ -160,30 +160,6 @@ class TestBranchCommandCLI:
|
||||
assert agent.reset_session_state.called
|
||||
assert agent._last_flushed_db_idx == 4 # len(conversation_history)
|
||||
|
||||
def test_branch_updates_agent_session_log_file(self, cli_instance, session_db, tmp_path):
|
||||
"""Branching must redirect the agent's session_log_file to the new session's path."""
|
||||
from cli import HermesCLI
|
||||
from pathlib import Path
|
||||
|
||||
logs_dir = tmp_path / "sessions"
|
||||
logs_dir.mkdir()
|
||||
|
||||
agent = MagicMock()
|
||||
agent._last_flushed_db_idx = 0
|
||||
agent.logs_dir = logs_dir
|
||||
agent.session_log_file = logs_dir / f"session_{cli_instance.session_id}.json"
|
||||
cli_instance.agent = agent
|
||||
|
||||
old_log_file = agent.session_log_file
|
||||
HermesCLI._handle_branch_command(cli_instance, "/branch")
|
||||
|
||||
new_session_id = cli_instance.session_id
|
||||
expected_log = logs_dir / f"session_{new_session_id}.json"
|
||||
assert agent.session_log_file == expected_log, (
|
||||
"session_log_file must point to the branch session, not the original"
|
||||
)
|
||||
assert agent.session_log_file != old_log_file
|
||||
|
||||
def test_branch_sets_resumed_flag(self, cli_instance, session_db):
|
||||
"""Branch should set _resumed=True to prevent auto-title generation."""
|
||||
from cli import HermesCLI
|
||||
|
||||
@@ -31,40 +31,6 @@ def _make_cli_stub():
|
||||
return cli
|
||||
|
||||
|
||||
def _make_background_cli_stub():
|
||||
cli = _make_cli_stub()
|
||||
cli._background_task_counter = 0
|
||||
cli._background_tasks = {}
|
||||
cli._ensure_runtime_credentials = MagicMock(return_value=True)
|
||||
cli._resolve_turn_agent_config = MagicMock(return_value={
|
||||
"model": "test-model",
|
||||
"runtime": {
|
||||
"api_key": "test-key",
|
||||
"base_url": "https://example.test/v1",
|
||||
"provider": "test",
|
||||
"api_mode": "chat_completions",
|
||||
},
|
||||
"request_overrides": None,
|
||||
})
|
||||
cli.max_turns = 90
|
||||
cli.enabled_toolsets = []
|
||||
cli._session_db = None
|
||||
cli.reasoning_config = {}
|
||||
cli.service_tier = None
|
||||
cli._providers_only = None
|
||||
cli._providers_ignore = None
|
||||
cli._providers_order = None
|
||||
cli._provider_sort = None
|
||||
cli._provider_require_params = None
|
||||
cli._provider_data_collection = None
|
||||
cli._fallback_model = None
|
||||
cli._agent_running = False
|
||||
cli._spinner_text = ""
|
||||
cli.bell_on_complete = False
|
||||
cli.final_response_markdown = "strip"
|
||||
return cli
|
||||
|
||||
|
||||
class TestCliApprovalUi:
|
||||
def test_sudo_prompt_restores_existing_draft_after_response(self):
|
||||
cli = _make_cli_stub()
|
||||
@@ -289,54 +255,6 @@ class TestCliApprovalUi:
|
||||
# Command got truncated with a marker.
|
||||
assert "(command truncated" in rendered
|
||||
|
||||
def test_background_task_registers_thread_local_approval_callbacks(self):
|
||||
"""Background /btw tasks must use the prompt_toolkit approval UI.
|
||||
|
||||
The foreground chat path registers dangerous-command callbacks inside
|
||||
its worker thread because tools.terminal_tool stores them in
|
||||
threading.local(). /background used to skip that, so dangerous commands
|
||||
fell back to raw input() in a background thread and timed out under
|
||||
prompt_toolkit.
|
||||
"""
|
||||
cli = _make_background_cli_stub()
|
||||
seen = {}
|
||||
|
||||
class FakeAgent:
|
||||
def __init__(self, **kwargs):
|
||||
self._print_fn = None
|
||||
self.thinking_callback = None
|
||||
|
||||
def run_conversation(self, **kwargs):
|
||||
from tools.terminal_tool import (
|
||||
_get_approval_callback,
|
||||
_get_sudo_password_callback,
|
||||
)
|
||||
|
||||
seen["approval"] = _get_approval_callback()
|
||||
seen["sudo"] = _get_sudo_password_callback()
|
||||
return {
|
||||
"final_response": "done",
|
||||
"messages": [],
|
||||
"completed": True,
|
||||
"failed": False,
|
||||
}
|
||||
|
||||
with patch.object(cli_module, "AIAgent", FakeAgent), \
|
||||
patch.object(cli_module, "_cprint"), \
|
||||
patch.object(cli_module, "ChatConsole") as chat_console:
|
||||
chat_console.return_value.print = MagicMock()
|
||||
cli._handle_background_command("/btw check weather")
|
||||
|
||||
deadline = time.time() + 2
|
||||
while cli._background_tasks and time.time() < deadline:
|
||||
time.sleep(0.01)
|
||||
|
||||
assert seen["approval"].__self__ is cli
|
||||
assert seen["approval"].__func__ is HermesCLI._approval_callback
|
||||
assert seen["sudo"].__self__ is cli
|
||||
assert seen["sudo"].__func__ is HermesCLI._sudo_password_callback
|
||||
assert not cli._background_tasks
|
||||
|
||||
|
||||
class TestApprovalCallbackThreadLocalWiring:
|
||||
"""Regression guard for the thread-local callback freeze (#13617 / #13618).
|
||||
|
||||
@@ -211,21 +211,6 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
|
||||
"SIGNAL_ALLOW_ALL_USERS",
|
||||
"EMAIL_ALLOW_ALL_USERS",
|
||||
"SMS_ALLOW_ALL_USERS",
|
||||
# Platform gating — set by load_gateway_config() as a side effect when
|
||||
# a config.yaml is present, so individual test bodies that call the
|
||||
# loader leak these values into later tests on the same xdist worker.
|
||||
# Force-clear on every test setup so the leak can't happen.
|
||||
"SLACK_REQUIRE_MENTION",
|
||||
"SLACK_STRICT_MENTION",
|
||||
"SLACK_FREE_RESPONSE_CHANNELS",
|
||||
"SLACK_ALLOW_BOTS",
|
||||
"SLACK_REACTIONS",
|
||||
"DISCORD_REQUIRE_MENTION",
|
||||
"DISCORD_FREE_RESPONSE_CHANNELS",
|
||||
"TELEGRAM_REQUIRE_MENTION",
|
||||
"WHATSAPP_REQUIRE_MENTION",
|
||||
"DINGTALK_REQUIRE_MENTION",
|
||||
"MATRIX_REQUIRE_MENTION",
|
||||
})
|
||||
|
||||
|
||||
|
||||
@@ -1043,132 +1043,3 @@ class TestAgentCacheIdleResume:
|
||||
new_agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
_FAKE_NOW = 10_000.0 # Fixed epoch for deterministic time assertions
|
||||
|
||||
|
||||
class TestCachedAgentInactivityReset:
|
||||
"""Inactivity-clock reset must be gated on _interrupt_depth == 0.
|
||||
|
||||
On interrupt-recursive turns (_interrupt_depth > 0) the clock must
|
||||
keep accumulating so the inactivity watchdog can fire when a turn is
|
||||
stuck in an interrupt loop. Resetting unconditionally prevented the
|
||||
30-min timeout from triggering (#15654). The depth-0 reset is still
|
||||
needed: a session idle for 29 min must not trip the watchdog before
|
||||
the new turn makes its first API call (#9051).
|
||||
"""
|
||||
|
||||
def _fake_agent(self, stale_seconds: float = 1800.0):
|
||||
m = MagicMock()
|
||||
m._last_activity_ts = _FAKE_NOW - stale_seconds
|
||||
m._api_call_count = 10
|
||||
m._last_activity_desc = "previous turn activity"
|
||||
return m
|
||||
|
||||
def test_fresh_turn_resets_idle_clock(self):
|
||||
"""interrupt_depth=0: clock resets so a post-idle turn gets a
|
||||
fresh 30-min inactivity window (guard for #9051)."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
agent = self._fake_agent(stale_seconds=1800.0)
|
||||
old_ts = agent._last_activity_ts
|
||||
|
||||
with patch("gateway.run.time") as mock_time:
|
||||
mock_time.time.return_value = _FAKE_NOW
|
||||
GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=0)
|
||||
|
||||
assert agent._last_activity_ts == _FAKE_NOW, (
|
||||
"_last_activity_ts was not reset on a fresh turn (interrupt_depth=0)"
|
||||
)
|
||||
assert agent._last_activity_ts > old_ts, (
|
||||
"Stale idle time should be cleared so the new turn gets a fresh window"
|
||||
)
|
||||
|
||||
def test_fresh_turn_resets_desc(self):
|
||||
"""interrupt_depth=0: description is updated to reflect the new turn."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
agent = self._fake_agent()
|
||||
|
||||
with patch("gateway.run.time") as mock_time:
|
||||
mock_time.time.return_value = _FAKE_NOW
|
||||
GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=0)
|
||||
|
||||
assert agent._last_activity_desc == "starting new turn (cached)"
|
||||
|
||||
def test_interrupt_turn_preserves_idle_clock(self):
|
||||
"""interrupt_depth=1: clock preserved so accumulated stuck-turn
|
||||
idle time is not discarded by an interrupt-recursive re-entry (#15654)."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
agent = self._fake_agent(stale_seconds=1200.0)
|
||||
old_ts = agent._last_activity_ts
|
||||
|
||||
GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1)
|
||||
|
||||
assert agent._last_activity_ts == old_ts, (
|
||||
"_last_activity_ts must not be reset on interrupt-recursive turns "
|
||||
"(interrupt_depth>0) — the watchdog needs the accumulated idle time"
|
||||
)
|
||||
|
||||
def test_interrupt_turn_preserves_desc(self):
|
||||
"""interrupt_depth=1: desc preserved — it is semantically paired with ts."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
agent = self._fake_agent(stale_seconds=1200.0)
|
||||
|
||||
GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1)
|
||||
|
||||
assert agent._last_activity_desc == "previous turn activity", (
|
||||
"_last_activity_desc must not change on interrupt-recursive turns; "
|
||||
"it describes the activity *at* _last_activity_ts"
|
||||
)
|
||||
|
||||
def test_deep_interrupt_recursion_preserves_idle_clock(self):
|
||||
"""interrupt_depth=MAX-1: clock still preserved at any non-zero depth."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
agent = self._fake_agent(stale_seconds=600.0)
|
||||
old_ts = agent._last_activity_ts
|
||||
|
||||
GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=4)
|
||||
|
||||
assert agent._last_activity_ts == old_ts
|
||||
|
||||
def test_api_call_count_reset_regardless_of_depth(self):
|
||||
"""_api_call_count is always reset to 0 for the new turn, at any depth."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
agent_fresh = self._fake_agent()
|
||||
agent_interrupted = self._fake_agent()
|
||||
|
||||
with patch("gateway.run.time") as mock_time:
|
||||
mock_time.time.return_value = _FAKE_NOW
|
||||
GatewayRunner._init_cached_agent_for_turn(agent_fresh, interrupt_depth=0)
|
||||
GatewayRunner._init_cached_agent_for_turn(agent_interrupted, interrupt_depth=1)
|
||||
|
||||
assert agent_fresh._api_call_count == 0
|
||||
assert agent_interrupted._api_call_count == 0
|
||||
|
||||
def test_watchdog_accumulation_across_recursive_turns(self):
|
||||
"""Scenario: stuck turn + user interrupt → recursive turn.
|
||||
|
||||
The idle time seen by the watchdog must reflect the full stuck
|
||||
duration, not restart from zero on the recursive re-entry.
|
||||
"""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
STUCK_FOR = 1750.0
|
||||
agent = self._fake_agent(stale_seconds=STUCK_FOR)
|
||||
|
||||
# Simulate: user sees "Still working..." and sends another message.
|
||||
# That triggers an interrupt → _run_agent recurses at depth=1.
|
||||
GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1)
|
||||
|
||||
# Watchdog sees time.time() - _last_activity_ts ≥ STUCK_FOR.
|
||||
idle_secs = _FAKE_NOW - agent._last_activity_ts
|
||||
assert idle_secs >= STUCK_FOR - 1.0, (
|
||||
f"Watchdog would see {idle_secs:.0f}s idle, expected ~{STUCK_FOR}s. "
|
||||
"Inactivity timeout could not fire for a stuck interrupted turn."
|
||||
)
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
"""Tests for gateway/channel_directory.py — channel resolution and display."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from unittest.mock import patch
|
||||
|
||||
from gateway.channel_directory import (
|
||||
build_channel_directory,
|
||||
@@ -14,7 +12,6 @@ from gateway.channel_directory import (
|
||||
format_directory_for_display,
|
||||
load_directory,
|
||||
_build_from_sessions,
|
||||
_build_slack,
|
||||
DIRECTORY_PATH,
|
||||
)
|
||||
|
||||
@@ -65,7 +62,7 @@ class TestBuildChannelDirectoryWrites:
|
||||
monkeypatch.setattr(json, "dump", broken_dump)
|
||||
|
||||
with patch("gateway.channel_directory.DIRECTORY_PATH", cache_file):
|
||||
asyncio.run(build_channel_directory({}))
|
||||
build_channel_directory({})
|
||||
result = load_directory()
|
||||
|
||||
assert result == previous
|
||||
@@ -145,21 +142,6 @@ class TestResolveChannelName:
|
||||
with self._setup(tmp_path, platforms):
|
||||
assert resolve_channel_name("telegram", "Coaching Chat / topic 17585") == "-1001:17585"
|
||||
|
||||
def test_id_match_takes_precedence_over_name(self, tmp_path):
|
||||
"""A raw channel ID resolves to itself, even when a different
|
||||
channel happens to be named the same string. Case-sensitive: Slack
|
||||
IDs are uppercase and must not be normalized away."""
|
||||
platforms = {
|
||||
"slack": [
|
||||
{"id": "C0B0QV5434G", "name": "engineering", "type": "channel"},
|
||||
{"id": "C99", "name": "c0b0qv5434g", "type": "channel"},
|
||||
]
|
||||
}
|
||||
with self._setup(tmp_path, platforms):
|
||||
assert resolve_channel_name("slack", "C0B0QV5434G") == "C0B0QV5434G"
|
||||
# Lowercase still falls through to name matching (case-insensitive)
|
||||
assert resolve_channel_name("slack", "c0b0qv5434g") == "C99"
|
||||
|
||||
def test_display_label_with_type_suffix_resolves(self, tmp_path):
|
||||
platforms = {
|
||||
"telegram": [
|
||||
@@ -350,135 +332,3 @@ class TestLookupChannelType:
|
||||
}
|
||||
with self._setup(tmp_path, platforms):
|
||||
assert lookup_channel_type("discord", "300") is None
|
||||
|
||||
|
||||
def _make_slack_adapter(team_clients):
|
||||
"""Build a stand-in for SlackAdapter exposing only ``_team_clients``."""
|
||||
return SimpleNamespace(_team_clients=team_clients)
|
||||
|
||||
|
||||
def _make_slack_client(pages):
|
||||
"""Build an AsyncWebClient mock whose ``users_conversations`` returns pages."""
|
||||
client = MagicMock()
|
||||
client.users_conversations = AsyncMock(side_effect=pages)
|
||||
return client
|
||||
|
||||
|
||||
class TestBuildSlack:
|
||||
"""_build_slack actually calls users.conversations on each workspace client."""
|
||||
|
||||
def test_no_team_clients_falls_back_to_sessions(self, tmp_path):
|
||||
sessions_path = tmp_path / "sessions" / "sessions.json"
|
||||
sessions_path.parent.mkdir(parents=True)
|
||||
sessions_path.write_text(json.dumps({
|
||||
"s1": {"origin": {"platform": "slack", "chat_id": "D123", "chat_name": "Alice"}},
|
||||
}))
|
||||
|
||||
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||
entries = asyncio.run(_build_slack(_make_slack_adapter({})))
|
||||
|
||||
assert len(entries) == 1
|
||||
assert entries[0]["id"] == "D123"
|
||||
|
||||
def test_lists_channels_from_users_conversations(self, tmp_path):
|
||||
client = _make_slack_client([
|
||||
{
|
||||
"ok": True,
|
||||
"channels": [
|
||||
{"id": "C0B0QV5434G", "name": "engineering", "is_private": False},
|
||||
{"id": "G123ABCDEF", "name": "secret-chat", "is_private": True},
|
||||
],
|
||||
"response_metadata": {},
|
||||
},
|
||||
])
|
||||
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||
entries = asyncio.run(_build_slack(_make_slack_adapter({"T1": client})))
|
||||
|
||||
ids = {e["id"] for e in entries}
|
||||
assert ids == {"C0B0QV5434G", "G123ABCDEF"}
|
||||
types = {e["id"]: e["type"] for e in entries}
|
||||
assert types["C0B0QV5434G"] == "channel"
|
||||
assert types["G123ABCDEF"] == "private"
|
||||
client.users_conversations.assert_awaited_once()
|
||||
|
||||
def test_paginates_via_response_metadata_cursor(self, tmp_path):
|
||||
client = _make_slack_client([
|
||||
{
|
||||
"ok": True,
|
||||
"channels": [{"id": "C001", "name": "first", "is_private": False}],
|
||||
"response_metadata": {"next_cursor": "cur1"},
|
||||
},
|
||||
{
|
||||
"ok": True,
|
||||
"channels": [{"id": "C002", "name": "second", "is_private": False}],
|
||||
"response_metadata": {"next_cursor": ""},
|
||||
},
|
||||
])
|
||||
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||
entries = asyncio.run(_build_slack(_make_slack_adapter({"T1": client})))
|
||||
|
||||
assert {e["id"] for e in entries} == {"C001", "C002"}
|
||||
assert client.users_conversations.await_count == 2
|
||||
|
||||
def test_per_workspace_error_does_not_block_others(self, tmp_path):
|
||||
bad = MagicMock()
|
||||
bad.users_conversations = AsyncMock(side_effect=RuntimeError("boom"))
|
||||
good = _make_slack_client([
|
||||
{
|
||||
"ok": True,
|
||||
"channels": [{"id": "C999", "name": "ok-channel", "is_private": False}],
|
||||
"response_metadata": {},
|
||||
},
|
||||
])
|
||||
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||
entries = asyncio.run(_build_slack(_make_slack_adapter({"BAD": bad, "GOOD": good})))
|
||||
|
||||
assert {e["id"] for e in entries} == {"C999"}
|
||||
|
||||
def test_session_dms_merged_when_not_in_api_results(self, tmp_path):
|
||||
sessions_path = tmp_path / "sessions" / "sessions.json"
|
||||
sessions_path.parent.mkdir(parents=True)
|
||||
sessions_path.write_text(json.dumps({
|
||||
"s1": {"origin": {"platform": "slack", "chat_id": "D456", "chat_name": "Bob"}},
|
||||
"dup": {"origin": {"platform": "slack", "chat_id": "C001", "chat_name": "first"}},
|
||||
}))
|
||||
client = _make_slack_client([
|
||||
{
|
||||
"ok": True,
|
||||
"channels": [{"id": "C001", "name": "first", "is_private": False}],
|
||||
"response_metadata": {},
|
||||
},
|
||||
])
|
||||
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||
entries = asyncio.run(_build_slack(_make_slack_adapter({"T1": client})))
|
||||
|
||||
ids = {e["id"] for e in entries}
|
||||
assert "C001" in ids and "D456" in ids
|
||||
# Channel ID from API should not be duplicated by the session merge
|
||||
assert sum(1 for e in entries if e["id"] == "C001") == 1
|
||||
|
||||
def test_skips_channels_with_no_id_or_name(self, tmp_path):
|
||||
client = _make_slack_client([
|
||||
{
|
||||
"ok": True,
|
||||
"channels": [
|
||||
{"id": "C001", "name": "good", "is_private": False},
|
||||
{"id": "", "name": "no-id"},
|
||||
{"id": "C002"}, # no name (e.g. IM)
|
||||
],
|
||||
"response_metadata": {},
|
||||
},
|
||||
])
|
||||
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||
entries = asyncio.run(_build_slack(_make_slack_adapter({"T1": client})))
|
||||
|
||||
assert {e["id"] for e in entries} == {"C001"}
|
||||
|
||||
def test_response_not_ok_breaks_pagination_for_that_workspace(self, tmp_path):
|
||||
client = _make_slack_client([
|
||||
{"ok": False, "error": "missing_scope"},
|
||||
])
|
||||
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||
entries = asyncio.run(_build_slack(_make_slack_adapter({"T1": client})))
|
||||
|
||||
assert entries == []
|
||||
|
||||
@@ -540,7 +540,7 @@ from gateway.config import Platform, PlatformConfig # noqa: E402
|
||||
|
||||
|
||||
def _make_slack_adapter():
|
||||
config = PlatformConfig(enabled=True, token="***")
|
||||
config = PlatformConfig(enabled=True, token="xoxb-fake-token")
|
||||
adapter = SlackAdapter(config)
|
||||
adapter._app = MagicMock()
|
||||
adapter._app.client = AsyncMock()
|
||||
@@ -549,39 +549,6 @@ def _make_slack_adapter():
|
||||
return adapter
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SlackAdapter diagnostics helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSlackAttachmentDiagnostics:
|
||||
def test_missing_scope_error_returns_actionable_notice(self):
|
||||
"""_describe_slack_api_error translates a missing_scope response into
|
||||
a user-facing notice mentioning the needed scope and the reinstall
|
||||
step. This is the helper used by every files.info call site (Slack
|
||||
Connect stubs + post-download failures) to surface scope problems
|
||||
without making an extra probe call per attachment.
|
||||
"""
|
||||
adapter = _make_slack_adapter()
|
||||
|
||||
response = {
|
||||
"error": "missing_scope",
|
||||
"needed": "files:read",
|
||||
"provided": "chat:write,files:write",
|
||||
}
|
||||
detail = adapter._describe_slack_api_error(response, file_obj={"id": "F123", "name": "photo.jpg"})
|
||||
assert detail is not None
|
||||
assert "files:read" in detail
|
||||
assert "reinstall" in detail.lower()
|
||||
assert "chat:write,files:write" in detail
|
||||
|
||||
def test_download_failure_403_returns_permission_notice(self):
|
||||
adapter = _make_slack_adapter()
|
||||
exc = _make_http_status_error(403)
|
||||
detail = adapter._describe_slack_download_failure(exc, file_obj={"name": "report.pdf"})
|
||||
assert "403" in detail
|
||||
assert "permission or scope" in detail
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SlackAdapter._download_slack_file
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -168,196 +168,19 @@ class TestQueueConsumptionAfterCompletion:
|
||||
assert retrieved is not None
|
||||
assert retrieved.text == "process this after"
|
||||
|
||||
def test_multiple_queues_overflow_fifo(self):
|
||||
"""Multiple /queue commands must stack in FIFO order, no merging.
|
||||
|
||||
The adapter's _pending_messages dict has a single slot per session,
|
||||
but GatewayRunner layers an overflow buffer on top so repeated
|
||||
/queue invocations all get their own turn in order.
|
||||
"""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = GatewayRunner.__new__(GatewayRunner)
|
||||
runner._queued_events = {}
|
||||
def test_multiple_queues_last_one_wins(self):
|
||||
"""If user /queue's multiple times, last message overwrites."""
|
||||
adapter = _StubAdapter()
|
||||
session_key = "telegram:user:123"
|
||||
|
||||
events = [
|
||||
MessageEvent(
|
||||
for text in ["first", "second", "third"]:
|
||||
event = MessageEvent(
|
||||
text=text,
|
||||
message_type=MessageType.TEXT,
|
||||
source=MagicMock(chat_id="123", platform=Platform.TELEGRAM),
|
||||
source=MagicMock(),
|
||||
message_id=f"q-{text}",
|
||||
)
|
||||
for text in ("first", "second", "third")
|
||||
]
|
||||
adapter._pending_messages[session_key] = event
|
||||
|
||||
for ev in events:
|
||||
runner._enqueue_fifo(session_key, ev, adapter)
|
||||
|
||||
# Slot holds head; overflow holds the tail in order.
|
||||
assert adapter._pending_messages[session_key].text == "first"
|
||||
assert [e.text for e in runner._queued_events[session_key]] == ["second", "third"]
|
||||
assert runner._queue_depth(session_key, adapter=adapter) == 3
|
||||
|
||||
def test_promote_advances_queue_fifo(self):
|
||||
"""After the slot drains, the next overflow item is promoted."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = GatewayRunner.__new__(GatewayRunner)
|
||||
runner._queued_events = {}
|
||||
adapter = _StubAdapter()
|
||||
session_key = "telegram:user:123"
|
||||
|
||||
for text in ("A", "B", "C"):
|
||||
runner._enqueue_fifo(
|
||||
session_key,
|
||||
MessageEvent(
|
||||
text=text,
|
||||
message_type=MessageType.TEXT,
|
||||
source=MagicMock(),
|
||||
message_id=f"q-{text}",
|
||||
),
|
||||
adapter,
|
||||
)
|
||||
|
||||
# Simulate turn 1 drain: consume slot, promote next.
|
||||
pending_event = _dequeue_pending_event(adapter, session_key)
|
||||
pending_event = runner._promote_queued_event(session_key, adapter, pending_event)
|
||||
assert pending_event is not None and pending_event.text == "A"
|
||||
assert adapter._pending_messages[session_key].text == "B"
|
||||
assert runner._queue_depth(session_key, adapter=adapter) == 2
|
||||
|
||||
# Simulate turn 2 drain.
|
||||
pending_event = _dequeue_pending_event(adapter, session_key)
|
||||
pending_event = runner._promote_queued_event(session_key, adapter, pending_event)
|
||||
assert pending_event.text == "B"
|
||||
assert adapter._pending_messages[session_key].text == "C"
|
||||
assert session_key not in runner._queued_events # overflow emptied
|
||||
|
||||
# Simulate turn 3 drain.
|
||||
pending_event = _dequeue_pending_event(adapter, session_key)
|
||||
pending_event = runner._promote_queued_event(session_key, adapter, pending_event)
|
||||
assert pending_event.text == "C"
|
||||
assert session_key not in adapter._pending_messages
|
||||
assert runner._queue_depth(session_key, adapter=adapter) == 0
|
||||
|
||||
# Turn 4: nothing pending.
|
||||
pending_event = _dequeue_pending_event(adapter, session_key)
|
||||
pending_event = runner._promote_queued_event(session_key, adapter, pending_event)
|
||||
assert pending_event is None
|
||||
|
||||
def test_promote_stages_overflow_when_slot_already_populated(self):
|
||||
"""If the slot was re-populated (e.g. by an interrupt follow-up),
|
||||
promotion must stage the overflow head without clobbering it."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = GatewayRunner.__new__(GatewayRunner)
|
||||
runner._queued_events = {}
|
||||
adapter = _StubAdapter()
|
||||
session_key = "telegram:user:123"
|
||||
|
||||
# /queue once — lands in slot. Second /queue — overflow.
|
||||
for text in ("Q1", "Q2"):
|
||||
runner._enqueue_fifo(
|
||||
session_key,
|
||||
MessageEvent(
|
||||
text=text,
|
||||
message_type=MessageType.TEXT,
|
||||
source=MagicMock(),
|
||||
message_id=f"q-{text}",
|
||||
),
|
||||
adapter,
|
||||
)
|
||||
|
||||
# Drain consumes Q1.
|
||||
pending_event = _dequeue_pending_event(adapter, session_key)
|
||||
assert pending_event.text == "Q1"
|
||||
|
||||
# Someone else (interrupt path) re-populates the slot.
|
||||
interrupt_follow_up = MessageEvent(
|
||||
text="urgent",
|
||||
message_type=MessageType.TEXT,
|
||||
source=MagicMock(),
|
||||
message_id="m-urg",
|
||||
)
|
||||
adapter._pending_messages[session_key] = interrupt_follow_up
|
||||
|
||||
# Promotion must NOT overwrite the interrupt follow-up; Q2 should
|
||||
# move into a position that runs AFTER it. In the current design
|
||||
# the overflow head is staged in the slot AFTER the interrupt
|
||||
# follow-up's turn runs — so here, the slot keeps the interrupt
|
||||
# and Q2 stays queued. Verify we return the interrupt event and
|
||||
# Q2 is positioned to run next.
|
||||
returned = runner._promote_queued_event(session_key, adapter, interrupt_follow_up)
|
||||
assert returned is interrupt_follow_up
|
||||
# Q2 was moved into the slot, evicting the interrupt? No —
|
||||
# current implementation puts Q2 in the slot unconditionally,
|
||||
# overwriting the interrupt. This is an acceptable edge-case
|
||||
# trade-off: /queue items always run after the currently-staged
|
||||
# pending_event (which is what `returned` is), and the slot
|
||||
# gets the next-in-line item.
|
||||
assert adapter._pending_messages[session_key].text == "Q2"
|
||||
|
||||
def test_queue_depth_counts_slot_plus_overflow(self):
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = GatewayRunner.__new__(GatewayRunner)
|
||||
runner._queued_events = {}
|
||||
adapter = _StubAdapter()
|
||||
session_key = "telegram:user:depth"
|
||||
|
||||
assert runner._queue_depth(session_key, adapter=adapter) == 0
|
||||
|
||||
runner._enqueue_fifo(
|
||||
session_key,
|
||||
MessageEvent(
|
||||
text="one",
|
||||
message_type=MessageType.TEXT,
|
||||
source=MagicMock(),
|
||||
message_id="q1",
|
||||
),
|
||||
adapter,
|
||||
)
|
||||
assert runner._queue_depth(session_key, adapter=adapter) == 1
|
||||
|
||||
for text in ("two", "three"):
|
||||
runner._enqueue_fifo(
|
||||
session_key,
|
||||
MessageEvent(
|
||||
text=text,
|
||||
message_type=MessageType.TEXT,
|
||||
source=MagicMock(),
|
||||
message_id=f"q-{text}",
|
||||
),
|
||||
adapter,
|
||||
)
|
||||
assert runner._queue_depth(session_key, adapter=adapter) == 3
|
||||
|
||||
def test_enqueue_preserves_text_no_merging(self):
|
||||
"""Each /queue item keeps its own text — never merged with neighbors."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = GatewayRunner.__new__(GatewayRunner)
|
||||
runner._queued_events = {}
|
||||
adapter = _StubAdapter()
|
||||
session_key = "telegram:user:nomerge"
|
||||
|
||||
texts = ["deploy the branch", "then run tests", "finally push"]
|
||||
for text in texts:
|
||||
runner._enqueue_fifo(
|
||||
session_key,
|
||||
MessageEvent(
|
||||
text=text,
|
||||
message_type=MessageType.TEXT,
|
||||
source=MagicMock(),
|
||||
message_id=f"q-{text[:4]}",
|
||||
),
|
||||
adapter,
|
||||
)
|
||||
|
||||
# Slot + overflow contain exactly the three texts, unmodified.
|
||||
collected = [adapter._pending_messages[session_key].text] + [
|
||||
e.text for e in runner._queued_events[session_key]
|
||||
]
|
||||
assert collected == texts
|
||||
retrieved = adapter.get_pending_message(session_key)
|
||||
assert retrieved.text == "third"
|
||||
|
||||
+1
-193
@@ -147,20 +147,7 @@ class TestAppMentionHandler:
|
||||
assert "app_mention" in registered_events
|
||||
assert "assistant_thread_started" in registered_events
|
||||
assert "assistant_thread_context_changed" in registered_events
|
||||
# Slack slash commands are registered via a single regex matcher
|
||||
# covering every COMMAND_REGISTRY entry (e.g. /hermes, /btw, /stop,
|
||||
# /model, ...) so users get native-slash parity with Discord and
|
||||
# Telegram. Verify the regex matches the key expected slashes.
|
||||
assert len(registered_commands) == 1, (
|
||||
f"expected 1 combined slash matcher, got {registered_commands!r}"
|
||||
)
|
||||
slash_matcher = registered_commands[0]
|
||||
import re as _re
|
||||
assert isinstance(slash_matcher, _re.Pattern)
|
||||
for expected in ("/hermes", "/btw", "/stop", "/model", "/help"):
|
||||
assert slash_matcher.match(expected), (
|
||||
f"Slack slash regex does not match {expected}"
|
||||
)
|
||||
assert "/hermes" in registered_commands
|
||||
|
||||
|
||||
class TestSlackConnectCleanup:
|
||||
@@ -511,35 +498,6 @@ class TestIncomingDocumentHandling:
|
||||
msg_event = adapter.handle_message.call_args[0][0]
|
||||
assert msg_event.message_type == MessageType.PHOTO
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_download_failure_is_surfaced_in_message_text(self, adapter):
|
||||
"""Attachment download failures (401/403/HTML-body/etc.) should be
|
||||
translated into a user-facing `[Slack attachment notice]` block so
|
||||
the agent can tell the user what to fix (e.g. missing files:read
|
||||
scope). No proactive files.info probe is made — the diagnostic
|
||||
runs only when the download actually fails.
|
||||
"""
|
||||
import httpx
|
||||
req = httpx.Request("GET", "https://files.slack.com/photo.jpg")
|
||||
resp = httpx.Response(403, request=req)
|
||||
|
||||
with patch.object(adapter, "_download_slack_file", new_callable=AsyncMock) as dl:
|
||||
dl.side_effect = httpx.HTTPStatusError("403", request=req, response=resp)
|
||||
event = self._make_event(text="what's in this?", files=[{
|
||||
"id": "F123",
|
||||
"mimetype": "image/jpeg",
|
||||
"name": "photo.jpg",
|
||||
"url_private_download": "https://files.slack.com/photo.jpg",
|
||||
"size": 1024,
|
||||
}])
|
||||
await adapter._handle_slack_message(event)
|
||||
|
||||
msg_event = adapter.handle_message.call_args[0][0]
|
||||
assert msg_event.message_type == MessageType.TEXT
|
||||
assert "[Slack attachment notice]" in msg_event.text
|
||||
assert "403" in msg_event.text
|
||||
assert "what's in this?" in msg_event.text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestMessageRouting
|
||||
@@ -1586,83 +1544,6 @@ class TestSlashCommands:
|
||||
msg = adapter.handle_message.call_args[0][0]
|
||||
assert msg.text == "/reasoning"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Native slash commands — /btw, /stop, /model, ... dispatched directly
|
||||
# instead of as /hermes subcommands. This is the Discord/Telegram parity
|
||||
# fix: the slash name itself becomes the command.
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_native_btw_slash(self, adapter):
|
||||
"""/btw with args must dispatch to /background, not /hermes btw."""
|
||||
command = {
|
||||
"command": "/btw",
|
||||
"text": "fix the failing test",
|
||||
"user_id": "U1",
|
||||
"channel_id": "C1",
|
||||
}
|
||||
await adapter._handle_slash_command(command)
|
||||
msg = adapter.handle_message.call_args[0][0]
|
||||
# The gateway command dispatcher resolves /btw -> background via
|
||||
# resolve_command() — our handler's job is just to deliver
|
||||
# "/btw <args>" to the gateway runner, which is what this asserts.
|
||||
assert msg.text == "/btw fix the failing test"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_native_stop_slash_no_args(self, adapter):
|
||||
command = {
|
||||
"command": "/stop",
|
||||
"text": "",
|
||||
"user_id": "U1",
|
||||
"channel_id": "C1",
|
||||
}
|
||||
await adapter._handle_slash_command(command)
|
||||
msg = adapter.handle_message.call_args[0][0]
|
||||
assert msg.text == "/stop"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_native_model_slash_with_args(self, adapter):
|
||||
command = {
|
||||
"command": "/model",
|
||||
"text": "anthropic/claude-sonnet-4",
|
||||
"user_id": "U1",
|
||||
"channel_id": "C1",
|
||||
}
|
||||
await adapter._handle_slash_command(command)
|
||||
msg = adapter.handle_message.call_args[0][0]
|
||||
assert msg.text == "/model anthropic/claude-sonnet-4"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_legacy_hermes_prefix_still_works(self, adapter):
|
||||
"""Backward compat: /hermes btw foo must still route to /btw foo.
|
||||
|
||||
Old workspace manifests only declared /hermes as the single slash.
|
||||
After users refresh their manifest they get /btw natively, but the
|
||||
legacy form must keep working during the transition.
|
||||
"""
|
||||
command = {
|
||||
"command": "/hermes",
|
||||
"text": "btw run the tests",
|
||||
"user_id": "U1",
|
||||
"channel_id": "C1",
|
||||
}
|
||||
await adapter._handle_slash_command(command)
|
||||
msg = adapter.handle_message.call_args[0][0]
|
||||
assert msg.text == "/btw run the tests"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_legacy_hermes_freeform_question(self, adapter):
|
||||
"""/hermes <free-form text> must stay as the raw text (non-command)."""
|
||||
command = {
|
||||
"command": "/hermes",
|
||||
"text": "what's the weather today?",
|
||||
"user_id": "U1",
|
||||
"channel_id": "C1",
|
||||
}
|
||||
await adapter._handle_slash_command(command)
|
||||
msg = adapter.handle_message.call_args[0][0]
|
||||
assert msg.text == "what's the weather today?"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestMessageSplitting
|
||||
@@ -2040,76 +1921,3 @@ class TestProgressMessageThread:
|
||||
"so each @mention starts its own thread"
|
||||
)
|
||||
assert msg_event.message_id == "2000000000.000001"
|
||||
|
||||
|
||||
class TestSlackReplyToText:
    """Check that thread replies expose the parent's text as ``reply_to_text``.

    The field exists so gateway.run can inject a ``[Replying to: "..."]``
    prefix, in parity with Telegram/Discord/Feishu/WeCom.
    """

    @pytest.mark.asyncio
    async def test_slack_reply_to_text_set_on_thread_reply(self, adapter):
        """A reply under a bot-posted parent (e.g. a cron summary) must carry
        the parent's text in ``reply_to_text``."""
        # Primary workspace only: no per-channel team mapping, no per-team bot ids.
        adapter._channel_team = {}
        adapter._team_bot_user_ids = {}

        # conversations_replies is mocked to return a bot-posted parent
        # followed by the user's reply.
        bot_parent = {
            "ts": "1000.0",
            "bot_id": "B_CRON",
            "text": "メール要約: 新着メール3件あります",
        }
        user_reply = {"ts": "1000.5", "user": "U_USER", "text": "詳細を教えて"}
        adapter._app.client.conversations_replies = AsyncMock(
            return_value={"messages": [bot_parent, user_reply]}
        )

        # A DM is used so mention-gating cannot short-circuit the handler.
        thread_reply_event = {
            "text": "詳細を教えて",
            "user": "U_USER",
            "channel": "D123",
            "channel_type": "im",
            "ts": "1000.5",
            "thread_ts": "1000.0",  # marks the event as a thread reply
        }

        name_lookup = AsyncMock(return_value="Alice")
        with patch.object(adapter, "_resolve_user_name", new=name_lookup):
            await adapter._handle_slack_message(thread_reply_event)

        recorded_call = adapter.handle_message.call_args
        assert recorded_call is not None, (
            "handle_message must be invoked for thread-reply DM"
        )
        delivered = recorded_call[0][0]
        assert delivered.reply_to_message_id == "1000.0"
        # Key expectation: the parent's text travels with the event so the
        # gateway can inject it when the session history lacks it.
        assert delivered.reply_to_text is not None
        assert "メール要約" in delivered.reply_to_text

    @pytest.mark.asyncio
    async def test_slack_reply_to_text_none_for_top_level_message(self, adapter):
        """A message without ``thread_ts`` must leave ``reply_to_text`` unset."""
        # Top-level DM: the event deliberately has no thread_ts key.
        top_level_event = {
            "text": "hello",
            "user": "U_USER",
            "channel": "D123",
            "channel_type": "im",
            "ts": "1000.0",
        }

        name_lookup = AsyncMock(return_value="Alice")
        with patch.object(adapter, "_resolve_user_name", new=name_lookup):
            await adapter._handle_slack_message(top_level_event)

        recorded_call = adapter.handle_message.call_args
        assert recorded_call is not None
        delivered = recorded_call[0][0]
        assert delivered.reply_to_text is None
        # For a top-level message the reply id may be None or empty, but
        # must be falsy.
        assert not delivered.reply_to_message_id
|
||||
|
||||
@@ -276,44 +276,23 @@ class TestSlackThreadContext:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_skips_bot_messages(self):
|
||||
"""Self-bot child replies are skipped to avoid circular context,
|
||||
but non-self bots (e.g. cron posts, third-party integrations) are kept.
|
||||
|
||||
Regression guard for the fix in _fetch_thread_context: previously ALL
|
||||
bot messages were dropped, which lost context when the bot was replying
|
||||
to a cron-posted thread parent."""
|
||||
adapter = _make_adapter()
|
||||
mock_client = adapter._team_clients["T1"]
|
||||
mock_client.conversations_replies = AsyncMock(return_value={
|
||||
"messages": [
|
||||
{"ts": "1000.0", "user": "U1", "text": "Parent"},
|
||||
# Self-bot reply -> must be skipped (circular)
|
||||
{
|
||||
"ts": "1000.1",
|
||||
"bot_id": "B_SELF",
|
||||
"user": "U_BOT",
|
||||
"text": "Previous bot self-reply (should be skipped)",
|
||||
},
|
||||
# Third-party bot child -> kept (useful context)
|
||||
{
|
||||
"ts": "1000.15",
|
||||
"bot_id": "B_OTHER",
|
||||
"user": "U_OTHER_BOT",
|
||||
"text": "Deploy succeeded",
|
||||
},
|
||||
{"ts": "1000.1", "bot_id": "B1", "text": "Bot reply (should be skipped)"},
|
||||
{"ts": "1000.2", "user": "U1", "text": "Current"},
|
||||
]
|
||||
})
|
||||
adapter._user_name_cache = {"U1": "Alice", "U_OTHER_BOT": "DeployBot"}
|
||||
adapter._user_name_cache = {"U1": "Alice"}
|
||||
|
||||
context = await adapter._fetch_thread_context(
|
||||
channel_id="C1", thread_ts="1000.0", current_ts="1000.2", team_id="T1"
|
||||
)
|
||||
|
||||
assert "Previous bot self-reply" not in context
|
||||
assert "Bot reply" not in context
|
||||
assert "Alice: Parent" in context
|
||||
# Third-party bot message must now be included
|
||||
assert "Deploy succeeded" in context
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_thread(self):
|
||||
@@ -337,166 +316,6 @@ class TestSlackThreadContext:
|
||||
)
|
||||
assert context == ""
|
||||
|
||||
@pytest.mark.asyncio
async def test_fetch_thread_context_includes_bot_parent(self):
    """A bot-posted thread parent (e.g. a cron summary) must appear in the
    fetched context, marked with ``[thread parent]``."""
    adapter = _make_adapter()

    # Parent written by a bot (cron job).
    cron_parent = {
        "ts": "1000.0",
        "bot_id": "B123",
        "subtype": "bot_message",
        "username": "cron",
        "text": "メール要約: 本日の新着3件",
    }
    # The user reply that triggered the fetch.
    trigger_reply = {"ts": "1000.1", "user": "U1", "text": "詳細を教えて"}

    team_client = adapter._team_clients["T1"]
    team_client.conversations_replies = AsyncMock(
        return_value={"messages": [cron_parent, trigger_reply]}
    )
    adapter._user_name_cache = {"U1": "Alice"}

    context = await adapter._fetch_thread_context(
        channel_id="C1",
        thread_ts="1000.0",
        current_ts="1000.1",  # the trigger message itself is excluded
        team_id="T1",
    )

    for expected_fragment in ("[thread parent]", "メール要約: 本日の新着3件"):
        assert expected_fragment in context
|
||||
|
||||
@pytest.mark.asyncio
async def test_fetch_thread_context_excludes_self_bot_replies(self):
    """The non-self bot parent and user replies stay in the context, while
    child replies from the adapter's own bot are dropped."""
    adapter = _make_adapter()

    cron_parent = {"ts": "1000.0", "bot_id": "B_CRON", "text": "Cron summary"}
    # Child reply from the adapter's own bot: expected to be excluded.
    own_bot_reply = {
        "ts": "1000.1",
        "bot_id": "B_SELF",
        "user": "U_BOT",  # matches adapter._bot_user_id
        "text": "Previous self reply",
    }
    # Ordinary user reply: expected to be kept.
    user_follow_up = {"ts": "1000.2", "user": "U1", "text": "Follow-up question"}
    # The triggering message, excluded because its ts equals current_ts.
    trigger = {"ts": "1000.3", "user": "U1", "text": "Current"}

    team_client = adapter._team_clients["T1"]
    team_client.conversations_replies = AsyncMock(
        return_value={
            "messages": [cron_parent, own_bot_reply, user_follow_up, trigger]
        }
    )
    adapter._user_name_cache = {"U1": "Alice"}

    context = await adapter._fetch_thread_context(
        channel_id="C1", thread_ts="1000.0", current_ts="1000.3", team_id="T1"
    )

    assert "Cron summary" in context
    assert "[thread parent]" in context
    assert "Previous self reply" not in context
    assert "Follow-up question" in context
    assert "Current" not in context
|
||||
|
||||
@pytest.mark.asyncio
async def test_fetch_thread_context_multi_workspace(self):
    """Self-bot filtering has to rely on the bot user id of the workspace
    being queried, so that a bot id belonging to another workspace does not
    cause a legitimate message to be filtered out."""
    adapter = _make_adapter()

    # Register a second workspace whose bot user id differs from T1's.
    adapter._team_clients["T2"] = AsyncMock()
    adapter._team_bot_user_ids = {"T1": "U_BOT_T1", "T2": "U_BOT_T2"}
    adapter._bot_user_id = "U_BOT_T1"
    adapter._channel_team["C2"] = "T2"

    parent = {"ts": "2000.0", "user": "U2", "text": "Parent T2"}
    # Carries the *T1* bot's user id; seen from T2 it is a third-party bot
    # and therefore must be kept.
    foreign_bot_reply = {
        "ts": "2000.1",
        "bot_id": "B_FOREIGN",
        "user": "U_BOT_T1",
        "team": "T2",
        "text": "Cross-workspace bot reply",
    }
    # T2's own bot: must be skipped.
    own_bot_reply = {
        "ts": "2000.2",
        "bot_id": "B_SELF_T2",
        "user": "U_BOT_T2",
        "team": "T2",
        "text": "Own T2 bot reply",
    }
    trigger = {"ts": "2000.3", "user": "U2", "text": "Current"}

    t2_client = adapter._team_clients["T2"]
    t2_client.conversations_replies = AsyncMock(
        return_value={
            "messages": [parent, foreign_bot_reply, own_bot_reply, trigger]
        }
    )
    adapter._user_name_cache = {"U2": "Bob"}

    context = await adapter._fetch_thread_context(
        channel_id="C2", thread_ts="2000.0", current_ts="2000.3", team_id="T2"
    )

    assert "Parent T2" in context
    assert "Cross-workspace bot reply" in context
    assert "Own T2 bot reply" not in context
|
||||
|
||||
@pytest.mark.asyncio
async def test_fetch_thread_context_current_ts_excluded(self):
    """Regression guard: the message whose ts equals ``current_ts`` is never
    part of the context, since it is delivered as the user message itself."""
    adapter = _make_adapter()

    thread_messages = [
        {"ts": "1000.0", "user": "U1", "text": "Parent"},
        {"ts": "1000.1", "user": "U1", "text": "DO NOT INCLUDE THIS"},
    ]
    team_client = adapter._team_clients["T1"]
    team_client.conversations_replies = AsyncMock(
        return_value={"messages": thread_messages}
    )
    adapter._user_name_cache = {"U1": "Alice"}

    context = await adapter._fetch_thread_context(
        channel_id="C1", thread_ts="1000.0", current_ts="1000.1", team_id="T1"
    )

    assert "Parent" in context
    assert "DO NOT INCLUDE THIS" not in context
|
||||
|
||||
@pytest.mark.asyncio
async def test_fetch_thread_parent_text_from_cache(self):
    """Once the thread-context cache is warm, ``_fetch_thread_parent_text``
    should answer from it instead of calling conversations.replies again."""
    adapter = _make_adapter()
    mock_client = adapter._team_clients["T1"]

    thread_messages = [
        {"ts": "1000.0", "bot_id": "B123", "text": "Parent summary"},
        {"ts": "1000.1", "user": "U1", "text": "reply"},
    ]
    mock_client.conversations_replies = AsyncMock(
        return_value={"messages": thread_messages}
    )

    # First fetch goes to the API and warms the cache.
    await adapter._fetch_thread_context(
        channel_id="C1", thread_ts="1000.0", current_ts="1000.1", team_id="T1"
    )
    assert mock_client.conversations_replies.await_count == 1

    parent_text = await adapter._fetch_thread_parent_text(
        channel_id="C1", thread_ts="1000.0", team_id="T1"
    )
    assert parent_text == "Parent summary"
    # The await count is unchanged, i.e. no further API call happened.
    assert mock_client.conversations_replies.await_count == 1
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# _has_active_session_for_thread — session key fix (#5833)
|
||||
|
||||
@@ -55,12 +55,10 @@ CHANNEL_ID = "C0AQWDLHY9M"
|
||||
OTHER_CHANNEL_ID = "C9999999999"
|
||||
|
||||
|
||||
def _make_adapter(require_mention=None, strict_mention=None, free_response_channels=None):
|
||||
def _make_adapter(require_mention=None, free_response_channels=None):
|
||||
extra = {}
|
||||
if require_mention is not None:
|
||||
extra["require_mention"] = require_mention
|
||||
if strict_mention is not None:
|
||||
extra["strict_mention"] = strict_mention
|
||||
if free_response_channels is not None:
|
||||
extra["free_response_channels"] = free_response_channels
|
||||
|
||||
@@ -136,48 +134,6 @@ def test_require_mention_env_var_default_true(monkeypatch):
|
||||
assert adapter._slack_require_mention() is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: _slack_strict_mention
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_strict_mention_defaults_to_false(monkeypatch):
    """With no config value and no env var, strict mention reports False."""
    # Remove the env var so it cannot supply a value.
    monkeypatch.delenv("SLACK_STRICT_MENTION", raising=False)
    strict = _make_adapter()._slack_strict_mention()
    assert strict is False
|
||||
|
||||
|
||||
def test_strict_mention_true():
    """A boolean ``True`` config value turns strict mention on."""
    strict = _make_adapter(strict_mention=True)._slack_strict_mention()
    assert strict is True
|
||||
|
||||
|
||||
def test_strict_mention_false():
    """A boolean ``False`` config value keeps strict mention off."""
    strict = _make_adapter(strict_mention=False)._slack_strict_mention()
    assert strict is False
|
||||
|
||||
|
||||
def test_strict_mention_string_true():
    """The string ``"true"`` is accepted as an enabled strict-mention value."""
    strict = _make_adapter(strict_mention="true")._slack_strict_mention()
    assert strict is True
|
||||
|
||||
|
||||
def test_strict_mention_string_off():
    """The string ``"off"`` is accepted as a disabled strict-mention value."""
    strict = _make_adapter(strict_mention="off")._slack_strict_mention()
    assert strict is False
|
||||
|
||||
|
||||
def test_strict_mention_malformed_stays_false():
    """An unrecognised value leaves strict mode OFF (fail-open to the legacy
    behavior)."""
    strict = _make_adapter(strict_mention="maybe")._slack_strict_mention()
    assert strict is False
|
||||
|
||||
|
||||
def test_strict_mention_env_var_fallback(monkeypatch):
    """Without a config value, ``SLACK_STRICT_MENTION`` decides the setting."""
    monkeypatch.setenv("SLACK_STRICT_MENTION", "true")
    # No strict_mention argument is passed, so the env var is the only source.
    strict = _make_adapter()._slack_strict_mention()
    assert strict is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: _slack_free_response_channels
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -354,109 +310,3 @@ def test_config_bridges_slack_free_response_channels(monkeypatch, tmp_path):
|
||||
import os as _os
|
||||
assert _os.environ["SLACK_REQUIRE_MENTION"] == "false"
|
||||
assert _os.environ["SLACK_FREE_RESPONSE_CHANNELS"] == "C0AQWDLHY9M,C9999999999"
|
||||
|
||||
|
||||
def test_config_bridges_slack_reply_in_thread(monkeypatch, tmp_path):
    """``slack.reply_in_thread: false`` in config.yaml must reach the Slack
    platform config and change how ``_resolve_thread_ts`` answers."""
    from gateway.config import load_gateway_config

    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()
    config_file = home_dir / "config.yaml"
    config_file.write_text(
        "slack:\n"
        " reply_in_thread: false\n",
        encoding="utf-8",
    )

    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    monkeypatch.setenv("SLACK_BOT_TOKEN", "xoxb-test")

    config = load_gateway_config()
    assert config is not None

    slack_config = config.platforms[Platform.SLACK]
    assert slack_config.extra.get("reply_in_thread") is False

    adapter = SlackAdapter(slack_config)

    # No thread metadata at all: nothing to thread under.
    resolved = adapter._resolve_thread_ts(reply_to="171.000", metadata={})
    assert resolved is None

    # Top-level channel messages arrive with metadata.thread_id == reply_to
    # because the inbound handler falls back to event.ts for session keying.
    # They must count as non-threaded so reply_in_thread=false also applies
    # in channels, not just DMs.
    resolved = adapter._resolve_thread_ts(
        reply_to="171.000",
        metadata={"thread_id": "171.000"},
    )
    assert resolved is None

    # A real thread reply (reply_to differs from the thread parent) must
    # still resolve to the parent so conversation context is preserved.
    resolved = adapter._resolve_thread_ts(
        reply_to="171.500",
        metadata={"thread_id": "171.000"},
    )
    assert resolved == "171.000"
|
||||
|
||||
|
||||
def test_config_bridges_slack_strict_mention(monkeypatch, tmp_path):
    """Loading a config with ``slack.strict_mention: true`` must leave
    ``SLACK_STRICT_MENTION`` set to ``"true"`` in the environment."""
    import os as _os

    from gateway.config import load_gateway_config

    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()
    config_file = home_dir / "config.yaml"
    config_file.write_text(
        "slack:\n"
        " strict_mention: true\n",
        encoding="utf-8",
    )

    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    # Start without the variable so its presence afterwards comes from loading.
    monkeypatch.delenv("SLACK_STRICT_MENTION", raising=False)

    loaded = load_gateway_config()

    assert loaded is not None
    assert _os.environ["SLACK_STRICT_MENTION"] == "true"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regression: strict mode must NOT persist mentions into _mentioned_threads
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prevents agent-to-agent ack loops — if a strict-mode bot remembered every
|
||||
# thread it was mentioned in, the next message from the other agent in that
|
||||
# thread would re-trigger the bot and defeat the entire feature.
|
||||
|
||||
def test_mention_in_strict_mode_does_not_register_thread():
    """With strict mention enabled, an @mention inside a thread must not add
    that thread to ``_mentioned_threads``."""
    adapter = _make_adapter(strict_mention=True)
    adapter._bot_user_id = "U_BOT"
    adapter._mentioned_threads = set()
    adapter._MENTIONED_THREADS_MAX = 5000

    thread_ts = "1700000000.100200"
    # The incoming message sits inside an existing thread.
    event_thread_ts = thread_ts

    text = "<@U_BOT> hello"
    mention_token = f"<@{adapter._bot_user_id}>"
    assert mention_token in text

    # Mirrors the handler's @mention + strict-mode guard around
    # _mentioned_threads.add(): when strict is on the add must be skipped.
    should_register = bool(event_thread_ts) and not adapter._slack_strict_mention()
    if should_register:
        adapter._mentioned_threads.add(event_thread_ts)

    assert thread_ts not in adapter._mentioned_threads
|
||||
|
||||
|
||||
def test_mention_outside_strict_mode_still_registers_thread():
    """With strict mention disabled, an @mention inside a thread still adds
    that thread to ``_mentioned_threads``."""
    adapter = _make_adapter(strict_mention=False)
    adapter._bot_user_id = "U_BOT"
    adapter._mentioned_threads = set()
    adapter._MENTIONED_THREADS_MAX = 5000

    thread_ts = "1700000000.100200"
    event_thread_ts = thread_ts

    text = "<@U_BOT> hello"
    mention_token = f"<@{adapter._bot_user_id}>"
    assert mention_token in text

    # Same guard as the strict-mode test; here it is expected to let the add run.
    should_register = bool(event_thread_ts) and not adapter._slack_strict_mention()
    if should_register:
        adapter._mentioned_threads.add(event_thread_ts)

    assert thread_ts in adapter._mentioned_threads
|
||||
|
||||
@@ -12,9 +12,9 @@ from gateway.platforms.base import MessageEvent
|
||||
from gateway.session import SessionEntry, SessionSource, build_session_key
|
||||
|
||||
|
||||
def _make_source(platform: Platform = Platform.TELEGRAM) -> SessionSource:
|
||||
def _make_source() -> SessionSource:
|
||||
return SessionSource(
|
||||
platform=platform,
|
||||
platform=Platform.TELEGRAM,
|
||||
user_id="u1",
|
||||
chat_id="c1",
|
||||
user_name="tester",
|
||||
@@ -22,24 +22,24 @@ def _make_source(platform: Platform = Platform.TELEGRAM) -> SessionSource:
|
||||
)
|
||||
|
||||
|
||||
def _make_event(text: str, *, platform: Platform = Platform.TELEGRAM) -> MessageEvent:
|
||||
def _make_event(text: str) -> MessageEvent:
|
||||
return MessageEvent(
|
||||
text=text,
|
||||
source=_make_source(platform),
|
||||
source=_make_source(),
|
||||
message_id="m1",
|
||||
)
|
||||
|
||||
|
||||
def _make_runner(session_entry: SessionEntry, *, platform: Platform = Platform.TELEGRAM):
|
||||
def _make_runner(session_entry: SessionEntry):
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.config = GatewayConfig(
|
||||
platforms={platform: PlatformConfig(enabled=True, token="***")}
|
||||
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
|
||||
)
|
||||
adapter = MagicMock()
|
||||
adapter.send = AsyncMock()
|
||||
runner.adapters = {platform: adapter}
|
||||
runner.adapters = {Platform.TELEGRAM: adapter}
|
||||
runner._voice_mode = {}
|
||||
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
|
||||
runner.session_store = MagicMock()
|
||||
@@ -224,93 +224,6 @@ async def test_handle_message_persists_agent_token_counts(monkeypatch):
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_first_run_slack_home_channel_onboarding_uses_parent_command(monkeypatch):
    """On Slack, with no stored sessions and no ``SLACK_HOME_CHANNEL``, the
    onboarding text sent through the adapter must mention ``/hermes sethome``
    and must not contain ``Type /sethome``."""
    import gateway.run as gateway_run

    slack_source = _make_source(Platform.SLACK)
    session_entry = SessionEntry(
        session_key=build_session_key(slack_source),
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.SLACK,
        chat_type="dm",
    )
    runner = _make_runner(session_entry, platform=Platform.SLACK)
    runner.session_store.load_transcript.return_value = []
    runner.session_store.has_any_sessions.return_value = False

    # Canned agent outcome returned by the stubbed _run_agent.
    agent_result = {
        "final_response": "ok",
        "messages": [],
        "tools": [],
        "history_offset": 0,
        "last_prompt_tokens": 0,
        "input_tokens": 0,
        "output_tokens": 0,
        "model": "openai/test-model",
    }
    runner._run_agent = AsyncMock(return_value=agent_result)

    monkeypatch.delenv("SLACK_HOME_CHANNEL", raising=False)
    monkeypatch.setattr(
        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
    )
    monkeypatch.setattr(
        "agent.model_metadata.get_model_context_length",
        lambda *_args, **_kwargs: 100000,
    )

    incoming = _make_event("hello", platform=Platform.SLACK)
    result = await runner._handle_message(incoming)

    assert result == "ok"
    slack_send = runner.adapters[Platform.SLACK].send
    slack_send.assert_awaited_once()
    # The second positional argument of send() is the text being checked.
    onboarding = slack_send.await_args.args[1]
    assert "/hermes sethome" in onboarding
    assert "Type /sethome" not in onboarding
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_first_run_non_slack_home_channel_onboarding_keeps_direct_command(monkeypatch):
    """On Telegram, with no stored sessions and no ``TELEGRAM_HOME_CHANNEL``,
    the onboarding text sent through the adapter must contain
    ``Type /sethome``."""
    import gateway.run as gateway_run

    telegram_source = _make_source(Platform.TELEGRAM)
    session_entry = SessionEntry(
        session_key=build_session_key(telegram_source),
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
    )
    runner = _make_runner(session_entry, platform=Platform.TELEGRAM)
    runner.session_store.load_transcript.return_value = []
    runner.session_store.has_any_sessions.return_value = False

    # Canned agent outcome returned by the stubbed _run_agent.
    agent_result = {
        "final_response": "ok",
        "messages": [],
        "tools": [],
        "history_offset": 0,
        "last_prompt_tokens": 0,
        "input_tokens": 0,
        "output_tokens": 0,
        "model": "openai/test-model",
    }
    runner._run_agent = AsyncMock(return_value=agent_result)

    monkeypatch.delenv("TELEGRAM_HOME_CHANNEL", raising=False)
    monkeypatch.setattr(
        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
    )
    monkeypatch.setattr(
        "agent.model_metadata.get_model_context_length",
        lambda *_args, **_kwargs: 100000,
    )

    incoming = _make_event("hello", platform=Platform.TELEGRAM)
    result = await runner._handle_message(incoming)

    assert result == "ok"
    telegram_send = runner.adapters[Platform.TELEGRAM].send
    telegram_send.assert_awaited_once()
    # The second positional argument of send() is the text being checked.
    onboarding = telegram_send.await_args.args[1]
    assert "Type /sethome" in onboarding
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_handle_message_discards_stale_result_after_session_invalidation(monkeypatch):
|
||||
import gateway.run as gateway_run
|
||||
|
||||
@@ -20,8 +20,6 @@ from hermes_cli.commands import (
|
||||
discord_skill_commands,
|
||||
gateway_help_lines,
|
||||
resolve_command,
|
||||
slack_app_manifest,
|
||||
slack_native_slashes,
|
||||
slack_subcommand_map,
|
||||
telegram_bot_commands,
|
||||
telegram_menu_commands,
|
||||
@@ -258,115 +256,6 @@ class TestSlackSubcommandMap:
|
||||
assert cmd.name not in mapping
|
||||
|
||||
|
||||
class TestSlackNativeSlashes:
    """Slack native slash command generation — used to register every
    COMMAND_REGISTRY entry as a first-class Slack slash, matching Discord
    and Telegram."""

    def test_returns_triples(self):
        """Every entry is a ``(name, description, hint)`` tuple of strings
        with a non-empty name, and at least ten entries are produced."""
        slashes = slack_native_slashes()
        assert len(slashes) >= 10
        for entry in slashes:
            assert isinstance(entry, tuple) and len(entry) == 3
            name, desc, hint = entry
            assert isinstance(name, str) and name
            assert isinstance(desc, str)
            assert isinstance(hint, str)

    def test_hermes_catchall_is_first(self):
        """``/hermes`` must be reserved as the first slot so the legacy
        ``/hermes <subcommand>`` form keeps working after we add new
        commands and hit the 50-slash cap."""
        slashes = slack_native_slashes()
        assert slashes[0][0] == "hermes"

    def test_names_respect_slack_limits(self):
        """Names are at most 32 chars and use only a-z, 0-9, ``-`` and ``_``."""
        import string

        # Slack: lowercase a-z, 0-9, hyphens, underscores; max 32 chars.
        # An explicit ASCII set is used because ``str.isalnum()`` also
        # accepts non-ASCII letters and digits, which are outside that rule.
        allowed_chars = frozenset(string.ascii_lowercase + string.digits + "-_")
        for name, _desc, _hint in slack_native_slashes():
            assert len(name) <= 32, f"slash {name!r} exceeds 32 chars"
            assert name == name.lower()
            for ch in name:
                assert ch in allowed_chars, f"invalid char {ch!r} in {name!r}"

    def test_under_fifty_command_cap(self):
        """Slack allows at most 50 slash commands per app."""
        assert len(slack_native_slashes()) <= 50

    def test_unique_names(self):
        """No slash name may be generated twice."""
        names = [n for n, _d, _h in slack_native_slashes()]
        assert len(names) == len(set(names)), "duplicate Slack slash names"

    def test_includes_canonical_commands(self):
        """A sample of gateway-available canonical commands is present."""
        names = {n for n, _d, _h in slack_native_slashes()}
        for expected in ("new", "stop", "background", "model", "help", "status"):
            assert expected in names, f"missing canonical /{expected}"

    def test_includes_aliases_as_first_class_slashes(self):
        """Aliases (/btw, /bg, /reset, /q) must be registered as standalone
        slashes — this is the whole point of native-slashes parity."""
        names = {n for n, _d, _h in slack_native_slashes()}
        assert "btw" in names
        assert "bg" in names
        assert "reset" in names
        assert "q" in names

    def test_telegram_parity(self):
        """Every Telegram bot command must be registerable on Slack too.

        This catches the old behavior where Slack users couldn't invoke
        commands like /btw natively. If a future command surfaces on
        Telegram but not Slack (because of Slack's 50-slash cap), this
        test fails loudly so we can curate the list rather than silently
        dropping parity.
        """
        slack_names = {n for n, _d, _h in slack_native_slashes()}
        tg_names = {n for n, _d in telegram_bot_commands()}

        # The two platforms may differ in separator style, so both sides are
        # normalized before comparing: hyphens become underscores, one level
        # of doubled underscores is collapsed, and edge underscores are
        # trimmed. Names that differ in any other way still count as missing.
        def _norm(s: str) -> str:
            return s.replace("-", "_").replace("__", "_").strip("_")

        slack_norm = {_norm(n) for n in slack_names}
        tg_norm = {_norm(n) for n in tg_names}
        missing = tg_norm - slack_norm
        assert not missing, (
            f"commands on Telegram but missing from Slack native slashes: {sorted(missing)}"
        )
|
||||
|
||||
|
||||
class TestSlackAppManifest:
    """Checks on the generated Slack app manifest (used by `hermes slack manifest`)."""

    def test_returns_dict(self):
        """The manifest is a dict exposing ``features.slash_commands``."""
        manifest = slack_app_manifest()
        assert isinstance(manifest, dict)
        assert "features" in manifest
        features = manifest["features"]
        assert "slash_commands" in features

    def test_each_slash_has_required_fields(self):
        """Each slash entry has a ``/``-prefixed command plus ``description``,
        ``url`` and ``should_escape`` keys."""
        slash_entries = slack_app_manifest()["features"]["slash_commands"]
        for slash in slash_entries:
            assert slash["command"].startswith("/")
            assert "description" in slash
            assert "url" in slash
            # should_escape has to be spelled out: Slack's default of True
            # HTML-escapes the args, whereas the raw text is wanted here.
            assert "should_escape" in slash

    def test_btw_is_in_manifest(self):
        """Regression: /btw has to be a native Slack slash rather than only
        a /hermes subcommand."""
        slash_entries = slack_app_manifest()["features"]["slash_commands"]
        command_names = [slash["command"] for slash in slash_entries]
        assert "/btw" in command_names

    def test_custom_request_url(self):
        """A ``request_url`` argument is applied to every slash entry's url."""
        manifest = slack_app_manifest(request_url="https://example.com/slack")
        slash_entries = manifest["features"]["slash_commands"]
        for slash in slash_entries:
            assert slash["url"] == "https://example.com/slack"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config-gated gateway commands
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,210 @@
|
||||
"""Tests for the kanban CLI surface (hermes_cli.kanban)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli import kanban as kc
|
||||
from hermes_cli import kanban_db as kb
|
||||
|
||||
|
||||
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide a temporary ``.hermes`` directory with the kanban DB initialised.

    ``HERMES_HOME`` points at the new directory and ``Path.home`` is patched
    to return ``tmp_path`` before ``kb.init_db()`` runs. Returns the
    ``.hermes`` path.
    """
    hermes_dir = tmp_path.joinpath(".hermes")
    hermes_dir.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    kb.init_db()
    return hermes_dir
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Workspace flag parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    "value,expected",
    [
        ("scratch", ("scratch", None)),
        ("worktree", ("worktree", None)),
        ("dir:/tmp/work", ("dir", "/tmp/work")),
    ],
)
def test_parse_workspace_flag_valid(value, expected):
    """Each accepted flag value parses into the expected ``(kind, path)`` pair."""
    parsed = kc._parse_workspace_flag(value)
    assert parsed == expected
|
||||
|
||||
|
||||
def test_parse_workspace_flag_expands_user():
    """A ``dir:~/...`` value comes back with the leading ``~`` replaced."""
    parsed = kc._parse_workspace_flag("dir:~/vault")
    kind, path = parsed
    assert kind == "dir"
    assert path.endswith("/vault")
    assert not path.startswith("~")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bad", ["cloud", "dir:", "", "worktree:/x"])
def test_parse_workspace_flag_rejects(bad):
    """Non-empty malformed values raise ``ArgumentTypeError``; the empty
    string is the exception and parses to the default."""
    if bad:
        with pytest.raises(argparse.ArgumentTypeError):
            kc._parse_workspace_flag(bad)
    else:
        # Empty input is not an error: it yields the default workspace.
        assert kc._parse_workspace_flag(bad) == ("scratch", None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_run_slash_no_args_shows_usage(kanban_home):
    """An empty command line yields text that mentions kanban and at least
    one of the usage keywords."""
    lowered = kc.run_slash("").lower()
    assert "kanban" in lowered
    assert any(word in lowered for word in ("create", "subcommand", "action"))
|
||||
|
||||
|
||||
def test_run_slash_create_and_list(kanban_home):
    """A created task is reported as ``Created`` and then shows up, with its
    assignee, in ``list``."""
    created = kc.run_slash("create 'ship feature' --assignee alice")
    assert "Created" in created

    listing = kc.run_slash("list")
    for fragment in ("ship feature", "alice"):
        assert fragment in listing
|
||||
|
||||
|
||||
def test_run_slash_create_with_parent_and_cascade(kanban_home):
    """A child created with ``--parent`` starts as todo and is listed as
    ready once the parent has been completed."""
    import re

    # Create the parent and pull its "t_xxxx" id out of the
    # "Created t_xxxx (ready, ...)" response.
    parent_out = kc.run_slash("create 'parent' --assignee alice")
    id_match = re.search(r"(t_[a-f0-9]+)", parent_out)
    assert id_match
    p = id_match.group(1)

    child_out = kc.run_slash(f"create 'child' --assignee bob --parent {p}")
    assert "todo" in child_out  # the child begins life as todo

    # Completing the parent should promote the child; the explicit status
    # filter shows it among the ready tasks.
    kc.run_slash(f"complete {p}")
    ready_list = kc.run_slash("list --status ready")
    assert "child" in ready_list
|
||||
|
||||
|
||||
def test_run_slash_show_includes_comments(kanban_home):
    """A comment added to a task is visible in that task's ``show`` output."""
    import re

    created = kc.run_slash("create 'x'")
    tid = re.search(r"(t_[a-f0-9]+)", created).group(1)

    kc.run_slash(f"comment {tid} 'source is paywalled'")

    details = kc.run_slash(f"show {tid}")
    assert "source is paywalled" in details
|
||||
|
||||
|
||||
def test_run_slash_block_unblock_cycle(kanban_home):
    """A claimed task can be blocked and then unblocked, each step
    acknowledged in the command output."""
    import re

    created = kc.run_slash("create 'x' --assignee alice")
    tid = re.search(r"(t_[a-f0-9]+)", created).group(1)

    # The task is claimed first so that block() finds it running.
    kc.run_slash(f"claim {tid}")

    blocked = kc.run_slash(f"block {tid} 'need decision'")
    assert "Blocked" in blocked

    unblocked = kc.run_slash(f"unblock {tid}")
    assert "Unblocked" in unblocked
|
||||
|
||||
|
||||
def test_run_slash_json_output(kanban_home):
    """With ``--json``, ``create`` prints a JSON object describing the task."""
    raw = kc.run_slash("create 'jsontask' --assignee alice --json")
    payload = json.loads(raw)

    expected_fields = (
        ("title", "jsontask"),
        ("assignee", "alice"),
        ("status", "ready"),
    )
    for key, value in expected_fields:
        assert payload[key] == value
|
||||
|
||||
|
||||
def test_run_slash_dispatch_dry_run_counts(kanban_home):
    """``dispatch --dry-run`` output includes a ``Spawned:`` line."""
    for create_command in (
        "create 'a' --assignee alice",
        "create 'b' --assignee bob",
    ):
        kc.run_slash(create_command)

    report = kc.run_slash("dispatch --dry-run")
    assert "Spawned:" in report
|
||||
|
||||
|
||||
def test_run_slash_context_output_format(kanban_home):
    """``context`` output contains the task title, its body and its comments."""
    import re

    created = kc.run_slash("create 'tech spec' --assignee alice --body 'write an RFC'")
    tid = re.search(r"(t_[a-f0-9]+)", created).group(1)
    kc.run_slash(f"comment {tid} 'remember to include performance section'")

    ctx = kc.run_slash(f"context {tid}")
    for fragment in ("tech spec", "write an RFC", "performance section"):
        assert fragment in ctx
|
||||
|
||||
|
||||
def test_run_slash_tenant_filter(kanban_home):
    """``list --tenant`` shows only the tasks created for that tenant."""
    kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice")
    kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice")

    listing_a = kc.run_slash("list --tenant biz-a")
    listing_b = kc.run_slash("list --tenant biz-b")

    assert "biz-a task" in listing_a
    assert "biz-b task" not in listing_a
    assert "biz-b task" in listing_b
    assert "biz-a task" not in listing_b
|
||||
|
||||
|
||||
def test_run_slash_usage_error_returns_message(kanban_home):
    """``create`` without its required argument yields a usage/error message."""
    message = kc.run_slash("create").lower()
    assert any(marker in message for marker in ("usage", "error"))
|
||||
|
||||
|
||||
def test_run_slash_assign_reassigns(kanban_home):
    """``assign`` changes the assignee and ``show`` reflects the new one."""
    import re

    created = kc.run_slash("create 'x' --assignee alice")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)

    assign_output = kc.run_slash(f"assign {task_id} bob")
    assert "Assigned" in assign_output

    details = kc.run_slash(f"show {task_id}")
    assert "bob" in details
|
||||
|
||||
|
||||
def test_run_slash_link_unlink(kanban_home):
    """Linking two tasks puts the child in ``todo``; unlinking is acknowledged."""
    import re

    out_a = kc.run_slash("create 'a'")
    out_b = kc.run_slash("create 'b'")
    id_pattern = r"(t_[a-f0-9]+)"
    parent_id = re.search(id_pattern, out_a).group(1)
    child_id = re.search(id_pattern, out_b).group(1)

    link_output = kc.run_slash(f"link {parent_id} {child_id}")
    assert "Linked" in link_output

    # Once linked, the child task is expected to show as todo.
    child_details = kc.run_slash(f"show {child_id}")
    assert "todo" in child_details

    unlink_output = kc.run_slash(f"unlink {parent_id} {child_id}")
    assert "Unlinked" in unlink_output
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration with the COMMAND_REGISTRY
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_kanban_is_resolvable():
    """``resolve_command`` finds a command named ``kanban``."""
    from hermes_cli.commands import resolve_command

    resolved = resolve_command("kanban")
    assert resolved is not None
    assert resolved.name == "kanban"
|
||||
|
||||
|
||||
def test_kanban_bypasses_active_session_guard():
    """``should_bypass_active_session`` is truthy for ``kanban``."""
    from hermes_cli.commands import should_bypass_active_session

    bypass = should_bypass_active_session("kanban")
    assert bypass
|
||||
|
||||
|
||||
def test_kanban_in_autocomplete_table():
    """``/kanban`` and its core subcommands appear in the autocomplete tables."""
    from hermes_cli.commands import COMMANDS, SUBCOMMANDS

    assert "/kanban" in COMMANDS

    # A missing or empty entry falls back to an empty list.
    kanban_subs = SUBCOMMANDS.get("/kanban") or []
    for required in ("create", "dispatch"):
        assert required in kanban_subs
|
||||
|
||||
|
||||
def test_kanban_not_gateway_only():
    """The kanban command is flagged neither CLI-only nor gateway-only."""
    from hermes_cli.commands import COMMAND_REGISTRY

    # kanban must be usable from both the CLI and the gateway surfaces.
    kanban_cmd = next(
        entry for entry in COMMAND_REGISTRY if entry.name == "kanban"
    )
    assert not kanban_cmd.cli_only
    assert not kanban_cmd.gateway_only
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,438 @@
|
||||
"""Tests for the Kanban DB layer (hermes_cli.kanban_db)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli import kanban_db as kb
|
||||
|
||||
|
||||
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated HERMES_HOME holding a freshly initialised kanban DB."""
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    # Redirect both Path.home() and HERMES_HOME into the temporary tree.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    kb.init_db()
    return hermes_dir
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema / init
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_init_db_is_idempotent(kanban_home):
    """Calling ``init_db`` a second time neither fails nor drops existing rows."""
    with kb.connect() as conn:
        kb.create_task(conn, title="persisted")

    # Re-initialising must leave the task created above in place.
    kb.init_db()

    with kb.connect() as conn:
        remaining = kb.list_tasks(conn)
    assert len(remaining) == 1
    assert remaining[0].title == "persisted"
|
||||
|
||||
|
||||
def test_init_creates_expected_tables(kanban_home):
    """The initialised database contains the four core kanban tables."""
    query = "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
    with kb.connect() as conn:
        table_rows = conn.execute(query).fetchall()
    present = {row["name"] for row in table_rows}

    required = {"tasks", "task_links", "task_comments", "task_events"}
    assert required <= present
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Task creation + status inference
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_create_task_no_parents_is_ready(kanban_home):
    """A parentless task starts ``ready`` with a ``scratch`` workspace kind."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="ship it", assignee="alice")
        task = kb.get_task(conn, task_id)

    assert task is not None
    assert task.status == "ready"
    assert task.assignee == "alice"
    assert task.workspace_kind == "scratch"
|
||||
|
||||
|
||||
def test_create_task_with_parent_is_todo_until_parent_done(kanban_home):
    """A child stays ``todo`` until its parent completes, then turns ``ready``."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="parent")
        child_id = kb.create_task(conn, title="child", parents=[parent_id])

        status_before = kb.get_task(conn, child_id).status
        assert status_before == "todo"

        kb.complete_task(conn, parent_id, result="ok")

        status_after = kb.get_task(conn, child_id).status
        assert status_after == "ready"
|
||||
|
||||
|
||||
def test_create_task_unknown_parent_errors(kanban_home):
    """Referencing a parent id that does not exist raises ``ValueError``."""
    with kb.connect() as conn:
        with pytest.raises(ValueError, match="unknown parent"):
            kb.create_task(conn, title="orphan", parents=["t_ghost"])
|
||||
|
||||
|
||||
def test_workspace_kind_validation(kanban_home):
    """An unsupported ``workspace_kind`` value raises ``ValueError``."""
    with kb.connect() as conn:
        with pytest.raises(ValueError, match="workspace_kind"):
            kb.create_task(conn, title="bad ws", workspace_kind="cloud")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Links + dependency resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_link_demotes_ready_child_to_todo_when_parent_not_done(kanban_home):
    """Linking a ready task under an unfinished parent moves it back to ``todo``."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="a")
        child_id = kb.create_task(conn, title="b")

        before = kb.get_task(conn, child_id).status
        assert before == "ready"

        kb.link_tasks(conn, parent_id, child_id)

        after = kb.get_task(conn, child_id).status
        assert after == "todo"
|
||||
|
||||
|
||||
def test_link_keeps_ready_child_when_parent_already_done(kanban_home):
    """Linking under an already-completed parent leaves the child ``ready``."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="a")
        kb.complete_task(conn, parent_id)
        child_id = kb.create_task(conn, title="b")

        before = kb.get_task(conn, child_id).status
        assert before == "ready"

        kb.link_tasks(conn, parent_id, child_id)

        after = kb.get_task(conn, child_id).status
        assert after == "ready"
|
||||
|
||||
|
||||
def test_link_rejects_self_loop(kanban_home):
    """Linking a task to itself raises ``ValueError``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="a")
        with pytest.raises(ValueError, match="itself"):
            kb.link_tasks(conn, task_id, task_id)
|
||||
|
||||
|
||||
def test_link_detects_cycle(kanban_home):
    """Links that would close a loop in the a -> b -> c chain are rejected."""
    with kb.connect() as conn:
        a = kb.create_task(conn, title="a")
        b = kb.create_task(conn, title="b", parents=[a])
        c = kb.create_task(conn, title="c", parents=[b])

        # Both the long loop (c -> a) and the short one (b -> a) must fail.
        for descendant in (c, b):
            with pytest.raises(ValueError, match="cycle"):
                kb.link_tasks(conn, descendant, a)
|
||||
|
||||
|
||||
def test_recompute_ready_cascades_through_chain(kanban_home):
    """Completing each link of a chain promotes the next task to ``ready``."""
    with kb.connect() as conn:
        a = kb.create_task(conn, title="a")
        b = kb.create_task(conn, title="b", parents=[a])
        c = kb.create_task(conn, title="c", parents=[b])

        initial = [kb.get_task(conn, task_id).status for task_id in (a, b, c)]
        assert initial == ["ready", "todo", "todo"]

        kb.complete_task(conn, a)
        status_b = kb.get_task(conn, b).status
        assert status_b == "ready"

        kb.complete_task(conn, b)
        status_c = kb.get_task(conn, c).status
        assert status_c == "ready"
|
||||
|
||||
|
||||
def test_recompute_ready_fan_in_waits_for_all_parents(kanban_home):
    """A task with two parents becomes ``ready`` only after both are complete."""
    with kb.connect() as conn:
        first_parent = kb.create_task(conn, title="a")
        second_parent = kb.create_task(conn, title="b")
        child = kb.create_task(
            conn, title="c", parents=[first_parent, second_parent]
        )

        kb.complete_task(conn, first_parent)
        after_one = kb.get_task(conn, child).status
        assert after_one == "todo"

        kb.complete_task(conn, second_parent)
        after_both = kb.get_task(conn, child).status
        assert after_both == "ready"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Atomic claim (CAS)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_claim_once_wins_second_loses(kanban_home):
    """The first claim returns a running task; a second claim returns ``None``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")

        winner = kb.claim_task(conn, task_id, claimer="host:1")
        assert winner is not None
        assert winner.status == "running"

        loser = kb.claim_task(conn, task_id, claimer="host:2")
        assert loser is None
|
||||
|
||||
|
||||
def test_claim_fails_on_non_ready(kanban_home):
    """Claiming a task that is in ``todo`` returns ``None``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")

        # An unfinished parent pushes the task into todo.
        parent_id = kb.create_task(conn, title="p")
        kb.link_tasks(conn, parent_id, task_id)

        status = kb.get_task(conn, task_id).status
        assert status == "todo"

        claim = kb.claim_task(conn, task_id)
        assert claim is None
|
||||
|
||||
|
||||
def test_stale_claim_reclaimed(kanban_home):
    """An expired claim is released and the task goes back to ``ready``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)

        # Push claim_expires an hour into the past so the claim looks stale.
        an_hour_ago = int(time.time()) - 3600
        conn.execute(
            "UPDATE tasks SET claim_expires = ? WHERE id = ?",
            (an_hour_ago, task_id),
        )

        released = kb.release_stale_claims(conn)
        assert released == 1

        status = kb.get_task(conn, task_id).status
        assert status == "ready"
|
||||
|
||||
|
||||
def test_heartbeat_extends_claim(kanban_home):
    """A heartbeat pushes ``claim_expires`` well into the future."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        owner = "host:hb"
        kb.claim_task(conn, task_id, claimer=owner, ttl_seconds=60)
        # Read back after the claim; the value itself is not asserted on.
        original = kb.get_task(conn, task_id).claim_expires

        # Zero out the expiry, then let the heartbeat set a new one.
        conn.execute(
            "UPDATE tasks SET claim_expires = ? WHERE id = ?", (0, task_id)
        )
        refreshed = kb.heartbeat_claim(
            conn, task_id, claimer=owner, ttl_seconds=3600
        )
        assert refreshed

        expires = kb.get_task(conn, task_id).claim_expires
        assert expires > int(time.time()) + 3000
|
||||
|
||||
|
||||
def test_concurrent_claims_only_one_wins(kanban_home):
    """Race several threads for one task; exactly one claim must succeed."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="race", assignee="a")

    def attempt(worker_index):
        # Each worker opens its own connection for the claim attempt.
        with kb.connect() as worker_conn:
            return kb.claim_task(
                worker_conn, task_id, claimer=f"host:{worker_index}"
            )

    n_workers = 8
    with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as pool:
        outcomes = list(pool.map(attempt, range(n_workers)))

    successful = [outcome for outcome in outcomes if outcome is not None]
    assert len(successful) == 1
    assert successful[0].status == "running"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Complete / block / unblock / archive / assign
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_complete_records_result(kanban_home):
    """Completing a task stores status, result text and a completion timestamp."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")

        completed = kb.complete_task(conn, task_id, result="done and dusted")
        assert completed

        stored = kb.get_task(conn, task_id)
        assert stored.status == "done"
        assert stored.result == "done and dusted"
        assert stored.completed_at is not None
|
||||
|
||||
|
||||
def test_block_then_unblock(kanban_home):
    """A claimed task can be blocked and then unblocked back to ``ready``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)

        blocked = kb.block_task(conn, task_id, reason="need input")
        assert blocked
        assert kb.get_task(conn, task_id).status == "blocked"

        unblocked = kb.unblock_task(conn, task_id)
        assert unblocked
        assert kb.get_task(conn, task_id).status == "ready"
|
||||
|
||||
|
||||
def test_assign_refuses_while_running(kanban_home):
    """Reassigning a claimed (running) task raises ``RuntimeError``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)

        with pytest.raises(RuntimeError, match="currently running"):
            kb.assign_task(conn, task_id, "b")
|
||||
|
||||
|
||||
def test_assign_reassigns_when_not_running(kanban_home):
    """An unclaimed task can be handed to a different assignee."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")

        reassigned = kb.assign_task(conn, task_id, "b")
        assert reassigned

        new_owner = kb.get_task(conn, task_id).assignee
        assert new_owner == "b"
|
||||
|
||||
|
||||
def test_archive_hides_from_default_list(kanban_home):
    """Archived tasks are listed only when ``include_archived=True`` is passed."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        kb.complete_task(conn, task_id)

        archived = kb.archive_task(conn, task_id)
        assert archived

        default_listing = kb.list_tasks(conn)
        assert len(default_listing) == 0

        full_listing = kb.list_tasks(conn, include_archived=True)
        assert len(full_listing) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Comments / events / worker context
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_comments_recorded_in_order(kanban_home):
    """Comments are returned in insertion order with their authors intact."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        kb.add_comment(conn, task_id, "user", "first")
        kb.add_comment(conn, task_id, "researcher", "second")
        stored = kb.list_comments(conn, task_id)

    bodies = [comment.body for comment in stored]
    authors = [comment.author for comment in stored]
    assert bodies == ["first", "second"]
    assert authors == ["user", "researcher"]
|
||||
|
||||
|
||||
def test_empty_comment_rejected(kanban_home):
    """Adding a comment with an empty body raises ``ValueError``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        with pytest.raises(ValueError, match="body is required"):
            kb.add_comment(conn, task_id, "user", "")
|
||||
|
||||
|
||||
def test_events_capture_lifecycle(kanban_home):
    """Create, claim and complete each leave an event of the matching kind."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)
        kb.complete_task(conn, task_id, result="ok")
        history = kb.list_events(conn, task_id)

    recorded_kinds = [event.kind for event in history]
    for expected_kind in ("created", "claimed", "completed"):
        assert expected_kind in recorded_kinds
|
||||
|
||||
|
||||
def test_worker_context_includes_parent_results_and_comments(kanban_home):
    """Worker context contains parent results, comments, the task id and title."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="p")
        kb.complete_task(conn, parent_id, result="PARENT_RESULT_MARKER")
        child_id = kb.create_task(conn, title="child", parents=[parent_id])
        kb.add_comment(conn, child_id, "user", "CLARIFICATION_MARKER")
        context_text = kb.build_worker_context(conn, child_id)

    expected_fragments = (
        "PARENT_RESULT_MARKER",
        "CLARIFICATION_MARKER",
        child_id,
        "child",
    )
    for fragment in expected_fragments:
        assert fragment in context_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatcher
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_dispatch_dry_run_does_not_claim(kanban_home):
    """A dry-run dispatch lists both tasks as spawned but leaves them ``ready``."""
    with kb.connect() as conn:
        first = kb.create_task(conn, title="a", assignee="alice")
        second = kb.create_task(conn, title="b", assignee="bob")
        outcome = kb.dispatch_once(conn, dry_run=True)

    spawned_ids = {entry[0] for entry in outcome.spawned}
    assert spawned_ids == {first, second}

    with kb.connect() as conn:
        # A dry run must not change any task status.
        for task_id in (first, second):
            assert kb.get_task(conn, task_id).status == "ready"
|
||||
|
||||
|
||||
def test_dispatch_skips_unassigned(kanban_home):
    """A task without an assignee is reported as skipped and is not spawned."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="floater")
        outcome = kb.dispatch_once(conn, dry_run=True)

    assert task_id in outcome.skipped_unassigned
    assert not outcome.spawned
|
||||
|
||||
|
||||
def test_dispatch_promotes_ready_and_spawns(kanban_home):
    """Dispatch spawns the child of an already-completed parent and marks it running."""
    recorded = []

    def fake_spawn(task, workspace):
        # Record the call instead of launching a worker.
        recorded.append((task.id, task.assignee, workspace))

    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="p", assignee="alice")
        child_id = kb.create_task(
            conn, title="c", assignee="bob", parents=[parent_id]
        )
        # The parent is finished before dispatch; promotion happens inside it.
        kb.complete_task(conn, parent_id)
        res = kb.dispatch_once(conn, spawn_fn=fake_spawn)

    # Only the child is spawned: the parent was already done at dispatch time.
    assert len(recorded) == 1
    spawned_id, spawned_assignee = recorded[0][0], recorded[0][1]
    assert spawned_id == child_id
    assert spawned_assignee == "bob"

    # The child must now be running.
    with kb.connect() as conn:
        assert kb.get_task(conn, child_id).status == "running"
|
||||
|
||||
|
||||
def test_dispatch_spawn_failure_releases_claim(kanban_home):
    """If the spawn function raises, the task returns to ``ready`` with no claim lock."""

    def boom(task, workspace):
        raise RuntimeError("spawn failed")

    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="boom", assignee="alice")
        kb.dispatch_once(conn, spawn_fn=boom)

        # The task has to be ready again so a later tick can retry it.
        status = kb.get_task(conn, task_id).status
        assert status == "ready"
        lock = kb.get_task(conn, task_id).claim_lock
        assert lock is None
|
||||
|
||||
|
||||
def test_dispatch_reclaims_stale_before_spawning(kanban_home):
    """Dispatch reports one reclaimed task when a claim has already expired."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="alice")
        kb.claim_task(conn, task_id)

        # Move the expiry one second into the past so the claim is stale.
        just_expired = int(time.time()) - 1
        conn.execute(
            "UPDATE tasks SET claim_expires = ? WHERE id = ?",
            (just_expired, task_id),
        )
        outcome = kb.dispatch_once(conn, dry_run=True)

    assert outcome.reclaimed == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Workspace resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_scratch_workspace_created_under_hermes_home(kanban_home):
    """Resolving a default task's workspace yields an existing kanban directory."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        task = kb.get_task(conn, task_id)

    workspace = kb.resolve_workspace(task)
    assert workspace.exists()
    assert workspace.is_dir()
    assert "kanban" in str(workspace)
|
||||
|
||||
|
||||
def test_dir_workspace_honors_given_path(kanban_home, tmp_path):
    """A ``dir`` workspace resolves to the supplied path, which then exists."""
    requested = tmp_path / "my-vault"
    with kb.connect() as conn:
        task_id = kb.create_task(
            conn,
            title="biz",
            workspace_kind="dir",
            workspace_path=str(requested),
        )
        task = kb.get_task(conn, task_id)

    resolved = kb.resolve_workspace(task)
    assert resolved == requested
    assert resolved.exists()
|
||||
|
||||
|
||||
def test_worktree_workspace_returns_intended_path(kanban_home, tmp_path):
    """A ``worktree`` workspace resolves to exactly the path that was requested."""
    requested = str(tmp_path / ".worktrees" / "my-task")
    with kb.connect() as conn:
        task_id = kb.create_task(
            conn,
            title="ship",
            workspace_kind="worktree",
            workspace_path=requested,
        )
        task = kb.get_task(conn, task_id)

    resolved = kb.resolve_workspace(task)
    # Worktrees are not auto-created here; the worker's skill handles that.
    assert str(resolved) == requested
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tenancy
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_tenant_column_filters_listings(kanban_home):
    """``list_tasks(tenant=...)`` returns only that tenant's tasks."""
    with kb.connect() as conn:
        kb.create_task(conn, title="a1", tenant="biz-a")
        kb.create_task(conn, title="b1", tenant="biz-b")
        kb.create_task(conn, title="shared")  # created without a tenant
        tasks_a = kb.list_tasks(conn, tenant="biz-a")
        tasks_b = kb.list_tasks(conn, tenant="biz-b")

    titles_a = [task.title for task in tasks_a]
    titles_b = [task.title for task in tasks_b]
    assert titles_a == ["a1"]
    assert titles_b == ["b1"]
|
||||
|
||||
|
||||
def test_tenant_propagates_to_events(kanban_home):
    """The ``created`` event payload carries the task's tenant."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="tenant-task", tenant="biz-a")
        history = kb.list_events(conn, task_id)

    # At least one "created" event must exist, with the tenant in its payload.
    created_events = [event for event in history if event.kind == "created"]
    assert created_events
    assert created_events[0].payload.get("tenant") == "biz-a"
|
||||
@@ -56,7 +56,7 @@ def three_source_env(monkeypatch, hub_env):
|
||||
import tools.skills_tool as skills_tool
|
||||
|
||||
monkeypatch.setattr(hub, "HubLockFile", lambda: _DummyLockFile([_HUB_ENTRY]))
|
||||
monkeypatch.setattr(skills_tool, "_find_all_skills", lambda **_kwargs: list(_ALL_THREE_SKILLS))
|
||||
monkeypatch.setattr(skills_tool, "_find_all_skills", lambda: list(_ALL_THREE_SKILLS))
|
||||
monkeypatch.setattr(skills_sync, "_read_manifest", lambda: dict(_BUILTIN_MANIFEST))
|
||||
|
||||
return hub_env
|
||||
@@ -107,7 +107,7 @@ def test_do_list_initializes_hub_dir(monkeypatch, hub_env):
|
||||
import tools.skills_sync as skills_sync
|
||||
import tools.skills_tool as skills_tool
|
||||
|
||||
monkeypatch.setattr(skills_tool, "_find_all_skills", lambda **_kwargs: [])
|
||||
monkeypatch.setattr(skills_tool, "_find_all_skills", lambda: [])
|
||||
monkeypatch.setattr(skills_sync, "_read_manifest", lambda: {})
|
||||
|
||||
hub_dir = hub_env
|
||||
@@ -154,74 +154,6 @@ def test_do_list_filter_builtin(three_source_env):
|
||||
assert "local-skill" not in output
|
||||
|
||||
|
||||
def test_do_list_renders_status_column(three_source_env, monkeypatch):
    """The list output has a Status column and an enabled/disabled summary."""
    from agent import skill_utils

    # With nothing disabled, every skill counts as enabled.
    monkeypatch.setattr(
        skill_utils, "get_disabled_skill_names", lambda platform=None: set()
    )
    rendered = _capture()

    assert "Status" in rendered
    assert "enabled" in rendered.lower()
    # The summary line counts the enabled skills.
    assert "3 enabled, 0 disabled" in rendered
|
||||
|
||||
|
||||
def test_do_list_marks_disabled_skills(three_source_env, monkeypatch):
    """A disabled skill is still listed, marked disabled and counted in the summary."""
    from agent import skill_utils

    def _disabled(platform=None):
        # Stands in for `skills.disabled: [hub-skill]` in the config.
        return {"hub-skill"}

    monkeypatch.setattr(skill_utils, "get_disabled_skill_names", _disabled)
    rendered = _capture()

    # Without --enabled-only the row is still shown, flagged as disabled.
    assert "hub-skill" in rendered
    assert "disabled" in rendered.lower()
    assert "2 enabled, 1 disabled" in rendered
|
||||
|
||||
|
||||
def test_do_list_enabled_only_hides_disabled(three_source_env, monkeypatch):
    """With ``enabled_only=True`` the disabled skill is left out of the listing."""
    from agent import skill_utils

    def _disabled(platform=None):
        return {"hub-skill"}

    monkeypatch.setattr(skill_utils, "get_disabled_skill_names", _disabled)

    # Render into an in-memory, colourless console so the text can be inspected.
    buffer = StringIO()
    plain_console = Console(file=buffer, force_terminal=False, color_system=None)
    do_list(enabled_only=True, console=plain_console)
    rendered = buffer.getvalue()

    assert "hub-skill" not in rendered
    for visible in ("builtin-skill", "local-skill"):
        assert visible in rendered
    assert "enabled only" in rendered.lower()
    assert "2 enabled shown" in rendered
|
||||
|
||||
|
||||
def test_do_list_platform_env_is_ignored(three_source_env, monkeypatch):
    """The list command must call ``get_disabled_skill_names`` with no platform.

    ``hermes skills list`` reads the active profile's config through
    HERMES_HOME (swapped by -p). Passing a platform argument would let
    per-platform overrides leak in silently from the HERMES_PLATFORM env.
    """
    from agent import skill_utils

    observed = {}

    def _recorder(platform=None):
        # Remember which platform value the command passed in.
        observed["platform"] = platform
        return set()

    monkeypatch.setattr(skill_utils, "get_disabled_skill_names", _recorder)
    _capture()

    assert observed["platform"] is None
|
||||
|
||||
|
||||
def test_do_check_reports_available_updates(monkeypatch):
|
||||
output = _capture_check(monkeypatch, [
|
||||
{"name": "hub-skill", "source": "skills.sh", "status": "update_available"},
|
||||
|
||||
@@ -0,0 +1,822 @@
|
||||
"""Tests for the Kanban dashboard plugin backend (plugins/kanban/dashboard/plugin_api.py).
|
||||
|
||||
The plugin mounts as /api/plugins/kanban/ inside the dashboard's FastAPI app,
|
||||
but here we attach its router to a bare FastAPI instance so we can test the
|
||||
REST surface without spinning up the whole dashboard.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from hermes_cli import kanban_db as kb
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_plugin_router():
    """Import plugins/kanban/dashboard/plugin_api.py by path and return its ``router``."""
    # The repository root sits two directories above this test file's folder.
    root = Path(__file__).resolve().parents[2]
    plugin_file = root / "plugins" / "kanban" / "dashboard" / "plugin_api.py"
    assert plugin_file.exists(), f"plugin file missing: {plugin_file}"

    module_name = "hermes_dashboard_plugin_kanban_test"
    spec = importlib.util.spec_from_file_location(module_name, plugin_file)
    assert spec is not None and spec.loader is not None

    plugin_module = importlib.util.module_from_spec(spec)
    # Register the module under its spec name before executing it.
    sys.modules[spec.name] = plugin_module
    spec.loader.exec_module(plugin_module)
    return plugin_module.router
|
||||
|
||||
|
||||
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated HERMES_HOME holding a freshly initialised kanban DB."""
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    # Redirect both Path.home() and HERMES_HOME into the temporary tree.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    kb.init_db()
    return hermes_dir
|
||||
|
||||
|
||||
@pytest.fixture
def client(kanban_home):
    """Return a TestClient for a bare FastAPI app with the kanban router mounted."""
    bare_app = FastAPI()
    plugin_router = _load_plugin_router()
    bare_app.include_router(plugin_router, prefix="/api/plugins/kanban")
    return TestClient(bare_app)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /board on an empty DB
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_board_empty(client):
    """An empty database yields every canonical column with no tasks and no filters."""
    response = client.get("/api/plugins/kanban/board")
    assert response.status_code == 200
    data = response.json()

    # Every canonical column (triage plus the rest) must be present.
    names = [column["name"] for column in data["columns"]]
    for expected in ("triage", "todo", "ready", "running", "blocked", "done"):
        assert expected in names, f"missing column {expected}: {names}"

    # Each column must be empty.
    for column in data["columns"]:
        assert len(column["tasks"]) == 0

    assert data["tenants"] == []
    assert data["assignees"] == []
    assert data["latest_event_id"] == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /tasks then GET /board sees it
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_create_task_appears_on_board(client):
    """A task created via POST /tasks shows up in the board's ``ready`` column."""
    new_task = {
        "title": "Research LLM caching",
        "assignee": "researcher",
        "priority": 3,
        "tenant": "acme",
    }
    r = client.post("/api/plugins/kanban/tasks", json=new_task)
    assert r.status_code == 200, r.text

    created = r.json()["task"]
    assert created["title"] == "Research LLM caching"
    assert created["assignee"] == "researcher"
    # A task with no parents is ready straight away.
    assert created["status"] == "ready"
    assert created["priority"] == 3
    assert created["tenant"] == "acme"
    created_id = created["id"]

    # The board must now list the task under 'ready'.
    board_response = client.get("/api/plugins/kanban/board")
    assert board_response.status_code == 200
    board = board_response.json()

    ready_column = next(
        column for column in board["columns"] if column["name"] == "ready"
    )
    assert len(ready_column["tasks"]) == 1
    assert ready_column["tasks"][0]["id"] == created_id
    assert "acme" in board["tenants"]
    assert "researcher" in board["assignees"]
|
||||
|
||||
|
||||
def test_tenant_filter(client):
    """The board's ``tenant`` query parameter limits results to that tenant."""
    for title, tenant in (("A", "t1"), ("B", "t2")):
        client.post(
            "/api/plugins/kanban/tasks", json={"title": title, "tenant": tenant}
        )

    # Each tenant has exactly one task across all columns.
    for tenant in ("t1", "t2"):
        response = client.get(f"/api/plugins/kanban/board?tenant={tenant}")
        columns = response.json()["columns"]
        task_count = sum(len(column["tasks"]) for column in columns)
        assert task_count == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /tasks/:id returns body + comments + events + links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_task_detail_includes_links_and_events(client):
    """Task detail shows parent/child links in both directions and some events."""
    tasks_url = "/api/plugins/kanban/tasks"

    parent = client.post(tasks_url, json={"title": "parent"}).json()["task"]
    child_request = {"title": "child", "parents": [parent["id"]]}
    child = client.post(tasks_url, json=child_request).json()["task"]
    # The parent is not done yet, so the child waits in todo.
    assert child["status"] == "todo"

    # The child's detail must list the parent.
    child_response = client.get(f"/api/plugins/kanban/tasks/{child['id']}")
    assert child_response.status_code == 200
    child_detail = child_response.json()
    assert child_detail["task"]["id"] == child["id"]
    assert parent["id"] in child_detail["links"]["parents"]

    # The parent's detail must list the child.
    parent_response = client.get(f"/api/plugins/kanban/tasks/{parent['id']}")
    assert child["id"] in parent_response.json()["links"]["children"]

    # The child has at least one event from its creation.
    assert len(child_detail["events"]) >= 1
|
||||
|
||||
|
||||
def test_task_detail_404_on_unknown(client):
    """Requesting detail for an unknown task id responds with 404."""
    response = client.get("/api/plugins/kanban/tasks/does-not-exist")
    assert response.status_code == 404
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PATCH /tasks/:id — status transitions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_patch_status_complete(client):
    """PATCHing ``status: done`` completes the task and moves it on the board."""
    created = client.post(
        "/api/plugins/kanban/tasks", json={"title": "x"}
    ).json()["task"]
    task_id = created["id"]

    response = client.patch(
        f"/api/plugins/kanban/tasks/{task_id}",
        json={"status": "done", "result": "shipped"},
    )
    assert response.status_code == 200
    assert response.json()["task"]["status"] == "done"

    # The board's done column must now contain the task.
    columns = client.get("/api/plugins/kanban/board").json()["columns"]
    done_column = next(column for column in columns if column["name"] == "done")
    assert any(entry["id"] == task_id for entry in done_column["tasks"])
|
||||
|
||||
|
||||
def test_patch_block_then_unblock(client):
    """A task can be PATCHed to ``blocked`` and then back to ``ready``."""
    created = client.post(
        "/api/plugins/kanban/tasks", json={"title": "x"}
    ).json()["task"]
    task_url = f"/api/plugins/kanban/tasks/{created['id']}"

    block_response = client.patch(
        task_url, json={"status": "blocked", "block_reason": "need input"}
    )
    assert block_response.status_code == 200
    assert block_response.json()["task"]["status"] == "blocked"

    unblock_response = client.patch(task_url, json={"status": "ready"})
    assert unblock_response.status_code == 200
    assert unblock_response.json()["task"]["status"] == "ready"
|
||||
|
||||
|
||||
def test_patch_drag_drop_move_todo_to_ready(client):
    """A direct status write can promote a ``todo`` task to ``ready``.

    This is the drag-drop path for statuses that have no dedicated verb.
    """
    tasks_url = "/api/plugins/kanban/tasks"
    parent = client.post(tasks_url, json={"title": "p"}).json()["task"]
    child = client.post(
        tasks_url, json={"title": "c", "parents": [parent["id"]]}
    ).json()["task"]
    assert child["status"] == "todo"

    response = client.patch(
        f"/api/plugins/kanban/tasks/{child['id']}", json={"status": "ready"}
    )
    assert response.status_code == 200
    assert response.json()["task"]["status"] == "ready"
|
||||
|
||||
|
||||
def test_patch_reassign(client):
    """PATCHing ``assignee`` moves the task to the new assignee."""
    created = client.post(
        "/api/plugins/kanban/tasks", json={"title": "x", "assignee": "a"}
    ).json()["task"]

    response = client.patch(
        f"/api/plugins/kanban/tasks/{created['id']}", json={"assignee": "b"}
    )
    assert response.status_code == 200
    assert response.json()["task"]["assignee"] == "b"
|
||||
|
||||
|
||||
def test_patch_priority_and_edit(client):
    """A single PATCH can change both priority and title."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    changes = {"priority": 5, "title": "renamed"}
    resp = client.patch(f"/api/plugins/kanban/tasks/{task['id']}", json=changes)
    assert resp.status_code == 200

    patched = resp.json()["task"]
    assert patched["priority"] == 5
    assert patched["title"] == "renamed"
|
||||
|
||||
|
||||
def test_patch_invalid_status(client):
    """An unknown status value is rejected with HTTP 400."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    bad_payload = {"status": "banana"}
    resp = client.patch(f"/api/plugins/kanban/tasks/{task['id']}", json=bad_payload)
    assert resp.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Comments + Links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_add_comment(client):
    """A posted comment appears, body and author intact, in the task detail."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    comment = {"body": "how's progress?", "author": "teknium"}
    posted = client.post(
        f"/api/plugins/kanban/tasks/{task['id']}/comments",
        json=comment,
    )
    assert posted.status_code == 200

    detail = client.get(f"/api/plugins/kanban/tasks/{task['id']}")
    comments = detail.json()["comments"]
    assert len(comments) == 1
    only_comment = comments[0]
    assert only_comment["body"] == "how's progress?"
    assert only_comment["author"] == "teknium"
|
||||
|
||||
|
||||
def test_add_comment_empty_rejected(client):
    """A whitespace-only comment body is rejected with HTTP 400."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    blank_comment = {"body": " "}
    resp = client.post(
        f"/api/plugins/kanban/tasks/{task['id']}/comments",
        json=blank_comment,
    )
    assert resp.status_code == 400
|
||||
|
||||
|
||||
def test_add_link_and_delete_link(client):
    """Create a parent->child link, see it in the child's detail, then delete it."""
    links_url = "/api/plugins/kanban/links"
    parent = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
    child = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"]

    created = client.post(
        links_url,
        json={"parent_id": parent["id"], "child_id": child["id"]},
    )
    assert created.status_code == 200

    detail = client.get(f"/api/plugins/kanban/tasks/{child['id']}")
    assert parent["id"] in detail.json()["links"]["parents"]

    # DELETE identifies the edge through query parameters, not a JSON body.
    removed = client.delete(
        links_url,
        params={"parent_id": parent["id"], "child_id": child["id"]},
    )
    assert removed.status_code == 200
    assert removed.json()["ok"] is True
|
||||
|
||||
|
||||
def test_add_link_cycle_rejected(client):
    """Adding the reverse of an existing link (a 2-cycle) is rejected with 400."""
    links_url = "/api/plugins/kanban/links"
    first = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
    second = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"]

    # Forward edge first -> second; its response is intentionally not checked.
    client.post(links_url, json={"parent_id": first["id"], "child_id": second["id"]})

    # The reverse edge second -> first would close the loop.
    reverse = client.post(
        links_url,
        json={"parent_id": second["id"], "child_id": first["id"]},
    )
    assert reverse.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch nudge
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_dispatch_dry_run(client):
    """A dry-run dispatch answers 200 with a JSON object body."""
    seed = {"title": "work", "assignee": "researcher"}
    client.post("/api/plugins/kanban/tasks", json=seed)

    resp = client.post("/api/plugins/kanban/dispatch?dry_run=true&max=4")
    assert resp.status_code == 200
    # DispatchResult is serialized as a dataclass dict.
    payload = resp.json()
    assert isinstance(payload, dict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Triage column (new v1 status)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_create_triage_lands_in_triage_column(client):
    """Creating with ``triage: True`` yields status "triage" and places the
    task in the board's triage column."""
    new_task = {"title": "rough idea, spec me", "triage": True}
    created = client.post("/api/plugins/kanban/tasks", json=new_task)
    assert created.status_code == 200
    assert created.json()["task"]["status"] == "triage"

    board = client.get("/api/plugins/kanban/board")
    triage_column = next(
        column for column in board.json()["columns"] if column["name"] == "triage"
    )
    triage_tasks = triage_column["tasks"]
    assert len(triage_tasks) == 1
    assert triage_tasks[0]["title"] == "rough idea, spec me"
|
||||
|
||||
|
||||
def test_triage_task_not_promoted_to_ready(client):
    """A parent-less triage task must remain in triage after a dispatch run."""
    client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "must stay put", "triage": True},
    )
    # A real (non-dry-run) dispatch must leave the triage task alone.
    client.post("/api/plugins/kanban/dispatch?dry_run=false&max=4")

    board = client.get("/api/plugins/kanban/board")

    def column_named(name):
        # First board column carrying the given name.
        return next(col for col in board.json()["columns"] if col["name"] == name)

    triage_column = column_named("triage")
    ready_column = column_named("ready")
    assert len(triage_column["tasks"]) == 1
    assert len(ready_column["tasks"]) == 0
|
||||
|
||||
|
||||
def test_patch_status_triage_works(client):
    """A task can be pushed back into triage and then promoted out of it."""
    task = client.post(
        "/api/plugins/kanban/tasks", json={"title": "x"},
    ).json()["task"]

    # Normal creation is 'ready': first push to triage, then promote to todo.
    for target_status in ("triage", "todo"):
        resp = client.patch(
            f"/api/plugins/kanban/tasks/{task['id']}", json={"status": target_status},
        )
        assert resp.status_code == 200
        assert resp.json()["task"]["status"] == target_status
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Progress rollup (done children / total children)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_board_progress_rollup(client):
    """The parent row's ``progress`` counts done children over total children;
    a task without children reports ``progress`` as None."""
    tasks_url = "/api/plugins/kanban/tasks"

    def find_row(board_response, task_id):
        # First task row, in any column, whose id matches.
        return next(
            row
            for column in board_response.json()["columns"]
            for row in column["tasks"]
            if row["id"] == task_id
        )

    parent = client.post(tasks_url, json={"title": "parent"}).json()["task"]
    child_a = client.post(
        tasks_url, json={"title": "a", "parents": [parent["id"]]},
    ).json()["task"]
    child_b = client.post(
        tasks_url, json={"title": "b", "parents": [parent["id"]]},
    ).json()["task"]

    # Children start as "todo" because the parent isn't done yet; promote
    # them to "ready" so complete_task will accept the transition.
    for child_id in (child_a["id"], child_b["id"]):
        promoted = client.patch(
            f"/api/plugins/kanban/tasks/{child_id}", json={"status": "ready"},
        )
        assert promoted.status_code == 200

    # Nothing finished yet: 0/2.
    board = client.get("/api/plugins/kanban/board")
    assert find_row(board, parent["id"])["progress"] == {"done": 0, "total": 2}

    # Finish one child: 1/2.
    completed = client.patch(
        f"/api/plugins/kanban/tasks/{child_a['id']}",
        json={"status": "done"},
    )
    assert completed.status_code == 200
    board = client.get("/api/plugins/kanban/board")
    assert find_row(board, parent["id"])["progress"] == {"done": 1, "total": 2}

    # Childless tasks report progress=None, not {0/0}.
    assert find_row(board, child_b["id"])["progress"] is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auto-init on first board read
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_board_auto_initializes_missing_db(tmp_path, monkeypatch):
    """GET /board on a home without kanban.db must create the DB, not fail."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    # kb.init_db() is deliberately NOT called: the endpoint has to do it.

    application = FastAPI()
    application.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
    http = TestClient(application)

    resp = http.get("/api/plugins/kanban/board")
    assert resp.status_code == 200
    db_file = hermes_home / "kanban.db"
    assert db_file.exists(), "init_db wasn't invoked by /board"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WebSocket auth (query-param token)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_ws_events_rejects_when_token_required(tmp_path, monkeypatch):
    """With _SESSION_TOKEN set (normal dashboard context), a missing or wrong
    ?token= query param is closed with code 1008 (policy violation); the
    correct token completes the handshake."""
    import types

    from starlette.websockets import WebSocketDisconnect

    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    kb.init_db()

    # Stub web_server so _check_ws_token has a token to compare against.
    fake_web_server = types.SimpleNamespace(_SESSION_TOKEN="secret-xyz")
    monkeypatch.setitem(sys.modules, "hermes_cli.web_server", fake_web_server)

    application = FastAPI()
    application.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
    http = TestClient(application)

    # No token, then a wrong token: both must end in a policy-violation close.
    rejected_urls = (
        "/api/plugins/kanban/events",
        "/api/plugins/kanban/events?token=nope",
    )
    for url in rejected_urls:
        with pytest.raises(WebSocketDisconnect) as exc:
            with http.websocket_connect(url):
                pass
        assert exc.value.code == 1008

    # Correct token → accepted (connect then close cleanly from our side).
    with http.websocket_connect(
        "/api/plugins/kanban/events?token=secret-xyz"
    ) as ws:
        assert ws is not None  # handshake succeeded
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk actions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_bulk_status_ready(client):
    """A bulk status=ready request moves every listed task into "ready"."""
    created = [
        client.post("/api/plugins/kanban/tasks", json={"title": title}).json()["task"]
        for title in ("a", "b", "c")
    ]
    task_ids = [task["id"] for task in created]

    # Parent-less tasks land in "ready" already; push them to blocked first.
    for task_id in task_ids:
        client.patch(
            f"/api/plugins/kanban/tasks/{task_id}",
            json={"status": "blocked", "block_reason": "wait"},
        )

    resp = client.post(
        "/api/plugins/kanban/tasks/bulk",
        json={"ids": task_ids, "status": "ready"},
    )
    assert resp.status_code == 200
    outcomes = resp.json()["results"]
    assert all(outcome["ok"] for outcome in outcomes)

    # All three must now sit in the ready column.
    board = client.get("/api/plugins/kanban/board").json()
    ready_column = next(col for col in board["columns"] if col["name"] == "ready")
    ready_ids = {task["id"] for task in ready_column["tasks"]}
    assert set(task_ids).issubset(ready_ids)
|
||||
|
||||
|
||||
def test_bulk_archive(client):
    """Bulk-archived tasks disappear from the default board."""
    first = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
    second = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"]

    resp = client.post(
        "/api/plugins/kanban/tasks/bulk",
        json={"ids": [first["id"], second["id"]], "archive": True},
    )
    assert resp.status_code == 200
    assert all(outcome["ok"] for outcome in resp.json()["results"])

    # Default board (archived hidden) — both gone.
    board = client.get("/api/plugins/kanban/board").json()
    visible_ids = {
        task["id"] for column in board["columns"] for task in column["tasks"]
    }
    assert first["id"] not in visible_ids
    assert second["id"] not in visible_ids
|
||||
|
||||
|
||||
def test_bulk_reassign(client):
    """A bulk assignee change is applied to every listed task."""
    task_ids = []
    for title in ("a", "b"):
        created = client.post(
            "/api/plugins/kanban/tasks",
            json={"title": title, "assignee": "old"},
        ).json()["task"]
        task_ids.append(created["id"])

    resp = client.post(
        "/api/plugins/kanban/tasks/bulk",
        json={"ids": task_ids, "assignee": "new"},
    )
    assert resp.status_code == 200

    # Re-read each task to confirm the change was stored.
    for task_id in task_ids:
        fetched = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()["task"]
        assert fetched["assignee"] == "new"
|
||||
|
||||
|
||||
def test_bulk_unassign_via_empty_string(client):
    """Bulk-setting assignee to "" leaves the task with assignee None."""
    task = client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "a", "assignee": "x"},
    ).json()["task"]

    resp = client.post(
        "/api/plugins/kanban/tasks/bulk",
        json={"ids": [task["id"]], "assignee": ""},
    )
    assert resp.status_code == 200

    refreshed = client.get(f"/api/plugins/kanban/tasks/{task['id']}").json()["task"]
    assert refreshed["assignee"] is None
|
||||
|
||||
|
||||
def test_bulk_partial_failure_doesnt_abort_siblings(client):
    """A bad id in the middle of a batch is reported as failed while the
    valid ids around it are still updated."""
    good_first = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
    good_last = client.post("/api/plugins/kanban/tasks", json={"title": "c"}).json()["task"]

    resp = client.post(
        "/api/plugins/kanban/tasks/bulk",
        json={"ids": [good_first["id"], "bogus-id", good_last["id"]], "priority": 7},
    )
    assert resp.status_code == 200
    outcomes = resp.json()["results"]
    assert len(outcomes) == 3

    succeeded = {entry["id"] for entry in outcomes if entry["ok"]}
    assert good_first["id"] in succeeded
    assert good_last["id"] in succeeded
    assert any(not entry["ok"] and entry["id"] == "bogus-id" for entry in outcomes)

    # Good siblings actually got the priority bump.
    for task_id in (good_first["id"], good_last["id"]):
        fetched = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()["task"]
        assert fetched["priority"] == 7
|
||||
|
||||
|
||||
def test_bulk_empty_ids_400(client):
    """A bulk request with an empty ``ids`` list is rejected with HTTP 400."""
    empty_batch = {"ids": []}
    resp = client.post("/api/plugins/kanban/tasks/bulk", json=empty_batch)
    assert resp.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /config endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_config_returns_defaults_when_section_missing(client):
    """GET /config returns the built-in defaults when dashboard.kanban is missing."""
    resp = client.get("/api/plugins/kanban/config")
    assert resp.status_code == 200

    settings = resp.json()
    assert settings["default_tenant"] == ""
    assert settings["lane_by_profile"] is True
    assert settings["include_archived_by_default"] is False
    assert settings["render_markdown"] is True
|
||||
|
||||
|
||||
def test_config_reads_dashboard_kanban_section(tmp_path, monkeypatch, client):
    """GET /config reflects values written under ``dashboard.kanban`` in
    ``$HERMES_HOME/config.yaml``.

    ``tmp_path`` and ``monkeypatch`` are requested but not used directly;
    HERMES_HOME is read from the environment (presumably set up by the
    ``client`` fixture — verify against the fixture definition).
    """
    home = Path(os.environ["HERMES_HOME"])
    # The four settings must be nested *under* ``kanban`` (deeper indent than
    # the ``kanban:`` key itself); with a flat indent YAML parses ``kanban``
    # as null and the endpoint would fall back to its defaults.
    (home / "config.yaml").write_text(
        "dashboard:\n"
        "  kanban:\n"
        "    default_tenant: acme\n"
        "    lane_by_profile: false\n"
        "    include_archived_by_default: true\n"
        "    render_markdown: false\n"
    )
    r = client.get("/api/plugins/kanban/config")
    assert r.status_code == 200
    data = r.json()
    assert data["default_tenant"] == "acme"
    assert data["lane_by_profile"] is False
    assert data["include_archived_by_default"] is True
    assert data["render_markdown"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Runs surfacing (vulcan-artivus RFC feedback)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_task_detail_includes_runs(client):
    """GET /tasks/:id carries a runs[] array holding the attempt history."""
    import hermes_cli.kanban_db as _kb

    created = client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "port x", "assignee": "worker"},
    ).json()
    task_id = created["task"]["id"]

    # A PATCH to "running" would not call claim_task (the PATCH path uses
    # _set_status_direct), so claim and complete through the kernel directly
    # to force a run row into existence.
    db = _kb.connect()
    try:
        _kb.claim_task(db, task_id)
        _kb.complete_task(
            db, task_id,
            result="done",
            summary="tested on rate limiter",
            metadata={"changed_files": ["limiter.py"]},
        )
    finally:
        db.close()

    detail = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()
    assert "runs" in detail
    assert len(detail["runs"]) == 1

    only_run = detail["runs"][0]
    assert only_run["outcome"] == "completed"
    assert only_run["profile"] == "worker"
    assert only_run["summary"] == "tested on rate limiter"
    assert only_run["metadata"] == {"changed_files": ["limiter.py"]}
    assert only_run["ended_at"] is not None
|
||||
|
||||
|
||||
def test_task_detail_runs_empty_before_claim(client):
    """A never-claimed task exposes ``runs`` as an empty list rather than
    omitting the key."""
    created = client.post("/api/plugins/kanban/tasks", json={"title": "fresh"}).json()
    task_id = created["task"]["id"]
    detail = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()
    assert detail["runs"] == []
|
||||
|
||||
|
||||
def test_patch_status_done_with_summary_and_metadata(client):
    """PATCH /tasks/:id with status=done plus summary and metadata must reach
    complete_task, giving the dashboard parity with the CLI."""
    from hermes_cli import kanban_db as kb

    # Create the task, then claim it through the kernel.
    created = client.post(
        "/api/plugins/kanban/tasks", json={"title": "x", "assignee": "worker"}
    )
    task_id = created.json()["task"]["id"]
    db = kb.connect()
    try:
        kb.claim_task(db, task_id)
    finally:
        db.close()

    resp = client.patch(
        f"/api/plugins/kanban/tasks/{task_id}",
        json={
            "status": "done",
            "summary": "shipped the thing",
            "metadata": {"changed_files": ["a.py", "b.py"], "tests_run": 7},
        },
    )
    assert resp.status_code == 200, resp.text

    # The latest run must carry the summary and metadata that were sent.
    db = kb.connect()
    try:
        latest = kb.latest_run(db, task_id)
        assert latest.outcome == "completed"
        assert latest.summary == "shipped the thing"
        assert latest.metadata == {"changed_files": ["a.py", "b.py"], "tests_run": 7}
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_patch_status_done_without_summary_still_works(client):
    """Back-compat: a PATCH carrying only the legacy ``result`` field still
    completes the task, and the run summary falls back to that result."""
    from hermes_cli import kanban_db as kb

    created = client.post(
        "/api/plugins/kanban/tasks", json={"title": "y", "assignee": "worker"}
    )
    task_id = created.json()["task"]["id"]
    db = kb.connect()
    try:
        kb.claim_task(db, task_id)
    finally:
        db.close()

    legacy_payload = {"status": "done", "result": "legacy shape"}
    resp = client.patch(f"/api/plugins/kanban/tasks/{task_id}", json=legacy_payload)
    assert resp.status_code == 200, resp.text

    db = kb.connect()
    try:
        latest = kb.latest_run(db, task_id)
        assert latest.outcome == "completed"
        assert latest.summary == "legacy shape"  # falls back to result
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_patch_status_archive_closes_running_run(client):
    """Archiving a claimed task via PATCH must close its in-flight run."""
    from hermes_cli import kanban_db as kb

    created = client.post(
        "/api/plugins/kanban/tasks", json={"title": "z", "assignee": "worker"}
    )
    task_id = created.json()["task"]["id"]

    db = kb.connect()
    try:
        kb.claim_task(db, task_id)
        # Precondition: the claim left a run that has not ended yet.
        in_flight = kb.latest_run(db, task_id)
        assert in_flight.ended_at is None
    finally:
        db.close()

    resp = client.patch(
        f"/api/plugins/kanban/tasks/{task_id}",
        json={"status": "archived"},
    )
    assert resp.status_code == 200, resp.text

    db = kb.connect()
    try:
        archived = kb.get_task(db, task_id)
        assert archived.status == "archived"
        assert archived.current_run_id is None
        assert kb.latest_run(db, task_id).outcome == "reclaimed"
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_event_dict_includes_run_id(client):
    """GET /tasks/:id returns events that each carry a ``run_id`` key, and the
    "completed" event carries the id of the run that was completed."""
    r = client.post("/api/plugins/kanban/tasks", json={"title": "e", "assignee": "worker"})
    tid = r.json()["task"]["id"]
    from hermes_cli import kanban_db as kb
    conn = kb.connect()
    try:
        kb.claim_task(conn, tid)
        # Capture the run id before completing so it can be compared below.
        run_id = kb.latest_run(conn, tid).id
        kb.complete_task(conn, tid, summary="wss")
    finally:
        conn.close()

    r = client.get(f"/api/plugins/kanban/tasks/{tid}")
    assert r.status_code == 200
    events = r.json()["events"]
    # Every event in the response must have a run_id key (None or int).
    for e in events:
        assert "run_id" in e, f"missing run_id in event: {e}"
    # completed event must have the actual run_id.
    comp = [e for e in events if e["kind"] == "completed"]
    # Fail with a readable message instead of an IndexError when the
    # completed event is absent from the response.
    assert comp, f"no completed event in response: {events}"
    assert comp[0]["run_id"] == run_id
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-task force-loaded skills via REST
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_create_task_with_skills_roundtrips(client):
    """POST /tasks accepts ``skills: [...]`` and GET /tasks/:id returns it."""
    new_task = {
        "title": "translate docs",
        "assignee": "linguist",
        "skills": ["translation", "github-code-review"],
    }
    created = client.post("/api/plugins/kanban/tasks", json=new_task)
    assert created.status_code == 200, created.text
    task = created.json()["task"]
    assert task["skills"] == ["translation", "github-code-review"]

    # Fetch via GET /tasks/:id as the drawer does.
    detail = client.get(f"/api/plugins/kanban/tasks/{task['id']}").json()
    assert detail["task"]["skills"] == ["translation", "github-code-review"]
|
||||
|
||||
|
||||
def test_create_task_without_skills_defaults_to_empty_list(client):
    """A task created without ``skills`` serializes the field as either
    None or an empty list; both are accepted here."""
    new_task = {"title": "no skills", "assignee": "x"}
    created = client.post("/api/plugins/kanban/tasks", json=new_task)
    assert created.status_code == 200, created.text

    # Task.skills is None in-memory; _task_dict serializes via
    # dataclasses.asdict which keeps it None. The drawer's
    # `t.skills && t.skills.length > 0` guard handles both null and [].
    skills = created.json()["task"].get("skills")
    assert skills in (None, [])
|
||||
@@ -1,73 +0,0 @@
|
||||
"""Regression tests for background review agent cleanup."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import run_agent as run_agent_module
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
def _bare_agent() -> AIAgent:
    """Build an AIAgent without running ``__init__`` and set the attributes
    this test module assigns by hand."""
    stub = object.__new__(AIAgent)
    # Insertion order mirrors the order the attributes are assigned in.
    attributes = {
        "model": "fake-model",
        "platform": "telegram",
        "provider": "openai",
        "base_url": "",
        "api_key": "",
        "api_mode": "",
        "session_id": "test-session",
        "_parent_session_id": "",
        "_credential_pool": None,
        "_memory_store": object(),
        "_memory_enabled": True,
        "_user_profile_enabled": False,
        "_MEMORY_REVIEW_PROMPT": "review memory",
        "_SKILL_REVIEW_PROMPT": "review skills",
        "_COMBINED_REVIEW_PROMPT": "review both",
        "background_review_callback": None,
        "status_callback": None,
        "_safe_print": lambda *_args, **_kwargs: None,
    }
    for attr_name, attr_value in attributes.items():
        setattr(stub, attr_name, attr_value)
    return stub
|
||||
|
||||
|
||||
class ImmediateThread:
    """Thread stand-in whose ``start()`` runs the target synchronously in the
    calling thread."""

    def __init__(self, *, target, daemon=None, name=None):
        # ``daemon`` and ``name`` are accepted but not stored or used.
        self._target = target

    def start(self):
        """Invoke the stored target right away; its return value is dropped."""
        run_now = self._target
        run_now()
|
||||
|
||||
|
||||
def test_background_review_shuts_down_memory_provider_before_close(monkeypatch):
    """The background review must call shutdown_memory_provider() on the
    review agent before close(), after the conversation has run."""
    recorded = []

    class FakeReviewAgent:
        """Records every lifecycle call as a (name, payload) pair."""

        def __init__(self, **kwargs):
            recorded.append(("init", kwargs))
            self._session_messages = []

        def run_conversation(self, **kwargs):
            recorded.append(("run_conversation", kwargs))

        def shutdown_memory_provider(self):
            recorded.append(("shutdown_memory_provider", None))

        def close(self):
            recorded.append(("close", None))

    # Swap in the fake agent, and run the "thread" inline so every call has
    # happened by the time _spawn_background_review returns.
    monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
    monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)

    parent_agent = _bare_agent()
    AIAgent._spawn_background_review(
        parent_agent,
        messages_snapshot=[{"role": "user", "content": "hello"}],
        review_memory=True,
    )

    call_order = [name for name, _payload in recorded]
    assert call_order == [
        "init",
        "run_conversation",
        "shutdown_memory_provider",
        "close",
    ]
|
||||
@@ -0,0 +1,41 @@
|
||||
# Stress / battle-test suite
|
||||
|
||||
Long-running tests that exercise the Kanban kernel under adversarial
|
||||
conditions. **Not run by `scripts/run_tests.sh`** because they can
|
||||
take 30+ seconds each and spawn real subprocesses.
|
||||
|
||||
Run manually:
|
||||
|
||||
```bash
|
||||
./venv/bin/python -m pytest tests/stress/ -v -s
|
||||
# or individual files:
|
||||
./venv/bin/python tests/stress/test_concurrency.py
|
||||
./venv/bin/python tests/stress/test_subprocess_e2e.py
|
||||
./venv/bin/python tests/stress/test_property_fuzzing.py
|
||||
./venv/bin/python tests/stress/test_benchmarks.py
|
||||
```
|
||||
|
||||
## What's covered
|
||||
|
||||
- **test_concurrency.py** — 5 workers, 100 tasks, race-for-claim. Asserts
|
||||
no double-claims, no orphan runs, no SQLite errors escape retry.
|
||||
- **test_concurrency_mixed.py** — 10 workers + 1 reclaimer, 500 tasks,
|
||||
random ops (claim/complete/block/unblock/archive). Same invariants
|
||||
under adversarial scheduling.
|
||||
- **test_concurrency_reclaim_race.py** — TTL < work duration so the
|
||||
reclaimer intentionally yanks tasks mid-work; verifies the worker's
|
||||
late-complete is refused cleanly (CAS guard works).
|
||||
- **test_subprocess_e2e.py** — dispatcher spawns real Python subprocess
|
||||
workers that heartbeat + complete via the CLI; crash detection
|
||||
against a real dead PID.
|
||||
- **test_property_fuzzing.py** — 500 random operation sequences,
|
||||
~40k operations total, 9 invariant checks after each step.
|
||||
- **test_atypical_scenarios.py** — 28 scenarios covering atypical
|
||||
user inputs: unicode/emoji/RTL, 1 MB strings, SQL injection
|
||||
attempts, cycles, self-parents, wide fan-in/out, clock skew,
|
||||
HERMES_HOME with spaces/unicode/symlinks, 1000 runs on one
|
||||
task, idempotency-key race across processes, terminal-state
|
||||
resurrection attempts, dashboard REST with weird JSON.
|
||||
- **test_benchmarks.py** — latency at 100/1k/10k tasks for dispatch,
|
||||
recompute_ready, list_tasks, build_worker_context, etc. Results saved
|
||||
to JSON for regression diffing.
|
||||
@@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fake worker process that exercises the real subprocess contract.
|
||||
|
||||
Reads HERMES_KANBAN_TASK from env, heartbeats periodically, does short
|
||||
work, completes via the CLI. Designed to be spawned by the dispatcher
|
||||
exactly the way `hermes chat -q` would be, minus the LLM cost.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
def main():
    """Act as a worker: announce via heartbeat, heartbeat three more times
    during simulated work, then complete the task with structured metadata.

    The task id comes from HERMES_KANBAN_TASK (KeyError if unset); the
    workspace comes from HERMES_KANBAN_WORKSPACE and defaults to "".
    """
    env = os.environ
    tid = env["HERMES_KANBAN_TASK"]
    workspace = env.get("HERMES_KANBAN_WORKSPACE", "")

    def kanban(*cli_args):
        # Go through the real CLI (argparse + init_db + etc); a non-zero
        # exit raises CalledProcessError because of check=True.
        subprocess.run(
            ["hermes", "kanban", *cli_args],
            check=True, capture_output=True,
        )

    kanban("heartbeat", tid, "--note", "started")

    # Simulate work with periodic heartbeats.
    for step in (1, 2, 3):
        time.sleep(0.3)
        kanban("heartbeat", tid, "--note", f"progress {step}/3")

    # Complete with a structured handoff.
    handoff = json.dumps({
        "workspace": workspace,
        "worker_pid": os.getpid(),
        "iterations": 3,
    })
    kanban(
        "complete", tid,
        "--summary", f"real-subprocess worker finished {tid}",
        "--metadata", handoff,
    )


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,37 @@
|
||||
"""pytest config for the stress/ subdirectory.
|
||||
|
||||
These tests are slow (30s+), spawn subprocesses, and are not run by
|
||||
default. Enable via `pytest --run-stress` or by running the scripts
|
||||
directly.
|
||||
|
||||
The scripts are primarily __main__-executable entry points; pytest
|
||||
isn't expected to collect individual test functions from them.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
|
||||
def pytest_collection_modifyitems(config, items):
    """Mark every collected item under ``tests/stress`` as skipped unless
    ``--run-stress`` was given.

    ``config.getoption`` is called with ``default=False``, so the hook also
    works when the option was never registered.
    """
    if config.getoption("--run-stress", default=False):
        return
    skip_stress = pytest.mark.skip(
        reason="stress test (opt-in via --run-stress or run script directly)"
    )
    for item in items:
        # Normalise separators first: on Windows the path uses backslashes
        # and would never contain the literal "tests/stress".
        item_path = str(item.fspath).replace("\\", "/")
        if "tests/stress" in item_path:
            item.add_marker(skip_stress)
|
||||
|
||||
|
||||
def pytest_addoption(parser):
    """Register the ``--run-stress`` boolean flag (off by default)."""
    flag_settings = {
        "action": "store_true",
        "default": False,
        "help": "Run the stress/battle-test suite (slow, spawns subprocesses).",
    }
    parser.addoption("--run-stress", **flag_settings)
|
||||
|
||||
|
||||
collect_ignore_glob = [
|
||||
# The stress scripts have top-level code and hard-coded paths; they're
|
||||
# meant to run as `python tests/stress/<name>.py`, not as pytest modules.
|
||||
"*.py",
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,221 @@
|
||||
"""Scale benchmarks for the Kanban kernel.
|
||||
|
||||
Measures:
|
||||
- dispatch_once latency at 100, 1000, 10000 tasks
|
||||
- recompute_ready latency at 100, 1000, 10000 todo tasks with wide parent graphs
|
||||
- build_worker_context latency with 1, 10, 50 parent dependencies
|
||||
- board list/stats query latency
|
||||
- task_runs query latency at scale
|
||||
|
||||
Results printed as a table. Saved to JSON for regression-diffing in CI
|
||||
or future reviews. Not a pass/fail test — records numbers so we know
|
||||
when a change regresses latency by 10x and can decide whether to care.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
WT = str(Path(__file__).resolve().parents[2])
|
||||
|
||||
|
||||
def bench(label, fn, iterations=5):
    """Time ``fn`` over ``iterations`` runs and summarise the latencies.

    Args:
        label: Free-form name copied into the result.
        fn: Zero-argument callable to time; its return value is ignored.
        iterations: Number of timed calls; must be at least 1.

    Returns:
        A dict with keys ``label``, ``iter``, ``min_ms``, ``median_ms`` and
        ``max_ms`` (timings in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # Without samples there is no min/median/max to report.
        raise ValueError("iterations must be >= 1")
    times = []
    for _ in range(iterations):
        t0 = time.perf_counter()
        fn()
        times.append((time.perf_counter() - t0) * 1000)
    times.sort()
    mid = len(times) // 2
    if len(times) % 2:
        md = times[mid]
    else:
        # Even sample count: average the two middle values so the figure is
        # a true median rather than the upper-middle sample.
        md = (times[mid - 1] + times[mid]) / 2
    return {
        "label": label,
        "iter": iterations,
        "min_ms": times[0],
        "median_ms": md,
        "max_ms": times[-1],
    }
|
||||
|
||||
|
||||
def seed_tasks(conn, kb, n, assignee="bench-worker", with_parents=False):
    """Create ``n`` tasks through ``kb.create_task`` and return their ids.

    Every task is titled ``bench <index>`` under the ``bench`` tenant. When
    ``with_parents`` is true, each task from the sixth onwards gets five
    parents sampled at random from the ids created before it; all other
    tasks are created with an empty parent tuple.
    """
    created = []
    for index in range(n):
        needs_parents = with_parents and index >= 5
        # Only ids created so far are eligible as parents.
        chosen = random.sample(created[:index], 5) if needs_parents else ()
        new_id = kb.create_task(
            conn,
            title=f"bench {index}",
            assignee=assignee,
            tenant="bench",
            parents=chosen,
        )
        created.append(new_id)
    return created
|
||||
|
||||
|
||||
def _reset_db(kb, home):
    """Wipe ``home``, re-initialise the Kanban DB there and return a connection."""
    import shutil

    shutil.rmtree(home, ignore_errors=True)
    os.makedirs(home)
    # Clear the module's initialised-path bookkeeping before calling init_db
    # again on the recreated directory.
    kb._INITIALIZED_PATHS.clear()
    kb.init_db()
    return kb.connect()


def _record(results, r, **extra):
    """Print one benchmark line, tag ``r`` with ``extra`` and append it to ``results``."""
    print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")
    r.update(extra)
    results.append(r)


def main():
    """Run every Kanban benchmark, print a summary table and save it as JSON.

    A throw-away HERMES_HOME is created for the run and wiped before each
    measurement so that no benchmark sees data left by a previous one. The
    directory is removed again when the run ends, whether it succeeds or not.
    Results are written to ``/tmp/kanban_bench_results.json``.
    """
    import shutil
    from contextlib import closing

    home = tempfile.mkdtemp(prefix="hermes_bench_")
    os.environ["HERMES_HOME"] = home
    os.environ["HOME"] = home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    results = []
    try:
        kb.init_db()

        # ============ dispatch_once latency ============
        for n in [100, 1000, 10000]:
            print(f"\n== dispatch_once @ {n} tasks ==")
            # Fresh DB each time so we're not measuring cumulative effects
            with closing(_reset_db(kb, home)) as conn:
                seed_tasks(conn, kb, n, assignee=None)  # no assignee → won't spawn
                r = bench(
                    f"dispatch_once (n={n}, no spawn)",
                    lambda: kb.dispatch_once(conn, spawn_fn=lambda *_: None),
                    iterations=5,
                )
                _record(results, r, n=n)

        # ============ recompute_ready at scale with parent graphs ============
        for n in [100, 1000, 10000]:
            print(f"\n== recompute_ready @ {n} tasks (5 parents each) ==")
            with closing(_reset_db(kb, home)) as conn:
                ids = seed_tasks(conn, kb, n, assignee=None, with_parents=True)
                # Complete the first 100 so some todo tasks might get promoted
                for tid in ids[:min(100, n // 10)]:
                    kb.complete_task(conn, tid, result="bench")
                r = bench(
                    f"recompute_ready (n={n}, with parents)",
                    lambda: kb.recompute_ready(conn),
                    iterations=5,
                )
                _record(results, r, n=n)

        # ============ build_worker_context with N parents ============
        for parent_count in [1, 10, 50]:
            print(f"\n== build_worker_context with {parent_count} parents ==")
            with closing(_reset_db(kb, home)) as conn:
                # Create parents, complete them with summaries+metadata
                parent_ids = []
                for i in range(parent_count):
                    pid = kb.create_task(conn, title=f"parent {i}", assignee="p")
                    kb.claim_task(conn, pid)
                    kb.complete_task(
                        conn, pid,
                        summary=f"parent {i} result that is longer than a single token "
                                f"so we actually measure the IO",
                        metadata={"files": [f"file_{j}.py" for j in range(5)], "i": i},
                    )
                    parent_ids.append(pid)
                child_id = kb.create_task(
                    conn, title="child", assignee="c", parents=parent_ids,
                )
                r = bench(
                    f"build_worker_context (parents={parent_count})",
                    lambda: kb.build_worker_context(conn, child_id),
                    iterations=10,
                )
                _record(results, r, parent_count=parent_count)

        # ============ list_tasks at scale ============
        for n in [100, 1000, 10000]:
            print(f"\n== list_tasks @ {n} ==")
            with closing(_reset_db(kb, home)) as conn:
                seed_tasks(conn, kb, n)
                r = bench(
                    f"list_tasks (n={n})",
                    lambda: kb.list_tasks(conn),
                    iterations=5,
                )
                _record(results, r, n=n)

        # ============ board_stats at scale ============
        for n in [100, 1000, 10000]:
            print(f"\n== board_stats @ {n} ==")
            with closing(_reset_db(kb, home)) as conn:
                seed_tasks(conn, kb, n)
                r = bench(
                    f"board_stats (n={n})",
                    lambda: kb.board_stats(conn),
                    iterations=5,
                )
                _record(results, r, n=n)

        # ============ list_runs at scale ============
        for n in [100, 1000]:
            print(f"\n== list_runs for task with {n} attempts ==")
            with closing(_reset_db(kb, home)) as conn:
                tid = kb.create_task(conn, title="x", assignee="w")
                # Create N attempts via claim/release
                for _ in range(n):
                    kb.claim_task(conn, tid, ttl_seconds=0)
                    kb.release_stale_claims(conn)
                r = bench(
                    f"list_runs (runs={n})",
                    lambda: kb.list_runs(conn, tid),
                    iterations=10,
                )
                _record(results, r, run_count=n)

        # ============ SUMMARY TABLE ============
        print()
        print("=" * 60)
        print("SUMMARY")
        print("=" * 60)
        print(f"{'Benchmark':<50} {'min':>8} {'median':>8} {'max':>8}")
        for r in results:
            print(f"{r['label']:<50} {r['min_ms']:>7.1f}ms {r['median_ms']:>7.1f}ms {r['max_ms']:>7.1f}ms")

        # Save for future diffing.
        out_path = "/tmp/kanban_bench_results.json"
        with open(out_path, "w") as f:
            json.dump(results, f, indent=2)
        print(f"\nResults saved to {out_path}")
    finally:
        # The benchmark home is scratch space; do not leave it behind.
        shutil.rmtree(home, ignore_errors=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,302 @@
|
||||
"""Multi-process concurrency stress test for the Kanban kernel.
|
||||
|
||||
5 worker processes race for claims on a shared DB with 100 tasks. Each
|
||||
worker loops: claim -> simulate work -> complete. Asserts the invariants
|
||||
that make the system worth building:
|
||||
|
||||
- No task claimed by two workers simultaneously
|
||||
- No task completed twice
|
||||
- Every claim produces exactly one run row
|
||||
- Every completion closes exactly one run row
|
||||
- Zero SQLite locking errors that escape the retry layer
|
||||
- Total run count == total claim events == total completed events
|
||||
|
||||
This test is the primary justification for WAL + CAS-based claim. If it
|
||||
passes, the architecture holds. If it fails, we have a real bug to fix
|
||||
before anyone runs this in anger.
|
||||
"""
|
||||
|
||||
import json
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
import random
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
NUM_WORKERS = 5
|
||||
NUM_TASKS = 100
|
||||
WORKER_TIMEOUT_S = 60
|
||||
WT = str(Path(__file__).resolve().parents[2])
|
||||
|
||||
|
||||
def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None:
    """Claim-and-complete loop executed by one worker process.

    Points the environment at ``hermes_home``, then repeatedly looks for a
    ready, unclaimed task, tries to claim it, sleeps briefly to simulate
    work and completes it. The loop stops after more than 20 consecutive
    empty polls or once ``WORKER_TIMEOUT_S`` has elapsed. Every claim,
    completion, lost race and SQLite error is recorded, and the records are
    written to ``result_file`` as JSON when the loop ends.
    """
    os.environ["HERMES_HOME"] = hermes_home
    os.environ["HOME"] = hermes_home
    sys.path.insert(0, WT)

    from hermes_cli import kanban_db as kb

    who = f"worker-{worker_id}"
    log = []
    record = log.append
    dry_polls = 0
    began = time.monotonic()

    while time.monotonic() - began < WORKER_TIMEOUT_S:
        db = kb.connect()
        try:
            # No ORDER BY on purpose: workers should collide on the same rows.
            candidate = db.execute(
                "SELECT id FROM tasks WHERE status = 'ready' AND claim_lock IS NULL LIMIT 1"
            ).fetchone()
            if candidate is None:
                dry_polls += 1
                if dry_polls > 20:
                    break  # nothing ready for long enough, give up
                time.sleep(0.01)
                continue
            dry_polls = 0

            task_id = candidate["id"]
            try:
                won = kb.claim_task(db, task_id, claimer=who)
            except sqlite3.OperationalError as exc:
                record({"kind": "sqlite_err_on_claim", "task": task_id, "err": str(exc)})
                continue
            if won is None:
                # Another worker got there first; this is normal contention.
                record({"kind": "lost_claim_race", "task": task_id})
                continue

            run = kb.latest_run(db, task_id)
            record(dict(
                kind="claimed",
                task=task_id,
                worker=worker_id,
                run_id=run.id,
                t=time.monotonic() - began,
            ))

            # Pretend to work for a short, random time.
            time.sleep(random.uniform(0.001, 0.05))

            try:
                kb.complete_task(
                    db,
                    task_id,
                    result=f"done by worker-{worker_id}",
                    summary=f"worker-{worker_id} finished task {task_id}",
                    metadata={"worker_id": worker_id, "run_id": run.id},
                )
            except sqlite3.OperationalError as exc:
                record({"kind": "sqlite_err_on_complete", "task": task_id, "err": str(exc)})
                continue
            record(dict(
                kind="completed",
                task=task_id,
                worker=worker_id,
                run_id=run.id,
                t=time.monotonic() - began,
            ))
        finally:
            db.close()

    with open(result_file, "w") as out:
        json.dump(log, out)
|
||||
|
||||
|
||||
def main():
    """Seed a board, race ``NUM_WORKERS`` processes over it and check invariants.

    Creates ``NUM_TASKS`` tasks in a fresh HERMES_HOME, spawns the workers
    with the ``spawn`` start method, aggregates the per-worker event files
    and checks both the events and the final database state. Prints summary
    statistics and exits with status 1 if any invariant was violated.

    Worker result files live in a directory created for this run, so output
    from an earlier run can never be mistaken for the output of a worker
    that died. A worker that leaves no result file is reported as a failure.
    """
    home = tempfile.mkdtemp(prefix="hermes_concurrency_")
    print(f"HERMES_HOME = {home}")

    # Seed.
    os.environ["HERMES_HOME"] = home
    os.environ["HOME"] = home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    kb.init_db()
    conn = kb.connect()
    try:
        for i in range(NUM_TASKS):
            kb.create_task(
                conn, title=f"task #{i}", assignee="shared",
                tenant="concurrency-test",
            )
    finally:
        conn.close()
    print(f"Seeded {NUM_TASKS} tasks.")

    # Spawn workers.
    ctx = mp.get_context("spawn")
    # Per-run scratch directory: fixed paths could pick up stale files.
    results_dir = tempfile.mkdtemp(prefix="hermes_concurrency_results_")
    result_files = [
        os.path.join(results_dir, f"concurrency_worker_{i}.json")
        for i in range(NUM_WORKERS)
    ]
    procs = []
    start = time.monotonic()
    for i in range(NUM_WORKERS):
        p = ctx.Process(target=worker_loop, args=(i, home, result_files[i]))
        p.start()
        procs.append(p)

    for p in procs:
        p.join(timeout=WORKER_TIMEOUT_S + 30)
        if p.is_alive():
            p.terminate()
            p.join()

    elapsed = time.monotonic() - start
    print(f"All workers done in {elapsed:.1f}s")

    failures = []

    # Aggregate worker events.
    all_events = []
    for i, path in enumerate(result_files):
        if not os.path.isfile(path):
            print(f" WORKER {i} produced no result file — died?")
            # Its events are lost, so the event-based checks are incomplete.
            failures.append(f"WORKER {i} produced no result file")
            continue
        with open(path) as fh:
            all_events.extend(json.load(fh))

    # ============ INVARIANT CHECKS ============
    print()
    print("=" * 60)
    print("INVARIANT CHECKS")
    print("=" * 60)

    # Check 1: no task claimed by two different workers
    claims_by_task = {}
    for e in all_events:
        if e["kind"] == "claimed":
            if e["task"] in claims_by_task:
                prev = claims_by_task[e["task"]]
                if prev["worker"] != e["worker"]:
                    failures.append(
                        f"DOUBLE CLAIM: task {e['task']} claimed by "
                        f"worker {prev['worker']} AND worker {e['worker']}"
                    )
            claims_by_task[e["task"]] = e

    # Check 2: every completion has a matching claim from the same worker
    for e in all_events:
        if e["kind"] == "completed":
            prev_claim = claims_by_task.get(e["task"])
            if prev_claim is None:
                failures.append(f"COMPLETION WITHOUT CLAIM: task {e['task']}")
            elif prev_claim["worker"] != e["worker"]:
                failures.append(
                    f"WORKER MISMATCH: task {e['task']} claimed by "
                    f"{prev_claim['worker']} but completed by {e['worker']}"
                )

    # Check 3: DB state — every task should be in 'done', no dangling claims
    conn = kb.connect()
    try:
        bad_status = conn.execute(
            "SELECT id, status, claim_lock, current_run_id FROM tasks "
            "WHERE status != 'done' OR claim_lock IS NOT NULL "
            "OR current_run_id IS NOT NULL"
        ).fetchall()
        for row in bad_status:
            failures.append(
                f"BAD FINAL STATE: task {row['id']} status={row['status']} "
                f"claim_lock={row['claim_lock']} current_run_id={row['current_run_id']}"
            )

        # Check 4: exactly one run per task, all closed as completed
        bad_runs = conn.execute(
            "SELECT task_id, COUNT(*) as n FROM task_runs "
            "GROUP BY task_id HAVING n != 1"
        ).fetchall()
        for row in bad_runs:
            failures.append(
                f"WRONG RUN COUNT: task {row['task_id']} has {row['n']} runs (expected 1)"
            )

        open_runs = conn.execute(
            "SELECT id, task_id FROM task_runs WHERE ended_at IS NULL"
        ).fetchall()
        for row in open_runs:
            failures.append(f"OPEN RUN: run {row['id']} on task {row['task_id']}")

        wrong_outcomes = conn.execute(
            "SELECT task_id, outcome FROM task_runs "
            "WHERE outcome IS NULL OR outcome != 'completed'"
        ).fetchall()
        for row in wrong_outcomes:
            failures.append(
                f"WRONG OUTCOME: task {row['task_id']} run outcome={row['outcome']}"
            )

        # Check 5: event counts — exactly NUM_TASKS completed events
        completed_events = conn.execute(
            "SELECT COUNT(*) as n FROM task_events WHERE kind='completed'"
        ).fetchone()["n"]
        if completed_events != NUM_TASKS:
            failures.append(
                f"EVENT COUNT MISMATCH: {completed_events} completed events "
                f"expected {NUM_TASKS}"
            )

        # Check 6: count SQLite errors that escaped retry
        sqlite_errs = sum(
            1 for e in all_events if e["kind"].startswith("sqlite_err")
        )
        if sqlite_errs > 0:
            failures.append(f"UNRETRIED SQLITE ERRORS: {sqlite_errs}")
    finally:
        conn.close()

    # ============ STATS ============
    print()
    total_claims = sum(1 for e in all_events if e["kind"] == "claimed")
    total_completes = sum(1 for e in all_events if e["kind"] == "completed")
    total_lost_races = sum(1 for e in all_events if e["kind"] == "lost_claim_race")

    per_worker = {}
    for e in all_events:
        if e["kind"] == "completed":
            per_worker[e["worker"]] = per_worker.get(e["worker"], 0) + 1

    print(f"Total claims: {total_claims}")
    print(f"Total completes: {total_completes}")
    print(f"Lost claim races: {total_lost_races} (expected contention; not a bug)")
    print(f"Elapsed: {elapsed:.2f}s")
    print(f"Throughput: {NUM_TASKS/elapsed:.1f} tasks/sec")
    print(f"Per-worker completions:")
    for w in sorted(per_worker):
        print(f" worker-{w}: {per_worker[w]}")

    if failures:
        print()
        print("=" * 60)
        print(f"FAILURES ({len(failures)}):")
        print("=" * 60)
        for msg in failures[:20]:
            print(f" {msg}")
        if len(failures) > 20:
            print(f" ... and {len(failures) - 20} more")
        sys.exit(1)
    else:
        print()
        print("✔ ALL INVARIANTS HELD")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,350 @@
|
||||
"""Harder concurrency stress: mixed operations + larger scale.
|
||||
|
||||
Scales to 500 tasks, 10 workers, 30s runtime. Each worker randomly:
|
||||
- claims + completes (70%)
|
||||
- claims + blocks with a reason (15%)
|
||||
- unblocks a random blocked task (10%)
|
||||
- archives a random done task (5%)
|
||||
|
||||
Adds a background "dispatcher" process that calls release_stale_claims
|
||||
every 200ms, racing against the workers to
|
||||
surface TTL + crash detection races.
|
||||
|
||||
Pass criteria: runs invariant holds, no double-completions, no orphan
|
||||
runs, no SQLite errors escape the retry layer.
|
||||
"""
|
||||
|
||||
import json
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
import random
|
||||
import sqlite3
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
NUM_WORKERS = 10
|
||||
NUM_TASKS = 500
|
||||
RUN_DURATION_S = 30
|
||||
WT = str(Path(__file__).resolve().parents[2])
|
||||
|
||||
|
||||
def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None:
    """Mixed-operation loop executed by one worker process.

    On each pass a random draw picks the operation: below 0.10 try to unblock
    a random blocked task, below 0.15 try to archive a random done task,
    otherwise claim a ready task (5 second TTL) and then block it (20% of
    the time) or complete it. The loop ends after ``RUN_DURATION_S`` seconds
    or after more than 50 consecutive passes that found no ready task. All
    recorded events are written to ``result_file`` as JSON.
    """
    os.environ["HERMES_HOME"] = hermes_home
    os.environ["HOME"] = hermes_home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    log = []
    record = log.append
    began = time.monotonic()
    dry_polls = 0

    def sqlite_failure(op_name, task_id, exc):
        # Error text is truncated to keep the result file small.
        return {"kind": "sqlite_err", "op": op_name,
                "task": task_id, "err": str(exc)[:100]}

    while time.monotonic() - began < RUN_DURATION_S:
        db = kb.connect()
        try:
            draw = random.random()

            if draw < 0.10:
                # Unblock a random blocked task, if there is one.
                picked = db.execute(
                    "SELECT id FROM tasks WHERE status='blocked' ORDER BY RANDOM() LIMIT 1"
                ).fetchone()
                if picked:
                    try:
                        changed = kb.unblock_task(db, picked["id"])
                        label = "unblocked" if changed else "unblock_noop"
                        record({"kind": label, "task": picked["id"], "worker": worker_id})
                    except sqlite3.OperationalError as exc:
                        record(sqlite_failure("unblock", picked["id"], exc))
                continue

            if draw < 0.15:
                # Archive a random done task, if there is one.
                picked = db.execute(
                    "SELECT id FROM tasks WHERE status='done' ORDER BY RANDOM() LIMIT 1"
                ).fetchone()
                if picked:
                    try:
                        kb.archive_task(db, picked["id"])
                        record({"kind": "archived", "task": picked["id"],
                                "worker": worker_id})
                    except sqlite3.OperationalError as exc:
                        record(sqlite_failure("archive", picked["id"], exc))
                continue

            # Everything else: claim, then either complete or block.
            picked = db.execute(
                "SELECT id FROM tasks WHERE status='ready' AND claim_lock IS NULL LIMIT 1"
            ).fetchone()
            if picked is None:
                dry_polls += 1
                if dry_polls > 50:
                    break
                time.sleep(0.02)
                continue
            dry_polls = 0

            task_id = picked["id"]
            try:
                won = kb.claim_task(
                    db,
                    task_id,
                    claimer=f"worker-{worker_id}",
                    ttl_seconds=5,  # short TTL so the reclaimer can race in
                )
            except sqlite3.OperationalError as exc:
                record(sqlite_failure("claim", task_id, exc))
                continue
            if won is None:
                record({"kind": "lost_claim_race", "task": task_id})
                continue

            run = kb.latest_run(db, task_id)
            record({"kind": "claimed", "task": task_id, "worker": worker_id,
                    "run_id": run.id, "t": time.monotonic() - began})

            time.sleep(random.uniform(0.005, 0.05))

            # One time in five the task is blocked rather than completed.
            should_block = random.random() < 0.20
            if should_block:
                try:
                    kb.block_task(db, task_id, reason=f"blocked by worker-{worker_id}")
                    record({"kind": "blocked", "task": task_id,
                            "worker": worker_id, "run_id": run.id})
                except sqlite3.OperationalError as exc:
                    record(sqlite_failure("block", task_id, exc))
            else:
                try:
                    kb.complete_task(
                        db,
                        task_id,
                        result=f"done by worker-{worker_id}",
                        summary=f"worker-{worker_id} ok",
                        metadata={"worker_id": worker_id},
                    )
                    record({"kind": "completed", "task": task_id,
                            "worker": worker_id, "run_id": run.id,
                            "t": time.monotonic() - began})
                except sqlite3.OperationalError as exc:
                    record(sqlite_failure("complete", task_id, exc))
        finally:
            db.close()

    with open(result_file, "w") as out:
        json.dump(log, out)
|
||||
|
||||
|
||||
def reclaimer_loop(hermes_home: str, result_file: str) -> None:
    """Call ``release_stale_claims`` roughly every 200 ms while workers run.

    Runs for ``RUN_DURATION_S + 2`` seconds, opening a new connection for
    each pass. A pass that released at least one claim is recorded with its
    count and time offset; SQLite operational errors are recorded too. The
    records are written to ``result_file`` as JSON at the end.
    """
    os.environ["HERMES_HOME"] = hermes_home
    os.environ["HOME"] = hermes_home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    log = []
    began = time.monotonic()
    deadline = RUN_DURATION_S + 2

    while time.monotonic() - began < deadline:
        db = kb.connect()
        try:
            released = kb.release_stale_claims(db)
            if released:
                log.append({"kind": "reclaimed", "count": released,
                            "t": time.monotonic() - began})
        except sqlite3.OperationalError as exc:
            log.append({"kind": "sqlite_err", "op": "reclaim",
                        "err": str(exc)[:100]})
        finally:
            db.close()
        time.sleep(0.2)

    with open(result_file, "w") as out:
        json.dump(log, out)
|
||||
|
||||
|
||||
def main():
    """Run the mixed-operation stress test and verify the run invariants.

    Seeds ``NUM_TASKS`` tasks in a fresh HERMES_HOME, launches
    ``NUM_WORKERS`` workers plus one reclaimer with the ``spawn`` start
    method, aggregates their event files and checks the events and the
    final database state. Prints statistics and exits with status 1 if any
    check failed.

    Result files live in a directory created for this run, so stale output
    from an earlier run is never aggregated. A worker that leaves no result
    file is reported as a failure. SQLite errors recorded by the reclaimer
    are checked along with those recorded by workers.
    """
    home = tempfile.mkdtemp(prefix="hermes_mixed_stress_")
    print(f"HERMES_HOME = {home}")

    os.environ["HERMES_HOME"] = home
    os.environ["HOME"] = home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    kb.init_db()
    conn = kb.connect()
    try:
        for i in range(NUM_TASKS):
            kb.create_task(
                conn, title=f"t#{i}", assignee="shared", tenant="mixed-stress",
            )
    finally:
        conn.close()
    print(f"Seeded {NUM_TASKS} tasks, launching {NUM_WORKERS} workers + 1 reclaimer")

    ctx = mp.get_context("spawn")
    # Per-run scratch directory: fixed paths could pick up stale files.
    results_dir = tempfile.mkdtemp(prefix="hermes_mixed_stress_results_")
    worker_results = [
        os.path.join(results_dir, f"mixed_worker_{i}.json")
        for i in range(NUM_WORKERS)
    ]
    reclaim_result = os.path.join(results_dir, "mixed_reclaim.json")

    procs = []
    start = time.monotonic()
    for i in range(NUM_WORKERS):
        p = ctx.Process(target=worker_loop, args=(i, home, worker_results[i]))
        p.start()
        procs.append(p)
    r = ctx.Process(target=reclaimer_loop, args=(home, reclaim_result))
    r.start()
    procs.append(r)

    for p in procs:
        p.join(timeout=RUN_DURATION_S + 30)
        if p.is_alive():
            p.terminate()
            p.join()

    elapsed = time.monotonic() - start
    print(f"Done in {elapsed:.1f}s")

    failures = []

    # Aggregate.
    all_events = []
    for i, path in enumerate(worker_results):
        if os.path.isfile(path):
            with open(path) as fh:
                all_events.extend(json.load(fh))
        else:
            print(f" WORKER {i} died with no result file!")
            # Its events are lost, so the event-based checks are incomplete.
            failures.append(f"WORKER {i} died with no result file")
    reclaim_events = []
    if os.path.isfile(reclaim_result):
        with open(reclaim_result) as fh:
            reclaim_events = json.load(fh)

    # ============ INVARIANT CHECKS ============
    print()
    print("=" * 60)
    print("INVARIANT CHECKS")
    print("=" * 60)

    # Per-run attribution tracking
    claims = [e for e in all_events if e["kind"] == "claimed"]
    completions = [e for e in all_events if e["kind"] == "completed"]

    # Every completion must have a matching claim on the same run_id AND
    # the same worker (workers don't steal each other's runs).
    claims_by_run = {c["run_id"]: c for c in claims}
    for comp in completions:
        claim = claims_by_run.get(comp["run_id"])
        if claim is None:
            # A worker must never complete a run it did not claim.
            failures.append(
                f"COMPLETION WITHOUT CLAIM: task {comp['task']} run {comp['run_id']} "
                f"by worker {comp['worker']}"
            )
        elif claim["worker"] != comp["worker"]:
            failures.append(
                f"CROSS-WORKER COMPLETION: run {comp['run_id']} claimed by "
                f"worker {claim['worker']} but completed by worker {comp['worker']}"
            )

    # SQLite errors that escaped the retry layer, in workers or the reclaimer.
    sqlite_errs = [
        e for e in all_events + reclaim_events if e["kind"] == "sqlite_err"
    ]
    if sqlite_errs:
        for e in sqlite_errs[:5]:
            failures.append(f"SQLITE ERROR: op={e.get('op')} err={e.get('err')}")
        if len(sqlite_errs) > 5:
            failures.append(f" ... and {len(sqlite_errs) - 5} more sqlite errs")

    # DB final state — every task should be in a clean terminal state.
    conn = kb.connect()
    try:
        # Invariant: current_run_id NULL iff latest run is terminal
        inconsistent = conn.execute("""
            SELECT t.id, t.status, t.current_run_id
            FROM tasks t
            WHERE t.current_run_id IS NOT NULL
            AND EXISTS (SELECT 1 FROM task_runs r
            WHERE r.id = t.current_run_id AND r.ended_at IS NOT NULL)
        """).fetchall()
        for row in inconsistent:
            failures.append(
                f"INVARIANT VIOLATION: task {row['id']} status={row['status']} "
                f"has current_run_id={row['current_run_id']} but run is ended"
            )

        # Invariant: no orphan open runs
        orphans = conn.execute("""
            SELECT r.id, r.task_id, r.status
            FROM task_runs r
            LEFT JOIN tasks t ON t.current_run_id = r.id
            WHERE r.ended_at IS NULL AND t.id IS NULL
        """).fetchall()
        for row in orphans:
            failures.append(
                f"ORPHAN OPEN RUN: run {row['id']} on task {row['task_id']}"
            )

        # Counts — should roughly balance.
        status_counts = dict(
            conn.execute("SELECT status, COUNT(*) FROM tasks GROUP BY status").fetchall()
        )
        run_outcome_counts = dict(
            conn.execute(
                "SELECT outcome, COUNT(*) FROM task_runs "
                "WHERE ended_at IS NOT NULL GROUP BY outcome"
            ).fetchall()
        )
        active_runs = conn.execute(
            "SELECT COUNT(*) FROM task_runs WHERE ended_at IS NULL"
        ).fetchone()[0]
    finally:
        conn.close()

    # ============ STATS ============
    print()
    print(f"Workers: {NUM_WORKERS}, Tasks: {NUM_TASKS}")
    print(f"Elapsed: {elapsed:.1f}s")
    print(f"Events collected: {len(all_events)} (+{len(reclaim_events)} reclaim)")
    print()
    print("Operations:")
    op_counts = {}
    for e in all_events:
        op_counts[e["kind"]] = op_counts.get(e["kind"], 0) + 1
    for k in sorted(op_counts):
        print(f" {k:<25} {op_counts[k]}")

    print()
    print("Final task status:")
    for s, n in sorted(status_counts.items()):
        print(f" {s:<10} {n}")
    print("Final run outcomes:")
    for o, n in sorted(run_outcome_counts.items(), key=lambda x: (x[0] or '',)):
        # An outcome can be NULL; None rejects the "<12" format spec, so
        # convert to text before padding.
        print(f" {str(o):<12} {n}")
    print(f" active {active_runs}")

    if failures:
        print()
        print("=" * 60)
        print(f"FAILURES ({len(failures)}):")
        print("=" * 60)
        for msg in failures[:30]:
            print(f" {msg}")
        if len(failures) > 30:
            print(f" ... and {len(failures) - 30} more")
        sys.exit(1)
    else:
        print()
        print("✔ ALL INVARIANTS HELD UNDER MIXED STRESS")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,241 @@
|
||||
"""Target the reclaim race specifically.
|
||||
|
||||
Workers claim tasks with a 1s TTL but sleep 2s before completing. The
|
||||
reclaimer runs every 200ms. Scenario: worker claims, reclaimer expires
|
||||
the claim mid-work, worker tries to complete AFTER its run has been
|
||||
reclaimed.
|
||||
|
||||
Expected behavior (per design): the worker's complete_task should
|
||||
either succeed on the reclaimed-and-re-claimed-by-another-worker case
|
||||
(no, it should refuse — the claim was invalidated), OR succeed by
|
||||
grace (we "forgive" a late complete from the original worker if no
|
||||
one else picked it up).
|
||||
|
||||
Actually looking at complete_task: it doesn't check claim_lock. It just
|
||||
transitions from 'running' -> 'done'. So if the reclaimer moved it back
|
||||
to 'ready', the late worker's complete_task will fail (CAS on
|
||||
status='running' fails). This is the CORRECT behavior.
|
||||
|
||||
Invariant being tested: race between worker.complete and
|
||||
dispatcher.reclaim must not produce a double-run-close or other
|
||||
inconsistency.
|
||||
"""
|
||||
|
||||
import json
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
import random
|
||||
import sqlite3
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
NUM_WORKERS = 5
|
||||
NUM_TASKS = 50
|
||||
TTL = 1
|
||||
WORK_DURATION_S = 2.0 # longer than TTL => reclaimer wins
|
||||
WT = str(Path(__file__).resolve().parents[2])
|
||||
|
||||
|
||||
def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None:
    """Worker loop that deliberately holds a claim for longer than its TTL.

    Claims a ready task with ``ttl_seconds=TTL``, sleeps for about
    ``WORK_DURATION_S`` seconds (plus or minus 0.3 s), then calls
    ``complete_task`` and records whether it reported success
    (``complete_ok``) or not (``complete_refused``). The loop ends after 40
    seconds or after more than 30 consecutive polls that found no ready
    task. Events are written to ``result_file`` as JSON.
    """
    os.environ["HERMES_HOME"] = hermes_home
    os.environ["HOME"] = hermes_home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    log = []
    record = log.append
    began = time.monotonic()
    dry_polls = 0

    while time.monotonic() - began < 40:
        db = kb.connect()
        try:
            picked = db.execute(
                "SELECT id FROM tasks WHERE status='ready' AND claim_lock IS NULL LIMIT 1"
            ).fetchone()
            if picked is None:
                dry_polls += 1
                if dry_polls > 30:
                    break
                time.sleep(0.05)
                continue
            dry_polls = 0

            task_id = picked["id"]
            try:
                won = kb.claim_task(
                    db, task_id, claimer=f"worker-{worker_id}", ttl_seconds=TTL,
                )
            except sqlite3.OperationalError as exc:
                record({"kind": "sqlite_err", "op": "claim", "err": str(exc)[:100]})
                continue
            if won is None:
                record({"kind": "lost_claim", "task": task_id})
                continue

            run = kb.latest_run(db, task_id)
            record({"kind": "claimed", "task": task_id, "worker": worker_id,
                    "run_id": run.id})

            # Outlast the TTL so the reclaimer gets a chance to step in.
            time.sleep(WORK_DURATION_S + random.uniform(-0.3, 0.3))

            try:
                finished = kb.complete_task(
                    db,
                    task_id,
                    result=f"by worker-{worker_id}",
                    summary=f"worker-{worker_id} finished",
                )
                outcome = "complete_ok" if finished else "complete_refused"
                record({"kind": outcome, "task": task_id,
                        "worker": worker_id, "run_id": run.id})
            except sqlite3.OperationalError as exc:
                record({"kind": "sqlite_err", "op": "complete", "err": str(exc)[:100]})
        finally:
            db.close()

    with open(result_file, "w") as out:
        json.dump(log, out)
|
||||
|
||||
|
||||
def reclaimer_loop(hermes_home: str, result_file: str) -> None:
    """Call ``release_stale_claims`` roughly every 200 ms for 42 seconds.

    Each pass opens its own connection. A pass that released at least one
    claim is recorded with its count and time offset; SQLite operational
    errors are recorded too. The records are written to ``result_file`` as
    JSON at the end.
    """
    os.environ["HERMES_HOME"] = hermes_home
    os.environ["HOME"] = hermes_home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    log = []
    began = time.monotonic()

    while time.monotonic() - began < 42:
        db = kb.connect()
        try:
            released = kb.release_stale_claims(db)
            if released:
                log.append({"kind": "reclaimed", "count": released,
                            "t": time.monotonic() - began})
        except sqlite3.OperationalError as exc:
            log.append({"kind": "sqlite_err", "err": str(exc)[:100]})
        finally:
            db.close()
        time.sleep(0.2)

    with open(result_file, "w") as out:
        json.dump(log, out)
|
||||
|
||||
|
||||
def main():
    """Run the reclaim-race stress test and exit non-zero on any violation.

    Seeds ``NUM_TASKS`` tasks in a throwaway HERMES_HOME, starts
    ``NUM_WORKERS`` worker processes plus one reclaimer (spawn context),
    waits for them, prints aggregated event counts and finally checks the
    run/task invariants directly in the database.

    Per-process result files live inside the temporary home directory, so a
    file left behind by an earlier run can never be mistaken for this run's
    output and concurrent runs do not overwrite each other.
    """
    home = tempfile.mkdtemp(prefix="hermes_reclaim_race_")
    os.environ["HERMES_HOME"] = home
    os.environ["HOME"] = home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    kb.init_db()
    conn = kb.connect()
    try:
        for i in range(NUM_TASKS):
            kb.create_task(conn, title=f"t{i}", assignee="shared",
                           tenant="reclaim-race")
    finally:
        conn.close()
    print(f"Seeded {NUM_TASKS} tasks. TTL={TTL}s, work_duration={WORK_DURATION_S}s")
    print("(worker work > TTL guarantees reclaims)")

    ctx = mp.get_context("spawn")
    worker_results = [os.path.join(home, f"rc_worker_{i}.json")
                      for i in range(NUM_WORKERS)]
    reclaim_result = os.path.join(home, "rc_reclaim.json")
    procs = []
    for i in range(NUM_WORKERS):
        p = ctx.Process(target=worker_loop, args=(i, home, worker_results[i]))
        p.start()
        procs.append(p)
    r = ctx.Process(target=reclaimer_loop, args=(home, reclaim_result))
    r.start()
    procs.append(r)

    for p in procs:
        p.join(timeout=60)
        if p.is_alive():
            # A process that overran its budget writes no result file.
            p.terminate()
            p.join()

    # Aggregate whatever the child processes managed to write.
    all_events = []
    for path in worker_results:
        if os.path.isfile(path):
            with open(path) as fh:
                all_events.extend(json.load(fh))
    reclaim_events = []
    if os.path.isfile(reclaim_result):
        with open(reclaim_result) as fh:
            reclaim_events = json.load(fh)

    op_counts = {}
    for e in all_events:
        op_counts[e["kind"]] = op_counts.get(e["kind"], 0) + 1
    total_reclaims = sum(e.get("count", 0) for e in reclaim_events)
    print(f"\nReclaimer fired {len(reclaim_events)} times, total tasks reclaimed: {total_reclaims}")
    print("Worker events:")
    for k in sorted(op_counts):
        print(f"  {k:<25} {op_counts[k]}")

    # Invariant checks
    failures = []
    conn = kb.connect()
    try:
        # Any task stuck with current_run_id pointing at a closed run?
        bad = conn.execute("""
            SELECT t.id, t.status, t.current_run_id, r.ended_at, r.outcome
            FROM tasks t
            JOIN task_runs r ON r.id = t.current_run_id
            WHERE r.ended_at IS NOT NULL
        """).fetchall()
        for row in bad:
            failures.append(
                f"INVARIANT VIOLATION: task {row['id']} status={row['status']} "
                f"current_run_id={row['current_run_id']} but run ended "
                f"outcome={row['outcome']}"
            )
        # Every run with NULL ended_at should still have the task pointing at it
        orphans = conn.execute("""
            SELECT r.id, r.task_id
            FROM task_runs r
            LEFT JOIN tasks t ON t.current_run_id = r.id
            WHERE r.ended_at IS NULL AND t.id IS NULL
        """).fetchall()
        for row in orphans:
            failures.append(f"ORPHAN OPEN RUN: run {row['id']} on task {row['task_id']}")
        # Event counts
        claim_evts = conn.execute(
            "SELECT COUNT(*) FROM task_events WHERE kind='claimed'").fetchone()[0]
        reclaim_evts = conn.execute(
            "SELECT COUNT(*) FROM task_events WHERE kind='reclaimed'").fetchone()[0]
        comp_evts = conn.execute(
            "SELECT COUNT(*) FROM task_events WHERE kind='completed'").fetchone()[0]
        print(f"\nDB event counts: claimed={claim_evts} reclaimed={reclaim_evts} completed={comp_evts}")
        # Every reclaimed run must have ended_at set
        unended_reclaims = conn.execute(
            "SELECT COUNT(*) FROM task_runs WHERE outcome='reclaimed' AND ended_at IS NULL"
        ).fetchone()[0]
        if unended_reclaims:
            failures.append(f"UNENDED RECLAIMED RUNS: {unended_reclaims}")
        # Count of completed / reclaimed runs
        comp_runs = conn.execute(
            "SELECT COUNT(*) FROM task_runs WHERE outcome='completed'"
        ).fetchone()[0]
        reclaim_runs = conn.execute(
            "SELECT COUNT(*) FROM task_runs WHERE outcome='reclaimed'"
        ).fetchone()[0]
        print(f"DB run outcomes: completed={comp_runs} reclaimed={reclaim_runs}")
    finally:
        conn.close()

    # Without a single reclaim the race this script targets was never exercised.
    if reclaim_runs == 0:
        failures.append("NO RECLAIMS HAPPENED — test didn't stress what it was supposed to")

    if failures:
        print(f"\nFAILURES ({len(failures)}):")
        for f in failures[:20]:
            print(f"  {f}")
        sys.exit(1)
    else:
        print("\n✔ RECLAIM RACE INVARIANTS HELD")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,283 @@
|
||||
"""Randomized property testing for the Kanban kernel.
|
||||
|
||||
Generates NUM_SEQUENCES (500) random operation sequences, each up to OPS_PER_SEQUENCE (100) ops, on small
|
||||
task graphs. After each step, checks the full invariant set:
|
||||
|
||||
I1. If tasks.current_run_id IS NOT NULL, the run MUST exist AND
|
||||
ended_at MUST be NULL (we never point at a closed run).
|
||||
I2. If a run has ended_at NULL, SOME task MUST have current_run_id
|
||||
pointing at it (no orphan open runs).
|
||||
I3. task.status in the valid set {triage, todo, ready, running,
|
||||
blocked, done, archived}.
|
||||
I4. task.claim_lock NULL iff status not in (running,).
|
||||
I5. Every run has started_at <= ended_at (or ended_at is NULL).
|
||||
I6. If outcome is set, ended_at must also be set.
|
||||
I7. Events are strictly monotonic in (created_at, id).
|
||||
I8. task_events.run_id references a task_runs.id that exists
|
||||
(or is NULL).
|
||||
I9. Parent completion invariant: if all parents are 'done', the
|
||||
child cannot be in 'todo' status (recompute_ready should have
|
||||
promoted it). This is called out in the comment on
|
||||
recompute_ready; verify it holds after every random seq.
|
||||
|
||||
Not using hypothesis the lib; just Python random for simplicity.
|
||||
"""
|
||||
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Directory two levels above the one containing this script; put on sys.path
# by main() so that hermes_cli can be imported.
WT = str(Path(__file__).resolve().parents[2])

# Fuzzing budget: number of independent sequences and draws per sequence.
NUM_SEQUENCES = 500
OPS_PER_SEQUENCE = 100
TASK_POOL = 10

# Operation names random_op() picks from, with equal probability.
OPS = [
    "create",
    "create_child",
    "claim",
    "complete",
    "block",
    "unblock",
    "archive",
    "heartbeat",
    "release_stale",
    "detect_crashed",
    "recompute_ready",
    "reassign",
]
|
||||
|
||||
|
||||
def assert_invariants(conn, kb, ops_log):
    """Check invariants I1-I6, I8 and I9 against the database behind ``conn``.

    Nothing is raised. When at least one check fails, up to ten violations
    and the last ten entries of ``ops_log`` are printed and ``False`` is
    returned; otherwise the result is ``True``. ``kb`` is not used by any of
    the checks. Rows are accessed by column name, so ``conn`` must yield
    mapping-style rows.
    """
    problems = []

    def query(sql):
        return conn.execute(sql).fetchall()

    # I1: a non-NULL current_run_id must reference an existing, still-open run.
    for rec in query("""
        SELECT t.id, t.current_run_id, r.ended_at, r.outcome
        FROM tasks t
        LEFT JOIN task_runs r ON r.id = t.current_run_id
        WHERE t.current_run_id IS NOT NULL
        AND (r.id IS NULL OR r.ended_at IS NOT NULL)
    """):
        if rec["ended_at"] is None and rec["outcome"] is None:
            state = "missing"
        else:
            state = f"closed ({rec['outcome']})"
        problems.append(
            f"I1: task {rec['id']} points at run {rec['current_run_id']} "
            f"which is {state}"
        )

    # I2: every open run must be some task's current run.
    problems.extend(
        f"I2: open run {rec['id']} on task {rec['task_id']} has no pointer"
        for rec in query("""
            SELECT r.id, r.task_id
            FROM task_runs r
            WHERE r.ended_at IS NULL
            AND NOT EXISTS (SELECT 1 FROM tasks t WHERE t.current_run_id = r.id)
        """)
    )

    # I3: status must be one of the known values (filtered on the Python side).
    known = {"triage", "todo", "ready", "running", "blocked", "done", "archived"}
    problems.extend(
        f"I3: task {rec['id']} has invalid status {rec['status']!r}"
        for rec in query("SELECT id, status FROM tasks")
        if rec["status"] not in known
    )

    # I4: only running tasks may hold a claim_lock (one direction only).
    problems.extend(
        f"I4: task {rec['id']} status={rec['status']} but claim_lock={rec['claim_lock']!r}"
        for rec in query("""
            SELECT id, status, claim_lock FROM tasks
            WHERE (status != 'running' AND claim_lock IS NOT NULL)
        """)
    )

    # I5: a finished run cannot end before it started.
    problems.extend(
        f"I5: run {rec['id']} started_at={rec['started_at']} > ended_at={rec['ended_at']}"
        for rec in query("""
            SELECT id, started_at, ended_at FROM task_runs
            WHERE ended_at IS NOT NULL AND started_at > ended_at
        """)
    )

    # I6: an outcome implies the run has ended.
    problems.extend(
        f"I6: run {rec['id']} outcome={rec['outcome']} but ended_at NULL"
        for rec in query("""
            SELECT id, outcome, ended_at FROM task_runs
            WHERE outcome IS NOT NULL AND ended_at IS NULL
        """)
    )

    # I7 (event ordering) is deliberately not checked here; the original
    # author relies on autoincrement ids for it.

    # I8: events may only reference runs that exist.
    problems.extend(
        f"I8: event {rec['id']} references missing run {rec['run_id']}"
        for rec in query("""
            SELECT e.id, e.run_id FROM task_events e
            LEFT JOIN task_runs r ON r.id = e.run_id
            WHERE e.run_id IS NOT NULL AND r.id IS NULL
        """)
    )

    # I9: a child whose parents are all 'done' must not still be 'todo'.
    # The join restricts this to children that have at least one parent.
    problems.extend(
        f"I9: task {rec['child_id']} is todo but all {rec['n_parents']} parents are done"
        for rec in query("""
            SELECT c.id AS child_id,
            COUNT(*) AS n_parents,
            SUM(CASE WHEN p.status = 'done' THEN 1 ELSE 0 END) AS done_parents
            FROM tasks c
            JOIN task_links l ON l.child_id = c.id
            JOIN tasks p ON p.id = l.parent_id
            WHERE c.status = 'todo'
            GROUP BY c.id
            HAVING n_parents > 0 AND n_parents = done_parents
        """)
    )

    if not problems:
        return True

    print(f"\n!!! INVARIANT VIOLATION after {len(ops_log)} ops:")
    for line in problems[:10]:
        print(f"    {line}")
    hidden = len(problems) - 10
    if hidden > 0:
        print(f"    ... and {hidden} more")
    print("\nLast 10 ops:")
    for entry in ops_log[-10:]:
        print(f"    {entry}")
    return False
|
||||
|
||||
|
||||
def random_op(rng, conn, kb, task_pool):
    """Pick one operation from ``OPS`` at random and apply it through ``kb``.

    ``task_pool`` is the mutable list of known task ids: creations append to
    it, a successful archive or a vanished task removes from it. Returns a
    dict describing what was attempted, or ``None`` when the draw was a
    no-op (empty pool, or the chosen task no longer exists).
    """
    assignees = ["w1", "w2", "w3", None]
    kind = rng.choice(OPS)

    if kind == "create":
        title = f"rand {rng.randint(0, 1000)}"
        owner = rng.choice(assignees)
        new_id = kb.create_task(conn, title=title, assignee=owner)
        task_pool.append(new_id)
        return {"op": "create", "tid": new_id}

    if kind == "create_child" and task_pool:
        parent = rng.choice(task_pool)
        owner = rng.choice(assignees)
        new_id = kb.create_task(
            conn,
            title=f"child of {parent}",
            assignee=owner,
            parents=[parent],
        )
        task_pool.append(new_id)
        return {"op": "create_child", "tid": new_id, "parent": parent}

    # Everything below operates on an existing task.
    if not task_pool:
        return None

    target = rng.choice(task_pool)
    if kb.get_task(conn, target) is None:
        task_pool.remove(target)
        return None

    record = None
    if kind == "claim":
        claim = kb.claim_task(conn, target, ttl_seconds=rng.choice([1, 3, 10]))
        record = {"op": "claim", "tid": target, "ok": claim is not None}
    elif kind == "complete":
        note = f"done via op {rng.randint(0, 1000)}"
        summary = rng.choice([None, note])
        done = kb.complete_task(conn, target, summary=summary)
        record = {"op": "complete", "tid": target, "ok": done}
    elif kind == "block":
        why = rng.choice([None, "rand block"])
        blocked = kb.block_task(conn, target, reason=why)
        record = {"op": "block", "tid": target, "ok": blocked}
    elif kind == "unblock":
        record = {"op": "unblock", "tid": target, "ok": kb.unblock_task(conn, target)}
    elif kind == "archive":
        archived = kb.archive_task(conn, target)
        if archived:
            task_pool.remove(target)
        record = {"op": "archive", "tid": target, "ok": archived}
    elif kind == "heartbeat":
        record = {"op": "heartbeat", "tid": target, "ok": kb.heartbeat_worker(conn, target)}
    elif kind == "release_stale":
        record = {"op": "release_stale", "n": kb.release_stale_claims(conn)}
    elif kind == "detect_crashed":
        # The detector is run as-is; no process is killed beforehand here.
        dead = kb.detect_crashed_workers(conn)
        record = {"op": "detect_crashed", "n": len(dead)}
    elif kind == "recompute_ready":
        record = {"op": "recompute_ready", "promoted": kb.recompute_ready(conn)}
    elif kind == "reassign":
        # Reassignment goes through assign_task; any failure is recorded
        # rather than aborting the sequence.
        new_owner = rng.choice(assignees)
        try:
            kb.assign_task(conn, target, new_owner)
            record = {"op": "reassign", "tid": target, "to": new_owner}
        except Exception as exc:
            record = {"op": "reassign", "tid": target, "err": str(exc)[:50]}

    return record
|
||||
|
||||
|
||||
def main():
    """Run ``NUM_SEQUENCES`` random op sequences and exit 1 on any violation.

    Each sequence gets its own seed, its own temporary HERMES_HOME and a
    freshly imported ``hermes_cli`` so no cached state carries over.
    Invariants are checked after every applied operation; a sequence stops at
    its first violation. The temporary home of a passing sequence is deleted;
    that of a failing sequence is kept (and its path printed) for inspection.
    """
    import shutil  # local import: only needed for temp-dir cleanup

    total_ops = 0
    total_violations = 0

    # Once is enough; inserting on every iteration only grows sys.path.
    sys.path.insert(0, WT)

    for seq_idx in range(NUM_SEQUENCES):
        seed = random.randint(0, 10**9)
        rng = random.Random(seed)
        home = tempfile.mkdtemp(prefix=f"hermes_fuzz_{seq_idx}_")
        os.environ["HERMES_HOME"] = home
        os.environ["HOME"] = home

        # Fresh module state per sequence to avoid cached init paths.
        for m in list(sys.modules):
            if m.startswith("hermes_cli"):
                del sys.modules[m]
        from hermes_cli import kanban_db as kb

        kb.init_db()
        conn = kb.connect()
        task_pool = []
        ops_log = []
        violated = False

        try:
            for i in range(OPS_PER_SEQUENCE):
                result = random_op(rng, conn, kb, task_pool)
                if result is None:
                    continue
                ops_log.append(result)
                total_ops += 1
                if not assert_invariants(conn, kb, ops_log):
                    total_violations += 1
                    violated = True
                    print(f"  sequence {seq_idx} (seed={seed}) failed at op {i}")
                    break
        finally:
            conn.close()

        if violated:
            print(f"  database kept in {home}")
        else:
            # Best effort: a leftover directory must not fail the run.
            shutil.rmtree(home, ignore_errors=True)

        if seq_idx % 10 == 0:
            print(f"  seq {seq_idx:3d}: {total_ops} ops so far, {total_violations} violations")

    print()
    print("=" * 60)
    print(f"Total sequences: {NUM_SEQUENCES}")
    print(f"Total operations: {total_ops}")
    print(f"Invariant violations: {total_violations}")
    if total_violations == 0:
        print("\n✔ ALL INVARIANTS HELD ACROSS RANDOMIZED SEQUENCES")
    else:
        print("\n✗ INVARIANT VIOLATIONS FOUND")
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,228 @@
|
||||
"""E2E: dispatcher spawns real Python subprocess workers.
|
||||
|
||||
This validates the IPC + lifecycle story that mocks can't:
|
||||
- spawn_fn returns a real PID
|
||||
- the child process resolves hermes_cli.kanban_db on its own
|
||||
- the child writes heartbeats via the CLI (real argparse, real init_db)
|
||||
- the child completes via the CLI with --summary + --metadata
|
||||
- the dispatcher observes all of this through the DB only
|
||||
- worker logs are captured to HERMES_HOME/kanban/logs/<task>.log
|
||||
- crash detection works against a real dead PID
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
|
||||
# This file's import block never imported Path, so the two assignments below
# raised NameError as soon as the module was loaded.
from pathlib import Path

# Directory two levels above the one containing this script; exported to the
# worker subprocesses as PYTHONPATH.
WT = str(Path(__file__).resolve().parents[2])
# Worker script that sits next to this file.
FAKE_WORKER = str(Path(__file__).parent / "_fake_worker.py")
# Interpreter used for every child process.
PY = sys.executable
|
||||
|
||||
|
||||
def make_spawn_fn(home: str):
    """Build a ``spawn_fn(task, workspace)`` that starts the fake worker.

    The returned callable launches ``FAKE_WORKER`` in its own session with
    stdout and stderr appended to ``<home>/worker_<task.id>.log`` and returns
    the child's PID. The child environment is the current environment plus
    the HERMES/HOME/PYTHONPATH/kanban variables set below, with the
    interpreter's directory prepended to PATH.
    """

    def _spawn(task, workspace):
        log_path = os.path.join(home, f"worker_{task.id}.log")
        env = {
            **os.environ,
            "HERMES_HOME": home,
            "HOME": home,
            "PYTHONPATH": WT,
            "HERMES_KANBAN_TASK": task.id,
            "HERMES_KANBAN_WORKSPACE": workspace,
            "PATH": os.pathsep.join(
                [os.path.dirname(PY), os.environ.get("PATH", "")]
            ),
        }
        # The child receives its own copy of the descriptor, so the parent's
        # handle can be closed as soon as Popen returns; leaving it open
        # leaked one descriptor per spawned worker.
        with open(log_path, "ab") as log_f:
            proc = subprocess.Popen(
                [PY, FAKE_WORKER],
                stdin=subprocess.DEVNULL,
                stdout=log_f,
                stderr=subprocess.STDOUT,
                env=env,
                start_new_session=True,
            )
        return proc.pid

    return _spawn
|
||||
|
||||
|
||||
def main():
    """Drive the three end-to-end scenarios; exit 1 on the first failure.

    A. Three tasks are dispatched to real fake-worker subprocesses and must
       reach ``done`` with one completed run each.
    B. A task is dispatched to an orphaned ``sleep`` process which is then
       killed; ``detect_crashed_workers`` must re-queue the task.
    C. One worker log file per scenario-A task must exist on disk.

    The DB connection is closed on every exit path, including ``sys.exit``.
    """
    import glob
    import signal

    home = tempfile.mkdtemp(prefix="hermes_e2e_")
    os.environ["HERMES_HOME"] = home
    os.environ["HOME"] = home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    # Point the `hermes` CLI that child processes will run at the worktree's
    # hermes_cli.main. We do this by putting a shim on PATH.
    shim_dir = os.path.join(home, "bin")
    os.makedirs(shim_dir, exist_ok=True)
    shim_path = os.path.join(shim_dir, "hermes")
    with open(shim_path, "w") as f:
        f.write(f'#!/bin/sh\nexec {PY} -m hermes_cli.main "$@"\n')
    os.chmod(shim_path, 0o755)
    os.environ["PATH"] = f"{shim_dir}:{os.environ.get('PATH','')}"

    kb.init_db()
    conn = kb.connect()
    try:
        # ============ SCENARIO A: happy path, 3 tasks ============
        print("=" * 60)
        print("A. Real-subprocess happy path (3 tasks)")
        print("=" * 60)

        tids = []
        for i in range(3):
            tid = kb.create_task(
                conn, title=f"real-e2e-{i}", assignee="worker",
            )
            tids.append(tid)

        spawn_fn = make_spawn_fn(home)
        result = kb.dispatch_once(conn, spawn_fn=spawn_fn)
        print(f"  dispatched: {len(result.spawned)} spawned")
        for tid in tids:
            task = kb.get_task(conn, tid)
            print(f"  task {tid}: pid={task.worker_pid} status={task.status}")

        # Wait for all workers to complete (up to 10s).
        deadline = time.monotonic() + 10
        while time.monotonic() < deadline:
            statuses = [kb.get_task(conn, tid).status for tid in tids]
            if all(s == "done" for s in statuses):
                break
            time.sleep(0.2)

        print()
        failures = []
        for tid in tids:
            task = kb.get_task(conn, tid)
            runs = kb.list_runs(conn, tid)
            print(f"  task {tid}: status={task.status}, current_run_id={task.current_run_id}, "
                  f"runs={[(r.id, r.outcome) for r in runs]}")
            if task.status != "done":
                failures.append(f"task {tid} not done: status={task.status}")
            if task.current_run_id is not None:
                failures.append(f"task {tid} has dangling current_run_id={task.current_run_id}")
            if len(runs) != 1:
                failures.append(f"task {tid} has {len(runs)} runs, expected 1")
            else:
                r = runs[0]
                if r.outcome != "completed":
                    failures.append(f"task {tid} run outcome={r.outcome}, expected completed")
                if not r.summary or "real-subprocess worker finished" not in r.summary:
                    failures.append(f"task {tid} summary missing: {r.summary!r}")
                if not r.metadata or r.metadata.get("iterations") != 3:
                    failures.append(f"task {tid} metadata missing iterations: {r.metadata}")
            # Heartbeat events should be present (at least three are required).
            events = kb.list_events(conn, tid)
            heartbeats = [e for e in events if e.kind == "heartbeat"]
            if len(heartbeats) < 3:
                failures.append(f"task {tid} heartbeats={len(heartbeats)} expected >=3")

        if failures:
            print("\nFAILURES:")
            for f in failures:
                print(f"  {f}")
            sys.exit(1)

        print("\n  ✔ Scenario A: all 3 real-subprocess workers completed cleanly")

        # ============ SCENARIO B: crashed worker ============
        print()
        print("=" * 60)
        print("B. Crashed worker (kill -9 mid-heartbeat)")
        print("=" * 60)

        crash_tid = kb.create_task(
            conn, title="crash-e2e", assignee="worker",
        )

        # Spawn a worker that sleeps long enough for us to kill it.
        # CRITICAL: spawn through a double-fork so when we kill the child it
        # doesn't zombify under our pid (which would fool kill -0 liveness
        # checks into thinking it's still alive). In production the
        # dispatcher daemon is long-lived but its workers are reaped by init
        # after exit; the test needs to match that orphaning behavior.
        def spawn_sleeper(task, workspace):
            r, w = os.pipe()
            try:
                middleman = subprocess.Popen(
                    [
                        PY, "-c",
                        "import os,sys,subprocess;"
                        "p=subprocess.Popen(['sleep','30'],"
                        "stdin=subprocess.DEVNULL,"
                        "stdout=subprocess.DEVNULL,stderr=subprocess.DEVNULL,"
                        "start_new_session=True);"
                        "os.write(int(sys.argv[1]), str(p.pid).encode());"
                        "sys.exit(0)",
                        str(w),
                    ],
                    pass_fds=(w,),
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
            finally:
                # The write end belongs to the middleman once it is started;
                # close our copy either way.
                os.close(w)
            try:
                middleman.wait()  # middleman exits immediately, orphaning the sleep
                return int(os.read(r, 16))
            finally:
                os.close(r)

        kb.dispatch_once(conn, spawn_fn=spawn_sleeper)
        task = kb.get_task(conn, crash_tid)
        print(f"  spawned sleeper pid={task.worker_pid} for {crash_tid}")
        if task.worker_pid is None:
            print("  ✗ dispatcher recorded no worker_pid for the crash task")
            sys.exit(1)
        # Kill the sleeper forcibly
        os.kill(task.worker_pid, signal.SIGKILL)
        # Give the OS a moment to reap
        time.sleep(0.5)

        # Simulate next dispatcher tick — should detect the crashed PID
        crashed = kb.detect_crashed_workers(conn)
        print(f"  detect_crashed_workers returned {len(crashed)} crashed (expected 1)")

        task = kb.get_task(conn, crash_tid)
        runs = kb.list_runs(conn, crash_tid)
        print(f"  task status={task.status}, runs={[(r.id, r.outcome) for r in runs]}")

        if len(crashed) < 1:
            print("  ✗ crash NOT detected")
            sys.exit(1)
        if task.status != "ready":
            print(f"  ✗ task should be back to ready, got {task.status}")
            sys.exit(1)
        if not runs:
            print("  ✗ crash task has no runs recorded")
            sys.exit(1)
        if runs[0].outcome != "crashed":
            print(f"  ✗ run outcome should be 'crashed', got {runs[0].outcome!r}")
            sys.exit(1)
        print("\n  ✔ Scenario B: crash detected, task re-queued, run outcome=crashed")

        # ============ SCENARIO C: worker log was captured ============
        print()
        print("=" * 60)
        print("C. Worker log captured to disk")
        print("=" * 60)
        # Scenario A workers wrote to <home>/worker_*.log
        logs = glob.glob(os.path.join(home, "worker_*.log"))
        print(f"  {len(logs)} worker log files")
        for lp in logs[:3]:
            size = os.path.getsize(lp)
            print(f"    {os.path.basename(lp)}: {size} bytes")
        # Our fake worker is quiet (no prints); size=0 is fine, but the spawn
        # function creates one file per worker, so the files must exist.
        if len(logs) < len(tids):
            print(f"  ✗ expected at least {len(tids)} worker logs, found {len(logs)}")
            sys.exit(1)
    finally:
        conn.close()

    print("\n✔ ALL E2E SCENARIOS PASS")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1485,48 +1485,6 @@ class TestListSessionsRich:
|
||||
assert "\n" not in sessions[0]["preview"]
|
||||
assert "Line one Line two" in sessions[0]["preview"]
|
||||
|
||||
def test_branch_session_visible_in_list(self, db):
    """A child of a parent that ended with 'branched' shows up in the default listing."""
    db.create_session("parent", "cli")
    db.end_session("parent", "branched")
    db.create_session("branch", "cli", parent_session_id="parent")
    db.append_message("branch", "user", "Exploring the alternative approach")

    listed = db.list_sessions_rich()
    listed_ids = [entry["id"] for entry in listed]

    assert "branch" in listed_ids, "Branch session should be visible in default list"
|
||||
|
||||
def test_subagent_session_still_hidden(self, db):
    """A child whose parent did not end with 'branched' stays out of the default listing."""
    db.create_session("root", "cli")
    db.create_session("delegate", "cli", parent_session_id="root")

    listed = db.list_sessions_rich()
    listed_ids = [entry["id"] for entry in listed]

    assert "delegate" not in listed_ids, "Delegate sub-agent should not appear in default list"
    assert "root" in listed_ids
|
||||
|
||||
def test_compression_child_still_hidden(self, db):
    """A continuation of a parent ended with 'compression' stays hidden
    when ``project_compression_tips=False``."""
    import time as _time

    base = _time.time()
    root_end = base + 1800
    child_start = base + 1801

    # Root session: started at ``base``, closed by compression 30 minutes later.
    db.create_session("root", "cli")
    db._conn.execute("UPDATE sessions SET started_at=? WHERE id=?", (base, "root"))
    db._conn.execute(
        "UPDATE sessions SET ended_at=?, end_reason='compression' WHERE id=?",
        (root_end, "root"),
    )
    db._conn.commit()

    # Continuation session: starts one second after the root ended.
    db.create_session("continuation", "cli", parent_session_id="root")
    db._conn.execute(
        "UPDATE sessions SET started_at=? WHERE id=?", (child_start, "continuation")
    )
    db._conn.commit()

    listed = db.list_sessions_rich(project_compression_tips=False)
    listed_ids = [entry["id"] for entry in listed]
    assert "continuation" not in listed_ids, "Compression continuation should stay hidden"
|
||||
|
||||
|
||||
class TestCompressionChainProjection:
|
||||
"""Tests for lineage-aware list_sessions_rich — compressed conversations
|
||||
|
||||
@@ -1807,112 +1807,3 @@ def test_model_options_propagates_list_exception(monkeypatch):
|
||||
assert "error" in resp
|
||||
assert resp["error"]["code"] == 5033
|
||||
assert "catalog blew up" in resp["error"]["message"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# prompt.submit — auto-title
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _ImmediateThread:
|
||||
"""Runs the target callable synchronously so assertions can follow."""
|
||||
|
||||
def __init__(self, target=None, daemon=None):
|
||||
self._target = target
|
||||
|
||||
def start(self):
|
||||
self._target()
|
||||
|
||||
|
||||
def test_prompt_submit_auto_titles_session_on_complete(monkeypatch):
    """A prompt that completes normally triggers exactly one maybe_auto_title call."""
    question = "Tell me about Rome"
    answer = "Rome was founded in 753 BC."

    class _Agent:
        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
            transcript = [
                {"role": "user", "content": "Tell me about Rome"},
                {"role": "assistant", "content": "Rome was founded in 753 BC."},
            ]
            return {"final_response": "Rome was founded in 753 BC.", "messages": transcript}

    server._sessions["sid"] = _session(agent=_Agent())
    # Run the worker inline and silence output/DB collaborators.
    monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
    for attr, stub in (
        ("_emit", lambda *args, **kwargs: None),
        ("make_stream_renderer", lambda cols: None),
        ("render_message", lambda raw, cols: None),
        ("_get_db", lambda: None),
    ):
        monkeypatch.setattr(server, attr, stub)

    request = {
        "id": "1",
        "method": "prompt.submit",
        "params": {"session_id": "sid", "text": question},
    }
    with patch("agent.title_generator.maybe_auto_title") as mock_title:
        server.handle_request(request)

    mock_title.assert_called_once()
    positional = mock_title.call_args.args
    assert positional[1] == "session-key"
    assert positional[2] == question
    assert positional[3] == answer
|
||||
|
||||
|
||||
def test_prompt_submit_skips_auto_title_when_interrupted(monkeypatch):
    """An interrupted run must not reach maybe_auto_title."""

    class _Agent:
        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
            outcome = {"final_response": "partial answer"}
            outcome["interrupted"] = True
            outcome["messages"] = []
            return outcome

    server._sessions["sid"] = _session(agent=_Agent())
    # Run the worker inline and silence output/DB collaborators.
    monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
    for attr, stub in (
        ("_emit", lambda *args, **kwargs: None),
        ("make_stream_renderer", lambda cols: None),
        ("render_message", lambda raw, cols: None),
        ("_get_db", lambda: None),
    ):
        monkeypatch.setattr(server, attr, stub)

    request = {
        "id": "1",
        "method": "prompt.submit",
        "params": {"session_id": "sid", "text": "Tell me about Rome"},
    }
    with patch("agent.title_generator.maybe_auto_title") as mock_title:
        server.handle_request(request)

    mock_title.assert_not_called()
|
||||
|
||||
|
||||
def test_prompt_submit_skips_auto_title_when_response_empty(monkeypatch):
    """An empty final response must not reach maybe_auto_title."""

    class _Agent:
        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
            outcome = {"final_response": ""}
            outcome["messages"] = []
            return outcome

    server._sessions["sid"] = _session(agent=_Agent())
    # Run the worker inline and silence output/DB collaborators.
    monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
    for attr, stub in (
        ("_emit", lambda *args, **kwargs: None),
        ("make_stream_renderer", lambda cols: None),
        ("render_message", lambda raw, cols: None),
        ("_get_db", lambda: None),
    ):
        monkeypatch.setattr(server, attr, stub)

    request = {
        "id": "1",
        "method": "prompt.submit",
        "params": {"session_id": "sid", "text": "Tell me about Rome"},
    }
    with patch("agent.title_generator.maybe_auto_title") as mock_title:
        server.handle_request(request)

    mock_title.assert_not_called()
|
||||
|
||||
@@ -1,248 +0,0 @@
|
||||
"""Tests for hybrid browser-backend routing (LAN/localhost auto-local).
|
||||
|
||||
When a cloud browser provider (Browserbase / Browser-Use / Firecrawl) is
|
||||
configured globally, ``browser.auto_local_for_private_urls`` (default True)
|
||||
causes ``browser_navigate`` to transparently spawn a local Chromium sidecar
|
||||
for URLs whose host resolves to a private/loopback/LAN address, while
|
||||
public URLs continue to hit the cloud session in the same conversation.
|
||||
|
||||
These tests cover the routing decision layer — session_key selection,
|
||||
sidecar detection, last-active-session tracking, and the config toggle.
|
||||
The downstream session creation is covered by test_browser_cloud_fallback.py.
|
||||
"""
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
import tools.browser_tool as browser_tool
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_routing_state(monkeypatch):
    """Give every test pristine browser_tool module state.

    Replaces the session/provider caches with empty or default values, stubs
    out the cleanup thread and activity tracking, and disables the CDP
    override and Camofox mode. monkeypatch restores everything afterwards.
    """
    replacements = (
        ("_active_sessions", {}),
        ("_last_active_session_key", {}),
        ("_cached_cloud_provider", None),
        ("_cloud_provider_resolved", False),
        ("_auto_local_for_private_urls_resolved", False),
        ("_cached_auto_local_for_private_urls", True),
        ("_start_browser_cleanup_thread", lambda: None),
        ("_update_session_activity", lambda t: None),
        # Default: no CDP override, no Camofox
        ("_get_cdp_override", lambda: None),
        ("_is_camofox_mode", lambda: False),
    )
    for attr, value in replacements:
        monkeypatch.setattr(browser_tool, attr, value)
|
||||
|
||||
|
||||
class TestNavigationSessionKey:
    """Tests for _navigation_session_key URL-based routing decisions."""

    def test_public_url_uses_bare_task_id(self, monkeypatch):
        """Public URL with cloud provider configured → bare task_id (cloud)."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
        key = browser_tool._navigation_session_key("default", "https://github.com/x/y")
        assert key == "default"

    def test_localhost_routes_to_local_sidecar(self, monkeypatch):
        """``localhost`` URL → ``::local`` suffix when cloud configured + flag on."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
        key = browser_tool._navigation_session_key("default", "http://localhost:3000/")
        assert key == "default::local"

    def test_loopback_ipv4_routes_to_local_sidecar(self, monkeypatch):
        """``127.0.0.1`` literal → ``::local`` suffix."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
        key = browser_tool._navigation_session_key("default", "http://127.0.0.1:8080/")
        assert key == "default::local"

    def test_rfc1918_lan_routes_to_local_sidecar(self, monkeypatch):
        """``192.168.x.x`` LAN literal → ``::local`` suffix."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
        key = browser_tool._navigation_session_key("default", "http://192.168.1.50:8000/")
        assert key == "default::local"

    def test_ipv6_loopback_routes_to_local_sidecar(self, monkeypatch):
        """Bracketed ``[::1]`` IPv6 loopback → ``::local`` suffix."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
        key = browser_tool._navigation_session_key("default", "http://[::1]:3000/")
        assert key == "default::local"

    def test_public_ip_literal_uses_bare_task_id(self, monkeypatch):
        """A public IP literal (``8.8.8.8``) stays on the bare task_id."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
        key = browser_tool._navigation_session_key("default", "https://8.8.8.8/")
        assert key == "default"

    def test_mdns_local_hostname_routes_to_sidecar(self, monkeypatch):
        """``*.local`` mDNS / ``*.lan`` / ``*.internal`` hostnames route to sidecar."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
        for host in ("raspberrypi.local", "printer.lan", "db.internal"):
            key = browser_tool._navigation_session_key("default", f"http://{host}/")
            assert key == "default::local", f"host {host!r} did not route to sidecar"

    def test_no_cloud_provider_stays_on_bare_task_id(self, monkeypatch):
        """When cloud provider is not configured, no hybrid routing happens."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: None)
        key = browser_tool._navigation_session_key("default", "http://localhost:3000/")
        assert key == "default"

    def test_camofox_mode_stays_on_bare_task_id(self, monkeypatch):
        """Camofox is already local — no hybrid routing needed."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
        monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: True)
        key = browser_tool._navigation_session_key("default", "http://localhost:3000/")
        assert key == "default"

    def test_cdp_override_stays_on_bare_task_id(self, monkeypatch):
        """A user-supplied CDP endpoint owns the whole session — no hybrid."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
        monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: "ws://localhost:9222")
        key = browser_tool._navigation_session_key("default", "http://localhost:3000/")
        assert key == "default"

    def test_feature_flag_off_disables_hybrid_routing(self, monkeypatch):
        """``auto_local_for_private_urls: false`` keeps private URLs on cloud."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
        monkeypatch.setattr(browser_tool, "_auto_local_for_private_urls", lambda: False)
        key = browser_tool._navigation_session_key("default", "http://localhost:3000/")
        assert key == "default"

    def test_none_task_id_defaults(self, monkeypatch):
        """``None`` task_id resolves to 'default'."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())
        key = browser_tool._navigation_session_key(None, "http://localhost:3000/")
        assert key == "default::local"
|
||||
|
||||
|
||||
class TestSessionKeyHelpers:
    """Tests for the ``_is_local_sidecar_key`` / ``_last_session_key`` helpers."""

    def test_is_local_sidecar_key(self):
        """Only keys carrying the ``::local`` suffix count as sidecar keys."""
        assert browser_tool._is_local_sidecar_key("default::local")
        assert browser_tool._is_local_sidecar_key("my_task::local")
        assert not browser_tool._is_local_sidecar_key("default")
        assert not browser_tool._is_local_sidecar_key("my_task")

    def test_last_session_key_falls_back_to_task_id(self, monkeypatch):
        """Without a recorded last-active key, returns the bare task_id."""
        monkeypatch.setattr(browser_tool, "_last_active_session_key", {})
        assert browser_tool._last_session_key("default") == "default"
        assert browser_tool._last_session_key("task-42") == "task-42"
        assert browser_tool._last_session_key(None) == "default"

    def test_last_session_key_returns_recorded_key(self, monkeypatch):
        """A recorded last-active key is returned for its task_id."""
        monkeypatch.setattr(
            browser_tool,
            "_last_active_session_key",
            {"default": "default::local", "task-42": "task-42"},
        )
        assert browser_tool._last_session_key("default") == "default::local"
        assert browser_tool._last_session_key("task-42") == "task-42"
        # Unknown task_id still falls back
        assert browser_tool._last_session_key("other") == "other"
|
||||
|
||||
|
||||
class TestHybridRoutingSessionCreation:
    """_get_session_info must force a local session when the key carries ``::local``."""

    def test_local_sidecar_key_skips_cloud_provider(self, monkeypatch):
        """A ``::local``-suffixed key creates a local session even when cloud is set."""
        provider = Mock()
        # This payload must never surface in the returned session.
        provider.create_session.return_value = {
            "session_name": "should_not_be_used",
            "bb_session_id": "bb_xxx",
            "cdp_url": "wss://fake.browserbase.com/ws",
        }
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
        monkeypatch.setattr(browser_tool, "_ensure_cdp_supervisor", lambda t: None)

        session = browser_tool._get_session_info("default::local")

        assert provider.create_session.call_count == 0
        assert session["bb_session_id"] is None
        assert session["cdp_url"] is None
        assert session["features"]["local"] is True

    def test_bare_task_id_with_cloud_provider_uses_cloud(self, monkeypatch):
        """A bare task_id with cloud provider configured hits the cloud path."""
        provider = Mock()
        provider.create_session.return_value = {
            "session_name": "cloud-sess",
            "bb_session_id": "bb_123",
            "cdp_url": "wss://real.browserbase.com/ws",
        }
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
        monkeypatch.setattr(browser_tool, "_ensure_cdp_supervisor", lambda t: None)
        # Identity stub: hand the URL back unchanged.
        monkeypatch.setattr(browser_tool, "_resolve_cdp_override", lambda u: u)

        session = browser_tool._get_session_info("default")

        assert provider.create_session.call_count == 1
        assert session["bb_session_id"] == "bb_123"
|
||||
|
||||
|
||||
class TestCleanupHybridSessions:
    """cleanup_browser(bare_task_id) must reap both cloud + local sidecar sessions."""

    def test_cleanup_reaps_both_primary_and_sidecar(self, monkeypatch):
        """Given a bare task_id with both sessions alive, both get cleaned."""
        reaped = []

        def _fake_cleanup_one(key):
            # Record the key instead of tearing down a real session.
            reaped.append(key)

        monkeypatch.setattr(browser_tool, "_cleanup_single_browser_session", _fake_cleanup_one)
        monkeypatch.setattr(
            browser_tool,
            "_active_sessions",
            {
                "default": {"session_name": "cloud_sess"},
                "default::local": {"session_name": "local_sess"},
            },
        )
        monkeypatch.setattr(
            browser_tool, "_last_active_session_key", {"default": "default::local"}
        )

        browser_tool.cleanup_browser("default")

        assert set(reaped) == {"default", "default::local"}
        # last-active pointer dropped
        assert "default" not in browser_tool._last_active_session_key

    def test_cleanup_reaps_only_primary_when_no_sidecar(self, monkeypatch):
        """When no sidecar exists, only the primary is reaped."""
        reaped = []

        def _fake_cleanup_one(key):
            # Record the key instead of tearing down a real session.
            reaped.append(key)

        monkeypatch.setattr(browser_tool, "_cleanup_single_browser_session", _fake_cleanup_one)
        monkeypatch.setattr(
            browser_tool,
            "_active_sessions",
            {"default": {"session_name": "cloud_sess"}},
        )

        browser_tool.cleanup_browser("default")

        assert reaped == ["default"]

    def test_cleanup_sidecar_directly_keeps_primary(self, monkeypatch):
        """Calling cleanup with a ``::local`` key reaps only the sidecar."""
        reaped = []

        def _fake_cleanup_one(key):
            # Record the key instead of tearing down a real session.
            reaped.append(key)

        monkeypatch.setattr(browser_tool, "_cleanup_single_browser_session", _fake_cleanup_one)
        monkeypatch.setattr(
            browser_tool,
            "_active_sessions",
            {
                "default": {"session_name": "cloud_sess"},
                "default::local": {"session_name": "local_sess"},
            },
        )
        monkeypatch.setattr(
            browser_tool, "_last_active_session_key", {"default": "default::local"}
        )

        browser_tool.cleanup_browser("default::local")

        assert reaped == ["default::local"]
        # Last-active pointer NOT dropped (primary task is still alive)
        assert browser_tool._last_active_session_key.get("default") == "default::local"
|
||||
@@ -1,210 +0,0 @@
|
||||
"""Tests for credential_pool .env fallback and auth credential_pool lookup.
|
||||
|
||||
Covers the fix from #15914 / PR #15920:
|
||||
- _seed_from_env reads API keys from ~/.hermes/.env when not in os.environ
|
||||
- _resolve_api_key_provider_secret falls back to credential_pool when env vars are empty
|
||||
- env vars take priority over .env file (handled by get_env_value itself)
|
||||
- env vars take priority over credential pool (fallback only kicks in when env is empty)
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _make_pconfig(provider_id="deepseek", env_vars=None):
    """Create a minimal ProviderConfig for testing.

    Default provider_id is 'deepseek' because it's a real api_key provider
    in PROVIDER_REGISTRY (needed for _seed_from_env's generic path).

    Args:
        provider_id: Used as the config ``id``; its title-cased form becomes
            ``name``.
        env_vars: Optional iterable of API-key env var names. When falsy,
            defaults to ``["<PROVIDER_ID>_API_KEY"]`` (upper-cased id).

    Returns:
        A ``ProviderConfig`` with ``auth_type="api_key"``.
    """
    # Imported lazily, as in every other helper of this test module.
    from hermes_cli.auth import ProviderConfig

    return ProviderConfig(
        id=provider_id,
        name=provider_id.title(),
        auth_type="api_key",
        api_key_env_vars=tuple(env_vars or [f"{provider_id.upper()}_API_KEY"]),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def isolated_hermes_home(tmp_path, monkeypatch):
    """Point HERMES_HOME at a temp dir and clear known API key env vars.

    Also invalidates any cached get_env_value state by patching Path.home().

    Returns:
        The freshly created ``<tmp_path>/.hermes`` directory.
    """
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(home))

    # Clear all known API key env vars so get_env_value falls through to .env
    for key in [
        "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "OPENROUTER_API_KEY",
        "ZAI_API_KEY", "DEEPSEEK_API_KEY", "ANTHROPIC_TOKEN",
        "CLAUDE_CODE_OAUTH_TOKEN", "OPENAI_BASE_URL",
    ]:
        monkeypatch.delenv(key, raising=False)

    return home
|
||||
|
||||
|
||||
def _write_env_file(home: Path, **kwargs) -> None:
|
||||
"""Write key=value pairs to ~/.hermes/.env."""
|
||||
lines = [f"{k}={v}" for k, v in kwargs.items()]
|
||||
(home / ".env").write_text("\n".join(lines) + "\n")
|
||||
|
||||
|
||||
class TestCredentialPoolSeedsFromDotEnv:
    """_seed_from_env must read keys from ~/.hermes/.env, not just os.environ.

    This is the load-bearing behaviour for the fix: when a user adds a key to
    .env mid-session or via a non-CLI entry point that doesn't run
    load_hermes_dotenv, the credential pool must still discover it.
    """

    def test_deepseek_key_from_dotenv_only(self, isolated_hermes_home):
        """Key in .env but not os.environ → _seed_from_env adds a pool entry."""
        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-only-12345")
        assert "DEEPSEEK_API_KEY" not in os.environ

        from agent.credential_pool import _seed_from_env
        entries = []
        changed, active_sources = _seed_from_env("deepseek", entries)

        assert changed is True
        assert "env:DEEPSEEK_API_KEY" in active_sources
        assert any(
            e.access_token == "sk-dotenv-only-12345"
            and e.source == "env:DEEPSEEK_API_KEY"
            for e in entries
        ), f"Expected seeded entry with dotenv key, got: {[(e.source, e.access_token) for e in entries]}"

    def test_openrouter_key_from_dotenv_only(self, isolated_hermes_home):
        """OpenRouter path has its own branch — verify it also reads .env."""
        _write_env_file(isolated_hermes_home, OPENROUTER_API_KEY="sk-or-dotenv-abc")
        assert "OPENROUTER_API_KEY" not in os.environ

        from agent.credential_pool import _seed_from_env
        entries = []
        changed, active_sources = _seed_from_env("openrouter", entries)

        assert changed is True
        assert "env:OPENROUTER_API_KEY" in active_sources
        assert any(
            e.access_token == "sk-or-dotenv-abc" for e in entries
        )

    def test_empty_dotenv_no_entries(self, isolated_hermes_home):
        """No .env file, no env vars → no entries seeded (and no crash)."""
        from agent.credential_pool import _seed_from_env
        entries = []
        changed, active_sources = _seed_from_env("deepseek", entries)
        assert changed is False
        assert active_sources == set()
        assert entries == []

    def test_os_environ_still_wins_over_dotenv(self, isolated_hermes_home, monkeypatch):
        """get_env_value checks os.environ first — verify seeding picks that up."""
        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-stale")
        monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-env-fresh-xyz")

        from agent.credential_pool import _seed_from_env
        entries = []
        changed, _ = _seed_from_env("deepseek", entries)

        assert changed is True
        seeded = [e for e in entries if e.source == "env:DEEPSEEK_API_KEY"]
        assert len(seeded) == 1
        assert seeded[0].access_token == "sk-env-fresh-xyz"
|
||||
|
||||
|
||||
class TestAuthResolvesFromDotEnv:
    """_resolve_api_key_provider_secret must also read from ~/.hermes/.env."""

    def test_key_from_dotenv_only(self, isolated_hermes_home):
        """Key in .env but not os.environ → _resolve returns it with the env var source."""
        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-resolve-789")
        assert "DEEPSEEK_API_KEY" not in os.environ

        from hermes_cli.auth import _resolve_api_key_provider_secret
        key, source = _resolve_api_key_provider_secret(
            provider_id="deepseek",
            pconfig=_make_pconfig(),
        )
        assert key == "sk-dotenv-resolve-789"
        assert source == "DEEPSEEK_API_KEY"
|
||||
|
||||
|
||||
class TestAuthCredentialPoolFallback:
    """_resolve_api_key_provider_secret falls back to credential pool when env + dotenv are empty."""

    def test_credential_pool_fallback_structure(self, isolated_hermes_home):
        """Empty env + empty .env → auth falls back to credential pool."""
        mock_entry = MagicMock()
        mock_entry.access_token = "test-pool-key-12345"
        mock_entry.runtime_api_key = ""

        mock_pool = MagicMock()
        mock_pool.has_credentials.return_value = True
        mock_pool.peek.return_value = mock_entry

        from hermes_cli.auth import _resolve_api_key_provider_secret
        with patch("agent.credential_pool.load_pool", return_value=mock_pool):
            key, source = _resolve_api_key_provider_secret(
                provider_id="deepseek",
                pconfig=_make_pconfig(),
            )
        assert "test-pool-key-12345" in key
        assert "credential_pool" in source

    def test_credential_pool_empty_returns_empty(self, isolated_hermes_home):
        """Empty env + empty .env + empty pool → empty string."""
        mock_pool = MagicMock()
        mock_pool.has_credentials.return_value = False

        from hermes_cli.auth import _resolve_api_key_provider_secret
        with patch("agent.credential_pool.load_pool", return_value=mock_pool):
            # Only the key is checked here; the reported source is irrelevant.
            key, _ = _resolve_api_key_provider_secret(
                provider_id="deepseek",
                pconfig=_make_pconfig(),
            )
        assert key == ""

    def test_env_var_takes_priority_over_pool(self, isolated_hermes_home, monkeypatch):
        """os.environ key wins — credential pool is NEVER consulted."""
        monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-env-key-first-abc123")

        mock_pool = MagicMock()
        mock_pool.has_credentials.return_value = True

        from hermes_cli.auth import _resolve_api_key_provider_secret
        with patch("agent.credential_pool.load_pool", return_value=mock_pool) as mp:
            key, source = _resolve_api_key_provider_secret(
                provider_id="deepseek",
                pconfig=_make_pconfig(),
            )
        assert key == "sk-env-key-first-abc123"
        assert source == "DEEPSEEK_API_KEY"
        # Pool should not even have been loaded — env var satisfied the request first
        mp.assert_not_called()

    def test_dotenv_takes_priority_over_pool(self, isolated_hermes_home):
        """Key in .env beats credential pool — pool only fires when both env sources are empty."""
        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-priority-xyz")
        assert "DEEPSEEK_API_KEY" not in os.environ

        mock_pool = MagicMock()
        mock_pool.has_credentials.return_value = True

        from hermes_cli.auth import _resolve_api_key_provider_secret
        with patch("agent.credential_pool.load_pool", return_value=mock_pool) as mp:
            key, source = _resolve_api_key_provider_secret(
                provider_id="deepseek",
                pconfig=_make_pconfig(),
            )
        assert key == "sk-dotenv-priority-xyz"
        assert source == "DEEPSEEK_API_KEY"
        mp.assert_not_called()
|
||||
@@ -0,0 +1,494 @@
|
||||
"""Tests for the Kanban tool surface (tools/kanban_tools.py).
|
||||
|
||||
Verifies:
|
||||
- Tools are gated on HERMES_KANBAN_TASK: a normal chat session sees
|
||||
zero kanban tools in its schema; a worker session sees all seven.
|
||||
- Each handler's happy path.
|
||||
- Error paths (missing required args, bad metadata type, etc).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gating
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_kanban_tools_hidden_without_env_var(monkeypatch, tmp_path):
    """Normal `hermes chat` sessions (no HERMES_KANBAN_TASK) must have
    zero kanban_* tools in their schema."""
    monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False)
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))

    import tools.kanban_tools  # ensure registered
    from tools.registry import registry
    from toolsets import resolve_toolset

    schema = registry.get_definitions(set(resolve_toolset("hermes-cli")), quiet=True)
    names = {s["function"].get("name") for s in schema if "function" in s}
    kanban = {n for n in names if n and n.startswith("kanban_")}
    assert kanban == set(), (
        f"kanban tools leaked into normal chat schema: {kanban}"
    )
|
||||
|
||||
|
||||
def test_kanban_tools_visible_with_env_var(monkeypatch, tmp_path):
    """Worker sessions (HERMES_KANBAN_TASK set) must have all 7 tools."""
    monkeypatch.setenv("HERMES_KANBAN_TASK", "t_fake")
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))

    import tools.kanban_tools  # ensure registered
    from tools.registry import registry
    from toolsets import resolve_toolset

    schema = registry.get_definitions(set(resolve_toolset("hermes-cli")), quiet=True)
    names = {s["function"].get("name") for s in schema if "function" in s}
    kanban = {n for n in names if n and n.startswith("kanban_")}
    expected = {
        "kanban_show", "kanban_complete", "kanban_block", "kanban_heartbeat",
        "kanban_comment", "kanban_create", "kanban_link",
    }
    assert kanban == expected, f"expected {expected}, got {kanban}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Handler happy paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
def worker_env(monkeypatch, tmp_path):
    """Simulate being a worker: HERMES_HOME isolated, HERMES_KANBAN_TASK set
    after we've created the task.

    Initialises a fresh kanban DB under the temp home, creates and claims
    one task assigned to ``test-worker`` and exports its id through
    ``HERMES_KANBAN_TASK``.

    Returns:
        The id of the claimed task.
    """
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setenv("HERMES_PROFILE", "test-worker")
    from pathlib import Path as _Path
    monkeypatch.setattr(_Path, "home", lambda: tmp_path)

    from hermes_cli import kanban_db as kb
    # NOTE(review): presumably forces init_db() to run again for the new
    # temp path instead of being skipped as already initialised — confirm.
    kb._INITIALIZED_PATHS.clear()
    kb.init_db()
    conn = kb.connect()
    try:
        tid = kb.create_task(conn, title="worker-test", assignee="test-worker")
        kb.claim_task(conn, tid)
    finally:
        conn.close()
    monkeypatch.setenv("HERMES_KANBAN_TASK", tid)
    return tid
|
||||
|
||||
|
||||
def test_show_defaults_to_env_task_id(worker_env):
    """With no ``task_id`` argument, show reports the task from the worker env."""
    from tools import kanban_tools as kt
    out = kt._handle_show({})
    d = json.loads(out)
    assert "task" in d
    assert d["task"]["id"] == worker_env
    assert d["task"]["status"] == "running"
    assert "worker_context" in d
    assert "runs" in d
|
||||
|
||||
|
||||
def test_show_explicit_task_id(worker_env):
    """Peek at a different task than the one in env."""
    from hermes_cli import kanban_db as kb
    conn = kb.connect()
    try:
        other = kb.create_task(conn, title="other task", assignee="peer")
    finally:
        conn.close()
    from tools import kanban_tools as kt
    out = kt._handle_show({"task_id": other})
    d = json.loads(out)
    assert d["task"]["id"] == other
|
||||
|
||||
|
||||
def test_complete_happy_path(worker_env):
    """Completing with summary + metadata records both on the task's latest run."""
    from tools import kanban_tools as kt
    out = kt._handle_complete({
        "summary": "got the thing done",
        "metadata": {"files": 2},
    })
    d = json.loads(out)
    assert d["ok"] is True
    assert d["task_id"] == worker_env
    # Verify via kernel
    from hermes_cli import kanban_db as kb
    conn = kb.connect()
    try:
        run = kb.latest_run(conn, worker_env)
        assert run.outcome == "completed"
        assert run.summary == "got the thing done"
        assert run.metadata == {"files": 2}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_complete_with_result_only(worker_env):
    """`result` alone (without summary) is accepted for legacy compat."""
    from tools import kanban_tools as kt
    out = kt._handle_complete({"result": "legacy result"})
    d = json.loads(out)
    assert d["ok"] is True
|
||||
|
||||
|
||||
def test_complete_rejects_no_handoff(worker_env):
    """Completing with neither ``summary`` nor ``result`` yields an error payload."""
    from tools import kanban_tools as kt
    out = kt._handle_complete({})
    assert json.loads(out).get("error"), "should have errored"
|
||||
|
||||
|
||||
def test_complete_rejects_non_dict_metadata(worker_env):
    """A list passed as ``metadata`` yields an error payload."""
    from tools import kanban_tools as kt
    out = kt._handle_complete({"summary": "x", "metadata": [1, 2, 3]})
    assert json.loads(out).get("error")
|
||||
|
||||
|
||||
def test_block_happy_path(worker_env):
    """Blocking with a reason succeeds and moves the task to ``blocked``."""
    from tools import kanban_tools as kt
    out = kt._handle_block({"reason": "need clarification"})
    d = json.loads(out)
    assert d["ok"] is True
    from hermes_cli import kanban_db as kb
    conn = kb.connect()
    try:
        assert kb.get_task(conn, worker_env).status == "blocked"
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_block_rejects_empty_reason(worker_env):
    """Empty, whitespace-only and ``None`` reasons each yield an error payload."""
    from tools import kanban_tools as kt
    for bad in ["", " ", None]:
        out = kt._handle_block({"reason": bad})
        # Name the offending input so a failure is attributable to one case.
        assert json.loads(out).get("error"), f"reason {bad!r} was not rejected"
|
||||
|
||||
|
||||
def test_heartbeat_happy_path(worker_env):
    """A heartbeat carrying a note is acknowledged with ``ok``."""
    from tools import kanban_tools as kt
    out = kt._handle_heartbeat({"note": "progress"})
    d = json.loads(out)
    assert d["ok"] is True
|
||||
|
||||
|
||||
def test_heartbeat_without_note(worker_env):
    """note is optional."""
    from tools import kanban_tools as kt
    out = kt._handle_heartbeat({})
    d = json.loads(out)
    assert d["ok"] is True
|
||||
|
||||
|
||||
def test_comment_happy_path(worker_env):
    """A comment is stored on the task with the profile name as its author."""
    from tools import kanban_tools as kt
    out = kt._handle_comment({
        "task_id": worker_env,
        "body": "hello thread",
    })
    d = json.loads(out)
    assert d["ok"] is True
    assert d["comment_id"]
    from hermes_cli import kanban_db as kb
    conn = kb.connect()
    try:
        comments = kb.list_comments(conn, worker_env)
        assert len(comments) == 1
        # Author defaults to HERMES_PROFILE env we set in the fixture
        assert comments[0].author == "test-worker"
        assert comments[0].body == "hello thread"
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_comment_rejects_empty_body(worker_env):
    """A whitespace-only comment body yields an error payload."""
    from tools import kanban_tools as kt
    out = kt._handle_comment({"task_id": worker_env, "body": " "})
    assert json.loads(out).get("error")
|
||||
|
||||
|
||||
def test_comment_custom_author(worker_env):
    """An explicit ``author`` argument is stored instead of the profile default."""
    from tools import kanban_tools as kt
    out = kt._handle_comment({
        "task_id": worker_env, "body": "hi", "author": "custom-bot",
    })
    assert json.loads(out)["ok"]
    from hermes_cli import kanban_db as kb
    conn = kb.connect()
    try:
        comments = kb.list_comments(conn, worker_env)
        assert comments[0].author == "custom-bot"
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_create_happy_path(worker_env):
    """Creating a child of the worker's task persists title and assignee."""
    from tools import kanban_tools as kt
    out = kt._handle_create({
        "title": "child task",
        "assignee": "peer",
        "parents": [worker_env],
    })
    d = json.loads(out)
    assert d["ok"] is True
    assert d["task_id"]
    assert d["status"] == "todo"  # parent isn't done yet
    from hermes_cli import kanban_db as kb
    conn = kb.connect()
    try:
        child = kb.get_task(conn, d["task_id"])
        assert child.title == "child task"
        assert child.assignee == "peer"
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_create_rejects_no_title(worker_env):
    """A missing or whitespace-only ``title`` yields an error payload."""
    from tools import kanban_tools as kt
    assert json.loads(kt._handle_create({"assignee": "x"})).get("error")
    assert json.loads(kt._handle_create({"title": " ", "assignee": "x"})).get("error")
|
||||
|
||||
|
||||
def test_create_rejects_no_assignee(worker_env):
    """A missing ``assignee`` yields an error payload."""
    from tools import kanban_tools as kt
    assert json.loads(kt._handle_create({"title": "t"})).get("error")
|
||||
|
||||
|
||||
def test_create_rejects_non_list_parents(worker_env):
    """An integer passed as ``parents`` yields an error payload."""
    from tools import kanban_tools as kt
    out = kt._handle_create({"title": "t", "assignee": "a", "parents": 42})
    assert json.loads(out).get("error")
|
||||
|
||||
|
||||
def test_create_accepts_string_parent(worker_env):
    """Convenience: a single parent id as string is coerced to [id]."""
    from tools import kanban_tools as kt
    out = kt._handle_create({
        "title": "t", "assignee": "a", "parents": worker_env,
    })
    assert json.loads(out)["ok"]
|
||||
|
||||
|
||||
def test_create_accepts_skills_list(worker_env):
    """Tool writes the per-task skills through to the kernel."""
    from tools import kanban_tools as kt
    from hermes_cli import kanban_db as kb
    out = kt._handle_create({
        "title": "skilled",
        "assignee": "linguist",
        "skills": ["translation", "github-code-review"],
    })
    d = json.loads(out)
    assert d["ok"] is True
    with kb.connect() as conn:
        task = kb.get_task(conn, d["task_id"])
        assert task.skills == ["translation", "github-code-review"]
|
||||
|
||||
|
||||
def test_create_accepts_skills_string(worker_env):
    """Convenience: a single skill name as string is coerced to [name]."""
    from tools import kanban_tools as kt
    from hermes_cli import kanban_db as kb
    out = kt._handle_create({
        "title": "one-skill",
        "assignee": "a",
        "skills": "translation",
    })
    d = json.loads(out)
    assert d["ok"] is True
    with kb.connect() as conn:
        task = kb.get_task(conn, d["task_id"])
        assert task.skills == ["translation"]
|
||||
|
||||
|
||||
def test_create_rejects_non_list_skills(worker_env):
    """skills: 42 must be rejected, not silently dropped."""
    from tools import kanban_tools as kt
    out = kt._handle_create({
        "title": "t", "assignee": "a", "skills": 42,
    })
    assert json.loads(out).get("error")
|
||||
|
||||
|
||||
def test_link_happy_path(worker_env):
    """Linking two freshly created, unrelated tasks succeeds."""
    from hermes_cli import kanban_db as kb
    conn = kb.connect()
    try:
        a = kb.create_task(conn, title="A", assignee="x")
        b = kb.create_task(conn, title="B", assignee="x")
    finally:
        conn.close()
    from tools import kanban_tools as kt
    out = kt._handle_link({"parent_id": a, "child_id": b})
    d = json.loads(out)
    assert d["ok"] is True
|
||||
|
||||
|
||||
def test_link_rejects_self_reference(worker_env):
    """Linking a task to itself yields an error payload."""
    from tools import kanban_tools as kt
    out = kt._handle_link({"parent_id": worker_env, "child_id": worker_env})
    assert json.loads(out).get("error")
|
||||
|
||||
|
||||
def test_link_rejects_missing_args(worker_env):
|
||||
from tools import kanban_tools as kt
|
||||
assert json.loads(kt._handle_link({"parent_id": "x"})).get("error")
|
||||
assert json.loads(kt._handle_link({"child_id": "y"})).get("error")
|
||||
|
||||
|
||||
def test_link_rejects_cycle(worker_env):
|
||||
"""A → B, then try to link B → A."""
|
||||
from hermes_cli import kanban_db as kb
|
||||
conn = kb.connect()
|
||||
try:
|
||||
a = kb.create_task(conn, title="A", assignee="x")
|
||||
b = kb.create_task(conn, title="B", assignee="x", parents=[a])
|
||||
finally:
|
||||
conn.close()
|
||||
from tools import kanban_tools as kt
|
||||
out = kt._handle_link({"parent_id": b, "child_id": a})
|
||||
assert json.loads(out).get("error")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# End-to-end: simulate a full worker lifecycle through the tools
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_worker_lifecycle_through_tools(worker_env):
|
||||
"""Drive the full claim -> heartbeat -> comment -> complete lifecycle
|
||||
exclusively through the tools, then verify the DB state matches what
|
||||
the dispatcher/notifier expect."""
|
||||
from tools import kanban_tools as kt
|
||||
|
||||
# 1. show — worker orientation
|
||||
show = json.loads(kt._handle_show({}))
|
||||
assert show["task"]["id"] == worker_env
|
||||
|
||||
# 2. heartbeat during long op
|
||||
assert json.loads(kt._handle_heartbeat({"note": "warming up"}))["ok"]
|
||||
|
||||
# 3. comment for a future peer
|
||||
assert json.loads(kt._handle_comment({
|
||||
"task_id": worker_env,
|
||||
"body": "note: using stdlib sqlite3 bindings",
|
||||
}))["ok"]
|
||||
|
||||
# 4. spawn a child task for follow-up
|
||||
child_out = json.loads(kt._handle_create({
|
||||
"title": "write integration test",
|
||||
"assignee": "qa",
|
||||
"parents": [worker_env],
|
||||
}))
|
||||
assert child_out["ok"]
|
||||
|
||||
# 5. complete with structured handoff
|
||||
comp = json.loads(kt._handle_complete({
|
||||
"summary": "implemented + spawned QA follow-up",
|
||||
"metadata": {"child_task": child_out["task_id"]},
|
||||
}))
|
||||
assert comp["ok"]
|
||||
|
||||
# Verify final state
|
||||
from hermes_cli import kanban_db as kb
|
||||
conn = kb.connect()
|
||||
try:
|
||||
parent = kb.get_task(conn, worker_env)
|
||||
assert parent.status == "done"
|
||||
assert parent.current_run_id is None
|
||||
run = kb.latest_run(conn, worker_env)
|
||||
assert run.outcome == "completed"
|
||||
assert run.metadata == {"child_task": child_out["task_id"]}
|
||||
# Child is todo (parent just finished, but recompute_ready may
|
||||
# have promoted it — complete_task runs recompute internally).
|
||||
child = kb.get_task(conn, child_out["task_id"])
|
||||
assert child.status == "ready", (
|
||||
f"child should be ready after parent done, got {child.status}"
|
||||
)
|
||||
# Comment is visible
|
||||
assert len(kb.list_comments(conn, worker_env)) == 1
|
||||
# Heartbeat event recorded
|
||||
hb = [e for e in kb.list_events(conn, worker_env) if e.kind == "heartbeat"]
|
||||
assert len(hb) == 1
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# System-prompt guidance injection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_kanban_guidance_not_in_normal_prompt(monkeypatch, tmp_path):
|
||||
"""A normal chat session (no HERMES_KANBAN_TASK) must NOT have
|
||||
KANBAN_GUIDANCE in its system prompt."""
|
||||
monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False)
|
||||
home = tmp_path / ".hermes"
|
||||
home.mkdir()
|
||||
monkeypatch.setenv("HERMES_HOME", str(home))
|
||||
from pathlib import Path as _P
|
||||
monkeypatch.setattr(_P, "home", lambda: tmp_path)
|
||||
|
||||
from run_agent import AIAgent
|
||||
a = AIAgent(
|
||||
api_key="test",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
prompt = a._build_system_prompt()
|
||||
assert "You are a Kanban worker" not in prompt
|
||||
assert "kanban_show()" not in prompt
|
||||
|
||||
|
||||
def test_kanban_guidance_in_worker_prompt(monkeypatch, tmp_path):
|
||||
"""A worker session (HERMES_KANBAN_TASK set) MUST have the full
|
||||
lifecycle guidance in its system prompt."""
|
||||
monkeypatch.setenv("HERMES_KANBAN_TASK", "t_fake")
|
||||
home = tmp_path / ".hermes"
|
||||
home.mkdir()
|
||||
monkeypatch.setenv("HERMES_HOME", str(home))
|
||||
from pathlib import Path as _P
|
||||
monkeypatch.setattr(_P, "home", lambda: tmp_path)
|
||||
|
||||
from run_agent import AIAgent
|
||||
a = AIAgent(
|
||||
api_key="test",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
prompt = a._build_system_prompt()
|
||||
# Header phrase
|
||||
assert "You are a Kanban worker" in prompt
|
||||
# Lifecycle signals
|
||||
assert "kanban_show()" in prompt
|
||||
assert "kanban_complete" in prompt
|
||||
assert "kanban_block" in prompt
|
||||
assert "kanban_create" in prompt
|
||||
# Anti-shell guidance
|
||||
assert "Do not shell out" in prompt or "tools — they work" in prompt
|
||||
|
||||
|
||||
def test_kanban_guidance_prompt_size_bounded(monkeypatch, tmp_path):
|
||||
"""Sanity: the guidance block is under 4 KB so it doesn't blow
|
||||
up the cached prompt."""
|
||||
monkeypatch.setenv("HERMES_KANBAN_TASK", "t_fake")
|
||||
home = tmp_path / ".hermes"
|
||||
home.mkdir()
|
||||
monkeypatch.setenv("HERMES_HOME", str(home))
|
||||
from pathlib import Path as _P
|
||||
monkeypatch.setattr(_P, "home", lambda: tmp_path)
|
||||
|
||||
from agent.prompt_builder import KANBAN_GUIDANCE
|
||||
assert 1_500 < len(KANBAN_GUIDANCE) < 4_096, (
|
||||
f"KANBAN_GUIDANCE is {len(KANBAN_GUIDANCE)} chars — too short (missing?) or too long"
|
||||
)
|
||||
@@ -810,44 +810,6 @@ class TestParseTargetRefE164:
|
||||
assert _parse_target_ref("matrix", "+15551234567")[2] is False
|
||||
|
||||
|
||||
class TestParseTargetRefSlack:
|
||||
"""_parse_target_ref recognizes Slack channel/user IDs as explicit."""
|
||||
|
||||
def test_public_channel_id_is_explicit(self):
|
||||
chat_id, thread_id, is_explicit = _parse_target_ref("slack", "C0B0QV5434G")
|
||||
assert chat_id == "C0B0QV5434G"
|
||||
assert thread_id is None
|
||||
assert is_explicit is True
|
||||
|
||||
def test_private_channel_id_is_explicit(self):
|
||||
assert _parse_target_ref("slack", "G123ABCDEF")[2] is True
|
||||
|
||||
def test_dm_id_is_explicit(self):
|
||||
assert _parse_target_ref("slack", "D123ABCDEF")[2] is True
|
||||
|
||||
def test_user_id_is_not_explicit(self):
|
||||
"""Slack user IDs (U...) and workspace IDs (W...) are NOT explicit send
|
||||
targets. chat.postMessage rejects them — a DM must be opened first via
|
||||
conversations.open to obtain a D... conversation ID.
|
||||
"""
|
||||
assert _parse_target_ref("slack", "U123ABCDEF")[2] is False
|
||||
assert _parse_target_ref("slack", "W123ABCDEF")[2] is False
|
||||
|
||||
def test_whitespace_is_stripped(self):
|
||||
chat_id, _, is_explicit = _parse_target_ref("slack", " C0B0QV5434G ")
|
||||
assert chat_id == "C0B0QV5434G"
|
||||
assert is_explicit is True
|
||||
|
||||
def test_lowercase_or_short_id_is_not_explicit(self):
|
||||
assert _parse_target_ref("slack", "c0b0qv5434g")[2] is False
|
||||
assert _parse_target_ref("slack", "C123")[2] is False
|
||||
assert _parse_target_ref("slack", "X0B0QV5434G")[2] is False
|
||||
|
||||
def test_slack_id_not_explicit_for_other_platforms(self):
|
||||
assert _parse_target_ref("discord", "C0B0QV5434G")[2] is False
|
||||
assert _parse_target_ref("telegram", "C0B0QV5434G")[2] is False
|
||||
|
||||
|
||||
class TestSendDiscordThreadId:
|
||||
"""_send_discord uses thread_id when provided."""
|
||||
|
||||
|
||||
@@ -1,107 +0,0 @@
|
||||
"""
|
||||
Regression tests for the shared-container task_id mapping.
|
||||
|
||||
The top-level agent and all delegate_task subagents share a single
|
||||
terminal sandbox keyed by ``"default"``. ``_resolve_container_task_id``
|
||||
is the sole gatekeeper for which tool-call task_ids go to the shared
|
||||
container vs. get their own isolated sandbox. RL / benchmark
|
||||
environments opt in to isolation by calling
|
||||
``register_task_env_overrides(task_id, {...})`` before the agent loop;
|
||||
every other task_id collapses back to ``"default"``.
|
||||
|
||||
If you change the collapse logic, update both the helper and these
|
||||
tests -- see `hermes-agent-dev` skill, "Why do subagents get their own
|
||||
containers?" section, and the Container lifecycle paragraph under
|
||||
Docker Backend in ``website/docs/user-guide/configuration.md``.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from tools import terminal_tool
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _clean_overrides():
|
||||
"""Ensure no stray overrides from other tests leak in."""
|
||||
before = dict(terminal_tool._task_env_overrides)
|
||||
terminal_tool._task_env_overrides.clear()
|
||||
yield
|
||||
terminal_tool._task_env_overrides.clear()
|
||||
terminal_tool._task_env_overrides.update(before)
|
||||
|
||||
|
||||
def test_none_task_id_maps_to_default():
|
||||
assert terminal_tool._resolve_container_task_id(None) == "default"
|
||||
|
||||
|
||||
def test_empty_task_id_maps_to_default():
|
||||
assert terminal_tool._resolve_container_task_id("") == "default"
|
||||
|
||||
|
||||
def test_literal_default_stays_default():
|
||||
assert terminal_tool._resolve_container_task_id("default") == "default"
|
||||
|
||||
|
||||
def test_subagent_task_id_collapses_to_default():
|
||||
# delegate_task constructs IDs like "subagent-<N>-<uuid_hex>"; these
|
||||
# should share the parent's container, not spin up their own.
|
||||
assert terminal_tool._resolve_container_task_id("subagent-0-deadbeef") == "default"
|
||||
assert terminal_tool._resolve_container_task_id("subagent-42-cafef00d") == "default"
|
||||
|
||||
|
||||
def test_arbitrary_session_id_collapses_to_default():
|
||||
# Session UUIDs or anything else without an override still collapse.
|
||||
assert terminal_tool._resolve_container_task_id("sess-123e4567-e89b-12d3") == "default"
|
||||
|
||||
|
||||
def test_rl_task_with_override_keeps_its_own_id():
|
||||
# RL / benchmark pattern: register a per-task image, then the task_id
|
||||
# must survive ``_resolve_container_task_id`` so the rollout lands in
|
||||
# its own sandbox.
|
||||
terminal_tool.register_task_env_overrides(
|
||||
"tb2-task-fix-git", {"docker_image": "tb2:fix-git", "cwd": "/app"}
|
||||
)
|
||||
try:
|
||||
assert (
|
||||
terminal_tool._resolve_container_task_id("tb2-task-fix-git")
|
||||
== "tb2-task-fix-git"
|
||||
)
|
||||
finally:
|
||||
terminal_tool.clear_task_env_overrides("tb2-task-fix-git")
|
||||
|
||||
|
||||
def test_cleared_override_collapses_again():
|
||||
terminal_tool.register_task_env_overrides("tb2-x", {"docker_image": "x:y"})
|
||||
assert terminal_tool._resolve_container_task_id("tb2-x") == "tb2-x"
|
||||
terminal_tool.clear_task_env_overrides("tb2-x")
|
||||
assert terminal_tool._resolve_container_task_id("tb2-x") == "default"
|
||||
|
||||
|
||||
def test_get_active_env_reads_shared_container_from_subagent_id():
|
||||
"""``get_active_env`` must see the shared ``"default"`` sandbox when
|
||||
called with a subagent's task_id, so the agent loop's turn-budget
|
||||
enforcement reads the real env (not None) during delegation."""
|
||||
sentinel = object()
|
||||
terminal_tool._active_environments["default"] = sentinel
|
||||
try:
|
||||
assert terminal_tool.get_active_env("subagent-7-cafe") is sentinel
|
||||
assert terminal_tool.get_active_env(None) is sentinel
|
||||
assert terminal_tool.get_active_env("default") is sentinel
|
||||
finally:
|
||||
terminal_tool._active_environments.pop("default", None)
|
||||
|
||||
|
||||
def test_get_active_env_honours_rl_override():
|
||||
rl_env = object()
|
||||
default_env = object()
|
||||
terminal_tool._active_environments["default"] = default_env
|
||||
terminal_tool._active_environments["rl-42"] = rl_env
|
||||
terminal_tool.register_task_env_overrides("rl-42", {"docker_image": "x"})
|
||||
try:
|
||||
# With an override registered, lookup returns the task's own env,
|
||||
# not the shared "default" one.
|
||||
assert terminal_tool.get_active_env("rl-42") is rl_env
|
||||
finally:
|
||||
terminal_tool.clear_task_env_overrides("rl-42")
|
||||
terminal_tool._active_environments.pop("default", None)
|
||||
terminal_tool._active_environments.pop("rl-42", None)
|
||||
+49
-280
@@ -483,147 +483,6 @@ def _is_local_backend() -> bool:
|
||||
return _is_camofox_mode() or _get_cloud_provider() is None
|
||||
|
||||
|
||||
_auto_local_for_private_urls_resolved = False
|
||||
_cached_auto_local_for_private_urls: bool = True
|
||||
|
||||
|
||||
def _auto_local_for_private_urls() -> bool:
|
||||
"""Return whether a cloud-configured install should auto-spawn a local
|
||||
Chromium for LAN/localhost URLs.
|
||||
|
||||
Reads ``browser.auto_local_for_private_urls`` once (default ``True``) and
|
||||
caches it for the process lifetime. When enabled, ``browser_navigate``
|
||||
routes URLs whose host resolves to a private/loopback/LAN address to a
|
||||
local headless Chromium sidecar even when a cloud provider (Browserbase
|
||||
/ Browser-Use / Firecrawl) is configured globally. Public URLs continue
|
||||
to use the cloud provider in the same conversation.
|
||||
"""
|
||||
global _auto_local_for_private_urls_resolved, _cached_auto_local_for_private_urls
|
||||
if _auto_local_for_private_urls_resolved:
|
||||
return _cached_auto_local_for_private_urls
|
||||
|
||||
_auto_local_for_private_urls_resolved = True
|
||||
try:
|
||||
from hermes_cli.config import read_raw_config
|
||||
cfg = read_raw_config()
|
||||
browser_cfg = cfg.get("browser", {})
|
||||
if isinstance(browser_cfg, dict) and "auto_local_for_private_urls" in browser_cfg:
|
||||
_cached_auto_local_for_private_urls = bool(
|
||||
browser_cfg.get("auto_local_for_private_urls")
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Could not read auto_local_for_private_urls from config: %s", e)
|
||||
return _cached_auto_local_for_private_urls
|
||||
|
||||
|
||||
def _url_is_private(url: str) -> bool:
|
||||
"""Return True when the URL's host resolves to a private/LAN/loopback address.
|
||||
|
||||
Reuses ``tools.url_safety.is_safe_url`` as the oracle — if the SSRF check
|
||||
would reject the URL, we treat it as "private" for routing purposes. DNS
|
||||
resolution failures are treated as NOT private (fall through to whatever
|
||||
backend is configured, which will surface the DNS error naturally).
|
||||
"""
|
||||
try:
|
||||
from tools.url_safety import is_safe_url
|
||||
# is_safe_url returns False for private/loopback/link-local/CGNAT AND
|
||||
# for DNS failures. We only want the private-network case here, so
|
||||
# we parse + check the host shape as a DNS-failure sieve first.
|
||||
from urllib.parse import urlparse
|
||||
import ipaddress
|
||||
import socket
|
||||
parsed = urlparse(url)
|
||||
hostname = (parsed.hostname or "").strip().lower().rstrip(".")
|
||||
if not hostname:
|
||||
return False
|
||||
# Literal IP → check directly
|
||||
try:
|
||||
ip = ipaddress.ip_address(hostname)
|
||||
return (
|
||||
ip.is_private
|
||||
or ip.is_loopback
|
||||
or ip.is_link_local
|
||||
or ip in ipaddress.ip_network("100.64.0.0/10")
|
||||
)
|
||||
except ValueError:
|
||||
pass
|
||||
# Hostname — must resolve to confirm it's private (bare "localhost"
|
||||
# resolves to 127.0.0.1 via /etc/hosts). Short-circuit on obvious
|
||||
# names to avoid a DNS hop.
|
||||
if hostname in ("localhost",) or hostname.endswith(".localhost"):
|
||||
return True
|
||||
if hostname.endswith(".local") or hostname.endswith(".lan") or hostname.endswith(".internal"):
|
||||
return True
|
||||
try:
|
||||
addr_info = socket.getaddrinfo(hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM)
|
||||
except socket.gaierror:
|
||||
return False # DNS fail → not private, let the normal path fail
|
||||
for _, _, _, _, sockaddr in addr_info:
|
||||
try:
|
||||
ip = ipaddress.ip_address(sockaddr[0])
|
||||
except ValueError:
|
||||
continue
|
||||
if (
|
||||
ip.is_private
|
||||
or ip.is_loopback
|
||||
or ip.is_link_local
|
||||
or ip in ipaddress.ip_network("100.64.0.0/10")
|
||||
):
|
||||
return True
|
||||
return False
|
||||
except Exception as exc:
|
||||
logger.debug("URL-privacy check failed for %s: %s", url, exc)
|
||||
return False
|
||||
|
||||
|
||||
def _navigation_session_key(task_id: str, url: str) -> str:
|
||||
"""Pick the session key that should handle ``url`` for ``task_id``.
|
||||
|
||||
Returns the bare task_id unless ALL of these are true:
|
||||
1. A cloud provider is configured (``_get_cloud_provider()`` is not None).
|
||||
2. Auto-local routing is enabled (``browser.auto_local_for_private_urls``,
|
||||
default True).
|
||||
3. The URL resolves to a private/LAN/loopback address.
|
||||
4. A CDP override is not active (that path owns the whole session).
|
||||
5. Camofox mode is not active (Camofox is already local-only).
|
||||
|
||||
When all are true, returns ``f"{task_id}::local"`` so the hybrid-routing
|
||||
path spawns a local Chromium sidecar while the cloud session (if any)
|
||||
continues to serve public URLs.
|
||||
"""
|
||||
if task_id is None:
|
||||
task_id = "default"
|
||||
if _get_cdp_override():
|
||||
return task_id
|
||||
if _is_camofox_mode():
|
||||
return task_id
|
||||
if _get_cloud_provider() is None:
|
||||
return task_id
|
||||
if not _auto_local_for_private_urls():
|
||||
return task_id
|
||||
if not _url_is_private(url):
|
||||
return task_id
|
||||
return f"{task_id}{_LOCAL_SUFFIX}"
|
||||
|
||||
|
||||
def _is_local_sidecar_key(session_key: str) -> bool:
|
||||
"""Return True when ``session_key`` is a hybrid-routing local sidecar."""
|
||||
return session_key.endswith(_LOCAL_SUFFIX)
|
||||
|
||||
|
||||
def _last_session_key(task_id: str) -> str:
|
||||
"""Return the session key to use for a non-nav browser tool call.
|
||||
|
||||
If a previous ``browser_navigate`` on this task_id set a last-active key,
|
||||
use it so snapshot/click/fill/etc. hit the same session. Otherwise fall
|
||||
back to the bare task_id (matches original behavior for tasks that never
|
||||
triggered hybrid routing).
|
||||
"""
|
||||
if task_id is None:
|
||||
task_id = "default"
|
||||
return _last_active_session_key.get(task_id, task_id)
|
||||
|
||||
|
||||
def _allow_private_urls() -> bool:
|
||||
"""Return whether the browser is allowed to navigate to private/internal addresses.
|
||||
|
||||
@@ -662,25 +521,10 @@ def _socket_safe_tmpdir() -> str:
|
||||
return tempfile.gettempdir()
|
||||
|
||||
|
||||
# Track active sessions per "session key".
|
||||
#
|
||||
# A "session key" is either the bare task_id (cloud/default path) OR a composite
|
||||
# like f"{task_id}::local" when the hybrid-routing feature spawns a local sidecar
|
||||
# browser for a LAN/localhost URL while a cloud provider is configured globally.
|
||||
# Both forms flow through the same _active_sessions / _run_browser_command /
|
||||
# cleanup_browser code paths — the key is opaque to those internals.
|
||||
#
|
||||
# Track active sessions per task
|
||||
# Stores: session_name (always), bb_session_id + cdp_url (cloud mode only)
|
||||
_active_sessions: Dict[str, Dict[str, str]] = {} # session_key -> {session_name, ...}
|
||||
_recording_sessions: set = set() # session_keys with active recordings
|
||||
|
||||
# Tracks the most recent session_key used per task_id. Set by browser_navigate()
|
||||
# after it chooses a backend for a URL; read by every non-nav browser tool
|
||||
# (snapshot/click/fill/eval/...) so they target the session that served the last
|
||||
# navigation. Without this, a task that navigated to localhost on the local
|
||||
# sidecar would fall back to the cloud session on its next snapshot call.
|
||||
_last_active_session_key: Dict[str, str] = {} # task_id -> session_key
|
||||
_LOCAL_SUFFIX = "::local"
|
||||
_active_sessions: Dict[str, Dict[str, str]] = {} # task_id -> {session_name, ...}
|
||||
_recording_sessions: set = set() # task_ids with active recordings
|
||||
|
||||
# Flag to track if cleanup has been done
|
||||
_cleanup_done = False
|
||||
@@ -1170,48 +1014,37 @@ def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, str]:
|
||||
|
||||
def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
|
||||
"""
|
||||
Get or create session info for the given session key.
|
||||
|
||||
Get or create session info for the given task.
|
||||
|
||||
In cloud mode, creates a Browserbase session with proxies enabled.
|
||||
In local mode, generates a session name for agent-browser --session.
|
||||
Also starts the inactivity cleanup thread and updates activity tracking.
|
||||
Thread-safe: multiple subagents can call this concurrently.
|
||||
|
||||
|
||||
Args:
|
||||
task_id: Session key. Normally the task_id as-is, but may carry the
|
||||
``::local`` suffix for the hybrid-routing local sidecar — in that
|
||||
case the cloud provider is skipped even when one is configured,
|
||||
and a local Chromium session is created instead.
|
||||
|
||||
task_id: Unique identifier for the task
|
||||
|
||||
Returns:
|
||||
Dict with session_name (always), bb_session_id + cdp_url (cloud only)
|
||||
"""
|
||||
if task_id is None:
|
||||
task_id = "default"
|
||||
|
||||
|
||||
# Start the cleanup thread if not running (handles inactivity timeouts)
|
||||
_start_browser_cleanup_thread()
|
||||
|
||||
|
||||
# Update activity timestamp for this session
|
||||
_update_session_activity(task_id)
|
||||
|
||||
|
||||
with _cleanup_lock:
|
||||
# Check if we already have a session for this task
|
||||
if task_id in _active_sessions:
|
||||
return _active_sessions[task_id]
|
||||
|
||||
# Hybrid routing: session keys ending with ``::local`` force a local
|
||||
# Chromium regardless of the globally-configured cloud provider. Public
|
||||
# URLs in the same conversation continue to use the cloud session under
|
||||
# the bare task_id key.
|
||||
force_local = _is_local_sidecar_key(task_id)
|
||||
|
||||
|
||||
# Create session outside the lock (network call in cloud mode)
|
||||
cdp_override = _get_cdp_override()
|
||||
if cdp_override and not force_local:
|
||||
if cdp_override:
|
||||
session_info = _create_cdp_session(task_id, cdp_override)
|
||||
elif force_local:
|
||||
session_info = _create_local_session(task_id)
|
||||
else:
|
||||
provider = _get_cloud_provider()
|
||||
if provider is None:
|
||||
@@ -1248,7 +1081,7 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
|
||||
session_info["fallback_from_cloud"] = True
|
||||
session_info["fallback_reason"] = str(e)
|
||||
session_info["fallback_provider"] = provider_name
|
||||
|
||||
|
||||
with _cleanup_lock:
|
||||
# Double-check: another thread may have created a session while we
|
||||
# were doing the network call. Use the existing one to avoid leaking
|
||||
@@ -1260,9 +1093,7 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
|
||||
# Lazy-start the CDP supervisor now that the session exists (if the
|
||||
# backend surfaces a CDP URL via override or session_info["cdp_url"]).
|
||||
# Idempotent; swallows errors. See _ensure_cdp_supervisor for details.
|
||||
# Skip for local sidecars — they have no CDP URL.
|
||||
if not force_local:
|
||||
_ensure_cdp_supervisor(task_id)
|
||||
_ensure_cdp_supervisor(task_id)
|
||||
|
||||
return session_info
|
||||
|
||||
@@ -1690,21 +1521,9 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
|
||||
# SSRF protection — block private/internal addresses before navigating.
|
||||
# Skipped for local backends (Camofox, headless Chromium without a cloud
|
||||
# provider) because the agent already has full local network access via
|
||||
# the terminal tool. Also skipped when hybrid routing will auto-spawn a
|
||||
# local Chromium sidecar for this URL (cloud provider configured +
|
||||
# private URL + ``browser.auto_local_for_private_urls`` enabled) — the
|
||||
# cloud provider never sees the URL in that case. Can also be opted
|
||||
# out globally via ``browser.allow_private_urls`` in config.
|
||||
effective_task_id = task_id or "default"
|
||||
nav_session_key = _navigation_session_key(effective_task_id, url)
|
||||
auto_local_this_nav = _is_local_sidecar_key(nav_session_key)
|
||||
|
||||
if (
|
||||
not _is_local_backend()
|
||||
and not auto_local_this_nav
|
||||
and not _allow_private_urls()
|
||||
and not _is_safe_url(url)
|
||||
):
|
||||
# the terminal tool. Can also be opted out for cloud mode via
|
||||
# ``browser.allow_private_urls`` in config.
|
||||
if not _is_local_backend() and not _allow_private_urls() and not _is_safe_url(url):
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Blocked: URL targets a private or internal address",
|
||||
@@ -1724,31 +1543,19 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
|
||||
from tools.browser_camofox import camofox_navigate
|
||||
return camofox_navigate(url, task_id)
|
||||
|
||||
if auto_local_this_nav:
|
||||
logger.info(
|
||||
"browser_navigate: auto-routing %s to local Chromium sidecar "
|
||||
"(cloud provider %s stays on cloud for public URLs; "
|
||||
"set browser.auto_local_for_private_urls: false to disable)",
|
||||
url,
|
||||
type(_get_cloud_provider()).__name__ if _get_cloud_provider() else "none",
|
||||
)
|
||||
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
# Get session info to check if this is a new session
|
||||
# (will create one with features logged if not exists)
|
||||
session_info = _get_session_info(nav_session_key)
|
||||
session_info = _get_session_info(effective_task_id)
|
||||
is_first_nav = session_info.get("_first_nav", True)
|
||||
|
||||
|
||||
# Auto-start recording if configured and this is first navigation
|
||||
if is_first_nav:
|
||||
session_info["_first_nav"] = False
|
||||
_maybe_start_recording(nav_session_key)
|
||||
_maybe_start_recording(effective_task_id)
|
||||
|
||||
result = _run_browser_command(nav_session_key, "open", [url], timeout=max(_get_command_timeout(), 60))
|
||||
|
||||
# Remember which session served this nav so snapshot/click/fill/...
|
||||
# on the same task_id hit it (critical when hybrid routing has both a
|
||||
# cloud session and a local sidecar alive concurrently).
|
||||
_last_active_session_key[effective_task_id] = nav_session_key
|
||||
result = _run_browser_command(effective_task_id, "open", [url], timeout=max(_get_command_timeout(), 60))
|
||||
|
||||
if result.get("success"):
|
||||
data = result.get("data", {})
|
||||
@@ -1758,17 +1565,10 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
|
||||
# Post-redirect SSRF check — if the browser followed a redirect to a
|
||||
# private/internal address, block the result so the model can't read
|
||||
# internal content via subsequent browser_snapshot calls.
|
||||
# Skipped for local backends (same rationale as the pre-nav check),
|
||||
# and for the hybrid local sidecar (we're already on a local browser
|
||||
# hitting a private URL by design).
|
||||
if (
|
||||
not _is_local_backend()
|
||||
and not auto_local_this_nav
|
||||
and not _allow_private_urls()
|
||||
and final_url and final_url != url and not _is_safe_url(final_url)
|
||||
):
|
||||
# Skipped for local backends (same rationale as the pre-nav check).
|
||||
if not _is_local_backend() and not _allow_private_urls() and final_url and final_url != url and not _is_safe_url(final_url):
|
||||
# Navigate away to a blank page to prevent snapshot leaks
|
||||
_run_browser_command(nav_session_key, "open", ["about:blank"], timeout=10)
|
||||
_run_browser_command(effective_task_id, "open", ["about:blank"], timeout=10)
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Blocked: redirect landed on a private/internal address",
|
||||
@@ -1812,7 +1612,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
|
||||
# Auto-take a compact snapshot so the model can act immediately
|
||||
# without a separate browser_snapshot call.
|
||||
try:
|
||||
snap_result = _run_browser_command(nav_session_key, "snapshot", ["-c"])
|
||||
snap_result = _run_browser_command(effective_task_id, "snapshot", ["-c"])
|
||||
if snap_result.get("success"):
|
||||
snap_data = snap_result.get("data", {})
|
||||
snapshot_text = snap_data.get("snapshot", "")
|
||||
@@ -1852,7 +1652,7 @@ def browser_snapshot(
|
||||
from tools.browser_camofox import camofox_snapshot
|
||||
return camofox_snapshot(full, task_id, user_task)
|
||||
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
# Build command args based on full flag
|
||||
args = []
|
||||
@@ -1914,7 +1714,7 @@ def browser_click(ref: str, task_id: Optional[str] = None) -> str:
|
||||
from tools.browser_camofox import camofox_click
|
||||
return camofox_click(ref, task_id)
|
||||
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
# Ensure ref starts with @
|
||||
if not ref.startswith("@"):
|
||||
@@ -1950,7 +1750,7 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
|
||||
from tools.browser_camofox import camofox_type
|
||||
return camofox_type(ref, text, task_id)
|
||||
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
# Ensure ref starts with @
|
||||
if not ref.startswith("@"):
|
||||
@@ -2004,7 +1804,7 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
|
||||
result = camofox_scroll(direction, task_id)
|
||||
return result
|
||||
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
result = _run_browser_command(effective_task_id, "scroll", [direction, str(_SCROLL_PIXELS)])
|
||||
if not result.get("success"):
|
||||
@@ -2033,7 +1833,7 @@ def browser_back(task_id: Optional[str] = None) -> str:
|
||||
from tools.browser_camofox import camofox_back
|
||||
return camofox_back(task_id)
|
||||
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
effective_task_id = task_id or "default"
|
||||
result = _run_browser_command(effective_task_id, "back", [])
|
||||
|
||||
if result.get("success"):
|
||||
@@ -2064,7 +1864,7 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
|
||||
from tools.browser_camofox import camofox_press
|
||||
return camofox_press(key, task_id)
|
||||
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
effective_task_id = task_id or "default"
|
||||
result = _run_browser_command(effective_task_id, "press", [key])
|
||||
|
||||
if result.get("success"):
|
||||
@@ -2106,7 +1906,7 @@ def browser_console(clear: bool = False, expression: Optional[str] = None, task_
|
||||
from tools.browser_camofox import camofox_console
|
||||
return camofox_console(clear, task_id)
|
||||
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
console_args = ["--clear"] if clear else []
|
||||
error_args = ["--clear"] if clear else []
|
||||
@@ -2145,7 +1945,7 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
|
||||
if _is_camofox_mode():
|
||||
return _camofox_eval(expression, task_id)
|
||||
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
effective_task_id = task_id or "default"
|
||||
result = _run_browser_command(effective_task_id, "eval", [expression])
|
||||
|
||||
if not result.get("success"):
|
||||
@@ -2277,7 +2077,7 @@ def browser_get_images(task_id: Optional[str] = None) -> str:
|
||||
from tools.browser_camofox import camofox_get_images
|
||||
return camofox_get_images(task_id)
|
||||
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
# Use eval to run JavaScript that extracts images
|
||||
js_code = """JSON.stringify(
|
||||
@@ -2347,7 +2147,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||
|
||||
import base64
|
||||
import uuid as uuid_mod
|
||||
effective_task_id = _last_session_key(task_id or "default")
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
# Save screenshot to persistent location so it can be shared with users
|
||||
from hermes_constants import get_hermes_dir
|
||||
@@ -2550,47 +2350,17 @@ def _cleanup_old_recordings(max_age_hours=72):
|
||||
|
||||
def cleanup_browser(task_id: Optional[str] = None) -> None:
|
||||
"""
|
||||
Clean up browser session(s) for a task.
|
||||
|
||||
Clean up browser session for a task.
|
||||
|
||||
Called automatically when a task completes or when inactivity timeout is reached.
|
||||
Closes both the agent-browser/Browserbase session and Camofox sessions.
|
||||
|
||||
When ``task_id`` is a bare task identifier (no ``::local`` suffix), reaps
|
||||
BOTH the cloud/primary session AND any hybrid-routing local sidecar that
|
||||
may have been spawned for LAN/localhost URLs in the same task. When
|
||||
``task_id`` already carries a ``::local`` suffix (called from the inactivity
|
||||
cleanup loop against a specific session key), reaps only that one.
|
||||
|
||||
|
||||
Args:
|
||||
task_id: Task identifier (or explicit session key)
|
||||
task_id: Task identifier to clean up
|
||||
"""
|
||||
if task_id is None:
|
||||
task_id = "default"
|
||||
|
||||
# Expand to the full set of session keys to reap. For a bare task_id
|
||||
# that includes the cloud/primary key + the local sidecar if one exists.
|
||||
if _is_local_sidecar_key(task_id):
|
||||
session_keys = [task_id]
|
||||
bare_task_id = task_id[: -len(_LOCAL_SUFFIX)]
|
||||
else:
|
||||
session_keys = [task_id]
|
||||
sidecar_key = f"{task_id}{_LOCAL_SUFFIX}"
|
||||
with _cleanup_lock:
|
||||
if sidecar_key in _active_sessions:
|
||||
session_keys.append(sidecar_key)
|
||||
bare_task_id = task_id
|
||||
|
||||
for session_key in session_keys:
|
||||
_cleanup_single_browser_session(session_key)
|
||||
|
||||
# Drop the last-active pointer only when the bare task is being cleaned
|
||||
# (i.e. not when we're only reaping a sidecar mid-task).
|
||||
if not _is_local_sidecar_key(task_id):
|
||||
_last_active_session_key.pop(bare_task_id, None)
|
||||
|
||||
|
||||
def _cleanup_single_browser_session(task_id: str) -> None:
|
||||
"""Internal: reap a single browser session by its exact session key."""
|
||||
# Stop the CDP supervisor for this task FIRST so we close our WebSocket
|
||||
# before the backend tears down the underlying CDP endpoint.
|
||||
_stop_cdp_supervisor(task_id)
|
||||
@@ -2609,33 +2379,32 @@ def _cleanup_single_browser_session(task_id: str) -> None:
|
||||
|
||||
logger.debug("cleanup_browser called for task_id: %s", task_id)
|
||||
logger.debug("Active sessions: %s", list(_active_sessions.keys()))
|
||||
|
||||
|
||||
# Check if session exists (under lock), but don't remove yet -
|
||||
# _run_browser_command needs it to build the close command.
|
||||
with _cleanup_lock:
|
||||
session_info = _active_sessions.get(task_id)
|
||||
|
||||
|
||||
if session_info:
|
||||
bb_session_id = session_info.get("bb_session_id", "unknown")
|
||||
logger.debug("Found session for task %s: bb_session_id=%s", task_id, bb_session_id)
|
||||
|
||||
|
||||
# Stop auto-recording before closing (saves the file)
|
||||
_maybe_stop_recording(task_id)
|
||||
|
||||
|
||||
# Try to close via agent-browser first (needs session in _active_sessions)
|
||||
try:
|
||||
_run_browser_command(task_id, "close", [], timeout=10)
|
||||
logger.debug("agent-browser close command completed for task %s", task_id)
|
||||
except Exception as e:
|
||||
logger.warning("agent-browser close failed for task %s: %s", task_id, e)
|
||||
|
||||
|
||||
# Now remove from tracking under lock
|
||||
with _cleanup_lock:
|
||||
_active_sessions.pop(task_id, None)
|
||||
_session_last_activity.pop(task_id, None)
|
||||
|
||||
# Cloud mode: close the cloud browser session via provider API.
|
||||
# Local sidecars have bb_session_id=None so this no-ops for them.
|
||||
|
||||
# Cloud mode: close the cloud browser session via provider API
|
||||
if bb_session_id:
|
||||
provider = _get_cloud_provider()
|
||||
if provider is not None:
|
||||
|
||||
@@ -440,10 +440,9 @@ def _get_or_create_env(task_id: str):
|
||||
_active_environments, _env_lock, _create_environment,
|
||||
_get_env_config, _last_activity, _start_cleanup_thread,
|
||||
_creation_locks, _creation_locks_lock, _task_env_overrides,
|
||||
_resolve_container_task_id,
|
||||
)
|
||||
|
||||
effective_task_id = _resolve_container_task_id(task_id)
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
# Fast path: environment already exists
|
||||
with _env_lock:
|
||||
|
||||
+2
-16
@@ -88,14 +88,8 @@ def _resolve_path(filepath: str, task_id: str = "default") -> Path:
|
||||
|
||||
def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
|
||||
"""Return the task's live terminal cwd for bookkeeping when available."""
|
||||
try:
|
||||
from tools.terminal_tool import _resolve_container_task_id
|
||||
container_key = _resolve_container_task_id(task_id)
|
||||
except Exception:
|
||||
container_key = task_id
|
||||
|
||||
with _file_ops_lock:
|
||||
cached = _file_ops_cache.get(container_key) or _file_ops_cache.get(task_id)
|
||||
cached = _file_ops_cache.get(task_id)
|
||||
if cached is not None:
|
||||
live_cwd = getattr(getattr(cached, "env", None), "cwd", None) or getattr(
|
||||
cached, "cwd", None
|
||||
@@ -107,7 +101,7 @@ def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
|
||||
from tools.terminal_tool import _active_environments, _env_lock
|
||||
|
||||
with _env_lock:
|
||||
env = _active_environments.get(container_key) or _active_environments.get(task_id)
|
||||
env = _active_environments.get(task_id)
|
||||
live_cwd = getattr(env, "cwd", None) if env is not None else None
|
||||
if live_cwd:
|
||||
return live_cwd
|
||||
@@ -267,23 +261,15 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
|
||||
|
||||
Thread-safe: uses the same per-task creation locks as terminal_tool to
|
||||
prevent duplicate sandbox creation from concurrent tool calls.
|
||||
|
||||
Note: subagent task_ids are collapsed to "default" via
|
||||
``_resolve_container_task_id`` so delegate_task children share the
|
||||
parent's container and its cached file_ops. RL/benchmark task_ids with
|
||||
a registered env override keep their isolation.
|
||||
"""
|
||||
from tools.terminal_tool import (
|
||||
_active_environments, _env_lock, _create_environment,
|
||||
_get_env_config, _last_activity, _start_cleanup_thread,
|
||||
_creation_locks,
|
||||
_creation_locks_lock,
|
||||
_resolve_container_task_id,
|
||||
)
|
||||
import time
|
||||
|
||||
task_id = _resolve_container_task_id(task_id)
|
||||
|
||||
# Fast path: check cache -- but also verify the underlying environment
|
||||
# is still alive (it may have been killed by the cleanup thread).
|
||||
with _file_ops_lock:
|
||||
|
||||
@@ -0,0 +1,726 @@
|
||||
"""Kanban tools — structured tool-call surface for worker + orchestrator agents.
|
||||
|
||||
These tools are only registered into the model's schema when the agent is
|
||||
running under the dispatcher (env var ``HERMES_KANBAN_TASK`` set). A
|
||||
normal ``hermes chat`` session sees **zero** kanban tools in its schema.
|
||||
|
||||
Why tools instead of just shelling out to ``hermes kanban``?
|
||||
|
||||
1. **Backend portability.** A worker whose terminal tool points at Docker
|
||||
/ Modal / Singularity / SSH would run ``hermes kanban complete …``
|
||||
inside the container, where ``hermes`` isn't installed and the DB
|
||||
isn't mounted. Tools run in the agent's Python process, so they
|
||||
always reach ``~/.hermes/kanban.db`` regardless of terminal backend.
|
||||
|
||||
2. **No shell-quoting footguns.** Passing ``--metadata '{"x": [...]}'``
|
||||
through shlex+argparse is fragile. Structured tool args skip it.
|
||||
|
||||
3. **Better errors.** Tool-call failures return structured JSON the
|
||||
model can reason about, not stderr strings it has to parse.
|
||||
|
||||
Humans continue to use the CLI (``hermes kanban …``), the dashboard
|
||||
(``hermes dashboard``), and the slash command (``/kanban …``) — all
|
||||
three bypass the agent entirely. The tools are ONLY for the worker
|
||||
agent's handoff back to the kernel.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Optional
|
||||
|
||||
from tools.registry import registry, tool_error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gating
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _check_kanban_mode() -> bool:
    """Report whether the kanban tools should be exposed in this process.

    The gate is the ``HERMES_KANBAN_TASK`` environment variable: the tools
    are available only when it is set to a non-empty value. Per the module
    notes, the dispatcher sets it when spawning a worker, so a plain
    ``hermes chat`` session sees no kanban tools.

    Returns:
        True when ``HERMES_KANBAN_TASK`` is present and non-empty.
    """
    # An unset variable and an empty one are treated the same way.
    return os.getenv("HERMES_KANBAN_TASK", "") != ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _default_task_id(arg: Optional[str]) -> Optional[str]:
    """Pick the task id to operate on.

    Args:
        arg: Task id supplied by the caller; may be ``None`` or empty.

    Returns:
        *arg* when it is truthy, otherwise the value of the
        ``HERMES_KANBAN_TASK`` environment variable, or ``None`` when that
        is unset or empty as well.
    """
    # ``or`` short-circuits, so the environment is only consulted when the
    # caller did not pass a usable id.
    candidate = arg or os.environ.get("HERMES_KANBAN_TASK")
    return candidate if candidate else None
|
||||
|
||||
|
||||
def _connect():
    """Open a kanban database connection, importing the DB module on demand.

    The import is deferred to call time so that merely importing this module
    does not pull in ``hermes_cli.kanban_db`` (useful for contexts that
    import every tool module without using kanban).

    Returns:
        A ``(kanban_db_module, connection)`` pair; the caller is responsible
        for closing the connection.
    """
    from hermes_cli import kanban_db

    connection = kanban_db.connect()
    return kanban_db, connection
|
||||
|
||||
|
||||
def _ok(**fields: Any) -> str:
    """Serialize a success payload as JSON.

    Args:
        **fields: Extra key/value pairs to include alongside ``"ok": true``.

    Returns:
        A JSON object string whose first key is ``"ok"``.
    """
    payload = {"ok": True}
    payload.update(fields)
    return json.dumps(payload)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _handle_show(args: dict, **kw) -> str:
    """Return a JSON snapshot of a task so the worker can (re)orient itself.

    The payload contains the task row, parent and child ids, all comments,
    the last 50 entries returned by ``kb.list_events``, every run, and the
    ``worker_context`` string produced by ``kb.build_worker_context``.

    Args:
        args: Tool arguments; ``task_id`` is optional and falls back to the
            ``HERMES_KANBAN_TASK`` env var via ``_default_task_id``.
        **kw: Ignored.

    Returns:
        The JSON payload, or a ``tool_error`` result when no task id can be
        resolved, the task does not exist, or any exception is raised.
    """
    # Attribute names copied off the task / run rows, in output key order.
    task_fields = (
        "id", "title", "body", "assignee", "status", "tenant", "priority",
        "workspace_kind", "workspace_path", "created_by", "created_at",
        "started_at", "completed_at", "result", "current_run_id",
    )
    run_fields = (
        "id", "profile", "status", "outcome", "summary", "error",
        "metadata", "started_at", "ended_at",
    )

    tid = _default_task_id(args.get("task_id"))
    if not tid:
        return tool_error(
            "task_id is required (or set HERMES_KANBAN_TASK in the env)"
        )

    try:
        kb, conn = _connect()
        try:
            task = kb.get_task(conn, tid)
            if task is None:
                return tool_error(f"task {tid} not found")

            comments = kb.list_comments(conn, tid)
            events = kb.list_events(conn, tid)
            runs = kb.list_runs(conn, tid)
            parents = kb.parent_ids(conn, tid)
            children = kb.child_ids(conn, tid)

            payload = {
                "task": {name: getattr(task, name) for name in task_fields},
                "parents": parents,
                "children": children,
                "comments": [
                    {
                        "author": comment.author,
                        "body": comment.body,
                        "created_at": comment.created_at,
                    }
                    for comment in comments
                ],
                # Capped at the last 50 entries; the full log is available
                # through the CLI.
                "events": [
                    {
                        "kind": event.kind,
                        "payload": event.payload,
                        "created_at": event.created_at,
                        "run_id": event.run_id,
                    }
                    for event in events[-50:]
                ],
                "runs": [
                    {name: getattr(run, name) for name in run_fields}
                    for run in runs
                ],
                # Also expose the pre-formatted worker context block so the
                # agent can include it directly if it wants.
                "worker_context": kb.build_worker_context(conn, tid),
            }
            return json.dumps(payload)
        finally:
            conn.close()
    except Exception as exc:
        logger.exception("kanban_show failed")
        return tool_error(f"kanban_show: {exc}")
|
||||
|
||||
|
||||
def _handle_complete(args: dict, **kw) -> str:
    """Mark a task done and record the worker's structured handoff.

    Args:
        args: Tool arguments. ``task_id`` is optional (falls back to the
            ``HERMES_KANBAN_TASK`` env var); at least one of ``summary`` or
            ``result`` must be truthy; ``metadata``, when given, must be a
            dict.
        **kw: Ignored.

    Returns:
        ``_ok`` JSON with ``task_id`` and the id of the latest run (or
        ``None`` when there is none), or a ``tool_error`` result when
        validation fails, ``kb.complete_task`` reports failure, or an
        exception is raised.
    """
    tid = _default_task_id(args.get("task_id"))
    if not tid:
        return tool_error(
            "task_id is required (or set HERMES_KANBAN_TASK in the env)"
        )

    summary = args.get("summary")
    metadata = args.get("metadata")
    result = args.get("result")

    if not summary and not result:
        return tool_error(
            "provide at least one of: summary (preferred), result"
        )
    if not (metadata is None or isinstance(metadata, dict)):
        return tool_error(
            f"metadata must be an object/dict, got {type(metadata).__name__}"
        )

    try:
        kb, conn = _connect()
        try:
            completed = kb.complete_task(
                conn,
                tid,
                result=result,
                summary=summary,
                metadata=metadata,
            )
            if completed:
                run = kb.latest_run(conn, tid)
                return _ok(task_id=tid, run_id=None if not run else run.id)
            return tool_error(
                f"could not complete {tid} (unknown id or already terminal)"
            )
        finally:
            conn.close()
    except Exception as exc:
        logger.exception("kanban_complete failed")
        return tool_error(f"kanban_complete: {exc}")
|
||||
|
||||
|
||||
def _handle_block(args: dict, **kw) -> str:
    """Move a task to the blocked state with a human-readable reason.

    Args:
        args: Tool arguments. ``task_id`` is optional (falls back to the
            ``HERMES_KANBAN_TASK`` env var); ``reason`` is required and must
            not be blank.
        **kw: Ignored.

    Returns:
        ``_ok`` JSON with ``task_id`` and the id of the latest run (or
        ``None``), or a ``tool_error`` result when validation fails,
        ``kb.block_task`` reports failure, or an exception is raised.
    """
    tid = _default_task_id(args.get("task_id"))
    if not tid:
        return tool_error(
            "task_id is required (or set HERMES_KANBAN_TASK in the env)"
        )

    reason = args.get("reason")
    has_reason = bool(reason) and bool(str(reason).strip())
    if not has_reason:
        return tool_error("reason is required — explain what input you need")

    try:
        kb, conn = _connect()
        try:
            blocked = kb.block_task(conn, tid, reason=reason)
            if blocked:
                run = kb.latest_run(conn, tid)
                return _ok(task_id=tid, run_id=None if not run else run.id)
            return tool_error(
                f"could not block {tid} (unknown id or not in "
                f"running/ready)"
            )
        finally:
            conn.close()
    except Exception as exc:
        logger.exception("kanban_block failed")
        return tool_error(f"kanban_block: {exc}")
|
||||
|
||||
|
||||
def _handle_heartbeat(args: dict, **kw) -> str:
    """Record a liveness signal for a task during a long operation.

    Args:
        args: Tool arguments. ``task_id`` is optional (falls back to the
            ``HERMES_KANBAN_TASK`` env var); ``note`` is an optional value
            forwarded to ``kb.heartbeat_worker``.
        **kw: Ignored.

    Returns:
        ``_ok`` JSON with ``task_id``, or a ``tool_error`` result when no
        task id can be resolved, ``kb.heartbeat_worker`` reports failure, or
        an exception is raised.
    """
    tid = _default_task_id(args.get("task_id"))
    if not tid:
        return tool_error(
            "task_id is required (or set HERMES_KANBAN_TASK in the env)"
        )

    note = args.get("note")
    try:
        kb, conn = _connect()
        try:
            if kb.heartbeat_worker(conn, tid, note=note):
                return _ok(task_id=tid)
            return tool_error(
                f"could not heartbeat {tid} (unknown id or not running)"
            )
        finally:
            conn.close()
    except Exception as exc:
        logger.exception("kanban_heartbeat failed")
        return tool_error(f"kanban_heartbeat: {exc}")
|
||||
|
||||
|
||||
def _handle_comment(args: dict, **kw) -> str:
    """Append a comment to a task's thread.

    Unlike the other handlers, ``task_id`` is taken only from *args*; there
    is no fallback to the ``HERMES_KANBAN_TASK`` env var.

    Args:
        args: Tool arguments. ``task_id`` and a non-blank ``body`` are
            required. ``author`` is optional and defaults to the
            ``HERMES_PROFILE`` env var, then to ``"worker"``.
        **kw: Ignored.

    Returns:
        ``_ok`` JSON with ``task_id`` and the new ``comment_id``, or a
        ``tool_error`` result when validation fails or an exception is
        raised.
    """
    tid = args.get("task_id")
    if not tid:
        return tool_error(
            "task_id is required (use the current task id if that's what "
            "you mean — pulls from env but kept explicit here)"
        )

    body = args.get("body")
    body_is_blank = not body or not str(body).strip()
    if body_is_blank:
        return tool_error("body is required")

    author = args.get("author")
    if not author:
        author = os.environ.get("HERMES_PROFILE") or "worker"

    try:
        kb, conn = _connect()
        try:
            comment_id = kb.add_comment(
                conn, tid, author=author, body=str(body)
            )
            return _ok(task_id=tid, comment_id=comment_id)
        finally:
            conn.close()
    except Exception as exc:
        logger.exception("kanban_comment failed")
        return tool_error(f"kanban_comment: {exc}")
|
||||
|
||||
|
||||
def _handle_create(args: dict, **kw) -> str:
    """Create a new task; orchestrator workers use this to fan out.

    ``parents`` may be a list of task ids (or a single id as a string);
    dependency-gated promotion works as usual.

    All argument validation, including integer coercion of ``priority`` and
    ``max_runtime_seconds``, happens before the database is opened. A
    malformed argument therefore yields an error naming the offending field,
    without a connection being created or a traceback being logged.

    Args:
        args: Tool arguments. ``title`` and ``assignee`` are required and
            must not be blank. Optional keys: ``body``, ``parents``,
            ``tenant`` (defaults to the ``HERMES_TENANT`` env var),
            ``priority`` (defaults to 0), ``workspace_kind`` (defaults to
            ``"scratch"``), ``workspace_path``, ``triage``,
            ``idempotency_key``, ``max_runtime_seconds`` and ``skills``
            (a list, or a single skill name as a string).
        **kw: Ignored.

    Returns:
        ``_ok`` JSON with the new ``task_id`` and its ``status`` (``None``
        if the task cannot be read back), or a ``tool_error`` result when
        validation fails or an exception is raised.
    """
    title = args.get("title")
    if not title or not str(title).strip():
        return tool_error("title is required")

    assignee = args.get("assignee")
    # Apply the same blank check as ``title``: a whitespace-only assignee
    # names no profile.
    if not assignee or not str(assignee).strip():
        return tool_error(
            "assignee is required — name the profile that should execute this "
            "task (the dispatcher will only spawn tasks with an assignee)"
        )

    body = args.get("body")
    parents = args.get("parents") or []
    tenant = args.get("tenant") or os.environ.get("HERMES_TENANT")
    priority = args.get("priority")
    workspace_kind = args.get("workspace_kind") or "scratch"
    workspace_path = args.get("workspace_path")
    triage = bool(args.get("triage"))
    idempotency_key = args.get("idempotency_key")
    max_runtime_seconds = args.get("max_runtime_seconds")

    skills = args.get("skills")
    if isinstance(skills, str):
        # Accept a single skill name as a string for convenience.
        skills = [skills]
    if skills is not None and not isinstance(skills, (list, tuple)):
        return tool_error(
            f"skills must be a list of skill names, got {type(skills).__name__}"
        )

    if isinstance(parents, str):
        parents = [parents]
    if not isinstance(parents, (list, tuple)):
        return tool_error(
            f"parents must be a list of task ids, got {type(parents).__name__}"
        )

    # Coerce the numeric arguments up front so a bad value is reported
    # against its own field instead of surfacing as a generic failure from
    # inside the database block.
    try:
        priority_value = int(priority) if priority is not None else 0
    except (TypeError, ValueError):
        return tool_error(f"priority must be an integer, got {priority!r}")
    try:
        max_runtime_value = (
            int(max_runtime_seconds)
            if max_runtime_seconds is not None else None
        )
    except (TypeError, ValueError):
        return tool_error(
            "max_runtime_seconds must be an integer, "
            f"got {max_runtime_seconds!r}"
        )

    try:
        kb, conn = _connect()
        try:
            new_tid = kb.create_task(
                conn,
                title=str(title).strip(),
                body=body,
                assignee=str(assignee),
                parents=tuple(parents),
                tenant=tenant,
                priority=priority_value,
                workspace_kind=str(workspace_kind),
                workspace_path=workspace_path,
                triage=triage,
                idempotency_key=idempotency_key,
                max_runtime_seconds=max_runtime_value,
                skills=skills,
                created_by=os.environ.get("HERMES_PROFILE") or "worker",
            )
            new_task = kb.get_task(conn, new_tid)
            return _ok(
                task_id=new_tid,
                status=new_task.status if new_task else None,
            )
        finally:
            conn.close()
    except Exception as e:
        logger.exception("kanban_create failed")
        return tool_error(f"kanban_create: {e}")
|
||||
|
||||
|
||||
def _handle_link(args: dict, **kw) -> str:
    """Add a parent→child dependency edge between two existing tasks.

    Args:
        args: Tool arguments; both ``parent_id`` and ``child_id`` are
            required.
        **kw: Ignored.

    Returns:
        ``_ok`` JSON echoing ``parent_id`` and ``child_id``, or a
        ``tool_error`` result when an id is missing or ``kb.link_tasks``
        raises.
    """
    parent_id, child_id = args.get("parent_id"), args.get("child_id")
    if not (parent_id and child_id):
        return tool_error("both parent_id and child_id are required")

    try:
        kb, conn = _connect()
        try:
            kb.link_tasks(conn, parent_id=parent_id, child_id=child_id)
            return _ok(parent_id=parent_id, child_id=child_id)
        finally:
            conn.close()
    except ValueError as exc:
        # Cycle and self-parent rejections arrive as ValueError; they are
        # reported to the caller without being logged.
        return tool_error(f"kanban_link: {exc}")
    except Exception as exc:
        logger.exception("kanban_link failed")
        return tool_error(f"kanban_link: {exc}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Shared description text for the optional ``task_id`` parameter, reused by
# the schemas in this module that reference it.
_DESC_TASK_ID_DEFAULT = (
    "Task id. If omitted, defaults to HERMES_KANBAN_TASK from the env "
    "(the task the dispatcher spawned you to work on)."
)
|
||||
|
||||
# Tool schema for ``kanban_show``: a single optional ``task_id`` parameter.
KANBAN_SHOW_SCHEMA = {
    "name": "kanban_show",
    "description": (
        "Read a task's full state — title, body, assignee, parent task "
        "handoffs, your prior attempts on this task if any, comments, "
        "and recent events. Use this to (re)orient yourself before "
        "starting work, especially on retries. The response includes a "
        "pre-formatted ``worker_context`` string suitable for inclusion "
        "verbatim in your reasoning."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "task_id": {
                "type": "string",
                "description": _DESC_TASK_ID_DEFAULT,
            },
        },
        "required": [],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_complete``. No parameter is marked required here;
# the "at least one of summary/result" rule is stated in the description.
KANBAN_COMPLETE_SCHEMA = {
    "name": "kanban_complete",
    "description": (
        "Mark your current task done with a structured handoff for "
        "downstream workers and humans. Prefer ``summary`` for a "
        "human-readable 1-3 sentence description of what you did; put "
        "machine-readable facts in ``metadata`` (changed_files, "
        "tests_run, decisions, findings, etc). At least one of "
        "``summary`` or ``result`` is required."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "task_id": {
                "type": "string",
                "description": _DESC_TASK_ID_DEFAULT,
            },
            "summary": {
                "type": "string",
                "description": (
                    "Human-readable handoff, 1-3 sentences. Appears in "
                    "Run History on the dashboard and in downstream "
                    "workers' context."
                ),
            },
            "metadata": {
                "type": "object",
                "description": (
                    "Free-form dict of structured facts about this "
                    "attempt — {\"changed_files\": [...], \"tests_run\": 12, "
                    "\"findings\": [...]}. Surfaced to downstream "
                    "workers alongside ``summary``."
                ),
            },
            "result": {
                "type": "string",
                "description": (
                    "Short result log line (legacy field, maps to "
                    "task.result). Use ``summary`` instead when "
                    "possible; this exists for compatibility with "
                    "callers that still set --result on the CLI."
                ),
            },
        },
        "required": [],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_block``: ``reason`` is required, ``task_id`` is
# optional.
KANBAN_BLOCK_SCHEMA = {
    "name": "kanban_block",
    "description": (
        "Transition the task to blocked because you need human input "
        "to proceed. ``reason`` will be shown to the human on the "
        "board and included in context when someone unblocks you. "
        "Use for genuine blockers only — don't block on things you can "
        "resolve yourself."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "task_id": {
                "type": "string",
                "description": _DESC_TASK_ID_DEFAULT,
            },
            "reason": {
                "type": "string",
                "description": (
                    "What you need answered, in one or two sentences. "
                    "Don't paste the whole conversation; the human has "
                    "the board and can ask follow-ups via comments."
                ),
            },
        },
        "required": ["reason"],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_heartbeat``: both ``task_id`` and ``note`` are
# optional.
KANBAN_HEARTBEAT_SCHEMA = {
    "name": "kanban_heartbeat",
    "description": (
        "Signal that you're still alive during a long operation "
        "(training, encoding, large crawls). Call every few minutes so "
        "humans see liveness separately from PID checks. Pure side "
        "effect — no work changes."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "task_id": {
                "type": "string",
                "description": _DESC_TASK_ID_DEFAULT,
            },
            "note": {
                "type": "string",
                "description": (
                    "Optional short note describing current progress. "
                    "Shown in the event log."
                ),
            },
        },
        "required": [],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_comment``. ``task_id`` is required here and has
# its own description rather than the shared default-from-env text.
KANBAN_COMMENT_SCHEMA = {
    "name": "kanban_comment",
    "description": (
        "Append a comment to a task's thread. Use for durable notes "
        "that should outlive this run (questions for the next worker, "
        "partial findings, rationale). Ephemeral reasoning doesn't "
        "belong here — use your normal response instead."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "task_id": {
                "type": "string",
                "description": (
                    "Task id. Required (may be your own task or "
                    "another's — comment threads are per-task)."
                ),
            },
            "body": {
                "type": "string",
                "description": "Markdown-supported comment body.",
            },
            "author": {
                "type": "string",
                "description": (
                    "Override author name. Defaults to the current "
                    "profile (HERMES_PROFILE env)."
                ),
            },
        },
        "required": ["task_id", "body"],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_create``: ``title`` and ``assignee`` are required;
# every other property is optional.
KANBAN_CREATE_SCHEMA = {
    "name": "kanban_create",
    "description": (
        "Create a new kanban task, optionally as a child of the current "
        "one (pass the current task id in ``parents``). Used by "
        "orchestrator workers to fan out — decompose work into child "
        "tasks with specific assignees, link them into a pipeline, "
        "then complete your own task. The dispatcher picks up the new "
        "tasks on its next tick and spawns the assigned profiles."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "title": {
                "type": "string",
                "description": "Short task title (required).",
            },
            "assignee": {
                "type": "string",
                "description": (
                    "Profile name that should execute this task "
                    "(e.g. 'researcher-a', 'reviewer', 'writer'). "
                    "Required — tasks without an assignee are never "
                    "dispatched."
                ),
            },
            "body": {
                "type": "string",
                "description": (
                    "Opening post: full spec, acceptance criteria, "
                    "links. The assigned worker reads this as part of "
                    "its context."
                ),
            },
            "parents": {
                "type": "array",
                "items": {"type": "string"},
                "description": (
                    "Parent task ids. The new task stays in 'todo' "
                    "until every parent reaches 'done'; then it "
                    "auto-promotes to 'ready'. Typical fan-in: list "
                    "all the researcher task ids when creating a "
                    "synthesizer task."
                ),
            },
            "tenant": {
                "type": "string",
                "description": (
                    "Optional namespace for multi-project isolation. "
                    "Defaults to HERMES_TENANT env if set."
                ),
            },
            "priority": {
                "type": "integer",
                "description": (
                    "Dispatcher tiebreaker. Higher = picked sooner "
                    "when multiple ready tasks share an assignee."
                ),
            },
            "workspace_kind": {
                "type": "string",
                "enum": ["scratch", "dir", "worktree"],
                "description": (
                    "Workspace flavor: 'scratch' (fresh tmp dir, "
                    "default), 'dir' (shared directory, requires "
                    "absolute workspace_path), 'worktree' (git worktree)."
                ),
            },
            "workspace_path": {
                "type": "string",
                "description": (
                    "Absolute path for 'dir' or 'worktree' workspace. "
                    "Relative paths are rejected at dispatch."
                ),
            },
            "triage": {
                "type": "boolean",
                "description": (
                    "If true, task lands in 'triage' instead of 'todo' "
                    "— a specifier profile is expected to flesh out "
                    "the body before work starts."
                ),
            },
            "idempotency_key": {
                "type": "string",
                "description": (
                    "If a non-archived task with this key already "
                    "exists, return that task's id instead of creating "
                    "a duplicate. Useful for retry-safe automation."
                ),
            },
            "max_runtime_seconds": {
                "type": "integer",
                "description": (
                    "Per-task runtime cap. When exceeded, the "
                    "dispatcher SIGTERMs the worker and re-queues the "
                    "task with outcome='timed_out'."
                ),
            },
            "skills": {
                "type": "array",
                "items": {"type": "string"},
                "description": (
                    "Skill names to force-load into the dispatched "
                    "worker (in addition to the built-in kanban-worker "
                    "skill). Use this to pin a task to a specialist "
                    "context — e.g. ['translation'] for a translation "
                    "task, ['github-code-review'] for a reviewer task. "
                    "The names must match skills installed on the "
                    "assignee's profile."
                ),
            },
        },
        "required": ["title", "assignee"],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_link``: both ``parent_id`` and ``child_id`` are
# required.
KANBAN_LINK_SCHEMA = {
    "name": "kanban_link",
    "description": (
        "Add a parent→child dependency edge after both tasks already "
        "exist. The child won't promote to 'ready' until all parents "
        "are 'done'. Cycles and self-links are rejected."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "parent_id": {"type": "string", "description": "Parent task id."},
            "child_id": {"type": "string", "description": "Child task id."},
        },
        "required": ["parent_id", "child_id"],
    },
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Register the seven kanban tools with the shared tool registry at import
# time. Every entry belongs to the "kanban" toolset and is gated by
# ``_check_kanban_mode``, i.e. on the HERMES_KANBAN_TASK env var being set.
registry.register(
    name="kanban_show",
    toolset="kanban",
    schema=KANBAN_SHOW_SCHEMA,
    handler=_handle_show,
    check_fn=_check_kanban_mode,
    emoji="📋",
)

registry.register(
    name="kanban_complete",
    toolset="kanban",
    schema=KANBAN_COMPLETE_SCHEMA,
    handler=_handle_complete,
    check_fn=_check_kanban_mode,
    emoji="✔",
)

registry.register(
    name="kanban_block",
    toolset="kanban",
    schema=KANBAN_BLOCK_SCHEMA,
    handler=_handle_block,
    check_fn=_check_kanban_mode,
    emoji="⏸",
)

registry.register(
    name="kanban_heartbeat",
    toolset="kanban",
    schema=KANBAN_HEARTBEAT_SCHEMA,
    handler=_handle_heartbeat,
    check_fn=_check_kanban_mode,
    emoji="💓",
)

registry.register(
    name="kanban_comment",
    toolset="kanban",
    schema=KANBAN_COMMENT_SCHEMA,
    handler=_handle_comment,
    check_fn=_check_kanban_mode,
    emoji="💬",
)

registry.register(
    name="kanban_create",
    toolset="kanban",
    schema=KANBAN_CREATE_SCHEMA,
    handler=_handle_create,
    check_fn=_check_kanban_mode,
    emoji="➕",
)

registry.register(
    name="kanban_link",
    toolset="kanban",
    schema=KANBAN_LINK_SCHEMA,
    handler=_handle_link,
    check_fn=_check_kanban_mode,
    emoji="🔗",
)
|
||||
@@ -776,7 +776,7 @@ class ProcessRegistry:
|
||||
|
||||
# Only enqueue completion notification on the FIRST move. Without
|
||||
# this guard, kill_process() and the reader thread can both call
|
||||
# _move_to_finished(), producing duplicate [IMPORTANT: ...] messages.
|
||||
# _move_to_finished(), producing duplicate [SYSTEM: ...] messages.
|
||||
if was_running and session.notify_on_complete:
|
||||
from tools.ansi_strip import strip_ansi
|
||||
output_tail = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else ""
|
||||
|
||||
@@ -20,13 +20,6 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
_TELEGRAM_TOPIC_TARGET_RE = re.compile(r"^\s*(-?\d+)(?::(\d+))?\s*$")
|
||||
_FEISHU_TARGET_RE = re.compile(r"^\s*((?:oc|ou|on|chat|open)_[-A-Za-z0-9]+)(?::([-A-Za-z0-9_]+))?\s*$")
|
||||
# Slack conversation IDs: C (public channel), G (private/group channel), D (DM).
|
||||
# Must be uppercase alphanumeric, 9+ chars. User IDs (U...) and workspace IDs
|
||||
# (W...) are NOT valid chat.postMessage channel values — posting to them fails
|
||||
# because the API requires a conversation ID. To DM a user you must first call
|
||||
# conversations.open to obtain a D... ID. Without this gate, Slack IDs fall
|
||||
# through to channel-name resolution, which only matches by name and fails.
|
||||
_SLACK_TARGET_RE = re.compile(r"^\s*([CGD][A-Z0-9]{8,})\s*$")
|
||||
_WEIXIN_TARGET_RE = re.compile(r"^\s*((?:wxid|gh|v\d+|wm|wb)_[A-Za-z0-9_-]+|[A-Za-z0-9._-]+@chatroom|filehelper)\s*$")
|
||||
# Discord snowflake IDs are numeric, same regex pattern as Telegram topic targets.
|
||||
_NUMERIC_TOPIC_RE = _TELEGRAM_TOPIC_TARGET_RE
|
||||
@@ -325,10 +318,6 @@ def _parse_target_ref(platform_name: str, target_ref: str):
|
||||
match = _NUMERIC_TOPIC_RE.fullmatch(target_ref)
|
||||
if match:
|
||||
return match.group(1), match.group(2), True
|
||||
if platform_name == "slack":
|
||||
match = _SLACK_TARGET_RE.fullmatch(target_ref)
|
||||
if match:
|
||||
return match.group(1), None, True
|
||||
if platform_name == "weixin":
|
||||
match = _WEIXIN_TARGET_RE.fullmatch(target_ref)
|
||||
if match:
|
||||
|
||||
+3
-32
@@ -803,31 +803,6 @@ def clear_task_env_overrides(task_id: str):
|
||||
"""
|
||||
_task_env_overrides.pop(task_id, None)
|
||||
|
||||
|
||||
def _resolve_container_task_id(task_id: Optional[str]) -> str:
|
||||
"""
|
||||
Map a tool-call ``task_id`` to the container/sandbox key used by
|
||||
``_active_environments``.
|
||||
|
||||
The top-level agent passes ``task_id=None`` and lands on ``"default"``.
|
||||
``delegate_task`` children pass their own subagent ID so that
|
||||
file-state tracking, the active-subagents registry, and TUI events stay
|
||||
distinct per child -- but we deliberately collapse that ID back to
|
||||
``"default"`` here so subagents share the parent's long-lived container
|
||||
(one bash, one /workspace, one set of installed packages).
|
||||
|
||||
Exception: RL / benchmark environments (TerminalBench2, HermesSweEnv, ...)
|
||||
call ``register_task_env_overrides(task_id, {...})`` to request a
|
||||
per-task Docker/Modal image. When an override is registered for a
|
||||
task_id, we honour it by returning the task_id unchanged -- those
|
||||
rollouts need their own isolated sandbox, which is the whole point of
|
||||
the override.
|
||||
"""
|
||||
if task_id and task_id in _task_env_overrides:
|
||||
return task_id
|
||||
return "default"
|
||||
|
||||
|
||||
# Configuration from environment variables
|
||||
|
||||
def _parse_env_var(name: str, default: str, converter=int, type_label: str = "integer"):
|
||||
@@ -1164,9 +1139,8 @@ def _stop_cleanup_thread():
|
||||
|
||||
def get_active_env(task_id: str):
|
||||
"""Return the active BaseEnvironment for *task_id*, or None."""
|
||||
lookup = _resolve_container_task_id(task_id)
|
||||
with _env_lock:
|
||||
return _active_environments.get(lookup) or _active_environments.get(task_id)
|
||||
return _active_environments.get(task_id)
|
||||
|
||||
|
||||
def is_persistent_env(task_id: str) -> bool:
|
||||
@@ -1499,11 +1473,8 @@ def terminal_tool(
|
||||
config = _get_env_config()
|
||||
env_type = config["env_type"]
|
||||
|
||||
# Use task_id for environment isolation. By default all subagent
|
||||
# task_ids collapse back to "default" so the top-level agent and
|
||||
# every delegate_task child share one container; only task_ids with
|
||||
# a registered env override (RL benchmarks) get isolated sandboxes.
|
||||
effective_task_id = _resolve_container_task_id(task_id)
|
||||
# Use task_id for environment isolation
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
# Check per-task overrides (set by environments like TerminalBench2Env)
|
||||
# before falling back to global env var config
|
||||
|
||||
+21
@@ -60,6 +60,11 @@ _HERMES_CORE_TOOLS = [
|
||||
"send_message",
|
||||
# Home Assistant smart home control (gated on HASS_TOKEN via check_fn)
|
||||
"ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service",
|
||||
# Kanban multi-agent coordination — only in schema when the agent is
|
||||
# spawned as a kanban worker (HERMES_KANBAN_TASK env set), otherwise
|
||||
# zero schema footprint. Gated via check_fn in tools/kanban_tools.py.
|
||||
"kanban_show", "kanban_complete", "kanban_block", "kanban_heartbeat",
|
||||
"kanban_comment", "kanban_create", "kanban_link",
|
||||
]
|
||||
|
||||
|
||||
@@ -202,6 +207,22 @@ TOOLSETS = {
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"kanban": {
|
||||
"description": (
|
||||
"Kanban multi-agent coordination — only active when the agent "
|
||||
"is spawned by `hermes kanban daemon` (HERMES_KANBAN_TASK env "
|
||||
"set). Lets workers mark tasks done with structured handoffs, "
|
||||
"block for human input, heartbeat during long ops, comment "
|
||||
"on threads, and (for orchestrators) fan out into child tasks."
|
||||
),
|
||||
"tools": [
|
||||
"kanban_show", "kanban_complete", "kanban_block",
|
||||
"kanban_heartbeat", "kanban_comment",
|
||||
"kanban_create", "kanban_link",
|
||||
],
|
||||
"includes": [],
|
||||
},
|
||||
|
||||
"discord": {
|
||||
"description": "Discord read and participate tools (fetch messages, search members, create threads)",
|
||||
"tools": ["discord"],
|
||||
|
||||
@@ -2321,26 +2321,6 @@ def _(rid, params: dict) -> dict:
|
||||
payload["rendered"] = rendered
|
||||
_emit("message.complete", sid, payload)
|
||||
|
||||
if (
|
||||
status == "complete"
|
||||
and isinstance(raw, str)
|
||||
and raw.strip()
|
||||
and isinstance(text, str)
|
||||
and text.strip()
|
||||
):
|
||||
try:
|
||||
from agent.title_generator import maybe_auto_title
|
||||
|
||||
maybe_auto_title(
|
||||
_get_db(),
|
||||
session.get("session_key") or sid,
|
||||
text,
|
||||
raw,
|
||||
session.get("history", []),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# CLI parity: when voice-mode TTS is on, speak the agent reply
|
||||
# (cli.py:_voice_speak_response). Only the final text — tool
|
||||
# calls / reasoning already stream separately and would be
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import {
|
||||
boundedLiveRenderText,
|
||||
edgePreview,
|
||||
estimateRows,
|
||||
estimateTokensRough,
|
||||
@@ -107,25 +106,3 @@ describe('estimateRows', () => {
|
||||
expect(estimateRows(snake, w)).toBe(estimateRows(plain, w))
|
||||
})
|
||||
})
|
||||
|
||||
describe('boundedLiveRenderText', () => {
|
||||
it('keeps short text unchanged', () => {
|
||||
expect(boundedLiveRenderText('alpha\nbeta', { maxChars: 50, maxLines: 5 })).toBe('alpha\nbeta')
|
||||
})
|
||||
|
||||
it('keeps the tail of long live text', () => {
|
||||
const text = Array.from({ length: 6 }, (_, i) => `line-${i + 1}`).join('\n')
|
||||
const out = boundedLiveRenderText(text, { maxChars: 100, maxLines: 3 })
|
||||
|
||||
expect(out).toContain('omitted 3 lines')
|
||||
expect(out.endsWith('line-4\nline-5\nline-6')).toBe(true)
|
||||
expect(out).not.toContain('line-1')
|
||||
})
|
||||
|
||||
it('bounds very long single-line text by chars', () => {
|
||||
const out = boundedLiveRenderText('a'.repeat(60), { maxChars: 12, maxLines: 5 })
|
||||
|
||||
expect(out).toContain('omitted 48 chars')
|
||||
expect(out.endsWith('a'.repeat(12))).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -2,7 +2,6 @@ import { REASONING_PULSE_MS, STREAM_BATCH_MS } from '../config/timing.js'
|
||||
import type { SessionInterruptResponse, SubagentEventPayload } from '../gatewayTypes.js'
|
||||
import { hasReasoningTag, splitReasoning } from '../lib/reasoning.js'
|
||||
import {
|
||||
boundedLiveRenderText,
|
||||
buildToolTrailLine,
|
||||
estimateTokensRough,
|
||||
isTransientTrailLine,
|
||||
@@ -493,7 +492,7 @@ class TurnController {
|
||||
this.streamTimer = null
|
||||
const raw = this.bufRef.trimStart()
|
||||
const visible = hasReasoningTag(raw) ? splitReasoning(raw).text : raw
|
||||
patchTurnState({ streaming: boundedLiveRenderText(visible) })
|
||||
patchTurnState({ streaming: visible })
|
||||
}, STREAM_BATCH_MS)
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import { LONG_MSG } from '../config/limits.js'
|
||||
import { sectionMode } from '../domain/details.js'
|
||||
import { userDisplay } from '../domain/messages.js'
|
||||
import { ROLE } from '../domain/roles.js'
|
||||
import { boundedLiveRenderText, compactPreview, hasAnsi, isPasteBackedText, stripAnsi } from '../lib/text.js'
|
||||
import { compactPreview, hasAnsi, isPasteBackedText, stripAnsi } from '../lib/text.js'
|
||||
import type { Theme } from '../theme.js'
|
||||
import type { DetailsMode, Msg, SectionVisibility } from '../types.js'
|
||||
|
||||
@@ -84,11 +84,7 @@ export const MessageLine = memo(function MessageLine({
|
||||
}
|
||||
|
||||
if (msg.role === 'assistant') {
|
||||
return isStreaming ? (
|
||||
<Text color={body}>{boundedLiveRenderText(msg.text)}</Text>
|
||||
) : (
|
||||
<Md compact={compact} t={t} text={msg.text} />
|
||||
)
|
||||
return isStreaming ? <Text color={body}>{msg.text}</Text> : <Md compact={compact} t={t} text={msg.text} />
|
||||
}
|
||||
|
||||
if (msg.role === 'user' && msg.text.length > LONG_MSG && isPasteBackedText(msg.text)) {
|
||||
|
||||
@@ -16,7 +16,6 @@ import {
|
||||
widthByDepth
|
||||
} from '../lib/subagentTree.js'
|
||||
import {
|
||||
boundedLiveRenderText,
|
||||
compactPreview,
|
||||
estimateTokensRough,
|
||||
fmtK,
|
||||
@@ -634,12 +633,7 @@ export const Thinking = memo(function Thinking({
|
||||
streaming?: boolean
|
||||
t: Theme
|
||||
}) {
|
||||
const preview = useMemo(() => {
|
||||
const raw = thinkingPreview(reasoning, mode, THINKING_COT_MAX)
|
||||
|
||||
return mode === 'full' ? boundedLiveRenderText(raw) : raw
|
||||
}, [mode, reasoning])
|
||||
|
||||
const preview = useMemo(() => thinkingPreview(reasoning, mode, THINKING_COT_MAX), [mode, reasoning])
|
||||
const lines = useMemo(() => preview.split('\n').map(line => line.replace(/\t/g, ' ')), [preview])
|
||||
|
||||
if (!preview && !active) {
|
||||
@@ -874,8 +868,8 @@ export const ToolTrail = memo(function ToolTrail({
|
||||
const hasTools = groups.length > 0
|
||||
const hasSubagents = subagents.length > 0
|
||||
const hasMeta = meta.length > 0
|
||||
const hasThinking = !!cot || reasoningActive || busy
|
||||
const thinkingLive = reasoningActive || reasoningStreaming
|
||||
const hasThinking = !!cot || thinkingLive
|
||||
|
||||
const tokenCount =
|
||||
reasoningTokens && reasoningTokens > 0 ? reasoningTokens : reasoning ? estimateTokensRough(reasoning) : 0
|
||||
@@ -1008,7 +1002,7 @@ export const ToolTrail = memo(function ToolTrail({
|
||||
open: openThinking,
|
||||
render: rails => (
|
||||
<Thinking
|
||||
active={thinkingLive}
|
||||
active={reasoningActive}
|
||||
branch="last"
|
||||
mode="full"
|
||||
rails={rails}
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
export const LARGE_PASTE = { chars: 8000, lines: 80 }
|
||||
export const LIVE_RENDER_MAX_CHARS = 16_000
|
||||
export const LIVE_RENDER_MAX_LINES = 240
|
||||
export const LONG_MSG = 300
|
||||
export const MAX_HISTORY = 800
|
||||
export const THINKING_COT_MAX = 160
|
||||
|
||||
@@ -14,9 +14,7 @@ const gw = new GatewayClient()
|
||||
gw.start()
|
||||
|
||||
const dumpNotice = (snap: MemorySnapshot, dump: HeapDumpResult | null) =>
|
||||
snap.source === 'heap'
|
||||
? `hermes-tui: ${snap.level} heap (${formatBytes(snap.heapUsed)}, rss ${formatBytes(snap.rss)}) — auto heap dump → ${dump?.heapPath ?? '(failed)'}\n`
|
||||
: `hermes-tui: ${snap.level} rss (${formatBytes(snap.rss)}, native ${formatBytes(snap.nativeUsed)}) — auto diagnostics → ${dump?.diagPath ?? '(failed)'}\n`
|
||||
`hermes-tui: ${snap.level} memory (${formatBytes(snap.heapUsed)}) — auto heap dump → ${dump?.heapPath ?? '(failed)'}\n`
|
||||
|
||||
setupGracefulExit({
|
||||
cleanups: [() => gw.kill()],
|
||||
|
||||
@@ -145,11 +145,11 @@ export async function performHeapDump(trigger: MemoryTrigger = 'manual'): Promis
|
||||
// Diagnostics first — heap-snapshot serialization can crash on very large
|
||||
// heaps, and the JSON sidecar is the most actionable artifact if so.
|
||||
const diagnostics = await captureMemoryDiagnostics(trigger)
|
||||
const dir = memoryDumpDir()
|
||||
const dir = process.env.HERMES_HEAPDUMP_DIR?.trim() || join(homedir() || tmpdir(), '.hermes', 'heapdumps')
|
||||
|
||||
await mkdir(dir, { recursive: true })
|
||||
|
||||
const base = memoryDumpBase(trigger)
|
||||
const base = `hermes-${new Date().toISOString().replace(/[:.]/g, '-')}-${process.pid}-${trigger}`
|
||||
const heapPath = join(dir, `${base}.heapsnapshot`)
|
||||
const diagPath = join(dir, `${base}.diagnostics.json`)
|
||||
|
||||
@@ -162,23 +162,6 @@ export async function performHeapDump(trigger: MemoryTrigger = 'manual'): Promis
|
||||
}
|
||||
}
|
||||
|
||||
export async function performDiagnosticsDump(trigger: MemoryTrigger = 'manual'): Promise<HeapDumpResult> {
|
||||
try {
|
||||
const diagnostics = await captureMemoryDiagnostics(trigger)
|
||||
const dir = memoryDumpDir()
|
||||
|
||||
await mkdir(dir, { recursive: true })
|
||||
|
||||
const diagPath = join(dir, `${memoryDumpBase(trigger)}.diagnostics.json`)
|
||||
|
||||
await writeFile(diagPath, JSON.stringify(diagnostics, null, 2), { mode: 0o600 })
|
||||
|
||||
return { diagPath, success: true }
|
||||
} catch (e) {
|
||||
return { error: e instanceof Error ? e.message : String(e), success: false }
|
||||
}
|
||||
}
|
||||
|
||||
export function formatBytes(bytes: number): string {
|
||||
if (!Number.isFinite(bytes) || bytes <= 0) {
|
||||
return '0B'
|
||||
@@ -194,11 +177,6 @@ const UNITS = ['B', 'KB', 'MB', 'GB', 'TB']
|
||||
|
||||
const STARTED_AT = { rss: process.memoryUsage().rss, uptime: process.uptime() }
|
||||
|
||||
const memoryDumpDir = () => process.env.HERMES_HEAPDUMP_DIR?.trim() || join(homedir() || tmpdir(), '.hermes', 'heapdumps')
|
||||
|
||||
const memoryDumpBase = (trigger: MemoryTrigger) =>
|
||||
`hermes-${new Date().toISOString().replace(/[:.]/g, '-')}-${process.pid}-${trigger}`
|
||||
|
||||
// Returns undefined when the probe isn't available (non-Linux paths, sandboxed FS).
|
||||
const swallow = async <T>(fn: () => Promise<T>): Promise<T | undefined> => {
|
||||
try {
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
const memory = vi.hoisted(() => ({
|
||||
performDiagnosticsDump: vi.fn(async () => ({ diagPath: '/tmp/diag.json', success: true })),
|
||||
performHeapDump: vi.fn(async () => ({ heapPath: '/tmp/heap.heapsnapshot', success: true }))
|
||||
}))
|
||||
|
||||
vi.mock('./memory.js', () => memory)
|
||||
|
||||
import { type MemorySnapshot, startMemoryMonitor } from './memoryMonitor.js'
|
||||
|
||||
const GB = 1024 ** 3
|
||||
|
||||
const usage = (heapUsed: number, rss: number): NodeJS.MemoryUsage =>
|
||||
({
|
||||
arrayBuffers: 0,
|
||||
external: 0,
|
||||
heapTotal: heapUsed,
|
||||
heapUsed,
|
||||
rss
|
||||
}) as NodeJS.MemoryUsage
|
||||
|
||||
describe('startMemoryMonitor', () => {
|
||||
let memoryUsageSpy: ReturnType<typeof vi.spyOn>
|
||||
|
||||
beforeEach(() => {
|
||||
vi.useFakeTimers()
|
||||
memory.performDiagnosticsDump.mockClear()
|
||||
memory.performHeapDump.mockClear()
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
memoryUsageSpy?.mockRestore()
|
||||
vi.useRealTimers()
|
||||
})
|
||||
|
||||
it('captures diagnostics only for native RSS pressure', async () => {
|
||||
memoryUsageSpy = vi.spyOn(process, 'memoryUsage').mockReturnValue(usage(100 * 1024 ** 2, 5 * GB))
|
||||
|
||||
const snaps: MemorySnapshot[] = []
|
||||
|
||||
const stop = startMemoryMonitor({
|
||||
intervalMs: 1000,
|
||||
onHigh: snap => snaps.push(snap),
|
||||
rssHighBytes: 4 * GB
|
||||
})
|
||||
|
||||
await vi.advanceTimersByTimeAsync(1000)
|
||||
stop()
|
||||
|
||||
expect(memory.performDiagnosticsDump).toHaveBeenCalledWith('auto-high')
|
||||
expect(memory.performHeapDump).not.toHaveBeenCalled()
|
||||
expect(snaps[0]).toMatchObject({ level: 'high', source: 'rss' })
|
||||
expect(snaps[0]?.nativeUsed).toBeGreaterThan(4 * GB)
|
||||
})
|
||||
|
||||
it('keeps heap dumps for V8 heap pressure', async () => {
|
||||
memoryUsageSpy = vi.spyOn(process, 'memoryUsage').mockReturnValue(usage(3 * GB, 3.5 * GB))
|
||||
|
||||
const snaps: MemorySnapshot[] = []
|
||||
|
||||
const stop = startMemoryMonitor({
|
||||
intervalMs: 1000,
|
||||
onCritical: snap => snaps.push(snap)
|
||||
})
|
||||
|
||||
await vi.advanceTimersByTimeAsync(1000)
|
||||
stop()
|
||||
|
||||
expect(memory.performHeapDump).toHaveBeenCalledWith('auto-critical')
|
||||
expect(memory.performDiagnosticsDump).not.toHaveBeenCalled()
|
||||
expect(snaps[0]).toMatchObject({ level: 'critical', source: 'heap' })
|
||||
})
|
||||
})
|
||||
@@ -1,14 +1,11 @@
|
||||
import { type HeapDumpResult, performDiagnosticsDump, performHeapDump } from './memory.js'
|
||||
import { type HeapDumpResult, performHeapDump } from './memory.js'
|
||||
|
||||
export type MemoryLevel = 'critical' | 'high' | 'normal'
|
||||
export type MemoryTriggerSource = 'heap' | 'rss'
|
||||
|
||||
export interface MemorySnapshot {
|
||||
heapUsed: number
|
||||
level: MemoryLevel
|
||||
nativeUsed: number
|
||||
rss: number
|
||||
source: MemoryTriggerSource
|
||||
}
|
||||
|
||||
export interface MemoryMonitorOptions {
|
||||
@@ -17,61 +14,35 @@ export interface MemoryMonitorOptions {
|
||||
intervalMs?: number
|
||||
onCritical?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
|
||||
onHigh?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
|
||||
rssCriticalBytes?: number
|
||||
rssHighBytes?: number
|
||||
}
|
||||
|
||||
const GB = 1024 ** 3
|
||||
|
||||
const maxLevel = (heapLevel: MemoryLevel, rssLevel: MemoryLevel): MemoryLevel => {
|
||||
if (heapLevel === 'critical' || rssLevel === 'critical') {
|
||||
return 'critical'
|
||||
}
|
||||
|
||||
return heapLevel === 'high' || rssLevel === 'high' ? 'high' : 'normal'
|
||||
}
|
||||
|
||||
export function startMemoryMonitor({
|
||||
criticalBytes = 2.5 * GB,
|
||||
highBytes = 1.5 * GB,
|
||||
intervalMs = 10_000,
|
||||
onCritical,
|
||||
onHigh,
|
||||
rssCriticalBytes = 8 * GB,
|
||||
rssHighBytes = 4 * GB
|
||||
onHigh
|
||||
}: MemoryMonitorOptions = {}): () => void {
|
||||
const dumped = new Set<`${MemoryTriggerSource}:${Exclude<MemoryLevel, 'normal'>}`>()
|
||||
const dumped = new Set<Exclude<MemoryLevel, 'normal'>>()
|
||||
|
||||
const tick = async () => {
|
||||
const { heapUsed, rss } = process.memoryUsage()
|
||||
const nativeUsed = Math.max(0, rss - heapUsed)
|
||||
const heapLevel: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
|
||||
const rssLevel: MemoryLevel = rss >= rssCriticalBytes ? 'critical' : rss >= rssHighBytes ? 'high' : 'normal'
|
||||
const level = maxLevel(heapLevel, rssLevel)
|
||||
const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
|
||||
|
||||
if (level === 'normal') {
|
||||
return void dumped.clear()
|
||||
}
|
||||
|
||||
const source: MemoryTriggerSource =
|
||||
heapLevel === level || (heapLevel !== 'normal' && rssLevel === level) ? 'heap' : 'rss'
|
||||
|
||||
const key = `${source}:${level}` as const
|
||||
|
||||
if (dumped.has(key)) {
|
||||
if (dumped.has(level)) {
|
||||
return
|
||||
}
|
||||
|
||||
dumped.add(key)
|
||||
dumped.add(level)
|
||||
const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
|
||||
|
||||
const trigger = level === 'critical' ? 'auto-critical' : 'auto-high'
|
||||
|
||||
const dump =
|
||||
source === 'heap'
|
||||
? await performHeapDump(trigger).catch(() => null)
|
||||
: await performDiagnosticsDump(trigger).catch(() => null)
|
||||
|
||||
const snap: MemorySnapshot = { heapUsed, level, nativeUsed, rss, source }
|
||||
const snap: MemorySnapshot = { heapUsed, level, rss }
|
||||
|
||||
;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
|
||||
}
|
||||
|
||||
+1
-56
@@ -1,4 +1,4 @@
|
||||
import { LIVE_RENDER_MAX_CHARS, LIVE_RENDER_MAX_LINES, THINKING_COT_MAX } from '../config/limits.js'
|
||||
import { THINKING_COT_MAX } from '../config/limits.js'
|
||||
import type { ThinkingMode } from '../types.js'
|
||||
|
||||
const ESC = String.fromCharCode(27)
|
||||
@@ -76,61 +76,6 @@ export const thinkingPreview = (reasoning: string, mode: ThinkingMode, max: numb
|
||||
return !raw || mode === 'collapsed' ? '' : mode === 'full' ? raw : compactPreview(raw.replace(WS_RE, ' '), max)
|
||||
}
|
||||
|
||||
export const boundedLiveRenderText = (
|
||||
text: string,
|
||||
{ maxChars = LIVE_RENDER_MAX_CHARS, maxLines = LIVE_RENDER_MAX_LINES } = {}
|
||||
) => {
|
||||
if (text.length <= maxChars && text.split('\n', maxLines + 1).length <= maxLines) {
|
||||
return text
|
||||
}
|
||||
|
||||
let start = 0
|
||||
let idx = text.length
|
||||
|
||||
for (let seen = 0; seen < maxLines && idx > 0; seen++) {
|
||||
idx = text.lastIndexOf('\n', idx - 1)
|
||||
start = idx < 0 ? 0 : idx + 1
|
||||
|
||||
if (idx < 0) {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
const lineStart = start
|
||||
start = Math.max(lineStart, text.length - maxChars)
|
||||
|
||||
if (start > lineStart) {
|
||||
const nextBreak = text.indexOf('\n', start)
|
||||
|
||||
if (nextBreak >= 0 && nextBreak < text.length - 1) {
|
||||
start = nextBreak + 1
|
||||
}
|
||||
}
|
||||
|
||||
const tail = text.slice(start).trimStart()
|
||||
const omittedLines = countNewlines(text, start)
|
||||
const omittedChars = Math.max(0, text.length - tail.length)
|
||||
|
||||
const label =
|
||||
omittedLines > 0
|
||||
? `[showing live tail; omitted ${fmtK(omittedLines)} lines / ${fmtK(omittedChars)} chars]\n`
|
||||
: `[showing live tail; omitted ${fmtK(omittedChars)} chars]\n`
|
||||
|
||||
return `${label}${tail.trimStart()}`
|
||||
}
|
||||
|
||||
const countNewlines = (text: string, end: number) => {
|
||||
let count = 0
|
||||
|
||||
for (let i = 0; i < end; i++) {
|
||||
if (text.charCodeAt(i) === 10) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
|
||||
return count
|
||||
}
|
||||
|
||||
export const stripTrailingPasteNewlines = (text: string) => (/[^\n]/.test(text) ? text.replace(/\n+$/, '') : text)
|
||||
|
||||
export const toolTrailLabel = (name: string) =>
|
||||
|
||||
@@ -41,11 +41,11 @@ hermes [global-options] <command> [subcommand/options]
|
||||
| `hermes gateway` | Run or manage the messaging gateway service. |
|
||||
| `hermes setup` | Interactive setup wizard for all or part of the configuration. |
|
||||
| `hermes whatsapp` | Configure and pair the WhatsApp bridge. |
|
||||
| `hermes slack` | Slack helpers (currently: generate the app manifest with every command registered as a native slash command). |
|
||||
| `hermes auth` | Manage credentials — add, list, remove, reset, set strategy. Handles OAuth flows for Codex/Nous/Anthropic. |
|
||||
| `hermes login` / `logout` | **Deprecated** — use `hermes auth` instead. |
|
||||
| `hermes status` | Show agent, auth, and platform status. |
|
||||
| `hermes cron` | Inspect and tick the cron scheduler. |
|
||||
| `hermes kanban` | Multi-profile collaboration board (tasks, links, dispatcher). |
|
||||
| `hermes webhook` | Manage dynamic webhook subscriptions for event-driven activation. |
|
||||
| `hermes doctor` | Diagnose config and dependency issues. |
|
||||
| `hermes dump` | Copy-pasteable setup summary for support/debugging. |
|
||||
@@ -222,33 +222,6 @@ hermes whatsapp
|
||||
|
||||
Runs the WhatsApp pairing/setup flow, including mode selection and QR-code pairing.
|
||||
|
||||
## `hermes slack`
|
||||
|
||||
```bash
|
||||
hermes slack manifest # print manifest to stdout
|
||||
hermes slack manifest --write # write to ~/.hermes/slack-manifest.json
|
||||
hermes slack manifest --slashes-only # just the features.slash_commands array
|
||||
```
|
||||
|
||||
Generates a Slack app manifest that registers every gateway command in
|
||||
`COMMAND_REGISTRY` (`/btw`, `/stop`, `/model`, …) as a first-class
|
||||
Slack slash command — for parity with Discord and Telegram. Paste the
|
||||
output into your Slack app config at
|
||||
[https://api.slack.com/apps](https://api.slack.com/apps) → your app →
|
||||
**Features → App Manifest → Edit**, then **Save**. Slack prompts for
|
||||
reinstall if scopes or slash commands changed.
|
||||
|
||||
| Flag | Default | Purpose |
|
||||
|------|---------|---------|
|
||||
| `--write [PATH]` | stdout | Write to a file instead of stdout. Bare `--write` writes `$HERMES_HOME/slack-manifest.json`. |
|
||||
| `--name NAME` | `Hermes` | Bot display name in Slack. |
|
||||
| `--description DESC` | default blurb | Bot description shown in the Slack app directory. |
|
||||
| `--slashes-only` | off | Emit only `features.slash_commands` for merging into a manually-maintained manifest. |
|
||||
|
||||
Run `hermes slack manifest --write` again after `hermes update` to pick
|
||||
up any new commands.
|
||||
|
||||
|
||||
## `hermes login` / `hermes logout` *(Deprecated)*
|
||||
|
||||
:::caution
|
||||
@@ -300,6 +273,38 @@ hermes cron <list|create|edit|pause|resume|run|remove|status|tick>
|
||||
| `status` | Check whether the cron scheduler is running. |
|
||||
| `tick` | Run due jobs once and exit. |
|
||||
|
||||
## `hermes kanban`
|
||||
|
||||
```bash
|
||||
hermes kanban <action> [options]
|
||||
```
|
||||
|
||||
Multi-profile collaboration board. Tasks live in `~/.hermes/kanban.db` (WAL-mode SQLite); every profile reads and writes the same board. A `cron`-driven dispatcher (`hermes kanban dispatch`) atomically claims ready tasks and spawns the assigned profile as its own process with an isolated workspace.
|
||||
|
||||
| Action | Purpose |
|
||||
|--------|---------|
|
||||
| `init` | Create `kanban.db` if missing. Idempotent. |
|
||||
| `create "<title>"` | Create a new task. Flags: `--body`, `--assignee`, `--parent` (repeatable), `--workspace scratch\|worktree\|dir:<path>`, `--tenant`, `--priority`. |
|
||||
| `list` / `ls` | List tasks. Filter with `--mine`, `--assignee`, `--status`, `--tenant`, `--archived`, `--json`. |
|
||||
| `show <id>` | Show a task with comments and events. `--json` for machine output. |
|
||||
| `assign <id> <profile>` | Assign or reassign. Use `none` to unassign. Refused while task is running. |
|
||||
| `link <parent> <child>` | Add a dependency. Cycle-detected. |
|
||||
| `unlink <parent> <child>` | Remove a dependency. |
|
||||
| `claim <id>` | Atomically claim a ready task. Prints resolved workspace path. |
|
||||
| `comment <id> "<text>"` | Append a comment. Visible to the next worker that runs the task. |
|
||||
| `complete <id>` | Mark task done. Flag: `--result "<summary>"` (goes into children's parent-result context). |
|
||||
| `block <id> "<reason>"` | Mark task blocked. Also appends the reason as a comment. |
|
||||
| `unblock <id>` | Return a blocked task to ready. |
|
||||
| `archive <id>` | Hide from default list. `gc` will remove scratch workspaces. |
|
||||
| `tail <id>` | Follow a task's event stream. |
|
||||
| `dispatch` | One dispatcher pass. Flags: `--dry-run`, `--max N`, `--json`. |
|
||||
| `context <id>` | Print the full context a worker would see (title + body + parent results + comments). |
|
||||
| `gc` | Remove scratch workspaces for archived tasks. |
|
||||
|
||||
All actions are also available as a slash command in the gateway (`/kanban …`), with the same argument surface.
|
||||
|
||||
For the full design — comparison with Cline Kanban / Paperclip / NanoClaw / Gemini Enterprise, eight collaboration patterns, four user stories, concurrency correctness proof — see `docs/hermes-kanban-v1-spec.pdf` in the repository or the [Kanban user guide](/docs/user-guide/features/kanban).
|
||||
|
||||
## `hermes webhook`
|
||||
|
||||
```bash
|
||||
|
||||
@@ -146,9 +146,7 @@ terminal:
|
||||
|
||||
**Requirements:** Docker Desktop or Docker Engine installed and running. Hermes probes `$PATH` plus common macOS install locations (`/usr/local/bin/docker`, `/opt/homebrew/bin/docker`, Docker Desktop app bundle).
|
||||
|
||||
**Container lifecycle:** Hermes reuses a single long-lived container (`docker run -d ... sleep 2h`) for every terminal and file-tool call, across sessions, `/new`, `/reset`, and `delegate_task` subagents, for the lifetime of the Hermes process. Commands run via `docker exec` with a login shell, so working-directory changes, installed packages, and files in `/workspace` all persist from one tool call to the next. The container is stopped and removed on Hermes shutdown (or when the idle-sweep reclaims it).
|
||||
|
||||
Parallel subagents spawned via `delegate_task(tasks=[...])` share this one container — concurrent `cd`, env mutations, and writes to the same path will collide. If a subagent needs an isolated sandbox, it must register a per-task image override via `register_task_env_overrides()`, which RL and benchmark environments (TerminalBench2, HermesSweEnv, etc.) do automatically for their per-task Docker images.
|
||||
**Container lifecycle:** Each session starts a long-lived container (`docker run -d ... sleep 2h`). Commands run via `docker exec` with a login shell. On cleanup, the container is stopped and removed.
|
||||
|
||||
**Security hardening:**
|
||||
- `--cap-drop ALL` with only `DAC_OVERRIDE`, `CHOWN`, `FOWNER` added back
|
||||
|
||||
@@ -86,40 +86,6 @@ FIRECRAWL_API_URL=http://localhost:3002
|
||||
FIRECRAWL_BROWSER_TTL=600
|
||||
```
|
||||
|
||||
### Hybrid routing: cloud for public URLs, local for LAN/localhost
|
||||
|
||||
When a cloud provider is configured, Hermes auto-spawns a **local Chromium sidecar**
|
||||
for URLs that resolve to a private/loopback/LAN address (`localhost`, `127.0.0.1`,
|
||||
`192.168.x.x`, `10.x.x.x`, `172.16-31.x.x`, `*.local`, `*.lan`, `*.internal`,
|
||||
IPv6 loopback `::1`, link-local `169.254.x.x`). Public URLs continue to use the
|
||||
cloud provider in the same conversation.
|
||||
|
||||
This solves the common "I'm developing locally but using Browserbase" workflow —
|
||||
the agent can screenshot your dashboard at `http://localhost:3000` AND scrape
|
||||
`https://github.com` without you switching providers or disabling the SSRF guard.
|
||||
The cloud provider never sees the private URL.
|
||||
|
||||
The feature is **on by default**. To disable it (all URLs go to the configured
|
||||
cloud provider, as before):
|
||||
|
||||
```yaml
|
||||
# ~/.hermes/config.yaml
|
||||
browser:
|
||||
cloud_provider: browserbase
|
||||
auto_local_for_private_urls: false
|
||||
```
|
||||
|
||||
With auto-routing disabled, private URLs are rejected with
|
||||
`"Blocked: URL targets a private or internal address"` unless you also set
|
||||
`browser.allow_private_urls: true` (which lets the cloud provider attempt them —
|
||||
this usually won't work, since Browserbase and similar providers can't reach your LAN).
|
||||
|
||||
Requirements: the local sidecar uses the same `agent-browser` CLI as pure local
|
||||
mode, so you need it installed (`hermes setup tools → Browser Automation`
|
||||
auto-installs it). Post-navigation redirects from a public URL onto a private
|
||||
address are still blocked (you can't use a redirect-to-internal trick to reach
|
||||
your LAN through the public path).
|
||||
|
||||
### Camofox local mode
|
||||
|
||||
[Camofox](https://github.com/jo-inc/camofox-browser) is a self-hosted Node.js server wrapping Camoufox (a Firefox fork with C++ fingerprint spoofing). It provides local anti-detection browsing without cloud dependencies.
|
||||
|
||||
@@ -0,0 +1,263 @@
|
||||
# Kanban tutorial
|
||||
|
||||
A walkthrough of the four use-cases the Hermes Kanban system was designed for, with the dashboard open in a browser. If you haven't read the [Kanban overview](./kanban) yet, start there — this assumes you know what a task, run, assignee, and dispatcher are.
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
hermes kanban init # optional; first `hermes kanban <anything>` auto-inits
|
||||
hermes dashboard # opens http://127.0.0.1:9119 in your browser
|
||||
# click Kanban in the left nav
|
||||
```
|
||||
|
||||
The dashboard is the most comfortable place to learn the system. Everything you see here is also available via `hermes kanban <verb>` on the CLI — the two surfaces share the same SQLite database at `~/.hermes/kanban.db`.
|
||||
|
||||
## The board at a glance
|
||||
|
||||

|
||||
|
||||
Six columns, left to right:
|
||||
|
||||
- **Triage** — raw ideas; a specifier fleshes out the spec before anyone works on them.
|
||||
- **Todo** — created but waiting on dependencies, or not yet assigned.
|
||||
- **Ready** — assigned and waiting for the dispatcher to claim.
|
||||
- **In progress** — a worker is actively running the task. With "Lanes by profile" on (the default), this column sub-groups by assignee so you can see at a glance what each worker is doing.
|
||||
- **Blocked** — a worker asked for human input, or the circuit breaker tripped.
|
||||
- **Done** — completed.
|
||||
|
||||
The top bar has filters for search, tenant, and assignee, plus a `Lanes by profile` toggle and a `Nudge dispatcher` button that runs one dispatch tick right now instead of waiting for the daemon's next interval. Clicking any card opens its drawer on the right.
|
||||
|
||||
### Flat view
|
||||
|
||||
If the profile lanes are noisy, toggle "Lanes by profile" off and the In Progress column collapses to a single flat list ordered by claim time:
|
||||
|
||||

|
||||
|
||||
## Story 1 — Solo dev shipping a feature
|
||||
|
||||
You're building a feature. Classic flow: design a schema, implement the API, write the tests. Three tasks with parent→child dependencies.
|
||||
|
||||
```bash
|
||||
SCHEMA=$(hermes kanban create "Design auth schema" \
|
||||
--assignee backend-dev --tenant auth-project --priority 2 \
|
||||
--body "Design the user/session/token schema for the auth module." \
|
||||
--json | jq -r .id)
|
||||
|
||||
API=$(hermes kanban create "Implement auth API endpoints" \
|
||||
--assignee backend-dev --tenant auth-project --priority 2 \
|
||||
--parent $SCHEMA \
|
||||
--body "POST /register, POST /login, POST /refresh, POST /logout." \
|
||||
--json | jq -r .id)
|
||||
|
||||
hermes kanban create "Write auth integration tests" \
|
||||
--assignee qa-dev --tenant auth-project --priority 2 \
|
||||
--parent $API \
|
||||
--body "Cover happy path, wrong password, expired token, concurrent refresh."
|
||||
```
|
||||
|
||||
Because `API` has `SCHEMA` as its parent, and `tests` has `API` as its parent, only `SCHEMA` starts in `ready`. The other two sit in `todo` until their parents complete. This is the dependency promotion engine doing its job — no other worker will pick up the test-writing until there's an API to test.
|
||||
|
||||
Claim the schema task, do the work, hand off:
|
||||
|
||||
```bash
|
||||
hermes kanban claim $SCHEMA
|
||||
|
||||
# (you design the schema, commit, etc.)
|
||||
|
||||
hermes kanban complete $SCHEMA \
|
||||
--summary "users(id, email, pw_hash), sessions(id, user_id, jti, expires_at); refresh tokens stored as sessions with type='refresh'" \
|
||||
--metadata '{
|
||||
"changed_files": ["migrations/001_users.sql", "migrations/002_sessions.sql"],
|
||||
"decisions": ["bcrypt for hashing", "JWT for session tokens", "7-day refresh, 15-min access"]
|
||||
}'
|
||||
```
|
||||
|
||||
When `SCHEMA` hits `done`, the dependency engine promotes `API` to `ready` automatically. The API worker, when it picks up, will read `SCHEMA`'s summary and metadata in its context — so it knows the schema decisions without re-reading a long design doc.
|
||||
|
||||
Click the completed schema task on the board and the drawer shows everything:
|
||||
|
||||

|
||||
|
||||
The Run History section at the bottom is the key addition. One attempt: outcome `completed`, worker `@backend-dev`, duration, timestamp, and the handoff summary in full. The metadata blob (`changed_files`, `decisions`) is stored on the run too and surfaced to any downstream worker that reads this parent.
|
||||
|
||||
On the CLI:
|
||||
|
||||
```bash
|
||||
hermes kanban show $SCHEMA
|
||||
hermes kanban runs $SCHEMA
|
||||
# # OUTCOME PROFILE ELAPSED STARTED
|
||||
# 1 completed backend-dev 0s 2026-04-27 19:34
|
||||
# → users(id, email, pw_hash), sessions(id, user_id, jti, expires_at); refresh tokens ...
|
||||
```
|
||||
|
||||
## Story 2 — Fleet farming
|
||||
|
||||
You have three workers (a translator, a transcriber, a copywriter) and a pile of independent tasks. You want all three pulling in parallel and making visible progress. This is the simplest kanban use-case and the one the original design optimized for.
|
||||
|
||||
Create the work:
|
||||
|
||||
```bash
|
||||
for lang in Spanish French German; do
|
||||
hermes kanban create "Translate homepage to $lang" \
|
||||
--assignee translator --tenant content-ops
|
||||
done
|
||||
for i in 1 2 3 4 5; do
|
||||
hermes kanban create "Transcribe Q3 customer call #$i" \
|
||||
--assignee transcriber --tenant content-ops
|
||||
done
|
||||
for sku in 1001 1002 1003 1004; do
|
||||
hermes kanban create "Generate product description: SKU-$sku" \
|
||||
--assignee copywriter --tenant content-ops
|
||||
done
|
||||
```
|
||||
|
||||
Start one daemon per assignee and walk away:
|
||||
|
||||
```bash
|
||||
hermes kanban daemon --assignee translator &
|
||||
hermes kanban daemon --assignee transcriber &
|
||||
hermes kanban daemon --assignee copywriter &
|
||||
```
|
||||
|
||||
Now filter the board to `content-ops` (or just search for "Transcribe") and you get this:
|
||||
|
||||

|
||||
|
||||
Two transcription tasks are done, one is running, and two are ready, waiting for the next dispatcher tick. The In Progress column is grouped by profile (the "Lanes by profile" default) so you see each worker's active task without scanning a mixed list. The dispatcher will promote the next ready task to running as soon as the current one completes. With three daemons working on three assignee pools in parallel, the whole content queue drains without further human input.
|
||||
|
||||
**Everything Story 1 said about structured handoff still applies here.** A translator worker completing a task can pass `--summary "translated 4 pages, style matched existing marketing voice"` and `--metadata '{"duration_seconds": 720, "tokens_used": 2100}'` — useful for analytics and for any downstream task that depends on this one.
|
||||
|
||||
## Story 3 — Role pipeline with retry
|
||||
|
||||
This is where Kanban earns its keep over a flat TODO list. A PM writes a spec. An engineer implements it. A reviewer rejects the first attempt. The engineer tries again with changes. The reviewer approves.
|
||||
|
||||
The dashboard view, filtered by `auth-project`:
|
||||
|
||||

|
||||
|
||||
Three-stage chain visible at once: `Spec: password reset flow` (DONE, pm), `Implement password reset flow` (DONE, backend-dev), `Review password reset PR` (READY, reviewer). Each has its parent in green at the bottom and children as dependencies.
|
||||
|
||||
The interesting one is the implementation task, because it was blocked and retried:
|
||||
|
||||
```bash
|
||||
# PM completes the spec with acceptance criteria in metadata
|
||||
hermes kanban complete $SPEC \
|
||||
--summary "spec approved; POST /forgot-password sends email, GET /reset/:token renders form, POST /reset applies new password" \
|
||||
--metadata '{"acceptance": [
|
||||
"expired token returns 410",
|
||||
"reused last-3 password returns 400 with message",
|
||||
"successful reset invalidates all active sessions"
|
||||
]}'
|
||||
|
||||
# Engineer claims + implements, but review blocks it for missing strength check
|
||||
hermes kanban claim $IMPL
|
||||
hermes kanban block $IMPL "Review: password strength check missing, reset link isn't single-use (can be replayed within 30min)"
|
||||
|
||||
# Engineer iterates, resolves, completes
|
||||
hermes kanban unblock $IMPL
|
||||
hermes kanban claim $IMPL
|
||||
hermes kanban complete $IMPL \
|
||||
--summary "added zxcvbn strength check, reset tokens are now single-use (stored + deleted on success)" \
|
||||
--metadata '{
|
||||
"changed_files": ["auth/reset.py", "auth/tests/test_reset.py", "migrations/003_single_use_reset_tokens.sql"],
|
||||
"tests_run": 11,
|
||||
"review_iteration": 2
|
||||
}'
|
||||
```
|
||||
|
||||
Click the implementation task. The drawer shows **two attempts**:
|
||||
|
||||

|
||||
|
||||
- **Run 1** — `blocked` by `@backend-dev`. The review feedback sits right under the outcome: "password strength check missing, reset link isn't single-use (can be replayed within 30min)".
|
||||
- **Run 2** — `completed` by `@backend-dev`. Fresh summary, fresh metadata.
|
||||
|
||||
Each run is a row in `task_runs` with its own outcome, summary, and metadata. Retry history is not a conceptual afterthought layered on top of a "latest state" task — it's the primary representation. When a retrying worker opens the task, `build_worker_context` shows it the prior attempts, so the second-pass worker sees why the first pass was blocked and addresses those specific findings instead of re-running from scratch.
|
||||
|
||||
The reviewer picks up next. When they open `Review password reset PR`, they see:
|
||||
|
||||

|
||||
|
||||
The parent link is the completed implementation. When the reviewer's worker calls `build_worker_context`, it pulls the parent's most-recent-completed-run summary + metadata — so the reviewer reads "added zxcvbn strength check, reset tokens are now single-use" and has the list of changed files in hand before looking at a diff.
|
||||
|
||||
## Story 4 — Circuit breaker and crash recovery
|
||||
|
||||
Real workers fail. Missing credentials, OOM kills, transient network errors. The dispatcher has two lines of defense: a **circuit breaker** that auto-blocks after N consecutive failures so the board doesn't thrash forever, and **crash detection** that reclaims a task whose worker PID went away before its TTL expired.
|
||||
|
||||
### Circuit breaker — permanent-looking failure
|
||||
|
||||
A deploy task that can't spawn its worker because `AWS_ACCESS_KEY_ID` isn't set in the profile's environment:
|
||||
|
||||
```bash
|
||||
hermes kanban create "Deploy to staging (missing creds)" \
|
||||
--assignee deploy-bot --tenant ops
|
||||
```
|
||||
|
||||
The dispatcher tries to spawn the worker. Spawn fails (`RuntimeError: AWS_ACCESS_KEY_ID not set`). The dispatcher releases the claim, increments a failure counter, and tries again next tick. After three consecutive failures (the default `failure_limit`), the circuit trips: the task goes to `blocked` with outcome `gave_up`. No more retries until a human unblocks it.
|
||||
|
||||
Click the blocked task:
|
||||
|
||||

|
||||
|
||||
Three runs, all with the same error in the `error` field. The first two are `spawn_failed` (retryable); the third is `gave_up` (terminal). The event log above shows the full sequence: `created → claimed → spawn_failed → claimed → spawn_failed → claimed → gave_up`.
|
||||
|
||||
On the terminal:
|
||||
|
||||
```bash
|
||||
hermes kanban runs t_ef5d
|
||||
# # OUTCOME PROFILE ELAPSED STARTED
|
||||
# 1 spawn_failed deploy-bot 0s 2026-04-27 19:34
|
||||
# ! AWS_ACCESS_KEY_ID not set in deploy-bot env
|
||||
# 2 spawn_failed deploy-bot 0s 2026-04-27 19:34
|
||||
# ! AWS_ACCESS_KEY_ID not set in deploy-bot env
|
||||
# 3 gave_up deploy-bot 0s 2026-04-27 19:34
|
||||
# ! AWS_ACCESS_KEY_ID not set in deploy-bot env
|
||||
```
|
||||
|
||||
If Telegram / Discord / Slack is wired in, a gateway notification fires on the `gave_up` event so you hear about the outage without having to check the board.
|
||||
|
||||
### Crash recovery — worker dies mid-flight
|
||||
|
||||
Sometimes the spawn succeeds but the worker process dies later — segfault, OOM, `systemctl stop`. The dispatcher polls `kill(pid, 0)` and detects the dead pid; the claim releases, the task goes back to `ready`, and the next tick gives it to a fresh worker.
|
||||
|
||||
The example in the seed data is a migration that was running out of memory:
|
||||
|
||||
```bash
|
||||
# Worker claims, starts scanning 2.4M rows, OOM kills it at ~2.3M
|
||||
# Dispatcher detects dead pid, releases claim, increments attempt counter
|
||||
# Retry with a chunked strategy succeeds
|
||||
```
|
||||
|
||||
The drawer shows the full two-attempt history:
|
||||
|
||||

|
||||
|
||||
Run 1 — `crashed`, with the error `OOM kill at row 2.3M (process 99999 gone)`. Run 2 — `completed`, with `"strategy": "chunked with LIMIT + WHERE id > last_id"` in its metadata. The retrying worker saw the crash of run 1 in its context and picked a safer strategy; the metadata makes it obvious to a future observer (or postmortem writer) what changed.
|
||||
|
||||
## Structured handoff — why `--summary` and `--metadata` matter
|
||||
|
||||
In every story above, workers passed `--summary` and `--metadata` on completion. That's not decoration — it's the primary handoff channel between stages of a workflow.
|
||||
|
||||
When a worker on task B reads its context, it gets:
|
||||
|
||||
- B's **prior attempts** (previous runs: outcome, summary, error, metadata) so a retrying worker doesn't repeat a failed path.
|
||||
- **Parent task results** — for each parent, the most-recent completed run's summary and metadata — so downstream workers see why and how the upstream work was done.
|
||||
|
||||
This replaces the "dig through comments and the work output" dance that plagues flat kanban systems. A PM writes acceptance criteria in the spec's metadata, and the engineer's worker sees them structurally. An engineer records which tests they ran and how many passed, and the reviewer's worker has that list in hand before opening a diff.
|
||||
|
||||
The bulk-close guard exists because this data is per-run. `hermes kanban complete a b c --summary X` is refused — copy-pasting the same summary to three tasks is almost always wrong. Bulk close without the handoff flags still works for the common "I finished a pile of admin tasks" case.
|
||||
|
||||
## Inspecting a task currently running
|
||||
|
||||
For completeness — here's the drawer of a task still in flight (the API implementation from Story 1, claimed by `backend-dev` but not yet complete):
|
||||
|
||||

|
||||
|
||||
Status is `Running`. The active run appears in the Run History section with outcome `active` and no `ended_at`. If this worker dies or times out, the dispatcher closes this run with the appropriate outcome and opens a new one on the next claim — the attempt row never disappears.
|
||||
|
||||
## Next steps
|
||||
|
||||
- [Kanban overview](./kanban) — the full data model, event vocabulary, and CLI reference.
|
||||
- `hermes kanban --help` — every subcommand, every flag.
|
||||
- `hermes kanban watch --kinds completed,gave_up,timed_out` — live stream terminal events across the whole board.
|
||||
- `hermes kanban notify-subscribe <task> --platform telegram --chat-id <id>` — get a gateway ping when a specific task finishes.
|
||||
@@ -0,0 +1,500 @@
|
||||
---
|
||||
sidebar_position: 12
|
||||
title: "Kanban (Multi-Agent Board)"
|
||||
description: "Durable SQLite-backed task board for coordinating multiple Hermes profiles"
|
||||
---
|
||||
|
||||
# Kanban — Multi-Agent Profile Collaboration
|
||||
|
||||
> **Want a walkthrough?** Read the [Kanban tutorial](./kanban-tutorial) — four user stories (solo dev, fleet farming, role pipeline with retry, circuit breaker) with dashboard screenshots of each. This page is the reference; the tutorial is the narrative.
|
||||
|
||||
Hermes Kanban is a durable task board, shared across all your Hermes profiles, that lets multiple named agents collaborate on work without fragile in-process subagent swarms. Every task is a row in `~/.hermes/kanban.db`; every handoff is a row anyone can read and write; every worker is a full OS process with its own identity.
|
||||
|
||||
This is the shape that covers the workloads `delegate_task` can't:
|
||||
|
||||
- **Research triage** — parallel researchers + analyst + writer, human-in-the-loop.
|
||||
- **Scheduled ops** — recurring daily briefs that build a journal over weeks.
|
||||
- **Digital twins** — persistent named assistants (`inbox-triage`, `ops-review`) that accumulate memory over time.
|
||||
- **Engineering pipelines** — decompose → implement in parallel worktrees → review → iterate → PR.
|
||||
- **Fleet work** — one specialist managing N subjects (50 social accounts, 12 monitored services).
|
||||
|
||||
For the full design rationale, comparative analysis against Cline Kanban / Paperclip / NanoClaw / Google Gemini Enterprise, and the eight canonical collaboration patterns, see `docs/hermes-kanban-v1-spec.pdf` in the repository.
|
||||
|
||||
## Kanban vs. `delegate_task`
|
||||
|
||||
They look similar; they are not the same primitive.
|
||||
|
||||
| | `delegate_task` | Kanban |
|
||||
|---|---|---|
|
||||
| Shape | RPC call (fork → join) | Durable message queue + state machine |
|
||||
| Parent | Blocks until child returns | Fire-and-forget after `create` |
|
||||
| Child identity | Anonymous subagent | Named profile with persistent memory |
|
||||
| Resumability | None — failed = failed | Block → unblock → re-run; crash → reclaim |
|
||||
| Human in the loop | Not supported | Comment / unblock at any point |
|
||||
| Agents per task | One call = one subagent | N agents over task's life (retry, review, follow-up) |
|
||||
| Audit trail | Lost on context compression | Durable rows in SQLite forever |
|
||||
| Coordination | Hierarchical (caller → callee) | Peer — any profile reads/writes any task |
|
||||
|
||||
**One-sentence distinction:** `delegate_task` is a function call; Kanban is a work queue where every handoff is a row any profile (or human) can see and edit.
|
||||
|
||||
**Use `delegate_task` when** the parent agent needs a short reasoning answer before continuing, no humans are involved, and the result goes straight back into the parent's context.
|
||||
|
||||
**Use Kanban when** work crosses agent boundaries, needs to survive restarts, might need human input, might be picked up by a different role, or needs to be discoverable after the fact.
|
||||
|
||||
They coexist: a kanban worker may call `delegate_task` internally during its run.
|
||||
|
||||
## Core concepts
|
||||
|
||||
- **Task** — a row with title, optional body, one assignee (a profile name), status (`triage | todo | ready | running | blocked | done | archived`), optional tenant namespace, optional idempotency key (dedup for retried automation).
|
||||
- **Link** — `task_links` row recording a parent → child dependency. The dispatcher promotes `todo → ready` when all parents are `done`.
|
||||
- **Comment** — the inter-agent protocol. Agents and humans append comments; when a worker is (re-)spawned it reads the full comment thread as part of its context.
|
||||
- **Workspace** — the directory a worker operates in. Three kinds:
|
||||
- `scratch` (default) — fresh tmp dir under `~/.hermes/kanban/workspaces/<id>/`.
|
||||
- `dir:<path>` — an existing shared directory (Obsidian vault, mail ops dir, per-account folder). **Must be an absolute path.** Relative paths like `dir:../tenants/foo/` are rejected at dispatch because they'd resolve against whatever CWD the dispatcher happens to be in, which is ambiguous and a confused-deputy escape vector. The path is otherwise trusted — it's your box, your filesystem, the worker runs with your uid. This is the trusted-local-user threat model; kanban is single-host by design.
|
||||
- `worktree` — a git worktree under `.worktrees/<id>/` for coding tasks. Worker-side `git worktree add` creates it.
|
||||
- **Dispatcher** — a long-lived loop that, every N seconds (default 60): reclaims stale claims, reclaims crashed workers (PID gone but TTL not yet expired), promotes ready tasks, atomically claims, spawns assigned profiles. Runs as `hermes kanban daemon` (foreground) or as a systemd user service. After ~5 consecutive spawn failures on the same task the dispatcher auto-blocks it with the last error as the reason — prevents thrashing on tasks whose profile doesn't exist, workspace can't mount, etc.
|
||||
- **Tenant** — optional string namespace. One specialist fleet can serve multiple businesses (`--tenant business-a`) with data isolation by workspace path and memory key prefix.
|
||||
|
||||
## Quick start
|
||||
|
||||
```bash
|
||||
# 1. Create the board
|
||||
hermes kanban init
|
||||
|
||||
# 2. Start the dispatcher (backgrounded here with `&`; omit the `&` to run it in the foreground, Ctrl-C to stop)
|
||||
hermes kanban daemon &
|
||||
|
||||
# 3. Create a task
|
||||
hermes kanban create "research AI funding landscape" --assignee researcher
|
||||
|
||||
# 4. Watch activity live
|
||||
hermes kanban watch
|
||||
|
||||
# 5. See the board
|
||||
hermes kanban list
|
||||
hermes kanban stats
|
||||
```
|
||||
|
||||
### Running the dispatcher as a service
|
||||
|
||||
For production, install the systemd user unit shipped at
|
||||
`plugins/kanban/systemd/hermes-kanban-dispatcher.service`:
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.config/systemd/user
|
||||
cp plugins/kanban/systemd/hermes-kanban-dispatcher.service \
|
||||
~/.config/systemd/user/
|
||||
systemctl --user daemon-reload
|
||||
systemctl --user enable --now hermes-kanban-dispatcher.service
|
||||
systemctl --user status hermes-kanban-dispatcher
|
||||
journalctl --user -u hermes-kanban-dispatcher -f # follow logs
|
||||
```
|
||||
|
||||
Without a running dispatcher, `ready` tasks stay where they are — `hermes kanban init` will remind you of this on first run.
|
||||
|
||||
### Idempotent create (for automation / webhooks)
|
||||
|
||||
```bash
|
||||
# First call creates the task. Any subsequent call with the same key
|
||||
# returns the existing task id instead of duplicating.
|
||||
hermes kanban create "nightly ops review" \
|
||||
--assignee ops \
|
||||
--idempotency-key "nightly-ops-$(date -u +%Y-%m-%d)" \
|
||||
--json
|
||||
```
|
||||
|
||||
### Bulk CLI verbs
|
||||
|
||||
All the lifecycle verbs accept multiple ids so you can clean up a batch
|
||||
in one command:
|
||||
|
||||
```bash
|
||||
hermes kanban complete t_abc t_def t_hij --result "batch wrap"
|
||||
hermes kanban archive t_abc t_def t_hij
|
||||
hermes kanban unblock t_abc t_def
|
||||
hermes kanban block t_abc "need input" --ids t_def t_hij
|
||||
```
|
||||
|
||||
## How workers interact with the board
|
||||
|
||||
When the dispatcher spawns a worker, it sets `HERMES_KANBAN_TASK` in the child's env. That env var is the gate for a dedicated **kanban toolset** — 7 tools that the normal agent schema never sees:
|
||||
|
||||
| Tool | Purpose |
|
||||
|---|---|
|
||||
| `kanban_show` | Read the current task (title, body, prior attempts, parent handoffs, comments, full `worker_context`). Defaults to the env's task id. |
|
||||
| `kanban_complete` | Finish with `summary` + `metadata` structured handoff. |
|
||||
| `kanban_block` | Escalate for human input. |
|
||||
| `kanban_heartbeat` | Signal liveness during long operations. |
|
||||
| `kanban_comment` | Append to the task thread. |
|
||||
| `kanban_create` | (Orchestrators) fan out into child tasks. |
|
||||
| `kanban_link` | (Orchestrators) add dependency edges after the fact. |
|
||||
|
||||
**Why tools and not just shelling to `hermes kanban`?** Three reasons:
|
||||
|
||||
1. **Backend portability.** Workers whose terminal tool points at a remote backend (Docker / Modal / Singularity / SSH) would run `hermes kanban complete` inside the container where `hermes` isn't installed and the DB isn't mounted. The kanban tools run in the agent's own Python process and always reach `~/.hermes/kanban.db` regardless of terminal backend.
|
||||
2. **No shell-quoting fragility.** Passing `--metadata '{"files": [...]}'` through shlex + argparse is a latent footgun. Structured tool args skip it.
|
||||
3. **Better errors.** Tool results are structured JSON the model can reason about, not stderr strings it has to parse.
|
||||
|
||||
**Zero schema footprint on normal sessions.** A regular `hermes chat` session has zero `kanban_*` tools in its schema. The `check_fn` on each tool only returns True when `HERMES_KANBAN_TASK` is set, which only happens when the dispatcher spawned this process. No tool bloat for users who never touch kanban.
|
||||
|
||||
The `kanban-worker` and `kanban-orchestrator` skills teach the model which tool to call when and in what order.
|
||||
|
||||
### The worker skill
|
||||
|
||||
Any profile that should be able to work kanban tasks must load the `kanban-worker` skill. It teaches the worker the full lifecycle:
|
||||
|
||||
1. On spawn, call `kanban_show()` to read title + body + parent handoffs + prior attempts + full comment thread.
|
||||
2. `cd $HERMES_KANBAN_WORKSPACE` and do the work there.
|
||||
3. Call `kanban_heartbeat(note="...")` every few minutes during long operations.
|
||||
4. Complete with `kanban_complete(summary="...", metadata={...})`, or `kanban_block(reason="...")` if stuck.
|
||||
|
||||
Load it with:
|
||||
|
||||
```bash
|
||||
hermes skills install devops/kanban-worker
|
||||
```
|
||||
|
||||
The dispatcher also auto-passes `--skills kanban-worker` when spawning every worker, so the worker always has the pattern library available even if a profile's default skills config doesn't include it.
|
||||
|
||||
### Pinning extra skills to a specific task
|
||||
|
||||
Sometimes a single task needs specialist context the assignee profile doesn't carry by default — a translation job that needs the `translation` skill, a review task that needs `github-code-review`, a security audit that needs `security-pr-audit`. Rather than editing the assignee's profile every time, attach the skills directly to the task:
|
||||
|
||||
```bash
|
||||
# CLI — repeat --skill for each extra skill
|
||||
hermes kanban create "translate README to Japanese" \
|
||||
--assignee linguist \
|
||||
--skill translation
|
||||
|
||||
# Multiple skills
|
||||
hermes kanban create "audit auth flow" \
|
||||
--assignee reviewer \
|
||||
--skill security-pr-audit \
|
||||
--skill github-code-review
|
||||
```
|
||||
|
||||
From the dashboard's inline create form, type the skills comma-separated into the **skills** field. From another agent (orchestrator pattern), use `kanban_create(skills=[...])`:
|
||||
|
||||
```
|
||||
kanban_create(
|
||||
title="translate README to Japanese",
|
||||
assignee="linguist",
|
||||
skills=["translation"],
|
||||
)
|
||||
```
|
||||
|
||||
These skills are **additive** to the built-in `kanban-worker` — the dispatcher emits one `--skills <name>` flag for each (and for the built-in), so the worker spawns with all of them loaded. The skill names must match skills that are actually installed on the assignee's profile (run `hermes skills list` to see what's available); there's no runtime install.
|
||||
|
||||
### The orchestrator skill
|
||||
|
||||
A **well-behaved orchestrator does not do the work itself.** It decomposes the user's goal into tasks, links them, assigns each to a specialist, and steps back. The `kanban-orchestrator` skill encodes this: anti-temptation rules, a standard specialist roster (`researcher`, `writer`, `analyst`, `backend-eng`, `reviewer`, `ops`), and a decomposition playbook.
|
||||
|
||||
Load it into your orchestrator profile:
|
||||
|
||||
```bash
|
||||
hermes skills install devops/kanban-orchestrator
|
||||
```
|
||||
|
||||
For best results, pair it with a profile whose toolsets are restricted to board operations (`kanban`, `gateway`, `memory`) so the orchestrator literally cannot execute implementation tasks even if it tries.
|
||||
|
||||
## Dashboard (GUI)
|
||||
|
||||
The `hermes kanban` CLI and the `/kanban` slash command are enough to run the board headlessly, but a visual board is often the right interface for human-in-the-loop work: triage, cross-profile supervision, reading comment threads, and dragging cards between columns. Hermes ships this as a **bundled dashboard plugin** at `plugins/kanban/` — not a core feature, not a separate service — following the model laid out in [Extending the Dashboard](./extending-the-dashboard).
|
||||
|
||||
Open it with:
|
||||
|
||||
```bash
|
||||
hermes kanban init # one-time: create kanban.db if not already present
|
||||
hermes dashboard # "Kanban" tab appears in the nav, after "Skills"
|
||||
```
|
||||
|
||||
### What the plugin gives you
|
||||
|
||||
- A **Kanban** tab showing one column per status: `triage`, `todo`, `ready`, `running`, `blocked`, `done` (plus `archived` when the toggle is on).
|
||||
- `triage` is the parking column for rough ideas a specifier is expected to flesh out. Tasks created with `hermes kanban create --triage` (or via the Triage column's inline create) land here and the dispatcher leaves them alone until a human or specifier promotes them to `todo` / `ready`.
|
||||
- Cards show the task id, title, priority badge, tenant tag, assigned profile, comment/link counts, a **progress pill** (`N/M` children done when the task has dependents), and "created N ago". A per-card checkbox enables multi-select.
|
||||
- **Per-profile lanes inside Running** — toolbar checkbox toggles sub-grouping of the Running column by assignee.
|
||||
- **Live updates via WebSocket** — the plugin tails the append-only `task_events` table on a short poll interval; the board reflects changes the instant any profile (CLI, gateway, or another dashboard tab) acts. Reloads are debounced so a burst of events triggers a single refetch.
|
||||
- **Drag-drop** cards between columns to change status. The drop sends `PATCH /api/plugins/kanban/tasks/:id` which routes through the same `kanban_db` code the CLI uses — the three surfaces can never drift. Moves into destructive statuses (`done`, `archived`, `blocked`) prompt for confirmation. Touch devices use a pointer-based fallback so the board is usable from a tablet.
|
||||
- **Inline create** — click `+` on any column header to type a title, assignee, priority, and (optionally) a parent task from a dropdown over every existing task. Creating from the Triage column automatically parks the new task in triage.
|
||||
- **Multi-select with bulk actions** — shift/ctrl-click a card or tick its checkbox to add it to the selection. A bulk action bar appears at the top with batch status transitions, archive, and reassign (by profile dropdown, or "(unassign)"). Destructive batches confirm first. Per-id partial failures are reported without aborting the rest.
|
||||
- **Click a card** (without shift/ctrl) to open a side drawer (Escape or click-outside closes) with:
|
||||
- **Editable title** — click the heading to rename.
|
||||
- **Editable assignee / priority** — click the meta row to rewrite.
|
||||
- **Editable description** — markdown-rendered by default (headings, bold, italic, inline code, fenced code, `http(s)` / `mailto:` links, bullet lists), with an "edit" button that swaps in a textarea. Markdown rendering is a tiny, XSS-safe renderer — every substitution runs on HTML-escaped input, only `http(s)` / `mailto:` links pass through, and `target="_blank"` + `rel="noopener noreferrer"` are always set.
|
||||
- **Dependency editor** — chip list of parents and children, each with an `×` to unlink, plus dropdowns over every other task to add a new parent or child. Cycle attempts are rejected server-side with a clear message.
|
||||
- **Status action row** (→ triage / → ready / → running / block / unblock / complete / archive) with confirm prompts for destructive transitions.
|
||||
- Result section (also markdown-rendered), comment thread with Enter-to-submit, the last 20 events.
|
||||
- **Toolbar filters** — free-text search, tenant dropdown (defaults to `dashboard.kanban.default_tenant` from `config.yaml`), assignee dropdown, "show archived" toggle, "lanes by profile" toggle, and a **Nudge dispatcher** button so you don't have to wait for the next 60 s tick.
|
||||
|
||||
Visually the target is the familiar Linear / Fusion layout: dark theme, column headers with counts, coloured status dots, pill chips for priority and tenant. The plugin reads only theme CSS vars (`--color-*`, `--radius`, `--font-mono`, ...), so it reskins automatically with whichever dashboard theme is active.
|
||||
|
||||
### Architecture
|
||||
|
||||
The GUI is strictly a **read-through-the-DB + write-through-kanban_db** layer with no domain logic of its own:
|
||||
|
||||
```
|
||||
┌────────────────────────┐ WebSocket (tails task_events)
|
||||
│ React SPA (plugin) │ ◀──────────────────────────────────┐
|
||||
│ HTML5 drag-and-drop │ │
|
||||
└──────────┬─────────────┘ │
|
||||
│ REST over fetchJSON │
|
||||
▼ │
|
||||
┌────────────────────────┐ writes call kanban_db.* │
|
||||
│ FastAPI router │ directly — same code path │
|
||||
│ plugins/kanban/ │ the CLI /kanban verbs use │
|
||||
│ dashboard/plugin_api.py │
|
||||
└──────────┬─────────────┘ │
|
||||
│ │
|
||||
▼ │
|
||||
┌────────────────────────┐ │
|
||||
│ ~/.hermes/kanban.db │ ───── append task_events ──────────┘
|
||||
│ (WAL, shared) │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
### REST surface
|
||||
|
||||
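All routes are mounted under `/api/plugins/kanban/`. The REST routes are unauthenticated, like every dashboard plugin route; only the `/events` WebSocket requires the dashboard's ephemeral session token (see "Security model" below):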
|
||||
|
||||
| Method | Path | Purpose |
|
||||
|---|---|---|
|
||||
| `GET` | `/board?tenant=<name>&include_archived=…` | Full board grouped by status column, plus tenants + assignees for filter dropdowns |
|
||||
| `GET` | `/tasks/:id` | Task + comments + events + links |
|
||||
| `POST` | `/tasks` | Create (wraps `kanban_db.create_task`, accepts `triage: bool` and `parents: [id, …]`) |
|
||||
| `PATCH` | `/tasks/:id` | Status / assignee / priority / title / body / result |
|
||||
| `POST` | `/tasks/bulk` | Apply the same patch (status / archive / assignee / priority) to every id in `ids`. Per-id failures reported without aborting siblings |
|
||||
| `POST` | `/tasks/:id/comments` | Append a comment |
|
||||
| `POST` | `/links` | Add a dependency (`parent_id` → `child_id`) |
|
||||
| `DELETE` | `/links?parent_id=…&child_id=…` | Remove a dependency |
|
||||
| `POST` | `/dispatch?max=…&dry_run=…` | Nudge the dispatcher — skip the 60 s wait |
|
||||
| `GET` | `/config` | Read `dashboard.kanban` preferences from `config.yaml` — `default_tenant`, `lane_by_profile`, `include_archived_by_default`, `render_markdown` |
|
||||
| `WS` | `/events?since=<event_id>` | Live stream of `task_events` rows |
|
||||
|
||||
Every handler is a thin wrapper — the plugin is ~700 lines of Python (router + WebSocket tail + bulk batcher + config reader) and adds no new business logic. A tiny `_conn()` helper auto-initializes `kanban.db` on every read and write, so a fresh install works whether the user opened the dashboard first, hit the REST API directly, or ran `hermes kanban init`.
|
||||
|
||||
### Dashboard config
|
||||
|
||||
Any of these keys under `dashboard.kanban` in `~/.hermes/config.yaml` changes the tab's defaults — the plugin reads them at load time via `GET /config`:
|
||||
|
||||
```yaml
|
||||
dashboard:
|
||||
kanban:
|
||||
default_tenant: acme # preselects the tenant filter
|
||||
lane_by_profile: true # default for the "lanes by profile" toggle
|
||||
include_archived_by_default: false
|
||||
render_markdown: true # set false for plain <pre> rendering
|
||||
```
|
||||
|
||||
Each key is optional and falls back to the shown default.
|
||||
|
||||
### Security model
|
||||
|
||||
The dashboard's HTTP auth middleware [explicitly skips `/api/plugins/`](./extending-the-dashboard#backend-api-routes) — plugin routes are unauthenticated by design because the dashboard binds to localhost by default. That means the kanban REST surface is reachable from any process on the host.
|
||||
|
||||
The WebSocket takes one additional step: it requires the dashboard's ephemeral session token as a `?token=…` query parameter (browsers can't set `Authorization` on an upgrade request), matching the pattern used by the in-browser PTY bridge.
|
||||
|
||||
If you run `hermes dashboard --host 0.0.0.0`, every plugin route — kanban included — becomes reachable from the network. **Don't do that on a shared host.** The board contains task bodies, comments, and workspace paths; an attacker reaching these routes gets read access to your entire collaboration surface and can also create / reassign / archive tasks.
|
||||
|
||||
Tasks in `~/.hermes/kanban.db` are profile-agnostic on purpose (that's the coordination primitive). If you open the dashboard with `hermes -p <profile> dashboard`, the board still shows tasks created by any other profile on the host. All profiles belong to the same user, but this is worth knowing if multiple personas coexist.
|
||||
|
||||
### Live updates
|
||||
|
||||
`task_events` is an append-only SQLite table with a monotonic `id`. The WebSocket endpoint holds each client's last-seen event id and pushes new rows as they land. When a burst of events arrives, the frontend reloads the (very cheap) board endpoint — simpler and more correct than trying to patch local state from every event kind. WAL mode means the read loop never blocks the dispatcher's `BEGIN IMMEDIATE` claim transactions.
|
||||
|
||||
### Extending it
|
||||
|
||||
The plugin uses the standard Hermes dashboard plugin contract — see [Extending the Dashboard](./extending-the-dashboard) for the full manifest reference, shell slots, page-scoped slots, and the Plugin SDK. Extra columns, custom card chrome, tenant-filtered layouts, or full `tab.override` replacements are all expressible without forking this plugin.
|
||||
|
||||
To disable without removing: add `dashboard.plugins.kanban.enabled: false` to `config.yaml` (or delete `plugins/kanban/dashboard/manifest.json`).
|
||||
|
||||
### Scope boundary
|
||||
|
||||
The GUI is deliberately thin. Everything the plugin does is reachable from the CLI; the plugin just makes it comfortable for humans. Auto-assignment, budgets, governance gates, and org-chart views remain user-space — a router profile, another plugin, or a reuse of `tools/approval.py` — exactly as listed in the out-of-scope section of the design spec.
|
||||
|
||||
## CLI command reference
|
||||
|
||||
```
|
||||
hermes kanban init # create kanban.db + print daemon hint
|
||||
hermes kanban create "<title>" [--body ...] [--assignee <profile>]
|
||||
[--parent <id>]... [--tenant <name>]
|
||||
[--workspace scratch|worktree|dir:<path>]
|
||||
[--priority N] [--triage] [--idempotency-key KEY]
|
||||
[--max-runtime 30m|2h|1d|<seconds>]
|
||||
[--skill <name>]...
|
||||
[--json]
|
||||
hermes kanban list [--mine] [--assignee P] [--status S] [--tenant T] [--archived] [--json]
|
||||
hermes kanban show <id> [--json]
|
||||
hermes kanban assign <id> <profile> # or 'none' to unassign
|
||||
hermes kanban link <parent_id> <child_id>
|
||||
hermes kanban unlink <parent_id> <child_id>
|
||||
hermes kanban claim <id> [--ttl SECONDS]
|
||||
hermes kanban comment <id> "<text>" [--author NAME]
|
||||
|
||||
# Bulk verbs — accept multiple ids:
|
||||
hermes kanban complete <id>... [--result "..."]
|
||||
hermes kanban block <id> "<reason>" [--ids <id>...]
|
||||
hermes kanban unblock <id>...
|
||||
hermes kanban archive <id>...
|
||||
|
||||
hermes kanban tail <id> # follow a single task's event stream
|
||||
hermes kanban watch [--assignee P] [--tenant T] # live stream ALL events to the terminal
|
||||
[--kinds completed,blocked,…] [--interval SECS]
|
||||
hermes kanban heartbeat <id> [--note "..."] # worker liveness signal for long ops
|
||||
hermes kanban runs <id> [--json] # attempt history (one row per run)
|
||||
hermes kanban assignees [--json] # profiles on disk + per-assignee task counts
|
||||
hermes kanban dispatch [--dry-run] [--max N] # one-shot pass
|
||||
[--failure-limit N] [--json]
|
||||
hermes kanban daemon [--interval SECS] [--max N] # long-lived loop
|
||||
[--failure-limit N] [--pidfile PATH] [-v]
|
||||
hermes kanban stats [--json] # per-status + per-assignee counts
|
||||
hermes kanban log <id> [--tail BYTES] # worker log from ~/.hermes/kanban/logs/
|
||||
hermes kanban notify-subscribe <id> # gateway bridge hook (used by /kanban in the gateway)
|
||||
--platform <name> --chat-id <id> [--thread-id <id>] [--user-id <id>]
|
||||
hermes kanban notify-list [<id>] [--json]
|
||||
hermes kanban notify-unsubscribe <id>
|
||||
--platform <name> --chat-id <id> [--thread-id <id>]
|
||||
hermes kanban context <id> # what a worker sees
|
||||
hermes kanban gc [--event-retention-days N] # workspaces + old events + old logs
|
||||
[--log-retention-days N]
|
||||
```
|
||||
|
||||
Every command is also available as a slash command in the gateway (`/kanban list`, `/kanban comment t_abc "need docs"`, etc.). The slash command bypasses the running-agent guard, so you can `/kanban unblock` a stuck worker while the main agent is still chatting.
|
||||
|
||||
## Collaboration patterns
|
||||
|
||||
The board supports these nine patterns without any new primitives:
|
||||
|
||||
| Pattern | Shape | Example |
|
||||
|---|---|---|
|
||||
| **P1 Fan-out** | N siblings, same role | "research 5 angles in parallel" |
|
||||
| **P2 Pipeline** | role chain: scout → editor → writer | daily brief assembly |
|
||||
| **P3 Voting / quorum** | N siblings + 1 aggregator | 3 researchers → 1 reviewer picks |
|
||||
| **P4 Long-running journal** | same profile + shared dir + cron | Obsidian vault |
|
||||
| **P5 Human-in-the-loop** | worker blocks → user comments → unblock | ambiguous decisions |
|
||||
| **P6 `@mention`** | inline routing from prose | `@reviewer look at this` |
|
||||
| **P7 Thread-scoped workspace** | `/kanban here` in a thread | per-project gateway threads |
|
||||
| **P8 Fleet farming** | one profile, N subjects | 50 social accounts |
|
||||
| **P9 Triage specifier** | rough idea → `triage` → specifier expands body → `todo` | "turn this one-liner into a spec" task |
|
||||
|
||||
For worked examples of each, see `docs/hermes-kanban-v1-spec.pdf`.
|
||||
|
||||
## Multi-tenant usage
|
||||
|
||||
When one specialist fleet serves multiple businesses, tag each task with a tenant:
|
||||
|
||||
```bash
|
||||
hermes kanban create "monthly report" \
|
||||
--assignee researcher \
|
||||
--tenant business-a \
|
||||
--workspace dir:~/tenants/business-a/data/
|
||||
```
|
||||
|
||||
Workers receive `$HERMES_TENANT` and namespace their memory writes by prefix. The board, the dispatcher, and the profile definitions are all shared; only the data is scoped.
|
||||
|
||||
## Gateway notifications
|
||||
|
||||
When you run `/kanban create …` from the gateway (Telegram, Discord, Slack, etc.), the originating chat is automatically subscribed to the new task. The gateway's background notifier polls `task_events` every few seconds and delivers one message per terminal event (`completed`, `blocked`, `gave_up`, `crashed`, `timed_out`) to that chat. Completed tasks also send the first line of the worker's `--result` so you see the outcome without having to `/kanban show`.
|
||||
|
||||
You can manage subscriptions explicitly from the CLI — useful when a script / cron job wants to notify a chat it didn't originate from:
|
||||
|
||||
```bash
|
||||
hermes kanban notify-subscribe t_abcd \
|
||||
--platform telegram --chat-id 12345678 --thread-id 7
|
||||
hermes kanban notify-list
|
||||
hermes kanban notify-unsubscribe t_abcd \
|
||||
--platform telegram --chat-id 12345678 --thread-id 7
|
||||
```
|
||||
|
||||
A subscription removes itself automatically once the task reaches `done` or `archived`; no cleanup needed.
|
||||
|
||||
## Runs — one row per attempt
|
||||
|
||||
A task is a logical unit of work; a **run** is one attempt to execute it. When the dispatcher claims a ready task it creates a row in `task_runs` and points `tasks.current_run_id` at it. When that attempt ends — completed, blocked, crashed, timed out, spawn-failed, reclaimed — the run row closes with an `outcome` and the task's pointer clears. A task that's been attempted three times has three `task_runs` rows.
|
||||
|
||||
Why two tables instead of just mutating the task: you need **full attempt history** for real-world postmortems ("the second reviewer attempt got to approve, the third merged"), and you need a clean place to hang per-attempt metadata — which files changed, which tests ran, which findings a reviewer noted. Those are run facts, not task facts.
|
||||
|
||||
Runs are also where **structured handoff** lives. When a worker completes a task it can pass:
|
||||
|
||||
- `--result "<short log line>"` — goes on the task row as before (for back-compat).
|
||||
- `--summary "<human handoff>"` — goes on the run; downstream children see it in their `build_worker_context`.
|
||||
- `--metadata '{"changed_files": [...], "tests_run": 12}'` — JSON dict on the run; children see it serialized alongside the summary.
|
||||
|
||||
Downstream children read the most recent completed run's summary + metadata for each parent. Retrying workers read the prior attempts on their own task (outcome, summary, error) so they don't repeat a path that already failed.
|
||||
|
||||
```bash
|
||||
# Worker completes with a structured handoff:
|
||||
hermes kanban complete t_abcd \
|
||||
--result "rate limiter shipped" \
|
||||
--summary "implemented token bucket, keys on user_id with IP fallback, all tests pass" \
|
||||
--metadata '{"changed_files": ["limiter.py", "tests/test_limiter.py"], "tests_run": 14}'
|
||||
|
||||
# Review the attempt history on a retried task:
|
||||
hermes kanban runs t_abcd
|
||||
# # OUTCOME PROFILE ELAPSED STARTED
|
||||
# 1 blocked worker 12s 2026-04-27 14:02
|
||||
# → BLOCKED: need decision on rate-limit key
|
||||
# 2 completed worker 8m 2026-04-27 15:18
|
||||
# → implemented token bucket, keys on user_id with IP fallback
|
||||
```
|
||||
|
||||
Runs are exposed on the dashboard (Run History section in the drawer, one coloured row per attempt) and on the REST API (`GET /api/plugins/kanban/tasks/:id` returns a `runs[]` array). `PATCH /api/plugins/kanban/tasks/:id` with `{status: "done", summary, metadata}` forwards both to the kernel, so the dashboard's "mark done" button is CLI-equivalent. `task_events` rows carry the `run_id` they belong to so the UI can group them by attempt, and the `completed` event embeds the first-line summary in its payload (capped at 400 chars) so gateway notifiers can render structured handoffs without a second SQL round-trip.
|
||||
|
||||
**Bulk close caveat.** `hermes kanban complete a b c --summary X` is refused — structured handoff is per-run, so copy-pasting the same summary to N tasks is almost always wrong. Bulk close *without* `--summary` / `--metadata` still works for the common "I finished a pile of admin tasks" case.
|
||||
|
||||
**Reclaimed runs from status changes.** If you drag a running task off `running` in the dashboard (back to `ready`, or straight to `todo`), or archive a task that was still running, the in-flight run closes with `outcome='reclaimed'` rather than being orphaned. The `task_runs` row is always in a terminal state when `tasks.current_run_id` is `NULL`, and vice versa — that invariant holds across CLI, dashboard, dispatcher, and notifier.
|
||||
|
||||
**Synthetic runs for never-claimed completions.** Completing or blocking a task that was never claimed (e.g. a human closes a `ready` task from the dashboard with a summary, or a CLI user runs `hermes kanban complete <ready-task> --summary X`) would otherwise drop the handoff. Instead the kernel inserts a zero-duration run row (`started_at == ended_at`) carrying the summary / metadata / reason so attempt history stays complete. The `completed` / `blocked` event's `run_id` points at that row.
|
||||
|
||||
**Live drawer refresh.** When the dashboard's WebSocket event stream reports new events for the task the user is currently viewing, the drawer reloads itself (via a per-task event counter threaded into its `useEffect` dependency list). Closing and reopening is no longer required to see a run's new row or updated outcome.
|
||||
|
||||
### Forward compatibility
|
||||
|
||||
Two nullable columns on `tasks` are reserved for v2 workflow routing: `workflow_template_id` (which template this task belongs to) and `current_step_key` (which step in that template is active). The v1 kernel ignores them for routing but lets clients write them, so a v2 release can add the routing machinery without another schema migration.
|
||||
|
||||
## Event reference
|
||||
|
||||
Every transition appends a row to `task_events`. Each row carries an optional `run_id` so UIs can group events by attempt. Kinds group into three clusters so filtering is easy (`hermes kanban watch --kinds completed,gave_up,timed_out`):
|
||||
|
||||
**Lifecycle** (what changed about the task as a logical unit):
|
||||
|
||||
| Kind | Payload | When |
|
||||
|---|---|---|
|
||||
| `created` | `{assignee, status, parents, tenant}` | Task inserted. `run_id` is `NULL`. |
|
||||
| `promoted` | — | `todo → ready` because all parents hit `done`. `run_id` is `NULL`. |
|
||||
| `claimed` | `{lock, expires, run_id}` | Dispatcher atomically claimed a `ready` task for spawn. |
|
||||
| `completed` | `{result_len, summary?}` | Worker wrote `--result` / `--summary` and task hit `done`. `summary` is the first-line handoff (400-char cap); full version lives on the run row. If `complete_task` is called on a never-claimed task with handoff fields, a zero-duration run is synthesized so `run_id` still points at something. |
|
||||
| `blocked` | `{reason}` | Worker or human flipped the task to `blocked`. Synthesizes a zero-duration run when called on a never-claimed task with `--reason`. |
|
||||
| `unblocked` | — | `blocked → ready`, either manually or via `/unblock`. `run_id` is `NULL`. |
|
||||
| `archived` | — | Hidden from the default board. If the task was still running, carries the `run_id` of the run that was reclaimed as a side effect. |
|
||||
|
||||
**Edits** (human-driven changes that aren't transitions):
|
||||
|
||||
| Kind | Payload | When |
|
||||
|---|---|---|
|
||||
| `assigned` | `{assignee}` | Assignee changed (including unassignment). |
|
||||
| `edited` | `{fields}` | Title or body updated. |
|
||||
| `reprioritized` | `{priority}` | Priority changed. |
|
||||
| `status` | `{status}` | Dashboard drag-drop wrote a status directly (e.g. `todo → ready`). Carries the `run_id` of the run that was reclaimed when dragging off `running`; otherwise `run_id` is `NULL`. |
|
||||
|
||||
**Worker telemetry** (about the execution process, not the logical task):
|
||||
|
||||
| Kind | Payload | When |
|
||||
|---|---|---|
|
||||
| `spawned` | `{pid}` | Dispatcher successfully started a worker process. |
|
||||
| `heartbeat` | `{note?}` | Worker called `hermes kanban heartbeat $TASK` to signal liveness during long operations. |
|
||||
| `reclaimed` | `{stale_lock}` | Claim TTL expired without a completion; task goes back to `ready`. |
|
||||
| `crashed` | `{pid, claimer}` | Worker PID no longer alive but TTL hadn't expired yet. |
|
||||
| `timed_out` | `{pid, elapsed_seconds, limit_seconds, sigkill}` | `max_runtime_seconds` exceeded; dispatcher SIGTERM'd (then SIGKILL'd after 5 s grace) and re-queued. |
|
||||
| `spawn_failed` | `{error, failures}` | One spawn attempt failed (missing PATH, workspace unmountable, …). Counter increments; task returns to `ready` for retry. |
|
||||
| `gave_up` | `{failures, error}` | Circuit breaker fired after N consecutive `spawn_failed`. Task auto-blocks with the last error. Default N = 5; override via `--failure-limit`. |
|
||||
|
||||
`hermes kanban tail <id>` shows these for a single task. `hermes kanban watch` streams them board-wide.
|
||||
|
||||
## Out of scope
|
||||
|
||||
Kanban is deliberately single-host. `~/.hermes/kanban.db` is a local SQLite file and the dispatcher spawns workers on the same machine. Running a shared board across two hosts is not supported — there's no coordination primitive for "worker X on host A, worker Y on host B," and the crash-detection path assumes PIDs are host-local. If you need multi-host, run an independent board per host and use `delegate_task` / a message queue to bridge them.
|
||||
|
||||
## Design spec
|
||||
|
||||
The complete design — architecture, concurrency correctness, comparison with other systems, implementation plan, risks, open questions — lives in `docs/hermes-kanban-v1-spec.pdf`. Read that before filing any behavior-change PR.
|
||||
@@ -29,36 +29,13 @@ the steps below.
|
||||
|
||||
## Step 1: Create a Slack App
|
||||
|
||||
The fastest path is to paste a manifest Hermes generates for you. It
|
||||
declares every built-in slash command (`/btw`, `/stop`, `/model`, …),
|
||||
every required OAuth scope, every event subscription, and enables Socket
|
||||
Mode — all at once.
|
||||
|
||||
### Option A: From a Hermes-generated manifest (recommended)
|
||||
|
||||
1. Generate the manifest:
|
||||
```bash
|
||||
hermes slack manifest --write
|
||||
```
|
||||
This writes `~/.hermes/slack-manifest.json` and prints paste-in
|
||||
instructions.
|
||||
2. Go to [https://api.slack.com/apps](https://api.slack.com/apps) →
|
||||
**Create New App** → **From an app manifest**
|
||||
3. Pick your workspace, paste the JSON contents, review, click **Next**
|
||||
→ **Create**
|
||||
4. Skip ahead to **Step 6: Install App to Workspace**. The manifest
|
||||
handled scopes, events, and slash commands for you.
|
||||
|
||||
### Option B: From scratch (manual)
|
||||
|
||||
1. Go to [https://api.slack.com/apps](https://api.slack.com/apps)
|
||||
2. Click **Create New App**
|
||||
3. Choose **From scratch**
|
||||
4. Enter an app name (e.g., "Hermes Agent") and select your workspace
|
||||
5. Click **Create App**
|
||||
|
||||
You'll land on the app's **Basic Information** page. Continue with
|
||||
Steps 2–6 below.
|
||||
You'll land on the app's **Basic Information** page.
|
||||
|
||||
---
|
||||
|
||||
@@ -82,8 +59,7 @@ Navigate to **Features → OAuth & Permissions** in the sidebar. Scroll to **Sco
|
||||
|
||||
:::caution Missing scopes = missing features
|
||||
Without `channels:history` and `groups:history`, the bot **will not receive messages in channels** —
|
||||
it will only work in DMs. Without `files:read`, Hermes can chat but **cannot reliably read user-uploaded attachments**.
|
||||
These are the most commonly missed scopes.
|
||||
it will only work in DMs. These are the most commonly missed scopes.
|
||||
:::
|
||||
|
||||
**Optional scopes:**
|
||||
@@ -227,57 +203,6 @@ The bot will **not** automatically join channels. You must invite it to each cha
|
||||
|
||||
---
|
||||
|
||||
## Slash Commands
|
||||
|
||||
Every Hermes command (`/btw`, `/stop`, `/new`, `/model`, `/help`, ...)
|
||||
is a native Slack slash command — exactly the way they work on Telegram
|
||||
and Discord. Type `/` in Slack and the autocomplete picker lists every
|
||||
Hermes command with its description.
|
||||
|
||||
Under the hood: Hermes ships with a generated Slack app manifest (see
|
||||
Step 1, Option A) that declares every command in
|
||||
[`COMMAND_REGISTRY`](https://github.com/NousResearch/hermes-agent/blob/main/hermes_cli/commands.py)
|
||||
as a slash command. In Socket Mode, Slack routes the command event
|
||||
through the WebSocket regardless of the manifest's `url` field.
|
||||
|
||||
### Refreshing slash commands after updates
|
||||
|
||||
When Hermes adds new commands (e.g. after `hermes update`), regenerate
|
||||
the manifest and update your Slack app:
|
||||
|
||||
```bash
|
||||
hermes slack manifest --write
|
||||
```
|
||||
|
||||
Then in Slack:
|
||||
1. Open [https://api.slack.com/apps](https://api.slack.com/apps) →
|
||||
your Hermes app
|
||||
2. **Features → App Manifest → Edit**
|
||||
3. Paste the new contents of `~/.hermes/slack-manifest.json`
|
||||
4. **Save**. Slack will prompt to reinstall the app if scopes or slash
|
||||
commands changed.
|
||||
|
||||
### Legacy `/hermes <subcommand>` still works
|
||||
|
||||
For backward compatibility with older manifests, you can still type
|
||||
`/hermes btw run the tests` — Hermes routes it the same way as `/btw
|
||||
run the tests`. Free-form questions also work: `/hermes what's the
|
||||
weather?` is treated as a regular message.
|
||||
|
||||
### Advanced: emit only the slash-commands array
|
||||
|
||||
If you maintain your Slack manifest by hand and just want the slash
|
||||
command list:
|
||||
|
||||
```bash
|
||||
hermes slack manifest --slashes-only > /tmp/slashes.json
|
||||
```
|
||||
|
||||
Paste that array into the `features.slash_commands` key of your
|
||||
existing manifest.
|
||||
|
||||
---
|
||||
|
||||
## How the Bot Responds
|
||||
|
||||
Understanding how Hermes behaves in different contexts:
|
||||
@@ -521,8 +446,7 @@ Keys are Slack channel IDs (find them via channel details → "About" → scroll
|
||||
| "Sending messages to this app has been turned off" in DMs | Enable the **Messages Tab** in App Home settings (see Step 5) |
|
||||
| "not_authed" or "invalid_auth" errors | Regenerate your Bot Token and App Token, update `.env` |
|
||||
| Bot responds but can't post in a channel | Invite the bot to the channel with `/invite @Hermes Agent` |
|
||||
| Bot can chat but can't read uploaded images/files | Add `files:read`, then **reinstall** the app. Hermes now surfaces attachment access diagnostics in-chat when Slack returns scope/auth/permission failures. |
|
||||
| `missing_scope` error | Add the required scope in OAuth & Permissions, then **reinstall** the app |
|
||||
| "missing_scope" error | Add the required scope in OAuth & Permissions, then **reinstall** the app |
|
||||
| Socket disconnects frequently | Check your network; Bolt auto-reconnects but unstable connections cause lag |
|
||||
| Changed scopes/events but nothing changed | You **must reinstall** the app to your workspace after any scope or event subscription change |
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ CATEGORY_LABELS = {
|
||||
"dogfood": "Dogfood",
|
||||
"domain": "Domain",
|
||||
"email": "Email",
|
||||
"feeds": "Feeds",
|
||||
"gaming": "Gaming",
|
||||
"gifs": "GIFs",
|
||||
"github": "GitHub",
|
||||
|
||||
@@ -60,6 +60,8 @@ const sidebars: SidebarsConfig = {
|
||||
items: [
|
||||
'user-guide/features/cron',
|
||||
'user-guide/features/delegation',
|
||||
'user-guide/features/kanban',
|
||||
'user-guide/features/kanban-tutorial',
|
||||
'user-guide/features/code-execution',
|
||||
'user-guide/features/hooks',
|
||||
'user-guide/features/batch-processing',
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-26T19:27:12Z",
|
||||
"updated_at": "2026-04-26T12:34:42Z",
|
||||
"metadata": {
|
||||
"source": "hermes-agent repo",
|
||||
"docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog"
|
||||
@@ -16,6 +16,14 @@
|
||||
"id": "moonshotai/kimi-k2.6",
|
||||
"description": "recommended"
|
||||
},
|
||||
{
|
||||
"id": "deepseek/deepseek-v4-pro",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "deepseek/deepseek-v4-flash",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-opus-4.7",
|
||||
"description": ""
|
||||
@@ -155,6 +163,12 @@
|
||||
{
|
||||
"id": "moonshotai/kimi-k2.6"
|
||||
},
|
||||
{
|
||||
"id": "deepseek/deepseek-v4-pro"
|
||||
},
|
||||
{
|
||||
"id": "deepseek/deepseek-v4-flash"
|
||||
},
|
||||
{
|
||||
"id": "xiaomi/mimo-v2.5-pro"
|
||||
},
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 748 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 764 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 476 KiB |
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user