Compare commits

..

1 Commits

Author SHA1 Message Date
Brooklyn Nicholson b9393296fc Fix TUI input field ANSI leaks and text selection issues
1. Fix ANSI escape code leakage during scroll operations:
   - Add ensureSafeAnsi utility to ensure all ANSI sequences are properly terminated
   - Modify renderWithCursor and renderWithSelection to always include reset codes
   - Add final safety check in text rendering to catch any potential leaks

2. Improve text selection in input field:
   - Add proper mouse drag event handling for text selection
   - Enhance click handlers to support selection operations
   - Fix edge cases in selection rendering
   - Ensure proper reset of ANSI codes after selections

Fixes reported issues where users couldn't copy/paste text in input field and
experienced ANSI leakage during scrolling operations.
2026-04-26 23:31:49 -05:00
330 changed files with 1966 additions and 25856 deletions
-1
View File
@@ -69,4 +69,3 @@ mini-swe-agent/
.nix-stamps/
result
website/static/api/skills-index.json
models-dev-upstream/
+2 -6
View File
@@ -30,22 +30,18 @@ WORKDIR /opt/hermes
# unless the lockfiles themselves change.
COPY package.json package-lock.json ./
COPY web/package.json web/package-lock.json web/
COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/
RUN npm install --prefer-offline --no-audit && \
npx playwright install --with-deps chromium --only-shell && \
(cd web && npm install --prefer-offline --no-audit) && \
(cd ui-tui && npm install --prefer-offline --no-audit) && \
npm cache clean --force
# ---------- Source code ----------
# .dockerignore excludes node_modules, so the installs above survive.
COPY --chown=hermes:hermes . .
# Build browser dashboard and terminal UI assets.
RUN cd web && npm run build && \
cd ../ui-tui && npm run build
# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
RUN cd web && npm run build
# ---------- Permissions ----------
# Make install dir world-readable so any HERMES_UID can read it at runtime.
-632
View File
@@ -1,632 +0,0 @@
"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
backend. Each request starts a short-lived ACP session, sends the formatted
conversation as a single prompt, collects text chunks, and converts the result
back into the minimal shape Hermes expects from an OpenAI client.
"""
from __future__ import annotations
import json
import os
import queue
import re
import shlex
import subprocess
import threading
import time
from collections import deque
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from agent.file_safety import get_read_block_error, is_write_denied
from agent.redact import redact_sensitive_text
ACP_MARKER_BASE_URL = "acp://copilot"
_DEFAULT_TIMEOUT_SECONDS = 900.0
_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
_TOOL_CALL_JSON_RE = re.compile(
r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}",
re.DOTALL,
)
def _resolve_command() -> str:
return (
os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
or os.getenv("COPILOT_CLI_PATH", "").strip()
or "copilot"
)
def _resolve_args() -> list[str]:
raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
if not raw:
return ["--acp", "--stdio"]
return shlex.split(raw)
def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": message_id,
"error": {
"code": code,
"message": message,
},
}
def _permission_denied(message_id: Any) -> dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "cancelled",
}
},
}
def _format_messages_as_prompt(
messages: list[dict[str, Any]],
model: str | None = None,
tools: list[dict[str, Any]] | None = None,
tool_choice: Any = None,
) -> str:
sections: list[str] = [
"You are being used as the active ACP agent backend for Hermes.",
"Use ACP capabilities to complete tasks.",
"IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
"If no tool is needed, answer normally.",
]
if model:
sections.append(f"Hermes requested model hint: {model}")
if isinstance(tools, list) and tools:
tool_specs: list[dict[str, Any]] = []
for t in tools:
if not isinstance(t, dict):
continue
fn = t.get("function") or {}
if not isinstance(fn, dict):
continue
name = fn.get("name")
if not isinstance(name, str) or not name.strip():
continue
tool_specs.append(
{
"name": name.strip(),
"description": fn.get("description", ""),
"parameters": fn.get("parameters", {}),
}
)
if tool_specs:
sections.append(
"Available tools (OpenAI function schema). "
"When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
"containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
+ json.dumps(tool_specs, ensure_ascii=False)
)
if tool_choice is not None:
sections.append(
f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}"
)
transcript: list[str] = []
for message in messages:
if not isinstance(message, dict):
continue
role = str(message.get("role") or "unknown").strip().lower()
if role == "tool":
role = "tool"
elif role not in {"system", "user", "assistant"}:
role = "context"
content = message.get("content")
rendered = _render_message_content(content)
if not rendered:
continue
label = {
"system": "System",
"user": "User",
"assistant": "Assistant",
"tool": "Tool",
"context": "Context",
}.get(role, role.title())
transcript.append(f"{label}:\n{rendered}")
if transcript:
sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
sections.append("Continue the conversation from the latest user request.")
return "\n\n".join(
section.strip() for section in sections if section and section.strip()
)
def _render_message_content(content: Any) -> str:
if content is None:
return ""
if isinstance(content, str):
return content.strip()
if isinstance(content, dict):
if "text" in content:
return str(content.get("text") or "").strip()
if "content" in content and isinstance(content.get("content"), str):
return str(content.get("content") or "").strip()
return json.dumps(content, ensure_ascii=True)
if isinstance(content, list):
parts: list[str] = []
for item in content:
if isinstance(item, str):
parts.append(item)
elif isinstance(item, dict):
text = item.get("text")
if isinstance(text, str) and text.strip():
parts.append(text.strip())
return "\n".join(parts).strip()
return str(content).strip()
def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
if not isinstance(text, str) or not text.strip():
return [], ""
extracted: list[SimpleNamespace] = []
consumed_spans: list[tuple[int, int]] = []
def _try_add_tool_call(raw_json: str) -> None:
try:
obj = json.loads(raw_json)
except Exception:
return
if not isinstance(obj, dict):
return
fn = obj.get("function")
if not isinstance(fn, dict):
return
fn_name = fn.get("name")
if not isinstance(fn_name, str) or not fn_name.strip():
return
fn_args = fn.get("arguments", "{}")
if not isinstance(fn_args, str):
fn_args = json.dumps(fn_args, ensure_ascii=False)
call_id = obj.get("id")
if not isinstance(call_id, str) or not call_id.strip():
call_id = f"acp_call_{len(extracted) + 1}"
extracted.append(
SimpleNamespace(
id=call_id,
call_id=call_id,
response_item_id=None,
type="function",
function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
)
)
for m in _TOOL_CALL_BLOCK_RE.finditer(text):
raw = m.group(1)
_try_add_tool_call(raw)
consumed_spans.append((m.start(), m.end()))
# Only try bare-JSON fallback when no XML blocks were found.
if not extracted:
for m in _TOOL_CALL_JSON_RE.finditer(text):
raw = m.group(0)
_try_add_tool_call(raw)
consumed_spans.append((m.start(), m.end()))
if not consumed_spans:
return extracted, text.strip()
consumed_spans.sort()
merged: list[tuple[int, int]] = []
for start, end in consumed_spans:
if not merged or start > merged[-1][1]:
merged.append((start, end))
else:
merged[-1] = (merged[-1][0], max(merged[-1][1], end))
parts: list[str] = []
cursor = 0
for start, end in merged:
if cursor < start:
parts.append(text[cursor:start])
cursor = max(cursor, end)
if cursor < len(text):
parts.append(text[cursor:])
cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
return extracted, cleaned
def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
candidate = Path(path_text)
if not candidate.is_absolute():
raise PermissionError("ACP file-system paths must be absolute.")
resolved = candidate.resolve()
root = Path(cwd).resolve()
try:
resolved.relative_to(root)
except ValueError as exc:
raise PermissionError(
f"Path '{resolved}' is outside the session cwd '{root}'."
) from exc
return resolved
class _ACPChatCompletions:
def __init__(self, client: CopilotACPClient):
self._client = client
def create(self, **kwargs: Any) -> Any:
return self._client._create_chat_completion(**kwargs)
class _ACPChatNamespace:
def __init__(self, client: CopilotACPClient):
self.completions = _ACPChatCompletions(client)
class CopilotACPClient:
"""Minimal OpenAI-client-compatible facade for Copilot ACP."""
def __init__(
self,
*,
api_key: str | None = None,
base_url: str | None = None,
default_headers: dict[str, str] | None = None,
acp_command: str | None = None,
acp_args: list[str] | None = None,
acp_cwd: str | None = None,
command: str | None = None,
args: list[str] | None = None,
**_: Any,
):
self.api_key = api_key or "copilot-acp"
self.base_url = base_url or ACP_MARKER_BASE_URL
self._default_headers = dict(default_headers or {})
self._acp_command = acp_command or command or _resolve_command()
self._acp_args = list(acp_args or args or _resolve_args())
self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
self.chat = _ACPChatNamespace(self)
self.is_closed = False
self._active_process: subprocess.Popen[str] | None = None
self._active_process_lock = threading.Lock()
def close(self) -> None:
proc: subprocess.Popen[str] | None
with self._active_process_lock:
proc = self._active_process
self._active_process = None
self.is_closed = True
if proc is None:
return
try:
proc.terminate()
proc.wait(timeout=2)
except Exception:
try:
proc.kill()
except Exception:
pass
def _create_chat_completion(
self,
*,
model: str | None = None,
messages: list[dict[str, Any]] | None = None,
timeout: float | None = None,
tools: list[dict[str, Any]] | None = None,
tool_choice: Any = None,
**_: Any,
) -> Any:
prompt_text = _format_messages_as_prompt(
messages or [],
model=model,
tools=tools,
tool_choice=tool_choice,
)
# Normalise timeout: run_agent.py may pass an httpx.Timeout object
# (used natively by the OpenAI SDK) rather than a plain float.
if timeout is None:
_effective_timeout = _DEFAULT_TIMEOUT_SECONDS
elif isinstance(timeout, (int, float)):
_effective_timeout = float(timeout)
else:
# httpx.Timeout or similar — pick the largest component so the
# subprocess has enough wall-clock time for the full response.
_candidates = [
getattr(timeout, attr, None)
for attr in ("read", "write", "connect", "pool", "timeout")
]
_numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
_effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
response_text, reasoning_text = self._run_prompt(
prompt_text,
timeout_seconds=_effective_timeout,
)
tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
usage = SimpleNamespace(
prompt_tokens=0,
completion_tokens=0,
total_tokens=0,
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
)
assistant_message = SimpleNamespace(
content=cleaned_text,
tool_calls=tool_calls,
reasoning=reasoning_text or None,
reasoning_content=reasoning_text or None,
reasoning_details=None,
)
finish_reason = "tool_calls" if tool_calls else "stop"
choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
return SimpleNamespace(
choices=[choice],
usage=usage,
model=model or "copilot-acp",
)
def _run_prompt(
self, prompt_text: str, *, timeout_seconds: float
) -> tuple[str, str]:
try:
proc = subprocess.Popen(
[self._acp_command] + self._acp_args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
bufsize=1,
cwd=self._acp_cwd,
)
except FileNotFoundError as exc:
raise RuntimeError(
f"Could not start Copilot ACP command '{self._acp_command}'. "
"Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
) from exc
if proc.stdin is None or proc.stdout is None:
proc.kill()
raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
self.is_closed = False
with self._active_process_lock:
self._active_process = proc
inbox: queue.Queue[dict[str, Any]] = queue.Queue()
stderr_tail: deque[str] = deque(maxlen=40)
def _stdout_reader() -> None:
if proc.stdout is None:
return
for line in proc.stdout:
try:
inbox.put(json.loads(line))
except Exception:
inbox.put({"raw": line.rstrip("\n")})
def _stderr_reader() -> None:
if proc.stderr is None:
return
for line in proc.stderr:
stderr_tail.append(line.rstrip("\n"))
out_thread = threading.Thread(target=_stdout_reader, daemon=True)
err_thread = threading.Thread(target=_stderr_reader, daemon=True)
out_thread.start()
err_thread.start()
next_id = 0
def _request(
method: str,
params: dict[str, Any],
*,
text_parts: list[str] | None = None,
reasoning_parts: list[str] | None = None,
) -> Any:
nonlocal next_id
next_id += 1
request_id = next_id
payload = {
"jsonrpc": "2.0",
"id": request_id,
"method": method,
"params": params,
}
assert proc.stdin is not None # always set: Popen(stdin=PIPE)
proc.stdin.write(json.dumps(payload) + "\n")
proc.stdin.flush()
deadline = time.time() + timeout_seconds
while time.time() < deadline:
if proc.poll() is not None:
break
try:
msg = inbox.get(timeout=0.1)
except queue.Empty:
continue
if self._handle_server_message(
msg,
process=proc,
cwd=self._acp_cwd,
text_parts=text_parts,
reasoning_parts=reasoning_parts,
):
continue
if msg.get("id") != request_id:
continue
if "error" in msg:
err = msg.get("error") or {}
raise RuntimeError(
f"Copilot ACP {method} failed: {err.get('message') or err}"
)
return msg.get("result")
stderr_text = "\n".join(stderr_tail).strip()
if proc.poll() is not None and stderr_text:
raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
raise TimeoutError(
f"Timed out waiting for Copilot ACP response to {method}."
)
try:
_request(
"initialize",
{
"protocolVersion": 1,
"clientCapabilities": {
"fs": {
"readTextFile": True,
"writeTextFile": True,
}
},
"clientInfo": {
"name": "hermes-agent",
"title": "Hermes Agent",
"version": "0.0.0",
},
},
)
session = (
_request(
"session/new",
{
"cwd": self._acp_cwd,
"mcpServers": [],
},
)
or {}
)
session_id = str(session.get("sessionId") or "").strip()
if not session_id:
raise RuntimeError("Copilot ACP did not return a sessionId.")
text_parts: list[str] = []
reasoning_parts: list[str] = []
_request(
"session/prompt",
{
"sessionId": session_id,
"prompt": [
{
"type": "text",
"text": prompt_text,
}
],
},
text_parts=text_parts,
reasoning_parts=reasoning_parts,
)
return "".join(text_parts), "".join(reasoning_parts)
finally:
self.close()
def _handle_server_message(
self,
msg: dict[str, Any],
*,
process: subprocess.Popen[str],
cwd: str,
text_parts: list[str] | None,
reasoning_parts: list[str] | None,
) -> bool:
method = msg.get("method")
if not isinstance(method, str):
return False
if method == "session/update":
params = msg.get("params") or {}
update = params.get("update") or {}
kind = str(update.get("sessionUpdate") or "").strip()
content = update.get("content") or {}
chunk_text = ""
if isinstance(content, dict):
chunk_text = str(content.get("text") or "")
if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
text_parts.append(chunk_text)
elif (
kind == "agent_thought_chunk"
and chunk_text
and reasoning_parts is not None
):
reasoning_parts.append(chunk_text)
return True
if process.stdin is None:
return True
message_id = msg.get("id")
params = msg.get("params") or {}
if method == "session/request_permission":
response = _permission_denied(message_id)
elif method == "fs/read_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
block_error = get_read_block_error(str(path))
if block_error:
raise PermissionError(block_error)
content = path.read_text() if path.exists() else ""
line = params.get("line")
limit = params.get("limit")
if isinstance(line, int) and line > 1:
lines = content.splitlines(keepends=True)
start = line - 1
end = (
start + limit if isinstance(limit, int) and limit > 0 else None
)
content = "".join(lines[start:end])
if content:
content = redact_sensitive_text(content)
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"content": content,
},
}
except Exception as exc:
response = _jsonrpc_error(message_id, -32602, str(exc))
elif method == "fs/write_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
if is_write_denied(str(path)):
raise PermissionError(
f"Write denied: '{path}' is a protected system/credential file."
)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(str(params.get("content") or ""))
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": None,
}
except Exception as exc:
response = _jsonrpc_error(message_id, -32602, str(exc))
else:
response = _jsonrpc_error(
message_id,
-32601,
f"ACP client method '{method}' is not supported by Hermes yet.",
)
process.stdin.write(json.dumps(response) + "\n")
process.stdin.flush()
return True
+59 -127
View File
@@ -82,8 +82,6 @@ _PROVIDER_ALIASES = {
"moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn",
"moonshot-cn": "kimi-coding-cn",
"gmi-cloud": "gmi",
"gmicloud": "gmi",
"minimax-china": "minimax-cn",
"minimax_cn": "minimax-cn",
"claude": "anthropic",
@@ -151,31 +149,22 @@ def _fixed_temperature_for_model(
return None
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
def _get_aux_model_for_provider(provider_id: str) -> str:
"""Return the cheap auxiliary model for a provider.
Reads from ProviderProfile.default_aux_model first, falling back to the
legacy hardcoded dict for providers that predate the profiles system.
"""
try:
from providers import get_provider_profile
_p = get_provider_profile(provider_id)
if _p and _p.default_aux_model:
return _p.default_aux_model
except Exception:
pass
return _API_KEY_PROVIDER_AUX_MODELS_FALLBACK.get(provider_id, "")
# Fallback for providers not yet migrated to ProviderProfile.default_aux_model.
# New providers should set default_aux_model on their profile instead.
_API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = {
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"gemini": "gemini-3-flash-preview",
"zai": "glm-4.5-flash",
"kimi-coding": "kimi-k2-turbo-preview",
"stepfun": "step-3.5-flash",
"kimi-coding-cn": "kimi-k2-turbo-preview",
"minimax": "MiniMax-M2.7",
"minimax-cn": "MiniMax-M2.7",
"anthropic": "claude-haiku-4-5-20251001",
"ai-gateway": "google/gemini-3-flash",
"opencode-zen": "gemini-3-flash",
"opencode-go": "glm-5",
"kilocode": "google/gemini-3-flash-preview",
"ollama-cloud": "nemotron-3-nano:30b",
}
# Legacy alias — callers that haven't been updated yet can still use this.
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = _API_KEY_PROVIDER_AUX_MODELS_FALLBACK
# Vision-specific model overrides for direct providers.
# When the user's main provider has a dedicated vision/multimodal model that
# differs from their main chat model, map it here. The vision auto-detect
@@ -876,7 +865,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
base_url = _to_openai_base_url(
_pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
)
model = _get_aux_model_for_provider(provider_id) or None
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
if model is None:
continue # skip provider if we don't know a valid aux model
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
@@ -885,22 +874,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if is_native_gemini_base_url(base_url):
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
extra = {}
if base_url_host_matches(base_url, "api.kimi.com"):
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
from hermes_cli.models import copilot_default_headers
extra = {}
if base_url_host_matches(base_url, "api.kimi.com"):
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
else:
try:
from providers import get_provider_profile as _gpf_aux
_ph_aux = _gpf_aux(provider_id)
if _ph_aux and _ph_aux.default_headers:
extra["default_headers"] = dict(_ph_aux.default_headers)
except Exception:
pass
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
extra["default_headers"] = copilot_default_headers()
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
creds = resolve_api_key_provider_credentials(provider_id)
api_key = str(creds.get("api_key", "")).strip()
@@ -910,7 +891,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
base_url = _to_openai_base_url(
str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
)
model = _get_aux_model_for_provider(provider_id) or None
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
if model is None:
continue # skip provider if we don't know a valid aux model
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
@@ -926,14 +907,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
else:
try:
from providers import get_provider_profile as _gpf_aux2
_ph_aux2 = _gpf_aux2(provider_id)
if _ph_aux2 and _ph_aux2.default_headers:
extra["default_headers"] = dict(_ph_aux2.default_headers)
except Exception:
pass
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
return None, None
@@ -1282,7 +1255,7 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
from agent.anthropic_adapter import _is_oauth_token
is_oauth = _is_oauth_token(token)
model = _get_aux_model_for_provider("anthropic") or "claude-haiku-4-5-20251001"
model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
try:
real_client = build_anthropic_client(token, base_url)
@@ -1644,14 +1617,8 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
# below — never look up auth env vars ad-hoc.
def _to_async_client(sync_client, model: str, is_vision: bool = False):
"""Convert a sync client to its async counterpart, preserving Codex routing.
When ``is_vision=True`` and the underlying base URL is Copilot, the
resulting async client carries the ``Copilot-Vision-Request: true``
header so the request is routed to Copilot's vision-capable
infrastructure (otherwise vision payloads silently time out).
"""
def _to_async_client(sync_client, model: str):
"""Convert a sync client to its async counterpart, preserving Codex routing."""
from openai import AsyncOpenAI
if isinstance(sync_client, CodexAuxiliaryClient):
@@ -1666,7 +1633,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
except ImportError:
pass
try:
from acp_adapter.copilot_client import CopilotACPClient
from agent.copilot_acp_client import CopilotACPClient
if isinstance(sync_client, CopilotACPClient):
return sync_client, model
except ImportError:
@@ -1680,11 +1647,9 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
if base_url_host_matches(sync_base_url, "openrouter.ai"):
async_kwargs["default_headers"] = dict(_OR_HEADERS)
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
from hermes_cli.copilot_auth import copilot_request_headers
from hermes_cli.models import copilot_default_headers
async_kwargs["default_headers"] = copilot_request_headers(
is_agent_turn=True, is_vision=is_vision
)
async_kwargs["default_headers"] = copilot_default_headers()
elif base_url_host_matches(sync_base_url, "api.kimi.com"):
async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
return AsyncOpenAI(**async_kwargs), model
@@ -1711,7 +1676,6 @@ def resolve_provider_client(
explicit_api_key: str = None,
api_mode: str = None,
main_runtime: Optional[Dict[str, Any]] = None,
is_vision: bool = False,
) -> Tuple[Optional[Any], Optional[str]]:
"""Central router: given a provider name and optional model, return a
configured client with the correct auth, base URL, and API format.
@@ -1795,7 +1759,7 @@ def resolve_provider_client(
"auxiliary provider (using %r instead)", model, resolved)
model = None
final_model = model or resolved
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── OpenRouter ───────────────────────────────────────────────────
@@ -1808,7 +1772,7 @@ def resolve_provider_client(
)
return None, None
final_model = _normalize_resolved_model(model or default, provider)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── Nous Portal (OAuth) ──────────────────────────────────────────
@@ -1825,7 +1789,7 @@ def resolve_provider_client(
"but Nous Portal not configured (run: hermes auth)")
return None, None
final_model = _normalize_resolved_model(model or default, provider)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
@@ -1852,7 +1816,7 @@ def resolve_provider_client(
"but no Codex OAuth token found (run: hermes model)")
return None, None
final_model = _normalize_resolved_model(model or default, provider)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
@@ -1881,13 +1845,11 @@ def resolve_provider_client(
if base_url_host_matches(custom_base, "api.kimi.com"):
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
from hermes_cli.copilot_auth import copilot_request_headers
extra["default_headers"] = copilot_request_headers(
is_agent_turn=True, is_vision=is_vision
)
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
client = _wrap_if_needed(client, final_model, custom_base)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# Try custom first, then codex, then API-key providers
for try_fn in (_try_custom_endpoint, _try_codex,
@@ -1897,7 +1859,7 @@ def resolve_provider_client(
final_model = _normalize_resolved_model(model or default, provider)
_cbase = str(getattr(client, "base_url", "") or "")
client = _wrap_if_needed(client, final_model, _cbase)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
logger.warning("resolve_provider_client: custom/main requested "
"but no endpoint credentials found")
@@ -1942,7 +1904,7 @@ def resolve_provider_client(
provider,
)
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
sync_anthropic = AnthropicAuxiliaryClient(
real_client, final_model, custom_key, custom_base, is_oauth=False,
@@ -1961,7 +1923,7 @@ def resolve_provider_client(
client = CodexAuxiliaryClient(client, final_model)
else:
client = _wrap_if_needed(client, final_model, custom_base)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
logger.warning(
"resolve_provider_client: named custom provider %r has no base_url",
@@ -1993,7 +1955,7 @@ def resolve_provider_client(
logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
return None, None
final_model = _normalize_resolved_model(model or default_model, provider)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model))
return (_to_async_client(client, final_model) if async_mode else (client, final_model))
creds = resolve_api_key_provider_credentials(provider)
api_key = str(creds.get("api_key", "")).strip()
@@ -2010,7 +1972,7 @@ def resolve_provider_client(
str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
)
default_model = _get_aux_model_for_provider(provider)
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
final_model = _normalize_resolved_model(model or default_model, provider)
if provider == "gemini":
@@ -2019,7 +1981,7 @@ def resolve_provider_client(
if is_native_gemini_base_url(base_url):
client = GeminiNativeClient(api_key=api_key, base_url=base_url)
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# Provider-specific headers
@@ -2027,11 +1989,9 @@ def resolve_provider_client(
if base_url_host_matches(base_url, "api.kimi.com"):
headers["User-Agent"] = "claude-code/0.1.0"
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
from hermes_cli.copilot_auth import copilot_request_headers
from hermes_cli.models import copilot_default_headers
headers.update(copilot_request_headers(
is_agent_turn=True, is_vision=is_vision
))
headers.update(copilot_default_headers())
client = OpenAI(api_key=api_key, base_url=base_url,
**({"default_headers": headers} if headers else {}))
@@ -2057,7 +2017,7 @@ def resolve_provider_client(
client = _wrap_if_needed(client, final_model, base_url)
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
if pconfig.auth_type == "external_process":
@@ -2080,7 +2040,7 @@ def resolve_provider_client(
"process credentials are incomplete"
)
return None, None
from acp_adapter.copilot_client import CopilotACPClient
from agent.copilot_acp_client import CopilotACPClient
client = CopilotACPClient(
api_key=api_key,
@@ -2089,7 +2049,7 @@ def resolve_provider_client(
args=args,
)
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
logger.warning("resolve_provider_client: external-process provider %s not "
"directly supported", provider)
@@ -2125,7 +2085,7 @@ def resolve_provider_client(
base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
)
logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
@@ -2200,13 +2160,8 @@ def _normalize_vision_provider(provider: Optional[str]) -> str:
return _normalize_aux_provider(provider)
def _resolve_strict_vision_backend(
provider: str,
model: Optional[str] = None,
) -> Tuple[Optional[Any], Optional[str]]:
def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
provider = _normalize_vision_provider(provider)
if provider == "copilot":
return resolve_provider_client("copilot", model, is_vision=True)
if provider == "openrouter":
return _try_openrouter()
if provider == "nous":
@@ -2274,7 +2229,7 @@ def resolve_vision_provider_client(
return resolved_provider, None, None
final_model = resolved_model or default_model
if async_mode:
async_client, async_model = _to_async_client(sync_client, final_model, is_vision=True)
async_client, async_model = _to_async_client(sync_client, final_model)
return resolved_provider, async_client, async_model
return resolved_provider, sync_client, final_model
@@ -2306,11 +2261,8 @@ def resolve_vision_provider_client(
main_provider = _read_main_provider()
main_model = _read_main_model()
if main_provider and main_provider not in ("auto", ""):
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
if main_provider == "nous":
sync_client, default_model = _resolve_strict_vision_backend(
main_provider, vision_model
)
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
if sync_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
@@ -2318,10 +2270,10 @@ def resolve_vision_provider_client(
)
return _finalize(main_provider, sync_client, default_model)
else:
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode,
is_vision=True)
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
@@ -2343,14 +2295,11 @@ def resolve_vision_provider_client(
return None, None, None
if requested in _VISION_AUTO_PROVIDER_ORDER:
sync_client, default_model = _resolve_strict_vision_backend(
requested, resolved_model
)
sync_client, default_model = _resolve_strict_vision_backend(requested)
return _finalize(requested, sync_client, default_model)
client, final_model = _get_cached_client(requested, resolved_model, async_mode,
api_mode=resolved_api_mode,
is_vision=True)
api_mode=resolved_api_mode)
if client is None:
return requested, None, None
return requested, client, final_model
@@ -2414,11 +2363,10 @@ def _client_cache_key(
api_key: Optional[str] = None,
api_mode: Optional[str] = None,
main_runtime: Optional[Dict[str, Any]] = None,
is_vision: bool = False,
) -> tuple:
runtime = _normalize_main_runtime(main_runtime)
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision)
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -2444,7 +2392,6 @@ def _refresh_nous_auxiliary_client(
api_key: Optional[str] = None,
api_mode: Optional[str] = None,
main_runtime: Optional[Dict[str, Any]] = None,
is_vision: bool = False,
) -> Tuple[Optional[Any], Optional[str]]:
"""Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
runtime = _resolve_nous_runtime_api(force_refresh=True)
@@ -2462,7 +2409,7 @@ def _refresh_nous_auxiliary_client(
current_loop = _aio.get_event_loop()
except RuntimeError:
pass
client, final_model = _to_async_client(sync_client, final_model or "", is_vision=is_vision)
client, final_model = _to_async_client(sync_client, final_model or "")
else:
client = sync_client
@@ -2473,7 +2420,6 @@ def _refresh_nous_auxiliary_client(
api_key=api_key,
api_mode=api_mode,
main_runtime=main_runtime,
is_vision=is_vision,
)
_store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
return client, final_model
@@ -2585,19 +2531,12 @@ def _is_openrouter_client(client: Any) -> bool:
return False
def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
"""Best-effort check for cached clients that accept ``vendor/model`` IDs."""
if _is_openrouter_client(client):
return True
return bool(cached_default and "/" in cached_default)
def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
"""Keep slash-bearing model IDs only for cached clients that support them.
"""Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
"""
if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
if model and "/" in model and not _is_openrouter_client(client):
return cached_default
return model or cached_default
@@ -2610,7 +2549,6 @@ def _get_cached_client(
api_key: str = None,
api_mode: str = None,
main_runtime: Optional[Dict[str, Any]] = None,
is_vision: bool = False,
) -> Tuple[Optional[Any], Optional[str]]:
"""Get or create a cached client for the given provider.
@@ -2647,7 +2585,6 @@ def _get_cached_client(
api_key=api_key,
api_mode=api_mode,
main_runtime=main_runtime,
is_vision=is_vision,
)
with _client_cache_lock:
if cache_key in _client_cache:
@@ -2679,7 +2616,6 @@ def _get_cached_client(
explicit_api_key=api_key,
api_mode=api_mode,
main_runtime=runtime,
is_vision=is_vision,
)
if client is not None:
# For async clients, remember which loop they were created on so we
@@ -3143,7 +3079,6 @@ def call_llm(
api_key=resolved_api_key,
api_mode=resolved_api_mode,
main_runtime=main_runtime,
is_vision=(task == "vision"),
)
if refreshed_client is not None:
logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
@@ -3434,7 +3369,6 @@ async def async_call_llm(
base_url=resolved_base_url,
api_key=resolved_api_key,
api_mode=resolved_api_mode,
is_vision=(task == "vision"),
)
if refreshed_client is not None:
logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
@@ -3503,9 +3437,7 @@ async def async_call_llm(
extra_body=effective_extra_body,
base_url=str(getattr(fb_client, "base_url", "") or ""))
# Convert sync fallback client to async
async_fb, async_fb_model = _to_async_client(
fb_client, fb_model or "", is_vision=(task == "vision")
)
async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
if async_fb_model and async_fb_model != fb_kwargs.get("model"):
fb_kwargs["model"] = async_fb_model
return _validate_llm_response(
+5 -113
View File
@@ -61,52 +61,9 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"
# Chars per token rough estimate
_CHARS_PER_TOKEN = 4
# Flat token cost per attached image part. Real cost varies by provider and
# dimensions (Anthropic ≈ width×height/750, GPT-4o up to ~1700 for
# high-detail 2048×2048, Gemini 258/tile), but 1600 is a realistic ceiling
# that keeps compression budgeting honest for multi-image conversations.
# Matches Claude Code's IMAGE_TOKEN_ESTIMATE constant.
_IMAGE_TOKEN_ESTIMATE = 1600
# Same figure expressed in the char-budget currency the rest of the
# compressor speaks in. Used when accumulating message "content length"
# for tail-cut decisions.
_IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
def _content_length_for_budget(raw_content: Any) -> int:
"""Return the effective char-length of a message's content for token budgeting.
Plain strings: ``len(content)``. Multimodal lists: sum of text-part
``len(text)`` plus a flat ``_IMAGE_CHAR_EQUIVALENT`` per image part
(``image_url`` / ``input_image`` / Anthropic-style ``image``). This
keeps the compressor from treating a turn with 5 attached images as
near-zero tokens just because the text part is empty.
"""
if isinstance(raw_content, str):
return len(raw_content)
if not isinstance(raw_content, list):
return len(str(raw_content or ""))
total = 0
for p in raw_content:
if isinstance(p, str):
total += len(p)
continue
if not isinstance(p, dict):
total += len(str(p))
continue
ptype = p.get("type")
if ptype in {"image_url", "input_image", "image"}:
total += _IMAGE_CHAR_EQUIVALENT
else:
# text / input_text / tool_result-with-text / anything else with
# a text field. Ignore the raw base64 payload inside image_url
# dicts — dimensions don't matter, only whether it's an image.
total += len(p.get("text", "") or "")
return total
def _content_text_for_contains(content: Any) -> str:
"""Return a best-effort text view of message content.
@@ -338,10 +295,6 @@ class ContextCompressor(ContextEngine):
self._context_probe_persistable = False
self._previous_summary = None
self._last_summary_error = None
self._last_summary_dropped_count = 0
self._last_summary_fallback_used = False
self._last_aux_model_failure_error = None
self._last_aux_model_failure_model = None
self._last_compression_savings_pct = 100.0
self._ineffective_compression_count = 0
@@ -445,17 +398,6 @@ class ContextCompressor(ContextEngine):
self._ineffective_compression_count: int = 0
self._summary_failure_cooldown_until: float = 0.0
self._last_summary_error: Optional[str] = None
# When summary generation fails and a static fallback is inserted,
# record how many turns were unrecoverably dropped so callers
# (gateway hygiene, /compress) can surface a visible warning.
self._last_summary_dropped_count: int = 0
self._last_summary_fallback_used: bool = False
# When a user-configured summary model fails and we recover by
# retrying on the main model, record the failure so gateway /
# CLI callers can still warn the user even though compression
# succeeded. Silent recovery would hide the broken config.
self._last_aux_model_failure_error: Optional[str] = None
self._last_aux_model_failure_model: Optional[str] = None
def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response."""
@@ -542,7 +484,7 @@ class ContextCompressor(ContextEngine):
for i in range(len(result) - 1, -1, -1):
msg = result[i]
raw_content = msg.get("content") or ""
content_len = _content_length_for_budget(raw_content)
content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
@@ -915,50 +857,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
"Falling back to main model '%s' for compression.",
self.summary_model, e, self.model,
)
# Record the aux-model failure so callers can warn the user
# even if the retry-on-main succeeds — a misconfigured aux
# model is something the user needs to fix.
_err_text = str(e).strip() or e.__class__.__name__
if len(_err_text) > 220:
_err_text = _err_text[:217].rstrip() + "..."
self._last_aux_model_failure_error = _err_text
self._last_aux_model_failure_model = self.summary_model
self.summary_model = "" # empty = use main model
self._summary_failure_cooldown_until = 0.0 # no cooldown
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) # retry immediately
# Unknown-error best-effort retry on main model. Losing N turns of
# context is almost always worse than one extra summary attempt, so
# if we haven't already fallen back and the summary model differs
# from the main model, try once more on main before entering
# cooldown. Errors that DID match _is_model_not_found above are
# already handled by the fast-path retry; this branch catches
# everything else (400s, provider-specific "no route" strings,
# aggregator rejections, etc.) where auto-retry is still safer
# than dropping the turns.
if (
self.summary_model
and self.summary_model != self.model
and not getattr(self, "_summary_model_fallen_back", False)
):
self._summary_model_fallen_back = True
logging.warning(
"Summary model '%s' failed (%s). "
"Retrying on main model '%s' before giving up.",
self.summary_model, e, self.model,
)
# Record the aux-model failure (see 404 branch above) — user
# should know their configured model is broken even if main
# recovers the call.
_err_text = str(e).strip() or e.__class__.__name__
if len(_err_text) > 220:
_err_text = _err_text[:217].rstrip() + "..."
self._last_aux_model_failure_error = _err_text
self._last_aux_model_failure_model = self.summary_model
self.summary_model = "" # empty = use main model
self._summary_failure_cooldown_until = 0.0
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
# Transient errors (timeout, rate limit, network) — shorter cooldown
_transient_cooldown = 60
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
@@ -1180,9 +1082,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
for i in range(n - 1, head_end - 1, -1):
msg = messages[i]
raw_content = msg.get("content") or ""
content_len = _content_length_for_budget(raw_content)
msg_tokens = content_len // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
content = msg.get("content") or ""
msg_tokens = len(content) // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
# Include tool call arguments in estimate
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
@@ -1251,13 +1152,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
related to this topic and be more aggressive about compressing
everything else. Inspired by Claude Code's ``/compact``.
"""
# Reset per-call summary failure state — callers inspect these fields
# after compress() returns to decide whether to surface a warning.
self._last_summary_dropped_count = 0
self._last_summary_fallback_used = False
self._last_summary_error = None
self._last_aux_model_failure_error = None
self._last_aux_model_failure_model = None
n_messages = len(messages)
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
_min_for_compress = self.protect_first_n + 3 + 1
@@ -1336,13 +1230,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
if not self.quiet_mode:
logger.warning("Summary generation failed — inserting static fallback context marker")
n_dropped = compress_end - compress_start
self._last_summary_dropped_count = n_dropped
self._last_summary_fallback_used = True
summary = (
f"{SUMMARY_PREFIX}\n"
f"Summary generation was unavailable. {n_dropped} message(s) were "
f"Summary generation was unavailable. {n_dropped} conversation turns were "
f"removed to free context space but could not be summarized. The removed "
f"messages contained earlier work in this session. Continue based on the "
f"turns contained earlier work in this session. Continue based on the "
f"recent messages below and the current state of any files or resources."
)
+643 -5
View File
@@ -1,8 +1,646 @@
"""Backward-compatibility shim.
"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
CopilotACPClient has moved to acp_adapter/copilot_client.py.
This module re-exports it so existing callers continue to work.
This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
backend. Each request starts a short-lived ACP session, sends the formatted
conversation as a single prompt, collects text chunks, and converts the result
back into the minimal shape Hermes expects from an OpenAI client.
"""
from acp_adapter.copilot_client import CopilotACPClient # noqa: F401
__all__ = ["CopilotACPClient"]
from __future__ import annotations
import json
import os
import queue
import re
import shlex
import subprocess
import threading
import time
from collections import deque
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from agent.file_safety import get_read_block_error, is_write_denied
from agent.redact import redact_sensitive_text
ACP_MARKER_BASE_URL = "acp://copilot"
_DEFAULT_TIMEOUT_SECONDS = 900.0
_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
def _resolve_command() -> str:
return (
os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
or os.getenv("COPILOT_CLI_PATH", "").strip()
or "copilot"
)
def _resolve_args() -> list[str]:
raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
if not raw:
return ["--acp", "--stdio"]
return shlex.split(raw)
def _resolve_home_dir() -> str:
"""Return a stable HOME for child ACP processes."""
try:
from hermes_constants import get_subprocess_home
profile_home = get_subprocess_home()
if profile_home:
return profile_home
except Exception:
pass
home = os.environ.get("HOME", "").strip()
if home:
return home
expanded = os.path.expanduser("~")
if expanded and expanded != "~":
return expanded
try:
import pwd
resolved = pwd.getpwuid(os.getuid()).pw_dir.strip()
if resolved:
return resolved
except Exception:
pass
# Last resort: /tmp (writable on any POSIX system). Avoids crashing the
# subprocess with no HOME; callers can set HERMES_HOME explicitly if they
# need a different writable dir.
return "/tmp"
def _build_subprocess_env() -> dict[str, str]:
env = os.environ.copy()
env["HOME"] = _resolve_home_dir()
return env
def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": message_id,
"error": {
"code": code,
"message": message,
},
}
def _permission_denied(message_id: Any) -> dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "cancelled",
}
},
}
def _format_messages_as_prompt(
messages: list[dict[str, Any]],
model: str | None = None,
tools: list[dict[str, Any]] | None = None,
tool_choice: Any = None,
) -> str:
sections: list[str] = [
"You are being used as the active ACP agent backend for Hermes.",
"Use ACP capabilities to complete tasks.",
"IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
"If no tool is needed, answer normally.",
]
if model:
sections.append(f"Hermes requested model hint: {model}")
if isinstance(tools, list) and tools:
tool_specs: list[dict[str, Any]] = []
for t in tools:
if not isinstance(t, dict):
continue
fn = t.get("function") or {}
if not isinstance(fn, dict):
continue
name = fn.get("name")
if not isinstance(name, str) or not name.strip():
continue
tool_specs.append(
{
"name": name.strip(),
"description": fn.get("description", ""),
"parameters": fn.get("parameters", {}),
}
)
if tool_specs:
sections.append(
"Available tools (OpenAI function schema). "
"When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
"containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
+ json.dumps(tool_specs, ensure_ascii=False)
)
if tool_choice is not None:
sections.append(f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}")
transcript: list[str] = []
for message in messages:
if not isinstance(message, dict):
continue
role = str(message.get("role") or "unknown").strip().lower()
if role == "tool":
role = "tool"
elif role not in {"system", "user", "assistant"}:
role = "context"
content = message.get("content")
rendered = _render_message_content(content)
if not rendered:
continue
label = {
"system": "System",
"user": "User",
"assistant": "Assistant",
"tool": "Tool",
"context": "Context",
}.get(role, role.title())
transcript.append(f"{label}:\n{rendered}")
if transcript:
sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
sections.append("Continue the conversation from the latest user request.")
return "\n\n".join(section.strip() for section in sections if section and section.strip())
def _render_message_content(content: Any) -> str:
if content is None:
return ""
if isinstance(content, str):
return content.strip()
if isinstance(content, dict):
if "text" in content:
return str(content.get("text") or "").strip()
if "content" in content and isinstance(content.get("content"), str):
return str(content.get("content") or "").strip()
return json.dumps(content, ensure_ascii=True)
if isinstance(content, list):
parts: list[str] = []
for item in content:
if isinstance(item, str):
parts.append(item)
elif isinstance(item, dict):
text = item.get("text")
if isinstance(text, str) and text.strip():
parts.append(text.strip())
return "\n".join(parts).strip()
return str(content).strip()
def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
if not isinstance(text, str) or not text.strip():
return [], ""
extracted: list[SimpleNamespace] = []
consumed_spans: list[tuple[int, int]] = []
def _try_add_tool_call(raw_json: str) -> None:
try:
obj = json.loads(raw_json)
except Exception:
return
if not isinstance(obj, dict):
return
fn = obj.get("function")
if not isinstance(fn, dict):
return
fn_name = fn.get("name")
if not isinstance(fn_name, str) or not fn_name.strip():
return
fn_args = fn.get("arguments", "{}")
if not isinstance(fn_args, str):
fn_args = json.dumps(fn_args, ensure_ascii=False)
call_id = obj.get("id")
if not isinstance(call_id, str) or not call_id.strip():
call_id = f"acp_call_{len(extracted)+1}"
extracted.append(
SimpleNamespace(
id=call_id,
call_id=call_id,
response_item_id=None,
type="function",
function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
)
)
for m in _TOOL_CALL_BLOCK_RE.finditer(text):
raw = m.group(1)
_try_add_tool_call(raw)
consumed_spans.append((m.start(), m.end()))
# Only try bare-JSON fallback when no XML blocks were found.
if not extracted:
for m in _TOOL_CALL_JSON_RE.finditer(text):
raw = m.group(0)
_try_add_tool_call(raw)
consumed_spans.append((m.start(), m.end()))
if not consumed_spans:
return extracted, text.strip()
consumed_spans.sort()
merged: list[tuple[int, int]] = []
for start, end in consumed_spans:
if not merged or start > merged[-1][1]:
merged.append((start, end))
else:
merged[-1] = (merged[-1][0], max(merged[-1][1], end))
parts: list[str] = []
cursor = 0
for start, end in merged:
if cursor < start:
parts.append(text[cursor:start])
cursor = max(cursor, end)
if cursor < len(text):
parts.append(text[cursor:])
cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
return extracted, cleaned
def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
candidate = Path(path_text)
if not candidate.is_absolute():
raise PermissionError("ACP file-system paths must be absolute.")
resolved = candidate.resolve()
root = Path(cwd).resolve()
try:
resolved.relative_to(root)
except ValueError as exc:
raise PermissionError(f"Path '{resolved}' is outside the session cwd '{root}'.") from exc
return resolved
class _ACPChatCompletions:
def __init__(self, client: "CopilotACPClient"):
self._client = client
def create(self, **kwargs: Any) -> Any:
return self._client._create_chat_completion(**kwargs)
class _ACPChatNamespace:
def __init__(self, client: "CopilotACPClient"):
self.completions = _ACPChatCompletions(client)
class CopilotACPClient:
"""Minimal OpenAI-client-compatible facade for Copilot ACP."""
def __init__(
self,
*,
api_key: str | None = None,
base_url: str | None = None,
default_headers: dict[str, str] | None = None,
acp_command: str | None = None,
acp_args: list[str] | None = None,
acp_cwd: str | None = None,
command: str | None = None,
args: list[str] | None = None,
**_: Any,
):
self.api_key = api_key or "copilot-acp"
self.base_url = base_url or ACP_MARKER_BASE_URL
self._default_headers = dict(default_headers or {})
self._acp_command = acp_command or command or _resolve_command()
self._acp_args = list(acp_args or args or _resolve_args())
self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
self.chat = _ACPChatNamespace(self)
self.is_closed = False
self._active_process: subprocess.Popen[str] | None = None
self._active_process_lock = threading.Lock()
def close(self) -> None:
proc: subprocess.Popen[str] | None
with self._active_process_lock:
proc = self._active_process
self._active_process = None
self.is_closed = True
if proc is None:
return
try:
proc.terminate()
proc.wait(timeout=2)
except Exception:
try:
proc.kill()
except Exception:
pass
def _create_chat_completion(
self,
*,
model: str | None = None,
messages: list[dict[str, Any]] | None = None,
timeout: float | None = None,
tools: list[dict[str, Any]] | None = None,
tool_choice: Any = None,
**_: Any,
) -> Any:
prompt_text = _format_messages_as_prompt(
messages or [],
model=model,
tools=tools,
tool_choice=tool_choice,
)
# Normalise timeout: run_agent.py may pass an httpx.Timeout object
# (used natively by the OpenAI SDK) rather than a plain float.
if timeout is None:
_effective_timeout = _DEFAULT_TIMEOUT_SECONDS
elif isinstance(timeout, (int, float)):
_effective_timeout = float(timeout)
else:
# httpx.Timeout or similar — pick the largest component so the
# subprocess has enough wall-clock time for the full response.
_candidates = [
getattr(timeout, attr, None)
for attr in ("read", "write", "connect", "pool", "timeout")
]
_numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
_effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
response_text, reasoning_text = self._run_prompt(
prompt_text,
timeout_seconds=_effective_timeout,
)
tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
usage = SimpleNamespace(
prompt_tokens=0,
completion_tokens=0,
total_tokens=0,
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
)
assistant_message = SimpleNamespace(
content=cleaned_text,
tool_calls=tool_calls,
reasoning=reasoning_text or None,
reasoning_content=reasoning_text or None,
reasoning_details=None,
)
finish_reason = "tool_calls" if tool_calls else "stop"
choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
return SimpleNamespace(
choices=[choice],
usage=usage,
model=model or "copilot-acp",
)
def _run_prompt(self, prompt_text: str, *, timeout_seconds: float) -> tuple[str, str]:
try:
proc = subprocess.Popen(
[self._acp_command] + self._acp_args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
bufsize=1,
cwd=self._acp_cwd,
env=_build_subprocess_env(),
)
except FileNotFoundError as exc:
raise RuntimeError(
f"Could not start Copilot ACP command '{self._acp_command}'. "
"Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
) from exc
if proc.stdin is None or proc.stdout is None:
proc.kill()
raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
self.is_closed = False
with self._active_process_lock:
self._active_process = proc
inbox: queue.Queue[dict[str, Any]] = queue.Queue()
stderr_tail: deque[str] = deque(maxlen=40)
def _stdout_reader() -> None:
if proc.stdout is None:
return
for line in proc.stdout:
try:
inbox.put(json.loads(line))
except Exception:
inbox.put({"raw": line.rstrip("\n")})
def _stderr_reader() -> None:
if proc.stderr is None:
return
for line in proc.stderr:
stderr_tail.append(line.rstrip("\n"))
out_thread = threading.Thread(target=_stdout_reader, daemon=True)
err_thread = threading.Thread(target=_stderr_reader, daemon=True)
out_thread.start()
err_thread.start()
next_id = 0
def _request(method: str, params: dict[str, Any], *, text_parts: list[str] | None = None, reasoning_parts: list[str] | None = None) -> Any:
nonlocal next_id
next_id += 1
request_id = next_id
payload = {
"jsonrpc": "2.0",
"id": request_id,
"method": method,
"params": params,
}
proc.stdin.write(json.dumps(payload) + "\n")
proc.stdin.flush()
deadline = time.time() + timeout_seconds
while time.time() < deadline:
if proc.poll() is not None:
break
try:
msg = inbox.get(timeout=0.1)
except queue.Empty:
continue
if self._handle_server_message(
msg,
process=proc,
cwd=self._acp_cwd,
text_parts=text_parts,
reasoning_parts=reasoning_parts,
):
continue
if msg.get("id") != request_id:
continue
if "error" in msg:
err = msg.get("error") or {}
raise RuntimeError(
f"Copilot ACP {method} failed: {err.get('message') or err}"
)
return msg.get("result")
stderr_text = "\n".join(stderr_tail).strip()
if proc.poll() is not None and stderr_text:
raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
try:
_request(
"initialize",
{
"protocolVersion": 1,
"clientCapabilities": {
"fs": {
"readTextFile": True,
"writeTextFile": True,
}
},
"clientInfo": {
"name": "hermes-agent",
"title": "Hermes Agent",
"version": "0.0.0",
},
},
)
session = _request(
"session/new",
{
"cwd": self._acp_cwd,
"mcpServers": [],
},
) or {}
session_id = str(session.get("sessionId") or "").strip()
if not session_id:
raise RuntimeError("Copilot ACP did not return a sessionId.")
text_parts: list[str] = []
reasoning_parts: list[str] = []
_request(
"session/prompt",
{
"sessionId": session_id,
"prompt": [
{
"type": "text",
"text": prompt_text,
}
],
},
text_parts=text_parts,
reasoning_parts=reasoning_parts,
)
return "".join(text_parts), "".join(reasoning_parts)
finally:
self.close()
def _handle_server_message(
self,
msg: dict[str, Any],
*,
process: subprocess.Popen[str],
cwd: str,
text_parts: list[str] | None,
reasoning_parts: list[str] | None,
) -> bool:
method = msg.get("method")
if not isinstance(method, str):
return False
if method == "session/update":
params = msg.get("params") or {}
update = params.get("update") or {}
kind = str(update.get("sessionUpdate") or "").strip()
content = update.get("content") or {}
chunk_text = ""
if isinstance(content, dict):
chunk_text = str(content.get("text") or "")
if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
text_parts.append(chunk_text)
elif kind == "agent_thought_chunk" and chunk_text and reasoning_parts is not None:
reasoning_parts.append(chunk_text)
return True
if process.stdin is None:
return True
message_id = msg.get("id")
params = msg.get("params") or {}
if method == "session/request_permission":
response = _permission_denied(message_id)
elif method == "fs/read_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
block_error = get_read_block_error(str(path))
if block_error:
raise PermissionError(block_error)
content = path.read_text() if path.exists() else ""
line = params.get("line")
limit = params.get("limit")
if isinstance(line, int) and line > 1:
lines = content.splitlines(keepends=True)
start = line - 1
end = start + limit if isinstance(limit, int) and limit > 0 else None
content = "".join(lines[start:end])
if content:
content = redact_sensitive_text(content)
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"content": content,
},
}
except Exception as exc:
response = _jsonrpc_error(message_id, -32602, str(exc))
elif method == "fs/write_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
if is_write_denied(str(path)):
raise PermissionError(
f"Write denied: '{path}' is a protected system/credential file."
)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(str(params.get("content") or ""))
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": None,
}
except Exception as exc:
response = _jsonrpc_error(message_id, -32602, str(exc))
else:
response = _jsonrpc_error(
message_id,
-32601,
f"ACP client method '{method}' is not supported by Hermes yet.",
)
process.stdin.write(json.dumps(response) + "\n")
process.stdin.flush()
return True
-31
View File
@@ -42,7 +42,6 @@ class FailoverReason(enum.Enum):
# Context / payload
context_overflow = "context_overflow" # Context too large — compress, not failover
payload_too_large = "payload_too_large" # 413 — compress payload
image_too_large = "image_too_large" # Native image part exceeds provider's per-image limit — shrink and retry
# Model
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
@@ -148,20 +147,6 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
"error code: 413",
]
# Image-size patterns. Matched against 400 bodies (not 413) because most
# providers return a 400 with a specific image-too-big message before the
# whole request hits the 413 size limit. Anthropic's wording is the most
# important here (hard 5 MB per image, returned as
# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
_IMAGE_TOO_LARGE_PATTERNS = [
"image exceeds", # Anthropic: "image exceeds 5 MB maximum"
"image too large", # generic
"image_too_large", # error_code variant
"image size exceeds", # variant
# "request_too_large" on a request known to contain an image → image is
# the likely culprit; we still try the shrink path before giving up.
]
# Context overflow patterns
_CONTEXT_OVERFLOW_PATTERNS = [
"context length",
@@ -686,15 +671,6 @@ def _classify_400(
) -> ClassifiedError:
"""Classify 400 Bad Request — context overflow, format error, or generic."""
# Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
# Must be checked BEFORE context_overflow because messages can trip both
# patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
return result_fn(
FailoverReason.image_too_large,
retryable=True,
)
# Context overflow from 400
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
return result_fn(
@@ -822,13 +798,6 @@ def _classify_by_message(
should_compress=True,
)
# Image-too-large patterns (from message text when no status_code)
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
return result_fn(
FailoverReason.image_too_large,
retryable=True,
)
# Usage-limit patterns need the same disambiguation as 402: some providers
# surface "usage limit" errors without an HTTP status code. A transient
# signal ("try again", "resets at", …) means it's a periodic quota, not
-236
View File
@@ -1,236 +0,0 @@
"""Routing helpers for inbound user-attached images.
Two modes:
native — attach images as OpenAI-style ``image_url`` content parts on the
user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
OpenAI chat.completions) already translate these into their
vendor-specific multimodal formats.
text — run ``vision_analyze`` on each image up-front and prepend the
description to the user's text. The model never sees the pixels;
it only sees a lossy text summary. This is the pre-existing
behaviour and still the right choice for non-vision models.
The decision is made once per message turn by :func:`decide_image_input_mode`.
It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
| ``text``, default ``auto``) and the active model's capability metadata.
In ``auto`` mode:
- If the user has explicitly configured ``auxiliary.vision.provider``
(i.e. not ``auto`` and not empty), we assume they want the text pipeline
regardless of the main model — they've opted in to a specific vision
backend for a reason (cost, quality, local-only, etc.).
- Otherwise, if the active model reports ``supports_vision=True`` in its
models.dev metadata, we attach natively.
- Otherwise (non-vision model, no explicit override), we fall back to text.
This keeps ``vision_analyze`` surfaced as a tool in every session — skills
and agent flows that chain it (browser screenshots, deeper inspection of
URL-referenced images, style-gating loops) keep working. The routing only
affects *how user-attached images on the current turn* are presented to the
main model.
"""
from __future__ import annotations
import base64
import logging
import mimetypes
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
_VALID_MODES = frozenset({"auto", "native", "text"})
def _coerce_mode(raw: Any) -> str:
"""Normalize a config value into one of the valid modes."""
if not isinstance(raw, str):
return "auto"
val = raw.strip().lower()
if val in _VALID_MODES:
return val
return "auto"
def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
"""True when the user configured a specific auxiliary vision backend.
An explicit override means the user *wants* the text pipeline (they're
paying for a dedicated vision model), so we don't silently bypass it.
"""
if not isinstance(cfg, dict):
return False
aux = cfg.get("auxiliary") or {}
if not isinstance(aux, dict):
return False
vision = aux.get("vision") or {}
if not isinstance(vision, dict):
return False
provider = str(vision.get("provider") or "").strip().lower()
model = str(vision.get("model") or "").strip()
base_url = str(vision.get("base_url") or "").strip()
# "auto" / "" / blank = not explicit
if provider in ("", "auto") and not model and not base_url:
return False
return True
def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
"""Return True/False if we can resolve caps, None if unknown."""
if not provider or not model:
return None
try:
from agent.models_dev import get_model_capabilities
caps = get_model_capabilities(provider, model)
except Exception as exc: # pragma: no cover - defensive
logger.debug("image_routing: caps lookup failed for %s:%s%s", provider, model, exc)
return None
if caps is None:
return None
return bool(caps.supports_vision)
def decide_image_input_mode(
provider: str,
model: str,
cfg: Optional[Dict[str, Any]],
) -> str:
"""Return ``"native"`` or ``"text"`` for the given turn.
Args:
provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
model: active model slug as it would be sent to the provider.
cfg: loaded config.yaml dict, or None. When None, behaves as auto.
"""
mode_cfg = "auto"
if isinstance(cfg, dict):
agent_cfg = cfg.get("agent") or {}
if isinstance(agent_cfg, dict):
mode_cfg = _coerce_mode(agent_cfg.get("image_input_mode"))
if mode_cfg == "native":
return "native"
if mode_cfg == "text":
return "text"
# auto
if _explicit_aux_vision_override(cfg):
return "text"
supports = _lookup_supports_vision(provider, model)
if supports is True:
return "native"
return "text"
# Image size handling is REACTIVE rather than proactive: we attempt native
# attachment at full size regardless of provider, and rely on
# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
#
# Why reactive: our knowledge of provider ceilings is partial and evolving
# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
# A proactive per-provider table would be stale the moment a provider raises
# or lowers its limit, and silently degrading quality for users on providers
# that would have accepted the full image is the worse failure mode.
# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
# it fires, which is cheaper than permanent quality loss.
def _guess_mime(path: Path) -> str:
mime, _ = mimetypes.guess_type(str(path))
if mime and mime.startswith("image/"):
return mime
# mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
# the suffix looks imagey.
suffix = path.suffix.lower()
return {
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".png": "image/png",
".gif": "image/gif",
".webp": "image/webp",
".bmp": "image/bmp",
}.get(suffix, "image/jpeg")
def _file_to_data_url(path: Path) -> Optional[str]:
"""Encode a local image as a base64 data URL at its native size.
Size limits are NOT enforced here — the agent retry loop
(``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
provider's first rejection. Keeping this simple means providers that
accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
quality tax just because one other provider is stricter.
Returns None only if the file can't be read (missing, permission
denied, etc.); the caller reports those paths in ``skipped``.
"""
try:
raw = path.read_bytes()
except Exception as exc:
logger.warning("image_routing: failed to read %s%s", path, exc)
return None
mime = _guess_mime(path)
b64 = base64.b64encode(raw).decode("ascii")
return f"data:{mime};base64,{b64}"
def build_native_content_parts(
user_text: str,
image_paths: List[str],
) -> Tuple[List[Dict[str, Any]], List[str]]:
"""Build an OpenAI-style ``content`` list for a user turn.
Shape:
[{"type": "text", "text": "..."},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
...]
Images are attached at their native size. If a provider rejects the
request because an image is too large (e.g. Anthropic's 5 MB per-image
ceiling), the agent's retry loop transparently shrinks and retries
once — see ``run_agent._try_shrink_image_parts_in_messages``.
Returns (content_parts, skipped_paths). Skipped paths are files that
couldn't be read from disk.
"""
parts: List[Dict[str, Any]] = []
skipped: List[str] = []
text = (user_text or "").strip()
if text:
parts.append({"type": "text", "text": text})
for raw_path in image_paths:
p = Path(raw_path)
if not p.exists() or not p.is_file():
skipped.append(str(raw_path))
continue
data_url = _file_to_data_url(p)
if not data_url:
skipped.append(str(raw_path))
continue
parts.append({
"type": "image_url",
"image_url": {"url": data_url},
})
# If the text was empty, add a neutral prompt so the turn isn't just images.
if not text and any(p.get("type") == "image_url" for p in parts):
parts.insert(0, {"type": "text", "text": "What do you see in this image?"})
return parts, skipped
__all__ = [
"decide_image_input_mode",
"build_native_content_parts",
]
+5 -114
View File
@@ -63,124 +63,15 @@ def sanitize_context(text: str) -> str:
return text
class StreamingContextScrubber:
"""Stateful scrubber for streaming text that may contain split memory-context spans.
The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
a ``<memory-context>`` opened in one delta and closed in a later delta
leaks its payload to the UI because the non-greedy block regex needs
both tags in one string. This scrubber runs a small state machine
across deltas, holding back partial-tag tails and discarding
everything inside a span (including the system-note line).
Usage::
scrubber = StreamingContextScrubber()
for delta in stream:
visible = scrubber.feed(delta)
if visible:
emit(visible)
trailing = scrubber.flush() # at end of stream
if trailing:
emit(trailing)
The scrubber is re-entrant per agent instance. Callers building new
top-level responses (new turn) should create a fresh scrubber or call
``reset()``.
"""
_OPEN_TAG = "<memory-context>"
_CLOSE_TAG = "</memory-context>"
def __init__(self) -> None:
self._in_span: bool = False
self._buf: str = ""
def reset(self) -> None:
self._in_span = False
self._buf = ""
def feed(self, text: str) -> str:
"""Return the visible portion of ``text`` after scrubbing.
Any trailing fragment that could be the start of an open/close tag
is held back in the internal buffer and surfaced on the next
``feed()`` call or discarded/emitted by ``flush()``.
"""
if not text:
return ""
buf = self._buf + text
self._buf = ""
out: list[str] = []
while buf:
if self._in_span:
idx = buf.lower().find(self._CLOSE_TAG)
if idx == -1:
# Hold back a potential partial close tag; drop the rest
held = self._max_partial_suffix(buf, self._CLOSE_TAG)
self._buf = buf[-held:] if held else ""
return "".join(out)
# Found close — skip span content + tag, continue
buf = buf[idx + len(self._CLOSE_TAG):]
self._in_span = False
else:
idx = buf.lower().find(self._OPEN_TAG)
if idx == -1:
# No open tag — hold back a potential partial open tag
held = self._max_partial_suffix(buf, self._OPEN_TAG)
if held:
out.append(buf[:-held])
self._buf = buf[-held:]
else:
out.append(buf)
return "".join(out)
# Emit text before the tag, enter span
if idx > 0:
out.append(buf[:idx])
buf = buf[idx + len(self._OPEN_TAG):]
self._in_span = True
return "".join(out)
def flush(self) -> str:
"""Emit any held-back buffer at end-of-stream.
If we're still inside an unterminated span the remaining content is
discarded (safer: leaking partial memory context is worse than a
truncated answer). Otherwise the held-back partial-tag tail is
emitted verbatim (it turned out not to be a real tag).
"""
if self._in_span:
self._buf = ""
self._in_span = False
return ""
tail = self._buf
self._buf = ""
return tail
@staticmethod
def _max_partial_suffix(buf: str, tag: str) -> int:
"""Return the length of the longest buf-suffix that is a tag-prefix.
Case-insensitive. Returns 0 if no suffix could start the tag.
"""
tag_lower = tag.lower()
buf_lower = buf.lower()
max_check = min(len(buf_lower), len(tag_lower) - 1)
for i in range(max_check, 0, -1):
if tag_lower.startswith(buf_lower[-i:]):
return i
return 0
def build_memory_context_block(raw_context: str) -> str:
"""Wrap prefetched memory in a fenced block with system note."""
"""Wrap prefetched memory in a fenced block with system note.
The fence prevents the model from treating recalled context as user
discourse. Injected at API-call time only — never persisted.
"""
if not raw_context or not raw_context.strip():
return ""
clean = sanitize_context(raw_context)
if clean != raw_context:
logger.warning("memory provider returned pre-wrapped context; stripped")
return (
"<memory-context>\n"
"[System note: The following is recalled memory context, "
+16 -46
View File
@@ -51,7 +51,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"qwen-oauth",
"xiaomi",
"arcee",
"gmi",
"custom", "local",
# Common aliases
"google", "google-gemini", "google-ai-studio",
@@ -61,7 +60,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
"gmi-cloud", "gmicloud",
"xai", "x-ai", "x.ai", "grok",
"nvidia", "nim", "nvidia-nim", "nemotron",
"qwen-portal",
@@ -309,21 +307,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"integrate.api.nvidia.com": "nvidia",
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"api.gmi-serving.com": "gmi",
"ollama.com": "ollama-cloud",
}
# Auto-extend with hostnames derived from provider profiles.
# Any provider with a base_url not already in the map gets added automatically.
try:
from providers import list_providers as _list_providers
for _pp in _list_providers():
_host = _pp.get_hostname()
if _host and _host not in _URL_TO_PROVIDER:
_URL_TO_PROVIDER[_host] = _pp.name
except Exception:
pass
def _infer_provider_from_url(base_url: str) -> Optional[str]:
"""Infer the models.dev provider name from a base URL.
@@ -716,29 +702,6 @@ def fetch_endpoint_model_metadata(
return {}
def _resolve_endpoint_context_length(
model: str,
base_url: str,
api_key: str = "",
) -> Optional[int]:
"""Resolve context length from an endpoint's live ``/models`` metadata."""
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
matched = endpoint_metadata.get(model)
if not matched:
if len(endpoint_metadata) == 1:
matched = next(iter(endpoint_metadata.values()))
else:
for key, entry in endpoint_metadata.items():
if model in key or key in model:
matched = entry
break
if matched:
context_length = matched.get("context_length")
if isinstance(context_length, int):
return context_length
return None
def _get_context_cache_path() -> Path:
"""Return path to the persistent context length cache file."""
from hermes_constants import get_hermes_home
@@ -1332,9 +1295,22 @@ def get_model_context_length(
# returns 128k) instead of the model's full context (400k). models.dev
# has the correct per-provider values and is checked at step 5+.
if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if context_length is not None:
return context_length
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
matched = endpoint_metadata.get(model)
if not matched:
# Single-model servers: if only one model is loaded, use it
if len(endpoint_metadata) == 1:
matched = next(iter(endpoint_metadata.values()))
else:
# Fuzzy match: substring in either direction
for key, entry in endpoint_metadata.items():
if model in key or key in model:
matched = entry
break
if matched:
context_length = matched.get("context_length")
if isinstance(context_length, int):
return context_length
if not _is_known_provider_base_url(base_url):
# 3. Try querying local server directly
if is_local_endpoint(base_url):
@@ -1398,12 +1374,6 @@ def get_model_context_length(
if base_url:
save_context_length(model, base_url, codex_ctx)
return codex_ctx
if effective_provider == "gmi" and base_url:
# GMI exposes authoritative context_length via /models, but it is not
# in models.dev yet. Preserve that higher-fidelity endpoint lookup.
ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if ctx is not None:
return ctx
if effective_provider:
from agent.models_dev import lookup_models_dev_context
ctx = lookup_models_dev_context(effective_provider, model)
-6
View File
@@ -141,12 +141,6 @@ DEFAULT_AGENT_IDENTITY = (
"Be targeted and efficient in your exploration and investigations."
)
HERMES_AGENT_HELP_GUIDANCE = (
"If the user asks about configuring, setting up, or using Hermes Agent "
"itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
"before answering. Docs: https://hermes-agent.nousresearch.com/docs"
)
MEMORY_GUIDANCE = (
"You have persistent memory across sessions. Save durable facts using the memory "
"tool: user preferences, environment details, tool quirks, and stable conventions. "
+4 -33
View File
@@ -6,18 +6,12 @@ adds latency to the user-facing reply.
import logging
import threading
from typing import Callable, Optional
from typing import Optional
from agent.auxiliary_client import call_llm
logger = logging.getLogger(__name__)
# Callback signature: (task_name, exception) -> None. Used to surface
# auxiliary failures to the user through AIAgent._emit_auxiliary_failure
# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
# become visible instead of piling up as NULL session titles.
FailureCallback = Callable[[str, BaseException], None]
_TITLE_PROMPT = (
"Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
"following exchange. The title should capture the main topic or intent. "
@@ -25,21 +19,11 @@ _TITLE_PROMPT = (
)
def generate_title(
user_message: str,
assistant_response: str,
timeout: float = 30.0,
failure_callback: Optional[FailureCallback] = None,
) -> Optional[str]:
def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
"""Generate a session title from the first exchange.
Uses the auxiliary LLM client (cheapest/fastest available model).
Returns the title string or None on failure.
``failure_callback`` is invoked with ``(task, exception)`` when the
auxiliary call raises the caller typically wires this to
``AIAgent._emit_auxiliary_failure`` so the user sees a warning instead
of silently accumulating untitled sessions.
"""
# Truncate long messages to keep the request small
user_snippet = user_message[:500] if user_message else ""
@@ -68,15 +52,7 @@ def generate_title(
title = title[:77] + "..."
return title if title else None
except Exception as e:
# Log at WARNING so this shows up in agent.log without debug mode.
# Full detail at debug level for operators who need the stack.
logger.warning("Title generation failed: %s", e)
logger.debug("Title generation traceback", exc_info=True)
if failure_callback is not None:
try:
failure_callback("title generation", e)
except Exception:
logger.debug("Title generation failure_callback raised", exc_info=True)
logger.debug("Title generation failed: %s", e)
return None
@@ -85,7 +61,6 @@ def auto_title_session(
session_id: str,
user_message: str,
assistant_response: str,
failure_callback: Optional[FailureCallback] = None,
) -> None:
"""Generate and set a session title if one doesn't already exist.
@@ -106,9 +81,7 @@ def auto_title_session(
except Exception:
return
title = generate_title(
user_message, assistant_response, failure_callback=failure_callback
)
title = generate_title(user_message, assistant_response)
if not title:
return
@@ -125,7 +98,6 @@ def maybe_auto_title(
user_message: str,
assistant_response: str,
conversation_history: list,
failure_callback: Optional[FailureCallback] = None,
) -> None:
"""Fire-and-forget title generation after the first exchange.
@@ -147,7 +119,6 @@ def maybe_auto_title(
thread = threading.Thread(
target=auto_title_session,
args=(session_db, session_id, user_message, assistant_response),
kwargs={"failure_callback": failure_callback},
daemon=True,
name="auto-title",
)
+1 -13
View File
@@ -6,16 +6,9 @@ Usage:
result = transport.normalize_response(raw_response)
"""
from agent.transports.types import (
NormalizedResponse,
ToolCall,
Usage,
build_tool_call,
map_finish_reason,
) # noqa: F401
from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
_REGISTRY: dict = {}
_discovered: bool = False
def register_transport(api_mode: str, transport_cls: type) -> None:
@@ -30,9 +23,6 @@ def get_transport(api_mode: str):
This allows gradual migration call sites can check for None
and fall back to the legacy code path.
"""
global _discovered
if not _discovered:
_discover_transports()
cls = _REGISTRY.get(api_mode)
if cls is None:
# The registry can be partially populated when a specific transport
@@ -48,8 +38,6 @@ def get_transport(api_mode: str):
def _discover_transports() -> None:
"""Import all transport modules to trigger auto-registration."""
global _discovered
_discovered = True
try:
import agent.transports.anthropic # noqa: F401
except ImportError:
+153 -161
View File
@@ -10,7 +10,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
"""
import copy
from typing import Any
from typing import Any, Dict, List, Optional
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
@@ -28,9 +28,7 @@ class ChatCompletionsTransport(ProviderTransport):
def api_mode(self) -> str:
return "chat_completions"
def convert_messages(
self, messages: list[dict[str, Any]], **kwargs
) -> list[dict[str, Any]]:
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
"""Messages are already in OpenAI format — sanitize Codex leaks only.
Strips Codex Responses API fields (``codex_reasoning_items`` /
@@ -47,9 +45,7 @@ class ChatCompletionsTransport(ProviderTransport):
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
if isinstance(tc, dict) and (
"call_id" in tc or "response_item_id" in tc
):
if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
needs_sanitize = True
break
if needs_sanitize:
@@ -72,52 +68,76 @@ class ChatCompletionsTransport(ProviderTransport):
tc.pop("response_item_id", None)
return sanitized
def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Tools are already in OpenAI format — identity."""
return tools
def build_kwargs(
self,
model: str,
messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None = None,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> dict[str, Any]:
) -> Dict[str, Any]:
"""Build chat.completions.create() kwargs.
params (all optional):
This is the most complex transport method it handles ~16 providers
via params rather than subclasses.
params:
timeout: float API call timeout
max_tokens: int | None user-configured max tokens
ephemeral_max_output_tokens: int | None one-shot override
ephemeral_max_output_tokens: int | None one-shot override (error recovery)
max_tokens_param_fn: callable returns {max_tokens: N} or {max_completion_tokens: N}
reasoning_config: dict | None
request_overrides: dict | None
session_id: str | None
qwen_session_metadata: dict | None {sessionId, promptId} precomputed
model_lower: str lowercase model name for pattern matching
# Provider profile path (all per-provider quirks live in providers/)
provider_profile: ProviderProfile | None when present, delegates to
_build_kwargs_from_profile(); all flag params below are bypassed.
# Remaining flags — only used by the legacy fallback for unregistered
# providers (i.e. get_provider_profile() returned None). Known
# providers all go through provider_profile.
qwen_session_metadata: dict | None
# Provider detection flags (all optional, default False)
is_openrouter: bool
is_nous: bool
is_qwen_portal: bool
is_github_models: bool
is_nvidia_nim: bool
is_kimi: bool
is_custom_provider: bool
ollama_num_ctx: int | None
# Provider routing
provider_preferences: dict | None
# Qwen-specific
qwen_prepare_fn: callable | None runs AFTER codex sanitization
qwen_prepare_inplace_fn: callable | None in-place variant for deepcopied lists
# Temperature
fixed_temperature: Any from _fixed_temperature_for_model()
omit_temperature: bool
# Reasoning
supports_reasoning: bool
github_reasoning_extra: dict | None
# Claude on OpenRouter/Nous max output
anthropic_max_output: int | None
extra_body_additions: dict | None
# Extra
extra_body_additions: dict | None pre-built extra_body entries
"""
# Codex sanitization: drop reasoning_items / call_id / response_item_id
sanitized = self.convert_messages(messages)
# ── Provider profile: single-path when present ──────────────────
_profile = params.get("provider_profile")
if _profile:
return self._build_kwargs_from_profile(
_profile, model, sanitized, tools, params
)
# ── Legacy fallback (unregistered / unknown provider) ───────────
# Reached only when get_provider_profile() returned None.
# Known providers always go through the profile path above.
# Qwen portal prep AFTER codex sanitization. If sanitize already
# deepcopied, reuse that copy via the in-place variant to avoid a
# second deepcopy.
is_qwen = params.get("is_qwen_portal", False)
if is_qwen:
qwen_prep = params.get("qwen_prepare_fn")
qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
if sanitized is messages:
if qwen_prep is not None:
sanitized = qwen_prep(sanitized)
else:
# Already deepcopied — transform in place
if qwen_prep_inplace is not None:
qwen_prep_inplace(sanitized)
elif qwen_prep is not None:
sanitized = qwen_prep(sanitized)
# Developer role swap for GPT-5/Codex models
model_lower = params.get("model_lower", (model or "").lower())
@@ -130,7 +150,7 @@ class ChatCompletionsTransport(ProviderTransport):
sanitized = list(sanitized)
sanitized[0] = {**sanitized[0], "role": "developer"}
api_kwargs: dict[str, Any] = {
api_kwargs: Dict[str, Any] = {
"model": model,
"messages": sanitized,
}
@@ -139,6 +159,19 @@ class ChatCompletionsTransport(ProviderTransport):
if timeout is not None:
api_kwargs["timeout"] = timeout
# Temperature
fixed_temp = params.get("fixed_temperature")
omit_temp = params.get("omit_temperature", False)
if omit_temp:
api_kwargs.pop("temperature", None)
elif fixed_temp is not None:
api_kwargs["temperature"] = fixed_temp
# Qwen metadata (caller precomputes {sessionId, promptId})
qwen_meta = params.get("qwen_session_metadata")
if qwen_meta and is_qwen:
api_kwargs["metadata"] = qwen_meta
# Tools
if tools:
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
@@ -153,24 +186,96 @@ class ChatCompletionsTransport(ProviderTransport):
ephemeral = params.get("ephemeral_max_output_tokens")
max_tokens = params.get("max_tokens")
anthropic_max_out = params.get("anthropic_max_output")
is_nvidia_nim = params.get("is_nvidia_nim", False)
is_kimi = params.get("is_kimi", False)
reasoning_config = params.get("reasoning_config")
if ephemeral is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(ephemeral))
elif max_tokens is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(max_tokens))
elif is_nvidia_nim and max_tokens_fn:
api_kwargs.update(max_tokens_fn(16384))
elif is_qwen and max_tokens_fn:
api_kwargs.update(max_tokens_fn(65536))
elif is_kimi and max_tokens_fn:
# Kimi/Moonshot: 32000 matches Kimi CLI's default
api_kwargs.update(max_tokens_fn(32000))
elif anthropic_max_out is not None:
api_kwargs["max_tokens"] = anthropic_max_out
# extra_body assembly
extra_body: dict[str, Any] = {}
# Kimi: top-level reasoning_effort (unless thinking disabled)
if is_kimi:
_kimi_thinking_off = bool(
reasoning_config
and isinstance(reasoning_config, dict)
and reasoning_config.get("enabled") is False
)
if not _kimi_thinking_off:
_kimi_effort = "medium"
if reasoning_config and isinstance(reasoning_config, dict):
_e = (reasoning_config.get("effort") or "").strip().lower()
if _e in ("low", "medium", "high"):
_kimi_effort = _e
api_kwargs["reasoning_effort"] = _kimi_effort
# Generic reasoning passthrough for unknown providers
# extra_body assembly
extra_body: Dict[str, Any] = {}
is_openrouter = params.get("is_openrouter", False)
is_nous = params.get("is_nous", False)
is_github_models = params.get("is_github_models", False)
provider_prefs = params.get("provider_preferences")
if provider_prefs and is_openrouter:
extra_body["provider"] = provider_prefs
# Kimi extra_body.thinking
if is_kimi:
_kimi_thinking_enabled = True
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is False:
_kimi_thinking_enabled = False
extra_body["thinking"] = {
"type": "enabled" if _kimi_thinking_enabled else "disabled",
}
# Reasoning
if params.get("supports_reasoning", False):
reasoning_config = params.get("reasoning_config")
if reasoning_config is not None:
extra_body["reasoning"] = dict(reasoning_config)
if is_github_models:
gh_reasoning = params.get("github_reasoning_extra")
if gh_reasoning is not None:
extra_body["reasoning"] = gh_reasoning
else:
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
if reasoning_config is not None:
rc = dict(reasoning_config)
if is_nous and rc.get("enabled") is False:
pass # omit for Nous when disabled
else:
extra_body["reasoning"] = rc
else:
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
if is_nous:
extra_body["tags"] = ["product=hermes-agent"]
# Ollama num_ctx
ollama_ctx = params.get("ollama_num_ctx")
if ollama_ctx:
options = extra_body.get("options", {})
options["num_ctx"] = ollama_ctx
extra_body["options"] = options
# Ollama/custom think=false
if params.get("is_custom_provider", False):
if reasoning_config and isinstance(reasoning_config, dict):
_effort = (reasoning_config.get("effort") or "").strip().lower()
_enabled = reasoning_config.get("enabled", True)
if _effort == "none" or _enabled is False:
extra_body["think"] = False
if is_qwen:
extra_body["vl_high_resolution_images"] = True
# Merge any pre-built extra_body additions
additions = params.get("extra_body_additions")
@@ -187,117 +292,6 @@ class ChatCompletionsTransport(ProviderTransport):
return api_kwargs
def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
"""Build API kwargs using a ProviderProfile — single path, no legacy flags.
This method replaces the entire flag-based kwargs assembly when a
provider_profile is passed. Every quirk comes from the profile object.
"""
from providers.base import OMIT_TEMPERATURE
# Message preprocessing
sanitized = profile.prepare_messages(sanitized)
# Developer role swap — model-name-based, applies to all providers
_model_lower = (model or "").lower()
if (
sanitized
and isinstance(sanitized[0], dict)
and sanitized[0].get("role") == "system"
and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
):
sanitized = list(sanitized)
sanitized[0] = {**sanitized[0], "role": "developer"}
api_kwargs: dict[str, Any] = {
"model": model,
"messages": sanitized,
}
# Temperature
if profile.fixed_temperature is OMIT_TEMPERATURE:
pass # Don't include temperature at all
elif profile.fixed_temperature is not None:
api_kwargs["temperature"] = profile.fixed_temperature
else:
# Use caller's temperature if provided
temp = params.get("temperature")
if temp is not None:
api_kwargs["temperature"] = temp
# Timeout
timeout = params.get("timeout")
if timeout is not None:
api_kwargs["timeout"] = timeout
# Tools — apply Moonshot/Kimi schema sanitization regardless of path
if tools:
if is_moonshot_model(model):
tools = sanitize_moonshot_tools(tools)
api_kwargs["tools"] = tools
# max_tokens resolution — priority: ephemeral > user > profile default
max_tokens_fn = params.get("max_tokens_param_fn")
ephemeral = params.get("ephemeral_max_output_tokens")
user_max = params.get("max_tokens")
anthropic_max = params.get("anthropic_max_output")
if ephemeral is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(ephemeral))
elif user_max is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(user_max))
elif profile.default_max_tokens and max_tokens_fn:
api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
elif anthropic_max is not None:
api_kwargs["max_tokens"] = anthropic_max
# Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.)
reasoning_config = params.get("reasoning_config")
extra_body_from_profile, top_level_from_profile = (
profile.build_api_kwargs_extras(
reasoning_config=reasoning_config,
supports_reasoning=params.get("supports_reasoning", False),
qwen_session_metadata=params.get("qwen_session_metadata"),
model=model,
ollama_num_ctx=params.get("ollama_num_ctx"),
)
)
api_kwargs.update(top_level_from_profile)
# extra_body assembly
extra_body: dict[str, Any] = {}
# Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
profile_body = profile.build_extra_body(
session_id=params.get("session_id"),
provider_preferences=params.get("provider_preferences"),
)
if profile_body:
extra_body.update(profile_body)
# Profile's reasoning/thinking extra_body entries
if extra_body_from_profile:
extra_body.update(extra_body_from_profile)
# Merge any pre-built extra_body additions from the caller
additions = params.get("extra_body_additions")
if additions:
extra_body.update(additions)
# Request overrides (user config)
overrides = params.get("request_overrides")
if overrides:
for k, v in overrides.items():
if k == "extra_body" and isinstance(v, dict):
extra_body.update(v)
else:
api_kwargs[k] = v
if extra_body:
api_kwargs["extra_body"] = extra_body
return api_kwargs
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
@@ -319,7 +313,7 @@ class ChatCompletionsTransport(ProviderTransport):
# Gemini 3 thinking models attach extra_content with
# thought_signature — without replay on the next turn the API
# rejects the request with 400.
tc_provider_data: dict[str, Any] = {}
tc_provider_data: Dict[str, Any] = {}
extra = getattr(tc, "extra_content", None)
if extra is None and hasattr(tc, "model_extra"):
extra = (tc.model_extra or {}).get("extra_content")
@@ -330,14 +324,12 @@ class ChatCompletionsTransport(ProviderTransport):
except Exception:
pass
tc_provider_data["extra_content"] = extra
tool_calls.append(
ToolCall(
id=tc.id,
name=tc.function.name,
arguments=tc.function.arguments,
provider_data=tc_provider_data or None,
)
)
tool_calls.append(ToolCall(
id=tc.id,
name=tc.function.name,
arguments=tc.function.arguments,
provider_data=tc_provider_data or None,
))
usage = None
if hasattr(response, "usage") and response.usage:
@@ -355,7 +347,7 @@ class ChatCompletionsTransport(ProviderTransport):
reasoning = getattr(msg, "reasoning", None)
reasoning_content = getattr(msg, "reasoning_content", None)
provider_data: dict[str, Any] = {}
provider_data: Dict[str, Any] = {}
if reasoning_content:
provider_data["reasoning_content"] = reasoning_content
rd = getattr(msg, "reasoning_details", None)
@@ -381,7 +373,7 @@ class ChatCompletionsTransport(ProviderTransport):
return False
return True
def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
usage = getattr(response, "usage", None)
if usage is None:
+14 -15
View File
@@ -12,7 +12,7 @@ from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any
from typing import Any, Dict, List, Optional
@dataclass
@@ -32,10 +32,10 @@ class ToolCall:
* Others: ``None``
"""
id: str | None
id: Optional[str]
name: str
arguments: str # JSON string
provider_data: dict[str, Any] | None = field(default=None, repr=False)
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
# ── Backward compatibility ──────────────────────────────────
# The agent loop reads tc.function.name / tc.function.arguments
@@ -47,17 +47,17 @@ class ToolCall:
return "function"
@property
def function(self) -> ToolCall:
def function(self) -> "ToolCall":
"""Return self so tc.function.name / tc.function.arguments work."""
return self
@property
def call_id(self) -> str | None:
def call_id(self) -> Optional[str]:
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
return (self.provider_data or {}).get("call_id")
@property
def response_item_id(self) -> str | None:
def response_item_id(self) -> Optional[str]:
"""Codex response_item_id from provider_data."""
return (self.provider_data or {}).get("response_item_id")
@@ -101,18 +101,18 @@ class NormalizedResponse:
* Others: ``None``
"""
content: str | None
tool_calls: list[ToolCall] | None
content: Optional[str]
tool_calls: Optional[List[ToolCall]]
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
reasoning: str | None = None
usage: Usage | None = None
provider_data: dict[str, Any] | None = field(default=None, repr=False)
reasoning: Optional[str] = None
usage: Optional[Usage] = None
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
# ── Backward compatibility ──────────────────────────────────
# The shim _nr_to_assistant_message() mapped these from provider_data.
# These properties let NormalizedResponse pass through directly.
@property
def reasoning_content(self) -> str | None:
def reasoning_content(self) -> Optional[str]:
pd = self.provider_data or {}
return pd.get("reasoning_content")
@@ -136,9 +136,8 @@ class NormalizedResponse:
# Factory helpers
# ---------------------------------------------------------------------------
def build_tool_call(
id: str | None,
id: Optional[str],
name: str,
arguments: Any,
**provider_fields: Any,
@@ -152,7 +151,7 @@ def build_tool_call(
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
"""Translate a provider-specific stop reason to the normalised set.
Falls back to ``"stop"`` for unknown or ``None`` reasons.
+40 -240
View File
@@ -15,7 +15,6 @@ Usage:
import logging
import os
import re
import shutil
import sys
import json
@@ -759,17 +758,9 @@ def _run_cleanup():
pass
try:
if _active_agent_ref and hasattr(_active_agent_ref, 'shutdown_memory_provider'):
# Forward the agent's own transcript so memory providers'
# ``on_session_end`` hooks see the real conversation instead of
# an empty list (#15165). ``_session_messages`` is set on
# ``AIAgent.__init__`` and refreshed every turn via
# ``_persist_session``. Fall back to no-arg on test stubs /
# partially-initialised agents where the attribute is missing.
_session_msgs = getattr(_active_agent_ref, '_session_messages', None)
if isinstance(_session_msgs, list):
_active_agent_ref.shutdown_memory_provider(_session_msgs)
else:
_active_agent_ref.shutdown_memory_provider()
_active_agent_ref.shutdown_memory_provider(
getattr(_active_agent_ref, 'conversation_history', None) or []
)
except Exception:
pass
@@ -1556,60 +1547,6 @@ def _should_auto_attach_clipboard_image_on_paste(pasted_text: str) -> bool:
return not pasted_text.strip()
def _strip_leaked_bracketed_paste_wrappers(text: str) -> str:
"""Strip leaked bracketed-paste wrapper markers from user-visible text.
Defensive normalization for cases where terminal/prompt_toolkit parsing
fails and bracketed-paste markers end up in the buffer as literal text.
We strip canonical wrappers unconditionally and also handle degraded
visible forms like ``[200~`` / ``[201~`` and ``00~`` / ``01~`` when they
look like wrapper boundaries, not arbitrary user content.
"""
if not text:
return text
text = (
text.replace("\x1b[200~", "")
.replace("\x1b[201~", "")
.replace("^[[200~", "")
.replace("^[[201~", "")
)
text = re.sub(r"(^|[\s\n>:\]\)])\[200~", r"\1", text)
text = re.sub(r"\[201~(?=$|[\s\n<\[\(\):;.,!?])", "", text)
text = re.sub(r"(^|[\s\n>:\]\)])00~", r"\1", text)
text = re.sub(r"01~(?=$|[\s\n<\[\(\):;.,!?])", "", text)
return text
# Cursor Position Report (CPR / DSR) response, format ``ESC[<row>;<col>R``.
# prompt_toolkit's _on_resize() + renderer send ``ESC[6n`` queries to the
# terminal; under resize storms or tab switches the terminal's reply can
# race past the input parser and end up in the input buffer as literal
# text (see issue #14692). Also matches the visible-form ``^[[<row>;<col>R``
# that appears when the ESC byte was stripped by a prior filter.
_DSR_CPR_ESC_RE = re.compile(r"\x1b\[\d+;\d+R")
_DSR_CPR_VISIBLE_RE = re.compile(r"\^\[\[\d+;\d+R")
def _strip_leaked_terminal_responses(text: str) -> str:
"""Strip leaked terminal control-response sequences from user input.
Covers Cursor Position Report (CPR / DSR) responses ``ESC[<row>;<col>R``
and the visible ``^[[<row>;<col>R`` form. These are replies the terminal
sends back to queries prompt_toolkit makes during ``_on_resize`` /
``_request_absolute_cursor_position``. When the input parser drops one
(resize storms, multiplexer focus changes, slow PTYs) the response
lands in the input buffer as literal text and corrupts what the user
typed.
"""
if not text:
return text
text = _DSR_CPR_ESC_RE.sub("", text)
text = _DSR_CPR_VISIBLE_RE.sub("", text)
return text
def _collect_query_images(query: str | None, image_arg: str | None = None) -> tuple[str, list[Path]]:
"""Collect local image attachments for single-query CLI flows."""
message = query or ""
@@ -2218,42 +2155,6 @@ class HermesCLI:
self._last_invalidate = now
self._app.invalidate()
def _force_full_redraw(self) -> None:
"""Force a clean full-screen repaint of the prompt_toolkit UI.
Used to recover from terminal buffer drift caused by external
redraws we can't detect — e.g. macOS cmux / tmux tab switches,
``clear`` issued from a subshell, or SSH window restores. These
wipe or repaint the terminal without firing SIGWINCH, so
prompt_toolkit's tracked ``_cursor_pos`` no longer matches reality
and the next incremental redraw stacks on top of stale content
(ghost status bars, duplicated prompts).
Bound to Ctrl+L and exposed as the ``/redraw`` slash command,
matching the standard terminal-UX convention (bash, zsh, fish,
vim, htop).
"""
app = getattr(self, "_app", None)
if not app:
return
try:
renderer = app.renderer
out = renderer.output
out.reset_attributes()
out.erase_screen()
out.cursor_goto(0, 0)
out.flush()
# Drop prompt_toolkit's cached screen + cursor state so the
# next _redraw() starts from a known (0, 0) origin and
# re-renders every cell rather than diffing against stale.
renderer.reset(leave_alternate_screen=False)
except Exception:
pass
try:
app.invalidate()
except Exception:
pass
def _status_bar_context_style(self, percent_used: Optional[int]) -> str:
if percent_used is None:
return "class:status-bar-dim"
@@ -6000,7 +5901,6 @@ class HermesCLI:
platform_status = {
Platform.TELEGRAM: ("Telegram", "TELEGRAM_BOT_TOKEN"),
Platform.DISCORD: ("Discord", "DISCORD_BOT_TOKEN"),
Platform.SLACK: ("Slack", "SLACK_BOT_TOKEN"),
Platform.WHATSAPP: ("WhatsApp", "WHATSAPP_ENABLED"),
}
@@ -6071,12 +5971,6 @@ class HermesCLI:
self.show_toolsets()
elif canonical == "config":
self.show_config()
elif canonical == "redraw":
# Manual recovery for terminal buffer drift from multiplexer
# tab switches, subshell ``clear``, SSH window restores, etc.
# See issue #8688 (cmux). Ctrl+L is bound to the same helper.
self._force_full_redraw()
_cprint(f" {_DIM}✓ UI redrawn{_RST}")
elif canonical == "clear":
self.new_session(silent=True)
# Clear terminal screen. Inside the TUI, Rich's console.clear()
@@ -8442,62 +8336,13 @@ class HermesCLI:
):
return None
# Route image attachments based on the active model's vision capability.
# "native" → pass pixels as OpenAI-style content parts (adapters
# translate for Anthropic/Gemini/Bedrock).
# "text" → pre-analyze each image with vision_analyze and prepend the
# description as text — works with non-vision models.
# See agent/image_routing.py for the decision table.
# Pre-process images through the vision tool (Gemini Flash) so the
# main model receives text descriptions instead of raw base64 image
# content — works with any model, not just vision-capable ones.
if images:
try:
from agent.image_routing import (
build_native_content_parts,
decide_image_input_mode,
)
from hermes_cli.config import load_config
_img_mode = decide_image_input_mode(
(self.provider or "").strip(),
(self.model or "").strip(),
load_config(),
)
except Exception as _img_exc:
logging.debug("image_routing decision failed, defaulting to text: %s", _img_exc)
_img_mode = "text"
if _img_mode == "native":
try:
_text_for_parts = message if isinstance(message, str) else ""
_img_str_paths = [str(p) for p in images]
_parts, _skipped = build_native_content_parts(
_text_for_parts,
_img_str_paths,
)
if _skipped:
_cprint(
f" {_DIM}⚠ skipped {len(_skipped)} unreadable image path(s){_RST}"
)
if any(p.get("type") == "image_url" for p in _parts):
_img_names = ", ".join(Path(p).name for p in _img_str_paths)
_cprint(
f" {_DIM}📎 attaching {len(images)} image(s) natively "
f"(model supports vision): {_img_names}{_RST}"
)
message = _parts
else:
# All images unreadable — fall back to text enrichment.
message = self._preprocess_images_with_vision(
message if isinstance(message, str) else "", images
)
except Exception as _img_exc:
logging.warning("native image attach failed, falling back to text: %s", _img_exc)
message = self._preprocess_images_with_vision(
message if isinstance(message, str) else "", images
)
else:
message = self._preprocess_images_with_vision(
message if isinstance(message, str) else "", images
)
message = self._preprocess_images_with_vision(
message if isinstance(message, str) else "", images
)
# Expand @ context references (e.g. @file:main.py, @diff, @folder:src/)
if isinstance(message, str) and "@" in message:
@@ -8800,20 +8645,12 @@ class HermesCLI:
if response and result and not result.get("failed") and not result.get("partial"):
try:
from agent.title_generator import maybe_auto_title
# Route title-generation failures through the agent's
# user-visible warning channel so a depleted auxiliary
# provider doesn't silently leave sessions untitled
# (issue #15775).
_title_failure_cb = getattr(
self.agent, "_emit_auxiliary_failure", None
) if self.agent else None
maybe_auto_title(
self._session_db,
self.session_id,
message,
response,
self.conversation_history,
failure_callback=_title_failure_cb,
)
except Exception:
pass
@@ -9691,17 +9528,6 @@ class HermesCLI:
"""Down arrow: browse history when on last line, else move cursor down."""
event.app.current_buffer.auto_down(count=event.arg)
@kb.add('c-l')
def handle_ctrl_l(event):
"""Ctrl+L: force a clean full-screen repaint.
Recovers the UI after external terminal buffer drift tmux /
cmux tab switches, ``clear`` from a subshell, SSH window
restores, etc. that prompt_toolkit can't detect on its own.
Matches the universal bash/zsh/fish/vim/htop convention.
"""
self._force_full_redraw()
@kb.add('c-c')
def handle_ctrl_c(event):
"""Handle Ctrl+C - cancel interactive prompts, interrupt agent, or exit.
@@ -9929,18 +9755,10 @@ class HermesCLI:
placeholder while preserving any existing user text in the
buffer.
"""
# Diagnostic canary: measure how long the paste handler blocks
# the prompt_toolkit event loop. If this exceeds ~500ms we log
# it so recurring "CLI freezes on paste" reports (issue #16263,
# macOS Tahoe 26 + iTerm2/Ghostty) arrive with data attached.
_paste_handler_start = time.perf_counter()
_paste_raw_size = len(event.data or "")
pasted_text = event.data or ""
# Normalise line endings — Windows \r\n and old Mac \r both become \n
# so the 5-line collapse threshold and display are consistent.
pasted_text = pasted_text.replace('\r\n', '\n').replace('\r', '\n')
pasted_text = _strip_leaked_bracketed_paste_wrappers(pasted_text)
pasted_text = _strip_leaked_terminal_responses(pasted_text)
if _should_auto_attach_clipboard_image_on_paste(pasted_text) and self._try_attach_clipboard_image():
event.app.invalidate()
if pasted_text:
@@ -9963,17 +9781,6 @@ class HermesCLI:
buf.insert_text(prefix + placeholder)
else:
buf.insert_text(pasted_text)
_paste_handler_elapsed_ms = (time.perf_counter() - _paste_handler_start) * 1000.0
if _paste_handler_elapsed_ms > 500.0:
logger.warning(
"Slow bracketed-paste handler: %.1fms to process %d bytes "
"(%d lines) on %s. If the input becomes unresponsive after "
"this, attach this log line to the bug report.",
_paste_handler_elapsed_ms,
_paste_raw_size,
pasted_text.count('\n') + 1 if pasted_text else 0,
sys.platform,
)
@kb.add('c-v')
def handle_ctrl_v(event):
@@ -10093,16 +9900,7 @@ class HermesCLI:
still batch newlines. Alt+Enter only adds 1 newline per
event so it never triggers this.
"""
text = _strip_leaked_bracketed_paste_wrappers(buf.text)
text = _strip_leaked_terminal_responses(text)
if text != buf.text:
cursor = min(buf.cursor_position, len(text))
_paste_just_collapsed[0] = True
buf.text = text
buf.cursor_position = cursor
_prev_text_len[0] = len(text)
_prev_newline_count[0] = text.count('\n')
return
text = buf.text
chars_added = len(text) - _prev_text_len[0]
_prev_text_len[0] = len(text)
if _paste_just_collapsed[0] or self._skip_paste_collapse:
@@ -10759,30 +10557,36 @@ class HermesCLI:
# only cursor_up()s by the stored layout height, missing the extra
# rows created by reflow — leaving ghost duplicates visible.
#
# It's not just column-shrink: widening, row-shrinking, and
# multiplexer-driven SIGWINCH-less redraws (cmux / tmux tab switch)
# all produce the same class of drift, where the renderer's tracked
# _cursor_pos.y no longer matches terminal reality. The only reliable
# recovery is a full screen-clear (\x1b[2J\x1b[H) before the next
# redraw, so we force one on every resize rather than trying to
# compute the exact drift.
# Fix: before the standard erase, inflate _cursor_pos.y so the
# cursor moves up far enough to cover the reflowed ghost content.
_original_on_resize = app._on_resize
def _resize_clear_ghosts():
from prompt_toolkit.data_structures import Point as _Pt
renderer = app.renderer
try:
out = renderer.output
# Reset attributes, erase the entire screen, and home the
# cursor. This overwrites any reflowed status-bar rows or
# stale content the terminal kept from the prior layout.
out.reset_attributes()
out.erase_screen()
out.cursor_goto(0, 0)
out.flush()
# Tell the renderer its tracked position is fresh so its
# own erase() inside _on_resize doesn't cursor_up() past
# the top of the screen.
renderer.reset(leave_alternate_screen=False)
old_size = renderer._last_size
new_size = renderer.output.get_size()
if (
old_size
and new_size.columns < old_size.columns
and new_size.columns > 0
):
reflow_factor = (
(old_size.columns + new_size.columns - 1)
// new_size.columns
)
last_h = (
renderer._last_screen.height
if renderer._last_screen
else 0
)
extra = last_h * (reflow_factor - 1)
if extra > 0:
renderer._cursor_pos = _Pt(
x=renderer._cursor_pos.x,
y=renderer._cursor_pos.y + extra,
)
except Exception:
pass # never break resize handling
_original_on_resize()
@@ -10790,6 +10594,7 @@ class HermesCLI:
app._on_resize = _resize_clear_ghosts
def spinner_loop():
last_idle_refresh = 0.0
while not self._should_exit:
if not self._app:
time.sleep(0.1)
@@ -10798,11 +10603,10 @@ class HermesCLI:
self._invalidate(min_interval=0.1)
time.sleep(0.1)
else:
# Do not repaint the idle prompt every second. In non-full-screen
# prompt_toolkit mode, background redraws can fight tmux/Ghostty/cmux
# viewport restoration after focus changes and visually move the
# command input area. Keep idle stable; input/agent events still
# invalidate explicitly when the UI actually changes.
now = time.monotonic()
if now - last_idle_refresh >= 1.0:
last_idle_refresh = now
self._invalidate(min_interval=1.0)
time.sleep(0.2)
spinner_thread = threading.Thread(target=spinner_loop, daemon=True)
@@ -10844,10 +10648,6 @@ class HermesCLI:
submit_images = []
if isinstance(user_input, tuple):
user_input, submit_images = user_input
if isinstance(user_input, str):
user_input = _strip_leaked_bracketed_paste_wrappers(user_input)
user_input = _strip_leaked_terminal_responses(user_input)
# Check for commands — but detect dragged/pasted file paths first.
# See _detect_file_drop() for details.
+3 -31
View File
@@ -311,12 +311,6 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
elif schedule["kind"] == "cron":
if not HAS_CRONITER:
logger.warning(
"Cannot compute next run for cron schedule %r: 'croniter' "
"is not installed. Install the 'cron' extra (pip install "
"'hermes-agent[cron]') to re-enable recurring cron jobs.",
schedule.get("expr"),
)
return None
cron = croniter(schedule["expr"], now)
next_run = cron.get_next(datetime)
@@ -704,32 +698,10 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
# Compute next run
job["next_run_at"] = compute_next_run(job["schedule"], now)
# If no next run, decide whether this is terminal completion
# (one-shot) or a transient failure (recurring schedule couldn't
# compute — e.g. 'croniter' missing from the runtime env).
# Recurring jobs must NEVER be silently disabled: that turns a
# missing runtime dep into "job completed" and the user's
# schedule quietly goes off. See issue #16265.
# If no next run (one-shot completed), disable
if job["next_run_at"] is None:
kind = job.get("schedule", {}).get("kind")
if kind in ("cron", "interval"):
job["state"] = "error"
if not job.get("last_error"):
job["last_error"] = (
"Failed to compute next run for recurring "
"schedule (is the 'croniter' package "
"installed in the gateway's Python env?)"
)
logger.error(
"Job '%s' (%s) could not compute next_run_at; "
"leaving enabled and marking state=error so the "
"job is not silently disabled.",
job.get("name", job["id"]),
kind,
)
else:
job["enabled"] = False
job["state"] = "completed"
job["enabled"] = False
job["state"] = "completed"
elif job.get("state") != "paused":
job["state"] = "scheduled"
-20
View File
@@ -822,8 +822,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
logger.info("Prompt: %s", prompt[:100])
agent = None
# Mark this as a cron session so the approval system can apply cron_mode.
# This env var is process-wide and persists for the lifetime of the
# scheduler process — every job this process runs is a cron job.
@@ -1172,24 +1170,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
_session_db.close()
except (Exception, KeyboardInterrupt) as e:
logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
# Release subprocesses, terminal sandboxes, browser daemons, and the
# main OpenAI/httpx client held by this ephemeral cron agent. Without
# this, a gateway that ticks cron every N minutes leaks fds per job
# until it hits EMFILE (#10200 / "too many open files").
try:
if agent is not None:
agent.close()
except (Exception, KeyboardInterrupt) as e:
logger.debug("Job '%s': failed to close agent resources: %s", job_id, e)
# Each cron run spins up a short-lived worker thread whose event loop
# dies as soon as the ``ThreadPoolExecutor`` shuts down. Any async
# httpx clients cached under that loop are now unusable — reap them
# so their transports don't accumulate in the process-global cache.
try:
from agent.auxiliary_client import cleanup_stale_async_clients
cleanup_stale_async_clients()
except Exception as e:
logger.debug("Job '%s': failed to reap stale auxiliary clients: %s", job_id, e)
def tick(verbose: bool = True, adapters=None, loop=None) -> int:
-1
View File
@@ -36,7 +36,6 @@
imports = [
./nix/packages.nix
./nix/overlays.nix
./nix/nixosModules.nix
./nix/checks.nix
./nix/devShell.nix
+1 -16
View File
@@ -566,8 +566,6 @@ def load_gateway_config() -> GatewayConfig:
existing = {}
# Deep-merge extra dicts so gateway.json defaults survive
merged_extra = {**existing.get("extra", {}), **plat_block.get("extra", {})}
if plat_name == Platform.SLACK.value and "enabled" in plat_block:
merged_extra["_enabled_explicit"] = True
merged = {**existing, **plat_block}
if merged_extra:
merged["extra"] = merged_extra
@@ -612,21 +610,16 @@ def load_gateway_config() -> GatewayConfig:
bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
else:
bridged["channel_prompts"] = channel_prompts
enabled_was_explicit = "enabled" in platform_cfg
if not bridged and not enabled_was_explicit:
if not bridged:
continue
plat_data = platforms_data.setdefault(plat.value, {})
if not isinstance(plat_data, dict):
plat_data = {}
platforms_data[plat.value] = plat_data
if enabled_was_explicit:
plat_data["enabled"] = platform_cfg["enabled"]
extra = plat_data.setdefault("extra", {})
if not isinstance(extra, dict):
extra = {}
plat_data["extra"] = extra
if plat == Platform.SLACK and enabled_was_explicit:
extra["_enabled_explicit"] = True
extra.update(bridged)
# Slack settings → env vars (env vars take precedence)
@@ -948,14 +941,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
# No yaml config for Slack — env-only setup, enable it
config.platforms[Platform.SLACK] = PlatformConfig()
config.platforms[Platform.SLACK].enabled = True
else:
slack_config = config.platforms[Platform.SLACK]
enabled_was_explicit = bool(slack_config.extra.pop("_enabled_explicit", False))
if not slack_config.enabled and not enabled_was_explicit:
# Top-level Slack settings such as channel prompts should not
# turn an env-token setup into a disabled platform. Only an
# explicit slack.enabled/platforms.slack.enabled false should.
slack_config.enabled = True
# If yaml config exists, respect its enabled flag (don't override
# explicit enabled: false). Token is still stored so skills that
# send Slack messages can use it without activating the gateway adapter.
+1 -29
View File
@@ -1702,41 +1702,13 @@ class BasePlatformAdapter(ABC):
the agent is waiting for dangerous-command approval). This is critical
for Slack's Assistant API where ``assistant_threads_setStatus`` disables
the compose box pausing lets the user type ``/approve`` or ``/deny``.
Each ``send_typing`` call is bounded by a ~1.5s timeout so a slow
network round-trip can't stall the refresh cadence. Telegram- and
Discord-side typing expire after ~5s; if any individual send_typing
takes longer than the refresh interval, the bubble would die and
stay dead until that call returns. Abandoning the slow call lets
the next tick fire a fresh send_typing on schedule as long as
one of them succeeds within the 5s platform-side window, the bubble
stays visible across provider stalls / upstream API timeouts.
"""
# Bound each send_typing round-trip so the refresh cadence isn't
# gated on network health. Must stay below ``interval`` so a slow
# call gets abandoned before the next scheduled tick.
_send_typing_timeout = max(0.25, min(1.5, interval - 0.25))
try:
while True:
if stop_event is not None and stop_event.is_set():
return
if chat_id not in self._typing_paused:
try:
await asyncio.wait_for(
self.send_typing(chat_id, metadata=metadata),
timeout=_send_typing_timeout,
)
except asyncio.TimeoutError:
# Slow network — abandon this tick, keep the loop
# on schedule so the next send_typing fires fresh.
pass
except asyncio.CancelledError:
raise
except Exception as typing_err:
logger.debug(
"[%s] send_typing error (non-fatal): %s",
self.name, typing_err,
)
await self.send_typing(chat_id, metadata=metadata)
if stop_event is None:
await asyncio.sleep(interval)
continue
+1 -2
View File
@@ -3294,7 +3294,6 @@ class DiscordAdapter(BasePlatformAdapter):
chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)
# Build source
guild = getattr(message, "guild", None)
source = self.build_source(
chat_id=str(effective_channel.id),
chat_name=chat_name,
@@ -3304,7 +3303,7 @@ class DiscordAdapter(BasePlatformAdapter):
thread_id=thread_id,
chat_topic=chat_topic,
is_bot=getattr(message.author, "bot", False),
guild_id=str(guild.id) if guild else None,
guild_id=str(message.guild.id) if message.guild else None,
parent_chat_id=parent_channel_id,
message_id=str(message.id),
)
-3
View File
@@ -28,7 +28,6 @@ from email.header import decode_header
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email.utils import formatdate
from email import encoders
from pathlib import Path
from typing import Any, Dict, List, Optional
@@ -505,7 +504,6 @@ class EmailAdapter(BasePlatformAdapter):
msg["In-Reply-To"] = original_msg_id
msg["References"] = original_msg_id
msg["Date"] = formatdate(localtime=True)
msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
msg["Message-ID"] = msg_id
@@ -588,7 +586,6 @@ class EmailAdapter(BasePlatformAdapter):
msg["In-Reply-To"] = original_msg_id
msg["References"] = original_msg_id
msg["Date"] = formatdate(localtime=True)
msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
msg["Message-ID"] = msg_id
+3 -73
View File
@@ -1178,83 +1178,13 @@ class MatrixAdapter(BasePlatformAdapter):
# Event callbacks
# ------------------------------------------------------------------
def _is_self_sender(self, sender: str) -> bool:
"""Return True if the sender refers to the bot's own account.
Matrix user IDs are byte-compared after trimming whitespace and
lowercasing some homeservers normalize the localpart case
differently at different API surfaces, and the reply-loop tail
of the "hall of mirrors" bug (#15763) has been observed with the
bot's own account bypassing a case-sensitive equality check.
When ``self._user_id`` is empty (whoami hasn't resolved yet, or
login failed), we cannot prove a sender is NOT us, so we return
True defensively an unidentified bot dropping its own events
is always preferable to falling into an echo loop.
"""
own = (self._user_id or "").strip().lower()
if not own:
return True
return sender.strip().lower() == own
@staticmethod
def _is_system_or_bridge_sender(sender: str) -> bool:
"""Return True if the sender looks like a system / bridge / appservice
identity rather than a real user.
Appservice namespaces on Matrix conventionally prefix bot / puppet
user IDs with an underscore (e.g. ``@_telegram_12345:server``,
``@_discord_999:server``, ``@_slack_...:server``). Server-notices
bots and bridge-controller bots on many homeservers use the same
pattern.
We treat these as system identities for pairing purposes: they
should never be offered a pairing code, because an operator
approving the code would hand the bridge itself permanent
authorization and every outbound message relayed by the bridge
would then loop back into the agent as an "authorized user
message", which is the root of issue #15763.
Matches:
``@_something:server`` appservice namespace convention
``@:server`` malformed / empty localpart
``:server`` malformed, no leading ``@``
"""
s = (sender or "").strip()
if not s:
return True
# Localpart is everything between leading '@' and ':'
if s.startswith("@"):
s = s[1:]
if ":" in s:
localpart, _, _ = s.partition(":")
else:
localpart = s
if not localpart:
return True
return localpart.startswith("_")
async def _on_room_message(self, event: Any) -> None:
"""Handle incoming room message events (text, media)."""
room_id = str(getattr(event, "room_id", ""))
sender = str(getattr(event, "sender", ""))
# Ignore own messages (case-insensitive; also drops when our own
# user_id hasn't been resolved yet — see _is_self_sender docstring
# and issue #15763).
if self._is_self_sender(sender):
return
# Ignore appservice / bridge / system identities so they never
# trigger the pairing flow. Once a bridge user is paired, every
# outbound message it relays would loop back as an authorized
# user message (the "hall of mirrors" in #15763).
if self._is_system_or_bridge_sender(sender):
logger.debug(
"Matrix: ignoring system/bridge sender %s in %s",
sender,
room_id,
)
# Ignore own messages.
if sender == self._user_id:
return
# Deduplicate by event ID.
@@ -1724,7 +1654,7 @@ class MatrixAdapter(BasePlatformAdapter):
async def _on_reaction(self, event: Any) -> None:
"""Handle incoming reaction events."""
sender = str(getattr(event, "sender", ""))
if self._is_self_sender(sender):
if sender == self._user_id:
return
event_id = str(getattr(event, "event_id", ""))
if self._is_duplicate_event(event_id):
-20
View File
@@ -2353,26 +2353,6 @@ class TelegramAdapter(BasePlatformAdapter):
user = getattr(entity, "user", None)
if user and getattr(user, "id", None) == bot_id:
return True
elif entity_type == "bot_command" and expected:
# Telegram's official group-disambiguation form for slash
# commands (``/cmd@botname``) is emitted as a single
# ``bot_command`` entity covering the whole span — there
# is no accompanying ``mention`` entity. Treat it as a
# direct address to this bot when the ``@botname`` suffix
# matches. This is the form Telegram's own command menu
# autocomplete produces in groups, so dropping it at the
# mention gate would break /new, /reset, /help, ... for
# every group that has ``require_mention`` enabled (#15415).
offset = int(getattr(entity, "offset", -1))
length = int(getattr(entity, "length", 0))
if offset < 0 or length <= 0:
continue
command_text = source_text[offset:offset + length]
at_index = command_text.find("@")
if at_index < 0:
continue
if command_text[at_index:].strip().lower() == expected:
return True
return False
def _message_matches_mention_patterns(self, message: Message) -> bool:
+7 -223
View File
@@ -1943,21 +1943,7 @@ class GatewayRunner:
return
try:
if hasattr(agent, "shutdown_memory_provider"):
# Pass the agent's own conversation transcript so memory
# providers' ``on_session_end`` hooks see the real messages
# instead of the empty default (#15165). ``_session_messages``
# is set on ``AIAgent`` (run_agent.py:1518) and refreshed at
# the end of every ``run_conversation`` turn via
# ``_persist_session``; on an agent built through
# ``object.__new__`` (test stubs) the attribute may be
# absent, so ``getattr`` with a ``None`` default keeps the
# call signature-compatible with the pre-fix behaviour
# (``shutdown_memory_provider(messages=None)``).
session_messages = getattr(agent, "_session_messages", None)
if isinstance(session_messages, list):
agent.shutdown_memory_provider(session_messages)
else:
agent.shutdown_memory_provider()
agent.shutdown_memory_provider()
except Exception:
pass
# Close tool resources (terminal sandboxes, browser daemons,
@@ -1968,15 +1954,6 @@ class GatewayRunner:
agent.close()
except Exception:
pass
# Auxiliary async clients (session_search/web/vision/etc.) live in a
# process-global cache and are created inside worker threads. Clean up
# any entries whose event loop is now dead so their httpx transports do
# not accumulate across gateway turns.
try:
from agent.auxiliary_client import cleanup_stale_async_clients
cleanup_stale_async_clients()
except Exception:
pass
_STUCK_LOOP_THRESHOLD = 3 # restarts while active before auto-suspend
_STUCK_LOOP_FILE = ".restart_failure_counts"
@@ -2940,19 +2917,6 @@ class GatewayRunner:
# disconnect (defense in depth; safe to call repeatedly).
_kill_tool_subprocesses("final-cleanup")
# Reap the process-global auxiliary-client cache once at the very
# end of teardown. Per-turn cleanup runs in _cleanup_agent_resources
# for each active agent, but clients bound to worker-thread loops
# that died with their ThreadPoolExecutor (notably cron ticks) only
# get swept here. Without this, long-running gateways accumulate
# async httpx transports until they hit EMFILE on macOS's default
# RLIMIT_NOFILE=256. See #14210.
try:
from agent.auxiliary_client import shutdown_cached_clients
shutdown_cached_clients()
except Exception as _e:
logger.debug("shutdown_cached_clients error: %s", _e)
# Close SQLite session DBs so the WAL write lock is released.
# Without this, --replace and similar restart flows leave the
# old gateway's connection holding the WAL lock until Python
@@ -4235,18 +4199,9 @@ class GatewayRunner:
Keep the normal inbound path and the queued follow-up path on the same
preprocessing pipeline so sender attribution, image enrichment, STT,
document notes, reply context, and @ references all behave the same.
Side effect: writes ``self._pending_native_image_paths`` to a list of
local image paths when the active model supports native vision AND
the user has images attached. The caller consumes and clears this
attribute at the ``run_conversation`` site to build a multimodal user
turn. When the list is empty, the ``_enrich_message_with_vision``
text path has already run and images are represented in-text.
"""
history = history or []
message_text = event.text or ""
# Reset per-call buffer; set only when native routing is chosen.
self._pending_native_image_paths = []
_is_shared_multi_user = is_shared_multi_user_session(
source,
@@ -4267,25 +4222,10 @@ class GatewayRunner:
audio_paths.append(path)
if image_paths:
# Decide routing: native (attach pixels) vs text (vision_analyze
# pre-run + prepend description). See agent/image_routing.py.
_img_mode = self._decide_image_input_mode()
if _img_mode == "native":
# Defer attachment to the run_conversation call site.
self._pending_native_image_paths = list(image_paths)
logger.info(
"Image routing: native (model supports vision). %d image(s) will be attached inline.",
len(image_paths),
)
else:
logger.info(
"Image routing: text (mode=%s). Pre-analyzing %d image(s) via vision_analyze.",
_img_mode, len(image_paths),
)
message_text = await self._enrich_message_with_vision(
message_text,
image_paths,
)
message_text = await self._enrich_message_with_vision(
message_text,
image_paths,
)
if audio_paths:
message_text = await self._enrich_message_with_transcription(
@@ -4800,58 +4740,6 @@ class GatewayRunner:
"compression",
f"{_new_tokens:,}",
)
# If summary generation failed, the
# compressor inserted a static fallback
# placeholder and the dropped turns are
# gone for good. Surface a visible
# warning to the gateway user — agent.log
# alone is invisible on TG/Discord/etc.
_comp = getattr(_hyg_agent, "context_compressor", None)
if _comp is not None and getattr(_comp, "_last_summary_fallback_used", False):
_dropped = getattr(_comp, "_last_summary_dropped_count", 0)
_err = getattr(_comp, "_last_summary_error", None) or "unknown error"
_warn_msg = (
"⚠️ Context compression summary failed "
f"({_err}). {_dropped} historical message(s) "
"were removed and replaced with a placeholder. "
"Earlier context is no longer recoverable. "
"Consider /reset for a clean session, or check "
"your auxiliary.compression model configuration."
)
try:
_adapter = self.adapters.get(source.platform)
if _adapter and source.chat_id:
await _adapter.send(source.chat_id, _warn_msg, metadata=_hyg_meta)
except Exception as _werr:
logger.warning(
"Failed to deliver compression-failure warning to user: %s",
_werr,
)
# Separately: if the user's CONFIGURED aux
# model failed and we recovered by falling
# back to the main model, tell them — a
# misconfigured auxiliary.compression.model
# is something only they can fix, and
# silent recovery would hide it.
elif _comp is not None and getattr(_comp, "_last_aux_model_failure_model", None):
_aux_model = getattr(_comp, "_last_aux_model_failure_model", "")
_aux_err = getattr(_comp, "_last_aux_model_failure_error", None) or "unknown error"
_aux_msg = (
f"️ Configured compression model `{_aux_model}` "
f"failed ({_aux_err}). Recovered using your main "
"model — context is intact — but you may want to "
"check `auxiliary.compression.model` in config.yaml."
)
try:
_adapter = self.adapters.get(source.platform)
if _adapter and source.chat_id:
await _adapter.send(source.chat_id, _aux_msg, metadata=_hyg_meta)
except Exception as _werr:
logger.warning(
"Failed to deliver aux-model-fallback notice to user: %s",
_werr,
)
finally:
self._cleanup_agent_resources(_hyg_agent)
@@ -7395,17 +7283,6 @@ class GatewayRunner:
approx_tokens,
new_tokens,
)
# Detect summary-generation failure so we can surface a
# visible warning to the user even on the manual /compress
# path (otherwise the failure is silently logged).
_summary_failed = bool(getattr(compressor, "_last_summary_fallback_used", False))
_dropped_count = int(getattr(compressor, "_last_summary_dropped_count", 0) or 0)
_summary_err = getattr(compressor, "_last_summary_error", None)
# Separately: did the user's CONFIGURED aux model fail
# and we recovered via main? Surface that as an info
# note so they can fix their config.
_aux_fail_model = getattr(compressor, "_last_aux_model_failure_model", None)
_aux_fail_err = getattr(compressor, "_last_aux_model_failure_error", None)
finally:
self._cleanup_agent_resources(tmp_agent)
lines = [f"🗜️ {summary['headline']}"]
@@ -7414,20 +7291,6 @@ class GatewayRunner:
lines.append(summary["token_line"])
if summary["note"]:
lines.append(summary["note"])
if _summary_failed:
lines.append(
f"⚠️ Summary generation failed ({_summary_err or 'unknown error'}). "
f"{_dropped_count} historical message(s) were removed and replaced "
"with a placeholder; earlier context is no longer recoverable. "
"Consider checking your auxiliary.compression model configuration."
)
elif _aux_fail_model:
lines.append(
f"️ Configured compression model `{_aux_fail_model}` failed "
f"({_aux_fail_err or 'unknown error'}). Recovered using your main "
"model — context is intact — but you may want to check "
"`auxiliary.compression.model` in config.yaml."
)
return "\n".join(lines)
except Exception as e:
logger.warning("Manual compress failed: %s", e)
@@ -8515,29 +8378,6 @@ class GatewayRunner:
ctx = copy_context()
return await loop.run_in_executor(None, ctx.run, func, *args)
def _decide_image_input_mode(self) -> str:
"""Resolve the image-input routing for the currently active model.
Returns ``"native"`` (attach pixels on the user turn) or ``"text"``
(pre-analyze with vision_analyze and prepend the description). See
agent/image_routing.py for the full decision table.
The active provider/model are read from config.yaml so the decision
tracks ``/model`` switches automatically on the next message.
"""
try:
from agent.image_routing import decide_image_input_mode
from agent.auxiliary_client import _read_main_model, _read_main_provider
from hermes_cli.config import load_config
cfg = load_config()
provider = _read_main_provider()
model = _read_main_model()
return decide_image_input_mode(provider, model, cfg)
except Exception as exc:
logger.debug("image_routing: decision failed, falling back to text — %s", exc)
return "text"
async def _enrich_message_with_vision(
self,
user_text: str,
@@ -8560,7 +8400,6 @@ class GatewayRunner:
The enriched message string with vision descriptions prepended.
"""
from tools.vision_tools import vision_analyze_tool
from agent.memory_manager import sanitize_context
analysis_prompt = (
"Describe everything visible in this image in thorough detail. "
@@ -8579,7 +8418,6 @@ class GatewayRunner:
result = json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "")
description = sanitize_context(description)
enriched_parts.append(
f"[The user sent an image~ Here's what I can see:\n{description}]\n"
f"[If you need a closer look, use vision_analyze with "
@@ -10556,39 +10394,7 @@ class GatewayRunner:
_approval_session_token = set_current_session_key(_approval_session_key)
register_gateway_notify(_approval_session_key, _approval_notify_sync)
try:
# If _prepare_inbound_message_text buffered image paths for native
# attachment, wrap the user turn as an OpenAI-style multimodal
# content list. Consume-and-clear so subsequent turns on the same
# runner instance don't re-attach stale images.
_native_imgs = list(getattr(self, "_pending_native_image_paths", []) or [])
self._pending_native_image_paths = []
if _native_imgs:
try:
from agent.image_routing import build_native_content_parts
_parts, _skipped = build_native_content_parts(
message,
_native_imgs,
)
if _skipped:
logger.warning(
"Native image attachment: skipped %d unreadable path(s): %s",
len(_skipped), _skipped,
)
if any(p.get("type") == "image_url" for p in _parts):
_run_message: Any = _parts
else:
# All images failed to read — fall back to plain text.
_run_message = message
except Exception as _img_exc:
logger.warning(
"Native image attachment failed, falling back to text: %s",
_img_exc,
)
_run_message = message
else:
_run_message = message
result = agent.run_conversation(_run_message, conversation_history=agent_history, task_id=session_id)
result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
finally:
unregister_gateway_notify(_approval_session_key)
reset_current_session_key(_approval_session_token)
@@ -10694,20 +10500,12 @@ class GatewayRunner:
try:
from agent.title_generator import maybe_auto_title
all_msgs = result_holder[0].get("messages", []) if result_holder[0] else []
# Route title-generation failures through the agent's
# user-visible warning channel so a depleted auxiliary
# provider doesn't silently leave sessions untitled
# (issue #15775).
_title_failure_cb = getattr(
agent, "_emit_auxiliary_failure", None
)
maybe_auto_title(
self._session_db,
effective_session_id,
message,
final_response,
all_msgs,
failure_callback=_title_failure_cb,
)
except Exception:
pass
@@ -11347,16 +11145,13 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
cron delivery path so live adapters can be used for E2EE rooms.
Also refreshes the channel directory every 5 minutes and prunes the
image/audio/document cache + expired ``hermes debug share`` pastes
once per hour.
image/audio/document cache once per hour.
"""
from cron.scheduler import tick as cron_tick
from gateway.platforms.base import cleanup_image_cache, cleanup_document_cache
from hermes_cli.debug import _sweep_expired_pastes
IMAGE_CACHE_EVERY = 60 # ticks — once per hour at default 60s interval
CHANNEL_DIR_EVERY = 5 # ticks — every 5 minutes
PASTE_SWEEP_EVERY = 60 # ticks — once per hour
logger.info("Cron ticker started (interval=%ds)", interval)
tick_count = 0
@@ -11397,17 +11192,6 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
except Exception as e:
logger.debug("Document cache cleanup error: %s", e)
if tick_count % PASTE_SWEEP_EVERY == 0:
try:
deleted, remaining = _sweep_expired_pastes()
if deleted:
logger.info(
"Paste sweep: deleted %d expired paste(s), %d pending",
deleted, remaining,
)
except Exception as e:
logger.debug("Paste sweep error: %s", e)
stop_event.wait(timeout=interval)
logger.info("Cron ticker stopped")
-51
View File
@@ -224,14 +224,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("ARCEEAI_API_KEY",),
base_url_env_var="ARCEE_BASE_URL",
),
"gmi": ProviderConfig(
id="gmi",
name="GMI Cloud",
auth_type="api_key",
inference_base_url="https://api.gmi-serving.com/v1",
api_key_env_vars=("GMI_API_KEY",),
base_url_env_var="GMI_BASE_URL",
),
"minimax": ProviderConfig(
id="minimax",
name="MiniMax",
@@ -374,37 +366,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
),
}
# Auto-extend PROVIDER_REGISTRY with any api-key provider registered in
# providers/ that is not already declared above. New providers only need a
# providers/*.py file — no edits to this file required.
try:
from providers import list_providers as _list_providers_for_registry
for _pp in _list_providers_for_registry():
if _pp.name in PROVIDER_REGISTRY:
continue
if _pp.auth_type != "api_key" or not _pp.env_vars:
continue
# Skip providers that need custom token resolution (copilot, kimi, zai)
# — those are already fully declared above.
if _pp.name in {"copilot", "kimi-coding", "kimi-coding-cn", "zai"}:
continue
_api_key_vars = tuple(v for v in _pp.env_vars if not v.endswith("_BASE_URL") and not v.endswith("_URL"))
_base_url_var = next((v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), None)
PROVIDER_REGISTRY[_pp.name] = ProviderConfig(
id=_pp.name,
name=_pp.display_name or _pp.name,
auth_type="api_key",
inference_base_url=_pp.base_url,
api_key_env_vars=_api_key_vars or _pp.env_vars,
base_url_env_var=_base_url_var or "",
)
# Also register aliases so resolve_provider() resolves them
for _alias in _pp.aliases:
if _alias not in PROVIDER_REGISTRY:
PROVIDER_REGISTRY[_alias] = PROVIDER_REGISTRY[_pp.name]
except Exception:
pass
# =============================================================================
# Anthropic Key Helper
@@ -1159,7 +1120,6 @@ def resolve_provider(
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
"step": "stepfun", "stepfun-coding-plan": "stepfun",
"arcee-ai": "arcee", "arceeai": "arcee",
"gmi-cloud": "gmi", "gmicloud": "gmi",
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
"alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
"alibaba_coding_plan": "alibaba-coding-plan",
@@ -1181,17 +1141,6 @@ def resolve_provider(
"vllm": "custom", "llamacpp": "custom",
"llama.cpp": "custom", "llama-cpp": "custom",
}
# Extend with aliases declared in providers/*.py that aren't already mapped.
# This keeps providers/ as the single source for new aliases while the
# hardcoded dict above remains authoritative for existing ones.
try:
from providers import list_providers as _lp
for _pp in _lp():
for _alias in _pp.aliases:
if _alias not in _PROVIDER_ALIASES:
_PROVIDER_ALIASES[_alias] = _pp.name
except Exception:
pass
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
if normalized == "openrouter":
+1 -177
View File
@@ -36,23 +36,12 @@ _EXCLUDED_DIRS = {
"__pycache__", # bytecode caches — regenerated on import
".git", # nested git dirs (profiles shouldn't have these, but safety)
"node_modules", # js deps if website/ somehow leaks in
"backups", # prior auto-backups — don't nest backups exponentially
"checkpoints", # session-local trajectory caches — regenerated per-session,
# session-hash-keyed so they don't port to another machine anyway
}
# File-name suffixes to skip
_EXCLUDED_SUFFIXES = (
".pyc",
".pyo",
# SQLite sidecar files — the backup takes a consistent snapshot of ``*.db``
# via ``sqlite3.backup()``, so shipping the live WAL / shared-memory /
# rollback-journal alongside would pair a fresh snapshot with stale sidecar
# state and produce a torn restore on the next open. They're transient and
# regenerated on first connection anyway.
".db-wal",
".db-shm",
".db-journal",
)
# File names to skip (runtime state that's meaningless on another machine)
@@ -465,12 +454,6 @@ def run_import(args) -> None:
# Critical state files to include in quick snapshots (relative to HERMES_HOME).
# Everything else is either regeneratable (logs, cache) or managed separately
# (skills, repo, sessions/).
#
# Entries may be individual files OR directories. Directories are captured
# recursively; missing entries are silently skipped. Pairing data lives in
# platform-specific JSON blobs outside state.db, so it's listed here explicitly
# — `hermes update` snapshots this set before pulling so approved-user lists
# are recoverable if anything goes wrong (issue #15733).
_QUICK_STATE_FILES = (
"state.db",
"config.yaml",
@@ -480,10 +463,6 @@ _QUICK_STATE_FILES = (
"gateway_state.json",
"channel_directory.json",
"processes.json",
# Pairing stores (generic + per-platform JSONs outside state.db)
"pairing", # legacy location (gateway/pairing.py)
"platforms/pairing", # new location (gateway/pairing.py)
"feishu_comment_pairing.json", # Feishu comment subscription pairings
)
_QUICK_SNAPSHOTS_DIR = "state-snapshots"
@@ -519,27 +498,7 @@ def create_quick_snapshot(
for rel in _QUICK_STATE_FILES:
src = home / rel
if not src.exists():
continue
if src.is_dir():
# Walk the directory and record each file individually in the
# manifest so restore can treat them uniformly. Empty dirs are
# skipped (nothing to snapshot).
for sub in src.rglob("*"):
if not sub.is_file():
continue
sub_rel = sub.relative_to(home).as_posix()
dst = snap_dir / sub_rel
dst.parent.mkdir(parents=True, exist_ok=True)
try:
shutil.copy2(sub, dst)
manifest[sub_rel] = dst.stat().st_size
except (OSError, PermissionError) as exc:
logger.warning("Could not snapshot %s: %s", sub_rel, exc)
continue
if not src.is_file():
if not src.exists() or not src.is_file():
continue
dst = snap_dir / rel
@@ -694,138 +653,3 @@ def run_quick_backup(args) -> None:
print(f" Restore with: /snapshot restore {snap_id}")
else:
print("No state files found to snapshot.")
# ---------------------------------------------------------------------------
# Pre-update auto-backup
# ---------------------------------------------------------------------------
_PRE_UPDATE_BACKUPS_DIR = "backups"
_PRE_UPDATE_PREFIX = "pre-update-"
_PRE_UPDATE_DEFAULT_KEEP = 5
def _pre_update_backup_dir(hermes_home: Optional[Path] = None) -> Path:
home = hermes_home or get_hermes_home()
return home / _PRE_UPDATE_BACKUPS_DIR
def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
"""Remove oldest pre-update backups beyond the keep limit.
Returns the number of files deleted. Only touches files matching
``pre-update-*.zip`` so hand-made zips dropped in the same directory
are never touched.
"""
if keep < 0:
keep = 0
if not backup_dir.exists():
return 0
backups = sorted(
(p for p in backup_dir.iterdir()
if p.is_file() and p.name.startswith(_PRE_UPDATE_PREFIX) and p.suffix.lower() == ".zip"),
key=lambda p: p.name,
reverse=True,
)
deleted = 0
for p in backups[keep:]:
try:
p.unlink()
deleted += 1
except OSError as exc:
logger.warning("Failed to prune backup %s: %s", p.name, exc)
return deleted
def create_pre_update_backup(
hermes_home: Optional[Path] = None,
keep: int = _PRE_UPDATE_DEFAULT_KEEP,
) -> Optional[Path]:
"""Create a full zip backup of HERMES_HOME under ``backups/``.
Mirrors :func:`run_backup` (same exclusion rules, same SQLite safe-copy)
but writes to ``<HERMES_HOME>/backups/pre-update-<timestamp>.zip`` and
auto-prunes old pre-update backups.
Returns the path to the created zip, or ``None`` if no files were
found or the backup could not be created. Never raises the caller
(``hermes update``) should continue even if the backup fails.
"""
hermes_root = hermes_home or get_default_hermes_root()
if not hermes_root.is_dir():
return None
backup_dir = _pre_update_backup_dir(hermes_root)
try:
backup_dir.mkdir(parents=True, exist_ok=True)
except OSError as exc:
logger.warning("Could not create pre-update backup dir %s: %s", backup_dir, exc)
return None
stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"
# Collect files (same logic as run_backup, minus the chatty progress prints)
files_to_add: list[tuple[Path, Path]] = []
try:
for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
dp = Path(dirpath)
# Prune excluded directories in-place so os.walk doesn't descend
dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
for fname in filenames:
fpath = dp / fname
try:
rel = fpath.relative_to(hermes_root)
except ValueError:
continue
if _should_exclude(rel):
continue
# Skip the output zip itself if it already exists
try:
if fpath.resolve() == out_path.resolve():
continue
except (OSError, ValueError):
pass
files_to_add.append((fpath, rel))
except OSError as exc:
logger.warning("Pre-update backup: walk failed: %s", exc)
return None
if not files_to_add:
return None
try:
with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
for abs_path, rel_path in files_to_add:
try:
if abs_path.suffix == ".db":
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
tmp_db = Path(tmp.name)
try:
if _safe_copy_db(abs_path, tmp_db):
zf.write(tmp_db, arcname=str(rel_path))
finally:
tmp_db.unlink(missing_ok=True)
else:
zf.write(abs_path, arcname=str(rel_path))
except (PermissionError, OSError, ValueError) as exc:
logger.debug("Skipping %s in pre-update backup: %s", rel_path, exc)
continue
except OSError as exc:
logger.warning("Pre-update backup: zip write failed: %s", exc)
# Best-effort cleanup of partial file
try:
out_path.unlink(missing_ok=True)
except OSError:
pass
return None
_prune_pre_update_backups(backup_dir, keep=keep)
return out_path
-2
View File
@@ -62,8 +62,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
aliases=("reset",)),
CommandDef("clear", "Clear screen and start a new session", "Session",
cli_only=True),
CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session",
cli_only=True),
CommandDef("history", "Show conversation history", "Session",
cli_only=True),
CommandDef("save", "Save the current conversation", "Session",
-86
View File
@@ -389,20 +389,6 @@ DEFAULT_CONFIG = {
# (60+ tool iterations with tiny output) before users assume the
# bot is dead and /restart.
"gateway_notify_interval": 180,
# How user-attached images are presented to the main model on each turn.
# "auto" — attach natively when the active model reports
# supports_vision=True AND the user hasn't explicitly
# configured auxiliary.vision.provider. Otherwise fall
# back to text (vision_analyze pre-analysis).
# "native" — always attach natively; non-vision models will either
# error at the provider or get a last-chance text fallback
# (see run_agent._prepare_messages_for_api).
# "text" — always pre-analyze with vision_analyze and prepend the
# description as text; the main model never sees pixels.
# Affects gateway platforms, the TUI, and CLI /attach. vision_analyze
# remains available as a tool regardless of this setting — the routing
# only controls how inbound user images are presented.
"image_input_mode": "auto",
},
"terminal": {
@@ -1051,20 +1037,6 @@ DEFAULT_CONFIG = {
"seen": {},
},
# ``hermes update`` behaviour.
"updates": {
# Run a full ``hermes backup``-style zip of HERMES_HOME before every
# ``hermes update``. Backups land in ``<HERMES_HOME>/backups/`` and
# can be restored with ``hermes import <path>``. Off by default —
# on large HERMES_HOME directories the zip can add minutes to every
# update. Set to true to re-enable, or pass ``--backup`` to opt in
# for a single update run.
"pre_update_backup": False,
# How many pre-update backup zips to retain. Older ones are pruned
# automatically after each successful backup.
"backup_keep": 5,
},
# Config schema version - bump this when adding new required fields
"_config_version": 22,
}
@@ -1254,22 +1226,6 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"GMI_API_KEY": {
"description": "GMI Cloud API key",
"prompt": "GMI Cloud API key",
"url": "https://www.gmicloud.ai/",
"password": True,
"category": "provider",
"advanced": True,
},
"GMI_BASE_URL": {
"description": "GMI Cloud base URL override",
"prompt": "GMI Cloud base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"MINIMAX_API_KEY": {
"description": "MiniMax API key (international)",
"prompt": "MiniMax API key",
@@ -4252,45 +4208,3 @@ def config_command(args):
print(" hermes config path Show config file path")
print(" hermes config env-path Show .env file path")
sys.exit(1)
# ── Profile-driven env var injection ─────────────────────────────────────────
# Any provider registered in providers/ with auth_type="api_key" automatically
# gets its env_vars exposed in OPTIONAL_ENV_VARS without editing this file.
# Runs once at import time.
_profile_env_vars_injected = False
def _inject_profile_env_vars() -> None:
"""Populate OPTIONAL_ENV_VARS from provider profiles not already listed.
Called once at module load time. Idempotent repeated calls are no-ops.
"""
global _profile_env_vars_injected
if _profile_env_vars_injected:
return
_profile_env_vars_injected = True
try:
from providers import list_providers
for _pp in list_providers():
if _pp.auth_type not in ("api_key",):
continue
for _var in _pp.env_vars:
if _var in OPTIONAL_ENV_VARS:
continue
_is_key = not _var.endswith("_BASE_URL") and not _var.endswith("_URL")
OPTIONAL_ENV_VARS[_var] = {
"description": f"{_pp.display_name or _pp.name} {'API key' if _is_key else 'base URL override'}",
"prompt": f"{_pp.display_name or _pp.name} {'API key' if _is_key else 'base URL (leave empty for default)'}",
"url": _pp.signup_url or None,
"password": _is_key,
"category": "provider",
"advanced": True,
}
except Exception:
pass
# Eagerly inject so that OPTIONAL_ENV_VARS is fully populated at import time.
_inject_profile_env_vars()
+5 -11
View File
@@ -45,13 +45,8 @@ def _pending_file() -> Path:
Each entry: ``{"url": "...", "expire_at": <unix_ts>}``. Scheduled
DELETEs used to be handled by spawning a detached Python process per
paste that slept for 6 hours; those accumulated forever if the user
ran ``hermes debug share`` repeatedly.
Deletion is now driven by the gateway's cron ticker
(``gateway/run.py::_start_cron_ticker``) which calls
``_sweep_expired_pastes`` once per hour. ``hermes debug share`` also
runs an opportunistic sweep on entry as a fallback for CLI-only users
who never start the gateway.
ran ``hermes debug share`` repeatedly. We now persist the schedule
to disk and sweep expired entries on the next debug invocation.
"""
return get_hermes_home() / "pastes" / "pending.json"
@@ -228,10 +223,9 @@ def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SEC
interpreters that never exited until the sleep completed.
The replacement is stateless: we append to ``~/.hermes/pastes/pending.json``
and the gateway's cron ticker sweeps expired entries once per hour.
``hermes debug share`` also runs an opportunistic sweep as a fallback
for CLI-only users. If neither runs again, paste.rs's own retention
policy handles cleanup.
and rely on opportunistic sweeps (``_sweep_expired_pastes``) called from
every ``hermes debug`` invocation. If the user never runs ``hermes debug``
again, paste.rs's own retention policy handles cleanup.
"""
_record_pending(urls, delay_seconds=delay_seconds)
+20 -84
View File
@@ -46,7 +46,6 @@ _PROVIDER_ENV_HINTS = (
"Z_AI_API_KEY",
"KIMI_API_KEY",
"KIMI_CN_API_KEY",
"GMI_API_KEY",
"MINIMAX_API_KEY",
"MINIMAX_CN_API_KEY",
"KILOCODE_API_KEY",
@@ -164,84 +163,6 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
check_warn("Could not verify systemd linger", f"({linger_detail})")
_APIKEY_PROVIDERS_CACHE: list | None = None
def _build_apikey_providers_list() -> list:
"""Build the API-key provider health-check list once and cache it.
Tuple format: (name, env_vars, default_url, base_env, supports_models_endpoint)
Base list augmented with any ProviderProfile with auth_type="api_key" not
already present adding providers/*.py is sufficient to get into doctor.
"""
_static = [
("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
# MiniMax: the /anthropic endpoint doesn't support /models; use the /v1 surface.
("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True),
("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
# OpenCode Go has no shared /models endpoint; skip the health check.
("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False),
]
_known_names = {t[0] for t in _static}
# Also index by profile canonical name so profiles without display_name
# don't create duplicate entries for providers already in the static list.
_known_canonical: set[str] = set()
_name_to_canonical = {
"Z.AI / GLM": "zai", "Kimi / Moonshot": "kimi-coding",
"StepFun Step Plan": "stepfun", "Kimi / Moonshot (China)": "kimi-coding-cn",
"Arcee AI": "arcee", "GMI Cloud": "gmi", "DeepSeek": "deepseek",
"Hugging Face": "huggingface", "NVIDIA NIM": "nvidia",
"Alibaba/DashScope": "alibaba", "MiniMax": "minimax",
"MiniMax (China)": "minimax-cn", "Vercel AI Gateway": "ai-gateway",
"Kilo Code": "kilocode", "OpenCode Zen": "opencode-zen",
"OpenCode Go": "opencode-go",
}
for _label, _canonical in _name_to_canonical.items():
_known_canonical.add(_canonical)
try:
from providers import list_providers
from providers.base import ProviderProfile as _PP
for _pp in list_providers():
if not isinstance(_pp, _PP) or _pp.auth_type != "api_key" or not _pp.env_vars:
continue
_label = _pp.display_name or _pp.name
if _label in _known_names or _pp.name in _known_canonical:
continue
# Separate API-key vars from base-URL override vars — the health-check
# loop sends the first found value as Authorization: Bearer, so a URL
# string must never be picked.
_key_vars = tuple(
v for v in _pp.env_vars
if not v.endswith("_BASE_URL") and not v.endswith("_URL")
)
_base_var = next(
(v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")),
None,
)
if not _key_vars:
continue
_models_url = (
(_pp.models_url or (_pp.base_url.rstrip("/") + "/models"))
if _pp.base_url else None
)
_static.append((_label, _key_vars, _models_url, _base_var, True))
except Exception:
pass
return _static
def run_doctor(args):
"""Run diagnostic checks."""
should_fix = getattr(args, 'fix', False)
@@ -1009,11 +930,26 @@ def run_doctor(args):
# -- API-key providers --
# Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
# Cached at module level after first build — profiles auto-extend it.
global _APIKEY_PROVIDERS_CACHE
if _APIKEY_PROVIDERS_CACHE is None:
_APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list()
_apikey_providers = _APIKEY_PROVIDERS_CACHE
# If supports_models_endpoint is False, we skip the health check and just show "configured"
_apikey_providers = [
("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
# MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True),
("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
# OpenCode Go has no shared /models endpoint; skip the health check.
("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False),
]
for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
_key = ""
for _ev in _env_vars:
+38 -269
View File
@@ -44,7 +44,6 @@ Usage:
"""
import argparse
import json
import os
import shutil
import subprocess
@@ -596,22 +595,17 @@ def _session_browse_picker(sessions: list) -> Optional[str]:
def _resolve_last_session(source: str = "cli") -> Optional[str]:
"""Look up the most recently-used session ID for a source."""
db = None
"""Look up the most recent session ID for a source."""
try:
from hermes_state import SessionDB
db = SessionDB()
sessions = db.search_sessions(source=source, limit=1)
return sessions[0]["id"] if sessions else None
db.close()
if sessions:
return sessions[0]["id"]
except Exception:
pass
finally:
if db is not None:
try:
db.close()
except Exception:
pass
return None
@@ -766,20 +760,9 @@ def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]:
return None
def _read_tui_active_session_file(path: Optional[str]) -> Optional[str]:
if not path:
return None
try:
data = json.loads(Path(path).read_text(encoding="utf-8"))
sid = str(data.get("session_id") or "").strip()
return sid or None
except Exception:
return None
def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Optional[str] = None) -> None:
def _print_tui_exit_summary(session_id: Optional[str]) -> None:
"""Print a shell-visible epilogue after TUI exits."""
target = _read_tui_active_session_file(active_session_file) or session_id or _resolve_last_session(source="tui")
target = session_id or _resolve_last_session(source="tui")
if not target:
return
@@ -829,29 +812,8 @@ def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Opti
)
_NPM_LOCK_RUNTIME_KEYS = frozenset({"ideallyInert"})
def _tui_need_npm_install(root: Path) -> bool:
"""True when @hermes/ink is missing or node_modules is behind package-lock.json.
Compares ``package-lock.json`` against ``node_modules/.package-lock.json``
(npm's hidden lockfile) by **content**, not mtime: git checkouts and npm
rewrites can bump the root lockfile's timestamp even when installed deps
already match, which used to trigger a spurious "Installing TUI
dependencies" on every launch.
For each entry in the root lock's ``packages`` map:
- missing from hidden lock reinstall (unless the entry is marked
``optional`` or ``peer``, which npm may intentionally skip per platform)
- present but with differing fields (excluding npm-written runtime
annotations like ``ideallyInert``) reinstall
Extra entries that exist only in the hidden lock are ignored stale
transitives left over from a removed dependency don't break runtime and
we'd rather not force a reinstall for them. Falls back to mtime
comparison if either lockfile is unparseable.
"""
"""True when @hermes/ink is missing or node_modules is behind package-lock.json (post-pull)."""
ink = root / "node_modules" / "@hermes" / "ink" / "package.json"
if not ink.is_file():
return True
@@ -861,35 +823,7 @@ def _tui_need_npm_install(root: Path) -> bool:
marker = root / "node_modules" / ".package-lock.json"
if not marker.is_file():
return True
# Compare lockfile contents, not mtimes: git checkouts and npm rewrites
# can bump the root lockfile timestamp even when installed deps already
# match. Fall back to mtime when either file is unparseable.
try:
wanted = json.loads(lock.read_text(encoding="utf-8")).get("packages") or {}
installed = json.loads(marker.read_text(encoding="utf-8")).get("packages") or {}
except (OSError, UnicodeDecodeError, json.JSONDecodeError):
return lock.stat().st_mtime > marker.stat().st_mtime
def comparable(pkg: dict) -> dict:
return {k: v for k, v in pkg.items() if k not in _NPM_LOCK_RUNTIME_KEYS}
for name, pkg in wanted.items():
if not name:
continue
if not isinstance(pkg, dict):
continue
if name not in installed:
if pkg.get("optional") or pkg.get("peer"):
continue
return True
if isinstance(installed[name], dict) and comparable(pkg) != comparable(installed[name]):
return True
return False
return lock.stat().st_mtime > marker.stat().st_mtime
def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:
@@ -1103,14 +1037,7 @@ def _launch_tui(
"""Replace current process with the TUI."""
tui_dir = PROJECT_ROOT / "ui-tui"
import tempfile
env = os.environ.copy()
active_session_fd, active_session_file = tempfile.mkstemp(
prefix="hermes-tui-active-session-", suffix=".json"
)
os.close(active_session_fd)
env["HERMES_TUI_ACTIVE_SESSION_FILE"] = active_session_file
env["HERMES_PYTHON_SRC_ROOT"] = os.environ.get(
"HERMES_PYTHON_SRC_ROOT", str(PROJECT_ROOT)
)
@@ -1138,20 +1065,13 @@ def _launch_tui(
env["HERMES_TUI_RESUME"] = resume_session_id
argv, cwd = _make_tui_argv(tui_dir, tui_dev)
code: Optional[int] = None
try:
try:
code = subprocess.call(argv, cwd=str(cwd), env=env)
except KeyboardInterrupt:
code = 130
code = subprocess.call(argv, cwd=str(cwd), env=env)
except KeyboardInterrupt:
code = 130
if code in (0, 130):
_print_tui_exit_summary(resume_session_id, active_session_file)
finally:
try:
os.unlink(active_session_file)
except OSError:
pass
if code in (0, 130):
_print_tui_exit_summary(resume_session_id)
sys.exit(code)
@@ -1528,21 +1448,6 @@ def cmd_model(args):
select_provider_and_model(args=args)
def _is_profile_api_key_provider(provider_id: str) -> bool:
"""Return True when provider_id maps to a profile with auth_type='api_key'.
Used as a catch-all in select_provider_and_model() so that new providers
declared in providers/*.py automatically dispatch to _model_flow_api_key_provider
without requiring an explicit elif branch here.
"""
try:
from providers import get_provider_profile
_p = get_provider_profile(provider_id)
return _p is not None and _p.auth_type == "api_key"
except Exception:
return False
def select_provider_and_model(args=None):
"""Core provider selection + model picking logic.
@@ -1832,10 +1737,9 @@ def select_provider_and_model(args=None):
"huggingface",
"xiaomi",
"arcee",
"gmi",
"nvidia",
"ollama-cloud",
) or _is_profile_api_key_provider(selected_provider):
):
_model_flow_api_key_provider(config, selected_provider, current_model)
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
@@ -3428,26 +3332,7 @@ def _model_flow_named_custom(config, provider_info):
provider_entry = providers_cfg.get(provider_key)
if isinstance(provider_entry, dict):
provider_entry["default_model"] = model_name
# Only persist an inline api_key when the user originally had
# one (either a literal secret or a ``${VAR}`` template). When
# the entry relies on ``key_env``, do not synthesize a
# ``${key_env}`` api_key — the runtime already resolves the
# key from ``key_env`` directly, and writing the resolved
# secret (or even a synthesized template) would silently
# downgrade credential hygiene on entries that intentionally
# keep plaintext out of ``config.yaml``. See issue #15803.
original_api_key_ref = str(
provider_info.get("api_key_ref", "") or ""
).strip()
original_api_key = str(
provider_info.get("api_key", "") or ""
).strip()
had_inline_api_key = bool(original_api_key_ref or original_api_key)
if (
had_inline_api_key
and config_api_key
and not str(provider_entry.get("api_key", "") or "").strip()
):
if config_api_key and not str(provider_entry.get("api_key", "") or "").strip():
provider_entry["api_key"] = config_api_key
if key_env and not str(provider_entry.get("key_env", "") or "").strip():
provider_entry["key_env"] = key_env
@@ -6238,96 +6123,6 @@ def _ensure_fhs_path_guard() -> None:
print(" (reload your shell or run 'source ~/.bashrc' to pick it up)")
def _run_pre_update_backup(args) -> None:
"""Create a full zip backup of HERMES_HOME before running the update.
Gated on ``updates.pre_update_backup`` in config (default false). Off
by default because the zip can add minutes to every update on large
HERMES_HOME directories. The ``--backup`` flag on ``hermes update``
opts in for a single run; ``--no-backup`` forces it off when config
has it enabled. Never raises a backup failure should not block the
update itself.
"""
# CLI flags win over config. --no-backup beats --backup if both are set.
if getattr(args, "no_backup", False):
print("◆ Pre-update backup: skipped (--no-backup)")
print()
return
force_backup = bool(getattr(args, "backup", False))
try:
from hermes_cli.config import load_config
cfg = load_config()
except Exception as exc:
logging.getLogger(__name__).debug("Could not load config for pre-update backup: %s", exc)
cfg = {}
updates_cfg = cfg.get("updates", {}) if isinstance(cfg, dict) else {}
enabled = updates_cfg.get("pre_update_backup", False)
keep = updates_cfg.get("backup_keep", 5)
if not enabled and not force_backup:
# Silent by default — the backup is off, most users don't need to
# hear about it on every update. They can opt in via --backup
# or by flipping the config knob.
return
try:
from hermes_cli.backup import create_pre_update_backup
except Exception as exc:
print(f"⚠ Pre-update backup: could not load backup module ({exc}); continuing update.")
print()
return
print("◆ Creating pre-update backup...")
t0 = _time.monotonic()
try:
out_path = create_pre_update_backup(keep=int(keep))
except Exception as exc: # defensive — helper already swallows, but just in case
print(f" ⚠ Backup failed: {exc}")
print(" Continuing with update.")
print()
return
elapsed = _time.monotonic() - t0
if out_path is None:
print(" ⚠ Backup skipped (no files found or write failed); continuing update.")
print()
return
try:
size_bytes = out_path.stat().st_size
except OSError:
size_bytes = 0
# Human-readable size
size_str = f"{size_bytes} B"
for unit in ("KB", "MB", "GB"):
if size_bytes < 1024:
break
size_bytes /= 1024
size_str = f"{size_bytes:.1f} {unit}"
# Render path using display_hermes_home so the user sees ~/.hermes/...
try:
from hermes_constants import get_hermes_home, display_hermes_home
home = get_hermes_home()
try:
display_path = f"{display_hermes_home()}/{out_path.relative_to(home)}"
except ValueError:
display_path = str(out_path)
except Exception:
display_path = str(out_path)
print(f" Saved: {display_path} ({size_str}, {elapsed:.1f}s)")
print(f" Restore: hermes import {out_path}")
print(f" Disable: omit --backup (backups are off by default)")
print(f" set updates.pre_update_backup: false in config.yaml")
print()
def cmd_update(args):
"""Update Hermes Agent to the latest version.
@@ -6370,10 +6165,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
print("⚕ Updating Hermes Agent...")
print()
# Pre-update backup — runs before any git/file mutation so users can
# always roll back to the exact state they had before this update.
_run_pre_update_backup(args)
# Try git-based update first, fall back to ZIP download on Windows
# when git file I/O is broken (antivirus, NTFS filter drivers, etc.)
use_zip_update = False
@@ -6523,22 +6314,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
print(f"→ Found {commit_count} new commit(s)")
# Snapshot critical state (state.db, config, pairing JSONs, etc.)
# before pulling so a user can recover if something goes wrong.
# Issue #15733 reported missing pairing data after an update; even
# though `git pull` can't touch $HERMES_HOME, this is cheap
# belt-and-suspenders insurance and gives the user something to
# restore from via `/snapshot list` / `/snapshot restore <id>`.
try:
from hermes_cli.backup import create_quick_snapshot
snap_id = create_quick_snapshot(label="pre-update")
if snap_id:
print(f" ✓ Pre-update snapshot: {snap_id}")
except Exception as exc:
# Never let a snapshot failure block an update.
logger.debug("Pre-update snapshot failed: %s", exc)
print("→ Pulling updates...")
update_succeeded = False
try:
@@ -7633,22 +7408,6 @@ def cmd_logs(args):
)
def _build_provider_choices() -> list[str]:
"""Build the --provider choices list from CANONICAL_PROVIDERS + 'auto'."""
try:
from hermes_cli.models import CANONICAL_PROVIDERS as _cp
return ["auto"] + [p.slug for p in _cp]
except Exception:
# Fallback: static list guarantees the CLI always works
return [
"auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot",
"anthropic", "gemini", "google-gemini-cli", "xai", "bedrock", "azure-foundry",
"ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn",
"stepfun", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee",
"nvidia", "deepseek", "alibaba", "qwen-oauth", "opencode-zen", "opencode-go",
]
def main():
"""Main entry point for hermes CLI."""
parser = argparse.ArgumentParser(
@@ -7842,7 +7601,29 @@ For more help on a command:
)
chat_parser.add_argument(
"--provider",
choices=_build_provider_choices(),
choices=[
"auto",
"openrouter",
"nous",
"openai-codex",
"copilot-acp",
"copilot",
"anthropic",
"gemini",
"xai",
"ollama-cloud",
"huggingface",
"zai",
"kimi-coding",
"kimi-coding-cn",
"stepfun",
"minimax",
"minimax-cn",
"kilocode",
"xiaomi",
"arcee",
"nvidia",
],
default=None,
help="Inference provider (default: auto)",
)
@@ -9761,18 +9542,6 @@ Examples:
default=False,
help="Check whether an update is available without installing anything",
)
update_parser.add_argument(
"--no-backup",
action="store_true",
default=False,
help="Skip the pre-update backup for this run (overrides updates.pre_update_backup)",
)
update_parser.add_argument(
"--backup",
action="store_true",
default=False,
help="Force a pre-update backup for this run (off by default; overrides updates.pre_update_backup)",
)
update_parser.set_defaults(func=cmd_update)
# =========================================================================
+1 -117
View File
@@ -278,14 +278,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"trinity-large-preview",
"trinity-mini",
],
"gmi": [
"zai-org/GLM-5.1-FP8",
"deepseek-ai/DeepSeek-V3.2",
"moonshotai/Kimi-K2.5",
"google/gemini-3.1-flash-lite-preview",
"anthropic/claude-sonnet-4.6",
"openai/gpt-5.4",
],
"opencode-zen": [
"kimi-k2.5",
"gpt-5.4-pro",
@@ -717,6 +709,7 @@ class ProviderEntry(NamedTuple):
label: str
tui_desc: str # detailed description for `hermes model` TUI
CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
@@ -742,7 +735,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
ProviderEntry("gmi", "GMI Cloud", "GMI Cloud (multi-model direct API)"),
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
@@ -750,25 +742,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
]
# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
# that is not already in the list above. Adding providers/*.py is sufficient
# to expose a new provider in the model picker, /model, and all downstream
# consumers — no edits to this file needed.
_canonical_slugs = {p.slug for p in CANONICAL_PROVIDERS}
try:
from providers import list_providers as _list_providers_for_canonical
for _pp in _list_providers_for_canonical():
if _pp.name in _canonical_slugs:
continue
if _pp.auth_type in ("oauth_device_code", "oauth_external", "external_process", "aws_sdk", "copilot"):
continue # non-api-key flows need bespoke picker UX; skip auto-inject
_label = _pp.display_name or _pp.name
_desc = _pp.description or f"{_label} (direct API)"
CANONICAL_PROVIDERS.append(ProviderEntry(_pp.name, _label, _desc))
_canonical_slugs.add(_pp.name)
except Exception:
pass
# Derived dicts — used throughout the codebase
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
@@ -796,8 +769,6 @@ _PROVIDER_ALIASES = {
"stepfun-coding-plan": "stepfun",
"arcee-ai": "arcee",
"arceeai": "arcee",
"gmi-cloud": "gmi",
"gmicloud": "gmi",
"minimax-china": "minimax-cn",
"minimax_cn": "minimax-cn",
"claude": "anthropic",
@@ -1878,19 +1849,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
return live
except Exception:
pass
if normalized == "gmi":
try:
from hermes_cli.auth import resolve_api_key_provider_credentials
creds = resolve_api_key_provider_credentials("gmi")
api_key = str(creds.get("api_key") or "").strip()
base_url = str(creds.get("base_url") or "").strip()
if api_key and base_url:
live = fetch_api_models(api_key, base_url)
if live:
return live
except Exception:
pass
if normalized == "custom":
base_url = _get_custom_base_url()
if base_url:
@@ -1903,34 +1861,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
live = fetch_api_models(api_key, base_url)
if live:
return live
# ── Profile-based generic live fetch (all simple api-key providers) ──
# Handles any provider registered in providers/ with auth_type="api_key".
# Replaces per-provider copy-paste blocks (stepfun, gmi, zai, etc.).
try:
from providers import get_provider_profile
from hermes_cli.auth import resolve_api_key_provider_credentials
_p = get_provider_profile(normalized)
if _p and _p.auth_type == "api_key" and _p.base_url:
try:
creds = resolve_api_key_provider_credentials(normalized)
api_key = str(creds.get("api_key") or "").strip()
base_url = str(creds.get("base_url") or "").strip()
except Exception:
api_key, base_url = "", _p.base_url
if not base_url:
base_url = _p.base_url
if api_key:
live = _p.fetch_models(api_key=api_key)
if live:
return live
# Use profile's fallback_models if defined
if _p.fallback_models:
return list(_p.fallback_models)
except Exception:
pass
curated_static = list(_PROVIDER_MODELS.get(normalized, []))
if normalized in _MODELS_DEV_PREFERRED:
return _merge_with_models_dev(normalized, curated_static)
@@ -2296,52 +2226,6 @@ def copilot_model_api_mode(
return "chat_completions"
# Azure Foundry model families that require the Responses API. Azure
# rejects /chat/completions against these deployments with
# ``400 "The requested operation is unsupported."`` — the same payload Bob
# Dobolina hit in April 2026 on ``gpt-5.3-codex`` while ``gpt-4o-pure`` on
# the same endpoint worked fine. Keep the patterns broad enough to cover
# vendor-renamed deployments (e.g. ``gpt-5.3-codex``, ``gpt-5-codex``,
# ``gpt-5.4``, ``o1-preview``) but tight enough to leave GPT-4 / 3.5 / Llama /
# Mistral / Grok deployments on chat completions.
_AZURE_FOUNDRY_RESPONSES_PREFIXES = (
"codex", # codex-*, codex-mini
"gpt-5", # gpt-5, gpt-5.x, gpt-5-codex, gpt-5.x-codex
"o1", # o1, o1-preview, o1-mini
"o3", # o3, o3-mini
"o4", # o4, o4-mini
)
def azure_foundry_model_api_mode(model_name: Optional[str]) -> Optional[str]:
"""Infer Azure Foundry api_mode from a deployment/model name.
Returns ``"codex_responses"`` when the model name matches a family that
only accepts the Responses API on Azure Foundry (GPT-5.x, codex, o1/o3/o4
reasoning models). Returns ``None`` otherwise the caller should fall
back to the configured/default api_mode (typically ``chat_completions``)
so GPT-4o, GPT-4 Turbo, Llama, Mistral, etc. keep working.
Intentionally does NOT return ``anthropic_messages``; Anthropic-style
Azure endpoints are disambiguated by URL (``/anthropic`` suffix) in
``runtime_provider._detect_api_mode_for_url`` and by the user setting
``model.api_mode: anthropic_messages`` explicitly.
"""
raw = str(model_name or "").strip().lower()
if not raw:
return None
# Strip any vendor/ prefix a user may have copied from OpenRouter / Copilot.
if "/" in raw:
raw = raw.rsplit("/", 1)[-1]
# gpt-5-mini speaks chat completions on Copilot but Azure Foundry deploys
# the full gpt-5 family uniformly on Responses API — don't carve an
# exception here.
for prefix in _AZURE_FOUNDRY_RESPONSES_PREFIXES:
if raw.startswith(prefix):
return "codex_responses"
return None
def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str:
"""Normalize OpenCode config IDs to the bare model slug used in API requests."""
provider = normalize_provider(provider_id)
-14
View File
@@ -79,20 +79,6 @@ VALID_HOOKS: Set[str] = {
# {"action": "allow"} / None -> normal dispatch
# Kwargs: event: MessageEvent, gateway: GatewayRunner, session_store.
"pre_gateway_dispatch",
# Approval lifecycle hooks. Fired by tools/approval.py when a dangerous
# command needs user approval -- fires BOTH for CLI-interactive prompts
# and for gateway/ACP approvals (Telegram, Discord, Slack, TUI, etc.).
# Observers only: return values are ignored. Plugins cannot veto or
# pre-answer an approval from these hooks (use pre_tool_call to block
# a tool before it reaches approval).
#
# Kwargs for pre_approval_request:
# command: str, description: str, pattern_key: str, pattern_keys: list[str],
# session_key: str, surface: "cli" | "gateway"
# Kwargs for post_approval_response: same as above plus
# choice: "once" | "session" | "always" | "deny" | "timeout"
"pre_approval_request",
"post_approval_response",
}
ENTRY_POINTS_GROUP = "hermes_agent.plugins"
-11
View File
@@ -163,12 +163,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_override="https://api.arcee.ai/api/v1",
base_url_env_var="ARCEE_BASE_URL",
),
"gmi": HermesOverlay(
transport="openai_chat",
extra_env_vars=("GMI_API_KEY",),
base_url_override="https://api.gmi-serving.com/v1",
base_url_env_var="GMI_BASE_URL",
),
"ollama-cloud": HermesOverlay(
transport="openai_chat",
base_url_env_var="OLLAMA_BASE_URL",
@@ -303,10 +297,6 @@ ALIASES: Dict[str, str] = {
"arcee-ai": "arcee",
"arceeai": "arcee",
# gmi
"gmi-cloud": "gmi",
"gmicloud": "gmi",
# Local server aliases → virtual "local" concept (resolved via user config)
"lmstudio": "lmstudio",
"lm-studio": "lmstudio",
@@ -329,7 +319,6 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"copilot-acp": "GitHub Copilot ACP",
"stepfun": "StepFun Step Plan",
"xiaomi": "Xiaomi MiMo",
"gmi": "GMI Cloud",
"local": "Local endpoint",
"bedrock": "AWS Bedrock",
"ollama-cloud": "Ollama Cloud",
+10 -54
View File
@@ -214,6 +214,10 @@ def _resolve_runtime_from_pool_entry(
base_url = cfg_base_url or base_url or "https://api.anthropic.com"
elif provider == "openrouter":
base_url = base_url or OPENROUTER_BASE_URL
elif provider == "xai":
api_mode = "codex_responses"
elif provider == "nous":
api_mode = "chat_completions"
elif provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
@@ -227,32 +231,11 @@ def _resolve_runtime_from_pool_entry(
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode:
api_mode = configured_mode
# Model-family inference for GPT-5.x / codex / o1-o4: Azure rejects
# /chat/completions on these with 400 "operation unsupported" — see
# azure_foundry_model_api_mode() for rationale. Skip when the user
# explicitly picked anthropic_messages (Anthropic-style endpoint).
if effective_model and api_mode != "anthropic_messages":
try:
from hermes_cli.models import azure_foundry_model_api_mode
inferred = azure_foundry_model_api_mode(effective_model)
except Exception:
inferred = None
if inferred:
api_mode = inferred
# For Anthropic-style endpoints, strip /v1 suffix
if api_mode == "anthropic_messages":
base_url = re.sub(r"/v1/?$", "", base_url)
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Use profile api_mode for all other known providers
try:
from providers import get_provider_profile
_p = get_provider_profile(provider)
if _p and _p.api_mode:
api_mode = _p.api_mode
except Exception:
pass
# Honour model.base_url from config.yaml when the configured provider
# matches this provider — same pattern as the Anthropic branch above.
# Only override when the pool entry has no explicit base_url (i.e. it
@@ -270,21 +253,12 @@ def _resolve_runtime_from_pool_entry(
from hermes_cli.models import opencode_model_api_mode
api_mode = opencode_model_api_mode(provider, effective_model)
else:
# Try profile api_mode first, then auto-detect from URL
try:
from providers import get_provider_profile
_p = get_provider_profile(provider)
if _p and _p.api_mode:
api_mode = _p.api_mode
except Exception:
pass
if api_mode == "chat_completions":
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
# Kimi /coding, api.openai.com → codex_responses, api.x.ai →
# codex_responses).
detected = _detect_api_mode_for_url(base_url)
if detected:
api_mode = detected
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
# Kimi /coding, api.openai.com → codex_responses, api.x.ai →
# codex_responses).
detected = _detect_api_mode_for_url(base_url)
if detected:
api_mode = detected
# OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
# Anthropic SDK prepends its own /v1/messages to the base_url. Strip the
@@ -634,7 +608,6 @@ def _resolve_azure_foundry_runtime(
model_cfg: Dict[str, Any],
explicit_api_key: Optional[str] = None,
explicit_base_url: Optional[str] = None,
target_model: Optional[str] = None,
) -> Dict[str, Any]:
"""Resolve an Azure Foundry runtime entry.
@@ -655,22 +628,6 @@ def _resolve_azure_foundry_runtime(
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
# Model-family inference: Azure Foundry deploys GPT-5.x / codex / o1-o4
# reasoning models as Responses-API-only. Calling /chat/completions
# against them returns 400 "The requested operation is unsupported."
# Upgrade api_mode when the model name matches, unless the user has
# explicitly chosen anthropic_messages (Anthropic-style endpoint).
effective_model = str(target_model or model_cfg.get("default") or "").strip()
if effective_model and cfg_api_mode != "anthropic_messages":
try:
from hermes_cli.models import azure_foundry_model_api_mode
inferred = azure_foundry_model_api_mode(effective_model)
except Exception:
inferred = None
if inferred:
cfg_api_mode = inferred
env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
base_url = explicit_base_url_clean or cfg_base_url or env_base_url
if not base_url:
@@ -907,7 +864,6 @@ def resolve_runtime_provider(
model_cfg=_get_model_config(),
explicit_api_key=explicit_api_key,
explicit_base_url=explicit_base_url,
target_model=target_model,
)
return azure_runtime
+50 -191
View File
@@ -22,8 +22,6 @@ import sqlite3
import threading
import time
from pathlib import Path
from agent.memory_manager import sanitize_context
from hermes_constants import get_hermes_home
from typing import Any, Callable, Dict, List, Optional, TypeVar
@@ -33,7 +31,7 @@ T = TypeVar("T")
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
SCHEMA_VERSION = 10
SCHEMA_VERSION = 9
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS schema_version (
@@ -121,32 +119,6 @@ CREATE TRIGGER IF NOT EXISTS messages_fts_update AFTER UPDATE ON messages BEGIN
END;
"""
# Trigram FTS5 table for CJK substring search. The default unicode61
# tokenizer splits CJK characters into individual tokens, breaking phrase
# matching. The trigram tokenizer creates overlapping 3-byte sequences so
# substring queries work natively for any script (CJK, Thai, etc.).
FTS_TRIGRAM_SQL = """
CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts_trigram USING fts5(
content,
content=messages,
content_rowid=id,
tokenize='trigram'
);
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_insert AFTER INSERT ON messages BEGIN
INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
END;
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_delete AFTER DELETE ON messages BEGIN
INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
END;
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_update AFTER UPDATE ON messages BEGIN
INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
END;
"""
class SessionDB:
"""
@@ -394,18 +366,6 @@ class SessionDB:
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 9")
if current_version < 10:
# v10: trigram FTS5 table for CJK/substring search.
# Created via FTS_TRIGRAM_SQL below; backfill existing messages.
try:
cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
except sqlite3.OperationalError:
cursor.executescript(FTS_TRIGRAM_SQL)
cursor.execute(
"INSERT INTO messages_fts_trigram(rowid, content) "
"SELECT id, content FROM messages WHERE content IS NOT NULL"
)
cursor.execute("UPDATE schema_version SET version = 10")
# Unique title index — always ensure it exists (safe to run after migrations
# since the title column is guaranteed to exist at this point)
@@ -423,12 +383,6 @@ class SessionDB:
except sqlite3.OperationalError:
cursor.executescript(FTS_SQL)
# Trigram FTS5 for CJK/substring search
try:
cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
except sqlite3.OperationalError:
cursor.executescript(FTS_TRIGRAM_SQL)
self._conn.commit()
# =========================================================================
@@ -1201,10 +1155,7 @@ class SessionDB:
messages = []
for row in rows:
content = row["content"]
if row["role"] in {"user", "assistant"} and isinstance(content, str):
content = sanitize_context(content).strip()
msg = {"role": row["role"], "content": content}
msg = {"role": row["role"], "content": row["content"]}
if row["tool_call_id"]:
msg["tool_call_id"] = row["tool_call_id"]
if row["tool_name"]:
@@ -1340,16 +1291,6 @@ class SessionDB:
return sanitized.strip()
@staticmethod
def _is_cjk_codepoint(cp: int) -> bool:
return (0x4E00 <= cp <= 0x9FFF or # CJK Unified Ideographs
0x3400 <= cp <= 0x4DBF or # CJK Extension A
0x20000 <= cp <= 0x2A6DF or # CJK Extension B
0x3000 <= cp <= 0x303F or # CJK Symbols
0x3040 <= cp <= 0x309F or # Hiragana
0x30A0 <= cp <= 0x30FF or # Katakana
0xAC00 <= cp <= 0xD7AF) # Hangul Syllables
@staticmethod
def _contains_cjk(text: str) -> bool:
"""Check if text contains CJK (Chinese, Japanese, Korean) characters."""
@@ -1365,11 +1306,6 @@ class SessionDB:
return True
return False
@classmethod
def _count_cjk(cls, text: str) -> int:
"""Count CJK characters in text."""
return sum(1 for ch in text if cls._is_cjk_codepoint(ord(ch)))
def search_messages(
self,
query: str,
@@ -1440,113 +1376,52 @@ class SessionDB:
LIMIT ? OFFSET ?
"""
# CJK queries bypass the unicode61 FTS5 table. The default tokenizer
# splits CJK characters into individual tokens, so "大别山项目" becomes
# "大 AND 别 AND 山 AND 项 AND 目" — producing false positives and
# missing exact phrase matches.
#
# For queries with 3+ CJK characters, we use the trigram FTS5 table
# (indexed substring matching with ranking and snippets). For shorter
# CJK queries (1-2 chars), trigram can't match (it needs ≥9 UTF-8
# bytes = 3 CJK chars), so we fall back to LIKE.
is_cjk = self._contains_cjk(query)
if is_cjk:
raw_query = query.strip('"').strip()
cjk_count = self._count_cjk(raw_query)
if cjk_count >= 3:
# Trigram FTS5 path — quote each non-operator token to handle
# FTS5 special chars (%, *, etc.) while preserving boolean
# operators (AND, OR, NOT) for multi-term queries.
tokens = raw_query.split()
parts = []
for tok in tokens:
if tok.upper() in ("AND", "OR", "NOT"):
parts.append(tok)
else:
parts.append('"' + tok.replace('"', '""') + '"')
trigram_query = " ".join(parts)
tri_where = ["messages_fts_trigram MATCH ?"]
tri_params: list = [trigram_query]
if source_filter is not None:
tri_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
tri_params.extend(source_filter)
if exclude_sources is not None:
tri_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
tri_params.extend(exclude_sources)
if role_filter:
tri_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
tri_params.extend(role_filter)
tri_sql = f"""
SELECT
m.id,
m.session_id,
m.role,
snippet(messages_fts_trigram, 0, '>>>', '<<<', '...', 40) AS snippet,
m.content,
m.timestamp,
m.tool_name,
s.source,
s.model,
s.started_at AS session_started
FROM messages_fts_trigram
JOIN messages m ON m.id = messages_fts_trigram.rowid
JOIN sessions s ON s.id = m.session_id
WHERE {' AND '.join(tri_where)}
ORDER BY rank
LIMIT ? OFFSET ?
"""
tri_params.extend([limit, offset])
with self._lock:
try:
tri_cursor = self._conn.execute(tri_sql, tri_params)
except sqlite3.OperationalError:
matches = []
else:
matches = [dict(row) for row in tri_cursor.fetchall()]
else:
# Short CJK query (1-2 chars) — trigram needs ≥3 CJK chars.
# Fall back to LIKE substring search.
escaped = raw_query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
like_where = ["m.content LIKE ? ESCAPE '\\'"]
like_params: list = [f"%{escaped}%"]
if source_filter is not None:
like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
like_params.extend(source_filter)
if exclude_sources is not None:
like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
like_params.extend(exclude_sources)
if role_filter:
like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
like_params.extend(role_filter)
like_sql = f"""
SELECT m.id, m.session_id, m.role,
substr(m.content,
max(1, instr(m.content, ?) - 40),
120) AS snippet,
m.content, m.timestamp, m.tool_name,
s.source, s.model, s.started_at AS session_started
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE {' AND '.join(like_where)}
ORDER BY m.timestamp DESC
LIMIT ? OFFSET ?
"""
like_params.extend([limit, offset])
# instr() parameter goes first in the bound list
like_params = [raw_query] + like_params
with self._lock:
like_cursor = self._conn.execute(like_sql, like_params)
matches = [dict(row) for row in like_cursor.fetchall()]
else:
with self._lock:
try:
cursor = self._conn.execute(sql, params)
except sqlite3.OperationalError:
# FTS5 query syntax error despite sanitization — return empty
with self._lock:
try:
cursor = self._conn.execute(sql, params)
except sqlite3.OperationalError:
# FTS5 query syntax error despite sanitization — return empty
# unless query contains CJK (fall back to LIKE below)
if not self._contains_cjk(query):
return []
else:
matches = [dict(row) for row in cursor.fetchall()]
matches = []
else:
matches = [dict(row) for row in cursor.fetchall()]
# LIKE fallback for CJK queries: FTS5 default tokenizer splits CJK
# characters individually, causing multi-character queries to fail.
if not matches and self._contains_cjk(query):
raw_query = query.strip('"').strip()
like_where = ["m.content LIKE ?"]
like_params: list = [f"%{raw_query}%"]
if source_filter is not None:
like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
like_params.extend(source_filter)
if exclude_sources is not None:
like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
like_params.extend(exclude_sources)
if role_filter:
like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
like_params.extend(role_filter)
like_sql = f"""
SELECT m.id, m.session_id, m.role,
substr(m.content,
max(1, instr(m.content, ?) - 40),
120) AS snippet,
m.content, m.timestamp, m.tool_name,
s.source, s.model, s.started_at AS session_started
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE {' AND '.join(like_where)}
ORDER BY m.timestamp DESC
LIMIT ? OFFSET ?
"""
like_params.extend([limit, offset])
# instr() parameter goes first in the bound list
like_params = [raw_query] + like_params
with self._lock:
like_cursor = self._conn.execute(like_sql, like_params)
matches = [dict(row) for row in like_cursor.fetchall()]
# Add surrounding context (1 message before + after each match).
# Done outside the lock so we don't hold it across N sequential queries.
@@ -1606,32 +1481,16 @@ class SessionDB:
limit: int = 20,
offset: int = 0,
) -> List[Dict[str, Any]]:
"""List sessions, optionally filtered by source.
Returns rows enriched with a computed ``last_active`` column (latest
message timestamp for the session, falling back to ``started_at``),
ordered by most-recently-used first.
"""
select_with_last_active = (
"SELECT s.*, COALESCE(m.last_active, s.started_at) AS last_active "
"FROM sessions s "
"LEFT JOIN ("
"SELECT session_id, MAX(timestamp) AS last_active "
"FROM messages GROUP BY session_id"
") m ON m.session_id = s.id "
)
"""List sessions, optionally filtered by source."""
with self._lock:
if source:
cursor = self._conn.execute(
f"{select_with_last_active}"
"WHERE s.source = ? "
"ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
"SELECT * FROM sessions WHERE source = ? ORDER BY started_at DESC LIMIT ? OFFSET ?",
(source, limit, offset),
)
else:
cursor = self._conn.execute(
f"{select_with_last_active}"
"ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
"SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? OFFSET ?",
(limit, offset),
)
return [dict(row) for row in cursor.fetchall()]
+3 -30
View File
@@ -7,7 +7,9 @@
perSystem = { pkgs, system, lib, ... }:
let
hermes-agent = inputs.self.packages.${system}.default;
hermesVenv = hermes-agent.hermesVenv;
hermesVenv = pkgs.callPackage ./python.nix {
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
};
configMergeScript = pkgs.callPackage ./configMergeScript.nix { };
@@ -191,35 +193,6 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
echo "ok" > $out/result
'';
# Verify extraPythonPackages PYTHONPATH injection
extra-python-packages = let
testPkg = pkgs.python312Packages.pyfiglet;
hermesWithExtra = hermes-agent.override {
extraPythonPackages = [ testPkg ];
};
in pkgs.runCommand "hermes-extra-python-packages" { } ''
set -e
echo "=== Checking extraPythonPackages PYTHONPATH injection ==="
grep -q "PYTHONPATH" ${hermesWithExtra}/bin/hermes || \
(echo "FAIL: PYTHONPATH not in wrapper"; exit 1)
echo "PASS: PYTHONPATH present in wrapper"
grep -q "${testPkg}" ${hermesWithExtra}/bin/hermes || \
(echo "FAIL: test package path not in PYTHONPATH"; exit 1)
echo "PASS: test package path found in wrapper"
echo "=== Checking base package has no PYTHONPATH ==="
if grep -q "PYTHONPATH" ${hermes-agent}/bin/hermes; then
echo "FAIL: base package should not have PYTHONPATH"; exit 1
fi
echo "PASS: base package clean"
echo "=== All extraPythonPackages checks passed ==="
mkdir -p $out
echo "ok" > $out/result
'';
# ── Config merge + round-trip test ────────────────────────────────
# Tests the merge script (Nix activation behavior) across 7
# scenarios, then verifies Python's load_config() reads correctly.
-186
View File
@@ -1,186 +0,0 @@
# nix/hermes-agent.nix — Overridable Hermes Agent package
#
# callPackage auto-wires nixpkgs args; flake inputs are passed explicitly.
# Users override via: pkgs.hermes-agent.override { extraPythonPackages = [...]; }
{
lib,
stdenv,
makeWrapper,
callPackage,
python312,
nodejs_22,
ripgrep,
git,
openssh,
ffmpeg,
tirith,
# Flake inputs — passed explicitly by packages.nix and overlays.nix
uv2nix,
pyproject-nix,
pyproject-build-systems,
npm-lockfile-fix,
# Overridable parameters
extraPythonPackages ? [ ],
}:
let
hermesVenv = callPackage ./python.nix {
inherit uv2nix pyproject-nix pyproject-build-systems;
};
hermesNpmLib = callPackage ./lib.nix {
inherit npm-lockfile-fix;
};
hermesTui = callPackage ./tui.nix {
inherit hermesNpmLib;
};
hermesWeb = callPackage ./web.nix {
inherit hermesNpmLib;
};
bundledSkills = lib.cleanSourceWith {
src = ../skills;
filter = path: _type: !(lib.hasInfix "/index-cache/" path);
};
runtimeDeps = [
nodejs_22
ripgrep
git
openssh
ffmpeg
tirith
];
runtimePath = lib.makeBinPath runtimeDeps;
sitePackagesPath = python312.sitePackages;
# Walk propagatedBuildInputs to include transitive Python deps in PYTHONPATH.
# Without this, a plugin listing e.g. requests as a dep would fail at runtime
# if requests isn't already in the sealed uv2nix venv.
allExtraPythonPackages = python312.pkgs.requiredPythonModules extraPythonPackages;
pythonPath = lib.makeSearchPath sitePackagesPath allExtraPythonPackages;
pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
uvLockHash =
if builtins.pathExists ../uv.lock then
builtins.hashString "sha256" (builtins.readFile ../uv.lock)
else
"none";
in
stdenv.mkDerivation {
pname = "hermes-agent";
version = (builtins.fromTOML (builtins.readFile ../pyproject.toml)).project.version;
dontUnpack = true;
dontBuild = true;
nativeBuildInputs = [ makeWrapper ];
installPhase = ''
runHook preInstall
mkdir -p $out/share/hermes-agent $out/bin
cp -r ${bundledSkills} $out/share/hermes-agent/skills
cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
mkdir -p $out/ui-tui
cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
${lib.concatMapStringsSep "\n"
(name: ''
makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
--suffix PATH : "${runtimePath}" \
--set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
--set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
--set HERMES_TUI_DIR $out/ui-tui \
--set HERMES_PYTHON ${hermesVenv}/bin/python3 \
--set HERMES_NODE ${nodejs_22}/bin/node \
${lib.optionalString (extraPythonPackages != [ ]) ''--suffix PYTHONPATH : "${pythonPath}"''}
'')
[
"hermes"
"hermes-agent"
"hermes-acp"
]
}
${lib.optionalString (extraPythonPackages != [ ]) ''
echo "=== Checking for plugin/core package collisions ==="
${hermesVenv}/bin/python3 -c "
import pathlib, sys, re
def canonical(name):
return re.sub(r'[-_.]+', '-', name).lower()
# Collect core venv package names
core = set()
venv_sp = pathlib.Path('${hermesVenv}/${sitePackagesPath}')
for di in venv_sp.glob('*.dist-info'):
meta = di / 'METADATA'
if meta.exists():
for line in meta.read_text().splitlines():
if line.startswith('Name:'):
core.add(canonical(line.split(':', 1)[1].strip()))
break
# Check each extra package for collisions
extras_dirs = [${lib.concatMapStringsSep ", " (p: "'${toString p}'") allExtraPythonPackages}]
for edir in extras_dirs:
sp = pathlib.Path(edir) / '${sitePackagesPath}'
if not sp.exists():
continue
for di in sp.glob('*.dist-info'):
meta = di / 'METADATA'
if not meta.exists():
continue
for line in meta.read_text().splitlines():
if line.startswith('Name:'):
pkg = canonical(line.split(':', 1)[1].strip())
if pkg in core:
print(f'ERROR: plugin package \"{pkg}\" collides with a package in hermes sealed venv', file=sys.stderr)
print(f' from: {di}', file=sys.stderr)
print(f' Remove this dependency from extraPythonPackages.', file=sys.stderr)
sys.exit(1)
break
print('No collisions found.')
"
echo "=== No collisions ==="
''}
runHook postInstall
'';
passthru = {
inherit hermesTui hermesWeb hermesNpmLib hermesVenv;
devShellHook = ''
STAMP=".nix-stamps/hermes-agent"
STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
echo "hermes-agent: installing Python dependencies..."
uv venv .venv --python ${python312}/bin/python3 2>/dev/null || true
source .venv/bin/activate
uv pip install -e ".[all]"
[ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
[ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
mkdir -p .nix-stamps
echo "$STAMP_VALUE" > "$STAMP"
else
source .venv/bin/activate
export HERMES_PYTHON=${hermesVenv}/bin/python3
fi
'';
};
meta = with lib; {
description = "AI agent with advanced tool-calling capabilities";
homepage = "https://github.com/NousResearch/hermes-agent";
mainProgram = "hermes";
license = licenses.mit;
platforms = platforms.unix;
};
}
+6 -81
View File
@@ -28,8 +28,6 @@
let
cfg = config.services.hermes-agent;
effectivePackage = if cfg.extraPythonPackages == [ ] then cfg.package
else cfg.package.override { inherit (cfg) extraPythonPackages; };
hermes-agent = inputs.self.packages.${pkgs.stdenv.hostPlatform.system}.default;
# Deep-merge config type (from 0xrsydn/nix-hermes-agent)
@@ -458,52 +456,6 @@
description = "Extra packages available on PATH.";
};
extraPlugins = mkOption {
type = types.listOf types.package;
default = [ ];
description = ''
Directory-based plugin packages to symlink into the hermes plugins
directory. Each package should contain a plugin.yaml and __init__.py
at its root. Hermes discovers these automatically on startup.
'';
example = literalExpression ''
[
(pkgs.fetchFromGitHub {
owner = "stephenschoettler";
repo = "hermes-lcm";
name = "hermes-lcm";
rev = "v0.7.0";
hash = "sha256-...";
})
]
'';
};
extraPythonPackages = mkOption {
type = types.listOf types.package;
default = [ ];
description = ''
Python packages to add to PYTHONPATH for entry-point plugin discovery.
These are pip-packaged plugins that register via the
hermes_agent.plugins entry-point group. Each package must be built
with the same Python interpreter as hermes (python312).
'';
example = literalExpression ''
[
(pkgs.python312Packages.buildPythonPackage {
pname = "rtk-hermes";
version = "1.0.0";
src = pkgs.fetchFromGitHub {
owner = "ogallotti";
repo = "rtk-hermes";
rev = "main";
hash = "sha256-...";
};
})
]
'';
};
restart = mkOption {
type = types.str;
default = "always";
@@ -618,7 +570,7 @@
# so interactive shells share state (sessions, skills, cron) with the
# gateway service instead of creating a separate ~/.hermes/.
(lib.mkIf cfg.addToSystemPackages {
environment.systemPackages = [ effectivePackage ];
environment.systemPackages = [ cfg.package ];
environment.variables.HERMES_HOME = "${cfg.stateDir}/.hermes";
})
@@ -629,16 +581,6 @@
});
})
# ── Assertions ─────────────────────────────────────────────────────
{
assertions = let
names = map lib.getName cfg.extraPlugins;
in [{
assertion = (lib.length names) == (lib.length (lib.unique names));
message = "services.hermes-agent.extraPlugins: duplicate plugin names detected: ${toString names}. If using fetchFromGitHub, set name = \"plugin-name\" to disambiguate.";
}];
}
# ── Warnings ──────────────────────────────────────────────────────
(lib.mkIf (cfg.container.enable && !cfg.addToSystemPackages && cfg.container.hostUsers != []) {
warnings = [
@@ -660,7 +602,6 @@
"d ${cfg.stateDir}/.hermes/sessions 2770 ${cfg.user} ${cfg.group} - -"
"d ${cfg.stateDir}/.hermes/logs 2770 ${cfg.user} ${cfg.group} - -"
"d ${cfg.stateDir}/.hermes/memories 2770 ${cfg.user} ${cfg.group} - -"
"d ${cfg.stateDir}/.hermes/plugins 2770 ${cfg.user} ${cfg.group} - -"
"d ${cfg.stateDir}/home 0750 ${cfg.user} ${cfg.group} - -"
"d ${cfg.workingDirectory} 2770 ${cfg.user} ${cfg.group} - -"
];
@@ -682,7 +623,7 @@
find ${cfg.stateDir}/.hermes -maxdepth 1 \
\( -name "*.db" -o -name "*.db-wal" -o -name "*.db-shm" -o -name "SOUL.md" \) \
-exec chmod g+rw {} + 2>/dev/null || true
for _subdir in cron sessions logs memories plugins; do
for _subdir in cron sessions logs memories; do
mkdir -p "${cfg.stateDir}/.hermes/$_subdir"
chown ${cfg.user}:${cfg.group} "${cfg.stateDir}/.hermes/$_subdir"
chmod 2770 "${cfg.stateDir}/.hermes/$_subdir"
@@ -791,22 +732,6 @@ HERMES_NIX_ENV_EOF
${lib.concatStringsSep "\n" (lib.mapAttrsToList (name: _value: ''
install -o ${cfg.user} -g ${cfg.group} -m 0640 ${documentDerivation}/${name} ${cfg.workingDirectory}/${name}
'') cfg.documents)}
# ── Declarative plugins ─────────────────────────────────────────
# Remove stale managed symlinks (plugins removed from config)
find ${cfg.stateDir}/.hermes/plugins -maxdepth 1 -type l -name 'nix-managed-*' -delete 2>/dev/null || true
${lib.concatStringsSep "\n" (map (plugin:
let
name = lib.getName plugin;
in ''
if [ ! -f "${plugin}/plugin.yaml" ]; then
echo "ERROR: extraPlugins entry '${plugin}' has no plugin.yaml" >&2
exit 1
fi
ln -sfn ${plugin} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
chown -h ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
'') cfg.extraPlugins)}
'';
}
@@ -837,7 +762,7 @@ HERMES_NIX_ENV_EOF
# reads them at Python startup — no systemd EnvironmentFile needed.
ExecStart = lib.concatStringsSep " " ([
"${effectivePackage}/bin/hermes"
"${cfg.package}/bin/hermes"
"gateway"
] ++ cfg.extraArgs);
@@ -860,7 +785,7 @@ HERMES_NIX_ENV_EOF
};
path = [
effectivePackage
cfg.package
pkgs.bash
pkgs.coreutils
pkgs.git
@@ -885,11 +810,11 @@ HERMES_NIX_ENV_EOF
preStart = ''
# Stable symlinks — container references these, not store paths directly
ln -sfn ${effectivePackage} ${cfg.stateDir}/current-package
ln -sfn ${cfg.package} ${cfg.stateDir}/current-package
ln -sfn ${containerEntrypoint} ${cfg.stateDir}/current-entrypoint
# GC roots so nix-collect-garbage doesn't remove store paths in use
${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${effectivePackage} 2>/dev/null || true
${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${cfg.package} 2>/dev/null || true
${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root-entrypoint --indirect -r ${containerEntrypoint} 2>/dev/null || true
# Check if container needs (re)creation
-10
View File
@@ -1,10 +0,0 @@
# nix/overlays.nix — Expose pkgs.hermes-agent for external NixOS configs
{ inputs, ... }:
{
flake.overlays.default = final: _: {
hermes-agent = final.callPackage ./hermes-agent.nix {
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
npm-lockfile-fix = inputs.npm-lockfile-fix.packages.${final.stdenv.hostPlatform.system}.default;
};
};
}
+107 -6
View File
@@ -4,19 +4,120 @@
perSystem =
{ pkgs, inputs', ... }:
let
hermesAgent = pkgs.callPackage ./hermes-agent.nix {
hermesVenv = pkgs.callPackage ./python.nix {
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
};
hermesNpmLib = pkgs.callPackage ./lib.nix {
npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
};
hermesTui = pkgs.callPackage ./tui.nix {
inherit hermesNpmLib;
};
# Import bundled skills, excluding runtime caches
bundledSkills = pkgs.lib.cleanSourceWith {
src = ../skills;
filter = path: _type: !(pkgs.lib.hasInfix "/index-cache/" path);
};
hermesWeb = pkgs.callPackage ./web.nix {
inherit hermesNpmLib;
};
runtimeDeps = with pkgs; [
nodejs_22
ripgrep
git
openssh
ffmpeg
tirith
];
runtimePath = pkgs.lib.makeBinPath runtimeDeps;
# Lockfile hashes for dev shell stamps
pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
uvLockHash =
if builtins.pathExists ../uv.lock then
builtins.hashString "sha256" (builtins.readFile ../uv.lock)
else
"none";
in
{
packages = {
default = hermesAgent;
tui = hermesAgent.hermesTui;
web = hermesAgent.hermesWeb;
default = pkgs.stdenv.mkDerivation {
pname = "hermes-agent";
version = (fromTOML (builtins.readFile ../pyproject.toml)).project.version;
fix-lockfiles = hermesAgent.hermesNpmLib.mkFixLockfiles {
packages = [ hermesAgent.hermesTui hermesAgent.hermesWeb ];
dontUnpack = true;
dontBuild = true;
nativeBuildInputs = [ pkgs.makeWrapper ];
installPhase = ''
runHook preInstall
mkdir -p $out/share/hermes-agent $out/bin
cp -r ${bundledSkills} $out/share/hermes-agent/skills
cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
# copy pre-built TUI (same layout as dev: ui-tui/dist/ + node_modules/)
mkdir -p $out/ui-tui
cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
${pkgs.lib.concatMapStringsSep "\n"
(name: ''
makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
--suffix PATH : "${runtimePath}" \
--set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
--set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
--set HERMES_TUI_DIR $out/ui-tui \
--set HERMES_PYTHON ${hermesVenv}/bin/python3 \
--set HERMES_NODE ${pkgs.nodejs_22}/bin/node
'')
[
"hermes"
"hermes-agent"
"hermes-acp"
]
}
runHook postInstall
'';
passthru.devShellHook = ''
STAMP=".nix-stamps/hermes-agent"
STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
echo "hermes-agent: installing Python dependencies..."
uv venv .venv --python ${pkgs.python312}/bin/python3 2>/dev/null || true
source .venv/bin/activate
uv pip install -e ".[all]"
[ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
[ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
mkdir -p .nix-stamps
echo "$STAMP_VALUE" > "$STAMP"
else
source .venv/bin/activate
export HERMES_PYTHON=${hermesVenv}/bin/python3
fi
'';
meta = with pkgs.lib; {
description = "AI agent with advanced tool-calling capabilities";
homepage = "https://github.com/NousResearch/hermes-agent";
mainProgram = "hermes";
license = licenses.mit;
platforms = platforms.unix;
};
};
tui = hermesTui;
web = hermesWeb;
fix-lockfiles = hermesNpmLib.mkFixLockfiles {
packages = [ hermesTui hermesWeb ];
};
};
};
+1 -2
View File
@@ -7,7 +7,6 @@
pyproject-nix,
pyproject-build-systems,
stdenv,
dependency-groups ? [ "all" ],
}:
let
workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./..; };
@@ -97,5 +96,5 @@ let
]);
in
pythonSet.mkVirtualEnv "hermes-agent-env" {
hermes-agent = dependency-groups;
hermes-agent = [ "all" ];
}
-1
View File
@@ -17,7 +17,6 @@ pkgs.buildNpmPackage (npm // {
inherit src npmDeps version;
doCheck = false;
npmFlags = [ "--legacy-peer-deps" ];
installPhase = ''
runHook preInstall
@@ -1,7 +1,7 @@
---
name: touchdesigner-mcp
description: "Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools."
version: 1.1.0
version: 1.0.0
author: kshitijk4poor
license: MIT
metadata:
@@ -204,9 +204,8 @@ win.par.winopen.pulse()
| `td_input_clear` | Stop input automation |
| `td_op_screen_rect` | Get screen coords of a node |
| `td_click_screen_point` | Click a point in a screenshot |
| `td_screen_point_to_global` | Convert screenshot pixel to absolute screen coords |
The table above covers the 32 tools used in typical creative workflows. The remaining 4 tools (`td_project_quit`, `td_test_session`, `td_dev_log`, `td_clear_dev_log`) are admin/dev-mode utilities — see `references/mcp-tools.md` for the full 36-tool reference with complete parameter schemas.
See `references/mcp-tools.md` for full parameter schemas.
## Key Implementation Rules
@@ -333,21 +332,6 @@ See `references/network-patterns.md` for complete build scripts + shader code.
| `references/mcp-tools.md` | Full twozero MCP tool parameter schemas |
| `references/python-api.md` | TD Python: op(), scripting, extensions |
| `references/troubleshooting.md` | Connection diagnostics, debugging |
| `references/glsl.md` | GLSL uniforms, built-in functions, shader templates |
| `references/postfx.md` | Post-FX: bloom, CRT, chromatic aberration, feedback glow |
| `references/layout-compositor.md` | HUD layout patterns, panel grids, BSP-style layouts |
| `references/operator-tips.md` | Wireframe rendering, feedback TOP setup |
| `references/geometry-comp.md` | Geometry COMP: instancing, POP vs SOP, morphing |
| `references/audio-reactive.md` | Audio band extraction, beat detection, envelope following |
| `references/animation.md` | LFOs, timers, keyframes, easing, expression-driven motion |
| `references/midi-osc.md` | MIDI/OSC controllers, TouchOSC, multi-machine sync |
| `references/particles.md` | POPs and legacy particleSOP — emission, forces, collisions |
| `references/projection-mapping.md` | Multi-window output, corner pin, mesh warp, edge blending |
| `references/external-data.md` | HTTP, WebSocket, MQTT, Serial, TCP, webserverDAT |
| `references/panel-ui.md` | Custom params, panel COMPs, button/slider/field, panelExecuteDAT |
| `references/replicator.md` | replicatorCOMP — data-driven cloning, layouts, callbacks |
| `references/dat-scripting.md` | Execute DAT family — chop/dat/parameter/panel/op/executeDAT |
| `references/3d-scene.md` | Lighting rigs, shadows, IBL/cubemaps, multi-camera, PBR |
| `scripts/setup.sh` | Automated setup script |
---
@@ -143,20 +143,20 @@ Creating nodes with the same names you just destroyed in the SAME script causes
```python
# td_execute_python:
for c in list(root.children):
if c.valid and c.name.startswith('my_'):
if c.valid and c.name.startswith('promo_'):
c.destroy()
# ... then create my_audio, my_shader etc. in same script → CRASHES
# ... then create promo_audio, promo_shader etc. in same script → CRASHES
```
**CORRECT (two separate calls):**
```python
# Call 1: td_execute_python — clean only
for c in list(root.children):
if c.valid and c.name.startswith('my_'):
if c.valid and c.name.startswith('promo_'):
c.destroy()
# Call 2: td_execute_python — build (separate MCP call)
audio = root.create(audiofileinCHOP, 'my_audio')
audio = root.create(audiofileinCHOP, 'promo_audio')
# ... rest of build
```
@@ -361,13 +361,21 @@ win.par.winopen.pulse()
`out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window.
### 32. Audio-reactive GLSL: TD-side pipeline
### 32. Audio-reactive GLSL: dual-layer sync pipeline
For audio-synced visuals: AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
For audio-synced visuals, use BOTH layers for maximum effect:
**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass.
Both layers locked to the same audio file = visuals genuinely sync to the beat at two independent stages.
**Key gotcha:** AudioFileIn must be cued (`par.cue=True``par.cuepulse.pulse()`) then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds.
### 33. twozero MCP: prefer native tools
### 33. twozero MCP: benchmark and prefer native tools
Benchmarked April 2026: twozero MCP with 36 native tools. The old curl/REST method (port 9981) had zero native tools.
**Always prefer native MCP tools over td_execute_python:**
- `td_create_operator` over `root.create()` scripts (handles viewport positioning)
@@ -417,16 +425,13 @@ TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still
**a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS.
**b) Audio device CHOP blocking the main thread (MOST COMMON).** An `audiodeviceoutCHOP` with `active=True` can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. **`volume=0` is NOT sufficient** — the audio driver still blocks. Fix: `par.active = False`. This completely stops the CHOP from interacting with the audio driver. If you need audio monitoring, enable it only during short playback checks, then disable before recording.
Verified April 2026: disabling `audiodeviceoutCHOP` (`active=False`) restored FPS from 0 to 60 instantly, recovering from 2348% budget usage to 0.1%.
**b) Audio device CHOP blocking the main thread.** An `audiooutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking.
Diagnostic sequence when FPS=0:
1. `td_get_perf` — check if any op has extreme CPU/s (audiodeviceoutCHOP is the usual suspect)
2. If audiodeviceoutCHOP shows >100ms/s: set `par.active = False` immediately
3. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
4. Check for other blocking CHOPs (audiodevin, etc.)
5. Toggle play state (spacebar, or check if absTime.seconds is advancing)
1. `td_get_perf` — check if any op has extreme CPU/s
2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
3. Check for blocking CHOPs (audioout, audiodevin, etc.)
4. Toggle play state (spacebar, or check if absTime.seconds is advancing)
### 39. Recording while FPS=0 produces empty or near-empty files
@@ -479,20 +484,9 @@ If `td_write_dat` fails, fall back to `td_execute_python`:
op("/project1/shader_code").text = shader_string
```
### 42. td_execute_python DOES return print() output — use it for debugging
### 42. td_execute_python does NOT return stdout or print() output
`print()` statements in `td_execute_python` scripts appear in the MCP response text. This is the correct way to read values back from scripts. The response format is: printed output first, then `[fps X.X/X] [N err/N warn]` on a separate line.
However, the `result` variable (if you set one) does NOT appear verbatim — use `print()` for anything you need to read back:
```python
# CORRECT — appears in response:
print('value:', some_value)
# WRONG — not reliably in response:
result = some_value
```
For structured data, use dedicated inspection tools (`td_get_operator_info`, `td_read_chop`) which return clean JSON.
Despite what earlier versions of pitfall #33 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.
### 43. td_get_operator_info JSON is appended with `[fps X.X/X]` — breaks json.loads()
@@ -502,203 +496,13 @@ clean = response_text.rsplit('[fps', 1)[0]
data = json.loads(clean)
```
### 44. td_get_screenshot is unreliable — returns `{"status": "pending"}` and may never deliver
### 44. td_get_screenshot is asynchronous — returns `{"status": "pending"}`
Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file may appear later — or may NEVER appear at all. In testing (April 2026), screenshots stayed "pending" indefinitely with no file written to disk, even though the shader was cooking at 8-30fps.
Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.
**Do NOT rely on `td_get_screenshot` for frame capture.** For reliable frame capture, use MovieFileOut recording + ffmpeg frame extraction:
```bash
# Record in TD first, then extract frames:
ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
```
If you need a quick visual check, `td_get_screenshot` is worth trying (it sometimes works), but always have the recording fallback. There is no callback or completion notification — if the file doesn't appear after 5-10 seconds, it's not coming.
### 45. Heavy shaders cook below record FPS — many duplicate frames in output
A raymarched GLSL shader may only cook at 8-15fps even though MovieFileOut records at 60fps. The recording still works (TD writes the last-cooked frame each time), but the resulting file has many duplicate frames. When extracting frames for post-processing, use a lower fps filter to avoid redundant frames:
```bash
# Extract at 24fps from a 60fps recording of an 8fps shader:
ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
```
Check actual cook FPS with `td_get_perf` before committing to a long recording. If FPS < 15, the output will be a slideshow regardless of the recording codec.
### 46. Recording duration is manual — no auto-stop at audio end
### 45. Recording duration is manual — no auto-stop at audio end
MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. Trim excess with ffmpeg as a safety net:
```bash
ffmpeg -i raw.mov -t 25 -c copy trimmed.mov
```
### 47. AudioFileIn par.index stays at 0 in sequential mode — not a reliable progress indicator
When `audiofileinCHOP` is in `playmode=2` (sequential), `par.index.eval()` returns 0.0 even while audio IS actively playing and the spectrum IS receiving data. Do NOT use `par.index` to check playback progress in sequential mode.
**How to verify audio is actually playing:**
- Read the spectrum CHOP values via `td_read_chop` — if values are non-zero and CHANGE between reads 1-2s apart, audio is flowing
- Read the audio CHOP itself: non-zero waveform samples confirm the file is loaded and playing
- `par.play.eval()` returning True is necessary but NOT sufficient — it can be True with no audio flowing if cue is stuck
### 48. GLSL shader whiteout — clamp audio spectrum values in the shader
Raw spectrum values multiplied by Math CHOP gain can produce very large numbers (5-20+) that blow out the shader's lighting, producing flat white/grey. The shader MUST clamp audio inputs:
```glsl
float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
bass = clamp(bass, 0.0, 3.0); // prevent whiteout
mids = clamp(mids, 0.0, 3.0);
hi = clamp(hi, 0.0, 3.0);
```
Discovered when gain=10 produced ~0.13 (too dark) during quiet passages but gain=50 produced ~9.4 (total whiteout). Fix: keep gain=10, use `highfreqboost=3.0` on AudioSpectrum, clamp in shader.
### 49. Non-Commercial TD records at 1280x1280 (square) — always crop in post
Even with `resolutionw=1280, resolutionh=720` on the GLSL TOP, Non-Commercial TD may output 1280x1280 to MovieFileOut. Always check dimensions with ffprobe and crop during extraction:
```bash
# Center-crop from 1280x1280 to 1280x720:
ffmpeg -y -i /tmp/td_output.mov -t 25 -r 24 -vf "crop=1280:720:0:280" /tmp/frames/frame_%06d.png
```
Large ProRes files (1-2GB) at 1280x1280 decode at ~3fps, so 25s of footage takes ~3 minutes to extract.
## Advanced Patterns (pitfalls 51+)
### 51. Connection syntax: use `outputConnectors`/`inputConnectors`, NOT `outputs`/`inputs`
```python
# CORRECT
src.outputConnectors[0].connect(dst.inputConnectors[0])
# WRONG — raises IndexError or AttributeError
src.outputs[0].connect(dst.inputs[0])
```
For feedback TOP, BOTH are required:
```python
fb.par.top = target.path
target.outputConnectors[0].connect(fb.inputConnectors[0])
```
### 52. moviefileoutTOP `par.input` doesn't resolve via Python in TD 2025.32460
Setting `moviefileoutTOP.par.input` programmatically does NOT work. All forms fail silently with "Not enough sources specified."
**Workaround — frame capture + ffmpeg:**
```python
out = op('/project1/out')
for i in range(300):
delay = i * 5
run(f"op('/project1/out').save('/tmp/frames/f_{i:04d}.png')", delayFrames=delay)
# Then: ffmpeg -y -framerate 30 -i /tmp/frames/f_%04d.png -c:v prores -pix_fmt yuv420p /tmp/output.mov
```
### 53. Batch frame capture — use `me.fetch`/`me.store` for state across calls
```python
start = me.fetch('cap_frame', 0)
for i in range(60):
frame = start + i
op('/project1/out').save(f'/tmp/frames/frame_{str(frame).zfill(4)}.png')
me.store('cap_frame', start + 60)
```
Call 5 times for 300 frames. Each picks up where the last left off.
### 54. GLSL TOP pixel shader requirements in TD 2025
```glsl
// REQUIRED — declare output
layout(location = 0) out vec4 fragColor;
void main() {
vec3 col = vec3(1.0, 0.0, 0.0);
fragColor = TDOutputSwizzle(vec4(col, 1.0));
}
```
**Built-in uniforms available:** `uTDOutputInfo.res` (vec4), `uTDTimeInfo.seconds`, `sTD2DInputs[N]`.
**Auto-created DATs:** `name_pixel`, `name_vertex`, `name_compute` textDATs with example code.
### 55. TOP.save() doesn't advance time — identical frames in tight loops
`.save()` captures the current cooked frame without advancing TD's timeline:
```python
# WRONG — all frames identical
for i in range(300):
op('/project1/out').save(f'frames/f_{i:04d}.png')
# CORRECT — use run() with delayFrames
for i in range(300):
delay = i * 5
run(f"op('/project1/out').save('frames/f_{i:04d}.png')", delayFrames=delay)
```
**NEVER use `time.sleep()` in TD** — it blocks the main thread and freezes the UI.
### 56. Feedback loop masks input changes — force switch during capture
With feedback TOP opacity 0.7+, the buffer dominates output. Switching input produces nearly identical frames.
**Fix — force switch index per capture:**
```python
for i in range(300):
idx = (i // 8) % num_inputs
delay = i * 5
run(f"op('/project1/vswitch').par.index={idx}; op('/project1/out').save('f_{i:04d}.png')", delayFrames=delay)
```
### 57. Large td_execute_python scripts fail — split into incremental calls
10+ operator creations in one script cause timing issues. Split into 2-4 calls of 2-4 operators each. Within one call, `create()` handles work immediately. Across calls, `op('name')` may return `None` if the previous call hasn't committed.
### 58. MCP instance reconnection after project.load()
`project.load(path)` changes the PID. After loading, call `td_list_instances()` and use the new `target_instance`. For TOX files: import as child comp instead (doesn't disconnect).
### 59. TOX reverse-engineering workflow
```python
comp = root.loadTox(r'/path/to/file.tox')
comp.name = '_study_comp'
for child in comp.children:
print(f'{child.name} ({child.OPType})')
# Use td_get_operators_info, td_read_dat, check custom params
```
### 60. sliderCOMP naming — TD appends suffix
TD auto-renames: `slider_brightness``slider_brightness1`. Always check names after creation.
### 61. create() requires full operator type suffix
```python
# CORRECT
proj.create('audiofileinCHOP', 'audio_in')
proj.create('glslTOP', 'render')
# WRONG — raises "Unknown operator type"
proj.create('audiofilein', 'audio_in')
proj.create('glsl', 'render')
```
### 62. Reparenting COMPs — use copyOPs, not connect()
Moving COMPs with `inputCOMPConnectors[0].connect()` fails. Use copy + destroy:
```python
copied = target.copyOPs([source]) # preserves internal wiring
source.destroy()
# Re-wire external connections manually after the move
```
### 63. Slider wiring — expressionCHOP with op() expressions crashes TD
```python
# CRASHES TD — don't do this
echop = root.create(expressionCHOP, 'slider_ctrl')
echop.par.chan0expr = 'op("/project1/controls/slider_brightness1").par.value0'
# WORKING — parameterCHOP as bridge
pchop = root.create(parameterCHOP, 'slider_vals')
pchop.par.ops = '/project1/controls'
pchop.par.parameters = 'value0'
pchop.par.custom = True
pchop.par.builtin = False
```
-131
View File
@@ -1,131 +0,0 @@
# google_meet plugin
Let the hermes agent join a Google Meet call, transcribe it, optionally speak
in it, and do the followup work afterwards.
## What ships
| Version | What | Status |
|---|---|---|
| v1 | Transcribe-only: Playwright joins Meet, scrapes captions to transcript file | ✓ ships by default |
| v2 | Realtime duplex audio: bot speaks in-call via OpenAI Realtime + BlackHole/PulseAudio null-sink | ✓ opt in with `mode='realtime'` |
| v3 | Remote node host: run the bot on a different machine than the gateway | ✓ opt in with `node='<name>'` |
## Architecture
```
┌─ gateway (Linux box, where hermes runs) ────────────────────────────┐
│ │
│ agent → meet_join(url, mode='realtime', node='my-mac') │
│ │ │
│ └─ NodeClient ─── ws ────┐ │
│ │ │
└──────────────────────────────────┼───────────────────────────────────┘
│ wss (token auth)
┌─ node host (user's Mac, signed-in Chrome lives here) ───────────────┐
│ │
│ NodeServer (from `hermes meet node run`) │
│ │ │
│ ├─ start_bot → process_manager.start() → spawns meet_bot │
│ │ │
│ └─ meet_bot (Playwright) │
│ ├─ Chromium → meet.google.com │
│ ├─ caption scraper → transcript.txt │
│ └─ (realtime mode only) RealtimeSpeaker thread │
│ ↓ │
│ OpenAI Realtime WS → speaker.pcm │
│ ↓ │
│ paplay → null-sink ← Chrome fake mic │
│ │
└──────────────────────────────────────────────────────────────────────┘
```
Without v3: the whole right column runs on the gateway machine.
Without v2: the "realtime" path is skipped; transcribe runs alone.
## Files
| Path | Purpose |
|---|---|
| `plugin.yaml` | manifest |
| `__init__.py` | `register(ctx)` — registers 5 tools + `on_session_end` hook + `hermes meet` CLI |
| `meet_bot.py` | Playwright bot subprocess (standalone, `python -m plugins.google_meet.meet_bot`) |
| `process_manager.py` | local bot lifecycle + `enqueue_say` |
| `tools.py` | agent-facing tools + node-routing helper |
| `cli.py` | `hermes meet setup / auth / join / status / transcript / say / stop / node ...` |
| `audio_bridge.py` | v2: PulseAudio null-sink (Linux) + BlackHole probe (macOS) |
| `realtime/openai_client.py` | v2: `RealtimeSession` + `RealtimeSpeaker` (file-queue → OpenAI Realtime WS → PCM) |
| `node/protocol.py` | v3: message envelope + validation |
| `node/registry.py` | v3: `$HERMES_HOME/workspace/meetings/nodes.json` |
| `node/server.py` | v3: `NodeServer` (runs on host machine) |
| `node/client.py` | v3: `NodeClient` (used by tool handlers + CLI on gateway) |
| `node/cli.py` | v3: `hermes meet node {run,list,approve,remove,status,ping}` |
| `SKILL.md` | agent usage guide |
## Local quick start
```bash
hermes plugins enable google_meet
hermes meet install # pip + Chromium
hermes meet setup # preflight
hermes meet auth # optional
hermes meet join https://meet.google.com/abc-defg-hij # transcribe
```
## Realtime mode
Linux (preferred, most automated):
```bash
hermes meet install --realtime # installs pulseaudio-utils
echo 'OPENAI_API_KEY=sk-...' >> ~/.hermes/.env
hermes meet join https://meet.google.com/abc-defg-hij --mode realtime
# then from the agent or CLI:
hermes meet say "Good morning everyone, I'm the note-taker bot."
```
macOS:
```bash
hermes meet install --realtime # runs: brew install blackhole-2ch ffmpeg
# then — manually! — open System Settings → Sound → Input → BlackHole 2ch
echo 'OPENAI_API_KEY=sk-...' >> ~/.hermes/.env
hermes meet join https://meet.google.com/abc-defg-hij --mode realtime
```
On macOS, hermes will **not** switch your system audio input automatically — the
user has to do it. This is deliberate: switching default input on a whim would
be a surprising side effect.
## Remote node host
On the node machine (e.g. user's Mac with a signed-in Chrome):
```bash
pip install playwright websockets
python -m playwright install chromium
hermes plugins enable google_meet
hermes meet node run --display-name my-mac --host 0.0.0.0 --port 18789
# prints the bearer token on first run; copy it
```
On the gateway:
```bash
hermes meet node approve my-mac ws://<mac-ip>:18789 <token>
hermes meet node ping my-mac
# now any meet_* tool call accepts node='my-mac' (or 'auto')
```
## Safety
- URL gate: only `https://meet.google.com/abc-defg-hij`, `/new`, `/lookup/<id>`.
- No calendar scanning, no auto-dial, no auto-consent announcement.
- Node server uses bearer-token auth; no key exchange, no TLS termination
built in — run it on a LAN or behind a reverse proxy you trust.
- One active meeting per (gateway, node) pair. A second `meet_join` leaves the first.
- `meet_say` refuses unless the active meeting was started with `mode='realtime'`.
## Out of scope
- **Calendar scanning** — deliberately not implemented. Join URLs must be explicit.
- **Multi-tenant node sharing** — a node serves one gateway at a time.
- **Windows** — audio bridging isn't tested; `register()` no-ops on Windows.
- **System audio input switching on macOS** — user responsibility, not the bot's.
-148
View File
@@ -1,148 +0,0 @@
---
name: google_meet
description: Join a Google Meet call, transcribe live captions, optionally speak in realtime, and do the followup work afterwards. Use when the user asks the agent to sit in on a meeting, take notes, summarize, respond in-call, or action items from it.
version: 0.2.0
platforms:
- linux
- macos
metadata:
hermes:
tags: [meetings, google-meet, transcription, realtime-voice]
---
# google_meet
## When to use
The user says any of:
- "join my Meet at <url>"
- "take notes on this meeting"
- "summarize the meeting and send followups"
- "sit in on my standup"
- "be a bot in this call and speak up when X"
## Two modes
| Mode | What the bot does |
|---|---|
| `transcribe` (default) | Joins, enables captions, scrapes a transcript. Listen-only. |
| `realtime` | Same as transcribe PLUS speaks into the meeting via OpenAI Realtime. The agent calls `meet_say(text)` and the bot's voice comes out of the call. |
Pick `realtime` only when the user actually wants the agent to speak. It costs real money (OpenAI Realtime is pay-per-audio-minute) and requires a virtual audio device set up on the machine running the bot.
## Two locations
| Location | When |
|---|---|
| Local (default) | Gateway machine runs the Playwright bot directly. |
| Remote node (`node="<name>"`) | Bot runs on a different machine that has a signed-in Chrome and (for realtime) a configured audio bridge. Useful when the gateway runs on a headless Linux box but the user's real signed-in Chrome lives on their Mac. |
## Prerequisites the user must handle once
Easiest path — run the built-in installer:
```bash
hermes plugins enable google_meet
hermes meet install # pip deps + Chromium (transcribe only)
hermes meet install --realtime # + pulseaudio-utils / brew blackhole+ffmpeg
hermes meet auth # optional; skips guest-lobby wait
hermes meet setup # preflight checks
```
`hermes meet install --realtime` prompts before running `sudo apt-get` (Linux)
or `brew install` (macOS). Pass `--yes` to skip the prompt. It will NOT touch
your macOS default-input setting — you have to select BlackHole 2ch in
System Settings yourself before starting a realtime meeting.
Or do it manually:
```bash
pip install playwright websockets && python -m playwright install chromium
# For realtime mode, additionally:
# Linux: sudo apt install pulseaudio-utils
# macOS: brew install blackhole-2ch ffmpeg
# → System Settings → Sound → Input → BlackHole 2ch
# Then set OPENAI_API_KEY or HERMES_MEET_REALTIME_KEY in ~/.hermes/.env
```
For a remote node:
```bash
# on the user's Mac (where Chrome is signed in):
pip install playwright websockets && python -m playwright install chromium
hermes plugins enable google_meet
hermes meet node run --display-name my-mac # persistent server
# copy the printed token
# on the gateway:
hermes meet node approve my-mac ws://<mac-ip>:18789 <token>
hermes meet node ping my-mac # confirm reachable
```
Run `hermes meet setup` to preflight local prereqs.
## Flow
1. **Join** — call `meet_join(url=..., mode=..., node=...)`. Returns immediately.
2. **Announce yourself** — no auto-consent. Say (in whatever channel the user is watching): "A Hermes agent bot is in this call taking notes."
3. **Poll**`meet_status()` for liveness, `meet_transcript(last=20)` for recent captions. Don't re-read the whole transcript every turn.
4. **Speak (realtime only)**`meet_say(text="...")` queues text for TTS. The speech lags by ~2s. Don't spam it.
5. **Leave**`meet_leave()` when done, or set `duration="30m"` on `meet_join` for auto-leave.
6. **Follow up** — read `meet_transcript()` in full, summarize, and use regular tools to send the recap, file issues, schedule followups.
## Tool reference
| Tool | Parameters | Use |
|---|---|---|
| `meet_join` | `url`, `mode?`, `guest_name?`, `duration?`, `headed?`, `node?` | Start bot |
| `meet_status` | `node?` | Liveness + progress |
| `meet_transcript` | `last?`, `node?` | Read captions |
| `meet_leave` | `node?` | Close bot |
| `meet_say` | `text`, `node?` | Speak in realtime meeting |
`node?` on all tools: pass a registered node name (or `"auto"` for the sole node) to operate a remote bot instead of a local one. Omit for local.
## Important limits
- Captions are only as good as Google Meet's live captions. English-biased, lossy on overlapping speakers.
- Guest mode sits in the lobby until a host admits. Warn the user; `hermes meet auth` avoids this.
- **Lobby timeout**: if the host doesn't admit the bot within 5 minutes (configurable via `HERMES_MEET_LOBBY_TIMEOUT` env), the bot leaves and `meet_status` reports `leaveReason: "lobby_timeout"`.
- **One active meeting per install per location.** A second `meet_join` leaves the first.
- **Windows not supported.**
- Realtime mode needs a virtual audio device. If the audio bridge setup fails, the bot falls back to transcribe mode and flags it in `meet_status().error`.
- `meet_say` requires `mode='realtime'` on the originating `meet_join`. Calling it against a transcribe-mode meeting returns a clear error.
- **Barge-in is best-effort.** When a caption arrives attributed to a real participant while the bot is generating audio, the bot sends `response.cancel` to OpenAI Realtime. Captions take ~500ms to show up, so the bot will talk over the first second or so of a human interruption.
## Status dict reference
`meet_status()` returns (subset shown, there are more):
| Key | Meaning |
|---|---|
| `inCall` | Past the lobby. False while waiting for admission. |
| `lobbyWaiting` | Clicked "Ask to join", waiting on host. |
| `joinAttemptedAt` / `joinedAt` | Timestamps for lobby-click and actual admission. |
| `captioning` | Caption observer is installed. |
| `transcriptLines` / `lastCaptionAt` | Transcript progress. |
| `realtime` / `realtimeReady` | Realtime mode provisioned / WS connected. |
| `realtimeDevice` | Audio device name the bot is feeding (e.g. `hermes_meet_src`). |
| `audioBytesOut` / `lastAudioOutAt` | How much PCM the OpenAI session has produced. |
| `lastBargeInAt` | Timestamp of the most recent `response.cancel` sent. |
| `leaveReason` | `duration_expired`, `lobby_timeout`, `denied`, `page_closed`, or null. |
| `error` | Last error (soft — bot may still be running). |
## Transcript location
Local:
```
$HERMES_HOME/workspace/meetings/<meeting-id>/transcript.txt
```
Remote node: transcript lives on the node host's disk. Use `meet_transcript(node=...)` to read it over RPC.
## Safety
- URL regex: only `https://meet.google.com/...` URLs pass.
- No calendar scanning. No auto-dial.
- Remote nodes use bearer-token auth; tokens are generated on the node (32 hex chars, persisted in `$HERMES_HOME/workspace/meetings/node_token.json`) and must be copied to the gateway via `hermes meet node approve`.
- `meet_say` text is rate-limited by the OpenAI Realtime session; spam-protection is the bot's problem, not yours, but still — don't queue hundreds of lines.
-103
View File
@@ -1,103 +0,0 @@
"""google_meet plugin — let the agent join a Meet call, transcribe it, follow up.
v1: transcribe-only. Spawns a headless Chromium via Playwright, joins the Meet
URL, enables live captions, scrapes them into a transcript file. The agent then
has the transcript in its workspace and can do whatever followup work it needs
using its regular tools.
v2 (not in this PR): realtime duplex audio so the agent can speak in the
meeting, via OpenAI Realtime / Gemini Live + BlackHole / PulseAudio null-sink.
``meet_say`` exists as a stub today so the tool surface is stable.
Explicit-by-design: only joins ``https://meet.google.com/`` URLs explicitly
passed in. No calendar scanning, no auto-dial, no consent announcement.
"""
from __future__ import annotations
import logging
import platform
from plugins.google_meet import process_manager as pm
from plugins.google_meet.cli import register_cli as _register_meet_cli
from plugins.google_meet.cli import meet_command as _meet_command
from plugins.google_meet.tools import (
MEET_JOIN_SCHEMA,
MEET_LEAVE_SCHEMA,
MEET_SAY_SCHEMA,
MEET_STATUS_SCHEMA,
MEET_TRANSCRIPT_SCHEMA,
check_meet_requirements,
handle_meet_join,
handle_meet_leave,
handle_meet_say,
handle_meet_status,
handle_meet_transcript,
)
logger = logging.getLogger(__name__)
_TOOLS = (
("meet_join", MEET_JOIN_SCHEMA, handle_meet_join, "📞"),
("meet_status", MEET_STATUS_SCHEMA, handle_meet_status, "🟢"),
("meet_transcript", MEET_TRANSCRIPT_SCHEMA, handle_meet_transcript, "📝"),
("meet_leave", MEET_LEAVE_SCHEMA, handle_meet_leave, "👋"),
("meet_say", MEET_SAY_SCHEMA, handle_meet_say, "🗣️"),
)
def _on_session_end(**kwargs) -> None:
"""Best-effort cleanup — if a meet bot is still running when the session
ends, leave the call so we don't orphan a headless Chromium.
No-ops when nothing is active. Swallows all exceptions session end must
not fail because the bot cleanup hit an edge case.
"""
try:
status = pm.status()
if status.get("ok") and status.get("alive"):
pm.stop(reason="session ended")
except Exception as e: # pragma: no cover — defensive
logger.debug("google_meet on_session_end cleanup failed: %s", e)
def register(ctx) -> None:
"""Register tools, CLI, and lifecycle hooks.
Called once by the plugin loader when the plugin is enabled via
``plugins.enabled`` in config.yaml.
"""
# Windows is not supported in v1 — audio routing for v2 doesn't have a
# tested path there and guest-join Chromium is flakier. Refuse to register
# rather than half-working.
system = platform.system().lower()
if system not in ("linux", "darwin"):
logger.info(
"google_meet plugin: platform=%s not supported (linux/macos only)",
system,
)
return
for name, schema, handler, emoji in _TOOLS:
ctx.register_tool(
name=name,
toolset="google_meet",
schema=schema,
handler=handler,
check_fn=check_meet_requirements,
emoji=emoji,
)
ctx.register_cli_command(
name="meet",
help="Google Meet bot (join, transcribe, follow up)",
setup_fn=_register_meet_cli,
handler_fn=_meet_command,
description=(
"Let the hermes agent join a Google Meet call and scrape live "
"captions into a transcript. See: hermes meet setup"
),
)
ctx.register_hook("on_session_end", _on_session_end)
-244
View File
@@ -1,244 +0,0 @@
"""Virtual audio bridge for feeding generated speech into Chrome's mic.
v2 module. Provisions a platform-specific virtual audio device so the
Meet bot's Chromium instance can be pointed at an input source we
control. The OpenAI Realtime client writes PCM bytes into this device;
Chrome reads them as if they were coming from a microphone.
Linux (primary): uses pactl (PulseAudio) to create a null-sink plus a
virtual source whose master is the null-sink's monitor. Callers set
PULSE_SOURCE=<source_name> in Chrome's env and pass the fake-mic flag.
macOS: requires BlackHole 2ch to be installed. This module only
verifies its presence and returns the device name; routing OS default
input is left to the user (or a future switchaudio-osx integration) to
avoid surprising the user's system audio state.
Windows: not supported in v2.
"""
from __future__ import annotations
import platform
import subprocess
from typing import Optional
_BLACKHOLE_DEVICE = "BlackHole 2ch"
class AudioBridge:
"""Manages a virtual audio device for Chrome fake-mic input.
Call ``setup()`` once before launching the Meet bot and
``teardown()`` when the session ends. ``teardown()`` is idempotent.
"""
def __init__(self, name_prefix: str = "hermes_meet") -> None:
self._name_prefix = name_prefix
self._platform: Optional[str] = None
self._device_name: Optional[str] = None
self._write_target: Optional[str] = None
self._module_ids: list[int] = []
self._torn_down = False
# ── public properties ─────────────────────────────────────────────────
@property
def device_name(self) -> str:
if not self._device_name:
raise RuntimeError("AudioBridge not set up yet")
return self._device_name
@property
def write_target(self) -> str:
if not self._write_target:
raise RuntimeError("AudioBridge not set up yet")
return self._write_target
# ── lifecycle ─────────────────────────────────────────────────────────
def setup(self) -> dict:
"""Provision the virtual audio device.
Returns a dict describing the device. Raises RuntimeError on
unsupported platforms or when required system tools are missing.
"""
system = platform.system()
if system == "Linux":
return self._setup_linux()
if system == "Darwin":
return self._setup_darwin()
if system == "Windows":
raise RuntimeError("windows not supported in v2")
raise RuntimeError(f"unsupported platform: {system}")
def teardown(self) -> None:
"""Release the virtual audio device. Idempotent."""
if self._torn_down:
return
# Only Linux needs explicit unloading.
if self._platform == "linux" and self._module_ids:
# Unload in reverse order (virtual-source before null-sink).
for mod_id in reversed(self._module_ids):
try:
subprocess.run(
["pactl", "unload-module", str(mod_id)],
check=False,
capture_output=True,
)
except Exception:
# Best-effort teardown — never raise from here.
pass
self._module_ids = []
self._torn_down = True
# ── platform impls ────────────────────────────────────────────────────
def _setup_linux(self) -> dict:
sink_name = f"{self._name_prefix}_sink"
src_name = f"{self._name_prefix}_src"
try:
sink_out = subprocess.run(
[
"pactl",
"load-module",
"module-null-sink",
f"sink_name={sink_name}",
f"sink_properties=device.description=HermesMeetSink",
],
check=True,
capture_output=True,
text=True,
)
except FileNotFoundError as exc:
raise RuntimeError(
"pactl not found — install PulseAudio/pipewire-pulse"
) from exc
except subprocess.CalledProcessError as exc:
raise RuntimeError(
f"pactl load-module null-sink failed: {exc.stderr or exc}"
) from exc
sink_mod_id = self._parse_module_id(sink_out.stdout)
try:
src_out = subprocess.run(
[
"pactl",
"load-module",
"module-virtual-source",
f"source_name={src_name}",
f"master={sink_name}.monitor",
],
check=True,
capture_output=True,
text=True,
)
except subprocess.CalledProcessError as exc:
# Roll back the null-sink we just created so we don't leak it.
subprocess.run(
["pactl", "unload-module", str(sink_mod_id)],
check=False,
capture_output=True,
)
raise RuntimeError(
f"pactl load-module virtual-source failed: {exc.stderr or exc}"
) from exc
src_mod_id = self._parse_module_id(src_out.stdout)
self._platform = "linux"
self._device_name = src_name
self._write_target = sink_name
self._module_ids = [sink_mod_id, src_mod_id]
self._torn_down = False
return {
"platform": "linux",
"device_name": src_name,
"sample_rate": 48000,
"channels": 2,
"module_ids": list(self._module_ids),
"write_target": sink_name,
}
def _setup_darwin(self) -> dict:
try:
out = subprocess.check_output(
["system_profiler", "SPAudioDataType"],
text=True,
stderr=subprocess.STDOUT,
)
except FileNotFoundError as exc:
raise RuntimeError(
"system_profiler not found (macOS-only command)"
) from exc
except subprocess.CalledProcessError as exc:
raise RuntimeError(
f"system_profiler failed: {exc.output}"
) from exc
if "BlackHole" not in out:
raise RuntimeError(
"BlackHole virtual audio device not installed. "
"Install via: brew install blackhole-2ch"
)
self._platform = "darwin"
self._device_name = _BLACKHOLE_DEVICE
self._write_target = _BLACKHOLE_DEVICE
self._module_ids = []
self._torn_down = False
return {
"platform": "darwin",
"device_name": _BLACKHOLE_DEVICE,
"sample_rate": 48000,
"channels": 2,
"module_ids": [],
"write_target": _BLACKHOLE_DEVICE,
}
# ── helpers ──────────────────────────────────────────────────────────
@staticmethod
def _parse_module_id(stdout: str) -> int:
"""pactl load-module prints the new module ID to stdout."""
text = (stdout or "").strip()
if not text:
raise RuntimeError("pactl load-module returned empty stdout")
# Take the last whitespace-separated token on the first non-empty line.
first = text.splitlines()[0].strip()
token = first.split()[-1]
try:
return int(token)
except ValueError as exc:
raise RuntimeError(
f"could not parse pactl module id from: {stdout!r}"
) from exc
def chrome_fake_audio_flags(bridge_info: dict) -> list[str]:
"""Return Chrome flags for using the fake audio input.
The PulseAudio source is selected via the ``PULSE_SOURCE`` env var,
which callers must set in Chrome's environment before launch:
env["PULSE_SOURCE"] = bridge_info["device_name"]
On macOS the caller must ensure the system default audio input is
set to the returned BlackHole device (we do not flip that switch).
"""
system = platform.system()
if system == "Linux":
# Chromium on Linux picks up the PulseAudio source selected via
# PULSE_SOURCE env var; the fake-ui flag skips the permission
# prompt so the bot can pick "use my mic" without user input.
return ["--use-fake-ui-for-media-stream"]
if system == "Darwin":
return ["--use-fake-ui-for-media-stream"]
if system == "Windows":
raise RuntimeError("windows not supported in v2")
raise RuntimeError(f"unsupported platform: {system}")
-478
View File
@@ -1,478 +0,0 @@
"""CLI commands for the google_meet plugin.
Wires ``hermes meet <subcommand>``:
setup preflight playwright, chromium, auth file, print fixes
auth open a browser to sign into Google, save storage state
join <url> join a Meet URL synchronously (also callable from the agent)
status print current bot state
transcript print the transcript
stop leave the current meeting
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from pathlib import Path
from typing import Optional
from hermes_constants import get_hermes_home
from plugins.google_meet import process_manager as pm
from plugins.google_meet.meet_bot import _is_safe_meet_url
def _auth_state_path() -> Path:
return Path(get_hermes_home()) / "workspace" / "meetings" / "auth.json"
# ---------------------------------------------------------------------------
# argparse wiring
# ---------------------------------------------------------------------------
def register_cli(subparser: argparse.ArgumentParser) -> None:
"""Build the ``hermes meet`` argparse tree.
Called by :func:`_register_cli_commands` at plugin load time.
"""
subs = subparser.add_subparsers(dest="meet_command")
subs.add_parser("setup", help="Preflight: playwright, chromium, auth")
inst_p = subs.add_parser(
"install",
help="Install prerequisites (pip deps, Chromium, platform audio tools)",
)
inst_p.add_argument(
"--realtime", action="store_true",
help="Also install realtime audio tools (pulseaudio-utils on Linux, BlackHole+ffmpeg on macOS). Uses sudo/brew, prompts before invoking either.",
)
inst_p.add_argument(
"--yes", "-y", action="store_true",
help="Answer yes to all prompts (use with care; will run sudo apt-get or brew without asking).",
)
subs.add_parser("auth", help="Sign in to Google and save session state")
join_p = subs.add_parser("join", help="Join a Meet URL")
join_p.add_argument("url", help="https://meet.google.com/...")
join_p.add_argument("--guest-name", default="Hermes Agent")
join_p.add_argument("--duration", default=None, help="e.g. 30m, 2h, 90s")
join_p.add_argument("--headed", action="store_true", help="show browser")
join_p.add_argument(
"--mode", choices=("transcribe", "realtime"), default="transcribe",
help="transcribe (default, listen-only) or realtime (speak via OpenAI Realtime)"
)
join_p.add_argument(
"--node", default=None,
help="remote node name, or 'auto' to use the sole registered node"
)
subs.add_parser("status", help="Print current Meet bot state")
tr_p = subs.add_parser("transcript", help="Print the scraped transcript")
tr_p.add_argument("--last", type=int, default=None)
say_p = subs.add_parser("say", help="Speak text in an active realtime meeting")
say_p.add_argument("text", help="what to say")
say_p.add_argument("--node", default=None)
subs.add_parser("stop", help="Leave the current meeting")
# v3: remote node host management.
node_p = subs.add_parser(
"node",
help="Manage remote meet node hosts (run/list/approve/remove/status/ping)",
)
try:
from plugins.google_meet.node.cli import register_cli as _register_node_cli
_register_node_cli(node_p)
except Exception as e: # pragma: no cover — defensive
# If the node module fails to import for any reason (optional dep
# missing at import time etc.), leave the subparser present but
# flag it. The argparse dispatch will surface a clear error.
def _node_unavailable(args):
print(f"hermes meet node: module unavailable ({e})")
return 1
node_p.set_defaults(func=_node_unavailable)
subparser.set_defaults(func=meet_command)
# ---------------------------------------------------------------------------
# Dispatch
# ---------------------------------------------------------------------------
def meet_command(args: argparse.Namespace) -> int:
sub = getattr(args, "meet_command", None)
if not sub:
print("usage: hermes meet {setup,auth,join,status,transcript,say,stop,node}")
return 2
if sub == "setup":
return _cmd_setup()
if sub == "install":
return _cmd_install(
realtime=bool(getattr(args, "realtime", False)),
assume_yes=bool(getattr(args, "yes", False)),
)
if sub == "auth":
return _cmd_auth()
if sub == "join":
return _cmd_join(
url=args.url,
guest_name=args.guest_name,
duration=args.duration,
headed=args.headed,
mode=getattr(args, "mode", "transcribe"),
node=getattr(args, "node", None),
)
if sub == "status":
return _cmd_status()
if sub == "transcript":
return _cmd_transcript(last=args.last)
if sub == "say":
return _cmd_say(text=args.text, node=getattr(args, "node", None))
if sub == "stop":
return _cmd_stop()
if sub == "node":
# Dispatch was set by the node cli's register_cli; fall through to
# whatever its subparsers wired.
fn = getattr(args, "func", None)
if fn is None or fn is meet_command:
print("usage: hermes meet node {run,list,approve,remove,status,ping}")
return 2
return fn(args)
print(f"unknown subcommand: {sub}")
return 2
# ---------------------------------------------------------------------------
# Subcommand handlers
# ---------------------------------------------------------------------------
def _cmd_setup() -> int:
import platform as _p
print("google_meet preflight")
print("---------------------")
system = _p.system()
system_ok = system in ("Linux", "Darwin")
print(f" platform : {system} [{'ok' if system_ok else 'unsupported'}]")
try:
import playwright # noqa: F401
pw_ok = True
pw_msg = "installed"
except ImportError:
pw_ok = False
pw_msg = "NOT installed — run: pip install playwright"
print(f" playwright : {pw_msg}")
chromium_ok = False
chromium_msg = "unknown"
if pw_ok:
try:
from playwright.sync_api import sync_playwright
with sync_playwright() as p:
try:
exe = p.chromium.executable_path
if exe and Path(exe).exists():
chromium_ok = True
chromium_msg = f"ok ({exe})"
else:
chromium_msg = (
"not installed — run: "
"python -m playwright install chromium"
)
except Exception as e:
chromium_msg = f"probe failed: {e}"
except Exception as e:
chromium_msg = f"probe failed: {e}"
print(f" chromium : {chromium_msg}")
auth_path = _auth_state_path()
auth_ok = auth_path.is_file()
print(
" google auth : "
+ (f"ok ({auth_path})" if auth_ok else "not saved — run: hermes meet auth")
)
print()
all_ok = system_ok and pw_ok and chromium_ok
if all_ok:
print(
"ready. Join a meeting: "
"hermes meet join https://meet.google.com/abc-defg-hij"
)
else:
print("not ready yet — fix the items above.")
return 0 if all_ok else 1
def _cmd_install(*, realtime: bool, assume_yes: bool) -> int:
"""Install the plugin's prerequisites.
Always: pip install playwright + websockets, then
``python -m playwright install chromium``.
With ``--realtime``: also install the platform audio bridge deps.
Linux : ``sudo apt-get install -y pulseaudio-utils``
macOS : ``brew install blackhole-2ch ffmpeg`` (+ remind the user
to select BlackHole as the default input device manually)
Prompts before every package-manager invocation unless ``--yes``.
Refuses to run on Windows.
"""
import platform as _p
import shutil as _shutil
import subprocess as _sp
system = _p.system()
if system not in ("Linux", "Darwin"):
print(f"google_meet install: {system} is not supported (linux/macos only)")
return 1
def _confirm(prompt: str) -> bool:
if assume_yes:
return True
try:
ans = input(f"{prompt} [y/N] ").strip().lower()
except EOFError:
return False
return ans in ("y", "yes")
print("google_meet install")
print("-------------------")
# 1) pip deps — always safe, venv-scoped.
pip_pkgs = ["playwright", "websockets"]
print(f"\n[1/3] pip install: {' '.join(pip_pkgs)}")
try:
res = _sp.run(
[sys.executable, "-m", "pip", "install", "--upgrade", *pip_pkgs],
check=False,
)
if res.returncode != 0:
print(" pip install failed")
return 1
except Exception as e:
print(f" pip install failed: {e}")
return 1
# 2) Playwright browsers — pulls chromium (~300MB first run).
print("\n[2/3] python -m playwright install chromium")
try:
res = _sp.run(
[sys.executable, "-m", "playwright", "install", "chromium"],
check=False,
)
if res.returncode != 0:
print(" playwright install failed (may already be installed)")
except Exception as e:
print(f" playwright install failed: {e}")
return 1
# 3) Platform audio deps for realtime mode.
if realtime:
print("\n[3/3] realtime audio deps")
if system == "Linux":
if _shutil.which("paplay") and _shutil.which("pactl"):
print(" pulseaudio-utils already installed.")
else:
if not _confirm(
" install pulseaudio-utils? this runs `sudo apt-get install -y pulseaudio-utils`"
):
print(" skipped (you can run it manually later)")
else:
cmd = ["sudo", "apt-get", "install", "-y", "pulseaudio-utils"]
print(f" $ {' '.join(cmd)}")
res = _sp.run(cmd, check=False)
if res.returncode != 0:
print(" apt install failed — install pulseaudio-utils manually")
elif system == "Darwin":
have_bh = False
try:
out = _sp.check_output(["system_profiler", "SPAudioDataType"], text=True)
have_bh = "BlackHole" in out
except Exception:
pass
have_ffmpeg = bool(_shutil.which("ffmpeg"))
needs = []
if not have_bh:
needs.append("blackhole-2ch")
if not have_ffmpeg:
needs.append("ffmpeg")
if not needs:
print(" BlackHole and ffmpeg already installed.")
elif not _shutil.which("brew"):
print(
" missing: " + ", ".join(needs) + "\n"
" install Homebrew first (https://brew.sh) or install the packages manually."
)
else:
if not _confirm(f" install via brew: {' '.join(needs)}?"):
print(" skipped (you can run it manually later)")
else:
cmd = ["brew", "install", *needs]
print(f" $ {' '.join(cmd)}")
res = _sp.run(cmd, check=False)
if res.returncode != 0:
print(" brew install failed — install them manually")
print(
"\n NOTE: macOS does not auto-route audio. Open\n"
" System Settings → Sound → Input\n"
" and select 'BlackHole 2ch' before starting a realtime meeting.\n"
" hermes will not switch your default input for you."
)
else:
print("\n[3/3] skipped (pass --realtime to install audio tooling too)")
print("\ndone. verify with: hermes meet setup")
return 0
def _cmd_auth() -> int:
"""Open a headed Chromium, let the user sign in, save storage_state."""
try:
from playwright.sync_api import sync_playwright
except ImportError:
print(
"playwright is not installed. run:\n"
" pip install playwright && python -m playwright install chromium"
)
return 1
path = _auth_state_path()
path.parent.mkdir(parents=True, exist_ok=True)
print(f"opening Chromium — sign in to Google, then return here and press Enter.")
print(f"saving storage state to: {path}")
try:
with sync_playwright() as pw:
browser = pw.chromium.launch(headless=False)
context = browser.new_context()
page = context.new_page()
page.goto("https://accounts.google.com/", wait_until="domcontentloaded")
try:
input("press Enter after you've signed in ... ")
except EOFError:
pass
context.storage_state(path=str(path))
browser.close()
except Exception as e:
print(f"auth failed: {e}")
return 1
print("saved. you can now run: hermes meet join <url>")
return 0
def _cmd_join(
url: str,
*,
guest_name: str,
duration: Optional[str],
headed: bool,
mode: str = "transcribe",
node: Optional[str] = None,
) -> int:
if not _is_safe_meet_url(url):
print(f"refusing: not a meet.google.com URL: {url}")
return 2
if node:
# Remote: go through NodeClient.
try:
from plugins.google_meet.node.registry import NodeRegistry
from plugins.google_meet.node.client import NodeClient
except ImportError as e:
print(f"node module unavailable: {e}")
return 1
reg = NodeRegistry()
entry = reg.resolve(node if node != "auto" else None)
if entry is None:
print(f"no registered node matches {node!r}")
return 1
client = NodeClient(url=entry["url"], token=entry["token"])
try:
res = client.start_bot(
url=url, guest_name=guest_name, duration=duration,
headed=headed, mode=mode,
)
except Exception as e:
print(f"remote start_bot failed: {e}")
return 1
print(json.dumps({"node": entry.get("name"), **res}, indent=2))
return 0 if res.get("ok") else 1
auth = _auth_state_path()
res = pm.start(
url=url,
headed=headed,
guest_name=guest_name,
duration=duration,
auth_state=str(auth) if auth.is_file() else None,
mode=mode,
)
print(json.dumps(res, indent=2))
return 0 if res.get("ok") else 1
def _cmd_say(text: str, node: Optional[str] = None) -> int:
if not (text or "").strip():
print("refusing: empty text")
return 2
if node:
try:
from plugins.google_meet.node.registry import NodeRegistry
from plugins.google_meet.node.client import NodeClient
except ImportError as e:
print(f"node module unavailable: {e}")
return 1
reg = NodeRegistry()
entry = reg.resolve(node if node != "auto" else None)
if entry is None:
print(f"no registered node matches {node!r}")
return 1
client = NodeClient(url=entry["url"], token=entry["token"])
try:
res = client.say(text)
except Exception as e:
print(f"remote say failed: {e}")
return 1
print(json.dumps({"node": entry.get("name"), **res}, indent=2))
return 0 if res.get("ok") else 1
res = pm.enqueue_say(text)
print(json.dumps(res, indent=2))
return 0 if res.get("ok") else 1
def _cmd_status() -> int:
res = pm.status()
print(json.dumps(res, indent=2))
return 0 if res.get("ok") else 1
def _cmd_transcript(last: Optional[int]) -> int:
res = pm.transcript(last=last)
if not res.get("ok"):
print(json.dumps(res, indent=2))
return 1
for ln in res.get("lines", []):
print(ln)
return 0
def _cmd_stop() -> int:
res = pm.stop(reason="hermes meet stop")
print(json.dumps(res, indent=2))
return 0 if res.get("ok") else 1
if __name__ == "__main__": # pragma: no cover
parser = argparse.ArgumentParser(prog="hermes meet")
register_cli(parser)
ns = parser.parse_args()
sys.exit(meet_command(ns))
-852
View File
@@ -1,852 +0,0 @@
"""Headless Google Meet bot — Playwright + live-caption scraping.
Runs as a standalone subprocess spawned by ``process_manager.py``. Reads config
from env vars, writes status + transcript to files under
``$HERMES_HOME/workspace/meetings/<meeting-id>/``. The main hermes process
reads those files via the ``meet_*`` tools no IPC beyond filesystem.
The scraping strategy mirrors OpenUtter (sumansid/openutter): we don't parse
WebRTC audio, we enable Google Meet's built-in live captions and observe the
captions container in the DOM via a MutationObserver. This is lossy and
English-biased but it is:
* deterministic (no API keys, no STT billing),
* works behind Meet's normal login / admission,
* survives Meet UI rewrites fairly well because the caption container has a
stable ARIA role.
Run standalone for debugging::
HERMES_MEET_URL=https://meet.google.com/abc-defg-hij \\
HERMES_MEET_OUT_DIR=/tmp/meet-debug \\
HERMES_MEET_HEADED=1 \\
python -m plugins.google_meet.meet_bot
No meet.google.com URL exits non-zero. Any URL that doesn't start with
``https://meet.google.com/`` is rejected (explicit-by-design).
"""
from __future__ import annotations
import json
import os
import re
import signal
import sys
import threading
import time
from pathlib import Path
from typing import Optional
# Match ``https://meet.google.com/abc-defg-hij`` or ``.../lookup/...`` — the
# short three-segment code or a lookup URL. Anything else is rejected.
MEET_URL_RE = re.compile(
r"^https://meet\.google\.com/("
r"[a-z0-9]{3,}-[a-z0-9]{3,}-[a-z0-9]{3,}"
r"|lookup/[^/?#]+"
r"|new"
r")(?:[/?#].*)?$"
)
# Filenames the bot reads/writes in ``HERMES_MEET_OUT_DIR``.
SAY_QUEUE_FILENAME = "say_queue.jsonl"
SAY_PCM_FILENAME = "speaker.pcm"
def _is_safe_meet_url(url: str) -> bool:
"""Return True if *url* is a Google Meet URL we're willing to navigate to."""
if not isinstance(url, str):
return False
return bool(MEET_URL_RE.match(url.strip()))
def _meeting_id_from_url(url: str) -> str:
"""Extract the 3-segment meeting code from a Meet URL.
For ``https://meet.google.com/abc-defg-hij`` ``abc-defg-hij``.
For ``.../lookup/<id>`` or ``/new`` we fall back to a timestamped id the
bot won't know the real code until after redirect, and callers pass this
through to filename anyway.
"""
m = re.search(
r"meet\.google\.com/([a-z0-9]{3,}-[a-z0-9]{3,}-[a-z0-9]{3,})",
url or "",
)
if m:
return m.group(1)
return f"meet-{int(time.time())}"
# ---------------------------------------------------------------------------
# Status + transcript file writers
# ---------------------------------------------------------------------------
class _BotState:
"""Single-process mutable state, flushed to ``status.json`` on each change."""
def __init__(self, out_dir: Path, meeting_id: str, url: str):
self.out_dir = out_dir
self.meeting_id = meeting_id
self.url = url
self.in_call = False
self.captioning = False
self.captions_enabled_attempted = False
self.lobby_waiting = False
self.join_attempted_at: Optional[float] = None
self.joined_at: Optional[float] = None
self.last_caption_at: Optional[float] = None
self.transcript_lines = 0
self.error: Optional[str] = None
self.exited = False
# v2 realtime fields.
self.realtime = False
self.realtime_ready = False
self.realtime_device: Optional[str] = None
self.audio_bytes_out: int = 0
self.last_audio_out_at: Optional[float] = None
self.last_barge_in_at: Optional[float] = None
self.leave_reason: Optional[str] = None
# Scraped captions, in order, deduped. Each entry is a dict of
# {"ts": <epoch>, "speaker": str, "text": str}.
self._seen: set = set()
out_dir.mkdir(parents=True, exist_ok=True)
self.transcript_path = out_dir / "transcript.txt"
self.status_path = out_dir / "status.json"
self._flush()
# -------- transcript ------------------------------------------------
def record_caption(self, speaker: str, text: str) -> None:
"""Append a caption line if we haven't seen this exact (speaker, text)."""
speaker = (speaker or "").strip() or "Unknown"
text = (text or "").strip()
if not text:
return
key = f"{speaker}|{text}"
if key in self._seen:
return
self._seen.add(key)
self.transcript_lines += 1
self.last_caption_at = time.time()
ts = time.strftime("%H:%M:%S", time.localtime(self.last_caption_at))
line = f"[{ts}] {speaker}: {text}\n"
# Atomic-ish append — good enough for a single-writer.
with self.transcript_path.open("a", encoding="utf-8") as f:
f.write(line)
self._flush()
# -------- status file ----------------------------------------------
def _flush(self) -> None:
data = {
"meetingId": self.meeting_id,
"url": self.url,
"inCall": self.in_call,
"captioning": self.captioning,
"captionsEnabledAttempted": self.captions_enabled_attempted,
"lobbyWaiting": self.lobby_waiting,
"joinAttemptedAt": self.join_attempted_at,
"joinedAt": self.joined_at,
"lastCaptionAt": self.last_caption_at,
"transcriptLines": self.transcript_lines,
"transcriptPath": str(self.transcript_path),
"error": self.error,
"exited": self.exited,
"pid": os.getpid(),
# v2 realtime telemetry.
"realtime": self.realtime,
"realtimeReady": self.realtime_ready,
"realtimeDevice": self.realtime_device,
"audioBytesOut": self.audio_bytes_out,
"lastAudioOutAt": self.last_audio_out_at,
"lastBargeInAt": self.last_barge_in_at,
"leaveReason": self.leave_reason,
}
tmp = self.status_path.with_suffix(".json.tmp")
tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")
tmp.replace(self.status_path)
def set(self, **kwargs) -> None:
for k, v in kwargs.items():
setattr(self, k, v)
self._flush()
# ---------------------------------------------------------------------------
# Playwright bot entry point
# ---------------------------------------------------------------------------
# JavaScript injected into the Meet tab to observe captions. Captures
# {speaker, text} tuples via a MutationObserver on the caption container,
# and exposes ``window.__hermesMeetDrain()`` to pull new entries. This
# mirrors the OpenUtter caption scraping approach.
_CAPTION_OBSERVER_JS = r"""
(() => {
if (window.__hermesMeetInstalled) return;
window.__hermesMeetInstalled = true;
window.__hermesMeetQueue = [];
const captionSelector = '[role="region"][aria-label*="aption" i], ' +
'div[jsname="YSxPC"], ' + // legacy
'div[jsname="tgaKEf"]'; // current (Apr 2026)
function pushEntry(speaker, text) {
if (!text || !text.trim()) return;
window.__hermesMeetQueue.push({
ts: Date.now(),
speaker: (speaker || '').trim(),
text: text.trim(),
});
}
function scan(root) {
// Meet captions render as a list of rows; each row contains a speaker
// label and a text block. Selectors vary across Meet rewrites; we try
// a few shapes and fall back to raw text.
const rows = root.querySelectorAll('div[jsname="dsyhDe"], div.CNusmb, div.TBMuR');
if (rows.length) {
rows.forEach((row) => {
const spkEl = row.querySelector('div.KcIKyf, div.zs7s8d, span[jsname="YSxPC"]');
const txtEl = row.querySelector('div.bh44bd, span[jsname="tgaKEf"], div.iTTPOb');
const speaker = spkEl ? spkEl.innerText : '';
const text = txtEl ? txtEl.innerText : row.innerText;
pushEntry(speaker, text);
});
return;
}
// Fallback: treat the whole region's innerText as one anonymous line.
const text = (root.innerText || '').split('\n').filter(Boolean).pop();
pushEntry('', text);
}
function attach() {
const el = document.querySelector(captionSelector);
if (!el) return false;
const obs = new MutationObserver(() => scan(el));
obs.observe(el, { childList: true, subtree: true, characterData: true });
scan(el);
return true;
}
// Try now and retry on interval the caption region only appears after
// captions are enabled and someone speaks.
if (!attach()) {
const iv = setInterval(() => { if (attach()) clearInterval(iv); }, 1500);
}
window.__hermesMeetDrain = () => {
const out = window.__hermesMeetQueue.slice();
window.__hermesMeetQueue = [];
return out;
};
})();
"""
def _enable_captions_js() -> str:
"""Return a small JS snippet that tries to click the 'Turn on captions' button.
Best-effort Meet's caption toggle is keyboard-accessible via ``c``. We
dispatch that keystroke as a cheap fallback. Real click targeting is too
brittle to rely on.
"""
return r"""
(() => {
const ev = new KeyboardEvent('keydown', {
key: 'c', code: 'KeyC', keyCode: 67, which: 67, bubbles: true,
});
document.body.dispatchEvent(ev);
return true;
})();
"""
def _start_realtime_speaker(
*,
rt: dict,
out_dir: Path,
bridge_info: dict,
api_key: str,
model: str,
voice: str,
instructions: str,
stop_flag: dict,
state: "_BotState",
) -> None:
"""Wire up the OpenAI Realtime session + speaker thread + PCM pump.
The speaker thread reads text lines from ``say_queue.jsonl``, sends each
to OpenAI Realtime, and writes PCM audio into ``speaker.pcm``. A
separate *pump* thread forwards that PCM into the OS audio sink so
Chrome's fake mic picks it up. On Linux we pipe to ``paplay`` against
the null-sink; on macOS the caller is expected to have the BlackHole
device selected as default input.
"""
try:
from plugins.google_meet.realtime.openai_client import (
RealtimeSession,
RealtimeSpeaker,
)
except Exception as e:
state.set(error=f"realtime import failed: {e}")
return
pcm_path = out_dir / SAY_PCM_FILENAME
queue_path = out_dir / SAY_QUEUE_FILENAME
processed_path = out_dir / "say_processed.jsonl"
# Reset the sink file so we start clean each session.
pcm_path.write_bytes(b"")
# Make sure the queue exists so the speaker poller doesn't error on
# first iteration.
queue_path.touch()
try:
session = RealtimeSession(
api_key=api_key,
model=model,
voice=voice,
instructions=instructions,
audio_sink_path=pcm_path,
sample_rate=24000,
)
session.connect()
except Exception as e:
state.set(error=f"realtime connect failed: {e}")
return
rt["session"] = session
def _stop_fn():
return stop_flag.get("stop", False)
rt["speaker_stop"] = lambda: stop_flag.__setitem__("stop", stop_flag.get("stop", False))
speaker = RealtimeSpeaker(
session=session,
queue_path=queue_path,
processed_path=processed_path,
)
def _speaker_loop():
try:
speaker.run_until_stopped(_stop_fn)
except Exception as e:
state.set(error=f"realtime speaker crashed: {e}")
t_speaker = threading.Thread(target=_speaker_loop, name="meet-speaker", daemon=True)
t_speaker.start()
rt["speaker_thread"] = t_speaker
# PCM pump: feeds speaker.pcm (24kHz s16le mono) into the OS audio
# device that Chrome's fake mic reads from. Different tools per
# platform, but the contract is the same — block-read the growing
# PCM file and stream it to the device in near-real-time.
platform_tag = (bridge_info or {}).get("platform")
if platform_tag == "linux":
import subprocess as _sp
sink = (bridge_info or {}).get("write_target") or "hermes_meet_sink"
try:
proc = _sp.Popen(
[
"paplay",
"--raw",
"--rate=24000",
"--format=s16le",
"--channels=1",
f"--device={sink}",
str(pcm_path),
],
stdin=_sp.DEVNULL,
stdout=_sp.DEVNULL,
stderr=_sp.DEVNULL,
)
rt["pcm_pump"] = proc
except FileNotFoundError:
state.set(error="paplay not found — install pulseaudio-utils for realtime on Linux")
elif platform_tag == "darwin":
# macOS: use ffmpeg to tail-read speaker.pcm and write it to the
# BlackHole output device. The user must have BlackHole selected
# as the default input in System Settings → Sound for Chrome to
# pick it up. We prefer ffmpeg because it's scriptable and can
# target AVFoundation devices by name; fall back to afplay-ing
# the file in a tight loop if ffmpeg is absent.
import shutil as _shutil
import subprocess as _sp
device_name = (bridge_info or {}).get("write_target") or "BlackHole 2ch"
if _shutil.which("ffmpeg"):
try:
# -re: read input at native frame rate.
# -f avfoundation -i: speaker path as raw PCM.
# -f s16le -ar 24000 -ac 1 -i <pcm>: interpret the file.
# -f audiotoolbox -audio_device_index: write to BlackHole.
# Simpler: output as raw via coreaudio using "-f audiotoolbox".
# ffmpeg's audiotoolbox output picks the current default
# output device, which isn't what we want. Instead we use
# -f avfoundation with the named device as OUTPUT via
# -vn and the device name.
proc = _sp.Popen(
[
"ffmpeg",
"-nostdin", "-hide_banner", "-loglevel", "error",
"-re",
"-f", "s16le", "-ar", "24000", "-ac", "1",
"-i", str(pcm_path),
"-f", "audiotoolbox",
"-audio_device_index", _mac_audio_device_index(device_name),
"-",
],
stdin=_sp.DEVNULL,
stdout=_sp.DEVNULL,
stderr=_sp.DEVNULL,
)
rt["pcm_pump"] = proc
except FileNotFoundError:
state.set(error="ffmpeg not found — install via `brew install ffmpeg` for realtime on macOS")
except Exception as e:
state.set(error=f"macOS pcm pump failed to start: {e}")
else:
state.set(error="ffmpeg not found — install via `brew install ffmpeg` for realtime on macOS")
def _mac_audio_device_index(device_name: str) -> str:
"""Return the ffmpeg ``-audio_device_index`` for *device_name*, as a string.
Probes ``ffmpeg -f avfoundation -list_devices true -i ''`` (which prints
the device table on stderr) and matches *device_name* case-insensitively.
Defaults to ``"0"`` if the device can't be found — caller will get a
misrouted stream but not a crash, and the error will be obvious.
"""
import subprocess as _sp
try:
out = _sp.run(
["ffmpeg", "-f", "avfoundation", "-list_devices", "true", "-i", ""],
capture_output=True,
text=True,
timeout=10,
)
except Exception:
return "0"
# ffmpeg prints the table on stderr. Lines look like:
# [AVFoundation indev @ 0x...] [0] BlackHole 2ch
import re as _re
needle = device_name.strip().lower()
for line in (out.stderr or "").splitlines():
m = _re.search(r"\[(\d+)\]\s+(.+)$", line)
if not m:
continue
if m.group(2).strip().lower() == needle:
return m.group(1)
return "0"
def run_bot() -> int: # noqa: C901 — orchestration, explicit branches
url = os.environ.get("HERMES_MEET_URL", "").strip()
out_dir_env = os.environ.get("HERMES_MEET_OUT_DIR", "").strip()
headed = os.environ.get("HERMES_MEET_HEADED", "").lower() in ("1", "true", "yes")
auth_state = os.environ.get("HERMES_MEET_AUTH_STATE", "").strip()
guest_name = os.environ.get("HERMES_MEET_GUEST_NAME", "Hermes Agent")
duration_s = _parse_duration(os.environ.get("HERMES_MEET_DURATION", ""))
# v2: optional realtime mode. Enabled when HERMES_MEET_MODE=realtime.
mode = os.environ.get("HERMES_MEET_MODE", "transcribe").strip().lower()
realtime_model = os.environ.get("HERMES_MEET_REALTIME_MODEL", "gpt-realtime")
realtime_voice = os.environ.get("HERMES_MEET_REALTIME_VOICE", "alloy")
realtime_instructions = os.environ.get("HERMES_MEET_REALTIME_INSTRUCTIONS", "")
realtime_api_key = os.environ.get("HERMES_MEET_REALTIME_KEY") or os.environ.get("OPENAI_API_KEY", "")
if not url or not _is_safe_meet_url(url):
sys.stderr.write(
"google_meet bot: refusing to launch — HERMES_MEET_URL must be a "
"meet.google.com URL. got: %r\n" % url
)
return 2
if not out_dir_env:
sys.stderr.write("google_meet bot: HERMES_MEET_OUT_DIR is required\n")
return 2
out_dir = Path(out_dir_env)
meeting_id = _meeting_id_from_url(url)
state = _BotState(out_dir=out_dir, meeting_id=meeting_id, url=url)
# SIGTERM → exit cleanly so the parent ``meet_leave`` gets a finalized
# transcript. We set a flag instead of raising so the Playwright context
# teardown runs in the finally block below.
stop_flag = {"stop": False}
def _on_signal(_sig, _frame):
stop_flag["stop"] = True
signal.signal(signal.SIGTERM, _on_signal)
signal.signal(signal.SIGINT, _on_signal)
# v2 realtime: provision virtual audio device + start speaker thread.
# We track these in a dict so the finally block can tear them down
# regardless of how we exit. If anything in the realtime setup fails we
# fall back to transcribe mode with a status flag.
rt = {
"enabled": mode == "realtime",
"bridge": None, # AudioBridge | None
"bridge_info": None, # dict | None
"session": None, # RealtimeSession | None
"speaker_thread": None, # threading.Thread | None
"speaker_stop": None, # callable | None
}
if rt["enabled"]:
if not realtime_api_key:
state.set(error="realtime mode requested but no API key in HERMES_MEET_REALTIME_KEY/OPENAI_API_KEY — falling back to transcribe")
rt["enabled"] = False
else:
try:
from plugins.google_meet.audio_bridge import AudioBridge
bridge = AudioBridge()
rt["bridge_info"] = bridge.setup()
rt["bridge"] = bridge
state.set(realtime=True, realtime_device=rt["bridge_info"].get("device_name"))
except Exception as e:
state.set(error=f"audio bridge setup failed: {e} — falling back to transcribe")
rt["enabled"] = False
try:
from playwright.sync_api import sync_playwright
except ImportError as e:
state.set(error=f"playwright not installed: {e}", exited=True)
sys.stderr.write(
"google_meet bot: playwright is not installed. Run "
"`pip install playwright && python -m playwright install chromium`\n"
)
if rt["bridge"]:
rt["bridge"].teardown()
return 3
# Chrome env: if realtime is live on Linux, point PULSE_SOURCE at the
# virtual source so Chrome's fake mic reads the audio we generate.
chrome_env = os.environ.copy()
chrome_args = [
"--use-fake-ui-for-media-stream",
"--disable-blink-features=AutomationControlled",
]
if not rt["enabled"]:
# v1-style fake device (silence) — we don't care about mic content
# when we're not speaking.
chrome_args.insert(1, "--use-fake-device-for-media-stream")
elif rt["bridge_info"] and rt["bridge_info"].get("platform") == "linux":
chrome_env["PULSE_SOURCE"] = rt["bridge_info"].get("device_name", "")
try:
with sync_playwright() as pw:
# Playwright's launch() doesn't take env; we set PULSE_SOURCE
# via the process env before launch so the child Chrome inherits it.
for k, v in chrome_env.items():
os.environ[k] = v
browser = pw.chromium.launch(
headless=not headed,
args=chrome_args,
)
context_args = {
"viewport": {"width": 1280, "height": 800},
"user_agent": (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
),
"permissions": ["microphone", "camera"],
}
if auth_state and Path(auth_state).is_file():
context_args["storage_state"] = auth_state
context = browser.new_context(**context_args)
page = context.new_page()
try:
page.goto(url, wait_until="domcontentloaded", timeout=30_000)
except Exception as e:
state.set(error=f"navigate failed: {e}", exited=True)
return 4
# Guest-mode: Meet shows a name field before "Ask to join". When
# we're authed, we instead see "Join now".
_try_guest_name(page, guest_name)
_click_join(page, state)
# Install caption observer and attempt to enable captions.
try:
page.evaluate(_enable_captions_js())
state.set(captions_enabled_attempted=True)
except Exception:
pass
try:
page.evaluate(_CAPTION_OBSERVER_JS)
except Exception as e:
state.set(error=f"caption observer install failed: {e}")
# Note: in_call=False until admission is confirmed (we detect
# either the Leave button or the caption region, signalling we
# made it past the lobby).
state.set(captioning=True, join_attempted_at=time.time())
# v2 realtime: start the speaker thread reading from the
# plugin-side say queue. The thread reads JSONL lines written by
# meet_say, calls OpenAI Realtime, and streams the audio PCM to
# the virtual sink that Chrome's fake-mic is pointed at.
if rt["enabled"]:
_start_realtime_speaker(
rt=rt,
out_dir=out_dir,
bridge_info=rt["bridge_info"],
api_key=realtime_api_key,
model=realtime_model,
voice=realtime_voice,
instructions=realtime_instructions,
stop_flag=stop_flag,
state=state,
)
if rt["session"] is not None:
state.set(realtime_ready=True)
# Admission + drain loop. Runs until SIGTERM, duration expiry,
# or the page detects "You were removed / you left the
# meeting". Responsible for:
# * detecting admission (Leave button visible → in_call=True)
# * timing out stuck-in-lobby (default 5 minutes)
# * draining scraped captions into the transcript
# * triggering realtime barge-in when a human speaks while
# the bot is generating audio
# * periodically flushing realtime counters into status.json
deadline = (time.time() + duration_s) if duration_s else None
lobby_deadline = time.time() + float(
os.environ.get("HERMES_MEET_LOBBY_TIMEOUT", "300")
)
last_admission_check = 0.0
while not stop_flag["stop"]:
now = time.time()
if deadline and now > deadline:
state.set(leave_reason="duration_expired")
break
# Admission detection every ~3s until admitted.
if not state.in_call and (now - last_admission_check) > 3.0:
last_admission_check = now
admitted = _detect_admission(page)
if admitted:
state.set(
in_call=True,
lobby_waiting=False,
joined_at=now,
)
elif now > lobby_deadline:
state.set(
error=(
"lobby timeout — host never admitted the bot "
f"within {int(lobby_deadline - state.join_attempted_at) if state.join_attempted_at else 0}s"
),
leave_reason="lobby_timeout",
)
break
elif _detect_denied(page):
state.set(
error="host denied admission",
leave_reason="denied",
)
break
try:
queued = page.evaluate("window.__hermesMeetDrain && window.__hermesMeetDrain()")
if isinstance(queued, list):
for entry in queued:
if not isinstance(entry, dict):
continue
speaker = str(entry.get("speaker", ""))
text = str(entry.get("text", ""))
state.record_caption(speaker=speaker, text=text)
# Barge-in: if the bot is currently generating
# audio AND a real human just spoke, cancel the
# in-flight response so we don't talk over them.
if rt["enabled"] and rt["session"] is not None:
if _looks_like_human_speaker(speaker, guest_name):
try:
cancelled = rt["session"].cancel_response()
if cancelled:
state.set(last_barge_in_at=now)
except Exception:
pass
except Exception:
# Meet reloaded or we got booted — try to detect and
# exit gracefully rather than spinning.
if page.is_closed():
state.set(leave_reason="page_closed")
break
# Fold the realtime session's byte/timestamp counters into
# the status file so meet_status can surface them.
if rt["session"] is not None:
state.set(
audio_bytes_out=getattr(rt["session"], "audio_bytes_out", 0),
last_audio_out_at=getattr(rt["session"], "last_audio_out_at", None),
)
time.sleep(1.0)
# Try to leave cleanly — click "Leave call" button if present.
try:
page.evaluate(
"() => { const b = document.querySelector('button[aria-label*=\"eave call\"]');"
" if (b) b.click(); }"
)
except Exception:
pass
context.close()
browser.close()
# v2: teardown realtime speaker + audio bridge.
if rt["speaker_stop"]:
try:
rt["speaker_stop"]()
except Exception:
pass
if rt["speaker_thread"] is not None:
try:
rt["speaker_thread"].join(timeout=5.0)
except Exception:
pass
if rt["session"]:
try:
rt["session"].close()
except Exception:
pass
if rt["bridge"]:
try:
rt["bridge"].teardown()
except Exception:
pass
state.set(in_call=False, captioning=False, exited=True)
return 0
except Exception as e:
state.set(error=f"unhandled: {e}", exited=True)
return 1
def _try_guest_name(page, guest_name: str) -> None:
"""If Meet is showing a guest-name input, type *guest_name* into it."""
try:
# Meet's guest name input has placeholder "Your name".
locator = page.locator('input[aria-label*="name" i]').first
if locator.count() and locator.is_visible():
locator.fill(guest_name, timeout=2_000)
except Exception:
pass
def _detect_admission(page) -> bool:
"""True if we're clearly past the lobby and in the call itself.
Uses a JS-side probe because Meet's DOM structure varies by client
version. We check several high-signal indicators and declare admission
on the first hit:
1. Leave-call button is present (``aria-label`` contains "eave call").
2. Caption region has appeared (we installed the observer and it attached).
3. The participant list container is visible.
Conservative by default returns False on any error.
"""
probe = r"""
(() => {
const leave = document.querySelector('button[aria-label*="eave call" i]');
if (leave) return true;
if (window.__hermesMeetInstalled) {
const caps = document.querySelector(
'[role="region"][aria-label*="aption" i], ' +
'div[jsname="YSxPC"], div[jsname="tgaKEf"]'
);
if (caps) return true;
}
const parts = document.querySelector('[aria-label*="articipants" i]');
if (parts) return true;
return false;
})();
"""
try:
return bool(page.evaluate(probe))
except Exception:
return False
def _detect_denied(page) -> bool:
"""True when Meet is showing a 'you were denied' / 'no one admitted' page."""
probe = r"""
(() => {
const text = document.body ? document.body.innerText || '' : '';
// English only matches what shows up when the host denies or
// removes a guest.
if (/You can't join this video call/i.test(text)) return true;
if (/You were removed from the meeting/i.test(text)) return true;
if (/No one responded to your request to join/i.test(text)) return true;
return false;
})();
"""
try:
return bool(page.evaluate(probe))
except Exception:
return False
def _looks_like_human_speaker(speaker: str, bot_guest_name: str) -> bool:
"""Whether a caption line's speaker is probably a human, not our bot echo.
Meet attributes captions to the speaker's display name. When Chrome is
reading our fake mic, Meet still attributes captions to *our* bot name
(because the bot is the one "speaking"). We don't want those to trigger
barge-in. Anything else real participant names does.
Conservative: unknown / blank speakers (common when caption scraping
falls back to raw text) do NOT trigger barge-in, because we can't tell
whether it was a human or us.
"""
if not speaker or not speaker.strip():
return False
spk = speaker.strip().lower()
if spk in ("unknown", "you", bot_guest_name.strip().lower()):
return False
return True
def _click_join(page, state: _BotState) -> None:
"""Click 'Join now' or 'Ask to join' if either button is visible.
Flags ``lobby_waiting`` when we hit the "waiting for host to admit you"
state so the agent can surface that in status.
"""
for label in ("Join now", "Ask to join"):
try:
btn = page.get_by_role("button", name=label, exact=False).first
if btn.count() and btn.is_visible():
btn.click(timeout=3_000)
if label == "Ask to join":
state.set(lobby_waiting=True)
break
except Exception:
continue
def _parse_duration(raw: str) -> Optional[float]:
"""Parse ``30m`` / ``2h`` / ``90`` (seconds) → float seconds, or None."""
if not raw:
return None
raw = raw.strip().lower()
try:
if raw.endswith("h"):
return float(raw[:-1]) * 3600
if raw.endswith("m"):
return float(raw[:-1]) * 60
if raw.endswith("s"):
return float(raw[:-1])
return float(raw)
except ValueError:
return None
if __name__ == "__main__": # pragma: no cover — subprocess entry point
sys.exit(run_bot())
-54
View File
@@ -1,54 +0,0 @@
"""Remote 'node host' primitive for the google_meet plugin.
Lets the Meet bot (Playwright + Chrome) run on a different machine than
the hermes-agent gateway. The gateway speaks a small JSON-over-WebSocket
RPC protocol to the remote node; the node wraps the existing
``plugins.google_meet.process_manager`` API.
Topology
--------
gateway (Linux) ws://mac.local:18789 node server (Mac)
process_manager
meet_bot (Playwright)
Why: Google sign-in + Chrome profile live on the user's laptop. Running
the bot there reuses that profile without shipping credentials to the
server.
Public surface
--------------
NodeClient gateway-side RPC client (short-lived sync WS per call)
NodeServer long-running server that hosts the bot
NodeRegistry local JSON registry of approved nodes (name url+token)
protocol message envelope helpers (make_request, encode, decode, ...)
"""
from __future__ import annotations
from plugins.google_meet.node import protocol
from plugins.google_meet.node.client import NodeClient
from plugins.google_meet.node.protocol import (
VALID_REQUEST_TYPES,
decode,
encode,
make_error,
make_request,
make_response,
validate_request,
)
from plugins.google_meet.node.registry import NodeRegistry
from plugins.google_meet.node.server import NodeServer
__all__ = [
"NodeClient",
"NodeServer",
"NodeRegistry",
"protocol",
"make_request",
"make_response",
"make_error",
"encode",
"decode",
"validate_request",
"VALID_REQUEST_TYPES",
]
-125
View File
@@ -1,125 +0,0 @@
"""`hermes meet node ...` subcommand tree.
Wired into the existing ``hermes meet`` parser by the plugin's top-level
CLI. This module only defines the subparsers and their dispatch it
does not mutate the existing cli.py.
"""
from __future__ import annotations
import argparse
import asyncio
import json
import sys
from typing import Any
from plugins.google_meet.node.client import NodeClient
from plugins.google_meet.node.registry import NodeRegistry
from plugins.google_meet.node.server import NodeServer
def register_cli(subparser: argparse.ArgumentParser) -> None:
"""Add ``run / list / approve / remove / status / ping`` subparsers.
*subparser* is the ``hermes meet node`` argparse object typically
the result of ``meet_parser.add_parser('node', ...)``.
"""
sp = subparser.add_subparsers(dest="node_cmd", required=True)
run = sp.add_parser("run", help="Start a node server on this machine.")
run.add_argument("--host", default="0.0.0.0")
run.add_argument("--port", type=int, default=18789)
run.add_argument("--display-name", default="hermes-meet-node")
run.set_defaults(func=node_command)
lst = sp.add_parser("list", help="List approved remote nodes.")
lst.set_defaults(func=node_command)
app = sp.add_parser("approve", help="Register a remote node on the gateway.")
app.add_argument("name")
app.add_argument("url")
app.add_argument("token")
app.set_defaults(func=node_command)
rm = sp.add_parser("remove", help="Forget a registered node.")
rm.add_argument("name")
rm.set_defaults(func=node_command)
st = sp.add_parser("status", help="Ping a registered node.")
st.add_argument("name")
st.set_defaults(func=node_command)
pg = sp.add_parser("ping", help="Alias for status.")
pg.add_argument("name")
pg.set_defaults(func=node_command)
def node_command(args: argparse.Namespace) -> int:
"""Dispatch for ``hermes meet node ...``.
Returns a process exit code. Side-effects print to stdout/stderr.
"""
cmd = getattr(args, "node_cmd", None)
if cmd == "run":
server = NodeServer(
host=args.host,
port=args.port,
display_name=args.display_name,
)
token = server.ensure_token()
print(f"[meet-node] display_name={server.display_name}")
print(f"[meet-node] listening on ws://{args.host}:{args.port}")
print(f"[meet-node] token (copy to gateway): {token}")
print(f"[meet-node] approve with:")
print(f" hermes meet node approve <name> ws://<host>:{args.port} {token}")
try:
asyncio.run(server.serve())
except KeyboardInterrupt:
return 0
except RuntimeError as exc:
print(f"[meet-node] error: {exc}", file=sys.stderr)
return 2
return 0
reg = NodeRegistry()
if cmd == "list":
nodes = reg.list_all()
if not nodes:
print("no nodes registered")
return 0
for n in nodes:
print(f"{n['name']}\t{n['url']}\ttoken={n['token'][:6]}")
return 0
if cmd == "approve":
reg.add(args.name, args.url, args.token)
print(f"approved node {args.name!r} at {args.url}")
return 0
if cmd == "remove":
ok = reg.remove(args.name)
print(f"removed {args.name!r}" if ok else f"no such node: {args.name!r}")
return 0 if ok else 1
if cmd in ("status", "ping"):
entry = reg.get(args.name)
if entry is None:
print(f"no such node: {args.name!r}", file=sys.stderr)
return 1
client = NodeClient(entry["url"], entry["token"])
try:
result = client.ping()
except Exception as exc: # noqa: BLE001 — surface any connection error
print(json.dumps({"ok": False, "error": str(exc)}))
return 1
print(json.dumps({"ok": True, "node": args.name, **_coerce_dict(result)}))
return 0
print(f"unknown node command: {cmd!r}", file=sys.stderr)
return 2
def _coerce_dict(value: Any) -> dict:
return value if isinstance(value, dict) else {"result": value}
-107
View File
@@ -1,107 +0,0 @@
"""Gateway-side RPC client for a remote meet node.
Each call opens a short-lived synchronous WebSocket to the node, sends
exactly one request, reads exactly one response, and closes. This keeps
the client trivial to use from non-async tool handlers and avoids
maintaining persistent connection state across agent turns.
The ``websockets`` package is an optional dep we import it lazily so
plugin load doesn't require it.
"""
from __future__ import annotations
from typing import Any, Dict, Optional
from plugins.google_meet.node import protocol as _proto
class NodeClient:
"""Thin synchronous WS client matching the server's request surface."""
def __init__(self, url: str, token: str, timeout: float = 10.0) -> None:
if not isinstance(url, str) or not url:
raise ValueError("url must be a non-empty string")
if not isinstance(token, str) or not token:
raise ValueError("token must be a non-empty string")
self.url = url
self.token = token
self.timeout = float(timeout)
# ----- core RPC -----------------------------------------------------
def _rpc(self, type: str, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Send one request, return the response payload dict.
Raises RuntimeError when the server sends an ``error`` envelope
or the response id doesn't match.
"""
try:
from websockets.sync.client import connect # type: ignore
except ImportError as exc:
raise RuntimeError(
"NodeClient requires the 'websockets' package. "
"Install it with: pip install websockets"
) from exc
req = _proto.make_request(type, self.token, payload)
raw_out = _proto.encode(req)
with connect(self.url, open_timeout=self.timeout,
close_timeout=self.timeout) as ws:
ws.send(raw_out)
raw_in = ws.recv(timeout=self.timeout)
if isinstance(raw_in, (bytes, bytearray)):
raw_in = raw_in.decode("utf-8")
resp = _proto.decode(raw_in)
if resp.get("type") == "error":
raise RuntimeError(f"node error: {resp.get('error', '<unknown>')}")
if resp.get("id") != req["id"]:
raise RuntimeError(
f"response id mismatch: sent {req['id']}, got {resp.get('id')!r}"
)
payload_out = resp.get("payload")
if not isinstance(payload_out, dict):
# Ping returns {"type": "pong", "payload": {...}} — still a dict.
raise RuntimeError("response missing payload dict")
return payload_out
# ----- convenience methods -----------------------------------------
def start_bot(
self,
url: str,
guest_name: str = "Hermes Agent",
duration: Optional[str] = None,
headed: bool = False,
mode: str = "transcribe",
) -> Dict[str, Any]:
payload: Dict[str, Any] = {
"url": url,
"guest_name": guest_name,
"headed": bool(headed),
"mode": mode,
}
if duration is not None:
payload["duration"] = duration
return self._rpc("start_bot", payload)
def stop(self) -> Dict[str, Any]:
return self._rpc("stop", {})
def status(self) -> Dict[str, Any]:
return self._rpc("status", {})
def transcript(self, last: Optional[int] = None) -> Dict[str, Any]:
payload: Dict[str, Any] = {}
if last is not None:
payload["last"] = int(last)
return self._rpc("transcript", payload)
def say(self, text: str) -> Dict[str, Any]:
return self._rpc("say", {"text": str(text)})
def ping(self) -> Dict[str, Any]:
return self._rpc("ping", {})
-124
View File
@@ -1,124 +0,0 @@
"""Wire protocol for gateway ↔ node RPC.
Everything is a JSON object with the same envelope shape:
Request: {"type": <str>, "id": <str>, "token": <str>, "payload": <dict>}
Response: {"type": "<req-type>_res", "id": <req-id>, "payload": <dict>}
Error: {"type": "error", "id": <req-id>, "error": <str>}
Requests must carry the shared bearer token (set up via
``hermes meet node approve`` on the gateway and read off disk on the
server). Mismatched tokens are rejected before dispatch.
"""
from __future__ import annotations
import json
import uuid
from typing import Any, Dict, Tuple
VALID_REQUEST_TYPES = frozenset({
"start_bot",
"stop",
"status",
"transcript",
"say",
"ping",
})
def make_request(
type: str,
token: str,
payload: Dict[str, Any],
req_id: str | None = None,
) -> Dict[str, Any]:
"""Construct a request envelope.
``req_id`` is auto-generated (uuid4 hex) when not supplied so callers
can correlate async responses.
"""
if not isinstance(type, str) or not type:
raise ValueError("type must be a non-empty string")
if type not in VALID_REQUEST_TYPES:
raise ValueError(f"unknown request type: {type!r}")
if not isinstance(token, str):
raise ValueError("token must be a string")
if not isinstance(payload, dict):
raise ValueError("payload must be a dict")
return {
"type": type,
"id": req_id or uuid.uuid4().hex,
"token": token,
"payload": payload,
}
def make_response(req_id: str, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Build a success response. The caller supplies the *request* type;
we suffix it with ``_res`` so clients can assert they got the right
reply.
For simplicity we don't require the type here — clients usually just
key off ``id``. But we still emit a generic ``*_res`` envelope.
"""
if not isinstance(payload, dict):
raise ValueError("payload must be a dict")
return {"type": "response", "id": req_id, "payload": payload}
def make_error(req_id: str, error: str) -> Dict[str, Any]:
return {"type": "error", "id": req_id, "error": str(error)}
def encode(msg: Dict[str, Any]) -> str:
"""Serialize a message envelope to a JSON string."""
return json.dumps(msg, separators=(",", ":"), ensure_ascii=False)
def decode(raw: str) -> Dict[str, Any]:
"""Parse a JSON envelope, raising ValueError on anything malformed.
Minimal type validation: must be an object, must contain ``type`` and
``id``. Heavier validation (token match, payload shape) happens in
:func:`validate_request` on the server side.
"""
try:
obj = json.loads(raw)
except (TypeError, json.JSONDecodeError) as exc:
raise ValueError(f"malformed JSON: {exc}") from exc
if not isinstance(obj, dict):
raise ValueError("envelope must be a JSON object")
if "type" not in obj or not isinstance(obj["type"], str):
raise ValueError("envelope missing string 'type'")
if "id" not in obj or not isinstance(obj["id"], str):
raise ValueError("envelope missing string 'id'")
return obj
def validate_request(msg: Dict[str, Any], expected_token: str) -> Tuple[bool, str]:
"""Check a decoded request against the server's shared token.
Returns ``(True, "")`` when the envelope is acceptable or
``(False, <reason>)`` otherwise. Reason strings are safe to surface
back to the client in an error envelope.
"""
if not isinstance(msg, dict):
return False, "envelope must be a dict"
t = msg.get("type")
if not isinstance(t, str) or not t:
return False, "missing or non-string 'type'"
if t not in VALID_REQUEST_TYPES:
return False, f"unknown request type: {t!r}"
if not isinstance(msg.get("id"), str) or not msg.get("id"):
return False, "missing or non-string 'id'"
token = msg.get("token")
if not isinstance(token, str) or not token:
return False, "missing token"
if token != expected_token:
return False, "token mismatch"
payload = msg.get("payload")
if not isinstance(payload, dict):
return False, "payload must be a dict"
return True, ""
-112
View File
@@ -1,112 +0,0 @@
"""Local JSON registry of approved remote meet nodes.
Lives at ``$HERMES_HOME/workspace/meetings/nodes.json``. The gateway
consults it to resolve a ``chrome_node`` name to a ``(url, token)`` pair
before opening a WebSocket to the remote bot host.
Schema
------
{
"nodes": {
"<name>": {
"url": "ws://host:port",
"token": "...",
"added_at": <epoch_float>
}
}
}
"""
from __future__ import annotations
import json
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from hermes_constants import get_hermes_home
def _default_path() -> Path:
return Path(get_hermes_home()) / "workspace" / "meetings" / "nodes.json"
class NodeRegistry:
"""Simple file-backed registry. Not concurrent-safe across processes
single writer assumed (the gateway CLI)."""
def __init__(self, path: Optional[Path] = None) -> None:
self.path = Path(path) if path is not None else _default_path()
# ----- storage ------------------------------------------------------
def _load(self) -> Dict[str, Any]:
if not self.path.is_file():
return {"nodes": {}}
try:
data = json.loads(self.path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
return {"nodes": {}}
if not isinstance(data, dict) or not isinstance(data.get("nodes"), dict):
return {"nodes": {}}
return data
def _save(self, data: Dict[str, Any]) -> None:
self.path.parent.mkdir(parents=True, exist_ok=True)
tmp = self.path.with_suffix(".json.tmp")
tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")
tmp.replace(self.path)
# ----- public API ---------------------------------------------------
def get(self, name: str) -> Optional[Dict[str, Any]]:
data = self._load()
entry = data["nodes"].get(name)
if entry is None:
return None
return {"name": name, **entry}
def add(self, name: str, url: str, token: str) -> None:
if not isinstance(name, str) or not name:
raise ValueError("node name must be a non-empty string")
if not isinstance(url, str) or not url:
raise ValueError("url must be a non-empty string")
if not isinstance(token, str) or not token:
raise ValueError("token must be a non-empty string")
data = self._load()
data["nodes"][name] = {
"url": url,
"token": token,
"added_at": time.time(),
}
self._save(data)
def remove(self, name: str) -> bool:
data = self._load()
if name in data["nodes"]:
del data["nodes"][name]
self._save(data)
return True
return False
def list_all(self) -> List[Dict[str, Any]]:
data = self._load()
out: List[Dict[str, Any]] = []
for name, entry in sorted(data["nodes"].items()):
out.append({"name": name, **entry})
return out
def resolve(self, chrome_node: Optional[str]) -> Optional[Dict[str, Any]]:
"""Resolve a node name to its entry.
If ``chrome_node`` is provided, return that named node (or None).
If ``chrome_node`` is None, return the sole registered node when
exactly one is registered; otherwise return None (ambiguous or
empty).
"""
if chrome_node:
return self.get(chrome_node)
nodes = self.list_all()
if len(nodes) == 1:
return nodes[0]
return None
-193
View File
@@ -1,193 +0,0 @@
"""Remote node server.
Runs on the machine that will host the Meet bot (typically the user's
Mac laptop with a signed-in Chrome). Exposes a WebSocket endpoint that
accepts signed RPC requests and dispatches them to the existing
``plugins.google_meet.process_manager`` module.
Launched by ``hermes meet node run``.
Token handling
--------------
On first boot we mint 32 hex chars of entropy and persist them at
``$HERMES_HOME/workspace/meetings/node_token.json``. Subsequent boots
reuse the same token so previously-approved gateways don't need to be
re-paired. The operator copies this token out-of-band to the gateway
via ``hermes meet node approve <name> <url> <token>``.
Dependencies
------------
``websockets`` is an optional dep. We import it lazily inside
:meth:`serve` so installing the plugin doesn't require it unless you
actually host a node.
"""
from __future__ import annotations
import json
import secrets
import time
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_constants import get_hermes_home
from plugins.google_meet.node import protocol as _proto
def _default_token_path() -> Path:
return Path(get_hermes_home()) / "workspace" / "meetings" / "node_token.json"
class NodeServer:
"""WebSocket server that executes meet bot RPCs locally."""
def __init__(
self,
host: str = "0.0.0.0",
port: int = 18789,
token_path: Optional[Path] = None,
display_name: str = "hermes-meet-node",
) -> None:
self.host = host
self.port = port
self.display_name = display_name
self.token_path = Path(token_path) if token_path is not None else _default_token_path()
self._token: Optional[str] = None
# ----- token management --------------------------------------------
def ensure_token(self) -> str:
"""Return the persisted shared secret, generating one on first use."""
if self._token:
return self._token
if self.token_path.is_file():
try:
data = json.loads(self.token_path.read_text(encoding="utf-8"))
tok = data.get("token")
if isinstance(tok, str) and tok:
self._token = tok
return tok
except (OSError, json.JSONDecodeError):
pass
tok = secrets.token_hex(16) # 32 hex chars
self.token_path.parent.mkdir(parents=True, exist_ok=True)
tmp = self.token_path.with_suffix(".json.tmp")
tmp.write_text(
json.dumps({"token": tok, "generated_at": time.time()}, indent=2),
encoding="utf-8",
)
tmp.replace(self.token_path)
self._token = tok
return tok
def get_token(self) -> str:
"""Alias for :meth:`ensure_token`; does not mutate on subsequent calls."""
return self.ensure_token()
# ----- dispatch -----------------------------------------------------
async def _handle_request(self, msg: Dict[str, Any]) -> Dict[str, Any]:
"""Validate + dispatch a single decoded request envelope.
Always returns a response envelope (success or error); never
raises. Errors from inside the process_manager are wrapped into
the response payload's ``ok``/``error`` keys (which pm already
does) rather than being re-encoded as error envelopes the
envelope-level error channel is reserved for auth / protocol
failures.
"""
expected = self.ensure_token()
ok, reason = _proto.validate_request(msg, expected)
if not ok:
return _proto.make_error(str(msg.get("id") or ""), reason)
req_id = msg["id"]
t = msg["type"]
payload = msg["payload"]
# Import lazily so test mocks can monkeypatch freely.
from plugins.google_meet import process_manager as pm
try:
if t == "ping":
return {"type": "pong", "id": req_id,
"payload": {"display_name": self.display_name,
"ts": time.time()}}
if t == "start_bot":
# Whitelist kwargs we pass through to pm.start.
kwargs = {
k: payload[k]
for k in ("url", "guest_name", "duration", "headed",
"auth_state", "session_id", "out_dir")
if k in payload
}
if "url" not in kwargs:
return _proto.make_error(req_id, "missing 'url' in payload")
result = pm.start(**kwargs)
return _proto.make_response(req_id, result)
if t == "stop":
reason_arg = payload.get("reason", "requested")
result = pm.stop(reason=reason_arg)
return _proto.make_response(req_id, result)
if t == "status":
return _proto.make_response(req_id, pm.status())
if t == "transcript":
last = payload.get("last")
result = pm.transcript(last=last)
return _proto.make_response(req_id, result)
if t == "say":
# v2 wiring: enqueue into say_queue.jsonl inside the
# active meeting's out_dir when present. The bot-side
# consumer is v3+ (for v1 this is a stub returning ok).
text = payload.get("text", "")
active = pm._read_active() # type: ignore[attr-defined]
enqueued = False
if active and active.get("out_dir"):
queue = Path(active["out_dir"]) / "say_queue.jsonl"
try:
queue.parent.mkdir(parents=True, exist_ok=True)
with queue.open("a", encoding="utf-8") as fh:
fh.write(json.dumps({"text": text, "ts": time.time()}) + "\n")
enqueued = True
except OSError:
enqueued = False
return _proto.make_response(
req_id,
{"ok": True, "enqueued": enqueued, "text": text},
)
except Exception as exc: # noqa: BLE001 — surface any pm crash to client
return _proto.make_error(req_id, f"{type(exc).__name__}: {exc}")
return _proto.make_error(req_id, f"unhandled type: {t!r}")
# ----- server loop --------------------------------------------------
async def serve(self) -> None:
"""Run the WebSocket server until cancelled.
Blocks forever. Callers typically wrap this in ``asyncio.run``.
"""
try:
import websockets # type: ignore
except ImportError as exc:
raise RuntimeError(
"NodeServer.serve requires the 'websockets' package. "
"Install it with: pip install websockets"
) from exc
self.ensure_token()
async def _handler(ws):
async for raw in ws:
try:
msg = _proto.decode(raw if isinstance(raw, str) else raw.decode("utf-8"))
except ValueError as exc:
await ws.send(_proto.encode(_proto.make_error("", f"decode: {exc}")))
continue
reply = await self._handle_request(msg)
await ws.send(_proto.encode(reply))
async with websockets.serve(_handler, self.host, self.port):
# Run until cancelled.
import asyncio
await asyncio.Future()
-16
View File
@@ -1,16 +0,0 @@
name: google_meet
version: 0.2.0
description: "Join a Google Meet call, transcribe live captions, speak in realtime, and follow up afterwards. v1 transcribe-only is the default; v2 realtime duplex audio via OpenAI Realtime + BlackHole/PulseAudio ships with mode='realtime'; v3 remote node host lets the bot run on a different machine than the gateway (gateway on Linux, Chrome+signed-in profile on the user's Mac). Explicit-by-design: only joins meet.google.com URLs passed in \u2014 no calendar scanning, no auto-dial."
author: NousResearch
kind: standalone
platforms:
- linux
- macos
provides_tools:
- meet_join
- meet_leave
- meet_status
- meet_transcript
- meet_say
hooks:
- on_session_end
-326
View File
@@ -1,326 +0,0 @@
"""Subprocess lifecycle manager for the google_meet bot.
Single active meeting at a time. Stores the running pid + out_dir in a
session-scoped state file under ``$HERMES_HOME/workspace/meetings/.active.json``
so tool calls across turns can find the bot, and ``on_session_end`` can clean
it up.
The bot runs as a detached subprocess we don't hold file descriptors open,
so the parent agent loop can't block on it. We communicate via files only.
"""
from __future__ import annotations
import json
import os
import signal
import subprocess
import sys
import time
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_constants import get_hermes_home
# File + directory layout (under $HERMES_HOME):
#
# workspace/meetings/
# .active.json # pointer to current session's bot
# <meeting-id>/
# status.json # live bot state (written by bot each tick)
# transcript.txt # scraped captions
#
# .active.json holds:
# {"pid": 12345, "meeting_id": "abc-defg-hij", "out_dir": "...",
# "url": "https://meet.google.com/...", "started_at": 1714159200.0,
# "session_id": "optional"}
def _root() -> Path:
return Path(get_hermes_home()) / "workspace" / "meetings"
def _active_file() -> Path:
return _root() / ".active.json"
def _read_active() -> Optional[Dict[str, Any]]:
p = _active_file()
if not p.is_file():
return None
try:
return json.loads(p.read_text(encoding="utf-8"))
except Exception:
return None
def _write_active(data: Dict[str, Any]) -> None:
p = _active_file()
p.parent.mkdir(parents=True, exist_ok=True)
tmp = p.with_suffix(".json.tmp")
tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")
tmp.replace(p)
def _clear_active() -> None:
try:
_active_file().unlink()
except FileNotFoundError:
pass
def _pid_alive(pid: int) -> bool:
try:
os.kill(pid, 0)
except ProcessLookupError:
return False
except PermissionError:
# Process exists but we can't signal it — treat as alive.
return True
return True
# ---------------------------------------------------------------------------
# Public API — used by tool handlers + CLI
# ---------------------------------------------------------------------------
def start(
url: str,
*,
out_dir: Optional[Path] = None,
headed: bool = False,
auth_state: Optional[str] = None,
guest_name: str = "Hermes Agent",
duration: Optional[str] = None,
session_id: Optional[str] = None,
mode: str = "transcribe",
realtime_model: Optional[str] = None,
realtime_voice: Optional[str] = None,
realtime_instructions: Optional[str] = None,
realtime_api_key: Optional[str] = None,
) -> Dict[str, Any]:
"""Spawn the meet_bot subprocess for *url*.
If a bot is already running for this hermes install, leave it first
we enforce single-active-meeting semantics.
Returns a dict summarizing the started bot.
"""
from plugins.google_meet.meet_bot import _is_safe_meet_url, _meeting_id_from_url
if not _is_safe_meet_url(url):
return {
"ok": False,
"error": (
"refusing: only https://meet.google.com/ URLs are allowed. "
"got: " + repr(url)
),
}
existing = _read_active()
if existing and _pid_alive(int(existing.get("pid", 0))):
stop(reason="replaced by new meet_join")
meeting_id = _meeting_id_from_url(url)
out = out_dir or (_root() / meeting_id)
out.mkdir(parents=True, exist_ok=True)
# Wipe any stale transcript/status files from a previous run of this
# meeting id so polling isn't confused.
for name in ("transcript.txt", "status.json"):
f = out / name
if f.exists():
try:
f.unlink()
except OSError:
pass
env = os.environ.copy()
env["HERMES_MEET_URL"] = url
env["HERMES_MEET_OUT_DIR"] = str(out)
env["HERMES_MEET_GUEST_NAME"] = guest_name
if headed:
env["HERMES_MEET_HEADED"] = "1"
if auth_state:
env["HERMES_MEET_AUTH_STATE"] = auth_state
if duration:
env["HERMES_MEET_DURATION"] = duration
# v2: realtime mode + passthroughs. The bot defaults to transcribe
# mode if HERMES_MEET_MODE isn't set, matching v1 behavior.
if mode:
env["HERMES_MEET_MODE"] = mode
if realtime_model:
env["HERMES_MEET_REALTIME_MODEL"] = realtime_model
if realtime_voice:
env["HERMES_MEET_REALTIME_VOICE"] = realtime_voice
if realtime_instructions:
env["HERMES_MEET_REALTIME_INSTRUCTIONS"] = realtime_instructions
if realtime_api_key:
env["HERMES_MEET_REALTIME_KEY"] = realtime_api_key
log_path = out / "bot.log"
# Detach: stdin=devnull, stdout/stderr → log file, new session so parent
# signals don't propagate.
log_fh = open(log_path, "ab", buffering=0)
try:
proc = subprocess.Popen(
[sys.executable, "-m", "plugins.google_meet.meet_bot"],
stdin=subprocess.DEVNULL,
stdout=log_fh,
stderr=subprocess.STDOUT,
env=env,
start_new_session=True,
close_fds=True,
)
finally:
# The subprocess now owns the log fd; we can close ours.
log_fh.close()
record = {
"pid": proc.pid,
"meeting_id": meeting_id,
"out_dir": str(out),
"url": url,
"started_at": time.time(),
"session_id": session_id,
"log_path": str(log_path),
"mode": mode,
}
_write_active(record)
return {"ok": True, **record}
def status() -> Dict[str, Any]:
"""Return the current meeting state, or ``{"ok": False, "reason": ...}``."""
active = _read_active()
if not active:
return {"ok": False, "reason": "no active meeting"}
pid = int(active.get("pid", 0))
alive = _pid_alive(pid) if pid else False
status_path = Path(active.get("out_dir", "")) / "status.json"
bot_status: Dict[str, Any] = {}
if status_path.is_file():
try:
bot_status = json.loads(status_path.read_text(encoding="utf-8"))
except Exception:
pass
return {
"ok": True,
"alive": alive,
"pid": pid,
"meetingId": active.get("meeting_id"),
"url": active.get("url"),
"startedAt": active.get("started_at"),
"outDir": active.get("out_dir"),
**bot_status,
}
def transcript(last: Optional[int] = None) -> Dict[str, Any]:
"""Read the current transcript file. Returns ok=False if none exists."""
active = _read_active()
if not active:
return {"ok": False, "reason": "no active meeting"}
tp = Path(active.get("out_dir", "")) / "transcript.txt"
if not tp.is_file():
return {
"ok": True,
"meetingId": active.get("meeting_id"),
"lines": [],
"total": 0,
"path": str(tp),
}
text = tp.read_text(encoding="utf-8", errors="replace")
all_lines = [ln for ln in text.splitlines() if ln.strip()]
lines = all_lines[-last:] if last else all_lines
return {
"ok": True,
"meetingId": active.get("meeting_id"),
"lines": lines,
"total": len(all_lines),
"path": str(tp),
}
def enqueue_say(text: str) -> Dict[str, Any]:
"""Append a ``say`` request to the active bot's JSONL queue.
Returns ``{"ok": False, "reason": ...}`` when no meeting is active or
the active bot is in transcribe-only mode. Otherwise writes a line to
``<out_dir>/say_queue.jsonl`` that the bot's realtime speaker thread
will consume.
"""
import uuid
text = (text or "").strip()
if not text:
return {"ok": False, "reason": "text is required"}
active = _read_active()
if not active:
return {"ok": False, "reason": "no active meeting"}
if active.get("mode") != "realtime":
return {
"ok": False,
"reason": (
"active meeting is in transcribe mode — pass mode='realtime' "
"to meet_join to enable agent speech"
),
}
out_dir = Path(active.get("out_dir", ""))
if not out_dir.is_dir():
return {"ok": False, "reason": f"out_dir missing: {out_dir}"}
queue_path = out_dir / "say_queue.jsonl"
entry = {"id": uuid.uuid4().hex[:12], "text": text}
with queue_path.open("a", encoding="utf-8") as f:
f.write(json.dumps(entry) + "\n")
return {
"ok": True,
"meetingId": active.get("meeting_id"),
"enqueued_id": entry["id"],
"queue_path": str(queue_path),
}
def stop(*, reason: str = "requested") -> Dict[str, Any]:
"""Signal the active bot to leave cleanly, then clear the active pointer.
Sends SIGTERM and waits up to 10s for the bot to exit. Falls back to
SIGKILL if the bot doesn't respond.
"""
active = _read_active()
if not active:
return {"ok": False, "reason": "no active meeting"}
pid = int(active.get("pid", 0))
out_dir = active.get("out_dir")
transcript_path = Path(out_dir) / "transcript.txt" if out_dir else None
if pid and _pid_alive(pid):
try:
os.kill(pid, signal.SIGTERM)
except ProcessLookupError:
pass
for _ in range(20):
if not _pid_alive(pid):
break
time.sleep(0.5)
if _pid_alive(pid):
try:
os.kill(pid, signal.SIGKILL)
except ProcessLookupError:
pass
_clear_active()
return {
"ok": True,
"reason": reason,
"meetingId": active.get("meeting_id"),
"transcriptPath": str(transcript_path) if transcript_path else None,
}
-10
View File
@@ -1,10 +0,0 @@
"""Realtime speech subpackage for the google_meet plugin (v2).
Provides a thin OpenAI Realtime API client and a file-queue speaker
wrapper so the Meet bot can play synthesized speech through the
virtual audio bridge.
"""
from .openai_client import RealtimeSession, RealtimeSpeaker # noqa: F401
__all__ = ["RealtimeSession", "RealtimeSpeaker"]
@@ -1,332 +0,0 @@
"""OpenAI Realtime API WebSocket client + file-queue speaker.
This module is the "output" side of the v2 voice bridge: it takes text,
sends it to the OpenAI Realtime API, receives audio deltas back, and
appends the PCM bytes to a file. A separate consumer (the audio
bridge) streams that file into Chrome's fake microphone.
Designed for simplicity: a single synchronous WebSocket connection per
speaker, per session. The ``websockets`` package is imported lazily so
that importing this module never fails just because the optional dep
is missing.
"""
from __future__ import annotations
import base64
import json
import time
import uuid
from pathlib import Path
from typing import Any, Callable, Optional
REALTIME_URL = "wss://api.openai.com/v1/realtime"
def _require_websockets():
"""Import ``websockets.sync.client.connect`` or raise with hint."""
try:
from websockets.sync.client import connect as _connect # type: ignore
except ImportError as exc: # pragma: no cover - exercised via test
raise RuntimeError(
"websockets package is required for OpenAI Realtime; "
"install with: pip install websockets"
) from exc
return _connect
class RealtimeSession:
"""Minimal sync client for the OpenAI Realtime WebSocket API.
Usage:
sess = RealtimeSession(api_key=..., audio_sink_path=Path("out.pcm"))
sess.connect()
sess.speak("Hello team.")
sess.close()
Thread safety: ``speak`` and ``cancel_response`` may be called from
different threads; a lock serializes WebSocket writes.
"""
def __init__(
self,
api_key: str,
model: str = "gpt-realtime",
voice: str = "alloy",
instructions: str = "",
audio_sink_path: Optional[Path] = None,
sample_rate: int = 24000,
) -> None:
import threading as _threading
self.api_key = api_key
self.model = model
self.voice = voice
self.instructions = instructions
self.audio_sink_path = Path(audio_sink_path) if audio_sink_path else None
self.sample_rate = sample_rate
self._ws: Any = None
self._send_lock = _threading.Lock()
self._last_response_id: Optional[str] = None
# Public counters for status reporting.
self.audio_bytes_out: int = 0
self.last_audio_out_at: Optional[float] = None
# ── lifecycle ─────────────────────────────────────────────────────────
def connect(self) -> None:
"""Open WS and send session.update with voice+instructions."""
connect = _require_websockets()
url = f"{REALTIME_URL}?model={self.model}"
headers = [
("Authorization", f"Bearer {self.api_key}"),
("OpenAI-Beta", "realtime=v1"),
]
# websockets.sync.client.connect accepts either additional_headers=
# (newer) or extra_headers= depending on version; try the newer
# name first and fall back.
try:
self._ws = connect(url, additional_headers=headers)
except TypeError:
self._ws = connect(url, extra_headers=headers)
self._send_json(
{
"type": "session.update",
"session": {
"voice": self.voice,
"instructions": self.instructions,
"modalities": ["audio", "text"],
"output_audio_format": "pcm16",
"input_audio_format": "pcm16",
},
}
)
def close(self) -> None:
if self._ws is not None:
try:
self._ws.close()
except Exception:
pass
self._ws = None
# ── speaking ──────────────────────────────────────────────────────────
def speak(self, text: str, timeout: float = 30.0) -> dict:
"""Send ``text`` and accumulate the audio response.
Audio deltas are base64-decoded and appended to
``audio_sink_path`` (opened 'ab' and closed per call, so a
separate streaming reader can consume whatever is there).
"""
if self._ws is None:
raise RuntimeError("RealtimeSession.connect() must be called first")
start = time.monotonic()
self._send_json(
{
"type": "conversation.item.create",
"item": {
"type": "message",
"role": "user",
"content": [{"type": "input_text", "text": text}],
},
}
)
self._send_json(
{
"type": "response.create",
"response": {"modalities": ["audio"]},
}
)
bytes_written = 0
sink_fp = None
if self.audio_sink_path is not None:
self.audio_sink_path.parent.mkdir(parents=True, exist_ok=True)
sink_fp = open(self.audio_sink_path, "ab")
try:
while True:
remaining = timeout - (time.monotonic() - start)
if remaining <= 0:
raise TimeoutError(
f"realtime response did not complete within {timeout}s"
)
raw = self._recv(timeout=remaining)
if raw is None:
# Connection closed by peer.
break
try:
frame = json.loads(raw) if isinstance(raw, (str, bytes, bytearray)) else raw
except (TypeError, ValueError):
continue
if not isinstance(frame, dict):
continue
ftype = frame.get("type")
if ftype == "response.audio.delta":
b64 = frame.get("delta") or frame.get("audio") or ""
if b64 and sink_fp is not None:
try:
chunk = base64.b64decode(b64)
except (ValueError, TypeError):
chunk = b""
if chunk:
sink_fp.write(chunk)
sink_fp.flush()
bytes_written += len(chunk)
self.audio_bytes_out += len(chunk)
self.last_audio_out_at = time.time()
elif ftype == "response.created":
rid = (frame.get("response") or {}).get("id")
if rid:
self._last_response_id = rid
elif ftype in ("response.done", "response.completed", "response.cancelled"):
break
elif ftype == "error":
err = frame.get("error") or frame
raise RuntimeError(f"realtime error: {err}")
# All other frames (response.created, response.output_item.*,
# response.audio_transcript.delta, rate_limits.updated, ...)
# are ignored for v2.
finally:
if sink_fp is not None:
sink_fp.close()
duration_ms = (time.monotonic() - start) * 1000.0
return {
"ok": True,
"bytes_written": bytes_written,
"duration_ms": duration_ms,
}
# ── ws plumbing ───────────────────────────────────────────────────────
def cancel_response(self) -> bool:
"""Interrupt the in-flight response (barge-in).
Sends ``response.cancel`` on the current WebSocket so the model
stops generating audio immediately. Safe to call at any time;
returns True if a cancel was actually sent, False when there's
nothing to cancel or the socket isn't open.
"""
if self._ws is None:
return False
try:
self._send_json({"type": "response.cancel"})
return True
except Exception:
return False
def _send_json(self, payload: dict) -> None:
assert self._ws is not None
with self._send_lock:
self._ws.send(json.dumps(payload))
def _recv(self, timeout: Optional[float] = None):
assert self._ws is not None
try:
if timeout is None:
return self._ws.recv()
return self._ws.recv(timeout=timeout)
except TypeError:
# Older websockets may not accept timeout kwarg.
return self._ws.recv()
class RealtimeSpeaker:
"""File-based JSONL queue wrapper around :class:`RealtimeSession`.
Each line in ``queue_path`` is a JSON object of the form
``{"id": "<uuid>", "text": "..."}``. Processed lines are appended
to ``processed_path`` (if set) and then removed from the queue;
if ``processed_path`` is ``None``, processed lines are simply
dropped.
"""
def __init__(
self,
session: RealtimeSession,
queue_path: Path,
processed_path: Optional[Path] = None,
) -> None:
self.session = session
self.queue_path = Path(queue_path)
self.processed_path = Path(processed_path) if processed_path else None
# ── helpers ──────────────────────────────────────────────────────────
def _read_queue(self) -> list[dict]:
if not self.queue_path.exists():
return []
out: list[dict] = []
for line in self.queue_path.read_text().splitlines():
line = line.strip()
if not line:
continue
try:
entry = json.loads(line)
except ValueError:
continue
if not isinstance(entry, dict):
continue
if "id" not in entry:
entry["id"] = str(uuid.uuid4())
out.append(entry)
return out
def _rewrite_queue(self, remaining: list[dict]) -> None:
if not remaining:
# Keep the file but empty — consumers may be watching for
# new writes via mtime, and delete-then-recreate is a race.
self.queue_path.write_text("")
return
self.queue_path.write_text(
"\n".join(json.dumps(e) for e in remaining) + "\n"
)
def _append_processed(self, entry: dict, result: dict) -> None:
if self.processed_path is None:
return
self.processed_path.parent.mkdir(parents=True, exist_ok=True)
record = {"id": entry.get("id"), "text": entry.get("text", ""), "result": result}
with open(self.processed_path, "a") as fp:
fp.write(json.dumps(record) + "\n")
# ── main loop ────────────────────────────────────────────────────────
def run_until_stopped(
self,
stop_fn: Callable[[], bool],
poll_interval: float = 0.5,
) -> None:
while not stop_fn():
entries = self._read_queue()
if not entries:
time.sleep(poll_interval)
continue
# Process one at a time; re-check the queue file after each
# speak() call because new entries may have arrived.
head = entries[0]
text = (head.get("text") or "").strip()
if text:
try:
result = self.session.speak(text)
except Exception as exc:
result = {"ok": False, "error": str(exc)}
else:
result = {"ok": True, "bytes_written": 0, "duration_ms": 0.0}
self._append_processed(head, result)
# Re-read the queue from disk in case it was appended to
# while we were speaking, then drop the head.
latest = self._read_queue()
if latest and latest[0].get("id") == head.get("id"):
self._rewrite_queue(latest[1:])
else:
# Fallback: drop-by-id anywhere in the queue.
self._rewrite_queue(
[e for e in latest if e.get("id") != head.get("id")]
)
-348
View File
@@ -1,348 +0,0 @@
"""Agent-facing tools for the google_meet plugin.
Tools:
meet_join join a Google Meet URL (spawns Playwright bot locally
OR on a remote node host via node=<name>)
meet_status report bot liveness + transcript progress
meet_transcript read the current transcript (optional last-N)
meet_leave signal the bot to leave cleanly
meet_say (v2) speak text through the realtime audio bridge.
Requires the active meeting to have been joined with
mode='realtime'.
"""
from __future__ import annotations
import json
from typing import Any, Dict, Optional
from plugins.google_meet import process_manager as pm
# ---------------------------------------------------------------------------
# Runtime gate
# ---------------------------------------------------------------------------
def check_meet_requirements() -> bool:
"""Return True when the plugin can actually run LOCALLY.
Gates on:
* Python ``playwright`` package importable
* the plugin being on a supported platform (Linux or macOS)
Note: remote-node operation (``node=<name>``) only needs the
``websockets`` dep on the gateway side Chromium lives on the node.
But the plugin-level gate keeps the v1 semantics; individual tool
handlers relax the requirement when a node is addressed.
"""
import platform as _p
if _p.system().lower() not in ("linux", "darwin"):
return False
try:
import playwright # noqa: F401
except ImportError:
return False
return True
# ---------------------------------------------------------------------------
# Node client helper
# ---------------------------------------------------------------------------
def _resolve_node_client(node: Optional[str]):
"""Return (NodeClient, node_name) for *node*, or (None, None) to run local.
Raises RuntimeError with a readable message if the node is named but
unresolvable, so the handler can surface a clear error to the agent.
"""
if node is None or node == "":
return None, None
from plugins.google_meet.node.registry import NodeRegistry
from plugins.google_meet.node.client import NodeClient
reg = NodeRegistry()
entry = reg.resolve(node if node != "auto" else None)
if entry is None:
raise RuntimeError(
f"no registered meet node matches {node!r}"
"run `hermes meet node approve <name> <url> <token>` first"
)
client = NodeClient(url=entry["url"], token=entry["token"])
return client, entry.get("name")
# ---------------------------------------------------------------------------
# Schemas
# ---------------------------------------------------------------------------
MEET_JOIN_SCHEMA: Dict[str, Any] = {
"name": "meet_join",
"description": (
"Join a Google Meet call and start scraping live captions into a "
"transcript file. Only meet.google.com URLs are accepted; no calendar "
"scanning, no auto-dial. Spawns a headless Chromium subprocess that "
"runs in parallel with the agent loop — returns immediately. Poll "
"with meet_status and read captions with meet_transcript. Reminder "
"to the agent: you should announce yourself in the meeting (there is "
"no automatic consent announcement)."
),
"parameters": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": (
"Full https://meet.google.com/... URL. Required."
),
},
"mode": {
"type": "string",
"enum": ["transcribe", "realtime"],
"description": (
"transcribe (default): listen-only, scrape captions. "
"realtime: also enable agent speech via meet_say "
"(requires OpenAI Realtime key + platform audio bridge)."
),
},
"guest_name": {
"type": "string",
"description": (
"Display name to use when joining as guest. Defaults to "
"'Hermes Agent'."
),
},
"duration": {
"type": "string",
"description": (
"Optional max duration before auto-leave (e.g. '30m', "
"'2h', '90s'). Omit to stay until meet_leave is called."
),
},
"headed": {
"type": "boolean",
"description": (
"Run Chromium headed instead of headless (debug only). "
"Default false."
),
},
"node": {
"type": "string",
"description": (
"Name of a registered remote node to run the bot on "
"(useful when the gateway runs on a headless Linux box "
"but the user's Chrome with a signed-in Google profile "
"lives on their Mac). Pass 'auto' to use the single "
"registered node. Default: run locally. Nodes are "
"approved via `hermes meet node approve`."
),
},
},
"required": ["url"],
"additionalProperties": False,
},
}
MEET_STATUS_SCHEMA: Dict[str, Any] = {
"name": "meet_status",
"description": (
"Report the current Meet session state — whether the bot is alive, "
"has joined, is sitting in the lobby, number of transcript lines "
"captured, and last-caption timestamp."
),
"parameters": {
"type": "object",
"properties": {
"node": {"type": "string"},
},
"additionalProperties": False,
},
}
MEET_TRANSCRIPT_SCHEMA: Dict[str, Any] = {
"name": "meet_transcript",
"description": (
"Read the scraped transcript for the active Meet session. Returns "
"full transcript unless 'last' is set, in which case returns the last "
"N lines only."
),
"parameters": {
"type": "object",
"properties": {
"last": {
"type": "integer",
"description": (
"Optional: return only the last N caption lines. Useful "
"for polling during a meeting without re-reading the "
"whole transcript."
),
"minimum": 1,
},
"node": {"type": "string"},
},
"additionalProperties": False,
},
}
MEET_LEAVE_SCHEMA: Dict[str, Any] = {
"name": "meet_leave",
"description": (
"Leave the active Meet call cleanly, stop caption scraping, and "
"finalize the transcript file. Safe to call when no meeting is "
"active — returns ok=false with a reason."
),
"parameters": {
"type": "object",
"properties": {
"node": {"type": "string"},
},
"additionalProperties": False,
},
}
MEET_SAY_SCHEMA: Dict[str, Any] = {
"name": "meet_say",
"description": (
"Speak text into the active Meet call. Requires the active meeting "
"to have been joined with mode='realtime'. The text is queued to "
"the bot's OpenAI Realtime session; the generated audio is streamed "
"into Chrome's fake microphone via a virtual audio device "
"(PulseAudio null-sink on Linux, BlackHole on macOS). Returns "
"immediately — the actual speech lags by a couple of seconds."
),
"parameters": {
"type": "object",
"properties": {
"text": {"type": "string", "description": "Text to speak."},
"node": {"type": "string"},
},
"required": ["text"],
"additionalProperties": False,
},
}
# ---------------------------------------------------------------------------
# Handlers
# ---------------------------------------------------------------------------
def _json(obj: Any) -> str:
return json.dumps(obj, ensure_ascii=False)
def _err(msg: str, **extra) -> str:
return _json({"success": False, "error": msg, **extra})
def handle_meet_join(args: Dict[str, Any], **_kw) -> str:
url = (args.get("url") or "").strip()
if not url:
return _err("url is required")
mode = (args.get("mode") or "transcribe").strip().lower()
if mode not in ("transcribe", "realtime"):
return _err(f"mode must be 'transcribe' or 'realtime' (got {mode!r})")
node = args.get("node")
try:
client, node_name = _resolve_node_client(node)
except RuntimeError as e:
return _err(str(e))
if client is not None:
# Remote path — delegate to the node host.
try:
res = client.start_bot(
url=url,
guest_name=str(args.get("guest_name") or "Hermes Agent"),
duration=str(args.get("duration")) if args.get("duration") else None,
headed=bool(args.get("headed", False)),
mode=mode,
)
return _json({"success": bool(res.get("ok")), "node": node_name, **res})
except Exception as e:
return _err(f"remote node start_bot failed: {e}", node=node_name)
# Local path — same as v1, with v2 params.
if not check_meet_requirements():
return _err(
"google_meet plugin prerequisites missing — install with "
"`pip install playwright && python -m playwright install "
"chromium`. Plugin is supported on Linux and macOS only."
)
res = pm.start(
url=url,
headed=bool(args.get("headed", False)),
guest_name=str(args.get("guest_name") or "Hermes Agent"),
duration=str(args.get("duration")) if args.get("duration") else None,
mode=mode,
)
return _json({"success": bool(res.get("ok")), **res})
def handle_meet_status(args: Dict[str, Any], **_kw) -> str:
try:
client, node_name = _resolve_node_client(args.get("node"))
except RuntimeError as e:
return _err(str(e))
if client is not None:
try:
res = client.status()
return _json({"success": bool(res.get("ok")), "node": node_name, **res})
except Exception as e:
return _err(f"remote node status failed: {e}", node=node_name)
res = pm.status()
return _json({"success": bool(res.get("ok")), **res})
def handle_meet_transcript(args: Dict[str, Any], **_kw) -> str:
last = args.get("last")
try:
last_i = int(last) if last is not None else None
if last_i is not None and last_i < 1:
last_i = None
except (TypeError, ValueError):
last_i = None
try:
client, node_name = _resolve_node_client(args.get("node"))
except RuntimeError as e:
return _err(str(e))
if client is not None:
try:
res = client.transcript(last=last_i)
return _json({"success": bool(res.get("ok")), "node": node_name, **res})
except Exception as e:
return _err(f"remote node transcript failed: {e}", node=node_name)
res = pm.transcript(last=last_i)
return _json({"success": bool(res.get("ok")), **res})
def handle_meet_leave(args: Dict[str, Any], **_kw) -> str:
try:
client, node_name = _resolve_node_client(args.get("node"))
except RuntimeError as e:
return _err(str(e))
if client is not None:
try:
res = client.stop()
return _json({"success": bool(res.get("ok")), "node": node_name, **res})
except Exception as e:
return _err(f"remote node stop failed: {e}", node=node_name)
res = pm.stop(reason="agent called meet_leave")
return _json({"success": bool(res.get("ok")), **res})
def handle_meet_say(args: Dict[str, Any], **_kw) -> str:
text = (args.get("text") or "").strip()
if not text:
return _err("text is required")
try:
client, node_name = _resolve_node_client(args.get("node"))
except RuntimeError as e:
return _err(str(e))
if client is not None:
try:
res = client.say(text)
return _json({"success": bool(res.get("ok")), "node": node_name, **res})
except Exception as e:
return _err(f"remote node say failed: {e}", node=node_name)
res = pm.enqueue_say(text)
return _json({"success": bool(res.get("ok")), **res})
+15 -31
View File
@@ -526,24 +526,16 @@ class HindsightMemoryProvider(MemoryProvider):
print("\n Configuring Hindsight memory:\n")
existing_config = self._config if isinstance(self._config, dict) else _load_config()
if not isinstance(existing_config, dict):
existing_config = {}
# Step 1: Mode selection
mode_values = ["cloud", "local_embedded", "local_external"]
mode_items = [
("Cloud", "Hindsight Cloud API (lightweight, just needs an API key)"),
("Local Embedded", "Run Hindsight locally (downloads ~200MB, needs LLM key)"),
("Local External", "Connect to an existing Hindsight instance"),
]
existing_mode = existing_config.get("mode")
mode_default_idx = mode_values.index(existing_mode) if existing_mode in mode_values else 0
mode_idx = _curses_select(" Select mode", mode_items, default=mode_default_idx)
mode = mode_values[mode_idx]
mode_idx = _curses_select(" Select mode", mode_items, default=0)
mode = ["cloud", "local_embedded", "local_external"][mode_idx]
provider_config: dict = dict(existing_config)
provider_config["mode"] = mode
provider_config: dict = {"mode": mode}
env_writes: dict = {}
# Step 2: Install/upgrade deps for selected mode
@@ -609,29 +601,21 @@ class HindsightMemoryProvider(MemoryProvider):
(p, f"default model: {_PROVIDER_DEFAULT_MODELS[p]}")
for p in providers_list
]
existing_llm_provider = provider_config.get("llm_provider")
llm_default_idx = providers_list.index(existing_llm_provider) if existing_llm_provider in providers_list else 0
llm_idx = _curses_select(" Select LLM provider", llm_items, default=llm_default_idx)
llm_idx = _curses_select(" Select LLM provider", llm_items, default=0)
llm_provider = providers_list[llm_idx]
provider_config["llm_provider"] = llm_provider
if llm_provider == "openai_compatible":
existing_base_url = provider_config.get("llm_base_url", "")
prompt = " LLM endpoint URL (e.g. http://192.168.1.10:8080/v1)"
if existing_base_url:
prompt += f" [{existing_base_url}]"
prompt += ": "
val = input(prompt).strip()
val = input(" LLM endpoint URL (e.g. http://192.168.1.10:8080/v1): ").strip()
if val:
provider_config["llm_base_url"] = val
elif llm_provider == "openrouter":
provider_config["llm_base_url"] = "https://openrouter.ai/api/v1"
provider_default_model = _PROVIDER_DEFAULT_MODELS.get(llm_provider, "gpt-4o-mini")
current_model = provider_config.get("llm_model") or provider_default_model
val = input(f" LLM model [{current_model}]: ").strip()
provider_config["llm_model"] = val or current_model
default_model = _PROVIDER_DEFAULT_MODELS.get(llm_provider, "gpt-4o-mini")
val = input(f" LLM model [{default_model}]: ").strip()
provider_config["llm_model"] = val or default_model
sys.stdout.write(" LLM API key: ")
sys.stdout.flush()
@@ -649,16 +633,15 @@ class HindsightMemoryProvider(MemoryProvider):
env_writes["HINDSIGHT_LLM_API_KEY"] = existing_llm_key
# Step 4: Save everything
provider_config.setdefault("bank_id", "hermes")
provider_config.setdefault("recall_budget", "mid")
# Read existing timeout from config if present, otherwise use default.
# Preserve explicit 0 values instead of treating them as blank.
existing_timeout = provider_config.get("timeout")
timeout_val = existing_timeout if existing_timeout is not None else _DEFAULT_TIMEOUT
provider_config["bank_id"] = "hermes"
provider_config["recall_budget"] = "mid"
# Read existing timeout from config if present, otherwise use default
existing_timeout = self._config.get("timeout") if self._config else None
timeout_val = existing_timeout if existing_timeout else _DEFAULT_TIMEOUT
provider_config["timeout"] = timeout_val
env_writes["HINDSIGHT_TIMEOUT"] = str(timeout_val)
if mode == "local_embedded":
existing_idle_timeout = provider_config.get("idle_timeout")
existing_idle_timeout = self._config.get("idle_timeout") if self._config else None
idle_timeout_val = existing_idle_timeout if existing_idle_timeout is not None else _DEFAULT_IDLE_TIMEOUT
provider_config["idle_timeout"] = idle_timeout_val
env_writes["HINDSIGHT_IDLE_TIMEOUT"] = str(idle_timeout_val)
@@ -1221,6 +1204,7 @@ class HindsightMemoryProvider(MemoryProvider):
def _sync():
try:
client = self._get_client()
item = self._build_retain_kwargs(
content,
context=self._retain_context,
+6 -81
View File
@@ -22,7 +22,6 @@ import threading
import time
from typing import Any, Dict, List, Optional
from agent.memory_manager import sanitize_context
from agent.memory_provider import MemoryProvider
from tools.registry import tool_error
@@ -38,10 +37,7 @@ PROFILE_SCHEMA = {
"description": (
"Retrieve or update a peer card from Honcho — a curated list of key facts "
"about that peer (name, role, preferences, communication style, patterns). "
"Pass `card` to update; omit `card` to read. If the card is empty, the "
"result includes a `hint` field explaining why (observation disabled, "
"fresh peer, dialectic layer still warming up, etc.) — this is NOT an "
"error. Peer cards accumulate over time from observed conversation."
"Pass `card` to update; omit `card` to read."
),
"parameters": {
"type": "object",
@@ -1060,63 +1056,6 @@ class HonchoMemoryProvider(MemoryProvider):
return chunks
def _empty_profile_hint(self, peer: str) -> Dict[str, Any]:
"""Build a diagnostic hint when honcho_profile returns an empty card.
A literal "No profile facts available yet." tells the model nothing
about WHY. The model then often surfaces it to the user as a cryptic
error. This hint enumerates the likely causes so the model can
explain the situation (or retry with a different peer).
Ordered by likelihood for a typical deployment:
1. Observation is disabled for this peer
2. Card hasn't accumulated yet (fresh peer, not enough dialectic
cycles dialectic cadence runs every N turns)
3. Self-hosted Honcho backend doesn't support peer cards
(honcho-ai server < 3.x)
"""
cfg = self._config
reasons: List[str] = []
if cfg is not None:
if peer == "user":
observe_me = bool(getattr(cfg, "user_observe_me", True))
observe_others = bool(getattr(cfg, "user_observe_others", True))
else:
observe_me = bool(getattr(cfg, "ai_observe_me", True))
observe_others = bool(getattr(cfg, "ai_observe_others", True))
if not (observe_me or observe_others):
reasons.append(
f"observation is disabled for peer '{peer}' "
f"(user_observe_me/ai_observe_me in config)"
)
cadence = getattr(self, "_dialectic_cadence", 1)
turn = getattr(self, "_turn_count", 0)
if turn < max(2, cadence):
reasons.append(
f"this session has only {turn} turn(s); peer cards accumulate "
f"as the dialectic layer reasons over conversation history "
f"(cadence every {cadence} turn(s))"
)
if not reasons:
reasons.append(
"peer card has no facts yet — Honcho's dialectic layer builds "
"this over time from observed turns; self-hosted Honcho < 3.x "
"does not support peer cards at all"
)
return {
"result": "No profile facts available yet.",
"hint": (
"This is not an error. "
+ "; ".join(reasons)
+ ". Try honcho_reasoning for a synthesized answer, or "
"honcho_search to query raw conversation excerpts."
),
}
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
"""Record the conversation turn in Honcho (non-blocking).
@@ -1129,15 +1068,13 @@ class HonchoMemoryProvider(MemoryProvider):
return
msg_limit = self._config.message_max_chars if self._config else 25000
clean_user_content = sanitize_context(user_content or "").strip()
clean_assistant_content = sanitize_context(assistant_content or "").strip()
def _sync():
try:
session = self._manager.get_or_create(self._session_key)
for chunk in self._chunk_message(clean_user_content, msg_limit):
for chunk in self._chunk_message(user_content, msg_limit):
session.add_message("user", chunk)
for chunk in self._chunk_message(clean_assistant_content, msg_limit):
for chunk in self._chunk_message(assistant_content, msg_limit):
session.add_message("assistant", chunk)
self._manager._flush_session(session)
except Exception as e:
@@ -1150,20 +1087,8 @@ class HonchoMemoryProvider(MemoryProvider):
)
self._sync_thread.start()
def on_memory_write(
self,
action: str,
target: str,
content: str,
metadata: Optional[Dict[str, Any]] = None,
) -> None:
"""Mirror built-in user profile writes as Honcho conclusions.
``metadata`` is accepted for compatibility with the write-origin
work landed in main (commit 6a957a74); it's not yet threaded into
the Honcho conclusion payload. Left as a follow-up so this PR
stays focused on the 7-PR consolidation and its review follow-ups.
"""
def on_memory_write(self, action: str, target: str, content: str) -> None:
"""Mirror built-in user profile writes as Honcho conclusions."""
if action != "add" or target != "user" or not content:
return
if self._cron_skipped:
@@ -1229,7 +1154,7 @@ class HonchoMemoryProvider(MemoryProvider):
return json.dumps({"result": f"Peer card updated ({len(result)} facts).", "card": result})
card = self._manager.get_peer_card(self._session_key, peer=peer)
if not card:
return json.dumps(self._empty_profile_hint(peer))
return json.dumps({"result": "No profile facts available yet."})
return json.dumps({"result": card})
elif tool_name == "honcho_search":
+2 -31
View File
@@ -273,38 +273,9 @@ def _write_config(cfg: dict, path: Path | None = None) -> None:
def _resolve_api_key(cfg: dict) -> str:
"""Resolve API key with host -> root -> env fallback.
For self-hosted instances configured with ``baseUrl`` instead of an API
key, returns ``"local"`` so that credential guards throughout the CLI
don't reject a valid configuration. The ``baseUrl`` is scheme-validated
(http/https only) so that a typo like ``baseUrl: true`` can't silently
pass the guard. Schemeless strings that look like host:port (legacy
config shapes, e.g. ``localhost:8000``) still pass the Honcho SDK
will reject them itself with a clearer error than ours.
"""
"""Resolve API key with host -> root -> env fallback."""
host_key = ((cfg.get("hosts") or {}).get(_host_key()) or {}).get("apiKey")
key = host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")
if not key:
base_url = cfg.get("baseUrl") or cfg.get("base_url") or os.environ.get("HONCHO_BASE_URL", "")
base_url = (base_url or "").strip()
if base_url:
from urllib.parse import urlparse
try:
parsed = urlparse(base_url)
except (TypeError, ValueError):
parsed = None
if parsed and parsed.scheme in ("http", "https") and parsed.netloc:
return "local"
# Schemeless but looks like a host (contains '.' or ':' and isn't
# a boolean literal): let it through so legacy configs don't
# regress into "no API key configured" when they previously worked.
lowered = base_url.lower()
if lowered not in ("true", "false", "none", "null") and any(
c in base_url for c in ".:"
) and not base_url.isdigit():
return "local"
return key
return host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")
def _prompt(label: str, default: str | None = None, secret: bool = False) -> str:
+3 -67
View File
@@ -16,7 +16,6 @@ from __future__ import annotations
import json
import os
import logging
import hashlib
from dataclasses import dataclass, field
from pathlib import Path
@@ -28,6 +27,7 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
GLOBAL_CONFIG_PATH = Path.home() / ".honcho" / "config.json"
HOST = "hermes"
@@ -53,11 +53,6 @@ def resolve_active_host() -> str:
return HOST
def resolve_global_config_path() -> Path:
"""Return the shared Honcho config path for the current HOME."""
return Path.home() / ".honcho" / "config.json"
def resolve_config_path() -> Path:
"""Return the active Honcho config path.
@@ -77,7 +72,7 @@ def resolve_config_path() -> Path:
if default_path != local_path and default_path.exists():
return default_path
return resolve_global_config_path()
return GLOBAL_CONFIG_PATH
_RECALL_MODE_ALIASES = {"auto": "hybrid"}
@@ -143,15 +138,6 @@ def _parse_dialectic_depth_levels(host_val, root_val, depth: int) -> list[str] |
return None
# Default HTTP timeout (seconds) applied when no explicit timeout is
# configured via HonchoClientConfig.timeout, honcho.timeout / requestTimeout,
# or HONCHO_TIMEOUT. Honcho calls happen on the post-response path of
# run_conversation; without a cap the agent can block indefinitely when
# the Honcho backend is unreachable, preventing the gateway from
# delivering the already-generated response.
_DEFAULT_HTTP_TIMEOUT = 30.0
def _resolve_optional_float(*values: Any) -> float | None:
"""Return the first non-empty value coerced to a positive float."""
for value in values:
@@ -240,13 +226,6 @@ class HonchoClientConfig:
# Identity
peer_name: str | None = None
ai_peer: str = "hermes"
# When True, ``peer_name`` wins over any gateway-supplied runtime
# identity (Telegram UID, Discord ID, …) when resolving the user peer.
# This keeps memory unified across platforms for single-user deployments
# where Honcho's one peer-name is an unambiguous identity — otherwise
# each platform would fork memory into its own peer (#14984). Default
# ``False`` preserves existing multi-user behaviour.
pin_peer_name: bool = False
# Toggles
enabled: bool = False
save_messages: bool = True
@@ -441,11 +420,6 @@ class HonchoClientConfig:
timeout=timeout,
peer_name=host_block.get("peerName") or raw.get("peerName"),
ai_peer=ai_peer,
pin_peer_name=_resolve_bool(
host_block.get("pinPeerName"),
raw.get("pinPeerName"),
default=False,
),
enabled=enabled,
save_messages=save_messages,
write_frequency=write_frequency,
@@ -548,39 +522,6 @@ class HonchoClientConfig:
pass
return None
# Honcho enforces a 100-char limit on session IDs. Long gateway session keys
# (Matrix "!room:server" + thread event IDs, Telegram supergroup reply
# chains, Slack thread IDs with long workspace prefixes) can overflow this
# limit after sanitization; the Honcho API then rejects every call for that
# session with "session_id too long". See issue #13868.
_HONCHO_SESSION_ID_MAX_LEN = 100
_HONCHO_SESSION_ID_HASH_LEN = 8
@classmethod
def _enforce_session_id_limit(cls, sanitized: str, original: str) -> str:
"""Truncate a sanitized session ID to Honcho's 100-char limit.
The common case (short keys) short-circuits with no modification.
For over-limit keys, keep a prefix of the sanitized ID and append a
deterministic ``-<sha256 prefix>`` suffix so two distinct long keys
that share a leading segment don't collide onto the same truncated ID.
The hash is taken over the *original* pre-sanitization key, so two
inputs that sanitize to the same string still collide intentionally
(same logical session), but two inputs that only share a prefix do not.
"""
max_len = cls._HONCHO_SESSION_ID_MAX_LEN
if len(sanitized) <= max_len:
return sanitized
hash_len = cls._HONCHO_SESSION_ID_HASH_LEN
digest = hashlib.sha256(original.encode("utf-8")).hexdigest()[:hash_len]
# max_len - hash_len - 1 (for the '-' separator) chars of the sanitized
# prefix, then '-<hash>'. Strip any trailing hyphen from the prefix so
# the result doesn't double up on separators.
prefix_len = max_len - hash_len - 1
prefix = sanitized[:prefix_len].rstrip("-")
return f"{prefix}-{digest}"
def resolve_session_name(
self,
cwd: str | None = None,
@@ -625,7 +566,7 @@ class HonchoClientConfig:
if gateway_session_key:
sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', gateway_session_key).strip('-')
if sanitized:
return self._enforce_session_id_limit(sanitized, gateway_session_key)
return sanitized
# per-session: inherit Hermes session_id (new Honcho session each run)
if self.session_strategy == "per-session" and session_id:
@@ -705,11 +646,6 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
except Exception:
pass
# Fall back to the default so an unconfigured install cannot hang
# indefinitely on a stalled Honcho request.
if resolved_timeout is None:
resolved_timeout = _DEFAULT_HTTP_TIMEOUT
if resolved_base_url:
logger.info("Initializing Honcho client (base_url: %s, workspace: %s)", resolved_base_url, config.workspace_id)
else:
+32 -57
View File
@@ -95,7 +95,6 @@ class HonchoSessionManager:
self._config = config
self._runtime_user_peer_name = runtime_user_peer_name
self._cache: dict[str, HonchoSession] = {}
self._cache_lock = threading.RLock()
self._peers_cache: dict[str, Any] = {}
self._sessions_cache: dict[str, Any] = {}
@@ -274,35 +273,17 @@ class HonchoSessionManager:
Returns:
The session.
"""
with self._cache_lock:
if key in self._cache:
logger.debug("Local session cache hit: %s", key)
return self._cache[key]
if key in self._cache:
logger.debug("Local session cache hit: %s", key)
return self._cache[key]
# Determine peer IDs — no lock needed (read-only, no shared state mutation).
# Gateway sessions normally use the runtime user identity (the
# platform-native ID: Telegram UID, Discord snowflake, Slack user,
# etc.) so multi-user bots scope memory per user. For a single-user
# deployment the config-supplied ``peer_name`` is an unambiguous
# identity and we should keep it unified across platforms — see
# #14984. Opt into that with ``hosts.<host>.pinPeerName: true`` in
# ``honcho.json`` (or root-level ``pinPeerName: true``).
# `is True` (not `bool(...)`) is deliberate: several multi-user tests
# pass a ``MagicMock`` for ``config`` where ``mock.pin_peer_name``
# silently returns another MagicMock — truthy by default. Requiring
# strict ``True`` keeps pinning as opt-in even for callers that
# haven't updated their mocks yet; real configs built via
# ``from_global_config`` always produce a proper boolean.
pin_peer_name = (
self._config is not None
and bool(getattr(self._config, "peer_name", None))
and getattr(self._config, "pin_peer_name", False) is True
)
if self._runtime_user_peer_name and not pin_peer_name:
# Gateway sessions should use the runtime user identity when available.
if self._runtime_user_peer_name:
user_peer_id = self._sanitize_id(self._runtime_user_peer_name)
elif self._config and self._config.peer_name:
user_peer_id = self._sanitize_id(self._config.peer_name)
else:
# Fallback: derive from session key
parts = key.split(":", 1)
channel = parts[0] if len(parts) > 1 else "default"
chat_id = parts[1] if len(parts) > 1 else key
@@ -312,14 +293,19 @@ class HonchoSessionManager:
self._config.ai_peer if self._config else "hermes-assistant"
)
# All expensive I/O outside the lock — Honcho's persistence is source of truth
# Sanitize session ID for Honcho
honcho_session_id = self._sanitize_id(key)
# Get or create peers
user_peer = self._get_or_create_peer(user_peer_id)
assistant_peer = self._get_or_create_peer(assistant_peer_id)
# Get or create Honcho session
honcho_session, existing_messages = self._get_or_create_honcho_session(
honcho_session_id, user_peer, assistant_peer
)
# Convert Honcho messages to local format
local_messages = []
for msg in existing_messages:
role = "assistant" if msg.peer_id == assistant_peer_id else "user"
@@ -327,9 +313,10 @@ class HonchoSessionManager:
"role": role,
"content": msg.content,
"timestamp": msg.created_at.isoformat() if msg.created_at else "",
"_synced": True,
"_synced": True, # Already in Honcho
})
# Create local session wrapper with existing messages
session = HonchoSession(
key=key,
user_peer_id=user_peer_id,
@@ -338,9 +325,7 @@ class HonchoSessionManager:
messages=local_messages,
)
# Write to cache under lock — only one writer wins
with self._cache_lock:
self._cache[key] = session
self._cache[key] = session
return session
def _flush_session(self, session: HonchoSession) -> bool:
@@ -371,15 +356,13 @@ class HonchoSessionManager:
for msg in new_messages:
msg["_synced"] = True
logger.debug("Synced %d messages to Honcho for %s", len(honcho_messages), session.key)
with self._cache_lock:
self._cache[session.key] = session
self._cache[session.key] = session
return True
except Exception as e:
for msg in new_messages:
msg["_synced"] = False
logger.error("Failed to sync messages to Honcho: %s", e)
with self._cache_lock:
self._cache[session.key] = session
self._cache[session.key] = session
return False
def _async_writer_loop(self) -> None:
@@ -451,9 +434,7 @@ class HonchoSessionManager:
Called at session end for "session" write_frequency, or to force
a sync before process exit regardless of mode.
"""
with self._cache_lock:
sessions = list(self._cache.values())
for session in sessions:
for session in list(self._cache.values()):
try:
self._flush_session(session)
except Exception as e:
@@ -478,10 +459,9 @@ class HonchoSessionManager:
def delete(self, key: str) -> bool:
"""Delete a session from local cache."""
with self._cache_lock:
if key in self._cache:
del self._cache[key]
return True
if key in self._cache:
del self._cache[key]
return True
return False
def new_session(self, key: str) -> HonchoSession:
@@ -493,25 +473,20 @@ class HonchoSessionManager:
"""
import time
# Hold the reentrant lock across get_or_create so a concurrent caller
# can't observe the (old-popped, new-not-yet-inserted) gap and create
# its own session under the raw key. `_cache_lock` is an RLock so
# nested reacquisition inside get_or_create is safe.
with self._cache_lock:
# Remove old session from caches (but don't delete from Honcho)
old_session = self._cache.pop(key, None)
if old_session:
self._sessions_cache.pop(old_session.honcho_session_id, None)
# Remove old session from caches (but don't delete from Honcho)
old_session = self._cache.pop(key, None)
if old_session:
self._sessions_cache.pop(old_session.honcho_session_id, None)
# Create new session with timestamp suffix
timestamp = int(time.time())
new_key = f"{key}:{timestamp}"
# Create new session with timestamp suffix
timestamp = int(time.time())
new_key = f"{key}:{timestamp}"
# get_or_create will create a fresh session
session = self.get_or_create(new_key)
# get_or_create will create a fresh session
session = self.get_or_create(new_key)
# Cache under the original key so callers find it by the expected name
self._cache[key] = session
# Cache under the original key so callers find it by the expected name
self._cache[key] = session
logger.info("Created new session for %s (honcho: %s)", key, session.honcho_session_id)
return session
-307
View File
@@ -1,307 +0,0 @@
# providers/
Single source of truth for every inference provider Hermes knows about.
Each provider is declared once here as a `ProviderProfile`. Every other layer —
auth resolution, transport kwargs, model listing, runtime routing — reads from
these profiles instead of maintaining its own parallel data.
---
## Directory layout
```
providers/
├── base.py ProviderProfile dataclass + OMIT_TEMPERATURE sentinel
├── __init__.py Registry: register_provider(), get_provider_profile()
├── README.md This file
├── # Simple providers — just identity + auth + endpoint
├── alibaba.py Alibaba Cloud DashScope
├── arcee.py Arcee AI
├── bedrock.py AWS Bedrock (api_mode=bedrock_converse)
├── deepseek.py DeepSeek
├── huggingface.py Hugging Face Inference API
├── kilocode.py Kilo Code
├── minimax.py MiniMax (international + CN)
├── nvidia.py NVIDIA NIM (default_max_tokens=16384)
├── ollama_cloud.py Ollama Cloud
├── stepfun.py StepFun
├── xiaomi.py Xiaomi MiMo
├── xai.py xAI Grok (api_mode=codex_responses)
├── zai.py Z.AI / GLM
├── # Medium — one or two quirks
├── anthropic.py Native Anthropic (x-api-key header, api_mode=anthropic_messages)
├── copilot.py GitHub Copilot (auth_type=copilot, reasoning per model)
├── copilot_acp.py Copilot ACP subprocess (api_mode=copilot_acp)
├── custom.py Custom/Ollama local (think=false, num_ctx)
├── gemini.py Google Gemini AI Studio + Cloud Code OAuth
├── kimi.py Kimi Coding (OMIT_TEMPERATURE, thinking, dual endpoint)
├── openai_codex.py OpenAI Codex OAuth (api_mode=codex_responses)
├── opencode.py OpenCode Zen + Go (per-model api_mode routing)
├── # Complex — subclasses with multiple overrides
├── nous.py Nous Portal (tags, attribution, reasoning omit-when-disabled)
├── openrouter.py OpenRouter (provider preferences, public model fetch)
├── qwen.py Qwen OAuth (message normalization, cache_control, vl_hires)
└── vercel.py Vercel AI Gateway (attribution headers, reasoning passthrough)
```
---
## ProviderProfile fields
```python
@dataclass
class ProviderProfile:
# Identity
name: str # canonical ID — auto-registered as PROVIDER_REGISTRY key for new api-key providers
api_mode: str # "chat_completions" | "anthropic_messages" |
# "codex_responses" | "bedrock_converse" | "copilot_acp"
aliases: tuple # alternate names resolved by get_provider_profile()
# Auth & endpoints
env_vars: tuple # env var names holding the API key, in priority order
base_url: str # default inference endpoint
models_url: str # explicit models endpoint; falls back to {base_url}/models
# set when the models catalog lives at a different URL
# (e.g. OpenRouter: public /api/v1/models vs /api/v1 inference)
auth_type: str # "api_key" | "oauth_device_code" | "oauth_external" |
# "copilot" | "aws" | "external_process"
# Client-level quirks
default_headers: dict # extra HTTP headers sent on every request
# Request-level quirks
fixed_temperature: Any # None = use caller's default; OMIT_TEMPERATURE = don't send
default_max_tokens: int|None # inject max_tokens when caller omits it
default_aux_model: str # cheap model for auxiliary tasks (compression, vision, etc.)
# empty string = use main model (default)
```
---
## Hooks (override in a subclass)
| Method | When to override |
|--------|-----------------|
| `prepare_messages(messages)` | Provider needs message pre-processing (Qwen: string → list-of-parts, cache_control) |
| `build_extra_body(*, session_id, **ctx)` | Provider-specific `extra_body` fields (Nous: tags, OpenRouter: provider preferences) |
| `build_api_kwargs_extras(*, reasoning_config, **ctx)` | Returns `(extra_body_additions, top_level_kwargs)` — use when some fields go to `extra_body` and some go top-level (Kimi: `reasoning_effort` top-level; OpenRouter: `reasoning` in extra_body) |
| `fetch_models(*, api_key, timeout)` | Custom model listing (Anthropic: x-api-key header; OpenRouter: public endpoint, no auth; Bedrock/copilot-acp: return None) |
All hooks have safe defaults — only override what differs from the base.
---
## How to add a new provider
### 1. Simple (standard OpenAI-compatible endpoint)
```python
# providers/myprovider.py
from providers import register_provider
from providers.base import ProviderProfile
myprovider = ProviderProfile(
name="myprovider", # must match id in hermes_cli/auth.py PROVIDER_REGISTRY
aliases=("my-provider", "myp"),
api_mode="chat_completions",
env_vars=("MYPROVIDER_API_KEY",),
base_url="https://api.myprovider.com/v1",
auth_type="api_key",
)
register_provider(myprovider)
```
The default `fetch_models()` will call `GET https://api.myprovider.com/v1/models`
with Bearer auth automatically. No override needed for standard `/v1/models`.
### 2. With quirks (subclass)
```python
# providers/myprovider.py
from typing import Any
from providers import register_provider
from providers.base import ProviderProfile
class MyProviderProfile(ProviderProfile):
"""My provider — custom reasoning header."""
def build_api_kwargs_extras(
self,
*,
reasoning_config: dict | None = None,
**ctx: Any,
) -> tuple[dict[str, Any], dict[str, Any]]:
extra_body: dict[str, Any] = {}
if reasoning_config:
extra_body["my_reasoning"] = reasoning_config.get("effort", "medium")
return extra_body, {}
def fetch_models(
self,
*,
api_key: str | None = None,
timeout: float = 8.0,
) -> list[str] | None:
# Override only if your endpoint differs from standard /v1/models
return super().fetch_models(api_key=api_key, timeout=timeout)
myprovider = MyProviderProfile(
name="myprovider",
aliases=("myp",),
env_vars=("MYPROVIDER_API_KEY",),
base_url="https://api.myprovider.com/v1",
)
register_provider(myprovider)
```
### 3. Wire it up
After creating the file, add `name` to the `_PROFILE_ACTIVE_PROVIDERS` set in
`run_agent.py` once you've verified parity against the legacy flag path. Start
with a simple provider (no message prep, no reasoning quirks) and work up.
---
## fetch_models contract
```python
def fetch_models(
self,
*,
api_key: str | None = None,
timeout: float = 8.0,
) -> list[str] | None:
...
```
- Returns `list[str]`: model IDs from the provider's live endpoint.
- Returns `None`: provider doesn't support REST model listing (Bedrock, copilot-acp),
or the request failed. Callers **must** fall back to `_PROVIDER_MODELS` on `None`.
- Never raises — swallow exceptions and return `None`.
- Default implementation: `GET {base_url}/models` with Bearer auth. Works for any
standard OpenAI-compatible provider.
**Override when:**
- Auth header is not `Bearer` (Anthropic: `x-api-key`)
- Endpoint path differs from `/models` AND you can't just set `models_url` (OpenRouter: public endpoint, pass `api_key=None` explicitly)
- Response format differs (extra wrapping, non-standard `id` field)
- Provider has no REST endpoint (Bedrock, copilot-acp → return `None`)
- Filtering needed post-fetch (only tool-capable models, etc.)
Use `models_url` instead of overriding when the only difference is the URL:
```python
# No subclass needed — just set models_url
myprovider = ProviderProfile(
name="myprovider",
base_url="https://api.myprovider.com/v1",
models_url="https://catalog.myprovider.com/models", # different host
)
```
---
## Debugging
### Check if a provider resolves
```python
from providers import get_provider_profile
p = get_provider_profile("myprovider")
print(p) # ProviderProfile(name='myprovider', ...)
print(p.base_url)
print(p.api_mode)
```
### Check all registered providers
```python
from providers import _REGISTRY
print(list(_REGISTRY.keys()))
```
### Test live model fetch
```python
import os
from providers import get_provider_profile
p = get_provider_profile("myprovider")
key = os.getenv("MYPROVIDER_API_KEY")
models = p.fetch_models(api_key=key, timeout=5.0)
print(models) # list of model IDs, or None on failure
```
### Test alias resolution
```python
from providers import get_provider_profile
# All of these should return the same profile
assert get_provider_profile("openrouter").name == "openrouter"
assert get_provider_profile("or").name == "openrouter"
```
### Run the provider test suite
```bash
# From the repo root
source venv/bin/activate
python -m pytest tests/providers/ -v
```
### Check ruff + ty compliance
```bash
source venv/bin/activate
ruff format providers/*.py
ruff check providers/*.py --select UP,E,F,I,W
ty check providers/*.py
```
---
## Common mistakes
**Wrong `name`** — must be the same string that appears as the key in
`hermes_cli/auth.py` `PROVIDER_REGISTRY`. New api-key providers auto-register
into `PROVIDER_REGISTRY` from the profile, so the name IS the key. For providers
with a pre-existing `PROVIDER_REGISTRY` entry, use the exact `id` field value.
**Wrong `env_vars`** — separate API-key vars from base-URL override vars in the
tuple. Env vars that end with `_BASE_URL` or `_URL` are treated as URL overrides;
everything else is treated as an API key. Getting this wrong causes the doctor
health check to send a URL string as a Bearer token.
**Wrong `base_url`** — several providers have non-obvious paths:
`stepfun: /step_plan/v1`, `opencode-go: /zen/go/v1`. The profile's `base_url`
is also used as the `inference_base_url` when auto-registering into `PROVIDER_REGISTRY`
for new providers, so it must be correct for auth resolution to work.
**Skipping `api_mode`** — defaults to `chat_completions`. Providers that use
`anthropic_messages`, `codex_responses`, `bedrock_converse`, or `copilot_acp`
must set it explicitly.
**Forgetting `register_provider()`** — auto-discovery runs `pkgutil.iter_modules`
over the package and imports each module, but only if `register_provider()` is
called at module level. Without it the profile is never in `_REGISTRY`.
**`fetch_models` returning the wrong shape** — must return `list[str]` (plain
model IDs), not `list[tuple]` or `list[dict]`. Callers expect plain strings.
**Wrong `build_api_kwargs_extras` return shape** — must return a 2-tuple
`(extra_body_dict, top_level_dict)`. Returning a single dict causes a
`ValueError: not enough values to unpack` in the transport.
**`build_api_kwargs_extras` wrong tuple** — must return `(extra_body_dict,
top_level_dict)`. Returning a flat dict or swapping the order silently sends
fields to the wrong place.
-76
View File
@@ -1,76 +0,0 @@
"""Provider module registry.
Auto-discovers ProviderProfile instances from providers/*.py modules.
Each module should define a module-level PROVIDER or PROVIDERS list.
Usage:
from providers import get_provider_profile
profile = get_provider_profile("nvidia") # returns ProviderProfile or None
profile = get_provider_profile("kimi") # checks name + aliases
"""
from __future__ import annotations
from providers.base import OMIT_TEMPERATURE, ProviderProfile # noqa: F401
_REGISTRY: dict[str, ProviderProfile] = {}
_ALIASES: dict[str, str] = {}
_discovered = False
def register_provider(profile: ProviderProfile) -> None:
"""Register a provider profile by name and aliases."""
_REGISTRY[profile.name] = profile
for alias in profile.aliases:
_ALIASES[alias] = profile.name
def get_provider_profile(name: str) -> ProviderProfile | None:
"""Look up a provider profile by name or alias.
Returns None if the provider has no profile (falls back to generic).
"""
if not _discovered:
_discover_providers()
canonical = _ALIASES.get(name, name)
return _REGISTRY.get(canonical)
def list_providers() -> list[ProviderProfile]:
"""Return all registered provider profiles (one per canonical name)."""
if not _discovered:
_discover_providers()
# Deduplicate: _REGISTRY has canonical names; _ALIASES points to same objects
seen: set[int] = set()
result: list[ProviderProfile] = []
for profile in _REGISTRY.values():
pid = id(profile)
if pid not in seen:
seen.add(pid)
result.append(profile)
return result
def _discover_providers() -> None:
"""Import all provider modules to trigger registration."""
global _discovered
if _discovered:
return
_discovered = True
import importlib
import pkgutil
import providers as _pkg
for _importer, modname, _ispkg in pkgutil.iter_modules(_pkg.__path__):
if modname.startswith("_") or modname == "base":
continue
try:
importlib.import_module(f"providers.{modname}")
except ImportError as e:
import logging
logging.getLogger(__name__).warning(
"Failed to import provider module %s: %s", modname, e
)
-13
View File
@@ -1,13 +0,0 @@
"""Alibaba Cloud DashScope provider profile."""
from providers import register_provider
from providers.base import ProviderProfile
alibaba = ProviderProfile(
name="alibaba",
aliases=("dashscope", "alibaba-cloud", "qwen-dashscope"),
env_vars=("DASHSCOPE_API_KEY",),
base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)
register_provider(alibaba)
-52
View File
@@ -1,52 +0,0 @@
"""Native Anthropic provider profile."""
import json
import logging
import urllib.request
from providers import register_provider
from providers.base import ProviderProfile
logger = logging.getLogger(__name__)
class AnthropicProfile(ProviderProfile):
"""Native Anthropic — uses x-api-key header, not Bearer."""
def fetch_models(
self,
*,
api_key: str | None = None,
timeout: float = 8.0,
) -> list[str] | None:
"""Anthropic uses x-api-key header and anthropic-version."""
if not api_key:
return None
try:
req = urllib.request.Request("https://api.anthropic.com/v1/models")
req.add_header("x-api-key", api_key)
req.add_header("anthropic-version", "2023-06-01")
req.add_header("Accept", "application/json")
with urllib.request.urlopen(req, timeout=timeout) as resp:
data = json.loads(resp.read().decode())
return [
m["id"]
for m in data.get("data", [])
if isinstance(m, dict) and "id" in m
]
except Exception as exc:
logger.debug("fetch_models(anthropic): %s", exc)
return None
anthropic = AnthropicProfile(
name="anthropic",
aliases=("claude", "claude-oauth", "claude-code"),
api_mode="anthropic_messages",
env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
base_url="https://api.anthropic.com",
auth_type="api_key",
default_aux_model="claude-haiku-4-5-20251001",
)
register_provider(anthropic)
-13
View File
@@ -1,13 +0,0 @@
"""Arcee AI provider profile."""
from providers import register_provider
from providers.base import ProviderProfile
arcee = ProviderProfile(
name="arcee",
aliases=("arcee-ai", "arceeai"),
env_vars=("ARCEEAI_API_KEY",),
base_url="https://api.arcee.ai/api/v1",
)
register_provider(arcee)
-165
View File
@@ -1,165 +0,0 @@
"""Provider profile base class.
A ProviderProfile declares everything about an inference provider in one place:
auth, endpoints, client quirks, request-time quirks. The transport reads this
instead of receiving 20+ boolean flags.
Provider profiles are DECLARATIVE they describe the provider's behavior.
They do NOT own client construction, credential rotation, or streaming.
Those stay on AIAgent.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import Any
logger = logging.getLogger(__name__)
# Sentinel for "omit temperature entirely" (Kimi: server manages it)
OMIT_TEMPERATURE = object()
@dataclass
class ProviderProfile:
"""Base provider profile — subclass or instantiate with overrides."""
# ── Identity ─────────────────────────────────────────────
name: str
api_mode: str = "chat_completions"
aliases: tuple = ()
# ── Human-readable metadata ───────────────────────────────
display_name: str = "" # e.g. "GMI Cloud" — shown in picker/labels
description: str = "" # e.g. "GMI Cloud (multi-model direct API)" — picker subtitle
signup_url: str = "" # e.g. "https://www.gmicloud.ai/" — shown during setup
# ── Auth & endpoints ─────────────────────────────────────
env_vars: tuple = ()
base_url: str = ""
models_url: str = "" # explicit models endpoint; falls back to {base_url}/models
auth_type: str = "api_key" # api_key|oauth_device_code|oauth_external|copilot|aws_sdk
# ── Model catalog ─────────────────────────────────────────
# fallback_models: curated list shown in /model picker when live fetch fails.
# Only agentic models that support tool calling should appear here.
fallback_models: tuple = ()
# hostname: base hostname for URL→provider reverse-mapping in model_metadata.py
# e.g. "api.gmi-serving.com". Derived from base_url when empty.
hostname: str = ""
# ── Client-level quirks (set once at client construction) ─
default_headers: dict[str, str] = field(default_factory=dict)
# ── Request-level quirks ─────────────────────────────────
# Temperature: None = use caller's default, OMIT_TEMPERATURE = don't send
fixed_temperature: Any = None
default_max_tokens: int | None = None
default_aux_model: str = (
"" # cheap model for auxiliary tasks (compression, vision, etc.)
)
# empty = use main model
# ── Hooks (override in subclass for complex providers) ───
def get_hostname(self) -> str:
"""Return the provider's base hostname for URL-based detection.
Uses self.hostname if set explicitly, otherwise derives it from base_url.
e.g. 'https://api.gmi-serving.com/v1' 'api.gmi-serving.com'
"""
if self.hostname:
return self.hostname
if self.base_url:
from urllib.parse import urlparse
return urlparse(self.base_url).hostname or ""
return ""
def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Provider-specific message preprocessing.
Called AFTER codex field sanitization, BEFORE developer role swap.
Default: pass-through.
"""
return messages
def build_extra_body(
self, *, session_id: str | None = None, **context: Any
) -> dict[str, Any]:
"""Provider-specific extra_body fields.
Merged into the API kwargs extra_body. Default: empty dict.
"""
return {}
def build_api_kwargs_extras(
self,
*,
reasoning_config: dict | None = None,
**context: Any,
) -> tuple[dict[str, Any], dict[str, Any]]:
"""Provider-specific kwargs split between extra_body and top-level api_kwargs.
Returns (extra_body_additions, top_level_kwargs).
The transport merges extra_body_additions into extra_body, and
top_level_kwargs directly into api_kwargs.
This split exists because some providers put reasoning config in
extra_body (OpenRouter: extra_body.reasoning) while others put it
as top-level api_kwargs (Kimi: api_kwargs.reasoning_effort).
Default: ({}, {}).
"""
return {}, {}
def fetch_models(
self,
*,
api_key: str | None = None,
timeout: float = 8.0,
) -> list[str] | None:
"""Fetch the live model list from the provider's models endpoint.
Returns a list of model ID strings, or None if the fetch failed or
the provider does not support live model listing.
Resolution order for the endpoint URL:
1. self.models_url (explicit override use when the models
endpoint differs from the inference base URL, e.g. OpenRouter
exposes a public catalog at /api/v1/models while inference is
at /api/v1)
2. self.base_url + "/models" (standard OpenAI-compat fallback)
The default implementation sends Bearer auth when api_key is given
and forwards self.default_headers. Override to customise auth, path,
response shape, or to return None for providers with no REST catalog.
Callers must always fall back to the static _PROVIDER_MODELS list
when this returns None.
"""
url = (self.models_url or "").strip()
if not url:
if not self.base_url:
return None
url = self.base_url.rstrip("/") + "/models"
import json
import urllib.request
req = urllib.request.Request(url)
if api_key:
req.add_header("Authorization", f"Bearer {api_key}")
req.add_header("Accept", "application/json")
for k, v in self.default_headers.items():
req.add_header(k, v)
try:
with urllib.request.urlopen(req, timeout=timeout) as resp:
data = json.loads(resp.read().decode())
items = data if isinstance(data, list) else data.get("data", [])
return [m["id"] for m in items if isinstance(m, dict) and "id" in m]
except Exception as exc:
logger.debug("fetch_models(%s): %s", self.name, exc)
return None
-29
View File
@@ -1,29 +0,0 @@
"""AWS Bedrock provider profile."""
from providers import register_provider
from providers.base import ProviderProfile
class BedrockProfile(ProviderProfile):
"""AWS Bedrock — no REST /v1/models endpoint; uses AWS SDK."""
def fetch_models(
self,
*,
api_key: str | None = None,
timeout: float = 8.0,
) -> list[str] | None:
"""Bedrock model listing requires AWS SDK, not a REST call."""
return None
bedrock = BedrockProfile(
name="bedrock",
aliases=("aws", "aws-bedrock", "amazon-bedrock", "amazon"),
api_mode="bedrock_converse",
env_vars=(), # AWS SDK credentials — not env vars
base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
auth_type="aws_sdk",
)
register_provider(bedrock)
-58
View File
@@ -1,58 +0,0 @@
"""Copilot / GitHub Models provider profile.
Copilot uses per-model api_mode routing:
- GPT-5+ / Codex models codex_responses
- Claude models anthropic_messages
- Everything else chat_completions (this profile covers that subset)
Key quirks for the chat_completions subset:
- Editor attribution headers (via copilot_default_headers())
- GitHub Models reasoning extra_body (model-catalog gated)
"""
from typing import Any
from providers import register_provider
from providers.base import ProviderProfile
class CopilotProfile(ProviderProfile):
"""GitHub Copilot / GitHub Models — editor headers + reasoning."""
def build_api_kwargs_extras(
self,
*,
model: str | None = None,
reasoning_config: dict | None = None,
supports_reasoning: bool = False,
**ctx,
) -> tuple[dict[str, Any], dict[str, Any]]:
extra_body: dict[str, Any] = {}
if supports_reasoning and model:
try:
from hermes_cli.models import github_model_reasoning_efforts
supported_efforts = github_model_reasoning_efforts(model)
if supported_efforts and reasoning_config:
effort = reasoning_config.get("effort", "medium")
# Normalize non-standard effort levels to the nearest supported
if effort == "xhigh":
effort = "high"
if effort in supported_efforts:
extra_body["reasoning"] = {"effort": effort}
elif supported_efforts:
extra_body["reasoning"] = {"effort": "medium"}
except Exception:
pass
return extra_body, {}
copilot = CopilotProfile(
name="copilot",
aliases=("github-copilot", "github-models", "github-model", "github"),
env_vars=("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
base_url="https://api.githubcopilot.com",
auth_type="copilot",
)
register_provider(copilot)
-34
View File
@@ -1,34 +0,0 @@
"""GitHub Copilot ACP provider profile.
copilot-acp uses an external ACP subprocess NOT the standard
transport. api_mode="copilot_acp" is handled separately in run_agent.py.
The profile captures auth + endpoint metadata for registry migration.
"""
from providers import register_provider
from providers.base import ProviderProfile
class CopilotACPProfile(ProviderProfile):
"""GitHub Copilot ACP — external process, no REST models endpoint."""
def fetch_models(
self,
*,
api_key: str | None = None,
timeout: float = 8.0,
) -> list[str] | None:
"""Model listing is handled by the ACP subprocess."""
return None
copilot_acp = CopilotACPProfile(
name="copilot-acp",
aliases=("github-copilot-acp", "copilot-acp-agent"),
api_mode="chat_completions", # ACP subprocess uses chat_completions routing
env_vars=(), # Managed by ACP subprocess
base_url="acp://copilot", # ACP internal scheme
auth_type="external_process",
)
register_provider(copilot_acp)
-71
View File
@@ -1,71 +0,0 @@
"""Custom / Ollama (local) provider profile.
Covers any endpoint registered as provider="custom", including local
Ollama instances. Key quirks:
- ollama_num_ctx extra_body.options.num_ctx (local context window)
- reasoning_config disabled extra_body.think = False
"""
from typing import Any
from providers import register_provider
from providers.base import ProviderProfile
class CustomProfile(ProviderProfile):
"""Custom/Ollama local provider — think=false and num_ctx support."""
def build_api_kwargs_extras(
self,
*,
reasoning_config: dict | None = None,
ollama_num_ctx: int | None = None,
**ctx: Any,
) -> tuple[dict[str, Any], dict[str, Any]]:
extra_body: dict[str, Any] = {}
# Ollama context window
if ollama_num_ctx:
options = extra_body.get("options", {})
options["num_ctx"] = ollama_num_ctx
extra_body["options"] = options
# Disable thinking when reasoning is turned off
if reasoning_config and isinstance(reasoning_config, dict):
_effort = (reasoning_config.get("effort") or "").strip().lower()
_enabled = reasoning_config.get("enabled", True)
if _effort == "none" or _enabled is False:
extra_body["think"] = False
return extra_body, {}
def fetch_models(
self,
*,
api_key: str | None = None,
timeout: float = 8.0,
) -> list[str] | None:
"""Custom/Ollama: base_url is user-configured; fetch if set."""
if not self.base_url:
return None
return super().fetch_models(api_key=api_key, timeout=timeout)
custom = CustomProfile(
name="custom",
aliases=(
"ollama",
"local",
"lmstudio",
"lm-studio",
"lm_studio",
"vllm",
"llamacpp",
"llama.cpp",
"llama-cpp",
),
env_vars=(), # No fixed key — custom endpoint
base_url="", # User-configured
)
register_provider(custom)
-20
View File
@@ -1,20 +0,0 @@
"""DeepSeek provider profile."""
from providers import register_provider
from providers.base import ProviderProfile
deepseek = ProviderProfile(
name="deepseek",
aliases=("deepseek-chat",),
env_vars=("DEEPSEEK_API_KEY",),
display_name="DeepSeek",
description="DeepSeek — native DeepSeek API",
signup_url="https://platform.deepseek.com/",
fallback_models=(
"deepseek-chat",
"deepseek-reasoner",
),
base_url="https://api.deepseek.com/v1",
)
register_provider(deepseek)
-34
View File
@@ -1,34 +0,0 @@
"""Google Gemini provider profiles.
gemini: Google AI Studio (API key) uses GeminiNativeClient
google-gemini-cli: Google Cloud Code Assist (OAuth) uses GeminiCloudCodeClient
Both report api_mode="chat_completions" but use custom native clients
that bypass the standard OpenAI transport. The profile captures auth
and endpoint metadata for auth.py / runtime_provider.py migration.
"""
from providers import register_provider
from providers.base import ProviderProfile
gemini = ProviderProfile(
name="gemini",
aliases=("google", "google-gemini", "google-ai-studio"),
api_mode="chat_completions",
env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"),
base_url="https://generativelanguage.googleapis.com/v1beta",
auth_type="api_key",
default_aux_model="gemini-3-flash-preview",
)
google_gemini_cli = ProviderProfile(
name="google-gemini-cli",
aliases=("gemini-cli", "gemini-oauth"),
api_mode="chat_completions",
env_vars=(), # OAuth — no API key
base_url="cloudcode-pa://google", # Cloud Code Assist internal scheme
auth_type="oauth_external",
)
register_provider(gemini)
register_provider(google_gemini_cli)
-26
View File
@@ -1,26 +0,0 @@
"""GMI Cloud provider profile."""
from providers import register_provider
from providers.base import ProviderProfile
gmi = ProviderProfile(
name="gmi",
aliases=("gmi-cloud", "gmicloud"),
display_name="GMI Cloud",
description="GMI Cloud — multi-model direct API (slash-form model IDs)",
signup_url="https://www.gmicloud.ai/",
env_vars=("GMI_API_KEY", "GMI_BASE_URL"),
base_url="https://api.gmi-serving.com/v1",
auth_type="api_key",
default_aux_model="google/gemini-3.1-flash-lite-preview",
fallback_models=(
"zai-org/GLM-5.1-FP8",
"deepseek-ai/DeepSeek-V3.2",
"moonshotai/Kimi-K2.5",
"google/gemini-3.1-flash-lite-preview",
"anthropic/claude-sonnet-4.6",
"openai/gpt-5.4",
),
)
register_provider(gmi)
-20
View File
@@ -1,20 +0,0 @@
"""Hugging Face provider profile."""
from providers import register_provider
from providers.base import ProviderProfile
huggingface = ProviderProfile(
name="huggingface",
aliases=("hf", "hugging-face", "huggingface-hub"),
env_vars=("HF_TOKEN",),
display_name="HuggingFace",
description="HuggingFace Inference API",
signup_url="https://huggingface.co/settings/tokens",
fallback_models=(
"Qwen/Qwen3.5-72B-Instruct",
"deepseek-ai/DeepSeek-V3.2",
),
base_url="https://router.huggingface.co/v1",
)
register_provider(huggingface)
-14
View File
@@ -1,14 +0,0 @@
"""Kilo Code provider profile."""
from providers import register_provider
from providers.base import ProviderProfile
kilocode = ProviderProfile(
name="kilocode",
aliases=("kilo-code", "kilo", "kilo-gateway"),
env_vars=("KILOCODE_API_KEY",),
base_url="https://api.kilo.ai/api/gateway",
default_aux_model="google/gemini-3-flash-preview",
)
register_provider(kilocode)
-71
View File
@@ -1,71 +0,0 @@
"""Kimi / Moonshot provider profiles.
Kimi has dual endpoints:
- sk-kimi-* keys api.kimi.com/coding (Anthropic Messages API)
- legacy keys api.moonshot.ai/v1 (OpenAI chat completions)
This module covers the chat_completions path (/v1 endpoint).
"""
from typing import Any
from providers import register_provider
from providers.base import OMIT_TEMPERATURE, ProviderProfile
class KimiProfile(ProviderProfile):
"""Kimi/Moonshot — temperature omitted, thinking + reasoning_effort."""
def build_api_kwargs_extras(
self, *, reasoning_config: dict | None = None, **context
) -> tuple[dict[str, Any], dict[str, Any]]:
"""Kimi uses extra_body.thinking + top-level reasoning_effort."""
extra_body = {}
top_level = {}
if not reasoning_config or not isinstance(reasoning_config, dict):
# No config → thinking enabled, default effort
extra_body["thinking"] = {"type": "enabled"}
top_level["reasoning_effort"] = "medium"
return extra_body, top_level
enabled = reasoning_config.get("enabled", True)
if enabled is False:
extra_body["thinking"] = {"type": "disabled"}
return extra_body, top_level
# Enabled
extra_body["thinking"] = {"type": "enabled"}
effort = (reasoning_config.get("effort") or "").strip().lower()
if effort in ("low", "medium", "high"):
top_level["reasoning_effort"] = effort
else:
top_level["reasoning_effort"] = "medium"
return extra_body, top_level
kimi = KimiProfile(
name="kimi-coding",
aliases=("kimi", "moonshot", "kimi-for-coding"),
env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
base_url="https://api.moonshot.ai/v1",
fixed_temperature=OMIT_TEMPERATURE,
default_max_tokens=32000,
default_headers={"User-Agent": "hermes-agent/1.0"},
default_aux_model="kimi-k2-turbo-preview",
)
kimi_cn = KimiProfile(
name="kimi-coding-cn",
aliases=("kimi-cn", "moonshot-cn"),
env_vars=("KIMI_CN_API_KEY",),
base_url="https://api.moonshot.cn/v1",
fixed_temperature=OMIT_TEMPERATURE,
default_max_tokens=32000,
default_headers={"User-Agent": "hermes-agent/1.0"},
default_aux_model="kimi-k2-turbo-preview",
)
register_provider(kimi)
register_provider(kimi_cn)
-31
View File
@@ -1,31 +0,0 @@
"""MiniMax provider profiles (international + China).
Both use anthropic_messages api_mode their inference_base_url
ends with /anthropic which triggers auto-detection to anthropic_messages.
"""
from providers import register_provider
from providers.base import ProviderProfile
minimax = ProviderProfile(
name="minimax",
aliases=("mini-max",),
api_mode="anthropic_messages",
env_vars=("MINIMAX_API_KEY",),
base_url="https://api.minimax.io/anthropic",
auth_type="api_key",
default_aux_model="MiniMax-M2.7",
)
minimax_cn = ProviderProfile(
name="minimax-cn",
aliases=("minimax-china", "minimax_cn"),
api_mode="anthropic_messages",
env_vars=("MINIMAX_CN_API_KEY",),
base_url="https://api.minimaxi.com/anthropic",
auth_type="api_key",
default_aux_model="MiniMax-M2.7",
)
register_provider(minimax)
register_provider(minimax_cn)
-53
View File
@@ -1,53 +0,0 @@
"""Nous Portal provider profile."""
from typing import Any
from providers import register_provider
from providers.base import ProviderProfile
class NousProfile(ProviderProfile):
"""Nous Portal — product tags, reasoning with Nous-specific omission."""
def build_extra_body(
self, *, session_id: str | None = None, **context
) -> dict[str, Any]:
return {"tags": ["product=hermes-agent"]}
def build_api_kwargs_extras(
self,
*,
reasoning_config: dict | None = None,
supports_reasoning: bool = False,
**context,
) -> tuple[dict[str, Any], dict[str, Any]]:
"""Nous: passes full reasoning_config, but OMITS when disabled."""
extra_body = {}
if supports_reasoning:
if reasoning_config is not None:
rc = dict(reasoning_config)
if rc.get("enabled") is False:
pass # Nous omits reasoning when disabled
else:
extra_body["reasoning"] = rc
else:
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
return extra_body, {}
nous = NousProfile(
name="nous",
aliases=("nous-portal", "nousresearch"),
env_vars=("NOUS_API_KEY",),
display_name="Nous Research",
description="Nous Research — Hermes model family",
signup_url="https://nousresearch.com/",
fallback_models=(
"hermes-3-405b",
"hermes-3-70b",
),
base_url="https://inference.nousresearch.com/v1",
auth_type="oauth_device_code",
)
register_provider(nous)
-21
View File
@@ -1,21 +0,0 @@
"""NVIDIA NIM provider profile."""
from providers import register_provider
from providers.base import ProviderProfile
nvidia = ProviderProfile(
name="nvidia",
aliases=("nvidia-nim",),
env_vars=("NVIDIA_API_KEY",),
display_name="NVIDIA NIM",
description="NVIDIA NIM — accelerated inference",
signup_url="https://build.nvidia.com/",
fallback_models=(
"nvidia/llama-3.1-nemotron-70b-instruct",
"nvidia/llama-3.3-70b-instruct",
),
base_url="https://integrate.api.nvidia.com/v1",
default_max_tokens=16384,
)
register_provider(nvidia)
-14
View File
@@ -1,14 +0,0 @@
"""Ollama Cloud provider profile."""
from providers import register_provider
from providers.base import ProviderProfile
ollama_cloud = ProviderProfile(
name="ollama-cloud",
aliases=("ollama_cloud",),
default_aux_model="nemotron-3-nano:30b",
env_vars=("OLLAMA_API_KEY",),
base_url="https://ollama.com/v1",
)
register_provider(ollama_cloud)
-15
View File
@@ -1,15 +0,0 @@
"""OpenAI Codex (Responses API) provider profile."""
from providers import register_provider
from providers.base import ProviderProfile
openai_codex = ProviderProfile(
name="openai-codex",
aliases=("codex", "openai_codex"),
api_mode="codex_responses",
env_vars=(), # OAuth external — no API key
base_url="https://chatgpt.com/backend-api/codex",
auth_type="oauth_external",
)
register_provider(openai_codex)
-30
View File
@@ -1,30 +0,0 @@
"""OpenCode provider profiles (Zen + Go).
Both use per-model api_mode routing:
- OpenCode Zen: Claude anthropic_messages, GPT-5/Codex codex_responses,
everything else chat_completions (this profile)
- OpenCode Go: MiniMax anthropic_messages, GLM/Kimi chat_completions
(this profile)
"""
from providers import register_provider
from providers.base import ProviderProfile
opencode_zen = ProviderProfile(
name="opencode-zen",
aliases=("opencode", "opencode_zen", "zen"),
env_vars=("OPENCODE_ZEN_API_KEY",),
base_url="https://opencode.ai/zen/v1",
default_aux_model="gemini-3-flash",
)
opencode_go = ProviderProfile(
name="opencode-go",
aliases=("opencode_go", "go", "opencode-go-sub"),
env_vars=("OPENCODE_GO_API_KEY",),
base_url="https://opencode.ai/zen/go/v1",
default_aux_model="glm-5",
)
register_provider(opencode_zen)
register_provider(opencode_go)
-86
View File
@@ -1,86 +0,0 @@
"""OpenRouter provider profile."""
import logging
from typing import Any
from providers import register_provider
from providers.base import ProviderProfile
logger = logging.getLogger(__name__)
_CACHE: list[str] | None = None
class OpenRouterProfile(ProviderProfile):
"""OpenRouter aggregator — provider preferences, reasoning config passthrough."""
def fetch_models(
self,
*,
api_key: str | None = None,
timeout: float = 8.0,
) -> list[str] | None:
"""Fetch from public OpenRouter catalog — no auth required.
Note: Tool-call capability filtering is applied by hermes_cli/models.py
via fetch_openrouter_models() _openrouter_model_supports_tools(), not
here. The picker early-returns via the dedicated openrouter path before
reaching this method, so filtering here would be unreachable.
"""
global _CACHE # noqa: PLW0603
if _CACHE is not None:
return _CACHE
try:
result = super().fetch_models(api_key=None, timeout=timeout)
if result is not None:
_CACHE = result
return result
except Exception as exc:
logger.debug("fetch_models(openrouter): %s", exc)
return None
def build_extra_body(
self, *, session_id: str | None = None, **context: Any
) -> dict[str, Any]:
body: dict[str, Any] = {}
prefs = context.get("provider_preferences")
if prefs:
body["provider"] = prefs
return body
def build_api_kwargs_extras(
self,
*,
reasoning_config: dict | None = None,
supports_reasoning: bool = False,
**context: Any,
) -> tuple[dict[str, Any], dict[str, Any]]:
"""OpenRouter passes the full reasoning_config dict as extra_body.reasoning."""
extra_body: dict[str, Any] = {}
if supports_reasoning:
if reasoning_config is not None:
extra_body["reasoning"] = dict(reasoning_config)
else:
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
return extra_body, {}
openrouter = OpenRouterProfile(
name="openrouter",
aliases=("or",),
env_vars=("OPENROUTER_API_KEY",),
display_name="OpenRouter",
description="OpenRouter — unified API for 200+ models",
signup_url="https://openrouter.ai/keys",
base_url="https://openrouter.ai/api/v1",
models_url="https://openrouter.ai/api/v1/models",
fallback_models=(
"anthropic/claude-sonnet-4.6",
"openai/gpt-5.4",
"deepseek/deepseek-chat",
"google/gemini-3-flash-preview",
"qwen/qwen3-plus",
),
)
register_provider(openrouter)
-82
View File
@@ -1,82 +0,0 @@
"""Qwen Portal provider profile."""
import copy
from typing import Any
from providers import register_provider
from providers.base import ProviderProfile
class QwenProfile(ProviderProfile):
"""Qwen Portal — message normalization, vl_high_resolution, metadata top-level."""
def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Normalize content to list-of-dicts format.
Inject cache_control on system message.
Matches the behavior of run_agent.py:_qwen_prepare_chat_messages().
"""
prepared = copy.deepcopy(messages)
if not prepared:
return prepared
for msg in prepared:
if not isinstance(msg, dict):
continue
content = msg.get("content")
if isinstance(content, str):
msg["content"] = [{"type": "text", "text": content}]
elif isinstance(content, list):
normalized_parts = []
for part in content:
if isinstance(part, str):
normalized_parts.append({"type": "text", "text": part})
elif isinstance(part, dict):
normalized_parts.append(part)
if normalized_parts:
msg["content"] = normalized_parts
# Inject cache_control on the last part of the system message.
for msg in prepared:
if isinstance(msg, dict) and msg.get("role") == "system":
content = msg.get("content")
if (
isinstance(content, list)
and content
and isinstance(content[-1], dict)
):
content[-1]["cache_control"] = {"type": "ephemeral"}
break
return prepared
def build_extra_body(
self, *, session_id: str | None = None, **context
) -> dict[str, Any]:
return {"vl_high_resolution_images": True}
def build_api_kwargs_extras(
self,
*,
reasoning_config: dict | None = None,
qwen_session_metadata: dict | None = None,
**context,
) -> tuple[dict[str, Any], dict[str, Any]]:
"""Qwen metadata goes to top-level api_kwargs, not extra_body."""
top_level = {}
if qwen_session_metadata:
top_level["metadata"] = qwen_session_metadata
return {}, top_level
qwen = QwenProfile(
name="qwen-oauth",
aliases=("qwen", "qwen-portal", "qwen-cli"),
env_vars=("QWEN_API_KEY",),
base_url="https://portal.qwen.ai/v1",
auth_type="oauth_external",
default_max_tokens=65536,
)
register_provider(qwen)
-14
View File
@@ -1,14 +0,0 @@
"""StepFun provider profile."""
from providers import register_provider
from providers.base import ProviderProfile
stepfun = ProviderProfile(
name="stepfun",
aliases=("step", "stepfun-coding-plan"),
default_aux_model="step-3.5-flash",
env_vars=("STEPFUN_API_KEY",),
base_url="https://api.stepfun.ai/step_plan/v1",
)
register_provider(stepfun)

Some files were not shown because too many files have changed in this diff Show More