Compare commits

..

1 Commits

Author SHA1 Message Date
kshitijk4poor 965d2fec98 feat(provider): add codex-cli external-process provider
Add an external-process inference provider that shells out to the
Codex CLI (codex exec --json). This lets users delegate Hermes
requests to their local Codex CLI installation, leveraging Codex's
agent loop while keeping Hermes as the driver.

Key design:
- Text-in/text-out MVP — Hermes tools are disabled (Codex handles its
  own tool calling internally).
- Streaming is disabled (the subprocess call returns a single
  SimpleNamespace rather than an iterable generator).
- Follows the copilot-acp external-process pattern for routing,
  streaming exclusion, and credential resolution.

Files:
- agent/codex_cli_client.py  — Client facade, parses JSONL events
- hermes_cli/auth.py  — ProviderConfig, status helper, cred resolver
- hermes_cli/runtime_provider.py  — Runtime resolution
- run_agent.py  — Client routing, tool disable, streaming exclusion
- hermes_cli/models.py  — Provider entry, aliases, model list
- hermes_cli/main.py  — --provider choices

Env var support: HERMES_CODEX_CLI_COMMAND, CODEX_CLI_PATH,
HERMES_CODEX_CLI_ARGS.
2026-05-09 21:02:32 +05:30
714 changed files with 6680 additions and 76324 deletions
-26
View File
@@ -143,18 +143,6 @@
# Also requires ~/.honcho/config.json with enabled=true (see README).
# HONCHO_API_KEY=
# =============================================================================
# HYPERLIQUID OPTIONAL SKILL
# =============================================================================
# Optional defaults for the Hyperliquid skill in optional-skills/blockchain/hyperliquid
#
# Hyperliquid API base URL override
# Default: https://api.hyperliquid.xyz
# HYPERLIQUID_API_URL=https://api.hyperliquid-testnet.xyz
#
# Default address for account-level commands like state, fills, orders, and review
# HYPERLIQUID_USER_ADDRESS=0x0000000000000000000000000000000000000000
# =============================================================================
# TERMINAL TOOL CONFIGURATION
# =============================================================================
@@ -273,20 +261,6 @@ BROWSER_SESSION_TIMEOUT=300
# Browser sessions are automatically closed after this period of no activity
BROWSER_INACTIVITY_TIMEOUT=120
# Camofox local anti-detection browser (Camoufox-based Firefox).
# Set CAMOFOX_URL to route the browser tools through a local Camofox server
# instead of agent-browser/Browserbase. See docs/user-guide/features/browser.md.
# CAMOFOX_URL=http://localhost:9377
# Externally managed Camofox sessions — when another app owns the visible
# Camofox browser, set these so Hermes shares the same userId/profile instead
# of creating its own isolated session.
# CAMOFOX_USER_ID=
# CAMOFOX_SESSION_KEY=
# Set to true to reuse an already-open Camofox tab for this identity before
# creating a new one (useful for gateway restarts).
# CAMOFOX_ADOPT_EXISTING_TAB=false
# =============================================================================
# SESSION LOGGING
# =============================================================================
+1 -2
View File
@@ -122,8 +122,7 @@ jobs:
retention-days: 14
- name: Post / update PR comment
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
continue-on-error: true
if: github.event_name == 'pull_request'
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
with:
script: |
+1 -4
View File
@@ -55,14 +55,11 @@ jobs:
e2e:
runs-on: ubuntu-latest
timeout-minutes: 15
timeout-minutes: 10
steps:
- name: Checkout code
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y ripgrep
- name: Install uv
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+4 -8
View File
@@ -540,14 +540,10 @@ Full authoring guide: `website/docs/developer-guide/model-provider-plugin.md`.
### Dashboard / context-engine / image-gen plugin directories
`plugins/context_engine/`, `plugins/image_gen/`, etc. follow the same
pattern (ABC + orchestrator + per-plugin directory). Context engines
plug into `agent/context_engine.py`; image-gen providers into
`agent/image_gen_provider.py`. Reference / docs-companion plugins
(`example-dashboard`, `strike-freedom-cockpit`, `plugin-llm-example`,
`plugin-llm-async-example`) live in the
[`hermes-example-plugins`](https://github.com/NousResearch/hermes-example-plugins)
companion repo, not in this tree.
`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
Context engines plug into `agent/context_engine.py`; image-gen providers
into `agent/image_gen_provider.py`.
---
+55 -302
View File
@@ -1,331 +1,84 @@
# Hermes Agent Security Policy
This document describes Hermes Agent's trust model, names the one
security boundary the project treats as load-bearing, and defines the
scope for vulnerability reports.
This document outlines the security protocols, trust model, and deployment hardening guidelines for the **Hermes Agent** project.
## 1. Reporting a Vulnerability
## 1. Vulnerability Reporting
Report privately via [GitHub Security Advisories](https://github.com/NousResearch/hermes-agent/security/advisories/new)
or **security@nousresearch.com**. Do not open public issues for
security vulnerabilities. **Hermes Agent does not operate a bug
bounty program.**
Hermes Agent does **not** operate a bug bounty program. Security issues should be reported via [GitHub Security Advisories (GHSA)](https://github.com/NousResearch/hermes-agent/security/advisories/new) or by emailing **security@nousresearch.com**. Do not open public issues for security vulnerabilities.
A useful report includes:
- A concise description and severity assessment.
- The affected component, identified by file path and line range
(e.g. `path/to/file.py:120-145`).
- Environment details (`hermes version`, commit SHA, OS, Python
version).
- A reproduction against `main` or the latest release.
- A statement of which trust boundary in §2 is crossed.
Please read §2 and §3 before submitting. Reports that demonstrate
limits of an in-process heuristic this policy does not treat as a
boundary will be closed as out-of-scope under §3 — but see §3.2:
they are still welcome as regular issues or pull requests, just not
through the private security channel.
### Required Submission Details
- **Title & Severity:** Concise description and CVSS score/rating.
- **Affected Component:** Exact file path and line range (e.g., `tools/approval.py:120-145`).
- **Environment:** Output of `hermes version`, commit SHA, OS, and Python version.
- **Reproduction:** Step-by-step Proof-of-Concept (PoC) against `main` or the latest release.
- **Impact:** Explanation of what trust boundary was crossed.
---
## 2. Trust Model
Hermes Agent is a single-tenant personal agent. Its posture is
layered, and the layers are not equally load-bearing. Reporters and
operators should reason about them in the same terms.
The core assumption is that Hermes is a **personal agent** with one trusted operator.
### 2.1 Definitions
### Operator & Session Trust
- **Single Tenant:** The system protects the operator from LLM actions, not from malicious co-tenants. Multi-user isolation must happen at the OS/host level.
- **Gateway Security:** Authorized callers (Telegram, Discord, Slack, etc.) receive equal trust. Session keys are used for routing, not as authorization boundaries.
- **Execution:** Defaults to `terminal.backend: local` (direct host execution). Container isolation (Docker, Modal, Daytona) is opt-in for sandboxing.
- **Agent process.** The Python interpreter running Hermes Agent,
including any Python modules it has loaded (skills, plugins,
hook handlers).
- **Terminal backend.** A pluggable execution target for the
`terminal()` tool. The default runs commands directly on the host.
Other backends run commands inside a container, cloud sandbox, or
remote host.
- **Input surface.** Any channel through which content enters the
agent's context: operator input, web fetches, email, gateway
messages, file reads, MCP server responses, tool results.
- **Trust envelope.** The set of resources an operator has implicitly
granted Hermes Agent access to by running it — typically, whatever
the operator's own user account can reach on the host.
- **Stance.** An explicit statement in Hermes Agent's documentation
or code about how a consuming layer (adapter, UI, file writer,
shell) should treat agent output — e.g. "the dashboard renders
agent output as inert HTML."
### Dangerous Command Approval
The approval system (`tools/approval.py`) is a core security boundary. Terminal commands, file operations, and other potentially destructive actions are gated behind explicit user confirmation before execution. The approval mode is configurable via `approvals.mode` in `config.yaml`:
- `"on"` (default) — prompts the user to approve dangerous commands.
- `"auto"` — auto-approves after a configurable delay.
- `"off"` — disables the gate entirely (break-glass; see Section 3).
### 2.2 The Boundary: OS-Level Isolation
### Output Redaction
`agent/redact.py` strips secret-like patterns (API keys, tokens, credentials) from all display output before it reaches the terminal or gateway platform. This prevents accidental credential leakage in chat logs, tool previews, and response text. Redaction operates on the display layer only — underlying values remain intact for internal agent operations.
**The only security boundary against an adversarial LLM is the
operating system.** Nothing inside the agent process constitutes
containment — not the approval gate, not output redaction, not any
pattern scanner, not any tool allowlist. Any in-process component
that screens LLM output is a heuristic operating on an
attacker-influenced string, and this policy treats it as such.
### Skills vs. MCP Servers
- **Installed Skills:** High trust. Equivalent to local host code; skills can read environment variables and run arbitrary commands.
- **MCP Servers:** Lower trust. MCP subprocesses receive a filtered environment (`_build_safe_env()` in `tools/mcp_tool.py`) — only safe baseline variables (`PATH`, `HOME`, `XDG_*`) plus variables explicitly declared in the server's `env` config block are passed through. Host credentials are stripped by default. Additionally, packages invoked via `npx`/`uvx` are checked against the OSV malware database before spawning.
Hermes Agent supports two OS-level isolation postures. They address
different threats and an operator should choose deliberately.
### Code Execution Sandbox
The `execute_code` tool (`tools/code_execution_tool.py`) runs LLM-generated Python scripts in a child process with API keys and tokens stripped from the environment to prevent credential exfiltration. Only environment variables explicitly declared by loaded skills (via `env_passthrough`) or by the user in `config.yaml` (`terminal.env_passthrough`) are passed through. The child accesses Hermes tools via RPC, not direct API calls.
#### Terminal-backend isolation
A non-default terminal backend runs LLM-emitted shell commands
inside a container, remote host, or cloud sandbox. The file tools
(`read_file`, `write_file`, `patch`) also run through this backend,
since they are implemented on top of the shell contract — they
cannot reach paths the backend doesn't expose.
What this confines: anything the agent does by issuing shell or
file operations. What this does **not** confine: everything the
agent does in its own Python process. That includes the
code-execution tool (spawned as a host subprocess), MCP subprocesses
(spawned from the agent's environment), plugin loading, hook
dispatch, and skill loading (all imported into the agent
interpreter).
Terminal-backend isolation is the right posture when the concern is
LLM-emitted destructive shell or unwanted file-tool writes, and the
operator is otherwise trusted.
#### Whole-process wrapping
Whole-process wrapping runs the entire agent process tree inside a
sandbox. Every code path — shell, code-execution, MCP, file tools,
plugins, hooks, skill loading — is subject to the same filesystem,
network, process, and (where applicable) inference policy.
Hermes Agent supports this in two ways:
- **Hermes Agent's own Docker image and Compose setup.** Lighter-
weight; the agent runs in a standard container with operator-
configured mounts and network policy.
- **[NVIDIA OpenShell](https://github.com/NVIDIA/OpenShell)**.
OpenShell provides per-session sandboxes with declarative policy
across filesystem, network (L7 egress), process/syscall, and
inference-routing layers. Network and inference policies are
hot-reloadable. Credentials are injected from a Provider store
and never touch the sandbox filesystem.
Under a whole-process wrapper, Hermes Agent's in-process heuristics
(§2.4) function as accident-prevention layered on top of a real
boundary. This is the supported posture when the agent ingests
content from surfaces the operator does not control — the open web,
inbound email, multi-user channels, untrusted MCP servers — and for
production or shared deployments.
Operators running the default local backend with untrusted input
surfaces, or running a terminal-backend sandbox and expecting it to
contain code paths that don't go through the shell, are operating
outside the supported security posture.
### 2.3 Credential Scoping
Hermes Agent filters the environment it passes to its lower-trust
in-process components: shell subprocesses, MCP subprocesses, and
the code-execution child. Credentials like provider API keys and
gateway tokens are stripped by default; variables explicitly
declared by the operator or by a loaded skill are passed through.
This reduces casual exfiltration. It is not containment. Any
component running inside the agent process (skills, plugins, hook
handlers) can read whatever the agent itself can read, including
in-memory credentials. The mitigation against a compromised
in-process component is operator review before install (§2.4,
§2.5), not environment scrubbing.
### 2.4 In-Process Heuristics
The following components screen or warn about LLM behavior. They
are useful. They are not boundaries.
- The **approval gate** detects common destructive shell patterns
and prompts the operator before execution. Shell is Turing-
complete; a denylist over shell strings is structurally
incomplete. The gate catches cooperative-mode mistakes, not
adversarial output.
- **Output redaction** strips secret-like patterns from display.
A motivated output producer will defeat it.
- **Skills Guard** scans installable skill content for injection
patterns. It is a review aid; the boundary for third-party skills
is operator review before install. Reviewing a skill means
reading its Python code and scripts, not just its SKILL.md
description — skills execute arbitrary Python at import time.
### 2.5 Plugin Trust Model
Plugins load into the agent process and run with full agent
privileges: they can read the same credentials, call the same
tools, register the same hooks, and import the same modules as
anything shipped in-tree. The boundary for third-party plugins is
operator review before install — the same rule as skills (§2.4),
called out separately because plugins are architecturally heavier
and often ship their own background services, network listeners,
and dependencies.
A malicious or buggy plugin is not a vulnerability in Hermes Agent
itself. Bugs in Hermes Agent's plugin-install or plugin-discovery
path that prevent the operator from seeing what they're installing
are in scope under §3.1.
### 2.6 External Surfaces
An **external surface** is any channel outside the local agent
process through which a caller can dispatch agent work, resolve
approvals, or receive agent output. Each surface has its own
authorization model, but the rules below apply uniformly.
**Surfaces in Hermes Agent:**
- **Gateway platform adapters.** Messaging integrations in
`gateway/platforms/` (Telegram, Discord, Slack, email, SMS, etc.)
and analogous adapters shipped as plugins.
- **Network-exposed HTTP surfaces.** The API server adapter, the
dashboard plugin, the kanban plugin's HTTP endpoints, and any
other plugin that binds a listening socket.
- **Editor / IDE adapters.** The ACP adapter (`acp_adapter/`) and
equivalent integrations that accept requests from a local client
process.
- **The TUI gateway (`tui_gateway/`).** JSON-RPC backend for the
Ink terminal UI, reached over local IPC.
**Uniform rules:**
1. **Authorization is required at every surface that crosses a
trust boundary.** For messaging and network HTTP surfaces, the
boundary is the network: authorization means an operator-
configured caller allowlist. For editor and local-IPC surfaces
(ACP, TUI gateway), the boundary is the host's user account:
authorization means relying on OS-level access control (file
permissions, loopback-only binds) and not exposing the surface
beyond the local user without an explicit network auth layer.
2. **An allowlist is required for every enabled network-exposed
adapter.** Adapters must refuse to dispatch agent work, resolve
approvals, or relay output until an allowlist is set. Code paths
that fail open when no allowlist is configured are code bugs in
scope under §3.1.
3. **Session identifiers are routing handles, not authorization
boundaries.** Knowing another caller's session ID does not grant
access to their approvals or output; authorization is always
re-checked against the allowlist (or OS-level equivalent).
4. **Within the authorized set, all callers are equally trusted.**
Hermes Agent does not model per-caller capabilities inside a
single adapter. Operators who need capability separation should
run separate agent instances with separate allowlists.
5. **Binding a local-only surface to a non-loopback interface is a
break-glass operator decision (§3.2).** The dashboard and other
plugin HTTP servers default to loopback; exposing them via
`--host 0.0.0.0` or equivalent makes public-exposure hardening
(§4) the operator's responsibility.
### Subagents
- **No recursive delegation:** The `delegate_task` tool is disabled for child agents.
- **Depth limit:** `MAX_DEPTH = 2` — parent (depth 0) can spawn a child (depth 1); grandchildren are rejected.
- **Memory isolation:** Subagents run with `skip_memory=True` and do not have access to the parent's persistent memory provider. The parent receives only the task prompt and final response as an observation.
---
## 3. Scope
## 3. Out of Scope (Non-Vulnerabilities)
### 3.1 In Scope
- Escape from a declared OS-level isolation posture (§2.2): an
attacker-controlled code path reaching state that the posture
claimed to confine.
- Unauthorized external-surface access: a caller outside the
configured authorization set (allowlist, or OS-level equivalent
for local-IPC surfaces) dispatching work, receiving output, or
resolving approvals (§2.6).
- Credential exfiltration: leakage of operator credentials or
session authorization material to a destination outside the
trust envelope, via a mechanism that should have prevented it
(environment scrubbing bug, adapter logging, transport error
that flushes credentials to an upstream, etc.).
- Trust-model documentation violations: code behaving contrary to
what this policy, Hermes Agent's own documentation, or reasonable
operator expectations would predict — including cases where
Hermes Agent has documented a stance about how its output should
be rendered by a consuming layer (dashboard, gateway adapter,
file writer, shell) and a code path breaks that stance.
### 3.2 Out of Scope
"Out of scope" here means "not a security vulnerability under this
policy." It does not mean "not worth reporting." Improvements to the
in-process heuristics, hardening ideas, and UX fixes are welcome as
regular issues or pull requests — the approval gate can always catch
more patterns, redaction can always get smarter, adapter behavior
can always be tightened. These items just don't go through the
private-disclosure channel and don't receive advisories.
- **Bypasses of in-process heuristics (§2.4)** — approval-gate regex
bypasses, redaction bypasses, Skills Guard pattern bypasses, and
analogous reports against future heuristics. These components are
not boundaries; defeating them is not a vulnerability under this
policy.
- **Prompt injection per se.** Getting the LLM to emit unusual
output — via injected content, hallucination, training artifacts,
or any other cause — is not itself a vulnerability. "I achieved
prompt injection" without a chained §3.1 outcome is not an
actionable report under this policy.
- **Consequences of a chosen isolation posture.** Reports that a
code path operating within its posture's scope can do what that
posture permits are not vulnerabilities. Examples: shell or file
tools reaching host state under the local backend; code-execution
or MCP subprocesses reaching host state under terminal-backend
isolation that only sandboxes shell; reports whose preconditions
require pre-existing write access to operator-owned configuration
or credential files (those are already inside the trust envelope).
- **Documented break-glass settings.** Operator-selected trade-offs
that explicitly disable protections: `--insecure` and equivalent
flags on the dashboard or other components, disabled approvals,
local backend in production, development profiles that bypass
hermes-home security, and similar. Reports against those
configurations are not vulnerabilities — that's the flag's job.
- **Community-contributed skills and plugins.** Third-party skills
(including the community skills repository) and third-party
plugins are in the operator's review surface, not Hermes Agent's
trust surface (§2.4, §2.5). A skill or plugin doing something
malicious is the expected failure mode of one that wasn't
reviewed, not a vulnerability in Hermes Agent. Bugs in Hermes
Agent's skill-install or plugin-install path that prevent the
operator from seeing what they're installing are in scope under
§3.1.
- **Public exposure without external controls.** Exposing the
gateway or API to the public internet without authentication,
VPN, or firewall.
- **Tool-level read/write restrictions on a posture where shell is
permitted.** If a path is reachable via the terminal tool, reports
that other file tools can reach it add nothing.
The following scenarios are **not** considered security breaches:
- **Prompt Injection:** Unless it results in a concrete bypass of the approval system, toolset restrictions, or container sandbox.
- **Public Exposure:** Deploying the gateway to the public internet without external authentication or network protection.
- **Trusted State Access:** Reports that require pre-existing write access to `~/.hermes/`, `.env`, or `config.yaml` (these are operator-owned files).
- **Default Behavior:** Host-level command execution when `terminal.backend` is set to `local` — this is the documented default, not a vulnerability.
- **Configuration Trade-offs:** Intentional break-glass settings such as `approvals.mode: "off"` or `terminal.backend: local` in production.
- **Tool-level read/access restrictions:** The agent has unrestricted shell access via the `terminal` tool by design. Reports that a specific tool (e.g., `read_file`) can access a resource are not vulnerabilities if the same access is available through `terminal`. Tool-level deny lists only constitute a meaningful security boundary when paired with equivalent restrictions on the terminal side (as with write operations, where `WRITE_DENIED_PATHS` is paired with the dangerous command approval system).
---
## 4. Deployment Hardening
## 4. Deployment Hardening & Best Practices
The single most important hardening decision is matching isolation
(§2.2) to the trust of the content the agent will ingest. Beyond
that:
### Filesystem & Network
- **Production sandboxing:** Use container backends (`docker`, `modal`, `daytona`) instead of `local` for untrusted workloads.
- **File permissions:** Run as non-root (the Docker image uses UID 10000); protect credentials with `chmod 600 ~/.hermes/.env` on local installs.
- **Network exposure:** Do not expose the gateway or API server to the public internet without VPN, Tailscale, or firewall protection. SSRF protection is enabled by default across all gateway platform adapters (Telegram, Discord, Slack, Matrix, Mattermost, etc.) with redirect validation. Note: the local terminal backend does not apply SSRF filtering, as it operates within the trusted operator's environment.
- Run the agent as a non-root user. The supplied container image
does this by default.
- Keep credentials in the operator credential file with tight
permissions, never in the main config, never in version control.
Under OpenShell, use the Provider store rather than an on-disk
credential file.
- Do not expose the gateway or API to the public internet without
VPN, Tailscale, or firewall protection. Under OpenShell, use the
network policy layer to restrict egress.
- Configure a caller allowlist for every network-exposed adapter
you enable (§2.6).
- Review third-party skills and plugins before install (§2.4,
§2.5). For skills, this means reading the Python and scripts,
not just SKILL.md. Skills Guard reports and the install audit
log are the review surface.
- Hermes Agent includes supply-chain guards for MCP server
launches and for dependency / bundled-package changes in CI; see
`CONTRIBUTING.md` for specifics.
### Skills & Supply Chain
- **Skill installation:** Review Skills Guard reports (`tools/skills_guard.py`) before installing third-party skills. The audit log at `~/.hermes/skills/.hub/audit.log` tracks every install and removal.
- **MCP safety:** OSV malware checking runs automatically for `npx`/`uvx` packages before MCP server processes are spawned.
- **CI/CD:** GitHub Actions are pinned to full commit SHAs. The `supply-chain-audit.yml` workflow blocks PRs containing `.pth` files or suspicious `base64`+`exec` patterns.
### Credential Storage
- API keys and tokens belong exclusively in `~/.hermes/.env` — never in `config.yaml` or checked into version control.
- The credential pool system (`agent/credential_pool.py`) handles key rotation and fallback. Credentials are resolved from environment variables, not stored in plaintext databases.
---
## 5. Disclosure
## 5. Disclosure Process
- **Coordinated disclosure window:** 90 days from report, or until a
fix is released, whichever comes first.
- **Channel:** the GHSA thread or email correspondence with
security@nousresearch.com.
- **Credit:** reporters are credited in release notes unless
anonymity is requested.
- **Coordinated Disclosure:** 90-day window or until a fix is released, whichever comes first.
- **Communication:** All updates occur via the GHSA thread or email correspondence with security@nousresearch.com.
- **Credits:** Reporters are credited in release notes unless anonymity is requested.
-1
View File
@@ -601,7 +601,6 @@ class SessionManager:
),
"quiet_mode": True,
"session_id": session_id,
"session_db": self._get_db(),
"model": model or default_model,
}
+2 -2
View File
@@ -769,8 +769,8 @@ def _build_patch_mode_content(patch_text: str) -> List[Any]:
old_chunks: list[str] = []
new_chunks: list[str] = []
for hunk in op.hunks:
old_lines = [line.content for line in hunk.lines if line.prefix in {" ", "-"}]
new_lines = [line.content for line in hunk.lines if line.prefix in {" ", "+"}]
old_lines = [line.content for line in hunk.lines if line.prefix in (" ", "-")]
new_lines = [line.content for line in hunk.lines if line.prefix in (" ", "+")]
if old_lines or new_lines:
old_chunks.append("\n".join(old_lines))
new_chunks.append("\n".join(new_lines))
+1 -1
View File
@@ -47,7 +47,7 @@ def _title_case_slug(value: Optional[str]) -> Optional[str]:
def _parse_dt(value: Any) -> Optional[datetime]:
if value in {None, ""}:
if value in (None, ""):
return None
if isinstance(value, (int, float)):
return datetime.fromtimestamp(float(value), tz=timezone.utc)
+4 -20
View File
@@ -35,14 +35,6 @@ def _get_anthropic_sdk():
"""Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
global _anthropic_sdk
if _anthropic_sdk is ...:
try:
from tools.lazy_deps import ensure as _lazy_ensure
_lazy_ensure("provider.anthropic", prompt=False)
except ImportError:
pass
except Exception:
# FeatureUnavailable — fall through to ImportError handling below
pass
try:
import anthropic as _sdk
_anthropic_sdk = _sdk
@@ -1297,21 +1289,13 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
continue
if name:
seen_names.add(name)
anthropic_tool: Dict[str, Any] = {
result.append({
"name": name,
"description": fn.get("description", ""),
"input_schema": _normalize_tool_input_schema(
fn.get("parameters", {"type": "object", "properties": {}})
),
}
# Forward cache_control marker when present on the OpenAI-format
# tool dict (set by ``mark_tools_for_long_lived_cache``). Anthropic's
# tools array supports cache_control on the last tool to cache the
# entire schema cross-session.
cache_control = t.get("cache_control")
if isinstance(cache_control, dict):
anthropic_tool["cache_control"] = dict(cache_control)
result.append(anthropic_tool)
})
return result
@@ -1553,7 +1537,7 @@ def convert_messages_to_anthropic(
# downgraded to a spurious text block on the last assistant message.
reasoning_content = m.get("reasoning_content")
_already_has_thinking = any(
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
for b in blocks
)
if isinstance(reasoning_content, str) and not _already_has_thinking:
@@ -1704,7 +1688,7 @@ def convert_messages_to_anthropic(
if isinstance(m["content"], list):
m["content"] = [
b for b in m["content"]
if not (isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"})
if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"))
]
prev_blocks = fixed[-1]["content"]
curr_blocks = m["content"]
+96 -620
View File
@@ -175,7 +175,7 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
# Resolve to the user's actual main provider so named custom providers
# and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly.
main_prov = (_read_main_provider() or "").strip().lower()
if main_prov and main_prov not in {"auto", "main", ""}:
if main_prov and main_prov not in ("auto", "main", ""):
normalized = main_prov
else:
return "custom"
@@ -382,7 +382,7 @@ _AI_GATEWAY_HEADERS = {
# Nous Portal extra_body for product attribution.
# Callers should pass this as extra_body in chat.completions.create()
# when the auxiliary client is backed by Nous Portal.
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent", "client=aux"]}
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent"]}
# Set at resolve time — True if the auxiliary client points to Nous Portal
auxiliary_is_nous: bool = False
@@ -490,29 +490,6 @@ def _select_pool_entry(provider: str) -> Tuple[bool, Optional[Any]]:
return True, None
def _peek_pool_entry(provider: str) -> Optional[Any]:
    """Look up the pool's current (or next) entry for *provider*.

    Intended as a best-effort, non-rotating lookup: the pool's ``current``
    accessor is consulted first, then ``peek``. Any exception raised while
    loading or inspecting the pool is logged at debug level and results in
    ``None``.

    Returns:
        The first non-``None`` entry produced by ``current()`` or ``peek()``,
        or ``None`` when the pool is missing, has no credentials, exposes
        neither accessor, or an error occurs.
    """
    try:
        pool = load_pool(provider)
    except Exception as exc:
        logger.debug("Auxiliary client: could not load pool for %s (peek): %s", provider, exc)
        return None

    if not pool or not pool.has_credentials():
        return None

    try:
        # Try the accessors in priority order; a missing or non-callable
        # accessor is skipped, and a None result falls through to the next.
        for accessor_name in ("current", "peek"):
            accessor = getattr(pool, accessor_name, None)
            if not callable(accessor):
                continue
            entry = accessor()
            if entry is not None:
                return entry
    except Exception as exc:
        logger.debug("Auxiliary client: could not peek pool entry for %s: %s", provider, exc)
    return None
def _pool_runtime_api_key(entry: Any) -> str:
if entry is None:
return ""
@@ -578,7 +555,7 @@ def _convert_content_for_responses(content: Any) -> Any:
if detail:
entry["detail"] = detail
converted.append(entry)
elif ptype in {"input_text", "input_image"}:
elif ptype in ("input_text", "input_image"):
# Already in Responses format — pass through
converted.append(part)
else:
@@ -706,16 +683,6 @@ class _CodexCompletionsAdapter:
close()
except Exception:
logger.debug("Codex auxiliary: client close during timeout failed", exc_info=True)
# The cached auxiliary client wraps this same ``self._client``
# (or *is* a ``CodexAuxiliaryClient`` whose ``_real_client`` is
# this instance). After we close the httpx transport above, the
# cache must drop that entry — otherwise the next auxiliary call
# (compression retry, memory flush, etc.) reuses the dead client
# and fails fast with a connection error. See issue #23432.
try:
_evict_cached_client_instance(self._client)
except Exception:
logger.debug("Codex auxiliary: cache eviction on timeout failed", exc_info=True)
def _check_cancelled() -> None:
if deadline is not None and time.monotonic() >= deadline:
@@ -798,7 +765,7 @@ class _CodexCompletionsAdapter:
if item_type == "message":
for part in (_item_get(item, "content") or []):
ptype = _item_get(part, "type")
if ptype in {"output_text", "text"}:
if ptype in ("output_text", "text"):
text_parts.append(_item_get(part, "text", ""))
elif item_type == "function_call":
tool_calls_raw.append(SimpleNamespace(
@@ -900,14 +867,6 @@ class AsyncCodexAuxiliaryClient:
self.chat = _AsyncCodexChatShim(async_adapter)
self.api_key = sync_wrapper.api_key
self.base_url = sync_wrapper.base_url
# Mirror the sync wrapper's _real_client so cache eviction by leaf
# OpenAI client (e.g. _close_client_on_timeout in #23482) drops
# this async entry too. Without this, sync and async cache entries
# diverge on poisoning: the sync entry is evicted but the async
# entry keeps reusing the closed transport, failing every
# subsequent async aux call with 'Connection error' until the
# gateway restarts.
self._real_client = sync_wrapper._real_client
class _AnthropicCompletionsAdapter:
@@ -1043,9 +1002,6 @@ class AsyncAnthropicAuxiliaryClient:
self.chat = _AsyncAnthropicChatShim(async_adapter)
self.api_key = sync_wrapper.api_key
self.base_url = sync_wrapper.base_url
# See AsyncCodexAuxiliaryClient: mirror _real_client so cache
# eviction on a poisoned underlying client also drops this entry.
self._real_client = sync_wrapper._real_client
def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
@@ -1484,16 +1440,7 @@ def _read_main_model() -> str:
config.yaml model.default is the single source of truth for the active
model. Environment variables are no longer consulted.
Runtime override: when an AIAgent is active with a CLI/gateway-provided
model that differs from config.yaml, ``set_runtime_main()`` records the
override in a process-local global. This is consulted FIRST so tools
that gate on "the active main model" (e.g. ``vision_analyze``'s native
fast path) see the live runtime, not the persisted config default.
"""
override = _RUNTIME_MAIN_MODEL
if isinstance(override, str) and override.strip():
return override.strip()
try:
from hermes_cli.config import load_config
cfg = load_config()
@@ -1514,13 +1461,7 @@ def _read_main_provider() -> str:
Returns the lowercase provider id (e.g. "alibaba", "openrouter") or ""
if not configured.
Runtime override: see ``_read_main_model`` — same mechanism for the
provider half of the runtime tuple.
"""
override = _RUNTIME_MAIN_PROVIDER
if isinstance(override, str) and override.strip():
return override.strip().lower()
try:
from hermes_cli.config import load_config
cfg = load_config()
@@ -1534,32 +1475,6 @@ def _read_main_provider() -> str:
return ""
# Process-local override set by AIAgent at session/turn start. Single-threaded
# per turn — no lock needed. Cleared by ``clear_runtime_main()``.
_RUNTIME_MAIN_PROVIDER: str = ""
_RUNTIME_MAIN_MODEL: str = ""
def set_runtime_main(provider: str, model: str) -> None:
    """Store the live provider/model pair in the process-local override globals.

    The provider is stripped and lowercased; the model is only stripped.
    Falsy arguments are stored as empty strings. Per the original note, this
    is meant to be called by the agent at the top of each turn so that
    ``_read_main_provider`` / ``_read_main_model`` see CLI/gateway overrides
    rather than the config.yaml default.
    """
    global _RUNTIME_MAIN_PROVIDER, _RUNTIME_MAIN_MODEL
    normalized_provider = provider.strip().lower() if provider else ""
    normalized_model = model.strip() if model else ""
    _RUNTIME_MAIN_PROVIDER = normalized_provider
    _RUNTIME_MAIN_MODEL = normalized_model
def clear_runtime_main() -> None:
    """Reset both runtime override globals to the empty string."""
    global _RUNTIME_MAIN_PROVIDER, _RUNTIME_MAIN_MODEL
    _RUNTIME_MAIN_PROVIDER = _RUNTIME_MAIN_MODEL = ""
def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[str]]:
"""Resolve the active custom/main endpoint the same way the main CLI does.
@@ -1841,113 +1756,6 @@ def _get_provider_chain() -> List[tuple]:
]
# ── Auxiliary "recently 402'd" unhealthy-provider cache ────────────────────
#
# When an auxiliary provider returns HTTP 402 (Payment Required / credit
# exhaustion), retrying it on every subsequent aux call is wasteful — the
# provider stays depleted for hours or days, but the chain re-tries it as
# the FIRST entry on every compression/title-gen/session-search call,
# burns ~1 RTT, gets 402 again, then falls back. On a long Discord/LCM
# session that adds up to dozens of doomed 402s.
#
# Solution: when ANY caller observes a payment error against a provider,
# mark it unhealthy for ``_AUX_UNHEALTHY_TTL_SECONDS``. ``_resolve_auto``
# Step-2 and ``_try_payment_fallback`` both consult this cache and skip
# unhealthy entries (logging once per skip-reason so the user sees what
# happened). Entries auto-expire so a topped-up account recovers without
# manual intervention.
#
# Failure isolation: the cache is in-process only. A second hermes
# process won't inherit the unhealthy mark — that's intentional, since
# the user might be running two profiles with different OpenRouter keys.
_AUX_UNHEALTHY_TTL_SECONDS = 600 # 10 minutes
_aux_unhealthy_until: Dict[str, float] = {}
_aux_unhealthy_logged_at: Dict[str, float] = {}
# Map provider names that show up in resolved_provider / explicit-config
# back to the chain labels used by _get_provider_chain(). Keep in sync
# with the alias map in _try_payment_fallback below.
_AUX_UNHEALTHY_LABEL_ALIASES = {
"openrouter": "openrouter",
"nous": "nous",
"custom": "local/custom",
"local/custom": "local/custom",
"openai-codex": "openai-codex",
"codex": "openai-codex",
}
def _normalize_chain_label(provider: str) -> str:
    """Map a provider name onto the label used in the unhealthy cache.

    The input is stringified, stripped and lowercased, then looked up in
    ``_AUX_UNHEALTHY_LABEL_ALIASES``. Names without an alias are returned in
    their lowercased form; a falsy input yields ``""``.
    """
    if provider:
        key = str(provider).strip().lower()
        try:
            return _AUX_UNHEALTHY_LABEL_ALIASES[key]
        except KeyError:
            # No alias registered: the lowercased name is its own label.
            return key
    return ""
def _mark_provider_unhealthy(provider: str, ttl: Optional[float] = None) -> None:
    """Record *provider* in the unhealthy cache until a TTL elapses.

    The provider name is first normalized with ``_normalize_chain_label``; an
    empty label is ignored. ``ttl`` is in seconds and defaults to
    ``_AUX_UNHEALTHY_TTL_SECONDS`` when ``None``. A warning naming the label,
    the TTL and the local expiry time is logged.
    """
    label = _normalize_chain_label(provider)
    if label:
        lifetime = _AUX_UNHEALTHY_TTL_SECONDS if ttl is None else ttl
        deadline = time.time() + lifetime
        _aux_unhealthy_until[label] = deadline
        logger.warning(
            "Auxiliary: marking %s unhealthy for %ds (payment / credit error). "
            "Subsequent auxiliary calls will skip it until %s.",
            label,
            int(lifetime),
            time.strftime("%H:%M:%S", time.localtime(deadline)),
        )
def _is_provider_unhealthy(label: str) -> bool:
    """Report whether *label* has an unexpired entry in the unhealthy cache.

    An entry whose expiry time has been reached is removed from both the
    expiry map and the log-throttle map before ``False`` is returned.
    """
    if not label:
        return False
    deadline = _aux_unhealthy_until.get(label)
    if deadline is None:
        return False
    if time.time() < deadline:
        return True
    # Expired: drop the bookkeeping for this label.
    for cache in (_aux_unhealthy_until, _aux_unhealthy_logged_at):
        cache.pop(label, None)
    return False
def _log_skip_unhealthy(label: str, task: Optional[str] = None) -> None:
    """Log that an unhealthy provider was skipped, throttled per label.

    Nothing is logged when the last message for *label* was recorded less than
    60 seconds ago. Otherwise the timestamp is updated and an info line is
    written with the task name (``"call"`` when absent) and the whole seconds
    remaining until the label's expiry, floored at zero.
    """
    now = time.time()
    previous = _aux_unhealthy_logged_at.get(label, 0.0)
    if now - previous < 60:
        return
    _aux_unhealthy_logged_at[label] = now
    remaining = int(_aux_unhealthy_until.get(label, now) - now)
    logger.info(
        "Auxiliary %s: skipping %s (recently returned payment error, retry in %ds)",
        task or "call", label, max(0, remaining),
    )
def _reset_aux_unhealthy_cache() -> None:
    """Empty both unhealthy-provider maps (expiry times and log throttle).

    The original note says this is used by tests and is intended for a future
    explicit user trigger.
    """
    for cache in (_aux_unhealthy_until, _aux_unhealthy_logged_at):
        cache.clear()
def _is_payment_error(exc: Exception) -> bool:
"""Detect payment/credit/quota exhaustion errors.
@@ -1960,7 +1768,7 @@ def _is_payment_error(exc: Exception) -> bool:
err_lower = str(exc).lower()
# OpenRouter and other providers include "credits" or "afford" in 402 bodies,
# but sometimes wrap them in 429 or other codes.
if status in {402, 429, None}:
if status in (402, 429, None):
if any(kw in err_lower for kw in ("credits", "insufficient funds",
"can only afford", "billing",
"payment required")):
@@ -2009,12 +1817,10 @@ def _is_connection_error(exc: Exception) -> bool:
distinct from API errors (4xx/5xx) which indicate the provider IS
reachable but returned an error.
"""
try:
from openai import APIConnectionError, APITimeoutError
if isinstance(exc, (APIConnectionError, APITimeoutError)):
return True
except ImportError:
pass
from openai import APIConnectionError, APITimeoutError
if isinstance(exc, (APIConnectionError, APITimeoutError)):
return True
# urllib3 / httpx / httpcore connection errors
err_type = type(exc).__name__
if any(kw in err_type for kw in ("Connection", "Timeout", "DNS", "SSL")):
@@ -2024,16 +1830,6 @@ def _is_connection_error(exc: Exception) -> bool:
"connection refused", "name or service not known",
"no route to host", "network is unreachable",
"timed out", "connection reset",
# httpcore / httpx streaming premature-close errors. These surface
# when a proxy or provider drops the connection mid-stream and are
# transient by nature — the request should be retried or rerouted.
# See issue #18458.
"incomplete chunked read",
"peer closed connection",
"response ended prematurely",
"unexpected eof",
"remoteprotocolerror",
"localprotocolerror",
)):
return True
return False
@@ -2112,246 +1908,6 @@ def _evict_cached_clients(provider: str) -> None:
_client_cache.pop(key, None)
def _evict_cached_client_instance(target: Any) -> bool:
    """Remove every cache entry whose client is, or wraps, *target*.

    An entry matches when its stored client (element 0 of the cached value)
    is *target* itself, or when that client's ``_real_client`` attribute is
    *target*. The scan and the removals happen under ``_client_cache_lock``.

    The original note gives the reason: a client whose transport was closed
    (for example after a timeout) must not be reused by the next auxiliary
    call, and wrappers are only found if they expose the shared underlying
    client as ``_real_client``.

    Returns:
        ``True`` when at least one entry was removed; ``False`` otherwise,
        including when *target* is ``None``.
    """
    if target is None:
        return False

    with _client_cache_lock:
        doomed = []
        for key, entry in list(_client_cache.items()):
            client = entry[0] if entry is not None else None
            if client is None:
                continue
            inner = getattr(client, "_real_client", None)
            if client is target or inner is target:
                doomed.append(key)
        for key in doomed:
            del _client_cache[key]
    return bool(doomed)
def _pool_cache_hint(
    provider: str,
    *,
    main_runtime: Optional[Dict[str, Any]] = None,
) -> str:
    """Build a ``"<provider>:<entry id>"`` discriminator for pooled providers.

    ``"auto"`` is first resolved to the main provider, taken from
    *main_runtime* or, failing that, from ``_read_main_provider()``. The
    result is ``""`` when the provider resolves to empty/``auto``/``custom``,
    when no pool entry can be peeked, or when the entry has no usable ``id``.
    """
    name = _normalize_aux_provider(provider)
    if name == "auto":
        runtime = _normalize_main_runtime(main_runtime)
        name = _normalize_aux_provider(runtime.get("provider") or _read_main_provider())
    if name in ("", "auto", "custom"):
        return ""

    entry = _peek_pool_entry(name)
    if entry is None:
        return ""
    entry_id = str(getattr(entry, "id", "") or "").strip()
    return f"{name}:{entry_id}" if entry_id else ""
def _pool_error_context(exc: Exception) -> Dict[str, Any]:
    """Summarize *exc* as a small dict.

    The dict always carries ``"message"`` (``str(exc)``); ``"status_code"``
    is added only when the exception has a non-``None`` ``status_code``
    attribute.
    """
    code = getattr(exc, "status_code", None)
    context: Dict[str, Any] = {"message": str(exc)}
    if code is None:
        return context
    context["status_code"] = code
    return context
def _recoverable_pool_provider(resolved_provider: str, client: Any) -> Optional[str]:
    """Work out which provider pool could recover the given auxiliary client.

    A concrete provider name (anything other than empty/``auto``/``custom``
    after normalization) is returned as-is. Otherwise the client's
    ``base_url`` host is compared against a fixed list of known hosts, in
    order. Returns ``None`` when nothing matches.
    """
    normalized = _normalize_aux_provider(resolved_provider)
    if normalized not in ("", "auto", "custom"):
        return normalized

    base = str(getattr(client, "base_url", "") or "")
    # Checked in order; the first matching host wins.
    host_to_pool = (
        ("chatgpt.com", "openai-codex"),
        ("openrouter.ai", "openrouter"),
        ("inference-api.nousresearch.com", "nous"),
        ("api.anthropic.com", "anthropic"),
        ("api.githubcopilot.com", "copilot"),
        ("api.kimi.com", "kimi-coding"),
    )
    for host, pool_name in host_to_pool:
        if base_url_host_matches(base, host):
            return pool_name
    return None
def _recover_provider_pool(provider: str, exc: Exception) -> bool:
    """Attempt same-provider credential-pool recovery after a failed call.

    Auth errors first try refreshing the pool's current entry and, failing
    that, mark it exhausted and rotate (status defaults to 401). Payment and
    rate-limit errors go straight to mark-and-rotate (status defaults to 402
    and 429 respectively). On any successful recovery the provider's cached
    clients are evicted.

    Returns:
        ``True`` when a refreshed or rotated credential is available;
        ``False`` when the pool cannot be loaded, has no credentials, the
        error is of another kind, or no replacement entry exists.
    """
    normalized = _normalize_aux_provider(provider)
    try:
        pool = load_pool(normalized)
    except Exception as load_exc:
        logger.debug("Auxiliary client: could not load pool for %s recovery: %s", normalized, load_exc)
        return False
    if not (pool and pool.has_credentials()):
        return False

    status_code = getattr(exc, "status_code", None)
    error_context = _pool_error_context(exc)

    def _rotate(default_status: int) -> bool:
        # Retire the current entry; succeed only if the pool hands back another.
        replacement = pool.mark_exhausted_and_rotate(
            status_code=default_status if status_code is None else status_code,
            error_context=error_context,
        )
        if replacement is None:
            return False
        _evict_cached_clients(normalized)
        return True

    if _is_auth_error(exc):
        if pool.try_refresh_current() is not None:
            _evict_cached_clients(normalized)
            return True
        return _rotate(401)

    if _is_payment_error(exc) or _is_rate_limit_error(exc):
        return _rotate(402 if _is_payment_error(exc) else 429)
    return False
def _retry_same_provider_sync(
    *,
    task: Optional[str],
    resolved_provider: str,
    resolved_model: Optional[str],
    resolved_base_url: Optional[str],
    resolved_api_key: Optional[str],
    resolved_api_mode: Optional[str],
    main_runtime: Optional[Dict[str, Any]],
    final_model: Optional[str],
    messages: list,
    temperature: Optional[float],
    max_tokens: Optional[int],
    tools: Optional[list],
    effective_timeout: float,
    effective_extra_body: dict,
) -> Any:
    """Re-resolve the client for the same provider and repeat the call once.

    Vision tasks resolve through ``resolve_vision_provider_client``; all other
    tasks go through ``_get_cached_client``. The request kwargs are rebuilt
    against the new client's ``base_url`` (falling back to
    *resolved_base_url*), and image content is converted for
    Anthropic-compatible endpoints.

    Returns:
        The validated response of the repeated call.

    Raises:
        RuntimeError: if no client could be obtained for the provider.
    """
    if task != "vision":
        fresh_client, fresh_model = _get_cached_client(
            resolved_provider,
            resolved_model,
            base_url=resolved_base_url,
            api_key=resolved_api_key,
            api_mode=resolved_api_mode,
            main_runtime=main_runtime,
        )
    else:
        _, fresh_client, fresh_model = resolve_vision_provider_client(
            provider=resolved_provider,
            model=final_model,
            base_url=resolved_base_url,
            api_key=resolved_api_key,
            async_mode=False,
        )

    if fresh_client is None:
        task_label = task or 'call'
        raise RuntimeError(
            f"Auxiliary {task_label}: provider {resolved_provider} could not be rebuilt after recovery"
        )

    endpoint = str(getattr(fresh_client, "base_url", "") or "")
    call_kwargs = _build_call_kwargs(
        resolved_provider,
        fresh_model or final_model,
        messages,
        temperature=temperature,
        max_tokens=max_tokens,
        tools=tools,
        timeout=effective_timeout,
        extra_body=effective_extra_body,
        base_url=endpoint or resolved_base_url,
    )
    if _is_anthropic_compat_endpoint(resolved_provider, endpoint):
        call_kwargs["messages"] = _convert_openai_images_to_anthropic(call_kwargs["messages"])

    response = fresh_client.chat.completions.create(**call_kwargs)
    return _validate_llm_response(response, task)
async def _retry_same_provider_async(
    *,
    task: Optional[str],
    resolved_provider: str,
    resolved_model: Optional[str],
    resolved_base_url: Optional[str],
    resolved_api_key: Optional[str],
    resolved_api_mode: Optional[str],
    final_model: Optional[str],
    messages: list,
    temperature: Optional[float],
    max_tokens: Optional[int],
    tools: Optional[list],
    effective_timeout: float,
    effective_extra_body: dict,
) -> Any:
    """Async counterpart of the same-provider retry: re-resolve and call once.

    Clients are requested with ``async_mode=True``. Unlike the sync variant,
    this function takes no ``main_runtime`` and passes none to
    ``_get_cached_client``.

    Returns:
        The validated response of the awaited repeated call.

    Raises:
        RuntimeError: if no client could be obtained for the provider.
    """
    if task != "vision":
        fresh_client, fresh_model = _get_cached_client(
            resolved_provider,
            resolved_model,
            async_mode=True,
            base_url=resolved_base_url,
            api_key=resolved_api_key,
            api_mode=resolved_api_mode,
        )
    else:
        _, fresh_client, fresh_model = resolve_vision_provider_client(
            provider=resolved_provider,
            model=final_model,
            base_url=resolved_base_url,
            api_key=resolved_api_key,
            async_mode=True,
        )

    if fresh_client is None:
        task_label = task or 'call'
        raise RuntimeError(
            f"Auxiliary {task_label}: provider {resolved_provider} could not be rebuilt after recovery"
        )

    endpoint = str(getattr(fresh_client, "base_url", "") or "")
    call_kwargs = _build_call_kwargs(
        resolved_provider,
        fresh_model or final_model,
        messages,
        temperature=temperature,
        max_tokens=max_tokens,
        tools=tools,
        timeout=effective_timeout,
        extra_body=effective_extra_body,
        base_url=endpoint or resolved_base_url,
    )
    if _is_anthropic_compat_endpoint(resolved_provider, endpoint):
        call_kwargs["messages"] = _convert_openai_images_to_anthropic(call_kwargs["messages"])

    response = await fresh_client.chat.completions.create(**call_kwargs)
    return _validate_llm_response(response, task)
def _refresh_provider_credentials(provider: str) -> bool:
"""Refresh short-lived credentials for OAuth-backed auxiliary providers."""
normalized = _normalize_aux_provider(provider)
@@ -2424,10 +1980,6 @@ def _try_payment_fallback(
for label, try_fn in _get_provider_chain():
if label in skip_chain_labels:
continue
if _is_provider_unhealthy(label):
_log_skip_unhealthy(label, task)
tried.append(f"{label} (unhealthy)")
continue
client, model = try_fn()
if client is not None:
logger.info(
@@ -2496,7 +2048,7 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
main_provider = runtime_provider or _read_main_provider()
main_model = runtime_model or _read_main_model()
if (main_provider and main_model
and main_provider not in {"auto", ""}):
and main_provider not in ("auto", "")):
resolved_provider = main_provider
explicit_base_url = None
explicit_api_key = None
@@ -2504,34 +2056,21 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
resolved_provider = "custom"
explicit_base_url = runtime_base_url
explicit_api_key = runtime_api_key or None
# Skip Step-1 if the main provider was recently 402'd. The unhealthy
# cache TTL bounds how long we bypass it, so a topped-up account
# recovers automatically. If we tried Step-1 anyway, every aux call
# on a depleted main provider would pay one doomed 402 RTT before
# falling to Step-2.
main_chain_label = _normalize_chain_label(resolved_provider)
if main_chain_label and _is_provider_unhealthy(main_chain_label):
_log_skip_unhealthy(main_chain_label)
else:
client, resolved = resolve_provider_client(
resolved_provider,
main_model,
explicit_base_url=explicit_base_url,
explicit_api_key=explicit_api_key,
api_mode=runtime_api_mode or None,
)
if client is not None:
logger.info("Auxiliary auto-detect: using main provider %s (%s)",
main_provider, resolved or main_model)
return client, resolved or main_model
client, resolved = resolve_provider_client(
resolved_provider,
main_model,
explicit_base_url=explicit_base_url,
explicit_api_key=explicit_api_key,
api_mode=runtime_api_mode or None,
)
if client is not None:
logger.info("Auxiliary auto-detect: using main provider %s (%s)",
main_provider, resolved or main_model)
return client, resolved or main_model
# ── Step 2: aggregator / fallback chain ──────────────────────────────
tried = []
for label, try_fn in _get_provider_chain():
if _is_provider_unhealthy(label):
_log_skip_unhealthy(label)
tried.append(f"{label} (unhealthy)")
continue
client, model = try_fn()
if client is not None:
if tried:
@@ -3157,7 +2696,7 @@ def resolve_provider_client(
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
elif pconfig.auth_type in {"oauth_device_code", "oauth_external"}:
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
# OAuth providers — route through their specific try functions
if provider == "nous":
return resolve_provider_client("nous", model, async_mode)
@@ -3266,7 +2805,7 @@ def get_available_vision_backends() -> List[str]:
available: List[str] = []
# 1. Active provider — if the user configured a provider, try it first.
main_provider = _read_main_provider()
if main_provider and main_provider not in {"auto", ""}:
if main_provider and main_provider not in ("auto", ""):
if main_provider in _VISION_AUTO_PROVIDER_ORDER:
if _strict_vision_backend_available(main_provider):
available.append(main_provider)
@@ -3312,7 +2851,7 @@ def resolve_vision_provider_client(
if resolved_base_url:
provider_for_base_override = (
requested if requested and requested not in {"", "auto"} else "custom"
requested if requested and requested not in ("", "auto") else "custom"
)
client, final_model = resolve_provider_client(
provider_for_base_override,
@@ -3340,7 +2879,7 @@ def resolve_vision_provider_client(
# 4. Stop
main_provider = _read_main_provider()
main_model = _read_main_model()
if main_provider and main_provider not in {"auto", ""}:
if main_provider and main_provider not in ("auto", ""):
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
if main_provider == "nous":
sync_client, default_model = _resolve_strict_vision_backend(
@@ -3494,8 +3033,7 @@ def _client_cache_key(
) -> tuple:
runtime = _normalize_main_runtime(main_runtime)
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
pool_hint = _pool_cache_hint(provider, main_runtime=main_runtime)
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, pool_hint)
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision)
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -3828,7 +3366,7 @@ def _resolve_task_provider_model(
# (e.g. OPENROUTER_API_KEY) instead of locking into "custom".
return cfg_provider, resolved_model, cfg_base_url, None, resolved_api_mode
if cfg_provider and cfg_provider != "auto":
return cfg_provider, resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
return cfg_provider, resolved_model, None, None, resolved_api_mode
return "auto", resolved_model, None, None, resolved_api_mode
@@ -4026,7 +3564,7 @@ def _build_call_kwargs(
# Provider-specific extra_body
merged_extra = dict(extra_body or {})
if provider == "nous" or auxiliary_is_nous:
merged_extra.setdefault("tags", []).extend(NOUS_EXTRA_BODY["tags"])
merged_extra.setdefault("tags", []).extend(["product=hermes-agent"])
if merged_extra:
kwargs["extra_body"] = merged_extra
@@ -4146,7 +3684,7 @@ def call_llm(
# credentials were found, fail fast instead of silently routing
# through OpenRouter (which causes confusing 404s).
_explicit = (resolved_provider or "").strip().lower()
if _explicit and _explicit not in {"auto", "openrouter", "custom"}:
if _explicit and _explicit not in ("auto", "openrouter", "custom"):
raise RuntimeError(
f"Provider '{_explicit}' is set in config.yaml but no API key "
f"was found. Set the {_explicit.upper()}_API_KEY environment "
@@ -4276,63 +3814,46 @@ def call_llm(
# ── Auth refresh retry ───────────────────────────────────────
if (_is_auth_error(first_err)
and resolved_provider not in {"auto", "", None}
and resolved_provider not in ("auto", "", None)
and not client_is_nous):
if _refresh_provider_credentials(resolved_provider):
logger.info(
"Auxiliary %s: refreshed %s credentials after auth error, retrying",
task or "call", resolved_provider,
)
return _retry_same_provider_sync(
task=task,
resolved_provider=resolved_provider,
resolved_model=resolved_model,
resolved_base_url=resolved_base_url,
resolved_api_key=resolved_api_key,
resolved_api_mode=resolved_api_mode,
main_runtime=main_runtime,
final_model=final_model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
tools=tools,
effective_timeout=effective_timeout,
effective_extra_body=effective_extra_body,
retry_client, retry_model = (
resolve_vision_provider_client(
provider=resolved_provider,
model=final_model,
async_mode=False,
)[1:]
if task == "vision"
else _get_cached_client(
resolved_provider,
resolved_model,
base_url=resolved_base_url,
api_key=resolved_api_key,
api_mode=resolved_api_mode,
main_runtime=main_runtime,
)
)
# ── Same-provider credential-pool recovery ─────────────────────
pool_provider = _recoverable_pool_provider(resolved_provider, client)
if pool_provider and (_is_auth_error(first_err) or _is_payment_error(first_err) or _is_rate_limit_error(first_err)):
recovery_err = first_err
if _is_rate_limit_error(first_err):
try:
if retry_client is not None:
retry_kwargs = _build_call_kwargs(
resolved_provider,
retry_model or final_model,
messages,
temperature=temperature,
max_tokens=max_tokens,
tools=tools,
timeout=effective_timeout,
extra_body=effective_extra_body,
base_url=resolved_base_url,
)
_retry_base = str(getattr(retry_client, "base_url", "") or "")
if _is_anthropic_compat_endpoint(resolved_provider, _retry_base):
retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
return _validate_llm_response(
client.chat.completions.create(**kwargs), task)
except Exception as retry_err:
if not (_is_auth_error(retry_err) or _is_payment_error(retry_err) or _is_rate_limit_error(retry_err)):
raise
recovery_err = retry_err
if _recover_provider_pool(pool_provider, recovery_err):
logger.info(
"Auxiliary %s: recovered %s via credential-pool rotation after %s",
task or "call", pool_provider, type(recovery_err).__name__,
)
return _retry_same_provider_sync(
task=task,
resolved_provider=resolved_provider,
resolved_model=resolved_model,
resolved_base_url=resolved_base_url,
resolved_api_key=resolved_api_key,
resolved_api_mode=resolved_api_mode,
main_runtime=main_runtime,
final_model=final_model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
tools=tools,
effective_timeout=effective_timeout,
effective_extra_body=effective_extra_body,
)
retry_client.chat.completions.create(**retry_kwargs), task)
# ── Payment / credit exhaustion fallback ──────────────────────
# When the resolved provider returns 402 or a credit-related error,
@@ -4359,17 +3880,10 @@ def call_llm(
# Only try alternative providers when the user didn't explicitly
# configure this task's provider. Explicit provider = hard constraint;
# auto (the default) = best-effort fallback chain. (#7559)
is_auto = resolved_provider in {"auto", "", None}
is_auto = resolved_provider in ("auto", "", None)
if should_fallback and is_auto:
if _is_payment_error(first_err):
reason = "payment error"
# Resolve the actual provider label (resolved_provider may be
# "auto"; the client's base_url tells us which backend got the
# 402). Mark THAT label unhealthy so subsequent aux calls
# skip it instead of paying another doomed RTT.
_mark_provider_unhealthy(
_recoverable_pool_provider(resolved_provider, client) or resolved_provider
)
elif _is_rate_limit_error(first_err):
reason = "rate limit"
else:
@@ -4387,17 +3901,6 @@ def call_llm(
base_url=str(getattr(fb_client, "base_url", "") or ""))
return _validate_llm_response(
fb_client.chat.completions.create(**fb_kwargs), task)
# Connection/timeout errors leave the cached client poisoned (closed
# httpx transport, half-read stream, dead async loop). Drop it from
# the cache regardless of whether we found a fallback above so the
# next auxiliary call rebuilds a fresh client instead of reusing the
# dead one. See issue #23432.
if _is_connection_error(first_err):
try:
_evict_cached_client_instance(client)
except Exception:
logger.debug("Auxiliary: cache eviction after connection error failed",
exc_info=True)
raise
@@ -4515,7 +4018,7 @@ async def async_call_llm(
)
if client is None:
_explicit = (resolved_provider or "").strip().lower()
if _explicit and _explicit not in {"auto", "openrouter", "custom"}:
if _explicit and _explicit not in ("auto", "openrouter", "custom"):
raise RuntimeError(
f"Provider '{_explicit}' is set in config.yaml but no API key "
f"was found. Set the {_explicit.upper()}_API_KEY environment "
@@ -4626,61 +4129,45 @@ async def async_call_llm(
# ── Auth refresh retry (mirrors sync call_llm) ───────────────
if (_is_auth_error(first_err)
and resolved_provider not in {"auto", "", None}
and resolved_provider not in ("auto", "", None)
and not client_is_nous):
if _refresh_provider_credentials(resolved_provider):
logger.info(
"Auxiliary %s (async): refreshed %s credentials after auth error, retrying",
task or "call", resolved_provider,
)
return await _retry_same_provider_async(
task=task,
resolved_provider=resolved_provider,
resolved_model=resolved_model,
resolved_base_url=resolved_base_url,
resolved_api_key=resolved_api_key,
resolved_api_mode=resolved_api_mode,
final_model=final_model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
tools=tools,
effective_timeout=effective_timeout,
effective_extra_body=effective_extra_body,
)
# ── Same-provider credential-pool recovery (mirrors sync) ─────
pool_provider = _recoverable_pool_provider(resolved_provider, client)
if pool_provider and (_is_auth_error(first_err) or _is_payment_error(first_err) or _is_rate_limit_error(first_err)):
recovery_err = first_err
if _is_rate_limit_error(first_err):
try:
if task == "vision":
_, retry_client, retry_model = resolve_vision_provider_client(
provider=resolved_provider,
model=final_model,
async_mode=True,
)
else:
retry_client, retry_model = _get_cached_client(
resolved_provider,
resolved_model,
async_mode=True,
base_url=resolved_base_url,
api_key=resolved_api_key,
api_mode=resolved_api_mode,
)
if retry_client is not None:
retry_kwargs = _build_call_kwargs(
resolved_provider,
retry_model or final_model,
messages,
temperature=temperature,
max_tokens=max_tokens,
tools=tools,
timeout=effective_timeout,
extra_body=effective_extra_body,
base_url=resolved_base_url,
)
_retry_base = str(getattr(retry_client, "base_url", "") or "")
if _is_anthropic_compat_endpoint(resolved_provider, _retry_base):
retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
return _validate_llm_response(
await client.chat.completions.create(**kwargs), task)
except Exception as retry_err:
if not (_is_auth_error(retry_err) or _is_payment_error(retry_err) or _is_rate_limit_error(retry_err)):
raise
recovery_err = retry_err
if _recover_provider_pool(pool_provider, recovery_err):
logger.info(
"Auxiliary %s (async): recovered %s via credential-pool rotation after %s",
task or "call", pool_provider, type(recovery_err).__name__,
)
return await _retry_same_provider_async(
task=task,
resolved_provider=resolved_provider,
resolved_model=resolved_model,
resolved_base_url=resolved_base_url,
resolved_api_key=resolved_api_key,
resolved_api_mode=resolved_api_mode,
final_model=final_model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
tools=tools,
effective_timeout=effective_timeout,
effective_extra_body=effective_extra_body,
)
await retry_client.chat.completions.create(**retry_kwargs), task)
# ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
should_fallback = (
@@ -4688,13 +4175,10 @@ async def async_call_llm(
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
)
is_auto = resolved_provider in {"auto", "", None}
is_auto = resolved_provider in ("auto", "", None)
if should_fallback and is_auto:
if _is_payment_error(first_err):
reason = "payment error"
_mark_provider_unhealthy(
_recoverable_pool_provider(resolved_provider, client) or resolved_provider
)
elif _is_rate_limit_error(first_err):
reason = "rate limit"
else:
@@ -4718,12 +4202,4 @@ async def async_call_llm(
fb_kwargs["model"] = async_fb_model
return _validate_llm_response(
await async_fb.chat.completions.create(**fb_kwargs), task)
# Mirror the sync path: drop poisoned clients on connection/timeout
# so the next aux call rebuilds. See issue #23432.
if _is_connection_error(first_err):
try:
_evict_cached_client_instance(client)
except Exception:
logger.debug("Auxiliary (async): cache eviction after connection error failed",
exc_info=True)
raise
+334
View File
@@ -0,0 +1,334 @@
"""OpenAI-compatible shim that forwards Hermes requests to ``codex exec --json``.
This adapter lets Hermes treat the OpenAI Codex CLI as a chat-style backend.
Each request spawns ``codex exec --json --ephemeral --dangerously-bypass-approvals-and-sandbox``,
parses the JSONL event stream, extracts the agent message text and token usage,
and converts the result into the minimal shape Hermes expects from an OpenAI client.
"""
from __future__ import annotations
import json
import logging
import os
import subprocess
import threading
import time
from pathlib import Path
from types import SimpleNamespace
from typing import Any
logger = logging.getLogger(__name__)
_CODEX_CLI_BASE_URL = "codex-cli://local"
_DEFAULT_TIMEOUT_SECONDS = 900.0
def _resolve_command() -> str:
    """Return the executable used to launch Codex CLI.

    The first non-blank value among ``HERMES_CODEX_CLI_COMMAND`` and
    ``CODEX_CLI_PATH`` wins; when neither is set the bare name ``codex`` is
    returned.
    """
    for env_name in ("HERMES_CODEX_CLI_COMMAND", "CODEX_CLI_PATH"):
        candidate = os.getenv(env_name, "").strip()
        if candidate:
            return candidate
    return "codex"
def _resolve_args() -> list[str]:
    """Return the argument list placed after the Codex CLI command.

    A non-blank ``HERMES_CODEX_CLI_ARGS`` is split with shell-style quoting
    rules and replaces the defaults entirely; otherwise the built-in
    ``exec --json ...`` argument list is used.
    """
    import shlex

    override = os.getenv("HERMES_CODEX_CLI_ARGS", "").strip()
    if override:
        return shlex.split(override)
    return [
        "exec",
        "--json",
        "--ephemeral",
        "--dangerously-bypass-approvals-and-sandbox",
        "--skip-git-repo-check",
    ]
def _build_subprocess_env() -> dict[str, str]:
    """Return a copy of the current environment for the Codex subprocess.

    ``HOME`` is carried over explicitly (falling back to the expanded ``~``
    when the variable is unset or empty) so the child can locate
    ``~/.codex/auth.json``. An unexpandable ``~`` is not written.
    """
    child_env = dict(os.environ)
    home_dir = os.environ.get("HOME", "") or os.path.expanduser("~")
    if home_dir and home_dir != "~":
        child_env["HOME"] = home_dir
    return child_env
def _parse_turn_completed_usage(event: dict[str, Any]) -> SimpleNamespace:
    """Convert a ``turn.completed`` event into an OpenAI-style usage object.

    Args:
        event: Decoded JSONL event. Its ``usage`` mapping may carry
            ``input_tokens``, ``cached_input_tokens``, ``output_tokens`` and
            ``reasoning_output_tokens``.

    Returns:
        A namespace with ``prompt_tokens``, ``completion_tokens``,
        ``total_tokens`` and ``prompt_tokens_details.cached_tokens``.
        Missing, null or malformed counts are reported as ``0`` so that a
        bad usage payload never discards an otherwise successful response.
    """

    def _count(value: Any) -> int:
        # The payload comes from an external process: tolerate anything
        # that is not a clean integer instead of raising.
        try:
            return int(value or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    usage = event.get("usage") if isinstance(event, dict) else None
    if not isinstance(usage, dict):
        usage = {}

    input_tokens = _count(usage.get("input_tokens"))
    cached_tokens = _count(usage.get("cached_input_tokens"))
    output_tokens = _count(usage.get("output_tokens"))
    reasoning_tokens = _count(usage.get("reasoning_output_tokens"))

    # NOTE(review): reasoning tokens are added on top of output tokens here.
    # If Codex already includes them in ``output_tokens`` this double-counts;
    # confirm against the Codex usage schema.
    completion_tokens = output_tokens + reasoning_tokens
    return SimpleNamespace(
        prompt_tokens=input_tokens,
        completion_tokens=completion_tokens,
        total_tokens=input_tokens + completion_tokens,
        prompt_tokens_details=SimpleNamespace(cached_tokens=cached_tokens),
    )
class _CodexCLIChatCompletions:
    """``chat.completions`` endpoint object that delegates to the client."""

    def __init__(self, client: "CodexCLIClient"):
        # Keep a back-reference; all real work happens on the client.
        self._client = client

    def create(self, **kwargs: Any) -> Any:
        """Forward every keyword argument to the owning client and return its result."""
        owner = self._client
        return owner._create_chat_completion(**kwargs)
class _CodexCLIChatNamespace:
    """``chat`` attribute holder exposing a ``completions`` endpoint."""

    def __init__(self, client: "CodexCLIClient"):
        completions_endpoint = _CodexCLIChatCompletions(client)
        self.completions = completions_endpoint
class CodexCLIClient:
    """Minimal OpenAI-client-compatible facade for Codex CLI.

    ``client.chat.completions.create(...)`` flattens the chat messages into a
    single prompt, runs one Codex CLI subprocess with that prompt on stdin,
    parses the JSONL events printed on stdout and returns an object exposing
    ``choices[0].message.content`` and ``usage``.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        default_headers: dict[str, str] | None = None,
        command: str | None = None,
        args: list[str] | None = None,
        **_: Any,
    ):
        """Store connection-style attributes and resolve the command line.

        Args:
            api_key: Stored on the instance; defaults to ``"codex-cli"``.
            base_url: Stored on the instance; defaults to the module's
                Codex CLI base URL constant.
            default_headers: Copied and stored; not otherwise used here.
            command: Executable to run; resolved from the environment when
                omitted.
            args: Arguments placed after the command; resolved from the
                environment/defaults when omitted or empty.
            **_: Extra keyword arguments are accepted and ignored.
        """
        self.api_key = api_key or "codex-cli"
        self.base_url = base_url or _CODEX_CLI_BASE_URL
        self._default_headers = dict(default_headers or {})
        self._command = command or _resolve_command()
        self._args = list(args or _resolve_args())
        self.chat = _CodexCLIChatNamespace(self)
        self.is_closed = False
        self._active_process: subprocess.Popen[str] | None = None
        self._active_process_lock = threading.Lock()

    def close(self) -> None:
        """Mark the client closed and stop the in-flight subprocess, if any.

        The process is asked to terminate and given two seconds to exit
        before being killed. Failures while stopping it are ignored.
        """
        with self._active_process_lock:
            proc = self._active_process
            self._active_process = None
            self.is_closed = True
        if proc is None:
            return
        try:
            proc.terminate()
            proc.wait(timeout=2)
        except (OSError, subprocess.TimeoutExpired):
            # Best effort: escalate to SIGKILL and give up quietly.
            try:
                proc.kill()
            except OSError:
                pass

    def _build_prompt(self, messages: list[dict[str, Any]], model: str | None = None) -> str:
        """Flatten chat messages into a single text prompt.

        Non-dict messages and messages with ``None`` or empty content are
        skipped. List content is reduced to its string items and the
        ``text`` field of dict items, joined by newlines.

        Args:
            messages: OpenAI-style chat messages.
            model: Optional model name, included only as a textual hint.

        Returns:
            Preamble, optional model hint, transcript and a closing
            instruction, separated by blank lines.
        """
        sections: list[str] = [
            "You are being used as the active Codex CLI agent backend for Hermes.",
            "Respond to the user's request directly. Do NOT call tools — Hermes handles tools.",
        ]
        if model:
            sections.append(f"Hermes requested model hint: {model}")

        role_labels = {
            "system": "System",
            "user": "User",
            "assistant": "Assistant",
            "tool": "Tool",
        }
        transcript: list[str] = []
        for message in messages:
            if not isinstance(message, dict):
                continue
            role = str(message.get("role") or "unknown").strip().lower()
            content = message.get("content")
            if content is None:
                continue
            if isinstance(content, list):
                parts = []
                for item in content:
                    if isinstance(item, str):
                        parts.append(item)
                    elif isinstance(item, dict) and "text" in item:
                        parts.append(str(item["text"]))
                content = "\n".join(parts).strip()
            if not content:
                continue
            label = role_labels.get(role, role.title())
            transcript.append(f"{label}:\n{content}")

        if transcript:
            sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
        sections.append("Continue the conversation from the latest user request.")
        return "\n\n".join(s.strip() for s in sections if s and s.strip())

    @staticmethod
    def _normalize_timeout(timeout: Any) -> float:
        """Reduce a timeout argument to a number of seconds.

        ``None`` yields the module default. Numbers are used as-is. Any
        other object (the caller may pass an ``httpx.Timeout``) is probed
        for numeric ``read``/``write``/``connect``/``pool``/``timeout``
        attributes and the largest one is used, falling back to the default
        when none is numeric.
        """
        if timeout is None:
            return _DEFAULT_TIMEOUT_SECONDS
        if isinstance(timeout, (int, float)):
            return float(timeout)
        candidates = [
            getattr(timeout, attr, None)
            for attr in ("read", "write", "connect", "pool", "timeout")
        ]
        numeric = [float(v) for v in candidates if isinstance(v, (int, float))]
        return max(numeric) if numeric else _DEFAULT_TIMEOUT_SECONDS

    def _create_chat_completion(
        self,
        *,
        model: str | None = None,
        messages: list[dict[str, Any]] | None = None,
        timeout: float | None = None,
        tools: list[dict[str, Any]] | None = None,
        tool_choice: Any = None,
        **_: Any,
    ) -> Any:
        """Run one Codex CLI turn and wrap the result like a chat completion.

        ``tools`` and ``tool_choice`` are accepted for signature
        compatibility and are not forwarded. Other keyword arguments are
        ignored.

        Returns:
            A namespace with ``choices`` (one choice, ``finish_reason`` of
            ``"stop"``, empty ``tool_calls``), ``usage`` and ``model``.

        Raises:
            RuntimeError: The CLI could not be started or produced no agent
                message.
            TimeoutError: The CLI did not finish within the timeout.
        """
        prompt_text = self._build_prompt(messages or [], model=model)
        effective_timeout = self._normalize_timeout(timeout)
        response_text, usage = self._run_prompt(prompt_text, timeout_seconds=effective_timeout)

        assistant_message = SimpleNamespace(
            content=response_text,
            tool_calls=[],
            reasoning=None,
            reasoning_content=None,
            reasoning_details=None,
        )
        choice = SimpleNamespace(message=assistant_message, finish_reason="stop")
        return SimpleNamespace(
            choices=[choice],
            usage=usage,
            model=model or "codex-cli",
        )

    @staticmethod
    def _feed_stdin(stream: Any, text: str) -> None:
        """Write *text* to the child's stdin and close it to signal end of input."""
        try:
            stream.write(text)
        except (OSError, ValueError):
            # The child exited or closed stdin early; its stderr output is
            # reported by the caller, so the write failure itself is ignored.
            pass
        finally:
            try:
                stream.close()
            except (OSError, ValueError):
                pass

    @staticmethod
    def _drain_stream(stream: Any, sink: list[str]) -> None:
        """Append every line of *stream*, without its trailing newline, to *sink*."""
        if stream is None:
            return
        try:
            for line in stream:
                sink.append(line.rstrip("\n"))
        except (OSError, ValueError):
            # Stream was closed or became undecodable; keep what was read.
            pass

    @staticmethod
    def _parse_jsonl_events(lines: list[str]) -> tuple[list[str], dict[str, Any] | None]:
        """Extract agent messages and the last ``turn.completed`` event.

        Lines that are not valid JSON, or that decode to something other
        than an object, are skipped.

        Returns:
            ``(messages, completed)`` where ``messages`` holds the non-empty
            ``text`` of every ``item.completed`` event whose item type is
            ``agent_message`` (in order), and ``completed`` is the last
            ``turn.completed`` event or ``None``.
        """
        messages: list[str] = []
        completed: dict[str, Any] | None = None
        for line in lines:
            try:
                event = json.loads(line)
            except ValueError:
                # Non-JSON line (banner, status) — skip.
                continue
            if not isinstance(event, dict):
                continue
            event_type = event.get("type", "")
            if event_type == "item.completed":
                item = event.get("item") or {}
                if isinstance(item, dict) and item.get("type") == "agent_message":
                    text = item.get("text") or ""
                    if text:
                        messages.append(str(text))
            elif event_type == "turn.completed":
                completed = event
        return messages, completed

    def _run_prompt(self, prompt_text: str, *, timeout_seconds: float) -> tuple[str, SimpleNamespace]:
        """Run the Codex CLI once and return ``(response_text, usage)``.

        The prompt is delivered on the child's stdin; no prompt argument is
        appended to the command line (presumably the configured ``exec``
        arguments make Codex read the prompt from stdin — confirm against
        the Codex CLI documentation). stdin is fed and stdout/stderr are
        drained on helper threads so the deadline covers the whole exchange.
        Multiple agent messages are joined with newlines.

        Raises:
            RuntimeError: The command could not be started, pipes were not
                available, or the process produced no agent message while
                writing to stderr or exiting with a non-zero status.
            TimeoutError: The process did not exit within *timeout_seconds*.
        """
        cmd = [self._command, *self._args]
        try:
            proc = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                bufsize=1,
                env=_build_subprocess_env(),
            )
        except FileNotFoundError as exc:
            raise RuntimeError(
                f"Could not start Codex CLI command '{self._command}'. "
                "Install Codex CLI (npm install -g @openai/codex) or set "
                f"HERMES_CODEX_CLI_COMMAND / CODEX_CLI_PATH."
            ) from exc

        if proc.stdin is None or proc.stdout is None:
            proc.kill()
            raise RuntimeError("Codex CLI process did not expose stdin/stdout pipes.")

        with self._active_process_lock:
            self.is_closed = False
            self._active_process = proc

        stdout_lines: list[str] = []
        stderr_output: list[str] = []
        workers = [
            threading.Thread(target=self._drain_stream, args=(proc.stdout, stdout_lines), daemon=True),
            threading.Thread(target=self._drain_stream, args=(proc.stderr, stderr_output), daemon=True),
            threading.Thread(target=self._feed_stdin, args=(proc.stdin, prompt_text), daemon=True),
        ]
        try:
            # Readers start before the prompt is written so a child that
            # prints before consuming stdin cannot stall on a full pipe.
            for worker in workers:
                worker.start()

            try:
                proc.wait(timeout=timeout_seconds)
            except subprocess.TimeoutExpired:
                proc.kill()
                raise TimeoutError("Timed out waiting for Codex CLI response.") from None

            # Give the readers a moment to consume buffered output.
            for worker in workers:
                worker.join(timeout=5)

            messages, completed = self._parse_jsonl_events(stdout_lines)
            if completed is not None:
                usage = _parse_turn_completed_usage(completed)
            else:
                usage = SimpleNamespace(
                    prompt_tokens=0,
                    completion_tokens=0,
                    total_tokens=0,
                    prompt_tokens_details=SimpleNamespace(cached_tokens=0),
                )

            stderr_lines = [line for line in stderr_output if line.strip()]
            if stderr_lines and not messages:
                raise RuntimeError(
                    "Codex CLI produced no agent message. "
                    f"stderr: {'; '.join(stderr_lines[-5:])}"
                )
            # A failed run with nothing on stderr must not look like an
            # empty model reply. Skipped when close() stopped the process.
            if not messages and proc.returncode and not self.is_closed:
                raise RuntimeError(
                    f"Codex CLI exited with status {proc.returncode} "
                    "without producing an agent message."
                )

            return "\n".join(messages).strip(), usage
        finally:
            if proc.poll() is None:
                try:
                    proc.kill()
                except OSError:
                    pass
            # Reap the child so it does not linger as a zombie.
            try:
                proc.wait(timeout=2)
            except (OSError, subprocess.TimeoutExpired):
                pass
            for worker in workers:
                if worker.ident is not None:  # only join threads that started
                    worker.join(timeout=1)
            # Close the pipes only once nothing is still reading from them.
            if not any(worker.is_alive() for worker in workers):
                for stream in (proc.stdout, proc.stderr):
                    if stream is not None:
                        try:
                            stream.close()
                        except (OSError, ValueError):
                            pass
            with self._active_process_lock:
                if self._active_process is proc:
                    self._active_process = None
+1 -52
View File
@@ -410,29 +410,10 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
call_id = raw_tool_call_id.strip()
if not isinstance(call_id, str) or not call_id.strip():
continue
# Multimodal tool result: convert OpenAI-style content list into
# Responses ``function_call_output.output`` array. The Responses
# API accepts ``output`` as either a string or an array of
# ``input_text``/``input_image`` items. See
# https://developers.openai.com/api/reference/python/resources/responses/.
tool_content = msg.get("content")
output_value: Any
if isinstance(tool_content, list):
converted = _chat_content_to_responses_parts(
tool_content, role="user",
)
if converted:
output_value = converted
else:
output_value = ""
else:
output_value = str(tool_content or "")
items.append({
"type": "function_call_output",
"call_id": call_id,
"output": output_value,
"output": str(msg.get("content", "") or ""),
})
return items
@@ -485,38 +466,6 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
output = item.get("output", "")
if output is None:
output = ""
# Output may be a string OR an array of structured content
# items (input_text / input_image) for multimodal tool results.
# Both shapes are accepted by the Responses API. We preserve
# the array form when present.
if isinstance(output, list):
# Validate each item is a recognised content shape; drop
# anything else to avoid 4xx from the API.
cleaned: List[Dict[str, Any]] = []
for part in output:
if not isinstance(part, dict):
continue
ptype = part.get("type")
if ptype == "input_text":
text = part.get("text")
if isinstance(text, str) and text:
cleaned.append({"type": "input_text", "text": text})
elif ptype == "input_image":
url = part.get("image_url")
if isinstance(url, str) and url:
entry: Dict[str, Any] = {"type": "input_image", "image_url": url}
detail = part.get("detail")
if isinstance(detail, str) and detail.strip():
entry["detail"] = detail.strip()
cleaned.append(entry)
normalized.append(
{
"type": "function_call_output",
"call_id": call_id.strip(),
"output": cleaned if cleaned else "",
}
)
continue
if not isinstance(output, str):
output = str(output)
+15 -24
View File
@@ -23,7 +23,7 @@ import re
import time
from typing import Any, Dict, List, Optional
from agent.auxiliary_client import call_llm, _is_connection_error
from agent.auxiliary_client import call_llm
from agent.context_engine import ContextEngine
from agent.model_metadata import (
MINIMUM_CONTEXT_LENGTH,
@@ -167,7 +167,7 @@ def _strip_image_parts_from_parts(parts: Any) -> Any:
out.append(part)
continue
ptype = part.get("type")
if ptype in {"image", "image_url", "input_image"}:
if ptype in ("image", "image_url", "input_image"):
had_image = True
out.append({"type": "text", "text": "[screenshot removed to save context]"})
else:
@@ -274,8 +274,8 @@ def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) ->
mode = args.get("mode", "replace")
return f"[patch] {mode} in {path} ({content_len:,} chars result)"
if tool_name in {"browser_navigate", "browser_click", "browser_snapshot",
"browser_type", "browser_scroll", "browser_vision"}:
if tool_name in ("browser_navigate", "browser_click", "browser_snapshot",
"browser_type", "browser_scroll", "browser_vision"):
url = args.get("url", "")
ref = args.get("ref", "")
detail = f" {url}" if url else (f" ref={ref}" if ref else "")
@@ -304,7 +304,7 @@ def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) ->
code_preview += "..."
return f"[execute_code] `{code_preview}` ({line_count} lines output)"
if tool_name in {"skill_view", "skills_list", "skill_manage"}:
if tool_name in ("skill_view", "skills_list", "skill_manage"):
name = args.get("name", "?")
return f"[{tool_name}] name={name} ({content_len:,} chars)"
@@ -979,13 +979,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
_status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None)
_err_str = str(e).lower()
_is_model_not_found = (
_status in {404, 503}
_status in (404, 503)
or "model_not_found" in _err_str
or "does not exist" in _err_str
or "no available channel" in _err_str
)
_is_timeout = (
_status in {408, 429, 502, 504}
_status in (408, 429, 502, 504)
or "timeout" in _err_str
)
# Non-JSON / malformed-body responses from misconfigured providers
@@ -1000,14 +1000,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
isinstance(e, json.JSONDecodeError)
or "expecting value" in _err_str
)
# httpcore / httpx streaming premature-close errors surface as
# ConnectionError subclasses or plain Exception with characteristic
# substrings ("incomplete chunked read", "peer closed connection",
# "response ended prematurely", "unexpected eof"). These are
# transient network events; treat them like a timeout so we fall
# back to the main model instead of entering a 60-second cooldown.
# See issue #18458.
_is_streaming_closed = _is_connection_error(e)
if _is_json_decode and not _is_model_not_found and not _is_timeout:
logger.error(
"Context compression failed: auxiliary LLM returned a "
@@ -1020,7 +1012,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
e,
)
if (
(_is_model_not_found or _is_timeout or _is_json_decode or _is_streaming_closed)
(_is_model_not_found or _is_timeout or _is_json_decode)
and self.summary_model
and self.summary_model != self.model
and not getattr(self, "_summary_model_fallen_back", False)
@@ -1029,8 +1021,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
_reason = "returned invalid JSON"
elif _is_model_not_found:
_reason = "unavailable"
elif _is_streaming_closed:
_reason = "closed stream prematurely"
else:
_reason = "timed out"
self._fallback_to_main_for_compression(e, _reason)
@@ -1053,10 +1043,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
self._fallback_to_main_for_compression(e, "failed")
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
# Transient errors (timeout, rate limit, network, JSON decode,
# streaming premature-close) — shorter cooldown for JSON decode and
# streaming-closed since those conditions can self-resolve quickly.
_transient_cooldown = 30 if (_is_json_decode or _is_streaming_closed) else 60
# Transient errors (timeout, rate limit, network, JSON decode) —
# shorter cooldown for JSON decode since the body shape can flip
# back to valid quickly when an upstream proxy recovers.
_transient_cooldown = 30 if _is_json_decode else 60
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
err_text = str(e).strip() or e.__class__.__name__
if len(err_text) > 220:
@@ -1316,7 +1306,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Ensure we protect at least min_tail messages
fallback_cut = n - min_tail
cut_idx = min(cut_idx, fallback_cut)
if cut_idx > fallback_cut:
cut_idx = fallback_cut
# If the token budget would protect everything (small conversations),
# force a cut after the head so compression can still remove middle turns.
@@ -1479,7 +1470,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
first_tail_role = messages[compress_end].get("role", "user") if compress_end < n_messages else "user"
# Pick a role that avoids consecutive same-role with both neighbors.
# Priority: avoid colliding with head (already committed), then tail.
if last_head_role in {"assistant", "tool"}:
if last_head_role in ("assistant", "tool"):
summary_role = "user"
else:
summary_role = "assistant"
+1 -1
View File
@@ -149,7 +149,7 @@ class PooledCredential:
}
result: Dict[str, Any] = {}
for field_def in fields(self):
if field_def.name in {"provider", "extra"}:
if field_def.name in ("provider", "extra"):
continue
value = getattr(self, field_def.name)
if value is not None or field_def.name in _ALWAYS_EMIT:
-107
View File
@@ -72,7 +72,6 @@ def _default_state() -> Dict[str, Any]:
"last_run_at": None,
"last_run_duration_seconds": None,
"last_run_summary": None,
"last_run_summary_shown_at": None,
"last_report_path": None,
"paused": False,
"run_count": 0,
@@ -877,96 +876,6 @@ def _reconcile_classification(
return {"consolidated": consolidated, "pruned": pruned}
def _build_rename_summary(
    *,
    before_names: Set[str],
    after_report: List[Dict[str, Any]],
    tool_calls: List[Dict[str, Any]],
    model_final: str,
) -> str:
    """Format the user-visible rename map for a curator run.

    Renders the "where did my skills go?" lines that get appended to the
    `final_summary` string fed to gateway/CLI receivers. Returns an empty
    string when nothing was archived this run, so no-op ticks add no extra
    log noise.

    Format::

        archived 4 skill(s):
        pdf-extraction → document-tools
        docx-extraction → document-tools
        flaky-thing — pruned (stale)
        old-utility → spreadsheet-ops
        full report: hermes curator status
        keep an umbrella stable: hermes curator pin document-tools

    At most 10 entries are listed (consolidated first, then pruned),
    followed by an "… and N more" line when entries were cut. The pin hint
    only appears when at least one consolidation produced a destination
    skill; pruned-only runs skip it.

    Args:
        before_names: Skill names that existed before the run.
        after_report: Report rows after the run; only dict rows with a
            ``name`` are considered.
        tool_calls: Tool calls recorded during the run, passed to the
            classification helpers.
        model_final: Final model text, parsed for a structured summary.

    Returns:
        The newline-joined summary, or ``""`` when no skill was removed.
    """
    # Rows without a name cannot take part in the before/after comparison.
    after_names = {
        row.get("name")
        for row in after_report
        if isinstance(row, dict) and row.get("name") is not None
    }
    removed = sorted(before_names - after_names)
    if not removed:
        return ""
    added = sorted(after_names - before_names)

    heuristic = _classify_removed_skills(
        removed=removed,
        added=added,
        after_names=after_names,
        tool_calls=tool_calls,
    )
    model_block = _parse_structured_summary(model_final)
    destinations = set(after_names) | set(added)
    absorbed_declarations = _extract_absorbed_into_declarations(tool_calls)
    classification = _reconcile_classification(
        removed=removed,
        heuristic=heuristic,
        model_block=model_block,
        destinations=destinations,
        absorbed_declarations=absorbed_declarations,
    )
    consolidated = classification["consolidated"]
    pruned = classification["pruned"]

    SHOW = 10
    total = len(consolidated) + len(pruned)

    # One line per archived skill: consolidated entries first, then pruned.
    entries: List[str] = [
        f"{entry.get('name', '?')} → {entry.get('into', '?')}"
        for entry in consolidated
    ]
    for entry in pruned:
        name = entry.get("name", "?") if isinstance(entry, dict) else str(entry)
        entries.append(f"{name} — pruned (stale)")

    lines: List[str] = [f"archived {total} skill(s):"]
    lines.extend(entries[:SHOW])
    if total > SHOW:
        lines.append(f" … and {total - SHOW} more")
    lines.append("full report: hermes curator status")

    # Pin hint — only surfaced when a consolidation left a destination skill
    # behind; the first destination (alphabetically) is used as the example.
    if consolidated:
        umbrellas = sorted({e.get("into") for e in consolidated if e.get("into")})
        if umbrellas:
            example = umbrellas[0]
            lines.append(
                f"keep an umbrella stable: hermes curator pin {example}"
            )
    return "\n".join(lines)
def _write_run_report(
*,
started_at: datetime,
@@ -1489,22 +1398,6 @@ def run_curator_review(
"error": str(e),
}
# Append the rename map (`old-name → umbrella`) to the user-visible
# summary so people don't have to dig into REPORT.md to find out where
# their skills went. Best-effort: classification is pure but never
# block the run on a formatting issue.
try:
rename_lines = _build_rename_summary(
before_names=before_names,
after_report=skill_usage.agent_created_report(),
tool_calls=llm_meta.get("tool_calls", []) or [],
model_final=llm_meta.get("final", "") or "",
)
if rename_lines:
final_summary = f"{final_summary}\n{rename_lines}"
except Exception as e:
logger.debug("Curator rename summary build failed: %s", e, exc_info=True)
elapsed = (datetime.now(timezone.utc) - start).total_seconds()
state2 = load_state()
state2["last_run_duration_seconds"] = elapsed
+8 -30
View File
@@ -83,7 +83,7 @@ class ClassifiedError:
@property
def is_auth(self) -> bool:
return self.reason in {FailoverReason.auth, FailoverReason.auth_permanent}
return self.reason in (FailoverReason.auth, FailoverReason.auth_permanent)
@@ -254,20 +254,6 @@ _THINKING_SIG_PATTERNS = [
"signature", # Combined with "thinking" check
]
# Message-string patterns that indicate a provider-side timeout even when
# the exception type is generic (e.g. RuntimeError from a local shim that
# wraps a subprocess timeout). Checked before the type-based transport
# heuristics so custom-provider "timed out" errors don't fall through to
# the unknown bucket and get misreported as empty responses.
_TIMEOUT_MESSAGE_PATTERNS = [
"timed out",
"turn timed out",
"request timed out",
"deadline exceeded",
"operation timed out",
"upstream timed out",
]
# Transport error type names
_TRANSPORT_ERROR_TYPES = frozenset({
"ReadTimeout", "ConnectTimeout", "PoolTimeout",
@@ -688,10 +674,10 @@ def _classify_by_status(
result_fn=result_fn,
)
if status_code in {500, 502}:
if status_code in (500, 502):
return result_fn(FailoverReason.server_error, retryable=True)
if status_code in {503, 529}:
if status_code in (503, 529):
return result_fn(FailoverReason.overloaded, retryable=True)
# Other 4xx — non-retryable
@@ -810,7 +796,7 @@ def _classify_400(
# Responses API (and some providers) use flat body: {"message": "..."}
if not err_body_msg:
err_body_msg = str(body.get("message") or "").strip().lower()
is_generic = len(err_body_msg) < 30 or err_body_msg in {"error", ""}
is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
# Absolute token/message-count thresholds are only a proxy for smaller
# context windows. Large-context sessions can have many messages while
# still being far below their actual token budget.
@@ -841,14 +827,14 @@ def _classify_by_error_code(
"""Classify by structured error codes from the response body."""
code_lower = error_code.lower()
if code_lower in {"resource_exhausted", "throttled", "rate_limit_exceeded"}:
if code_lower in ("resource_exhausted", "throttled", "rate_limit_exceeded"):
return result_fn(
FailoverReason.rate_limit,
retryable=True,
should_rotate_credential=True,
)
if code_lower in {"insufficient_quota", "billing_not_active", "payment_required"}:
if code_lower in ("insufficient_quota", "billing_not_active", "payment_required"):
return result_fn(
FailoverReason.billing,
retryable=False,
@@ -856,14 +842,14 @@ def _classify_by_error_code(
should_fallback=True,
)
if code_lower in {"model_not_found", "model_not_available", "invalid_model"}:
if code_lower in ("model_not_found", "model_not_available", "invalid_model"):
return result_fn(
FailoverReason.model_not_found,
retryable=False,
should_fallback=True,
)
if code_lower in {"context_length_exceeded", "max_tokens_exceeded"}:
if code_lower in ("context_length_exceeded", "max_tokens_exceeded"):
return result_fn(
FailoverReason.context_overflow,
retryable=True,
@@ -977,14 +963,6 @@ def _classify_by_message(
should_fallback=True,
)
# Timeout message patterns — generic exception types (e.g. RuntimeError)
# raised by local shims or custom providers that internally wrap a
# subprocess/HTTP timeout. Classified as transport timeout so the retry
# loop rebuilds the client instead of treating the turn as an empty
# model response.
if any(p in error_msg for p in _TIMEOUT_MESSAGE_PATTERNS):
return result_fn(FailoverReason.timeout, retryable=True)
return None
+1 -1
View File
@@ -77,7 +77,7 @@ def _coerce_content_to_text(content: Any) -> str:
if p.get("type") == "text" and isinstance(p.get("text"), str):
pieces.append(p["text"])
# Multimodal (image_url, etc.) — stub for now; log and skip
elif p.get("type") in {"image_url", "input_audio"}:
elif p.get("type") in ("image_url", "input_audio"):
logger.debug("Dropping multimodal part (not yet supported): %s", p.get("type"))
return "\n".join(pieces)
return str(content)
-6
View File
@@ -945,12 +945,6 @@ class AsyncGeminiNativeClient:
self.api_key = sync_client.api_key
self.base_url = sync_client.base_url
self.chat = _AsyncGeminiChatNamespace(self)
# Expose the underlying sync client as _real_client so the auxiliary
# cache's eviction-by-leaf-client helper (#23482) can find and drop
# this async entry when the sync GeminiNativeClient is poisoned.
# GeminiNativeClient is itself the leaf (no OpenAI client beneath
# it), so we point at the sync_client directly.
self._real_client = sync_client
async def _create_chat_completion(self, **kwargs: Any) -> Any:
stream = bool(kwargs.get("stream"))
+4 -29
View File
@@ -39,45 +39,20 @@ from typing import Any
logger = logging.getLogger(__name__)
SUPPORTED_LANGUAGES: tuple[str, ...] = (
"en", "zh", "zh-hant", "ja", "de", "es", "fr", "tr", "uk",
"af", "ko", "it", "ga", "pt", "ru", "hu",
)
SUPPORTED_LANGUAGES: tuple[str, ...] = ("en", "zh", "ja", "de", "es", "fr", "tr", "uk")
DEFAULT_LANGUAGE = "en"
# Accept a few natural aliases so users who type "chinese" / "zh-CN" / "jp"
# get the right catalog instead of silently falling back to English.
_LANGUAGE_ALIASES: dict[str, str] = {
"english": "en", "en-us": "en", "en-gb": "en",
# Simplified Chinese — explicit codes route here; bare "chinese" / "mandarin"
# also default to Simplified since that's the larger user base.
"chinese": "zh", "mandarin": "zh", "zh-cn": "zh", "zh-hans": "zh", "zh-sg": "zh",
# Traditional Chinese — distinct catalog. Cover Taiwan / Hong Kong / Macau
# locale tags plus the common "traditional" alias.
"traditional-chinese": "zh-hant", "traditional_chinese": "zh-hant",
"zh-tw": "zh-hant", "zh-hk": "zh-hant", "zh-mo": "zh-hant",
"chinese": "zh", "mandarin": "zh", "zh-cn": "zh", "zh-tw": "zh", "zh-hans": "zh", "zh-hant": "zh",
"japanese": "ja", "jp": "ja", "ja-jp": "ja",
"german": "de", "deutsch": "de", "de-de": "de", "de-at": "de", "de-ch": "de",
"spanish": "es", "español": "es", "espanol": "es", "es-es": "es", "es-mx": "es", "es-ar": "es",
"german": "de", "deutsch": "de", "de-de": "de",
"spanish": "es", "español": "es", "espanol": "es", "es-es": "es", "es-mx": "es",
"french": "fr", "français": "fr", "france": "fr", "fr-fr": "fr", "fr-be": "fr", "fr-ca": "fr", "fr-ch": "fr",
"ukrainian": "uk", "ukrainisch": "uk", "українська": "uk", "uk-ua": "uk", "ua": "uk",
"turkish": "tr", "türkçe": "tr", "tr-tr": "tr",
# Afrikaans — South African Dutch-derived language; "af-ZA" is the common BCP-47 tag.
"afrikaans": "af", "af-za": "af",
# Korean
"korean": "ko", "한국어": "ko", "ko-kr": "ko",
# Italian
"italian": "it", "italiano": "it", "it-it": "it", "it-ch": "it",
# Irish (Gaeilge) — ga is the BCP-47 code
"irish": "ga", "gaeilge": "ga", "ga-ie": "ga",
# Portuguese — bare "portuguese" routes to European Portuguese; pt-br
# is in the same family but rendered identically here (no separate br catalog).
"portuguese": "pt", "português": "pt", "portugues": "pt",
"pt-pt": "pt", "pt-br": "pt", "brazilian": "pt", "brasileiro": "pt",
# Russian
"russian": "ru", "русский": "ru", "ru-ru": "ru",
# Hungarian
"hungarian": "hu", "magyar": "hu", "hu-hu": "hu",
}
_catalog_cache: dict[str, dict[str, str]] = {}
+4 -4
View File
@@ -76,7 +76,7 @@ def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
base_url = str(vision.get("base_url") or "").strip()
# "auto" / "" / blank = not explicit
if provider in {"", "auto"} and not model and not base_url:
if provider in ("", "auto") and not model and not base_url:
return False
return True
@@ -163,7 +163,7 @@ def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]:
if raw.startswith(b"\xff\xd8\xff"):
return "image/jpeg"
# GIF87a / GIF89a
if raw[:6] in {b"GIF87a", b"GIF89a"}:
if raw[:6] in (b"GIF87a", b"GIF89a"):
return "image/gif"
# WEBP: "RIFF" .... "WEBP"
if len(raw) >= 12 and raw[:4] == b"RIFF" and raw[8:12] == b"WEBP":
@@ -172,9 +172,9 @@ def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]:
if raw.startswith(b"BM"):
return "image/bmp"
# HEIC/HEIF: ftypheic / ftypheix / ftypmif1 / ftypmsf1 etc.
if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in {
if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in (
b"heic", b"heix", b"hevc", b"hevx", b"mif1", b"msf1", b"heim", b"heis",
}:
):
return "image/heic"
return None
-106
View File
@@ -1,106 +0,0 @@
"""Language Server Protocol (LSP) integration for Hermes Agent.
Hermes runs full language servers (pyright, gopls, rust-analyzer,
typescript-language-server, etc.) as subprocesses and pipes their
``textDocument/publishDiagnostics`` output into the post-write lint
delta filter used by ``write_file`` and ``patch``.
LSP is **gated on git workspace detection**: if the agent's cwd is
inside a git repository, LSP runs against that workspace; otherwise the
file_operations layer falls back to its existing in-process syntax
checks. This keeps users on user-home cwd's (e.g. Telegram gateway
chats) from spawning daemons they don't need.
Public API:
from agent.lsp import get_service
svc = get_service()
if svc and svc.enabled_for(path):
await svc.touch_file(path)
diags = svc.diagnostics_for(path)
The bulk of the wiring is internal; most callers only need the layer
in :func:`tools.file_operations.FileOperations._check_lint_delta`,
which is already wired (see that module).
Architecture is documented in ``website/docs/user-guide/features/lsp.md``.
"""
from __future__ import annotations
import atexit
import logging
import threading
from typing import Optional
from agent.lsp.manager import LSPService
logger = logging.getLogger("agent.lsp")
_service: Optional[LSPService] = None
_atexit_registered = False
_service_lock = threading.Lock()
def get_service() -> Optional[LSPService]:
    """Return the process-wide LSP service singleton, or None when inactive.

    The service is built lazily from config on first use. ``None`` is
    returned whenever no service exists or the existing one reports itself
    inactive.

    The first creation also registers an :mod:`atexit` hook that shuts the
    service down on interpreter exit, so spawned language servers are not
    left running after a CLI or gateway session ends.
    """
    global _service, _atexit_registered

    current = _service
    if current is None:
        with _service_lock:
            # Re-check under the lock: another thread may have created it.
            current = _service
            if current is None:
                current = _service = LSPService.create_from_config()
                if not _atexit_registered:
                    # atexit hooks run on normal exit and SystemExit, not on
                    # os._exit() or an unhandled fatal signal. That is fine:
                    # the goal is only a clean teardown on ordinary exits.
                    atexit.register(_atexit_shutdown)
                    _atexit_registered = True

    if current is not None and current.is_active():
        return current
    return None
def shutdown_service() -> None:
    """Stop the LSP service singleton, if any, and forget it.

    Calling this repeatedly, or before any service exists, is a no-op.
    Errors raised by the service's own ``shutdown()`` are logged at
    debug level and swallowed.
    """
    global _service
    # Detach the singleton under the lock, then stop it without the lock.
    with _service_lock:
        stale, _service = _service, None
    if stale is None:
        return
    try:
        stale.shutdown()
    except Exception as exc:  # noqa: BLE001
        logger.debug("LSP shutdown error: %s", exc)
def _atexit_shutdown() -> None:
    """Run :func:`shutdown_service` from the :mod:`atexit` hook.

    Failures are only logged at debug level: by the time atexit fires
    the user has already seen the agent's final output, so a noisy
    shutdown line on top of that is just clutter.
    """
    try:
        shutdown_service()
    except Exception as exc:  # noqa: BLE001
        logger.debug("atexit LSP shutdown failed: %s", exc)
__all__ = ["get_service", "shutdown_service", "LSPService"]
-308
View File
@@ -1,308 +0,0 @@
"""``hermes lsp`` CLI subcommand.
Subcommands:
- ``status`` — show service state, configured servers, install status.
- ``install <server_id>`` — eagerly install one server's binary.
- ``install-all`` — try to install every server with a known recipe.
- ``restart`` — tear down running clients so the next edit re-spawns.
- ``which <server_id>`` — print the resolved binary path for one server.
- ``list`` — print the registry of supported servers.
The handlers are kept here (rather than in
``hermes_cli/main.py``) so the LSP module ships self-contained.
"""
from __future__ import annotations
import argparse
import sys
from typing import Optional
def register_subparser(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``hermes lsp`` command and its subcommands to ``subparsers``.

    The created parser dispatches to :func:`run_lsp_command` through its
    ``func`` default.
    """
    lsp_parser = subparsers.add_parser(
        "lsp",
        help="Language Server Protocol management",
        description=(
            "Manage the LSP layer that powers post-write semantic "
            "diagnostics in write_file/patch."
        ),
    )
    commands = lsp_parser.add_subparsers(dest="lsp_command")

    # status
    status_cmd = commands.add_parser("status", help="Show LSP service status")
    status_cmd.add_argument(
        "--json", action="store_true", help="Emit machine-readable JSON"
    )

    # list
    list_cmd = commands.add_parser("list", help="List supported language servers")
    list_cmd.add_argument(
        "--installed-only",
        action="store_true",
        help="Only show servers whose binary is currently available",
    )

    # install
    install_cmd = commands.add_parser("install", help="Install a server binary")
    install_cmd.add_argument("server", help="Server id (e.g. pyright, gopls)")

    # install-all
    install_all_cmd = commands.add_parser(
        "install-all",
        help="Install every server with a known auto-install recipe",
    )
    install_all_cmd.add_argument(
        "--include-manual",
        action="store_true",
        help="Even attempt servers marked manual-install (best effort)",
    )

    # restart (takes no arguments)
    commands.add_parser(
        "restart",
        help="Tear down running LSP clients (next edit re-spawns)",
    )

    # which
    which_cmd = commands.add_parser("which", help="Print binary path for a server")
    which_cmd.add_argument("server", help="Server id")

    lsp_parser.set_defaults(func=run_lsp_command)
def run_lsp_command(args: argparse.Namespace) -> int:
    """Dispatch ``hermes lsp <subcommand>`` and return its exit code.

    A missing subcommand defaults to ``status``.  Unknown subcommands
    write a message to stderr and return 2; Ctrl-C returns 130.
    """
    sub = getattr(args, "lsp_command", None) or "status"
    # Lambdas defer attribute access so ``args.server`` is only touched
    # by the subcommands that define it.
    handlers = {
        "status": lambda: _cmd_status(getattr(args, "json", False)),
        "list": lambda: _cmd_list(getattr(args, "installed_only", False)),
        "install": lambda: _cmd_install(args.server),
        "install-all": lambda: _cmd_install_all(
            getattr(args, "include_manual", False)
        ),
        "restart": lambda: _cmd_restart(),
        "which": lambda: _cmd_which(args.server),
    }
    try:
        handler = handlers.get(sub)
        if handler is not None:
            return handler()
        sys.stderr.write(f"unknown lsp subcommand: {sub}\n")
        return 2
    except KeyboardInterrupt:
        return 130
def _cmd_status(emit_json: bool) -> int:
    """Print the LSP service state and the server registry; return 0.

    With ``emit_json`` the service status and one entry per registered
    server are written to stdout as a JSON document.  Otherwise a
    human-readable report is written: service settings and clients (only
    when a service is active), backend warnings, then every registered
    server with its install status.
    """
    from agent.lsp import get_service
    from agent.lsp.servers import SERVERS
    from agent.lsp.install import detect_status

    svc = get_service()
    service_active = svc is not None
    # No active service: report a minimal "disabled" status.
    info = svc.get_status() if svc is not None else {"enabled": False}

    if emit_json:
        import json

        payload = {
            "service": info,
            "registry": [
                {
                    "server_id": s.server_id,
                    "extensions": list(s.extensions),
                    "description": s.description,
                    "binary_status": detect_status(_recipe_pkg_for(s.server_id)),
                }
                for s in SERVERS
            ],
        }
        sys.stdout.write(json.dumps(payload, indent=2) + "\n")
        return 0

    # Lines are accumulated and written in one go at the end.
    out = []
    out.append("LSP Service")
    out.append("===========")
    out.append(f" enabled: {info.get('enabled', False)}")
    if service_active:
        out.append(f" wait_mode: {info.get('wait_mode')}")
        out.append(f" wait_timeout: {info.get('wait_timeout')}s")
        out.append(f" install_strategy:{info.get('install_strategy')}")
        clients = info.get("clients") or []
        if clients:
            out.append(f" active clients: {len(clients)}")
            for c in clients:
                out.append(
                    f" - {c['server_id']:20s} state={c['state']:10s} root={c['workspace_root']}"
                )
        else:
            out.append(" active clients: none")
        broken = info.get("broken") or []
        if broken:
            out.append(f" broken pairs: {len(broken)}")
            for b in broken:
                out.append(f" - {b}")
        disabled = info.get("disabled_servers") or []
        if disabled:
            out.append(f" disabled in cfg: {', '.join(disabled)}")

    # Surface backend-tool gaps that aren't visible in the registry table:
    # some servers spawn fine but emit no diagnostics without a sidecar
    # binary (bash-language-server -> shellcheck).
    backend_warnings = _backend_warnings()
    if backend_warnings:
        out.append("")
        out.append("Backend warnings")
        out.append("================")
        for line in backend_warnings:
            out.append(f" ! {line}")

    out.append("")
    out.append("Registered Servers")
    out.append("==================")
    for s in SERVERS:
        pkg = _recipe_pkg_for(s.server_id)
        status = detect_status(pkg)
        # One-character status marker; unknown statuses get a blank.
        marker = {
            "installed": "",
            "missing": "·",
            "manual-only": "?",
        }.get(status, " ")
        # Show at most five extensions, then a "+N" remainder count.
        ext_summary = ", ".join(list(s.extensions)[:5])
        if len(s.extensions) > 5:
            ext_summary += f", … (+{len(s.extensions) - 5})"
        out.append(
            f" {marker} {s.server_id:24s} [{status:11s}] {ext_summary}"
        )
        if s.description:
            out.append(f" {s.description}")

    sys.stdout.write("\n".join(out) + "\n")
    return 0
def _cmd_list(installed_only: bool) -> int:
    """Write one line per registered server to stdout; return 0.

    Each line carries the server id, its install status and its file
    extensions.  With ``installed_only`` servers whose status is not
    ``"installed"`` are omitted.
    """
    from agent.lsp.servers import SERVERS
    from agent.lsp.install import detect_status

    for server in SERVERS:
        status = detect_status(_recipe_pkg_for(server.server_id))
        if status == "installed" or not installed_only:
            extensions = ",".join(server.extensions)
            sys.stdout.write(
                f"{server.server_id:24s} [{status:11s}] {extensions}\n"
            )
    return 0
def _cmd_install(server_id: str) -> int:
    """Install the binary for one server.

    Returns 0 when the server is already installed or the install
    succeeds, 1 when ``try_install`` yields no binary path.
    """
    from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status

    pkg = _recipe_pkg_for(server_id)
    if detect_status(pkg) == "installed":
        sys.stdout.write(f"{server_id} already installed\n")
        return 0

    sys.stdout.write(f"installing {server_id} (pkg={pkg}) ...\n")
    sys.stdout.flush()
    bin_path = try_install(pkg, "auto")
    if bin_path is not None:
        sys.stdout.write(f"installed: {bin_path}\n")
        return 0

    # Failure: distinguish manual-only recipes from real install errors.
    recipe = INSTALL_RECIPES.get(pkg)
    if recipe and recipe.get("strategy") == "manual":
        message = (
            f"{server_id}: this server requires a manual install. "
            f"See documentation.\n"
        )
    else:
        message = f"{server_id}: install failed (see logs).\n"
    sys.stderr.write(message)
    return 1
def _cmd_install_all(include_manual: bool) -> int:
    """Try to install every registered server that has an install recipe.

    Servers without a recipe are skipped, as are manual-strategy recipes
    unless ``include_manual`` is set.  Returns 1 if any attempted install
    failed, otherwise 0.
    """
    from agent.lsp.servers import SERVERS
    from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status

    exit_code = 0
    for server in SERVERS:
        name = server.server_id
        pkg = _recipe_pkg_for(name)
        recipe = INSTALL_RECIPES.get(pkg)
        if recipe is None or (
            recipe.get("strategy") == "manual" and not include_manual
        ):
            continue
        if detect_status(pkg) == "installed":
            sys.stdout.write(f" {name:24s} already installed\n")
            continue
        # Flush the progress prefix so it shows while the install runs.
        sys.stdout.write(f" installing {name} (pkg={pkg}) ... ")
        sys.stdout.flush()
        installed_at = try_install(pkg, "auto")
        if not installed_at:
            sys.stdout.write("FAILED\n")
            exit_code = 1
        else:
            sys.stdout.write(f"ok ({installed_at})\n")
    return exit_code
def _cmd_restart() -> int:
    """Shut the LSP service down and report it on stdout; return 0."""
    from agent.lsp import shutdown_service

    shutdown_service()
    notice = "LSP service shut down. Next edit will respawn clients.\n"
    sys.stdout.write(notice)
    return 0
def _cmd_which(server_id: str) -> int:
    """Print the resolved binary path for ``server_id``.

    The Hermes-managed bin directory is checked first, then ``PATH``.
    The registry id is first translated to its install-recipe key with
    :func:`_recipe_pkg_for` (as the other subcommands do), so ids whose
    recipe key differs (e.g. ``typescript``) find the recipe's ``bin``.

    Returns 0 after writing the path to stdout, or 1 after writing a
    "not installed" message to stderr.
    """
    import shutil as _shutil

    from agent.lsp.install import INSTALL_RECIPES, hermes_lsp_bin_dir

    # Prefer the alias-resolved key; fall back to the raw id so callers
    # that pass a recipe key directly keep working.
    recipe = INSTALL_RECIPES.get(_recipe_pkg_for(server_id)) or INSTALL_RECIPES.get(
        server_id
    )
    bin_name = (recipe or {}).get("bin", server_id)

    staged = hermes_lsp_bin_dir() / bin_name
    if staged.exists():
        sys.stdout.write(str(staged) + "\n")
        return 0

    on_path = _shutil.which(bin_name)
    if on_path:
        sys.stdout.write(on_path + "\n")
        return 0

    sys.stderr.write(f"{server_id}: not installed\n")
    return 1
def _recipe_pkg_for(server_id: str) -> str:
"""Map a registry ``server_id`` to its install-recipe package key."""
# The mapping lives here (not in install.py) because it's a CLI
# convenience layer. Most server_ids are also their own recipe
# key, but a few differ (e.g. ``vue-language-server`` →
# ``@vue/language-server``).
aliases = {
"vue-language-server": "@vue/language-server",
"astro-language-server": "@astrojs/language-server",
"dockerfile-ls": "dockerfile-language-server-nodejs",
"typescript": "typescript-language-server",
}
return aliases.get(server_id, server_id)
def _backend_warnings() -> list:
    """Return notes about missing backend tools that fail silently.

    Some language servers are thin wrappers around an external CLI for
    their diagnostics — they spawn cleanly but never emit any errors
    when that sidecar binary isn't on PATH.  bash-language-server with
    shellcheck is the case checked here.

    Each returned string is a short, actionable message that includes
    an install hint for common platforms.
    """
    import shutil as _shutil

    from agent.lsp.install import hermes_lsp_bin_dir

    # The server counts as installed if it is on PATH or staged in the
    # Hermes-managed bin directory.
    bash_installed = (
        _shutil.which("bash-language-server") is not None
        or (hermes_lsp_bin_dir() / "bash-language-server").exists()
    )
    if not bash_installed or _shutil.which("shellcheck") is not None:
        return []
    return [
        "bash-language-server is installed but shellcheck is missing — "
        "diagnostics will be empty (apt: shellcheck, brew: shellcheck, "
        "scoop: shellcheck)."
    ]
-930
View File
@@ -1,930 +0,0 @@
"""Async LSP client over stdin/stdout.
One :class:`LSPClient` corresponds to one ``(language_server, workspace_root)``
pair — exactly what OpenCode keys clients on, and the same shape Claude
Code uses. The client owns a child process, drives the JSON-RPC
exchange, and exposes:
- :meth:`open_file` / :meth:`change_file` — text document sync
- :meth:`wait_for_diagnostics` — block until the server emits fresh
diagnostics for a specific file (or a timeout fires)
- :meth:`diagnostics_for` — read the current per-file diagnostic store
- :meth:`shutdown` — graceful close + SIGTERM/SIGKILL fallback
The class is designed for async use from a single asyncio event loop.
The :class:`agent.lsp.manager.LSPService` runs an event loop in a
background thread so the synchronous file_operations layer can call
into it via :func:`agent.lsp.manager.LSPService.touch_file`.
Implementation notes:
- Push diagnostics are stored per-URI in :attr:`_push_diagnostics` from
``textDocument/publishDiagnostics`` notifications. Pull diagnostics
go in :attr:`_pull_diagnostics`. The merged view dedupes by content.
- Whole-document sync. Even when the server advertises incremental
sync, we send a single ``contentChanges`` entry replacing the
entire document. Pretending to be incremental while sending a
full replacement is well-tolerated by every major server and saves
range bookkeeping. See OpenCode's ``client.ts:584-659`` for the
same trick.
- The "touch-file dance": every ``open_file`` call also fires a
``workspace/didChangeWatchedFiles`` notification (CREATED on the
first open, CHANGED thereafter). Some servers (clangd, eslint)
only re-scan when this notification fires, even though the LSP spec
doesn't strictly require it.
- ``ContentModified`` (-32801) errors get retried with exponential
backoff up to 3 times. This matches Claude Code's
``LSPServerInstance.sendRequest``.
"""
from __future__ import annotations
import asyncio
import logging
import os
from pathlib import Path
from typing import Any, Awaitable, Callable, Dict, List, Optional, Set
from urllib.parse import quote, unquote
from agent.lsp.protocol import (
ERROR_CONTENT_MODIFIED,
ERROR_METHOD_NOT_FOUND,
LSPProtocolError,
LSPRequestError,
classify_message,
encode_message,
make_error_response,
make_notification,
make_request,
make_response,
read_message,
)
logger = logging.getLogger("agent.lsp.client")
# Timeouts (seconds) — mirror OpenCode's constants, scaled to seconds.
INITIALIZE_TIMEOUT = 45.0
DIAGNOSTICS_DOCUMENT_WAIT = 5.0
DIAGNOSTICS_FULL_WAIT = 10.0
DIAGNOSTICS_REQUEST_TIMEOUT = 3.0
PUSH_DEBOUNCE = 0.15
SHUTDOWN_GRACE = 1.0 # seconds between SIGTERM and SIGKILL
# Retry policy for transient ContentModified errors.
MAX_CONTENT_MODIFIED_RETRIES = 3
RETRY_BASE_DELAY = 0.5 # 0.5, 1.0, 2.0 — exponential
def file_uri(path: str) -> str:
    """Build a ``file://`` URI from a filesystem path.

    The path is made absolute first.  Characters other than ``/`` and
    ``:`` are percent-encoded, which covers spaces and unicode.  On
    Windows, backslashes become forward slashes and a leading slash is
    added so ``C:\\foo`` becomes ``file:///C:/foo``.
    """
    resolved = os.path.abspath(path)
    if os.name == "nt":
        # Windows: the drive letter must appear inside the path
        # component, hence the extra leading slash.
        resolved = resolved.replace("\\", "/")
        if not resolved.startswith("/"):
            resolved = f"/{resolved}"
    encoded = quote(resolved, safe="/:")
    return f"file://{encoded}"
def uri_to_path(uri: str) -> str:
    """Inverse of :func:`file_uri`.

    Strings that do not start with ``file://`` are returned unchanged.
    Otherwise the remainder is percent-decoded and normalised with
    :func:`os.path.normpath`.  On Windows the slash in front of a drive
    letter is stripped (``/C:/foo`` becomes ``C:/foo``).
    """
    if not uri.startswith("file://"):
        return uri
    # Decode BEFORE looking for the drive letter: URIs may carry the
    # colon percent-encoded (``file:///c%3A/foo``), and the check below
    # would miss it on the raw string.
    raw = unquote(uri[len("file://"):])
    if os.name == "nt" and raw.startswith("/") and len(raw) > 2 and raw[2] == ":":
        raw = raw[1:]  # strip leading slash before drive letter
    return os.path.normpath(raw)
def _end_position(text: str) -> Dict[str, int]:
"""Return the LSP Position at the end of ``text``.
Used to construct a single-range "replace whole document" change
for ``textDocument/didChange`` regardless of the server's declared
sync mode.
"""
if not text:
return {"line": 0, "character": 0}
lines = text.splitlines(keepends=False)
last_line = len(lines) - 1
last_col = len(lines[-1]) if lines else 0
# If the text ends with a trailing newline, ``splitlines`` won't
# represent it. The end position is then the start of the next
# (empty) line — line index is len(lines), column 0.
if text.endswith(("\n", "\r")):
return {"line": last_line + 1, "character": 0}
return {"line": last_line, "character": last_col}
class LSPClient:
"""Async LSP client tied to one server process and one workspace root.
Lifecycle:
c = LSPClient(server_id=..., workspace_root=..., command=[...])  # keyword-only
await c.start() # spawn + initialize
ver = await c.open_file("/path/to/foo.py")
await c.wait_for_diagnostics("/path/to/foo.py", ver)
diags = c.diagnostics_for("/path/to/foo.py")
await c.shutdown()
"""
# ------------------------------------------------------------------
# construction + lifecycle
# ------------------------------------------------------------------
def __init__(
    self,
    *,
    server_id: str,
    workspace_root: str,
    command: List[str],
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[str] = None,
    initialization_options: Optional[Dict[str, Any]] = None,
    seed_diagnostics_on_first_push: bool = False,
) -> None:
    """Record configuration and set up empty state; nothing is spawned.

    All arguments are keyword-only.

    Args:
        server_id: Identifier used in log messages.
        workspace_root: Workspace directory; also the default ``cwd``.
        command: Executable followed by its arguments (copied).
        env: Extra environment variables layered over ``os.environ``
            when the server is spawned.
        cwd: Working directory for the child; defaults to
            ``workspace_root``.
        initialization_options: Sent as ``initializationOptions`` and
            used to answer ``workspace/configuration`` requests.
        seed_diagnostics_on_first_push: When true, the first
            ``publishDiagnostics`` for a path is stored without waking
            waiters.
    """
    self.server_id = server_id
    self.workspace_root = workspace_root
    self._command = list(command)
    self._env = env
    self._cwd = cwd or workspace_root
    self._init_options = initialization_options or {}
    self._seed_first_push = seed_diagnostics_on_first_push

    # Process + streams
    self._proc: Optional[asyncio.subprocess.Process] = None
    self._stderr_task: Optional[asyncio.Task] = None
    self._reader_task: Optional[asyncio.Task] = None

    # Request/response correlation
    self._next_id: int = 0
    self._pending: Dict[int, asyncio.Future] = {}

    # Server-side request handlers (server → client requests).
    # Kept small and explicit; everything else returns method-not-found.
    self._request_handlers: Dict[str, Callable[[Any], Awaitable[Any]]] = {
        "window/workDoneProgress/create": self._handle_work_done_create,
        "workspace/configuration": self._handle_workspace_configuration,
        "client/registerCapability": self._handle_register_capability,
        "client/unregisterCapability": self._handle_unregister_capability,
        "workspace/workspaceFolders": self._handle_workspace_folders,
        "workspace/diagnostic/refresh": self._handle_diagnostic_refresh,
    }

    # Notifications (server → client) we care about.
    self._notification_handlers: Dict[str, Callable[[Any], None]] = {
        "textDocument/publishDiagnostics": self._handle_publish_diagnostics,
        # Everything else (window/showMessage, $/progress, etc.)
        # is silently dropped by default.
    }

    # Tracked file state — required for didChange version bumps.
    # Maps absolute path -> {"version": int, "text": str}.
    self._files: Dict[str, Dict[str, Any]] = {}

    # Diagnostic stores, keyed by file path (NOT URI).
    self._push_diagnostics: Dict[str, List[Dict[str, Any]]] = {}
    self._pull_diagnostics: Dict[str, List[Dict[str, Any]]] = {}

    # Per-path "last published" time so wait-for-fresh logic works.
    self._published: Dict[str, float] = {}

    # Per-path version of the latest push (matches our didChange
    # version when the server respects it).
    self._published_version: Dict[str, int] = {}

    # First-push seen flag, for typescript-style seed-on-first-push.
    self._first_push_seen: Set[str] = set()

    # Capability registrations — only diagnostic ones are tracked.
    self._diagnostic_registrations: Dict[str, Dict[str, Any]] = {}

    # State machine: "stopped" | "starting" | "running" | "error".
    self._state: str = "stopped"
    self._initialize_result: Optional[Dict[str, Any]] = None
    self._sync_kind: int = 1  # 1=Full, 2=Incremental
    self._stopping: bool = False

    # Push event for waiters.
    self._push_event = asyncio.Event()

    # Monotonic counter incremented on every publishDiagnostics push.
    # Waiters snapshot it on entry and treat any increase as
    # "something happened, recheck the predicate". Avoids the
    # asyncio.Event sticky-state trap.
    self._push_counter = 0

    # Registration change event so wait_for_diagnostics can re-loop
    # when the server announces a new dynamic provider.
    self._registration_event = asyncio.Event()
@property
def is_running(self) -> bool:
return self._state == "running" and self._proc is not None and self._proc.returncode is None
@property
def state(self) -> str:
    """Current lifecycle state: ``"stopped"``, ``"starting"``, ``"running"`` or ``"error"``."""
    return self._state
async def start(self) -> None:
"""Spawn the server and complete the initialize handshake.
Raises any exception encountered during spawn/init. On failure
the process is killed and the client is left in state
``"error"`` re-call ``start()`` to retry.
"""
if self._state in ("running", "starting"):
return
self._state = "starting"
try:
await self._spawn()
await self._initialize()
self._state = "running"
except Exception:
self._state = "error"
await self._cleanup_process()
raise
async def _spawn(self) -> None:
    """Launch the server process and start its two background tasks.

    The child inherits ``os.environ`` overlaid with the configured
    extra variables, with stdin/stdout/stderr all piped.

    Raises:
        LSPProtocolError: if the executable cannot be found.
    """
    child_env = dict(os.environ)
    if self._env:
        child_env.update(self._env)
    pipe = asyncio.subprocess.PIPE
    try:
        self._proc = await asyncio.create_subprocess_exec(
            self._command[0],
            *self._command[1:],
            stdin=pipe,
            stdout=pipe,
            stderr=pipe,
            env=child_env,
            cwd=self._cwd,
        )
    except FileNotFoundError as e:
        raise LSPProtocolError(
            f"LSP server binary not found: {self._command[0]} ({e})"
        ) from e
    # stderr must be drained, otherwise the pipe buffer fills and the
    # server hangs.  The reader loop consumes protocol messages.
    self._stderr_task = asyncio.create_task(self._drain_stderr())
    self._reader_task = asyncio.create_task(self._reader_loop())
async def _drain_stderr(self) -> None:
if self._proc is None or self._proc.stderr is None:
return
try:
while True:
line = await self._proc.stderr.readline()
if not line:
break
text = line.decode("utf-8", errors="replace").rstrip()
if text:
logger.debug("[%s] stderr: %s", self.server_id, text[:1000])
except (asyncio.CancelledError, OSError):
pass
async def _reader_loop(self) -> None:
    """Read messages from the server's stdout and route them.

    Responses resolve pending futures, server requests are handled in
    their own tasks, notifications go to their handlers, and anything
    else is logged and dropped.  The loop ends on clean EOF, protocol
    error, ``OSError`` or cancellation; in every case all still-pending
    requests are failed with ``LSPProtocolError``.
    """
    if self._proc is None or self._proc.stdout is None:
        return
    # The event loop only keeps weak references to tasks, so a
    # fire-and-forget task can be garbage-collected before it finishes.
    # Keep strong references until each task completes.
    inflight = getattr(self, "_request_tasks", None)
    if inflight is None:
        inflight = self._request_tasks = set()
    try:
        while True:
            msg = await read_message(self._proc.stdout)
            if msg is None:
                logger.debug("[%s] server closed stdout cleanly", self.server_id)
                break
            kind, key = classify_message(msg)
            if kind == "response":
                self._dispatch_response(key, msg)
            elif kind == "request":
                task = asyncio.create_task(self._dispatch_request(key, msg))
                inflight.add(task)
                task.add_done_callback(inflight.discard)
            elif kind == "notification":
                self._dispatch_notification(key, msg)
            else:
                logger.warning("[%s] dropping invalid message: %r", self.server_id, msg)
    except LSPProtocolError as e:
        logger.warning("[%s] protocol error in reader loop: %s", self.server_id, e)
    except (asyncio.CancelledError, OSError):
        pass
    finally:
        # Wake up any pending requests so they can fail fast.
        for fut in list(self._pending.values()):
            if not fut.done():
                fut.set_exception(LSPProtocolError("server connection closed"))
        self._pending.clear()
async def _initialize(self) -> None:
    """Run the LSP ``initialize`` / ``initialized`` handshake.

    Sends the client capabilities, waits up to ``INITIALIZE_TIMEOUT``
    seconds for the reply, records the server's text-sync kind, and
    then sends ``initialized``.  When initialization options are
    configured they are also pushed via
    ``workspace/didChangeConfiguration``.
    """
    root_uri = file_uri(self.workspace_root)

    workspace_caps = {
        "configuration": True,
        "workspaceFolders": True,
        "didChangeWatchedFiles": {"dynamicRegistration": True},
        "diagnostics": {"refreshSupport": False},
    }
    text_document_caps = {
        "synchronization": {
            "dynamicRegistration": False,
            "didOpen": True,
            "didChange": True,
            "didSave": True,
            "willSave": False,
            "willSaveWaitUntil": False,
        },
        "diagnostic": {
            "dynamicRegistration": True,
            "relatedDocumentSupport": True,
        },
        "publishDiagnostics": {
            "relatedInformation": True,
            "tagSupport": {"valueSet": [1, 2]},
            "versionSupport": True,
            "codeDescriptionSupport": True,
            "dataSupport": False,
        },
        "hover": {"contentFormat": ["markdown", "plaintext"]},
        "definition": {"linkSupport": True},
        "references": {},
        "documentSymbol": {"hierarchicalDocumentSymbolSupport": True},
    }
    params = {
        "rootUri": root_uri,
        "rootPath": self.workspace_root,
        "processId": os.getpid(),
        "workspaceFolders": [{"name": "workspace", "uri": root_uri}],
        "initializationOptions": self._init_options,
        "capabilities": {
            "window": {"workDoneProgress": True},
            "workspace": workspace_caps,
            "textDocument": text_document_caps,
            "general": {"positionEncodings": ["utf-16"]},
        },
    }

    reply = await asyncio.wait_for(
        self._send_request("initialize", params),
        timeout=INITIALIZE_TIMEOUT,
    )
    self._initialize_result = reply
    self._sync_kind = self._extract_sync_kind(reply.get("capabilities") or {})
    await self._send_notification("initialized", {})

    if not self._init_options:
        return
    # Some servers (vtsls, eslint) want config pushed via
    # didChangeConfiguration even if it was sent in
    # initializationOptions.
    await self._send_notification(
        "workspace/didChangeConfiguration",
        {"settings": self._init_options},
    )
@staticmethod
def _extract_sync_kind(capabilities: dict) -> int:
sync = capabilities.get("textDocumentSync")
if isinstance(sync, int):
return sync
if isinstance(sync, dict):
change = sync.get("change")
if isinstance(change, int):
return change
return 1 # default to Full
async def shutdown(self) -> None:
"""Best-effort graceful shutdown.
Sends ``shutdown`` + ``exit``, then SIGTERMs/SIGKILLs the
process if it doesn't exit cleanly. Idempotent.
"""
if self._stopping:
return
self._stopping = True
try:
if self.is_running:
try:
await asyncio.wait_for(self._send_request("shutdown", None), timeout=2.0)
except (asyncio.TimeoutError, LSPRequestError, LSPProtocolError):
pass
try:
await self._send_notification("exit", None)
except Exception:
pass
finally:
self._state = "stopped"
await self._cleanup_process()
async def _cleanup_process(self) -> None:
if self._reader_task is not None and not self._reader_task.done():
self._reader_task.cancel()
try:
await self._reader_task
except (asyncio.CancelledError, Exception): # noqa: BLE001
pass
if self._stderr_task is not None and not self._stderr_task.done():
self._stderr_task.cancel()
try:
await self._stderr_task
except (asyncio.CancelledError, Exception): # noqa: BLE001
pass
proc = self._proc
self._proc = None
if proc is None:
return
if proc.returncode is None:
try:
proc.terminate()
try:
await asyncio.wait_for(proc.wait(), timeout=SHUTDOWN_GRACE)
except asyncio.TimeoutError:
try:
proc.kill()
await proc.wait()
except ProcessLookupError:
pass
except ProcessLookupError:
pass
# ------------------------------------------------------------------
# request / notification plumbing
# ------------------------------------------------------------------
async def _send_request(self, method: str, params: Any) -> Any:
    """Send one JSON-RPC request and wait for its result.

    Request ids are consecutive integers.  The pending-future entry is
    always removed on exit, whether the call returns, raises or is
    cancelled.

    Raises:
        LSPProtocolError: if stdin is unavailable or the write fails.
        Whatever exception the response future is resolved with.
    """
    proc = self._proc
    if proc is None or proc.stdin is None or proc.stdin.is_closing():
        raise LSPProtocolError(f"cannot send {method!r}: stdin closed")

    waiter: asyncio.Future = asyncio.get_running_loop().create_future()
    req_id = self._next_id
    self._next_id = req_id + 1
    self._pending[req_id] = waiter
    try:
        try:
            proc.stdin.write(encode_message(make_request(req_id, method, params)))
            await proc.stdin.drain()
        except (BrokenPipeError, ConnectionResetError, OSError) as e:
            raise LSPProtocolError(f"send failed for {method!r}: {e}") from e
        return await waiter
    finally:
        self._pending.pop(req_id, None)
async def _send_request_with_retry(self, method: str, params: Any, *, timeout: float) -> Any:
    """Send a request, re-sending it when the server answers ``ContentModified``.

    Each attempt is bounded by ``timeout`` seconds.  After a
    ``ContentModified`` (-32801) error the request is retried up to
    ``MAX_CONTENT_MODIFIED_RETRIES`` times, sleeping
    ``RETRY_BASE_DELAY * 2**attempt`` seconds in between (0.5s, 1.0s,
    2.0s).  Any other error, and the error of the final attempt,
    propagates.
    """
    attempt = 0
    while True:
        try:
            return await asyncio.wait_for(
                self._send_request(method, params), timeout=timeout
            )
        except LSPRequestError as err:
            retryable = err.code == ERROR_CONTENT_MODIFIED
            if not retryable or attempt >= MAX_CONTENT_MODIFIED_RETRIES:
                raise
        await asyncio.sleep(RETRY_BASE_DELAY * (2 ** attempt))
        attempt += 1
async def _send_notification(self, method: str, params: Any) -> None:
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
return
try:
self._proc.stdin.write(encode_message(make_notification(method, params)))
await self._proc.stdin.drain()
except (BrokenPipeError, ConnectionResetError, OSError) as e:
logger.debug("[%s] notify %s failed: %s", self.server_id, method, e)
async def _send_response(self, req_id: Any, result: Any) -> None:
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
return
try:
self._proc.stdin.write(encode_message(make_response(req_id, result)))
await self._proc.stdin.drain()
except (BrokenPipeError, ConnectionResetError, OSError):
pass
async def _send_error_response(self, req_id: Any, code: int, message: str) -> None:
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
return
try:
self._proc.stdin.write(encode_message(make_error_response(req_id, code, message)))
await self._proc.stdin.drain()
except (BrokenPipeError, ConnectionResetError, OSError):
pass
def _dispatch_response(self, req_id: int, msg: dict) -> None:
fut = self._pending.get(req_id)
if fut is None or fut.done():
return
if "error" in msg:
err = msg["error"] or {}
fut.set_exception(
LSPRequestError(
code=int(err.get("code", -32000)),
message=str(err.get("message", "unknown")),
data=err.get("data"),
)
)
else:
fut.set_result(msg.get("result"))
async def _dispatch_request(self, req_id: Any, msg: dict) -> None:
method = msg.get("method", "")
params = msg.get("params")
handler = self._request_handlers.get(method)
if handler is None:
await self._send_error_response(req_id, ERROR_METHOD_NOT_FOUND, f"method not found: {method}")
return
try:
result = await handler(params)
except Exception as e: # noqa: BLE001 — protocol must not blow up
logger.warning("[%s] request handler %s failed: %s", self.server_id, method, e)
await self._send_error_response(req_id, -32000, f"handler failed: {e}")
return
await self._send_response(req_id, result)
def _dispatch_notification(self, method: str, msg: dict) -> None:
handler = self._notification_handlers.get(method)
if handler is None:
return
try:
handler(msg.get("params"))
except Exception as e: # noqa: BLE001
logger.debug("[%s] notification handler %s failed: %s", self.server_id, method, e)
# ------------------------------------------------------------------
# built-in server-→-client request handlers
# ------------------------------------------------------------------
async def _handle_work_done_create(self, params: Any) -> Any:
# Acknowledge progress tokens — required by some servers.
return None
async def _handle_workspace_configuration(self, params: Any) -> Any:
# Walk dotted sections through initializationOptions. Mirrors
# OpenCode's `client.ts:198-220` — return null when missing.
if not isinstance(params, dict):
return [None]
items = params.get("items") or []
out: List[Any] = []
for item in items:
if not isinstance(item, dict):
out.append(None)
continue
section = item.get("section")
if not section or not self._init_options:
out.append(self._init_options or None)
continue
cur: Any = self._init_options
for part in str(section).split("."):
if isinstance(cur, dict) and part in cur:
cur = cur[part]
else:
cur = None
break
out.append(cur)
return out
async def _handle_register_capability(self, params: Any) -> Any:
if not isinstance(params, dict):
return None
for reg in params.get("registrations") or []:
if not isinstance(reg, dict):
continue
method = reg.get("method")
reg_id = reg.get("id")
if method == "textDocument/diagnostic" and reg_id:
self._diagnostic_registrations[str(reg_id)] = reg
self._registration_event.set()
return None
async def _handle_unregister_capability(self, params: Any) -> Any:
if not isinstance(params, dict):
return None
for unreg in params.get("unregisterations") or []:
if not isinstance(unreg, dict):
continue
reg_id = unreg.get("id")
if reg_id:
self._diagnostic_registrations.pop(str(reg_id), None)
return None
async def _handle_workspace_folders(self, params: Any) -> Any:
    """Answer ``workspace/workspaceFolders`` with the single workspace root."""
    folder = {"name": "workspace", "uri": file_uri(self.workspace_root)}
    return [folder]
async def _handle_diagnostic_refresh(self, params: Any) -> Any:
# We don't honour refresh — we re-pull on every touchFile.
return None
# ------------------------------------------------------------------
# publishDiagnostics handler
# ------------------------------------------------------------------
def _handle_publish_diagnostics(self, params: Any) -> None:
    """Store a ``textDocument/publishDiagnostics`` push and wake waiters.

    Malformed params are ignored.  The diagnostics, the loop time and
    (when it is an integer) the document version are recorded under
    the file path derived from the URI.  With seed-on-first-push
    enabled, the first push for a path is stored silently: neither the
    push counter nor the push event is touched.
    """
    if not isinstance(params, dict):
        return
    uri = params.get("uri")
    if not isinstance(uri, str):
        return
    path = uri_to_path(uri)
    diagnostics = params.get("diagnostics") or []
    if not isinstance(diagnostics, list):
        diagnostics = []
    version = params.get("version")
    loop_time = asyncio.get_event_loop().time()

    # First push for a seeded server: it arrives before the
    # user-triggered didChange could have produced fresh diagnostics,
    # so a waiter must not resolve on it.
    seed_only = self._seed_first_push and path not in self._first_push_seen

    self._first_push_seen.add(path)
    self._push_diagnostics[path] = diagnostics
    self._published[path] = loop_time
    if isinstance(version, int):
        self._published_version[path] = version
    if seed_only:
        return

    # Bump the monotonic counter and leave the event set; waiters wake,
    # re-check their predicate, and compare against ``_push_counter``
    # to tell whether something new arrived.
    self._push_counter += 1
    self._push_event.set()
# ------------------------------------------------------------------
# public file-sync API
# ------------------------------------------------------------------
async def open_file(self, path: str, *, language_id: str = "plaintext") -> int:
    """Sync ``path`` to the server: didOpen the first time, didChange after.

    The file is read from disk as UTF-8 (undecodable bytes replaced).
    Every call first sends ``workspace/didChangeWatchedFiles`` (CREATED
    on first open, CHANGED afterwards).  A first open also discards
    stored diagnostics for the path.

    Returns:
        The document version just sent (0 on first open), which
        ``wait_for_diagnostics`` should be given.

    Raises:
        LSPProtocolError: if the client is not running or the file
            cannot be read.
    """
    if not self.is_running:
        raise LSPProtocolError("client not running")
    abs_path = os.path.abspath(path)
    try:
        text = Path(abs_path).read_text(encoding="utf-8", errors="replace")
    except OSError as e:
        raise LSPProtocolError(f"cannot read {abs_path}: {e}") from e
    uri = file_uri(abs_path)
    tracked = self._files.get(abs_path)

    if tracked is None:
        # First open: didChangeWatchedFiles CREATED + didOpen.
        await self._send_notification(
            "workspace/didChangeWatchedFiles",
            {"changes": [{"uri": uri, "type": 1}]},  # 1 = CREATED
        )
        # A fresh open starts from scratch: drop stale entries.
        for store in (
            self._push_diagnostics,
            self._pull_diagnostics,
            self._published,
            self._published_version,
        ):
            store.pop(abs_path, None)
        await self._send_notification(
            "textDocument/didOpen",
            {
                "textDocument": {
                    "uri": uri,
                    "languageId": language_id,
                    "version": 0,
                    "text": text,
                }
            },
        )
        self._files[abs_path] = {"version": 0, "text": text}
        return 0

    # Re-open: bump version, fire didChangeWatchedFiles + didChange.
    await self._send_notification(
        "workspace/didChangeWatchedFiles",
        {"changes": [{"uri": uri, "type": 2}]},  # 2 = CHANGED
    )
    next_version = tracked["version"] + 1
    previous_text = tracked["text"]
    if self._sync_kind == 2:
        # Incremental sync: one edit spanning the whole old document.
        change: Dict[str, Any] = {
            "range": {
                "start": {"line": 0, "character": 0},
                "end": _end_position(previous_text),
            },
            "text": text,
        }
    else:
        change = {"text": text}
    await self._send_notification(
        "textDocument/didChange",
        {
            "textDocument": {"uri": uri, "version": next_version},
            "contentChanges": [change],
        },
    )
    self._files[abs_path] = {"version": next_version, "text": text}
    return next_version
async def save_file(self, path: str) -> None:
    """Notify the server that ``path`` was saved (``textDocument/didSave``).

    Returns without sending anything when the client is not running.
    Some linters only re-scan a document on save.
    """
    if not self.is_running:
        return
    uri = file_uri(os.path.abspath(path))
    await self._send_notification(
        "textDocument/didSave",
        {"textDocument": {"uri": uri}},
    )
# ------------------------------------------------------------------
# diagnostics: pull + wait
# ------------------------------------------------------------------
async def _pull_document_diagnostics(self, path: str) -> None:
    """Issue one ``textDocument/diagnostic`` request for ``path``.

    On success the returned ``items`` list is stored in
    :attr:`_pull_diagnostics` under the absolute path, and any
    ``relatedDocuments`` entries are stored under the path derived from
    their URI.  Request errors, protocol errors and timeouts are logged
    at DEBUG and otherwise ignored (the server may not implement the
    pull endpoint); malformed responses are ignored silently.
    """
    target = os.path.abspath(path)
    try:
        request: Dict[str, Any] = {"textDocument": {"uri": file_uri(target)}}
        result = await self._send_request_with_retry(
            "textDocument/diagnostic",
            request,
            timeout=DIAGNOSTICS_REQUEST_TIMEOUT,
        )
    except (LSPRequestError, LSPProtocolError, asyncio.TimeoutError) as e:
        logger.debug("[%s] document diagnostic pull failed: %s", self.server_id, e)
        return

    if not isinstance(result, dict):
        return

    primary = result.get("items")
    if isinstance(primary, list):
        self._pull_diagnostics[target] = primary

    related = result.get("relatedDocuments")
    if not isinstance(related, dict):
        return
    for uri, report in related.items():
        # Only dict-shaped reports that carry a list of items are kept.
        sub_items = report.get("items") if isinstance(report, dict) else None
        if isinstance(sub_items, list):
            self._pull_diagnostics[uri_to_path(uri)] = sub_items
async def wait_for_diagnostics(
    self,
    path: str,
    version: int,
    *,
    mode: str = "document",
) -> None:
    """Wait for the server to produce diagnostics for ``path`` at ``version``.

    ``mode`` selects the time budget: ``"full"`` uses
    ``DIAGNOSTICS_FULL_WAIT``, anything else uses
    ``DIAGNOSTICS_DOCUMENT_WAIT``.  Within that budget a document pull
    and a wait for a pushed ``publishDiagnostics`` run concurrently.

    Best-effort: returns silently on timeout and never raises because
    the server lacks pull diagnostics — the push side still counts.
    Success is either a published entry for ``path`` whose recorded
    version is missing or ``>= version``, or any entry for ``path`` in
    the pull store.
    """
    budget = DIAGNOSTICS_FULL_WAIT if mode == "full" else DIAGNOSTICS_DOCUMENT_WAIT
    # get_running_loop() is the documented accessor inside a coroutine.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + budget
    abs_path = os.path.abspath(path)
    # After a pull that produced nothing, keep listening for a push for
    # up to this long before pulling again.  Without the pause a server
    # that rejects the pull request immediately would be re-queried in a
    # tight loop for the whole budget.
    retry_pause = 0.5

    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            return

        # Concurrent: document pull + push wait.
        pull_task = asyncio.create_task(self._pull_document_diagnostics(abs_path))
        push_task = asyncio.create_task(
            self._wait_for_fresh_push(abs_path, version, remaining)
        )
        tasks = (pull_task, push_task)
        try:
            await asyncio.wait(
                set(tasks),
                timeout=remaining,
                return_when=asyncio.FIRST_COMPLETED,
            )
            pull_came_up_empty = (
                pull_task.done()
                and not push_task.done()
                and abs_path not in self._pull_diagnostics
            )
            if pull_came_up_empty:
                pause = min(retry_pause, deadline - loop.time())
                if pause > 0:
                    await asyncio.wait({push_task}, timeout=pause)
        finally:
            # cancel() is a no-op on finished tasks.  gather() with
            # return_exceptions=True reaps every child (including its
            # exception, if any) without swallowing a cancellation that
            # is aimed at this coroutine itself.
            for task in tasks:
                task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)

        # If we got a fresh push for our version, we're done.
        current_v = self._published_version.get(abs_path)
        if abs_path in self._published and (
            current_v is None or current_v >= version
        ):
            return
        # Pull may have populated _pull_diagnostics — that's also success.
        if abs_path in self._pull_diagnostics:
            return
        # Otherwise loop until the budget runs out.
async def _wait_for_fresh_push(self, path: str, version: int, timeout: float) -> None:
    """Wait until a publishDiagnostics arrives for ``path`` at ``version`` or later.

    The wait is satisfied when ``path`` is in ``_published`` and its
    recorded version is either missing or ``>= version``.  Once that
    holds, the method lingers for up to ``PUSH_DEBOUNCE`` seconds, or
    until the push counter changes, before returning.  Returns silently
    when ``timeout`` seconds pass without the condition becoming true.
    """
    # get_running_loop() is the documented accessor inside a coroutine;
    # fetch it once instead of on every clock read.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    baseline = self._push_counter
    while True:
        current_v = self._published_version.get(path)
        if path in self._published and (current_v is None or current_v >= version):
            # Debounce — wait a tick in case more diagnostics arrive
            # immediately after.  TS often emits in pairs.  We snapshot
            # the counter so we wake on a *new* push, not on the one
            # that satisfied us a moment ago.
            debounce_baseline = self._push_counter
            debounce_deadline = loop.time() + PUSH_DEBOUNCE
            while self._push_counter == debounce_baseline:
                remaining = debounce_deadline - loop.time()
                if remaining <= 0:
                    break
                self._push_event.clear()
                try:
                    await asyncio.wait_for(self._push_event.wait(), timeout=remaining)
                except asyncio.TimeoutError:
                    break
            return
        remaining = deadline - loop.time()
        if remaining <= 0:
            return
        if self._push_counter > baseline:
            # New event arrived but predicate still false — re-check
            # immediately without waiting again.
            baseline = self._push_counter
            continue
        self._push_event.clear()
        try:
            # Cap each wait at 0.5s so the deadline is re-evaluated
            # regularly even if no push ever arrives.
            await asyncio.wait_for(self._push_event.wait(), timeout=min(remaining, 0.5))
        except asyncio.TimeoutError:
            continue
def diagnostics_for(self, path: str) -> List[Dict[str, Any]]:
    """Return the merged, de-duplicated diagnostics currently held for one file.

    Entries from the push store come first, followed by entries from
    the pull store; duplicates are removed by ``_dedupe``.  The result
    is an empty list when neither store has anything for the file.
    """
    key = os.path.abspath(path)
    pushed = self._push_diagnostics.get(key) or []
    pulled = self._pull_diagnostics.get(key) or []
    return _dedupe(pushed, pulled)
def _dedupe(*lists: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Concatenate ``lists`` and drop repeated diagnostics.

    Two diagnostics are the same when ``_diagnostic_key`` gives them
    the same key; the first occurrence wins and input order is kept.
    Entries that are not dicts are skipped.
    """
    # dicts preserve insertion order, so setdefault() keeps the first
    # diagnostic seen for each key in the position it first appeared.
    first_by_key: Dict[str, Dict[str, Any]] = {}
    for group in lists:
        for diag in group:
            if isinstance(diag, dict):
                first_by_key.setdefault(_diagnostic_key(diag), diag)
    return list(first_by_key.values())
def _diagnostic_key(d: Dict[str, Any]) -> str:
    """Build a content-equality key for one diagnostic.

    The key is the NUL-joined sequence of severity, code, source,
    stripped message and the range rendered as
    ``startLine:startChar-endLine:endChar``.  Missing or falsy severity
    counts as 1, a missing code/source/message as the empty string, and
    missing range coordinates as 0.  The docstring this replaces says the
    fields mirror claude-code's ``areDiagnosticsEqual``.
    """
    span = d.get("range") or {}
    begin = span.get("start") or {}
    finish = span.get("end") or {}

    # Codes may be ints or strings; stringify so that 0 stays "0"
    # instead of collapsing to the empty string.
    raw_code = d.get("code")
    code_text = "" if raw_code is None else str(raw_code)

    position = "{}:{}-{}:{}".format(
        begin.get("line", 0),
        begin.get("character", 0),
        finish.get("line", 0),
        finish.get("character", 0),
    )
    fields = (
        str(d.get("severity") or 1),
        code_text,
        str(d.get("source") or ""),
        str(d.get("message") or "").strip(),
        position,
    )
    return "\x00".join(fields)
# Names exported by ``from <this module> import *``.
__all__ = [
    "LSPClient",
    "file_uri",
    "uri_to_path",
    "INITIALIZE_TIMEOUT",
    "DIAGNOSTICS_DOCUMENT_WAIT",
    "DIAGNOSTICS_FULL_WAIT",
]
-213
View File
@@ -1,213 +0,0 @@
"""Structured logging with steady-state silence for the LSP layer.
The LSP layer fires on every write_file/patch. In a busy session
that's hundreds of events. We want users to be able to ``rg`` the
log for "did LSP fire on that edit?" without drowning in noise.
The level model:
- ``DEBUG`` for steady-state events that have no novel signal:
``clean``, ``feature off``, ``extension not mapped``, ``no project
root for already-announced file``, ``server unavailable for
already-announced binary``. These never reach ``agent.log`` at the
default INFO threshold.
- ``INFO`` for state transitions worth surfacing exactly once per
session: ``active for <root>`` the first time a (server_id,
workspace_root) client starts, ``no project root for <path>``
the first time we see that file. Plus every diagnostic event
(those are inherently rare and per-edit, exactly what users grep
for).
- ``WARNING`` for action-required failures: ``server unavailable``
(binary not on PATH) the first time per (server_id, binary),
``no server configured`` once per language. Per-call WARNING for
timeouts and unexpected bridge exceptions.
The dedup is in-process module-level sets. Each set grows at most by
the number of distinct (server_id, root) and (server_id, binary)
pairs touched in one Python process — bytes of memory in even an
aggressive monorepo session. Bounded LRU was rejected: evicting an
entry would risk re-firing the WARNING/INFO line we explicitly want
to suppress.
Grep recipe::
tail -f ~/.hermes/logs/agent.log | rg 'lsp\\['
"""
from __future__ import annotations
import logging
import os
import threading
from typing import Tuple
# Dedicated logger name so the documented grep recipe survives a
# ``logging.getLogger(__name__)`` rename of any internal module.
event_log = logging.getLogger("hermes.lint.lsp")
# ---------------------------------------------------------------------------
# Once-per-X dedup sets
# ---------------------------------------------------------------------------
# Guards every read-and-insert on the sets below (see ``_announce_once``).
_announce_lock = threading.Lock()
# Filled by ``log_active``.
_announced_active: set = set()  # keys: (server_id, workspace_root)
# Filled by ``log_server_unavailable``.
_announced_unavailable: set = set()  # keys: (server_id, binary_path_or_name)
# Filled by ``log_no_project_root``.
_announced_no_root: set = set()  # keys: (server_id, file_path)
# Filled by ``log_no_server_configured``.
_announced_no_server: set = set()  # keys: (server_id,)
def _short_path(file_path: str) -> str:
    """Render *file_path* relative to the cwd when sensible, else unchanged.

    Keeps log lines readable for the common case (the user is inside
    the project they're editing) without emitting brittle ``../../..``
    chains for the cross-tree case.

    Never raises for path-resolution problems: if a relative form
    cannot be computed, *file_path* is returned as given.
    """
    if not file_path:
        return file_path
    try:
        rel = os.path.relpath(file_path)
    except (ValueError, OSError):
        # ValueError: no relative path exists (e.g. different drives on
        # Windows).  OSError: the current working directory itself is
        # gone, so relpath() cannot resolve it.  A logging helper must
        # not turn either into a crash.
        return file_path
    if rel == ".." or rel.startswith(".." + os.sep):
        return file_path
    return rel
def _emit(server_id: str, level: int, message: str) -> None:
    """Log *message* at *level* on ``event_log``, prefixed with ``lsp[<server_id>]``."""
    event_log.log(level, "lsp[%s] %s", server_id, message)
def _announce_once(bucket: set, key: Tuple) -> bool:
    """Record *key* in *bucket* and report whether it was new.

    Returns True only for the first caller to present a given key.  The
    membership test and the insert happen under ``_announce_lock`` so
    two concurrent callers cannot both be told the key is new.
    """
    with _announce_lock:
        is_new = key not in bucket
        if is_new:
            bucket.add(key)
    return is_new
# ---------------------------------------------------------------------------
# Public event helpers — call these from the LSP layer.
# ---------------------------------------------------------------------------
def log_clean(server_id: str, file_path: str) -> None:
    """Record at DEBUG that *file_path* produced no diagnostics."""
    shown = _short_path(file_path)
    _emit(server_id, logging.DEBUG, f"clean ({shown})")
def log_disabled(server_id: str, file_path: str, reason: str) -> None:
    """Record at DEBUG that LSP was deliberately skipped for *file_path*.

    *reason* is free text supplied by the caller (feature off,
    extension unmapped, backend not local, ...).
    """
    shown = _short_path(file_path)
    _emit(server_id, logging.DEBUG, f"skipped: {reason} ({shown})")
def log_active(server_id: str, workspace_root: str) -> None:
    """Record that a client is in use for (server_id, workspace_root).

    The first call for a given pair logs ``active for <root>`` at INFO,
    so a single grep answers "is LSP actually running?".  Later calls
    for the same pair log ``reused client for <root>`` at DEBUG.
    """
    first_time = _announce_once(_announced_active, (server_id, workspace_root))
    if first_time:
        level, text = logging.INFO, f"active for {workspace_root}"
    else:
        level, text = logging.DEBUG, f"reused client for {workspace_root}"
    _emit(server_id, level, text)
def log_diagnostics(server_id: str, file_path: str, count: int) -> None:
    """Record at INFO, on every call, that *count* diagnostics arrived for a file.

    These are the failure signals users grep for, so they are never
    de-duplicated or demoted.
    """
    shown = _short_path(file_path)
    _emit(server_id, logging.INFO, f"{count} diags ({shown})")
def log_no_project_root(server_id: str, file_path: str) -> None:
    """Record that *file_path* had no recognised project marker.

    Logged at INFO the first time a (server_id, file_path) pair is
    seen and at DEBUG on every later call; the message text is the
    same in both cases.
    """
    first_time = _announce_once(_announced_no_root, (server_id, file_path))
    level = logging.INFO if first_time else logging.DEBUG
    _emit(server_id, level, f"no project root for {_short_path(file_path)}")
def log_server_unavailable(server_id: str, binary_or_pkg: str) -> None:
    """Record that the server binary could not be resolved.

    WARNING (with an install hint) the first time a
    (server_id, binary) pair is reported, DEBUG afterwards so repeated
    edits of the same file type do not flood the log.
    """
    if not _announce_once(_announced_unavailable, (server_id, binary_or_pkg)):
        _emit(server_id, logging.DEBUG, f"server still unavailable: {binary_or_pkg}")
        return
    text = (
        f"server unavailable: {binary_or_pkg} not found "
        "(install via `hermes lsp install <id>` or set lsp.servers.<id>.command)"
    )
    _emit(server_id, logging.WARNING, text)
def log_no_server_configured(server_id: str) -> None:
    """Warn, once per *server_id*, that no server is configured for it.

    Repeat calls for the same *server_id* log nothing at all.
    """
    if not _announce_once(_announced_no_server, (server_id,)):
        return
    _emit(server_id, logging.WARNING, "no server configured")
def log_timeout(server_id: str, file_path: str, kind: str = "diagnostics") -> None:
    """Warn that a *kind* request for *file_path* timed out.

    Emitted at WARNING on every call; timeouts are not de-duplicated.
    """
    text = f"{kind} timed out for {_short_path(file_path)}"
    _emit(server_id, logging.WARNING, text)
def log_server_error(server_id: str, file_path: str, exc: BaseException) -> None:
    """Warn that an unexpected exception escaped the LSP layer for *file_path*.

    The message carries the exception's class name and its ``str()``.
    """
    text = f"unexpected error for {_short_path(file_path)}: {type(exc).__name__}: {exc}"
    _emit(server_id, logging.WARNING, text)
def log_spawn_failed(server_id: str, workspace_root: str, exc: BaseException) -> None:
    """Warn that the server for *workspace_root* failed to spawn or initialize.

    The message carries the exception's class name and its ``str()``.
    """
    text = f"spawn/initialize failed for {workspace_root}: {type(exc).__name__}: {exc}"
    _emit(server_id, logging.WARNING, text)
def reset_announce_caches() -> None:
    """Empty all four once-per-key dedup sets (intended for tests only).

    After this call every ``log_*`` helper behaves as if nothing had
    been announced yet.
    """
    buckets = (
        _announced_active,
        _announced_unavailable,
        _announced_no_root,
        _announced_no_server,
    )
    with _announce_lock:
        for bucket in buckets:
            bucket.clear()
# Public surface: the shared logger, one helper per event type, and the
# test-only cache reset.
__all__ = [
    "event_log",
    "log_clean",
    "log_disabled",
    "log_active",
    "log_diagnostics",
    "log_no_project_root",
    "log_server_unavailable",
    "log_no_server_configured",
    "log_timeout",
    "log_server_error",
    "log_spawn_failed",
    "reset_announce_caches",
]
-376
View File
@@ -1,376 +0,0 @@
"""Auto-installation of LSP server binaries.
Tries to install missing servers using whatever package manager is
appropriate. All installs go to a Hermes-owned bin staging dir,
``<HERMES_HOME>/lsp/bin/``, so we don't pollute the user's global
toolchain.
Strategies:
- ``auto``: attempt to install with the best available package
manager. This is the default.
- ``manual``: never install; if a binary is missing, the server is
silently skipped and the user is told about it via ``hermes lsp
status``.
- ``off``: same as ``manual`` for now (kept distinct so we can
evolve behavior later, e.g. logging differently).
The actual installs happen synchronously the first time a server is
needed and concurrent calls to :func:`try_install` for the same
package are deduplicated via a per-package lock.
Failure modes are non-fatal: every install path is wrapped in
try/except and returns ``None`` on failure. The tool layer then
falls back to its in-process syntax checker, exactly as if the user
hadn't enabled LSP at all.
"""
from __future__ import annotations
import logging
import os
import shutil
import subprocess
import sys
import threading
from pathlib import Path
from typing import Any, Dict, Optional
logger = logging.getLogger("agent.lsp.install")
# Package-name → install recipe registry.  Each entry is a dict with:
# - ``strategy``: which installer to use ("npm", "go", or "manual"
#   for servers we never install ourselves).
# - ``pkg``: the package/module spec handed to that installer.
# - ``bin``: the executable name to look for afterwards, first in
#   ``<HERMES_HOME>/lsp/bin/`` and then on PATH.
#
# Optional fields:
# - ``extra_pkgs``: list of sibling packages to install alongside
#   ``pkg`` in the same node_modules tree. Used when an LSP server
#   has a runtime peer dependency that npm doesn't auto-pull (e.g.
#   typescript-language-server needs ``typescript``).
INSTALL_RECIPES: Dict[str, Dict[str, Any]] = {
    # Python
    "pyright": {"strategy": "npm", "pkg": "pyright", "bin": "pyright-langserver"},
    # JS/TS family
    "typescript-language-server": {
        "strategy": "npm",
        "pkg": "typescript-language-server",
        "bin": "typescript-language-server",
        # typescript-language-server requires the `typescript` SDK
        # (tsserver) to be importable from the same node_modules tree;
        # otherwise initialize() fails with "Could not find a valid
        # TypeScript installation". Install them together.
        "extra_pkgs": ["typescript"],
    },
    "@vue/language-server": {
        "strategy": "npm",
        "pkg": "@vue/language-server",
        "bin": "vue-language-server",
    },
    "svelte-language-server": {
        "strategy": "npm",
        "pkg": "svelte-language-server",
        "bin": "svelteserver",
    },
    "@astrojs/language-server": {
        "strategy": "npm",
        "pkg": "@astrojs/language-server",
        "bin": "astro-ls",
    },
    "yaml-language-server": {
        "strategy": "npm",
        "pkg": "yaml-language-server",
        "bin": "yaml-language-server",
    },
    "bash-language-server": {
        "strategy": "npm",
        "pkg": "bash-language-server",
        "bin": "bash-language-server",
    },
    "intelephense": {"strategy": "npm", "pkg": "intelephense", "bin": "intelephense"},
    "dockerfile-language-server-nodejs": {
        "strategy": "npm",
        "pkg": "dockerfile-language-server-nodejs",
        "bin": "docker-langserver",
    },
    # Go
    "gopls": {"strategy": "go", "pkg": "golang.org/x/tools/gopls@latest", "bin": "gopls"},
    # Rust — too heavy (hundreds of MB to bootstrap). We do NOT
    # auto-install rust-analyzer; users install via rustup.
    "rust-analyzer": {"strategy": "manual", "pkg": "", "bin": "rust-analyzer"},
    # C/C++ — manual (clangd ships with LLVM, very heavy)
    "clangd": {"strategy": "manual", "pkg": "", "bin": "clangd"},
    # Lua — manual (LuaLS is platform-specific binaries from GitHub
    # releases; complex enough that we punt to the user)
    "lua-language-server": {"strategy": "manual", "pkg": "", "bin": "lua-language-server"},
}
# One lock per package name, created on demand by ``_get_lock``.
_install_locks: Dict[str, threading.Lock] = {}
# Per-process outcome cache written by ``try_install``: the binary path,
# or ``None`` when the attempt failed (failures are cached as well).
_install_results: Dict[str, Optional[str]] = {}
# Guards creation of entries in ``_install_locks``.
_install_lock_meta = threading.Lock()
def hermes_lsp_bin_dir() -> Path:
    """Return the Hermes-owned bin staging dir for LSP servers, creating it.

    The directory is ``<HERMES_HOME>/lsp/bin``.  ``HERMES_HOME`` comes
    from the environment; when it is unset *or empty* the default
    ``~/.hermes`` is used.  (An empty value previously produced a
    ``lsp/bin`` path relative to the current working directory.)

    Raises ``OSError`` if the directory cannot be created.
    """
    home = os.environ.get("HERMES_HOME") or os.path.join(
        os.path.expanduser("~"), ".hermes"
    )
    bin_dir = Path(home) / "lsp" / "bin"
    bin_dir.mkdir(parents=True, exist_ok=True)
    return bin_dir
def _existing_binary(name: str) -> Optional[str]:
    """Probe the staging dir, then PATH, for an executable called ``name``.

    Returns the path as a string, or ``None`` when neither location has
    it.  The staged candidate must be a regular file (or a symlink to
    one) with the execute bit set; a directory that happens to share
    the name is not accepted, matching what ``shutil.which`` does for
    PATH entries.
    """
    staged = hermes_lsp_bin_dir() / name
    if staged.is_file() and os.access(staged, os.X_OK):
        return str(staged)
    # shutil.which returns None when nothing on PATH matches.
    return shutil.which(name) or None
def _get_lock(pkg: str) -> threading.Lock:
    """Return the lock dedicated to ``pkg``, creating it on first use.

    Lookup and creation happen under ``_install_lock_meta`` so every
    caller asking about the same package gets the same lock object.
    """
    with _install_lock_meta:
        return _install_locks.setdefault(pkg, threading.Lock())
def try_install(pkg: str, strategy: str = "auto") -> Optional[str]:
    """Resolve a binary for ``pkg``, installing it first when allowed.

    Only ``strategy == "auto"`` may trigger an install.  Any other
    value (``"manual"``, ``"off"``, ...) merely probes for an existing
    binary and returns its path or ``None``.

    In ``auto`` mode the outcome is cached per package for the life of
    the process — a second call returns the same path (or ``None``)
    without reinstalling.  Concurrent calls for one package are
    serialized on that package's lock.
    """
    if strategy != "auto":
        # Probe only: look up the executable name and check whether it
        # is already staged or on PATH.
        bin_name = INSTALL_RECIPES.get(pkg, {}).get("bin", pkg)
        return _existing_binary(bin_name)

    # Fast path: an earlier call already settled this package.
    try:
        return _install_results[pkg]
    except KeyError:
        pass

    with _get_lock(pkg):
        # Re-check under the lock; another thread may have finished the
        # install while this one was waiting.
        if pkg not in _install_results:
            _install_results[pkg] = _do_install(pkg)
        return _install_results[pkg]
def _do_install(pkg: str) -> Optional[str]:
    """Run the install recipe for ``pkg`` and return the binary path, if any.

    Packages without a recipe are only looked up on PATH.  For known
    packages an already-present binary is returned without installing;
    otherwise the recipe's strategy decides which installer runs.
    ``manual`` recipes and unknown strategies yield ``None``.
    """
    recipe = INSTALL_RECIPES.get(pkg)
    if recipe is None:
        # Not in our registry — best-effort: just probe PATH.
        return shutil.which(pkg)

    strategy = recipe.get("strategy", "manual")
    bin_name = recipe.get("bin", pkg)

    # Nothing to do when the binary is already staged or on PATH.
    already_there = _existing_binary(bin_name)
    if already_there:
        return already_there

    target = recipe.get("pkg", pkg)
    result: Optional[str] = None
    if strategy == "manual":
        logger.debug("[install] %s requires manual install (recipe=%s)", pkg, recipe)
    elif strategy == "npm":
        result = _install_npm(
            target,
            bin_name,
            extra_pkgs=recipe.get("extra_pkgs") or [],
        )
    elif strategy == "go":
        result = _install_go(target, bin_name)
    elif strategy == "pip":
        result = _install_pip(target, bin_name)
    else:
        logger.warning("[install] unknown strategy %r for %s", strategy, pkg)
    return result
def _install_npm(
    pkg: str,
    bin_name: str,
    extra_pkgs: Optional[list] = None,
) -> Optional[str]:
    """Install an npm package under the staging dir and locate its launcher.

    Runs ``npm install --prefix <HERMES_HOME>/lsp``, so launchers land
    in ``<staging>/node_modules/.bin/``.  On POSIX the launcher is then
    symlinked into ``<staging>/bin`` for stable access.

    ``extra_pkgs`` is a list of sibling packages to install into the
    same ``node_modules`` tree, for servers with runtime peer deps that
    npm does not pull in by itself (typescript-language-server needs
    ``typescript`` next to it).

    Returns the launcher path, or ``None`` when npm is missing, the
    install fails or times out, or no launcher is found afterwards.
    """
    npm = shutil.which("npm")
    if npm is None:
        logger.info("[install] cannot install %s: npm not on PATH", pkg)
        return None

    staging = hermes_lsp_bin_dir().parent  # <HERMES_HOME>/lsp/
    install_targets = [pkg, *(extra_pkgs or [])]
    logger.info(
        "[install] npm install --prefix %s %s",
        staging,
        " ".join(install_targets),
    )
    try:
        proc = subprocess.run(
            [
                npm,
                "install",
                "--prefix",
                str(staging),
                "--silent",
                "--no-fund",
                "--no-audit",
                *install_targets,
            ],
            check=False,
            capture_output=True,
            text=True,
            timeout=300,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        logger.warning("[install] npm install errored for %s: %s", pkg, e)
        return None
    if proc.returncode != 0:
        logger.warning(
            "[install] npm install failed for %s: %s", pkg, proc.stderr.strip()[:500]
        )
        return None

    shim_dir = staging / "node_modules" / ".bin"
    if os.name == "nt":
        # npm writes several launchers per bin on Windows.  Prefer the
        # ``.cmd`` one: the extension-less file is a POSIX shell script
        # that Windows cannot execute directly.  The name is built by
        # concatenation because with_suffix() would replace a dot that
        # is part of the bin name.
        candidates = [shim_dir / f"{bin_name}.cmd", shim_dir / bin_name]
    else:
        candidates = [shim_dir / bin_name]

    shim = next((c for c in candidates if c.exists()), None)
    if shim is None:
        logger.warning(
            "[install] npm install for %s succeeded but bin %s not found", pkg, bin_name
        )
        return None

    if os.name == "nt":
        # npm's Windows launchers locate the package relative to their
        # own directory, so a link or copy placed in ``lsp/bin`` would
        # point at nothing.  Use the launcher where npm put it.
        return str(shim)

    # Symlink into our `lsp/bin/` for stable PATH-style access.
    link = hermes_lsp_bin_dir() / shim.name
    if not link.exists():
        try:
            link.symlink_to(shim)
        except (OSError, NotImplementedError):
            # No copy fallback: a copied entry script would lose the
            # relative paths it uses to find its own package, so hand
            # back npm's launcher instead.
            return str(shim)
    return str(link if link.exists() else shim)
def _install_go(pkg: str, bin_name: str) -> Optional[str]:
    """Install a Go module with ``GOBIN`` pointed at the staging bin dir.

    Returns the path of the produced binary (``<bin_name>.exe`` on
    Windows), or ``None`` when ``go`` is not on PATH, the install
    fails or times out, or the binary is missing afterwards.
    """
    go = shutil.which("go")
    if go is None:
        logger.info("[install] cannot install %s: go not on PATH", pkg)
        return None

    staging = hermes_lsp_bin_dir()
    # Same environment as ours, except binaries go to the staging dir.
    env = {**os.environ, "GOBIN": str(staging)}

    try:
        logger.info("[install] go install %s (GOBIN=%s)", pkg, staging)
        proc = subprocess.run(
            [go, "install", pkg],
            check=False,
            capture_output=True,
            text=True,
            timeout=600,
            env=env,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        logger.warning("[install] go install errored for %s: %s", pkg, e)
        return None
    else:
        if proc.returncode != 0:
            logger.warning(
                "[install] go install failed for %s: %s", pkg, proc.stderr.strip()[:500]
            )
            return None

    produced = staging / bin_name
    if os.name == "nt":
        produced = produced.with_suffix(".exe")
    if not produced.exists():
        logger.warning(
            "[install] go install for %s succeeded but bin %s not found", pkg, bin_name
        )
        return None
    return str(produced)
def _install_pip(pkg: str, bin_name: str) -> Optional[str]:
    """Install a Python package into a Hermes-owned ``--target`` directory.

    ``pip install --target <staging>/python-packages`` keeps the user's
    site-packages untouched.  The console script is expected at
    ``<staging>/python-packages/bin/<bin_name>`` and is symlinked (or,
    failing that, copied) into ``<staging>/bin``.  This only helps for
    packages that ship a console script.

    Returns the script path, or ``None`` when pip fails, times out, or
    no script is found.
    """
    staging_bin = hermes_lsp_bin_dir()
    pip_target = staging_bin.parent / "python-packages"
    pip_target.mkdir(parents=True, exist_ok=True)

    command = [
        sys.executable,
        "-m",
        "pip",
        "install",
        "--target",
        str(pip_target),
        "--quiet",
        pkg,
    ]
    try:
        logger.info("[install] pip install --target %s %s", pip_target, pkg)
        proc = subprocess.run(
            command,
            check=False,
            capture_output=True,
            text=True,
            timeout=300,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        logger.warning("[install] pip install errored for %s: %s", pkg, e)
        return None
    else:
        if proc.returncode != 0:
            logger.warning(
                "[install] pip install failed for %s: %s", pkg, proc.stderr.strip()[:500]
            )
            return None

    # Look for the console script pip generated.
    script = pip_target / "bin" / bin_name
    if not script.exists():
        return None

    link = hermes_lsp_bin_dir() / bin_name
    if not link.exists():
        try:
            link.symlink_to(script)
        except (OSError, NotImplementedError):
            # Symlink refused: fall back to a copy, and if that fails
            # too just use the script where pip left it.
            try:
                shutil.copy2(script, link)
            except OSError:
                return str(script)
    return str(link if link.exists() else script)
def detect_status(pkg: str) -> str:
    """Classify a package as ``installed``, ``manual-only`` or ``missing``.

    ``installed`` means the recipe's executable (or ``pkg`` itself when
    there is no recipe) is already staged or on PATH.  Otherwise a
    recipe whose strategy is ``manual`` yields ``manual-only`` and
    everything else ``missing``.  Nothing is spawned or installed.
    """
    recipe = INSTALL_RECIPES.get(pkg)
    bin_name = pkg if not recipe else recipe.get("bin", pkg)
    if _existing_binary(bin_name):
        return "installed"
    is_manual = bool(recipe) and recipe.get("strategy") == "manual"
    return "manual-only" if is_manual else "missing"
# Public surface: the recipe registry, the install entry point, the
# status probe, and the staging-dir accessor.
__all__ = [
    "INSTALL_RECIPES",
    "try_install",
    "detect_status",
    "hermes_lsp_bin_dir",
]
-607
View File
@@ -1,607 +0,0 @@
"""Service-level orchestration for LSP clients.
The :class:`LSPService` is the bridge between the synchronous
file_operations layer and the async :class:`agent.lsp.client.LSPClient`.
Design choices:
- A **single asyncio event loop** runs in a background thread. All
client work happens on that loop. Synchronous callers from
``tools/file_operations.py`` use :meth:`get_diagnostics_sync` to
open + wait + drain in one blocking call.
- One client per ``(server_id, workspace_root)`` key. Lazy spawn:
the first request for a key spawns the client; subsequent requests
re-use it.
- A **broken-set** records ``(server_id, workspace_root)`` pairs that
failed to spawn or initialize. These are never retried for the
life of the service. Mirrors OpenCode's design.
- A **delta baseline** map keeps "diagnostics-as-of-the-last-snapshot"
per file. ``snapshot_baseline()`` is called BEFORE a write; the
next ``get_diagnostics_sync()`` returns only diagnostics that
weren't in the baseline. This is the lift from Claude Code's
``beforeFileEdited`` / ``getNewDiagnostics`` pattern, except wired
to the local LSP layer instead of MCP IDE RPC.
The service is **off by default** — call :meth:`is_active` to check
whether it's actually doing anything. When LSP is disabled in
config, when no git workspace can be detected, when all configured
servers are missing binaries and auto-install is off, ``is_active``
returns False and the file_operations layer falls through to the
in-process syntax check.
"""
from __future__ import annotations
import asyncio
import logging
import os
import threading
import time
from concurrent.futures import Future as ConcurrentFuture
from typing import Any, Dict, List, Optional, Tuple
from agent.lsp import eventlog
from agent.lsp.client import (
DIAGNOSTICS_DOCUMENT_WAIT,
LSPClient,
file_uri,
)
from agent.lsp.servers import (
ServerContext,
ServerDef,
SpawnSpec,
find_server_for_file,
language_id_for,
)
from agent.lsp.workspace import (
clear_cache,
is_inside_workspace,
resolve_workspace_for_file,
)
# Module logger for service-level debug output.
logger = logging.getLogger("agent.lsp.manager")
# Default for the ``idle_timeout`` argument of ``LSPService.__init__``.
DEFAULT_IDLE_TIMEOUT = 600  # seconds; servers idle for >10min get reaped
class _BackgroundLoop:
    """A daemon thread that owns one asyncio event loop.

    Provides :meth:`run` for synchronous callers — it submits a
    coroutine to the loop and blocks until it finishes (or a timeout
    fires).  The loop can be stopped and started again.
    """

    def __init__(self) -> None:
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._thread: Optional[threading.Thread] = None
        # Set by the loop thread once ``_loop`` is assigned.
        self._ready = threading.Event()

    def start(self) -> None:
        """Start the loop thread if it is not already running.

        Blocks for up to five seconds until the thread has published
        its loop.
        """
        if self._thread is not None:
            return
        # Clear the flag left set by a previous run; otherwise a
        # restart would return before the new loop exists and the next
        # run() would fail with "background loop not started".
        self._ready.clear()
        self._thread = threading.Thread(
            target=self._run_forever,
            name="hermes-lsp-loop",
            daemon=True,
        )
        self._thread.start()
        self._ready.wait(timeout=5.0)

    def _run_forever(self) -> None:
        """Thread body: create the loop, publish it, run until stopped."""
        loop = asyncio.new_event_loop()
        self._loop = loop
        asyncio.set_event_loop(loop)
        self._ready.set()
        try:
            loop.run_forever()
        finally:
            try:
                loop.close()
            except Exception:  # noqa: BLE001
                pass

    def run(self, coro, *, timeout: Optional[float] = None) -> Any:
        """Submit a coroutine to the loop and block until done.

        Returns the coroutine's result, or raises its exception.  If
        waiting fails for any reason (including the timeout expiring)
        the submitted work is cancelled before the error propagates.
        Raises ``RuntimeError`` when the loop has not been started.
        """
        loop = self._loop
        if loop is None:
            # The coroutine will never be scheduled; close it so it
            # does not trigger a "never awaited" RuntimeWarning.
            close = getattr(coro, "close", None)
            if callable(close):
                close()
            raise RuntimeError("background loop not started")
        fut: ConcurrentFuture = asyncio.run_coroutine_threadsafe(coro, loop)
        try:
            return fut.result(timeout=timeout)
        except Exception:
            fut.cancel()
            raise

    def stop(self) -> None:
        """Ask the loop to stop and wait up to two seconds for the thread."""
        loop = self._loop
        if loop is None:
            return
        try:
            loop.call_soon_threadsafe(loop.stop)
        except RuntimeError:
            # Raised when the loop has already been closed.
            pass
        if self._thread is not None:
            self._thread.join(timeout=2.0)
        self._loop = None
        self._thread = None
class LSPService:
"""The process-wide LSP service.
Created once via :meth:`create_from_config`; the
:func:`agent.lsp.get_service` accessor manages the singleton.
Most callers should use that accessor rather than constructing
:class:`LSPService` directly.
"""
# ------------------------------------------------------------------
# construction + factory
# ------------------------------------------------------------------
def __init__(
    self,
    *,
    enabled: bool,
    wait_mode: str,
    wait_timeout: float,
    install_strategy: str,
    binary_overrides: Optional[Dict[str, List[str]]] = None,
    env_overrides: Optional[Dict[str, Dict[str, str]]] = None,
    init_overrides: Optional[Dict[str, Dict[str, Any]]] = None,
    disabled_servers: Optional[List[str]] = None,
    idle_timeout: float = DEFAULT_IDLE_TIMEOUT,
) -> None:
    """Store the configuration and set up empty per-client state.

    ``wait_mode`` values other than ``"document"`` or ``"full"`` are
    replaced by ``"document"``.  Omitted override mappings become empty
    dicts and ``disabled_servers`` becomes a set.  The background event
    loop object is always created, but its thread is only started when
    ``enabled`` is true.
    """
    # --- configuration -------------------------------------------------
    self._enabled = enabled
    if wait_mode not in ("document", "full"):
        wait_mode = "document"
    self._wait_mode = wait_mode
    self._wait_timeout = wait_timeout
    self._install_strategy = install_strategy
    self._binary_overrides = binary_overrides or {}
    self._env_overrides = env_overrides or {}
    self._init_overrides = init_overrides or {}
    self._disabled_servers = set(disabled_servers or [])
    self._idle_timeout = idle_timeout

    # --- background loop -----------------------------------------------
    self._loop = _BackgroundLoop()
    if self._enabled:
        self._loop.start()

    # --- per-(server_id, workspace_root) state ---------------------------
    self._clients: Dict[Tuple[str, str], LSPClient] = {}
    self._broken: set = set()
    self._spawning: Dict[Tuple[str, str], asyncio.Future] = {}
    self._last_used: Dict[Tuple[str, str], float] = {}
    self._state_lock = threading.Lock()

    # Delta baseline: file path → diagnostics snapshot taken right
    # before a write.  ``get_diagnostics_sync`` filters out anything in
    # the baseline so the agent only sees errors introduced by the
    # current edit.
    self._delta_baseline: Dict[str, List[Dict[str, Any]]] = {}
@classmethod
def create_from_config(cls) -> Optional["LSPService"]:
    """Build a service from the ``lsp`` section of the Hermes config.

    Returns ``None`` if the config can't be loaded.  A missing or
    malformed ``lsp`` section is treated as empty, so defaults apply:
    ``enabled`` true, ``wait_mode`` ``"document"``, ``wait_timeout``
    ``DIAGNOSTICS_DOCUMENT_WAIT`` and ``install_strategy`` ``"auto"``.
    A ``wait_timeout`` that cannot be converted to a float also falls
    back to the default instead of raising.

    Per-server settings are read from ``lsp.servers.<name>``:
    ``disabled``, ``command`` (non-empty list), ``env`` (dict, values
    stringified) and ``initialization_options`` (dict).  Entries of the
    wrong shape are ignored.
    """
    try:
        from hermes_cli.config import load_config

        cfg = load_config()
    except Exception as e:  # noqa: BLE001
        logger.debug("LSP config load failed: %s", e)
        return None

    lsp_cfg = (cfg.get("lsp") or {}) if isinstance(cfg, dict) else {}
    if not isinstance(lsp_cfg, dict):
        lsp_cfg = {}

    enabled = bool(lsp_cfg.get("enabled", True))
    wait_mode = lsp_cfg.get("wait_mode", "document")
    raw_timeout = lsp_cfg.get("wait_timeout", DIAGNOSTICS_DOCUMENT_WAIT)
    try:
        wait_timeout = float(raw_timeout)
    except (TypeError, ValueError):
        # e.g. ``wait_timeout: null`` or a non-numeric string.  A bad
        # value must not make the factory itself raise.
        logger.debug("invalid lsp.wait_timeout %r; using default", raw_timeout)
        wait_timeout = float(DIAGNOSTICS_DOCUMENT_WAIT)
    install_strategy = lsp_cfg.get("install_strategy", "auto")

    servers_cfg = lsp_cfg.get("servers") or {}
    disabled = []
    binary_overrides: Dict[str, List[str]] = {}
    env_overrides: Dict[str, Dict[str, str]] = {}
    init_overrides: Dict[str, Dict[str, Any]] = {}
    if isinstance(servers_cfg, dict):
        for name, sub in servers_cfg.items():
            if not isinstance(sub, dict):
                continue
            if sub.get("disabled"):
                disabled.append(name)
            cmd = sub.get("command")
            if isinstance(cmd, list) and cmd:
                binary_overrides[name] = cmd
            env = sub.get("env")
            if isinstance(env, dict):
                env_overrides[name] = {k: str(v) for k, v in env.items()}
            init = sub.get("initialization_options")
            if isinstance(init, dict):
                init_overrides[name] = init

    return cls(
        enabled=enabled,
        wait_mode=wait_mode,
        wait_timeout=wait_timeout,
        install_strategy=install_strategy,
        binary_overrides=binary_overrides,
        env_overrides=env_overrides,
        init_overrides=init_overrides,
        disabled_servers=disabled,
    )
# ------------------------------------------------------------------
# public API
# ------------------------------------------------------------------
def is_active(self) -> bool:
    """Return True iff this service should be consulted at all.

    This is the ``enabled`` flag given at construction, returned
    unchanged; per-file checks live in :meth:`enabled_for`.
    """
    return self._enabled
def enabled_for(self, file_path: str) -> bool:
    """Decide whether LSP should run for one specific file.

    The answer is False when the service is disabled, when no
    registered server matches the file (or the matching server is
    disabled in config), when workspace resolution does not gate the
    file in, or when the (server_id, root) pair is already in the
    broken-set from an earlier failure.

    Returning False for broken pairs lets the file_operations layer
    skip the LSP path entirely — no spawn attempts, no timeout cost —
    until the broken-set is cleared again.
    """
    if not self._enabled:
        return False
    server = find_server_for_file(file_path)
    if server is None:
        return False
    if server.server_id in self._disabled_servers:
        return False
    workspace, in_scope = resolve_workspace_for_file(file_path)
    if not workspace or not in_scope:
        return False
    # Prefer the per-server root as the broken-set key; if it cannot be
    # computed (None or an exception) use the workspace root instead.
    try:
        root = server.resolve_root(file_path, workspace) or workspace
    except Exception:  # noqa: BLE001
        root = workspace
    return (server.server_id, root) not in self._broken
def snapshot_baseline(self, file_path: str) -> None:
    """Record the diagnostics ``file_path`` has right now as its delta baseline.

    Intended to run BEFORE a write so that a later
    ``get_diagnostics_sync()`` can subtract errors that already
    existed.  Best-effort: any failure is logged at debug level, the
    file's server/root pair is handed to ``_mark_broken_for_file`` and
    an empty baseline is stored, so a flaky server can't break a write.
    """
    if not self.enabled_for(file_path):
        return
    try:
        captured = self._loop.run(self._snapshot_async(file_path), timeout=8.0)
        baseline = captured if captured else []
        self._delta_baseline[os.path.abspath(file_path)] = baseline
    except Exception as err:  # noqa: BLE001
        logger.debug("baseline snapshot failed for %s: %s", file_path, err)
        self._mark_broken_for_file(file_path, err)
        self._delta_baseline[os.path.abspath(file_path)] = []
def get_diagnostics_sync(
    self,
    file_path: str,
    *,
    delta: bool = True,
    timeout: Optional[float] = None,
) -> List[Dict[str, Any]]:
    """Synchronously open ``file_path`` in the right server, wait for
    diagnostics, return them.

    If ``delta`` is True (default), the result is filtered against
    any baseline previously captured via :meth:`snapshot_baseline`.
    Diagnostics present in the baseline are removed so the caller
    only sees errors introduced by the current edit.

    ``timeout`` bounds the whole open-and-wait round trip; when it is
    ``None`` the configured wait timeout plus two seconds is used.

    Returns an empty list when :meth:`enabled_for` rejects the file
    (LSP disabled, no workspace, no matching server, broken pair) or
    when the request times out or fails.  Failures are logged and the
    server/root pair is passed to ``_mark_broken_for_file``; they are
    not re-raised.
    """
    if not self.enabled_for(file_path):
        return []
    # Resolve server_id eagerly so we can emit structured logs even
    # when the request errors out below.
    srv = find_server_for_file(file_path)
    server_id = srv.server_id if srv else "?"
    try:
        t = timeout if timeout is not None else self._wait_timeout + 2.0
        diags = self._loop.run(self._open_and_wait_async(file_path), timeout=t) or []
    except asyncio.TimeoutError as e:
        # Timeouts get their own structured event; otherwise handled
        # exactly like any other failure.
        eventlog.log_timeout(server_id, file_path)
        logger.debug("LSP diagnostics timeout for %s: %s", file_path, e)
        self._mark_broken_for_file(file_path, e)
        return []
    except Exception as e:  # noqa: BLE001
        eventlog.log_server_error(server_id, file_path, e)
        logger.debug("LSP diagnostics fetch failed for %s: %s", file_path, e)
        self._mark_broken_for_file(file_path, e)
        return []
    abs_path = os.path.abspath(file_path)
    if delta:
        baseline = self._delta_baseline.get(abs_path) or []
        if baseline:
            # Drop every diagnostic whose content key already appeared
            # in the pre-edit baseline.
            seen = {_diag_key(d) for d in baseline}
            diags = [d for d in diags if _diag_key(d) not in seen]
        # Roll baseline forward — next call returns deltas relative
        # to the just-emitted state, mirroring claude-code's
        # diagnosticTracking.
        try:
            fresh = self._loop.run(self._current_diags_async(file_path), timeout=2.0) or []
        except Exception:  # noqa: BLE001
            fresh = []
        # An empty ``fresh`` (lookup failed or nothing cached) leaves
        # the previous baseline untouched.
        if fresh:
            self._delta_baseline[abs_path] = fresh
    if diags:
        eventlog.log_diagnostics(server_id, file_path, len(diags))
    else:
        eventlog.log_clean(server_id, file_path)
    return diags
def _mark_broken_for_file(self, file_path: str, exc: BaseException) -> None:
    """Mark the (server_id, workspace_root) pair as broken so subsequent
    edits skip it instantly instead of re-paying timeout cost.

    Called when the outer ``_loop.run`` timeout cancels an in-flight
    spawn/initialize that the inner ``_get_or_spawn`` task was still
    holding open.  Without this, every subsequent write would re-enter
    the spawn path and re-pay the full ``snapshot_baseline``
    timeout (8s) until the binary is fixed.

    Also shuts down any client still registered under that key, and
    emits a single eventlog entry (only the first time the pair is
    marked) so the user knows which server gave up.

    ``exc`` is whatever exception the outer wrapper caught — used
    only for logging, never re-raised.

    Does nothing when no server matches the file or the file is not
    gated into a workspace.
    """
    srv = find_server_for_file(file_path)
    if srv is None:
        return
    ws_root, gated = resolve_workspace_for_file(file_path)
    if not (ws_root and gated):
        return
    # Same key derivation as ``enabled_for``: per-server root when it
    # can be computed, workspace root otherwise.
    try:
        per_server_root = srv.resolve_root(file_path, ws_root) or ws_root
    except Exception:  # noqa: BLE001
        per_server_root = ws_root
    key = (srv.server_id, per_server_root)
    already_broken = key in self._broken
    self._broken.add(key)
    # Kill any client we managed to spawn before the timeout.  The
    # cancelled future never reached the broken-set add inside
    # ``_get_or_spawn`` so the client may still be hanging in
    # ``_clients`` with a half-initialized state.
    with self._state_lock:
        client = self._clients.pop(key, None)
    if client is not None:
        try:
            # Bounded shutdown: the call is given a one-second timeout
            # so a wedged server cannot stall us for long.  We're
            # already on a slow path.
            self._loop.run(client.shutdown(), timeout=1.0)
        except Exception:  # noqa: BLE001
            pass
    # Log only on the first transition into the broken-set.
    if not already_broken:
        eventlog.log_spawn_failed(srv.server_id, per_server_root, exc)
def shutdown(self) -> None:
    """Stop every client, halt the background loop and drop the workspace cache.

    A disabled service has nothing to tear down and returns at once.
    Errors raised while shutting clients down are logged at debug
    level; the loop is stopped and the cache cleared regardless.
    """
    if self._enabled:
        try:
            self._loop.run(self._shutdown_async(), timeout=10.0)
        except Exception as err:  # noqa: BLE001
            logger.debug("LSP shutdown error: %s", err)
        self._loop.stop()
        clear_cache()
# ------------------------------------------------------------------
# async internals
# ------------------------------------------------------------------
async def _snapshot_async(self, file_path: str) -> List[Dict[str, Any]]:
client = await self._get_or_spawn(file_path)
if client is None:
return []
try:
version = await client.open_file(file_path, language_id=language_id_for(file_path))
await client.wait_for_diagnostics(file_path, version, mode=self._wait_mode)
except Exception as e: # noqa: BLE001
logger.debug("snapshot open/wait failed: %s", e)
return []
self._last_used[(client.server_id, client.workspace_root)] = time.time()
return list(client.diagnostics_for(file_path))
async def _open_and_wait_async(self, file_path: str) -> List[Dict[str, Any]]:
client = await self._get_or_spawn(file_path)
if client is None:
return []
try:
version = await client.open_file(file_path, language_id=language_id_for(file_path))
await client.save_file(file_path)
await client.wait_for_diagnostics(file_path, version, mode=self._wait_mode)
except Exception as e: # noqa: BLE001
logger.debug("open/wait failed for %s: %s", file_path, e)
return []
self._last_used[(client.server_id, client.workspace_root)] = time.time()
return list(client.diagnostics_for(file_path))
async def _current_diags_async(self, file_path: str) -> List[Dict[str, Any]]:
    """Return the diagnostics already cached for ``file_path``, without waiting.

    Looks up the existing client for the file and copies whatever it
    currently holds; never spawns a server.  Returns an empty list
    when the file is not gated into a workspace, no server matches,
    the per-server root cannot be resolved, or no client is registered.
    """
    ws, gated = resolve_workspace_for_file(file_path)
    srv = find_server_for_file(file_path)
    if not (ws and gated and srv):
        return []
    # ``_get_or_spawn`` registers clients under (server_id, per-server
    # root), which can differ from the workspace root (e.g. a project
    # marker in a sub-directory).  Derive the same key here; looking
    # up by the workspace root would silently miss those clients.
    try:
        per_server_root = srv.resolve_root(file_path, ws)
    except Exception:  # noqa: BLE001
        return []
    if per_server_root is None:
        # Server is gated off for this file, so no client can exist.
        return []
    with self._state_lock:
        client = self._clients.get((srv.server_id, per_server_root))
    if client is None:
        return []
    return list(client.diagnostics_for(file_path))
async def _get_or_spawn(self, file_path: str) -> Optional[LSPClient]:
    """Return a running client for ``file_path``, spawning one if needed.

    Returns ``None`` when no server matches, the server is disabled in
    config, the file is not gated into a workspace, the server's root
    resolution gates the file off, the (server_id, root) pair is in
    the broken-set, or the spawn fails.  Spawn failures add the pair
    to the broken-set.

    Concurrent callers for the same key share one spawn: the first
    caller publishes a future in ``_spawning`` and later callers await
    it.  The future is always resolved before this coroutine exits —
    including on unexpected exceptions and cancellation — so waiters
    can never be left hanging on it.
    """
    srv = find_server_for_file(file_path)
    if srv is None:
        return None
    if srv.server_id in self._disabled_servers:
        eventlog.log_disabled(srv.server_id, file_path, "disabled in config")
        return None
    ws_root, gated = resolve_workspace_for_file(file_path)
    if not (ws_root and gated):
        eventlog.log_no_project_root(srv.server_id, file_path)
        return None
    per_server_root = srv.resolve_root(file_path, ws_root)
    if per_server_root is None:
        eventlog.log_disabled(
            srv.server_id, file_path, "exclude marker hit (server gated off)"
        )
        return None  # exclude marker hit, server gated off
    key = (srv.server_id, per_server_root)
    if key in self._broken:
        return None
    with self._state_lock:
        client = self._clients.get(key)
        if client is not None and client.is_running:
            eventlog.log_active(srv.server_id, per_server_root)
            return client
        spawning = self._spawning.get(key)
    if spawning is not None:
        # Someone else is already spawning this server; share the result.
        try:
            return await spawning
        except Exception:  # noqa: BLE001
            return None
    # Begin spawn
    loop = asyncio.get_running_loop()
    spawn_future: asyncio.Future = loop.create_future()
    with self._state_lock:
        self._spawning[key] = spawn_future
    try:
        ctx = ServerContext(
            workspace_root=per_server_root,
            install_strategy=self._install_strategy,
            binary_overrides=self._binary_overrides,
            env_overrides=self._env_overrides,
            init_overrides=self._init_overrides,
        )
        spec = srv.build_spawn(per_server_root, ctx)
        if spec is None:
            # ``build_spawn`` returns None when the binary can't be
            # located (auto-install disabled, manual-only server,
            # or install attempt failed).  Surface this once via
            # the structured logger so the user can act on it.
            eventlog.log_server_unavailable(srv.server_id, srv.server_id)
            self._broken.add(key)
            spawn_future.set_result(None)
            return None
        client = LSPClient(
            server_id=srv.server_id,
            workspace_root=spec.workspace_root,
            command=spec.command,
            env=spec.env,
            cwd=spec.cwd,
            initialization_options=spec.initialization_options,
            seed_diagnostics_on_first_push=spec.seed_diagnostics_on_first_push or srv.seed_first_push,
        )
        try:
            await client.start()
        except Exception as e:  # noqa: BLE001
            eventlog.log_spawn_failed(srv.server_id, per_server_root, e)
            self._broken.add(key)
            spawn_future.set_result(None)
            return None
        with self._state_lock:
            self._clients[key] = client
            self._last_used[key] = time.time()
        eventlog.log_active(srv.server_id, per_server_root)
        spawn_future.set_result(client)
        return client
    finally:
        # Leaving without a result (``build_spawn`` raised, or the
        # outer timeout cancelled us mid-``start``) would strand every
        # coroutine awaiting this future.  Resolve it to "no client".
        if not spawn_future.done():
            spawn_future.set_result(None)
        with self._state_lock:
            self._spawning.pop(key, None)
async def _shutdown_async(self) -> None:
with self._state_lock:
clients = list(self._clients.values())
self._clients.clear()
self._broken.clear()
self._last_used.clear()
await asyncio.gather(
*(c.shutdown() for c in clients),
return_exceptions=True,
)
# ------------------------------------------------------------------
# status / introspection (used by ``hermes lsp status``)
# ------------------------------------------------------------------
def get_status(self) -> Dict[str, Any]:
    """Produce a point-in-time description of the service for ``hermes lsp status``.

    The result carries the configuration knobs, one entry per tracked
    client (server id, workspace root, state, running flag), the
    broken (server_id, root) pairs and the sorted disabled-server ids.
    """
    with self._state_lock:
        clients = []
        for key, client in self._clients.items():
            clients.append(
                {
                    "server_id": key[0],
                    "workspace_root": key[1],
                    "state": client.state,
                    "running": client.is_running,
                }
            )
        broken = list(self._broken)
    status: Dict[str, Any] = {
        "enabled": self._enabled,
        "wait_mode": self._wait_mode,
        "wait_timeout": self._wait_timeout,
        "install_strategy": self._install_strategy,
    }
    status["clients"] = clients
    status["broken"] = broken
    status["disabled_servers"] = sorted(self._disabled_servers)
    return status
def _diag_key(d: Dict[str, Any]) -> str:
"""Content equality key used for delta filtering. Mirrors
:func:`agent.lsp.client._diagnostic_key`."""
rng = d.get("range") or {}
start = rng.get("start") or {}
end = rng.get("end") or {}
code = d.get("code")
if code is not None and not isinstance(code, str):
code = str(code)
return "\x00".join(
[
str(d.get("severity") or 1),
str(code or ""),
str(d.get("source") or ""),
str(d.get("message") or "").strip(),
f"{start.get('line', 0)}:{start.get('character', 0)}-{end.get('line', 0)}:{end.get('character', 0)}",
]
)
# Public API of this module: only the service class is exported.
__all__ = ["LSPService"]
-196
View File
@@ -1,196 +0,0 @@
"""Minimal LSP JSON-RPC 2.0 framer over async streams.
LSP wire format:
Content-Length: <bytes>\\r\\n
\\r\\n
<utf-8 JSON body>
The body is a JSON-RPC 2.0 envelope: request, response, or notification.
This module replaces what ``vscode-jsonrpc/node`` would do in a
TypeScript implementation.  We keep it deliberately small — just the
framer + envelope helpers — so :class:`agent.lsp.client.LSPClient` can
focus on protocol semantics.
"""
from __future__ import annotations
import asyncio
import json
import logging
from typing import Any, Optional, Tuple
logger = logging.getLogger("agent.lsp.protocol")
# LSP error codes we care about. Full list in
# https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#errorCodes
ERROR_CONTENT_MODIFIED = -32801  # LSP ``ContentModified``
ERROR_REQUEST_CANCELLED = -32800  # LSP ``RequestCancelled``
ERROR_METHOD_NOT_FOUND = -32601  # JSON-RPC ``MethodNotFound``
class LSPProtocolError(Exception):
    """Signal that the LSP wire format itself was violated.

    Used for broken framing or a broken envelope.  This is not the
    same as :class:`LSPRequestError`, which wraps a well-formed
    JSON-RPC error response sent by the server.
    """
class LSPRequestError(Exception):
    """Error response received for an LSP request.

    Exposes the JSON-RPC ``code`` and ``message`` plus the optional
    ``data`` payload as attributes; ``str()`` renders
    ``LSP error <code>: <message>``.
    """

    def __init__(self, code: int, message: str, data: Any = None) -> None:
        summary = f"LSP error {code}: {message}"
        super().__init__(summary)
        self.code, self.message, self.data = code, message, data
def encode_message(obj: dict) -> bytes:
    """Serialize a JSON-RPC envelope into one Content-Length framed message.

    The body is compact UTF-8 JSON (no whitespace after separators,
    non-ASCII characters left unescaped) and the header reports the
    body's length in bytes, not characters.
    """
    payload = json.dumps(obj, ensure_ascii=False, separators=(",", ":")).encode("utf-8")
    size = str(len(payload)).encode("ascii")
    return b"Content-Length: " + size + b"\r\n\r\n" + payload
async def read_message(reader: asyncio.StreamReader) -> Optional[dict]:
    """Read one Content-Length framed JSON-RPC message from the stream.

    Returns ``None`` on clean EOF (server closed stdout cleanly between
    messages — typical shutdown).  Raises :class:`LSPProtocolError` on
    malformed framing: EOF inside a header block, a header line that
    overruns the reader's buffer limit, an oversized header block,
    non-ASCII or malformed header lines, a missing, non-integer or
    unreasonable Content-Length, a truncated body, or a body that is
    not valid UTF-8 JSON.

    The reader is advanced to just past the JSON body on success.
    """
    headers: dict = {}
    header_bytes = 0
    while True:
        try:
            line = await reader.readuntil(b"\r\n")
        except asyncio.IncompleteReadError as e:
            # EOF while reading headers.  If we hadn't started a header
            # block, treat as clean EOF; otherwise the framing is bad.
            if not e.partial and not headers:
                return None
            raise LSPProtocolError(
                f"unexpected EOF while reading LSP headers (partial={e.partial!r})"
            ) from e
        except asyncio.LimitOverrunError as e:
            # ``readuntil`` gives up once the buffered data exceeds the
            # reader's limit without containing CRLF.  Report it as a
            # framing violation instead of leaking an asyncio error.
            raise LSPProtocolError(
                "LSP header line exceeded the stream reader limit without CRLF"
            ) from e
        # Defensive cap against a server streaming headers without ever
        # emitting CRLF-CRLF.  Caps total header bytes at 8 KiB — a
        # well-behaved server fits in well under 200 bytes.
        header_bytes += len(line)
        if header_bytes > 8192:
            raise LSPProtocolError(
                "LSP header block exceeded 8 KiB without terminator"
            )
        line = line[:-2]  # strip CRLF
        if not line:
            break  # blank line ends header block
        try:
            key, _, value = line.decode("ascii").partition(":")
        except UnicodeDecodeError as e:
            raise LSPProtocolError(f"non-ASCII LSP header: {line!r}") from e
        if not key:
            raise LSPProtocolError(f"malformed LSP header line: {line!r}")
        # Header names are matched case-insensitively.
        headers[key.strip().lower()] = value.strip()
    cl = headers.get("content-length")
    if cl is None:
        raise LSPProtocolError(f"LSP message missing Content-Length: {headers!r}")
    try:
        n = int(cl)
    except ValueError as e:
        raise LSPProtocolError(f"non-integer Content-Length: {cl!r}") from e
    if n < 0 or n > 64 * 1024 * 1024:  # 64 MiB sanity cap
        raise LSPProtocolError(f"unreasonable Content-Length: {n}")
    try:
        body = await reader.readexactly(n)
    except asyncio.IncompleteReadError as e:
        raise LSPProtocolError(
            f"truncated LSP body: expected {n} bytes, got {len(e.partial)}"
        ) from e
    try:
        return json.loads(body.decode("utf-8"))
    except json.JSONDecodeError as e:
        raise LSPProtocolError(f"invalid JSON in LSP body: {e}") from e
    except UnicodeDecodeError as e:
        raise LSPProtocolError(f"non-UTF-8 LSP body: {e}") from e
def make_request(req_id: int, method: str, params: Any) -> dict:
    """Assemble a JSON-RPC 2.0 request; ``params`` is omitted when ``None``."""
    envelope: dict = {"jsonrpc": "2.0", "id": req_id, "method": method}
    if params is None:
        return envelope
    envelope["params"] = params
    return envelope
def make_notification(method: str, params: Any) -> dict:
    """Assemble a JSON-RPC 2.0 notification (an envelope without ``id``).

    ``params`` is omitted when ``None``.
    """
    envelope: dict = {"jsonrpc": "2.0", "method": method}
    if params is None:
        return envelope
    envelope["params"] = params
    return envelope
def make_response(req_id: Any, result: Any) -> dict:
    """Assemble a JSON-RPC 2.0 success response for request ``req_id``."""
    envelope = {"jsonrpc": "2.0"}
    envelope["id"] = req_id
    envelope["result"] = result
    return envelope
def make_error_response(req_id: Any, code: int, message: str, data: Any = None) -> dict:
    """Assemble a JSON-RPC 2.0 error response for request ``req_id``.

    The ``error`` object always carries ``code`` and ``message``;
    ``data`` is included only when it is not ``None``.
    """
    failure: dict = {"code": code, "message": message}
    if data is not None:
        failure["data"] = data
    envelope = {"jsonrpc": "2.0", "id": req_id}
    envelope["error"] = failure
    return envelope
def classify_message(msg: dict) -> Tuple[str, Any]:
    """Sort an incoming envelope into ``(kind, key)``.

    ``kind`` is ``request``, ``response``, ``notification`` or
    ``invalid``.  ``key`` is the id for requests and responses, the
    method name for notifications and ``None`` for invalid messages.
    Anything that is not a dict tagged ``"jsonrpc": "2.0"`` is invalid.
    """
    if not isinstance(msg, dict) or msg.get("jsonrpc") != "2.0":
        return "invalid", None
    carries_id = "id" in msg
    if "method" in msg:
        # A method with an id is a request; without one, a notification.
        if carries_id:
            return "request", msg["id"]
        return "notification", msg["method"]
    if carries_id and ("result" in msg or "error" in msg):
        return "response", msg["id"]
    return "invalid", None
# Public API of this module: error codes, exception types, the
# framer (encode/read) and the envelope helpers.
__all__ = [
    "ERROR_CONTENT_MODIFIED",
    "ERROR_REQUEST_CANCELLED",
    "ERROR_METHOD_NOT_FOUND",
    "LSPProtocolError",
    "LSPRequestError",
    "encode_message",
    "read_message",
    "make_request",
    "make_notification",
    "make_response",
    "make_error_response",
    "classify_message",
]
-78
View File
@@ -1,78 +0,0 @@
"""Format LSP diagnostics for inclusion in tool output.
The model sees a compact, severity-filtered, line-bounded summary of
diagnostics introduced by the latest edit. Format matches what
OpenCode's ``lsp/diagnostic.ts`` and Claude Code's
``formatDiagnosticsSummary`` produce: ``<diagnostics>`` blocks with
1-indexed line/column, capped at ``MAX_PER_FILE`` errors.
"""
from __future__ import annotations
from typing import Any, Dict, List
# Severity-1 only by default — warnings/info/hints would flood the
# agent.  Lift this in config under ``lsp.severities`` if needed.
# Maps the numeric LSP severity to the label printed in reports.
SEVERITY_NAMES = {1: "ERROR", 2: "WARN", 3: "INFO", 4: "HINT"}
DEFAULT_SEVERITIES = frozenset({1})  # ERROR only
# Default cap on diagnostics listed per file by ``report_for_file``.
MAX_PER_FILE = 20
# Default character limit applied by ``truncate``.
MAX_TOTAL_CHARS = 4000
def format_diagnostic(d: Dict[str, Any]) -> str:
    """Render one diagnostic as a single line.

    Shape: ``SEVERITY [line:col] message [code] (source)`` where line
    and column are converted from the 0-based LSP position to 1-based,
    and the code / source parts appear only when present.  A missing
    or unknown severity is shown as ``ERROR``.
    """
    label = SEVERITY_NAMES.get(d.get("severity") or 1, "ERROR")
    begin = (d.get("range") or {}).get("start") or {}
    row = int(begin.get("line", 0)) + 1
    column = int(begin.get("character", 0)) + 1
    text = str(d.get("message") or "").rstrip()
    pieces = [f"{label} [{row}:{column}] {text}"]
    code = d.get("code")
    if code not in (None, ""):
        pieces.append(f" [{code}]")
    source = d.get("source")
    if source:
        pieces.append(f" ({source})")
    return "".join(pieces)
def report_for_file(
    file_path: str,
    diagnostics: List[Dict[str, Any]],
    *,
    severities: frozenset = DEFAULT_SEVERITIES,
    max_per_file: int = MAX_PER_FILE,
) -> str:
    """Render the ``<diagnostics file=...>`` block for a single file.

    Only diagnostics whose severity (defaulting to 1 when absent) is
    in ``severities`` are kept, and at most ``max_per_file`` of them
    are listed; the remainder is summarised as ``... and N more``.

    An empty string is returned when nothing survives the filter, so
    callers can simply test ``if block:``.
    """
    if not diagnostics:
        return ""
    kept = [item for item in diagnostics if (item.get("severity") or 1) in severities]
    if not kept:
        return ""
    shown = kept[:max_per_file]
    body = "\n".join(format_diagnostic(item) for item in shown)
    hidden = len(kept) - len(shown)
    if hidden > 0:
        body = f"{body}\n... and {hidden} more"
    return f'<diagnostics file="{file_path}">\n{body}\n</diagnostics>'
def truncate(s: str, *, limit: int = MAX_TOTAL_CHARS) -> str:
    """Hard-cap a formatted summary string.

    Strings no longer than ``limit`` are returned unchanged.  Longer
    ones are cut so that the text plus a ``[truncated]`` marker line
    is exactly ``limit`` characters.  When ``limit`` is too small to
    hold the marker, the string is simply cut to ``limit`` characters
    (or to nothing for a negative limit), so the result never exceeds
    the requested cap.
    """
    if len(s) <= limit:
        return s
    marker = "\n…[truncated]"
    keep = limit - len(marker)
    if keep < 0:
        # No room for the marker: a negative slice end would count from
        # the end of ``s`` and produce a result longer than ``limit``.
        return s[: max(limit, 0)]
    return s[:keep] + marker
# Public API of this module.
__all__ = [
    "SEVERITY_NAMES",
    "DEFAULT_SEVERITIES",
    "MAX_PER_FILE",
    "format_diagnostic",
    "report_for_file",
    "truncate",
]
-1040
View File
File diff suppressed because it is too large Load Diff
-223
View File
@@ -1,223 +0,0 @@
"""Workspace and project-root resolution for LSP.
Two concerns live here:
1. **Workspace gate** — the upper-level "is this directory a project?"
check. Hermes only runs LSP when the cwd (or the file being edited)
sits inside a git worktree. Files outside any git root never
trigger LSP, even if a server is configured. This keeps Telegram
gateway users on user-home cwd's from spawning daemons.
2. **NearestRoot** — the per-server project-root walk. Each language
server cares about a different marker (``pyproject.toml`` for
Python, ``Cargo.toml`` for Rust, ``go.mod`` for Go, etc.) and
wants the directory containing that marker. ``nearest_root()``
walks up from a starting path looking for any of a list of marker
files, optionally bailing if an exclude marker shows up first.
"""
from __future__ import annotations
import logging
import os
from pathlib import Path
from typing import Iterable, Optional, Tuple
logger = logging.getLogger("agent.lsp.workspace")
# Cache: cwd → (worktree_root, is_git) so repeated calls don't re-stat.
# Cleared on shutdown.  Keyed by the normalized absolute start
# directory; negative results are stored as ``(None, False)``.
_workspace_cache: dict = {}
def normalize_path(path: str) -> str:
    """Turn ``path`` into a stable map key.

    Expands ``~``, makes the path absolute and collapses ``.``/``..``.
    Symlinks are deliberately NOT resolved — symlink stability matters
    for some LSP servers (rust-analyzer cares about Cargo workspace
    identity) and we want the path the user typed when possible.
    """
    expanded = os.path.expanduser(path)
    return os.path.abspath(expanded)
def find_git_worktree(start: str) -> Optional[str]:
    """Locate the nearest ancestor of ``start`` that holds a ``.git`` entry.

    ``start`` may be a file (its parent directory is used) or a
    directory.  Both a ``.git`` directory and a ``.git`` *file* (as
    created by ``git worktree add``) count.  Returns the directory
    containing the entry, or ``None`` when the walk reaches the
    filesystem root, hits an ``OSError`` or exceeds 64 levels.

    Results, including negative ones, are memoised per start directory.
    """
    try:
        origin = Path(normalize_path(start))
        if origin.is_file():
            origin = origin.parent
    except (OSError, RuntimeError, ValueError):
        # Pathological input (loop in symlinks, encoding error, etc.) —
        # give up rather than crash the lint hook.
        return None
    cache_key = str(origin)
    hit = _workspace_cache.get(cache_key)
    if hit is not None:
        return hit[0]
    found: Optional[str] = None
    candidate = origin
    # Bounded walk: 64 levels is far deeper than any sane checkout, so
    # a pathological cwd or symlink cycle cannot keep us looping.
    for _ in range(64):
        try:
            has_git = (candidate / ".git").exists()
        except OSError:
            # Permission error on a parent dir — stop cleanly.
            break
        if has_git:
            found = str(candidate)
            break
        above = candidate.parent
        if above == candidate:
            break
        candidate = above
    _workspace_cache[cache_key] = (found, found is not None)
    return found
def is_inside_workspace(path: str, workspace_root: str) -> bool:
    """Check whether ``path`` equals or lies beneath ``workspace_root``.

    Both sides are normalized to absolute paths but symlinks are not
    resolved — a file reached through a symlink that points outside
    the workspace still counts as outside.  This is the conservative
    reading.
    """
    candidate = normalize_path(path)
    base = normalize_path(workspace_root)
    if candidate == base:
        return True
    # The longest common ancestor of the two paths must be the
    # workspace root itself for the file to be inside it.
    try:
        return os.path.commonpath([candidate, base]) == base
    except ValueError:
        # e.g. different drives on Windows.
        return False
def nearest_root(
    start: str,
    markers: Iterable[str],
    *,
    excludes: Optional[Iterable[str]] = None,
    ceiling: Optional[str] = None,
) -> Optional[str]:
    """Search upward from ``start`` for a directory holding any marker file.

    Returns the **directory containing** the first marker found, or
    ``None`` when none turns up before ``ceiling`` (inclusive) or the
    filesystem root.

    When ``excludes`` is given and one of those entries is met at or
    below the level of the first marker, the result is ``None`` — the
    server is gated off for that file.  Mirrors OpenCode's NearestRoot
    exclude semantics (e.g. typescript skips deno projects when
    ``deno.json`` is found before ``package.json``).
    """
    origin = Path(normalize_path(start))
    try:
        if origin.is_file():
            origin = origin.parent
    except (OSError, RuntimeError, ValueError):
        return None
    stop_at = Path(normalize_path(ceiling)) if ceiling else None
    wanted = list(markers)
    blocked = list(excludes) if excludes else []

    def _present(entry: Path) -> bool:
        # An entry we are not allowed to stat counts as absent.
        try:
            return entry.exists()
        except OSError:
            return False

    here = origin
    # Same defensive 64-level bound as ``find_git_worktree``; the
    # parent-equality stop normally ends the walk much earlier.
    for _ in range(64):
        # Excludes win over markers found at the same level.
        if any(_present(here / name) for name in blocked):
            return None
        if any(_present(here / name) for name in wanted):
            return str(here)
        if stop_at is not None and here == stop_at:
            return None
        above = here.parent
        if above == here:
            return None
        here = above
    return None
def resolve_workspace_for_file(
    file_path: str,
    *,
    cwd: Optional[str] = None,
) -> Tuple[Optional[str], bool]:
    """Work out which workspace, if any, ``file_path`` belongs to.

    Returns ``(workspace_root, gated_in)``; ``gated_in`` is True iff
    LSP should run for the file at all.

    The git worktree enclosing ``cwd`` (default: the process cwd) wins
    when the file lies inside it.  Otherwise the worktree enclosing
    the file itself is used — handy when the user edits an unrelated
    checkout.  ``(None, False)`` means neither lookup found a worktree.
    """
    anchor = cwd or os.getcwd()
    from_cwd = find_git_worktree(anchor)
    if from_cwd is not None and is_inside_workspace(file_path, from_cwd):
        return from_cwd, True
    # Not covered by the cwd's worktree: anchor on the file's own location.
    from_file = find_git_worktree(file_path)
    if from_file is None:
        return None, False
    return from_file, True
def clear_cache() -> None:
    """Empty the workspace-resolution cache in place.

    Run at service shutdown so that a later re-init starts from fresh
    lookups instead of results left over from the previous session.
    """
    _workspace_cache.clear()
# Public API of this module.
__all__ = [
    "find_git_worktree",
    "is_inside_workspace",
    "nearest_root",
    "normalize_path",
    "resolve_workspace_for_file",
    "clear_cache",
]
-309
View File
@@ -1,309 +0,0 @@
"""CJK/wide-character-aware re-alignment of model-emitted markdown tables.
Models pad markdown tables assuming each character occupies one terminal
cell. CJK glyphs and most emoji render as two cells, so the model's
spacing collapses into drift the moment a table reaches a real terminal:
header pipes line up, but every body row drifts right by N cells per CJK
char.
This module rebuilds row padding using ``wcwidth.wcswidth`` (display
columns), preserving the table's pipes and dashes so it still reads as a
plain-text table in ``strip`` / unrendered display modes. Standard Rich
markdown rendering already aligns CJK correctly inside a wide enough
panel; this helper is for the paths that print the model's text more or
less verbatim.
The helper is deliberately conservative:
* Only contiguous ``| ... |`` blocks with a divider line are rewritten.
* Anything that does not look like a table is passed through unchanged.
* Single-line / mid-stream fragments are left alone — callers buffer
table rows and flush them once the block is complete.
There is a small, intentional caveat: ``wcwidth`` returns ``-1`` for some
emoji-with-variation-selector sequences (a base glyph followed by
U+FE0F); we clamp those to
0 so they do not corrupt the column width math. The 1-cell drift on
those specific glyphs is preferable to silently widening every table
that contains one.
"""
from __future__ import annotations
import re
from typing import List
from wcwidth import wcswidth
# Public API of this module.
__all__ = [
    "is_table_divider",
    "looks_like_table_row",
    "realign_markdown_tables",
    "split_table_row",
]
# One divider cell: at least three dashes, optionally wrapped in
# alignment colons and surrounding whitespace (``---``, ``:---:``).
_DIVIDER_CELL_RE = re.compile(r"^\s*:?-{3,}:?\s*$")
_MIN_COL_WIDTH = 3  # matches the divider's minimum dash run.
def _disp_width(s: str) -> int:
    """Display width of ``s`` in terminal cells, never negative.

    ``wcswidth`` reports ``-1`` for strings it cannot measure (control
    characters, unknown sequences).  Those are counted as zero-width
    so a negative value cannot leak into the column-width arithmetic.
    """
    return max(wcswidth(s), 0)
def _pad_to_width(s: str, target: int) -> str:
    """Right-pad ``s`` with spaces until it spans ``target`` display cells.

    Strings already at or beyond ``target`` are returned unchanged.
    """
    deficit = target - _disp_width(s)
    if deficit <= 0:
        return s
    return s + " " * deficit
def split_table_row(row: str) -> List[str]:
    """Break ``| a | b | c |`` into its trimmed cells ``["a", "b", "c"]``.

    One leading and one trailing pipe are dropped if present; the rest
    is split on every ``|`` and each cell is stripped of whitespace.
    """
    inner = row.strip()
    if inner.startswith("|"):
        inner = inner[1:]
    # Checked after the leading pipe is gone, so a lone "|" yields [""].
    if inner.endswith("|"):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split("|")]
def is_table_divider(row: str) -> bool:
    """Tell whether ``row`` is a markdown table separator line.

    The row must split into more than one cell and every cell must
    match the divider pattern.
    """
    cells = split_table_row(row)
    if len(cells) <= 1:
        return False
    return all(_DIVIDER_CELL_RE.match(cell) for cell in cells)
def looks_like_table_row(row: str) -> bool:
    """Guess whether ``row`` might belong to a markdown table.

    Streaming callers use this to decide whether to hold an in-flight
    line back.  The check is intentionally permissive — the realigner
    only rewrites blocks that come with a divider, so a false positive
    costs at most a short delay before one line is printed.
    """
    body = row.strip()
    pipes = body.count("|")
    if pipes == 0:
        return False
    # A leading pipe is the strongest signal.  Rows without one still
    # qualify when they carry at least two pipes, so tables written
    # without the leading pipe are not missed.
    return body.startswith("|") or pipes >= 2
def _render_block(rows: List[List[str]], available_width: int | None = None) -> List[str]:
    """Lay out ``rows`` (header first, divider implied) with uniform column widths.

    Short rows are padded with empty cells.  Each column is as wide as
    its widest cell in display cells, but never narrower than the
    divider's minimum dash run.

    When ``available_width`` is given and the horizontal table would
    be wider than it (with a floor of 20 cells), the vertical
    key-value rendering is used instead so rows do not soft-wrap
    mid-cell — terminal soft-wrap ruins column alignment visually even
    when the underlying bytes are perfectly padded.
    """
    column_count = max(len(r) for r in rows)
    padded = [r + [""] * (column_count - len(r)) for r in rows]
    widths: List[int] = []
    for col in range(column_count):
        widest_cell = max(_disp_width(r[col]) for r in padded)
        widths.append(max(_MIN_COL_WIDTH, widest_cell))
    # Every column costs its width plus three cells (``| `` before and
    # one space after); one more cell closes the row with ``|``.
    total_width = sum(widths) + 3 * column_count + 1
    if available_width is not None and total_width > max(available_width, 20):
        return _render_vertical(padded, column_count, available_width)

    def _format(cells: List[str]) -> str:
        fitted = [_pad_to_width(cell, widths[idx]) for idx, cell in enumerate(cells)]
        return "| " + " | ".join(fitted) + " |"

    header, *body = padded
    divider = "|" + "|".join("-" * (w + 2) for w in widths) + "|"
    rendered = [_format(header), divider]
    rendered.extend(_format(r) for r in body)
    return rendered
def _wrap_to_width(text: str, width: int) -> List[str]:
    """Word-wrap ``text`` so each line fits within ``width`` display cells.

    Words are joined with single spaces.  A word wider than ``width``
    is broken character by character.  A non-positive ``width`` or an
    empty ``text`` returns ``[text]`` untouched, and whitespace-only
    text returns ``[""]``, so the caller always gets at least one line.
    """
    if width <= 0 or not text:
        return [text]
    tokens = text.split()
    if not tokens:
        return [""]

    def _chop(token: str, limit: int) -> List[str]:
        # Split one over-long token into chunks of at most ``limit``
        # cells; glyphs measured as zero-width are counted as one cell.
        chunks: List[str] = []
        piece = ""
        used = 0
        for glyph in token:
            glyph_w = _disp_width(glyph) or 1
            if piece and used + glyph_w > limit:
                chunks.append(piece)
                piece, used = glyph, glyph_w
            else:
                piece += glyph
                used += glyph_w
        if piece:
            chunks.append(piece)
        return chunks

    wrapped: List[str] = []
    line = ""
    line_w = 0
    for token in tokens:
        token_w = _disp_width(token)
        # The token fits after a space on the line being built.
        if line and line_w + 1 + token_w <= width:
            line = f"{line} {token}"
            line_w += 1 + token_w
            continue
        # Otherwise flush the current line (if any) and start a new one.
        if line:
            wrapped.append(line)
        if token_w <= width:
            line, line_w = token, token_w
        else:
            chunks = _chop(token, width)
            wrapped.extend(chunks[:-1])
            line = chunks[-1] if chunks else ""
            line_w = _disp_width(line)
    if line:
        wrapped.append(line)
    return wrapped or [""]
def _render_vertical(
rows: List[List[str]], ncols: int, available_width: int
) -> List[str]:
"""Render a too-wide table as vertical ``Header: value`` rows.
Mirrors Claude Code's narrow-terminal fallback in
``MarkdownTable.tsx``: each body row becomes a small block of
``Header: cell-value`` lines (continuation lines indented two
spaces) separated by a thin ```` divider between rows. Keeps
every line narrower than ``available_width`` so the terminal does
not soft-wrap mid-cell.
"""
if not rows:
return []
headers = rows[0] + [""] * (ncols - len(rows[0]))
body = rows[1:]
labels = [h or f"Column {i + 1}" for i, h in enumerate(headers)]
sep_width = max(20, min(40, available_width - 2)) if available_width else 30
separator = "" * sep_width
indent = " "
indent_w = _disp_width(indent)
out: List[str] = []
for ri, row in enumerate(body):
if ri > 0:
out.append(separator)
for ci in range(ncols):
label = labels[ci]
value = row[ci] if ci < len(row) else ""
label_w = _disp_width(label)
first_budget = max(10, available_width - label_w - 2)
cont_budget = max(10, available_width - indent_w)
if not value:
out.append(f"{label}:")
continue
wrapped = _wrap_to_width(value, first_budget)
out.append(f"{label}: {wrapped[0]}")
if len(wrapped) > 1:
# Re-flow continuation text at the wider continuation
# budget — words split across the narrower first-line
# budget should re-pack greedily for the rest.
cont_text = " ".join(wrapped[1:])
for cl in _wrap_to_width(cont_text, cont_budget):
if cl.strip():
out.append(f"{indent}{cl}")
return out
def realign_markdown_tables(text: str, available_width: int | None = None) -> str:
"""Rewrite every ``| ... |`` + divider block with wcwidth-aware padding.
Lines that are not part of a recognised table are returned verbatim,
so this is safe to apply to arbitrary assistant prose.
If ``available_width`` is given (terminal cells available for the
rendered table), tables wider than that are rendered as vertical
key-value pairs instead of a horizontal pipe-bordered grid. This
avoids the terminal soft-wrapping mid-cell, which destroys column
alignment visually even when the bytes are perfectly padded.
"""
if "|" not in text:
return text
lines = text.split("\n")
out: List[str] = []
i = 0
n = len(lines)
while i < n:
line = lines[i]
# A table starts with a header row whose next line is a divider.
if (
"|" in line
and i + 1 < n
and is_table_divider(lines[i + 1])
):
header = split_table_row(line)
body: List[List[str]] = []
j = i + 2
while j < n and "|" in lines[j] and lines[j].strip():
if is_table_divider(lines[j]):
j += 1
continue
body.append(split_table_row(lines[j]))
j += 1
if any(c for c in header) or body:
out.extend(_render_block([header] + body, available_width))
i = j
continue
out.append(line)
i += 1
return "\n".join(out)
+2 -2
View File
@@ -470,11 +470,11 @@ class MemoryManager:
accepted = [
p for p in params
if p.kind in {
if p.kind in (
inspect.Parameter.POSITIONAL_ONLY,
inspect.Parameter.POSITIONAL_OR_KEYWORD,
inspect.Parameter.KEYWORD_ONLY,
}
)
]
if len(accepted) >= 4:
return "positional"
+34 -278
View File
@@ -10,7 +10,7 @@ import os
import re
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse
import requests
@@ -157,13 +157,6 @@ DEFAULT_CONTEXT_LENGTHS = {
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
# gpt-5.3-codex-spark is Codex-OAuth-only (ChatGPT Pro entitlement) and
# uses a smaller 128k window than other gpt-5.x slugs. Listed here as
# a defensive override so the longest-substring fallback doesn't match
# the generic "gpt-5" entry below (400k) and report the wrong limit if
# Spark's context ever needs to be resolved through this path. Real
# usage flows through _CODEX_OAUTH_CONTEXT_FALLBACK at line ~1113.
"gpt-5.3-codex-spark": 128000,
"gpt-5.1-chat": 128000, # Chat variant has 128k context
"gpt-5": 400000, # GPT-5.x base, mini, codex variants (400k)
"gpt-4.1": 1047576,
@@ -217,10 +210,8 @@ DEFAULT_CONTEXT_LENGTHS = {
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
# Kimi
"kimi": 262144,
# Tencent — Hy3 Preview (Hunyuan) with 256K context window.
# OpenRouter live metadata reports 262144 (256 × 1024); align the
# static fallback so cache and offline both agree (issue #22268).
"hy3-preview": 262144,
# Tencent — Hy3 Preview (Hunyuan) with 256K context window
"hy3-preview": 256000,
# Nemotron — NVIDIA's open-weights series (128K context across all sizes)
"nemotron": 131072,
# Arcee
@@ -244,44 +235,6 @@ DEFAULT_CONTEXT_LENGTHS = {
"zai-org/GLM-5": 202752,
}
# xAI Grok models that ACCEPT the `reasoning.effort` parameter on
# api.x.ai. Verified live against /v1/responses 2026-05-10:
#
# ACCEPTS effort: grok-3-mini, grok-3-mini-fast, grok-4.20-multi-agent-0309,
# grok-4.3
# REJECTS effort: grok-3, grok-4, grok-4-0709, grok-4-fast-(non-)reasoning,
# grok-4-1-fast-(non-)reasoning, grok-4.20-0309-(non-)reasoning,
# grok-code-fast-1
#
# REJECTS-side models still reason natively — they just don't expose an
# effort dial — so callers should send no `reasoning` key at all rather
# than a default `medium` (which 400s with "Model X does not support
# parameter reasoningEffort").
_GROK_EFFORT_CAPABLE_PREFIXES = (
    "grok-3-mini",
    "grok-4.20-multi-agent",
    "grok-4.3",
)


def grok_supports_reasoning_effort(model: str) -> bool:
    """Return True when an xAI Grok model accepts ``reasoning.effort``.

    The name is trimmed, lower-cased and reduced to its last ``/``
    segment (so ``x-ai/grok-3-mini`` is treated like ``grok-3-mini``),
    then checked against the allowlisted prefixes. Conservative by
    design: a model that is not listed, or an empty/missing name,
    yields False so callers send no effort dial rather than risk a 400.
    """
    cleaned = (model or "").strip().lower()
    if not cleaned:
        return False
    # Drop aggregator prefixes (x-ai/, openrouter/x-ai/, xai/, ...).
    bare = cleaned.rsplit("/", 1)[-1]
    return bare.startswith(_GROK_EFFORT_CAPABLE_PREFIXES)
_CONTEXT_LENGTH_KEYS = (
"context_length",
"context_window",
@@ -571,7 +524,7 @@ def _extract_pricing(payload: Dict[str, Any]) -> Dict[str, Any]:
pricing: Dict[str, Any] = {}
for target, aliases in alias_map.items():
for alias in aliases:
if alias in normalized and normalized[alias] not in {None, ""}:
if alias in normalized and normalized[alias] not in (None, ""):
pricing[target] = normalized[alias]
break
if pricing:
@@ -1006,79 +959,6 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
return None
def _query_ollama_api_show(model: str, base_url: str, api_key: str = "") -> Optional[int]:
    """Ask a server's Ollama-native ``/api/show`` for a model's context length.

    ``base_url`` has trailing slashes and a trailing ``/v1`` removed, then
    ``{"name": model}`` is POSTed to ``<server>/api/show`` with a 5 second
    timeout. The probe is best-effort: a non-200 status or any exception
    results in ``None``.

    Resolution order within a successful response:
      1. the first numeric ``model_info`` entry whose key contains
         ``context_length`` and whose value is at least 1024;
      2. otherwise the last token of a ``parameters`` line mentioning
         ``num_ctx``, if it parses as an integer of at least 1024.

    The order is flipped vs ``query_ollama_num_ctx()`` because local users
    control ``num_ctx`` themselves; hosted users can't.
    """
    import httpx

    root = base_url.rstrip("/")
    if root.endswith("/v1"):
        root = root[:-3]
    auth = _auth_headers(api_key)

    try:
        with httpx.Client(timeout=5.0, headers=auth) as session:
            reply = session.post(f"{root}/api/show", json={"name": model})
            if reply.status_code != 200:
                return None
            payload = reply.json()

            # Prefer the model_info figure over any num_ctx override.
            for field, raw in payload.get("model_info", {}).items():
                if "context_length" not in field:
                    continue
                if not isinstance(raw, (int, float)):
                    continue
                limit = int(raw)
                if limit >= 1024:
                    return limit

            # Fall back to num_ctx from the Modelfile parameters text.
            modelfile = payload.get("parameters", "")
            if "num_ctx" in modelfile:
                for entry in modelfile.split("\n"):
                    if "num_ctx" not in entry:
                        continue
                    tokens = entry.strip().split()
                    if len(tokens) < 2:
                        continue
                    try:
                        limit = int(tokens[-1])
                    except ValueError:
                        continue
                    if limit >= 1024:
                        return limit
    except Exception:
        # Deliberately best-effort: any failure means "unknown".
        pass
    return None
def _model_name_suggests_kimi(model: str) -> bool:
"""Return True if the model name looks like a Kimi-family model.
Catches ``kimi-k2.6``, ``kimi-k2.5``, ``kimi-k2-thinking``,
``moonshotai/Kimi-K2.6``, and similar variants. Used as a guard
against stale OpenRouter metadata that underreports these models
as 32K context when they actually support 262K+.
"""
lower = model.lower()
return lower.startswith("kimi") or "moonshot" in lower
def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
"""Query a local server for the model's context length."""
import httpx
@@ -1226,12 +1106,6 @@ _CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = {
"gpt-5.1-codex-max": 272_000,
"gpt-5.1-codex-mini": 272_000,
"gpt-5.3-codex": 272_000,
# Spark runs on specialised low-latency hardware and exposes a smaller
# 128k window than other Codex OAuth slugs. Listed explicitly so the
# longest-key-first fallback resolves it correctly — substring match
# on "gpt-5.3-codex" otherwise wins and reports 272k. Availability is
# gated by ChatGPT Pro entitlement on the Codex backend.
"gpt-5.3-codex-spark": 128_000,
"gpt-5.2-codex": 272_000,
"gpt-5.4-mini": 272_000,
"gpt-5.5": 272_000,
@@ -1330,66 +1204,27 @@ def _resolve_codex_oauth_context_length(
return None
def _resolve_nous_context_length(
model: str,
base_url: str = "",
api_key: str = "",
) -> Tuple[Optional[int], str]:
"""Resolve Nous Portal model context length.
def _resolve_nous_context_length(model: str) -> Optional[int]:
"""Resolve Nous Portal model context length via OpenRouter metadata.
Tries the live Nous inference endpoint first (authoritative), then falls
back to OpenRouter metadata with suffix/version matching.
Nous model IDs are bare after prefix-stripping (e.g. 'qwen3.6-plus',
'claude-opus-4-6') while OpenRouter uses prefixed IDs (e.g.
'qwen/qwen3.6-plus', 'anthropic/claude-opus-4.6'). Version
normalization (dot↔dash) is applied to handle name drifts.
Returns ``(context_length, source)`` where ``source`` is one of:
- ``"portal"`` live /v1/models response (authoritative)
- ``"openrouter"`` OpenRouter cache fallback (non-authoritative;
callers must NOT persist this to the on-disk cache or a single
portal blip will freeze the wrong value in forever)
- ``""`` could not resolve
Nous model IDs are bare (e.g. 'claude-opus-4-6') while OpenRouter uses
prefixed IDs (e.g. 'anthropic/claude-opus-4.6'). Try suffix matching
with version normalization (dot↔dash).
"""
# Portal first — the Nous /models endpoint is authoritative for what our
# infrastructure enforces and may differ from OR (e.g. OR reports 1M for
# qwen3.6-plus; the portal correctly says 262144). Fall back to the OR
# catalog only if the portal doesn't list the model.
if base_url:
portal_ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if portal_ctx is not None:
return portal_ctx, "portal"
metadata = fetch_model_metadata()
def _safe_ctx(or_id: str, entry: dict) -> Optional[int]:
ctx = entry.get("context_length")
if ctx is None:
return None
if ctx <= 32768 and _model_name_suggests_kimi(or_id):
logger.info(
"Rejecting OpenRouter metadata context=%s for %r "
"(Kimi-family underreport, Nous path); falling through to hardcoded defaults",
ctx, or_id,
)
return None
return ctx
metadata = fetch_model_metadata() # OpenRouter cache
# Exact match first
if model in metadata:
ctx = _safe_ctx(model, metadata[model])
if ctx is not None:
return ctx, "openrouter"
return metadata[model].get("context_length")
normalized = _normalize_model_version(model).lower()
for or_id, entry in metadata.items():
bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
if bare.lower() == model.lower() or _normalize_model_version(bare).lower() == normalized:
ctx = _safe_ctx(or_id, entry)
if ctx is not None:
return ctx, "openrouter"
return entry.get("context_length")
# Partial prefix match for cases like gemini-3-flash → gemini-3-flash-preview
# Require match to be at a word boundary (followed by -, :, or end of string)
model_lower = model.lower()
for or_id, entry in metadata.items():
bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
@@ -1397,11 +1232,9 @@ def _resolve_nous_context_length(
if candidate.startswith(query) and (
len(candidate) == len(query) or candidate[len(query)] in "-:."
):
ctx = _safe_ctx(or_id, entry)
if ctx is not None:
return ctx, "openrouter"
return entry.get("context_length")
return None, ""
return None
def get_model_context_length(
@@ -1416,26 +1249,17 @@ def get_model_context_length(
Resolution order:
0. Explicit config override (model.context_length or custom_providers per-model)
1. Persistent cache (previously discovered via probing). Nous URLs
bypass the cache here so step 5b can always reconcile against
the authoritative portal /v1/models response.
1. Persistent cache (previously discovered via probing)
1b. AWS Bedrock static table (must precede custom-endpoint probe)
2. Active endpoint metadata (/models for explicit custom endpoints)
3. Local server query (for local endpoints)
4. Anthropic /v1/models API (API-key users only, not OAuth)
5. Provider-aware lookups (before generic OpenRouter cache):
a. Copilot live /models API
b. Nous: live /v1/models probe first (authoritative), then OR
cache fallback with suffix/version normalisation. Only
portal-derived values are persisted to disk.
c. Codex OAuth /models probe
d. GMI /models endpoint
e. Ollama native /api/show probe (any base_url, provider-agnostic)
f. models.dev registry lookup (with :cloud/-cloud suffix fallback)
6. OpenRouter live API metadata (Kimi-family 32k guard)
7. Hardcoded defaults (broad family patterns, longest-key-first)
8. Local server query (last resort)
9. Default fallback (256K)"""
5. OpenRouter live API metadata
6. Nous suffix-match via OpenRouter cache
7. models.dev registry lookup (provider-aware)
8. Thin hardcoded defaults (broad family patterns)
9. Default fallback (256K)
"""
# 0. Explicit config override — user knows best
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
return config_context_length
@@ -1482,28 +1306,6 @@ def get_model_context_length(
model, base_url, f"{cached:,}",
)
_invalidate_cached_context_length(model, base_url)
# Invalidate stale 32k cache entries for Kimi-family models.
elif cached <= 32768 and _model_name_suggests_kimi(model):
logger.info(
"Dropping stale Kimi cache entry %s@%s -> %s (OpenRouter underreport); "
"re-resolving via hardcoded defaults",
model, base_url, f"{cached:,}",
)
_invalidate_cached_context_length(model, base_url)
# Nous Portal: the portal /v1/models endpoint is authoritative.
# Bypass the persistent cache so step 5b can always reconcile
# against it — this corrects pre-fix entries seeded from the
# OR catalog (the same OR underreport class that the Kimi/Qwen
# DEFAULT_CONTEXT_LENGTHS overrides exist to mitigate) without
# touching the on-disk file when the portal is unreachable.
# The in-memory 300s endpoint metadata cache makes the per-call
# cost amortise to ~0 within a process.
elif _infer_provider_from_url(base_url) == "nous":
logger.debug(
"Bypassing persistent cache for %s@%s (Nous portal authoritative)",
model, base_url,
)
# Fall through; step 5b reconciles and overwrites if portal responds.
else:
return cached
@@ -1537,13 +1339,6 @@ def get_model_context_length(
if context_length is not None:
return context_length
if not _is_known_provider_base_url(base_url):
# 2b. Ollama native /api/show — any URL might be an Ollama server
# (local, cloud, or custom hosting). Non-Ollama servers return
# 404/405 quickly. Fall through on failure.
ctx = _query_ollama_api_show(model, base_url, api_key=api_key)
if ctx is not None:
save_context_length(model, base_url, ctx)
return ctx
# 3. Try querying local server directly
if is_local_endpoint(base_url):
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
@@ -1575,7 +1370,7 @@ def get_model_context_length(
# (e.g. claude-opus-4.6 is 1M on Anthropic but 128K on GitHub Copilot).
# If provider is generic (openrouter/custom/empty), try to infer from URL.
effective_provider = provider
if not effective_provider or effective_provider in {"openrouter", "custom"}:
if not effective_provider or effective_provider in ("openrouter", "custom"):
if base_url:
inferred = _infer_provider_from_url(base_url)
if inferred:
@@ -1585,7 +1380,7 @@ def get_model_context_length(
# This catches account-specific models (e.g. claude-opus-4.6-1m) that
# don't exist in models.dev. For models that ARE in models.dev, this
# returns the provider-enforced limit which is what users can actually use.
if effective_provider in {"copilot", "copilot-acp", "github-copilot"}:
if effective_provider in ("copilot", "copilot-acp", "github-copilot"):
try:
from hermes_cli.models import get_copilot_model_context
ctx = get_copilot_model_context(model, api_key=api_key)
@@ -1595,18 +1390,8 @@ def get_model_context_length(
pass # Fall through to models.dev
if effective_provider == "nous":
ctx, source = _resolve_nous_context_length(
model, base_url=base_url or "", api_key=api_key or ""
)
ctx = _resolve_nous_context_length(model)
if ctx:
# Persist ONLY portal-derived values. Caching an OR-fallback
# value here would freeze in a wrong number on the first portal
# blip / auth glitch and step-1 would short-circuit it forever.
# OR's catalog is community-maintained and is precisely why the
# Kimi/Qwen DEFAULT_CONTEXT_LENGTHS overrides exist — we don't
# want it leaking into the persistent cache for Nous URLs.
if base_url and source == "portal":
save_context_length(model, base_url, ctx)
return ctx
if effective_provider == "openai-codex":
# Codex OAuth enforces lower context limits than the direct OpenAI
@@ -1623,45 +1408,16 @@ def get_model_context_length(
ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if ctx is not None:
return ctx
# 5e. Ollama native /api/show probe — runs for ANY provider with a
# base_url, not just ollama-cloud. Ollama-compatible servers expose
# this endpoint regardless of hostname (local Ollama, Ollama Cloud,
# custom Ollama hosting). The OpenAI-compat /v1/models endpoint
# correctly omits context_length per the OpenAI schema, but /api/show
# returns the authoritative GGUF model_info.context_length.
# For non-Ollama servers (OpenAI, Anthropic, etc.), the POST returns
# 404/405 quickly. Results are cached, so the hit is per-model+URL,
# once per hour.
if base_url:
ctx = _query_ollama_api_show(model, base_url, api_key=api_key)
if ctx is not None:
save_context_length(model, base_url, ctx)
return ctx
if effective_provider:
from agent.models_dev import lookup_models_dev_context
ctx = lookup_models_dev_context(effective_provider, model)
if ctx:
return ctx
# 6. OpenRouter live API metadata provider-unaware fallback.
# Only consulted when the provider is unknown (no effective_provider),
# because OpenRouter data is community-maintained and can be incorrect
# for models that belong to known providers with curated defaults.
if not effective_provider:
metadata = fetch_model_metadata()
if model in metadata:
or_ctx = metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
# Guard against stale OpenRouter metadata for Kimi-family models.
if or_ctx == 32768 and _model_name_suggests_kimi(model):
logger.info(
"Rejecting OpenRouter metadata context=%s for %r "
"(Kimi-family underreport); falling through to hardcoded defaults",
or_ctx, model,
)
else:
return or_ctx
# 7. (reserved)
# 6. OpenRouter live API metadata (provider-unaware fallback)
metadata = fetch_model_metadata()
if model in metadata:
return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
# Only check `default_model in model` (is the key a substring of the input).
@@ -1724,7 +1480,7 @@ def _count_image_tokens(msg: Dict[str, Any], cost_per_image: int) -> int:
if not isinstance(part, dict):
continue
ptype = part.get("type")
if ptype in {"image", "image_url", "input_image"}:
if ptype in ("image", "image_url", "input_image"):
count += 1
stashed = msg.get("_anthropic_content_blocks") if isinstance(msg, dict) else None
if isinstance(stashed, list):
@@ -1736,7 +1492,7 @@ def _count_image_tokens(msg: Dict[str, Any], cost_per_image: int) -> int:
inner = content.get("content")
if isinstance(inner, list):
for part in inner:
if isinstance(part, dict) and part.get("type") in {"image", "image_url"}:
if isinstance(part, dict) and part.get("type") in ("image", "image_url"):
count += 1
return count * cost_per_image
@@ -1758,7 +1514,7 @@ def _estimate_message_chars(msg: Dict[str, Any]) -> int:
cleaned = []
for part in v:
if isinstance(part, dict):
if part.get("type") in {"image", "image_url", "input_image"}:
if part.get("type") in ("image", "image_url", "input_image"):
cleaned.append({"type": part.get("type"), "image": "[stripped]"})
else:
cleaned.append(part)
+5 -92
View File
@@ -145,9 +145,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
"openai": "openai",
"openai-codex": "openai",
"zai": "zai",
"kimi": "kimi-for-coding",
"kimi-coding": "kimi-for-coding",
"moonshot": "kimi-for-coding",
"stepfun": "stepfun",
"kimi-coding-cn": "kimi-for-coding",
"minimax": "minimax",
@@ -199,32 +197,6 @@ def _load_disk_cache() -> Dict[str, Any]:
return {}
def _disk_cache_age_seconds() -> Optional[float]:
    """Return how many seconds old the disk cache file is, or None.

    The age is the current time minus the file's mtime. ``None`` is
    returned when the file does not exist, when the age comes out
    negative (mtime in the future, e.g. clock skew), or when any error
    is raised while locating or stat-ing the file; errors are logged at
    debug level. Callers such as ``fetch_models_dev`` treat ``None`` as
    "freshness unknown" and fall through to the network fetch.
    """
    try:
        path = _get_cache_path()
        if not path.exists():
            return None
        elapsed = time.time() - path.stat().st_mtime
    except Exception as e:
        logger.debug("Failed to stat models.dev disk cache: %s", e)
        return None
    # A future mtime gives no usable freshness signal.
    return None if elapsed < 0 else elapsed
def _save_disk_cache(data: Dict[str, Any]) -> None:
"""Save models.dev data to disk cache atomically."""
try:
@@ -235,29 +207,13 @@ def _save_disk_cache(data: Dict[str, Any]) -> None:
def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
"""Fetch models.dev registry. Cache hierarchy: in-mem → disk → network.
"""Fetch models.dev registry. In-memory cache (1hr) + disk fallback.
Returns the full registry dict keyed by provider ID, or empty dict on failure.
Cache hierarchy (when ``force_refresh=False``):
1. In-memory cache, populated and < TTL old return immediately.
2. **Disk cache file < TTL old by mtime load, populate in-mem, return.**
No network call. Saves ~500 ms per cold-start agent construction;
``models.dev`` only changes when providers add new models, so a
1 hour staleness window is acceptable (same TTL as in-mem cache).
3. Network fetch on success, save to disk + in-mem and return.
4. Network fails fall back to ANY available disk cache (even stale)
with a short 5 min in-mem grace period before retrying network.
When ``force_refresh=True`` (used by ``hermes config refresh``, the
\"refresh model catalog\" code path), stages 1 and 2 are skipped. The
function always hits the network and only falls back to disk if the
network call fails.
"""
global _models_dev_cache, _models_dev_cache_time
# Stage 1: fresh in-memory cache wins. This is the hot path on
# long-lived processes — no I/O, no system calls.
# Check in-memory cache
if (
not force_refresh
and _models_dev_cache
@@ -265,27 +221,7 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
):
return _models_dev_cache
# Stage 2: fresh-by-mtime disk cache short-circuits the network call.
# Only kicks in on cold-start processes (in-mem cache is empty or
# expired) and only when the user hasn't asked for a forced refresh.
# Skipped if the disk cache file is missing, unreadable, or older
# than _MODELS_DEV_CACHE_TTL.
if not force_refresh:
disk_age = _disk_cache_age_seconds()
if disk_age is not None and disk_age < _MODELS_DEV_CACHE_TTL:
disk_data = _load_disk_cache()
if disk_data:
_models_dev_cache = disk_data
# Anchor in-mem TTL to the disk file's age so we don't
# extend an already-aging cache by another full hour.
_models_dev_cache_time = time.time() - disk_age
logger.debug(
"Loaded models.dev from fresh disk cache "
"(%d providers, age=%.0fs)", len(disk_data), disk_age,
)
return _models_dev_cache
# Stage 3: network fetch.
# Try network fetch
try:
response = requests.get(MODELS_DEV_URL, timeout=15)
response.raise_for_status()
@@ -303,9 +239,8 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
except Exception as e:
logger.debug("Failed to fetch models.dev: %s", e)
# Stage 4: network failed — fall back to whatever disk cache exists,
# even if it's stale. Give it a short 5 min in-mem TTL so we retry
# the network soon instead of serving stale data for a full hour.
# Fall back to disk cache — use a short TTL (5 min) so we retry
# the network fetch soon instead of serving stale data for a full hour.
if not _models_dev_cache:
_models_dev_cache = _load_disk_cache()
if _models_dev_cache:
@@ -349,28 +284,6 @@ def lookup_models_dev_context(provider: str, model: str) -> Optional[int]:
if ctx:
return ctx
# Suffix-aware fallback: some providers (e.g. ollama-cloud) store
# model IDs with :cloud / -cloud suffixes in models.dev while the
# live API returns bare names. Without this, kimi-k2.6 misses the
# kimi-k2.6:cloud entry and falls through to stale OpenRouter metadata
# reporting 32768 — tripping the 64k minimum-context guard.
# The suffix-stripping in fetch_ollama_cloud_models() handles the
# model-picker UX; this handles the context-length lookup path.
for suffix in (":cloud", "-cloud"):
suffixed_key = model + suffix
entry = models.get(suffixed_key)
if entry:
ctx = _extract_context(entry)
if ctx:
return ctx
# Also try case-insensitive
suffixed_lower = model_lower + suffix
for mid, mdata in models.items():
if mid.lower() == suffixed_lower:
ctx = _extract_context(mdata)
if ctx:
return ctx
return None
+2 -2
View File
@@ -122,7 +122,7 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
# empty, drop it entirely.
if "enum" in repaired and isinstance(repaired["enum"], list):
node_type = repaired.get("type")
if node_type in {"string", "integer", "number", "boolean"}:
if node_type in ("string", "integer", "number", "boolean"):
cleaned = [v for v in repaired["enum"]
if v is not None and v != ""]
if cleaned:
@@ -135,7 +135,7 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
"""Infer a reasonable ``type`` if this schema node has none."""
if "type" in node and node["type"] not in {None, ""}:
if "type" in node and node["type"] not in (None, ""):
return node
# Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
-1046
View File
File diff suppressed because it is too large Load Diff
+2 -13
View File
@@ -157,9 +157,6 @@ MEMORY_GUIDANCE = (
"User preferences and recurring corrections matter more than procedural task details.\n"
"Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
"state to memory; use session_search to recall those from past transcripts. "
"Specifically: do not record PR numbers, issue numbers, commit SHAs, 'fixed bug X', "
"'submitted PR Y', 'Phase N done', file counts, or any artifact that will be stale "
"in 7 days. If a fact will be stale in a week, it does not belong in memory. "
"If you've discovered a new way to do something, solved a problem that could be "
"necessary later, save it as a skill with the skill tool.\n"
"Write memories as declarative facts, not instructions to yourself. "
@@ -216,15 +213,7 @@ KANBAN_GUIDANCE = (
"artifacts. `metadata` is machine-readable facts "
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
"workers read both via their own `kanban_show`. Never put secrets / "
"tokens / raw PII in either field — run rows are durable forever. "
"Exception: if your output is a code change that needs human review "
"before counting as merged/done (most coding tasks), drop the "
"structured metadata (changed_files / tests_run / diff_path) into a "
"`kanban_comment` first, then end with "
"`kanban_block(reason=\"review-required: <one-line summary>\")` so a "
"reviewer can approve+unblock or request changes. Reviewing-then-"
"completing is more honest than auto-completing work that still needs "
"eyes on it.\n"
"tokens / raw PII in either field — run rows are durable forever.\n"
"6. **If follow-up work appears, create it; don't do it.** Use "
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
"to spawn a child task for the appropriate specialist profile instead of "
@@ -268,7 +257,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
# Model name substrings that trigger tool-use enforcement guidance.
# Add new patterns here when a model family needs explicit steering.
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm")
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")
# OpenAI GPT/Codex-specific execution guidance. Addresses known failure modes
# where GPT models abandon work on partial results, skip prerequisite lookups,
+10 -139
View File
@@ -1,25 +1,15 @@
"""Anthropic prompt caching strategies.
"""Anthropic prompt caching (system_and_3 strategy).
Two layouts:
* ``system_and_3`` (default, used everywhere except the long-lived path):
4 cache_control breakpoints system prompt + last 3 non-system messages.
All at the same TTL (5m or 1h). Reduces input token costs by ~75% on
multi-turn conversations within a single session.
* ``prefix_and_2`` (Claude on Anthropic / OpenRouter / Nous Portal):
4 breakpoints split across two TTL tiers tools[-1] (1h) +
stable system prefix (1h) + last 2 non-system messages (5m). The
long-lived prefix is byte-stable across sessions for a given user
config, so every fresh session reads the cached system+tools instead
of re-paying for them. Within-session rolling window shrinks from 3
messages to 2 to free the breakpoint budget.
Reduces input token costs by ~75% on multi-turn conversations by caching
the conversation prefix. Uses 4 cache_control breakpoints (Anthropic max):
1. System prompt (stable across all turns)
2-4. Last 3 non-system messages (rolling window)
Pure functions -- no class state, no AIAgent dependency.
"""
import copy
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List
def _apply_cache_marker(msg: dict, cache_marker: dict, native_anthropic: bool = False) -> None:
@@ -48,14 +38,6 @@ def _apply_cache_marker(msg: dict, cache_marker: dict, native_anthropic: bool =
last["cache_control"] = cache_marker
def _build_marker(ttl: str) -> Dict[str, str]:
"""Build a cache_control marker dict for the given TTL ('5m' or '1h')."""
marker: Dict[str, str] = {"type": "ephemeral"}
if ttl == "1h":
marker["ttl"] = "1h"
return marker
def apply_anthropic_cache_control(
api_messages: List[Dict[str, Any]],
cache_ttl: str = "5m",
@@ -63,8 +45,7 @@ def apply_anthropic_cache_control(
) -> List[Dict[str, Any]]:
"""Apply system_and_3 caching strategy to messages for Anthropic models.
Places up to 4 cache_control breakpoints: system prompt + last 3 non-system
messages, all at the same TTL.
Places up to 4 cache_control breakpoints: system prompt + last 3 non-system messages.
Returns:
Deep copy of messages with cache_control breakpoints injected.
@@ -73,7 +54,9 @@ def apply_anthropic_cache_control(
if not messages:
return messages
marker = _build_marker(cache_ttl)
marker = {"type": "ephemeral"}
if cache_ttl == "1h":
marker["ttl"] = "1h"
breakpoints_used = 0
@@ -87,115 +70,3 @@ def apply_anthropic_cache_control(
_apply_cache_marker(messages[idx], marker, native_anthropic=native_anthropic)
return messages
def _mark_system_stable_block(
messages: List[Dict[str, Any]],
long_lived_marker: Dict[str, str],
) -> bool:
"""Mark the *first* content block of the system message with the 1h marker.
The system message is expected to have been split into multiple content
blocks beforehand by the caller block[0] is the cross-session-stable
prefix, subsequent blocks carry context files + volatile suffix.
Falls back to marking the whole system message as a single block when
the message hasn't been split (preserves correctness on the fallback path).
Returns True when a marker was placed.
"""
if not messages or messages[0].get("role") != "system":
return False
sys_msg = messages[0]
content = sys_msg.get("content")
# Already a list of blocks → mark the first block.
if isinstance(content, list) and content:
first = content[0]
if isinstance(first, dict):
first["cache_control"] = long_lived_marker
return True
return False
# String content (no split) → cannot place a stable-prefix breakpoint
# without changing the byte content. Caller is responsible for
# splitting; if they didn't, fall through to envelope marker so we still
# cache *something* for this turn.
if isinstance(content, str) and content:
sys_msg["content"] = [
{"type": "text", "text": content, "cache_control": long_lived_marker}
]
return True
return False
def apply_anthropic_cache_control_long_lived(
    api_messages: List[Dict[str, Any]],
    long_lived_ttl: str = "1h",
    rolling_ttl: str = "5m",
    native_anthropic: bool = False,
) -> List[Dict[str, Any]]:
    """Apply the prefix_and_2 layout: long-lived system prefix + rolling tail.

    Breakpoints placed by this function:
      * the stable system prefix (first content block of the system
        message), marked with the ``long_lived_ttl`` marker;
      * the last 2 non-system messages, marked with the ``rolling_ttl``
        marker (the last 3 when no system prefix could be marked).

    The tools array is NOT touched here, because tools live outside the
    messages list in the API payload; see
    ``mark_tools_for_long_lived_cache``.

    The caller should have split the system message into ordered content
    blocks with the cross-session-stable portion first. A system message
    that is still a plain string is wrapped into one block and marked as a
    whole, which works but does not isolate the volatile suffix.

    Args:
        api_messages: Messages to annotate; never mutated.
        long_lived_ttl: TTL for the system-prefix marker.
        rolling_ttl: TTL for the rolling-tail markers.
        native_anthropic: Passed through to ``_apply_cache_marker``.

    Returns:
        Deep copy of messages with cache_control breakpoints injected.
    """
    messages = copy.deepcopy(api_messages)
    if not messages:
        return messages

    prefix_marked = _mark_system_stable_block(messages, _build_marker(long_lived_ttl))
    tail_marker = _build_marker(rolling_ttl)

    # Budget: when the system prefix took a breakpoint, the tail gets 2;
    # otherwise it gets 3. A separately marked tools[-1] can then use one
    # more without exceeding 4.
    tail_size = 2 if prefix_marked else 3

    tail_positions = [
        pos for pos, msg in enumerate(messages) if msg.get("role") != "system"
    ][-tail_size:]
    for pos in tail_positions:
        _apply_cache_marker(messages[pos], tail_marker, native_anthropic=native_anthropic)

    return messages
def mark_tools_for_long_lived_cache(
    tools: Optional[List[Dict[str, Any]]],
    long_lived_ttl: str = "1h",
) -> Optional[List[Dict[str, Any]]]:
    """Return a copy of *tools* whose last entry carries a cache_control marker.

    The marker goes on the final tool so that the whole tools array falls
    inside the cached prefix.
    NOTE(review): the original documentation states that the marker survives
    the OpenAI-wire boundary on OpenRouter / Nous Portal and is forwarded by
    ``convert_tools_to_anthropic`` on native Anthropic. That cannot be
    verified from this function alone.

    Args:
        tools: OpenAI-format tool definitions, or None.
        long_lived_ttl: TTL passed to ``_build_marker`` for the marker.

    Returns:
        The input object itself when *tools* is empty or None; otherwise a
        deep copy with ``cache_control`` set on the last element (left
        unmarked if that element is not a dict). The input is never mutated.
    """
    if tools:
        marked = copy.deepcopy(tools)
        if isinstance(marked[-1], dict):
            marked[-1]["cache_control"] = _build_marker(long_lived_ttl)
        return marked
    return tools
+1 -1
View File
@@ -64,7 +64,7 @@ _SENSITIVE_BODY_KEYS = frozenset({
# cli.py) or `HERMES_REDACT_SECRETS=false` in ~/.hermes/.env. An opt-out
# warning is logged at gateway and CLI startup so operators see the
# downgrade — see `_log_redaction_status()` in gateway/run.py and cli.py.
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "true").lower() in {"1", "true", "yes", "on"}
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "true").lower() in ("1", "true", "yes", "on")
# Known API key prefixes -- match the prefix + contiguous token chars
_PREFIX_PATTERNS = [
+5 -5
View File
@@ -312,7 +312,7 @@ def _parse_single_entry(
)
matcher = None
if matcher is not None and event not in {"pre_tool_call", "post_tool_call"}:
if matcher is not None and event not in ("pre_tool_call", "post_tool_call"):
logger.warning(
"hooks.%s[%d].matcher=%r will be ignored at runtime — the "
"matcher field is only honored for pre_tool_call / "
@@ -423,7 +423,7 @@ def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]
def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]:
# Matcher gate — only meaningful for tool-scoped events.
if spec.event in {"pre_tool_call", "post_tool_call"}:
if spec.event in ("pre_tool_call", "post_tool_call"):
if not spec.matches_tool(kwargs.get("tool_name")):
return None
@@ -658,7 +658,7 @@ def _prompt_and_record(
print() # keep the terminal tidy after ^C
return False
if answer in {"y", "yes"}:
if answer in ("y", "yes"):
_record_approval(event, command)
return True
@@ -752,13 +752,13 @@ def _resolve_effective_accept(
if accept_hooks_arg:
return True
env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower()
if env in {"1", "true", "yes", "on"}:
if env in ("1", "true", "yes", "on"):
return True
cfg_val = cfg.get("hooks_auto_accept", False)
if isinstance(cfg_val, bool):
return cfg_val
if isinstance(cfg_val, str):
return cfg_val.strip().lower() in {"1", "true", "yes", "on"}
return cfg_val.strip().lower() in ("1", "true", "yes", "on")
return False
+1 -1
View File
@@ -261,7 +261,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
for scan_dir in dirs_to_scan:
for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
if any(part in {'.git', '.github', '.hub', '.archive'} for part in skill_md.parts):
if any(part in ('.git', '.github', '.hub', '.archive') for part in skill_md.parts):
continue
try:
content = skill_md.read_text(encoding='utf-8')
+2 -19
View File
@@ -279,7 +279,7 @@ class ChatCompletionsTransport(ProviderTransport):
_kimi_effort = "medium"
if reasoning_config and isinstance(reasoning_config, dict):
_e = (reasoning_config.get("effort") or "").strip().lower()
if _e in {"low", "medium", "high"}:
if _e in ("low", "medium", "high"):
_kimi_effort = _e
api_kwargs["reasoning_effort"] = _kimi_effort
@@ -294,7 +294,7 @@ class ChatCompletionsTransport(ProviderTransport):
_tokenhub_effort = "high"
if reasoning_config and isinstance(reasoning_config, dict):
_e = (reasoning_config.get("effort") or "").strip().lower()
if _e in {"low", "medium", "high"}:
if _e in ("low", "medium", "high"):
_tokenhub_effort = _e
api_kwargs["reasoning_effort"] = _tokenhub_effort
@@ -323,21 +323,6 @@ class ChatCompletionsTransport(ProviderTransport):
if provider_prefs and is_openrouter:
extra_body["provider"] = provider_prefs
# Pareto Code router plugin — model-gated. Same shape as the
# profile path in plugins/model-providers/openrouter/__init__.py;
# this branch only runs when the OpenRouter profile isn't loaded.
if is_openrouter and model == "openrouter/pareto-code":
_pareto_score = params.get("openrouter_min_coding_score")
if _pareto_score is not None and _pareto_score != "":
try:
_pareto_score_f = float(_pareto_score)
except (TypeError, ValueError):
_pareto_score_f = None
if _pareto_score_f is not None and 0.0 <= _pareto_score_f <= 1.0:
extra_body["plugins"] = [
{"id": "pareto-router", "min_coding_score": _pareto_score_f}
]
# Kimi extra_body.thinking
if is_kimi:
_kimi_thinking_enabled = True
@@ -463,7 +448,6 @@ class ChatCompletionsTransport(ProviderTransport):
qwen_session_metadata=params.get("qwen_session_metadata"),
model=model,
ollama_num_ctx=params.get("ollama_num_ctx"),
session_id=params.get("session_id"),
)
)
api_kwargs.update(top_level_from_profile)
@@ -478,7 +462,6 @@ class ChatCompletionsTransport(ProviderTransport):
model=model,
base_url=params.get("base_url"),
reasoning_config=reasoning_config,
openrouter_min_coding_score=params.get("openrouter_min_coding_score"),
)
if profile_body:
extra_body.update(profile_body)
-9
View File
@@ -104,16 +104,7 @@ class ResponsesApiTransport(ProviderTransport):
kwargs["prompt_cache_key"] = session_id
if reasoning_enabled and is_xai_responses:
from agent.model_metadata import grok_supports_reasoning_effort
kwargs["include"] = ["reasoning.encrypted_content"]
# xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
# / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
# those models reason natively. Only send the effort dial when
# the target model is on the allowlist; otherwise send no
# `reasoning` key at all and let the model reason on its own.
if grok_supports_reasoning_effort(model):
kwargs["reasoning"] = {"effort": reasoning_effort}
elif reasoning_enabled:
if is_github_responses:
github_reasoning = params.get("github_reasoning_extra")
-11
View File
@@ -370,17 +370,6 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
source_url="https://api-docs.deepseek.com/quick_start/pricing",
pricing_version="deepseek-pricing-2026-03-16",
),
(
"deepseek",
"deepseek-v4-pro",
): PricingEntry(
input_cost_per_million=Decimal("1.74"),
output_cost_per_million=Decimal("3.48"),
cache_read_cost_per_million=Decimal("0.0145"),
source="official_docs_snapshot",
source_url="https://api-docs.deepseek.com/quick_start/pricing",
pricing_version="deepseek-pricing-2026-05-12",
),
# Google Gemini
(
"google",
+1 -5
View File
@@ -337,7 +337,6 @@ def _process_single_prompt(
providers_ignored=config.get("providers_ignored"),
providers_order=config.get("providers_order"),
provider_sort=config.get("provider_sort"),
openrouter_min_coding_score=config.get("openrouter_min_coding_score"),
max_tokens=config.get("max_tokens"),
reasoning_config=config.get("reasoning_config"),
prefill_messages=config.get("prefill_messages"),
@@ -547,7 +546,6 @@ class BatchRunner:
providers_ignored: List[str] = None,
providers_order: List[str] = None,
provider_sort: str = None,
openrouter_min_coding_score: Optional[float] = None,
max_tokens: int = None,
reasoning_config: Dict[str, Any] = None,
prefill_messages: List[Dict[str, Any]] = None,
@@ -597,7 +595,6 @@ class BatchRunner:
self.providers_ignored = providers_ignored
self.providers_order = providers_order
self.provider_sort = provider_sort
self.openrouter_min_coding_score = openrouter_min_coding_score
self.max_tokens = max_tokens
self.reasoning_config = reasoning_config
self.prefill_messages = prefill_messages
@@ -795,7 +792,7 @@ class BatchRunner:
conversations = entry.get("conversations", [])
for msg in conversations:
role = msg.get("role") or msg.get("from")
if role in {"user", "human"}:
if role in ("user", "human"):
prompt_text = (msg.get("content") or msg.get("value", "")).strip()
break
@@ -876,7 +873,6 @@ class BatchRunner:
"providers_ignored": self.providers_ignored,
"providers_order": self.providers_order,
"provider_sort": self.provider_sort,
"openrouter_min_coding_score": self.openrouter_min_coding_score,
"max_tokens": self.max_tokens,
"reasoning_config": self.reasoning_config,
"prefill_messages": self.prefill_messages,
-13
View File
@@ -203,12 +203,6 @@ terminal:
# docker_forward_env:
# - "GITHUB_TOKEN"
# - "NPM_TOKEN"
# # Optional: extra flags passed verbatim to docker run (appended after security defaults).
# # Useful for adding capabilities (e.g. apt installs needing SETUID) or custom options.
# # Example: add a Linux capability not included by default
# # docker_extra_args:
# # - "--cap-add"
# # - "SETUID"
# -----------------------------------------------------------------------------
# OPTION 4: Singularity/Apptainer container
@@ -663,10 +657,6 @@ platform_toolsets:
# platforms:
# telegram:
# reply_to_mode: "first" # off | first | all
# # guest_mode lets explicit @mentions from non-allowlisted groups through.
# # Default false; ordinary messages, replies, and regex wake words stay blocked.
# guest_mode: false
# # allowed_chats: ["-1001234567890"]
# extra:
# disable_link_previews: false # Set true to suppress Telegram URL previews in bot messages
@@ -953,9 +943,6 @@ display:
# false: Wait for the full response before rendering
streaming: true
# Show [HH:MM] timestamps on user input and assistant response labels.
# timestamps: false
# ───────────────────────────────────────────────────────────────────────────
# Skin / Theme
# ───────────────────────────────────────────────────────────────────────────
+131 -873
View File
File diff suppressed because it is too large Load Diff
+8 -7
View File
@@ -664,7 +664,7 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
# None both mean "clear the field" (restore old behaviour).
if "workdir" in updates:
_wd = updates["workdir"]
if _wd in {None, "", False}:
if _wd in (None, "", False):
updates["workdir"] = None
else:
updates["workdir"] = _normalize_workdir(_wd)
@@ -811,7 +811,7 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
# schedule quietly goes off. See issue #16265.
if job["next_run_at"] is None:
kind = job.get("schedule", {}).get("kind")
if kind in {"cron", "interval"}:
if kind in ("cron", "interval"):
job["state"] = "error"
if not job.get("last_error"):
job["last_error"] = (
@@ -855,7 +855,7 @@ def advance_next_run(job_id: str) -> bool:
for job in jobs:
if job["id"] == job_id:
kind = job.get("schedule", {}).get("kind")
if kind not in {"cron", "interval"}:
if kind not in ("cron", "interval"):
return False
now = _hermes_now().isoformat()
new_next = compute_next_run(job["schedule"], now)
@@ -909,7 +909,7 @@ def _get_due_jobs_locked() -> List[Dict[str, Any]]:
# next_run_at unset. Without this branch, such jobs are
# silently skipped forever; recompute next_run_at from the
# schedule so they pick up at their next scheduled tick.
if not recovered_next and kind in {"cron", "interval"}:
if not recovered_next and kind in ("cron", "interval"):
recovered_next = compute_next_run(schedule, now.isoformat())
if recovered_next:
recovery_kind = kind
@@ -940,7 +940,7 @@ def _get_due_jobs_locked() -> List[Dict[str, Any]]:
# (gateway was down and missed the window). Fast-forward to
# the next future occurrence instead of firing a stale run.
grace = _compute_grace_seconds(schedule)
if kind in {"cron", "interval"} and (now - next_run_dt).total_seconds() > grace:
if kind in ("cron", "interval") and (now - next_run_dt).total_seconds() > grace:
# Job is past its catch-up grace window — this is a stale missed run.
# Grace scales with schedule period: daily=2h, hourly=30m, 10min=5m.
new_next = compute_next_run(schedule, now.isoformat())
@@ -1082,8 +1082,9 @@ def rewrite_skill_refs(
new_skills.append(target)
elif name in pruned_set:
dropped.append(name)
elif name not in new_skills:
new_skills.append(name)
else:
if name not in new_skills:
new_skills.append(name)
if not mapped and not dropped:
continue
+1 -3
View File
@@ -111,7 +111,6 @@ _HOME_TARGET_ENV_VARS = {
"weixin": "WEIXIN_HOME_CHANNEL",
"bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
"qqbot": "QQBOT_HOME_CHANNEL",
"whatsapp": "WHATSAPP_HOME_CHANNEL",
}
# Legacy env var names kept for back-compat. Each entry is the current
@@ -755,7 +754,7 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
# shebang: the scripts dir is trusted, but keeping the interpreter
# choice explicit here keeps the allowed surface small and auditable.
suffix = path.suffix.lower()
if suffix in {".sh", ".bash"}:
if suffix in (".sh", ".bash"):
# Resolve bash dynamically so Windows (Git Bash) and Linux/macOS
# all work. On native Windows without Git for Windows installed
# shutil.which returns None — fall back to a clear error rather
@@ -1440,7 +1439,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
providers_ignored=pr.get("ignore"),
providers_order=pr.get("order"),
provider_sort=pr.get("sort"),
openrouter_min_coding_score=(_cfg.get("openrouter") or {}).get("min_coding_score"),
enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
disabled_toolsets=["cronjob", "messaging", "clarify"],
quiet_mode=True,
+1 -1
View File
@@ -264,7 +264,7 @@ def _parse_hint_result(text: str) -> tuple[int | None, str]:
"""Parse the judge's boxed decision and hint text."""
boxed = _BOXED_RE.findall(text)
score = int(boxed[-1]) if boxed else None
if score not in {1, -1}:
if score not in (1, -1):
score = None
hint_matches = _HINT_RE.findall(text)
hint = hint_matches[-1].strip() if hint_matches else ""
@@ -162,7 +162,7 @@ def _normalize_tar_member_parts(member_name: str) -> list:
):
raise ValueError(f"Unsafe archive member path: {member_name}")
parts = [part for part in posix_path.parts if part not in {"", "."}]
parts = [part for part in posix_path.parts if part not in ("", ".")]
if not parts or any(part == ".." for part in parts):
raise ValueError(f"Unsafe archive member path: {member_name}")
return parts
@@ -561,7 +561,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
# --- 5. Verify -- run test suite in the agent's sandbox ---
# Skip verification if the agent produced no meaningful output
only_system_and_user = all(
msg.get("role") in {"system", "user"} for msg in result.messages
msg.get("role") in ("system", "user") for msg in result.messages
)
if result.turns_used == 0 or only_system_and_user:
logger.warning(
@@ -919,7 +919,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
eval_metrics[f"eval/pass_rate_{cat_key}"] = cat_pass_rate
# Store metrics for wandb_log
self.eval_metrics = list(eval_metrics.items())
self.eval_metrics = [(k, v) for k, v in eval_metrics.items()]
# ---- Print summary ----
print(f"\n{'='*60}")
@@ -759,7 +759,7 @@ class YCBenchEvalEnv(HermesAgentBaseEnv):
eval_metrics[f"eval/survival_rate_{key}"] = ps / pt if pt else 0
eval_metrics[f"eval/avg_score_{key}"] = pa
self.eval_metrics = list(eval_metrics.items())
self.eval_metrics = [(k, v) for k, v in eval_metrics.items()]
# --- Print summary ---
print(f"\n{'='*60}")
+1 -1
View File
@@ -571,7 +571,7 @@ class HermesAgentBaseEnv(BaseEnv):
# (e.g., API call failed on turn 1). No point spinning up a Modal sandbox
# just to verify files that were never created.
only_system_and_user = all(
msg.get("role") in {"system", "user"} for msg in result.messages
msg.get("role") in ("system", "user") for msg in result.messages
)
if result.turns_used == 0 or only_system_and_user:
logger.warning(
+1 -1
View File
@@ -179,7 +179,7 @@ class ToolContext:
# Ensure parent directory exists in the sandbox
parent = str(_Path(remote_path).parent)
if parent not in {".", "/"}:
if parent not in (".", "/"):
self.terminal(f"mkdir -p {parent}", timeout=10)
# For small files, single command is fine
+1 -1
View File
@@ -2,7 +2,7 @@
Hermes Gateway - Multi-platform messaging integration.
This module provides a unified gateway for connecting the Hermes agent
to various messaging platforms (Telegram, Discord, WhatsApp, Weixin, and more) with:
to various messaging platforms (Telegram, Discord, WhatsApp) with:
- Session management (persistent conversations with reset policies)
- Dynamic context injection (agent knows where messages come from)
- Delivery routing (cron job outputs to appropriate channels)
+35 -67
View File
@@ -2,7 +2,7 @@
Gateway configuration management.
Handles loading and validating configuration for:
- Connected platforms (Telegram, Discord, WhatsApp, Weixin, and more)
- Connected platforms (Telegram, Discord, WhatsApp)
- Home channels for each platform
- Session reset policies
- Delivery preferences
@@ -28,9 +28,9 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
return default
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in {"true", "1", "yes", "on"}:
if lowered in ("true", "1", "yes", "on"):
return True
if lowered in {"false", "0", "no", "off"}:
if lowered in ("false", "0", "no", "off"):
return False
return default
return is_truthy_value(value, default=default)
@@ -317,32 +317,14 @@ class PlatformConfig:
)
# Streaming defaults — single source of truth so both StreamingConfig and
# StreamConsumerConfig agree on the out-of-the-box edit rhythm. Tuned for
# Telegram's ~1 edit/s flood envelope: a touch under 1s lets the cadence
# breathe without bumping into rate limits, and a smaller buffer threshold
# makes short replies feel near-instant in DMs.
DEFAULT_STREAMING_EDIT_INTERVAL: float = 0.8
DEFAULT_STREAMING_BUFFER_THRESHOLD: int = 24
DEFAULT_STREAMING_CURSOR: str = ""
@dataclass
class StreamingConfig:
"""Configuration for real-time token streaming to messaging platforms."""
enabled: bool = False
# Transport selection:
# "auto" — prefer native streaming-draft updates when the platform
# supports them (Telegram sendMessageDraft, Bot API 9.5+);
# fall back to edit-based when not. Recommended.
# "draft" — explicitly request native drafts; falls back to edit when
# the platform/chat doesn't support them.
# "edit" — progressive editMessageText only (legacy behaviour).
# "off" — disable streaming entirely.
transport: str = "auto"
edit_interval: float = DEFAULT_STREAMING_EDIT_INTERVAL
buffer_threshold: int = DEFAULT_STREAMING_BUFFER_THRESHOLD
cursor: str = DEFAULT_STREAMING_CURSOR
transport: str = "edit" # "edit" (progressive editMessageText) or "off"
edit_interval: float = 1.0 # Seconds between message edits (Telegram rate-limits at ~1/s)
buffer_threshold: int = 40 # Chars before forcing an edit
cursor: str = "" # Cursor shown during streaming
# Ported from openclaw/openclaw#72038. When >0, the final edit for
# a long-running streamed response is delivered as a fresh message
# if the original preview has been visible for at least this many
@@ -368,14 +350,10 @@ class StreamingConfig:
return cls()
return cls(
enabled=_coerce_bool(data.get("enabled"), False),
transport=data.get("transport", "auto"),
edit_interval=_coerce_float(
data.get("edit_interval"), DEFAULT_STREAMING_EDIT_INTERVAL,
),
buffer_threshold=_coerce_int(
data.get("buffer_threshold"), DEFAULT_STREAMING_BUFFER_THRESHOLD,
),
cursor=data.get("cursor", DEFAULT_STREAMING_CURSOR),
transport=data.get("transport", "edit"),
edit_interval=_coerce_float(data.get("edit_interval"), 1.0),
buffer_threshold=_coerce_int(data.get("buffer_threshold"), 40),
cursor=data.get("cursor", ""),
fresh_final_after_seconds=_coerce_float(
data.get("fresh_final_after_seconds"), 60.0
),
@@ -610,7 +588,8 @@ class GatewayConfig:
try:
session_store_max_age_days = int(data.get("session_store_max_age_days", 90))
session_store_max_age_days = max(session_store_max_age_days, 0)
if session_store_max_age_days < 0:
session_store_max_age_days = 0
except (TypeError, ValueError):
session_store_max_age_days = 90
@@ -787,19 +766,11 @@ def load_gateway_config() -> GatewayConfig:
bridged["dm_policy"] = platform_cfg["dm_policy"]
if "allow_from" in platform_cfg:
bridged["allow_from"] = platform_cfg["allow_from"]
if "allow_admin_from" in platform_cfg:
bridged["allow_admin_from"] = platform_cfg["allow_admin_from"]
if "user_allowed_commands" in platform_cfg:
bridged["user_allowed_commands"] = platform_cfg["user_allowed_commands"]
if "group_policy" in platform_cfg:
bridged["group_policy"] = platform_cfg["group_policy"]
if "group_allow_from" in platform_cfg:
bridged["group_allow_from"] = platform_cfg["group_allow_from"]
if "group_allow_admin_from" in platform_cfg:
bridged["group_allow_admin_from"] = platform_cfg["group_allow_admin_from"]
if "group_user_allowed_commands" in platform_cfg:
bridged["group_user_allowed_commands"] = platform_cfg["group_user_allowed_commands"]
if plat in {Platform.DISCORD, Platform.SLACK} and "channel_skill_bindings" in platform_cfg:
if plat in (Platform.DISCORD, Platform.SLACK) and "channel_skill_bindings" in platform_cfg:
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
if "channel_prompts" in platform_cfg:
channel_prompts = platform_cfg["channel_prompts"]
@@ -925,8 +896,6 @@ def load_gateway_config() -> GatewayConfig:
os.environ["TELEGRAM_REQUIRE_MENTION"] = str(_effective_rm).lower()
if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
if "guest_mode" in telegram_cfg and not os.getenv("TELEGRAM_GUEST_MODE"):
os.environ["TELEGRAM_GUEST_MODE"] = str(telegram_cfg["guest_mode"]).lower()
frc = telegram_cfg.get("free_response_chats")
if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
if isinstance(frc, list):
@@ -972,17 +941,16 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(group_allowed_chats, list):
group_allowed_chats = ",".join(str(v) for v in group_allowed_chats)
os.environ["TELEGRAM_GROUP_ALLOWED_CHATS"] = str(group_allowed_chats)
for _telegram_extra_key in ("guest_mode", "disable_link_previews"):
if _telegram_extra_key in telegram_cfg:
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
if not isinstance(plat_data, dict):
plat_data = {}
platforms_data[Platform.TELEGRAM.value] = plat_data
extra = plat_data.setdefault("extra", {})
if not isinstance(extra, dict):
extra = {}
plat_data["extra"] = extra
extra[_telegram_extra_key] = telegram_cfg[_telegram_extra_key]
if "disable_link_previews" in telegram_cfg:
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
if not isinstance(plat_data, dict):
plat_data = {}
platforms_data[Platform.TELEGRAM.value] = plat_data
extra = plat_data.setdefault("extra", {})
if not isinstance(extra, dict):
extra = {}
plat_data["extra"] = extra
extra["disable_link_previews"] = telegram_cfg["disable_link_previews"]
whatsapp_cfg = yaml_cfg.get("whatsapp", {})
if isinstance(whatsapp_cfg, dict):
@@ -1179,7 +1147,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
# Reply threading mode for Telegram (off/first/all)
telegram_reply_mode = os.getenv("TELEGRAM_REPLY_TO_MODE", "").lower()
if telegram_reply_mode in {"off", "first", "all"}:
if telegram_reply_mode in ("off", "first", "all"):
if Platform.TELEGRAM not in config.platforms:
config.platforms[Platform.TELEGRAM] = PlatformConfig()
config.platforms[Platform.TELEGRAM].reply_to_mode = telegram_reply_mode
@@ -1220,14 +1188,14 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
# Reply threading mode for Discord (off/first/all)
discord_reply_mode = os.getenv("DISCORD_REPLY_TO_MODE", "").lower()
if discord_reply_mode in {"off", "first", "all"}:
if discord_reply_mode in ("off", "first", "all"):
if Platform.DISCORD not in config.platforms:
config.platforms[Platform.DISCORD] = PlatformConfig()
config.platforms[Platform.DISCORD].reply_to_mode = discord_reply_mode
# WhatsApp (typically uses different auth mechanism)
whatsapp_enabled = os.getenv("WHATSAPP_ENABLED", "").lower() in {"true", "1", "yes"}
whatsapp_disabled_explicitly = os.getenv("WHATSAPP_ENABLED", "").lower() in {"false", "0", "no"}
whatsapp_enabled = os.getenv("WHATSAPP_ENABLED", "").lower() in ("true", "1", "yes")
whatsapp_disabled_explicitly = os.getenv("WHATSAPP_ENABLED", "").lower() in ("false", "0", "no")
if Platform.WHATSAPP in config.platforms:
# YAML config exists — respect explicit disable
wa_cfg = config.platforms[Platform.WHATSAPP]
@@ -1285,7 +1253,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
config.platforms[Platform.SIGNAL].extra.update({
"http_url": signal_url,
"account": signal_account,
"ignore_stories": os.getenv("SIGNAL_IGNORE_STORIES", "true").lower() in {"true", "1", "yes"},
"ignore_stories": os.getenv("SIGNAL_IGNORE_STORIES", "true").lower() in ("true", "1", "yes"),
})
signal_home = os.getenv("SIGNAL_HOME_CHANNEL")
if signal_home and Platform.SIGNAL in config.platforms:
@@ -1334,7 +1302,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
matrix_password = os.getenv("MATRIX_PASSWORD", "")
if matrix_password:
config.platforms[Platform.MATRIX].extra["password"] = matrix_password
matrix_e2ee = os.getenv("MATRIX_ENCRYPTION", "").lower() in {"true", "1", "yes"}
matrix_e2ee = os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes")
config.platforms[Platform.MATRIX].extra["encryption"] = matrix_e2ee
matrix_device_id = os.getenv("MATRIX_DEVICE_ID", "")
if matrix_device_id:
@@ -1399,7 +1367,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
)
# API Server
api_server_enabled = os.getenv("API_SERVER_ENABLED", "").lower() in {"true", "1", "yes"}
api_server_enabled = os.getenv("API_SERVER_ENABLED", "").lower() in ("true", "1", "yes")
api_server_key = os.getenv("API_SERVER_KEY", "")
api_server_cors_origins = os.getenv("API_SERVER_CORS_ORIGINS", "")
api_server_port = os.getenv("API_SERVER_PORT")
@@ -1426,7 +1394,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
config.platforms[Platform.API_SERVER].extra["model_name"] = api_server_model_name
# Webhook platform
webhook_enabled = os.getenv("WEBHOOK_ENABLED", "").lower() in {"true", "1", "yes"}
webhook_enabled = os.getenv("WEBHOOK_ENABLED", "").lower() in ("true", "1", "yes")
webhook_port = os.getenv("WEBHOOK_PORT")
webhook_secret = os.getenv("WEBHOOK_SECRET", "")
if webhook_enabled:
@@ -1442,11 +1410,11 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
config.platforms[Platform.WEBHOOK].extra["secret"] = webhook_secret
# Microsoft Graph webhook platform
msgraph_webhook_enabled = os.getenv("MSGRAPH_WEBHOOK_ENABLED", "").lower() in {
msgraph_webhook_enabled = os.getenv("MSGRAPH_WEBHOOK_ENABLED", "").lower() in (
"true",
"1",
"yes",
}
)
msgraph_webhook_port = os.getenv("MSGRAPH_WEBHOOK_PORT")
msgraph_webhook_client_state = os.getenv("MSGRAPH_WEBHOOK_CLIENT_STATE", "")
msgraph_webhook_resources = os.getenv("MSGRAPH_WEBHOOK_ACCEPTED_RESOURCES", "")
@@ -1640,7 +1608,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
"webhook_host": os.getenv("BLUEBUBBLES_WEBHOOK_HOST", "127.0.0.1"),
"webhook_port": int(os.getenv("BLUEBUBBLES_WEBHOOK_PORT", "8645")),
"webhook_path": os.getenv("BLUEBUBBLES_WEBHOOK_PATH", "/bluebubbles-webhook"),
"send_read_receipts": os.getenv("BLUEBUBBLES_SEND_READ_RECEIPTS", "true").lower() in {"true", "1", "yes"},
"send_read_receipts": os.getenv("BLUEBUBBLES_SEND_READ_RECEIPTS", "true").lower() in ("true", "1", "yes"),
})
bluebubbles_home = os.getenv("BLUEBUBBLES_HOME_CHANNEL")
if bluebubbles_home and Platform.BLUEBUBBLES in config.platforms:
+4 -4
View File
@@ -81,7 +81,7 @@ _TIER_MINIMAL = {
_PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {
# Tier 1 — full edit support, personal/team use
"telegram": {**_TIER_HIGH, "tool_progress": "new"},
"telegram": _TIER_HIGH,
"discord": _TIER_HIGH,
# Tier 2 — edit support, often customer/workspace channels
@@ -190,13 +190,13 @@ def _normalise(setting: str, value: Any) -> Any:
if value is True:
return "all"
return str(value).lower()
if setting in {"show_reasoning", "streaming"}:
if setting in ("show_reasoning", "streaming"):
if isinstance(value, str):
return value.lower() in {"true", "1", "yes", "on"}
return value.lower() in ("true", "1", "yes", "on")
return bool(value)
if setting == "cleanup_progress":
if isinstance(value, str):
return value.lower() in {"true", "1", "yes", "on"}
return value.lower() in ("true", "1", "yes", "on")
return bool(value)
if setting == "tool_preview_length":
try:
-11
View File
@@ -33,17 +33,6 @@ status display, gateway setup, and more.
auto-populate `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` so the setup
wizard surfaces proper descriptions, prompts, password flags, and URLs.
**Subclassing for platform-specific UX.** When a platform has a hard
time-window constraint that the base adapter can't anticipate (LINE's
60s single-use reply token, WhatsApp's 24h session window, etc.), an
adapter can override `_keep_typing` to layer a mid-flight bubble at a
threshold without expanding the kwarg surface. Always
`await super()._keep_typing(...)` so the typing heartbeat keeps running,
and tear down your side task in `finally`. See `plugins/platforms/line/`
for the full pattern (Template Buttons postback at 45s, `RequestCache`
state machine, `interrupt_session_activity` override for `/stop`
orphans) and the developer-guide page for the prose walkthrough.
See `plugins/platforms/irc/`, `plugins/platforms/teams/`, and
`plugins/platforms/google_chat/` for complete working examples, and
`website/docs/developer-guide/adding-platform-adapters.md` for the full
+2 -26
View File
@@ -9,19 +9,9 @@ Each adapter handles:
"""
from .base import BasePlatformAdapter, MessageEvent, SendResult
from .qqbot import QQAdapter
from .yuanbao import YuanbaoAdapter
# QQAdapter and YuanbaoAdapter were previously imported eagerly here, but
# nothing in the codebase consumes ``from gateway.platforms import
# QQAdapter`` (every real call site uses the long-form path
# ``from gateway.platforms.qqbot import QQAdapter``). The eager imports
# pulled in qqbot's chunked-upload + keyboards + onboard machinery and
# yuanbao's websocket stack — about 48 ms wall and ~8 MB RSS on every
# CLI invocation, even ones that never touch a gateway adapter.
#
# Use PEP 562 module ``__getattr__`` to keep the public re-export working
# while deferring the actual import to first attribute access. This is
# 100% backward-compatible for any external code that still imports the
# adapters from the package root.
__all__ = [
"BasePlatformAdapter",
"MessageEvent",
@@ -29,17 +19,3 @@ __all__ = [
"QQAdapter",
"YuanbaoAdapter",
]
def __getattr__(name):
    """Resolve the lazily re-exported adapters on first attribute access.

    Module-level ``__getattr__`` hook (PEP 562): the adapter module is only
    imported when ``QQAdapter`` or ``YuanbaoAdapter`` is actually requested
    from this package.

    Raises:
        AttributeError: for any other attribute name.
    """
    if name not in ("QQAdapter", "YuanbaoAdapter"):
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    if name == "QQAdapter":
        from .qqbot import QQAdapter  # noqa: F401
        return QQAdapter
    from .yuanbao import YuanbaoAdapter  # noqa: F401
    return YuanbaoAdapter
def __dir__():
    """Return the package's public names (``__all__``) in sorted order."""
    public_names = list(__all__)
    public_names.sort()
    return public_names
+13 -65
View File
@@ -449,7 +449,7 @@ if AIOHTTP_AVAILABLE:
@web.middleware
async def body_limit_middleware(request, handler):
"""Reject overly large request bodies early based on Content-Length."""
if request.method in {"POST", "PUT", "PATCH"}:
if request.method in ("POST", "PUT", "PATCH"):
cl = request.headers.get("Content-Length")
if cl is not None:
try:
@@ -646,7 +646,7 @@ class APIServerAdapter(BasePlatformAdapter):
try:
from hermes_cli.profiles import get_active_profile_name
profile = get_active_profile_name()
if profile and profile not in {"default", "custom"}:
if profile and profile not in ("default", "custom"):
return profile
except Exception:
pass
@@ -1003,7 +1003,7 @@ class APIServerAdapter(BasePlatformAdapter):
system_prompt = content
else:
system_prompt = system_prompt + "\n" + content
elif role in {"user", "assistant"}:
elif role in ("user", "assistant"):
try:
content = _normalize_multimodal_content(raw_content)
except ValueError as exc:
@@ -1168,9 +1168,6 @@ class APIServerAdapter(BasePlatformAdapter):
agent_ref=agent_ref,
gateway_session_key=gateway_session_key,
))
# Ensure SSE drain loops can terminate without relying on polling
# agent_task.done(), which can race with queue timeout checks.
agent_task.add_done_callback(lambda _fut: _stream_q.put(None))
return await self._write_sse_chat_completion(
request, completion_id, model_name, created, _stream_q,
@@ -1209,49 +1206,10 @@ class APIServerAdapter(BasePlatformAdapter):
status=500,
)
final_response = result.get("final_response") or ""
is_partial = bool(result.get("partial"))
is_failed = bool(result.get("failed"))
completed = bool(result.get("completed", True))
err_msg = result.get("error")
final_response = result.get("final_response", "")
if not final_response:
final_response = result.get("error", "(No response generated)")
# Decide finish_reason. OpenAI uses "length" for truncation, "stop"
# for normal completion, and downstream SDKs accept "error" / custom
# codes. See issue #22496.
if is_partial and err_msg and "truncat" in err_msg.lower():
finish_reason = "length"
elif is_failed or (not completed and err_msg):
finish_reason = "error"
else:
finish_reason = "stop"
response_headers = {
"X-Hermes-Session-Id": result.get("session_id", session_id),
}
if gateway_session_key:
response_headers["X-Hermes-Session-Key"] = gateway_session_key
# Hard-fail path: no usable assistant text AND a real failure → 5xx
# with OpenAI-style error envelope so SDK clients raise instead of
# silently rendering the internal failure string as message.content.
if not final_response and (is_failed or is_partial):
err_body = _openai_error(
err_msg or "Agent run did not produce a response.",
err_type="server_error",
code="agent_incomplete",
)
err_body["error"]["hermes"] = {
"completed": completed,
"partial": is_partial,
"failed": is_failed,
}
response_headers["X-Hermes-Completed"] = "false"
response_headers["X-Hermes-Partial"] = "true" if is_partial else "false"
return web.json_response(err_body, status=502, headers=response_headers)
# Soft-partial path: we have *some* text but the run did not complete
# (e.g. truncation with partial buffered output). Still 200 but signal
# truncation via finish_reason="length" + Hermes-specific extras.
response_data = {
"id": completion_id,
"object": "chat.completion",
@@ -1264,7 +1222,7 @@ class APIServerAdapter(BasePlatformAdapter):
"role": "assistant",
"content": final_response,
},
"finish_reason": finish_reason,
"finish_reason": "stop",
}
],
"usage": {
@@ -1273,19 +1231,12 @@ class APIServerAdapter(BasePlatformAdapter):
"total_tokens": usage.get("total_tokens", 0),
},
}
if is_partial or is_failed or not completed:
response_data["hermes"] = {
"completed": completed,
"partial": is_partial,
"failed": is_failed,
"error": err_msg,
"error_code": "output_truncated" if finish_reason == "length" else "agent_error",
}
response_headers["X-Hermes-Completed"] = "false"
response_headers["X-Hermes-Partial"] = "true" if is_partial else "false"
if err_msg:
response_headers["X-Hermes-Error"] = err_msg[:200]
response_headers = {
"X-Hermes-Session-Id": result.get("session_id", session_id),
}
if gateway_session_key:
response_headers["X-Hermes-Session-Key"] = gateway_session_key
return web.json_response(response_data, headers=response_headers)
async def _write_sse_chat_completion(
@@ -2200,9 +2151,6 @@ class APIServerAdapter(BasePlatformAdapter):
agent_ref=agent_ref,
gateway_session_key=gateway_session_key,
))
# Ensure SSE drain loops can terminate without relying on polling
# agent_task.done(), which can race with queue timeout checks.
agent_task.add_done_callback(lambda _fut: _stream_q.put(None))
response_id = f"resp_{uuid.uuid4().hex[:28]}"
model_name = body.get("model", self._model_name)
@@ -2387,7 +2335,7 @@ class APIServerAdapter(BasePlatformAdapter):
if cron_err:
return cron_err
try:
include_disabled = request.query.get("include_disabled", "").lower() in {"true", "1"}
include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1")
jobs = _cron_list(include_disabled=include_disabled)
return web.json_response({"jobs": jobs})
except Exception as e:
+4 -206
View File
@@ -1,7 +1,7 @@
"""
Base platform adapter interface.
All platform adapters (Telegram, Discord, WhatsApp, Weixin, and more) inherit from this
All platform adapters (Telegram, Discord, WhatsApp) inherit from this
and implement the required methods.
"""
@@ -560,7 +560,7 @@ def _looks_like_image(data: bytes) -> bool:
return True
if data[:3] == b"\xff\xd8\xff":
return True
if data[:6] in {b"GIF87a", b"GIF89a"}:
if data[:6] in (b"GIF87a", b"GIF89a"):
return True
if data[:2] == b"BM":
return True
@@ -859,7 +859,7 @@ def cache_document_from_bytes(data: bytes, filename: str) -> str:
# Sanitize: strip directory components, null bytes, and control characters
safe_name = Path(filename).name if filename else "document"
safe_name = safe_name.replace("\x00", "").strip()
if not safe_name or safe_name in {".", ".."}:
if not safe_name or safe_name in (".", ".."):
safe_name = "document"
cached_name = f"doc_{uuid.uuid4().hex[:12]}_{safe_name}"
filepath = cache_dir / cached_name
@@ -1035,13 +1035,6 @@ class SendResult:
error: Optional[str] = None
raw_response: Any = None
retryable: bool = False # True for transient connection errors — base will retry automatically
# When the adapter had to split an oversized payload across multiple
# platform messages (e.g. Telegram edit_message overflow split-and-deliver),
# ``message_id`` is the LAST visible message id (so subsequent edits target
# the most recent chunk) and these are the additional message ids that
# made up the full payload, in send order. Empty tuple for the common
# single-message case.
continuation_message_ids: tuple = ()
class EphemeralReply(str):
@@ -1318,61 +1311,6 @@ class BasePlatformAdapter(ABC):
# _keep_typing skips send_typing when the chat_id is in this set.
self._typing_paused: set = set()
@property
def message_len_fn(self) -> Callable[[str], int]:
    """Length function used to measure message size on this platform.

    The base adapter measures with the builtin ``len``. Adapters whose
    platform counts characters differently (e.g. Telegram, which counts
    UTF-16 code units) override this property.
    """
    measure = len
    return measure
def supports_draft_streaming(
    self,
    chat_type: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> bool:
    """Report whether native streaming-draft updates are available.

    Adapters that implement ``send_draft`` (for example on top of
    Telegram Bot API 9.5's ``sendMessageDraft``, which Telegram limits to
    private DMs) should return True for the chat types where the platform
    supports it.

    The base adapter always answers False, in which case stream consumers
    fall back to the edit-based path (``send`` + ``edit_message``). The
    same fallback applies when ``send_draft`` raises.
    """
    draft_streaming_available = False
    return draft_streaming_available
async def send_draft(
    self,
    chat_id: str,
    draft_id: int,
    content: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Send or update an animated streaming-draft preview.

    Contract for overriding adapters:

    * Within one response, keep calling with the same non-zero
      ``draft_id`` so the platform animates a single preview instead of
      re-creating it.
    * Separate responses in the same chat must use distinct ``draft_id``
      values so a new preview does not animate over an earlier bubble.
    * A draft has no message_id: it cannot be edited, replied to, or
      deleted through the normal message APIs. Once the response is
      finished the caller delivers the final text via a regular ``send``
      and the preview clears on the client.

    Raises:
        NotImplementedError: always, in this base implementation. Any
            adapter returning True from :meth:`supports_draft_streaming`
            must override this method.
    """
    adapter_name = type(self).__name__
    raise NotImplementedError(f"{adapter_name} does not implement send_draft")
@property
def has_fatal_error(self) -> bool:
return self._fatal_error_message is not None
@@ -1573,33 +1511,6 @@ class BasePlatformAdapter(ABC):
# property) so the stream consumer knows not to short-circuit.
REQUIRES_EDIT_FINALIZE: bool = False
async def create_handoff_thread(
    self,
    parent_chat_id: str,
    name: str,
) -> Optional[str]:
    """Create a fresh thread under ``parent_chat_id`` for a session handoff.

    The gateway's handoff watcher calls this when moving a CLI session to
    a thread-capable platform: a dedicated thread keeps the handed-off
    conversation apart from whatever is already in the home channel and
    gives each handoff its own clean scrollback.

    Returns:
        The new thread/topic id as a string, or ``None`` when the platform
        has no threading support or the attempt failed (permissions,
        topics-mode off, etc.). On ``None`` the watcher falls back to
        using ``parent_chat_id`` directly.

    This base implementation always returns ``None``; adapters that
    support threads override it. See:

    - Telegram: forum topics in groups, DM topics with bot API 9.4+
    - Discord: text-channel threads (1440-min auto-archive)
    - Slack: seed-message thread anchoring
    """
    new_thread_id = None
    return new_thread_id
async def edit_message(
self,
chat_id: str,
@@ -1743,55 +1654,6 @@ class BasePlatformAdapter(ABC):
"""
return SendResult(success=False, error="Not supported")
async def send_clarify(
    self,
    chat_id: str,
    question: str,
    choices: Optional[list],
    clarify_id: str,
    session_key: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Send a clarify prompt to the user.

    Two render modes:

    * **Multiple choice** (``choices`` is a non-empty list): adapters that
      override this should render inline buttons, one per choice plus a
      final "Other" / free-text option. Button callbacks MUST resolve via
      ``tools.clarify_gateway.resolve_gateway_clarify(clarify_id, response)``
      with the chosen string. Picking "Other" calls
      ``mark_awaiting_text(clarify_id)`` so the next message in the
      session is captured as the response.

    * **Open-ended** (``choices`` is None or empty): the question is sent
      as plain text; the next user message in the session is captured by
      the gateway's text-intercept and resolves the clarify automatically
      (see ``GatewayRunner._maybe_intercept_clarify_text``).

    This default implementation renders the choices as a numbered text
    list and sends it through ``self.send``. That works on every
    platform: the user replies with a number ("2") or the literal choice
    text, and the gateway intercepts and resolves. Adapters with native
    button UIs (Telegram, Discord) SHOULD override this for a richer UX.

    ``clarify_id`` and ``session_key`` are not used by this fallback.
    """
    if not choices:
        prompt_text = f"{question}"
    else:
        numbered = [
            f" {position}. {option}"
            for position, option in enumerate(choices, start=1)
        ]
        prompt_text = "\n".join(
            [
                f"{question}",
                "",
                *numbered,
                "",
                "Reply with the number, the option text, or your own answer.",
            ]
        )
    return await self.send(
        chat_id=chat_id,
        content=prompt_text,
        metadata=metadata,
    )
async def send_private_notice(
self,
chat_id: str,
@@ -2842,7 +2704,7 @@ class BasePlatformAdapter(ABC):
# and preserve ordering of queued follow-ups. Route those
# through the dedicated handoff path that serializes
# cancellation + runner response + pending drain.
if cmd in {"stop", "new", "reset"}:
if cmd in ("stop", "new", "reset"):
try:
await self._dispatch_active_session_command(event, session_key, cmd)
except Exception as e:
@@ -2880,58 +2742,6 @@ class BasePlatformAdapter(ABC):
logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
return
# Clarify text-capture bypass: if the agent is blocked on a
# clarify_tool call awaiting a free-form text response (open-
# ended clarify, or user picked "Other"), the next non-command
# message in this session MUST reach the runner so the
# clarify-intercept can resolve it and unblock the agent.
#
# Without this bypass: the message gets queued in
# _pending_messages AND triggers an interrupt, killing the
# agent run mid-clarify and discarding the user's answer.
# Same shape as the /approve deadlock fix (PR #4926) — both
# cases are "agent thread blocked on Event.wait, message must
# reach the resolver before being treated as a new turn."
if not cmd:
try:
from tools import clarify_gateway as _clarify_mod
_has_text_clarify = (
_clarify_mod.get_pending_for_session(session_key) is not None
)
except Exception:
_has_text_clarify = False
if _has_text_clarify:
logger.debug(
"[%s] Routing message to clarify text-intercept for %s",
self.name, session_key,
)
try:
_thread_meta = _thread_metadata_for_source(
event.source, _reply_anchor_for_event(event)
)
response = await self._message_handler(event)
_text, _eph_ttl = self._unwrap_ephemeral(response)
if _text:
_r = await self._send_with_retry(
chat_id=event.source.chat_id,
content=_text,
reply_to=_reply_anchor_for_event(event),
metadata=_thread_meta,
)
if _eph_ttl > 0 and _r.success and _r.message_id:
self._schedule_ephemeral_delete(
chat_id=event.source.chat_id,
message_id=_r.message_id,
ttl_seconds=_eph_ttl,
)
except Exception as e:
logger.error(
"[%s] Clarify text-intercept dispatch failed: %s",
self.name, e, exc_info=True,
)
return
if self._busy_session_handler is not None:
try:
if await self._busy_session_handler(event, session_key):
@@ -3140,18 +2950,6 @@ class BasePlatformAdapter(ABC):
if text_content:
logger.info("[%s] Sending response (%d chars) to %s", self.name, len(text_content), event.source.chat_id)
_reply_anchor = _reply_anchor_for_event(event)
# Mark final response messages for notification delivery.
# Platform adapters that support per-message notification
# control (e.g. Telegram's disable_notification) use this
# flag to override silent-mode and ensure the final
# response triggers a push notification.
# Clone to avoid mutating the metadata shared with the
# typing-indicator task (which must remain unmarked).
if _thread_metadata is not None:
_thread_metadata = dict(_thread_metadata)
_thread_metadata["notify"] = True
else:
_thread_metadata = {"notify": True}
result = await self._send_with_retry(
chat_id=event.source.chat_id,
content=text_content,
+1 -1
View File
@@ -223,7 +223,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
def _webhook_url(self) -> str:
"""Compute the external webhook URL for BlueBubbles registration."""
host = self.webhook_host
if host in {"0.0.0.0", "127.0.0.1", "localhost", "::"}:
if host in ("0.0.0.0", "127.0.0.1", "localhost", "::"):
host = "localhost"
return f"http://{host}:{self.webhook_port}{self.webhook_path}"
+2 -63
View File
@@ -353,9 +353,9 @@ class DingTalkAdapter(BasePlatformAdapter):
configured = self.config.extra.get("require_mention")
if configured is not None:
if isinstance(configured, str):
return configured.lower() in {"true", "1", "yes", "on"}
return configured.lower() in ("true", "1", "yes", "on")
return bool(configured)
return os.getenv("DINGTALK_REQUIRE_MENTION", "false").lower() in {"true", "1", "yes", "on"}
return os.getenv("DINGTALK_REQUIRE_MENTION", "false").lower() in ("true", "1", "yes", "on")
def _dingtalk_free_response_chats(self) -> Set[str]:
raw = self.config.extra.get("free_response_chats")
@@ -886,67 +886,6 @@ class DingTalkAdapter(BasePlatformAdapter):
"""DingTalk does not support typing indicators."""
pass
async def send_image(
    self,
    chat_id: str,
    image_url: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Send a remote image by embedding it in a markdown message.

    DingTalk's session webhook accepts only text/markdown payloads, not
    native image/file attachments, so the image URL is rendered inline as
    a markdown image. A non-empty ``caption`` is placed above the image,
    separated by a blank line. Local files need OpenAPI media upload and
    are handled separately.
    """
    markdown_image = f"![image]({image_url})"
    if caption:
        body = f"{caption}\n\n{markdown_image}"
    else:
        body = markdown_image
    return await self.send(
        chat_id=chat_id,
        content=body,
        reply_to=reply_to,
        metadata=metadata,
    )
async def send_image_file(
    self,
    chat_id: str,
    image_path: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> SendResult:
    """Report that local image files cannot be sent via webhook replies.

    Always returns a failed ``SendResult`` with an explanatory error; no
    argument is inspected and nothing is sent.
    """
    unsupported_reason = (
        "DingTalk session webhook replies do not support local image uploads. "
        "Only markdown/text replies are supported without OpenAPI media upload."
    )
    return SendResult(success=False, error=unsupported_reason)
async def send_document(
    self,
    chat_id: str,
    file_path: str,
    caption: Optional[str] = None,
    file_name: Optional[str] = None,
    reply_to: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> SendResult:
    """Report that local file attachments cannot be sent via webhook replies.

    Always returns a failed ``SendResult`` with an explanatory error; no
    argument is inspected and nothing is sent.
    """
    unsupported_reason = (
        "DingTalk session webhook replies do not support local file attachments. "
        "Only markdown/text replies are supported without OpenAPI message send."
    )
    return SendResult(success=False, error=unsupported_reason)
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
"""Return basic info about a DingTalk conversation."""
return {
+14 -118
View File
@@ -86,32 +86,8 @@ def _clean_discord_id(entry: str) -> str:
def check_discord_requirements() -> bool:
"""Check if Discord dependencies are available.
Lazy-installs discord.py via ``tools.lazy_deps.ensure("platform.discord")``
on first call if not present. After successful install, re-binds module
globals so ``DISCORD_AVAILABLE`` becomes True.
"""
global DISCORD_AVAILABLE, discord, DiscordMessage, Intents, commands
if DISCORD_AVAILABLE:
return True
try:
from tools.lazy_deps import ensure as _lazy_ensure
_lazy_ensure("platform.discord", prompt=False)
except Exception:
return False
try:
import discord as _discord
from discord import Message as _DM, Intents as _Intents
from discord.ext import commands as _commands
except ImportError:
return False
discord = _discord
DiscordMessage = _DM
Intents = _Intents
commands = _commands
DISCORD_AVAILABLE = True
return True
"""Check if Discord dependencies are available."""
return DISCORD_AVAILABLE
def _build_allowed_mentions():
@@ -139,7 +115,7 @@ def _build_allowed_mentions():
raw = os.getenv(name, "").strip().lower()
if not raw:
return default
return raw in {"true", "1", "yes", "on"}
return raw in ("true", "1", "yes", "on")
return discord.AllowedMentions(
everyone=_b("DISCORD_ALLOW_MENTION_EVERYONE", False),
@@ -732,7 +708,7 @@ class DiscordAdapter(BasePlatformAdapter):
# Ignore Discord system messages (thread renames, pins, member joins, etc.)
# Allow both default and reply types — replies have a distinct MessageType.
if message.type not in {discord.MessageType.default, discord.MessageType.reply}:
if message.type not in (discord.MessageType.default, discord.MessageType.reply):
return
# Bot message filtering (DISCORD_ALLOW_BOTS):
@@ -793,7 +769,7 @@ class DiscordAdapter(BasePlatformAdapter):
# answer regardless of who is mentioned.
_ignore_no_mention = os.getenv(
"DISCORD_IGNORE_NO_MENTION", "true"
).lower() in {"true", "1", "yes"}
).lower() in ("true", "1", "yes")
if _ignore_no_mention and not _self_mentioned and not _other_bots_mentioned:
_channel_id = str(message.channel.id)
_parent_id = None
@@ -1341,7 +1317,7 @@ class DiscordAdapter(BasePlatformAdapter):
def _reactions_enabled(self) -> bool:
"""Check if message reactions are enabled via config/env."""
return os.getenv("DISCORD_REACTIONS", "true").lower() not in {"false", "0", "no"}
return os.getenv("DISCORD_REACTIONS", "true").lower() not in ("false", "0", "no")
async def on_processing_start(self, event: MessageEvent) -> None:
"""Add an in-progress reaction for normal Discord message events."""
@@ -2721,8 +2697,6 @@ class DiscordAdapter(BasePlatformAdapter):
await asyncio.sleep(8)
except asyncio.CancelledError:
pass
finally:
self._typing_tasks.pop(chat_id, None)
self._typing_tasks[chat_id] = asyncio.create_task(_typing_loop())
@@ -3161,9 +3135,9 @@ class DiscordAdapter(BasePlatformAdapter):
# UX so users don't see commands they can't invoke. Off by default
# to preserve the slash UX for deployments that intentionally allow
# everyone in the guild.
if os.getenv("DISCORD_HIDE_SLASH_COMMANDS", "false").strip().lower() in {
if os.getenv("DISCORD_HIDE_SLASH_COMMANDS", "false").strip().lower() in (
"true", "1", "yes", "on",
}:
):
self._apply_owner_only_visibility(tree)
def _apply_owner_only_visibility(self, tree) -> None:
@@ -3550,9 +3524,9 @@ class DiscordAdapter(BasePlatformAdapter):
configured = self.config.extra.get("require_mention")
if configured is not None:
if isinstance(configured, str):
return configured.lower() not in {"false", "0", "no", "off"}
return configured.lower() not in ("false", "0", "no", "off")
return bool(configured)
return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in {"false", "0", "no", "off"}
return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")
def _discord_free_response_channels(self) -> set:
"""Return Discord channel IDs where no bot mention is required.
@@ -3715,84 +3689,6 @@ class DiscordAdapter(BasePlatformAdapter):
)
return None
async def create_handoff_thread(
    self,
    parent_chat_id: str,
    name: str,
) -> Optional[str]:
    """Create a Discord thread under a channel for a session handoff.

    Strategy: first call ``create_thread`` on the parent channel itself;
    if that raises (or the parent has no such method), post a seed message
    and create the thread from that message instead.

    Returns:
        The new thread id as a string, or ``None`` when there is no
        client, ``parent_chat_id`` is not an integer, the parent channel
        cannot be resolved, the parent is a DM channel, or both creation
        paths fail.
    """
    if not self._client or not DISCORD_AVAILABLE:
        return None

    try:
        channel_id = int(parent_chat_id)
    except (TypeError, ValueError):
        return None

    # Prefer the client's cache; only hit the API when the channel is unknown.
    try:
        channel = self._client.get_channel(channel_id)
        if channel is None:
            channel = await self._client.fetch_channel(channel_id)
    except Exception as resolve_error:
        logger.warning(
            "[%s] Handoff thread: cannot resolve parent %s: %s",
            self.name, parent_chat_id, resolve_error,
        )
        return None

    # Only DM channels are rejected up front; other unsuitable channel
    # types surface as failures in the creation attempts below.
    dm_channel_type = getattr(discord, "DMChannel", ())
    if isinstance(channel, dm_channel_type):
        logger.info(
            "[%s] Handoff thread: parent %s is a DM; threads not supported here",
            self.name, parent_chat_id,
        )
        return None

    thread_name = (name or "handoff").strip()[:80] or "handoff"
    thread_options = {
        "name": thread_name,
        "auto_archive_duration": 1440,
        "reason": "Hermes session handoff",
    }

    # Attempt 1: create the thread directly on the channel.
    try:
        direct_create = getattr(channel, "create_thread", None)
        if direct_create is not None:
            created = await direct_create(**thread_options)
            return str(created.id)
    except Exception as direct_error:
        logger.debug(
            "[%s] Handoff thread: direct create failed (%s); trying seed-message fallback",
            self.name, direct_error,
        )

    # Attempt 2: post a seed message and hang the thread off it.
    try:
        post = getattr(channel, "send", None)
        if post is None:
            return None
        seed = await post(f"\U0001f9f5 Hermes handoff: **{thread_name}**")
        created = await seed.create_thread(**thread_options)
        return str(created.id)
    except Exception as fallback_error:
        logger.warning(
            "[%s] Handoff thread: both create paths failed for parent %s: %s",
            self.name, parent_chat_id, fallback_error,
        )
        return None
async def send_exec_approval(
self, chat_id: str, command: str, session_key: str,
description: str = "dangerous command",
@@ -4224,7 +4120,7 @@ class DiscordAdapter(BasePlatformAdapter):
no_thread_channels_raw = os.getenv("DISCORD_NO_THREAD_CHANNELS", "")
no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()}
skip_thread = bool(channel_ids & no_thread_channels)
auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in {"true", "1", "yes"}
auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
is_reply_message = getattr(message, "type", None) == discord.MessageType.reply
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
thread = await self._auto_create_thread(message)
@@ -4306,7 +4202,7 @@ class DiscordAdapter(BasePlatformAdapter):
try:
# Determine extension from content type (image/png -> .png)
ext = "." + content_type.split("/")[-1].split(";")[0]
if ext not in {".jpg", ".jpeg", ".png", ".gif", ".webp"}:
if ext not in (".jpg", ".jpeg", ".png", ".gif", ".webp"):
ext = ".jpg"
cached_path = await self._cache_discord_image(att, ext)
media_urls.append(cached_path)
@@ -4320,7 +4216,7 @@ class DiscordAdapter(BasePlatformAdapter):
elif content_type.startswith("audio/"):
try:
ext = "." + content_type.split("/")[-1].split(";")[0]
if ext not in {".ogg", ".mp3", ".wav", ".webm", ".m4a"}:
if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"):
ext = ".ogg"
cached_path = await self._cache_discord_audio(att, ext)
media_urls.append(cached_path)
@@ -4363,7 +4259,7 @@ class DiscordAdapter(BasePlatformAdapter):
logger.info("[Discord] Cached user document: %s", cached_path)
# Inject text content for plain-text documents (capped at 100 KB)
MAX_TEXT_INJECT_BYTES = 100 * 1024
if ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
if ext in (".md", ".txt", ".log") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
try:
text_content = raw_bytes.decode("utf-8")
display_name = att.filename or f"document{ext}"
+2 -27
View File
@@ -54,7 +54,7 @@ _NOREPLY_PATTERNS = (
# RFC headers that indicate bulk/automated mail
_AUTOMATED_HEADERS = {
"Auto-Submitted": lambda v: v.lower() != "no",
"Precedence": lambda v: v.lower() in {"bulk", "list", "junk"},
"Precedence": lambda v: v.lower() in ("bulk", "list", "junk"),
"X-Auto-Response-Suppress": lambda v: bool(v),
"List-Unsubscribe": lambda v: bool(v),
}
@@ -65,29 +65,6 @@ MAX_MESSAGE_LENGTH = 50_000
# Supported image extensions for inline detection
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
def _send_imap_id(imap: "imaplib.IMAP4") -> None:
"""Send RFC 2971 IMAP ID command identifying this client.
Required by 163/NetEase mailbox after LOGIN: without it, every UID
SEARCH/FETCH returns ``BYE Unsafe Login`` and disconnects. Other
IMAP servers either honor it silently or reject the unknown command;
we swallow failures so non-supporting servers keep working.
"""
try:
try:
from hermes_cli import __version__ as _hermes_version
except Exception: # noqa: BLE001 — keep ID best-effort if import fails
_hermes_version = "0"
imap.xatom(
"ID",
f'("name" "hermes-agent" "version" "{_hermes_version}" '
'"vendor" "NousResearch" '
'"support-email" "noreply@nousresearch.com")',
)
except Exception as e: # noqa: BLE001 — best-effort, never fatal
logger.debug("[Email] IMAP ID command not accepted: %s", e)
def _is_automated_sender(address: str, headers: dict) -> bool:
"""Return True if this email is from an automated/noreply source."""
addr = address.lower()
@@ -203,7 +180,7 @@ def _extract_attachments(
continue
# Skip text/plain and text/html body parts
content_type = part.get_content_type()
if content_type in {"text/plain", "text/html"} and "attachment" not in disposition:
if content_type in ("text/plain", "text/html") and "attachment" not in disposition:
continue
filename = part.get_filename()
@@ -299,7 +276,6 @@ class EmailAdapter(BasePlatformAdapter):
# Test IMAP connection
imap = imaplib.IMAP4_SSL(self._imap_host, self._imap_port, timeout=30)
imap.login(self._address, self._password)
_send_imap_id(imap)
# Mark all existing messages as seen so we only process new ones
imap.select("INBOX")
status, data = imap.uid("search", None, "ALL")
@@ -368,7 +344,6 @@ class EmailAdapter(BasePlatformAdapter):
imap = imaplib.IMAP4_SSL(self._imap_host, self._imap_port, timeout=30)
try:
imap.login(self._address, self._password)
_send_imap_id(imap)
imap.select("INBOX")
status, data = imap.uid("search", None, "UNSEEN")
+21 -31
View File
@@ -428,7 +428,7 @@ RejectReason = Literal[
def _is_bot_sender(sender: Any) -> bool:
# receive_v1 docs say {user, bot}; accept "app" defensively.
return getattr(sender, "sender_type", "") in {"bot", "app"}
return getattr(sender, "sender_type", "") in ("bot", "app")
def _sender_identity(sender: Any) -> frozenset:
@@ -1428,8 +1428,8 @@ class FeishuAdapter(BasePlatformAdapter):
per_chat_require_mention = _to_boolean(rule_cfg.get("require_mention"))
group_rules[str(chat_id)] = FeishuGroupRule(
policy=str(rule_cfg.get("policy", "open")).strip().lower(),
allowlist={str(u).strip() for u in rule_cfg.get("allowlist", []) if str(u).strip()},
blacklist={str(u).strip() for u in rule_cfg.get("blacklist", []) if str(u).strip()},
allowlist=set(str(u).strip() for u in rule_cfg.get("allowlist", []) if str(u).strip()),
blacklist=set(str(u).strip() for u in rule_cfg.get("blacklist", []) if str(u).strip()),
require_mention=per_chat_require_mention,
)
@@ -1443,7 +1443,7 @@ class FeishuAdapter(BasePlatformAdapter):
# Env-only so adapter and gateway auth bypass share one source; yaml
# feishu.allow_bots is bridged to this env var at config load.
allow_bots = os.getenv("FEISHU_ALLOW_BOTS", "none").strip().lower()
if allow_bots not in {"none", "mentions", "all"}:
if allow_bots not in ("none", "mentions", "all"):
logger.warning(
"[Feishu] Unknown allow_bots=%r, falling back to 'none'. Valid: none, mentions, all.",
allow_bots,
@@ -2752,7 +2752,7 @@ class FeishuAdapter(BasePlatformAdapter):
# =========================================================================
def _reactions_enabled(self) -> bool:
return os.getenv("FEISHU_REACTIONS", "true").strip().lower() not in {"false", "0", "no"}
return os.getenv("FEISHU_REACTIONS", "true").strip().lower() not in ("false", "0", "no")
async def _add_reaction(self, message_id: str, emoji_type: str) -> Optional[str]:
"""Return the reaction_id on success, else None. The id is needed later for deletion."""
@@ -3219,7 +3219,7 @@ class FeishuAdapter(BasePlatformAdapter):
self._on_bot_added_to_chat(data)
elif event_type == "im.chat.member.bot.deleted_v1":
self._on_bot_removed_from_chat(data)
elif event_type in {"im.message.reaction.created_v1", "im.message.reaction.deleted_v1"}:
elif event_type in ("im.message.reaction.created_v1", "im.message.reaction.deleted_v1"):
self._on_reaction_event(event_type, data)
elif event_type == "card.action.trigger":
self._on_card_action_trigger(data)
@@ -4273,31 +4273,21 @@ class FeishuAdapter(BasePlatformAdapter):
request = self._build_reply_message_request(effective_reply_to, body)
return await asyncio.to_thread(self._client.im.v1.message.reply, request)
# For topic/thread messages that fell back from reply→create, use
# thread_id as receive_id so the message lands in the topic instead of
# the main chat.
_thread_id = (metadata or {}).get("thread_id")
if _thread_id:
body = self._build_create_message_body(
receive_id=_thread_id,
msg_type=msg_type,
content=payload,
uuid_value=str(uuid.uuid4()),
)
request = self._build_create_message_request("thread_id", body)
body = self._build_create_message_body(
receive_id=chat_id,
msg_type=msg_type,
content=payload,
uuid_value=str(uuid.uuid4()),
)
# Detect whether chat_id is a user open_id (DM) or a chat_id (group).
# Feishu API expects receive_id_type="open_id" for user DMs (ou_ prefix)
# and receive_id_type="chat_id" for group chats (oc_ prefix, which IS
# the chat_id format — see https://open.feishu.cn/document/).
if chat_id.startswith("ou_"):
receive_id_type = "open_id"
else:
body = self._build_create_message_body(
receive_id=chat_id,
msg_type=msg_type,
content=payload,
uuid_value=str(uuid.uuid4()),
)
# Detect whether chat_id is a user open_id (DM) or a chat_id (group).
if chat_id.startswith("ou_"):
receive_id_type = "open_id"
else:
receive_id_type = "chat_id"
request = self._build_create_message_request(receive_id_type, body)
receive_id_type = "chat_id"
request = self._build_create_message_request(receive_id_type, body)
return await asyncio.to_thread(self._client.im.v1.message.create, request)
@staticmethod
@@ -4815,7 +4805,7 @@ def _poll_registration(
# Terminal errors
error = res.get("error", "")
if error in {"access_denied", "expired_token"}:
if error in ("access_denied", "expired_token"):
if poll_count > 0:
print()
logger.warning("[Feishu onboard] Registration %s", error)
+3 -3
View File
@@ -690,7 +690,7 @@ def _extract_docs_links(replies: List[Dict[str, Any]]) -> List[Dict[str, str]]:
except (json.JSONDecodeError, TypeError):
continue
for elem in content.get("elements", []):
if elem.get("type") not in {"docs_link", "link"}:
if elem.get("type") not in ("docs_link", "link"):
continue
link_data = elem.get("docs_link") or elem.get("link") or {}
url = link_data.get("url", "")
@@ -1031,7 +1031,7 @@ def _save_session_history(key: str, messages: List[Dict[str, Any]]) -> None:
# Only keep user/assistant messages (strip system messages and tool internals)
cleaned = [
m for m in messages
if m.get("role") in {"user", "assistant"} and m.get("content")
if m.get("role") in ("user", "assistant") and m.get("content")
]
# Keep last N
if len(cleaned) > _SESSION_MAX_MESSAGES:
@@ -1170,7 +1170,7 @@ async def handle_drive_comment_event(
rule = resolve_rule(comments_cfg, file_type, file_token)
# If no exact match and config has wiki keys, try reverse-lookup
if rule.match_source in {"wildcard", "top"} and has_wiki_keys(comments_cfg):
if rule.match_source in ("wildcard", "top") and has_wiki_keys(comments_cfg):
wiki_token = await _reverse_lookup_wiki_token(client, file_type, file_token)
if wiki_token:
rule = resolve_rule(comments_cfg, file_type, file_token, wiki_token=wiki_token)
+1 -1
View File
@@ -228,7 +228,7 @@ def _load_pairing_approved() -> set:
if isinstance(approved, dict):
return set(approved.keys())
if isinstance(approved, list):
return {str(u) for u in approved if u}
return set(str(u) for u in approved if u)
return set()
+1 -1
View File
@@ -246,7 +246,7 @@ class ThreadParticipationTracker:
thread_list = list(self._threads)
if len(thread_list) > self._max_tracked:
thread_list = thread_list[-self._max_tracked:]
self._threads = dict.fromkeys(thread_list)
self._threads = {thread_id: None for thread_id in thread_list}
atomic_json_write(path, thread_list, indent=None)
def mark(self, thread_id: str) -> None:
+2 -2
View File
@@ -256,7 +256,7 @@ class HomeAssistantAdapter(BasePlatformAdapter):
await self._handle_ha_event(data.get("event", {}))
except json.JSONDecodeError:
logger.debug("Invalid JSON from HA WS: %s", ws_msg.data[:200])
elif ws_msg.type in {aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR}:
elif ws_msg.type in (aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR):
break
async def _handle_ha_event(self, event: Dict[str, Any]) -> None:
@@ -361,7 +361,7 @@ class HomeAssistantAdapter(BasePlatformAdapter):
f"(was {'triggered' if old_val == 'on' else 'cleared'})"
)
if domain in {"light", "switch", "fan"}:
if domain in ("light", "switch", "fan"):
return (
f"[Home Assistant] {friendly_name}: turned "
f"{'on' if new_val == 'on' else 'off'}"
+13 -13
View File
@@ -245,11 +245,11 @@ def check_matrix_requirements() -> bool:
# If encryption is requested, verify E2EE deps are available at startup
# rather than silently degrading to plaintext-only at connect time.
encryption_requested = os.getenv("MATRIX_ENCRYPTION", "").lower() in {
encryption_requested = os.getenv("MATRIX_ENCRYPTION", "").lower() in (
"true",
"1",
"yes",
}
)
if encryption_requested and not _check_e2ee_deps():
logger.error(
"Matrix: MATRIX_ENCRYPTION=true but E2EE dependencies are missing. %s. "
@@ -312,7 +312,7 @@ class MatrixAdapter(BasePlatformAdapter):
)
self._encryption: bool = config.extra.get(
"encryption",
os.getenv("MATRIX_ENCRYPTION", "").lower() in {"true", "1", "yes"},
os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes"),
)
self._device_id: str = config.extra.get("device_id", "") or os.getenv(
"MATRIX_DEVICE_ID", ""
@@ -343,7 +343,7 @@ class MatrixAdapter(BasePlatformAdapter):
# Mention/thread gating — parsed once from env vars.
self._require_mention: bool = os.getenv(
"MATRIX_REQUIRE_MENTION", "true"
).lower() not in {"false", "0", "no"}
).lower() not in ("false", "0", "no")
free_rooms_raw = config.extra.get("free_response_rooms")
if free_rooms_raw is None:
free_rooms_raw = os.getenv("MATRIX_FREE_RESPONSE_ROOMS", "")
@@ -367,22 +367,22 @@ class MatrixAdapter(BasePlatformAdapter):
self._allowed_rooms: Set[str] = {
r.strip() for r in str(allowed_rooms_raw).split(",") if r.strip()
}
self._auto_thread: bool = os.getenv("MATRIX_AUTO_THREAD", "true").lower() in {
self._auto_thread: bool = os.getenv("MATRIX_AUTO_THREAD", "true").lower() in (
"true",
"1",
"yes",
}
)
self._dm_auto_thread: bool = os.getenv(
"MATRIX_DM_AUTO_THREAD", "false"
).lower() in {"true", "1", "yes"}
).lower() in ("true", "1", "yes")
self._dm_mention_threads: bool = os.getenv(
"MATRIX_DM_MENTION_THREADS", "false"
).lower() in {"true", "1", "yes"}
).lower() in ("true", "1", "yes")
# Reactions: configurable via MATRIX_REACTIONS (default: true).
self._reactions_enabled: bool = os.getenv(
"MATRIX_REACTIONS", "true"
).lower() not in {"false", "0", "no"}
).lower() not in ("false", "0", "no")
self._pending_reactions: dict[tuple[str, str], str] = {}
# Delay before redacting reactions so Matrix homeservers have time to
# deliver the final message event without tripping "missing event"
@@ -1771,9 +1771,9 @@ class MatrixAdapter(BasePlatformAdapter):
# Cache media locally when downstream tools need a real file path.
cached_path = None
should_cache_locally = msg_type in {
should_cache_locally = msg_type in (
MessageType.PHOTO, MessageType.AUDIO, MessageType.VIDEO, MessageType.DOCUMENT,
} or is_voice_message or is_encrypted_media
) or is_voice_message or is_encrypted_media
if should_cache_locally and url:
try:
file_bytes = await self._client.download_media(ContentURI(url))
@@ -1834,7 +1834,7 @@ class MatrixAdapter(BasePlatformAdapter):
ext = ext_map.get(media_type, ".jpg")
cached_path = cache_image_from_bytes(file_bytes, ext=ext)
logger.info("[Matrix] Cached user image at %s", cached_path)
elif msg_type in {MessageType.AUDIO, MessageType.VOICE}:
elif msg_type in (MessageType.AUDIO, MessageType.VOICE):
ext = (
Path(
body
@@ -2602,7 +2602,7 @@ class MatrixAdapter(BasePlatformAdapter):
"""Sanitize a URL for use in an href attribute."""
stripped = url.strip()
scheme = stripped.split(":", 1)[0].lower().strip() if ":" in stripped else ""
if scheme in {"javascript", "data", "vbscript"}:
if scheme in ("javascript", "data", "vbscript"):
return ""
return stripped.replace('"', "&quot;")
+6 -6
View File
@@ -611,7 +611,7 @@ class MattermostAdapter(BasePlatformAdapter):
# succeed on retry — stop reconnecting instead of looping forever.
import aiohttp
err_str = str(exc).lower()
if isinstance(exc, aiohttp.WSServerHandshakeError) and exc.status in {401, 403}:
if isinstance(exc, aiohttp.WSServerHandshakeError) and exc.status in (401, 403):
logger.error("Mattermost WS auth failed (HTTP %d) — stopping reconnect", exc.status)
return
if "401" in err_str or "403" in err_str or "unauthorized" in err_str:
@@ -649,21 +649,21 @@ class MattermostAdapter(BasePlatformAdapter):
if self._closing:
return
if raw_msg.type in {
if raw_msg.type in (
raw_msg.type.TEXT,
raw_msg.type.BINARY,
}:
):
try:
event = json.loads(raw_msg.data)
except (json.JSONDecodeError, TypeError):
continue
await self._handle_ws_event(event)
elif raw_msg.type in {
elif raw_msg.type in (
raw_msg.type.ERROR,
raw_msg.type.CLOSE,
raw_msg.type.CLOSING,
raw_msg.type.CLOSED,
}:
):
logger.info("Mattermost: WebSocket closed (%s)", raw_msg.type)
break
@@ -732,7 +732,7 @@ class MattermostAdapter(BasePlatformAdapter):
require_mention = os.getenv(
"MATTERMOST_REQUIRE_MENTION", "true"
).lower() not in {"false", "0", "no"}
).lower() not in ("false", "0", "no")
free_channels_raw = os.getenv("MATTERMOST_FREE_RESPONSE_CHANNELS", "")
free_channels = {ch.strip() for ch in free_channels_raw.split(",") if ch.strip()}
+16 -16
View File
@@ -513,7 +513,7 @@ class QQAdapter(BasePlatformAdapter):
self._fail_pending("Connection closed")
# Stop reconnecting for fatal codes
if code in {4914, 4915}:
if code in (4914, 4915):
desc = "offline/sandbox-only" if code == 4914 else "banned"
logger.error(
"[%s] Bot is %s. Check QQ Open Platform.", self._log_tag, desc
@@ -550,7 +550,7 @@ class QQAdapter(BasePlatformAdapter):
self._token_expires_at = 0.0
# Session invalid → clear session, will re-identify on next Hello
if code in {
if code in (
4006,
4007,
4009,
@@ -568,7 +568,7 @@ class QQAdapter(BasePlatformAdapter):
4911,
4912,
4913,
}:
):
logger.info(
"[%s] Session error (%d), clearing session for re-identify",
self._log_tag,
@@ -637,12 +637,12 @@ class QQAdapter(BasePlatformAdapter):
payload = self._parse_json(msg.data)
if payload:
self._dispatch_payload(payload)
elif msg.type in {aiohttp.WSMsgType.PING,}:
elif msg.type in (aiohttp.WSMsgType.PING,):
# aiohttp auto-replies with PONG
pass
elif msg.type == aiohttp.WSMsgType.CLOSE:
raise QQCloseError(msg.data, msg.extra)
elif msg.type in {aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR}:
elif msg.type in (aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR):
raise RuntimeError("WebSocket closed")
async def _heartbeat_loop(self) -> None:
@@ -783,13 +783,13 @@ class QQAdapter(BasePlatformAdapter):
self._handle_ready(d)
elif t == "RESUMED":
logger.info("[%s] Session resumed", self._log_tag)
elif t in {
elif t in (
"C2C_MESSAGE_CREATE",
"GROUP_AT_MESSAGE_CREATE",
"DIRECT_MESSAGE_CREATE",
"GUILD_MESSAGE_CREATE",
"GUILD_AT_MESSAGE_CREATE",
}:
):
asyncio.create_task(self._on_message(t, d))
elif t == "INTERACTION_CREATE":
self._create_task(self._on_interaction(d))
@@ -859,9 +859,9 @@ class QQAdapter(BasePlatformAdapter):
# Route by event type
if event_type == "C2C_MESSAGE_CREATE":
await self._handle_c2c_message(d, msg_id, content, author, timestamp)
elif event_type in {"GROUP_AT_MESSAGE_CREATE",}:
elif event_type in ("GROUP_AT_MESSAGE_CREATE",):
await self._handle_group_message(d, msg_id, content, author, timestamp)
elif event_type in {"GUILD_MESSAGE_CREATE", "GUILD_AT_MESSAGE_CREATE"}:
elif event_type in ("GUILD_MESSAGE_CREATE", "GUILD_AT_MESSAGE_CREATE"):
await self._handle_guild_message(d, msg_id, content, author, timestamp)
elif event_type == "DIRECT_MESSAGE_CREATE":
await self._handle_dm_message(d, msg_id, content, author, timestamp)
@@ -1864,7 +1864,7 @@ class QQAdapter(BasePlatformAdapter):
return ".wav"
if data[:4] == b"fLaC":
return ".flac"
if data[:2] in {b"\xff\xfb", b"\xff\xf3", b"\xff\xf2"}:
if data[:2] in (b"\xff\xfb", b"\xff\xf3", b"\xff\xf2"):
return ".mp3"
if data[:4] == b"\x30\x26\xb2\x75" or data[:4] == b"\x4f\x67\x67\x53":
return ".ogg"
@@ -2033,7 +2033,7 @@ class QQAdapter(BasePlatformAdapter):
"base_url": base_url,
"api_key": api_key,
"model": model
or ("glm-asr" if provider in {"zai", "glm"} else "whisper-1"),
or ("glm-asr" if provider in ("zai", "glm") else "whisper-1"),
}
# 2. QQ-specific env vars (set by `hermes setup gateway` / `hermes gateway`)
@@ -2115,7 +2115,7 @@ class QQAdapter(BasePlatformAdapter):
if urlparse(source_url).path
else ""
)
if not ext or ext not in {
if not ext or ext not in (
".silk",
".amr",
".mp3",
@@ -2124,7 +2124,7 @@ class QQAdapter(BasePlatformAdapter):
".m4a",
".aac",
".flac",
}:
):
ext = self._guess_ext_from_data(audio_data)
with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_src:
@@ -2870,7 +2870,7 @@ class QQAdapter(BasePlatformAdapter):
raise ValueError("Media source is required")
parsed = urlparse(source)
if parsed.scheme in {"http", "https"}:
if parsed.scheme in ("http", "https"):
# For URLs, pass through directly to the upload API
content_type = mimetypes.guess_type(source)[0] or "application/octet-stream"
resolved_name = file_name or Path(parsed.path).name or "media"
@@ -2966,7 +2966,7 @@ class QQAdapter(BasePlatformAdapter):
chat_type = self._guess_chat_type(chat_id)
return {
"name": chat_id,
"type": "group" if chat_type in {"group", "guild"} else "dm",
"type": "group" if chat_type in ("group", "guild") else "dm",
}
# ------------------------------------------------------------------
@@ -2975,7 +2975,7 @@ class QQAdapter(BasePlatformAdapter):
@staticmethod
def _is_url(source: str) -> bool:
return urlparse(str(source)).scheme in {"http", "https"}
return urlparse(str(source)).scheme in ("http", "https")
def _guess_chat_type(self, chat_id: str) -> str:
"""Determine chat type from stored inbound metadata, fallback to 'c2c'."""
+3 -2
View File
@@ -239,7 +239,7 @@ class ChunkedUploader:
:raises UploadFileTooLargeError: When the file exceeds the platform limit.
:raises RuntimeError: On other API or I/O failures.
"""
if chat_type not in {"c2c", "group"}:
if chat_type not in ("c2c", "group"):
raise ValueError(
f"ChunkedUploader: unsupported chat_type {chat_type!r}"
)
@@ -592,7 +592,8 @@ async def _run_with_concurrency(
concurrency: int,
) -> None:
"""Run a list of thunks with a bounded number in flight at once."""
concurrency = max(concurrency, 1)
if concurrency < 1:
concurrency = 1
sem = asyncio.Semaphore(concurrency)
async def _wrap(thunk: Callable[[], Awaitable[None]]) -> None:
+4 -6
View File
@@ -99,11 +99,11 @@ def _guess_extension(data: bytes) -> str:
def _is_image_ext(ext: str) -> bool:
return ext.lower() in {".jpg", ".jpeg", ".png", ".gif", ".webp"}
return ext.lower() in (".jpg", ".jpeg", ".png", ".gif", ".webp")
def _is_audio_ext(ext: str) -> bool:
return ext.lower() in {".mp3", ".wav", ".ogg", ".m4a", ".aac"}
return ext.lower() in (".mp3", ".wav", ".ogg", ".m4a", ".aac")
_EXT_TO_MIME = {
@@ -446,9 +446,7 @@ class SignalAdapter(BasePlatformAdapter):
if sent_msg and isinstance(sent_msg, dict):
dest = sent_msg.get("destinationNumber") or sent_msg.get("destination")
sent_ts = sent_msg.get("timestamp")
sent_msg_group_info = sent_msg.get("groupInfo") or {}
sent_msg_group_id = sent_msg_group_info.get("groupId") if sent_msg_group_info else None
if dest == self._account_normalized or sent_msg_group_id:
if dest == self._account_normalized:
# Check if this is an echo of our own outbound reply
if sent_ts and sent_ts in self._recent_sent_timestamps:
self._recent_sent_timestamps.discard(sent_ts)
@@ -1451,7 +1449,7 @@ class SignalAdapter(BasePlatformAdapter):
contacts from seeing the 👀 reaction (which fires before run.py's
auth gate and would otherwise reveal that a bot is listening).
"""
if os.getenv("SIGNAL_REACTIONS", "true").lower() in {"false", "0", "no"}:
if os.getenv("SIGNAL_REACTIONS", "true").lower() in ("false", "0", "no"):
return False
if event is not None:
sender = getattr(getattr(event, "source", None), "user_id", None)
+11 -46
View File
@@ -679,41 +679,6 @@ class SlackAdapter(BasePlatformAdapter):
if lock_acquired and not self._running:
self._release_platform_lock()
async def create_handoff_thread(
    self,
    parent_chat_id: str,
    name: str,
) -> Optional[str]:
    """Create a Slack thread anchor for a session handoff.

    Slack threads are anchored to a parent message (``thread_ts``), not
    a channel-level construct.  So we post a seed message into
    ``parent_chat_id`` and return its ``ts`` — the watcher uses that as
    the ``thread_id`` for subsequent sends.

    Args:
        parent_chat_id: Channel the seed message is posted to.
        name: Label shown in the seed message; falls back to
            ``"session"`` when empty and is truncated to 80 characters.

    Returns:
        The seed message ``ts`` as a string, or ``None`` when the adapter
        has no app, no client is available for the channel, the response
        carries no ``ts``, or posting raised.
    """
    if not self._app:
        return None
    try:
        client = self._get_client(parent_chat_id)
        if client is None:
            return None
        label = (name or "session").strip()[:80]
        result = await client.chat_postMessage(
            channel=parent_chat_id,
            text=f":thread: Hermes handoff — *{label}*",
        )
        # Plain dicts and response objects exposing ``.get`` are read the
        # same way; anything without ``.get`` yields no ts.
        getter = getattr(result, "get", None)
        ts = getter("ts") if getter is not None else None
        if ts:
            return str(ts)
    except Exception as exc:  # noqa: BLE001 — best-effort, never fatal
        logger.warning(
            "[%s] Handoff thread: seed-post failed for channel %s: %s",
            self.name, parent_chat_id, exc,
        )
    return None
async def disconnect(self) -> None:
"""Disconnect from Slack."""
if self._handler:
@@ -935,7 +900,7 @@ class SlackAdapter(BasePlatformAdapter):
raw = self.config.extra.get("dm_top_level_threads_as_sessions")
if raw is None:
return True # default: each DM thread is its own session
return str(raw).strip().lower() in {"1", "true", "yes", "on"}
return str(raw).strip().lower() in ("1", "true", "yes", "on")
def _resolve_thread_ts(
self,
@@ -1300,7 +1265,7 @@ class SlackAdapter(BasePlatformAdapter):
def _reactions_enabled(self) -> bool:
"""Check if message reactions are enabled via config/env."""
return os.getenv("SLACK_REACTIONS", "true").lower() not in {"false", "0", "no"}
return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no")
async def on_processing_start(self, event: MessageEvent) -> None:
"""Add an in-progress reaction when message processing begins."""
@@ -1773,7 +1738,7 @@ class SlackAdapter(BasePlatformAdapter):
# Ignore message edits and deletions
subtype = event.get("subtype")
if subtype in {"message_changed", "message_deleted"}:
if subtype in ("message_changed", "message_deleted"):
return
original_text = event.get("text", "")
@@ -1892,7 +1857,7 @@ class SlackAdapter(BasePlatformAdapter):
channel_type = event.get("channel_type", "")
if not channel_type and channel_id.startswith("D"):
channel_type = "im"
is_dm = channel_type in {"im", "mpim"} # Both 1:1 and group DMs
is_dm = channel_type in ("im", "mpim") # Both 1:1 and group DMs
# Build thread_ts for session keying.
# In channels: fall back to ts so each top-level @mention starts a
@@ -2033,7 +1998,7 @@ class SlackAdapter(BasePlatformAdapter):
if mimetype.startswith("image/") and url:
try:
ext = "." + mimetype.split("/")[-1].split(";")[0]
if ext not in {".jpg", ".jpeg", ".png", ".gif", ".webp"}:
if ext not in (".jpg", ".jpeg", ".png", ".gif", ".webp"):
ext = ".jpg"
# Slack private URLs require the bot token as auth header
cached = await self._download_slack_file(url, ext, team_id=team_id)
@@ -2049,7 +2014,7 @@ class SlackAdapter(BasePlatformAdapter):
elif mimetype.startswith("audio/") and url:
try:
ext = "." + mimetype.split("/")[-1].split(";")[0]
if ext not in {".ogg", ".mp3", ".wav", ".webm", ".m4a"}:
if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"):
ext = ".ogg"
cached = await self._download_slack_file(url, ext, audio=True, team_id=team_id)
media_urls.append(cached)
@@ -2737,7 +2702,7 @@ class SlackAdapter(BasePlatformAdapter):
if team_id and channel_id:
self._channel_team[channel_id] = team_id
if slash_name in {"hermes", ""}:
if slash_name in ("hermes", ""):
# Legacy /hermes <subcommand> [args] routing + free-form questions.
# Empty slash_name falls into this branch for backward compat
# with any caller that didn't populate command["command"].
@@ -2932,9 +2897,9 @@ class SlackAdapter(BasePlatformAdapter):
configured = self.config.extra.get("require_mention")
if configured is not None:
if isinstance(configured, str):
return configured.lower() not in {"false", "0", "no", "off"}
return configured.lower() not in ("false", "0", "no", "off")
return bool(configured)
return os.getenv("SLACK_REQUIRE_MENTION", "true").lower() not in {"false", "0", "no", "off"}
return os.getenv("SLACK_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")
def _slack_strict_mention(self) -> bool:
"""When true, channel threads require an explicit @-mention on every
@@ -2944,9 +2909,9 @@ class SlackAdapter(BasePlatformAdapter):
configured = self.config.extra.get("strict_mention")
if configured is not None:
if isinstance(configured, str):
return configured.lower() in {"true", "1", "yes", "on"}
return configured.lower() in ("true", "1", "yes", "on")
return bool(configured)
return os.getenv("SLACK_STRICT_MENTION", "false").lower() in {"true", "1", "yes", "on"}
return os.getenv("SLACK_STRICT_MENTION", "false").lower() in ("true", "1", "yes", "on")
def _slack_free_response_channels(self) -> set:
"""Return channel IDs where no @mention is required."""
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -59,7 +59,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
"""
def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
self._fallback_ips = list(dict.fromkeys(_normalize_fallback_ips(fallback_ips)))
self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
if proxy_url and "proxy" not in transport_kwargs:
transport_kwargs["proxy"] = proxy_url
+4 -4
View File
@@ -295,7 +295,7 @@ class WeComAdapter(BasePlatformAdapter):
auth_payload = await self._wait_for_handshake(req_id)
errcode = auth_payload.get("errcode", 0)
if errcode not in {0, None}:
if errcode not in (0, None):
errmsg = auth_payload.get("errmsg", "authentication failed")
raise RuntimeError(f"{errmsg} (errcode={errcode})")
@@ -320,7 +320,7 @@ class WeComAdapter(BasePlatformAdapter):
if self._payload_req_id(payload) == req_id:
return payload
logger.debug("[%s] Ignoring pre-auth payload: %s", self.name, payload.get("cmd"))
elif msg.type in {aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.CLOSE, aiohttp.WSMsgType.ERROR}:
elif msg.type in (aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.CLOSE, aiohttp.WSMsgType.ERROR):
raise RuntimeError("WeCom websocket closed during authentication")
async def _listen_loop(self) -> None:
@@ -360,7 +360,7 @@ class WeComAdapter(BasePlatformAdapter):
payload = self._parse_json(msg.data)
if payload:
await self._dispatch_payload(payload)
elif msg.type in {aiohttp.WSMsgType.CLOSE, aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR}:
elif msg.type in (aiohttp.WSMsgType.CLOSE, aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR):
raise RuntimeError("WeCom websocket closed")
async def _heartbeat_loop(self) -> None:
@@ -998,7 +998,7 @@ class WeComAdapter(BasePlatformAdapter):
@staticmethod
def _response_error(response: Dict[str, Any]) -> Optional[str]:
errcode = response.get("errcode", 0)
if errcode in {0, None}:
if errcode in (0, None):
return None
errmsg = str(response.get("errmsg") or "unknown error")
return f"WeCom errcode {errcode}: {errmsg}"
+4 -4
View File
@@ -605,7 +605,7 @@ def _assert_weixin_cdn_url(url: str) -> None:
except Exception as exc: # noqa: BLE001
raise ValueError(f"Unparseable media URL: {url!r}") from exc
if scheme not in {"http", "https"}:
if scheme not in ("http", "https"):
raise ValueError(
f"Media URL has disallowed scheme {scheme!r}; only http/https are permitted."
)
@@ -983,7 +983,7 @@ def _extract_text(item_list: List[Dict[str, Any]]) -> str:
ref = item.get("ref_msg") or {}
ref_item = ref.get("message_item") or {}
ref_type = ref_item.get("type")
if ref_type in {ITEM_IMAGE, ITEM_VIDEO, ITEM_FILE, ITEM_VOICE}:
if ref_type in (ITEM_IMAGE, ITEM_VIDEO, ITEM_FILE, ITEM_VOICE):
title = ref.get("title") or ""
prefix = f"[引用媒体: {title}]\n" if title else "[引用媒体]\n"
return f"{prefix}{text}".strip()
@@ -1331,7 +1331,7 @@ class WeixinAdapter(BasePlatformAdapter):
ret = response.get("ret", 0)
errcode = response.get("errcode", 0)
if ret not in {0, None} or errcode not in {0, None}:
if ret not in (0, None) or errcode not in (0, None):
if (ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE
or _is_stale_session_ret(ret, errcode, response.get("errmsg"))):
logger.error("[%s] Session expired; pausing for 10 minutes", self.name)
@@ -1601,7 +1601,7 @@ class WeixinAdapter(BasePlatformAdapter):
if resp and isinstance(resp, dict):
ret = resp.get("ret")
errcode = resp.get("errcode")
if (ret is not None and ret not in {0,}) or (errcode is not None and errcode not in {0,}):
if (ret is not None and ret not in (0,)) or (errcode is not None and errcode not in (0,)):
is_session_expired = (
ret == SESSION_EXPIRED_ERRCODE
or errcode == SESSION_EXPIRED_ERRCODE
+4 -4
View File
@@ -301,9 +301,9 @@ class WhatsAppAdapter(BasePlatformAdapter):
configured = self.config.extra.get("require_mention")
if configured is not None:
if isinstance(configured, str):
return configured.lower() in {"true", "1", "yes", "on"}
return configured.lower() in ("true", "1", "yes", "on")
return bool(configured)
return os.getenv("WHATSAPP_REQUIRE_MENTION", "false").lower() in {"true", "1", "yes", "on"}
return os.getenv("WHATSAPP_REQUIRE_MENTION", "false").lower() in ("true", "1", "yes", "on")
def _whatsapp_free_response_chats(self) -> set[str]:
raw = self.config.extra.get("free_response_chats")
@@ -679,7 +679,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
# getattr-with-default keeps tests that construct the adapter via
# ``WhatsAppAdapter.__new__`` (bypassing __init__) working without
# every _make_adapter() helper having to seed the attribute.
if getattr(self, "_shutting_down", False) and returncode in {0, -2, -15}:
if getattr(self, "_shutting_down", False) and returncode in (0, -2, -15):
logger.info(
"[%s] Bridge exited during shutdown (code %d).",
self.name,
@@ -1183,7 +1183,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
if msg_type == MessageType.DOCUMENT and cached_urls:
for doc_path in cached_urls:
ext = Path(doc_path).suffix.lower()
if ext in {".txt", ".md", ".csv", ".json", ".xml", ".yaml", ".yml", ".log", ".py", ".js", ".ts", ".html", ".css"}:
if ext in (".txt", ".md", ".csv", ".json", ".xml", ".yaml", ".yml", ".log", ".py", ".js", ".ts", ".html", ".css"):
try:
file_size = Path(doc_path).stat().st_size
if file_size > MAX_TEXT_INJECT_BYTES:
+5 -5
View File
@@ -2228,7 +2228,7 @@ class MediaResolveMiddleware(InboundMiddleware):
resp.raise_for_status()
payload = resp.json()
code = payload.get("code")
if code not in {None, 0}:
if code not in (None, 0):
raise RuntimeError(
f"resource/v1/download failed: code={code}, msg={payload.get('msg', '')}"
)
@@ -2391,7 +2391,7 @@ class MediaResolveMiddleware(InboundMiddleware):
rid = m.group(2)
kind, _, filename = head.partition(":")
kind = kind.strip()
if kind not in {"image", "file"}:
if kind not in ("image", "file"):
continue
if rid in seen:
continue
@@ -2993,10 +2993,10 @@ class ConnectionManager:
# Fire-and-forget heartbeat ACKs — server always responds but callers don't
# wait on these; silently discard to avoid "Unmatched Response" noise.
if cmd_type == CMD_TYPE["Response"] and cmd in {
if cmd_type == CMD_TYPE["Response"] and cmd in (
"send_group_heartbeat",
"send_private_heartbeat",
}:
):
logger.debug("[%s] Heartbeat ACK received: cmd=%s msg_id=%s", adapter.name, cmd, msg_id)
return
@@ -3369,7 +3369,7 @@ class MediaSendHandler(ABC):
# Remove keys already passed explicitly to avoid "multiple values" TypeError
fwd_kwargs = {
k: v for k, v in kwargs.items()
if k not in {"file_uuid", "filename", "content_type"}
if k not in ("file_uuid", "filename", "content_type")
}
msg_body = self.build_msg_body(
upload_result,
+2 -2
View File
@@ -150,7 +150,7 @@ def _parse_jpeg_size(buf: bytes) -> Optional[dict[str, int]]:
i += 1
continue
marker = buf[i + 1]
if marker in {0xC0, 0xC2}:
if marker in (0xC0, 0xC2):
h = struct.unpack(">H", buf[i + 5: i + 7])[0]
w = struct.unpack(">H", buf[i + 7: i + 9])[0]
return {"width": w, "height": h}
@@ -165,7 +165,7 @@ def _parse_gif_size(buf: bytes) -> Optional[dict[str, int]]:
if len(buf) < 10:
return None
sig = buf[:6].decode("ascii", errors="replace")
if sig not in {"GIF87a", "GIF89a"}:
if sig not in ("GIF87a", "GIF89a"):
return None
w = struct.unpack("<H", buf[6:8])[0]
h = struct.unpack("<H", buf[8:10])[0]
+1 -1
View File
@@ -702,7 +702,7 @@ def decode_inbound_push(data: bytes) -> Optional[dict]:
"trace_id": trace_id,
}
# 过滤空值(保持 API 整洁)
return {k: v for k, v in result.items() if v or k in {"msg_body", "msg_seq"}}
return {k: v for k, v in result.items() if v or k in ("msg_body", "msg_seq")}
except Exception as e:
if DEBUG_MODE:
logger.debug("[yuanbao_proto] decode_inbound_push failed: %s", e)
+564 -1432
View File
File diff suppressed because it is too large Load Diff
+7 -12
View File
@@ -764,12 +764,12 @@ class SessionStore:
now = _now()
if policy.mode in {"idle", "both"}:
if policy.mode in ("idle", "both"):
idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
if now > idle_deadline:
return True
if policy.mode in {"daily", "both"}:
if policy.mode in ("daily", "both"):
today_reset = now.replace(
hour=policy.at_hour,
minute=0, second=0, microsecond=0,
@@ -805,12 +805,12 @@ class SessionStore:
now = _now()
if policy.mode in {"idle", "both"}:
if policy.mode in ("idle", "both"):
idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
if now > idle_deadline:
return "idle"
if policy.mode in {"daily", "both"}:
if policy.mode in ("daily", "both"):
today_reset = now.replace(
hour=policy.at_hour,
minute=0,
@@ -1276,14 +1276,9 @@ class SessionStore:
# Also write legacy JSONL (keeps existing tooling working during transition)
transcript_path = self.get_transcript_path(session_id)
try:
with self._lock:
with open(transcript_path, "a", encoding="utf-8") as f:
f.write(json.dumps(message, ensure_ascii=False) + "\n")
except OSError as e:
# Disk full / read-only fs / permission errors must not crash the
# message handler — the SQLite write above is the primary store.
logger.debug("Failed to write JSONL transcript for %s: %s", session_id, e)
with self._lock:
with open(transcript_path, "a", encoding="utf-8") as f:
f.write(json.dumps(message, ensure_ascii=False) + "\n")
def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
"""Replace the entire transcript for a session with new messages.
-2
View File
@@ -55,7 +55,6 @@ _SESSION_THREAD_ID: ContextVar = ContextVar("HERMES_SESSION_THREAD_ID", default=
_SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNSET)
_SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
_SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)
_SESSION_ID: ContextVar = ContextVar("HERMES_SESSION_ID", default=_UNSET)
# Cron auto-delivery vars — set per-job in run_job() so concurrent jobs
# don't clobber each other's delivery targets.
@@ -71,7 +70,6 @@ _VAR_MAP = {
"HERMES_SESSION_USER_ID": _SESSION_USER_ID,
"HERMES_SESSION_USER_NAME": _SESSION_USER_NAME,
"HERMES_SESSION_KEY": _SESSION_KEY,
"HERMES_SESSION_ID": _SESSION_ID,
"HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM,
"HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID,
"HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID,
-462
View File
@@ -1,462 +0,0 @@
"""Shutdown forensics — capture context when the gateway receives SIGTERM/SIGINT.
The gateway's ``shutdown_signal_handler`` runs synchronously inside the
asyncio event loop. We can't safely block it for long, but we DO want a
durable record of who/what triggered the shutdown so that "the gateway
keeps dying" incidents can be diagnosed after the fact.
This module exposes :func:`snapshot_shutdown_context`, a fast (<10ms),
non-blocking probe that returns a structured dict the signal handler can
log immediately, plus :func:`spawn_async_diagnostic`, a fire-and-forget
``ps`` walk that runs as a detached subprocess so it can't block teardown
even if /proc is wedged.
Anything that needs to wait (e.g. shelling out to ``ps aux``) belongs in
the async helper, never in the synchronous probe.
"""
from __future__ import annotations
import json
import os
import signal
import subprocess
import sys
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
_SIGNAL_NAME_BY_NUM: Dict[int, str] = {}
for _name in ("SIGTERM", "SIGINT", "SIGHUP", "SIGQUIT", "SIGUSR1", "SIGUSR2"):
_val = getattr(signal, _name, None)
if _val is not None:
_SIGNAL_NAME_BY_NUM[int(_val)] = _name
def _signal_name(sig: Any) -> str:
"""Return a human-readable signal name (or ``str(sig)`` as fallback)."""
if sig is None:
return "UNKNOWN"
try:
sig_int = int(sig)
except (TypeError, ValueError):
return str(sig)
return _SIGNAL_NAME_BY_NUM.get(sig_int, f"signal#{sig_int}")
def _read_proc_field(pid: int, key: str) -> Optional[str]:
"""Read a single field from /proc/<pid>/status. Linux only; None elsewhere."""
try:
with open(f"/proc/{pid}/status", encoding="utf-8") as fh:
for line in fh:
if line.startswith(key + ":"):
return line.split(":", 1)[1].strip()
except (FileNotFoundError, PermissionError, OSError):
pass
return None
def _read_proc_cmdline(pid: int) -> Optional[str]:
"""Read /proc/<pid>/cmdline as a printable string. Linux only; None elsewhere."""
try:
with open(f"/proc/{pid}/cmdline", "rb") as fh:
data = fh.read()
except (FileNotFoundError, PermissionError, OSError):
return None
if not data:
return None
# cmdline uses NUL separators
return data.replace(b"\x00", b" ").decode("utf-8", errors="replace").strip()
def _proc_summary(pid: int) -> Dict[str, Any]:
"""Compact /proc/<pid> snapshot: pid, ppid, state, uid, cmdline.
Best-effort. Missing fields are simply omitted rather than raising.
"""
summary: Dict[str, Any] = {"pid": pid}
if pid <= 0:
return summary
name = _read_proc_field(pid, "Name")
if name is not None:
summary["name"] = name
state = _read_proc_field(pid, "State")
if state is not None:
summary["state"] = state
ppid = _read_proc_field(pid, "PPid")
if ppid is not None:
try:
summary["ppid"] = int(ppid)
except ValueError:
pass
uid = _read_proc_field(pid, "Uid")
if uid is not None:
# "real effective saved fs"
summary["uid"] = uid.split()[0] if uid else uid
cmdline = _read_proc_cmdline(pid)
if cmdline:
# Truncate aggressively — these can be 4KB
summary["cmdline"] = cmdline[:300]
return summary
def _marker_targets_pid(raw: str, pid: int) -> bool:
    """Return True when ``raw`` contains a ``target_pid`` entry equal to ``pid``.

    A plain substring test would report pid 12 as matching a marker that
    names pid 123, so the character following the match must not be a digit.
    """
    for needle in (f'"target_pid": {pid}', f"'target_pid': {pid}"):
        start = raw.find(needle)
        while start != -1:
            end = start + len(needle)
            if end == len(raw) or not raw[end].isdigit():
                return True
            start = raw.find(needle, start + 1)
    return False


def snapshot_shutdown_context(received_signal: Any = None) -> Dict[str, Any]:
    """Take a quick snapshot of who/what is asking us to shut down.

    Captures:

    * The signal name and number (``signal_num`` is ``None`` when the value
      is missing or not convertible to ``int``)
    * Our own pid/ppid plus ``/proc`` summaries of ourselves and our parent
    * systemd hints: ``INVOCATION_ID`` / ``JOURNAL_STREAM`` from the
      environment and ``under_systemd`` (``INVOCATION_ID`` set or ppid == 1)
    * The 1-minute load average, when the platform provides it
    * ``TracerPid`` from ``/proc`` when a tracer is attached
    * The first 300 characters of the takeover / planned-stop marker files
      under ``$HERMES_HOME``, when they exist
    * Wall-clock and monotonic timestamps for correlating later phases

    Uses only the standard library and starts no subprocesses. Intended to
    be safe to call from the shutdown signal handler, so failures are
    swallowed rather than raised.
    """
    now = time.time()
    monotonic = time.monotonic()
    pid = os.getpid()
    ppid = os.getppid()

    # _signal_name() tolerates odd inputs; the numeric field must too,
    # otherwise a non-numeric value would raise out of this function.
    try:
        signal_num = int(received_signal) if received_signal is not None else None
    except (TypeError, ValueError):
        signal_num = None

    ctx: Dict[str, Any] = {
        "ts": now,
        "ts_monotonic": monotonic,
        "signal": _signal_name(received_signal),
        "signal_num": signal_num,
        "pid": pid,
        "ppid": ppid,
        "parent": _proc_summary(ppid),
        "self": _proc_summary(pid),
    }

    # systemd context. If we were started by a systemd unit, INVOCATION_ID
    # is set in our env. ppid == 1 (init) is also treated as a hint that
    # systemd is the one delivering the signal.
    invocation_id = os.environ.get("INVOCATION_ID")
    if invocation_id:
        ctx["systemd_invocation_id"] = invocation_id
    journal_stream = os.environ.get("JOURNAL_STREAM")
    if journal_stream:
        ctx["systemd_journal_stream"] = journal_stream
    ctx["under_systemd"] = bool(invocation_id) or ppid == 1

    # Load average: high load points at "something else is crushing the
    # box" rather than "an external killer".
    try:
        ctx["loadavg_1m"] = os.getloadavg()[0]
    except (OSError, AttributeError):
        pass

    # TracerPid: nonzero means a debugger / strace is attached.
    try:
        tracer = _read_proc_field(pid, "TracerPid")
        if tracer is not None and tracer != "0":
            ctx["tracer_pid"] = int(tracer) if tracer.isdigit() else tracer
            ctx["tracer"] = _proc_summary(int(tracer)) if tracer.isdigit() else None
    except (TypeError, ValueError):
        pass

    # Race-detection hint: a takeover marker on disk that does NOT name us
    # suggests another ``--replace`` instance is the one stopping us.
    # Filenames mirror gateway.status (_TAKEOVER_MARKER_FILENAME /
    # _PLANNED_STOP_MARKER_FILENAME); string literals are used here so the
    # signal-handler path stays import-light.
    try:
        hermes_home_str = os.environ.get("HERMES_HOME")
        if hermes_home_str:
            takeover_path = Path(hermes_home_str) / ".gateway-takeover.json"
            if takeover_path.exists():
                try:
                    raw = takeover_path.read_text(encoding="utf-8")
                    ctx["takeover_marker"] = raw[:300]
                    ctx["takeover_marker_for_self"] = _marker_targets_pid(raw, pid)
                except OSError:
                    pass
            planned_stop_path = Path(hermes_home_str) / ".gateway-planned-stop.json"
            if planned_stop_path.exists():
                try:
                    raw = planned_stop_path.read_text(encoding="utf-8")
                    ctx["planned_stop_marker"] = raw[:300]
                except OSError:
                    pass
    except Exception:  # noqa: BLE001 — never raise from a signal handler
        pass

    return ctx
def spawn_async_diagnostic(
    log_path: Path,
    signal_name: str,
    *,
    timeout_seconds: float = 5.0,
) -> Optional[int]:
    """Start a fire-and-forget ``ps``-style snapshot appended to ``log_path``.

    The snapshot runs in a separate process (``timeout <n> bash -c ...``)
    that this function never waits for, so it cannot block the caller.
    ``timeout`` bounds the run to roughly ``timeout_seconds`` (rounded to
    whole seconds, minimum 1).

    Args:
        log_path: File the diagnostic output is appended to. Its parent
            directory is created if needed.
        signal_name: Label written into the snapshot header.
        timeout_seconds: Upper bound for the diagnostic run.

    Returns:
        The PID of the spawned process, or ``None`` when nothing was
        started (Windows, directory/log file not writable, ``timeout`` or
        ``bash`` missing, invalid arguments). Never raises for those cases.

    ``subprocess.run(["ps", "aux"])`` is deliberately avoided here: waiting
    on ``ps`` would stall the caller for as long as the /proc walk takes.
    """
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
    except OSError:
        return None

    # Inline shell so no helper script has to be shipped. On Windows the
    # snapshot is skipped entirely.
    if sys.platform == "win32":
        return None

    # GNU ``timeout`` treats a duration of 0 as "no timeout", so a value
    # below 0.5s must not be rounded down to "0".
    try:
        timeout_arg = str(max(1, round(timeout_seconds)))
    except (TypeError, ValueError, OverflowError):
        timeout_arg = "5"

    # The signal label is passed as positional parameter $1 instead of
    # being spliced into the script text, so quotes or other shell
    # metacharacters in it cannot alter the script.
    script = (
        'echo "=== shutdown diagnostic @ $1 ==="; '
        "echo '--- date ---'; date -u +%Y-%m-%dT%H:%M:%SZ; "
        "echo '--- ps auxf (top 60 by cpu) ---'; "
        "ps auxf --sort=-pcpu 2>/dev/null | head -60; "
        "echo '--- pstree of self ---'; "
        f"pstree -plau {os.getpid()} 2>/dev/null | head -40 || true; "
        "echo '--- /proc/loadavg ---'; "
        "cat /proc/loadavg 2>/dev/null || true; "
        "echo '--- recent dmesg (oom/killed) ---'; "
        "dmesg -T 2>/dev/null | tail -20 || journalctl --user -n 20 --no-pager 2>/dev/null | tail -20 || true; "
        "echo '=== end ==='"
    )

    try:
        # O_APPEND so diagnostics from rapid successive signals are
        # appended rather than overwriting each other.
        fd = os.open(str(log_path), os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o644)
    except OSError:
        return None

    try:
        # start_new_session=True puts the child in its own session and
        # process group, so signals aimed at our process group do not hit
        # it. NOTE(review): this does not move the child out of our
        # cgroup; a systemd KillMode=control-group kill presumably still
        # reaches it. Confirm if that guarantee matters.
        proc = subprocess.Popen(
            [
                "timeout", timeout_arg,
                "bash", "-c", script,
                "hermes-shutdown-diag",  # becomes $0 inside the script
                str(signal_name),        # becomes $1
            ],
            stdout=fd,
            stderr=subprocess.STDOUT,
            stdin=subprocess.DEVNULL,
            start_new_session=True,
            close_fds=True,
        )
    except (OSError, ValueError):
        # Missing binary, exec failure, or unusable arguments (e.g. an
        # embedded NUL byte). The fd is released in ``finally`` below.
        return None
    finally:
        # Single close point: the child has its own copy of the fd by now
        # (or was never started), so our handle is always dropped here and
        # only here. Closing in both ``except`` and ``finally`` would close
        # the same descriptor number twice.
        try:
            os.close(fd)
        except OSError:
            pass

    return proc.pid
def format_context_for_log(ctx: Dict[str, Any]) -> str:
    """Flatten a shutdown context dict into one ``key=value`` log line.

    Missing values are shown as ``?`` (``(unknown)`` for the parent command
    line). Optional markers (takeover, planned stop, tracer) are included
    only when present. The parent command line is emitted last, as a
    ``repr``, because it is the most useful single field and may contain
    spaces.
    """
    parent_info = ctx.get("parent") or {}
    signal_label = ctx.get("signal", "?")
    systemd_flag = "yes" if ctx.get("under_systemd") else "no"
    parent_pid = parent_info.get("pid") or "?"
    parent_name = parent_info.get("name") or "?"
    parent_cmd = parent_info.get("cmdline", "(unknown)")

    loadavg = ctx.get("loadavg_1m")
    if isinstance(loadavg, (int, float)):
        load_text = format(loadavg, ".2f")
    else:
        load_text = "?"

    fields: List[str] = [
        f"signal={signal_label}",
        f"under_systemd={systemd_flag}",
        f"parent_pid={parent_pid}",
        f"parent_name={parent_name}",
        f"loadavg_1m={load_text}",
    ]

    if ctx.get("takeover_marker") is not None:
        owner = "self" if ctx.get("takeover_marker_for_self") else "other"
        fields.append(f"takeover_marker_present={owner}")
    if ctx.get("planned_stop_marker") is not None:
        fields.append("planned_stop_marker_present=yes")
    if ctx.get("tracer_pid"):
        fields.append(f"tracer_pid={ctx['tracer_pid']}")

    fields.append(f"parent_cmdline={parent_cmd!r}")
    return " ".join(fields)
def context_as_json(ctx: Dict[str, Any]) -> str:
    """Serialise a context dict to JSON with sorted keys.

    Values that JSON cannot encode natively are converted with ``str``.
    If serialisation fails with ``TypeError`` or ``ValueError`` the empty
    object ``"{}"`` is returned instead.
    """
    try:
        encoded = json.dumps(ctx, sort_keys=True, default=str)
    except (TypeError, ValueError):
        encoded = "{}"
    return encoded
def check_systemd_timing_alignment(drain_timeout: float) -> Optional[Dict[str, Any]]:
    """At startup, sanity-check that systemd's TimeoutStopSec >= drain_timeout.

    When the gateway runs under a stale systemd unit file (e.g. the user
    upgraded hermes-agent but never re-ran ``hermes setup`` to regenerate
    the unit), ``TimeoutStopSec`` can be smaller than the configured
    ``restart_drain_timeout``. Result: SIGTERM arrives, the drain starts,
    and systemd SIGKILLs the cgroup mid-drain. That looks like a phantom
    kill in the journal, because the journal only logs
    ``code=killed status=9``.

    Args:
        drain_timeout: Configured drain timeout in seconds.

    Returns:
        ``None`` when the alignment cannot be determined (``INVOCATION_ID``
        not set, unit name not found in ``/proc/self/cgroup``, ``systemctl``
        unavailable or giving no usable value). Otherwise a dict with
        ``unit``, ``timeout_stop_sec``, ``drain_timeout``, ``expected_min``
        (drain timeout plus 30s headroom) and ``mismatch``
        (``timeout_stop_sec < expected_min``).

    Best-effort; subprocess and file errors are swallowed.
    """
    invocation_id = os.environ.get("INVOCATION_ID")
    if not invocation_id:
        return None  # Not running under systemd (or at least not directly)

    # Identify our unit name from the cgroup path, e.g.
    # "0::/user.slice/.../hermes-gateway.service": take the right-most
    # path component ending in ".service" on the first line that has one.
    unit_name: Optional[str] = None
    try:
        with open("/proc/self/cgroup", encoding="utf-8") as fh:
            for line in fh:
                if ".service" not in line:
                    continue
                for part in reversed(line.strip().split("/")):
                    if part.endswith(".service"):
                        unit_name = part
                        break
                if unit_name:
                    break
    except OSError:
        pass

    if not unit_name:
        return None

    # Ask the user manager first (the common case for hermes), then the
    # system manager.
    timeout_us: Optional[int] = None
    for flag in (["--user"], []):
        try:
            result = subprocess.run(
                [
                    "systemctl", *flag, "show", unit_name,
                    "--property=LoadState,TimeoutStopUSec",
                ],
                capture_output=True, text=True, timeout=2.0,
            )
        except (subprocess.TimeoutExpired, OSError):
            continue
        if result.returncode != 0:
            continue

        props: Dict[str, str] = {}
        for line in result.stdout.splitlines():
            prop_name, sep, prop_value = line.partition("=")
            if sep:
                props[prop_name.strip()] = prop_value.strip()

        # ``systemctl show`` exits 0 and prints default property values
        # even for a unit this manager does not know. Without this check a
        # system unit would be reported with the user manager's default
        # TimeoutStopUSec.
        if props.get("LoadState") == "not-found":
            continue

        # Value looks like "1min 30s" or a bare microsecond count.
        value = props.get("TimeoutStopUSec")
        if value is None:
            continue
        if value.isdigit():
            timeout_us = int(value)
        else:
            timeout_us = _parse_systemd_duration_to_us(value)
        if timeout_us is not None:
            break

    if timeout_us is None:
        return None

    timeout_stop_sec = timeout_us / 1_000_000.0
    # systemd needs headroom for: post-interrupt kill, adapter disconnect,
    # SessionDB close, file unlinks, etc. 30s matches the unit-template
    # constant in hermes_cli/gateway.py.
    headroom = 30.0
    expected = drain_timeout + headroom
    return {
        "unit": unit_name,
        "timeout_stop_sec": timeout_stop_sec,
        "drain_timeout": drain_timeout,
        "expected_min": expected,
        "mismatch": timeout_stop_sec < expected,
    }
def _parse_systemd_duration_to_us(raw: str) -> Optional[int]:
"""Parse 'TimeoutStopUSec=1min 30s' / '90s' style values to microseconds.
systemd accepts a wide grammar; we cover the common cases (s, ms, min,
h) and return None on anything unexpected. Never raises.
"""
if not raw:
return None
units = {
"us": 1,
"ms": 1_000,
"s": 1_000_000,
"sec": 1_000_000,
"min": 60_000_000,
"h": 3_600_000_000,
"hr": 3_600_000_000,
}
total_us = 0
token = ""
digits = ""
for ch in raw + " ":
if ch.isdigit() or ch == ".":
if token:
# End previous unit, start new number
multiplier = units.get(token.lower())
if multiplier is None or not digits:
return None
try:
total_us += int(float(digits) * multiplier)
except ValueError:
return None
digits = ""
token = ""
digits += ch
elif ch.isalpha():
token += ch
elif digits and token:
multiplier = units.get(token.lower())
if multiplier is None:
return None
try:
total_us += int(float(digits) * multiplier)
except ValueError:
return None
digits = ""
token = ""
elif digits and not token:
# Bare number = seconds (rare but valid)
try:
total_us += int(float(digits) * 1_000_000)
except ValueError:
return None
digits = ""
return total_us if total_us > 0 else None
-229
View File
@@ -1,229 +0,0 @@
"""Per-platform slash command access control.
This module sits beside the existing per-platform allowlist (``allow_from``)
and adds a second axis: of the users who are *allowed to talk to the
gateway*, which ones can run *which slash commands*.
Two lists per platform scope (DM vs group, mirroring ``allow_from`` vs
``group_allow_from``):
- ``allow_admin_from``: user IDs that get every registered slash
command (built-in + plugin-registered).
- ``user_allowed_commands``: slash command names non-admin users may
run. Empty / unset means non-admins get no
slash commands.
Backward compatibility:
If ``allow_admin_from`` is not set for a scope, slash command gating
is disabled entirely for that scope. Every allowed user can run every
slash command, exactly like before. This means existing installs are
unaffected until an operator opts in by listing at least one admin.
The gate is applied at the slash command dispatch site in
``gateway/run.py`` so it covers BOTH built-in and plugin-registered
commands via the live registry. Gating slash commands does not affect
plain chat: non-admin users can still talk to the agent normally,
they just can't trigger commands outside ``user_allowed_commands``.
Authored as a slimmed-down salvage of PR #4443's permission tiers
(co-authored by @ReqX). The full tier system, audit log, usage
tracking, rate limiting, and tool filtering from that PR are not
included here; only the slash-command access split is.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, FrozenSet, Iterable, Optional, Tuple
# Slash commands that MUST stay reachable for any allowed user, even when
# slash gating is enabled and the user has no commands listed. Without this
# carve-out, a non-admin user has no way to discover what they can or
# can't do (``/help``, ``/whoami``). These mirror the smallest set of
# read-only commands we'd hand to a guest.
#
# This set is an implicit floor: ``SlashAccessPolicy.can_run`` accepts a
# command that is in this set OR in ``user_allowed_commands``, so an
# operator's ``user_allowed_commands`` can only add to it, never remove
# from it.
#
# NOTE(review): earlier wording of this comment also listed ``/status`` as
# always reachable, but "status" is not in the set below. Confirm which
# one is intended.
_ALWAYS_ALLOWED_FOR_USERS: FrozenSet[str] = frozenset({
    "help",
    "whoami",
})
@dataclass(frozen=True)
class SlashAccessPolicy:
    """Resolved slash-command access policy for one (platform, scope) pair.

    ``scope`` is ``"dm"`` for direct messages and ``"group"`` for groups,
    channels, threads, and any other multi-user context. The mapping from
    ``SessionSource.chat_type`` to a scope happens in ``policy_for_source``.
    """

    # Whether gating is active for this scope at all.
    enabled: bool
    # User IDs (as strings) allowed to run every slash command.
    admin_user_ids: FrozenSet[str]
    # Command names non-admin users may run.
    user_allowed_commands: FrozenSet[str]

    def is_admin(self, user_id: Optional[str]) -> bool:
        """Return True when ``user_id`` counts as an admin under this policy.

        With gating disabled every user is treated as an admin, so callers
        can use ``is_admin`` / ``can_run`` uniformly. With gating enabled,
        a missing/empty id is never an admin.
        """
        if self.enabled:
            return bool(user_id) and str(user_id) in self.admin_user_ids
        return True

    def can_run(self, user_id: Optional[str], canonical_cmd: str) -> bool:
        """Return True when ``user_id`` may run ``canonical_cmd``.

        Admins (and everyone, when gating is disabled) may run anything.
        Other users may run commands in the always-allowed floor or in
        ``user_allowed_commands``; an empty command name is rejected.
        """
        if not self.enabled or self.is_admin(user_id):
            return True
        if not canonical_cmd:
            return False
        return (
            canonical_cmd in _ALWAYS_ALLOWED_FOR_USERS
            or canonical_cmd in self.user_allowed_commands
        )
# Lower-cased ``chat_type`` values that ``_scope_for_chat_type`` maps to the
# "dm" scope; everything else is treated as "group".
# NOTE(review): the "" entry looks unreachable, because
# ``_scope_for_chat_type`` returns "group" for any falsy chat_type before
# consulting this set. Confirm which behaviour is intended.
_DM_CHAT_TYPES = frozenset({"dm", "direct", "private", ""})
def _coerce_id_list(raw: Any) -> FrozenSet[str]:
"""Normalize a YAML-loaded admin/user list into a frozenset of strings.
Accepts ``None``, list, tuple, or comma-separated string. Stringifies
each entry and strips whitespace; empty entries are dropped.
"""
if raw is None:
return frozenset()
if isinstance(raw, (list, tuple, set, frozenset)):
items: Iterable[Any] = raw
elif isinstance(raw, str):
items = (s for s in raw.split(",") if s.strip())
else:
# single scalar (int user id, etc.)
items = (raw,)
out: list[str] = []
for it in items:
s = str(it).strip()
if s:
out.append(s)
return frozenset(out)
def _coerce_command_list(raw: Any) -> FrozenSet[str]:
"""Normalize a slash command allowlist.
Strips leading slashes so YAML can read either ``["help", "status"]``
or ``["/help", "/status"]``. Lowercase canonicalization matches how
``resolve_command()`` stores names.
"""
if raw is None:
return frozenset()
if isinstance(raw, (list, tuple, set, frozenset)):
items: Iterable[Any] = raw
elif isinstance(raw, str):
items = (s for s in raw.split(",") if s.strip())
else:
items = (raw,)
out: list[str] = []
for it in items:
s = str(it).strip().lstrip("/").lower()
if s:
out.append(s)
return frozenset(out)
def _scope_for_chat_type(chat_type: Optional[str]) -> str:
if chat_type and chat_type.lower() in _DM_CHAT_TYPES:
return "dm"
return "group"
def _platform_extra(platform_config: Any) -> dict:
"""Return the ``extra`` dict from a PlatformConfig-like object.
Defensively handles None and non-PlatformConfig shapes so calling
code can stay simple.
"""
if platform_config is None:
return {}
extra = getattr(platform_config, "extra", None)
if isinstance(extra, dict):
return extra
if isinstance(platform_config, dict):
# Some test harnesses pass dicts directly.
return platform_config
return {}
def _keys_for_scope(scope: str) -> Tuple[str, str]:
"""Return (admin_key, user_cmd_key) names for a scope."""
if scope == "group":
return ("group_allow_admin_from", "group_user_allowed_commands")
return ("allow_admin_from", "user_allowed_commands")
def policy_from_extra(extra: dict, scope: str) -> SlashAccessPolicy:
    """Build the access policy for one scope from a platform's ``extra`` dict.

    The admin list and command allowlist are read from the keys returned
    by ``_keys_for_scope``. For the ``"dm"`` scope only, an empty command
    allowlist falls back to ``group_user_allowed_commands`` so operators
    who want the same command set in DMs and groups need to list it once.
    Admin lists are never shared across scopes. Gating is enabled exactly
    when the resolved admin list is non-empty.
    """
    admin_key, cmd_key = _keys_for_scope(scope)
    admins = _coerce_id_list(extra.get(admin_key))
    allowed = _coerce_command_list(extra.get(cmd_key))

    # NOTE(review): the fallback triggers on an *empty* result, so an
    # explicitly empty DM list is treated the same as an unset one.
    # Confirm that is intended.
    needs_group_fallback = scope == "dm" and not allowed
    if needs_group_fallback:
        allowed = _coerce_command_list(extra.get("group_user_allowed_commands"))

    return SlashAccessPolicy(
        enabled=bool(admins),
        admin_user_ids=admins,
        user_allowed_commands=allowed,
    )
def policy_for_source(gateway_config: Any, source: Any) -> SlashAccessPolicy:
    """Resolve the slash-command access policy for a session source.

    A disabled policy (gating off, everything allowed) results when:

    - ``gateway_config`` or ``source`` is ``None``
    - the platform has no usable config entry
    - the platform's config has no admin list set for the scope

    The returned policy governs slash command gating only; it does not
    gate plain chat messages.
    """
    if gateway_config is None or source is None:
        nobody: FrozenSet[str] = frozenset()
        return SlashAccessPolicy(
            enabled=False,
            admin_user_ids=nobody,
            user_allowed_commands=frozenset(),
        )

    platform_config = None
    platforms = getattr(gateway_config, "platforms", None)
    if platforms is not None:
        try:
            platform_config = platforms.get(source.platform)
        except Exception:
            # Any lookup failure is treated as "no config for this platform".
            platform_config = None

    chat_type = getattr(source, "chat_type", None)
    return policy_from_extra(
        _platform_extra(platform_config),
        _scope_for_chat_type(chat_type),
    )
# Names exported by ``from <module> import *``; the underscore-prefixed
# helpers above are intentionally left out.
__all__ = [
    "SlashAccessPolicy",
    "policy_from_extra",
    "policy_for_source",
]
+10 -49
View File
@@ -124,33 +124,16 @@ def get_process_start_time(pid: int) -> Optional[int]:
def _read_process_cmdline(pid: int) -> Optional[str]:
"""Return the process command line as a space-separated string.
On Linux, reads /proc/<pid>/cmdline directly. On macOS and other
platforms without /proc, falls back to ``ps -p <pid> -o command=``.
"""
"""Return the process command line as a space-separated string."""
cmdline_path = Path(f"/proc/{pid}/cmdline")
try:
raw = cmdline_path.read_bytes()
except (FileNotFoundError, PermissionError, OSError):
pass
else:
if raw:
return raw.replace(b"\x00", b" ").decode("utf-8", errors="ignore").strip()
return None
try:
result = subprocess.run(
["ps", "-p", str(pid), "-o", "command="],
capture_output=True,
text=True,
timeout=5,
)
if result.returncode == 0 and result.stdout.strip():
return result.stdout.strip()
except (OSError, subprocess.TimeoutExpired):
pass
return None
if not raw:
return None
return raw.replace(b"\x00", b" ").decode("utf-8", errors="ignore").strip()
def _looks_like_gateway_process(pid: int) -> bool:
@@ -235,11 +218,7 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
if not pid_path.exists():
return None
try:
raw = pid_path.read_text().strip()
except OSError:
# File was deleted between exists() and read_text(), or permission flipped.
return None
raw = pid_path.read_text().strip()
if not raw:
return None
@@ -503,12 +482,10 @@ def write_runtime_status(
"""Persist gateway runtime health information for diagnostics/status."""
path = _get_runtime_status_path()
payload = _read_json_file(path) or _build_runtime_status_record()
current_record = _build_pid_record()
payload.setdefault("platforms", {})
payload["kind"] = current_record["kind"]
payload["pid"] = current_record["pid"]
payload["argv"] = current_record["argv"]
payload["start_time"] = current_record["start_time"]
payload.setdefault("kind", _GATEWAY_KIND)
payload["pid"] = os.getpid()
payload["start_time"] = _get_process_start_time(os.getpid())
payload["updated_at"] = _utc_now_iso()
if gateway_state is not _UNSET:
@@ -611,22 +588,6 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
and current_start != existing.get("start_time")
):
stale = True
# When start_time comparison is unavailable (macOS / Windows
# have no /proc, so both sides are None), fall back to
# checking the live process command line. When cmdline is
# also unreadable (Windows has no ps), consult the lock
# record's own argv — the gateway writes it at startup and
# it's the only identity signal on platforms without ps.
# Both oracles must indicate "not a gateway" to mark stale.
if (
not stale
and existing.get("start_time") is None
and current_start is None
and not _looks_like_gateway_process(existing_pid)
):
live_cmdline = _read_process_cmdline(existing_pid)
if live_cmdline is not None or not _record_looks_like_gateway(existing):
stale = True
# Check if process is stopped (Ctrl+Z / SIGTSTP) — stopped
# processes still appear alive to _pid_exists but are not
# actually running. Treat them as stale so --replace works.
@@ -637,7 +598,7 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
for _line in _proc_status.read_text(encoding="utf-8").splitlines():
if _line.startswith("State:"):
_state = _line.split()[1]
if _state in {"T", "t"}: # stopped or tracing stop
if _state in ("T", "t"): # stopped or tracing stop
stale = True
break
except (OSError, PermissionError):
+20 -271
View File
@@ -21,15 +21,7 @@ import queue
import re
import time
from dataclasses import dataclass
from typing import Any, Callable, Optional
from gateway.platforms.base import BasePlatformAdapter as _BasePlatformAdapter
from gateway.platforms.base import _custom_unit_to_cp
from gateway.config import (
DEFAULT_STREAMING_EDIT_INTERVAL as _DEFAULT_STREAMING_EDIT_INTERVAL,
DEFAULT_STREAMING_BUFFER_THRESHOLD as _DEFAULT_STREAMING_BUFFER_THRESHOLD,
DEFAULT_STREAMING_CURSOR as _DEFAULT_STREAMING_CURSOR,
)
from typing import Any, Optional
logger = logging.getLogger("gateway.stream_consumer")
@@ -48,9 +40,9 @@ _COMMENTARY = object()
@dataclass
class StreamConsumerConfig:
"""Runtime config for a single stream consumer instance."""
edit_interval: float = _DEFAULT_STREAMING_EDIT_INTERVAL
buffer_threshold: int = _DEFAULT_STREAMING_BUFFER_THRESHOLD
cursor: str = _DEFAULT_STREAMING_CURSOR
edit_interval: float = 1.0
buffer_threshold: int = 40
cursor: str = ""
buffer_only: bool = False
# When >0, the final edit for a streamed response is delivered as a
# fresh message if the original preview has been visible for at least
@@ -60,18 +52,6 @@ class StreamConsumerConfig:
# openclaw/openclaw#72038. Default 0 = always edit in place (legacy
# behavior). The gateway enables this selectively per-platform.
fresh_final_after_seconds: float = 0.0
# Streaming transport selection:
# "auto" — prefer native draft streaming (e.g. Telegram sendMessageDraft)
# when the adapter + chat supports it; fall back to edit.
# "draft" — explicitly request native draft streaming; fall back to
# edit when unsupported.
# "edit" — progressive editMessageText (legacy behavior).
# "off" — handled by the gateway before the consumer is even built.
transport: str = "auto"
# Hint for the consumer about the originating chat type (e.g. "dm",
# "group", "supergroup", "forum"). Used to gate native draft streaming,
# which is platform-specific (Telegram drafts are DM-only).
chat_type: str = ""
class GatewayStreamConsumer:
@@ -105,11 +85,6 @@ class GatewayStreamConsumer:
"</THINKING>", "</thinking>", "</thought>",
)
# Class-wide monotonic counter for native-streaming draft ids. Telegram
# animates a draft when the same draft_id is reused across consecutive
# calls in the same chat, so we need a fresh non-zero id per response.
_draft_id_counter: int = 0
def __init__(
self,
adapter: Any,
@@ -117,7 +92,6 @@ class GatewayStreamConsumer:
config: Optional[StreamConsumerConfig] = None,
metadata: Optional[dict] = None,
on_new_message: Optional[callable] = None,
initial_reply_to_id: Optional[str] = None,
):
self.adapter = adapter
self.chat_id = chat_id
@@ -131,7 +105,6 @@ class GatewayStreamConsumer:
# the content, not edit the old bubble above it.
# Called with no arguments. Exceptions are swallowed.
self._on_new_message = on_new_message
self._initial_reply_to_id = initial_reply_to_id
self._queue: queue.Queue = queue.Queue()
self._accumulated = ""
self._message_id: Optional[str] = None
@@ -163,20 +136,6 @@ class GatewayStreamConsumer:
self._in_think_block = False
self._think_buffer = ""
# Native draft-streaming state. Resolved at the start of run() based
# on cfg.transport, cfg.chat_type, and the adapter's
# supports_draft_streaming() probe. When True, the consumer emits
# animated draft frames via adapter.send_draft instead of progressive
# edits via adapter.edit_message. The final answer still goes
# through the normal first-send path so the user gets a real message
# in their chat history (drafts have no message_id).
self._use_draft_streaming = False
self._draft_id: Optional[int] = None
# Cumulative draft-frame failure count for this consumer. After the
# first failure we permanently disable drafts for the remainder of
# this response and route through edit-based for graceful degradation.
self._draft_failures = 0
@property
def already_sent(self) -> bool:
"""True if at least one message was sent or edited during the run."""
@@ -215,16 +174,6 @@ class GatewayStreamConsumer:
self._last_sent_text = ""
self._fallback_final_send = False
self._fallback_prefix = ""
# Native draft streaming: bump the draft_id so the next text segment
# animates as a fresh preview below the tool-progress bubbles, not
# over the prior segment's already-finalized draft. This is how
# we avoid the "inter-tool-call text leak" failure mode openclaw
# documented in their issue #32535 — each text block becomes its
# own visible message via the finalize, then a new draft animates
# for the next one.
if self._use_draft_streaming:
type(self)._draft_id_counter += 1
self._draft_id = type(self)._draft_id_counter
def on_delta(self, text: str) -> None:
"""Thread-safe callback — called from the agent's worker thread.
@@ -350,32 +299,9 @@ class GatewayStreamConsumer:
async def run(self) -> None:
"""Async task that drains the queue and edits the platform message."""
# Platform message length limit — leave room for cursor + formatting.
# Use the adapter's length function (e.g. utf16_len for Telegram) so
# overflow detection matches what the platform actually enforces.
# Gate on isinstance(BasePlatformAdapter) so test MagicMocks (whose
# auto-attributes return mock objects, not callables) fall back to len.
_len_fn: "Callable[[str], int]" = (
self.adapter.message_len_fn
if isinstance(self.adapter, _BasePlatformAdapter)
else len
)
# Platform message length limit — leave room for cursor + formatting
_raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096)
_safe_limit = max(500, _raw_limit - _len_fn(self.cfg.cursor) - 100)
# Resolve native draft streaming once per run. When enabled the
# consumer routes mid-stream frames through adapter.send_draft and
# leaves _message_id=None so the existing got_done path delivers the
# final answer as a regular sendMessage (drafts have no message_id
# to edit).
self._use_draft_streaming = self._resolve_draft_streaming()
if self._use_draft_streaming:
type(self)._draft_id_counter += 1
self._draft_id = type(self)._draft_id_counter
logger.debug(
"Stream consumer using native-draft transport (chat=%s draft_id=%s)",
self.chat_id, self._draft_id,
)
_safe_limit = max(500, _raw_limit - len(self.cfg.cursor) - 100)
try:
while True:
@@ -417,10 +343,6 @@ class GatewayStreamConsumer:
should_edit = should_edit or (
(elapsed >= self._current_edit_interval
and self._accumulated)
# buffer_threshold is intentionally codepoint-based:
# it's a debounce heuristic ("send updates roughly
# every N visible characters"), not a platform-limit
# check. _len_fn is reserved for overflow detection.
or len(self._accumulated) >= self.cfg.buffer_threshold
)
@@ -429,7 +351,7 @@ class GatewayStreamConsumer:
# Split overflow: if accumulated text exceeds the platform
# limit, split into properly sized chunks.
if (
_len_fn(self._accumulated) > _safe_limit
len(self._accumulated) > _safe_limit
and self._message_id is None
):
# No existing message to edit (first message or after a
@@ -438,23 +360,15 @@ class GatewayStreamConsumer:
# proper word/code-fence boundaries and chunk
# indicators like "(1/2)".
chunks = self.adapter.truncate_message(
self._accumulated, _safe_limit, len_fn=_len_fn,
self._accumulated, _safe_limit
)
chunks_delivered = False
reply_to = self._message_id or self._initial_reply_to_id
for chunk in chunks:
new_id = await self._send_new_chunk(chunk, reply_to)
if new_id is not None and new_id != reply_to:
chunks_delivered = True
await self._send_new_chunk(chunk, self._message_id)
self._accumulated = ""
self._last_sent_text = ""
self._last_edit_time = time.monotonic()
if got_done:
# Only claim final delivery if THESE chunks actually
# landed. ``_already_sent`` may be True from prior
# tool-progress edits or fallback-mode promotion (#10748)
# — that doesn't mean the final answer reached the user.
self._final_response_sent = chunks_delivered
self._final_response_sent = self._already_sent
return
if got_segment_break:
self._message_id = None
@@ -465,14 +379,11 @@ class GatewayStreamConsumer:
# Existing message: edit it with the first chunk, then
# start a new message for the overflow remainder.
while (
_len_fn(self._accumulated) > _safe_limit
len(self._accumulated) > _safe_limit
and self._message_id is not None
and self._edit_supported
):
_cp_budget = _custom_unit_to_cp(
self._accumulated, _safe_limit, _len_fn,
)
split_at = self._accumulated.rfind("\n", 0, _cp_budget)
split_at = self._accumulated.rfind("\n", 0, _safe_limit)
if split_at < _safe_limit // 2:
split_at = _safe_limit
chunk = self._accumulated[:split_at]
@@ -500,7 +411,7 @@ class GatewayStreamConsumer:
# path below so we don't finalize here for it.
current_update_visible = await self._send_or_edit(
display_text,
finalize=(got_done or got_segment_break),
finalize=got_segment_break,
)
self._last_edit_time = time.monotonic()
@@ -663,18 +574,14 @@ class GatewayStreamConsumer:
return final_text
@staticmethod
def _split_text_chunks(
text: str, limit: int,
len_fn: "Callable[[str], int]" = len,
) -> list[str]:
def _split_text_chunks(text: str, limit: int) -> list[str]:
"""Split text into reasonably sized chunks for fallback sends."""
if len_fn(text) <= limit:
if len(text) <= limit:
return [text]
chunks: list[str] = []
remaining = text
while len_fn(remaining) > limit:
_cp_budget = _custom_unit_to_cp(remaining, limit, len_fn)
split_at = remaining.rfind("\n", 0, _cp_budget)
while len(remaining) > limit:
split_at = remaining.rfind("\n", 0, limit)
if split_at < limit // 2:
split_at = limit
chunks.append(remaining[:split_at])
@@ -730,15 +637,9 @@ class GatewayStreamConsumer:
return
raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096)
_len_fn: "Callable[[str], int]" = (
self.adapter.message_len_fn
if isinstance(self.adapter, _BasePlatformAdapter)
else len
)
safe_limit = max(500, raw_limit - 100)
chunks = self._split_text_chunks(continuation, safe_limit, len_fn=_len_fn)
chunks = self._split_text_chunks(continuation, safe_limit)
stale_message_id = self._message_id # partial message to clean up
last_message_id: Optional[str] = None
last_successful_chunk = ""
sent_any_chunk = False
@@ -786,22 +687,6 @@ class GatewayStreamConsumer:
# so any stale tool-progress bubble gets closed off.
self._notify_new_message()
# Remove the frozen partial message so the user only sees the
# complete fallback response. Best-effort — if the platform doesn't
# implement ``delete_message``, the delete fails (flood control still
# active, bot lacks permission, message too old to delete), the
# partial remains but at least the full answer was delivered.
if stale_message_id and stale_message_id != last_message_id:
delete_fn = getattr(self.adapter, "delete_message", None)
if delete_fn is not None:
try:
await delete_fn(self.chat_id, stale_message_id)
except Exception as e:
logger.debug(
"Fallback partial cleanup failed (%s): %s",
stale_message_id, e,
)
self._message_id = last_message_id
self._already_sent = True
self._final_response_sent = True
@@ -814,89 +699,6 @@ class GatewayStreamConsumer:
err_lower = err.lower()
return "flood" in err_lower or "retry after" in err_lower or "rate" in err_lower
def _resolve_draft_streaming(self) -> bool:
"""Decide whether this run should use native draft streaming.
Honors ``cfg.transport``:
* ``"edit"`` never use drafts (legacy progressive-edit path).
* ``"draft"`` require draft support; gracefully fall back to edit
when the adapter declines. Logs the downgrade at debug.
* ``"auto"`` use drafts when the adapter supports them for this
chat type; otherwise edit.
Adapter eligibility is checked via
:meth:`BasePlatformAdapter.supports_draft_streaming`, which considers
the chat type (e.g. Telegram drafts are DM-only) and platform-version
gates (e.g. python-telegram-bot 22.6+).
"""
transport = (self.cfg.transport or "auto").lower()
if transport == "edit":
return False
# "off" is filtered upstream by the gateway; treat as edit defensively.
if transport == "off":
return False
# Test adapters are MagicMocks that don't subclass BasePlatformAdapter;
# default them to edit so existing test behaviour is preserved.
if not isinstance(self.adapter, _BasePlatformAdapter):
return False
try:
supported = self.adapter.supports_draft_streaming(
chat_type=self.cfg.chat_type or None,
metadata=self.metadata,
)
except Exception:
logger.debug("supports_draft_streaming probe raised", exc_info=True)
supported = False
if not supported:
if transport == "draft":
logger.debug(
"Draft streaming requested but unsupported (chat=%s, type=%r) — "
"falling back to edit",
self.chat_id, self.cfg.chat_type,
)
return False
return True
async def _send_draft_frame(self, text: str) -> bool:
"""Emit a single animated draft frame for the current accumulated text.
Returns True when the frame landed. On any failure, permanently
disables drafts for the remainder of this run so subsequent frames
flow through the edit-based path (which can adapt with flood-control
backoff, etc.). Drafts have no message_id and clear naturally on
the client when the response finalizes via a regular sendMessage.
"""
if self._draft_id is None:
# Defensive: should never happen — _use_draft_streaming gate is
# set in tandem with _draft_id in run(). Disable to be safe.
self._use_draft_streaming = False
return False
try:
result = await self.adapter.send_draft(
chat_id=self.chat_id,
draft_id=self._draft_id,
content=text,
metadata=self.metadata,
)
except Exception as e:
logger.debug(
"send_draft raised, disabling draft transport for this run: %s", e,
)
self._draft_failures += 1
self._use_draft_streaming = False
return False
if not getattr(result, "success", False):
logger.debug(
"send_draft returned success=False, disabling draft transport: %s",
getattr(result, "error", "unknown"),
)
self._draft_failures += 1
self._use_draft_streaming = False
return False
# Frame delivered. Track text for parity with edit-based no-op skip.
self._last_sent_text = text
return True
async def _flush_segment_tail_on_edit_failure(self) -> None:
"""Deliver un-sent tail content before a segment-break reset.
@@ -1091,35 +893,6 @@ class GatewayStreamConsumer:
and self.cfg.cursor in text
and len(_visible_stripped) < _MIN_NEW_MSG_CHARS):
return True # too short for a standalone message — accumulate more
# Native draft streaming: route mid-stream frames through send_draft.
# The final answer is delivered via the regular sendMessage path
# below — drafts have no message_id so we can't finalize them
# in-place; the regular sendMessage clears the draft naturally on
# the client and gives the user a real message in their history.
# Skip when:
# * finalize=True (this is the final answer; needs to be a real message)
# * an edit path is already established (message_id is set, e.g. after
# a tool-boundary segment break where the prior text was finalized
# as a real sendMessage and the next text segment continues editing
# that one — staying on edit-based for that segment is correct).
if (
self._use_draft_streaming
and not finalize
and self._message_id is None
):
# No-op skip: identical to the last frame we sent.
if text == self._last_sent_text:
return True
ok = await self._send_draft_frame(text)
if ok:
# Drafts mark "we put something on screen" but DO NOT set
# _already_sent — that flag gates the gateway's fallback
# final-send path and we still need that to fire so the
# user gets a real message (drafts have no message_id).
return True
# Failure already disabled drafts for this run; fall through to
# the regular edit/send path below.
try:
if self._message_id is not None:
if self._edit_supported:
@@ -1158,29 +931,7 @@ class GatewayStreamConsumer:
)
if result.success:
self._already_sent = True
# Adapter may have split-and-delivered an oversized
# edit across the original message + N continuations.
# When that happens, ``message_id`` is the LAST visible
# continuation and ``_last_sent_text`` no longer reflects
# the on-screen content (the new message only holds the
# final chunk's text), so subsequent edits must target
# the new id and skip-if-same comparisons must reset.
# Fire on_new_message so tool-progress bubbles linearize
# below the new continuation, not the original.
# ``getattr`` with default keeps backwards compat with
# SimpleNamespace mocks in tests that pre-date the field.
_continuation_ids = getattr(result, "continuation_message_ids", ()) or ()
if (
_continuation_ids
and result.message_id
and result.message_id != self._message_id
):
self._message_id = str(result.message_id)
self._message_created_ts = time.monotonic()
self._last_sent_text = ""
self._notify_new_message()
else:
self._last_sent_text = text
self._last_sent_text = text
# Successful edit — reset flood strike counter
self._flood_strikes = 0
return True
@@ -1228,12 +979,10 @@ class GatewayStreamConsumer:
# The final response will be sent by the fallback path.
return False
else:
# First message — send new, threaded to the original user message
# so it lands in the correct topic/thread.
# First message — send new
result = await self.adapter.send(
chat_id=self.chat_id,
content=text,
reply_to=self._initial_reply_to_id,
metadata=self.metadata,
)
if result.success:
+144 -101
View File
@@ -197,6 +197,13 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
inference_base_url=DEFAULT_COPILOT_ACP_BASE_URL,
base_url_env_var="COPILOT_ACP_BASE_URL",
),
"codex-cli": ProviderConfig(
id="codex-cli",
name="OpenAI Codex CLI",
auth_type="external_process",
inference_base_url="codex-cli://local",
base_url_env_var="CODEX_CLI_BASE_URL",
),
"gemini": ProviderConfig(
id="gemini",
name="Google AI Studio",
@@ -1377,6 +1384,7 @@ def resolve_provider(
"github": "copilot", "github-copilot": "copilot",
"github-models": "copilot", "github-model": "copilot",
"github-copilot-acp": "copilot-acp", "copilot-acp-agent": "copilot-acp",
"codexcli": "codex-cli", "openai-codex-cli": "codex-cli",
"aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway",
"opencode": "opencode-zen", "zen": "opencode-zen",
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli",
@@ -1450,7 +1458,7 @@ def resolve_provider(
# whose availability isn't implied by LM_API_KEY presence (it may be
# offline, and the no-auth setup uses a placeholder value), so it
# also requires explicit selection.
if pid in {"copilot", "lmstudio"}:
if pid in ("copilot", "lmstudio"):
continue
for env_var in pconfig.api_key_env_vars:
if has_usable_secret(os.getenv(env_var, "")):
@@ -2541,7 +2549,7 @@ def refresh_codex_oauth_pure(
# A 401/403 from the token endpoint always means the refresh token
# is invalid/expired — force relogin even if the body error code
# wasn't one of the known strings above.
if response.status_code in {401, 403} and not relogin_required:
if response.status_code in (401, 403) and not relogin_required:
relogin_required = True
raise AuthError(
message,
@@ -2947,7 +2955,7 @@ def _merge_shared_nous_oauth_state(state: Dict[str, Any]) -> bool:
"expires_at",
):
value = shared.get(key)
if value not in {None, ""}:
if value not in (None, ""):
state[key] = value
return True
@@ -3986,7 +3994,7 @@ def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]:
if pconfig.base_url_env_var:
env_url = os.getenv(pconfig.base_url_env_var, "").strip()
if provider_id in {"kimi-coding", "kimi-coding-cn"}:
if provider_id in ("kimi-coding", "kimi-coding-cn"):
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
elif env_url:
base_url = env_url
@@ -4009,28 +4017,60 @@ def get_external_process_provider_status(provider_id: str) -> Dict[str, Any]:
if not pconfig or pconfig.auth_type != "external_process":
return {"configured": False}
command = (
os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
or os.getenv("COPILOT_CLI_PATH", "").strip()
or "copilot"
)
raw_args = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
args = shlex.split(raw_args) if raw_args else ["--acp", "--stdio"]
base_url = os.getenv(pconfig.base_url_env_var, "").strip() if pconfig.base_url_env_var else ""
if not base_url:
base_url = pconfig.inference_base_url
if provider_id == "copilot-acp":
command = (
os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
or os.getenv("COPILOT_CLI_PATH", "").strip()
or "copilot"
)
raw_args = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
args = shlex.split(raw_args) if raw_args else ["--acp", "--stdio"]
base_url = os.getenv(pconfig.base_url_env_var, "").strip() if pconfig.base_url_env_var else ""
if not base_url:
base_url = pconfig.inference_base_url
resolved_command = shutil.which(command) if command else None
return {
"configured": bool(resolved_command or base_url.startswith("acp+tcp://")),
"provider": provider_id,
"name": pconfig.name,
"command": command,
"args": args,
"resolved_command": resolved_command,
"base_url": base_url,
"logged_in": bool(resolved_command or base_url.startswith("acp+tcp://")),
}
resolved_command = shutil.which(command) if command else None
return {
"configured": bool(resolved_command or base_url.startswith("acp+tcp://")),
"provider": provider_id,
"name": pconfig.name,
"command": command,
"args": args,
"resolved_command": resolved_command,
"base_url": base_url,
"logged_in": bool(resolved_command or base_url.startswith("acp+tcp://")),
}
if provider_id == "codex-cli":
command = (
os.getenv("HERMES_CODEX_CLI_COMMAND", "").strip()
or os.getenv("CODEX_CLI_PATH", "").strip()
or "codex"
)
raw_args = os.getenv("HERMES_CODEX_CLI_ARGS", "").strip()
default_args = [
"exec",
"--json",
"--ephemeral",
"--dangerously-bypass-approvals-and-sandbox",
"--skip-git-repo-check",
]
args = shlex.split(raw_args) if raw_args else default_args
base_url = os.getenv(pconfig.base_url_env_var, "").strip() if pconfig.base_url_env_var else ""
if not base_url:
base_url = pconfig.inference_base_url
resolved_command = shutil.which(command) if command else None
return {
"configured": bool(resolved_command),
"provider": provider_id,
"name": pconfig.name,
"command": command,
"args": args,
"resolved_command": resolved_command,
"base_url": base_url,
"logged_in": bool(resolved_command),
}
return {"configured": False}
def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
@@ -4046,10 +4086,10 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
return get_qwen_auth_status()
if target == "google-gemini-cli":
return get_gemini_oauth_auth_status()
if target == "minimax-oauth":
return get_minimax_oauth_auth_status()
if target == "copilot-acp":
return get_external_process_provider_status(target)
if target == "codex-cli":
return get_external_process_provider_status(target)
# API-key providers
pconfig = PROVIDER_REGISTRY.get(target)
if pconfig and pconfig.auth_type == "api_key":
@@ -4092,7 +4132,7 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
if pconfig.base_url_env_var:
env_url = os.getenv(pconfig.base_url_env_var, "").strip()
if provider_id in {"kimi-coding", "kimi-coding-cn"}:
if provider_id in ("kimi-coding", "kimi-coding-cn"):
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
elif provider_id == "zai":
base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)
@@ -4123,30 +4163,69 @@ def resolve_external_process_provider_credentials(provider_id: str) -> Dict[str,
if not base_url:
base_url = pconfig.inference_base_url
command = (
os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
or os.getenv("COPILOT_CLI_PATH", "").strip()
or "copilot"
)
raw_args = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
args = shlex.split(raw_args) if raw_args else ["--acp", "--stdio"]
resolved_command = shutil.which(command) if command else None
if not resolved_command and not base_url.startswith("acp+tcp://"):
raise AuthError(
f"Could not find the Copilot CLI command '{command}'. "
"Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH.",
provider=provider_id,
code="missing_copilot_cli",
if provider_id == "copilot-acp":
command = (
os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
or os.getenv("COPILOT_CLI_PATH", "").strip()
or "copilot"
)
raw_args = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
args = shlex.split(raw_args) if raw_args else ["--acp", "--stdio"]
resolved_command = shutil.which(command) if command else None
if not resolved_command and not base_url.startswith("acp+tcp://"):
raise AuthError(
f"Could not find the Copilot CLI command '{command}'. "
"Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH.",
provider=provider_id,
code="missing_copilot_cli",
)
return {
"provider": provider_id,
"api_key": "copilot-acp",
"base_url": base_url.rstrip("/"),
"command": resolved_command or command,
"args": args,
"source": "process",
}
return {
"provider": provider_id,
"api_key": "copilot-acp",
"base_url": base_url.rstrip("/"),
"command": resolved_command or command,
"args": args,
"source": "process",
}
if provider_id == "codex-cli":
command = (
os.getenv("HERMES_CODEX_CLI_COMMAND", "").strip()
or os.getenv("CODEX_CLI_PATH", "").strip()
or "codex"
)
raw_args = os.getenv("HERMES_CODEX_CLI_ARGS", "").strip()
default_args = [
"exec",
"--json",
"--ephemeral",
"--dangerously-bypass-approvals-and-sandbox",
"--skip-git-repo-check",
]
args = shlex.split(raw_args) if raw_args else default_args
resolved_command = shutil.which(command) if command else None
if not resolved_command:
raise AuthError(
f"Could not find the Codex CLI command '{command}'. "
"Install Codex CLI (npm install -g @openai/codex) or set "
"HERMES_CODEX_CLI_COMMAND / CODEX_CLI_PATH.",
provider=provider_id,
code="missing_codex_cli",
)
return {
"provider": provider_id,
"api_key": "codex-cli",
"base_url": base_url.rstrip("/"),
"command": resolved_command or command,
"args": args,
"source": "process",
}
raise AuthError(
f"Unknown external-process provider '{provider_id}'.",
provider=provider_id,
code="unknown_external_process_provider",
)
# =============================================================================
@@ -4512,7 +4591,7 @@ def _login_openai_codex(
reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
reuse = "y"
if reuse in {"", "y", "yes"}:
if reuse in ("", "y", "yes"):
config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL))
print()
print("Login successful!")
@@ -4533,7 +4612,7 @@ def _login_openai_codex(
do_import = input("Import these credentials? (a separate login is recommended) [y/N]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
do_import = "n"
if do_import in {"y", "yes"}:
if do_import in ("y", "yes"):
_save_codex_tokens(cli_tokens)
base_url = os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") or DEFAULT_CODEX_BASE_URL
config_path = _update_config_for_provider("openai-codex", base_url)
@@ -4625,7 +4704,7 @@ def _codex_device_code_login() -> Dict[str, Any]:
if poll_resp.status_code == 200:
code_resp = poll_resp.json()
break
elif poll_resp.status_code in {403, 404}:
elif poll_resp.status_code in (403, 404):
continue # User hasn't completed login yet
else:
raise AuthError(
@@ -4759,20 +4838,6 @@ def _minimax_request_user_code(
return payload
def _minimax_expired_in_looks_like_unix_ms(expired_in: int, *, now_ms: int) -> bool:
"""True if ``expired_in`` is plausibly a unix-ms absolute time (vs TTL seconds)."""
return int(expired_in) > (now_ms // 2)
def _minimax_resolve_token_expiry_unix(expired_in: int, *, now: datetime) -> float:
    """Convert MiniMax's ``expired_in`` field to an expiry time in unix seconds.

    ``expired_in`` is either an absolute unix-millisecond timestamp or a TTL
    in seconds; ``_minimax_expired_in_looks_like_unix_ms`` decides which.
    Timestamps are divided by 1000; TTLs are added to ``now`` after being
    clamped to at least one second.
    """
    value = int(expired_in)
    now_s = now.timestamp()
    is_absolute = _minimax_expired_in_looks_like_unix_ms(
        value, now_ms=int(now_s * 1000),
    )
    if not is_absolute:
        return now_s + max(1, value)
    return value / 1000.0
def _minimax_poll_token(
client: httpx.Client, *, portal_base_url: str, client_id: str,
user_code: str, code_verifier: str, expired_in: int, interval_ms: Optional[int],
@@ -4781,11 +4846,12 @@ def _minimax_poll_token(
# Defensive parsing: if it's small enough to be a duration, treat as seconds.
import time as _time
now_ms = int(_time.time() * 1000)
raw = int(expired_in)
if _minimax_expired_in_looks_like_unix_ms(raw, now_ms=now_ms):
deadline = raw / 1000.0
if expired_in > now_ms // 2:
# Looks like a unix-ms timestamp.
deadline = expired_in / 1000.0
else:
deadline = _time.time() + max(1, raw)
# Treat as duration in seconds from now.
deadline = _time.time() + max(1, expired_in)
interval = max(2.0, (interval_ms or 2000) / 1000.0)
while _time.time() < deadline:
@@ -4899,10 +4965,8 @@ def _minimax_oauth_login(
)
now = datetime.now(timezone.utc)
expires_at_unix = _minimax_resolve_token_expiry_unix(
int(token_data["expired_in"]), now=now,
)
expires_in_s = max(0, int(expires_at_unix - now.timestamp()))
expires_in_s = int(token_data["expired_in"])
expires_at = now.timestamp() + expires_in_s
auth_state = {
"provider": "minimax-oauth",
@@ -4916,7 +4980,7 @@ def _minimax_oauth_login(
"refresh_token": token_data["refresh_token"],
"resource_url": token_data.get("resource_url"),
"obtained_at": now.isoformat(),
"expires_at": datetime.fromtimestamp(expires_at_unix, tz=timezone.utc).isoformat(),
"expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
"expires_in": expires_in_s,
}
@@ -4977,16 +5041,14 @@ def _refresh_minimax_oauth_state(
relogin_required=True,
)
now_dt = datetime.now(timezone.utc)
expires_at_unix = _minimax_resolve_token_expiry_unix(
int(payload["expired_in"]), now=now_dt,
)
expires_in_s = max(0, int(expires_at_unix - now_dt.timestamp()))
expires_in_s = int(payload["expired_in"])
new_state = dict(state)
new_state.update({
"access_token": payload["access_token"],
"refresh_token": payload.get("refresh_token", state["refresh_token"]),
"obtained_at": now_dt.isoformat(),
"expires_at": datetime.fromtimestamp(expires_at_unix, tz=timezone.utc).isoformat(),
"expires_at": datetime.fromtimestamp(now_dt.timestamp() + expires_in_s,
tz=timezone.utc).isoformat(),
"expires_in": expires_in_s,
})
_minimax_save_auth_state(new_state)
@@ -5207,7 +5269,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
do_import = input("Import these credentials? [Y/n]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
do_import = "y"
if do_import in {"", "y", "yes"}:
if do_import in ("", "y", "yes"):
print("Rehydrating Nous session from shared credentials...")
auth_state = _try_import_shared_nous_state(
timeout_seconds=timeout_seconds,
@@ -5270,8 +5332,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
from hermes_cli.models import (
get_curated_nous_model_ids, get_pricing_for_provider,
check_nous_free_tier, partition_nous_models_by_tier,
union_with_portal_free_recommendations,
union_with_portal_paid_recommendations,
)
model_ids = get_curated_nous_model_ids()
@@ -5280,27 +5340,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
if model_ids:
pricing = get_pricing_for_provider("nous")
free_tier = check_nous_free_tier()
_portal_for_recs = auth_state.get("portal_base_url", "")
if free_tier:
# The Portal's freeRecommendedModels endpoint is the
# source of truth for what's free *right now*. Augment
# the curated list with anything new the Portal flags
# as free so users on older Hermes builds still see
# newly-launched free models without a CLI release.
model_ids, pricing = union_with_portal_free_recommendations(
model_ids, pricing, _portal_for_recs,
)
model_ids, unavailable_models = partition_nous_models_by_tier(
model_ids, pricing, free_tier=True,
)
else:
# Paid-tier mirror: pull paidRecommendedModels so newly
# launched paid models surface in the picker even if
# the in-repo curated list and docs-hosted manifest
# haven't caught up yet.
model_ids, pricing = union_with_portal_paid_recommendations(
model_ids, pricing, _portal_for_recs,
)
_portal = auth_state.get("portal_base_url", "")
if model_ids:
print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
+6 -9
View File
@@ -266,7 +266,7 @@ def auth_add_command(args) -> None:
do_import = input("Import these credentials? [Y/n]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
do_import = "y"
if do_import in {"", "y", "yes"}:
if do_import in ("", "y", "yes"):
print("Rehydrating Nous session from shared credentials...")
rehydrated = auth_mod._try_import_shared_nous_state(
timeout_seconds=getattr(args, "timeout", None) or 15.0,
@@ -375,12 +375,10 @@ def auth_add_command(args) -> None:
return
if provider == "minimax-oauth":
creds = auth_mod._minimax_oauth_login(
open_browser=not getattr(args, "no_browser", False),
timeout_seconds=getattr(args, "timeout", None) or 15.0,
)
from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
creds = resolve_minimax_oauth_runtime_credentials()
label = (getattr(args, "label", None) or "").strip() or label_from_token(
creds["access_token"],
creds["api_key"],
_oauth_default_label(provider, len(pool.entries()) + 1),
)
entry = PooledCredential(
@@ -390,9 +388,8 @@ def auth_add_command(args) -> None:
auth_type=AUTH_TYPE_OAUTH,
priority=0,
source=f"{SOURCE_MANUAL}:minimax_oauth",
access_token=creds["access_token"],
refresh_token=creds.get("refresh_token"),
base_url=creds.get("inference_base_url"),
access_token=creds["api_key"],
base_url=creds.get("base_url"),
)
pool.add_entry(entry)
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+6 -4
View File
@@ -298,7 +298,7 @@ def _detect_prefix(zf: zipfile.ZipFile) -> str:
if len(first_parts) == 1:
prefix = first_parts.pop()
# Only strip if it looks like a hermes dir name
if prefix in {".hermes", "hermes"}:
if prefix in (".hermes", "hermes"):
return prefix + "/"
return ""
@@ -349,7 +349,7 @@ def run_import(args) -> None:
except (EOFError, KeyboardInterrupt):
print("\nAborted.")
sys.exit(1)
if answer not in {"y", "yes"}:
if answer not in ("y", "yes"):
print("Aborted.")
return
@@ -802,7 +802,8 @@ def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
Operators who genuinely don't want a backup should set
``updates.pre_update_backup: false`` in config that gates creation.
"""
keep = max(keep, 1)
if keep < 1:
keep = 1
if not backup_dir.exists():
return 0
@@ -874,7 +875,8 @@ def _prune_pre_migration_backups(backup_dir: Path, keep: int) -> int:
Only touches files matching ``pre-migration-*.zip`` so other backups in
the same directory are never touched.
"""
keep = max(keep, 0)
if keep < 0:
keep = 0
if not backup_dir.exists():
return 0
+1 -1
View File
@@ -139,7 +139,7 @@ def _confirm(prompt: str) -> bool:
except (EOFError, KeyboardInterrupt):
print()
return False
return resp in {"y", "yes"}
return resp in ("y", "yes")
def cmd_clear(args: argparse.Namespace) -> int:
+11 -10
View File
@@ -298,7 +298,7 @@ def claw_command(args):
if action == "migrate":
_cmd_migrate(args)
elif action in {"cleanup", "clean"}:
elif action in ("cleanup", "clean"):
_cmd_cleanup(args)
else:
print("Usage: hermes claw <command> [options]")
@@ -670,16 +670,17 @@ def _cmd_cleanup(args):
elif not auto_yes and not sys.stdin.isatty():
print_info(f"Non-interactive session — would archive: {source_dir}")
print_info("To execute, re-run with: hermes claw cleanup --yes")
elif auto_yes or prompt_yes_no(f"Archive {source_dir}?", default=True):
try:
archive_path = _archive_directory(source_dir)
print_success(f"Archived: {source_dir}{archive_path}")
total_archived += 1
except OSError as e:
print_error(f"Could not archive: {e}")
print_info(f"Try manually: mv {source_dir} {source_dir}.pre-migration")
else:
print_info("Skipped.")
if auto_yes or prompt_yes_no(f"Archive {source_dir}?", default=True):
try:
archive_path = _archive_directory(source_dir)
print_success(f"Archived: {source_dir}{archive_path}")
total_archived += 1
except OSError as e:
print_error(f"Could not archive: {e}")
print_info(f"Try manually: mv {source_dir} {source_dir}.pre-migration")
else:
print_info("Skipped.")
# Summary
print()
+6 -27
View File
@@ -16,19 +16,6 @@ DEFAULT_CODEX_MODELS: List[str] = [
"gpt-5.4-mini",
"gpt-5.4",
"gpt-5.3-codex",
# gpt-5.3-codex-spark is in research preview and is exposed *only* via
# the Codex CLI / OAuth backend (chatgpt.com/backend-api/codex/models)
# for ChatGPT Pro subscribers. It is NOT available in the public OpenAI
# API, so it intentionally stays out of the "openai" provider catalog
# in hermes_cli/models.py — only the openai-codex (OAuth) provider
# surfaces it. The Codex backend reports ``supported_in_api: false`` for
# this slug; that flag describes API availability, not Codex backend
# availability, so the fetch/cache code paths below intentionally do
# not filter on it. PR #12994 removed this entry on the assumption it
# was unsupported — that was wrong; restored here. Keep it in the
# curated fallback so Pro users still see Spark in `/model` when live
# discovery is unavailable (offline first run, transient API failure).
"gpt-5.3-codex-spark",
"gpt-5.2-codex",
"gpt-5.1-codex-max",
"gpt-5.1-codex-mini",
@@ -39,11 +26,6 @@ _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
("gpt-5.3-codex", ("gpt-5.2-codex",)),
# Surface Spark whenever any compatible Codex template is present so
# accounts hitting the live endpoint with an older lineup still see
# Spark in the picker. Backend gates real availability by ChatGPT Pro
# entitlement; Hermes does not.
("gpt-5.3-codex-spark", ("gpt-5.3-codex", "gpt-5.2-codex")),
]
@@ -96,12 +78,10 @@ def _fetch_models_from_api(access_token: str) -> List[str]:
if not isinstance(slug, str) or not slug.strip():
continue
slug = slug.strip()
# Codex CLI's catalog uses ``supported_in_api`` for the public OpenAI
# API, not for the OAuth-backed Codex backend that this provider uses.
# Some valid Codex CLI models (for example gpt-5.3-codex-spark) are
# marked false here but are still accepted by the Codex route.
if item.get("supported_in_api") is False:
continue
visibility = item.get("visibility", "")
if isinstance(visibility, str) and visibility.strip().lower() in {"hide", "hidden"}:
if isinstance(visibility, str) and visibility.strip().lower() in ("hide", "hidden"):
continue
priority = item.get("priority")
rank = int(priority) if isinstance(priority, (int, float)) else 10_000
@@ -148,11 +128,10 @@ def _read_cache_models(codex_home: Path) -> List[str]:
if not isinstance(slug, str) or not slug.strip():
continue
slug = slug.strip()
# Do not filter on ``supported_in_api`` here. It describes the
# public OpenAI API, while Hermes openai-codex talks to the same
# OAuth-backed Codex backend as Codex CLI.
if item.get("supported_in_api") is False:
continue
visibility = item.get("visibility")
if isinstance(visibility, str) and visibility.strip().lower() in {"hide", "hidden"}:
if isinstance(visibility, str) and visibility.strip().lower() in ("hide", "hidden"):
continue
priority = item.get("priority")
rank = int(priority) if isinstance(priority, (int, float)) else 10_000
+10 -16
View File
@@ -79,8 +79,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("undo", "Remove the last user/assistant exchange", "Session"),
CommandDef("title", "Set a title for the current session", "Session",
args_hint="[name]"),
CommandDef("handoff", "Hand off this session to a messaging platform (Telegram, Discord, etc.)", "Session",
args_hint="<platform>", cli_only=True),
CommandDef("branch", "Branch the current session (explore a different path)", "Session",
aliases=("fork",), args_hint="[name]"),
CommandDef("compress", "Manually compress conversation context", "Session",
@@ -105,7 +103,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("goal", "Set a standing goal Hermes works on across turns until achieved", "Session",
args_hint="[text | pause | resume | clear | status]"),
CommandDef("status", "Show session info", "Session"),
CommandDef("whoami", "Show your slash command access (admin / user)", "Info"),
CommandDef("profile", "Show active profile name and home directory", "Info"),
CommandDef("sethome", "Set this chat as the home channel", "Session",
gateway_only=True, aliases=("set-home",)),
@@ -468,23 +465,20 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
Telegram command names cannot contain hyphens, so they are replaced with
underscores. Aliases are skipped -- Telegram shows one menu entry per
canonical command.
canonical command. Commands that require arguments are skipped because
selecting a Telegram BotCommand sends only ``/command`` and would execute
an incomplete command.
Built-in commands that require arguments (e.g. /queue, /steer, /background)
are **included** because their handlers return usage text when selected
without a payload, making them discoverable via autocomplete.
Plugin-registered slash commands that require arguments are **excluded**
because plugins may not provide a no-arg usage fallback.
Plugin-registered slash commands are included so plugins get native
autocomplete in Telegram without touching core code.
"""
overrides = _resolve_config_gates()
result: list[tuple[str, str]] = []
for cmd in COMMAND_REGISTRY:
if not _is_gateway_available(cmd, overrides):
continue
# Built-in arg-taking commands are included — their handlers show
# usage text when invoked without arguments, and hiding them from
# the menu hurts discoverability (issue #24312).
if _requires_argument(cmd.args_hint):
continue
tg_name = _sanitize_telegram_name(cmd.name)
if tg_name:
result.append((tg_name, cmd.description))
@@ -814,7 +808,7 @@ def discord_skill_commands_by_category(
# names are marked with a sentinel so the warning distinguishes
# "skill collided with a reserved command" from "two skills collided
# on the 32-char clamp" — the latter is the rename-worthy case.
_names_used: dict[str, str] = dict.fromkeys(reserved_names, "<reserved>")
_names_used: dict[str, str] = {n: "<reserved>" for n in reserved_names}
hidden = 0
try:
@@ -1362,9 +1356,9 @@ class SlashCommandCompleter(Completer):
try:
proc = subprocess.run(
cmd, capture_output=True, text=True, timeout=2,
cwd=cwd, encoding="utf-8", errors="replace",
cwd=cwd,
)
if proc.returncode == 0 and proc.stdout and proc.stdout.strip():
if proc.returncode == 0 and proc.stdout.strip():
raw = proc.stdout.strip().split("\n")
# Store relative paths
for p in raw[:5000]:
+3 -3
View File
@@ -216,9 +216,9 @@ _hermes() {{
typeset -A opt_args
_arguments -C \\
'(-)'{{-h,--help}}'[Show help and exit]' \\
'(-)'{{-V,--version}}'[Show version and exit]' \\
'(-)'{{-p,--profile}}'[Profile name]:profile:_hermes_profiles' \\
'(-h --help){{-h,--help}}[Show help and exit]' \\
'(-V --version){{-V,--version}}[Show version and exit]' \\
'(-p --profile){{-p,--profile}}[Profile name]:profile:_hermes_profiles' \\
'1:command:->commands' \\
'*::arg:->args'
+13 -186
View File
@@ -28,48 +28,6 @@ from typing import Dict, Any, Optional, List, Tuple
logger = logging.getLogger(__name__)

# Dedupe registry of (config_path, mtime_ns, size) keys for parse failures
# that have already been reported, so repeated CLI/gateway loads of the same
# broken config.yaml don't spam stderr every time.  An edited file produces a
# different key (new mtime/size) and is therefore reported again; keys left
# over from earlier versions of that path are pruned at that point.
_CONFIG_PARSE_WARNED: set = set()


def _warn_config_parse_failure(config_path: Path, exc: Exception) -> None:
    """Surface a config.yaml parse failure to user, log, and stderr.

    A YAML parse error in ``~/.hermes/config.yaml`` causes ``load_config()``
    to silently fall back to ``DEFAULT_CONFIG``, which means every user
    override (auxiliary providers, fallback chain, model overrides, etc.)
    is dropped.  Before this helper that was a one-line ``print(...)`` that
    scrolled off-screen on the first invocation and was never seen again.

    Now: warn once per (path, mtime_ns, size) on stderr **and** through the
    module logger at WARNING level.  Re-warns automatically if the file
    changes (different mtime/size), so users editing the config see the
    next failure.

    Args:
        config_path: Path of the config file that failed to parse.
        exc: The exception raised while parsing; its text is embedded in
            the warning message.

    Returns:
        None.  The only effects are the log record, the stderr line, and
        the update of ``_CONFIG_PARSE_WARNED``.
    """
    path_str = str(config_path)
    try:
        st = config_path.stat()
        key = (path_str, st.st_mtime_ns, st.st_size)
    except OSError:
        # File vanished or is unreadable: fall back to a fixed key so the
        # failure is still reported (once) instead of raising from here.
        key = (path_str, 0, 0)

    if key in _CONFIG_PARSE_WARNED:
        return

    # Drop keys recorded for earlier versions of this same file.  Without
    # this the registry only ever grows, and a broken file restored with its
    # old mtime/size would never be reported again.
    _CONFIG_PARSE_WARNED.difference_update(
        [seen for seen in _CONFIG_PARSE_WARNED if seen[0] == path_str]
    )
    _CONFIG_PARSE_WARNED.add(key)

    msg = (
        f"Failed to parse {config_path}: {exc}. "
        f"Falling back to default config — every user override "
        f"(auxiliary providers, fallback chain, model settings) is being IGNORED. "
        f"Fix the YAML and restart."
    )
    logger.warning(msg)
    try:
        sys.stderr.write(f"⚠️ hermes config: {msg}\n")
        sys.stderr.flush()
    except Exception:
        # Best effort only: stderr may be missing, closed, or unable to
        # encode the message; the log record above is the durable copy.
        pass
_IS_WINDOWS = platform.system() == "Windows"
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
_LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
@@ -477,12 +435,6 @@ DEFAULT_CONFIG = {
# threshold before escalating to a full timeout. The warning fires
# once per run and does not interrupt the agent. 0 = disable warning.
"gateway_timeout_warning": 900,
# Maximum time (seconds) the gateway will block an agent waiting for
# a clarify-tool response from the user. Hit this and the agent
# unblocks with "[user did not respond within Xm]" so it can adapt
# rather than pinning the running-agent guard forever. CLI clarify
# blocks indefinitely (input() is synchronous) and ignores this.
"clarify_timeout": 600,
# Periodic "still working" notification interval (seconds).
# Sends a status message every N seconds so the user knows the
# agent hasn't died during long tasks. 0 = disable notifications.
@@ -585,7 +537,6 @@ DEFAULT_CONFIG = {
# Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
# Default off because passing host directories into a sandbox weakens isolation.
"docker_mount_cwd_to_workspace": False,
"docker_extra_args": [], # Extra flags passed verbatim to docker run
# Explicit opt-in: run the Docker container as the host user's uid:gid
# (via `--user`). When enabled, files written into bind-mounted dirs
# (docker_volumes, the persistent workspace, or the auto-mounted cwd)
@@ -634,12 +585,6 @@ DEFAULT_CONFIG = {
# so the server maps it to a persistent Firefox profile automatically.
# When false (default), each session gets a random userId (ephemeral).
"managed_persistence": False,
# Optional externally managed Camofox identity. Useful when another
# app owns the visible browser and Hermes should operate in it.
"user_id": "",
"session_key": "",
# Rehydrate tab_id from Camofox before creating a new tab.
"adopt_existing_tab": False,
},
},
@@ -735,15 +680,8 @@ DEFAULT_CONFIG = {
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
# cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
# long_lived_prefix: when true (default), Claude on Anthropic / OpenRouter / Nous
# Portal uses a split layout: tools[-1] + stable system prefix at long_lived_ttl
# (cross-session cache), last 2 messages at cache_ttl (within-session rolling).
# Set false to keep the legacy "system + last 3 messages" single-tier layout.
# long_lived_ttl: TTL for the cross-session prefix tier ("5m" or "1h"; default "1h").
"prompt_caching": {
"cache_ttl": "5m",
"long_lived_prefix": True,
"long_lived_ttl": "1h",
},
# OpenRouter-specific settings.
@@ -753,18 +691,9 @@ DEFAULT_CONFIG = {
# See: https://openrouter.ai/docs/guides/features/response-caching
# response_cache_ttl: how long cached responses remain valid, in seconds (1-86400).
# Default 300 (5 minutes). Only used when response_cache is enabled.
# min_coding_score: knob for the openrouter/pareto-code router (0.0-1.0).
# Only applied when model.model is "openrouter/pareto-code". Higher
# values route to stronger (more expensive) coders; lower values open
# up cheaper, faster options. Default 0.65 lands on the mid-tier
# coder on the current Pareto frontier. Empty string = let OpenRouter
# pick the strongest available coder (router's documented default
# when the plugins block is omitted).
# See: https://openrouter.ai/docs/guides/routing/routers/pareto-router
"openrouter": {
"response_cache": True,
"response_cache_ttl": 300,
"min_coding_score": 0.65,
},
# AWS Bedrock provider configuration.
@@ -793,26 +722,6 @@ DEFAULT_CONFIG = {
# Empty model = use provider's default auxiliary model.
# All tasks fall back to openrouter:google/gemini-3-flash-preview if
# the configured provider is unavailable.
#
# extra_body: forwarded verbatim as request body fields on every aux call
# for that task. Use this to set provider-specific knobs (independent of
# main-agent settings). On OpenRouter you can set provider routing prefs
# and the Pareto Code coding-score floor here. Example:
#
# auxiliary:
# compression:
# provider: openrouter
# model: openrouter/pareto-code
# extra_body:
# provider: # OpenRouter provider routing
# order: [anthropic, google]
# sort: throughput # or price | latency
# plugins: # OpenRouter Pareto Code router
# - id: pareto-router
# min_coding_score: 0.5
#
# Each aux task is independent — main-agent provider_routing and
# openrouter.min_coding_score do NOT propagate to aux calls by design.
"auxiliary": {
"vision": {
"provider": "auto", # auto | openrouter | nous | codex | custom
@@ -921,7 +830,6 @@ DEFAULT_CONFIG = {
"bell_on_complete": False,
"show_reasoning": False,
"streaming": False,
"timestamps": False, # Show [HH:MM] on user and assistant labels
"final_response_markdown": "strip", # render | strip | raw
# Preserve recent classic CLI output across Ctrl+L, /redraw, and
# terminal resize full-screen clears. Disable if a terminal emulator
@@ -929,14 +837,6 @@ DEFAULT_CONFIG = {
"persistent_output": True,
"persistent_output_max_lines": 200,
"inline_diffs": True, # Show inline diff previews for write actions (write_file, patch, skill_manage)
# File-mutation verifier footer. When true (default), the agent
# appends a one-line advisory to its final response whenever a
# write_file / patch call failed during the turn and was never
# superseded by a successful write to the same path. This catches
# the "batch of parallel patches, half fail, model claims success"
# class of over-claim that otherwise forces users to run
# `git status` to verify edits landed. Set false to suppress.
"file_mutation_verifier": True,
"show_cost": False, # Show $ cost in the status bar (off by default)
"skin": "default",
# UI language for static user-facing messages (approval prompts, a
@@ -1304,15 +1204,6 @@ DEFAULT_CONFIG = {
# "Always Approve" to silence the prompt permanently; that flips
# this key to false.
"mcp_reload_confirm": True,
# When true, destructive session slash commands (/clear, /new, /reset,
# /undo) ask the user to confirm before discarding conversation state.
# Three-option prompt (Approve Once / Always Approve / Cancel) routed
# through tools.slash_confirm — native yes/no buttons on Telegram,
# Discord, and Slack; text fallback elsewhere. Users click "Always
# Approve" to silence the prompt permanently; that flips this key to
# false. TUI has its own modal overlay (HERMES_TUI_NO_CONFIRM=1 to
# opt out there).
"destructive_slash_confirm": True,
},
# Permanently allowed dangerous command patterns (added via "always" approval)
@@ -1352,21 +1243,6 @@ DEFAULT_CONFIG = {
"domains": [],
"shared_files": [],
},
# Acknowledged supply-chain security advisories. Each entry is the
# ID of an advisory the user has read and acted on (uninstalled the
# compromised package, rotated credentials). Acked advisories no
# longer trigger the startup banner. Add via `hermes doctor --ack
# <id>`; remove by editing the list directly. See
# ``hermes_cli/security_advisories.py`` for the catalog.
"acked_advisories": [],
# Allow Hermes to lazy-install opt-in backend packages from PyPI
# the first time the user enables a backend that needs them
# (e.g. installing ``elevenlabs`` when the user picks ElevenLabs as
# their TTS provider). Set to false to require explicit
# ``pip install`` for everything beyond the base set — appropriate
# for restricted networks, audited environments, or air-gapped
# systems where any runtime install is unacceptable.
"allow_lazy_installs": True,
},
"cron": {
@@ -1505,53 +1381,6 @@ DEFAULT_CONFIG = {
"backup_keep": 5,
},
# Language Server Protocol — semantic diagnostics from real
# language servers (pyright, gopls, rust-analyzer, etc.) wired
# into the post-write lint check used by ``write_file`` and
# ``patch``.
#
# LSP is gated on git-workspace detection: when the agent's
# cwd (or the file being edited) is inside a git worktree, LSP
# runs against that workspace. When neither is in a git repo,
# LSP stays dormant and the in-process syntax check is the only
# tier — handy for Telegram/Discord chats where the cwd is the
# user's home directory.
"lsp": {
# Master toggle. Setting this to false disables the entire
# subsystem — no servers spawn, no background event loop, no
# cost.
"enabled": True,
# Diagnostic-wait mode for the post-write check.
# ``"document"`` waits up to ``wait_timeout`` seconds for the
# current file's diagnostics; ``"full"`` additionally requests
# workspace-wide diagnostics (slower).
"wait_mode": "document",
"wait_timeout": 5.0,
# How to handle missing server binaries.
# ``"auto"`` — try to install via npm/go/pip into
# ``<HERMES_HOME>/lsp/bin/`` on first use.
# ``"manual"`` — only use binaries already on PATH.
# ``"off"`` — alias for ``manual``.
"install_strategy": "auto",
# Per-server overrides. Each key is a server_id from the
# registry (``pyright``, ``typescript``, ``gopls``,
# ``rust-analyzer``, etc.) and accepts:
# disabled: true
# — skip this server even when its extensions match
# command: ["full/path/to/server", "--stdio"]
# — pin a custom binary path; bypasses auto-install
# env: {"KEY": "value"}
# — extra env vars passed to the spawned process
# initialization_options: {...}
# — merged into the LSP ``initializationOptions``
# Empty by default; the registry defaults work for typical
# setups.
"servers": {},
},
# Config schema version - bump this when adding new required fields
"_config_version": 23,
}
@@ -3291,7 +3120,7 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non
terminal_cfg = config.get("terminal", {})
config_cwd = terminal_cfg.get("cwd", ".") if isinstance(terminal_cfg, dict) else "."
# Only warn if config.yaml doesn't have an explicit path
config_has_explicit_cwd = config_cwd not in {".", "auto", "cwd", ""}
config_has_explicit_cwd = config_cwd not in (".", "auto", "cwd", "")
lines: list[str] = []
if messaging_cwd:
@@ -3351,10 +3180,10 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
if "tool_progress" not in display:
old_enabled = get_env_value("HERMES_TOOL_PROGRESS")
old_mode = get_env_value("HERMES_TOOL_PROGRESS_MODE")
if old_enabled and old_enabled.lower() in {"false", "0", "no"}:
if old_enabled and old_enabled.lower() in ("false", "0", "no"):
display["tool_progress"] = "off"
results["config_added"].append("display.tool_progress=off (from HERMES_TOOL_PROGRESS=false)")
elif old_mode and old_mode.lower() in {"new", "all"}:
elif old_mode and old_mode.lower() in ("new", "all"):
display["tool_progress"] = old_mode.lower()
results["config_added"].append(f"display.tool_progress={old_mode.lower()} (from HERMES_TOOL_PROGRESS_MODE)")
else:
@@ -3433,7 +3262,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
new_entry = {"api": old_url}
if old_name:
new_entry["name"] = old_name
if old_key and old_key not in {"no-key", "no-key-required", ""}:
if old_key and old_key not in ("no-key", "no-key-required", ""):
new_entry["api_key"] = old_key
# Carry over model and api_mode if present
@@ -3491,7 +3320,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
stt.pop("model", None)
# Place it in the appropriate provider section only if the
# user didn't already set a model there
if provider in {"local", "local_command"}:
if provider in ("local", "local_command"):
# Don't migrate an OpenAI model name into the local section
_local_models = {
"tiny.en", "tiny", "base.en", "base", "small.en", "small",
@@ -3575,7 +3404,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
if not aux_comp.get("model"):
aux_comp["model"] = str(s_model).strip()
migrated_keys.append(f"model={s_model}")
if s_provider and str(s_provider).strip() not in {"", "auto"}:
if s_provider and str(s_provider).strip() not in ("", "auto"):
aux = config.setdefault("auxiliary", {})
aux_comp = aux.setdefault("compression", {})
if not aux_comp.get("provider") or aux_comp.get("provider") == "auto":
@@ -3806,7 +3635,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
except (EOFError, KeyboardInterrupt):
answer = "n"
if answer in {"y", "yes"}:
if answer in ("y", "yes"):
print()
for name, info in new_and_unset:
if info.get("url"):
@@ -3867,7 +3696,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
except (EOFError, KeyboardInterrupt):
answer = "n"
if answer in {"y", "yes"}:
if answer in ("y", "yes"):
print()
config = load_config()
try:
@@ -4137,8 +3966,7 @@ def read_raw_config() -> Dict[str, Any]:
try:
with open(config_path, encoding="utf-8") as f:
data = yaml.safe_load(f) or {}
except Exception as e:
_warn_config_parse_failure(config_path, e)
except Exception:
return {}
if not isinstance(data, dict):
@@ -4188,7 +4016,7 @@ def load_config() -> Dict[str, Any]:
config = _deep_merge(config, user_config)
except Exception as e:
_warn_config_parse_failure(config_path, e)
print(f"Warning: Failed to load config: {e}")
normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
expanded = _expand_env_vars(normalized)
@@ -4949,9 +4777,9 @@ def set_config_value(key: str, value: str):
# inline navigation here silently overwrote lists with dicts.
# Convert value to appropriate type
if value.lower() in {'true', 'yes', 'on'}:
if value.lower() in ('true', 'yes', 'on'):
value = True
elif value.lower() in {'false', 'no', 'off'}:
elif value.lower() in ('false', 'no', 'off'):
value = False
elif value.isdigit():
value = int(value)
@@ -4977,7 +4805,6 @@ def set_config_value(key: str, value: str):
"terminal.vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
"terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
"terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
"terminal.docker_env": "TERMINAL_DOCKER_ENV",
# terminal.cwd intentionally excluded — CLI resolves at runtime,
# gateway bridges it in gateway/run.py. Persisting to .env causes
# stale values to poison child processes.
@@ -5156,7 +4983,7 @@ def _inject_profile_env_vars() -> None:
try:
from providers import list_providers
for _pp in list_providers():
if _pp.auth_type not in {"api_key",}:
if _pp.auth_type not in ("api_key",):
continue
for _var in _pp.env_vars:
if _var in OPTIONAL_ENV_VARS:

Some files were not shown because too many files have changed in this diff Show More