Compare commits

..

1 Commits

Author SHA1 Message Date
Shannon Sands bad9fe2452 add generic gateway startup readiness checks 2026-04-15 10:03:23 +10:00
769 changed files with 12210 additions and 115845 deletions
-13
View File
@@ -24,15 +24,6 @@
# Optional base URL override (default: Google's OpenAI-compatible endpoint)
# GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
# =============================================================================
# LLM PROVIDER (Ollama Cloud)
# =============================================================================
# Cloud-hosted open models via Ollama's OpenAI-compatible endpoint.
# Get your key at: https://ollama.com/settings
# OLLAMA_API_KEY=your_ollama_key_here
# Optional base URL override (default: https://ollama.com/v1)
# OLLAMA_BASE_URL=https://ollama.com/v1
# =============================================================================
# LLM PROVIDER (z.ai / GLM)
# =============================================================================
@@ -154,10 +145,6 @@
# Only override here if you need to force a backend without touching config.yaml:
# TERMINAL_ENV=local
# Override the container runtime binary (e.g. to use Podman instead of Docker).
# Useful on systems where Docker's storage driver is broken or unavailable.
# HERMES_DOCKER_BINARY=/usr/local/bin/podman
# Container images (for singularity/docker/modal backends)
# TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
# TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20
-4
View File
@@ -1,5 +1 @@
watch_file pyproject.toml uv.lock
watch_file ui-tui/package-lock.json ui-tui/package.json
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix
use flake
+8 -10
View File
@@ -1,12 +1,11 @@
name: Deploy Site
on:
release:
types: [published]
push:
branches: [main]
paths:
- 'website/**'
- 'landingpage/**'
- 'skills/**'
- 'optional-skills/**'
- '.github/workflows/deploy-site.yml'
@@ -21,14 +20,8 @@ concurrency:
cancel-in-progress: false
jobs:
deploy-vercel:
if: github.event_name == 'release'
runs-on: ubuntu-latest
steps:
- name: Trigger Vercel Deploy
run: curl -X POST "${{ secrets.VERCEL_DEPLOY_HOOK }}"
deploy-docs:
build-and-deploy:
# Only run on the upstream repository, not on forks
if: github.repository == 'NousResearch/hermes-agent'
runs-on: ubuntu-latest
environment:
@@ -72,7 +65,12 @@ jobs:
- name: Stage deployment
run: |
mkdir -p _site/docs
# Landing page at root
cp -r landingpage/* _site/
# Docusaurus at /docs/
cp -r website/build/* _site/docs/
# CNAME so GitHub Pages keeps the custom domain between deploys
echo "hermes-agent.nousresearch.com" > _site/CNAME
- name: Upload artifact
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3
-1
View File
@@ -60,6 +60,5 @@ mini-swe-agent/
# Nix
.direnv/
.nix-stamps/
result
website/static/api/skills-index.json
-1
View File
@@ -105,4 +105,3 @@ tesseracttars-creator <tesseracttars@gmail.com> <tesseracttars@gmail.com>
xinbenlv <zzn+pa@zzn.im> <zzn+pa@zzn.im>
SaulJWu <saul.jj.wu@gmail.com> <saul.jj.wu@gmail.com>
angelos <angelos@oikos.lan.home.malaiwah.com> <angelos@oikos.lan.home.malaiwah.com>
MestreY0d4-Uninter <241404605+MestreY0d4-Uninter@users.noreply.github.com> <MestreY0d4-Uninter@users.noreply.github.com>
+9 -107
View File
@@ -13,7 +13,7 @@ source venv/bin/activate # ALWAYS activate before running Python
```
hermes-agent/
├── run_agent.py # AIAgent class — core conversation loop
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
├── model_tools.py # Tool orchestration, _discover_tools(), handle_function_call()
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
├── cli.py # HermesCLI class — interactive CLI orchestrator
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
@@ -56,19 +56,6 @@ hermes-agent/
│ ├── run.py # Main loop, slash commands, message dispatch
│ ├── session.py # SessionStore — conversation persistence
│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
│ ├── src/entry.tsx # TTY gate + render()
│ ├── src/app.tsx # Main state machine and UI
│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks)
│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.)
│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages)
├── tui_gateway/ # Python JSON-RPC backend for the TUI
│ ├── entry.py # stdio entrypoint
│ ├── server.py # RPC handlers and session logic
│ ├── render.py # Optional rich/ANSI bridge
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
├── cron/ # Scheduler (jobs.py, scheduler.py)
├── environments/ # RL training environments (Atropos)
@@ -192,62 +179,9 @@ if canonical == "mycommand":
---
## TUI Architecture (ui-tui + tui_gateway)
The TUI is a full replacement for the classic (prompt_toolkit) CLI, activated via `hermes --tui` or `HERMES_TUI=1`.
### Process Model
```
hermes --tui
└─ Node (Ink) ──stdio JSON-RPC── Python (tui_gateway)
│ └─ AIAgent + tools + sessions
└─ renders transcript, composer, prompts, activity
```
TypeScript owns the screen. Python owns sessions, tools, model calls, and slash command logic.
### Transport
Newline-delimited JSON-RPC over stdio. Requests from Ink, events from Python. See `tui_gateway/server.py` for the full method/event catalog.
### Key Surfaces
| Surface | Ink component | Gateway method |
|---------|---------------|----------------|
| Chat streaming | `app.tsx` + `messageLine.tsx` | `prompt.submit``message.delta/complete` |
| Tool activity | `thinking.tsx` | `tool.start/progress/complete` |
| Approvals | `prompts.tsx` | `approval.respond``approval.request` |
| Clarify/sudo/secret | `prompts.tsx`, `maskedPrompt.tsx` | `clarify/sudo/secret.respond` |
| Session picker | `sessionPicker.tsx` | `session.list/resume` |
| Slash commands | Local handler + fallthrough | `slash.exec``_SlashWorker`, `command.dispatch` |
| Completions | `useCompletion` hook | `complete.slash`, `complete.path` |
| Theming | `theme.ts` + `branding.tsx` | `gateway.ready` with skin data |
### Slash Command Flow
1. Built-in client commands (`/help`, `/quit`, `/clear`, `/resume`, `/copy`, `/paste`, etc.) handled locally in `app.tsx`
2. Everything else → `slash.exec` (runs in persistent `_SlashWorker` subprocess) → `command.dispatch` fallback
### Dev Commands
```bash
cd ui-tui
npm install # first time
npm run dev # watch mode (rebuilds hermes-ink + tsx --watch)
npm start # production
npm run build # full build (hermes-ink + tsc)
npm run type-check # typecheck only (tsc --noEmit)
npm run lint # eslint
npm run fmt # prettier
npm test # vitest
```
---
## Adding New Tools
Requires changes in **2 files**:
Requires changes in **3 files**:
**1. Create `tools/your_tool.py`:**
```python
@@ -270,9 +204,9 @@ registry.register(
)
```
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
**2. Add import** in `model_tools.py` `_discover_tools()` list.
Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.
**3. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.
@@ -524,45 +458,13 @@ def profile_env(tmp_path, monkeypatch):
## Testing
**ALWAYS use `scripts/run_tests.sh`** — do not call `pytest` directly. The script enforces
hermetic environment parity with CI (unset credential vars, TZ=UTC, LANG=C.UTF-8,
4 xdist workers matching GHA ubuntu-latest). Direct `pytest` on a 16+ core
developer machine with API keys set diverges from CI in ways that have caused
multiple "works locally, fails in CI" incidents (and the reverse).
```bash
scripts/run_tests.sh # full suite, CI-parity
scripts/run_tests.sh tests/gateway/ # one directory
scripts/run_tests.sh tests/agent/test_foo.py::test_x # one test
scripts/run_tests.sh -v --tb=long # pass-through pytest flags
```
### Why the wrapper (and why the old "just call pytest" doesn't work)
Five real sources of local-vs-CI drift the script closes:
| | Without wrapper | With wrapper |
|---|---|---|
| Provider API keys | Whatever is in your env (auto-detects pool) | All `*_API_KEY`/`*_TOKEN`/etc. unset |
| HOME / `~/.hermes/` | Your real config+auth.json | Temp dir per test |
| Timezone | Local TZ (PDT etc.) | UTC |
| Locale | Whatever is set | C.UTF-8 |
| xdist workers | `-n auto` = all cores (20+ on a workstation) | `-n 4` matching CI |
`tests/conftest.py` also enforces points 1-4 as an autouse fixture so ANY pytest
invocation (including IDE integrations) gets hermetic behavior — but the wrapper
is belt-and-suspenders.
### Running without the wrapper (only if you must)
If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
pytest directly), at minimum activate the venv and pass `-n 4`:
```bash
source venv/bin/activate
python -m pytest tests/ -q -n 4
python -m pytest tests/ -q # Full suite (~3000 tests, ~3 min)
python -m pytest tests/test_model_tools.py -q # Toolset resolution
python -m pytest tests/test_cli_init.py -q # CLI config loading
python -m pytest tests/gateway/ -q # Gateway tests
python -m pytest tests/tools/ -q # Tool-level tests
```
Worker count above 4 will surface test-ordering flakes that CI never sees.
Always run the full suite before pushing changes.
+2 -9
View File
@@ -13,7 +13,7 @@
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
<table>
<tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
@@ -141,18 +141,11 @@ See `hermes claw migrate --help` for all options, or use the `openclaw-migration
We welcome contributions! See the [Contributing Guide](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) for development setup, code style, and PR process.
Quick start for contributors — clone and go with `setup-hermes.sh`:
Quick start for contributors:
```bash
git clone https://github.com/NousResearch/hermes-agent.git
cd hermes-agent
./setup-hermes.sh # installs uv, creates venv, installs .[all], symlinks ~/.local/bin/hermes
./hermes # auto-detects the venv, no need to `source` first
```
Manual path (equivalent to the above):
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
uv venv venv --python 3.11
source venv/bin/activate
-27
View File
@@ -1,27 +0,0 @@
# Hermes Agent v0.10.0 (v2026.4.16)
**Release Date:** April 16, 2026
> The Tool Gateway release — paid Nous Portal subscribers can now use web search, image generation, text-to-speech, and browser automation through their existing subscription with zero additional API keys.
---
## ✨ Highlights
- **Nous Tool Gateway** — Paid [Nous Portal](https://portal.nousresearch.com) subscribers now get automatic access to **web search** (Firecrawl), **image generation** (FAL / FLUX 2 Pro), **text-to-speech** (OpenAI TTS), and **browser automation** (Browser Use) through their existing subscription. No separate API keys needed — just run `hermes model`, select Nous Portal, and pick which tools to enable. Per-tool opt-in via `use_gateway` config, full integration with `hermes tools` and `hermes status`, and the runtime correctly prefers the gateway even when direct API keys exist. Replaces the old hidden `HERMES_ENABLE_NOUS_MANAGED_TOOLS` env var with clean subscription-based detection. ([#11206](https://github.com/NousResearch/hermes-agent/pull/11206), based on work by @jquesnelle; docs: [#11208](https://github.com/NousResearch/hermes-agent/pull/11208))
---
## 🐛 Bug Fixes & Improvements
This release includes 180+ commits with numerous bug fixes, platform improvements, and reliability enhancements across the agent core, gateway, CLI, and tool system. Full details will be published in the v0.11.0 changelog.
---
## 👥 Contributors
- **@jquesnelle** (emozilla) — Original Tool Gateway implementation ([#10799](https://github.com/NousResearch/hermes-agent/pull/10799)), salvaged and shipped in this release
---
**Full Changelog**: [v2026.4.13...v2026.4.16](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.16)
-84
View File
@@ -1,84 +0,0 @@
# Hermes Agent Security Policy
This document outlines the security protocols, trust model, and deployment hardening guidelines for the **Hermes Agent** project.
## 1. Vulnerability Reporting
Hermes Agent does **not** operate a bug bounty program. Security issues should be reported via [GitHub Security Advisories (GHSA)](https://github.com/NousResearch/hermes-agent/security/advisories/new) or by emailing **security@nousresearch.com**. Do not open public issues for security vulnerabilities.
### Required Submission Details
- **Title & Severity:** Concise description and CVSS score/rating.
- **Affected Component:** Exact file path and line range (e.g., `tools/approval.py:120-145`).
- **Environment:** Output of `hermes version`, commit SHA, OS, and Python version.
- **Reproduction:** Step-by-step Proof-of-Concept (PoC) against `main` or the latest release.
- **Impact:** Explanation of what trust boundary was crossed.
---
## 2. Trust Model
The core assumption is that Hermes is a **personal agent** with one trusted operator.
### Operator & Session Trust
- **Single Tenant:** The system protects the operator from LLM actions, not from malicious co-tenants. Multi-user isolation must happen at the OS/host level.
- **Gateway Security:** Authorized callers (Telegram, Discord, Slack, etc.) receive equal trust. Session keys are used for routing, not as authorization boundaries.
- **Execution:** Defaults to `terminal.backend: local` (direct host execution). Container isolation (Docker, Modal, Daytona) is opt-in for sandboxing.
### Dangerous Command Approval
The approval system (`tools/approval.py`) is a core security boundary. Terminal commands, file operations, and other potentially destructive actions are gated behind explicit user confirmation before execution. The approval mode is configurable via `approvals.mode` in `config.yaml`:
- `"on"` (default) — prompts the user to approve dangerous commands.
- `"auto"` — auto-approves after a configurable delay.
- `"off"` — disables the gate entirely (break-glass; see Section 3).
### Output Redaction
`agent/redact.py` strips secret-like patterns (API keys, tokens, credentials) from all display output before it reaches the terminal or gateway platform. This prevents accidental credential leakage in chat logs, tool previews, and response text. Redaction operates on the display layer only — underlying values remain intact for internal agent operations.
### Skills vs. MCP Servers
- **Installed Skills:** High trust. Equivalent to local host code; skills can read environment variables and run arbitrary commands.
- **MCP Servers:** Lower trust. MCP subprocesses receive a filtered environment (`_build_safe_env()` in `tools/mcp_tool.py`) — only safe baseline variables (`PATH`, `HOME`, `XDG_*`) plus variables explicitly declared in the server's `env` config block are passed through. Host credentials are stripped by default. Additionally, packages invoked via `npx`/`uvx` are checked against the OSV malware database before spawning.
### Code Execution Sandbox
The `execute_code` tool (`tools/code_execution_tool.py`) runs LLM-generated Python scripts in a child process with API keys and tokens stripped from the environment to prevent credential exfiltration. Only environment variables explicitly declared by loaded skills (via `env_passthrough`) or by the user in `config.yaml` (`terminal.env_passthrough`) are passed through. The child accesses Hermes tools via RPC, not direct API calls.
### Subagents
- **No recursive delegation:** The `delegate_task` tool is disabled for child agents.
- **Depth limit:** `MAX_DEPTH = 2` — parent (depth 0) can spawn a child (depth 1); grandchildren are rejected.
- **Memory isolation:** Subagents run with `skip_memory=True` and do not have access to the parent's persistent memory provider. The parent receives only the task prompt and final response as an observation.
---
## 3. Out of Scope (Non-Vulnerabilities)
The following scenarios are **not** considered security breaches:
- **Prompt Injection:** Unless it results in a concrete bypass of the approval system, toolset restrictions, or container sandbox.
- **Public Exposure:** Deploying the gateway to the public internet without external authentication or network protection.
- **Trusted State Access:** Reports that require pre-existing write access to `~/.hermes/`, `.env`, or `config.yaml` (these are operator-owned files).
- **Default Behavior:** Host-level command execution when `terminal.backend` is set to `local` — this is the documented default, not a vulnerability.
- **Configuration Trade-offs:** Intentional break-glass settings such as `approvals.mode: "off"` or `terminal.backend: local` in production.
- **Tool-level read/access restrictions:** The agent has unrestricted shell access via the `terminal` tool by design. Reports that a specific tool (e.g., `read_file`) can access a resource are not vulnerabilities if the same access is available through `terminal`. Tool-level deny lists only constitute a meaningful security boundary when paired with equivalent restrictions on the terminal side (as with write operations, where `WRITE_DENIED_PATHS` is paired with the dangerous command approval system).
---
## 4. Deployment Hardening & Best Practices
### Filesystem & Network
- **Production sandboxing:** Use container backends (`docker`, `modal`, `daytona`) instead of `local` for untrusted workloads.
- **File permissions:** Run as non-root (the Docker image uses UID 10000); protect credentials with `chmod 600 ~/.hermes/.env` on local installs.
- **Network exposure:** Do not expose the gateway or API server to the public internet without VPN, Tailscale, or firewall protection. SSRF protection is enabled by default across all gateway platform adapters (Telegram, Discord, Slack, Matrix, Mattermost, etc.) with redirect validation. Note: the local terminal backend does not apply SSRF filtering, as it operates within the trusted operator's environment.
### Skills & Supply Chain
- **Skill installation:** Review Skills Guard reports (`tools/skills_guard.py`) before installing third-party skills. The audit log at `~/.hermes/skills/.hub/audit.log` tracks every install and removal.
- **MCP safety:** OSV malware checking runs automatically for `npx`/`uvx` packages before MCP server processes are spawned.
- **CI/CD:** GitHub Actions are pinned to full commit SHAs. The `supply-chain-audit.yml` workflow blocks PRs containing `.pth` files or suspicious `base64`+`exec` patterns.
### Credential Storage
- API keys and tokens belong exclusively in `~/.hermes/.env` — never in `config.yaml` or checked into version control.
- The credential pool system (`agent/credential_pool.py`) handles key rotation and fallback. Credentials are resolved from environment variables, not stored in plaintext databases.
---
## 5. Disclosure Process
- **Coordinated Disclosure:** 90-day window or until a fix is released, whichever comes first.
- **Communication:** All updates occur via the GHSA thread or email correspondence with security@nousresearch.com.
- **Credits:** Reporters are credited in release notes unless anonymity is requested.
+1 -20
View File
@@ -49,7 +49,6 @@ def make_tool_progress_cb(
session_id: str,
loop: asyncio.AbstractEventLoop,
tool_call_ids: Dict[str, Deque[str]],
tool_call_meta: Dict[str, Dict[str, Any]],
) -> Callable:
"""Create a ``tool_progress_callback`` for AIAgent.
@@ -85,16 +84,6 @@ def make_tool_progress_cb(
tool_call_ids[name] = queue
queue.append(tc_id)
snapshot = None
if name in {"write_file", "patch", "skill_manage"}:
try:
from agent.display import capture_local_edit_snapshot
snapshot = capture_local_edit_snapshot(name, args)
except Exception:
logger.debug("Failed to capture ACP edit snapshot for %s", name, exc_info=True)
tool_call_meta[tc_id] = {"args": args, "snapshot": snapshot}
update = build_tool_start(tc_id, name, args)
_send_update(conn, session_id, loop, update)
@@ -130,7 +119,6 @@ def make_step_cb(
session_id: str,
loop: asyncio.AbstractEventLoop,
tool_call_ids: Dict[str, Deque[str]],
tool_call_meta: Dict[str, Dict[str, Any]],
) -> Callable:
"""Create a ``step_callback`` for AIAgent.
@@ -144,12 +132,10 @@ def make_step_cb(
for tool_info in prev_tools:
tool_name = None
result = None
function_args = None
if isinstance(tool_info, dict):
tool_name = tool_info.get("name") or tool_info.get("function_name")
result = tool_info.get("result") or tool_info.get("output")
function_args = tool_info.get("arguments") or tool_info.get("args")
elif isinstance(tool_info, str):
tool_name = tool_info
@@ -159,13 +145,8 @@ def make_step_cb(
tool_call_ids[tool_name] = queue
if tool_name and queue:
tc_id = queue.popleft()
meta = tool_call_meta.pop(tc_id, {})
update = build_tool_complete(
tc_id,
tool_name,
result=str(result) if result is not None else None,
function_args=function_args or meta.get("args"),
snapshot=meta.get("snapshot"),
tc_id, tool_name, result=str(result) if result is not None else None
)
_send_update(conn, session_id, loop, update)
if not queue:
+30 -148
View File
@@ -26,7 +26,6 @@ from acp.schema import (
McpServerHttp,
McpServerSse,
McpServerStdio,
ModelInfo,
NewSessionResponse,
PromptResponse,
ResumeSessionResponse,
@@ -37,7 +36,6 @@ from acp.schema import (
SessionCapabilities,
SessionForkCapabilities,
SessionListCapabilities,
SessionModelState,
SessionResumeCapabilities,
SessionInfo,
TextContentBlock,
@@ -149,98 +147,6 @@ class HermesACPAgent(acp.Agent):
self._conn = conn
logger.info("ACP client connected")
@staticmethod
def _encode_model_choice(provider: str | None, model: str | None) -> str:
"""Encode a model selection so ACP clients can keep provider context."""
raw_model = str(model or "").strip()
if not raw_model:
return ""
raw_provider = str(provider or "").strip().lower()
if not raw_provider:
return raw_model
return f"{raw_provider}:{raw_model}"
def _build_model_state(self, state: SessionState) -> SessionModelState | None:
"""Return the ACP model selector payload for editors like Zed."""
model = str(state.model or getattr(state.agent, "model", "") or "").strip()
provider = getattr(state.agent, "provider", None) or detect_provider() or "openrouter"
try:
from hermes_cli.models import curated_models_for_provider, normalize_provider, provider_label
normalized_provider = normalize_provider(provider)
provider_name = provider_label(normalized_provider)
available_models: list[ModelInfo] = []
seen_ids: set[str] = set()
for model_id, description in curated_models_for_provider(normalized_provider):
rendered_model = str(model_id or "").strip()
if not rendered_model:
continue
choice_id = self._encode_model_choice(normalized_provider, rendered_model)
if choice_id in seen_ids:
continue
desc_parts = [f"Provider: {provider_name}"]
if description:
desc_parts.append(str(description).strip())
if rendered_model == model:
desc_parts.append("current")
available_models.append(
ModelInfo(
model_id=choice_id,
name=rendered_model,
description="".join(part for part in desc_parts if part),
)
)
seen_ids.add(choice_id)
current_model_id = self._encode_model_choice(normalized_provider, model)
if current_model_id and current_model_id not in seen_ids:
available_models.insert(
0,
ModelInfo(
model_id=current_model_id,
name=model,
description=f"Provider: {provider_name} • current",
),
)
if available_models:
return SessionModelState(
available_models=available_models,
current_model_id=current_model_id or available_models[0].model_id,
)
except Exception:
logger.debug("Could not build ACP model state", exc_info=True)
if not model:
return None
fallback_choice = self._encode_model_choice(provider, model)
return SessionModelState(
available_models=[ModelInfo(model_id=fallback_choice, name=model)],
current_model_id=fallback_choice,
)
@staticmethod
def _resolve_model_selection(raw_model: str, current_provider: str) -> tuple[str, str]:
"""Resolve ``provider:model`` input into the provider and normalized model id."""
target_provider = current_provider
new_model = raw_model.strip()
try:
from hermes_cli.models import detect_provider_for_model, parse_model_input
target_provider, new_model = parse_model_input(new_model, current_provider)
if target_provider == current_provider:
detected = detect_provider_for_model(new_model, current_provider)
if detected:
target_provider, new_model = detected
except Exception:
logger.debug("Provider detection failed, using model as-is", exc_info=True)
return target_provider, new_model
async def _register_session_mcp_servers(
self,
state: SessionState,
@@ -367,10 +273,7 @@ class HermesACPAgent(acp.Agent):
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("New session %s (cwd=%s)", state.session_id, cwd)
self._schedule_available_commands_update(state.session_id)
return NewSessionResponse(
session_id=state.session_id,
models=self._build_model_state(state),
)
return NewSessionResponse(session_id=state.session_id)
async def load_session(
self,
@@ -386,7 +289,7 @@ class HermesACPAgent(acp.Agent):
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Loaded session %s", session_id)
self._schedule_available_commands_update(session_id)
return LoadSessionResponse(models=self._build_model_state(state))
return LoadSessionResponse()
async def resume_session(
self,
@@ -402,7 +305,7 @@ class HermesACPAgent(acp.Agent):
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Resumed session %s", state.session_id)
self._schedule_available_commands_update(state.session_id)
return ResumeSessionResponse(models=self._build_model_state(state))
return ResumeSessionResponse()
async def cancel(self, session_id: str, **kwargs: Any) -> None:
state = self.session_manager.get_session(session_id)
@@ -437,20 +340,11 @@ class HermesACPAgent(acp.Agent):
cwd: str | None = None,
**kwargs: Any,
) -> ListSessionsResponse:
infos = self.session_manager.list_sessions(cwd=cwd)
sessions = []
for s in infos:
updated_at = s.get("updated_at")
if updated_at is not None and not isinstance(updated_at, str):
updated_at = str(updated_at)
sessions.append(
SessionInfo(
session_id=s["session_id"],
cwd=s["cwd"],
title=s.get("title"),
updated_at=updated_at,
)
)
infos = self.session_manager.list_sessions()
sessions = [
SessionInfo(session_id=s["session_id"], cwd=s["cwd"])
for s in infos
]
return ListSessionsResponse(sessions=sessions)
# ---- Prompt (core) ------------------------------------------------------
@@ -495,13 +389,12 @@ class HermesACPAgent(acp.Agent):
state.cancel_event.clear()
tool_call_ids: dict[str, Deque[str]] = defaultdict(deque)
tool_call_meta: dict[str, dict[str, Any]] = {}
previous_approval_cb = None
if conn:
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids)
thinking_cb = make_thinking_cb(conn, session_id, loop)
step_cb = make_step_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
step_cb = make_step_cb(conn, session_id, loop, tool_call_ids)
message_cb = make_message_cb(conn, session_id, loop)
approval_cb = make_approval_callback(conn.request_permission, loop, session_id)
else:
@@ -556,19 +449,6 @@ class HermesACPAgent(acp.Agent):
self.session_manager.save_session(session_id)
final_response = result.get("final_response", "")
if final_response:
try:
from agent.title_generator import maybe_auto_title
maybe_auto_title(
self.session_manager._get_db(),
session_id,
user_text,
final_response,
state.history,
)
except Exception:
logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
if final_response and conn:
update = acp.update_agent_message_text(final_response)
await conn.session_update(session_id, update)
@@ -676,15 +556,27 @@ class HermesACPAgent(acp.Agent):
provider = getattr(state.agent, "provider", None) or "auto"
return f"Current model: {model}\nProvider: {provider}"
new_model = args.strip()
target_provider = None
current_provider = getattr(state.agent, "provider", None) or "openrouter"
target_provider, new_model = self._resolve_model_selection(args, current_provider)
# Auto-detect provider for the requested model
try:
from hermes_cli.models import parse_model_input, detect_provider_for_model
target_provider, new_model = parse_model_input(new_model, current_provider)
if target_provider == current_provider:
detected = detect_provider_for_model(new_model, current_provider)
if detected:
target_provider, new_model = detected
except Exception:
logger.debug("Provider detection failed, using model as-is", exc_info=True)
state.model = new_model
state.agent = self.session_manager._make_agent(
session_id=state.session_id,
cwd=state.cwd,
model=new_model,
requested_provider=target_provider,
requested_provider=target_provider or current_provider,
)
self.session_manager.save_session(state.session_id)
provider_label = getattr(state.agent, "provider", None) or target_provider or current_provider
@@ -786,30 +678,20 @@ class HermesACPAgent(acp.Agent):
"""Switch the model for a session (called by ACP protocol)."""
state = self.session_manager.get_session(session_id)
if state:
state.model = model_id
current_provider = getattr(state.agent, "provider", None)
requested_provider, resolved_model = self._resolve_model_selection(
model_id,
current_provider or "openrouter",
)
state.model = resolved_model
provider_changed = bool(current_provider and requested_provider != current_provider)
current_base_url = None if provider_changed else getattr(state.agent, "base_url", None)
current_api_mode = None if provider_changed else getattr(state.agent, "api_mode", None)
current_base_url = getattr(state.agent, "base_url", None)
current_api_mode = getattr(state.agent, "api_mode", None)
state.agent = self.session_manager._make_agent(
session_id=session_id,
cwd=state.cwd,
model=resolved_model,
requested_provider=requested_provider,
model=model_id,
requested_provider=current_provider,
base_url=current_base_url,
api_mode=current_api_mode,
)
self.session_manager.save_session(session_id)
logger.info(
"Session %s: model switched to %s via provider %s",
session_id,
resolved_model,
requested_provider,
)
logger.info("Session %s: model switched to %s", session_id, model_id)
return SetSessionModelResponse()
logger.warning("Session %s: model switch requested for missing session", session_id)
return None
+34 -127
View File
@@ -13,12 +13,8 @@ from hermes_constants import get_hermes_home
import copy
import json
import logging
import os
import re
import sys
import time
import uuid
from datetime import datetime, timezone
from dataclasses import dataclass, field
from threading import Lock
from typing import Any, Dict, List, Optional
@@ -26,64 +22,6 @@ from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
def _normalize_cwd_for_compare(cwd: str | None) -> str:
raw = str(cwd or ".").strip()
if not raw:
raw = "."
expanded = os.path.expanduser(raw)
# Normalize Windows drive paths into the equivalent WSL mount form so
# ACP history filters match the same workspace across Windows and WSL.
match = re.match(r"^([A-Za-z]):[\\/](.*)$", expanded)
if match:
drive = match.group(1).lower()
tail = match.group(2).replace("\\", "/")
expanded = f"/mnt/{drive}/{tail}"
elif re.match(r"^/mnt/[A-Za-z]/", expanded):
expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}"
return os.path.normpath(expanded)
def _build_session_title(title: Any, preview: Any, cwd: str | None) -> str:
explicit = str(title or "").strip()
if explicit:
return explicit
preview_text = str(preview or "").strip()
if preview_text:
return preview_text
leaf = os.path.basename(str(cwd or "").rstrip("/\\"))
return leaf or "New thread"
def _format_updated_at(value: Any) -> str | None:
if value is None:
return None
if isinstance(value, str) and value.strip():
return value
try:
return datetime.fromtimestamp(float(value), tz=timezone.utc).isoformat()
except Exception:
return None
def _updated_at_sort_key(value: Any) -> float:
if value is None:
return float("-inf")
if isinstance(value, (int, float)):
return float(value)
raw = str(value).strip()
if not raw:
return float("-inf")
try:
return datetime.fromisoformat(raw.replace("Z", "+00:00")).timestamp()
except Exception:
try:
return float(raw)
except Exception:
return float("-inf")
def _acp_stderr_print(*args, **kwargs) -> None:
"""Best-effort human-readable output sink for ACP stdio sessions.
@@ -224,78 +162,47 @@ class SessionManager:
logger.info("Forked ACP session %s -> %s", session_id, new_id)
return state
def list_sessions(self, cwd: str | None = None) -> List[Dict[str, Any]]:
def list_sessions(self) -> List[Dict[str, Any]]:
"""Return lightweight info dicts for all sessions (memory + database)."""
normalized_cwd = _normalize_cwd_for_compare(cwd) if cwd else None
db = self._get_db()
persisted_rows: dict[str, dict[str, Any]] = {}
if db is not None:
try:
for row in db.list_sessions_rich(source="acp", limit=1000):
persisted_rows[str(row["id"])] = dict(row)
except Exception:
logger.debug("Failed to load ACP sessions from DB", exc_info=True)
# Collect in-memory sessions first.
with self._lock:
seen_ids = set(self._sessions.keys())
results = []
for s in self._sessions.values():
history_len = len(s.history)
if history_len <= 0:
continue
if normalized_cwd and _normalize_cwd_for_compare(s.cwd) != normalized_cwd:
continue
persisted = persisted_rows.get(s.session_id, {})
preview = next(
(
str(msg.get("content") or "").strip()
for msg in s.history
if msg.get("role") == "user" and str(msg.get("content") or "").strip()
),
persisted.get("preview") or "",
)
results.append(
{
"session_id": s.session_id,
"cwd": s.cwd,
"model": s.model,
"history_len": history_len,
"title": _build_session_title(persisted.get("title"), preview, s.cwd),
"updated_at": _format_updated_at(
persisted.get("last_active") or persisted.get("started_at") or time.time()
),
}
)
results = [
{
"session_id": s.session_id,
"cwd": s.cwd,
"model": s.model,
"history_len": len(s.history),
}
for s in self._sessions.values()
]
# Merge any persisted sessions not currently in memory.
for sid, row in persisted_rows.items():
if sid in seen_ids:
continue
message_count = int(row.get("message_count") or 0)
if message_count <= 0:
continue
# Extract cwd from model_config JSON.
session_cwd = "."
mc = row.get("model_config")
if mc:
try:
session_cwd = json.loads(mc).get("cwd", ".")
except (json.JSONDecodeError, TypeError):
pass
if normalized_cwd and _normalize_cwd_for_compare(session_cwd) != normalized_cwd:
continue
results.append({
"session_id": sid,
"cwd": session_cwd,
"model": row.get("model") or "",
"history_len": message_count,
"title": _build_session_title(row.get("title"), row.get("preview"), session_cwd),
"updated_at": _format_updated_at(row.get("last_active") or row.get("started_at")),
})
db = self._get_db()
if db is not None:
try:
rows = db.search_sessions(source="acp", limit=1000)
for row in rows:
sid = row["id"]
if sid in seen_ids:
continue
# Extract cwd from model_config JSON.
cwd = "."
mc = row.get("model_config")
if mc:
try:
cwd = json.loads(mc).get("cwd", ".")
except (json.JSONDecodeError, TypeError):
pass
results.append({
"session_id": sid,
"cwd": cwd,
"model": row.get("model") or "",
"history_len": row.get("message_count") or 0,
})
except Exception:
logger.debug("Failed to list ACP sessions from DB", exc_info=True)
results.sort(key=lambda item: _updated_at_sort_key(item.get("updated_at")), reverse=True)
return results
def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:
+9 -174
View File
@@ -2,7 +2,6 @@
from __future__ import annotations
import json
import uuid
from typing import Any, Dict, List, Optional
@@ -97,170 +96,6 @@ def build_tool_title(tool_name: str, args: Dict[str, Any]) -> str:
return tool_name
def _build_patch_mode_content(patch_text: str) -> List[Any]:
"""Parse V4A patch mode input into ACP diff blocks when possible."""
if not patch_text:
return [acp.tool_content(acp.text_block(""))]
try:
from tools.patch_parser import OperationType, parse_v4a_patch
operations, error = parse_v4a_patch(patch_text)
if error or not operations:
return [acp.tool_content(acp.text_block(patch_text))]
content: List[Any] = []
for op in operations:
if op.operation == OperationType.UPDATE:
old_chunks: list[str] = []
new_chunks: list[str] = []
for hunk in op.hunks:
old_lines = [line.content for line in hunk.lines if line.prefix in (" ", "-")]
new_lines = [line.content for line in hunk.lines if line.prefix in (" ", "+")]
if old_lines or new_lines:
old_chunks.append("\n".join(old_lines))
new_chunks.append("\n".join(new_lines))
old_text = "\n...\n".join(chunk for chunk in old_chunks if chunk)
new_text = "\n...\n".join(chunk for chunk in new_chunks if chunk)
if old_text or new_text:
content.append(
acp.tool_diff_content(
path=op.file_path,
old_text=old_text or None,
new_text=new_text or "",
)
)
continue
if op.operation == OperationType.ADD:
added_lines = [line.content for hunk in op.hunks for line in hunk.lines if line.prefix == "+"]
content.append(
acp.tool_diff_content(
path=op.file_path,
new_text="\n".join(added_lines),
)
)
continue
if op.operation == OperationType.DELETE:
content.append(
acp.tool_diff_content(
path=op.file_path,
old_text=f"Delete file: {op.file_path}",
new_text="",
)
)
continue
if op.operation == OperationType.MOVE:
content.append(
acp.tool_content(acp.text_block(f"Move file: {op.file_path} -> {op.new_path}"))
)
return content or [acp.tool_content(acp.text_block(patch_text))]
except Exception:
return [acp.tool_content(acp.text_block(patch_text))]
def _strip_diff_prefix(path: str) -> str:
raw = str(path or "").strip()
if raw.startswith(("a/", "b/")):
return raw[2:]
return raw
def _parse_unified_diff_content(diff_text: str) -> List[Any]:
"""Convert unified diff text into ACP diff content blocks."""
if not diff_text:
return []
content: List[Any] = []
current_old_path: Optional[str] = None
current_new_path: Optional[str] = None
old_lines: list[str] = []
new_lines: list[str] = []
def _flush() -> None:
nonlocal current_old_path, current_new_path, old_lines, new_lines
if current_old_path is None and current_new_path is None:
return
path = current_new_path if current_new_path and current_new_path != "/dev/null" else current_old_path
if not path or path == "/dev/null":
current_old_path = None
current_new_path = None
old_lines = []
new_lines = []
return
content.append(
acp.tool_diff_content(
path=_strip_diff_prefix(path),
old_text="\n".join(old_lines) if old_lines else None,
new_text="\n".join(new_lines),
)
)
current_old_path = None
current_new_path = None
old_lines = []
new_lines = []
for line in diff_text.splitlines():
if line.startswith("--- "):
_flush()
current_old_path = line[4:].strip()
continue
if line.startswith("+++ "):
current_new_path = line[4:].strip()
continue
if line.startswith("@@"):
continue
if current_old_path is None and current_new_path is None:
continue
if line.startswith("+"):
new_lines.append(line[1:])
elif line.startswith("-"):
old_lines.append(line[1:])
elif line.startswith(" "):
shared = line[1:]
old_lines.append(shared)
new_lines.append(shared)
_flush()
return content
def _build_tool_complete_content(
tool_name: str,
result: Optional[str],
*,
function_args: Optional[Dict[str, Any]] = None,
snapshot: Any = None,
) -> List[Any]:
"""Build structured ACP completion content, falling back to plain text."""
display_result = result or ""
if len(display_result) > 5000:
display_result = display_result[:4900] + f"\n... ({len(result)} chars total, truncated)"
if tool_name in {"write_file", "patch", "skill_manage"}:
try:
from agent.display import extract_edit_diff
diff_text = extract_edit_diff(
tool_name,
result,
function_args=function_args,
snapshot=snapshot,
)
if isinstance(diff_text, str) and diff_text.strip():
diff_content = _parse_unified_diff_content(diff_text)
if diff_content:
return diff_content
except Exception:
pass
return [acp.tool_content(acp.text_block(display_result))]
# ---------------------------------------------------------------------------
# Build ACP content objects for tool-call events
# ---------------------------------------------------------------------------
@@ -284,8 +119,9 @@ def build_tool_start(
new = arguments.get("new_string", "")
content = [acp.tool_diff_content(path=path, new_text=new, old_text=old)]
else:
# Patch mode — show the patch content as text
patch_text = arguments.get("patch", "")
content = _build_patch_mode_content(patch_text)
content = [acp.tool_content(acp.text_block(patch_text))]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
@@ -342,17 +178,16 @@ def build_tool_complete(
tool_call_id: str,
tool_name: str,
result: Optional[str] = None,
function_args: Optional[Dict[str, Any]] = None,
snapshot: Any = None,
) -> ToolCallProgress:
"""Create a ToolCallUpdate (progress) event for a completed tool call."""
kind = get_tool_kind(tool_name)
content = _build_tool_complete_content(
tool_name,
result,
function_args=function_args,
snapshot=snapshot,
)
# Truncate very large results for the UI
display_result = result or ""
if len(display_result) > 5000:
display_result = display_result[:4900] + f"\n... ({len(result)} chars total, truncated)"
content = [acp.tool_content(acp.text_block(display_result))]
return acp.update_tool_call(
tool_call_id,
kind=kind,
+11 -120
View File
@@ -28,45 +28,19 @@ except ImportError:
logger = logging.getLogger(__name__)
THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
# Hermes effort → Anthropic adaptive-thinking effort (output_config.effort).
# Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max.
# Opus/Sonnet 4.6 only expose 4 levels: low, medium, high, max — no xhigh.
# We preserve xhigh as xhigh on 4.7+ (the recommended default for coding/
# agentic work) and downgrade it to max on pre-4.7 adaptive models (which
# is the strongest level they accept). "minimal" is a legacy alias that
# maps to low on every model. See:
# https://platform.claude.com/docs/en/about-claude/models/migration-guide
ADAPTIVE_EFFORT_MAP = {
"max": "max",
"xhigh": "xhigh",
"high": "high",
"medium": "medium",
"low": "low",
"xhigh": "max",
"high": "high",
"medium": "medium",
"low": "low",
"minimal": "low",
}
# Models that accept the "xhigh" output_config.effort level. Opus 4.7 added
# xhigh as a distinct level between high and max; older adaptive-thinking
# models (4.6) reject it with a 400. Keep this substring list in sync with
# the Anthropic migration guide as new model families ship.
_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7")
# Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
# is the only supported mode; 4.7 additionally forbids manual thinking entirely
# and drops temperature/top_p/top_k).
_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
# Models where temperature/top_p/top_k return 400 if set to non-default values.
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
# ── Max output token limits per Anthropic model ───────────────────────
# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
# max_tokens as a mandatory field. Previously we hardcoded 16384, which
# starves thinking-enabled models (thinking tokens count toward the limit).
_ANTHROPIC_OUTPUT_LIMITS = {
# Claude 4.7
"claude-opus-4-7": 128_000,
# Claude 4.6
"claude-opus-4-6": 128_000,
"claude-sonnet-4-6": 64_000,
@@ -117,37 +91,11 @@ def _get_anthropic_max_output(model: str) -> int:
def _supports_adaptive_thinking(model: str) -> bool:
"""Return True for Claude 4.6+ models that support adaptive thinking."""
return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
"""Return True for Claude 4.6 models that support adaptive thinking."""
return any(v in model for v in ("4-6", "4.6"))
def _supports_xhigh_effort(model: str) -> bool:
"""Return True for models that accept the 'xhigh' adaptive effort level.
Opus 4.7 introduced xhigh as a distinct level between high and max.
Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max
and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max
when this returns False.
"""
return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS)
def _forbids_sampling_params(model: str) -> bool:
"""Return True for models that 400 on any non-default temperature/top_p/top_k.
Opus 4.7 explicitly rejects sampling parameters; later Claude releases are
expected to follow suit. Callers should omit these fields entirely rather
than passing zero/default values (the API rejects anything non-null).
"""
return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
# Beta headers for enhanced features (sent with ALL auth types).
# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
# beta headers are still accepted (harmless no-op) but not required. Kept
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
# that still gate on the headers continue to get the enhanced features.
# Migration guide: remove these if you no longer support ≤4.5 models.
# Beta headers for enhanced features (sent with ALL auth types)
_COMMON_BETAS = [
"interleaved-thinking-2025-05-14",
"fine-grained-tool-streaming-2025-05-14",
@@ -350,33 +298,6 @@ def build_anthropic_client(api_key: str, base_url: str = None):
return _anthropic_sdk.Anthropic(**kwargs)
def build_anthropic_bedrock_client(region: str):
"""Create an AnthropicBedrock client for Bedrock Claude models.
Uses the Anthropic SDK's native Bedrock adapter, which provides full
Claude feature parity: prompt caching, thinking budgets, adaptive
thinking, fast mode — features not available via the Converse API.
Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
"""
if _anthropic_sdk is None:
raise ImportError(
"The 'anthropic' package is required for the Bedrock provider. "
"Install it with: pip install 'anthropic>=0.39.0'"
)
if not hasattr(_anthropic_sdk, "AnthropicBedrock"):
raise ImportError(
"anthropic.AnthropicBedrock not available. "
"Upgrade with: pip install 'anthropic>=0.39.0'"
)
from httpx import Timeout
return _anthropic_sdk.AnthropicBedrock(
aws_region=region,
timeout=Timeout(timeout=900.0, connect=10.0),
)
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
@@ -1393,31 +1314,18 @@ def build_anthropic_kwargs(
kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}
# Map reasoning_config to Anthropic's thinking parameter.
# Claude 4.6+ models use adaptive thinking + output_config.effort.
# Claude 4.6 models use adaptive thinking + output_config.effort.
# Older models use manual thinking with budget_tokens.
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
#
# On 4.7+ the `thinking.display` field defaults to "omitted", which
# silently hides reasoning text that Hermes surfaces in its CLI. We
# request "summarized" so the reasoning blocks stay populated — matching
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
effort = str(reasoning_config.get("effort", "medium")).lower()
budget = THINKING_BUDGET.get(effort, 8000)
if _supports_adaptive_thinking(model):
kwargs["thinking"] = {
"type": "adaptive",
"display": "summarized",
}
adaptive_effort = ADAPTIVE_EFFORT_MAP.get(effort, "medium")
# Downgrade xhigh→max on models that don't list xhigh as a
# supported level (Opus/Sonnet 4.6). Opus 4.7+ keeps xhigh.
if adaptive_effort == "xhigh" and not _supports_xhigh_effort(model):
adaptive_effort = "max"
kwargs["thinking"] = {"type": "adaptive"}
kwargs["output_config"] = {
"effort": adaptive_effort,
"effort": ADAPTIVE_EFFORT_MAP.get(effort, "medium")
}
else:
kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
@@ -1425,15 +1333,6 @@ def build_anthropic_kwargs(
kwargs["temperature"] = 1
kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
# ── Strip sampling params on 4.7+ ─────────────────────────────────
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
# Callers (auxiliary_client, flush_memories, etc.) may set these for
# older models; drop them here as a safety net so upstream 4.6 → 4.7
# migrations don't require coordinated edits everywhere.
if _forbids_sampling_params(model):
for _sampling_key in ("temperature", "top_p", "top_k"):
kwargs.pop(_sampling_key, None)
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
# Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
# output speed. Only for native Anthropic endpoints — third-party
@@ -1491,20 +1390,12 @@ def normalize_anthropic_response(
)
)
# Map Anthropic stop_reason to OpenAI finish_reason.
# Newer stop reasons added in Claude 4.5+ / 4.7:
# - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
# - model_context_window_exceeded: hit context limit (not max_tokens)
# Both need distinct handling upstream — a refusal should surface to the
# user with a clear message, and a context-window overflow should trigger
# compression/truncation rather than be treated as normal end-of-turn.
# Map Anthropic stop_reason to OpenAI finish_reason
stop_reason_map = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"refusal": "content_filter",
"model_context_window_exceeded": "length",
}
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
+59 -206
View File
@@ -58,9 +58,6 @@ _PROVIDER_ALIASES = {
"google": "gemini",
"google-gemini": "gemini",
"google-ai-studio": "gemini",
"x-ai": "xai",
"x.ai": "xai",
"grok": "xai",
"glm": "zai",
"z-ai": "zai",
"z.ai": "zai",
@@ -94,17 +91,6 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
return "custom"
return _PROVIDER_ALIASES.get(normalized, normalized)
_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
"kimi-for-coding": 0.6,
}
def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
"""Return a required temperature override for models with strict contracts."""
normalized = (model or "").strip().lower()
return _FIXED_TEMPERATURE_MODELS.get(normalized)
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"gemini": "gemini-3-flash-preview",
@@ -118,7 +104,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"opencode-zen": "gemini-3-flash",
"opencode-go": "glm-5",
"kilocode": "google/gemini-3-flash-preview",
"ollama-cloud": "nemotron-3-nano:30b",
}
# Vision-specific model overrides for direct providers.
@@ -529,13 +514,8 @@ class _AnthropicCompletionsAdapter:
tool_choice=normalized_tool_choice,
is_oauth=self._is_oauth,
)
# Opus 4.7+ rejects any non-default temperature/top_p/top_k; only set
# temperature for models that still accept it. build_anthropic_kwargs
# additionally strips these keys as a safety net — keep both layers.
if temperature is not None:
from agent.anthropic_adapter import _forbids_sampling_params
if not _forbids_sampling_params(model):
anthropic_kwargs["temperature"] = temperature
anthropic_kwargs["temperature"] = temperature
response = self._client.messages.create(**anthropic_kwargs)
assistant_message, finish_reason = normalize_anthropic_response(response)
@@ -745,15 +725,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
elif "generativelanguage.googleapis.com" in base_url.lower():
# Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
# Passing api_key= causes the SDK to inject Authorization: Bearer,
# which Google rejects with HTTP 400 "Multiple authentication
# credentials received". Use a placeholder for api_key and pass
# the real key via x-goog-api-key header instead.
# Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
extra["default_headers"] = {"x-goog-api-key": api_key}
api_key = "not-used"
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
creds = resolve_api_key_provider_credentials(provider_id)
@@ -775,15 +746,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
elif "generativelanguage.googleapis.com" in base_url.lower():
# Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
# Passing api_key= causes the SDK to inject Authorization: Bearer,
# which Google rejects with HTTP 400 "Multiple authentication
# credentials received". Use a placeholder for api_key and pass
# the real key via x-goog-api-key header instead.
# Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
extra["default_headers"] = {"x-goog-api-key": api_key}
api_key = "not-used"
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
return None, None
@@ -813,21 +775,6 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
# Check cross-session rate limit guard before attempting Nous —
# if another session already recorded a 429, skip Nous entirely
# to avoid piling more requests onto the tapped RPH bucket.
try:
from agent.nous_rate_guard import nous_rate_limit_remaining
_remaining = nous_rate_limit_remaining()
if _remaining is not None and _remaining > 0:
logger.debug(
"Auxiliary: skipping Nous Portal (rate-limited, resets in %.0fs)",
_remaining,
)
return None, None
except Exception:
pass
nous = _read_nous_auth()
if not nous:
return None, None
@@ -952,51 +899,6 @@ def _current_custom_base_url() -> str:
return custom_base or ""
def _validate_proxy_env_urls() -> None:
"""Fail fast with a clear error when proxy env vars have malformed URLs.
Common cause: shell config (e.g. .zshrc) with a typo like
``export HTTP_PROXY=http://127.0.0.1:6153export NEXT_VAR=...``
which concatenates 'export' into the port number. Without this
check the OpenAI/httpx client raises a cryptic ``Invalid port``
error that doesn't name the offending env var.
"""
from urllib.parse import urlparse
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = str(os.environ.get(key) or "").strip()
if not value:
continue
try:
parsed = urlparse(value)
if parsed.scheme:
_ = parsed.port # raises ValueError for e.g. '6153export'
except ValueError as exc:
raise RuntimeError(
f"Malformed proxy environment variable {key}={value!r}. "
"Fix or unset your proxy settings and try again."
) from exc
def _validate_base_url(base_url: str) -> None:
"""Reject obviously broken custom endpoint URLs before they reach httpx."""
from urllib.parse import urlparse
candidate = str(base_url or "").strip()
if not candidate or candidate.startswith("acp://"):
return
try:
parsed = urlparse(candidate)
if parsed.scheme in {"http", "https"}:
_ = parsed.port # raises ValueError for malformed ports
except ValueError as exc:
raise RuntimeError(
f"Malformed custom endpoint URL: {candidate!r}. "
"Run `hermes setup` or `hermes model` and enter a valid http(s) base URL."
) from exc
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
runtime = _resolve_custom_runtime()
if len(runtime) == 2:
@@ -1093,6 +995,8 @@ _AUTO_PROVIDER_LABELS = {
"_resolve_api_key_provider": "api-key",
}
_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})
_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
@@ -1223,15 +1127,11 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
"""Full auto-detection chain.
Priority:
1. User's main provider + main model, regardless of provider type.
This means auxiliary tasks (compression, vision, web extraction,
session search, etc.) use the same model the user configured for
chat. Users on OpenRouter/Nous get their chosen chat model; users
on DeepSeek/ZAI/Alibaba get theirs; etc. Running aux tasks on the
user's picked model keeps behavior predictable — no surprise
switches to a cheap fallback model for side tasks.
2. OpenRouter → Nous → custom → Codex → API-key providers (fallback
chain, only used when the main provider has no working client).
1. If the user's main provider is NOT an aggregator (OpenRouter / Nous),
use their main provider + main model directly. This ensures users on
Alibaba, DeepSeek, ZAI, etc. get auxiliary tasks handled by the same
provider they already have credentials for — no OpenRouter key needed.
2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
"""
global auxiliary_is_nous, _stale_base_url_warned
auxiliary_is_nous = False # Reset — _try_nous() will set True if it wins
@@ -1261,16 +1161,11 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
)
_stale_base_url_warned = True
# ── Step 1: main provider + main model → use them directly ──
#
# This is the primary aux backend for every user. "auto" means
# "use my main chat model for side tasks as well" — including users
# on aggregators (OpenRouter, Nous) who previously got routed to a
# cheap provider-side default. Explicit per-task overrides set via
# config.yaml (auxiliary.<task>.provider) still win over this.
# ── Step 1: non-aggregator main provider → use main model directly ──
main_provider = runtime_provider or _read_main_provider()
main_model = runtime_model or _read_main_model()
if (main_provider and main_model
and main_provider not in _AGGREGATOR_PROVIDERS
and main_provider not in ("auto", "")):
resolved_provider = main_provider
explicit_base_url = None
@@ -1404,7 +1299,6 @@ def resolve_provider_client(
Returns:
(client, resolved_model) or (None, None) if auth is unavailable.
"""
_validate_proxy_env_urls()
# Normalise aliases
provider = _normalize_aux_provider(provider)
@@ -1629,15 +1523,6 @@ def resolve_provider_client(
from hermes_cli.models import copilot_default_headers
headers.update(copilot_default_headers())
elif "generativelanguage.googleapis.com" in base_url.lower():
# Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
# Passing api_key= causes the OpenAI SDK to inject Authorization: Bearer,
# which Google rejects with HTTP 400 "Multiple authentication credentials
# received". Use a placeholder for api_key and pass the real key via
# x-goog-api-key header instead.
# Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
headers["x-goog-api-key"] = api_key
api_key = "not-used"
client = OpenAI(api_key=api_key, base_url=base_url,
**({"default_headers": headers} if headers else {}))
@@ -1862,31 +1747,34 @@ def resolve_vision_provider_client(
if requested == "auto":
# Vision auto-detection order:
# 1. User's main provider + main model (including aggregators).
# _PROVIDER_VISION_MODELS provides per-provider vision model
# overrides when the provider has a dedicated multimodal model
# that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
# zai → glm-5v-turbo).
# 2. OpenRouter (vision-capable aggregator fallback)
# 3. Nous Portal (vision-capable aggregator fallback)
# 1. Active provider + model (user's main chat config)
# 2. OpenRouter (known vision-capable default model)
# 3. Nous Portal (known vision-capable default model)
# 4. Stop
main_provider = _read_main_provider()
main_model = _read_main_model()
if main_provider and main_provider not in ("auto", ""):
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
if main_provider in _VISION_AUTO_PROVIDER_ORDER:
# Known strict backend — use its defaults.
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
if sync_client is not None:
return _finalize(main_provider, sync_client, default_model)
else:
# Exotic provider (DeepSeek, Alibaba, Xiaomi, named custom, etc.)
# Use provider-specific vision model if available, otherwise main model.
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using active provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
# Fall back through aggregators (uses their dedicated vision model,
# not the user's main model) when main provider has no client.
# Fall back through aggregators.
for candidate in _VISION_AUTO_PROVIDER_ORDER:
if candidate == main_provider:
continue # already tried above
@@ -1947,15 +1835,9 @@ def auxiliary_max_tokens_param(value: int) -> dict:
# Every auxiliary LLM consumer should use these instead of manually
# constructing clients and calling .chat.completions.create().
# Client cache: (provider, async_mode, base_url, api_key, api_mode, runtime_key) -> (client, default_model, loop)
# NOTE: loop identity is NOT part of the key. On async cache hits we check
# whether the cached loop is the *current* loop; if not, the stale entry is
# replaced in-place. This bounds cache growth to one entry per unique
# provider config rather than one per (config × event-loop), which previously
# caused unbounded fd accumulation in long-running gateway processes (#10200).
# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
_client_cache: Dict[tuple, tuple] = {}
_client_cache_lock = threading.Lock()
_CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded
def neuter_async_httpx_del() -> None:
@@ -2088,49 +1970,39 @@ def _get_cached_client(
Async clients (AsyncOpenAI) use httpx.AsyncClient internally, which
binds to the event loop that was current when the client was created.
Using such a client on a *different* loop causes deadlocks or
RuntimeError. To prevent cross-loop issues, the cache validates on
every async hit that the cached loop is the *current, open* loop.
If the loop changed (e.g. a new gateway worker-thread loop), the stale
entry is replaced in-place rather than creating an additional entry.
This keeps cache size bounded to one entry per unique provider config,
preventing the fd-exhaustion that previously occurred in long-running
gateways where recycled worker threads created unbounded entries (#10200).
RuntimeError. To prevent cross-loop issues (especially in gateway
mode where _run_async() may spawn fresh loops in worker threads), the
cache key for async clients includes the current event loop's identity
so each loop gets its own client instance.
"""
# Resolve the current event loop for async clients so we can validate
# cached entries. Loop identity is NOT in the cache key — instead we
# check at hit time whether the cached loop is still current and open.
# This prevents unbounded cache growth from recycled worker-thread loops
# while still guaranteeing we never reuse a client on the wrong loop
# (which causes deadlocks, see #2681).
# Include loop identity for async clients to prevent cross-loop reuse.
# httpx.AsyncClient (inside AsyncOpenAI) is bound to the loop where it
# was created — reusing it on a different loop causes deadlocks (#2681).
loop_id = 0
current_loop = None
if async_mode:
try:
import asyncio as _aio
current_loop = _aio.get_event_loop()
loop_id = id(current_loop)
except RuntimeError:
pass
runtime = _normalize_main_runtime(main_runtime)
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id, runtime_key)
with _client_cache_lock:
if cache_key in _client_cache:
cached_client, cached_default, cached_loop = _client_cache[cache_key]
if async_mode:
# Validate: the cached client must be bound to the CURRENT,
# OPEN loop. If the loop changed or was closed, the httpx
# transport inside is dead — force-close and replace.
loop_ok = (
cached_loop is not None
and cached_loop is current_loop
and not cached_loop.is_closed()
)
if loop_ok:
# A cached async client whose loop has been closed will raise
# "Event loop is closed" when httpx tries to clean up its
# transport. Discard the stale client and create a fresh one.
if cached_loop is not None and cached_loop.is_closed():
_force_close_async_httpx(cached_client)
del _client_cache[cache_key]
else:
effective = _compat_model(cached_client, model, cached_default)
return cached_client, effective
# Stale — evict and fall through to create a new client.
_force_close_async_httpx(cached_client)
del _client_cache[cache_key]
else:
effective = _compat_model(cached_client, model, cached_default)
return cached_client, effective
@@ -2150,12 +2022,6 @@ def _get_cached_client(
bound_loop = current_loop
with _client_cache_lock:
if cache_key not in _client_cache:
# Safety belt: if the cache has grown beyond the max, evict
# the oldest entries (FIFO — dict preserves insertion order).
while len(_client_cache) >= _CLIENT_CACHE_MAX_SIZE:
evict_key, evict_entry = next(iter(_client_cache.items()))
_force_close_async_httpx(evict_entry[0])
del _client_cache[evict_key]
_client_cache[cache_key] = (client, default_model, bound_loop)
else:
client, default_model, _ = _client_cache[cache_key]
@@ -2335,19 +2201,6 @@ def _build_call_kwargs(
"timeout": timeout,
}
fixed_temperature = _fixed_temperature_for_model(model)
if fixed_temperature is not None:
temperature = fixed_temperature
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
# the aux model is flipped to 4.7.
if temperature is not None:
from agent.anthropic_adapter import _forbids_sampling_params
if _forbids_sampling_params(model):
temperature = None
if temperature is not None:
kwargs["temperature"] = temperature
@@ -2451,10 +2304,10 @@ def call_llm(
if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client(
provider=resolved_provider if resolved_provider != "auto" else provider,
model=resolved_model or model,
base_url=resolved_base_url or base_url,
api_key=resolved_api_key or api_key,
provider=provider,
model=model,
base_url=base_url,
api_key=api_key,
async_mode=False,
)
if client is None and resolved_provider != "auto" and not resolved_base_url:
@@ -2659,10 +2512,10 @@ async def async_call_llm(
if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client(
provider=resolved_provider if resolved_provider != "auto" else provider,
model=resolved_model or model,
base_url=resolved_base_url or base_url,
api_key=resolved_api_key or api_key,
provider=provider,
model=model,
base_url=base_url,
api_key=api_key,
async_mode=True,
)
if client is None and resolved_provider != "auto" and not resolved_base_url:
File diff suppressed because it is too large Load Diff
+40 -383
View File
@@ -17,10 +17,7 @@ Improvements over v2:
- Richer tool call/result detail in summarizer input
"""
import hashlib
import json
import logging
import re
import time
from typing import Any, Dict, List, Optional
@@ -39,10 +36,7 @@ SUMMARY_PREFIX = (
"into the summary below. This is a handoff from a previous context "
"window — treat it as background reference, NOT as active instructions. "
"Do NOT answer questions or fulfill requests mentioned in this summary; "
"they were already addressed. "
"Your current task is identified in the '## Active Task' section of the "
"summary — resume exactly from there. "
"Respond ONLY to the latest user message "
"they were already addressed. Respond ONLY to the latest user message "
"that appears AFTER this summary. The current session state (files, "
"config, etc.) may reflect work described here — avoid repeating it:"
)
@@ -63,128 +57,6 @@ _CHARS_PER_TOKEN = 4
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
"""Create an informative 1-line summary of a tool call + result.
Used during the pre-compression pruning pass to replace large tool
outputs with a short but useful description of what the tool did,
rather than a generic placeholder that carries zero information.
Returns strings like::
[terminal] ran `npm test` -> exit 0, 47 lines output
[read_file] read config.py from line 1 (1,200 chars)
[search_files] content search for 'compress' in agent/ -> 12 matches
"""
try:
args = json.loads(tool_args) if tool_args else {}
except (json.JSONDecodeError, TypeError):
args = {}
content = tool_content or ""
content_len = len(content)
line_count = content.count("\n") + 1 if content.strip() else 0
if tool_name == "terminal":
cmd = args.get("command", "")
if len(cmd) > 80:
cmd = cmd[:77] + "..."
exit_match = re.search(r'"exit_code"\s*:\s*(-?\d+)', content)
exit_code = exit_match.group(1) if exit_match else "?"
return f"[terminal] ran `{cmd}` -> exit {exit_code}, {line_count} lines output"
if tool_name == "read_file":
path = args.get("path", "?")
offset = args.get("offset", 1)
return f"[read_file] read {path} from line {offset} ({content_len:,} chars)"
if tool_name == "write_file":
path = args.get("path", "?")
written_lines = args.get("content", "").count("\n") + 1 if args.get("content") else "?"
return f"[write_file] wrote to {path} ({written_lines} lines)"
if tool_name == "search_files":
pattern = args.get("pattern", "?")
path = args.get("path", ".")
target = args.get("target", "content")
match_count = re.search(r'"total_count"\s*:\s*(\d+)', content)
count = match_count.group(1) if match_count else "?"
return f"[search_files] {target} search for '{pattern}' in {path} -> {count} matches"
if tool_name == "patch":
path = args.get("path", "?")
mode = args.get("mode", "replace")
return f"[patch] {mode} in {path} ({content_len:,} chars result)"
if tool_name in ("browser_navigate", "browser_click", "browser_snapshot",
"browser_type", "browser_scroll", "browser_vision"):
url = args.get("url", "")
ref = args.get("ref", "")
detail = f" {url}" if url else (f" ref={ref}" if ref else "")
return f"[{tool_name}]{detail} ({content_len:,} chars)"
if tool_name == "web_search":
query = args.get("query", "?")
return f"[web_search] query='{query}' ({content_len:,} chars result)"
if tool_name == "web_extract":
urls = args.get("urls", [])
url_desc = urls[0] if isinstance(urls, list) and urls else "?"
if isinstance(urls, list) and len(urls) > 1:
url_desc += f" (+{len(urls) - 1} more)"
return f"[web_extract] {url_desc} ({content_len:,} chars)"
if tool_name == "delegate_task":
goal = args.get("goal", "")
if len(goal) > 60:
goal = goal[:57] + "..."
return f"[delegate_task] '{goal}' ({content_len:,} chars result)"
if tool_name == "execute_code":
code_preview = (args.get("code") or "")[:60].replace("\n", " ")
if len(args.get("code", "")) > 60:
code_preview += "..."
return f"[execute_code] `{code_preview}` ({line_count} lines output)"
if tool_name in ("skill_view", "skills_list", "skill_manage"):
name = args.get("name", "?")
return f"[{tool_name}] name={name} ({content_len:,} chars)"
if tool_name == "vision_analyze":
question = args.get("question", "")[:50]
return f"[vision_analyze] '{question}' ({content_len:,} chars)"
if tool_name == "memory":
action = args.get("action", "?")
target = args.get("target", "?")
return f"[memory] {action} on {target}"
if tool_name == "todo":
return "[todo] updated task list"
if tool_name == "clarify":
return "[clarify] asked user a question"
if tool_name == "text_to_speech":
return f"[text_to_speech] generated audio ({content_len:,} chars)"
if tool_name == "cronjob":
action = args.get("action", "?")
return f"[cronjob] {action}"
if tool_name == "process":
action = args.get("action", "?")
sid = args.get("session_id", "?")
return f"[process] {action} session={sid}"
# Generic fallback
first_arg = ""
for k, v in list(args.items())[:2]:
sv = str(v)[:40]
first_arg += f" {k}={sv}"
return f"[{tool_name}]{first_arg} ({content_len:,} chars result)"
class ContextCompressor(ContextEngine):
"""Default context engine — compresses conversation context via lossy summarization.
@@ -206,8 +78,6 @@ class ContextCompressor(ContextEngine):
self._context_probed = False
self._context_probe_persistable = False
self._previous_summary = None
self._last_compression_savings_pct = 100.0
self._ineffective_compression_count = 0
def update_model(
self,
@@ -297,9 +167,6 @@ class ContextCompressor(ContextEngine):
# Stores the previous compaction summary for iterative updates
self._previous_summary: Optional[str] = None
# Anti-thrashing: track whether last compression was effective
self._last_compression_savings_pct: float = 100.0
self._ineffective_compression_count: int = 0
self._summary_failure_cooldown_until: float = 0.0
def update_from_response(self, usage: Dict[str, Any]):
@@ -308,26 +175,9 @@ class ContextCompressor(ContextEngine):
self.last_completion_tokens = usage.get("completion_tokens", 0)
def should_compress(self, prompt_tokens: int = None) -> bool:
"""Check if context exceeds the compression threshold.
Includes anti-thrashing protection: if the last two compressions
each saved less than 10%, skip compression to avoid infinite loops
where each pass removes only 1-2 messages.
"""
"""Check if context exceeds the compression threshold."""
tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
if tokens < self.threshold_tokens:
return False
# Anti-thrashing: back off if recent compressions were ineffective
if self._ineffective_compression_count >= 2:
if not self.quiet_mode:
logger.warning(
"Compression skipped — last %d compressions saved <10%% each. "
"Consider /new to start a fresh session, or /compress <topic> "
"for focused compression.",
self._ineffective_compression_count,
)
return False
return True
return tokens >= self.threshold_tokens
# ------------------------------------------------------------------
# Tool output pruning (cheap pre-pass, no LLM call)
@@ -337,16 +187,7 @@ class ContextCompressor(ContextEngine):
self, messages: List[Dict[str, Any]], protect_tail_count: int,
protect_tail_tokens: int | None = None,
) -> tuple[List[Dict[str, Any]], int]:
"""Replace old tool result contents with informative 1-line summaries.
Instead of a generic placeholder, generates a summary like::
[terminal] ran `npm test` -> exit 0, 47 lines output
[read_file] read config.py from line 1 (3,400 chars)
Also deduplicates identical tool results (e.g. reading the same file
5x keeps only the newest full copy) and truncates large tool_call
arguments in assistant messages outside the protected tail.
"""Replace old tool result contents with a short placeholder.
Walks backward from the end, protecting the most recent messages that
fall within ``protect_tail_tokens`` (when provided) OR the last
@@ -362,22 +203,6 @@ class ContextCompressor(ContextEngine):
result = [m.copy() for m in messages]
pruned = 0
# Build index: tool_call_id -> (tool_name, arguments_json)
call_id_to_tool: Dict[str, tuple] = {}
for msg in result:
if msg.get("role") == "assistant":
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
cid = tc.get("id", "")
fn = tc.get("function", {})
call_id_to_tool[cid] = (fn.get("name", "unknown"), fn.get("arguments", ""))
else:
cid = getattr(tc, "id", "") or ""
fn = getattr(tc, "function", None)
name = getattr(fn, "name", "unknown") if fn else "unknown"
args_str = getattr(fn, "arguments", "") if fn else ""
call_id_to_tool[cid] = (name, args_str)
# Determine the prune boundary
if protect_tail_tokens is not None and protect_tail_tokens > 0:
# Token-budget approach: walk backward accumulating tokens
@@ -386,8 +211,7 @@ class ContextCompressor(ContextEngine):
min_protect = min(protect_tail_count, len(result) - 1)
for i in range(len(result) - 1, -1, -1):
msg = result[i]
raw_content = msg.get("content") or ""
content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
content_len = len(msg.get("content") or "")
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
@@ -402,69 +226,18 @@ class ContextCompressor(ContextEngine):
else:
prune_boundary = len(result) - protect_tail_count
# Pass 1: Deduplicate identical tool results.
# When the same file is read multiple times, keep only the most recent
# full copy and replace older duplicates with a back-reference.
content_hashes: dict = {} # hash -> (index, tool_call_id)
for i in range(len(result) - 1, -1, -1):
msg = result[i]
if msg.get("role") != "tool":
continue
content = msg.get("content") or ""
# Skip multimodal content (list of content blocks)
if isinstance(content, list):
continue
if len(content) < 200:
continue
h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
if h in content_hashes:
# This is an older duplicate — replace with back-reference
result[i] = {**msg, "content": "[Duplicate tool output — same content as a more recent call]"}
pruned += 1
else:
content_hashes[h] = (i, msg.get("tool_call_id", "?"))
# Pass 2: Replace old tool results with informative summaries
for i in range(prune_boundary):
msg = result[i]
if msg.get("role") != "tool":
continue
content = msg.get("content", "")
# Skip multimodal content (list of content blocks)
if isinstance(content, list):
continue
if not content or content == _PRUNED_TOOL_PLACEHOLDER:
continue
# Skip already-deduplicated or previously-summarized results
if content.startswith("[Duplicate tool output"):
continue
# Only prune if the content is substantial (>200 chars)
if len(content) > 200:
call_id = msg.get("tool_call_id", "")
tool_name, tool_args = call_id_to_tool.get(call_id, ("unknown", ""))
summary = _summarize_tool_result(tool_name, tool_args, content)
result[i] = {**msg, "content": summary}
result[i] = {**msg, "content": _PRUNED_TOOL_PLACEHOLDER}
pruned += 1
# Pass 3: Truncate large tool_call arguments in assistant messages
# outside the protected tail. write_file with 50KB content, for
# example, survives pruning entirely without this.
for i in range(prune_boundary):
msg = result[i]
if msg.get("role") != "assistant" or not msg.get("tool_calls"):
continue
new_tcs = []
modified = False
for tc in msg["tool_calls"]:
if isinstance(tc, dict):
args = tc.get("function", {}).get("arguments", "")
if len(args) > 500:
tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
modified = True
new_tcs.append(tc)
if modified:
result[i] = {**msg, "tool_calls": new_tcs}
return result, pruned
# ------------------------------------------------------------------
@@ -584,45 +357,29 @@ class ContextCompressor(ContextEngine):
)
# Shared structured template (used by both paths).
_template_sections = f"""## Active Task
[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or
task assignment verbatim — the exact words they used. If multiple tasks
were requested and only some are done, list only the ones NOT yet completed.
The next assistant must pick up exactly here. Example:
"User asked: 'Now refactor the auth module to use JWT instead of sessions'"
If no outstanding task exists, write "None."]
## Goal
[What the user is trying to accomplish overall]
# Key changes vs v1:
# - "Pending User Asks" section (from Claude Code) explicitly tracks
# unanswered questions so the model knows what's resolved vs open
# - "Remaining Work" replaces "Next Steps" to avoid reading as active
# instructions
# - "Resolved Questions" makes it clear which questions were already
# answered (prevents model from re-answering them)
_template_sections = f"""## Goal
[What the user is trying to accomplish]
## Constraints & Preferences
[User preferences, coding style, constraints, important decisions]
## Completed Actions
[Numbered list of concrete actions taken — include tool used, target, and outcome.
Format each as: N. ACTION target — outcome [tool: name]
Example:
1. READ config.py:45 — found `==` should be `!=` [tool: read_file]
2. PATCH config.py:45 — changed `==` to `!=` [tool: patch]
3. TEST `pytest tests/` — 3/50 failed: test_parse, test_validate, test_edge [tool: terminal]
Be specific with file paths, commands, line numbers, and results.]
## Active State
[Current working state — include:
- Working directory and branch (if applicable)
- Modified/created files with brief note on each
- Test status (X/Y passing)
- Any running processes or servers
- Environment details that matter]
## In Progress
[Work currently underway — what was being done when compaction fired]
## Blocked
[Any blockers, errors, or issues not yet resolved. Include exact error messages.]
## Progress
### Done
[Completed work — include specific file paths, commands run, results obtained]
### In Progress
[Work currently underway]
### Blocked
[Any blockers or issues encountered]
## Key Decisions
[Important technical decisions and WHY they were made]
[Important technical decisions and why they were made]
## Resolved Questions
[Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them]
@@ -639,7 +396,10 @@ Be specific with file paths, commands, line numbers, and results.]
## Critical Context
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.
## Tools & Patterns
[Which tools were used, how they were used effectively, and any tool-specific discoveries]
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
Write only the summary body. Do not include any preamble or prefix."""
@@ -655,7 +415,7 @@ PREVIOUS SUMMARY:
NEW TURNS TO INCORPORATE:
{content_to_summarize}
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled request — this is the most important field for task continuity.
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new progress. Move items from "In Progress" to "Done" when completed. Move answered questions to "Resolved Questions". Remove information only if it is clearly obsolete.
{_template_sections}"""
else:
@@ -690,7 +450,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
"api_mode": self.api_mode,
},
"messages": [{"role": "user", "content": prompt}],
"max_tokens": int(summary_budget * 1.3),
"max_tokens": summary_budget * 2,
# timeout resolved from auxiliary.compression.timeout config by call_llm
}
if self.summary_model:
@@ -704,10 +464,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Store for iterative updates on next compaction
self._previous_summary = summary
self._summary_failure_cooldown_until = 0.0
self._summary_model_fallen_back = False
return self._with_summary_prefix(summary)
except RuntimeError:
# No provider configured — long cooldown, unlikely to self-resolve
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
logging.warning("Context compression: no provider available for "
"summary. Middle turns will be dropped without summary "
@@ -715,42 +473,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
_SUMMARY_FAILURE_COOLDOWN_SECONDS)
return None
except Exception as e:
# If the summary model is different from the main model and the
# error looks permanent (model not found, 503, 404), fall back to
# using the main model instead of entering cooldown that leaves
# context growing unbounded. (#8620 sub-issue 4)
_status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None)
_err_str = str(e).lower()
_is_model_not_found = (
_status in (404, 503)
or "model_not_found" in _err_str
or "does not exist" in _err_str
or "no available channel" in _err_str
)
if (
_is_model_not_found
and self.summary_model
and self.summary_model != self.model
and not getattr(self, "_summary_model_fallen_back", False)
):
self._summary_model_fallen_back = True
logging.warning(
"Summary model '%s' not available (%s). "
"Falling back to main model '%s' for compression.",
self.summary_model, e, self.model,
)
self.summary_model = "" # empty = use main model
self._summary_failure_cooldown_until = 0.0 # no cooldown
return self._generate_summary(messages, summary_budget) # retry immediately
# Transient errors (timeout, rate limit, network) — shorter cooldown
_transient_cooldown = 60
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
logging.warning(
"Failed to generate context summary: %s. "
"Further summary attempts paused for %d seconds.",
e,
_transient_cooldown,
_SUMMARY_FAILURE_COOLDOWN_SECONDS,
)
return None
@@ -873,62 +601,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Tail protection by token budget
# ------------------------------------------------------------------
def _find_last_user_message_idx(
self, messages: List[Dict[str, Any]], head_end: int
) -> int:
"""Return the index of the last user-role message at or after *head_end*, or -1."""
for i in range(len(messages) - 1, head_end - 1, -1):
if messages[i].get("role") == "user":
return i
return -1
def _ensure_last_user_message_in_tail(
self,
messages: List[Dict[str, Any]],
cut_idx: int,
head_end: int,
) -> int:
"""Guarantee the most recent user message is in the protected tail.
Context compressor bug (#10896): ``_align_boundary_backward`` can pull
``cut_idx`` past a user message when it tries to keep tool_call/result
groups together. If the last user message ends up in the *compressed*
middle region the LLM summariser writes it into "Pending User Asks",
but ``SUMMARY_PREFIX`` tells the next model to respond only to user
messages *after* the summary — so the task effectively disappears from
the active context, causing the agent to stall, repeat completed work,
or silently drop the user's latest request.
Fix: if the last user-role message is not already in the tail
(``messages[cut_idx:]``), walk ``cut_idx`` back to include it. We
then re-align backward one more time to avoid splitting any
tool_call/result group that immediately precedes the user message.
"""
last_user_idx = self._find_last_user_message_idx(messages, head_end)
if last_user_idx < 0:
# No user message found beyond head — nothing to anchor.
return cut_idx
if last_user_idx >= cut_idx:
# Already in the tail; nothing to do.
return cut_idx
# The last user message is in the middle (compressed) region.
# Pull cut_idx back to it directly — a user message is already a
# clean boundary (no tool_call/result splitting risk), so there is no
# need to call _align_boundary_backward here; doing so would
# unnecessarily pull the cut further back into the preceding
# assistant + tool_calls group.
if not self.quiet_mode:
logger.debug(
"Anchoring tail cut to last user message at index %d "
"(was %d) to prevent active-task loss after compression",
last_user_idx,
cut_idx,
)
# Safety: never go back into the head region.
return max(last_user_idx, head_end + 1)
def _find_tail_cut_by_tokens(
self, messages: List[Dict[str, Any]], head_end: int,
token_budget: int | None = None,
@@ -946,8 +618,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
read, etc.). If even the minimum 3 messages exceed 1.5x the budget
the cut is placed right after the head so compression still runs.
Never cuts inside a tool_call/result group. Always ensures the most
recent user message is in the tail (see ``_ensure_last_user_message_in_tail``).
Never cuts inside a tool_call/result group.
"""
if token_budget is None:
token_budget = self.tail_token_budget
@@ -986,10 +657,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Align to avoid splitting tool groups
cut_idx = self._align_boundary_backward(messages, cut_idx)
# Ensure the most recent user message is always in the tail so the
# active task is never lost to compression (fixes #10896).
cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end)
return max(cut_idx, head_end + 1)
# ------------------------------------------------------------------
@@ -1077,11 +744,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
compressed = []
for i in range(compress_start):
msg = messages[i].copy()
if i == 0 and msg.get("role") == "system":
existing = msg.get("content") or ""
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
if _compression_note not in existing:
msg["content"] = existing + "\n\n" + _compression_note
if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
msg["content"] = (
(msg.get("content") or "")
+ "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
)
compressed.append(msg)
# If LLM summary failed, insert a static fallback so the model
@@ -1139,24 +806,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio
compressed = self._sanitize_tool_pairs(compressed)
new_estimate = estimate_messages_tokens_rough(compressed)
saved_estimate = display_tokens - new_estimate
# Anti-thrashing: track compression effectiveness
savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
self._last_compression_savings_pct = savings_pct
if savings_pct < 10:
self._ineffective_compression_count += 1
else:
self._ineffective_compression_count = 0
if not self.quiet_mode:
new_estimate = estimate_messages_tokens_rough(compressed)
saved_estimate = display_tokens - new_estimate
logger.info(
"Compressed: %d -> %d messages (~%d tokens saved, %.0f%%)",
"Compressed: %d -> %d messages (~%d tokens saved)",
n_messages,
len(compressed),
saved_estimate,
savings_pct,
)
logger.info("Compression #%d complete", self.compression_count)
+1 -17
View File
@@ -313,25 +313,9 @@ class CopilotACPClient:
tools=tools,
tool_choice=tool_choice,
)
# Normalise timeout: run_agent.py may pass an httpx.Timeout object
# (used natively by the OpenAI SDK) rather than a plain float.
if timeout is None:
_effective_timeout = _DEFAULT_TIMEOUT_SECONDS
elif isinstance(timeout, (int, float)):
_effective_timeout = float(timeout)
else:
# httpx.Timeout or similar — pick the largest component so the
# subprocess has enough wall-clock time for the full response.
_candidates = [
getattr(timeout, attr, None)
for attr in ("read", "write", "connect", "pool", "timeout")
]
_numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
_effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
response_text, reasoning_text = self._run_prompt(
prompt_text,
timeout_seconds=_effective_timeout,
timeout_seconds=float(timeout or _DEFAULT_TIMEOUT_SECONDS),
)
tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
+1 -24
View File
@@ -1130,14 +1130,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
state = _load_provider_state(auth_store, "nous")
if state:
active_sources.add("device_code")
# Prefer a user-supplied label embedded in the singleton state
# (set by persist_nous_credentials(label=...) when the user ran
# `hermes auth add nous --label <name>`). Fall back to the
# auto-derived token fingerprint for logins that didn't supply one.
custom_label = str(state.get("label") or "").strip()
seeded_label = custom_label or label_from_token(
state.get("access_token", ""), "device_code"
)
changed |= _upsert_entry(
entries,
provider,
@@ -1156,7 +1148,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
"agent_key": state.get("agent_key"),
"agent_key_expires_at": state.get("agent_key_expires_at"),
"tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
"label": seeded_label,
"label": label_from_token(state.get("access_token", ""), "device_code"),
},
)
@@ -1170,7 +1162,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
if token:
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
@@ -1179,7 +1170,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)
@@ -1216,19 +1206,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
logger.debug("Qwen OAuth token seed failed: %s", exc)
elif provider == "openai-codex":
# Respect user suppression — `hermes auth remove openai-codex` marks
# the device_code source as suppressed so it won't be re-seeded from
# either the Hermes auth store or ~/.codex/auth.json. Without this
# gate the removal is instantly undone on the next load_pool() call.
codex_suppressed = False
try:
from hermes_cli.auth import is_source_suppressed
codex_suppressed = is_source_suppressed(provider, "device_code")
except ImportError:
pass
if codex_suppressed:
return changed, active_sources
state = _load_provider_state(auth_store, "openai-codex")
tokens = state.get("tokens") if isinstance(state, dict) else None
# Fallback: import from Codex CLI (~/.codex/auth.json) if Hermes auth
+80 -39
View File
@@ -600,45 +600,6 @@ class KawaiiSpinner:
"analyzing", "computing", "synthesizing", "formulating", "brainstorming",
]
@classmethod
def get_waiting_faces(cls) -> list:
"""Return waiting faces from the active skin, falling back to KAWAII_WAITING."""
try:
skin = _get_skin()
if skin:
faces = skin.spinner.get("waiting_faces", [])
if faces:
return faces
except Exception:
pass
return cls.KAWAII_WAITING
@classmethod
def get_thinking_faces(cls) -> list:
"""Return thinking faces from the active skin, falling back to KAWAII_THINKING."""
try:
skin = _get_skin()
if skin:
faces = skin.spinner.get("thinking_faces", [])
if faces:
return faces
except Exception:
pass
return cls.KAWAII_THINKING
@classmethod
def get_thinking_verbs(cls) -> list:
"""Return thinking verbs from the active skin, falling back to THINKING_VERBS."""
try:
skin = _get_skin()
if skin:
verbs = skin.spinner.get("thinking_verbs", [])
if verbs:
return verbs
except Exception:
pass
return cls.THINKING_VERBS
def __init__(self, message: str = "", spinner_type: str = 'dots', print_fn=None):
self.message = message
self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
@@ -993,4 +954,84 @@ def get_cute_tool_message(
# Honcho session line (one-liner with clickable OSC 8 hyperlink)
# =========================================================================
_DIM = "\033[2m"
_SKY_BLUE = "\033[38;5;117m"
_ANSI_RESET = "\033[0m"
# =========================================================================
# Context pressure display (CLI user-facing warnings)
# =========================================================================
# ANSI color codes for context pressure tiers
_CYAN = "\033[36m"
_YELLOW = "\033[33m"
_BOLD = "\033[1m"
_DIM_ANSI = "\033[2m"
# Bar characters
_BAR_FILLED = ""
_BAR_EMPTY = ""
_BAR_WIDTH = 20
def format_context_pressure(
compaction_progress: float,
threshold_tokens: int,
threshold_percent: float,
compression_enabled: bool = True,
) -> str:
"""Build a formatted context pressure line for CLI display.
The bar and percentage show progress toward the compaction threshold,
NOT the raw context window. 100% = compaction fires.
Args:
compaction_progress: How close to compaction (0.01.0, 1.0 = fires).
threshold_tokens: Compaction threshold in tokens.
threshold_percent: Compaction threshold as a fraction of context window.
compression_enabled: Whether auto-compression is active.
"""
pct_int = min(int(compaction_progress * 100), 100)
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
threshold_k = f"{threshold_tokens // 1000}k" if threshold_tokens >= 1000 else str(threshold_tokens)
threshold_pct_int = int(threshold_percent * 100)
color = f"{_BOLD}{_YELLOW}"
icon = ""
if compression_enabled:
hint = "compaction approaching"
else:
hint = "no auto-compaction"
return (
f" {color}{icon} context {bar} {pct_int}% to compaction{_ANSI_RESET}"
f" {_DIM_ANSI}{threshold_k} threshold ({threshold_pct_int}%) · {hint}{_ANSI_RESET}"
)
def format_context_pressure_gateway(
compaction_progress: float,
threshold_percent: float,
compression_enabled: bool = True,
) -> str:
"""Build a plain-text context pressure notification for messaging platforms.
No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc.
The percentage shows progress toward the compaction threshold.
"""
pct_int = min(int(compaction_progress * 100), 100)
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
threshold_pct_int = int(threshold_percent * 100)
icon = "⚠️"
if compression_enabled:
hint = f"Context compaction approaching (threshold: {threshold_pct_int}% of window)."
else:
hint = "Auto-compaction is disabled — context may be truncated."
return f"{icon} Context: {bar} {pct_int}% to compaction\n{hint}"
-9
View File
@@ -112,10 +112,6 @@ _RATE_LIMIT_PATTERNS = [
"please retry after",
"resource_exhausted",
"rate increased too quickly", # Alibaba/DashScope throttling
# AWS Bedrock throttling
"throttlingexception",
"too many concurrent requests",
"servicequotaexceededexception",
]
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
@@ -175,11 +171,6 @@ _CONTEXT_OVERFLOW_PATTERNS = [
# Chinese error messages (some providers return these)
"超过最大长度",
"上下文长度",
# AWS Bedrock Converse API error patterns
"input is too long",
"max input token",
"input token",
"exceeds the maximum number of input tokens",
]
# Model not found patterns
-895
View File
@@ -1,895 +0,0 @@
"""OpenAI-compatible facade that talks to Google's Cloud Code Assist backend.
This adapter lets Hermes use the ``google-gemini-cli`` provider as if it were
a standard OpenAI-shaped chat completion endpoint, while the underlying HTTP
traffic goes to ``cloudcode-pa.googleapis.com/v1internal:{generateContent,
streamGenerateContent}`` with a Bearer access token obtained via OAuth PKCE.
Architecture
------------
- ``GeminiCloudCodeClient`` exposes ``.chat.completions.create(**kwargs)``
mirroring the subset of the OpenAI SDK that ``run_agent.py`` uses.
- Incoming OpenAI ``messages[]`` / ``tools[]`` / ``tool_choice`` are translated
to Gemini's native ``contents[]`` / ``tools[].functionDeclarations`` /
``toolConfig`` / ``systemInstruction`` shape.
- The request body is wrapped ``{project, model, user_prompt_id, request}``
per Code Assist API expectations.
- Responses (``candidates[].content.parts[]``) are converted back to
OpenAI ``choices[0].message`` shape with ``content`` + ``tool_calls``.
- Streaming uses SSE (``?alt=sse``) and yields OpenAI-shaped delta chunks.
Attribution
-----------
Translation semantics follow jenslys/opencode-gemini-auth (MIT) and the public
Gemini API docs. Request envelope shape
(``{project, model, user_prompt_id, request}``) is documented nowhere; it is
reverse-engineered from the opencode-gemini-auth and clawdbot implementations.
"""
from __future__ import annotations
import json
import logging
import os
import time
import uuid
from types import SimpleNamespace
from typing import Any, Dict, Iterator, List, Optional
import httpx
from agent import google_oauth
from agent.google_code_assist import (
CODE_ASSIST_ENDPOINT,
FREE_TIER_ID,
CodeAssistError,
ProjectContext,
resolve_project_context,
)
logger = logging.getLogger(__name__)
# =============================================================================
# Request translation: OpenAI → Gemini
# =============================================================================
_ROLE_MAP_OPENAI_TO_GEMINI = {
"user": "user",
"assistant": "model",
"system": "user", # handled separately via systemInstruction
"tool": "user", # functionResponse is wrapped in a user-role turn
"function": "user",
}
def _coerce_content_to_text(content: Any) -> str:
"""OpenAI content may be str or a list of parts; reduce to plain text."""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
pieces: List[str] = []
for p in content:
if isinstance(p, str):
pieces.append(p)
elif isinstance(p, dict):
if p.get("type") == "text" and isinstance(p.get("text"), str):
pieces.append(p["text"])
# Multimodal (image_url, etc.) — stub for now; log and skip
elif p.get("type") in ("image_url", "input_audio"):
logger.debug("Dropping multimodal part (not yet supported): %s", p.get("type"))
return "\n".join(pieces)
return str(content)
def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
"""OpenAI tool_call -> Gemini functionCall part."""
fn = tool_call.get("function") or {}
args_raw = fn.get("arguments", "")
try:
args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
except json.JSONDecodeError:
args = {"_raw": args_raw}
if not isinstance(args, dict):
args = {"_value": args}
return {
"functionCall": {
"name": fn.get("name") or "",
"args": args,
},
# Sentinel signature — matches opencode-gemini-auth's approach.
# Without this, Code Assist rejects function calls that originated
# outside its own chain.
"thoughtSignature": "skip_thought_signature_validator",
}
def _translate_tool_result_to_gemini(message: Dict[str, Any]) -> Dict[str, Any]:
"""OpenAI tool-role message -> Gemini functionResponse part.
The function name isn't in the OpenAI tool message directly; it must be
passed via the assistant message that issued the call. For simplicity we
look up ``name`` on the message (OpenAI SDK copies it there) or on the
``tool_call_id`` cross-reference.
"""
name = str(message.get("name") or message.get("tool_call_id") or "tool")
content = _coerce_content_to_text(message.get("content"))
# Gemini expects the response as a dict under `response`. We wrap plain
# text in {"output": "..."}.
try:
parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
except json.JSONDecodeError:
parsed = None
response = parsed if isinstance(parsed, dict) else {"output": content}
return {
"functionResponse": {
"name": name,
"response": response,
},
}
def _build_gemini_contents(
messages: List[Dict[str, Any]],
) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
"""Convert OpenAI messages[] to Gemini contents[] + systemInstruction."""
system_text_parts: List[str] = []
contents: List[Dict[str, Any]] = []
for msg in messages:
if not isinstance(msg, dict):
continue
role = str(msg.get("role") or "user")
if role == "system":
system_text_parts.append(_coerce_content_to_text(msg.get("content")))
continue
# Tool result message — emit a user-role turn with functionResponse
if role == "tool" or role == "function":
contents.append({
"role": "user",
"parts": [_translate_tool_result_to_gemini(msg)],
})
continue
gemini_role = _ROLE_MAP_OPENAI_TO_GEMINI.get(role, "user")
parts: List[Dict[str, Any]] = []
text = _coerce_content_to_text(msg.get("content"))
if text:
parts.append({"text": text})
# Assistant messages can carry tool_calls
tool_calls = msg.get("tool_calls") or []
if isinstance(tool_calls, list):
for tc in tool_calls:
if isinstance(tc, dict):
parts.append(_translate_tool_call_to_gemini(tc))
if not parts:
# Gemini rejects empty parts; skip the turn entirely
continue
contents.append({"role": gemini_role, "parts": parts})
system_instruction: Optional[Dict[str, Any]] = None
joined_system = "\n".join(p for p in system_text_parts if p).strip()
if joined_system:
system_instruction = {
"role": "system",
"parts": [{"text": joined_system}],
}
return contents, system_instruction
def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
"""OpenAI tools[] -> Gemini tools[].functionDeclarations[]."""
if not isinstance(tools, list) or not tools:
return []
declarations: List[Dict[str, Any]] = []
for t in tools:
if not isinstance(t, dict):
continue
fn = t.get("function") or {}
if not isinstance(fn, dict):
continue
name = fn.get("name")
if not name:
continue
decl = {"name": str(name)}
if fn.get("description"):
decl["description"] = str(fn["description"])
params = fn.get("parameters")
if isinstance(params, dict):
decl["parameters"] = params
declarations.append(decl)
if not declarations:
return []
return [{"functionDeclarations": declarations}]
def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
"""OpenAI tool_choice -> Gemini toolConfig.functionCallingConfig."""
if tool_choice is None:
return None
if isinstance(tool_choice, str):
if tool_choice == "auto":
return {"functionCallingConfig": {"mode": "AUTO"}}
if tool_choice == "required":
return {"functionCallingConfig": {"mode": "ANY"}}
if tool_choice == "none":
return {"functionCallingConfig": {"mode": "NONE"}}
if isinstance(tool_choice, dict):
fn = tool_choice.get("function") or {}
name = fn.get("name")
if name:
return {
"functionCallingConfig": {
"mode": "ANY",
"allowedFunctionNames": [str(name)],
},
}
return None
def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
"""Accept thinkingBudget / thinkingLevel / includeThoughts (+ snake_case)."""
if not isinstance(config, dict) or not config:
return None
budget = config.get("thinkingBudget", config.get("thinking_budget"))
level = config.get("thinkingLevel", config.get("thinking_level"))
include = config.get("includeThoughts", config.get("include_thoughts"))
normalized: Dict[str, Any] = {}
if isinstance(budget, (int, float)):
normalized["thinkingBudget"] = int(budget)
if isinstance(level, str) and level.strip():
normalized["thinkingLevel"] = level.strip().lower()
if isinstance(include, bool):
normalized["includeThoughts"] = include
return normalized or None
def build_gemini_request(
*,
messages: List[Dict[str, Any]],
tools: Any = None,
tool_choice: Any = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
top_p: Optional[float] = None,
stop: Any = None,
thinking_config: Any = None,
) -> Dict[str, Any]:
"""Build the inner Gemini request body (goes inside ``request`` wrapper)."""
contents, system_instruction = _build_gemini_contents(messages)
body: Dict[str, Any] = {"contents": contents}
if system_instruction is not None:
body["systemInstruction"] = system_instruction
gemini_tools = _translate_tools_to_gemini(tools)
if gemini_tools:
body["tools"] = gemini_tools
tool_cfg = _translate_tool_choice_to_gemini(tool_choice)
if tool_cfg is not None:
body["toolConfig"] = tool_cfg
generation_config: Dict[str, Any] = {}
if isinstance(temperature, (int, float)):
generation_config["temperature"] = float(temperature)
if isinstance(max_tokens, int) and max_tokens > 0:
generation_config["maxOutputTokens"] = max_tokens
if isinstance(top_p, (int, float)):
generation_config["topP"] = float(top_p)
if isinstance(stop, str) and stop:
generation_config["stopSequences"] = [stop]
elif isinstance(stop, list) and stop:
generation_config["stopSequences"] = [str(s) for s in stop if s]
normalized_thinking = _normalize_thinking_config(thinking_config)
if normalized_thinking:
generation_config["thinkingConfig"] = normalized_thinking
if generation_config:
body["generationConfig"] = generation_config
return body
def wrap_code_assist_request(
*,
project_id: str,
model: str,
inner_request: Dict[str, Any],
user_prompt_id: Optional[str] = None,
) -> Dict[str, Any]:
"""Wrap the inner Gemini request in the Code Assist envelope."""
return {
"project": project_id,
"model": model,
"user_prompt_id": user_prompt_id or str(uuid.uuid4()),
"request": inner_request,
}
# =============================================================================
# Response translation: Gemini → OpenAI
# =============================================================================
def _translate_gemini_response(
resp: Dict[str, Any],
model: str,
) -> SimpleNamespace:
"""Non-streaming Gemini response -> OpenAI-shaped SimpleNamespace.
Code Assist wraps the actual Gemini response inside ``response``, so we
unwrap it first if present.
"""
inner = resp.get("response") if isinstance(resp.get("response"), dict) else resp
candidates = inner.get("candidates") or []
if not isinstance(candidates, list) or not candidates:
return _empty_response(model)
cand = candidates[0]
content_obj = cand.get("content") if isinstance(cand, dict) else {}
parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
text_pieces: List[str] = []
reasoning_pieces: List[str] = []
tool_calls: List[SimpleNamespace] = []
for i, part in enumerate(parts or []):
if not isinstance(part, dict):
continue
# Thought parts are model's internal reasoning — surface as reasoning,
# don't mix into content.
if part.get("thought") is True:
if isinstance(part.get("text"), str):
reasoning_pieces.append(part["text"])
continue
if isinstance(part.get("text"), str):
text_pieces.append(part["text"])
continue
fc = part.get("functionCall")
if isinstance(fc, dict) and fc.get("name"):
try:
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
except (TypeError, ValueError):
args_str = "{}"
tool_calls.append(SimpleNamespace(
id=f"call_{uuid.uuid4().hex[:12]}",
type="function",
index=i,
function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
))
finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(
str(cand.get("finishReason") or "")
)
usage_meta = inner.get("usageMetadata") or {}
usage = SimpleNamespace(
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
prompt_tokens_details=SimpleNamespace(
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
),
)
message = SimpleNamespace(
role="assistant",
content="".join(text_pieces) if text_pieces else None,
tool_calls=tool_calls or None,
reasoning="".join(reasoning_pieces) or None,
reasoning_content="".join(reasoning_pieces) or None,
reasoning_details=None,
)
choice = SimpleNamespace(
index=0,
message=message,
finish_reason=finish_reason,
)
return SimpleNamespace(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion",
created=int(time.time()),
model=model,
choices=[choice],
usage=usage,
)
def _empty_response(model: str) -> SimpleNamespace:
message = SimpleNamespace(
role="assistant", content="", tool_calls=None,
reasoning=None, reasoning_content=None, reasoning_details=None,
)
choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
usage = SimpleNamespace(
prompt_tokens=0, completion_tokens=0, total_tokens=0,
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
)
return SimpleNamespace(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion",
created=int(time.time()),
model=model,
choices=[choice],
usage=usage,
)
def _map_gemini_finish_reason(reason: str) -> str:
mapping = {
"STOP": "stop",
"MAX_TOKENS": "length",
"SAFETY": "content_filter",
"RECITATION": "content_filter",
"OTHER": "stop",
}
return mapping.get(reason.upper(), "stop")
# =============================================================================
# Streaming SSE iterator
# =============================================================================
class _GeminiStreamChunk(SimpleNamespace):
"""Mimics an OpenAI ChatCompletionChunk with .choices[0].delta."""
pass
def _make_stream_chunk(
*,
model: str,
content: str = "",
tool_call_delta: Optional[Dict[str, Any]] = None,
finish_reason: Optional[str] = None,
reasoning: str = "",
) -> _GeminiStreamChunk:
delta_kwargs: Dict[str, Any] = {"role": "assistant"}
if content:
delta_kwargs["content"] = content
if tool_call_delta is not None:
delta_kwargs["tool_calls"] = [SimpleNamespace(
index=tool_call_delta.get("index", 0),
id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
type="function",
function=SimpleNamespace(
name=tool_call_delta.get("name") or "",
arguments=tool_call_delta.get("arguments") or "",
),
)]
if reasoning:
delta_kwargs["reasoning"] = reasoning
delta_kwargs["reasoning_content"] = reasoning
delta = SimpleNamespace(**delta_kwargs)
choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
return _GeminiStreamChunk(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion.chunk",
created=int(time.time()),
model=model,
choices=[choice],
usage=None,
)
def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
"""Parse Server-Sent Events from an httpx streaming response."""
buffer = ""
for chunk in response.iter_text():
if not chunk:
continue
buffer += chunk
while "\n" in buffer:
line, buffer = buffer.split("\n", 1)
line = line.rstrip("\r")
if not line:
continue
if line.startswith("data: "):
data = line[6:]
if data == "[DONE]":
return
try:
yield json.loads(data)
except json.JSONDecodeError:
logger.debug("Non-JSON SSE line: %s", data[:200])
def _translate_stream_event(
event: Dict[str, Any],
model: str,
tool_call_indices: Dict[str, int],
) -> List[_GeminiStreamChunk]:
"""Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s)."""
inner = event.get("response") if isinstance(event.get("response"), dict) else event
candidates = inner.get("candidates") or []
if not candidates:
return []
cand = candidates[0]
if not isinstance(cand, dict):
return []
chunks: List[_GeminiStreamChunk] = []
content = cand.get("content") or {}
parts = content.get("parts") if isinstance(content, dict) else []
for part in parts or []:
if not isinstance(part, dict):
continue
if part.get("thought") is True and isinstance(part.get("text"), str):
chunks.append(_make_stream_chunk(
model=model, reasoning=part["text"],
))
continue
if isinstance(part.get("text"), str) and part["text"]:
chunks.append(_make_stream_chunk(model=model, content=part["text"]))
fc = part.get("functionCall")
if isinstance(fc, dict) and fc.get("name"):
name = str(fc["name"])
idx = tool_call_indices.setdefault(name, len(tool_call_indices))
try:
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
except (TypeError, ValueError):
args_str = "{}"
chunks.append(_make_stream_chunk(
model=model,
tool_call_delta={
"index": idx,
"name": name,
"arguments": args_str,
},
))
finish_reason_raw = str(cand.get("finishReason") or "")
if finish_reason_raw:
mapped = _map_gemini_finish_reason(finish_reason_raw)
if tool_call_indices:
mapped = "tool_calls"
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
return chunks
# =============================================================================
# GeminiCloudCodeClient — OpenAI-compatible facade
# =============================================================================
MARKER_BASE_URL = "cloudcode-pa://google"
class _GeminiChatCompletions:
def __init__(self, client: "GeminiCloudCodeClient"):
self._client = client
def create(self, **kwargs: Any) -> Any:
return self._client._create_chat_completion(**kwargs)
class _GeminiChatNamespace:
def __init__(self, client: "GeminiCloudCodeClient"):
self.completions = _GeminiChatCompletions(client)
class GeminiCloudCodeClient:
"""Minimal OpenAI-SDK-compatible facade over Code Assist v1internal."""
def __init__(
self,
*,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
default_headers: Optional[Dict[str, str]] = None,
project_id: str = "",
**_: Any,
):
# `api_key` here is a dummy — real auth is the OAuth access token
# fetched on every call via agent.google_oauth.get_valid_access_token().
# We accept the kwarg for openai.OpenAI interface parity.
self.api_key = api_key or "google-oauth"
self.base_url = base_url or MARKER_BASE_URL
self._default_headers = dict(default_headers or {})
self._configured_project_id = project_id
self._project_context: Optional[ProjectContext] = None
self._project_context_lock = False # simple single-thread guard
self.chat = _GeminiChatNamespace(self)
self.is_closed = False
self._http = httpx.Client(timeout=httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0))
def close(self) -> None:
self.is_closed = True
try:
self._http.close()
except Exception:
pass
# Implement the OpenAI SDK's context-manager-ish closure check
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
def _ensure_project_context(self, access_token: str, model: str) -> ProjectContext:
"""Lazily resolve and cache the project context for this client."""
if self._project_context is not None:
return self._project_context
env_project = google_oauth.resolve_project_id_from_env()
creds = google_oauth.load_credentials()
stored_project = creds.project_id if creds else ""
# Prefer what's already baked into the creds
if stored_project:
self._project_context = ProjectContext(
project_id=stored_project,
managed_project_id=creds.managed_project_id if creds else "",
tier_id="",
source="stored",
)
return self._project_context
ctx = resolve_project_context(
access_token,
configured_project_id=self._configured_project_id,
env_project_id=env_project,
user_agent_model=model,
)
# Persist discovered project back to the creds file so the next
# session doesn't re-run the discovery.
if ctx.project_id or ctx.managed_project_id:
google_oauth.update_project_ids(
project_id=ctx.project_id,
managed_project_id=ctx.managed_project_id,
)
self._project_context = ctx
return ctx
def _create_chat_completion(
self,
*,
model: str = "gemini-2.5-flash",
messages: Optional[List[Dict[str, Any]]] = None,
stream: bool = False,
tools: Any = None,
tool_choice: Any = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
top_p: Optional[float] = None,
stop: Any = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Any = None,
**_: Any,
) -> Any:
access_token = google_oauth.get_valid_access_token()
ctx = self._ensure_project_context(access_token, model)
thinking_config = None
if isinstance(extra_body, dict):
thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")
inner = build_gemini_request(
messages=messages or [],
tools=tools,
tool_choice=tool_choice,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
stop=stop,
thinking_config=thinking_config,
)
wrapped = wrap_code_assist_request(
project_id=ctx.project_id,
model=model,
inner_request=inner,
)
headers = {
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": f"Bearer {access_token}",
"User-Agent": "hermes-agent (gemini-cli-compat)",
"X-Goog-Api-Client": "gl-python/hermes",
"x-activity-request-id": str(uuid.uuid4()),
}
headers.update(self._default_headers)
if stream:
return self._stream_completion(model=model, wrapped=wrapped, headers=headers)
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:generateContent"
response = self._http.post(url, json=wrapped, headers=headers)
if response.status_code != 200:
raise _gemini_http_error(response)
try:
payload = response.json()
except ValueError as exc:
raise CodeAssistError(
f"Invalid JSON from Code Assist: {exc}",
code="code_assist_invalid_json",
) from exc
return _translate_gemini_response(payload, model=model)
def _stream_completion(
self,
*,
model: str,
wrapped: Dict[str, Any],
headers: Dict[str, str],
) -> Iterator[_GeminiStreamChunk]:
"""Generator that yields OpenAI-shaped streaming chunks."""
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse"
stream_headers = dict(headers)
stream_headers["Accept"] = "text/event-stream"
def _generator() -> Iterator[_GeminiStreamChunk]:
try:
with self._http.stream("POST", url, json=wrapped, headers=stream_headers) as response:
if response.status_code != 200:
# Materialize error body for better diagnostics
response.read()
raise _gemini_http_error(response)
tool_call_indices: Dict[str, int] = {}
for event in _iter_sse_events(response):
for chunk in _translate_stream_event(event, model, tool_call_indices):
yield chunk
except httpx.HTTPError as exc:
raise CodeAssistError(
f"Streaming request failed: {exc}",
code="code_assist_stream_error",
) from exc
return _generator()
def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
"""Translate an httpx response into a CodeAssistError with rich metadata.
Parses Google's error envelope (``{"error": {"code", "message", "status",
"details": [...]}}``) so the agent's error classifier can reason about
the failure ``status_code`` enables the rate_limit / auth classification
paths, and ``response`` lets the main loop honor ``Retry-After`` just
like it does for OpenAI SDK exceptions.
Also lifts a few recognizable Google conditions into human-readable
messages so the user sees something better than a 500-char JSON dump:
MODEL_CAPACITY_EXHAUSTED "Gemini model capacity exhausted for
<model>. This is a Google-side throttle..."
RESOURCE_EXHAUSTED w/o reason quota-style message
404 "Model <name> not found at cloudcode-pa..."
"""
status = response.status_code
# Parse the body once, surviving any weird encodings.
body_text = ""
body_json: Dict[str, Any] = {}
try:
body_text = response.text
except Exception:
body_text = ""
if body_text:
try:
parsed = json.loads(body_text)
if isinstance(parsed, dict):
body_json = parsed
except (ValueError, TypeError):
body_json = {}
# Dig into Google's error envelope. Shape is:
# {"error": {"code": 429, "message": "...", "status": "RESOURCE_EXHAUSTED",
# "details": [{"@type": ".../ErrorInfo", "reason": "MODEL_CAPACITY_EXHAUSTED",
# "metadata": {...}},
# {"@type": ".../RetryInfo", "retryDelay": "30s"}]}}
err_obj = body_json.get("error") if isinstance(body_json, dict) else None
if not isinstance(err_obj, dict):
err_obj = {}
err_status = str(err_obj.get("status") or "").strip()
err_message = str(err_obj.get("message") or "").strip()
err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
# Extract google.rpc.ErrorInfo reason + metadata. There may be more
# than one ErrorInfo (rare), so we pick the first one with a reason.
error_reason = ""
error_metadata: Dict[str, Any] = {}
retry_delay_seconds: Optional[float] = None
for detail in err_details_list:
if not isinstance(detail, dict):
continue
type_url = str(detail.get("@type") or "")
if not error_reason and type_url.endswith("/google.rpc.ErrorInfo"):
reason = detail.get("reason")
if isinstance(reason, str) and reason:
error_reason = reason
md = detail.get("metadata")
if isinstance(md, dict):
error_metadata = md
elif retry_delay_seconds is None and type_url.endswith("/google.rpc.RetryInfo"):
# retryDelay is a google.protobuf.Duration string like "30s" or "1.5s".
delay_raw = detail.get("retryDelay")
if isinstance(delay_raw, str) and delay_raw.endswith("s"):
try:
retry_delay_seconds = float(delay_raw[:-1])
except ValueError:
pass
elif isinstance(delay_raw, (int, float)):
retry_delay_seconds = float(delay_raw)
# Fall back to the Retry-After header if the body didn't include RetryInfo.
if retry_delay_seconds is None:
try:
header_val = response.headers.get("Retry-After") or response.headers.get("retry-after")
except Exception:
header_val = None
if header_val:
try:
retry_delay_seconds = float(header_val)
except (TypeError, ValueError):
retry_delay_seconds = None
# Classify the error code. ``code_assist_rate_limited`` stays the default
# for 429s; a more specific reason tag helps downstream callers (e.g. tests,
# logs) without changing the rate_limit classification path.
code = f"code_assist_http_{status}"
if status == 401:
code = "code_assist_unauthorized"
elif status == 429:
code = "code_assist_rate_limited"
if error_reason == "MODEL_CAPACITY_EXHAUSTED":
code = "code_assist_capacity_exhausted"
# Build a human-readable message. Keep the status + a raw-body tail for
# debugging, but lead with a friendlier summary when we recognize the
# Google signal.
model_hint = ""
if isinstance(error_metadata, dict):
model_hint = str(error_metadata.get("model") or error_metadata.get("modelId") or "").strip()
if status == 429 and error_reason == "MODEL_CAPACITY_EXHAUSTED":
target = model_hint or "this Gemini model"
message = (
f"Gemini capacity exhausted for {target} (Google-side throttle, "
f"not a Hermes issue). Try a different Gemini model or set a "
f"fallback_providers entry to a non-Gemini provider."
)
if retry_delay_seconds is not None:
message += f" Google suggests retrying in {retry_delay_seconds:g}s."
elif status == 429 and err_status == "RESOURCE_EXHAUSTED":
message = (
f"Gemini quota exhausted ({err_message or 'RESOURCE_EXHAUSTED'}). "
f"Check /gquota for remaining daily requests."
)
if retry_delay_seconds is not None:
message += f" Retry suggested in {retry_delay_seconds:g}s."
elif status == 404:
# Google returns 404 when a model has been retired or renamed.
target = model_hint or (err_message or "model")
message = (
f"Code Assist 404: {target} is not available at "
f"cloudcode-pa.googleapis.com. It may have been renamed or "
f"retired. Check hermes_cli/models.py for the current list."
)
elif err_message:
# Generic fallback with the parsed message.
message = f"Code Assist HTTP {status} ({err_status or 'error'}): {err_message}"
else:
# Last-ditch fallback — raw body snippet.
message = f"Code Assist returned HTTP {status}: {body_text[:500]}"
return CodeAssistError(
message,
code=code,
status_code=status,
response=response,
retry_after=retry_delay_seconds,
details={
"status": err_status,
"reason": error_reason,
"metadata": error_metadata,
"message": err_message,
},
)
-453
View File
@@ -1,453 +0,0 @@
"""Google Code Assist API client — project discovery, onboarding, quota.
The Code Assist API powers Google's official gemini-cli. It sits at
``cloudcode-pa.googleapis.com`` and provides:
- Free tier access (generous daily quota) for personal Google accounts
- Paid tier access via GCP projects with billing / Workspace / Standard / Enterprise
This module handles the control-plane dance needed before inference:
1. ``load_code_assist()`` probe the user's account to learn what tier they're on
and whether a ``cloudaicompanionProject`` is already assigned.
2. ``onboard_user()`` if the user hasn't been onboarded yet (new account, fresh
free tier, etc.), call this with the chosen tier + project id. Supports LRO
polling for slow provisioning.
3. ``retrieve_user_quota()`` fetch the ``buckets[]`` array showing remaining
quota per model, used by the ``/gquota`` slash command.
VPC-SC handling: enterprise accounts under a VPC Service Controls perimeter
will get ``SECURITY_POLICY_VIOLATED`` on ``load_code_assist``. We catch this
and force the account to ``standard-tier`` so the call chain still succeeds.
Derived from opencode-gemini-auth (MIT) and clawdbot/extensions/google. The
request/response shapes are specific to Google's internal Code Assist API,
documented nowhere public we copy them from the reference implementations.
"""
from __future__ import annotations
import json
import logging
import os
import time
import urllib.error
import urllib.parse
import urllib.request
import uuid
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# =============================================================================
# Constants
# =============================================================================
CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com"
# Fallback endpoints tried when prod returns an error during project discovery
FALLBACK_ENDPOINTS = [
"https://daily-cloudcode-pa.sandbox.googleapis.com",
"https://autopush-cloudcode-pa.sandbox.googleapis.com",
]
# Tier identifiers that Google's API uses
FREE_TIER_ID = "free-tier"
LEGACY_TIER_ID = "legacy-tier"
STANDARD_TIER_ID = "standard-tier"
# Default HTTP headers matching gemini-cli's fingerprint.
# Google may reject unrecognized User-Agents on these internal endpoints.
_GEMINI_CLI_USER_AGENT = "google-api-nodejs-client/9.15.1 (gzip)"
_X_GOOG_API_CLIENT = "gl-node/24.0.0"
_DEFAULT_REQUEST_TIMEOUT = 30.0
_ONBOARDING_POLL_ATTEMPTS = 12
_ONBOARDING_POLL_INTERVAL_SECONDS = 5.0
class CodeAssistError(RuntimeError):
"""Exception raised by the Code Assist (``cloudcode-pa``) integration.
Carries HTTP status / response / retry-after metadata so the agent's
``error_classifier._extract_status_code`` and the main loop's Retry-After
handling (which walks ``error.response.headers``) pick up the right
signals. Without these, 429s from the OAuth path look like opaque
``RuntimeError`` and skip the rate-limit path.
"""
def __init__(
self,
message: str,
*,
code: str = "code_assist_error",
status_code: Optional[int] = None,
response: Any = None,
retry_after: Optional[float] = None,
details: Optional[Dict[str, Any]] = None,
) -> None:
super().__init__(message)
self.code = code
# ``status_code`` is picked up by ``agent.error_classifier._extract_status_code``
# so a 429 from Code Assist classifies as FailoverReason.rate_limit and
# triggers the main loop's fallback_providers chain the same way SDK
# errors do.
self.status_code = status_code
# ``response`` is the underlying ``httpx.Response`` (or a shim with a
# ``.headers`` mapping and ``.json()`` method). The main loop reads
# ``error.response.headers["Retry-After"]`` to honor Google's retry
# hints when the backend throttles us.
self.response = response
# Parsed ``Retry-After`` seconds (kept separately for convenience —
# Google returns retry hints in both the header and the error body's
# ``google.rpc.RetryInfo`` details, and we pick whichever we found).
self.retry_after = retry_after
# Parsed structured error details from the Google error envelope
# (e.g. ``{"reason": "MODEL_CAPACITY_EXHAUSTED", "status": "RESOURCE_EXHAUSTED"}``).
# Useful for logging and for tests that want to assert on specifics.
self.details = details or {}
class ProjectIdRequiredError(CodeAssistError):
def __init__(self, message: str = "GCP project id required for this tier") -> None:
super().__init__(message, code="code_assist_project_id_required")
# =============================================================================
# HTTP primitive (auth via Bearer token passed per-call)
# =============================================================================
def _build_headers(access_token: str, *, user_agent_model: str = "") -> Dict[str, str]:
ua = _GEMINI_CLI_USER_AGENT
if user_agent_model:
ua = f"{ua} model/{user_agent_model}"
return {
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": f"Bearer {access_token}",
"User-Agent": ua,
"X-Goog-Api-Client": _X_GOOG_API_CLIENT,
"x-activity-request-id": str(uuid.uuid4()),
}
def _client_metadata() -> Dict[str, str]:
"""Match Google's gemini-cli exactly — unrecognized metadata may be rejected."""
return {
"ideType": "IDE_UNSPECIFIED",
"platform": "PLATFORM_UNSPECIFIED",
"pluginType": "GEMINI",
}
def _post_json(
url: str,
body: Dict[str, Any],
access_token: str,
*,
timeout: float = _DEFAULT_REQUEST_TIMEOUT,
user_agent_model: str = "",
) -> Dict[str, Any]:
data = json.dumps(body).encode("utf-8")
request = urllib.request.Request(
url, data=data, method="POST",
headers=_build_headers(access_token, user_agent_model=user_agent_model),
)
try:
with urllib.request.urlopen(request, timeout=timeout) as response:
raw = response.read().decode("utf-8", errors="replace")
return json.loads(raw) if raw else {}
except urllib.error.HTTPError as exc:
detail = ""
try:
detail = exc.read().decode("utf-8", errors="replace")
except Exception:
pass
# Special case: VPC-SC violation should be distinguishable
if _is_vpc_sc_violation(detail):
raise CodeAssistError(
f"VPC-SC policy violation: {detail}",
code="code_assist_vpc_sc",
) from exc
raise CodeAssistError(
f"Code Assist HTTP {exc.code}: {detail or exc.reason}",
code=f"code_assist_http_{exc.code}",
) from exc
except urllib.error.URLError as exc:
raise CodeAssistError(
f"Code Assist request failed: {exc}",
code="code_assist_network_error",
) from exc
def _is_vpc_sc_violation(body: str) -> bool:
"""Detect a VPC Service Controls violation from a response body."""
if not body:
return False
try:
parsed = json.loads(body)
except (json.JSONDecodeError, ValueError):
return "SECURITY_POLICY_VIOLATED" in body
# Walk the nested error structure Google uses
error = parsed.get("error") if isinstance(parsed, dict) else None
if not isinstance(error, dict):
return False
details = error.get("details") or []
if isinstance(details, list):
for item in details:
if isinstance(item, dict):
reason = item.get("reason") or ""
if reason == "SECURITY_POLICY_VIOLATED":
return True
msg = str(error.get("message", ""))
return "SECURITY_POLICY_VIOLATED" in msg
# =============================================================================
# load_code_assist — discovers current tier + assigned project
# =============================================================================
@dataclass
class CodeAssistProjectInfo:
"""Result from ``load_code_assist``."""
current_tier_id: str = ""
cloudaicompanion_project: str = "" # Google-managed project (free tier)
allowed_tiers: List[str] = field(default_factory=list)
raw: Dict[str, Any] = field(default_factory=dict)
def load_code_assist(
access_token: str,
*,
project_id: str = "",
user_agent_model: str = "",
) -> CodeAssistProjectInfo:
"""Call ``POST /v1internal:loadCodeAssist`` with prod → sandbox fallback.
Returns whatever tier + project info Google reports. On VPC-SC violations,
returns a synthetic ``standard-tier`` result so the chain can continue.
"""
body: Dict[str, Any] = {
"metadata": {
"duetProject": project_id,
**_client_metadata(),
},
}
if project_id:
body["cloudaicompanionProject"] = project_id
endpoints = [CODE_ASSIST_ENDPOINT] + FALLBACK_ENDPOINTS
last_err: Optional[Exception] = None
for endpoint in endpoints:
url = f"{endpoint}/v1internal:loadCodeAssist"
try:
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
return _parse_load_response(resp)
except CodeAssistError as exc:
if exc.code == "code_assist_vpc_sc":
logger.info("VPC-SC violation on %s — defaulting to standard-tier", endpoint)
return CodeAssistProjectInfo(
current_tier_id=STANDARD_TIER_ID,
cloudaicompanion_project=project_id,
)
last_err = exc
logger.warning("loadCodeAssist failed on %s: %s", endpoint, exc)
continue
if last_err:
raise last_err
return CodeAssistProjectInfo()
def _parse_load_response(resp: Dict[str, Any]) -> CodeAssistProjectInfo:
current_tier = resp.get("currentTier") or {}
tier_id = str(current_tier.get("id") or "") if isinstance(current_tier, dict) else ""
project = str(resp.get("cloudaicompanionProject") or "")
allowed = resp.get("allowedTiers") or []
allowed_ids: List[str] = []
if isinstance(allowed, list):
for t in allowed:
if isinstance(t, dict):
tid = str(t.get("id") or "")
if tid:
allowed_ids.append(tid)
return CodeAssistProjectInfo(
current_tier_id=tier_id,
cloudaicompanion_project=project,
allowed_tiers=allowed_ids,
raw=resp,
)
# =============================================================================
# onboard_user — provisions a new user on a tier (with LRO polling)
# =============================================================================
def onboard_user(
access_token: str,
*,
tier_id: str,
project_id: str = "",
user_agent_model: str = "",
) -> Dict[str, Any]:
"""Call ``POST /v1internal:onboardUser`` to provision the user.
For paid tiers, ``project_id`` is REQUIRED (raises ProjectIdRequiredError).
For free tiers, ``project_id`` is optional Google will assign one.
Returns the final operation response. Polls ``/v1internal/<name>`` for up
to ``_ONBOARDING_POLL_ATTEMPTS`` × ``_ONBOARDING_POLL_INTERVAL_SECONDS``
(default: 12 × 5s = 1 min).
"""
if tier_id != FREE_TIER_ID and tier_id != LEGACY_TIER_ID and not project_id:
raise ProjectIdRequiredError(
f"Tier {tier_id!r} requires a GCP project id. "
"Set HERMES_GEMINI_PROJECT_ID or GOOGLE_CLOUD_PROJECT."
)
body: Dict[str, Any] = {
"tierId": tier_id,
"metadata": _client_metadata(),
}
if project_id:
body["cloudaicompanionProject"] = project_id
endpoint = CODE_ASSIST_ENDPOINT
url = f"{endpoint}/v1internal:onboardUser"
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
# Poll if LRO (long-running operation)
if not resp.get("done"):
op_name = resp.get("name", "")
if not op_name:
return resp
for attempt in range(_ONBOARDING_POLL_ATTEMPTS):
time.sleep(_ONBOARDING_POLL_INTERVAL_SECONDS)
poll_url = f"{endpoint}/v1internal/{op_name}"
try:
poll_resp = _post_json(poll_url, {}, access_token, user_agent_model=user_agent_model)
except CodeAssistError as exc:
logger.warning("Onboarding poll attempt %d failed: %s", attempt + 1, exc)
continue
if poll_resp.get("done"):
return poll_resp
logger.warning("Onboarding did not complete within %d attempts", _ONBOARDING_POLL_ATTEMPTS)
return resp
# =============================================================================
# retrieve_user_quota — for /gquota
# =============================================================================
@dataclass
class QuotaBucket:
model_id: str
token_type: str = ""
remaining_fraction: float = 0.0
reset_time_iso: str = ""
raw: Dict[str, Any] = field(default_factory=dict)
def retrieve_user_quota(
access_token: str,
*,
project_id: str = "",
user_agent_model: str = "",
) -> List[QuotaBucket]:
"""Call ``POST /v1internal:retrieveUserQuota`` and parse ``buckets[]``."""
body: Dict[str, Any] = {}
if project_id:
body["project"] = project_id
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:retrieveUserQuota"
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
raw_buckets = resp.get("buckets") or []
buckets: List[QuotaBucket] = []
if not isinstance(raw_buckets, list):
return buckets
for b in raw_buckets:
if not isinstance(b, dict):
continue
buckets.append(QuotaBucket(
model_id=str(b.get("modelId") or ""),
token_type=str(b.get("tokenType") or ""),
remaining_fraction=float(b.get("remainingFraction") or 0.0),
reset_time_iso=str(b.get("resetTime") or ""),
raw=b,
))
return buckets
# =============================================================================
# Project context resolution
# =============================================================================
@dataclass
class ProjectContext:
"""Resolved state for a given OAuth session."""
project_id: str = "" # effective project id sent on requests
managed_project_id: str = "" # Google-assigned project (free tier)
tier_id: str = ""
source: str = "" # "env", "config", "discovered", "onboarded"
def resolve_project_context(
access_token: str,
*,
configured_project_id: str = "",
env_project_id: str = "",
user_agent_model: str = "",
) -> ProjectContext:
"""Figure out what project id + tier to use for requests.
Priority:
1. If configured_project_id or env_project_id is set, use that directly
and short-circuit (no discovery needed).
2. Otherwise call loadCodeAssist to see what Google says.
3. If no tier assigned yet, onboard the user (free tier default).
"""
# Short-circuit: caller provided a project id
if configured_project_id:
return ProjectContext(
project_id=configured_project_id,
tier_id=STANDARD_TIER_ID, # assume paid since they specified one
source="config",
)
if env_project_id:
return ProjectContext(
project_id=env_project_id,
tier_id=STANDARD_TIER_ID,
source="env",
)
# Discover via loadCodeAssist
info = load_code_assist(access_token, user_agent_model=user_agent_model)
effective_project = info.cloudaicompanion_project
tier = info.current_tier_id
if not tier:
# User hasn't been onboarded — provision them on free tier
onboard_resp = onboard_user(
access_token,
tier_id=FREE_TIER_ID,
project_id="",
user_agent_model=user_agent_model,
)
# Re-parse from the onboard response
response_body = onboard_resp.get("response") or {}
if isinstance(response_body, dict):
effective_project = (
effective_project
or str(response_body.get("cloudaicompanionProject") or "")
)
tier = FREE_TIER_ID
source = "onboarded"
else:
source = "discovered"
return ProjectContext(
project_id=effective_project,
managed_project_id=effective_project if tier == FREE_TIER_ID else "",
tier_id=tier,
source=source,
)
File diff suppressed because it is too large Load Diff
+26 -5
View File
@@ -634,7 +634,13 @@ class InsightsEngine:
lines.append(f" Sessions: {o['total_sessions']:<12} Messages: {o['total_messages']:,}")
lines.append(f" Tool calls: {o['total_tool_calls']:<12,} User messages: {o['user_messages']:,}")
lines.append(f" Input tokens: {o['total_input_tokens']:<12,} Output tokens: {o['total_output_tokens']:,}")
lines.append(f" Total tokens: {o['total_tokens']:,}")
cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
if cache_total > 0:
lines.append(f" Cache read: {o['total_cache_read_tokens']:<12,} Cache write: {o['total_cache_write_tokens']:,}")
cost_str = f"${o['estimated_cost']:.2f}"
if o.get("models_without_pricing"):
cost_str += " *"
lines.append(f" Total tokens: {o['total_tokens']:<12,} Est. cost: {cost_str}")
if o["total_hours"] > 0:
lines.append(f" Active time: ~{_format_duration(o['total_hours'] * 3600):<11} Avg session: ~{_format_duration(o['avg_session_duration'])}")
lines.append(f" Avg msgs/session: {o['avg_messages_per_session']:.1f}")
@@ -644,10 +650,16 @@ class InsightsEngine:
if report["models"]:
lines.append(" 🤖 Models Used")
lines.append(" " + "" * 56)
lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12}")
lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12} {'Cost':>8}")
for m in report["models"]:
model_name = m["model"][:28]
lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,}")
if m.get("has_pricing"):
cost_cell = f"${m['cost']:>6.2f}"
else:
cost_cell = " N/A"
lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,} {cost_cell}")
if o.get("models_without_pricing"):
lines.append(" * Cost N/A for custom/self-hosted models")
lines.append("")
# Platform breakdown
@@ -727,7 +739,15 @@ class InsightsEngine:
# Overview
lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
if cache_total > 0:
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})")
else:
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
cost_note = ""
if o.get("models_without_pricing"):
cost_note = " _(excludes custom/self-hosted models)_"
lines.append(f"**Est. cost:** ${o['estimated_cost']:.2f}{cost_note}")
if o["total_hours"] > 0:
lines.append(f"**Active time:** ~{_format_duration(o['total_hours'] * 3600)} | **Avg session:** ~{_format_duration(o['avg_session_duration'])}")
lines.append("")
@@ -736,7 +756,8 @@ class InsightsEngine:
if report["models"]:
lines.append("**🤖 Models:**")
for m in report["models"][:5]:
lines.append(f" {m['model'][:25]}{m['sessions']} sessions, {m['total_tokens']:,} tokens")
cost_str = f"${m['cost']:.2f}" if m.get("has_pricing") else "N/A"
lines.append(f" {m['model'][:25]}{m['sessions']} sessions, {m['total_tokens']:,} tokens, {cost_str}")
lines.append("")
# Platforms (if multi-platform)
+2 -14
View File
@@ -28,7 +28,6 @@ Usage in run_agent.py:
from __future__ import annotations
import json
import logging
import re
from typing import Any, Dict, List, Optional
@@ -44,22 +43,11 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
_FENCE_TAG_RE = re.compile(r'</?\s*memory-context\s*>', re.IGNORECASE)
_INTERNAL_CONTEXT_RE = re.compile(
r'<\s*memory-context\s*>[\s\S]*?</\s*memory-context\s*>',
re.IGNORECASE,
)
_INTERNAL_NOTE_RE = re.compile(
r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as informational background data\.\]\s*',
re.IGNORECASE,
)
def sanitize_context(text: str) -> str:
"""Strip fence tags, injected context blocks, and system notes from provider output."""
text = _INTERNAL_CONTEXT_RE.sub('', text)
text = _INTERNAL_NOTE_RE.sub('', text)
text = _FENCE_TAG_RE.sub('', text)
return text
"""Strip fence-escape sequences from provider output."""
return _FENCE_TAG_RE.sub('', text)
def build_memory_context_block(raw_context: str) -> str:
+2 -20
View File
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
# are preserved so the full model name reaches cache lookups and server queries.
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
"qwen-oauth",
"xiaomi",
@@ -33,12 +33,9 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"google", "google-gemini", "google-ai-studio",
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
"ollama",
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
"xai", "x-ai", "x.ai", "grok",
"nvidia", "nim", "nvidia-nim", "nemotron",
"qwen-portal",
})
@@ -103,8 +100,6 @@ DEFAULT_CONTEXT_LENGTHS = {
# fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
# substring of "anthropic/claude-sonnet-4.6").
# OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
"claude-opus-4-7": 1000000,
"claude-opus-4.7": 1000000,
"claude-opus-4-6": 1000000,
"claude-sonnet-4-6": 1000000,
"claude-opus-4.6": 1000000,
@@ -125,6 +120,7 @@ DEFAULT_CONTEXT_LENGTHS = {
"gemini": 1048576,
# Gemma (open models served via AI Studio)
"gemma-4-31b": 256000,
"gemma-4-26b": 256000,
"gemma-3": 131072,
"gemma": 8192, # fallback for older gemma models
# DeepSeek
@@ -158,8 +154,6 @@ DEFAULT_CONTEXT_LENGTHS = {
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
# Kimi
"kimi": 262144,
# Nemotron — NVIDIA's open-weights series (128K context across all sizes)
"nemotron": 131072,
# Arcee
"trinity": 262144,
# OpenRouter
@@ -242,10 +236,8 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.fireworks.ai": "fireworks",
"opencode.ai": "opencode-go",
"api.x.ai": "xai",
"integrate.api.nvidia.com": "nvidia",
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"ollama.com": "ollama-cloud",
}
@@ -1019,16 +1011,6 @@ def get_model_context_length(
if ctx:
return ctx
# 4b. AWS Bedrock — use static context length table.
# Bedrock's ListFoundationModels doesn't expose context window sizes,
# so we maintain a curated table in bedrock_adapter.py.
if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
try:
from agent.bedrock_adapter import get_bedrock_context_length
return get_bedrock_context_length(model)
except ImportError:
pass # boto3 not installed — fall through to generic resolution
# 5. Provider-aware lookups (before generic OpenRouter cache)
# These are provider-specific and take priority over the generic OR cache,
# since the same model can have different context limits per provider
-1
View File
@@ -169,7 +169,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
"togetherai": "togetherai",
"perplexity": "perplexity",
"cohere": "cohere",
"ollama-cloud": "ollama-cloud",
}
# Reverse mapping: models.dev → Hermes (built lazily)
-182
View File
@@ -1,182 +0,0 @@
"""Cross-session rate limit guard for Nous Portal.
Writes rate limit state to a shared file so all sessions (CLI, gateway,
cron, auxiliary) can check whether Nous Portal is currently rate-limited
before making requests. Prevents retry amplification when RPH is tapped.
Each 429 from Nous triggers up to 9 API calls per conversation turn
(3 SDK retries x 3 Hermes retries), and every one of those calls counts
against RPH. By recording the rate limit state on first 429 and checking
it before subsequent attempts, we eliminate the amplification effect.
"""
from __future__ import annotations
import json
import logging
import os
import tempfile
import time
from typing import Any, Mapping, Optional
logger = logging.getLogger(__name__)
_STATE_SUBDIR = "rate_limits"
_STATE_FILENAME = "nous.json"
def _state_path() -> str:
"""Return the path to the Nous rate limit state file."""
try:
from hermes_constants import get_hermes_home
base = get_hermes_home()
except ImportError:
base = os.path.join(os.path.expanduser("~"), ".hermes")
return os.path.join(base, _STATE_SUBDIR, _STATE_FILENAME)
def _parse_reset_seconds(headers: Optional[Mapping[str, str]]) -> Optional[float]:
"""Extract the best available reset-time estimate from response headers.
Priority:
1. x-ratelimit-reset-requests-1h (hourly RPH window most useful)
2. x-ratelimit-reset-requests (per-minute RPM window)
3. retry-after (generic HTTP header)
Returns seconds-from-now, or None if no usable header found.
"""
if not headers:
return None
lowered = {k.lower(): v for k, v in headers.items()}
for key in (
"x-ratelimit-reset-requests-1h",
"x-ratelimit-reset-requests",
"retry-after",
):
raw = lowered.get(key)
if raw is not None:
try:
val = float(raw)
if val > 0:
return val
except (TypeError, ValueError):
pass
return None
def record_nous_rate_limit(
*,
headers: Optional[Mapping[str, str]] = None,
error_context: Optional[dict[str, Any]] = None,
default_cooldown: float = 300.0,
) -> None:
"""Record that Nous Portal is rate-limited.
Parses the reset time from response headers or error context.
Falls back to ``default_cooldown`` (5 minutes) if no reset info
is available. Writes to a shared file that all sessions can read.
Args:
headers: HTTP response headers from the 429 error.
error_context: Structured error context from _extract_api_error_context().
default_cooldown: Fallback cooldown in seconds when no header data.
"""
now = time.time()
reset_at = None
# Try headers first (most accurate)
header_seconds = _parse_reset_seconds(headers)
if header_seconds is not None:
reset_at = now + header_seconds
# Try error_context reset_at (from body parsing)
if reset_at is None and isinstance(error_context, dict):
ctx_reset = error_context.get("reset_at")
if isinstance(ctx_reset, (int, float)) and ctx_reset > now:
reset_at = float(ctx_reset)
# Default cooldown
if reset_at is None:
reset_at = now + default_cooldown
path = _state_path()
try:
state_dir = os.path.dirname(path)
os.makedirs(state_dir, exist_ok=True)
state = {
"reset_at": reset_at,
"recorded_at": now,
"reset_seconds": reset_at - now,
}
# Atomic write: write to temp file + rename
fd, tmp_path = tempfile.mkstemp(dir=state_dir, suffix=".tmp")
try:
with os.fdopen(fd, "w") as f:
json.dump(state, f)
os.replace(tmp_path, path)
except Exception:
# Clean up temp file on failure
try:
os.unlink(tmp_path)
except OSError:
pass
raise
logger.info(
"Nous rate limit recorded: resets in %.0fs (at %.0f)",
reset_at - now, reset_at,
)
except Exception as exc:
logger.debug("Failed to write Nous rate limit state: %s", exc)
def nous_rate_limit_remaining() -> Optional[float]:
"""Check if Nous Portal is currently rate-limited.
Returns:
Seconds remaining until reset, or None if not rate-limited.
"""
path = _state_path()
try:
with open(path) as f:
state = json.load(f)
reset_at = state.get("reset_at", 0)
remaining = reset_at - time.time()
if remaining > 0:
return remaining
# Expired — clean up
try:
os.unlink(path)
except OSError:
pass
return None
except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError):
return None
def clear_nous_rate_limit() -> None:
"""Clear the rate limit state (e.g., after a successful Nous request)."""
try:
os.unlink(_state_path())
except FileNotFoundError:
pass
except OSError as exc:
logger.debug("Failed to clear Nous rate limit state: %s", exc)
def format_remaining(seconds: float) -> str:
"""Format seconds remaining into human-readable duration."""
s = max(0, int(seconds))
if s < 60:
return f"{s}s"
if s < 3600:
m, sec = divmod(s, 60)
return f"{m}m {sec}s" if sec else f"{m}m"
h, remainder = divmod(s, 3600)
m = remainder // 60
return f"{h}h {m}m" if m else f"{h}h"
+7 -10
View File
@@ -295,9 +295,7 @@ PLATFORM_HINTS = {
),
"telegram": (
"You are on a text messaging communication platform, Telegram. "
"Standard markdown is automatically converted to Telegram format. "
"Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
"`inline code`, ```code blocks```, [links](url), and ## headers. "
"Please do not use markdown as it does not render. "
"You can send media files natively: to deliver a file to the user, "
"include MEDIA:/absolute/path/to/file in your response. Images "
"(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -654,7 +652,7 @@ def build_skills_system_prompt(
):
continue
skills_by_category.setdefault(category, []).append(
(frontmatter_name, entry.get("description", ""))
(skill_name, entry.get("description", ""))
)
category_descriptions = {
str(k): str(v)
@@ -679,7 +677,7 @@ def build_skills_system_prompt(
):
continue
skills_by_category.setdefault(entry["category"], []).append(
(entry["frontmatter_name"], entry["description"])
(skill_name, entry["description"])
)
# Read category-level DESCRIPTION.md files
@@ -722,10 +720,9 @@ def build_skills_system_prompt(
continue
entry = _build_snapshot_entry(skill_file, ext_dir, frontmatter, desc)
skill_name = entry["skill_name"]
frontmatter_name = entry["frontmatter_name"]
if frontmatter_name in seen_skill_names:
if skill_name in seen_skill_names:
continue
if frontmatter_name in disabled or skill_name in disabled:
if entry["frontmatter_name"] in disabled or skill_name in disabled:
continue
if not _skill_should_show(
extract_skill_conditions(frontmatter),
@@ -733,9 +730,9 @@ def build_skills_system_prompt(
available_toolsets,
):
continue
seen_skill_names.add(frontmatter_name)
seen_skill_names.add(skill_name)
skills_by_category.setdefault(entry["category"], []).append(
(frontmatter_name, entry["description"])
(skill_name, entry["description"])
)
except Exception as e:
logger.debug("Error reading external skill %s: %s", skill_file, e)
-135
View File
@@ -1,135 +0,0 @@
"""Provider-specific OpenRouter routing tweaks.
Central registry for known-buggy OpenRouter endpoints whose upstream providers
silently drop tool-call streams, stall mid-arguments, or otherwise fail in ways
that are not user-configurable. Consumed by ``run_agent.py`` when building
``provider_preferences`` for chat completion requests against OpenRouter.
Design principles:
* Only applies to OpenRouter ``base_url`` other provider chains route through
different infrastructure and may not have the same endpoint issues.
* User-provided preferences always win. We only layer defaults in where the
user hasn't specified ``only``, ``order``, or ``ignore``.
* Additions must be backed by a concrete upstream-bug reference (vendor repo
issue, reproducible empirical evidence) this is not for speculative
provider preferences.
Registry format (``_KNOWN_BROKEN_ROUTES``):
key: lowercase model-slug substring that identifies the affected family
value: {
"ignore": [list of OpenRouter provider tags to skip, e.g. "minimax"],
"order": [list of OpenRouter provider tags to prefer in order],
"reason": "human-readable one-liner used in logs",
"ref": "issue/PR reference for the upstream bug",
}
"""
from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# Ordered list: first matching entry wins. Match is substring-in-lower-model.
_KNOWN_BROKEN_ROUTES: List[Dict[str, Any]] = [
{
# MiniMax direct OpenRouter endpoint has documented non-terminating
# streams on tool-calling workflows (MiniMax-M2 issue #109, Apr 2026;
# OpenClaw #1622). Empirically reproduced 4/4 times on 2026-04-18:
# streaming a write_file tool call returned zero bytes and closed
# silently at ~40s from both minimax/fp8 and minimax/highspeed tags.
# Fireworks, Together, NovitaAI, Google-Vertex, AtlasCloud all work.
"match": "minimax/",
"ignore": ["minimax"],
"order": [
"fireworks", # m2.7: best throughput + uptime
"novitaai", # m2: best tool-call error rate (0.19%)
"google-vertex", # m2: fastest latency
"atlascloud",
"together", # fp4 quant — last resort
],
"reason": "Minimax direct endpoint drops tool-call streams",
"ref": "MiniMax-M2#109, OpenClaw#1622, Hermes-PR#12072",
},
]
def get_provider_tweaks(model: Optional[str], base_url: Optional[str]) -> Dict[str, Any]:
"""Return known-broken-endpoint tweaks for a given model/base_url pair.
Returns an empty dict when no tweaks apply (non-OpenRouter endpoint,
unknown model, etc.) so callers can do ``if tweaks:`` cheaply.
Returned keys when applicable:
ignore: list[str] OpenRouter provider tags to exclude
order: list[str] OpenRouter provider tags to prefer in order
reason: str human-readable reason (for logging)
ref: str upstream bug reference (for logging)
"""
if not model or not base_url:
return {}
url_lower = base_url.lower()
# Only OpenRouter-compatible endpoints understand the ``provider`` object.
if "openrouter.ai" not in url_lower:
return {}
model_lower = model.lower()
for entry in _KNOWN_BROKEN_ROUTES:
if entry["match"] in model_lower:
return {
"ignore": list(entry.get("ignore") or []),
"order": list(entry.get("order") or []),
"reason": entry.get("reason", ""),
"ref": entry.get("ref", ""),
}
return {}
def merge_provider_tweaks(
provider_preferences: Dict[str, Any],
tweaks: Dict[str, Any],
*,
log_label: str = "",
) -> Dict[str, Any]:
"""Merge auto-tweaks into user-supplied provider preferences.
User-provided fields always win this function never overrides ``only``,
``ignore``, or ``order`` that the user has already set. It only supplies
defaults where those fields are absent.
When the user has set ``only`` (whitelist mode), the tweaks are fully
ignored: a whitelist already constrains routing to a known-good subset,
and layering ``ignore``/``order`` on top would be confusing.
Emits a single INFO log line when tweaks are actually applied so the
behaviour is visible in agent.log without spamming every request.
"""
if not tweaks:
return provider_preferences or {}
result = dict(provider_preferences or {})
# Whitelist already narrows routing — don't layer on.
if result.get("only"):
return result
applied: List[str] = []
if tweaks.get("ignore") and "ignore" not in result:
result["ignore"] = list(tweaks["ignore"])
applied.append(f"ignore={tweaks['ignore']}")
if tweaks.get("order") and "order" not in result:
result["order"] = list(tweaks["order"])
applied.append(f"order={tweaks['order']}")
if applied:
logger.info(
"Provider tweaks applied%s: %s (reason: %s; ref: %s)",
f" [{log_label}]" if log_label else "",
", ".join(applied),
tweaks.get("reason", "?"),
tweaks.get("ref", "?"),
)
return result
__all__ = ["get_provider_tweaks", "merge_provider_tweaks"]
-17
View File
@@ -93,17 +93,6 @@ _DB_CONNSTR_RE = re.compile(
re.IGNORECASE,
)
# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{")
# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
_JWT_RE = re.compile(
r"eyJ[A-Za-z0-9_-]{10,}" # Header (always starts with eyJ)
r"(?:\.[A-Za-z0-9_=-]{4,}){0,2}" # Optional payload and/or signature
)
# Discord user/role mentions: <@123456789012345678> or <@!123456789012345678>
# Snowflake IDs are 17-20 digit integers that resolve to specific Discord accounts.
_DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
# E.164 phone numbers: +<country><number>, 7-15 digits
# Negative lookahead prevents matching hex strings or identifiers
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
@@ -170,12 +159,6 @@ def redact_sensitive_text(text: str) -> str:
# Database connection string passwords
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
# JWT tokens (eyJ... — base64-encoded JSON headers)
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
# Discord user/role mentions (<@snowflake_id>)
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
# E.164 phone numbers (Signal, WhatsApp)
def _redact_phone(m):
phone = m.group(1)
+2 -11
View File
@@ -12,8 +12,6 @@ from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
_skill_commands: Dict[str, Dict[str, Any]] = {}
@@ -72,14 +70,7 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
skill_name = str(loaded_skill.get("name") or normalized)
skill_path = str(loaded_skill.get("path") or "")
skill_dir = None
# Prefer the absolute skill_dir returned by skill_view() — this is
# correct for both local and external skills. Fall back to the old
# SKILLS_DIR-relative reconstruction only when skill_dir is absent
# (e.g. legacy skill_view responses).
abs_skill_dir = loaded_skill.get("skill_dir")
if abs_skill_dir:
skill_dir = Path(abs_skill_dir)
elif skill_path:
if skill_path:
try:
skill_dir = SKILLS_DIR / Path(skill_path).parent
except Exception:
@@ -117,7 +108,7 @@ def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None
if not resolved:
return
lines = ["", f"[Skill config (from {display_hermes_home()}/config.yaml):"]
lines = ["", "[Skill config (from ~/.hermes/config.yaml):"]
for key, value in resolved.items():
display_val = str(value) if value else "(not set)"
lines.append(f" {key} = {display_val}")
-74
View File
@@ -284,80 +284,6 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
source_url="https://ai.google.dev/pricing",
pricing_version="google-pricing-2026-03-16",
),
# AWS Bedrock — pricing per the Bedrock pricing page.
# Bedrock charges the same per-token rates as the model provider but
# through AWS billing. These are the on-demand prices (no commitment).
# Source: https://aws.amazon.com/bedrock/pricing/
(
"bedrock",
"anthropic.claude-opus-4-6",
): PricingEntry(
input_cost_per_million=Decimal("15.00"),
output_cost_per_million=Decimal("75.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-sonnet-4-6",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-sonnet-4-5",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-haiku-4-5",
): PricingEntry(
input_cost_per_million=Decimal("0.80"),
output_cost_per_million=Decimal("4.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-pro",
): PricingEntry(
input_cost_per_million=Decimal("0.80"),
output_cost_per_million=Decimal("3.20"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-lite",
): PricingEntry(
input_cost_per_million=Decimal("0.06"),
output_cost_per_million=Decimal("0.24"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-micro",
): PricingEntry(
input_cost_per_million=Decimal("0.035"),
output_cost_per_million=Decimal("0.14"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
}
+1 -4
View File
@@ -561,10 +561,7 @@ class BatchRunner:
provider_sort (str): Sort providers by price/throughput/latency (optional)
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
reasoning_config (Dict): OpenRouter reasoning config override (e.g. {"effort": "none"} to disable thinking)
prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming).
NOTE: Anthropic Sonnet 4.6+ and Opus 4.6+ reject a trailing assistant-role prefill
(400 error). For those models use output_config.format or structured-output
schemas instead. Safe here for user-role priming and for older Claude / non-Claude models.
prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming)
max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set)
"""
self.dataset_file = Path(dataset_file)
+7 -16
View File
@@ -16,7 +16,7 @@ model:
# "nous" - Nous Portal OAuth (requires: hermes login)
# "nous-api" - Nous Portal API key (requires: NOUS_API_KEY)
# "anthropic" - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
# "openai-codex" - OpenAI Codex (requires: hermes auth)
# "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
# "copilot" - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
# "gemini" - Use Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
# "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
@@ -24,10 +24,8 @@ model:
# "minimax" - MiniMax global (requires: MINIMAX_API_KEY)
# "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY)
# "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
# "nvidia" - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY)
# "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY)
# "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
#
@@ -39,6 +37,12 @@ model:
# base_url: "http://localhost:1234/v1"
# No API key needed — local servers typically ignore auth.
#
# For Ollama Cloud (https://ollama.com/pricing):
# provider: "custom"
# base_url: "https://ollama.com/v1"
# Set OLLAMA_API_KEY in .env — automatically picked up when base_url
# points to ollama.com.
#
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
provider: "auto"
@@ -333,7 +337,6 @@ compression:
# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
# "nous" - Force Nous Portal (requires: hermes login)
# "gemini" - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY)
# "codex" - Force Codex OAuth (requires: hermes model → Codex).
# Uses gpt-5.3-codex which supports vision.
# "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
@@ -561,18 +564,6 @@ platform_toolsets:
homeassistant: [hermes-homeassistant]
qqbot: [hermes-qqbot]
# =============================================================================
# Gateway Platform Settings
# =============================================================================
# Optional per-platform messaging settings.
# Platform-specific knobs live under `extra`.
#
# platforms:
# telegram:
# reply_to_mode: "first" # off | first | all
# extra:
# disable_link_previews: false # Set true to suppress Telegram URL previews in bot messages
# ─────────────────────────────────────────────────────────────────────────────
# Available toolsets (use these names in platform_toolsets or the toolsets list)
#
+100 -565
View File
@@ -18,8 +18,6 @@ import os
import shutil
import sys
import json
import re
import base64
import atexit
import tempfile
import time
@@ -80,42 +78,6 @@ _project_env = Path(__file__).parent / '.env'
load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
_REASONING_TAGS = (
"REASONING_SCRATCHPAD",
"think",
"reasoning",
"THINKING",
"thinking",
)
def _strip_reasoning_tags(text: str) -> str:
cleaned = text
for tag in _REASONING_TAGS:
cleaned = re.sub(rf"<{tag}>.*?</{tag}>\s*", "", cleaned, flags=re.DOTALL)
cleaned = re.sub(rf"<{tag}>.*$", "", cleaned, flags=re.DOTALL)
return cleaned.strip()
def _assistant_content_as_text(content: Any) -> str:
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
parts = [
str(part.get("text", ""))
for part in content
if isinstance(part, dict) and part.get("type") == "text"
]
return "\n".join(p for p in parts if p)
return str(content)
def _assistant_copy_text(content: Any) -> str:
return _strip_reasoning_tags(_assistant_content_as_text(content))
# =============================================================================
# Configuration Loading
# =============================================================================
@@ -439,27 +401,14 @@ def load_cli_config() -> Dict[str, Any]:
# filesystem is directly accessible. For ALL remote/container backends
# (ssh, docker, modal, singularity), the host path doesn't exist on the
# target -- remove the key so terminal_tool.py uses its per-backend default.
#
# GUARD: If TERMINAL_CWD is already set to a real absolute path (by the
# gateway's config bridge earlier in the process), don't clobber it.
# This prevents a lazy import of cli.py during gateway runtime from
# rewriting TERMINAL_CWD to the service's working directory.
# See issue #10817.
_CWD_PLACEHOLDERS = (".", "auto", "cwd")
if terminal_config.get("cwd") in _CWD_PLACEHOLDERS:
_existing_cwd = os.environ.get("TERMINAL_CWD", "")
if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd):
# Gateway (or earlier startup) already resolved a real path — keep it
terminal_config["cwd"] = _existing_cwd
defaults["terminal"]["cwd"] = _existing_cwd
if terminal_config.get("cwd") in (".", "auto", "cwd"):
effective_backend = terminal_config.get("env_type", "local")
if effective_backend == "local":
terminal_config["cwd"] = os.getcwd()
defaults["terminal"]["cwd"] = terminal_config["cwd"]
else:
effective_backend = terminal_config.get("env_type", "local")
if effective_backend == "local":
terminal_config["cwd"] = os.getcwd()
defaults["terminal"]["cwd"] = terminal_config["cwd"]
else:
# Remove so TERMINAL_CWD stays unset → tool picks backend default
terminal_config.pop("cwd", None)
# Remove so TERMINAL_CWD stays unset → tool picks backend default
terminal_config.pop("cwd", None)
env_mappings = {
"env_type": "TERMINAL_ENV",
@@ -1040,7 +989,6 @@ def _prune_orphaned_branches(repo_root: str) -> None:
_ACCENT_ANSI_DEFAULT = "\033[1;38;2;255;215;0m" # True-color #FFD700 bold — fallback
_BOLD = "\033[1m"
_RST = "\033[0m"
_STREAM_PAD = " " # 4-space indent for streamed response text (matches Panel padding)
def _hex_to_ansi(hex_color: str, *, bold: bool = False) -> str:
@@ -1210,10 +1158,6 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:
return None
expanded = os.path.expandvars(os.path.expanduser(token))
if os.name != "nt":
normalized = expanded.replace("\\", "/")
if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha():
expanded = f"/mnt/{normalized[0].lower()}/{normalized[3:]}"
path = Path(expanded)
if not path.is_absolute():
base_dir = Path(os.getenv("TERMINAL_CWD", os.getcwd()))
@@ -1296,12 +1240,10 @@ def _detect_file_drop(user_input: str) -> "dict | None":
or stripped.startswith("~")
or stripped.startswith("./")
or stripped.startswith("../")
or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha())
or stripped.startswith('"/')
or stripped.startswith('"~')
or stripped.startswith("'/")
or stripped.startswith("'~")
or (len(stripped) >= 4 and stripped[0] in ("'", '"') and stripped[2] == ":" and stripped[3] in ("\\", "/") and stripped[1].isalpha())
)
if not starts_like_path:
return None
@@ -1770,9 +1712,9 @@ class HermesCLI:
# Parse and validate toolsets
self.enabled_toolsets = toolsets
if toolsets and "all" not in toolsets and "*" not in toolsets:
# Validate each toolset — MCP server names are resolved via
# live registry aliases (registered during discover_mcp_tools),
# but discovery hasn't run yet at this point, so exclude them.
# Validate each toolset — MCP server names are added by
# _get_platform_tools() but aren't registered in TOOLSETS yet
# (that happens later in _sync_mcp_toolsets), so exclude them.
mcp_names = set((CLI_CONFIG.get("mcp_servers") or {}).keys())
invalid = [t for t in toolsets if not validate_toolset(t) and t not in mcp_names]
if invalid:
@@ -2070,17 +2012,7 @@ class HermesCLI:
"""Return the visible height for the spinner/status text line above the status bar."""
if not getattr(self, "_spinner_text", ""):
return 0
if self._use_minimal_tui_chrome(width=width):
return 0
# Compute how many lines the spinner text needs when wrapped.
# The rendered text is " {emoji} {label} ({elapsed})" — about
# len(_spinner_text) + 16 chars for indent + timer suffix.
width = width or self._get_tui_terminal_width()
if width and width > 10:
import math
text_len = len(self._spinner_text) + 16 # indent + timer
return max(1, math.ceil(text_len / width))
return 1
return 0 if self._use_minimal_tui_chrome(width=width) else 1
def _get_voice_status_fragments(self, width: Optional[int] = None):
"""Return the voice status bar fragments for the interactive TUI."""
@@ -2648,7 +2580,7 @@ class HermesCLI:
_tc = getattr(self, "_stream_text_ansi", "")
while "\n" in self._stream_buf:
line, self._stream_buf = self._stream_buf.split("\n", 1)
_cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}")
_cprint(f"{_tc}{line}{_RST}" if _tc else line)
def _flush_stream(self) -> None:
"""Emit any remaining partial line from the stream buffer and close the box."""
@@ -2665,7 +2597,7 @@ class HermesCLI:
if self._stream_buf:
_tc = getattr(self, "_stream_text_ansi", "")
_cprint(f"{_STREAM_PAD}{_tc}{self._stream_buf}{_RST}" if _tc else f"{_STREAM_PAD}{self._stream_buf}")
_cprint(f"{_tc}{self._stream_buf}{_RST}" if _tc else self._stream_buf)
self._stream_buf = ""
# Close the response box
@@ -3169,6 +3101,21 @@ class HermesCLI:
MAX_ASST_LEN = 200 # truncate assistant text
MAX_ASST_LINES = 3 # max lines of assistant text
def _strip_reasoning(text: str) -> str:
"""Remove <REASONING_SCRATCHPAD>...</REASONING_SCRATCHPAD> blocks
from displayed text (reasoning model internal thoughts)."""
import re
cleaned = re.sub(
r"<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>\s*",
"", text, flags=re.DOTALL,
)
# Also strip unclosed reasoning tags at the end
cleaned = re.sub(
r"<REASONING_SCRATCHPAD>.*$",
"", cleaned, flags=re.DOTALL,
)
return cleaned.strip()
# Collect displayable entries (skip system, tool-result messages)
entries = [] # list of (role, display_text)
_last_asst_idx = None # index of last assistant entry
@@ -3200,7 +3147,7 @@ class HermesCLI:
elif role == "assistant":
text = "" if content is None else str(content)
text = _strip_reasoning_tags(text)
text = _strip_reasoning(text)
parts = []
full_parts = [] # un-truncated version
if text:
@@ -3539,26 +3486,6 @@ class HermesCLI:
killed = process_registry.kill_all()
print(f" ✅ Stopped {killed} process(es).")
def _handle_agents_command(self):
"""Handle /agents — show background processes and agent status."""
from tools.process_registry import format_uptime_short, process_registry
processes = process_registry.list_sessions()
running = [p for p in processes if p.get("status") == "running"]
finished = [p for p in processes if p.get("status") != "running"]
_cprint(f" Running processes: {len(running)}")
for p in running:
cmd = p.get("command", "")[:80]
up = format_uptime_short(p.get("uptime_seconds", 0))
_cprint(f" {p.get('session_id', '?')} · {up} · {cmd}")
if finished:
_cprint(f" Recently finished: {len(finished)}")
agent_running = getattr(self, "_agent_running", False)
_cprint(f" Agent: {'running' if agent_running else 'idle'}")
def _handle_paste_command(self):
"""Handle /paste — explicitly check clipboard for an image.
@@ -3584,61 +3511,6 @@ class HermesCLI:
else:
_cprint(f" {_DIM}(._.) No image found in clipboard{_RST}")
def _write_osc52_clipboard(self, text: str) -> None:
"""Copy *text* to terminal clipboard via OSC 52."""
payload = base64.b64encode(text.encode("utf-8")).decode("ascii")
seq = f"\x1b]52;c;{payload}\x07"
out = getattr(self, "_app", None)
output = getattr(out, "output", None) if out else None
if output and hasattr(output, "write_raw"):
output.write_raw(seq)
output.flush()
return
if output and hasattr(output, "write"):
output.write(seq)
output.flush()
return
sys.stdout.write(seq)
sys.stdout.flush()
def _handle_copy_command(self, cmd_original: str) -> None:
"""Handle /copy [number] — copy assistant output to clipboard."""
parts = cmd_original.split(maxsplit=1)
arg = parts[1].strip() if len(parts) > 1 else ""
assistant = [m for m in self.conversation_history if m.get("role") == "assistant"]
if not assistant:
_cprint(" Nothing to copy yet.")
return
if arg:
try:
idx = int(arg) - 1
except ValueError:
_cprint(" Usage: /copy [number]")
return
if idx < 0 or idx >= len(assistant):
_cprint(f" Invalid response number. Use 1-{len(assistant)}.")
return
else:
idx = len(assistant) - 1
while idx >= 0 and not _assistant_copy_text(assistant[idx].get("content")):
idx -= 1
if idx < 0:
_cprint(" Nothing to copy in assistant responses yet.")
return
text = _assistant_copy_text(assistant[idx].get("content"))
if not text:
_cprint(" Nothing to copy in that assistant response.")
return
try:
self._write_osc52_clipboard(text)
_cprint(f" Copied assistant response #{idx + 1} to clipboard")
except Exception as e:
_cprint(f" Clipboard copy failed: {e}")
def _handle_image_command(self, cmd_original: str):
"""Handle /image <path> — attach a local image file for the next prompt."""
raw_args = (cmd_original.split(None, 1)[1].strip() if " " in cmd_original else "")
@@ -3775,7 +3647,7 @@ class HermesCLI:
skin = get_active_skin()
separator_color = skin.get_color("banner_dim", "#B8860B")
accent_color = skin.get_color("ui_accent", "#FFBF00")
label_color = skin.get_color("ui_label", "#DAA520")
label_color = skin.get_color("ui_label", "#4dd0e1")
except Exception:
separator_color, accent_color, label_color = "#B8860B", "#FFBF00", "cyan"
toolsets_info = ""
@@ -4024,14 +3896,23 @@ class HermesCLI:
def _handle_profile_command(self):
"""Display active profile name and home directory."""
from hermes_constants import display_hermes_home
from hermes_cli.profiles import get_active_profile_name
from hermes_constants import get_hermes_home, display_hermes_home
home = get_hermes_home()
display = display_hermes_home()
profile_name = get_active_profile_name()
profiles_parent = Path.home() / ".hermes" / "profiles"
try:
rel = home.relative_to(profiles_parent)
profile_name = str(rel).split("/")[0]
except ValueError:
profile_name = None
print()
print(f" Profile: {profile_name}")
if profile_name:
print(f" Profile: {profile_name}")
else:
print(" Profile: default")
print(f" Home: {display}")
print()
@@ -4218,8 +4099,6 @@ class HermesCLI:
self.agent.flush_memories(self.conversation_history)
except (Exception, KeyboardInterrupt):
pass
# Trigger memory extraction on the old session before session_id rotates.
self.agent.commit_memory_session(self.conversation_history)
self._notify_session_boundary("on_session_finalize")
elif self.agent:
# First session or empty history — still finalize the old session
@@ -4618,34 +4497,6 @@ class HermesCLI:
self._restore_modal_input_snapshot()
self._invalidate(min_interval=0.0)
@staticmethod
def _compute_model_picker_viewport(
selected: int,
scroll_offset: int,
n: int,
term_rows: int,
reserved_below: int = 6,
panel_chrome: int = 6,
min_visible: int = 3,
) -> tuple[int, int]:
"""Resolve (scroll_offset, visible) for the /model picker viewport.
``reserved_below`` matches the approval / clarify panels input area,
status bar, and separators below the panel. ``panel_chrome`` covers
this panel's own borders + blanks + hint row. The remaining rows hold
the scrollable list, with the offset slid to keep ``selected`` on screen.
"""
max_visible = max(min_visible, term_rows - reserved_below - panel_chrome)
if n <= max_visible:
return 0, n
visible = max_visible
if selected < scroll_offset:
scroll_offset = selected
elif selected >= scroll_offset + visible:
scroll_offset = selected - visible + 1
scroll_offset = max(0, min(scroll_offset, n - visible))
return scroll_offset, visible
def _apply_model_switch_result(self, result, persist_global: bool) -> None:
if not result.success:
_cprint(f"{result.error_message}")
@@ -4736,19 +4587,16 @@ class HermesCLI:
self._close_model_picker()
return
provider_data = providers[selected]
# Use the curated model list from list_authenticated_providers()
# (same lists as `hermes model` and gateway pickers).
# Only fall back to the live provider catalog when the curated
# list is empty (e.g. user-defined endpoints with no curated list).
model_list = provider_data.get("models", [])
model_list = []
try:
from hermes_cli.models import provider_model_ids
live = provider_model_ids(provider_data["slug"])
if live:
model_list = live
except Exception:
pass
if not model_list:
try:
from hermes_cli.models import provider_model_ids
live = provider_model_ids(provider_data["slug"])
if live:
model_list = live
except Exception:
pass
model_list = provider_data.get("models", [])
state["stage"] = "model"
state["provider_data"] = provider_data
state["model_list"] = model_list
@@ -5056,52 +4904,6 @@ class HermesCLI:
return "\n".join(p for p in parts if p)
return str(value)
def _handle_gquota_command(self, cmd_original: str) -> None:
"""Show Google Gemini Code Assist quota usage for the current OAuth account."""
try:
from agent.google_oauth import get_valid_access_token, GoogleOAuthError, load_credentials
from agent.google_code_assist import retrieve_user_quota, CodeAssistError
except ImportError as exc:
self.console.print(f" [red]Gemini modules unavailable: {exc}[/]")
return
try:
access_token = get_valid_access_token()
except GoogleOAuthError as exc:
self.console.print(f" [yellow]{exc}[/]")
self.console.print(" Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.")
return
creds = load_credentials()
project_id = (creds.project_id if creds else "") or ""
try:
buckets = retrieve_user_quota(access_token, project_id=project_id)
except CodeAssistError as exc:
self.console.print(f" [red]Quota lookup failed:[/] {exc}")
return
if not buckets:
self.console.print(" [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]")
return
# Sort for stable display, group by model
buckets.sort(key=lambda b: (b.model_id, b.token_type))
self.console.print()
self.console.print(f" [bold]Gemini Code Assist quota[/] (project: {project_id or '(auto / free-tier)'})")
self.console.print()
for b in buckets:
pct = max(0.0, min(1.0, b.remaining_fraction))
width = 20
filled = int(round(pct * width))
bar = "" * filled + "" * (width - filled)
pct_str = f"{int(pct * 100):3d}%"
header = b.model_id
if b.token_type:
header += f" [{b.token_type}]"
self.console.print(f" {header:40s} {bar} {pct_str}")
self.console.print()
def _handle_personality_command(self, cmd: str):
"""Handle the /personality command to set predefined personalities."""
parts = cmd.split(maxsplit=1)
@@ -5611,8 +5413,6 @@ class HermesCLI:
self._handle_model_switch(cmd_original)
elif canonical == "provider":
self._show_model_and_providers()
elif canonical == "gquota":
self._handle_gquota_command(cmd_original)
elif canonical == "personality":
# Use original case (handler lowercases the personality name itself)
@@ -5657,8 +5457,6 @@ class HermesCLI:
self._show_usage()
elif canonical == "insights":
self._show_insights(cmd_original)
elif canonical == "copy":
self._handle_copy_command(cmd_original)
elif canonical == "debug":
self._handle_debug_command()
elif canonical == "paste":
@@ -5689,8 +5487,7 @@ class HermesCLI:
version = f" v{p['version']}" if p["version"] else ""
tools = f"{p['tools']} tools" if p["tools"] else ""
hooks = f"{p['hooks']} hooks" if p["hooks"] else ""
commands = f"{p['commands']} commands" if p.get("commands") else ""
parts = [x for x in [tools, hooks, commands] if x]
parts = [x for x in [tools, hooks] if x]
detail = f" ({', '.join(parts)})" if parts else ""
error = f"{p['error']}" if p["error"] else ""
print(f" {status} {p['name']}{version}{detail}{error}")
@@ -5702,8 +5499,6 @@ class HermesCLI:
self._handle_snapshot_command(cmd_original)
elif canonical == "stop":
self._handle_stop_command()
elif canonical == "agents":
self._handle_agents_command()
elif canonical == "background":
self._handle_background_command(cmd_original)
elif canonical == "btw":
@@ -5966,7 +5761,7 @@ class HermesCLI:
border_style=_resp_color,
style=_resp_text,
box=rich_box.HORIZONTALS,
padding=(1, 4),
padding=(1, 2),
))
else:
_cprint(" (No response generated)")
@@ -6090,7 +5885,7 @@ class HermesCLI:
title_align="left",
border_style=_resp_color,
box=rich_box.HORIZONTALS,
padding=(1, 4),
padding=(1, 2),
))
else:
_cprint(" 💬 /btw: (no response)")
@@ -6157,7 +5952,7 @@ class HermesCLI:
parts = cmd.strip().split(None, 1)
sub = parts[1].lower().strip() if len(parts) > 1 else "status"
_DEFAULT_CDP = "http://127.0.0.1:9222"
_DEFAULT_CDP = "http://localhost:9222"
current = os.environ.get("BROWSER_CDP_URL", "").strip()
if sub.startswith("connect"):
@@ -6404,21 +6199,13 @@ class HermesCLI:
def _toggle_yolo(self):
"""Toggle YOLO mode — skip all dangerous command approval prompts."""
import os
from hermes_cli.colors import Colors as _Colors
current = bool(os.environ.get("HERMES_YOLO_MODE"))
if current:
os.environ.pop("HERMES_YOLO_MODE", None)
_cprint(
f" ⚠ YOLO mode {_Colors.BOLD}{_Colors.RED}OFF{_Colors.RESET}"
" — dangerous commands will require approval."
)
self.console.print(" ⚠ YOLO mode [bold red]OFF[/] — dangerous commands will require approval.")
else:
os.environ["HERMES_YOLO_MODE"] = "1"
_cprint(
f" ⚡ YOLO mode {_Colors.BOLD}{_Colors.GREEN}ON{_Colors.RESET}"
" — all commands auto-approved. Use with caution."
)
self.console.print(" ⚡ YOLO mode [bold green]ON[/] — all commands auto-approved. Use with caution.")
def _handle_reasoning_command(self, cmd: str):
"""Handle /reasoning — manage effort level and display toggle.
@@ -7017,7 +6804,8 @@ class HermesCLI:
)
raise RuntimeError(
"Voice mode requires sounddevice and numpy.\n"
f"Install with: {sys.executable} -m pip install sounddevice numpy"
"Install with: pip install sounddevice numpy\n"
"Or: pip install hermes-agent[voice]"
)
if not reqs.get("stt_available", reqs.get("stt_key_set")):
raise RuntimeError(
@@ -7293,7 +7081,8 @@ class HermesCLI:
_cprint(f" {_DIM}Then install/update the Termux:API Android app for microphone capture{_RST}")
_cprint(f" {_BOLD}Option 2: pkg install python-numpy portaudio && python -m pip install sounddevice{_RST}")
else:
_cprint(f"\n {_BOLD}Install: {sys.executable} -m pip install {' '.join(reqs['missing_packages'])}{_RST}")
_cprint(f"\n {_BOLD}Install: pip install {' '.join(reqs['missing_packages'])}{_RST}")
_cprint(f" {_DIM}Or: pip install hermes-agent[voice]{_RST}")
return
with self._voice_lock:
@@ -7593,15 +7382,7 @@ class HermesCLI:
self._invalidate()
def _get_approval_display_fragments(self):
"""Render the dangerous-command approval panel for the prompt_toolkit UI.
Layout priority: title + command + choices must always render, even if
the terminal is short or the description is long. Description is placed
at the bottom of the panel and gets truncated to fit the remaining row
budget. This prevents HSplit from clipping approve/deny off-screen when
tirith findings produce multi-paragraph descriptions or when the user
runs in a compact terminal pane.
"""
"""Render the dangerous-command approval panel for the prompt_toolkit UI."""
state = self._approval_state
if not state:
return []
@@ -7660,89 +7441,22 @@ class HermesCLI:
box_width = _panel_box_width(title, preview_lines)
inner_text_width = max(8, box_width - 2)
# Pre-wrap the mandatory content — command + choices must always render.
cmd_wrapped = _wrap_panel_text(cmd_display, inner_text_width)
# (choice_index, wrapped_line) so we can re-apply selected styling below
choice_wrapped: list[tuple[int, str]] = []
for i, choice in enumerate(choices):
label = choice_labels.get(choice, choice)
prefix = ' ' if i == selected else ' '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
choice_wrapped.append((i, wrapped))
# Budget vertical space so HSplit never clips the command or choices.
# Panel chrome (full layout with separators):
# top border + title + blank_after_title
# + blank_between_cmd_choices + bottom border = 5 rows.
# In tight terminals we collapse to:
# top border + title + bottom border = 3 rows (no blanks).
#
# reserved_below: rows consumed below the approval panel by the
# spinner/tool-progress line, status bar, input area, separators, and
# prompt symbol. Measured at ~6 rows during live PTY approval prompts;
# budget 6 so we don't overestimate the panel's room.
term_rows = shutil.get_terminal_size((100, 24)).lines
chrome_full = 5
chrome_tight = 3
reserved_below = 6
available = max(0, term_rows - reserved_below)
mandatory_full = chrome_full + len(cmd_wrapped) + len(choice_wrapped)
# If the full-chrome panel doesn't fit, drop the separator blanks.
# This keeps the command and every choice on-screen in compact terminals.
use_compact_chrome = mandatory_full > available
chrome_rows = chrome_tight if use_compact_chrome else chrome_full
# If the command itself is too long to leave room for choices (e.g. user
# hit "view" on a multi-hundred-character command), truncate it so the
# approve/deny buttons still render. Keep at least 1 row of command.
max_cmd_rows = max(1, available - chrome_rows - len(choice_wrapped))
if len(cmd_wrapped) > max_cmd_rows:
keep = max(1, max_cmd_rows - 1) if max_cmd_rows > 1 else 1
cmd_wrapped = cmd_wrapped[:keep] + ["… (command truncated — use /logs or /debug for full text)"]
# Allocate any remaining rows to description. The extra -1 in full mode
# accounts for the blank separator between choices and description.
mandatory_no_desc = chrome_rows + len(cmd_wrapped) + len(choice_wrapped)
desc_sep_cost = 0 if use_compact_chrome else 1
available_for_desc = available - mandatory_no_desc - desc_sep_cost
# Even on huge terminals, cap description height so the panel stays compact.
available_for_desc = max(0, min(available_for_desc, 10))
desc_wrapped = _wrap_panel_text(description, inner_text_width) if description else []
if available_for_desc < 1 or not desc_wrapped:
desc_wrapped = []
elif len(desc_wrapped) > available_for_desc:
keep = max(1, available_for_desc - 1)
desc_wrapped = desc_wrapped[:keep] + ["… (description truncated)"]
# Render: title → command → choices → description (description last so
# any remaining overflow clips from the bottom of the least-critical
# content, never from the command or choices). Use compact chrome (no
# blank separators) when the terminal is tight.
lines = []
lines.append(('class:approval-border', '' + ('' * box_width) + '\n'))
_append_panel_line(lines, 'class:approval-border', 'class:approval-title', title, box_width)
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for wrapped in cmd_wrapped:
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for wrapped in _wrap_panel_text(description, inner_text_width):
_append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width)
for wrapped in _wrap_panel_text(cmd_display, inner_text_width):
_append_panel_line(lines, 'class:approval-border', 'class:approval-cmd', wrapped, box_width)
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for i, wrapped in choice_wrapped:
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for i, choice in enumerate(choices):
label = choice_labels.get(choice, choice)
style = 'class:approval-selected' if i == selected else 'class:approval-choice'
_append_panel_line(lines, 'class:approval-border', style, wrapped, box_width)
if desc_wrapped:
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for wrapped in desc_wrapped:
_append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width)
prefix = ' ' if i == selected else ' '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
_append_panel_line(lines, 'class:approval-border', style, wrapped, box_width)
_append_blank_panel_line(lines, 'class:approval-border', box_width)
lines.append(('class:approval-border', '' + ('' * box_width) + '\n'))
return lines
@@ -7934,7 +7648,7 @@ class HermesCLI:
label = " ⚕ Hermes "
fill = w - 2 - len(label)
_cprint(f"\n{_ACCENT}╭─{label}{'' * max(fill - 1, 0)}{_RST}")
_cprint(f"{_STREAM_PAD}{sentence.rstrip()}")
_cprint(sentence.rstrip())
tts_thread = threading.Thread(
target=stream_tts_to_speaker,
@@ -8038,33 +7752,7 @@ class HermesCLI:
# Fallback for non-interactive mode (e.g., single-query)
agent_thread.join(0.1)
# Wait for the agent thread to finish. After an interrupt the
# agent may take a few seconds to clean up (kill subprocess, persist
# session). Poll instead of a blocking join so the process_loop
# stays responsive — if the user sent another interrupt or the
# agent gets stuck, we can break out instead of freezing forever.
if interrupt_msg is not None:
# Interrupt path: poll briefly, then move on. The agent
# thread is daemon — it dies on process exit regardless.
for _wait_tick in range(50): # 50 * 0.2s = 10s max
agent_thread.join(timeout=0.2)
if not agent_thread.is_alive():
break
# Check if user fired ANOTHER interrupt (Ctrl+C sets
# _should_exit which process_loop checks on next pass).
if getattr(self, '_should_exit', False):
break
if agent_thread.is_alive():
logger.warning(
"Agent thread still alive after interrupt "
"(thread %s). Daemon thread will be cleaned up "
"on exit.",
agent_thread.ident,
)
else:
# Normal completion: agent thread should be done already,
# but guard against edge cases.
agent_thread.join(timeout=30)
agent_thread.join() # Ensure agent thread completes
# Proactively clean up async clients whose event loop is dead.
# The agent thread may have created AsyncOpenAI clients bound
@@ -8191,7 +7879,7 @@ class HermesCLI:
border_style=_resp_color,
style=_resp_text,
box=rich_box.HORIZONTALS,
padding=(1, 4),
padding=(1, 2),
))
@@ -8662,7 +8350,6 @@ class HermesCLI:
# --- /model picker modal ---
if self._model_picker_state:
self._handle_model_picker_selection()
event.app.current_buffer.reset()
event.app.invalidate()
return
@@ -8828,13 +8515,6 @@ class HermesCLI:
state["selected"] = min(max_idx, state.get("selected", 0) + 1)
event.app.invalidate()
@kb.add('escape', filter=Condition(lambda: bool(self._model_picker_state)), eager=True)
def model_picker_escape(event):
"""ESC closes the /model picker."""
self._close_model_picker()
event.app.current_buffer.reset()
event.app.invalidate()
# --- History navigation: up/down browse history in normal input mode ---
# The TextArea is multiline, so by default up/down only move the cursor.
# Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
@@ -9365,7 +9045,6 @@ class HermesCLI:
spinner_widget = Window(
content=FormattedTextControl(get_spinner_text),
height=get_spinner_height,
wrap_lines=True,
)
spacer = Window(
@@ -9402,13 +9081,7 @@ class HermesCLI:
lines.append((border_style, "" + (" " * box_width) + "\n"))
def _get_clarify_display():
"""Build styled text for the clarify question/choices panel.
Layout priority: choices + Other option must always render even if
the question is very long. The question is budgeted to leave enough
rows for the choices and trailing chrome; anything over the budget
is truncated with a marker.
"""
"""Build styled text for the clarify question/choices panel."""
state = cli_ref._clarify_state
if not state:
return []
@@ -9429,97 +9102,48 @@ class HermesCLI:
box_width = _panel_box_width("Hermes needs your input", preview_lines)
inner_text_width = max(8, box_width - 2)
# Pre-wrap choices + Other option — these are mandatory.
choice_wrapped: list[tuple[int, str]] = []
if choices:
for i, choice in enumerate(choices):
prefix = ' ' if i == selected and not cli_ref._clarify_freetext else ' '
for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "):
choice_wrapped.append((i, wrapped))
# Trailing Other row(s)
other_idx = len(choices)
if selected == other_idx and not cli_ref._clarify_freetext:
other_label_mand = ' Other (type your answer)'
elif cli_ref._clarify_freetext:
other_label_mand = ' Other (type below)'
else:
other_label_mand = ' Other (type your answer)'
other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ")
elif cli_ref._clarify_freetext:
# Freetext-only mode: the guidance line takes the place of choices.
other_wrapped = _wrap_panel_text(
"Type your answer in the prompt below, then press Enter.",
inner_text_width,
)
else:
other_wrapped = []
# Budget the question so mandatory rows always render.
# Chrome layouts:
# full : top border + blank_after_title + blank_after_question
# + blank_before_bottom + bottom border = 5 rows
# tight: top border + bottom border = 2 rows (drop all blanks)
#
# reserved_below matches the approval-panel budget (~6 rows for
# spinner/tool-progress + status + input + separators + prompt).
term_rows = shutil.get_terminal_size((100, 24)).lines
chrome_full = 5
chrome_tight = 2
reserved_below = 6
available = max(0, term_rows - reserved_below)
mandatory_full = chrome_full + len(choice_wrapped) + len(other_wrapped)
use_compact_chrome = mandatory_full > available
chrome_rows = chrome_tight if use_compact_chrome else chrome_full
max_question_rows = max(1, available - chrome_rows - len(choice_wrapped) - len(other_wrapped))
max_question_rows = min(max_question_rows, 12) # soft cap on huge terminals
question_wrapped = _wrap_panel_text(question, inner_text_width)
if len(question_wrapped) > max_question_rows:
keep = max(1, max_question_rows - 1)
question_wrapped = question_wrapped[:keep] + ["… (question truncated)"]
lines = []
# Box top border
lines.append(('class:clarify-border', '╭─ '))
lines.append(('class:clarify-title', 'Hermes needs your input'))
lines.append(('class:clarify-border', ' ' + ('' * max(0, box_width - len("Hermes needs your input") - 3)) + '\n'))
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
# Question text (bounded)
for wrapped in question_wrapped:
# Question text
for wrapped in _wrap_panel_text(question, inner_text_width):
_append_panel_line(lines, 'class:clarify-border', 'class:clarify-question', wrapped, box_width)
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
if cli_ref._clarify_freetext and not choices:
for wrapped in other_wrapped:
guidance = "Type your answer in the prompt below, then press Enter."
for wrapped in _wrap_panel_text(guidance, inner_text_width):
_append_panel_line(lines, 'class:clarify-border', 'class:clarify-choice', wrapped, box_width)
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
if choices:
# Multiple-choice mode: show selectable options
for i, wrapped in choice_wrapped:
for i, choice in enumerate(choices):
style = 'class:clarify-selected' if i == selected and not cli_ref._clarify_freetext else 'class:clarify-choice'
_append_panel_line(lines, 'class:clarify-border', style, wrapped, box_width)
prefix = ' ' if i == selected and not cli_ref._clarify_freetext else ' '
wrapped_lines = _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" ")
for wrapped in wrapped_lines:
_append_panel_line(lines, 'class:clarify-border', style, wrapped, box_width)
# "Other" option (trailing row(s), only shown when choices exist)
# "Other" option (5th line, only shown when choices exist)
other_idx = len(choices)
if selected == other_idx and not cli_ref._clarify_freetext:
other_style = 'class:clarify-selected'
other_label = ' Other (type your answer)'
elif cli_ref._clarify_freetext:
other_style = 'class:clarify-active-other'
other_label = ' Other (type below)'
else:
other_style = 'class:clarify-choice'
for wrapped in other_wrapped:
other_label = ' Other (type your answer)'
for wrapped in _wrap_panel_text(other_label, inner_text_width, subsequent_indent=" "):
_append_panel_line(lines, 'class:clarify-border', other_style, wrapped, box_width)
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
lines.append(('class:clarify-border', '' + ('' * box_width) + '\n'))
return lines
@@ -9636,22 +9260,6 @@ class HermesCLI:
box_width = _panel_box_width(title, [hint] + choices, min_width=46, max_width=84)
inner_text_width = max(8, box_width - 6)
selected = state.get("selected", 0)
# Scrolling viewport: the panel renders into a Window with no max
# height, so without limiting visible items the bottom border and
# any items past the available terminal rows get clipped on long
# provider catalogs (e.g. Ollama Cloud's 36+ models).
try:
from prompt_toolkit.application import get_app
term_rows = get_app().output.get_size().rows
except Exception:
term_rows = shutil.get_terminal_size((100, 24)).lines
scroll_offset, visible = HermesCLI._compute_model_picker_viewport(
selected, state.get("_scroll_offset", 0), len(choices), term_rows,
)
state["_scroll_offset"] = scroll_offset
lines = []
lines.append(('class:clarify-border', '╭─ '))
lines.append(('class:clarify-title', title))
@@ -9659,8 +9267,8 @@ class HermesCLI:
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
_append_panel_line(lines, 'class:clarify-border', 'class:clarify-hint', hint, box_width)
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
for idx in range(scroll_offset, scroll_offset + visible):
choice = choices[idx]
selected = state.get("selected", 0)
for idx, choice in enumerate(choices):
style = 'class:clarify-selected' if idx == selected else 'class:clarify-choice'
prefix = ' ' if idx == selected else ' '
for wrapped in _wrap_panel_text(prefix + choice, inner_text_width, subsequent_indent=' '):
@@ -10065,36 +9673,8 @@ class HermesCLI:
# Register signal handlers for graceful shutdown on SSH disconnect / SIGTERM
def _signal_handler(signum, frame):
"""Handle SIGHUP/SIGTERM by triggering graceful cleanup.
Calls ``self.agent.interrupt()`` first so the agent daemon
thread's poll loop sees the per-thread interrupt and kills the
tool's subprocess group via ``_kill_process`` (os.killpg).
Without this, the main thread dies from KeyboardInterrupt and
the daemon thread is killed with it before it can run one
more poll iteration to clean up the subprocess, which was
spawned with ``os.setsid`` and therefore survives as an orphan
with PPID=1.
Grace window (``HERMES_SIGTERM_GRACE``, default 1.5 s) gives
the daemon time to: detect the interrupt (next 200 ms poll)
call _kill_process (SIGTERM + 1 s wait + SIGKILL if needed)
return from _wait_for_process. ``time.sleep`` releases the
GIL so the daemon actually runs during the window.
"""
"""Handle SIGHUP/SIGTERM by triggering graceful cleanup."""
logger.debug("Received signal %s, triggering graceful shutdown", signum)
try:
if getattr(self, "agent", None) and getattr(self, "_agent_running", False):
self.agent.interrupt(f"received signal {signum}")
import time as _t
try:
_grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
except (TypeError, ValueError):
_grace = 1.5
if _grace > 0:
_t.sleep(_grace)
except Exception:
pass # never block signal handling
raise KeyboardInterrupt()
try:
@@ -10397,45 +9977,6 @@ def main(
# Register cleanup for single-query mode (interactive mode registers in run())
atexit.register(_run_cleanup)
# Also install signal handlers in single-query / `-q` mode. Interactive
# mode registers its own inside HermesCLI.run(), but `-q` runs
# cli.agent.run_conversation() below and AIAgent spawns worker threads
# for tools — so when SIGTERM arrives on the main thread, raising
# KeyboardInterrupt only unwinds the main thread, not the worker
# running _wait_for_process. Python then exits, the child subprocess
# (spawned with os.setsid, its own process group) is reparented to
# init and keeps running as an orphan.
#
# Fix: route SIGTERM/SIGHUP through agent.interrupt() which sets the
# per-thread interrupt flag the worker's poll loop checks every 200 ms.
# Give the worker a grace window to call _kill_process (SIGTERM to the
# process group, then SIGKILL after 1 s), then raise KeyboardInterrupt
# so main unwinds normally. HERMES_SIGTERM_GRACE overrides the 1.5 s
# default for debugging.
def _signal_handler_q(signum, frame):
logger.debug("Received signal %s in single-query mode", signum)
try:
_agent = getattr(cli, "agent", None)
if _agent is not None:
_agent.interrupt(f"received signal {signum}")
import time as _t
try:
_grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
except (TypeError, ValueError):
_grace = 1.5
if _grace > 0:
_t.sleep(_grace)
except Exception:
pass # never block signal handling
raise KeyboardInterrupt()
try:
import signal as _signal
_signal.signal(_signal.SIGTERM, _signal_handler_q)
if hasattr(_signal, "SIGHUP"):
_signal.signal(_signal.SIGHUP, _signal_handler_q)
except Exception:
pass # signal handler may fail in restricted environments
# Handle single query mode
if query or image:
@@ -10463,11 +10004,6 @@ def main(
):
cli.agent.quiet_mode = True
cli.agent.suppress_status_output = True
# Suppress streaming display callbacks so stdout stays
# machine-readable (no styled "Hermes" box, no tool-gen
# status lines). The response is printed once below.
cli.agent.stream_delta_callback = None
cli.agent.tool_gen_callback = None
result = cli.agent.run_conversation(
user_message=effective_query,
conversation_history=cli.conversation_history,
@@ -10475,8 +10011,7 @@ def main(
response = result.get("final_response", "") if isinstance(result, dict) else str(result)
if response:
print(response)
# Session ID goes to stderr so piped stdout is clean.
print(f"\nsession_id: {cli.session_id}", file=sys.stderr)
print(f"\nsession_id: {cli.session_id}")
# Ensure proper exit code for automation wrappers
sys.exit(1 if isinstance(result, dict) and result.get("failed") else 0)
-6
View File
@@ -501,12 +501,6 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
if schedule_changed:
updated_schedule = updated["schedule"]
# The API may pass schedule as a raw string (e.g. "every 10m")
# instead of a pre-parsed dict. Normalize it the same way
# create_job() does so downstream code can call .get() safely.
if isinstance(updated_schedule, str):
updated_schedule = parse_schedule(updated_schedule)
updated["schedule"] = updated_schedule
updated["schedule_display"] = updates.get(
"schedule_display",
updated_schedule.get("display", updated.get("schedule_display")),
+109 -203
View File
@@ -10,7 +10,6 @@ runs at a time if multiple processes overlap.
import asyncio
import concurrent.futures
import contextvars
import json
import logging
import os
@@ -27,7 +26,7 @@ except ImportError:
except ImportError:
msvcrt = None
from pathlib import Path
from typing import List, Optional
from typing import Optional
# Add parent directory to path for imports BEFORE repo-level imports.
# Without this, standalone invocations (e.g. after `hermes update` reloads
@@ -49,33 +48,6 @@ _KNOWN_DELIVERY_PLATFORMS = frozenset({
"qqbot",
})
# Platforms that support a configured cron/notification home target, mapped to
# the environment variable used by gateway setup/runtime config.
_HOME_TARGET_ENV_VARS = {
"matrix": "MATRIX_HOME_ROOM",
"telegram": "TELEGRAM_HOME_CHANNEL",
"discord": "DISCORD_HOME_CHANNEL",
"slack": "SLACK_HOME_CHANNEL",
"signal": "SIGNAL_HOME_CHANNEL",
"mattermost": "MATTERMOST_HOME_CHANNEL",
"sms": "SMS_HOME_CHANNEL",
"email": "EMAIL_HOME_ADDRESS",
"dingtalk": "DINGTALK_HOME_CHANNEL",
"feishu": "FEISHU_HOME_CHANNEL",
"wecom": "WECOM_HOME_CHANNEL",
"weixin": "WEIXIN_HOME_CHANNEL",
"bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
"qqbot": "QQBOT_HOME_CHANNEL",
}
# Legacy env var names kept for back-compat. Each entry is the current
# primary env var → the previous name. _get_home_target_chat_id falls
# back to the legacy name if the primary is unset, so users who set the
# old name before the rename keep working until they migrate.
_LEGACY_HOME_TARGET_ENV_VARS = {
"QQBOT_HOME_CHANNEL": "QQ_HOME_CHANNEL",
}
from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run
# Sentinel: when a cron agent has nothing new to report, it can start its
@@ -103,28 +75,15 @@ def _resolve_origin(job: dict) -> Optional[dict]:
return None
def _get_home_target_chat_id(platform_name: str) -> str:
"""Return the configured home target chat/room ID for a delivery platform."""
env_var = _HOME_TARGET_ENV_VARS.get(platform_name.lower())
if not env_var:
return ""
value = os.getenv(env_var, "")
if not value:
legacy = _LEGACY_HOME_TARGET_ENV_VARS.get(env_var)
if legacy:
value = os.getenv(legacy, "")
return value
def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[dict]:
"""Resolve one concrete auto-delivery target for a cron job."""
def _resolve_delivery_target(job: dict) -> Optional[dict]:
"""Resolve the concrete auto-delivery target for a cron job, if any."""
deliver = job.get("deliver", "local")
origin = _resolve_origin(job)
if deliver_value == "local":
if deliver == "local":
return None
if deliver_value == "origin":
if deliver == "origin":
if origin:
return {
"platform": origin["platform"],
@@ -133,8 +92,8 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
}
# Origin missing (e.g. job created via API/script) — try each
# platform's home channel as a fallback instead of silently dropping.
for platform_name in _HOME_TARGET_ENV_VARS:
chat_id = _get_home_target_chat_id(platform_name)
for platform_name in ("matrix", "telegram", "discord", "slack", "bluebubbles"):
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
if chat_id:
logger.info(
"Job '%s' has deliver=origin but no origin; falling back to %s home channel",
@@ -148,8 +107,8 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
}
return None
if ":" in deliver_value:
platform_name, rest = deliver_value.split(":", 1)
if ":" in deliver:
platform_name, rest = deliver.split(":", 1)
platform_key = platform_name.lower()
from tools.send_message_tool import _parse_target_ref
@@ -179,7 +138,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
"thread_id": thread_id,
}
platform_name = deliver_value
platform_name = deliver
if origin and origin.get("platform") == platform_name:
return {
"platform": platform_name,
@@ -189,7 +148,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
if platform_name.lower() not in _KNOWN_DELIVERY_PLATFORMS:
return None
chat_id = _get_home_target_chat_id(platform_name)
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
if not chat_id:
return None
@@ -200,30 +159,6 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
}
def _resolve_delivery_targets(job: dict) -> List[dict]:
"""Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver)."""
deliver = job.get("deliver", "local")
if deliver == "local":
return []
parts = [p.strip() for p in str(deliver).split(",") if p.strip()]
seen = set()
targets = []
for part in parts:
target = _resolve_single_delivery_target(job, part)
if target:
key = (target["platform"].lower(), str(target["chat_id"]), target.get("thread_id"))
if key not in seen:
seen.add(key)
targets.append(target)
return targets
def _resolve_delivery_target(job: dict) -> Optional[dict]:
"""Resolve the concrete auto-delivery target for a cron job, if any."""
targets = _resolve_delivery_targets(job)
return targets[0] if targets else None
# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background
_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'})
_VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'})
@@ -264,7 +199,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Optional[str]:
"""
Deliver job output to the configured target(s) (origin chat, specific platform, etc.).
Deliver job output to the configured target (origin chat, specific platform, etc.).
When ``adapters`` and ``loop`` are provided (gateway is running), tries to
use the live adapter first this supports E2EE rooms (e.g. Matrix) where
@@ -273,14 +208,33 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
Returns None on success, or an error string on failure.
"""
targets = _resolve_delivery_targets(job)
if not targets:
target = _resolve_delivery_target(job)
if not target:
if job.get("deliver", "local") != "local":
msg = f"no delivery target resolved for deliver={job.get('deliver', 'local')}"
logger.warning("Job '%s': %s", job["id"], msg)
return msg
return None # local-only jobs don't deliver — not a failure
platform_name = target["platform"]
chat_id = target["chat_id"]
thread_id = target.get("thread_id")
# Diagnostic: log thread_id for topic-aware delivery debugging
origin = job.get("origin") or {}
origin_thread = origin.get("thread_id")
if origin_thread and not thread_id:
logger.warning(
"Job '%s': origin has thread_id=%s but delivery target lost it "
"(deliver=%s, target=%s)",
job["id"], origin_thread, job.get("deliver", "local"), target,
)
elif thread_id:
logger.debug(
"Job '%s': delivering to %s:%s thread_id=%s",
job["id"], platform_name, chat_id, thread_id,
)
from tools.send_message_tool import _send_to_platform
from gateway.config import load_gateway_config, Platform
@@ -303,6 +257,24 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
"bluebubbles": Platform.BLUEBUBBLES,
"qqbot": Platform.QQBOT,
}
platform = platform_map.get(platform_name.lower())
if not platform:
msg = f"unknown platform '{platform_name}'"
logger.warning("Job '%s': %s", job["id"], msg)
return msg
try:
config = load_gateway_config()
except Exception as e:
msg = f"failed to load gateway config: {e}"
logger.error("Job '%s': %s", job["id"], msg)
return msg
pconfig = config.platforms.get(platform)
if not pconfig or not pconfig.enabled:
msg = f"platform '{platform_name}' not configured/enabled"
logger.warning("Job '%s': %s", job["id"], msg)
return msg
# Optionally wrap the content with a header/footer so the user knows this
# is a cron delivery. Wrapping is on by default; set cron.wrap_response: false
@@ -316,13 +288,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
if wrap_response:
task_name = job.get("name", job["id"])
job_id = job.get("id", "")
delivery_content = (
f"Cronjob Response: {task_name}\n"
f"(job_id: {job_id})\n"
f"-------------\n\n"
f"{content}\n\n"
f"To stop or manage this job, send me a new message (e.g. \"stop reminder {task_name}\")."
f"Note: The agent cannot see this message, and therefore cannot respond to it."
)
else:
delivery_content = content
@@ -331,117 +301,67 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
from gateway.platforms.base import BasePlatformAdapter
media_files, cleaned_delivery_content = BasePlatformAdapter.extract_media(delivery_content)
# Prefer the live adapter when the gateway is running — this supports E2EE
# rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
runtime_adapter = (adapters or {}).get(platform)
if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)():
send_metadata = {"thread_id": thread_id} if thread_id else None
try:
# Send cleaned text (MEDIA tags stripped) — not the raw content
text_to_send = cleaned_delivery_content.strip()
adapter_ok = True
if text_to_send:
future = asyncio.run_coroutine_threadsafe(
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
loop,
)
send_result = future.result(timeout=60)
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, err,
)
adapter_ok = False # fall through to standalone path
# Send extracted media files as native attachments via the live adapter
if adapter_ok and media_files:
_send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
if adapter_ok:
logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
return None
except Exception as e:
logger.warning(
"Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, e,
)
# Standalone path: run the async send in a fresh event loop (safe from any thread)
coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
try:
config = load_gateway_config()
result = asyncio.run(coro)
except RuntimeError:
# asyncio.run() checks for a running loop before awaiting the coroutine;
# when it raises, the original coro was never started — close it to
# prevent "coroutine was never awaited" RuntimeWarning, then retry in a
# fresh thread that has no running loop.
coro.close()
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
result = future.result(timeout=30)
except Exception as e:
msg = f"failed to load gateway config: {e}"
msg = f"delivery to {platform_name}:{chat_id} failed: {e}"
logger.error("Job '%s': %s", job["id"], msg)
return msg
delivery_errors = []
if result and result.get("error"):
msg = f"delivery error: {result['error']}"
logger.error("Job '%s': %s", job["id"], msg)
return msg
for target in targets:
platform_name = target["platform"]
chat_id = target["chat_id"]
thread_id = target.get("thread_id")
# Diagnostic: log thread_id for topic-aware delivery debugging
origin = job.get("origin") or {}
origin_thread = origin.get("thread_id")
if origin_thread and not thread_id:
logger.warning(
"Job '%s': origin has thread_id=%s but delivery target lost it "
"(deliver=%s, target=%s)",
job["id"], origin_thread, job.get("deliver", "local"), target,
)
elif thread_id:
logger.debug(
"Job '%s': delivering to %s:%s thread_id=%s",
job["id"], platform_name, chat_id, thread_id,
)
platform = platform_map.get(platform_name.lower())
if not platform:
msg = f"unknown platform '{platform_name}'"
logger.warning("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
# Prefer the live adapter when the gateway is running — this supports E2EE
# rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
runtime_adapter = (adapters or {}).get(platform)
delivered = False
if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)():
send_metadata = {"thread_id": thread_id} if thread_id else None
try:
# Send cleaned text (MEDIA tags stripped) — not the raw content
text_to_send = cleaned_delivery_content.strip()
adapter_ok = True
if text_to_send:
future = asyncio.run_coroutine_threadsafe(
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
loop,
)
send_result = future.result(timeout=60)
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, err,
)
adapter_ok = False # fall through to standalone path
# Send extracted media files as native attachments via the live adapter
if adapter_ok and media_files:
_send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
if adapter_ok:
logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
delivered = True
except Exception as e:
logger.warning(
"Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, e,
)
if not delivered:
pconfig = config.platforms.get(platform)
if not pconfig or not pconfig.enabled:
msg = f"platform '{platform_name}' not configured/enabled"
logger.warning("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
# Standalone path: run the async send in a fresh event loop (safe from any thread)
coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
try:
result = asyncio.run(coro)
except RuntimeError:
# asyncio.run() checks for a running loop before awaiting the coroutine;
# when it raises, the original coro was never started — close it to
# prevent "coroutine was never awaited" RuntimeWarning, then retry in a
# fresh thread that has no running loop.
coro.close()
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
result = future.result(timeout=30)
except Exception as e:
msg = f"delivery to {platform_name}:{chat_id} failed: {e}"
logger.error("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
if result and result.get("error"):
msg = f"delivery error: {result['error']}"
logger.error("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
if delivery_errors:
return "; ".join(delivery_errors)
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
return None
@@ -848,11 +768,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
_POLL_INTERVAL = 5.0
_cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
# Preserve scheduler-scoped ContextVar state (for example skill-declared
# env passthrough registrations) when the cron run hops into the worker
# thread used for inactivity timeout monitoring.
_cron_context = contextvars.copy_context()
_cron_future = _cron_pool.submit(_cron_context.run, agent.run_conversation, prompt)
_cron_future = _cron_pool.submit(agent.run_conversation, prompt)
_inactivity_timeout = False
try:
if _cron_inactivity_limit is None:
@@ -914,9 +830,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
)
final_response = result.get("final_response", "") or ""
# Strip leaked placeholder text that upstream may inject on empty completions.
if final_response.strip() == "(No response generated)":
final_response = ""
# Use a separate variable for log display; keep final_response clean
# for delivery logic (empty response = no delivery).
logged_response = final_response if final_response else "(No response generated)"
@@ -1056,13 +969,6 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
delivery_error = str(de)
logger.error("Delivery failed for job %s: %s", job["id"], de)
# Treat empty final_response as a soft failure so last_status
# is not "ok" — the agent ran but produced nothing useful.
# (issue #8585)
if success and not final_response:
success = False
error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
mark_job_run(job["id"], success, error, delivery_error=delivery_error)
executed += 1
Executable → Regular
+6 -13
View File
@@ -1,14 +1,13 @@
#!/bin/bash
# Docker/Podman entrypoint: bootstrap config files into the mounted volume, then run hermes.
# Docker entrypoint: bootstrap config files into the mounted volume, then run hermes.
set -e
HERMES_HOME="${HERMES_HOME:-/opt/data}"
HERMES_HOME="/opt/data"
INSTALL_DIR="/opt/hermes"
# --- Privilege dropping via gosu ---
# When started as root (the default for Docker, or fakeroot in rootless Podman),
# optionally remap the hermes user/group to match host-side ownership, fix volume
# permissions, then re-exec as hermes.
# When started as root (the default), optionally remap the hermes user/group
# to match host-side ownership, fix volume permissions, then re-exec as hermes.
if [ "$(id -u)" = "0" ]; then
if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "$(id -u hermes)" ]; then
echo "Changing hermes UID to $HERMES_UID"
@@ -17,19 +16,13 @@ if [ "$(id -u)" = "0" ]; then
if [ -n "$HERMES_GID" ] && [ "$HERMES_GID" != "$(id -g hermes)" ]; then
echo "Changing hermes GID to $HERMES_GID"
# -o allows non-unique GID (e.g. macOS GID 20 "staff" may already exist
# as "dialout" in the Debian-based container image)
groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
groupmod -g "$HERMES_GID" hermes
fi
actual_hermes_uid=$(id -u hermes)
if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
# In rootless Podman the container's "root" is mapped to an unprivileged
# host UID — chown will fail. That's fine: the volume is already owned
# by the mapped user on the host side.
chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
echo "Warning: chown failed (rootless container?) — continuing anyway"
chown -R hermes:hermes "$HERMES_HOME"
fi
echo "Dropping root privileges"
@@ -1,108 +0,0 @@
# Ink Gateway TUI Migration — Post-mortem
Planned: 2026-04-01 · Delivered: 2026-04 · Status: shipped, classic (prompt_toolkit) CLI still present
## What Shipped
Three layers, same repo, Python runtime unchanged.
```
ui-tui (Node/TS) ──stdio JSON-RPC──▶ tui_gateway (Py) ──▶ AIAgent (run_agent.py)
```
### Backend — `tui_gateway/`
```
tui_gateway/
├── entry.py # subprocess entrypoint, stdio read/write loop
├── server.py # everything: sessions dict, @method handlers, _emit
├── render.py # stream renderer, diff rendering, message rendering
├── slash_worker.py # subprocess that runs hermes_cli slash commands
└── __init__.py
```
`server.py` owns the full runtime-control surface: session store (`_sessions: dict[str, dict]`), method registry (`@method("…")` decorator), event emitter (`_emit`), agent lifecycle (`_make_agent`, `_init_session`, `_wire_callbacks`), approval/sudo/clarify round-trips, and JSON-RPC dispatch.
Protocol methods (`@method(...)` in `server.py`):
- session: `session.{create, resume, list, close, interrupt, usage, history, compress, branch, title, save, undo}`
- prompt: `prompt.{submit, background, btw}`
- tools: `tools.{list, show, configure}`
- slash: `slash.exec`, `command.{dispatch, resolve}`, `commands.catalog`, `complete.{path, slash}`
- approvals: `approval.respond`, `sudo.respond`, `clarify.respond`, `secret.respond`
- config/state: `config.{get, set, show}`, `model.options`, `reload.mcp`
- ops: `shell.exec`, `cli.exec`, `terminal.resize`, `input.detect_drop`, `clipboard.paste`, `paste.collapse`, `image.attach`, `process.stop`
- misc: `agents.list`, `skills.manage`, `plugins.list`, `cron.manage`, `insights.get`, `rollback.{list, diff, restore}`, `browser.manage`
Protocol events (`_emit(…)` → handled in `ui-tui/src/app/createGatewayEventHandler.ts`):
- lifecycle: `gateway.{ready, stderr}`, `session.info`, `skin.changed`
- stream: `message.{start, delta, complete}`, `thinking.delta`, `reasoning.{delta, available}`, `status.update`
- tools: `tool.{start, progress, complete, generating}`, `subagent.{start, thinking, tool, progress, complete}`
- interactive: `approval.request`, `sudo.request`, `clarify.request`, `secret.request`
- async: `background.complete`, `btw.complete`, `error`
### Frontend — `ui-tui/src/`
```
src/
├── entry.tsx # node bootstrap: bootBanner → spawn python → dynamic-import Ink → render(<App/>)
├── app.tsx # <GatewayProvider> wraps <AppLayout>
├── bootBanner.ts # raw-ANSI banner to stdout in ~2ms, pre-React
├── gatewayClient.ts # JSON-RPC client over child_process stdio
├── gatewayTypes.ts # typed RPC responses + GatewayEvent union
├── theme.ts # DEFAULT_THEME + fromSkin
├── app/ # hooks + stores — the orchestration layer
│ ├── uiStore.ts # nanostore: sid, info, busy, usage, theme, status…
│ ├── turnStore.ts # nanostore: per-turn activity / reasoning / tools
│ ├── turnController.ts # imperative singleton for stream-time operations
│ ├── overlayStore.ts # nanostore: modal/overlay state
│ ├── useMainApp.ts # top-level composition hook
│ ├── useSessionLifecycle.ts # session.create/resume/close/reset
│ ├── useSubmission.ts # shell/slash/prompt dispatch + interpolation
│ ├── useConfigSync.ts # config.get + mtime poll
│ ├── useComposerState.ts # input buffer, paste snippets, editor mode
│ ├── useInputHandlers.ts # key bindings
│ ├── createGatewayEventHandler.ts # event-stream dispatcher
│ ├── createSlashHandler.ts # slash command router (registry + python fallback)
│ └── slash/commands/ # core.ts, ops.ts, session.ts — TS-owned slash commands
├── components/ # AppLayout, AppChrome, AppOverlays, MessageLine, Thinking, Markdown, pickers, prompts, Banner, SessionPanel
├── config/ # env, limits, timing constants
├── content/ # charms, faces, fortunes, hotkeys, placeholders, verbs
├── domain/ # details, messages, paths, roles, slash, usage, viewport
├── protocol/ # interpolation, paste regex
├── hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
└── lib/ # history, messages, osc52, rpc, text
```
### CLI entry points — `hermes_cli/main.py`
- `hermes --tui``node dist/entry.js` (auto-builds when `.ts`/`.tsx` newer than `dist/entry.js`)
- `hermes --tui --dev``tsx src/entry.tsx` (skip build)
- `HERMES_TUI_DIR=…` → external prebuilt dist (nix, distro packaging)
## Diverged From Original Plan
| Plan | Reality | Why |
|---|---|---|
| `tui_gateway/{controller,session_state,events,protocol}.py` | all collapsed into `server.py` | no second consumer ever emerged, keeping one file cheaper than four |
| `ui-tui/src/main.tsx` | split into `entry.tsx` (bootstrap) + `app.tsx` (shell) | boot banner + early python spawn wanted a pre-React moment |
| `ui-tui/src/state/store.ts` | three nanostores (`uiStore`, `turnStore`, `overlayStore`) | separate lifetimes: ui persists, turn resets per reply, overlay is modal |
| `approval.requested` / `sudo.requested` / `clarify.requested` | `*.request` (no `-ed`) | cosmetic |
| `session.cancel` | dropped | `session.interrupt` covers it |
| `HERMES_EXPERIMENTAL_TUI=1`, `display.experimental_tui: true`, `/tui on/off/status` | none shipped | `--tui` went from opt-in to first-class without an experimental phase |
## Post-migration Additions (not in original plan)
- **Async `session.create`** — returns sid in ~1ms, agent builds on a background thread, `session.info` broadcasts when ready; `_wait_agent()` gates every agent-touching handler via `_sess`
- **`bootBanner`** — raw-ANSI logo painted to stdout at T≈2ms, before Ink loads; `<AlternateScreen>` wipes it seamlessly when React mounts
- **Selection uniform bg**`theme.color.selectionBg` wired via `useSelection().setSelectionBgColor`; replaces SGR-inverse per-cell swap that fragmented over amber/gold fg
- **Slash command registry** — TS-owned commands in `app/slash/commands/{core,ops,session}.ts`, everything else falls through to `slash.exec` (python worker)
- **Turn store + controller split** — imperative singleton (`turnController`) holds refs/timers, nanostore (`turnStore`) holds render-visible state
## What's Still Open
- **Classic CLI not deleted.** `cli.py` still has ~80 `prompt_toolkit` references; classic REPL is still the default when `--tui` is absent. The original plan's "Cut 4 · prompt_toolkit removal later" hasn't happened.
- **No config-file opt-in.** `HERMES_EXPERIMENTAL_TUI` and `display.experimental_tui` were never built; only the CLI flag exists. Fine for now — if we want "default to TUI", a single line in `main.py` flips it.
+27 -36
View File
@@ -6,11 +6,6 @@
# All fields are optional — missing values inherit from the default skin.
# Activate with: /skin <name> or display.skin: <name> in config.yaml
#
# Keys are marked:
# (both) — applies to both the classic CLI and the TUI
# (classic) — classic CLI only (see hermes --tui in user-guide/tui.md)
# (tui) — TUI only
#
# See hermes_cli/skin_engine.py for the full schema reference.
# ============================================================================
@@ -19,47 +14,43 @@ name: example
description: An example custom skin — copy and modify this template
# ── Colors ──────────────────────────────────────────────────────────────────
# Hex color values. These control the visual palette.
# Hex color values for Rich markup. These control the CLI's visual palette.
colors:
# Banner panel (the startup welcome box) — (both)
# Banner panel (the startup welcome box)
banner_border: "#CD7F32" # Panel border
banner_title: "#FFD700" # Panel title text
banner_accent: "#FFBF00" # Section headers (Available Tools, Skills, etc.)
banner_dim: "#B8860B" # Dim/muted text (separators, model info)
banner_text: "#FFF8DC" # Body text (tool names, skill names)
# UI elements — (both)
ui_accent: "#FFBF00" # General accent (falls back to banner_accent)
# UI elements
ui_accent: "#FFBF00" # General accent color
ui_label: "#4dd0e1" # Labels
ui_ok: "#4caf50" # Success indicators
ui_error: "#ef5350" # Error indicators
ui_warn: "#ffa726" # Warning indicators
# Input area
prompt: "#FFF8DC" # Prompt text / `` glyph color (both)
input_rule: "#CD7F32" # Horizontal rule above input (classic)
prompt: "#FFF8DC" # Prompt text color
input_rule: "#CD7F32" # Horizontal rule around input
# Response box — (classic)
response_border: "#FFD700" # Response box border
# Response box
response_border: "#FFD700" # Response box border (ANSI color)
# Session display — (both)
session_label: "#DAA520" # "Session: " label
session_border: "#8B8682" # Session ID text
# Session display
session_label: "#DAA520" # Session label
session_border: "#8B8682" # Session ID dim color
# TUI / CLI surfaces — (classic: status bar, voice badge, completion meta)
status_bar_bg: "#1a1a2e" # Status / usage bar background (classic)
voice_status_bg: "#1a1a2e" # Voice-mode badge background (classic)
completion_menu_bg: "#1a1a2e" # Completion list background (both)
completion_menu_current_bg: "#333355" # Active completion row background (both)
completion_menu_meta_bg: "#1a1a2e" # Completion meta column bg (classic)
completion_menu_meta_current_bg: "#333355" # Active meta bg (classic)
# Drag-to-select background — (tui)
selection_bg: "#3a3a55" # Uniform selection highlight in the TUI
# TUI surfaces
status_bar_bg: "#1a1a2e" # Status / usage bar background
voice_status_bg: "#1a1a2e" # Voice-mode badge background
completion_menu_bg: "#1a1a2e" # Completion list background
completion_menu_current_bg: "#333355" # Active completion row background
completion_menu_meta_bg: "#1a1a2e" # Completion meta column background
completion_menu_meta_current_bg: "#333355" # Active completion meta background
# ── Spinner ─────────────────────────────────────────────────────────────────
# (classic) — the TUI uses its own animated indicators; spinner config here
# is only read by the classic prompt_toolkit CLI.
# Customize the animated spinner shown during API calls and tool execution.
spinner:
# Faces shown while waiting for the API response
waiting_faces:
@@ -87,17 +78,17 @@ spinner:
# - ["⟪▲", "▲⟫"]
# ── Branding ────────────────────────────────────────────────────────────────
# Text strings used throughout the interface.
# Text strings used throughout the CLI interface.
branding:
agent_name: "Hermes Agent" # (both) Banner title, about display
welcome: "Welcome! Type your message or /help for commands." # (both)
goodbye: "Goodbye! ⚕" # (both) Exit message
response_label: " ⚕ Hermes " # (classic) Response box header label
prompt_symbol: " " # (both) Input prompt glyph
help_header: "(^_^)? Available Commands" # (both) /help overlay title
agent_name: "Hermes Agent" # Banner title, about display
welcome: "Welcome! Type your message or /help for commands."
goodbye: "Goodbye! ⚕" # Exit message
response_label: " ⚕ Hermes " # Response box header label
prompt_symbol: " " # Input prompt symbol
help_header: "(^_^)? Available Commands" # /help header text
# ── Tool Output ─────────────────────────────────────────────────────────────
# Character used as the prefix for tool output lines. (both)
# Character used as the prefix for tool output lines.
# Default is "┊" (thin dotted vertical line). Some alternatives:
# "╎" (light triple dash vertical)
# "▏" (left one-eighth block)
Generated
-21
View File
@@ -36,26 +36,6 @@
"type": "github"
}
},
"npm-lockfile-fix": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1775903712,
"narHash": "sha256-2GV79U6iVH4gKAPWYrxUReB0S41ty/Y3dBLquU8AlaA=",
"owner": "jeslie0",
"repo": "npm-lockfile-fix",
"rev": "c6093acb0c0548e0f9b8b3d82918823721930fe8",
"type": "github"
},
"original": {
"owner": "jeslie0",
"repo": "npm-lockfile-fix",
"type": "github"
}
},
"pyproject-build-systems": {
"inputs": {
"nixpkgs": [
@@ -144,7 +124,6 @@
"inputs": {
"flake-parts": "flake-parts",
"nixpkgs": "nixpkgs",
"npm-lockfile-fix": "npm-lockfile-fix",
"pyproject-build-systems": "pyproject-build-systems",
"pyproject-nix": "pyproject-nix_2",
"uv2nix": "uv2nix_2"
+2 -11
View File
@@ -19,20 +19,11 @@
url = "github:pyproject-nix/build-system-pkgs";
inputs.nixpkgs.follows = "nixpkgs";
};
npm-lockfile-fix = {
url = "github:jeslie0/npm-lockfile-fix";
inputs.nixpkgs.follows = "nixpkgs";
};
};
outputs =
inputs:
outputs = inputs:
inputs.flake-parts.lib.mkFlake { inherit inputs; } {
systems = [
"x86_64-linux"
"aarch64-linux"
"aarch64-darwin"
];
systems = [ "x86_64-linux" "aarch64-linux" "aarch64-darwin" ];
imports = [
./nix/packages.nix
+1 -19
View File
@@ -100,7 +100,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
def _build_discord(adapter) -> List[Dict[str, str]]:
"""Enumerate all text channels and forum channels the Discord bot can see."""
"""Enumerate all text channels the Discord bot can see."""
channels = []
client = getattr(adapter, "_client", None)
if not client:
@@ -119,15 +119,6 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
"guild": guild.name,
"type": "channel",
})
# Forum channels (type 15) — creating a message auto-spawns a thread post.
forums = getattr(guild, "forum_channels", None) or []
for ch in forums:
channels.append({
"id": str(ch.id),
"name": ch.name,
"guild": guild.name,
"type": "forum",
})
# Also include DM-capable users we've interacted with is not
# feasible via guild enumeration; those come from sessions.
@@ -200,15 +191,6 @@ def load_directory() -> Dict[str, Any]:
return {"updated_at": None, "platforms": {}}
def lookup_channel_type(platform_name: str, chat_id: str) -> Optional[str]:
"""Return the channel ``type`` string (e.g. ``"channel"``, ``"forum"``) for *chat_id*, or *None* if unknown."""
directory = load_directory()
for ch in directory.get("platforms", {}).get(platform_name, []):
if ch.get("id") == chat_id:
return ch.get("type")
return None
def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
"""
Resolve a human-friendly channel name to a numeric ID.
+2 -107
View File
@@ -258,13 +258,6 @@ class GatewayConfig:
# Streaming configuration
streaming: StreamingConfig = field(default_factory=StreamingConfig)
# Session store pruning: drop SessionEntry records older than this many
# days from the in-memory dict and sessions.json. Keeps the store from
# growing unbounded in gateways serving many chats/threads/users over
# months. Pruning is invisible to users — if they resume, they get a
# fresh session exactly as if the reset policy had fired. 0 = disabled.
session_store_max_age_days: int = 90
def get_connected_platforms(self) -> List[Platform]:
"""Return list of platforms that are enabled and configured."""
connected = []
@@ -314,14 +307,6 @@ class GatewayConfig:
# QQBot uses extra dict for app credentials
elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
connected.append(platform)
# DingTalk uses client_id/client_secret from config.extra or env vars
elif platform == Platform.DINGTALK and (
config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
) and (
config.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET")
):
connected.append(platform)
return connected
def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
@@ -372,7 +357,6 @@ class GatewayConfig:
"thread_sessions_per_user": self.thread_sessions_per_user,
"unauthorized_dm_behavior": self.unauthorized_dm_behavior,
"streaming": self.streaming.to_dict(),
"session_store_max_age_days": self.session_store_max_age_days,
}
@classmethod
@@ -420,13 +404,6 @@ class GatewayConfig:
"pair",
)
try:
session_store_max_age_days = int(data.get("session_store_max_age_days", 90))
if session_store_max_age_days < 0:
session_store_max_age_days = 0
except (TypeError, ValueError):
session_store_max_age_days = 90
return cls(
platforms=platforms,
default_reset_policy=default_policy,
@@ -441,7 +418,6 @@ class GatewayConfig:
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
unauthorized_dm_behavior=unauthorized_dm_behavior,
streaming=StreamingConfig.from_dict(data.get("streaming", {})),
session_store_max_age_days=session_store_max_age_days,
)
def get_unauthorized_dm_behavior(self, platform: Optional[Platform] = None) -> str:
@@ -578,12 +554,6 @@ def load_gateway_config() -> GatewayConfig:
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
if "channel_prompts" in platform_cfg:
channel_prompts = platform_cfg["channel_prompts"]
if isinstance(channel_prompts, dict):
bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
else:
bridged["channel_prompts"] = channel_prompts
if not bridged:
continue
plat_data = platforms_data.setdefault(plat.value, {})
@@ -641,20 +611,6 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(ntc, list):
ntc = ",".join(str(v) for v in ntc)
os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc)
# allow_mentions: granular control over what the bot can ping.
# Safe defaults (no @everyone/roles) are applied in the adapter;
# these YAML keys only override when set and let users opt back
# into unsafe modes (e.g. roles=true) if they actually want it.
allow_mentions_cfg = discord_cfg.get("allow_mentions")
if isinstance(allow_mentions_cfg, dict):
for yaml_key, env_key in (
("everyone", "DISCORD_ALLOW_MENTION_EVERYONE"),
("roles", "DISCORD_ALLOW_MENTION_ROLES"),
("users", "DISCORD_ALLOW_MENTION_USERS"),
("replied_user", "DISCORD_ALLOW_MENTION_REPLIED_USER"),
):
if yaml_key in allow_mentions_cfg and not os.getenv(env_key):
os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower()
# Telegram settings → env vars (env vars take precedence)
telegram_cfg = yaml_cfg.get("telegram", {})
@@ -676,18 +632,6 @@ def load_gateway_config() -> GatewayConfig:
os.environ["TELEGRAM_IGNORED_THREADS"] = str(ignored_threads)
if "reactions" in telegram_cfg and not os.getenv("TELEGRAM_REACTIONS"):
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
if "disable_link_previews" in telegram_cfg:
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
if not isinstance(plat_data, dict):
plat_data = {}
platforms_data[Platform.TELEGRAM.value] = plat_data
extra = plat_data.setdefault("extra", {})
if not isinstance(extra, dict):
extra = {}
plat_data["extra"] = extra
extra["disable_link_previews"] = telegram_cfg["disable_link_previews"]
whatsapp_cfg = yaml_cfg.get("whatsapp", {})
if isinstance(whatsapp_cfg, dict):
@@ -701,24 +645,6 @@ def load_gateway_config() -> GatewayConfig:
frc = ",".join(str(v) for v in frc)
os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
# DingTalk settings → env vars (env vars take precedence)
dingtalk_cfg = yaml_cfg.get("dingtalk", {})
if isinstance(dingtalk_cfg, dict):
if "require_mention" in dingtalk_cfg and not os.getenv("DINGTALK_REQUIRE_MENTION"):
os.environ["DINGTALK_REQUIRE_MENTION"] = str(dingtalk_cfg["require_mention"]).lower()
if "mention_patterns" in dingtalk_cfg and not os.getenv("DINGTALK_MENTION_PATTERNS"):
os.environ["DINGTALK_MENTION_PATTERNS"] = json.dumps(dingtalk_cfg["mention_patterns"])
frc = dingtalk_cfg.get("free_response_chats")
if frc is not None and not os.getenv("DINGTALK_FREE_RESPONSE_CHATS"):
if isinstance(frc, list):
frc = ",".join(str(v) for v in frc)
os.environ["DINGTALK_FREE_RESPONSE_CHATS"] = str(frc)
allowed = dingtalk_cfg.get("allowed_users")
if allowed is not None and not os.getenv("DINGTALK_ALLOWED_USERS"):
if isinstance(allowed, list):
allowed = ",".join(str(v) for v in allowed)
os.environ["DINGTALK_ALLOWED_USERS"] = str(allowed)
# Matrix settings → env vars (env vars take precedence)
matrix_cfg = yaml_cfg.get("matrix", {})
if isinstance(matrix_cfg, dict):
@@ -1062,25 +988,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
if webhook_secret:
config.platforms[Platform.WEBHOOK].extra["secret"] = webhook_secret
# DingTalk
dingtalk_client_id = os.getenv("DINGTALK_CLIENT_ID")
dingtalk_client_secret = os.getenv("DINGTALK_CLIENT_SECRET")
if dingtalk_client_id and dingtalk_client_secret:
if Platform.DINGTALK not in config.platforms:
config.platforms[Platform.DINGTALK] = PlatformConfig()
config.platforms[Platform.DINGTALK].enabled = True
config.platforms[Platform.DINGTALK].extra.update({
"client_id": dingtalk_client_id,
"client_secret": dingtalk_client_secret,
})
dingtalk_home = os.getenv("DINGTALK_HOME_CHANNEL")
if dingtalk_home:
config.platforms[Platform.DINGTALK].home_channel = HomeChannel(
platform=Platform.DINGTALK,
chat_id=dingtalk_home,
name=os.getenv("DINGTALK_HOME_CHANNEL_NAME", "Home"),
)
# Feishu / Lark
feishu_app_id = os.getenv("FEISHU_APP_ID")
feishu_app_secret = os.getenv("FEISHU_APP_SECRET")
@@ -1229,24 +1136,12 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
qq_group_allowed = os.getenv("QQ_GROUP_ALLOWED_USERS", "").strip()
if qq_group_allowed:
extra["group_allow_from"] = qq_group_allowed
qq_home = os.getenv("QQBOT_HOME_CHANNEL", "").strip()
qq_home_name_env = "QQBOT_HOME_CHANNEL_NAME"
if not qq_home:
# Back-compat: accept the pre-rename name and log a one-time warning.
legacy_home = os.getenv("QQ_HOME_CHANNEL", "").strip()
if legacy_home:
qq_home = legacy_home
qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
import logging
logging.getLogger(__name__).warning(
"QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
"in your .env for consistency with the platform key."
)
qq_home = os.getenv("QQ_HOME_CHANNEL", "").strip()
if qq_home:
config.platforms[Platform.QQBOT].home_channel = HomeChannel(
platform=Platform.QQBOT,
chat_id=qq_home,
name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
name=os.getenv("QQ_HOME_CHANNEL_NAME", "Home"),
)
# Session settings
+25 -1
View File
@@ -3,11 +3,12 @@ Event Hook System
A lightweight event-driven system that fires handlers at key lifecycle points.
Hooks are discovered from ~/.hermes/hooks/ directories, each containing:
- HOOK.yaml (metadata: name, description, events list)
- HOOK.yaml (metadata: name, description, events list, optional startup_readiness)
- handler.py (Python handler with async def handle(event_type, context))
Events:
- gateway:startup -- Gateway process starts
- gateway:shutdown -- Gateway process is shutting down
- session:start -- New session created (first message of a new session)
- session:end -- Session ends (user ran /new or /reset)
- session:reset -- Session reset completed (new session entry created)
@@ -31,6 +32,26 @@ from hermes_cli.config import get_hermes_home
HOOKS_DIR = get_hermes_home() / "hooks"
def _normalize_startup_readiness(hook_name: str, manifest: dict[str, Any]) -> Optional[dict[str, Any]]:
"""Validate and normalize optional startup readiness metadata."""
readiness = manifest.get("startup_readiness")
if readiness is None:
return None
if not isinstance(readiness, dict):
print(f"[hooks] Ignoring startup_readiness for {hook_name}: expected mapping", flush=True)
return None
check_id = str(readiness.get("id", "")).strip()
if not check_id:
print(f"[hooks] Ignoring startup_readiness for {hook_name}: missing id", flush=True)
return None
return {
"id": check_id,
"required": bool(readiness.get("required", True)),
}
class HookRegistry:
"""
Discovers, loads, and fires event hooks.
@@ -62,6 +83,7 @@ class HookRegistry:
"description": "Run ~/.hermes/BOOT.md on gateway startup",
"events": ["gateway:startup"],
"path": "(builtin)",
"startup_readiness": None,
})
except Exception as e:
print(f"[hooks] Could not load built-in boot-md hook: {e}", flush=True)
@@ -102,6 +124,7 @@ class HookRegistry:
if not events:
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
continue
startup_readiness = _normalize_startup_readiness(hook_name, manifest)
# Dynamically load the handler module
spec = importlib.util.spec_from_file_location(
@@ -128,6 +151,7 @@ class HookRegistry:
"description": manifest.get("description", ""),
"events": events,
"path": str(hook_dir),
"startup_readiness": startup_readiness,
})
print(f"[hooks] Loaded hook '{hook_name}' for events: {events}", flush=True)
+5 -514
View File
@@ -515,8 +515,6 @@ class APIServerAdapter(BasePlatformAdapter):
session_id: Optional[str] = None,
stream_delta_callback=None,
tool_progress_callback=None,
tool_start_callback=None,
tool_complete_callback=None,
) -> Any:
"""
Create an AIAgent instance using the gateway's runtime config.
@@ -555,8 +553,6 @@ class APIServerAdapter(BasePlatformAdapter):
platform="api_server",
stream_delta_callback=stream_delta_callback,
tool_progress_callback=tool_progress_callback,
tool_start_callback=tool_start_callback,
tool_complete_callback=tool_complete_callback,
session_db=self._ensure_session_db(),
fallback_model=fallback_model,
)
@@ -902,7 +898,7 @@ class APIServerAdapter(BasePlatformAdapter):
return time.monotonic()
# Stream content chunks as they arrive from the agent
loop = asyncio.get_running_loop()
loop = asyncio.get_event_loop()
while True:
try:
delta = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
@@ -969,427 +965,6 @@ class APIServerAdapter(BasePlatformAdapter):
return response
async def _write_sse_responses(
self,
request: "web.Request",
response_id: str,
model: str,
created_at: int,
stream_q,
agent_task,
agent_ref,
conversation_history: List[Dict[str, str]],
user_message: str,
instructions: Optional[str],
conversation: Optional[str],
store: bool,
session_id: str,
) -> "web.StreamResponse":
"""Write an SSE stream for POST /v1/responses (OpenAI Responses API).
Emits spec-compliant event types as the agent runs:
- ``response.created`` initial envelope (status=in_progress)
- ``response.output_text.delta`` / ``response.output_text.done``
streamed assistant text
- ``response.output_item.added`` / ``response.output_item.done``
with ``item.type == "function_call"`` when the agent invokes a
tool (both events fire; the ``done`` event carries the finalized
``arguments`` string)
- ``response.output_item.added`` with
``item.type == "function_call_output"`` tool result with
``{call_id, output, status}``
- ``response.completed`` terminal event carrying the full
response object with all output items + usage (same payload
shape as the non-streaming path for parity)
- ``response.failed`` terminal event on agent error
If the client disconnects mid-stream, ``agent.interrupt()`` is
called so the agent stops issuing upstream LLM calls, then the
asyncio task is cancelled. When ``store=True`` the full response
is persisted to the ResponseStore in a ``finally`` block so GET
/v1/responses/{id} and ``previous_response_id`` chaining work the
same as the batch path.
"""
import queue as _q
sse_headers = {
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
"X-Accel-Buffering": "no",
}
origin = request.headers.get("Origin", "")
cors = self._cors_headers_for_origin(origin) if origin else None
if cors:
sse_headers.update(cors)
if session_id:
sse_headers["X-Hermes-Session-Id"] = session_id
response = web.StreamResponse(status=200, headers=sse_headers)
await response.prepare(request)
# State accumulated during the stream
final_text_parts: List[str] = []
# Track open function_call items by name so we can emit a matching
# ``done`` event when the tool completes. Order preserved.
pending_tool_calls: List[Dict[str, Any]] = []
# Output items we've emitted so far (used to build the terminal
# response.completed payload). Kept in the order they appeared.
emitted_items: List[Dict[str, Any]] = []
# Monotonic counter for output_index (spec requires it).
output_index = 0
# Monotonic counter for call_id generation if the agent doesn't
# provide one (it doesn't, from tool_progress_callback).
call_counter = 0
# Canonical Responses SSE events include a monotonically increasing
# sequence_number. Add it server-side for every emitted event so
# clients that validate the OpenAI event schema can parse our stream.
sequence_number = 0
# Track the assistant message item id + content index for text
# delta events — the spec ties deltas to a specific item.
message_item_id = f"msg_{uuid.uuid4().hex[:24]}"
message_output_index: Optional[int] = None
message_opened = False
async def _write_event(event_type: str, data: Dict[str, Any]) -> None:
nonlocal sequence_number
if "sequence_number" not in data:
data["sequence_number"] = sequence_number
sequence_number += 1
payload = f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
await response.write(payload.encode())
def _envelope(status: str) -> Dict[str, Any]:
env: Dict[str, Any] = {
"id": response_id,
"object": "response",
"status": status,
"created_at": created_at,
"model": model,
}
return env
final_response_text = ""
agent_error: Optional[str] = None
usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
try:
# response.created — initial envelope, status=in_progress
created_env = _envelope("in_progress")
created_env["output"] = []
await _write_event("response.created", {
"type": "response.created",
"response": created_env,
})
last_activity = time.monotonic()
async def _open_message_item() -> None:
"""Emit response.output_item.added for the assistant message
the first time any text delta arrives."""
nonlocal message_opened, message_output_index, output_index
if message_opened:
return
message_opened = True
message_output_index = output_index
output_index += 1
item = {
"id": message_item_id,
"type": "message",
"status": "in_progress",
"role": "assistant",
"content": [],
}
await _write_event("response.output_item.added", {
"type": "response.output_item.added",
"output_index": message_output_index,
"item": item,
})
async def _emit_text_delta(delta_text: str) -> None:
await _open_message_item()
final_text_parts.append(delta_text)
await _write_event("response.output_text.delta", {
"type": "response.output_text.delta",
"item_id": message_item_id,
"output_index": message_output_index,
"content_index": 0,
"delta": delta_text,
"logprobs": [],
})
async def _emit_tool_started(payload: Dict[str, Any]) -> str:
"""Emit response.output_item.added for a function_call.
Returns the call_id so the matching completion event can
reference it. Prefer the real ``tool_call_id`` from the
agent when available; fall back to a generated call id for
safety in tests or older code paths.
"""
nonlocal output_index, call_counter
call_counter += 1
call_id = payload.get("tool_call_id") or f"call_{response_id[5:]}_{call_counter}"
args = payload.get("arguments", {})
if isinstance(args, dict):
arguments_str = json.dumps(args)
else:
arguments_str = str(args)
item = {
"id": f"fc_{uuid.uuid4().hex[:24]}",
"type": "function_call",
"status": "in_progress",
"name": payload.get("name", ""),
"call_id": call_id,
"arguments": arguments_str,
}
idx = output_index
output_index += 1
pending_tool_calls.append({
"call_id": call_id,
"name": payload.get("name", ""),
"arguments": arguments_str,
"item_id": item["id"],
"output_index": idx,
})
emitted_items.append({
"type": "function_call",
"name": payload.get("name", ""),
"arguments": arguments_str,
"call_id": call_id,
})
await _write_event("response.output_item.added", {
"type": "response.output_item.added",
"output_index": idx,
"item": item,
})
return call_id
async def _emit_tool_completed(payload: Dict[str, Any]) -> None:
"""Emit response.output_item.done (function_call) followed
by response.output_item.added (function_call_output)."""
nonlocal output_index
call_id = payload.get("tool_call_id")
result = payload.get("result", "")
pending = None
if call_id:
for i, p in enumerate(pending_tool_calls):
if p["call_id"] == call_id:
pending = pending_tool_calls.pop(i)
break
if pending is None:
# Completion without a matching start — skip to avoid
# emitting orphaned done events.
return
# function_call done
done_item = {
"id": pending["item_id"],
"type": "function_call",
"status": "completed",
"name": pending["name"],
"call_id": pending["call_id"],
"arguments": pending["arguments"],
}
await _write_event("response.output_item.done", {
"type": "response.output_item.done",
"output_index": pending["output_index"],
"item": done_item,
})
# function_call_output added (result)
result_str = result if isinstance(result, str) else json.dumps(result)
output_parts = [{"type": "input_text", "text": result_str}]
output_item = {
"id": f"fco_{uuid.uuid4().hex[:24]}",
"type": "function_call_output",
"call_id": pending["call_id"],
"output": output_parts,
"status": "completed",
}
idx = output_index
output_index += 1
emitted_items.append({
"type": "function_call_output",
"call_id": pending["call_id"],
"output": output_parts,
})
await _write_event("response.output_item.added", {
"type": "response.output_item.added",
"output_index": idx,
"item": output_item,
})
await _write_event("response.output_item.done", {
"type": "response.output_item.done",
"output_index": idx,
"item": output_item,
})
# Main drain loop — thread-safe queue fed by agent callbacks.
async def _dispatch(it) -> None:
"""Route a queue item to the correct SSE emitter.
Plain strings are text deltas. Tagged tuples with
``__tool_started__`` / ``__tool_completed__`` prefixes
are tool lifecycle events.
"""
if isinstance(it, tuple) and len(it) == 2 and isinstance(it[0], str):
tag, payload = it
if tag == "__tool_started__":
await _emit_tool_started(payload)
elif tag == "__tool_completed__":
await _emit_tool_completed(payload)
# Unknown tags are silently ignored (forward-compat).
elif isinstance(it, str):
await _emit_text_delta(it)
# Other types (non-string, non-tuple) are silently dropped.
loop = asyncio.get_running_loop()
while True:
try:
item = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
except _q.Empty:
if agent_task.done():
# Drain remaining
while True:
try:
item = stream_q.get_nowait()
if item is None:
break
await _dispatch(item)
last_activity = time.monotonic()
except _q.Empty:
break
break
if time.monotonic() - last_activity >= CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS:
await response.write(b": keepalive\n\n")
last_activity = time.monotonic()
continue
if item is None: # EOS sentinel
break
await _dispatch(item)
last_activity = time.monotonic()
# Pick up agent result + usage from the completed task
try:
result, agent_usage = await agent_task
usage = agent_usage or usage
# If the agent produced a final_response but no text
# deltas were streamed (e.g. some providers only emit
# the full response at the end), emit a single fallback
# delta so Responses clients still receive a live text part.
agent_final = result.get("final_response", "") if isinstance(result, dict) else ""
if agent_final and not final_text_parts:
await _emit_text_delta(agent_final)
if agent_final and not final_response_text:
final_response_text = agent_final
if isinstance(result, dict) and result.get("error") and not final_response_text:
agent_error = result["error"]
except Exception as e: # noqa: BLE001
logger.error("Error running agent for streaming responses: %s", e, exc_info=True)
agent_error = str(e)
# Close the message item if it was opened
final_response_text = "".join(final_text_parts) or final_response_text
if message_opened:
await _write_event("response.output_text.done", {
"type": "response.output_text.done",
"item_id": message_item_id,
"output_index": message_output_index,
"content_index": 0,
"text": final_response_text,
"logprobs": [],
})
msg_done_item = {
"id": message_item_id,
"type": "message",
"status": "completed",
"role": "assistant",
"content": [
{"type": "output_text", "text": final_response_text}
],
}
await _write_event("response.output_item.done", {
"type": "response.output_item.done",
"output_index": message_output_index,
"item": msg_done_item,
})
# Always append a final message item in the completed
# response envelope so clients that only parse the terminal
# payload still see the assistant text. This mirrors the
# shape produced by _extract_output_items in the batch path.
final_items: List[Dict[str, Any]] = list(emitted_items)
final_items.append({
"type": "message",
"role": "assistant",
"content": [
{"type": "output_text", "text": final_response_text or (agent_error or "")}
],
})
if agent_error:
failed_env = _envelope("failed")
failed_env["output"] = final_items
failed_env["error"] = {"message": agent_error, "type": "server_error"}
failed_env["usage"] = {
"input_tokens": usage.get("input_tokens", 0),
"output_tokens": usage.get("output_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
}
await _write_event("response.failed", {
"type": "response.failed",
"response": failed_env,
})
else:
completed_env = _envelope("completed")
completed_env["output"] = final_items
completed_env["usage"] = {
"input_tokens": usage.get("input_tokens", 0),
"output_tokens": usage.get("output_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
}
await _write_event("response.completed", {
"type": "response.completed",
"response": completed_env,
})
# Persist for future chaining / GET retrieval, mirroring
# the batch path behavior.
if store:
full_history = list(conversation_history)
full_history.append({"role": "user", "content": user_message})
if isinstance(result, dict) and result.get("messages"):
full_history.extend(result["messages"])
else:
full_history.append({"role": "assistant", "content": final_response_text})
self._response_store.put(response_id, {
"response": completed_env,
"conversation_history": full_history,
"instructions": instructions,
"session_id": session_id,
})
if conversation:
self._response_store.set_conversation(conversation, response_id)
except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
# Client disconnected — interrupt the agent so it stops
# making upstream LLM calls, then cancel the task.
agent = agent_ref[0] if agent_ref else None
if agent is not None:
try:
agent.interrupt("SSE client disconnected")
except Exception:
pass
if not agent_task.done():
agent_task.cancel()
try:
await agent_task
except (asyncio.CancelledError, Exception):
pass
logger.info("SSE client disconnected; interrupted agent task %s", response_id)
return response
async def _handle_responses(self, request: "web.Request") -> "web.Response":
"""POST /v1/responses — OpenAI Responses API format."""
auth_err = self._check_auth(request)
@@ -1460,13 +1035,11 @@ class APIServerAdapter(BasePlatformAdapter):
if previous_response_id:
logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
stored_session_id = None
if not conversation_history and previous_response_id:
stored = self._response_store.get(previous_response_id)
if stored is None:
return web.json_response(_openai_error(f"Previous response not found: {previous_response_id}"), status=404)
conversation_history = list(stored.get("conversation_history", []))
stored_session_id = stored.get("session_id")
# If no instructions provided, carry forward from previous
if instructions is None:
instructions = stored.get("instructions")
@@ -1484,83 +1057,8 @@ class APIServerAdapter(BasePlatformAdapter):
if body.get("truncation") == "auto" and len(conversation_history) > 100:
conversation_history = conversation_history[-100:]
# Reuse session from previous_response_id chain so the dashboard
# groups the entire conversation under one session entry.
session_id = stored_session_id or str(uuid.uuid4())
stream = bool(body.get("stream", False))
if stream:
# Streaming branch — emit OpenAI Responses SSE events as the
# agent runs so frontends can render text deltas and tool
# calls in real time. See _write_sse_responses for details.
import queue as _q
_stream_q: _q.Queue = _q.Queue()
def _on_delta(delta):
# None from the agent is a CLI box-close signal, not EOS.
# Forwarding would kill the SSE stream prematurely; the
# SSE writer detects completion via agent_task.done().
if delta is not None:
_stream_q.put(delta)
def _on_tool_progress(event_type, name, preview, args, **kwargs):
"""Queue non-start tool progress events if needed in future.
The structured Responses stream uses ``tool_start_callback``
and ``tool_complete_callback`` for exact call-id correlation,
so progress events are currently ignored here.
"""
return
def _on_tool_start(tool_call_id, function_name, function_args):
"""Queue a started tool for live function_call streaming."""
_stream_q.put(("__tool_started__", {
"tool_call_id": tool_call_id,
"name": function_name,
"arguments": function_args or {},
}))
def _on_tool_complete(tool_call_id, function_name, function_args, function_result):
"""Queue a completed tool result for live function_call_output streaming."""
_stream_q.put(("__tool_completed__", {
"tool_call_id": tool_call_id,
"name": function_name,
"arguments": function_args or {},
"result": function_result,
}))
agent_ref = [None]
agent_task = asyncio.ensure_future(self._run_agent(
user_message=user_message,
conversation_history=conversation_history,
ephemeral_system_prompt=instructions,
session_id=session_id,
stream_delta_callback=_on_delta,
tool_progress_callback=_on_tool_progress,
tool_start_callback=_on_tool_start,
tool_complete_callback=_on_tool_complete,
agent_ref=agent_ref,
))
response_id = f"resp_{uuid.uuid4().hex[:28]}"
model_name = body.get("model", self._model_name)
created_at = int(time.time())
return await self._write_sse_responses(
request=request,
response_id=response_id,
model=model_name,
created_at=created_at,
stream_q=_stream_q,
agent_task=agent_task,
agent_ref=agent_ref,
conversation_history=conversation_history,
user_message=user_message,
instructions=instructions,
conversation=conversation,
store=store,
session_id=session_id,
)
# Run the agent (with Idempotency-Key support)
session_id = str(uuid.uuid4())
async def _compute_response():
return await self._run_agent(
@@ -1635,7 +1133,6 @@ class APIServerAdapter(BasePlatformAdapter):
"response": response_data,
"conversation_history": full_history,
"instructions": instructions,
"session_id": session_id,
})
# Update conversation mapping so the next request with the same
# conversation name automatically chains to this response
@@ -1989,8 +1486,6 @@ class APIServerAdapter(BasePlatformAdapter):
session_id: Optional[str] = None,
stream_delta_callback=None,
tool_progress_callback=None,
tool_start_callback=None,
tool_complete_callback=None,
agent_ref: Optional[list] = None,
) -> tuple:
"""
@@ -2004,7 +1499,7 @@ class APIServerAdapter(BasePlatformAdapter):
callers (e.g. the SSE writer) to call ``agent.interrupt()`` from
another thread to stop in-progress LLM calls.
"""
loop = asyncio.get_running_loop()
loop = asyncio.get_event_loop()
def _run():
agent = self._create_agent(
@@ -2012,8 +1507,6 @@ class APIServerAdapter(BasePlatformAdapter):
session_id=session_id,
stream_delta_callback=stream_delta_callback,
tool_progress_callback=tool_progress_callback,
tool_start_callback=tool_start_callback,
tool_complete_callback=tool_complete_callback,
)
if agent_ref is not None:
agent_ref[0] = agent
@@ -2150,12 +1643,10 @@ class APIServerAdapter(BasePlatformAdapter):
if previous_response_id:
logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
stored_session_id = None
if not conversation_history and previous_response_id:
stored = self._response_store.get(previous_response_id)
if stored:
conversation_history = list(stored.get("conversation_history", []))
stored_session_id = stored.get("session_id")
if instructions is None:
instructions = stored.get("instructions")
@@ -2174,7 +1665,7 @@ class APIServerAdapter(BasePlatformAdapter):
)
conversation_history.append({"role": msg["role"], "content": str(content)})
session_id = body.get("session_id") or stored_session_id or run_id
session_id = body.get("session_id") or run_id
ephemeral_system_prompt = instructions
async def _run_and_close():
+14 -144
View File
@@ -669,15 +669,6 @@ class MessageEvent:
# Original platform data
raw_message: Any = None
message_id: Optional[str] = None
# Platform-specific update identifier. For Telegram this is the
# ``update_id`` from the PTB Update wrapper; other platforms currently
# ignore it. Used by ``/restart`` to record the triggering update so the
# new gateway can advance the Telegram offset past it and avoid processing
# the same ``/restart`` twice if PTB's graceful-shutdown ACK times out
# ("Error while calling `get_updates` one more time to mark all fetched
# updates" in gateway.log).
platform_update_id: Optional[int] = None
# Media attachments
# media_urls: local file paths (for vision tool access)
@@ -691,10 +682,6 @@ class MessageEvent:
# Auto-loaded skill(s) for topic/channel bindings (e.g., Telegram DM Topics,
# Discord channel_skill_bindings). A single name or ordered list.
auto_skill: Optional[str | list[str]] = None
# Per-channel ephemeral system prompt (e.g. Discord channel_prompts).
# Applied at API call time and never persisted to transcript history.
channel_prompt: Optional[str] = None
# Internal flag — set for synthetic events (e.g. background process
# completion notifications) that must bypass user authorization checks.
@@ -743,56 +730,25 @@ def merge_pending_message_event(
pending_messages: Dict[str, MessageEvent],
session_key: str,
event: MessageEvent,
*,
merge_text: bool = False,
) -> None:
"""Store or merge a pending event for a session.
Photo bursts/albums often arrive as multiple near-simultaneous PHOTO
events. Merge those into the existing queued event so the next turn sees
the whole burst.
When ``merge_text`` is enabled, rapid follow-up TEXT events are appended
instead of replacing the pending turn. This is used for Telegram bursty
follow-ups so a multi-part user thought is not silently truncated to only
the last queued fragment.
the whole burst, while non-photo follow-ups still replace the pending
event normally.
"""
existing = pending_messages.get(session_key)
if existing:
existing_is_photo = getattr(existing, "message_type", None) == MessageType.PHOTO
incoming_is_photo = event.message_type == MessageType.PHOTO
existing_has_media = bool(existing.media_urls)
incoming_has_media = bool(event.media_urls)
if existing_is_photo and incoming_is_photo:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
return
if existing_has_media or incoming_has_media:
if incoming_has_media:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
if existing.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
else:
existing.text = event.text
if existing_is_photo or incoming_is_photo:
existing.message_type = MessageType.PHOTO
return
if (
merge_text
and getattr(existing, "message_type", None) == MessageType.TEXT
and event.message_type == MessageType.TEXT
):
if event.text:
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
return
if (
existing
and getattr(existing, "message_type", None) == MessageType.PHOTO
and event.message_type == MessageType.PHOTO
):
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
return
pending_messages[session_key] = event
@@ -820,36 +776,6 @@ _RETRYABLE_ERROR_PATTERNS = (
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
def resolve_channel_prompt(
config_extra: dict,
channel_id: str,
parent_id: str | None = None,
) -> str | None:
"""Resolve a per-channel ephemeral prompt from platform config.
Looks up ``channel_prompts`` in the adapter's ``config.extra`` dict.
Prefers an exact match on *channel_id*; falls back to *parent_id*
(useful for forum threads / child channels inheriting a parent prompt).
Returns the prompt string, or None if no match is found. Blank/whitespace-
only prompts are treated as absent.
"""
prompts = config_extra.get("channel_prompts") or {}
if not isinstance(prompts, dict):
return None
for key in (channel_id, parent_id):
if not key:
continue
prompt = prompts.get(key)
if prompt is None:
continue
prompt = str(prompt).strip()
if prompt:
return prompt
return None
class BasePlatformAdapter(ABC):
"""
Base class for platform adapters.
@@ -879,11 +805,6 @@ class BasePlatformAdapter(ABC):
# Gateway shutdown cancels these so an old gateway instance doesn't keep
# working on a task after --replace or manual restarts.
self._background_tasks: set[asyncio.Task] = set()
# One-shot callbacks to fire after the main response is delivered.
# Keyed by session_key. GatewayRunner uses this to defer
# background-review notifications ("💾 Skill created") until the
# primary reply has been sent.
self._post_delivery_callbacks: Dict[str, Callable] = {}
self._expected_cancelled_tasks: set[asyncio.Task] = set()
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
# Chats where auto-TTS on voice input is disabled (set by /voice off)
@@ -1054,40 +975,16 @@ class BasePlatformAdapter(ABC):
"""
pass
# Default: the adapter treats ``finalize=True`` on edit_message as a
# no-op and is happy to have the stream consumer skip redundant final
# edits. Subclasses that *require* an explicit finalize call to close
# out the message lifecycle (e.g. rich card / AI assistant surfaces
# such as DingTalk AI Cards) override this to True (class attribute or
# property) so the stream consumer knows not to short-circuit.
REQUIRES_EDIT_FINALIZE: bool = False
async def edit_message(
self,
chat_id: str,
message_id: str,
content: str,
*,
finalize: bool = False,
) -> SendResult:
"""
Edit a previously sent message. Optional platforms that don't
support editing return success=False and callers fall back to
sending a new message.
``finalize`` signals that this is the last edit in a streaming
sequence. Most platforms (Telegram, Slack, Discord, Matrix,
etc.) treat it as a no-op because their edit APIs have no notion
of message lifecycle state an edit is an edit. Platforms that
render streaming updates with a distinct "in progress" state and
require explicit closure (e.g. rich card / AI assistant surfaces
such as DingTalk AI Cards) use it to finalize the message and
transition the UI out of the streaming indicator those should
also set ``REQUIRES_EDIT_FINALIZE = True`` so callers route a
final edit through even when content is unchanged. Callers
should set ``finalize=True`` on the final edit of a streamed
response (typically when ``got_done`` fires in the stream
consumer) and leave it ``False`` on intermediate edits.
"""
return SendResult(success=False, error="Not supported")
@@ -1324,7 +1221,7 @@ class BasePlatformAdapter(ABC):
path = path[1:-1].strip()
path = path.lstrip("`\"'").rstrip("`\"',.;:)}]")
if path:
media.append((os.path.expanduser(path), has_voice_tag))
media.append((path, has_voice_tag))
# Remove MEDIA tags from content (including surrounding quote/backtick wrappers)
if media:
@@ -1612,9 +1509,7 @@ class BasePlatformAdapter(ABC):
# session lifecycle and its cleanup races with the running task
# (see PR #4926).
cmd = event.get_command()
from hermes_cli.commands import should_bypass_active_session
if should_bypass_active_session(cmd):
if cmd in ("approve", "deny", "status", "stop", "new", "reset", "background", "restart"):
logger.debug(
"[%s] Command '/%s' bypassing active-session guard for %s",
self.name, cmd, session_key,
@@ -1729,21 +1624,6 @@ class BasePlatformAdapter(ABC):
# streaming already delivered the text (already_sent=True) or
# when the message was queued behind an active agent. Log at
# DEBUG to avoid noisy warnings for expected behavior.
#
# Suppress stale response when the session was interrupted by a
# new message that hasn't been consumed yet. The pending message
# is processed by the pending-message handler below (#8221/#2483).
if (
response
and interrupt_event.is_set()
and session_key in self._pending_messages
):
logger.info(
"[%s] Suppressing stale response for interrupted session %s",
self.name,
session_key,
)
response = None
if not response:
logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
if response:
@@ -1965,14 +1845,6 @@ class BasePlatformAdapter(ABC):
except Exception:
pass # Last resort — don't let error reporting crash the handler
finally:
# Fire any one-shot post-delivery callback registered for this
# session (e.g. deferred background-review notifications).
_post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
if callable(_post_cb):
try:
_post_cb()
except Exception:
pass
# Stop typing indicator
typing_task.cancel()
try:
@@ -2026,7 +1898,6 @@ class BasePlatformAdapter(ABC):
chat_topic: Optional[str] = None,
user_id_alt: Optional[str] = None,
chat_id_alt: Optional[str] = None,
is_bot: bool = False,
) -> SessionSource:
"""Helper to build a SessionSource for this platform."""
# Normalize empty topic to None
@@ -2043,7 +1914,6 @@ class BasePlatformAdapter(ABC):
chat_topic=chat_topic.strip() if chat_topic else None,
user_id_alt=user_id_alt,
chat_id_alt=chat_id_alt,
is_bot=is_bot,
)
@abstractmethod
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+3 -173
View File
@@ -1073,13 +1073,6 @@ class FeishuAdapter(BasePlatformAdapter):
self._webhook_rate_counts: Dict[str, tuple[int, float]] = {} # rate_key → (count, window_start)
self._webhook_anomaly_counts: Dict[str, tuple[int, str, float]] = {} # ip → (count, last_status, first_seen)
self._card_action_tokens: Dict[str, float] = {} # token → first_seen_time
# Inbound events that arrived before the adapter loop was ready
# (e.g. during startup/restart or network-flap reconnect). A single
# drainer thread replays them as soon as the loop becomes available.
self._pending_inbound_events: List[Any] = []
self._pending_inbound_lock = threading.Lock()
self._pending_drain_scheduled = False
self._pending_inbound_max_depth = 1000 # cap queue; drop oldest beyond
self._chat_locks: Dict[str, asyncio.Lock] = {} # chat_id → lock (per-chat serial processing)
self._sent_message_ids_to_chat: Dict[str, str] = {} # message_id → chat_id (for reaction routing)
self._sent_message_id_order: List[str] = [] # LRU order for _sent_message_ids_to_chat
@@ -1226,12 +1219,6 @@ class FeishuAdapter(BasePlatformAdapter):
.register_p2_card_action_trigger(self._on_card_action_trigger)
.register_p2_im_chat_member_bot_added_v1(self._on_bot_added_to_chat)
.register_p2_im_chat_member_bot_deleted_v1(self._on_bot_removed_from_chat)
.register_p2_im_chat_access_event_bot_p2p_chat_entered_v1(self._on_p2p_chat_entered)
.register_p2_im_message_recalled_v1(self._on_message_recalled)
.register_p2_customized_event(
"drive.notice.comment_add_v1",
self._on_drive_comment_event,
)
.build()
)
@@ -1770,22 +1757,10 @@ class FeishuAdapter(BasePlatformAdapter):
# =========================================================================
def _on_message_event(self, data: Any) -> None:
"""Normalize Feishu inbound events into MessageEvent.
Called by the lark_oapi SDK's event dispatcher on a background thread.
If the adapter loop is not currently accepting callbacks (brief window
during startup/restart or network-flap reconnect), the event is queued
for replay instead of dropped.
"""
"""Normalize Feishu inbound events into MessageEvent."""
loop = self._loop
if not self._loop_accepts_callbacks(loop):
start_drainer = self._enqueue_pending_inbound_event(data)
if start_drainer:
threading.Thread(
target=self._drain_pending_inbound_events,
name="feishu-pending-inbound-drainer",
daemon=True,
).start()
if loop is None or bool(getattr(loop, "is_closed", lambda: False)()):
logger.warning("[Feishu] Dropping inbound message before adapter loop is ready")
return
future = asyncio.run_coroutine_threadsafe(
self._handle_message_event_data(data),
@@ -1793,124 +1768,6 @@ class FeishuAdapter(BasePlatformAdapter):
)
future.add_done_callback(self._log_background_failure)
def _enqueue_pending_inbound_event(self, data: Any) -> bool:
"""Append an event to the pending-inbound queue.
Returns True if the caller should spawn a drainer thread (no drainer
currently scheduled), False if a drainer is already running and will
pick up the new event on its next pass.
"""
with self._pending_inbound_lock:
if len(self._pending_inbound_events) >= self._pending_inbound_max_depth:
# Queue full — drop the oldest to make room. This happens only
# if the loop stays unavailable for an extended period AND the
# WS keeps firing callbacks. Still better than silent drops.
dropped = self._pending_inbound_events.pop(0)
try:
event = getattr(dropped, "event", None)
message = getattr(event, "message", None)
message_id = str(getattr(message, "message_id", "") or "unknown")
except Exception:
message_id = "unknown"
logger.error(
"[Feishu] Pending-inbound queue full (%d); dropped oldest event %s",
self._pending_inbound_max_depth,
message_id,
)
self._pending_inbound_events.append(data)
depth = len(self._pending_inbound_events)
should_start = not self._pending_drain_scheduled
if should_start:
self._pending_drain_scheduled = True
logger.warning(
"[Feishu] Queued inbound event for replay (loop not ready, queue depth=%d)",
depth,
)
return should_start
def _drain_pending_inbound_events(self) -> None:
"""Replay queued inbound events once the adapter loop is ready.
Runs in a dedicated daemon thread. Polls ``_running`` and
``_loop_accepts_callbacks`` until events can be dispatched or the
adapter shuts down. A single drainer handles the entire queue;
concurrent ``_on_message_event`` calls just append.
"""
poll_interval = 0.25
max_wait_seconds = 120.0 # safety cap: drop queue after 2 minutes
waited = 0.0
try:
while True:
if not getattr(self, "_running", True):
# Adapter shutting down — drop queued events rather than
# holding them against a closed loop.
with self._pending_inbound_lock:
dropped = len(self._pending_inbound_events)
self._pending_inbound_events.clear()
if dropped:
logger.warning(
"[Feishu] Dropped %d queued inbound event(s) during shutdown",
dropped,
)
return
loop = self._loop
if self._loop_accepts_callbacks(loop):
with self._pending_inbound_lock:
batch = self._pending_inbound_events[:]
self._pending_inbound_events.clear()
if not batch:
# Queue emptied between check and grab; done.
with self._pending_inbound_lock:
if not self._pending_inbound_events:
return
continue
dispatched = 0
requeue: List[Any] = []
for event in batch:
try:
fut = asyncio.run_coroutine_threadsafe(
self._handle_message_event_data(event),
loop,
)
fut.add_done_callback(self._log_background_failure)
dispatched += 1
except RuntimeError:
# Loop closed between check and submit — requeue
# and poll again.
requeue.append(event)
if requeue:
with self._pending_inbound_lock:
self._pending_inbound_events[:0] = requeue
if dispatched:
logger.info(
"[Feishu] Replayed %d queued inbound event(s)",
dispatched,
)
if not requeue:
# Successfully drained; check if more arrived while
# we were dispatching and exit if not.
with self._pending_inbound_lock:
if not self._pending_inbound_events:
return
# More events queued or requeue pending — loop again.
continue
if waited >= max_wait_seconds:
with self._pending_inbound_lock:
dropped = len(self._pending_inbound_events)
self._pending_inbound_events.clear()
logger.error(
"[Feishu] Adapter loop unavailable for %.0fs; "
"dropped %d queued inbound event(s)",
max_wait_seconds,
dropped,
)
return
time.sleep(poll_interval)
waited += poll_interval
finally:
with self._pending_inbound_lock:
self._pending_drain_scheduled = False
async def _handle_message_event_data(self, data: Any) -> None:
"""Shared inbound message handling for websocket and webhook transports."""
event = getattr(data, "event", None)
@@ -1963,31 +1820,6 @@ class FeishuAdapter(BasePlatformAdapter):
logger.info("[Feishu] Bot removed from chat: %s", chat_id)
self._chat_info_cache.pop(chat_id, None)
def _on_p2p_chat_entered(self, data: Any) -> None:
logger.debug("[Feishu] User entered P2P chat with bot")
def _on_message_recalled(self, data: Any) -> None:
logger.debug("[Feishu] Message recalled by user")
def _on_drive_comment_event(self, data: Any) -> None:
"""Handle drive document comment notification (drive.notice.comment_add_v1).
Delegates to :mod:`gateway.platforms.feishu_comment` for parsing,
logging, and reaction. Scheduling follows the same
``run_coroutine_threadsafe`` pattern used by ``_on_message_event``.
"""
from gateway.platforms.feishu_comment import handle_drive_comment_event
loop = self._loop
if not self._loop_accepts_callbacks(loop):
logger.warning("[Feishu] Dropping drive comment event before adapter loop is ready")
return
future = asyncio.run_coroutine_threadsafe(
handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id),
loop,
)
future.add_done_callback(self._log_background_failure)
def _on_reaction_event(self, event_type: str, data: Any) -> None:
"""Route user reactions on bot messages as synthetic text events."""
event = getattr(data, "event", None)
@@ -2613,8 +2445,6 @@ class FeishuAdapter(BasePlatformAdapter):
self._on_reaction_event(event_type, data)
elif event_type == "card.action.trigger":
self._on_card_action_trigger(data)
elif event_type == "drive.notice.comment_add_v1":
self._on_drive_comment_event(data)
else:
logger.debug("[Feishu] Ignoring webhook event type: %s", event_type or "unknown")
return web.json_response({"code": 0, "msg": "ok"})
File diff suppressed because it is too large Load Diff
-429
View File
@@ -1,429 +0,0 @@
"""
Feishu document comment access-control rules.
3-tier rule resolution: exact doc > wildcard "*" > top-level > code defaults.
Each field (enabled/policy/allow_from) falls back independently.
Config: ~/.hermes/feishu_comment_rules.json (mtime-cached, hot-reload).
Pairing store: ~/.hermes/feishu_comment_pairing.json.
"""
from __future__ import annotations
import json
import logging
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
#
# Uses the canonical ``get_hermes_home()`` helper (HERMES_HOME-aware and
# profile-safe). Resolved at import time; this module is lazy-imported by
# the Feishu comment event handler, which runs long after profile overrides
# have been applied, so freezing paths here is safe.
RULES_FILE = get_hermes_home() / "feishu_comment_rules.json"
PAIRING_FILE = get_hermes_home() / "feishu_comment_pairing.json"
# ---------------------------------------------------------------------------
# Data models
# ---------------------------------------------------------------------------
_VALID_POLICIES = ("allowlist", "pairing")
@dataclass(frozen=True)
class CommentDocumentRule:
"""Per-document rule. ``None`` means 'inherit from lower tier'."""
enabled: Optional[bool] = None
policy: Optional[str] = None
allow_from: Optional[frozenset] = None
@dataclass(frozen=True)
class CommentsConfig:
"""Top-level comment access config."""
enabled: bool = True
policy: str = "pairing"
allow_from: frozenset = field(default_factory=frozenset)
documents: Dict[str, CommentDocumentRule] = field(default_factory=dict)
@dataclass(frozen=True)
class ResolvedCommentRule:
"""Fully resolved rule after field-by-field fallback."""
enabled: bool
policy: str
allow_from: frozenset
match_source: str # e.g. "exact:docx:xxx" | "wildcard" | "top" | "default"
# ---------------------------------------------------------------------------
# Mtime-cached file loading
# ---------------------------------------------------------------------------
class _MtimeCache:
"""Generic mtime-based file cache. ``stat()`` per access, re-read only on change."""
def __init__(self, path: Path):
self._path = path
self._mtime: float = 0.0
self._data: Optional[dict] = None
def load(self) -> dict:
try:
st = self._path.stat()
mtime = st.st_mtime
except FileNotFoundError:
self._mtime = 0.0
self._data = {}
return {}
if mtime == self._mtime and self._data is not None:
return self._data
try:
with open(self._path, "r", encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, dict):
data = {}
except (json.JSONDecodeError, OSError):
logger.warning("[Feishu-Rules] Failed to read %s, using empty config", self._path)
data = {}
self._mtime = mtime
self._data = data
return data
_rules_cache = _MtimeCache(RULES_FILE)
_pairing_cache = _MtimeCache(PAIRING_FILE)
# ---------------------------------------------------------------------------
# Config parsing
# ---------------------------------------------------------------------------
def _parse_frozenset(raw: Any) -> Optional[frozenset]:
"""Parse a list of strings into a frozenset; return None if key absent."""
if raw is None:
return None
if isinstance(raw, (list, tuple)):
return frozenset(str(u).strip() for u in raw if str(u).strip())
return None
def _parse_document_rule(raw: dict) -> CommentDocumentRule:
enabled = raw.get("enabled")
if enabled is not None:
enabled = bool(enabled)
policy = raw.get("policy")
if policy is not None:
policy = str(policy).strip().lower()
if policy not in _VALID_POLICIES:
policy = None
allow_from = _parse_frozenset(raw.get("allow_from"))
return CommentDocumentRule(enabled=enabled, policy=policy, allow_from=allow_from)
def load_config() -> CommentsConfig:
"""Load comment rules from disk (mtime-cached)."""
raw = _rules_cache.load()
if not raw:
return CommentsConfig()
documents: Dict[str, CommentDocumentRule] = {}
raw_docs = raw.get("documents", {})
if isinstance(raw_docs, dict):
for key, rule_raw in raw_docs.items():
if isinstance(rule_raw, dict):
documents[str(key)] = _parse_document_rule(rule_raw)
policy = str(raw.get("policy", "pairing")).strip().lower()
if policy not in _VALID_POLICIES:
policy = "pairing"
return CommentsConfig(
enabled=raw.get("enabled", True),
policy=policy,
allow_from=_parse_frozenset(raw.get("allow_from")) or frozenset(),
documents=documents,
)
# ---------------------------------------------------------------------------
# Rule resolution (§8.4 field-by-field fallback)
# ---------------------------------------------------------------------------
def has_wiki_keys(cfg: CommentsConfig) -> bool:
"""Check if any document rule key starts with 'wiki:'."""
return any(k.startswith("wiki:") for k in cfg.documents)
def resolve_rule(
cfg: CommentsConfig,
file_type: str,
file_token: str,
wiki_token: str = "",
) -> ResolvedCommentRule:
"""Resolve effective rule: exact doc → wiki key → wildcard → top-level → defaults."""
exact_key = f"{file_type}:{file_token}"
exact = cfg.documents.get(exact_key)
exact_src = f"exact:{exact_key}"
if exact is None and wiki_token:
wiki_key = f"wiki:{wiki_token}"
exact = cfg.documents.get(wiki_key)
exact_src = f"exact:{wiki_key}"
wildcard = cfg.documents.get("*")
layers = []
if exact is not None:
layers.append((exact, exact_src))
if wildcard is not None:
layers.append((wildcard, "wildcard"))
def _pick(field_name: str):
for layer, source in layers:
val = getattr(layer, field_name)
if val is not None:
return val, source
return getattr(cfg, field_name), "top"
enabled, en_src = _pick("enabled")
policy, pol_src = _pick("policy")
allow_from, _ = _pick("allow_from")
# match_source = highest-priority tier that contributed any field
priority_order = {"exact": 0, "wildcard": 1, "top": 2}
best_src = min(
[en_src, pol_src],
key=lambda s: priority_order.get(s.split(":")[0], 3),
)
return ResolvedCommentRule(
enabled=enabled,
policy=policy,
allow_from=allow_from,
match_source=best_src,
)
# ---------------------------------------------------------------------------
# Pairing store
# ---------------------------------------------------------------------------
def _load_pairing_approved() -> set:
"""Return set of approved user open_ids (mtime-cached)."""
data = _pairing_cache.load()
approved = data.get("approved", {})
if isinstance(approved, dict):
return set(approved.keys())
if isinstance(approved, list):
return set(str(u) for u in approved if u)
return set()
def _save_pairing(data: dict) -> None:
PAIRING_FILE.parent.mkdir(parents=True, exist_ok=True)
tmp = PAIRING_FILE.with_suffix(".tmp")
with open(tmp, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, ensure_ascii=False)
tmp.replace(PAIRING_FILE)
# Invalidate cache so next load picks up change
_pairing_cache._mtime = 0.0
_pairing_cache._data = None
def pairing_add(user_open_id: str) -> bool:
"""Add a user to the pairing-approved list. Returns True if newly added."""
data = _pairing_cache.load()
approved = data.get("approved", {})
if not isinstance(approved, dict):
approved = {}
if user_open_id in approved:
return False
approved[user_open_id] = {"approved_at": time.time()}
data["approved"] = approved
_save_pairing(data)
return True
def pairing_remove(user_open_id: str) -> bool:
"""Remove a user from the pairing-approved list. Returns True if removed."""
data = _pairing_cache.load()
approved = data.get("approved", {})
if not isinstance(approved, dict):
return False
if user_open_id not in approved:
return False
del approved[user_open_id]
data["approved"] = approved
_save_pairing(data)
return True
def pairing_list() -> Dict[str, Any]:
"""Return the approved dict {user_open_id: {approved_at: ...}}."""
data = _pairing_cache.load()
approved = data.get("approved", {})
return dict(approved) if isinstance(approved, dict) else {}
# ---------------------------------------------------------------------------
# Access check (public API for feishu_comment.py)
# ---------------------------------------------------------------------------
def is_user_allowed(rule: ResolvedCommentRule, user_open_id: str) -> bool:
"""Check if user passes the resolved rule's policy gate."""
if user_open_id in rule.allow_from:
return True
if rule.policy == "pairing":
return user_open_id in _load_pairing_approved()
return False
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def _print_status() -> None:
cfg = load_config()
print(f"Rules file: {RULES_FILE}")
print(f" exists: {RULES_FILE.exists()}")
print(f"Pairing file: {PAIRING_FILE}")
print(f" exists: {PAIRING_FILE.exists()}")
print()
print(f"Top-level:")
print(f" enabled: {cfg.enabled}")
print(f" policy: {cfg.policy}")
print(f" allow_from: {sorted(cfg.allow_from) if cfg.allow_from else '[]'}")
print()
if cfg.documents:
print(f"Document rules ({len(cfg.documents)}):")
for key, rule in sorted(cfg.documents.items()):
parts = []
if rule.enabled is not None:
parts.append(f"enabled={rule.enabled}")
if rule.policy is not None:
parts.append(f"policy={rule.policy}")
if rule.allow_from is not None:
parts.append(f"allow_from={sorted(rule.allow_from)}")
print(f" [{key}] {', '.join(parts) if parts else '(empty — inherits all)'}")
else:
print("Document rules: (none)")
print()
approved = pairing_list()
print(f"Pairing approved ({len(approved)}):")
for uid, meta in sorted(approved.items()):
ts = meta.get("approved_at", 0)
print(f" {uid} (approved_at={ts})")
def _do_check(doc_key: str, user_open_id: str) -> None:
cfg = load_config()
parts = doc_key.split(":", 1)
if len(parts) != 2:
print(f"Error: doc_key must be 'fileType:fileToken', got '{doc_key}'")
return
file_type, file_token = parts
rule = resolve_rule(cfg, file_type, file_token)
allowed = is_user_allowed(rule, user_open_id)
print(f"Document: {doc_key}")
print(f"User: {user_open_id}")
print(f"Resolved rule:")
print(f" enabled: {rule.enabled}")
print(f" policy: {rule.policy}")
print(f" allow_from: {sorted(rule.allow_from) if rule.allow_from else '[]'}")
print(f" match_source: {rule.match_source}")
print(f"Result: {'ALLOWED' if allowed else 'DENIED'}")
def _main() -> int:
import sys
try:
from hermes_cli.env_loader import load_hermes_dotenv
load_hermes_dotenv()
except Exception:
pass
usage = (
"Usage: python -m gateway.platforms.feishu_comment_rules <command> [args]\n"
"\n"
"Commands:\n"
" status Show rules config and pairing state\n"
" check <fileType:token> <user> Simulate access check\n"
" pairing add <user_open_id> Add user to pairing-approved list\n"
" pairing remove <user_open_id> Remove user from pairing-approved list\n"
" pairing list List pairing-approved users\n"
"\n"
f"Rules config file: {RULES_FILE}\n"
" Edit this JSON file directly to configure policies and document rules.\n"
" Changes take effect on the next comment event (no restart needed).\n"
)
args = sys.argv[1:]
if not args:
print(usage)
return 1
cmd = args[0]
if cmd == "status":
_print_status()
elif cmd == "check":
if len(args) < 3:
print("Usage: check <fileType:fileToken> <user_open_id>")
return 1
_do_check(args[1], args[2])
elif cmd == "pairing":
if len(args) < 2:
print("Usage: pairing <add|remove|list> [args]")
return 1
sub = args[1]
if sub == "add":
if len(args) < 3:
print("Usage: pairing add <user_open_id>")
return 1
if pairing_add(args[2]):
print(f"Added: {args[2]}")
else:
print(f"Already approved: {args[2]}")
elif sub == "remove":
if len(args) < 3:
print("Usage: pairing remove <user_open_id>")
return 1
if pairing_remove(args[2]):
print(f"Removed: {args[2]}")
else:
print(f"Not in approved list: {args[2]}")
elif sub == "list":
approved = pairing_list()
if not approved:
print("(no approved users)")
for uid, meta in sorted(approved.items()):
print(f" {uid} approved_at={meta.get('approved_at', '?')}")
else:
print(f"Unknown pairing subcommand: {sub}")
return 1
else:
print(f"Unknown command: {cmd}\n")
print(usage)
return 1
return 0
if __name__ == "__main__":
import sys
sys.exit(_main())
+1 -4
View File
@@ -49,10 +49,7 @@ class MessageDeduplicator:
return False
now = time.time()
if msg_id in self._seen:
if now - self._seen[msg_id] < self._ttl:
return True
# Entry has expired — remove it and treat as new
del self._seen[msg_id]
return True
self._seen[msg_id] = now
if len(self._seen) > self._max_size:
cutoff = now - self._ttl
File diff suppressed because it is too large Load Diff
-7
View File
@@ -718,12 +718,6 @@ class MattermostAdapter(BasePlatformAdapter):
thread_id=thread_id,
)
# Per-channel ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_channel_prompt = resolve_channel_prompt(
self.config.extra, channel_id, None,
)
msg_event = MessageEvent(
text=message_text,
message_type=msg_type,
@@ -732,7 +726,6 @@ class MattermostAdapter(BasePlatformAdapter):
message_id=post_id,
media_urls=media_urls if media_urls else None,
media_types=media_types if media_types else None,
channel_prompt=_channel_prompt,
)
await self.handle_message(msg_event)
File diff suppressed because it is too large Load Diff
-57
View File
@@ -1,57 +0,0 @@
"""
QQBot platform package.
Re-exports the main adapter symbols from ``adapter.py`` (the original
``qqbot.py``) so that **all existing import paths remain unchanged**::
from gateway.platforms.qqbot import QQAdapter # works
from gateway.platforms.qqbot import check_qq_requirements # works
New modules:
- ``constants`` shared constants (API URLs, timeouts, message types)
- ``utils`` User-Agent builder, config helpers
- ``crypto`` AES-256-GCM key generation and decryption
- ``onboard`` QR-code scan-to-configure flow
"""
# -- Adapter (original qqbot.py) ------------------------------------------
from .adapter import ( # noqa: F401
QQAdapter,
QQCloseError,
check_qq_requirements,
_coerce_list,
_ssrf_redirect_guard,
)
# -- Onboard (QR-code scan-to-configure) -----------------------------------
from .onboard import ( # noqa: F401
BindStatus,
create_bind_task,
poll_bind_result,
build_connect_url,
)
from .crypto import decrypt_secret, generate_bind_key # noqa: F401
# -- Utils -----------------------------------------------------------------
from .utils import build_user_agent, get_api_headers, coerce_list # noqa: F401
__all__ = [
# adapter
"QQAdapter",
"QQCloseError",
"check_qq_requirements",
"_coerce_list",
"_ssrf_redirect_guard",
# onboard
"BindStatus",
"create_bind_task",
"poll_bind_result",
"build_connect_url",
# crypto
"decrypt_secret",
"generate_bind_key",
# utils
"build_user_agent",
"get_api_headers",
"coerce_list",
]
-74
View File
@@ -1,74 +0,0 @@
"""QQBot package-level constants shared across adapter, onboard, and other modules."""
from __future__ import annotations
import os
# ---------------------------------------------------------------------------
# QQBot adapter version — bump on functional changes to the adapter package.
# ---------------------------------------------------------------------------
QQBOT_VERSION = "1.1.0"
# ---------------------------------------------------------------------------
# API endpoints
# ---------------------------------------------------------------------------
# The portal domain is configurable via QQ_API_HOST for corporate proxies
# or test environments. Default: q.qq.com (production).
PORTAL_HOST = os.getenv("QQ_PORTAL_HOST", "q.qq.com")
API_BASE = "https://api.sgroup.qq.com"
TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken"
GATEWAY_URL_PATH = "/gateway"
# QR-code onboard endpoints (on the portal host)
ONBOARD_CREATE_PATH = "/lite/create_bind_task"
ONBOARD_POLL_PATH = "/lite/poll_bind_result"
QR_URL_TEMPLATE = (
"https://q.qq.com/qqbot/openclaw/connect.html"
"?task_id={task_id}&_wv=2&source=hermes"
)
# ---------------------------------------------------------------------------
# Timeouts & retry
# ---------------------------------------------------------------------------
DEFAULT_API_TIMEOUT = 30.0
FILE_UPLOAD_TIMEOUT = 120.0
CONNECT_TIMEOUT_SECONDS = 20.0
RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
MAX_RECONNECT_ATTEMPTS = 100
RATE_LIMIT_DELAY = 60 # seconds
QUICK_DISCONNECT_THRESHOLD = 5.0 # seconds
MAX_QUICK_DISCONNECT_COUNT = 3
ONBOARD_POLL_INTERVAL = 2.0 # seconds between poll_bind_result calls
ONBOARD_API_TIMEOUT = 10.0
# ---------------------------------------------------------------------------
# Message limits
# ---------------------------------------------------------------------------
MAX_MESSAGE_LENGTH = 4000
DEDUP_WINDOW_SECONDS = 300
DEDUP_MAX_SIZE = 1000
# ---------------------------------------------------------------------------
# QQ Bot message types
# ---------------------------------------------------------------------------
MSG_TYPE_TEXT = 0
MSG_TYPE_MARKDOWN = 2
MSG_TYPE_MEDIA = 7
MSG_TYPE_INPUT_NOTIFY = 6
# ---------------------------------------------------------------------------
# QQ Bot file media types
# ---------------------------------------------------------------------------
MEDIA_TYPE_IMAGE = 1
MEDIA_TYPE_VIDEO = 2
MEDIA_TYPE_VOICE = 3
MEDIA_TYPE_FILE = 4
-45
View File
@@ -1,45 +0,0 @@
"""AES-256-GCM utilities for QQBot scan-to-configure credential decryption."""
from __future__ import annotations
import base64
import os
def generate_bind_key() -> str:
"""Generate a 256-bit random AES key and return it as base64.
The key is passed to ``create_bind_task`` so the server can encrypt
the bot's *client_secret* before returning it. Only this CLI holds
the key, ensuring the secret never travels in plaintext.
"""
return base64.b64encode(os.urandom(32)).decode()
def decrypt_secret(encrypted_base64: str, key_base64: str) -> str:
"""Decrypt a base64-encoded AES-256-GCM ciphertext.
Ciphertext layout (after base64-decoding)::
IV (12 bytes) ciphertext (N bytes) AuthTag (16 bytes)
Args:
encrypted_base64: The ``bot_encrypt_secret`` value from
``poll_bind_result``.
key_base64: The base64 AES key generated by
:func:`generate_bind_key`.
Returns:
The decrypted *client_secret* as a UTF-8 string.
"""
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
key = base64.b64decode(key_base64)
raw = base64.b64decode(encrypted_base64)
iv = raw[:12]
ciphertext_with_tag = raw[12:] # AESGCM expects ciphertext + tag concatenated
aesgcm = AESGCM(key)
plaintext = aesgcm.decrypt(iv, ciphertext_with_tag, None)
return plaintext.decode("utf-8")
-124
View File
@@ -1,124 +0,0 @@
"""
QQBot scan-to-configure (QR code onboard) module.
Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
generate a QR-code URL and poll for scan completion. On success the caller
receives the bot's *app_id*, *client_secret* (decrypted locally), and the
scanner's *user_openid* — enough to fully configure the QQBot gateway.
Reference: https://bot.q.qq.com/wiki/develop/api-v2/
"""
from __future__ import annotations
import logging
from enum import IntEnum
from typing import Tuple
from urllib.parse import quote
from .constants import (
ONBOARD_API_TIMEOUT,
ONBOARD_CREATE_PATH,
ONBOARD_POLL_PATH,
PORTAL_HOST,
QR_URL_TEMPLATE,
)
from .crypto import generate_bind_key
from .utils import get_api_headers
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Bind status
# ---------------------------------------------------------------------------
class BindStatus(IntEnum):
"""Status codes returned by ``poll_bind_result``."""
NONE = 0
PENDING = 1
COMPLETED = 2
EXPIRED = 3
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
async def create_bind_task(
timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[str, str]:
"""Create a bind task and return *(task_id, aes_key_base64)*.
The AES key is generated locally and sent to the server so it can
encrypt the bot credentials before returning them.
Raises:
RuntimeError: If the API returns a non-zero ``retcode``.
"""
import httpx
url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
key = generate_bind_key()
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
resp = await client.post(url, json={"key": key}, headers=get_api_headers())
resp.raise_for_status()
data = resp.json()
if data.get("retcode") != 0:
raise RuntimeError(data.get("msg", "create_bind_task failed"))
task_id = data.get("data", {}).get("task_id")
if not task_id:
raise RuntimeError("create_bind_task: missing task_id in response")
logger.debug("create_bind_task ok: task_id=%s", task_id)
return task_id, key
async def poll_bind_result(
task_id: str,
timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[BindStatus, str, str, str]:
"""Poll the bind result for *task_id*.
Returns:
A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.
* ``bot_encrypt_secret`` is AES-256-GCM encrypted decrypt it with
:func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
key from :func:`create_bind_task`.
* ``user_openid`` is the OpenID of the person who scanned the code
(available when ``status == COMPLETED``).
Raises:
RuntimeError: If the API returns a non-zero ``retcode``.
"""
import httpx
url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
resp.raise_for_status()
data = resp.json()
if data.get("retcode") != 0:
raise RuntimeError(data.get("msg", "poll_bind_result failed"))
d = data.get("data", {})
return (
BindStatus(d.get("status", 0)),
str(d.get("bot_appid", "")),
d.get("bot_encrypt_secret", ""),
d.get("user_openid", ""),
)
def build_connect_url(task_id: str) -> str:
"""Build the QR-code target URL for a given *task_id*."""
return QR_URL_TEMPLATE.format(task_id=quote(task_id))
-71
View File
@@ -1,71 +0,0 @@
"""QQBot shared utilities — User-Agent, HTTP helpers, config coercion."""
from __future__ import annotations
import platform
import sys
from typing import Any, Dict, List
from .constants import QQBOT_VERSION
# ---------------------------------------------------------------------------
# User-Agent
# ---------------------------------------------------------------------------
def _get_hermes_version() -> str:
"""Return the hermes-agent package version, or 'dev' if unavailable."""
try:
from importlib.metadata import version
return version("hermes-agent")
except Exception:
return "dev"
def build_user_agent() -> str:
"""Build a descriptive User-Agent string.
Format::
QQBotAdapter/<qqbot_version> (Python/<py_version>; <os>; Hermes/<hermes_version>)
Example::
QQBotAdapter/1.0.0 (Python/3.11.15; darwin; Hermes/0.9.0)
"""
py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
os_name = platform.system().lower()
hermes_version = _get_hermes_version()
return f"QQBotAdapter/{QQBOT_VERSION} (Python/{py_version}; {os_name}; Hermes/{hermes_version})"
def get_api_headers() -> Dict[str, str]:
"""Return standard HTTP headers for QQBot API requests.
Includes ``Content-Type``, ``Accept``, and a dynamic ``User-Agent``.
``q.qq.com`` requires ``Accept: application/json`` without it,
the server returns a JavaScript anti-bot challenge page.
"""
return {
"Content-Type": "application/json",
"Accept": "application/json",
"User-Agent": build_user_agent(),
}
# ---------------------------------------------------------------------------
# Config helpers
# ---------------------------------------------------------------------------
def coerce_list(value: Any) -> List[str]:
"""Coerce config values into a trimmed string list.
Accepts comma-separated strings, lists, tuples, sets, or single values.
"""
if value is None:
return []
if isinstance(value, str):
return [item.strip() for item in value.split(",") if item.strip()]
if isinstance(value, (list, tuple, set)):
return [str(item).strip() for item in value if str(item).strip()]
return [str(value).strip()] if str(value).strip() else []
+3 -28
View File
@@ -366,20 +366,6 @@ class SlackAdapter(BasePlatformAdapter):
# in an assistant-enabled context. Falls back to reactions.
logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)
def _dm_top_level_threads_as_sessions(self) -> bool:
"""Whether top-level Slack DMs get per-message session threads.
Defaults to ``True`` so each visible DM reply thread is isolated as its
own Hermes session matching the per-thread behavior channels already
have. Set ``platforms.slack.extra.dm_top_level_threads_as_sessions``
to ``false`` in config.yaml to revert to the legacy behavior where all
top-level DMs share one continuous session.
"""
raw = self.config.extra.get("dm_top_level_threads_as_sessions")
if raw is None:
return True # default: each DM thread is its own session
return str(raw).strip().lower() in ("1", "true", "yes", "on")
def _resolve_thread_ts(
self,
reply_to: Optional[str] = None,
@@ -1010,14 +996,10 @@ class SlackAdapter(BasePlatformAdapter):
# Build thread_ts for session keying.
# In channels: fall back to ts so each top-level @mention starts a
# new thread/session (the bot always replies in a thread).
# In DMs: fall back to ts so each top-level DM reply thread gets
# its own session key (matching channel behavior). Set
# dm_top_level_threads_as_sessions: false in config to revert to
# legacy single-session-per-DM-channel behavior.
# In DMs: only use the real thread_ts — top-level DMs should share
# one continuous session, threaded DMs get their own session.
if is_dm:
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts")
if not thread_ts and self._dm_top_level_threads_as_sessions():
thread_ts = ts
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts") # None for top-level DMs
else:
thread_ts = event.get("thread_ts") or ts # ts fallback for channels
@@ -1185,12 +1167,6 @@ class SlackAdapter(BasePlatformAdapter):
thread_id=thread_ts,
)
# Per-channel ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_channel_prompt = resolve_channel_prompt(
self.config.extra, channel_id, None,
)
msg_event = MessageEvent(
text=text,
message_type=msg_type,
@@ -1200,7 +1176,6 @@ class SlackAdapter(BasePlatformAdapter):
media_urls=media_urls,
media_types=media_types,
reply_to_message_id=thread_ts if thread_ts != ts else None,
channel_prompt=_channel_prompt,
)
# Only react when bot is directly addressed (DM or @mention).
+49 -234
View File
@@ -11,7 +11,6 @@ import asyncio
import json
import logging
import os
import html as _html
import re
from typing import Dict, List, Optional, Any
@@ -19,10 +18,6 @@ logger = logging.getLogger(__name__)
try:
from telegram import Update, Bot, Message, InlineKeyboardButton, InlineKeyboardMarkup
try:
from telegram import LinkPreviewOptions
except ImportError:
LinkPreviewOptions = None
from telegram.ext import (
Application,
CommandHandler,
@@ -41,7 +36,6 @@ except ImportError:
Message = Any
InlineKeyboardButton = Any
InlineKeyboardMarkup = Any
LinkPreviewOptions = None
Application = Any
CommandHandler = Any
CallbackQueryHandler = Any
@@ -118,84 +112,6 @@ def _strip_mdv2(text: str) -> str:
return cleaned
# ---------------------------------------------------------------------------
# Markdown table → code block conversion
# ---------------------------------------------------------------------------
# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
# so pipe tables render as noisy backslash-pipe text with no alignment.
# Wrapping the table in a fenced code block makes Telegram render it as
# monospace preformatted text with columns intact.
# Matches a GFM table delimiter row: optional outer pipes, cells containing
# only dashes (with optional leading/trailing colons for alignment) separated
# by '|'. Requires at least one internal '|' so lone '---' horizontal rules
# are NOT matched.
_TABLE_SEPARATOR_RE = re.compile(
r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$'
)
def _is_table_row(line: str) -> bool:
"""Return True if *line* could plausibly be a table data row."""
stripped = line.strip()
return bool(stripped) and '|' in stripped
def _wrap_markdown_tables(text: str) -> str:
"""Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
Detected by a row containing '|' immediately followed by a delimiter
row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing
non-blank lines are consumed as the table body and included in the
wrapped block. Tables inside existing fenced code blocks are left
alone.
"""
if '|' not in text or '-' not in text:
return text
lines = text.split('\n')
out: list[str] = []
in_fence = False
i = 0
while i < len(lines):
line = lines[i]
stripped = line.lstrip()
# Track existing fenced code blocks — never touch content inside.
if stripped.startswith('```'):
in_fence = not in_fence
out.append(line)
i += 1
continue
if in_fence:
out.append(line)
i += 1
continue
# Look for a header row (contains '|') immediately followed by a
# delimiter row.
if (
'|' in line
and i + 1 < len(lines)
and _TABLE_SEPARATOR_RE.match(lines[i + 1])
):
table_block = [line, lines[i + 1]]
j = i + 2
while j < len(lines) and _is_table_row(lines[j]):
table_block.append(lines[j])
j += 1
out.append('```')
out.extend(table_block)
out.append('```')
i = j
continue
out.append(line)
i += 1
return '\n'.join(out)
class TelegramAdapter(BasePlatformAdapter):
"""
Telegram bot adapter.
@@ -213,7 +129,6 @@ class TelegramAdapter(BasePlatformAdapter):
# When a chunk is near this limit, a continuation is almost certain.
_SPLIT_THRESHOLD = 4000
MEDIA_GROUP_WAIT_SECONDS = 0.8
_GENERAL_TOPIC_THREAD_ID = "1"
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.TELEGRAM)
@@ -222,7 +137,6 @@ class TelegramAdapter(BasePlatformAdapter):
self._webhook_mode: bool = False
self._mention_patterns = self._compile_mention_patterns()
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
self._disable_link_previews: bool = self._coerce_bool_extra("disable_link_previews", False)
# Buffer rapid/album photo updates so Telegram image bursts are handled
# as a single MessageEvent instead of self-interrupting multiple turns.
self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
@@ -249,38 +163,6 @@ class TelegramAdapter(BasePlatformAdapter):
# Approval button state: message_id → session_key
self._approval_state: Dict[int, str] = {}
@staticmethod
def _is_callback_user_authorized(user_id: str) -> bool:
"""Return whether a Telegram inline-button caller may perform gated actions."""
allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
if not allowed_csv:
return True
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
return "*" in allowed_ids or user_id in allowed_ids
@classmethod
def _metadata_thread_id(cls, metadata: Optional[Dict[str, Any]]) -> Optional[str]:
if not metadata:
return None
thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
return str(thread_id) if thread_id is not None else None
@classmethod
def _message_thread_id_for_send(cls, thread_id: Optional[str]) -> Optional[int]:
if not thread_id or str(thread_id) == cls._GENERAL_TOPIC_THREAD_ID:
return None
return int(thread_id)
@classmethod
def _message_thread_id_for_typing(cls, thread_id: Optional[str]) -> Optional[int]:
if not thread_id:
return None
return int(thread_id)
@staticmethod
def _is_thread_not_found_error(error: Exception) -> bool:
return "thread not found" in str(error).lower()
def _fallback_ips(self) -> list[str]:
"""Return validated fallback IPs from config (populated by _apply_env_overrides)."""
configured = self.config.extra.get("fallback_ips", []) if getattr(self.config, "extra", None) else []
@@ -311,26 +193,6 @@ class TelegramAdapter(BasePlatformAdapter):
pass
return isinstance(error, OSError)
def _coerce_bool_extra(self, key: str, default: bool = False) -> bool:
value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None
if value is None:
return default
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in ("true", "1", "yes", "on"):
return True
if lowered in ("false", "0", "no", "off"):
return False
return default
return bool(value)
def _link_preview_kwargs(self) -> Dict[str, Any]:
if not getattr(self, "_disable_link_previews", False):
return {}
if LinkPreviewOptions is not None:
return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
return {"disable_web_page_preview": True}
async def _handle_polling_network_error(self, error: Exception) -> None:
"""Reconnect polling after a transient network interruption.
@@ -678,7 +540,7 @@ class TelegramAdapter(BasePlatformAdapter):
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
}
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
proxy_url = resolve_proxy_url()
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
fallback_ips = self._fallback_ips()
if not fallback_ips:
@@ -744,14 +606,14 @@ class TelegramAdapter(BasePlatformAdapter):
from telegram.error import NetworkError, TimedOut
except ImportError:
NetworkError = TimedOut = OSError # type: ignore[misc,assignment]
_max_connect = 8
_max_connect = 3
for _attempt in range(_max_connect):
try:
await self._app.initialize()
break
except (NetworkError, TimedOut, OSError) as init_err:
if _attempt < _max_connect - 1:
wait = min(2 ** _attempt, 15)
wait = 2 ** _attempt
logger.warning(
"[%s] Connect attempt %d/%d failed: %s — retrying in %ds",
self.name, _attempt + 1, _max_connect, init_err, wait,
@@ -952,7 +814,7 @@ class TelegramAdapter(BasePlatformAdapter):
]
message_ids = []
thread_id = self._metadata_thread_id(metadata)
thread_id = metadata.get("thread_id") if metadata else None
try:
from telegram.error import NetworkError as _NetErr
@@ -972,7 +834,7 @@ class TelegramAdapter(BasePlatformAdapter):
for i, chunk in enumerate(chunks):
should_thread = self._should_thread_reply(reply_to, i)
reply_to_id = int(reply_to) if should_thread else None
effective_thread_id = self._message_thread_id_for_send(thread_id)
effective_thread_id = int(thread_id) if thread_id else None
msg = None
for _send_attempt in range(3):
@@ -985,7 +847,6 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=ParseMode.MARKDOWN_V2,
reply_to_message_id=reply_to_id,
message_thread_id=effective_thread_id,
**self._link_preview_kwargs(),
)
except Exception as md_error:
# Markdown parsing failed, try plain text
@@ -998,7 +859,6 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=None,
reply_to_message_id=reply_to_id,
message_thread_id=effective_thread_id,
**self._link_preview_kwargs(),
)
else:
raise
@@ -1009,7 +869,8 @@ class TelegramAdapter(BasePlatformAdapter):
# (not transient network issues). Detect and handle
# specific cases instead of blindly retrying.
if _BadReq and isinstance(send_err, _BadReq):
if self._is_thread_not_found_error(send_err) and effective_thread_id is not None:
err_lower = str(send_err).lower()
if "thread not found" in err_lower and effective_thread_id is not None:
# Thread doesn't exist — retry without
# message_thread_id so the message still
# reaches the chat.
@@ -1019,7 +880,6 @@ class TelegramAdapter(BasePlatformAdapter):
)
effective_thread_id = None
continue
err_lower = str(send_err).lower()
if "message to be replied not found" in err_lower and reply_to_id is not None:
# Original message was deleted before we
# could reply — clear reply target and retry
@@ -1186,7 +1046,6 @@ class TelegramAdapter(BasePlatformAdapter):
text=text,
parse_mode=ParseMode.MARKDOWN,
reply_markup=keyboard,
**self._link_preview_kwargs(),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1209,13 +1068,15 @@ class TelegramAdapter(BasePlatformAdapter):
try:
cmd_preview = command[:3800] + "..." if len(command) > 3800 else command
text = (
f"⚠️ <b>Command Approval Required</b>\n\n"
f"<pre>{_html.escape(cmd_preview)}</pre>\n\n"
f"Reason: {_html.escape(description)}"
f"⚠️ *Command Approval Required*\n\n"
f"`{cmd_preview}`\n\n"
f"Reason: {description}"
)
# Resolve thread context for thread replies
thread_id = self._metadata_thread_id(metadata)
thread_id = None
if metadata:
thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
# We'll use the message_id as part of callback_data to look up session_key
# Send a placeholder first, then update — or use a counter.
@@ -1239,13 +1100,11 @@ class TelegramAdapter(BasePlatformAdapter):
kwargs: Dict[str, Any] = {
"chat_id": int(chat_id),
"text": text,
"parse_mode": ParseMode.HTML,
"parse_mode": ParseMode.MARKDOWN,
"reply_markup": keyboard,
**self._link_preview_kwargs(),
}
message_thread_id = self._message_thread_id_for_send(thread_id)
if message_thread_id is not None:
kwargs["message_thread_id"] = message_thread_id
if thread_id:
kwargs["message_thread_id"] = int(thread_id)
msg = await self._bot.send_message(**kwargs)
@@ -1313,7 +1172,6 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=ParseMode.MARKDOWN,
reply_markup=keyboard,
message_thread_id=int(thread_id) if thread_id else None,
**self._link_preview_kwargs(),
)
# Store picker state keyed by chat_id
@@ -1582,9 +1440,12 @@ class TelegramAdapter(BasePlatformAdapter):
# Only authorized users may click approval buttons.
caller_id = str(getattr(query.from_user, "id", ""))
if not self._is_callback_user_authorized(caller_id):
await query.answer(text="⛔ You are not authorized to approve commands.")
return
allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
if allowed_csv:
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
if "*" not in allowed_ids and caller_id not in allowed_ids:
await query.answer(text="⛔ You are not authorized to approve commands.")
return
session_key = self._approval_state.pop(approval_id, None)
if not session_key:
@@ -1629,10 +1490,6 @@ class TelegramAdapter(BasePlatformAdapter):
if not data.startswith("update_prompt:"):
return
answer = data.split(":", 1)[1] # "y" or "n"
caller_id = str(getattr(query.from_user, "id", ""))
if not self._is_callback_user_authorized(caller_id):
await query.answer(text="⛔ You are not authorized to answer update prompts.")
return
await query.answer(text=f"Sent '{answer}' to the update process.")
# Edit the message to show the choice and remove buttons
label = "Yes" if answer == "y" else "No"
@@ -1678,23 +1535,23 @@ class TelegramAdapter(BasePlatformAdapter):
with open(audio_path, "rb") as audio_file:
# .ogg files -> send as voice (round playable bubble)
if audio_path.endswith((".ogg", ".opus")):
_voice_thread = self._metadata_thread_id(metadata)
_voice_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_voice(
chat_id=int(chat_id),
voice=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_voice_thread),
message_thread_id=int(_voice_thread) if _voice_thread else None,
)
else:
# .mp3 and others -> send as audio file
_audio_thread = self._metadata_thread_id(metadata)
_audio_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_audio(
chat_id=int(chat_id),
audio=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_audio_thread),
message_thread_id=int(_audio_thread) if _audio_thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1724,14 +1581,14 @@ class TelegramAdapter(BasePlatformAdapter):
if not os.path.exists(image_path):
return SendResult(success=False, error=f"Image file not found: {image_path}")
_thread = self._metadata_thread_id(metadata)
_thread = metadata.get("thread_id") if metadata else None
with open(image_path, "rb") as image_file:
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_thread),
message_thread_id=int(_thread) if _thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1762,7 +1619,7 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error=f"File not found: {file_path}")
display_name = file_name or os.path.basename(file_path)
_thread = self._metadata_thread_id(metadata)
_thread = metadata.get("thread_id") if metadata else None
with open(file_path, "rb") as f:
msg = await self._bot.send_document(
@@ -1771,7 +1628,7 @@ class TelegramAdapter(BasePlatformAdapter):
filename=display_name,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_thread),
message_thread_id=int(_thread) if _thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1795,14 +1652,14 @@ class TelegramAdapter(BasePlatformAdapter):
if not os.path.exists(video_path):
return SendResult(success=False, error=f"Video file not found: {video_path}")
_thread = self._metadata_thread_id(metadata)
_thread = metadata.get("thread_id") if metadata else None
with open(video_path, "rb") as f:
msg = await self._bot.send_video(
chat_id=int(chat_id),
video=f,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_thread),
message_thread_id=int(_thread) if _thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1832,13 +1689,13 @@ class TelegramAdapter(BasePlatformAdapter):
try:
# Telegram can send photos directly from URLs (up to ~5MB)
_photo_thread = self._metadata_thread_id(metadata)
_photo_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_url,
caption=caption[:1024] if caption else None, # Telegram caption limit
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_photo_thread),
message_thread_id=int(_photo_thread) if _photo_thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1861,7 +1718,6 @@ class TelegramAdapter(BasePlatformAdapter):
photo=image_data,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_photo_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e2:
@@ -1887,13 +1743,13 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
_anim_thread = self._metadata_thread_id(metadata)
_anim_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_animation(
chat_id=int(chat_id),
animation=animation_url,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_anim_thread),
message_thread_id=int(_anim_thread) if _anim_thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1910,23 +1766,12 @@ class TelegramAdapter(BasePlatformAdapter):
"""Send typing indicator."""
if self._bot:
try:
_typing_thread = self._metadata_thread_id(metadata)
message_thread_id = self._message_thread_id_for_typing(_typing_thread)
try:
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=message_thread_id,
)
except Exception as e:
if message_thread_id is not None and self._is_thread_not_found_error(e):
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=None,
)
else:
raise
_typing_thread = metadata.get("thread_id") if metadata else None
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=int(_typing_thread) if _typing_thread else None,
)
except Exception as e:
# Typing failures are non-fatal; log at debug level only.
logger.debug(
@@ -1994,12 +1839,6 @@ class TelegramAdapter(BasePlatformAdapter):
text = content
# 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't
# render tables natively, but fenced code blocks render as
# monospace preformatted text with columns intact. The wrapped
# tables then flow through step (1) below as protected regions.
text = _wrap_markdown_tables(text)
# 1) Protect fenced code blocks (``` ... ```)
# Per MarkdownV2 spec, \ and ` inside pre/code must be escaped.
def _protect_fenced(m):
@@ -2326,7 +2165,7 @@ class TelegramAdapter(BasePlatformAdapter):
if not self._should_process_message(update.message):
return
event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id)
event = self._build_message_event(update.message, MessageType.TEXT)
event.text = self._clean_bot_trigger_text(event.text)
self._enqueue_text_event(event)
@@ -2337,7 +2176,7 @@ class TelegramAdapter(BasePlatformAdapter):
if not self._should_process_message(update.message, is_command=True):
return
event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id)
event = self._build_message_event(update.message, MessageType.COMMAND)
await self.handle_message(event)
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -2373,7 +2212,7 @@ class TelegramAdapter(BasePlatformAdapter):
parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
event = self._build_message_event(msg, MessageType.LOCATION, update_id=update.update_id)
event = self._build_message_event(msg, MessageType.LOCATION)
event.text = "\n".join(parts)
await self.handle_message(event)
@@ -2524,7 +2363,7 @@ class TelegramAdapter(BasePlatformAdapter):
else:
msg_type = MessageType.DOCUMENT
event = self._build_message_event(msg, msg_type, update_id=update.update_id)
event = self._build_message_event(msg, msg_type)
# Add caption as text
if msg.caption:
@@ -2863,19 +2702,8 @@ class TelegramAdapter(BasePlatformAdapter):
self.name, cache_key, thread_id,
)
def _build_message_event(
self,
message: Message,
msg_type: MessageType,
update_id: Optional[int] = None,
) -> MessageEvent:
"""Build a MessageEvent from a Telegram message.
``update_id`` is the ``Update.update_id`` from PTB; passing it through
lets ``/restart`` record the triggering offset so the new gateway
process can advance past it (prevents ``/restart`` being re-delivered
when PTB's graceful-shutdown ACK fails).
"""
def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
"""Build a MessageEvent from a Telegram message."""
chat = message.chat
user = message.from_user
@@ -2888,9 +2716,7 @@ class TelegramAdapter(BasePlatformAdapter):
# Resolve DM topic name and skill binding
thread_id_raw = message.message_thread_id
thread_id_str = str(thread_id_raw) if thread_id_raw is not None else None
if chat_type == "group" and thread_id_str is None and getattr(chat, "is_forum", False):
thread_id_str = self._GENERAL_TOPIC_THREAD_ID
thread_id_str = str(thread_id_raw) if thread_id_raw else None
chat_topic = None
topic_skill = None
@@ -2939,26 +2765,15 @@ class TelegramAdapter(BasePlatformAdapter):
reply_to_id = str(message.reply_to_message.message_id)
reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None
# Per-channel/topic ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_chat_id_str = str(chat.id)
_channel_prompt = resolve_channel_prompt(
self.config.extra,
thread_id_str or _chat_id_str,
_chat_id_str if thread_id_str else None,
)
return MessageEvent(
text=message.text or "",
message_type=msg_type,
source=source,
raw_message=message,
message_id=str(message.message_id),
platform_update_id=update_id,
reply_to_message_id=reply_to_id,
reply_to_text=reply_to_text,
auto_skill=topic_skill,
channel_prompt=_channel_prompt,
timestamp=message.date,
)
+1 -1
View File
@@ -46,7 +46,7 @@ _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
def _resolve_proxy_url() -> str | None:
# Delegate to shared implementation (env vars + macOS system proxy detection)
from gateway.platforms.base import resolve_proxy_url
return resolve_proxy_url("TELEGRAM_PROXY")
return resolve_proxy_url()
class TelegramFallbackTransport(httpx.AsyncBaseTransport):
+10 -41
View File
@@ -180,8 +180,6 @@ class WeComAdapter(BasePlatformAdapter):
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
self._pending_text_batches: Dict[str, MessageEvent] = {}
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
self._device_id = uuid.uuid4().hex
self._last_chat_req_ids: Dict[str, str] = {}
# ------------------------------------------------------------------
# Connection lifecycle
@@ -279,11 +277,7 @@ class WeComAdapter(BasePlatformAdapter):
{
"cmd": APP_CMD_SUBSCRIBE,
"headers": {"req_id": req_id},
"body": {
"bot_id": self._bot_id,
"secret": self._secret,
"device_id": self._device_id,
},
"body": {"bot_id": self._bot_id, "secret": self._secret},
}
)
@@ -502,11 +496,6 @@ class WeComAdapter(BasePlatformAdapter):
logger.debug("[%s] DM sender %s blocked by policy", self.name, sender_id)
return
# Cache the inbound req_id after policy checks so proactive sends to
# this chat can fall back to APP_CMD_RESPONSE (required for groups —
# WeCom AI Bots cannot initiate APP_CMD_SEND in group chats).
self._remember_chat_req_id(chat_id, self._payload_req_id(payload))
text, reply_text = self._extract_text(body)
media_urls, media_types = await self._extract_media(body)
message_type = self._derive_message_type(body, text, media_types)
@@ -858,23 +847,6 @@ class WeComAdapter(BasePlatformAdapter):
while len(self._reply_req_ids) > DEDUP_MAX_SIZE:
self._reply_req_ids.pop(next(iter(self._reply_req_ids)))
def _remember_chat_req_id(self, chat_id: str, req_id: str) -> None:
"""Cache the most recent inbound req_id per chat.
Used as a fallback reply target when we need to send into a group
without an explicit ``reply_to`` WeCom AI Bots are blocked from
APP_CMD_SEND in groups and must use APP_CMD_RESPONSE bound to some
prior req_id. Bounded like _reply_req_ids so long-running gateways
don't leak memory across many chats.
"""
normalized_chat_id = str(chat_id or "").strip()
normalized_req_id = str(req_id or "").strip()
if not normalized_chat_id or not normalized_req_id:
return
self._last_chat_req_ids[normalized_chat_id] = normalized_req_id
while len(self._last_chat_req_ids) > DEDUP_MAX_SIZE:
self._last_chat_req_ids.pop(next(iter(self._last_chat_req_ids)))
def _reply_req_id_for_message(self, reply_to: Optional[str]) -> Optional[str]:
normalized = str(reply_to or "").strip()
if not normalized or normalized.startswith("quote:"):
@@ -1191,15 +1163,19 @@ class WeComAdapter(BasePlatformAdapter):
self._raise_for_wecom_error(response, "send media message")
return response
async def _send_reply_markdown(self, reply_req_id: str, content: str) -> Dict[str, Any]:
async def _send_reply_stream(self, reply_req_id: str, content: str) -> Dict[str, Any]:
response = await self._send_reply_request(
reply_req_id,
{
"msgtype": "markdown",
"markdown": {"content": content[:self.MAX_MESSAGE_LENGTH]},
"msgtype": "stream",
"stream": {
"id": self._new_req_id("stream"),
"finish": True,
"content": content[:self.MAX_MESSAGE_LENGTH],
},
},
)
self._raise_for_wecom_error(response, "send reply markdown")
self._raise_for_wecom_error(response, "send reply stream")
return response
async def _send_reply_media_message(
@@ -1259,9 +1235,6 @@ class WeComAdapter(BasePlatformAdapter):
return SendResult(success=False, error=prepared["reject_reason"])
reply_req_id = self._reply_req_id_for_message(reply_to)
if not reply_req_id and chat_id in self._last_chat_req_ids:
reply_req_id = self._last_chat_req_ids[chat_id]
try:
upload_result = await self._upload_media_bytes(
prepared["data"],
@@ -1329,12 +1302,8 @@ class WeComAdapter(BasePlatformAdapter):
try:
reply_req_id = self._reply_req_id_for_message(reply_to)
if not reply_req_id and chat_id in self._last_chat_req_ids:
reply_req_id = self._last_chat_req_ids[chat_id]
if reply_req_id:
response = await self._send_reply_markdown(reply_req_id, content)
response = await self._send_reply_stream(reply_req_id, content)
else:
response = await self._send_request(
APP_CMD_SEND,
-14
View File
@@ -258,20 +258,6 @@ class WecomCallbackAdapter(BasePlatformAdapter):
)
event = self._build_event(app, decrypted)
if event is not None:
# Deduplicate: WeCom retries callbacks on timeout,
# producing duplicate inbound messages (#10305).
if event.message_id:
now = time.time()
if event.message_id in self._seen_messages:
if now - self._seen_messages[event.message_id] < MESSAGE_DEDUP_TTL_SECONDS:
logger.debug("[WecomCallback] Duplicate MsgId %s, skipping", event.message_id)
return web.Response(text="success", content_type="text/plain")
del self._seen_messages[event.message_id]
self._seen_messages[event.message_id] = now
# Prune expired entries when cache grows large
if len(self._seen_messages) > 2000:
cutoff = now - MESSAGE_DEDUP_TTL_SECONDS
self._seen_messages = {k: v for k, v in self._seen_messages.items() if v > cutoff}
# Record which app this user belongs to.
if event.source and event.source.user_id:
map_key = self._user_app_key(
+86 -310
View File
@@ -28,7 +28,7 @@ import uuid
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import quote, urlparse
from urllib.parse import quote
logger = logging.getLogger(__name__)
@@ -96,28 +96,6 @@ MEDIA_VIDEO = 2
MEDIA_FILE = 3
MEDIA_VOICE = 4
_LIVE_ADAPTERS: Dict[str, Any] = {}
def _make_ssl_connector() -> Optional["aiohttp.TCPConnector"]:
"""Return a TCPConnector with a certifi CA bundle, or None if certifi is unavailable.
Tencent's iLink server (``ilinkai.weixin.qq.com``) is not verifiable against
some system CA stores (notably Homebrew's OpenSSL on macOS Apple Silicon).
When ``certifi`` is installed, use its Mozilla CA bundle to guarantee
verification. Otherwise fall back to aiohttp's default (which honors
``SSL_CERT_FILE`` env var via ``trust_env=True``).
"""
try:
import ssl
import certifi
except ImportError:
return None
if not AIOHTTP_AVAILABLE:
return None
ssl_ctx = ssl.create_default_context(cafile=certifi.where())
return aiohttp.TCPConnector(ssl=ssl_ctx)
ITEM_TEXT = 1
ITEM_IMAGE = 2
ITEM_VOICE = 3
@@ -420,12 +398,7 @@ async def _send_message(
text: str,
context_token: Optional[str],
client_id: str,
) -> Dict[str, Any]:
"""Send a text message via iLink sendmessage API.
Returns the raw API response dict (may contain error codes like
``errcode: -14`` for session expiry that the caller can inspect).
"""
) -> None:
if not text or not text.strip():
raise ValueError("_send_message: text must not be empty")
message: Dict[str, Any] = {
@@ -438,7 +411,7 @@ async def _send_message(
}
if context_token:
message["context_token"] = context_token
return await _api_post(
await _api_post(
session,
base_url=base_url,
endpoint=EP_SEND_MESSAGE,
@@ -560,39 +533,6 @@ async def _download_bytes(
return await response.read()
_WEIXIN_CDN_ALLOWLIST: frozenset[str] = frozenset(
{
"novac2c.cdn.weixin.qq.com",
"ilinkai.weixin.qq.com",
"wx.qlogo.cn",
"thirdwx.qlogo.cn",
"res.wx.qq.com",
"mmbiz.qpic.cn",
"mmbiz.qlogo.cn",
}
)
def _assert_weixin_cdn_url(url: str) -> None:
"""Raise ValueError if *url* does not point at a known WeChat CDN host."""
try:
parsed = urlparse(url)
scheme = parsed.scheme.lower()
host = parsed.hostname or ""
except Exception as exc: # noqa: BLE001
raise ValueError(f"Unparseable media URL: {url!r}") from exc
if scheme not in ("http", "https"):
raise ValueError(
f"Media URL has disallowed scheme {scheme!r}; only http/https are permitted."
)
if host not in _WEIXIN_CDN_ALLOWLIST:
raise ValueError(
f"Media URL host {host!r} is not in the WeChat CDN allowlist. "
"Refusing to fetch to prevent SSRF."
)
def _media_reference(item: Dict[str, Any], key: str) -> Dict[str, Any]:
return (item.get(key) or {}).get("media") or {}
@@ -613,7 +553,6 @@ async def _download_and_decrypt_media(
timeout_seconds=timeout_seconds,
)
elif full_url:
_assert_weixin_cdn_url(full_url)
raw = await _download_bytes(session, url=full_url, timeout_seconds=timeout_seconds)
else:
raise RuntimeError("media item had neither encrypt_query_param nor full_url")
@@ -684,31 +623,42 @@ def _rewrite_table_block_for_weixin(lines: List[str]) -> str:
def _normalize_markdown_blocks(content: str) -> str:
lines = content.splitlines()
result: List[str] = []
i = 0
in_code_block = False
blank_run = 0
for raw_line in lines:
line = raw_line.rstrip()
if _FENCE_RE.match(line.strip()):
while i < len(lines):
line = lines[i].rstrip()
fence_match = _FENCE_RE.match(line.strip())
if fence_match:
in_code_block = not in_code_block
result.append(line)
blank_run = 0
i += 1
continue
if in_code_block:
result.append(line)
i += 1
continue
if not line.strip():
blank_run += 1
if blank_run <= 1:
result.append("")
if (
i + 1 < len(lines)
and "|" in lines[i]
and _TABLE_RULE_RE.match(lines[i + 1].rstrip())
):
table_lines = [lines[i].rstrip(), lines[i + 1].rstrip()]
i += 2
while i < len(lines) and "|" in lines[i]:
table_lines.append(lines[i].rstrip())
i += 1
result.append(_rewrite_table_block_for_weixin(table_lines))
continue
blank_run = 0
result.append(line)
result.append(_MARKDOWN_LINK_RE.sub(r"\1 (\2)", _rewrite_headers_for_weixin(line)))
i += 1
return "\n".join(result).strip()
normalized = "\n".join(item.rstrip() for item in result)
normalized = re.sub(r"\n{3,}", "\n\n", normalized)
return normalized.strip()
def _split_markdown_blocks(content: str) -> List[str]:
@@ -754,8 +704,8 @@ def _split_delivery_units_for_weixin(content: str) -> List[str]:
Weixin can render Markdown, but chat readability is better when top-level
line breaks become separate messages. Keep fenced code blocks intact and
attach indented continuation lines to the previous top-level line so nested
list items do not get torn apart.
attach indented continuation lines to the previous top-level line so
transformed tables/lists do not get torn apart.
"""
units: List[str] = []
@@ -797,9 +747,7 @@ def _looks_like_chatty_line_for_weixin(line: str) -> bool:
return False
if line.startswith((" ", "\t")):
return False
if stripped.startswith((">", "-", "*", "", "#", "|")):
return False
if _TABLE_RULE_RE.match(stripped):
if stripped.startswith((">", "-", "*", "")):
return False
if re.match(r"^\*\*[^*]+\*\*$", stripped):
return False
@@ -809,12 +757,10 @@ def _looks_like_chatty_line_for_weixin(line: str) -> bool:
def _looks_like_heading_line_for_weixin(line: str) -> bool:
"""Return True when a short line behaves like a heading."""
"""Return True when a short line behaves like a plain-text heading."""
stripped = line.strip()
if not stripped:
return False
if _HEADER_RE.match(stripped):
return True
return len(stripped) <= 24 and stripped.endswith((":", ""))
@@ -989,7 +935,7 @@ async def qr_login(
if not AIOHTTP_AVAILABLE:
raise RuntimeError("aiohttp is required for Weixin QR login")
async with aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) as session:
async with aiohttp.ClientSession(trust_env=True) as session:
try:
qr_resp = await _api_get(
session,
@@ -1007,10 +953,6 @@ async def qr_login(
logger.error("weixin: QR response missing qrcode")
return None
# qrcode_url is the full scannable liteapp URL; qrcode_value is just the hex token
# WeChat needs to scan the full URL, not the raw hex string
qr_scan_data = qrcode_url if qrcode_url else qrcode_value
print("\n请使用微信扫描以下二维码:")
if qrcode_url:
print(qrcode_url)
@@ -1018,11 +960,11 @@ async def qr_login(
import qrcode
qr = qrcode.QRCode()
qr.add_data(qr_scan_data)
qr.add_data(qrcode_url or qrcode_value)
qr.make(fit=True)
qr.print_ascii(invert=True)
except Exception as _qr_exc:
print(f"(终端二维码渲染失败: {_qr_exc},请直接打开上面的二维码链接)")
except Exception:
print("(终端二维码渲染失败,请直接打开上面的二维码链接)")
deadline = time.time() + timeout_seconds
current_base_url = ILINK_BASE_URL
@@ -1068,17 +1010,8 @@ async def qr_login(
)
qrcode_value = str(qr_resp.get("qrcode") or "")
qrcode_url = str(qr_resp.get("qrcode_img_content") or "")
qr_scan_data = qrcode_url if qrcode_url else qrcode_value
if qrcode_url:
print(qrcode_url)
try:
import qrcode as _qrcode
qr = _qrcode.QRCode()
qr.add_data(qr_scan_data)
qr.make(fit=True)
qr.print_ascii(invert=True)
except Exception:
pass
except Exception as exc:
logger.error("weixin: QR refresh failed: %s", exc)
return None
@@ -1126,8 +1059,7 @@ class WeixinAdapter(BasePlatformAdapter):
self._hermes_home = hermes_home
self._token_store = ContextTokenStore(hermes_home)
self._typing_cache = TypingTicketCache()
self._poll_session: Optional[aiohttp.ClientSession] = None
self._send_session: Optional[aiohttp.ClientSession] = None
self._session: Optional[aiohttp.ClientSession] = None
self._poll_task: Optional[asyncio.Task] = None
self._dedup = MessageDeduplicator(ttl_seconds=MESSAGE_DEDUP_TTL_SECONDS)
@@ -1202,17 +1134,14 @@ class WeixinAdapter(BasePlatformAdapter):
except Exception as exc:
logger.debug("[%s] Token lock unavailable (non-fatal): %s", self.name, exc)
self._poll_session = aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector())
self._send_session = aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector())
self._session = aiohttp.ClientSession(trust_env=True)
self._token_store.restore(self._account_id)
self._poll_task = asyncio.create_task(self._poll_loop(), name="weixin-poll")
self._mark_connected()
_LIVE_ADAPTERS[self._token] = self
logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url)
return True
async def disconnect(self) -> None:
_LIVE_ADAPTERS.pop(self._token, None)
self._running = False
if self._poll_task and not self._poll_task.done():
self._poll_task.cancel()
@@ -1221,18 +1150,15 @@ class WeixinAdapter(BasePlatformAdapter):
except asyncio.CancelledError:
pass
self._poll_task = None
if self._poll_session and not self._poll_session.closed:
await self._poll_session.close()
self._poll_session = None
if self._send_session and not self._send_session.closed:
await self._send_session.close()
self._send_session = None
if self._session and not self._session.closed:
await self._session.close()
self._session = None
self._release_platform_lock()
self._mark_disconnected()
logger.info("[%s] Disconnected", self.name)
async def _poll_loop(self) -> None:
assert self._poll_session is not None
assert self._session is not None
sync_buf = _load_sync_buf(self._hermes_home, self._account_id)
timeout_ms = LONG_POLL_TIMEOUT_MS
consecutive_failures = 0
@@ -1240,7 +1166,7 @@ class WeixinAdapter(BasePlatformAdapter):
while self._running:
try:
response = await _get_updates(
self._poll_session,
self._session,
base_url=self._base_url,
token=self._token,
sync_buf=sync_buf,
@@ -1297,7 +1223,7 @@ class WeixinAdapter(BasePlatformAdapter):
logger.error("[%s] unhandled inbound error from=%s: %s", self.name, _safe_id(message.get("from_user_id")), exc, exc_info=True)
async def _process_message(self, message: Dict[str, Any]) -> None:
assert self._poll_session is not None
assert self._session is not None
sender_id = str(message.get("from_user_id") or "").strip()
if not sender_id:
return
@@ -1390,7 +1316,7 @@ class WeixinAdapter(BasePlatformAdapter):
media = _media_reference(item, "image_item")
try:
data = await _download_and_decrypt_media(
self._poll_session,
self._session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=(item.get("image_item") or {}).get("aeskey")
@@ -1408,7 +1334,7 @@ class WeixinAdapter(BasePlatformAdapter):
media = _media_reference(item, "video_item")
try:
data = await _download_and_decrypt_media(
self._poll_session,
self._session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=media.get("aes_key"),
@@ -1427,7 +1353,7 @@ class WeixinAdapter(BasePlatformAdapter):
mime = _mime_from_filename(filename)
try:
data = await _download_and_decrypt_media(
self._poll_session,
self._session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=media.get("aes_key"),
@@ -1446,7 +1372,7 @@ class WeixinAdapter(BasePlatformAdapter):
return None
try:
data = await _download_and_decrypt_media(
self._poll_session,
self._session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=media.get("aes_key"),
@@ -1459,13 +1385,13 @@ class WeixinAdapter(BasePlatformAdapter):
return None
async def _maybe_fetch_typing_ticket(self, user_id: str, context_token: Optional[str]) -> None:
if not self._poll_session or not self._token:
if not self._session or not self._token:
return
if self._typing_cache.get(user_id):
return
try:
response = await _get_config(
self._poll_session,
self._session,
base_url=self._base_url,
token=self._token,
user_id=user_id,
@@ -1490,19 +1416,12 @@ class WeixinAdapter(BasePlatformAdapter):
context_token: Optional[str],
client_id: str,
) -> None:
"""Send a single text chunk with per-chunk retry and backoff.
On session-expired errors (errcode -14), automatically retries
*without* ``context_token`` iLink accepts tokenless sends as a
degraded fallback, which keeps cron-initiated push messages working
even when no user message has refreshed the session recently.
"""
"""Send a single text chunk with per-chunk retry and backoff."""
last_error: Optional[Exception] = None
retried_without_token = False
for attempt in range(self._send_chunk_retries + 1):
try:
resp = await _send_message(
self._send_session,
await _send_message(
self._session,
base_url=self._base_url,
token=self._token,
to=chat_id,
@@ -1510,31 +1429,6 @@ class WeixinAdapter(BasePlatformAdapter):
context_token=context_token,
client_id=client_id,
)
# Check iLink response for session-expired error
if resp and isinstance(resp, dict):
ret = resp.get("ret")
errcode = resp.get("errcode")
if (ret is not None and ret not in (0,)) or (errcode is not None and errcode not in (0,)):
is_session_expired = (
ret == SESSION_EXPIRED_ERRCODE
or errcode == SESSION_EXPIRED_ERRCODE
)
# Session expired — strip token and retry once
if is_session_expired and not retried_without_token and context_token:
retried_without_token = True
context_token = None
self._token_store._cache.pop(
self._token_store._key(self._account_id, chat_id), None
)
logger.warning(
"[%s] session expired for %s; retrying without context_token",
self.name, _safe_id(chat_id),
)
continue
errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
raise RuntimeError(
f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
)
return
except Exception as exc:
last_error = exc
@@ -1562,48 +1456,12 @@ class WeixinAdapter(BasePlatformAdapter):
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
if not self._send_session or not self._token:
if not self._session or not self._token:
return SendResult(success=False, error="Not connected")
context_token = self._token_store.get(self._account_id, chat_id)
last_message_id: Optional[str] = None
# Extract MEDIA: tags and bare local file paths before text delivery.
media_files, cleaned_content = self.extract_media(content)
_, image_cleaned = self.extract_images(cleaned_content)
local_files, final_content = self.extract_local_files(image_cleaned)
_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
async def _deliver_media(path: str, is_voice: bool = False) -> None:
ext = Path(path).suffix.lower()
if is_voice or ext in _AUDIO_EXTS:
await self.send_voice(chat_id=chat_id, audio_path=path, metadata=metadata)
elif ext in _VIDEO_EXTS:
await self.send_video(chat_id=chat_id, video_path=path, metadata=metadata)
elif ext in _IMAGE_EXTS:
await self.send_image_file(chat_id=chat_id, image_path=path, metadata=metadata)
else:
await self.send_document(chat_id=chat_id, file_path=path, metadata=metadata)
try:
# Deliver extracted MEDIA: attachments first.
for media_path, is_voice in media_files:
try:
await _deliver_media(media_path, is_voice)
except Exception as exc:
logger.warning("[%s] media delivery failed for %s: %s", self.name, media_path, exc)
# Deliver bare local file paths.
for file_path in local_files:
try:
await _deliver_media(file_path, is_voice=False)
except Exception as exc:
logger.warning("[%s] local file delivery failed for %s: %s", self.name, file_path, exc)
# Deliver text content.
chunks = [c for c in self._split_text(self.format_message(final_content)) if c and c.strip()]
chunks = [c for c in self._split_text(self.format_message(content)) if c and c.strip()]
for idx, chunk in enumerate(chunks):
client_id = f"hermes-weixin-{uuid.uuid4().hex}"
await self._send_text_chunk(
@@ -1621,14 +1479,14 @@ class WeixinAdapter(BasePlatformAdapter):
return SendResult(success=False, error=str(exc))
async def send_typing(self, chat_id: str, metadata: Optional[Dict[str, Any]] = None) -> None:
if not self._send_session or not self._token:
if not self._session or not self._token:
return
typing_ticket = self._typing_cache.get(chat_id)
if not typing_ticket:
return
try:
await _send_typing(
self._send_session,
self._session,
base_url=self._base_url,
token=self._token,
to_user_id=chat_id,
@@ -1639,14 +1497,14 @@ class WeixinAdapter(BasePlatformAdapter):
logger.debug("[%s] typing start failed for %s: %s", self.name, _safe_id(chat_id), exc)
async def stop_typing(self, chat_id: str) -> None:
if not self._send_session or not self._token:
if not self._session or not self._token:
return
typing_ticket = self._typing_cache.get(chat_id)
if not typing_ticket:
return
try:
await _send_typing(
self._send_session,
self._session,
base_url=self._base_url,
token=self._token,
to_user_id=chat_id,
@@ -1684,35 +1542,24 @@ class WeixinAdapter(BasePlatformAdapter):
async def send_image_file(
self,
chat_id: str,
image_path: str,
caption: Optional[str] = None,
path: str,
caption: str = "",
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
del reply_to, kwargs
return await self.send_document(
chat_id=chat_id,
file_path=image_path,
caption=caption,
metadata=metadata,
)
return await self.send_document(chat_id, file_path=path, caption=caption, metadata=metadata)
async def send_document(
self,
chat_id: str,
file_path: str,
caption: Optional[str] = None,
file_name: Optional[str] = None,
reply_to: Optional[str] = None,
caption: str = "",
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
del file_name, reply_to, metadata, kwargs
if not self._send_session or not self._token:
if not self._session or not self._token:
return SendResult(success=False, error="Not connected")
try:
message_id = await self._send_file(chat_id, file_path, caption or "")
message_id = await self._send_file(chat_id, file_path, caption)
return SendResult(success=True, message_id=message_id)
except Exception as exc:
logger.error("[%s] send_document failed to=%s: %s", self.name, _safe_id(chat_id), exc)
@@ -1726,7 +1573,7 @@ class WeixinAdapter(BasePlatformAdapter):
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
if not self._send_session or not self._token:
if not self._session or not self._token:
return SendResult(success=False, error="Not connected")
try:
message_id = await self._send_file(chat_id, video_path, caption or "")
@@ -1743,24 +1590,7 @@ class WeixinAdapter(BasePlatformAdapter):
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
if not self._send_session or not self._token:
return SendResult(success=False, error="Not connected")
# Native outbound Weixin voice bubbles are not proven-working in the
# upstream reference implementation. Prefer a reliable file attachment
# fallback so users at least receive playable audio, even for .silk.
fallback_caption = caption or "[voice message as attachment]"
try:
message_id = await self._send_file(
chat_id,
audio_path,
fallback_caption,
force_file_attachment=True,
)
return SendResult(success=True, message_id=message_id)
except Exception as exc:
logger.error("[%s] send_voice failed to=%s: %s", self.name, _safe_id(chat_id), exc)
return SendResult(success=False, error=str(exc))
return await self.send_document(chat_id, audio_path, caption=caption or "", metadata=metadata)
async def _download_remote_media(self, url: str) -> str:
from tools.url_safety import is_safe_url
@@ -1768,8 +1598,8 @@ class WeixinAdapter(BasePlatformAdapter):
if not is_safe_url(url):
raise ValueError(f"Blocked unsafe URL (SSRF protection): {url}")
assert self._send_session is not None
async with self._send_session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
assert self._session is not None
async with self._session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
response.raise_for_status()
data = await response.read()
suffix = Path(url.split("?", 1)[0]).suffix or ".bin"
@@ -1777,22 +1607,16 @@ class WeixinAdapter(BasePlatformAdapter):
handle.write(data)
return handle.name
async def _send_file(
self,
chat_id: str,
path: str,
caption: str,
force_file_attachment: bool = False,
) -> str:
assert self._send_session is not None and self._token is not None
async def _send_file(self, chat_id: str, path: str, caption: str) -> str:
assert self._session is not None and self._token is not None
plaintext = Path(path).read_bytes()
media_type, item_builder = self._outbound_media_builder(path, force_file_attachment=force_file_attachment)
media_type, item_builder = self._outbound_media_builder(path)
filekey = secrets.token_hex(16)
aes_key = secrets.token_bytes(16)
rawsize = len(plaintext)
rawfilemd5 = hashlib.md5(plaintext).hexdigest()
upload_response = await _get_upload_url(
self._send_session,
self._session,
base_url=self._base_url,
token=self._token,
to_user_id=chat_id,
@@ -1818,34 +1642,30 @@ class WeixinAdapter(BasePlatformAdapter):
raise RuntimeError(f"getUploadUrl returned neither upload_param nor upload_full_url: {upload_response}")
encrypted_query_param = await _upload_ciphertext(
self._send_session,
self._session,
ciphertext=ciphertext,
upload_url=upload_url,
)
context_token = self._token_store.get(self._account_id, chat_id)
# The iLink API expects aes_key as base64(hex_string), not base64(raw_bytes).
# Sending base64(raw_bytes) causes images to show as grey boxes on the
# receiver side because the decryption key doesn't match.
aes_key_for_api = base64.b64encode(aes_key.hex().encode("ascii")).decode("ascii")
item_kwargs = {
"encrypt_query_param": encrypted_query_param,
"aes_key_for_api": aes_key_for_api,
"ciphertext_size": len(ciphertext),
"plaintext_size": rawsize,
"filename": Path(path).name,
"rawfilemd5": rawfilemd5,
}
if media_type == MEDIA_VOICE and path.endswith(".silk"):
item_kwargs["encode_type"] = 6
item_kwargs["sample_rate"] = 24000
item_kwargs["bits_per_sample"] = 16
media_item = item_builder(**item_kwargs)
media_item = item_builder(
encrypt_query_param=encrypted_query_param,
aes_key_for_api=aes_key_for_api,
ciphertext_size=len(ciphertext),
plaintext_size=rawsize,
filename=Path(path).name,
rawfilemd5=rawfilemd5,
)
last_message_id = None
if caption:
last_message_id = f"hermes-weixin-{uuid.uuid4().hex}"
await _send_message(
self._send_session,
self._session,
base_url=self._base_url,
token=self._token,
to=chat_id,
@@ -1856,7 +1676,7 @@ class WeixinAdapter(BasePlatformAdapter):
last_message_id = f"hermes-weixin-{uuid.uuid4().hex}"
await _api_post(
self._send_session,
self._session,
base_url=self._base_url,
endpoint=EP_SEND_MESSAGE,
payload={
@@ -1875,7 +1695,7 @@ class WeixinAdapter(BasePlatformAdapter):
)
return last_message_id
def _outbound_media_builder(self, path: str, force_file_attachment: bool = False):
def _outbound_media_builder(self, path: str):
mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
if mime.startswith("image/"):
return MEDIA_IMAGE, lambda **kw: {
@@ -1903,7 +1723,7 @@ class WeixinAdapter(BasePlatformAdapter):
"video_md5": kw.get("rawfilemd5", ""),
},
}
if path.endswith(".silk") and not force_file_attachment:
if mime.startswith("audio/") or path.endswith(".silk"):
return MEDIA_VOICE, lambda **kw: {
"type": ITEM_VOICE,
"voice_item": {
@@ -1912,25 +1732,9 @@ class WeixinAdapter(BasePlatformAdapter):
"aes_key": kw["aes_key_for_api"],
"encrypt_type": 1,
},
"encode_type": kw.get("encode_type"),
"bits_per_sample": kw.get("bits_per_sample"),
"sample_rate": kw.get("sample_rate"),
"playtime": kw.get("playtime", 0),
},
}
if mime.startswith("audio/"):
return MEDIA_FILE, lambda **kw: {
"type": ITEM_FILE,
"file_item": {
"media": {
"encrypt_query_param": kw["encrypt_query_param"],
"aes_key": kw["aes_key_for_api"],
"encrypt_type": 1,
},
"file_name": kw["filename"],
"len": str(kw["plaintext_size"]),
},
}
return MEDIA_FILE, lambda **kw: {
"type": ITEM_FILE,
"file_item": {
@@ -1980,34 +1784,7 @@ async def send_weixin_direct(
token_store.restore(account_id)
context_token = token_store.get(account_id, chat_id)
live_adapter = _LIVE_ADAPTERS.get(resolved_token)
send_session = getattr(live_adapter, '_send_session', None)
if live_adapter is not None and send_session is not None and not send_session.closed:
last_result: Optional[SendResult] = None
cleaned = live_adapter.format_message(message)
if cleaned:
last_result = await live_adapter.send(chat_id, cleaned)
if not last_result.success:
return {"error": f"Weixin send failed: {last_result.error}"}
for media_path, _is_voice in media_files or []:
ext = Path(media_path).suffix.lower()
if ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}:
last_result = await live_adapter.send_image_file(chat_id, media_path)
else:
last_result = await live_adapter.send_document(chat_id, media_path)
if not last_result.success:
return {"error": f"Weixin media send failed: {last_result.error}"}
return {
"success": True,
"platform": "weixin",
"chat_id": chat_id,
"message_id": last_result.message_id if last_result else None,
"context_token_used": bool(context_token),
}
async with aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) as session:
async with aiohttp.ClientSession(trust_env=True) as session:
adapter = WeixinAdapter(
PlatformConfig(
enabled=True,
@@ -2020,7 +1797,6 @@ async def send_weixin_direct(
},
)
)
adapter._send_session = session
adapter._session = session
adapter._token = resolved_token
adapter._account_id = account_id
+361 -1352
View File
File diff suppressed because it is too large Load Diff
+2 -58
View File
@@ -82,7 +82,6 @@ class SessionSource:
chat_topic: Optional[str] = None # Channel topic/description (Discord, Slack)
user_id_alt: Optional[str] = None # Signal UUID (alternative to phone number)
chat_id_alt: Optional[str] = None # Signal group internal ID
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
@property
def description(self) -> str:
@@ -302,8 +301,6 @@ def build_session_context_prompt(
lines.append("")
lines.append("**Delivery options for scheduled tasks:**")
from hermes_constants import display_hermes_home
# Origin delivery
if context.source.platform == Platform.LOCAL:
lines.append("- `\"origin\"` → Local output (saved to files)")
@@ -312,11 +309,9 @@ def build_session_context_prompt(
_hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id
)
lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})")
# Local always available
lines.append(
f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)"
)
lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)")
# Platform home channels
for platform, home in context.home_channels.items():
@@ -802,57 +797,6 @@ class SessionStore:
return True
return False
def prune_old_entries(self, max_age_days: int) -> int:
"""Drop SessionEntry records older than max_age_days.
Pruning is based on ``updated_at`` (last activity), not ``created_at``.
A session that's been active within the window is kept regardless of
how old it is. Entries marked ``suspended`` are kept the user
explicitly paused them for later resume. Entries held by an active
process (via has_active_processes_fn) are also kept so long-running
background work isn't orphaned.
Pruning is functionally identical to a natural reset-policy expiry:
the transcript in SQLite stays, but the session_key session_id
mapping is dropped and the user starts a fresh session on return.
``max_age_days <= 0`` disables pruning; returns 0 immediately.
Returns the number of entries removed.
"""
if max_age_days is None or max_age_days <= 0:
return 0
from datetime import timedelta
cutoff = _now() - timedelta(days=max_age_days)
removed_keys: list[str] = []
with self._lock:
self._ensure_loaded_locked()
for key, entry in list(self._entries.items()):
if entry.suspended:
continue
# Never prune sessions with an active background process
# attached — the user may still be waiting on output.
if self._has_active_processes_fn is not None:
try:
if self._has_active_processes_fn(entry.session_id):
continue
except Exception:
pass
if entry.updated_at < cutoff:
removed_keys.append(key)
for key in removed_keys:
self._entries.pop(key, None)
if removed_keys:
self._save()
if removed_keys:
logger.info(
"SessionStore pruned %d entries older than %d days",
len(removed_keys), max_age_days,
)
return len(removed_keys)
def suspend_recently_active(self, max_age_seconds: int = 120) -> int:
"""Mark recently-active sessions as suspended.
+17 -34
View File
@@ -37,24 +37,18 @@ needs to replace the import + call site:
"""
from contextvars import ContextVar
from typing import Any
# Sentinel to distinguish "never set in this context" from "explicitly set to empty".
# When a contextvar holds _UNSET, we fall back to os.environ (CLI/cron compat).
# When it holds "" (after clear_session_vars resets it), we return "" — no fallback.
_UNSET: Any = object()
# ---------------------------------------------------------------------------
# Per-task session variables
# ---------------------------------------------------------------------------
_SESSION_PLATFORM: ContextVar = ContextVar("HERMES_SESSION_PLATFORM", default=_UNSET)
_SESSION_CHAT_ID: ContextVar = ContextVar("HERMES_SESSION_CHAT_ID", default=_UNSET)
_SESSION_CHAT_NAME: ContextVar = ContextVar("HERMES_SESSION_CHAT_NAME", default=_UNSET)
_SESSION_THREAD_ID: ContextVar = ContextVar("HERMES_SESSION_THREAD_ID", default=_UNSET)
_SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNSET)
_SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
_SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)
_SESSION_PLATFORM: ContextVar[str] = ContextVar("HERMES_SESSION_PLATFORM", default="")
_SESSION_CHAT_ID: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_ID", default="")
_SESSION_CHAT_NAME: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_NAME", default="")
_SESSION_THREAD_ID: ContextVar[str] = ContextVar("HERMES_SESSION_THREAD_ID", default="")
_SESSION_USER_ID: ContextVar[str] = ContextVar("HERMES_SESSION_USER_ID", default="")
_SESSION_USER_NAME: ContextVar[str] = ContextVar("HERMES_SESSION_USER_NAME", default="")
_SESSION_KEY: ContextVar[str] = ContextVar("HERMES_SESSION_KEY", default="")
_VAR_MAP = {
"HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
@@ -97,17 +91,10 @@ def set_session_vars(
def clear_session_vars(tokens: list) -> None:
"""Mark session context variables as explicitly cleared.
Sets all variables to ``""`` so that ``get_session_env`` returns an empty
string instead of falling back to (potentially stale) ``os.environ``
values. The *tokens* argument is accepted for API compatibility with
callers that saved the return value of ``set_session_vars``, but the
actual clearing uses ``var.set("")`` rather than ``var.reset(token)``
to ensure the "explicitly cleared" state is distinguishable from
"never set" (which holds the ``_UNSET`` sentinel).
"""
for var in (
"""Restore session context variables to their pre-handler values."""
if not tokens:
return
vars_in_order = [
_SESSION_PLATFORM,
_SESSION_CHAT_ID,
_SESSION_CHAT_NAME,
@@ -115,8 +102,9 @@ def clear_session_vars(tokens: list) -> None:
_SESSION_USER_ID,
_SESSION_USER_NAME,
_SESSION_KEY,
):
var.set("")
]
for var, token in zip(vars_in_order, tokens):
var.reset(token)
def get_session_env(name: str, default: str = "") -> str:
@@ -125,13 +113,8 @@ def get_session_env(name: str, default: str = "") -> str:
Drop-in replacement for ``os.getenv("HERMES_SESSION_*", default)``.
Resolution order:
1. Context variable (set by the gateway for concurrency-safe access).
If the variable was explicitly set (even to ``""``) via
``set_session_vars`` or ``clear_session_vars``, that value is
returned **no fallback to os.environ**.
2. ``os.environ`` (only when the context variable was never set in
this context i.e. CLI, cron scheduler, and test processes that
don't use ``set_session_vars`` at all).
1. Context variable (set by the gateway for concurrency-safe access)
2. ``os.environ`` (used by CLI, cron scheduler, and tests)
3. *default*
"""
import os
@@ -139,7 +122,7 @@ def get_session_env(name: str, default: str = "") -> str:
var = _VAR_MAP.get(name)
if var is not None:
value = var.get()
if value is not _UNSET:
if value:
return value
# Fall back to os.environ for CLI, cron, and test compatibility
return os.getenv(name, default)
+146 -160
View File
@@ -27,6 +27,7 @@ _RUNTIME_STATUS_FILE = "gateway_state.json"
_LOCKS_DIRNAME = "gateway-locks"
_IS_WINDOWS = sys.platform == "win32"
_UNSET = object()
_VALID_STARTUP_CHECK_STATES = {"pending", "ready", "failed"}
def _get_pid_path() -> Path:
@@ -162,11 +163,39 @@ def _build_runtime_status_record() -> dict[str, Any]:
"restart_requested": False,
"active_agents": 0,
"platforms": {},
"startup_checks": {},
"updated_at": _utc_now_iso(),
})
return payload
def _normalize_startup_check_entries(
startup_checks: Optional[dict[str, Any]],
) -> dict[str, dict[str, Any]]:
"""Normalize persisted startup readiness entries."""
if not isinstance(startup_checks, dict):
return {}
now = _utc_now_iso()
normalized: dict[str, dict[str, Any]] = {}
for raw_id, raw_payload in startup_checks.items():
check_id = str(raw_id).strip()
if not check_id:
continue
payload = raw_payload if isinstance(raw_payload, dict) else {}
state = str(payload.get("state", "pending")).strip().lower()
if state not in _VALID_STARTUP_CHECK_STATES:
state = "pending"
normalized[check_id] = {
"state": state,
"required": bool(payload.get("required", True)),
"source": payload.get("source"),
"detail": payload.get("detail"),
"updated_at": payload.get("updated_at") or now,
}
return normalized
def _read_json_file(path: Path) -> Optional[dict[str, Any]]:
if not path.exists():
return None
@@ -188,8 +217,8 @@ def _write_json_file(path: Path, payload: dict[str, Any]) -> None:
path.write_text(json.dumps(payload))
def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
pid_path = pid_path or _get_pid_path()
def _read_pid_record() -> Optional[dict]:
pid_path = _get_pid_path()
if not pid_path.exists():
return None
@@ -212,18 +241,6 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
return None
def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
if not cleanup_stale:
return
try:
if pid_path == _get_pid_path():
remove_pid_file()
else:
pid_path.unlink(missing_ok=True)
except Exception:
pass
def write_pid_file() -> None:
"""Write the current process PID and metadata to the gateway PID file."""
_write_json_file(_get_pid_path(), _build_pid_record())
@@ -235,6 +252,7 @@ def write_runtime_status(
exit_reason: Any = _UNSET,
restart_requested: Any = _UNSET,
active_agents: Any = _UNSET,
startup_checks: Any = _UNSET,
platform: Any = _UNSET,
platform_state: Any = _UNSET,
error_code: Any = _UNSET,
@@ -257,6 +275,8 @@ def write_runtime_status(
payload["restart_requested"] = bool(restart_requested)
if active_agents is not _UNSET:
payload["active_agents"] = max(0, int(active_agents))
if startup_checks is not _UNSET:
payload["startup_checks"] = _normalize_startup_check_entries(startup_checks)
if platform is not _UNSET:
platform_payload = payload["platforms"].get(platform, {})
@@ -274,7 +294,109 @@ def write_runtime_status(
def read_runtime_status() -> Optional[dict[str, Any]]:
"""Read the persisted gateway runtime health/status information."""
return _read_json_file(_get_runtime_status_path())
payload = _read_json_file(_get_runtime_status_path())
if payload is None:
return None
payload.setdefault("platforms", {})
payload["startup_checks"] = _normalize_startup_check_entries(payload.get("startup_checks"))
return payload
def reset_startup_checks(checks: Optional[list[dict[str, Any]]] = None) -> dict[str, dict[str, Any]]:
"""Replace persisted startup readiness checks for the current run."""
normalized: dict[str, dict[str, Any]] = {}
now = _utc_now_iso()
for hook in checks or []:
if not isinstance(hook, dict):
continue
readiness = hook.get("startup_readiness")
if not isinstance(readiness, dict):
continue
check_id = str(readiness.get("id", "")).strip()
if not check_id:
continue
normalized[check_id] = {
"state": "pending",
"required": bool(readiness.get("required", True)),
"source": hook.get("name"),
"detail": None,
"updated_at": now,
}
write_runtime_status(startup_checks=normalized)
return normalized
def update_startup_check(
check_id: str,
state: str,
*,
detail: Any = _UNSET,
required: Any = _UNSET,
source: Any = _UNSET,
) -> dict[str, Any]:
"""Update a single startup readiness check in the runtime status file."""
normalized_id = str(check_id).strip()
if not normalized_id:
raise ValueError("startup readiness check id is required")
normalized_state = str(state).strip().lower()
if normalized_state not in _VALID_STARTUP_CHECK_STATES:
raise ValueError(f"invalid startup readiness state: {state}")
path = _get_runtime_status_path()
payload = _read_json_file(path) or _build_runtime_status_record()
checks = _normalize_startup_check_entries(payload.get("startup_checks"))
existing = checks.get(normalized_id, {})
now = _utc_now_iso()
checks[normalized_id] = {
"state": normalized_state,
"required": bool(existing.get("required", True) if required is _UNSET else required),
"source": existing.get("source") if source is _UNSET else source,
"detail": existing.get("detail") if detail is _UNSET else detail,
"updated_at": now,
}
payload["startup_checks"] = checks
payload.setdefault("platforms", {})
payload.setdefault("kind", _GATEWAY_KIND)
payload["pid"] = os.getpid()
payload["start_time"] = _get_process_start_time(os.getpid())
payload["updated_at"] = now
_write_json_file(path, payload)
return checks[normalized_id]
def mark_startup_check_pending(
check_id: str,
*,
detail: Any = _UNSET,
required: Any = _UNSET,
source: Any = _UNSET,
) -> dict[str, Any]:
return update_startup_check(check_id, "pending", detail=detail, required=required, source=source)
def mark_startup_check_ready(
check_id: str,
*,
detail: Any = _UNSET,
required: Any = _UNSET,
source: Any = _UNSET,
) -> dict[str, Any]:
return update_startup_check(check_id, "ready", detail=detail, required=required, source=source)
def mark_startup_check_failed(
check_id: str,
*,
detail: Any = _UNSET,
required: Any = _UNSET,
source: Any = _UNSET,
) -> dict[str, Any]:
return update_startup_check(check_id, "failed", detail=detail, required=required, source=source)
def remove_pid_file() -> None:
@@ -425,179 +547,43 @@ def release_all_scoped_locks() -> int:
return removed
# ── --replace takeover marker ─────────────────────────────────────────
#
# When a new gateway starts with ``--replace``, it SIGTERMs the existing
# gateway so it can take over the bot token. PR #5646 made SIGTERM exit
# the gateway with code 1 so ``Restart=on-failure`` can revive it after
# unexpected kills — but that also means a --replace takeover target
# exits 1, which tricks systemd into reviving it 30 seconds later,
# starting a flap loop against the replacer when both services are
# enabled in the user's systemd (e.g. ``hermes.service`` + ``hermes-
# gateway.service``).
#
# The takeover marker breaks the loop: the replacer writes a short-lived
# file naming the target PID + start_time BEFORE sending SIGTERM.
# The target's shutdown handler reads the marker and, if it names
# this process, treats the SIGTERM as a planned takeover and exits 0.
# The marker is unlinked after the target has consumed it, so a stale
# marker left by a crashed replacer can grief at most one future
# shutdown on the same PID — and only within _TAKEOVER_MARKER_TTL_S.
_TAKEOVER_MARKER_FILENAME = ".gateway-takeover.json"
_TAKEOVER_MARKER_TTL_S = 60 # Marker older than this is treated as stale
def _get_takeover_marker_path() -> Path:
"""Return the path to the --replace takeover marker file."""
home = get_hermes_home()
return home / _TAKEOVER_MARKER_FILENAME
def write_takeover_marker(target_pid: int) -> bool:
"""Record that ``target_pid`` is being replaced by the current process.
Captures the target's ``start_time`` so that PID reuse after the
target exits cannot later match the marker. Also records the
replacer's PID and a UTC timestamp for TTL-based staleness checks.
Returns True on successful write, False on any failure. The caller
should proceed with the SIGTERM even if the write fails (the marker
is a best-effort signal, not a correctness requirement).
"""
try:
target_start_time = _get_process_start_time(target_pid)
record = {
"target_pid": target_pid,
"target_start_time": target_start_time,
"replacer_pid": os.getpid(),
"written_at": _utc_now_iso(),
}
_write_json_file(_get_takeover_marker_path(), record)
return True
except (OSError, PermissionError):
return False
def consume_takeover_marker_for_self() -> bool:
"""Check & unlink the takeover marker if it names the current process.
Returns True only when a valid (non-stale) marker names this PID +
start_time. A returning True indicates the current SIGTERM is a
planned --replace takeover; the caller should exit 0 instead of
signalling ``_signal_initiated_shutdown``.
Always unlinks the marker on match (and on detected staleness) so
subsequent unrelated signals don't re-trigger.
"""
path = _get_takeover_marker_path()
record = _read_json_file(path)
if not record:
return False
# Any malformed or stale marker → drop it and return False
try:
target_pid = int(record["target_pid"])
target_start_time = record.get("target_start_time")
written_at = record.get("written_at") or ""
except (KeyError, TypeError, ValueError):
try:
path.unlink(missing_ok=True)
except OSError:
pass
return False
# TTL guard: a stale marker older than _TAKEOVER_MARKER_TTL_S is ignored.
stale = False
try:
written_dt = datetime.fromisoformat(written_at)
age = (datetime.now(timezone.utc) - written_dt).total_seconds()
if age > _TAKEOVER_MARKER_TTL_S:
stale = True
except (TypeError, ValueError):
stale = True # Unparseable timestamp — treat as stale
if stale:
try:
path.unlink(missing_ok=True)
except OSError:
pass
return False
# Does the marker name THIS process?
our_pid = os.getpid()
our_start_time = _get_process_start_time(our_pid)
matches = (
target_pid == our_pid
and target_start_time is not None
and our_start_time is not None
and target_start_time == our_start_time
)
# Consume the marker whether it matched or not — a marker that doesn't
# match our identity is stale-for-us anyway.
try:
path.unlink(missing_ok=True)
except OSError:
pass
return matches
def clear_takeover_marker() -> None:
"""Remove the takeover marker unconditionally. Safe to call repeatedly."""
try:
_get_takeover_marker_path().unlink(missing_ok=True)
except OSError:
pass
def get_running_pid(
pid_path: Optional[Path] = None,
*,
cleanup_stale: bool = True,
) -> Optional[int]:
def get_running_pid() -> Optional[int]:
"""Return the PID of a running gateway instance, or ``None``.
Checks the PID file and verifies the process is actually alive.
Cleans up stale PID files automatically.
"""
resolved_pid_path = pid_path or _get_pid_path()
record = _read_pid_record(resolved_pid_path)
record = _read_pid_record()
if not record:
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
remove_pid_file()
return None
try:
pid = int(record["pid"])
except (KeyError, TypeError, ValueError):
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
remove_pid_file()
return None
try:
os.kill(pid, 0) # signal 0 = existence check, no actual signal sent
except (ProcessLookupError, PermissionError):
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
remove_pid_file()
return None
recorded_start = record.get("start_time")
current_start = _get_process_start_time(pid)
if recorded_start is not None and current_start is not None and current_start != recorded_start:
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
remove_pid_file()
return None
if not _looks_like_gateway_process(pid):
if not _record_looks_like_gateway(record):
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
remove_pid_file()
return None
return pid
def is_gateway_running(
pid_path: Optional[Path] = None,
*,
cleanup_stale: bool = True,
) -> bool:
def is_gateway_running() -> bool:
"""Check if the gateway daemon is currently running."""
return get_running_pid(pid_path, cleanup_stale=cleanup_stale) is not None
return get_running_pid() is not None
+24 -81
View File
@@ -43,7 +43,6 @@ class StreamConsumerConfig:
edit_interval: float = 1.0
buffer_threshold: int = 40
cursor: str = ""
buffer_only: bool = False
class GatewayStreamConsumer:
@@ -100,14 +99,6 @@ class GatewayStreamConsumer:
self._flood_strikes = 0 # Consecutive flood-control edit failures
self._current_edit_interval = self.cfg.edit_interval # Adaptive backoff
self._final_response_sent = False
# Cache adapter lifecycle capability: only platforms that need an
# explicit finalize call (e.g. DingTalk AI Cards) force us to make
# a redundant final edit. Everyone else keeps the fast path.
# Use ``is True`` (not ``bool(...)``) so MagicMock attribute access
# in tests doesn't incorrectly enable this path.
self._adapter_requires_finalize: bool = (
getattr(adapter, "REQUIRES_EDIT_FINALIZE", False) is True
)
# Think-block filter state (mirrors CLI's _stream_delta tag suppression)
self._in_think_block = False
@@ -304,13 +295,10 @@ class GatewayStreamConsumer:
got_done
or got_segment_break
or commentary_text is not None
or (elapsed >= self._current_edit_interval
and self._accumulated)
or len(self._accumulated) >= self.cfg.buffer_threshold
)
if not self.cfg.buffer_only:
should_edit = should_edit or (
(elapsed >= self._current_edit_interval
and self._accumulated)
or len(self._accumulated) >= self.cfg.buffer_threshold
)
current_update_visible = False
if should_edit and self._accumulated:
@@ -369,16 +357,7 @@ class GatewayStreamConsumer:
if not got_done and not got_segment_break and commentary_text is None:
display_text += self.cfg.cursor
# Segment break: finalize the current message so platforms
# that need explicit closure (e.g. DingTalk AI Cards) don't
# leave the previous segment stuck in a loading state when
# the next segment (tool progress, next chunk) creates a
# new message below it. got_done has its own finalize
# path below so we don't finalize here for it.
current_update_visible = await self._send_or_edit(
display_text,
finalize=got_segment_break,
)
current_update_visible = await self._send_or_edit(display_text)
self._last_edit_time = time.monotonic()
if got_done:
@@ -389,22 +368,10 @@ class GatewayStreamConsumer:
if self._accumulated:
if self._fallback_final_send:
await self._send_fallback_final(self._accumulated)
elif (
current_update_visible
and not self._adapter_requires_finalize
):
# Mid-stream edit above already delivered the
# final accumulated content. Skip the redundant
# final edit — but only for adapters that don't
# need an explicit finalize signal.
elif current_update_visible:
self._final_response_sent = True
elif self._message_id:
# Either the mid-stream edit didn't run (no
# visible update this tick) OR the adapter needs
# explicit finalize=True to close the stream.
self._final_response_sent = await self._send_or_edit(
self._accumulated, finalize=True,
)
self._final_response_sent = await self._send_or_edit(self._accumulated)
elif not self._already_sent:
self._final_response_sent = await self._send_or_edit(self._accumulated)
return
@@ -436,20 +403,18 @@ class GatewayStreamConsumer:
except asyncio.CancelledError:
# Best-effort final edit on cancellation
_best_effort_ok = False
if self._accumulated and self._message_id:
try:
_best_effort_ok = bool(await self._send_or_edit(self._accumulated))
await self._send_or_edit(self._accumulated)
except Exception:
pass
# Only confirm final delivery if the best-effort send above
# actually succeeded OR if the final response was already
# confirmed before we were cancelled. Previously this
# promoted any partial send (already_sent=True) to
# final_response_sent — which suppressed the gateway's
# fallback send even when only intermediate text (e.g.
# "Let me search…") had been delivered, not the real answer.
if _best_effort_ok and not self._final_response_sent:
# If we delivered any content before being cancelled, mark the
# final response as sent so the gateway's already_sent check
# doesn't trigger a duplicate message. The 5-second
# stream_task timeout (gateway/run.py) can cancel us while
# waiting on a slow Telegram API call — without this flag the
# gateway falls through to the normal send path.
if self._already_sent:
self._final_response_sent = True
except Exception as e:
logger.error("Stream consumer error: %s", e)
@@ -548,17 +513,9 @@ class GatewayStreamConsumer:
self._fallback_final_send = False
if not continuation.strip():
# Nothing new to send — the visible partial already matches final text.
# BUT: if final_text itself has meaningful content (e.g. a timeout
# message after a long tool call), the prefix-based continuation
# calculation may wrongly conclude "already shown" because the
# streamed prefix was from a *previous* segment (before the tool
# boundary). In that case, send the full final_text as-is (#10807).
if final_text.strip() and final_text != self._visible_prefix():
continuation = final_text
else:
self._already_sent = True
self._final_response_sent = True
return
self._already_sent = True
self._final_response_sent = True
return
raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096)
safe_limit = max(500, raw_limit - 100)
@@ -652,25 +609,19 @@ class GatewayStreamConsumer:
content=text,
metadata=self.metadata,
)
# Note: do NOT set _already_sent = True here.
# Commentary messages are interim status updates (e.g. "Using browser
# tool..."), not the final response. Setting already_sent would cause
# the final response to be incorrectly suppressed when there are
# multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454
return result.success
if result.success:
self._already_sent = True
return True
except Exception as e:
logger.error("Commentary send error: %s", e)
return False
return False
async def _send_or_edit(self, text: str, *, finalize: bool = False) -> bool:
async def _send_or_edit(self, text: str) -> bool:
"""Send or edit the streaming message.
Returns True if the text was successfully delivered (sent or edited),
False otherwise. Callers like the overflow split loop use this to
decide whether to advance past the delivered chunk.
``finalize`` is True when this is the last edit in a streaming
sequence.
"""
# Strip MEDIA: directives so they don't appear as visible text.
# Media files are delivered as native attachments after the stream
@@ -704,22 +655,14 @@ class GatewayStreamConsumer:
try:
if self._message_id is not None:
if self._edit_supported:
# Skip if text is identical to what we last sent.
# Exception: adapters that require an explicit finalize
# call (REQUIRES_EDIT_FINALIZE) must still receive the
# finalize=True edit even when content is unchanged, so
# their streaming UI can transition out of the in-
# progress state. Everyone else short-circuits.
if text == self._last_sent_text and not (
finalize and self._adapter_requires_finalize
):
# Skip if text is identical to what we last sent
if text == self._last_sent_text:
return True
# Edit existing message
result = await self.adapter.edit_message(
chat_id=self.chat_id,
message_id=self._message_id,
content=text,
finalize=finalize,
)
if result.success:
self._already_sent = True
+2 -2
View File
@@ -11,5 +11,5 @@ Provides subcommands for:
- hermes cron - Manage cron jobs
"""
__version__ = "0.10.0"
__release_date__ = "2026.4.16"
__version__ = "0.9.0"
__release_date__ = "2026.4.13"
+4 -244
View File
@@ -70,7 +70,6 @@ DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@@ -78,10 +77,6 @@ QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56"
QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token"
QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
# Google Gemini OAuth (google-gemini-cli provider, Cloud Code Assist backend)
DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google"
GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 60 # refresh 60s before expiry
# =============================================================================
# Provider Registry
@@ -126,12 +121,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
auth_type="oauth_external",
inference_base_url=DEFAULT_QWEN_BASE_URL,
),
"google-gemini-cli": ProviderConfig(
id="google-gemini-cli",
name="Google Gemini (OAuth)",
auth_type="oauth_external",
inference_base_url=DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
),
"copilot": ProviderConfig(
id="copilot",
name="GitHub Copilot",
@@ -233,14 +222,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("XAI_API_KEY",),
base_url_env_var="XAI_BASE_URL",
),
"nvidia": ProviderConfig(
id="nvidia",
name="NVIDIA NIM",
auth_type="api_key",
inference_base_url="https://integrate.api.nvidia.com/v1",
api_key_env_vars=("NVIDIA_API_KEY",),
base_url_env_var="NVIDIA_BASE_URL",
),
"ai-gateway": ProviderConfig(
id="ai-gateway",
name="Vercel AI Gateway",
@@ -293,22 +274,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("XIAOMI_API_KEY",),
base_url_env_var="XIAOMI_BASE_URL",
),
"ollama-cloud": ProviderConfig(
id="ollama-cloud",
name="Ollama Cloud",
auth_type="api_key",
inference_base_url=DEFAULT_OLLAMA_CLOUD_BASE_URL,
api_key_env_vars=("OLLAMA_API_KEY",),
base_url_env_var="OLLAMA_BASE_URL",
),
"bedrock": ProviderConfig(
id="bedrock",
name="AWS Bedrock",
auth_type="aws_sdk",
inference_base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
api_key_env_vars=(),
base_url_env_var="BEDROCK_BASE_URL",
),
}
@@ -781,28 +746,6 @@ def is_source_suppressed(provider_id: str, source: str) -> bool:
return False
def unsuppress_credential_source(provider_id: str, source: str) -> bool:
"""Clear a suppression marker so the source will be re-seeded on the next load.
Returns True if a marker was cleared, False if no marker existed.
"""
with _auth_store_lock():
auth_store = _load_auth_store()
suppressed = auth_store.get("suppressed_sources")
if not isinstance(suppressed, dict):
return False
provider_list = suppressed.get(provider_id)
if not isinstance(provider_list, list) or source not in provider_list:
return False
provider_list.remove(source)
if not provider_list:
suppressed.pop(provider_id, None)
if not suppressed:
auth_store.pop("suppressed_sources", None)
_save_auth_store(auth_store)
return True
def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]:
"""Return persisted auth state for a provider, or None."""
auth_store = _load_auth_store()
@@ -968,7 +911,6 @@ def resolve_provider(
_PROVIDER_ALIASES = {
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
"google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
"x-ai": "xai", "x.ai": "xai", "grok": "xai",
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
"arcee-ai": "arcee", "arceeai": "arcee",
@@ -979,16 +921,14 @@ def resolve_provider(
"github-copilot-acp": "copilot-acp", "copilot-acp-agent": "copilot-acp",
"aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway",
"opencode": "opencode-zen", "zen": "opencode-zen",
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli",
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth",
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
"mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
"aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
"go": "opencode-go", "opencode-go-sub": "opencode-go",
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
# Local server aliases — route through the generic custom provider
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
"ollama": "custom", "ollama_cloud": "ollama-cloud",
"vllm": "custom", "llamacpp": "custom",
"ollama": "custom", "vllm": "custom", "llamacpp": "custom",
"llama.cpp": "custom", "llama-cpp": "custom",
}
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
@@ -1040,15 +980,6 @@ def resolve_provider(
if has_usable_secret(os.getenv(env_var, "")):
return pid
# AWS Bedrock — detect via boto3 credential chain (IAM roles, SSO, env vars).
# This runs after API-key providers so explicit keys always win.
try:
from agent.bedrock_adapter import has_aws_credentials
if has_aws_credentials():
return "bedrock"
except ImportError:
pass # boto3 not installed — skip Bedrock auto-detection
raise AuthError(
"No inference provider configured. Run 'hermes model' to choose a "
"provider and model, or set an API key (OPENROUTER_API_KEY, "
@@ -1291,83 +1222,6 @@ def get_qwen_auth_status() -> Dict[str, Any]:
}
# =============================================================================
# Google Gemini OAuth (google-gemini-cli) — PKCE flow + Cloud Code Assist.
#
# Tokens live in ~/.hermes/auth/google_oauth.json (managed by agent.google_oauth).
# The `base_url` here is the marker "cloudcode-pa://google" that run_agent.py
# uses to construct a GeminiCloudCodeClient instead of the default OpenAI SDK.
# Actual HTTP traffic goes to https://cloudcode-pa.googleapis.com/v1internal:*.
# =============================================================================
def resolve_gemini_oauth_runtime_credentials(
*,
force_refresh: bool = False,
) -> Dict[str, Any]:
"""Resolve runtime OAuth creds for google-gemini-cli."""
try:
from agent.google_oauth import (
GoogleOAuthError,
_credentials_path,
get_valid_access_token,
load_credentials,
)
except ImportError as exc:
raise AuthError(
f"agent.google_oauth is not importable: {exc}",
provider="google-gemini-cli",
code="google_oauth_module_missing",
) from exc
try:
access_token = get_valid_access_token(force_refresh=force_refresh)
except GoogleOAuthError as exc:
raise AuthError(
str(exc),
provider="google-gemini-cli",
code=exc.code,
) from exc
creds = load_credentials()
base_url = DEFAULT_GEMINI_CLOUDCODE_BASE_URL
return {
"provider": "google-gemini-cli",
"base_url": base_url,
"api_key": access_token,
"source": "google-oauth",
"expires_at_ms": (creds.expires_ms if creds else None),
"auth_file": str(_credentials_path()),
"email": (creds.email if creds else "") or "",
"project_id": (creds.project_id if creds else "") or "",
}
def get_gemini_oauth_auth_status() -> Dict[str, Any]:
"""Return a status dict for `hermes auth list` / `hermes status`."""
try:
from agent.google_oauth import _credentials_path, load_credentials
except ImportError:
return {"logged_in": False, "error": "agent.google_oauth unavailable"}
auth_path = _credentials_path()
creds = load_credentials()
if creds is None or not creds.access_token:
return {
"logged_in": False,
"auth_file": str(auth_path),
"error": "not logged in",
}
return {
"logged_in": True,
"auth_file": str(auth_path),
"source": "google-oauth",
"api_key": creds.access_token,
"expires_at_ms": creds.expires_ms,
"email": creds.email,
"project_id": creds.project_id,
}
# =============================================================================
# SSH / remote session detection
# =============================================================================
@@ -2159,62 +2013,6 @@ def refresh_nous_oauth_from_state(
)
NOUS_DEVICE_CODE_SOURCE = "device_code"
def persist_nous_credentials(
creds: Dict[str, Any],
*,
label: Optional[str] = None,
):
"""Persist minted Nous OAuth credentials as the singleton provider state
and ensure the credential pool is in sync.
Nous credentials are read at runtime from two independent locations:
- ``providers.nous``: singleton state read by
``resolve_nous_runtime_credentials()`` during 401 recovery and by
``_seed_from_singletons()`` during pool load.
- ``credential_pool.nous``: used by the runtime ``pool.select()`` path.
Historically ``hermes auth add nous`` wrote a ``manual:device_code`` pool
entry only, skipping ``providers.nous``. When the 24h agent_key TTL
expired, the recovery path read the empty singleton state and raised
``AuthError`` silently (``logger.debug`` at INFO level).
This helper writes ``providers.nous`` then calls ``load_pool("nous")`` so
``_seed_from_singletons`` materialises the canonical ``device_code`` pool
entry from the singleton. Re-running login upserts the same entry in
place; the pool never accumulates duplicate device_code rows.
``label`` is an optional user-chosen display name (from
``hermes auth add nous --label <name>``). It gets embedded in the
singleton state so that ``_seed_from_singletons`` uses it as the pool
entry's label on every subsequent ``load_pool("nous")`` instead of the
auto-derived token fingerprint. When ``None``, the auto-derived label
via ``label_from_token`` is used (unchanged default behaviour).
Returns the upserted :class:`PooledCredential` entry (or ``None`` if
seeding somehow produced no match shouldn't happen).
"""
from agent.credential_pool import load_pool
state = dict(creds)
if label and str(label).strip():
state["label"] = str(label).strip()
with _auth_store_lock():
auth_store = _load_auth_store()
_save_provider_state(auth_store, "nous", state)
_save_auth_store(auth_store)
pool = load_pool("nous")
return next(
(e for e in pool.entries() if e.source == NOUS_DEVICE_CODE_SOURCE),
None,
)
def resolve_nous_runtime_credentials(
*,
min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
@@ -2586,7 +2384,7 @@ def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]:
if pconfig.base_url_env_var:
env_url = os.getenv(pconfig.base_url_env_var, "").strip()
if provider_id in ("kimi-coding", "kimi-coding-cn"):
if provider_id == "kimi-coding":
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
elif env_url:
base_url = env_url
@@ -2642,21 +2440,12 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
return get_codex_auth_status()
if target == "qwen-oauth":
return get_qwen_auth_status()
if target == "google-gemini-cli":
return get_gemini_oauth_auth_status()
if target == "copilot-acp":
return get_external_process_provider_status(target)
# API-key providers
pconfig = PROVIDER_REGISTRY.get(target)
if pconfig and pconfig.auth_type == "api_key":
return get_api_key_provider_status(target)
# AWS SDK providers (Bedrock) — check via boto3 credential chain
if pconfig and pconfig.auth_type == "aws_sdk":
try:
from agent.bedrock_adapter import has_aws_credentials
return {"logged_in": has_aws_credentials(), "provider": target}
except ImportError:
return {"logged_in": False, "provider": target, "error": "boto3 not installed"}
return {"logged_in": False}
@@ -2681,7 +2470,7 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
if pconfig.base_url_env_var:
env_url = os.getenv(pconfig.base_url_env_var, "").strip()
if provider_id in ("kimi-coding", "kimi-coding-cn"):
if provider_id == "kimi-coding":
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
elif provider_id == "zai":
base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)
@@ -3383,14 +3172,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
inference_base_url = auth_state["inference_base_url"]
# Snapshot the prior active_provider BEFORE _save_provider_state
# overwrites it to "nous". If the user picks "Skip (keep current)"
# during model selection below, we restore this so the user's previous
# provider (e.g. openrouter) is preserved.
with _auth_store_lock():
_prior_store = _load_auth_store()
prior_active_provider = _prior_store.get("active_provider")
with _auth_store_lock():
auth_store = _load_auth_store()
_save_provider_state(auth_store, "nous", auth_state)
@@ -3450,27 +3231,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
print(f"Login succeeded, but could not fetch available models. Reason: {message}")
# Write provider + model atomically so config is never mismatched.
# If no model was selected (user picked "Skip (keep current)",
# model list fetch failed, or no curated models were available),
# preserve the user's previous provider — don't silently switch
# them to Nous with a mismatched model. The Nous OAuth tokens
# stay saved for future use.
if not selected_model:
# Restore the prior active_provider that _save_provider_state
# overwrote to "nous". config.yaml model.provider is left
# untouched, so the user's previous provider is fully preserved.
with _auth_store_lock():
auth_store = _load_auth_store()
if prior_active_provider:
auth_store["active_provider"] = prior_active_provider
else:
auth_store.pop("active_provider", None)
_save_auth_store(auth_store)
print()
print("No provider change. Nous credentials saved for future use.")
print(" Run `hermes model` again to switch to Nous Portal.")
return
config_path = _update_config_for_provider(
"nous", inference_base_url, default_model=selected_model,
)
+16 -88
View File
@@ -4,7 +4,6 @@ from __future__ import annotations
from getpass import getpass
import math
import sys
import time
from types import SimpleNamespace
import uuid
@@ -33,7 +32,7 @@ from hermes_constants import OPENROUTER_BASE_URL
# Providers that support OAuth login in addition to API keys.
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"}
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth"}
def _get_custom_provider_names() -> list:
@@ -148,7 +147,7 @@ def auth_add_command(args) -> None:
if provider.startswith(CUSTOM_POOL_PREFIX):
requested_type = AUTH_TYPE_API_KEY
else:
requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"} else AUTH_TYPE_API_KEY
requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth"} else AUTH_TYPE_API_KEY
pool = load_pool(provider)
@@ -161,10 +160,7 @@ def auth_add_command(args) -> None:
default_label = _api_key_default_label(len(pool.entries()) + 1)
label = (getattr(args, "label", None) or "").strip()
if not label:
if sys.stdin.isatty():
label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
else:
label = default_label
label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
entry = PooledCredential(
provider=provider,
id=uuid.uuid4().hex[:6],
@@ -217,21 +213,22 @@ def auth_add_command(args) -> None:
ca_bundle=getattr(args, "ca_bundle", None),
min_key_ttl_seconds=max(60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))),
)
# Honor `--label <name>` so nous matches other providers' UX. The
# helper embeds this into providers.nous so that label_from_token
# doesn't overwrite it on every subsequent load_pool("nous").
custom_label = (getattr(args, "label", None) or "").strip() or None
entry = auth_mod.persist_nous_credentials(creds, label=custom_label)
shown_label = entry.label if entry is not None else label_from_token(
creds.get("access_token", ""), _oauth_default_label(provider, 1),
label = (getattr(args, "label", None) or "").strip() or label_from_token(
creds.get("access_token", ""),
_oauth_default_label(provider, len(pool.entries()) + 1),
)
print(f'Saved {provider} OAuth device-code credentials: "{shown_label}"')
entry = PooledCredential.from_dict(provider, {
**creds,
"label": label,
"auth_type": AUTH_TYPE_OAUTH,
"source": f"{SOURCE_MANUAL}:device_code",
"base_url": creds.get("inference_base_url"),
})
pool.add_entry(entry)
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
return
if provider == "openai-codex":
# Clear any existing suppression marker so a re-link after `hermes auth
# remove openai-codex` works without the new tokens being skipped.
auth_mod.unsuppress_credential_source(provider, "device_code")
creds = auth_mod._codex_device_code_login()
label = (getattr(args, "label", None) or "").strip() or label_from_token(
creds["tokens"]["access_token"],
@@ -253,27 +250,6 @@ def auth_add_command(args) -> None:
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
return
if provider == "google-gemini-cli":
from agent.google_oauth import run_gemini_oauth_login_pure
creds = run_gemini_oauth_login_pure()
label = (getattr(args, "label", None) or "").strip() or (
creds.get("email") or _oauth_default_label(provider, len(pool.entries()) + 1)
)
entry = PooledCredential(
provider=provider,
id=uuid.uuid4().hex[:6],
label=label,
auth_type=AUTH_TYPE_OAUTH,
priority=0,
source=f"{SOURCE_MANUAL}:google_pkce",
access_token=creds["access_token"],
refresh_token=creds.get("refresh_token"),
)
pool.add_entry(entry)
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
return
if provider == "qwen-oauth":
creds = auth_mod.resolve_qwen_runtime_credentials(refresh_if_expiring=False)
label = (getattr(args, "label", None) or "").strip() or label_from_token(
@@ -351,34 +327,7 @@ def auth_remove_command(args) -> None:
# If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
# clear the underlying auth store / credential file so it doesn't get
# re-seeded on the next load_pool() call.
elif provider == "openai-codex" and (
removed.source == "device_code" or removed.source.endswith(":device_code")
):
# Codex tokens live in TWO places: the Hermes auth store and
# ~/.codex/auth.json (the Codex CLI shared file). On every refresh,
# refresh_codex_oauth_pure() writes to both. So clearing only the
# Hermes auth store is not enough — _seed_from_singletons() will
# auto-import from ~/.codex/auth.json on the next load_pool() and
# the removal is instantly undone. Mark the source as suppressed
# so auto-import is skipped; leave ~/.codex/auth.json untouched so
# the Codex CLI itself keeps working.
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
suppress_credential_source,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
print(f"Cleared {provider} OAuth tokens from auth store")
suppress_credential_source(provider, "device_code")
print("Suppressed openai-codex device_code source — it will not be re-seeded.")
print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
print("Run `hermes auth add openai-codex` to re-enable if needed.")
elif removed.source == "device_code" and provider == "nous":
elif removed.source == "device_code" and provider in ("openai-codex", "nous"):
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
)
@@ -419,27 +368,6 @@ def _interactive_auth() -> None:
print("=" * 50)
auth_list_command(SimpleNamespace(provider=None))
# Show AWS Bedrock credential status (not in the pool — uses boto3 chain)
try:
from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
if has_aws_credentials():
auth_source = resolve_aws_auth_env_var() or "unknown"
region = resolve_bedrock_region()
print(f"bedrock (AWS SDK credential chain):")
print(f" Auth: {auth_source}")
print(f" Region: {region}")
try:
import boto3
sts = boto3.client("sts", region_name=region)
identity = sts.get_caller_identity()
arn = identity.get("Arn", "unknown")
print(f" Identity: {arn}")
except Exception:
print(f" Identity: (could not resolve — boto3 STS call failed)")
print()
except ImportError:
pass # boto3 or bedrock_adapter not available
print()
# Main menu
+70 -119
View File
@@ -7,8 +7,8 @@ CLI tools that ship with the platform (or are commonly installed).
Platform support:
macOS osascript (always available), pngpaste (if installed)
Windows PowerShell via WinForms, Get-Clipboard, file-drop fallback
WSL2 powershell.exe via WinForms, Get-Clipboard, file-drop fallback
Windows PowerShell via .NET System.Windows.Forms.Clipboard
WSL2 powershell.exe via .NET System.Windows.Forms.Clipboard
Linux wl-paste (Wayland), xclip (X11)
"""
@@ -46,11 +46,10 @@ def has_clipboard_image() -> bool:
return _macos_has_image()
if sys.platform == "win32":
return _windows_has_image()
# Match _linux_save fallthrough order: WSL → Wayland → X11
if _is_wsl() and _wsl_has_image():
return True
if os.environ.get("WAYLAND_DISPLAY") and _wayland_has_image():
return True
if _is_wsl():
return _wsl_has_image()
if os.environ.get("WAYLAND_DISPLAY"):
return _wayland_has_image()
return _xclip_has_image()
@@ -136,114 +135,6 @@ _PS_EXTRACT_IMAGE = (
"[System.Convert]::ToBase64String($ms.ToArray())"
)
_PS_CHECK_IMAGE_GET_CLIPBOARD = (
"try { "
"$img = Get-Clipboard -Format Image -ErrorAction Stop;"
"if ($null -ne $img) { 'True' } else { 'False' }"
"} catch { 'False' }"
)
_PS_EXTRACT_IMAGE_GET_CLIPBOARD = (
"try { "
"Add-Type -AssemblyName System.Drawing;"
"Add-Type -AssemblyName PresentationCore;"
"Add-Type -AssemblyName WindowsBase;"
"$img = Get-Clipboard -Format Image -ErrorAction Stop;"
"if ($null -eq $img) { exit 1 }"
"$ms = New-Object System.IO.MemoryStream;"
"if ($img -is [System.Drawing.Image]) {"
"$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png)"
"} elseif ($img -is [System.Windows.Media.Imaging.BitmapSource]) {"
"$enc = New-Object System.Windows.Media.Imaging.PngBitmapEncoder;"
"$enc.Frames.Add([System.Windows.Media.Imaging.BitmapFrame]::Create($img));"
"$enc.Save($ms)"
"} else { exit 2 }"
"[System.Convert]::ToBase64String($ms.ToArray())"
"} catch { exit 1 }"
)
_FILEDROP_IMAGE_EXTS = "'.png','.jpg','.jpeg','.gif','.webp','.bmp','.tiff','.tif'"
_PS_CHECK_FILEDROP_IMAGE = (
"try { "
"$files = Get-Clipboard -Format FileDropList -ErrorAction Stop;"
f"$exts = @({_FILEDROP_IMAGE_EXTS});"
"$hit = $files | Where-Object { $exts -contains ([System.IO.Path]::GetExtension($_).ToLowerInvariant()) } | Select-Object -First 1;"
"if ($null -ne $hit) { 'True' } else { 'False' }"
"} catch { 'False' }"
)
_PS_EXTRACT_FILEDROP_IMAGE = (
"try { "
"$files = Get-Clipboard -Format FileDropList -ErrorAction Stop;"
f"$exts = @({_FILEDROP_IMAGE_EXTS});"
"$hit = $files | Where-Object { $exts -contains ([System.IO.Path]::GetExtension($_).ToLowerInvariant()) } | Select-Object -First 1;"
"if ($null -eq $hit) { exit 1 }"
"[System.Convert]::ToBase64String([System.IO.File]::ReadAllBytes($hit))"
"} catch { exit 1 }"
)
_POWERSHELL_HAS_IMAGE_SCRIPTS = (
_PS_CHECK_IMAGE,
_PS_CHECK_IMAGE_GET_CLIPBOARD,
_PS_CHECK_FILEDROP_IMAGE,
)
_POWERSHELL_EXTRACT_IMAGE_SCRIPTS = (
_PS_EXTRACT_IMAGE,
_PS_EXTRACT_IMAGE_GET_CLIPBOARD,
_PS_EXTRACT_FILEDROP_IMAGE,
)
def _run_powershell(exe: str, script: str, timeout: int) -> subprocess.CompletedProcess:
return subprocess.run(
[exe, "-NoProfile", "-NonInteractive", "-Command", script],
capture_output=True, text=True, timeout=timeout,
)
def _write_base64_image(dest: Path, b64_data: str) -> bool:
image_bytes = base64.b64decode(b64_data, validate=True)
dest.write_bytes(image_bytes)
return dest.exists() and dest.stat().st_size > 0
def _powershell_has_image(exe: str, *, timeout: int, label: str) -> bool:
for script in _POWERSHELL_HAS_IMAGE_SCRIPTS:
try:
r = _run_powershell(exe, script, timeout=timeout)
if r.returncode == 0 and "True" in r.stdout:
return True
except FileNotFoundError:
logger.debug("%s not found — clipboard unavailable", exe)
return False
except Exception as e:
logger.debug("%s clipboard image check failed: %s", label, e)
return False
def _powershell_save_image(exe: str, dest: Path, *, timeout: int, label: str) -> bool:
for script in _POWERSHELL_EXTRACT_IMAGE_SCRIPTS:
try:
r = _run_powershell(exe, script, timeout=timeout)
if r.returncode != 0:
continue
b64_data = r.stdout.strip()
if not b64_data:
continue
if _write_base64_image(dest, b64_data):
return True
except FileNotFoundError:
logger.debug("%s not found — clipboard unavailable", exe)
return False
except Exception as e:
logger.debug("%s clipboard image extraction failed: %s", label, e)
dest.unlink(missing_ok=True)
return False
# ── Native Windows ────────────────────────────────────────────────────────
@@ -284,7 +175,15 @@ def _windows_has_image() -> bool:
ps = _get_ps_exe()
if ps is None:
return False
return _powershell_has_image(ps, timeout=5, label="Windows")
try:
r = subprocess.run(
[ps, "-NoProfile", "-NonInteractive", "-Command", _PS_CHECK_IMAGE],
capture_output=True, text=True, timeout=5,
)
return r.returncode == 0 and "True" in r.stdout
except Exception as e:
logger.debug("Windows clipboard image check failed: %s", e)
return False
def _windows_save(dest: Path) -> bool:
@@ -293,7 +192,26 @@ def _windows_save(dest: Path) -> bool:
if ps is None:
logger.debug("No PowerShell found — Windows clipboard image paste unavailable")
return False
return _powershell_save_image(ps, dest, timeout=15, label="Windows")
try:
r = subprocess.run(
[ps, "-NoProfile", "-NonInteractive", "-Command", _PS_EXTRACT_IMAGE],
capture_output=True, text=True, timeout=15,
)
if r.returncode != 0:
return False
b64_data = r.stdout.strip()
if not b64_data:
return False
png_bytes = base64.b64decode(b64_data)
dest.write_bytes(png_bytes)
return dest.exists() and dest.stat().st_size > 0
except Exception as e:
logger.debug("Windows clipboard image extraction failed: %s", e)
dest.unlink(missing_ok=True)
return False
# ── Linux ────────────────────────────────────────────────────────────────
@@ -317,12 +235,45 @@ def _linux_save(dest: Path) -> bool:
def _wsl_has_image() -> bool:
"""Check if Windows clipboard has an image (via powershell.exe)."""
return _powershell_has_image("powershell.exe", timeout=8, label="WSL")
try:
r = subprocess.run(
["powershell.exe", "-NoProfile", "-NonInteractive", "-Command",
_PS_CHECK_IMAGE],
capture_output=True, text=True, timeout=8,
)
return r.returncode == 0 and "True" in r.stdout
except FileNotFoundError:
logger.debug("powershell.exe not found — WSL clipboard unavailable")
except Exception as e:
logger.debug("WSL clipboard check failed: %s", e)
return False
def _wsl_save(dest: Path) -> bool:
"""Extract clipboard image via powershell.exe → base64 → decode to PNG."""
return _powershell_save_image("powershell.exe", dest, timeout=15, label="WSL")
try:
r = subprocess.run(
["powershell.exe", "-NoProfile", "-NonInteractive", "-Command",
_PS_EXTRACT_IMAGE],
capture_output=True, text=True, timeout=15,
)
if r.returncode != 0:
return False
b64_data = r.stdout.strip()
if not b64_data:
return False
png_bytes = base64.b64decode(b64_data)
dest.write_bytes(png_bytes)
return dest.exists() and dest.stat().st_size > 0
except FileNotFoundError:
logger.debug("powershell.exe not found — WSL clipboard unavailable")
except Exception as e:
logger.debug("WSL clipboard extraction failed: %s", e)
dest.unlink(missing_ok=True)
return False
# ── Wayland (wl-paste) ──────────────────────────────────────────────────
+11 -112
View File
@@ -87,8 +87,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
aliases=("bg",), args_hint="<prompt>"),
CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
args_hint="<question>"),
CommandDef("agents", "Show active agents and running tasks", "Session",
aliases=("tasks",)),
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
aliases=("q",), args_hint="<prompt>"),
CommandDef("status", "Show session info", "Session"),
@@ -101,10 +99,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
# Configuration
CommandDef("config", "Show current configuration", "Configuration",
cli_only=True),
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--global]"),
CommandDef("provider", "Show available providers and current provider",
"Configuration"),
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info"),
CommandDef("personality", "Set a predefined personality", "Configuration",
args_hint="[name]"),
@@ -122,7 +119,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
args_hint="[normal|fast|status]",
subcommands=("normal", "fast", "status", "on", "off")),
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
args_hint="[name]"),
cli_only=True, args_hint="[name]"),
CommandDef("voice", "Toggle voice mode", "Configuration",
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
@@ -157,9 +154,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
args_hint="[days]"),
CommandDef("platforms", "Show gateway/messaging platform status", "Info",
cli_only=True, aliases=("gateway",)),
CommandDef("copy", "Copy the last assistant response to clipboard", "Info",
cli_only=True, args_hint="[number]"),
CommandDef("paste", "Attach clipboard image from your clipboard", "Info",
CommandDef("paste", "Check clipboard for an image and attach it", "Info",
cli_only=True),
CommandDef("image", "Attach a local image file for your next prompt", "Info",
cli_only=True, args_hint="<path>"),
@@ -169,7 +164,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
# Exit
CommandDef("quit", "Exit the CLI", "Exit",
cli_only=True, aliases=("exit",)),
cli_only=True, aliases=("exit", "q")),
]
@@ -258,35 +253,6 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
)
# Commands that must never be queued behind an active gateway session.
# These are explicit control/info commands handled by the gateway itself;
# if they get queued as pending text, the safety net in gateway.run will
# discard them before they ever reach the user.
ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
{
"agents",
"approve",
"background",
"commands",
"deny",
"help",
"new",
"profile",
"queue",
"restart",
"status",
"stop",
"update",
}
)
def should_bypass_active_session(command_name: str | None) -> bool:
"""Return True when a slash command must bypass active-session queuing."""
cmd = resolve_command(command_name) if command_name else None
return bool(cmd and cmd.name in ACTIVE_SESSION_BYPASS_COMMANDS)
def _resolve_config_gates() -> set[str]:
"""Return canonical names of commands whose ``gateway_config_gate`` is truthy.
@@ -484,7 +450,7 @@ def _collect_gateway_skill_entries(
name = sanitize_name(cmd_name) if sanitize_name else cmd_name
if not name:
continue
desc = plugin_cmds[cmd_name].get("description", "Plugin command")
desc = "Plugin command"
if len(desc) > desc_limit:
desc = desc[:desc_limit - 3] + "..."
plugin_pairs.append((name, desc))
@@ -878,7 +844,8 @@ class SlashCommandCompleter(Completer):
return None
return word
def _context_completions(self, word: str, limit: int = 30):
@staticmethod
def _context_completions(word: str, limit: int = 30):
"""Yield Claude Code-style @ context completions.
Bare ``@`` or ``@partial`` shows static references and matching
@@ -1077,51 +1044,6 @@ class SlashCommandCompleter(Completer):
display_meta=f"{fp} {meta}" if meta else fp,
)
@staticmethod
def _skin_completions(sub_text: str, sub_lower: str):
"""Yield completions for /skin from available skins."""
try:
from hermes_cli.skin_engine import list_skins
for s in list_skins():
name = s["name"]
if name.startswith(sub_lower) and name != sub_lower:
yield Completion(
name,
start_position=-len(sub_text),
display=name,
display_meta=s.get("description", "") or s.get("source", ""),
)
except Exception:
pass
@staticmethod
def _personality_completions(sub_text: str, sub_lower: str):
"""Yield completions for /personality from configured personalities."""
try:
from hermes_cli.config import load_config
personalities = load_config().get("agent", {}).get("personalities", {})
if "none".startswith(sub_lower) and "none" != sub_lower:
yield Completion(
"none",
start_position=-len(sub_text),
display="none",
display_meta="clear personality overlay",
)
for name, prompt in personalities.items():
if name.startswith(sub_lower) and name != sub_lower:
if isinstance(prompt, dict):
meta = prompt.get("description") or prompt.get("system_prompt", "")[:50]
else:
meta = str(prompt)[:50]
yield Completion(
name,
start_position=-len(sub_text),
display=name,
display_meta=meta,
)
except Exception:
pass
def _model_completions(self, sub_text: str, sub_lower: str):
"""Yield completions for /model from config aliases + built-in aliases."""
seen = set()
@@ -1176,17 +1098,10 @@ class SlashCommandCompleter(Completer):
sub_text = parts[1] if len(parts) > 1 else ""
sub_lower = sub_text.lower()
# Dynamic completions for commands with runtime lists
if " " not in sub_text:
if base_cmd == "/model":
yield from self._model_completions(sub_text, sub_lower)
return
if base_cmd == "/skin":
yield from self._skin_completions(sub_text, sub_lower)
return
if base_cmd == "/personality":
yield from self._personality_completions(sub_text, sub_lower)
return
# Dynamic model alias completions for /model
if " " not in sub_text and base_cmd == "/model":
yield from self._model_completions(sub_text, sub_lower)
return
# Static subcommand completions
if " " not in sub_text and base_cmd in SUBCOMMANDS and self._command_allowed(base_cmd):
@@ -1225,22 +1140,6 @@ class SlashCommandCompleter(Completer):
display_meta=f"{short_desc}",
)
# Plugin-registered slash commands
try:
from hermes_cli.plugins import get_plugin_commands
for cmd_name, cmd_info in get_plugin_commands().items():
if cmd_name.startswith(word):
desc = str(cmd_info.get("description", "Plugin command"))
short_desc = desc[:50] + ("..." if len(desc) > 50 else "")
yield Completion(
self._completion_text(cmd_name, word),
start_position=-len(word),
display=f"/{cmd_name}",
display_meta=f"🔌 {short_desc}",
)
except Exception:
pass
# ---------------------------------------------------------------------------
# Inline auto-suggest (ghost text) for slash commands
+35 -413
View File
@@ -12,7 +12,6 @@ This module provides:
- hermes config wizard - Re-run setup wizard
"""
import copy
import os
import platform
import re
@@ -24,10 +23,10 @@ from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
from tools.tool_backend_helpers import managed_nous_tools_enabled as _managed_nous_tools_enabled
_IS_WINDOWS = platform.system() == "Windows"
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
_LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
# Env var names written to .env that aren't in OPTIONAL_ENV_VARS
# (managed by setup/provider flows directly).
_EXTRA_ENV_KEYS = frozenset({
@@ -46,8 +45,7 @@ _EXTRA_ENV_KEYS = frozenset({
"WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
"WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
"BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
"QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME",
"QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME", # legacy aliases (pre-rename, still read for back-compat)
"QQ_APP_ID", "QQ_CLIENT_SECRET", "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME",
"QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
"QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
"TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
@@ -243,41 +241,13 @@ def _secure_dir(path):
pass
def _is_container() -> bool:
"""Detect if we're running inside a Docker/Podman/LXC container.
When Hermes runs in a container with volume-mounted config files, forcing
0o600 permissions breaks multi-process setups where the gateway and
dashboard run as different UIDs or the volume mount requires broader
permissions.
"""
# Explicit opt-out
if os.environ.get("HERMES_CONTAINER") or os.environ.get("HERMES_SKIP_CHMOD"):
return True
# Docker / Podman marker file
if os.path.exists("/.dockerenv"):
return True
# LXC / cgroup-based detection
try:
with open("/proc/1/cgroup", "r") as f:
cgroup_content = f.read()
if "docker" in cgroup_content or "lxc" in cgroup_content or "kubepods" in cgroup_content:
return True
except (OSError, IOError):
pass
return False
def _secure_file(path):
"""Set file to owner-only read/write (0600). No-op on Windows.
Skipped in managed mode the NixOS activation script sets
group-readable permissions (0640) on config files.
Skipped in containers Docker/Podman volume mounts often need broader
permissions. Set HERMES_SKIP_CHMOD=1 to force-skip on other systems.
"""
if is_managed() or _is_container():
if is_managed():
return
try:
if os.path.exists(str(path)):
@@ -420,10 +390,10 @@ DEFAULT_CONFIG = {
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
"record_sessions": False, # Auto-record browser sessions as WebM videos
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
"camofox": {
# When true, Hermes sends a stable profile-scoped userId to Camofox
# so the server maps it to a persistent Firefox profile automatically.
# so the server can map it to a persistent browser profile directory.
# Requires Camofox server to be configured with CAMOFOX_PROFILE_DIR.
# When false (default), each session gets a random userId (ephemeral).
"managed_persistence": False,
},
@@ -449,27 +419,6 @@ DEFAULT_CONFIG = {
"protect_last_n": 20, # minimum recent messages to keep uncompressed
},
# AWS Bedrock provider configuration.
# Only used when model.provider is "bedrock".
"bedrock": {
"region": "", # AWS region for Bedrock API calls (empty = AWS_REGION env var → us-east-1)
"discovery": {
"enabled": True, # Auto-discover models via ListFoundationModels
"provider_filter": [], # Only show models from these providers (e.g. ["anthropic", "amazon"])
"refresh_interval": 3600, # Cache discovery results for this many seconds
},
"guardrail": {
# Amazon Bedrock Guardrails — content filtering and safety policies.
# Create a guardrail in the Bedrock console, then set the ID and version here.
# See: https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html
"guardrail_identifier": "", # e.g. "abc123def456"
"guardrail_version": "", # e.g. "1" or "DRAFT"
"stream_processing_mode": "async", # "sync" or "async"
"trace": "disabled", # "enabled", "disabled", or "enabled_full"
},
},
"smart_model_routing": {
"enabled": False,
"max_simple_chars": 160,
@@ -541,13 +490,6 @@ DEFAULT_CONFIG = {
"api_key": "",
"timeout": 30,
},
"title_generation": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
"timeout": 30,
},
},
"display": {
@@ -568,11 +510,6 @@ DEFAULT_CONFIG = {
"platforms": {}, # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
},
# Web dashboard settings
"dashboard": {
"theme": "default", # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose"
},
# Privacy settings
"privacy": {
"redact_pii": False, # When True, hash user IDs and strip phone numbers from LLM context
@@ -580,7 +517,7 @@ DEFAULT_CONFIG = {
# Text-to-speech configuration
"tts": {
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "minimax" | "mistral" | "neutts" (local)
"edge": {
"voice": "en-US-AriaNeural",
# Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
@@ -594,12 +531,6 @@ DEFAULT_CONFIG = {
"voice": "alloy",
# Voices: alloy, echo, fable, onyx, nova, shimmer
},
"xai": {
"voice_id": "eve",
"language": "en",
"sample_rate": 24000,
"bit_rate": 128000,
},
"mistral": {
"model": "voxtral-mini-tts-2603",
"voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8", # Paul - Neutral
@@ -707,7 +638,6 @@ DEFAULT_CONFIG = {
"allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist)
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
},
# WhatsApp platform settings (gateway mode)
@@ -718,21 +648,6 @@ DEFAULT_CONFIG = {
# Supports \n for newlines, e.g. "🤖 *My Bot*\n──────\n"
},
# Telegram platform settings (gateway mode)
"telegram": {
"channel_prompts": {}, # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
},
# Slack platform settings (gateway mode)
"slack": {
"channel_prompts": {}, # Per-channel ephemeral system prompts
},
# Mattermost platform settings (gateway mode)
"mattermost": {
"channel_prompts": {}, # Per-channel ephemeral system prompts
},
# Approval mode for dangerous commands:
# manual — always prompt the user (default)
# smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
@@ -771,20 +686,6 @@ DEFAULT_CONFIG = {
"wrap_response": True,
},
# execute_code settings — controls the tool used for programmatic tool calls.
"code_execution": {
# Execution mode:
# project (default) — scripts run in the session's working directory
# with the active virtualenv/conda env's python, so project deps
# (pandas, torch, project packages) and relative paths resolve.
# strict — scripts run in an isolated temp directory with
# hermes-agent's own python (sys.executable). Maximum isolation
# and reproducibility; project deps and relative paths won't work.
# Env scrubbing (strips *_API_KEY, *_TOKEN, *_SECRET, ...) and the
# tool whitelist apply identically in both modes.
"mode": "project",
},
# Logging — controls file logging to ~/.hermes/logs/.
# agent.log captures INFO+ (all agent activity); errors.log captures WARNING+.
"logging": {
@@ -802,7 +703,7 @@ DEFAULT_CONFIG = {
},
# Config schema version - bump this when adding new required fields
"_config_version": 19,
"_config_version": 17,
}
# =============================================================================
@@ -870,38 +771,6 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"XAI_API_KEY": {
"description": "xAI API key",
"prompt": "xAI API key",
"url": "https://console.x.ai/",
"password": True,
"category": "provider",
"advanced": True,
},
"XAI_BASE_URL": {
"description": "xAI base URL override",
"prompt": "xAI base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"NVIDIA_API_KEY": {
"description": "NVIDIA NIM API key (build.nvidia.com or local NIM endpoint)",
"prompt": "NVIDIA NIM API key",
"url": "https://build.nvidia.com/",
"password": True,
"category": "provider",
"advanced": True,
},
"NVIDIA_BASE_URL": {
"description": "NVIDIA NIM base URL override (e.g. http://localhost:8000/v1 for local NIM)",
"prompt": "NVIDIA NIM base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"GLM_API_KEY": {
"description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
"prompt": "Z.AI / GLM API key",
@@ -1043,30 +912,6 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"HERMES_GEMINI_CLIENT_ID": {
"description": "Google OAuth client ID for google-gemini-cli (optional; defaults to Google's public gemini-cli client)",
"prompt": "Google OAuth client ID (optional — leave empty to use the public default)",
"url": "https://console.cloud.google.com/apis/credentials",
"password": False,
"category": "provider",
"advanced": True,
},
"HERMES_GEMINI_CLIENT_SECRET": {
"description": "Google OAuth client secret for google-gemini-cli (optional)",
"prompt": "Google OAuth client secret (optional)",
"url": "https://console.cloud.google.com/apis/credentials",
"password": True,
"category": "provider",
"advanced": True,
},
"HERMES_GEMINI_PROJECT_ID": {
"description": "GCP project ID for paid Gemini tiers (free tier auto-provisions)",
"prompt": "GCP project ID for Gemini OAuth (leave empty for free tier)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"OPENCODE_ZEN_API_KEY": {
"description": "OpenCode Zen API key (pay-as-you-go access to curated models)",
"prompt": "OpenCode Zen API key",
@@ -1114,22 +959,6 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"OLLAMA_API_KEY": {
"description": "Ollama Cloud API key (ollama.com — cloud-hosted open models)",
"prompt": "Ollama Cloud API key",
"url": "https://ollama.com/settings",
"password": True,
"category": "provider",
"advanced": True,
},
"OLLAMA_BASE_URL": {
"description": "Ollama Cloud base URL override (default: https://ollama.com/v1)",
"prompt": "Ollama base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"XIAOMI_API_KEY": {
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
"prompt": "Xiaomi MiMo API Key",
@@ -1145,22 +974,6 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"AWS_REGION": {
"description": "AWS region for Bedrock API calls (e.g. us-east-1, eu-central-1)",
"prompt": "AWS Region",
"url": "https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-regions.html",
"password": False,
"category": "provider",
"advanced": True,
},
"AWS_PROFILE": {
"description": "AWS named profile for Bedrock authentication (from ~/.aws/credentials)",
"prompt": "AWS Profile",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
# ── Tool API keys ──
"EXA_API_KEY": {
@@ -1358,12 +1171,6 @@ OPTIONAL_ENV_VARS = {
"password": False,
"category": "messaging",
},
"TELEGRAM_PROXY": {
"description": "Proxy URL for Telegram connections (overrides HTTPS_PROXY). Supports http://, https://, socks5://",
"prompt": "Telegram proxy URL (optional)",
"password": False,
"category": "messaging",
},
"DISCORD_BOT_TOKEN": {
"description": "Discord bot token from Developer Portal",
"prompt": "Discord bot token",
@@ -1559,12 +1366,12 @@ OPTIONAL_ENV_VARS = {
"prompt": "Allow All QQ Users",
"category": "messaging",
},
"QQBOT_HOME_CHANNEL": {
"QQ_HOME_CHANNEL": {
"description": "Default QQ channel/group for cron delivery and notifications",
"prompt": "QQ Home Channel",
"category": "messaging",
},
"QQBOT_HOME_CHANNEL_NAME": {
"QQ_HOME_CHANNEL_NAME": {
"description": "Display name for the QQ home channel",
"prompt": "QQ Home Channel Name",
"category": "messaging",
@@ -1661,8 +1468,13 @@ OPTIONAL_ENV_VARS = {
},
# ── Agent settings ──
# NOTE: MESSAGING_CWD was removed here — use terminal.cwd in config.yaml
# instead. The gateway reads TERMINAL_CWD (bridged from terminal.cwd).
"MESSAGING_CWD": {
"description": "Working directory for terminal commands via messaging",
"prompt": "Messaging working directory (default: home)",
"url": None,
"password": False,
"category": "setting",
},
"SUDO_PASSWORD": {
"description": "Sudo password for terminal commands requiring root access; set to an explicit empty string to try empty without prompting",
"prompt": "Sudo password",
@@ -1710,8 +1522,14 @@ OPTIONAL_ENV_VARS = {
},
}
# Tool Gateway env vars are always visible — they're useful for
# self-hosted / custom gateway setups regardless of subscription state.
if not _managed_nous_tools_enabled():
for _hidden_var in (
"FIRECRAWL_GATEWAY_URL",
"TOOL_GATEWAY_DOMAIN",
"TOOL_GATEWAY_SCHEME",
"TOOL_GATEWAY_USER_TOKEN",
):
OPTIONAL_ENV_VARS.pop(_hidden_var, None)
def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
@@ -2135,52 +1953,6 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
sys.stderr.write("\n".join(lines) + "\n\n")
def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> None:
"""Warn if MESSAGING_CWD or TERMINAL_CWD is set in .env instead of config.yaml.
These env vars are deprecated the canonical setting is terminal.cwd
in config.yaml. Prints a migration hint to stderr.
"""
import os, sys
messaging_cwd = os.environ.get("MESSAGING_CWD")
terminal_cwd_env = os.environ.get("TERMINAL_CWD")
if config is None:
try:
config = load_config()
except Exception:
return
terminal_cfg = config.get("terminal", {})
config_cwd = terminal_cfg.get("cwd", ".") if isinstance(terminal_cfg, dict) else "."
# Only warn if config.yaml doesn't have an explicit path
config_has_explicit_cwd = config_cwd not in (".", "auto", "cwd", "")
lines: list[str] = []
if messaging_cwd:
lines.append(
f" \033[33m⚠\033[0m MESSAGING_CWD={messaging_cwd} found in .env — "
f"this is deprecated."
)
if terminal_cwd_env and not config_has_explicit_cwd:
# TERMINAL_CWD in env but not from config bridge — likely from .env
lines.append(
f" \033[33m⚠\033[0m TERMINAL_CWD={terminal_cwd_env} found in .env — "
f"this is deprecated."
)
if lines:
hint_path = os.environ.get("HERMES_HOME", "~/.hermes")
lines.insert(0, "\033[33m⚠ Deprecated .env settings detected:\033[0m")
lines.append(
f" \033[2mMove to config.yaml instead: "
f"terminal:\\n cwd: /your/project/path\033[0m"
)
lines.append(
f" \033[2mThen remove the old entries from {hint_path}/.env\033[0m"
)
sys.stderr.write("\n".join(lines) + "\n\n")
def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, Any]:
"""
Migrate config to latest version, prompting for new required fields.
@@ -2651,85 +2423,6 @@ def _expand_env_vars(obj):
return obj
def _items_by_unique_name(items):
"""Return a name-indexed dict only when all items have unique string names."""
if not isinstance(items, list):
return None
indexed = {}
for item in items:
if not isinstance(item, dict) or not isinstance(item.get("name"), str):
return None
name = item["name"]
if name in indexed:
return None
indexed[name] = item
return indexed
def _preserve_env_ref_templates(current, raw, loaded_expanded=None):
"""Restore raw ``${VAR}`` templates when a value is otherwise unchanged.
``load_config()`` expands env refs for runtime use. When a caller later
persists that config after modifying some unrelated setting, keep the
original on-disk template instead of writing the expanded plaintext
secret back to ``config.yaml``.
Prefer preserving the raw template when ``current`` still matches either
the value previously returned by ``load_config()`` for this config path or
the current environment expansion of ``raw``. This handles env-var
rotation between load and save while still treating mixed literal/template
string edits as caller-owned once their rendered value diverges.
"""
if isinstance(current, str) and isinstance(raw, str) and re.search(r"\${[^}]+}", raw):
if current == raw:
return raw
if isinstance(loaded_expanded, str) and current == loaded_expanded:
return raw
if _expand_env_vars(raw) == current:
return raw
return current
if isinstance(current, dict) and isinstance(raw, dict):
return {
key: _preserve_env_ref_templates(
value,
raw.get(key),
loaded_expanded.get(key) if isinstance(loaded_expanded, dict) else None,
)
for key, value in current.items()
}
if isinstance(current, list) and isinstance(raw, list):
# Prefer matching named config objects (e.g. custom_providers) by name
# so harmless reordering doesn't drop the original template. If names
# are duplicated, fall back to positional matching instead of silently
# shadowing one entry.
current_by_name = _items_by_unique_name(current)
raw_by_name = _items_by_unique_name(raw)
loaded_by_name = _items_by_unique_name(loaded_expanded)
if current_by_name is not None and raw_by_name is not None:
return [
_preserve_env_ref_templates(
item,
raw_by_name.get(item.get("name")),
loaded_by_name.get(item.get("name")) if loaded_by_name is not None else None,
)
for item in current
]
return [
_preserve_env_ref_templates(
item,
raw[index] if index < len(raw) else None,
loaded_expanded[index]
if isinstance(loaded_expanded, list) and index < len(loaded_expanded)
else None,
)
for index, item in enumerate(current)
]
return current
def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
"""Move stale root-level provider/base_url into model section.
@@ -2797,6 +2490,7 @@ def read_raw_config() -> Dict[str, Any]:
def load_config() -> Dict[str, Any]:
"""Load configuration from ~/.hermes/config.yaml."""
import copy
ensure_hermes_home()
config_path = get_config_path()
@@ -2817,11 +2511,8 @@ def load_config() -> Dict[str, Any]:
config = _deep_merge(config, user_config)
except Exception as e:
print(f"Warning: Failed to load config: {e}")
normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
expanded = _expand_env_vars(normalized)
_LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(expanded)
return expanded
return _expand_env_vars(_normalize_root_model_keys(_normalize_max_turns_config(config)))
_SECURITY_COMMENT = """
@@ -2930,15 +2621,7 @@ def save_config(config: Dict[str, Any]):
ensure_hermes_home()
config_path = get_config_path()
current_normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
normalized = current_normalized
raw_existing = _normalize_root_model_keys(_normalize_max_turns_config(read_raw_config()))
if raw_existing:
normalized = _preserve_env_ref_templates(
normalized,
raw_existing,
_LAST_EXPANDED_CONFIG_BY_PATH.get(str(config_path)),
)
normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
# Build optional commented-out sections for features that are off by
# default or only relevant when explicitly configured.
@@ -2956,7 +2639,6 @@ def save_config(config: Dict[str, Any]):
extra_content="".join(parts) if parts else None,
)
_secure_file(config_path)
_LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(current_normalized)
def load_env() -> Dict[str, str]:
@@ -3084,47 +2766,6 @@ def sanitize_env_file() -> int:
return fixes
def _check_non_ascii_credential(key: str, value: str) -> str:
"""Warn and strip non-ASCII characters from credential values.
API keys and tokens must be pure ASCII they are sent as HTTP header
values which httpx/httpcore encode as ASCII. Non-ASCII characters
(commonly introduced by copy-pasting from rich-text editors or PDFs
that substitute lookalike Unicode glyphs for ASCII letters) cause
``UnicodeEncodeError: 'ascii' codec can't encode character`` at
request time.
Returns the sanitized (ASCII-only) value. Prints a warning if any
non-ASCII characters were found and removed.
"""
try:
value.encode("ascii")
return value # all ASCII — nothing to do
except UnicodeEncodeError:
pass
# Build a readable list of the offending characters
bad_chars: list[str] = []
for i, ch in enumerate(value):
if ord(ch) > 127:
bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})")
sanitized = value.encode("ascii", errors="ignore").decode("ascii")
import sys
print(
f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n"
f" This usually happens when copy-pasting from a PDF, rich-text editor,\n"
f" or web page that substitutes lookalike Unicode glyphs for ASCII letters.\n"
f"\n"
+ "\n".join(f" {line}" for line in bad_chars[:5])
+ ("\n ... and more" if len(bad_chars) > 5 else "")
+ f"\n\n The non-ASCII characters have been stripped automatically.\n"
f" If authentication fails, re-copy the key from the provider's dashboard.\n",
file=sys.stderr,
)
return sanitized
def save_env_value(key: str, value: str):
"""Save or update a value in ~/.hermes/.env."""
if is_managed():
@@ -3133,8 +2774,6 @@ def save_env_value(key: str, value: str):
if not _ENV_VAR_NAME_RE.match(key):
raise ValueError(f"Invalid environment variable name: {key!r}")
value = value.replace("\n", "").replace("\r", "")
# API keys / tokens must be ASCII — strip non-ASCII with a warning.
value = _check_non_ascii_credential(key, value)
ensure_hermes_home()
env_path = get_env_path()
@@ -3165,25 +2804,12 @@ def save_env_value(key: str, value: str):
lines.append(f"{key}={value}\n")
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
if env_path.exists():
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try:
with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(lines)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, env_path)
# Restore original permissions before _secure_file may tighten them.
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException:
try:
os.unlink(tmp_path)
@@ -3194,6 +2820,13 @@ def save_env_value(key: str, value: str):
os.environ[key] = value
# Restrict .env permissions to owner-only (contains API keys)
if not _IS_WINDOWS:
try:
os.chmod(env_path, stat.S_IRUSR | stat.S_IWUSR)
except OSError:
pass
def remove_env_value(key: str) -> bool:
"""Remove a key from ~/.hermes/.env and os.environ.
@@ -3222,23 +2855,12 @@ def remove_env_value(key: str) -> bool:
if found:
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try:
with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(new_lines)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, env_path)
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException:
try:
os.unlink(tmp_path)
+4 -25
View File
@@ -166,7 +166,6 @@ def curses_radiolist(
selected: int = 0,
*,
cancel_returns: int | None = None,
description: str | None = None,
) -> int:
"""Curses single-select radio list. Returns the selected index.
@@ -175,9 +174,6 @@ def curses_radiolist(
items: Display labels for each row.
selected: Index that starts selected (pre-selected).
cancel_returns: Returned on ESC/q. Defaults to the original *selected*.
description: Optional multi-line text shown between the title and
the item list. Useful for context that should survive the
curses screen clear.
"""
if cancel_returns is None:
cancel_returns = selected
@@ -185,10 +181,6 @@ def curses_radiolist(
if not sys.stdin.isatty():
return cancel_returns
desc_lines: list[str] = []
if description:
desc_lines = description.splitlines()
try:
import curses
result_holder: list = [None]
@@ -207,35 +199,22 @@ def curses_radiolist(
stdscr.clear()
max_y, max_x = stdscr.getmaxyx()
row = 0
# Header
try:
hattr = curses.A_BOLD
if curses.has_colors():
hattr |= curses.color_pair(2)
stdscr.addnstr(row, 0, title, max_x - 1, hattr)
row += 1
# Description lines
for dline in desc_lines:
if row >= max_y - 1:
break
stdscr.addnstr(row, 0, dline, max_x - 1, curses.A_NORMAL)
row += 1
stdscr.addnstr(0, 0, title, max_x - 1, hattr)
stdscr.addnstr(
row, 0,
1, 0,
" \u2191\u2193 navigate ENTER/SPACE select ESC cancel",
max_x - 1, curses.A_DIM,
)
row += 1
except curses.error:
pass
# Scrollable item list
items_start = row + 1
visible_rows = max_y - items_start - 1
visible_rows = max_y - 4
if cursor < scroll_offset:
scroll_offset = cursor
elif cursor >= scroll_offset + visible_rows:
@@ -244,7 +223,7 @@ def curses_radiolist(
for draw_i, i in enumerate(
range(scroll_offset, min(len(items), scroll_offset + visible_rows))
):
y = draw_i + items_start
y = draw_i + 3
if y >= max_y - 1:
break
radio = "\u25cf" if i == selected else "\u25cb"
+2 -251
View File
@@ -6,10 +6,7 @@ Currently supports:
"""
import io
import json
import os
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
@@ -30,205 +27,6 @@ _DPASTE_COM_URL = "https://dpaste.com/api/"
# paste.rs caps at ~1 MB; we stay under that with headroom.
_MAX_LOG_BYTES = 512_000
# Auto-delete pastes after this many seconds (6 hours).
_AUTO_DELETE_SECONDS = 21600
# ---------------------------------------------------------------------------
# Pending-deletion tracking (replaces the old fork-and-sleep subprocess).
# ---------------------------------------------------------------------------
def _pending_file() -> Path:
"""Path to ``~/.hermes/pastes/pending.json``.
Each entry: ``{"url": "...", "expire_at": <unix_ts>}``. Scheduled
DELETEs used to be handled by spawning a detached Python process per
paste that slept for 6 hours; those accumulated forever if the user
ran ``hermes debug share`` repeatedly. We now persist the schedule
to disk and sweep expired entries on the next debug invocation.
"""
return get_hermes_home() / "pastes" / "pending.json"
def _load_pending() -> list[dict]:
path = _pending_file()
if not path.exists():
return []
try:
data = json.loads(path.read_text(encoding="utf-8"))
if isinstance(data, list):
# Filter to well-formed entries only
return [
e for e in data
if isinstance(e, dict) and "url" in e and "expire_at" in e
]
except (OSError, ValueError, json.JSONDecodeError):
pass
return []
def _save_pending(entries: list[dict]) -> None:
path = _pending_file()
try:
path.parent.mkdir(parents=True, exist_ok=True)
tmp = path.with_suffix(".json.tmp")
tmp.write_text(json.dumps(entries, indent=2), encoding="utf-8")
os.replace(tmp, path)
except OSError:
# Non-fatal — worst case the user has to run ``hermes debug delete``
# manually.
pass
def _record_pending(urls: list[str], delay_seconds: int = _AUTO_DELETE_SECONDS) -> None:
"""Record *urls* for deletion at ``now + delay_seconds``.
Only paste.rs URLs are recorded (dpaste.com auto-expires). Entries
are merged into any existing pending.json.
"""
paste_rs_urls = [u for u in urls if _extract_paste_id(u)]
if not paste_rs_urls:
return
entries = _load_pending()
# Dedupe by URL: keep the later expire_at if same URL appears twice
by_url: dict[str, float] = {e["url"]: float(e["expire_at"]) for e in entries}
expire_at = time.time() + delay_seconds
for u in paste_rs_urls:
by_url[u] = max(expire_at, by_url.get(u, 0.0))
merged = [{"url": u, "expire_at": ts} for u, ts in by_url.items()]
_save_pending(merged)
def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
"""Synchronously DELETE any pending pastes whose ``expire_at`` has passed.
Returns ``(deleted, remaining)``. Best-effort: failed deletes stay in
the pending file and will be retried on the next sweep. Silent
intended to be called from every ``hermes debug`` invocation with
minimal noise.
"""
entries = _load_pending()
if not entries:
return (0, 0)
current = time.time() if now is None else now
deleted = 0
remaining: list[dict] = []
for entry in entries:
try:
expire_at = float(entry.get("expire_at", 0))
except (TypeError, ValueError):
continue # drop malformed entries
if expire_at > current:
remaining.append(entry)
continue
url = entry.get("url", "")
try:
if delete_paste(url):
deleted += 1
continue
except Exception:
# Network hiccup, 404 (already gone), etc. — drop the entry
# after a grace period; don't retry forever.
pass
# Retain failed deletes for up to 24h past expiration, then give up.
if expire_at + 86400 > current:
remaining.append(entry)
else:
deleted += 1 # count as reaped (paste.rs will GC eventually)
if deleted:
_save_pending(remaining)
return (deleted, len(remaining))
# ---------------------------------------------------------------------------
# Privacy / delete helpers
# ---------------------------------------------------------------------------
_PRIVACY_NOTICE = """\
This will upload the following to a public paste service:
System info (OS, Python version, Hermes version, provider, which API keys
are configured NOT the actual keys)
Recent log lines (agent.log, errors.log, gateway.log may contain
conversation fragments and file paths)
Full agent.log and gateway.log (up to 512 KB each likely contains
conversation content, tool outputs, and file paths)
Pastes auto-delete after 6 hours.
"""
_GATEWAY_PRIVACY_NOTICE = (
"⚠️ **Privacy notice:** This uploads system info + recent log tails "
"(may contain conversation fragments) to a public paste service. "
"Full logs are NOT included from the gateway — use `hermes debug share` "
"from the CLI for full log uploads.\n"
"Pastes auto-delete after 6 hours."
)
def _extract_paste_id(url: str) -> Optional[str]:
"""Extract the paste ID from a paste.rs or dpaste.com URL.
Returns the ID string, or None if the URL doesn't match a known service.
"""
url = url.strip().rstrip("/")
for prefix in ("https://paste.rs/", "http://paste.rs/"):
if url.startswith(prefix):
return url[len(prefix):]
return None
def delete_paste(url: str) -> bool:
"""Delete a paste from paste.rs. Returns True on success.
Only paste.rs supports unauthenticated DELETE. dpaste.com pastes
expire automatically but cannot be deleted via API.
"""
paste_id = _extract_paste_id(url)
if not paste_id:
raise ValueError(
f"Cannot delete: only paste.rs URLs are supported. Got: {url}"
)
target = f"{_PASTE_RS_URL}{paste_id}"
req = urllib.request.Request(
target, method="DELETE",
headers={"User-Agent": "hermes-agent/debug-share"},
)
with urllib.request.urlopen(req, timeout=30) as resp:
return 200 <= resp.status < 300
def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SECONDS):
"""Record *urls* for deletion ``delay_seconds`` from now.
Previously this spawned a detached Python subprocess per call that slept
for 6 hours and then issued DELETE requests. Those subprocesses leaked
every ``hermes debug share`` invocation added ~20 MB of resident Python
interpreters that never exited until the sleep completed.
The replacement is stateless: we append to ``~/.hermes/pastes/pending.json``
and rely on opportunistic sweeps (``_sweep_expired_pastes``) called from
every ``hermes debug`` invocation. If the user never runs ``hermes debug``
again, paste.rs's own retention policy handles cleanup.
"""
_record_pending(urls, delay_seconds=delay_seconds)
def _delete_hint(url: str) -> str:
"""Return a one-liner delete command for the given paste URL."""
paste_id = _extract_paste_id(url)
if paste_id:
return f"hermes debug delete {url}"
# dpaste.com — no API delete, expires on its own.
return "(auto-expires per dpaste.com policy)"
def _upload_paste_rs(content: str) -> str:
"""Upload to paste.rs. Returns the paste URL.
@@ -452,9 +250,6 @@ def run_debug_share(args):
expiry = getattr(args, "expire", 7)
local_only = getattr(args, "local", False)
if not local_only:
print(_PRIVACY_NOTICE)
print("Collecting debug report...")
# Capture dump once — prepended to every paste for context.
@@ -520,66 +315,22 @@ def run_debug_share(args):
if failures:
print(f"\n (failed to upload: {', '.join(failures)})")
# Schedule auto-deletion after 6 hours
_schedule_auto_delete(list(urls.values()))
print(f"\n⏱ Pastes will auto-delete in 6 hours.")
# Manual delete fallback
print(f"To delete now: hermes debug delete <url>")
print(f"\nShare these links with the Hermes team for support.")
def run_debug_delete(args):
"""Delete one or more paste URLs uploaded by /debug."""
urls = getattr(args, "urls", [])
if not urls:
print("Usage: hermes debug delete <url> [<url> ...]")
print(" Deletes paste.rs pastes uploaded by 'hermes debug share'.")
return
for url in urls:
try:
ok = delete_paste(url)
if ok:
print(f" ✓ Deleted: {url}")
else:
print(f" ✗ Failed to delete: {url} (unexpected response)")
except ValueError as exc:
print(f"{exc}")
except Exception as exc:
print(f" ✗ Could not delete {url}: {exc}")
def run_debug(args):
"""Route debug subcommands."""
# Opportunistic sweep of expired pastes on every ``hermes debug`` call.
# Replaces the old per-paste sleeping subprocess that used to leak as
# one orphaned Python interpreter per scheduled deletion. Silent and
# best-effort — any failure is swallowed so ``hermes debug`` stays
# reliable even when offline.
try:
_sweep_expired_pastes()
except Exception:
pass
subcmd = getattr(args, "debug_command", None)
if subcmd == "share":
run_debug_share(args)
elif subcmd == "delete":
run_debug_delete(args)
else:
# Default: show help
print("Usage: hermes debug <command>")
print("Usage: hermes debug share [--lines N] [--expire N] [--local]")
print()
print("Commands:")
print(" share Upload debug report to a paste service and print URL")
print(" delete Delete a previously uploaded paste")
print()
print("Options (share):")
print("Options:")
print(" --lines N Number of log lines to include (default: 200)")
print(" --expire N Paste expiry in days (default: 7)")
print(" --local Print report locally instead of uploading")
print()
print("Options (delete):")
print(" <url> ... One or more paste URLs to delete")
-294
View File
@@ -1,294 +0,0 @@
"""
DingTalk Device Flow authorization.
Implements the same 3-step registration flow as dingtalk-openclaw-connector:
1. POST /app/registration/init get nonce
2. POST /app/registration/begin get device_code + verification_uri_complete
3. POST /app/registration/poll poll until SUCCESS get client_id + client_secret
The verification_uri_complete is rendered as a QR code in the terminal so the
user can scan it with DingTalk to authorize, yielding AppKey + AppSecret
automatically.
"""
from __future__ import annotations
import io
import os
import sys
import time
import logging
from typing import Optional, Tuple
import requests
logger = logging.getLogger(__name__)
# ── Configuration ──────────────────────────────────────────────────────────
REGISTRATION_BASE_URL = os.environ.get(
"DINGTALK_REGISTRATION_BASE_URL", "https://oapi.dingtalk.com"
).rstrip("/")
REGISTRATION_SOURCE = os.environ.get("DINGTALK_REGISTRATION_SOURCE", "openClaw")
# ── API helpers ────────────────────────────────────────────────────────────
class RegistrationError(Exception):
"""Raised when a DingTalk registration API call fails."""
def _api_post(path: str, payload: dict) -> dict:
"""POST to the registration API and return the parsed JSON body."""
url = f"{REGISTRATION_BASE_URL}{path}"
try:
resp = requests.post(url, json=payload, timeout=15)
resp.raise_for_status()
data = resp.json()
except requests.RequestException as exc:
raise RegistrationError(f"Network error calling {url}: {exc}") from exc
errcode = data.get("errcode", -1)
if errcode != 0:
errmsg = data.get("errmsg", "unknown error")
raise RegistrationError(f"API error [{path}]: {errmsg} (errcode={errcode})")
return data
# ── Core flow ──────────────────────────────────────────────────────────────
def begin_registration() -> dict:
"""Start a device-flow registration.
Returns a dict with keys:
device_code, verification_uri_complete, expires_in, interval
"""
# Step 1: init → nonce
init_data = _api_post("/app/registration/init", {"source": REGISTRATION_SOURCE})
nonce = str(init_data.get("nonce", "")).strip()
if not nonce:
raise RegistrationError("init response missing nonce")
# Step 2: begin → device_code, verification_uri_complete
begin_data = _api_post("/app/registration/begin", {"nonce": nonce})
device_code = str(begin_data.get("device_code", "")).strip()
verification_uri_complete = str(begin_data.get("verification_uri_complete", "")).strip()
if not device_code:
raise RegistrationError("begin response missing device_code")
if not verification_uri_complete:
raise RegistrationError("begin response missing verification_uri_complete")
return {
"device_code": device_code,
"verification_uri_complete": verification_uri_complete,
"expires_in": int(begin_data.get("expires_in", 7200)),
"interval": max(int(begin_data.get("interval", 3)), 2),
}
def poll_registration(device_code: str) -> dict:
"""Poll the registration status once.
Returns a dict with keys: status, client_id?, client_secret?, fail_reason?
"""
data = _api_post("/app/registration/poll", {"device_code": device_code})
status_raw = str(data.get("status", "")).strip().upper()
if status_raw not in ("WAITING", "SUCCESS", "FAIL", "EXPIRED"):
status_raw = "UNKNOWN"
return {
"status": status_raw,
"client_id": str(data.get("client_id", "")).strip() or None,
"client_secret": str(data.get("client_secret", "")).strip() or None,
"fail_reason": str(data.get("fail_reason", "")).strip() or None,
}
def wait_for_registration_success(
device_code: str,
interval: int = 3,
expires_in: int = 7200,
on_waiting: Optional[callable] = None,
) -> Tuple[str, str]:
"""Block until the registration succeeds or times out.
Returns (client_id, client_secret).
"""
deadline = time.monotonic() + expires_in
retry_window = 120 # 2 minutes for transient errors
retry_start = 0.0
while time.monotonic() < deadline:
time.sleep(interval)
try:
result = poll_registration(device_code)
except RegistrationError:
if retry_start == 0:
retry_start = time.monotonic()
if time.monotonic() - retry_start < retry_window:
continue
raise
status = result["status"]
if status == "WAITING":
retry_start = 0
if on_waiting:
on_waiting()
continue
if status == "SUCCESS":
cid = result["client_id"]
csecret = result["client_secret"]
if not cid or not csecret:
raise RegistrationError("authorization succeeded but credentials are missing")
return cid, csecret
# FAIL / EXPIRED / UNKNOWN
if retry_start == 0:
retry_start = time.monotonic()
if time.monotonic() - retry_start < retry_window:
continue
reason = result.get("fail_reason") or status
raise RegistrationError(f"authorization failed: {reason}")
raise RegistrationError("authorization timed out, please retry")
# ── QR code rendering ─────────────────────────────────────────────────────
def _ensure_qrcode_installed() -> bool:
"""Try to import qrcode; if missing, auto-install it via pip/uv."""
try:
import qrcode # noqa: F401
return True
except ImportError:
pass
import subprocess
# Try uv first (Hermes convention), then pip
for cmd in (
[sys.executable, "-m", "uv", "pip", "install", "qrcode"],
[sys.executable, "-m", "pip", "install", "-q", "qrcode"],
):
try:
subprocess.check_call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
import qrcode # noqa: F401,F811
return True
except (subprocess.CalledProcessError, ImportError, FileNotFoundError):
continue
return False
def render_qr_to_terminal(url: str) -> bool:
"""Render *url* as a compact QR code in the terminal.
Returns True if the QR code was printed, False if the library is missing.
"""
try:
import qrcode
except ImportError:
return False
qr = qrcode.QRCode(
version=1,
error_correction=qrcode.constants.ERROR_CORRECT_L,
box_size=1,
border=1,
)
qr.add_data(url)
qr.make(fit=True)
# Use half-block characters for compact rendering (2 rows per character)
matrix = qr.get_matrix()
rows = len(matrix)
lines: list[str] = []
TOP_HALF = "\u2580" # ▀
BOTTOM_HALF = "\u2584" # ▄
FULL_BLOCK = "\u2588" # █
EMPTY = " "
for r in range(0, rows, 2):
line_chars: list[str] = []
for c in range(len(matrix[r])):
top = matrix[r][c]
bottom = matrix[r + 1][c] if r + 1 < rows else False
if top and bottom:
line_chars.append(FULL_BLOCK)
elif top:
line_chars.append(TOP_HALF)
elif bottom:
line_chars.append(BOTTOM_HALF)
else:
line_chars.append(EMPTY)
lines.append(" " + "".join(line_chars))
print("\n".join(lines))
return True
# ── High-level entry point for the setup wizard ───────────────────────────
def dingtalk_qr_auth() -> Optional[Tuple[str, str]]:
"""Run the interactive QR-code device-flow authorization.
Returns (client_id, client_secret) on success, or None if the user
cancelled or the flow failed.
"""
from hermes_cli.setup import print_info, print_success, print_warning, print_error
print()
print_info(" Initializing DingTalk device authorization...")
print_info(" Note: the scan page is branded 'OpenClaw' — DingTalk's")
print_info(" ecosystem onboarding bridge. Safe to use.")
try:
reg = begin_registration()
except RegistrationError as exc:
print_error(f" Authorization init failed: {exc}")
return None
url = reg["verification_uri_complete"]
# Ensure qrcode library is available (auto-install if missing)
if not _ensure_qrcode_installed():
print_warning(" qrcode library install failed, will show link only.")
print()
print_info(" Please scan the QR code below with DingTalk to authorize:")
print()
if not render_qr_to_terminal(url):
print_warning(f" QR code render failed, please open the link below to authorize:")
print()
print_info(f" Or open this link manually: {url}")
print()
print_info(" Waiting for QR scan authorization... (timeout: 2 hours)")
dot_count = 0
def _on_waiting():
nonlocal dot_count
dot_count += 1
if dot_count % 10 == 0:
sys.stdout.write(".")
sys.stdout.flush()
try:
client_id, client_secret = wait_for_registration_success(
device_code=reg["device_code"],
interval=reg["interval"],
expires_in=reg["expires_in"],
on_waiting=_on_waiting,
)
except RegistrationError as exc:
print()
print_error(f" Authorization failed: {exc}")
return None
print()
print_success(" QR scan authorization successful!")
print_success(f" Client ID: {client_id}")
print_success(f" Client Secret: {client_secret[:8]}{'*' * (len(client_secret) - 8)}")
return client_id, client_secret
+3 -129
View File
@@ -8,7 +8,6 @@ import os
import sys
import subprocess
import shutil
from pathlib import Path
from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
from hermes_constants import display_hermes_home
@@ -373,11 +372,7 @@ def run_doctor(args):
print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD))
try:
from hermes_cli.auth import (
get_nous_auth_status,
get_codex_auth_status,
get_gemini_oauth_auth_status,
)
from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status
nous_status = get_nous_auth_status()
if nous_status.get("logged_in"):
@@ -392,20 +387,6 @@ def run_doctor(args):
check_warn("OpenAI Codex auth", "(not logged in)")
if codex_status.get("error"):
check_info(codex_status["error"])
gemini_status = get_gemini_oauth_auth_status()
if gemini_status.get("logged_in"):
email = gemini_status.get("email") or ""
project = gemini_status.get("project_id") or ""
pieces = []
if email:
pieces.append(email)
if project:
pieces.append(f"project={project}")
suffix = f" ({', '.join(pieces)})" if pieces else ""
check_ok("Google Gemini OAuth", f"(logged in{suffix})")
else:
check_warn("Google Gemini OAuth", "(not logged in)")
except Exception as e:
check_warn("Auth provider status", f"(could not check: {e})")
@@ -532,87 +513,7 @@ def run_doctor(args):
pass
_check_gateway_service_linger(issues)
# =========================================================================
# Check: Command installation (hermes bin symlink)
# =========================================================================
if sys.platform != "win32":
print()
print(color("◆ Command Installation", Colors.CYAN, Colors.BOLD))
# Determine the venv entry point location
_venv_bin = None
for _venv_name in ("venv", ".venv"):
_candidate = PROJECT_ROOT / _venv_name / "bin" / "hermes"
if _candidate.exists():
_venv_bin = _candidate
break
# Determine the expected command link directory (mirrors install.sh logic)
_prefix = os.environ.get("PREFIX", "")
_is_termux_env = bool(os.environ.get("TERMUX_VERSION")) or "com.termux/files/usr" in _prefix
if _is_termux_env and _prefix:
_cmd_link_dir = Path(_prefix) / "bin"
_cmd_link_display = "$PREFIX/bin"
else:
_cmd_link_dir = Path.home() / ".local" / "bin"
_cmd_link_display = "~/.local/bin"
_cmd_link = _cmd_link_dir / "hermes"
if _venv_bin is None:
check_warn(
"Venv entry point not found",
"(hermes not in venv/bin/ or .venv/bin/ — reinstall with pip install -e '.[all]')"
)
manual_issues.append(
f"Reinstall entry point: cd {PROJECT_ROOT} && source venv/bin/activate && pip install -e '.[all]'"
)
else:
check_ok(f"Venv entry point exists ({_venv_bin.relative_to(PROJECT_ROOT)})")
# Check the symlink at the command link location
if _cmd_link.is_symlink():
_target = _cmd_link.resolve()
_expected = _venv_bin.resolve()
if _target == _expected:
check_ok(f"{_cmd_link_display}/hermes → correct target")
else:
check_warn(
f"{_cmd_link_display}/hermes points to wrong target",
f"(→ {_target}, expected → {_expected})"
)
if should_fix:
_cmd_link.unlink()
_cmd_link.symlink_to(_venv_bin)
check_ok(f"Fixed symlink: {_cmd_link_display}/hermes → {_venv_bin}")
fixed_count += 1
else:
issues.append(f"Broken symlink at {_cmd_link_display}/hermes — run 'hermes doctor --fix'")
elif _cmd_link.exists():
# It's a regular file, not a symlink — possibly a wrapper script
check_ok(f"{_cmd_link_display}/hermes exists (non-symlink)")
else:
check_fail(
f"{_cmd_link_display}/hermes not found",
"(hermes command may not work outside the venv)"
)
if should_fix:
_cmd_link_dir.mkdir(parents=True, exist_ok=True)
_cmd_link.symlink_to(_venv_bin)
check_ok(f"Created symlink: {_cmd_link_display}/hermes → {_venv_bin}")
fixed_count += 1
# Check if the link dir is on PATH
_path_dirs = os.environ.get("PATH", "").split(os.pathsep)
if str(_cmd_link_dir) not in _path_dirs:
check_warn(
f"{_cmd_link_display} is not on your PATH",
"(add it to your shell config: export PATH=\"$HOME/.local/bin:$PATH\")"
)
manual_issues.append(f"Add {_cmd_link_display} to your PATH")
else:
issues.append(f"Missing {_cmd_link_display}/hermes symlink — run 'hermes doctor --fix'")
# =========================================================================
# Check: External tools
# =========================================================================
@@ -825,7 +726,6 @@ def run_doctor(args):
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
# MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
@@ -833,8 +733,7 @@ def run_doctor(args):
("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
# OpenCode Go has no shared /models endpoint; skip the health check.
("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False),
("OpenCode Go", ("OPENCODE_GO_API_KEY",), "https://opencode.ai/zen/go/v1/models", "OPENCODE_GO_BASE_URL", True),
]
for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
_key = ""
@@ -879,31 +778,6 @@ def run_doctor(args):
except Exception as _e:
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ")
# -- AWS Bedrock --
# Bedrock uses the AWS SDK credential chain, not API keys.
try:
from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
if has_aws_credentials():
_auth_var = resolve_aws_auth_env_var()
_region = resolve_bedrock_region()
_label = "AWS Bedrock".ljust(20)
print(f" Checking AWS Bedrock...", end="", flush=True)
try:
import boto3
_br_client = boto3.client("bedrock", region_name=_region)
_br_resp = _br_client.list_foundation_models()
_model_count = len(_br_resp.get("modelSummaries", []))
print(f"\r {color('', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)} ")
except ImportError:
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'(boto3 not installed — {sys.executable} -m pip install boto3)', Colors.DIM)} ")
issues.append(f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3")
except Exception as _e:
_err_name = type(_e).__name__
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)} ")
issues.append(f"AWS Bedrock: {_err_name} — check IAM permissions for bedrock:ListFoundationModels")
except ImportError:
pass # bedrock_adapter not available — skip silently
# =========================================================================
# Check: Submodules
# =========================================================================
+35 -15
View File
@@ -43,20 +43,41 @@ def _redact(value: str) -> str:
def _gateway_status() -> str:
"""Return a short gateway status string."""
try:
from hermes_cli.gateway import get_gateway_runtime_snapshot
snapshot = get_gateway_runtime_snapshot()
if snapshot.running:
mode = snapshot.manager
if snapshot.has_process_service_mismatch:
mode = "manual"
return f"running ({mode}, pid {snapshot.gateway_pids[0]})"
if snapshot.service_installed and not snapshot.service_running:
return f"stopped ({snapshot.manager})"
return f"stopped ({snapshot.manager})"
except Exception:
return "unknown" if sys.platform.startswith(("linux", "darwin")) else "N/A"
if sys.platform.startswith("linux"):
from hermes_constants import is_container
if is_container():
try:
from hermes_cli.gateway import find_gateway_pids
pids = find_gateway_pids()
if pids:
return f"running (docker, pid {pids[0]})"
return "stopped (docker)"
except Exception:
return "stopped (docker)"
try:
from hermes_cli.gateway import get_service_name
svc = get_service_name()
except Exception:
svc = "hermes-gateway"
try:
r = subprocess.run(
["systemctl", "--user", "is-active", svc],
capture_output=True, text=True, timeout=5,
)
return "running (systemd)" if r.stdout.strip() == "active" else "stopped"
except Exception:
return "unknown"
elif sys.platform == "darwin":
try:
from hermes_cli.gateway import get_launchd_label
r = subprocess.run(
["launchctl", "list", get_launchd_label()],
capture_output=True, text=True, timeout=5,
)
return "loaded (launchd)" if r.returncode == 0 else "not loaded"
except Exception:
return "unknown"
return "N/A"
def _count_skills(hermes_home: Path) -> int:
@@ -275,7 +296,6 @@ def run_dump(args):
("DEEPSEEK_API_KEY", "deepseek"),
("DASHSCOPE_API_KEY", "dashscope"),
("HF_TOKEN", "huggingface"),
("NVIDIA_API_KEY", "nvidia"),
("AI_GATEWAY_API_KEY", "ai_gateway"),
("OPENCODE_ZEN_API_KEY", "opencode_zen"),
("OPENCODE_GO_API_KEY", "opencode_go"),
-29
View File
@@ -8,40 +8,11 @@ from pathlib import Path
from dotenv import load_dotenv
# Env var name suffixes that indicate credential values. These are the
# only env vars whose values we sanitize on load — we must not silently
# alter arbitrary user env vars, but credentials are known to require
# pure ASCII (they become HTTP header values).
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
def _sanitize_loaded_credentials() -> None:
"""Strip non-ASCII characters from credential env vars in os.environ.
Called after dotenv loads so the rest of the codebase never sees
non-ASCII API keys. Only touches env vars whose names end with
known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
"""
for key, value in list(os.environ.items()):
if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
continue
try:
value.encode("ascii")
except UnicodeEncodeError:
os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
try:
load_dotenv(dotenv_path=path, override=override, encoding="utf-8")
except UnicodeDecodeError:
load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
# Strip non-ASCII characters from credential env vars that were just
# loaded. API keys must be pure ASCII since they're sent as HTTP
# header values (httpx encodes headers as ASCII). Non-ASCII chars
# typically come from copy-pasting keys from PDFs or rich-text editors
# that substitute Unicode lookalike glyphs (e.g. ʋ U+028B for v).
_sanitize_loaded_credentials()
def _sanitize_env_file_if_needed(path: Path) -> None:
+161 -802
View File
File diff suppressed because it is too large Load Diff
+696 -3052
View File
File diff suppressed because it is too large Load Diff
+5 -66
View File
@@ -279,8 +279,8 @@ def cmd_mcp_add(args):
_info(f"Starting OAuth flow for '{name}'...")
oauth_ok = False
try:
from tools.mcp_oauth_manager import get_manager
oauth_auth = get_manager().get_or_build_provider(name, url, None)
from tools.mcp_oauth import build_oauth_auth
oauth_auth = build_oauth_auth(name, url)
if oauth_auth:
server_config["auth"] = "oauth"
_success("OAuth configured (tokens will be acquired on first connection)")
@@ -428,12 +428,10 @@ def cmd_mcp_remove(args):
_remove_mcp_server(name)
_success(f"Removed '{name}' from config")
# Clean up OAuth tokens if they exist — route through MCPOAuthManager so
# any provider instance cached in the current process (e.g. from an
# earlier `hermes mcp test` in the same session) is evicted too.
# Clean up OAuth tokens if they exist
try:
from tools.mcp_oauth_manager import get_manager
get_manager().remove(name)
from tools.mcp_oauth import remove_oauth_tokens
remove_oauth_tokens(name)
_success("Cleaned up OAuth tokens")
except Exception:
pass
@@ -579,63 +577,6 @@ def _interpolate_value(value: str) -> str:
return re.sub(r"\$\{(\w+)\}", _replace, value)
# ─── hermes mcp login ────────────────────────────────────────────────────────
def cmd_mcp_login(args):
"""Force re-authentication for an OAuth-based MCP server.
Deletes cached tokens (both on disk and in the running process's
MCPOAuthManager cache) and triggers a fresh OAuth flow via the
existing probe path.
Use this when:
- Tokens are stuck in a bad state (server revoked, refresh token
consumed by an external process, etc.)
- You want to re-authenticate to change scopes or account
- A tool call returned ``needs_reauth: true``
"""
name = args.name
servers = _get_mcp_servers()
if name not in servers:
_error(f"Server '{name}' not found in config.")
if servers:
_info(f"Available servers: {', '.join(servers)}")
return
server_config = servers[name]
url = server_config.get("url")
if not url:
_error(f"Server '{name}' has no URL — not an OAuth-capable server")
return
if server_config.get("auth") != "oauth":
_error(f"Server '{name}' is not configured for OAuth (auth={server_config.get('auth')})")
_info("Use `hermes mcp remove` + `hermes mcp add` to reconfigure auth.")
return
# Wipe both disk and in-memory cache so the next probe forces a fresh
# OAuth flow.
try:
from tools.mcp_oauth_manager import get_manager
mgr = get_manager()
mgr.remove(name)
except Exception as exc:
_warning(f"Could not clear existing OAuth state: {exc}")
print()
_info(f"Starting OAuth flow for '{name}'...")
# Probe triggers the OAuth flow (browser redirect + callback capture).
try:
tools = _probe_single_server(name, server_config)
if tools:
_success(f"Authenticated — {len(tools)} tool(s) available")
else:
_success("Authenticated (server reported no tools)")
except Exception as exc:
_error(f"Authentication failed: {exc}")
# ─── hermes mcp configure ────────────────────────────────────────────────────
def cmd_mcp_configure(args):
@@ -755,7 +696,6 @@ def mcp_command(args):
"test": cmd_mcp_test,
"configure": cmd_mcp_configure,
"config": cmd_mcp_configure,
"login": cmd_mcp_login,
}
handler = handlers.get(action)
@@ -773,5 +713,4 @@ def mcp_command(args):
_info("hermes mcp list List servers")
_info("hermes mcp test <name> Test connection")
_info("hermes mcp configure <name> Toggle tools")
_info("hermes mcp login <name> Re-authenticate OAuth")
print()
+2 -4
View File
@@ -58,11 +58,9 @@ def _prompt(label: str, default: str | None = None, secret: bool = False) -> str
def _install_dependencies(provider_name: str) -> None:
"""Install pip dependencies declared in plugin.yaml."""
import subprocess
from plugins.memory import find_provider_dir
from pathlib import Path as _Path
plugin_dir = find_provider_dir(provider_name)
if not plugin_dir:
return
plugin_dir = _Path(__file__).parent.parent / "plugins" / "memory" / provider_name
yaml_path = plugin_dir / "plugin.yaml"
if not yaml_path.exists():
return
+1 -21
View File
@@ -96,7 +96,6 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
"qwen-oauth",
"xiaomi",
"arcee",
"ollama-cloud",
"custom",
})
@@ -374,26 +373,7 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
return bare
return _dots_to_hyphens(bare)
# --- Copilot / Copilot ACP: delegate to the Copilot-specific
# normalizer. It knows about the alias table (vendor-prefix
# stripping for Anthropic/OpenAI, dash-to-dot repair for Claude)
# and live-catalog lookups. Without this, vendor-prefixed or
# dash-notation Claude IDs survive to the Copilot API and hit
# HTTP 400 "model_not_supported". See issue #6879.
if provider in {"copilot", "copilot-acp"}:
try:
from hermes_cli.models import normalize_copilot_model_id
normalized = normalize_copilot_model_id(name)
if normalized:
return normalized
except Exception:
# Fall through to the generic strip-vendor behaviour below
# if the Copilot-specific path is unavailable for any reason.
pass
# --- Copilot / Copilot ACP / openai-codex fallback:
# strip matching provider prefix, keep dots ---
# --- Copilot: strip matching provider prefix, keep dots ---
if provider in _STRIP_VENDOR_ONLY_PROVIDERS:
stripped = _strip_matching_provider_prefix(name, provider)
if stripped == name and name.startswith("openai/"):
+17 -52
View File
@@ -274,11 +274,6 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
is_global = False
explicit_provider = ""
# Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
# A single Unicode dash before a flag keyword becomes "--"
import re as _re
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global)', r'--\1', raw_args)
# Extract --global
if "--global" in raw_args:
is_global = True
@@ -457,7 +452,6 @@ def switch_model(
ModelSwitchResult with all information the caller needs.
"""
from hermes_cli.models import (
copilot_model_api_mode,
detect_provider_for_model,
validate_requested_model,
opencode_model_api_mode,
@@ -692,12 +686,12 @@ def switch_model(
api_key=api_key,
base_url=base_url,
)
except Exception as e:
except Exception:
validation = {
"accepted": False,
"persist": False,
"accepted": True,
"persist": True,
"recognized": False,
"message": f"Could not validate `{new_model}`: {e}",
"message": None,
}
if not validation.get("accepted"):
@@ -715,34 +709,14 @@ def switch_model(
if validation.get("corrected_model"):
new_model = validation["corrected_model"]
# --- Copilot api_mode override ---
if target_provider in {"copilot", "github-copilot"}:
api_mode = copilot_model_api_mode(new_model, api_key=api_key)
# --- OpenCode api_mode override ---
if target_provider in {"opencode-zen", "opencode-go", "opencode"}:
if target_provider in {"opencode-zen", "opencode-go", "opencode", "opencode-go"}:
api_mode = opencode_model_api_mode(target_provider, new_model)
# --- Determine api_mode if not already set ---
if not api_mode:
api_mode = determine_api_mode(target_provider, base_url)
# OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
# Anthropic SDK prepends its own /v1/messages to the base_url. Strip the
# trailing /v1 so the SDK constructs the correct path (e.g.
# https://opencode.ai/zen/go/v1/messages instead of .../v1/v1/messages).
# Mirrors the same logic in hermes_cli.runtime_provider.resolve_runtime_provider;
# without it, /model switches into an anthropic_messages-routed OpenCode
# model (e.g. `/model minimax-m2.7` on opencode-go, `/model claude-sonnet-4-6`
# on opencode-zen) hit a double /v1 and returned OpenCode's website 404 page.
if (
api_mode == "anthropic_messages"
and target_provider in {"opencode-zen", "opencode-go"}
and isinstance(base_url, str)
and base_url
):
base_url = re.sub(r"/v1/?$", "", base_url)
# --- Get capabilities (legacy) ---
capabilities = get_model_capabilities(target_provider, new_model)
@@ -812,8 +786,7 @@ def list_authenticated_providers(
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
results: List[dict] = []
seen_slugs: set = set() # lowercase-normalized to catch case variants (#9545)
seen_mdev_ids: set = set() # prevent duplicate entries for aliases (e.g. kimi-coding + kimi-coding-cn)
seen_slugs: set = set()
data = fetch_models_dev()
@@ -823,18 +796,9 @@ def list_authenticated_providers(
# "nous" shares OpenRouter's curated list if not separately defined
if "nous" not in curated:
curated["nous"] = curated["openrouter"]
# Ollama Cloud uses dynamic discovery (no static curated list)
if "ollama-cloud" not in curated:
from hermes_cli.models import fetch_ollama_cloud_models
curated["ollama-cloud"] = fetch_ollama_cloud_models()
# --- 1. Check Hermes-mapped providers ---
for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
# Skip aliases that map to the same models.dev provider (e.g.
# kimi-coding and kimi-coding-cn both → kimi-for-coding).
# The first one with valid credentials wins (#10526).
if mdev_id in seen_mdev_ids:
continue
pdata = data.get(mdev_id)
if not isinstance(pdata, dict):
continue
@@ -873,8 +837,7 @@ def list_authenticated_providers(
"total_models": total,
"source": "built-in",
})
seen_slugs.add(slug.lower())
seen_mdev_ids.add(mdev_id)
seen_slugs.add(slug)
# --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
from hermes_cli.providers import HERMES_OVERLAYS
@@ -886,12 +849,12 @@ def list_authenticated_providers(
_mdev_to_hermes = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()}
for pid, overlay in HERMES_OVERLAYS.items():
if pid.lower() in seen_slugs:
if pid in seen_slugs:
continue
# Resolve Hermes slug — e.g. "github-copilot" → "copilot"
hermes_slug = _mdev_to_hermes.get(pid, pid)
if hermes_slug.lower() in seen_slugs:
if hermes_slug in seen_slugs:
continue
# Check if credentials exist
@@ -972,8 +935,8 @@ def list_authenticated_providers(
"total_models": total,
"source": "hermes",
})
seen_slugs.add(pid.lower())
seen_slugs.add(hermes_slug.lower())
seen_slugs.add(pid)
seen_slugs.add(hermes_slug)
# --- 2b. Cross-check canonical provider list ---
# Catches providers that are in CANONICAL_PROVIDERS but weren't found
@@ -985,7 +948,7 @@ def list_authenticated_providers(
_canon_provs = []
for _cp in _canon_provs:
if _cp.slug.lower() in seen_slugs:
if _cp.slug in seen_slugs:
continue
# Check credentials via PROVIDER_REGISTRY (auth.py)
@@ -1032,7 +995,7 @@ def list_authenticated_providers(
"total_models": _cp_total,
"source": "canonical",
})
seen_slugs.add(_cp.slug.lower())
seen_slugs.add(_cp.slug)
# --- 3. User-defined endpoints from config ---
if user_providers and isinstance(user_providers, dict):
@@ -1105,7 +1068,7 @@ def list_authenticated_providers(
groups[slug]["models"].append(default_model)
for slug, grp in groups.items():
if slug.lower() in seen_slugs:
if slug in seen_slugs:
continue
results.append({
"slug": slug,
@@ -1117,9 +1080,11 @@ def list_authenticated_providers(
"source": "user-config",
"api_url": grp["api_url"],
})
seen_slugs.add(slug.lower())
seen_slugs.add(slug)
# Sort: current provider first, then by model count descending
results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
return results
+48 -281
View File
@@ -11,9 +11,7 @@ import json
import os
import urllib.request
import urllib.error
import time
from difflib import get_close_matches
from pathlib import Path
from typing import Any, NamedTuple, Optional
COPILOT_BASE_URL = "https://api.githubcopilot.com"
@@ -26,9 +24,7 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
# Fallback OpenRouter snapshot used when the live catalog is unavailable.
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
("moonshotai/kimi-k2.5", "recommended"),
("anthropic/claude-opus-4.7", ""),
("anthropic/claude-opus-4.6", ""),
("anthropic/claude-opus-4.6", "recommended"),
("anthropic/claude-sonnet-4.6", ""),
("qwen/qwen3.6-plus", ""),
("anthropic/claude-sonnet-4.5", ""),
@@ -50,6 +46,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
("z-ai/glm-5.1", ""),
("z-ai/glm-5v-turbo", ""),
("z-ai/glm-5-turbo", ""),
("moonshotai/kimi-k2.5", ""),
("x-ai/grok-4.20", ""),
("nvidia/nemotron-3-super-120b-a12b", ""),
("nvidia/nemotron-3-super-120b-a12b:free", "free"),
@@ -75,9 +72,7 @@ def _codex_curated_models() -> list[str]:
_PROVIDER_MODELS: dict[str, list[str]] = {
"nous": [
"moonshotai/kimi-k2.5",
"xiaomi/mimo-v2-pro",
"anthropic/claude-opus-4.7",
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-sonnet-4.5",
@@ -97,6 +92,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"z-ai/glm-5.1",
"z-ai/glm-5v-turbo",
"z-ai/glm-5-turbo",
"moonshotai/kimi-k2.5",
"x-ai/grok-4.20-beta",
"nvidia/nemotron-3-super-120b-a12b",
"nvidia/nemotron-3-super-120b-a12b:free",
@@ -135,11 +131,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"gemini-2.5-flash-lite",
# Gemma open models (also served via AI Studio)
"gemma-4-31b-it",
],
"google-gemini-cli": [
"gemini-2.5-pro",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
"gemma-4-26b-it",
],
"zai": [
"glm-5.1",
@@ -151,26 +143,21 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"glm-4.5-flash",
],
"xai": [
"grok-4.20-reasoning",
"grok-4.20-0309-reasoning",
"grok-4.20-0309-non-reasoning",
"grok-4.20-multi-agent-0309",
"grok-4-1-fast-reasoning",
],
"nvidia": [
# NVIDIA flagship reasoning models
"nvidia/nemotron-3-super-120b-a12b",
"nvidia/nemotron-3-nano-30b-a3b",
"nvidia/llama-3.3-nemotron-super-49b-v1.5",
# Third-party agentic models hosted on build.nvidia.com
# (map to OpenRouter defaults — users get familiar picks on NIM)
"qwen/qwen3.5-397b-a17b",
"deepseek-ai/deepseek-v3.2",
"moonshotai/kimi-k2.5",
"minimaxai/minimax-m2.5",
"z-ai/glm5",
"openai/gpt-oss-120b",
"grok-4-1-fast-non-reasoning",
"grok-4-fast-reasoning",
"grok-4-fast-non-reasoning",
"grok-4-0709",
"grok-code-fast-1",
"grok-3",
"grok-3-mini",
],
"kimi-coding": [
"kimi-k2.5",
"kimi-for-coding",
"kimi-k2.5",
"kimi-k2-thinking",
"kimi-k2-thinking-turbo",
"kimi-k2-turbo-preview",
@@ -201,7 +188,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"MiniMax-M2",
],
"anthropic": [
"claude-opus-4-7",
"claude-opus-4-6",
"claude-sonnet-4-6",
"claude-opus-4-5-20251101",
@@ -225,7 +211,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"trinity-mini",
],
"opencode-zen": [
"kimi-k2.5",
"gpt-5.4-pro",
"gpt-5.4",
"gpt-5.3-codex",
@@ -257,15 +242,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"glm-5",
"glm-4.7",
"glm-4.6",
"kimi-k2.5",
"kimi-k2-thinking",
"kimi-k2",
"qwen3-coder",
"big-pickle",
],
"opencode-go": [
"kimi-k2.5",
"glm-5.1",
"glm-5",
"kimi-k2.5",
"mimo-v2-pro",
"mimo-v2-omni",
"minimax-m2.7",
@@ -298,42 +283,26 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
# to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
# or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
"alibaba": [
"kimi-k2.5",
"qwen3.5-plus",
"qwen3-coder-plus",
"qwen3-coder-next",
# Third-party models available on coding-intl
"glm-5",
"glm-4.7",
"kimi-k2.5",
"MiniMax-M2.5",
],
# Curated HF model list — only agentic models that map to OpenRouter defaults.
"huggingface": [
"moonshotai/Kimi-K2.5",
"Qwen/Qwen3.5-397B-A17B",
"Qwen/Qwen3.5-35B-A3B",
"deepseek-ai/DeepSeek-V3.2",
"moonshotai/Kimi-K2.5",
"MiniMaxAI/MiniMax-M2.5",
"zai-org/GLM-5",
"XiaomiMiMo/MiMo-V2-Flash",
"moonshotai/Kimi-K2-Thinking",
],
# AWS Bedrock — static fallback list used when dynamic discovery is
# unavailable (no boto3, no credentials, or API error). The agent
# prefers live discovery via ListFoundationModels + ListInferenceProfiles.
# Use inference profile IDs (us.*) since most models require them.
"bedrock": [
"us.anthropic.claude-sonnet-4-6",
"us.anthropic.claude-opus-4-6-v1",
"us.anthropic.claude-haiku-4-5-20251001-v1:0",
"us.anthropic.claude-sonnet-4-5-20250929-v1:0",
"us.amazon.nova-pro-v1:0",
"us.amazon.nova-lite-v1:0",
"us.amazon.nova-micro-v1:0",
"deepseek.v3.2",
"us.meta.llama4-maverick-17b-instruct-v1:0",
"us.meta.llama4-scout-17b-instruct-v1:0",
],
}
# ---------------------------------------------------------------------------
@@ -549,35 +518,30 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
ProviderEntry("huggingface", "Hugging Face", "Hugging Face Inference Providers (20+ open models)"),
ProviderEntry("gemini", "Google AI Studio", "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"),
ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"),
ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"),
ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"),
ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"),
ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
ProviderEntry("kimi-coding", "Kimi / Moonshot", "Kimi / Moonshot (Moonshot AI direct API)"),
ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"),
ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"),
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
]
# Derived dicts — used throughout the codebase
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
_PROVIDER_ALIASES = {
"glm": "zai",
"z-ai": "zai",
@@ -618,26 +582,14 @@ _PROVIDER_ALIASES = {
"qwen": "alibaba",
"alibaba-cloud": "alibaba",
"qwen-portal": "qwen-oauth",
"gemini-cli": "google-gemini-cli",
"gemini-oauth": "google-gemini-cli",
"hf": "huggingface",
"hugging-face": "huggingface",
"huggingface-hub": "huggingface",
"mimo": "xiaomi",
"xiaomi-mimo": "xiaomi",
"aws": "bedrock",
"aws-bedrock": "bedrock",
"amazon-bedrock": "bedrock",
"amazon": "bedrock",
"grok": "xai",
"x-ai": "xai",
"x.ai": "xai",
"nim": "nvidia",
"nvidia-nim": "nvidia",
"build-nvidia": "nvidia",
"nemotron": "nvidia",
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
"ollama_cloud": "ollama-cloud",
}
@@ -1074,7 +1026,7 @@ def detect_provider_for_model(
return (resolved_provider, default_models[0])
# Aggregators list other providers' models — never auto-switch TO them
_AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
_AGGREGATORS = {"nous", "openrouter"}
# If the model belongs to the current provider's catalog, don't suggest switching
current_models = _PROVIDER_MODELS.get(current_provider, [])
@@ -1091,8 +1043,7 @@ def detect_provider_for_model(
break
if direct_match:
# Check if we have credentials for this provider — env vars,
# credential pool, or auth store entries.
# Check if we have credentials for this provider
has_creds = False
try:
from hermes_cli.auth import PROVIDER_REGISTRY
@@ -1105,28 +1056,16 @@ def detect_provider_for_model(
break
except Exception:
pass
# Also check credential pool and auth store — covers OAuth,
# Claude Code tokens, and other non-env-var credentials (#10300).
if not has_creds:
try:
from agent.credential_pool import load_pool
pool = load_pool(direct_match)
if pool.has_credentials():
has_creds = True
except Exception:
pass
if not has_creds:
try:
from hermes_cli.auth import _load_auth_store
store = _load_auth_store()
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
has_creds = True
except Exception:
pass
# Always return the direct provider match. If credentials are
# missing, the client init will give a clear error rather than
# silently routing through the wrong provider (#10300).
if has_creds:
return (direct_match, name)
# No direct creds — try to find this model on OpenRouter instead
or_slug = _find_openrouter_slug(name)
if or_slug:
return ("openrouter", or_slug)
# Still return the direct provider — credential resolution will
# give a clear error rather than silently using the wrong provider
return (direct_match, name)
# --- Step 2: check OpenRouter catalog ---
@@ -1316,10 +1255,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
live = _fetch_ai_gateway_models()
if live:
return live
if normalized == "ollama-cloud":
live = fetch_ollama_cloud_models(force_refresh=force_refresh)
if live:
return live
if normalized == "custom":
base_url = _get_custom_base_url()
if base_url:
@@ -1506,19 +1441,6 @@ _COPILOT_MODEL_ALIASES = {
"anthropic/claude-sonnet-4.6": "claude-sonnet-4.6",
"anthropic/claude-sonnet-4.5": "claude-sonnet-4.5",
"anthropic/claude-haiku-4.5": "claude-haiku-4.5",
# Dash-notation fallbacks: Hermes' default Claude IDs elsewhere use
# hyphens (anthropic native format), but Copilot's API only accepts
# dot-notation. Accept both so users who configure copilot + a
# default hyphenated Claude model don't hit HTTP 400
# "model_not_supported". See issue #6879.
"claude-opus-4-6": "claude-opus-4.6",
"claude-sonnet-4-6": "claude-sonnet-4.6",
"claude-sonnet-4-5": "claude-sonnet-4.5",
"claude-haiku-4-5": "claude-haiku-4.5",
"anthropic/claude-opus-4-6": "claude-opus-4.6",
"anthropic/claude-sonnet-4-6": "claude-sonnet-4.6",
"anthropic/claude-sonnet-4-5": "claude-sonnet-4.5",
"anthropic/claude-haiku-4-5": "claude-haiku-4.5",
}
@@ -1617,11 +1539,6 @@ def copilot_model_api_mode(
primary signal. Falls back to the catalog's ``supported_endpoints``
only for models not covered by the pattern check.
"""
# Fetch the catalog once so normalize + endpoint check share it
# (avoids two redundant network calls for non-GPT-5 models).
if catalog is None and api_key:
catalog = fetch_github_model_catalog(api_key=api_key)
normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key)
if not normalized:
return "chat_completions"
@@ -1631,6 +1548,9 @@ def copilot_model_api_mode(
return "codex_responses"
# Secondary: check catalog for non-GPT-5 models (Claude via /v1/messages, etc.)
if catalog is None and api_key:
catalog = fetch_github_model_catalog(api_key=api_key)
if catalog:
catalog_entry = next((item for item in catalog if item.get("id") == normalized), None)
if isinstance(catalog_entry, dict):
@@ -1845,125 +1765,6 @@ def fetch_api_models(
return probe_api_models(api_key, base_url, timeout=timeout).get("models")
# ---------------------------------------------------------------------------
# Ollama Cloud — merged model discovery with disk cache
# ---------------------------------------------------------------------------
_OLLAMA_CLOUD_CACHE_TTL = 3600 # 1 hour
def _ollama_cloud_cache_path() -> Path:
"""Return the path for the Ollama Cloud model cache."""
from hermes_constants import get_hermes_home
return get_hermes_home() / "ollama_cloud_models_cache.json"
def _load_ollama_cloud_cache(*, ignore_ttl: bool = False) -> Optional[dict]:
"""Load cached Ollama Cloud models from disk.
Args:
ignore_ttl: If True, return data even if the TTL has expired (stale fallback).
"""
try:
cache_path = _ollama_cloud_cache_path()
if not cache_path.exists():
return None
with open(cache_path, encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, dict):
return None
models = data.get("models")
if not (isinstance(models, list) and models):
return None
if not ignore_ttl:
cached_at = data.get("cached_at", 0)
if (time.time() - cached_at) > _OLLAMA_CLOUD_CACHE_TTL:
return None # stale
return data
except Exception:
pass
return None
def _save_ollama_cloud_cache(models: list[str]) -> None:
"""Persist the merged Ollama Cloud model list to disk."""
try:
from utils import atomic_json_write
cache_path = _ollama_cloud_cache_path()
cache_path.parent.mkdir(parents=True, exist_ok=True)
atomic_json_write(cache_path, {"models": models, "cached_at": time.time()}, indent=None)
except Exception:
pass
def fetch_ollama_cloud_models(
api_key: Optional[str] = None,
base_url: Optional[str] = None,
*,
force_refresh: bool = False,
) -> list[str]:
"""Fetch Ollama Cloud models by merging live API + models.dev, with disk cache.
Resolution order:
1. Disk cache (if fresh, < 1 hour, and not force_refresh)
2. Live ``/v1/models`` endpoint (primary freshest source)
3. models.dev registry (secondary fills gaps for unlisted models)
4. Merge: live models first, then models.dev additions (deduped)
Returns a list of model IDs (never None empty list on total failure).
"""
# 1. Check disk cache
if not force_refresh:
cached = _load_ollama_cloud_cache()
if cached is not None:
return cached["models"]
# 2. Live API probe
if not api_key:
api_key = os.getenv("OLLAMA_API_KEY", "")
if not base_url:
base_url = os.getenv("OLLAMA_BASE_URL", "") or "https://ollama.com/v1"
live_models: list[str] = []
if api_key:
result = fetch_api_models(api_key, base_url, timeout=8.0)
if result:
live_models = result
# 3. models.dev registry
mdev_models: list[str] = []
try:
from agent.models_dev import list_agentic_models
mdev_models = list_agentic_models("ollama-cloud")
except Exception:
pass
# 4. Merge: live first, then models.dev additions (deduped, order-preserving)
if live_models or mdev_models:
seen: set[str] = set()
merged: list[str] = []
for m in live_models:
if m and m not in seen:
seen.add(m)
merged.append(m)
for m in mdev_models:
if m and m not in seen:
seen.add(m)
merged.append(m)
if merged:
_save_ollama_cloud_cache(merged)
return merged
# Total failure — return stale cache if available (ignore TTL)
stale = _load_ollama_cloud_cache(ignore_ttl=True)
if stale is not None:
return stale["models"]
return []
def validate_requested_model(
model_name: str,
provider: Optional[str],
@@ -2050,8 +1851,8 @@ def validate_requested_model(
)
return {
"accepted": False,
"persist": False,
"accepted": True,
"persist": True,
"recognized": False,
"message": message,
}
@@ -2064,8 +1865,8 @@ def validate_requested_model(
message += f"\n If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`"
return {
"accepted": False,
"persist": False,
"accepted": True,
"persist": True,
"recognized": False,
"message": message,
}
@@ -2099,11 +1900,12 @@ def validate_requested_model(
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
return {
"accepted": False,
"persist": False,
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Model `{requested}` was not found in the OpenAI Codex model listing."
f"Note: `{requested}` was not found in the OpenAI Codex model listing. "
f"It may still work if your account has access to it."
f"{suggestion_text}"
),
}
@@ -2142,58 +1944,23 @@ def validate_requested_model(
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
return {
"accepted": False,
"persist": False,
"recognized": False,
"message": (
f"Model `{requested}` was not found in this provider's model listing."
f"{suggestion_text}"
),
}
# api_models is None — couldn't reach API. Accept and persist,
# but warn so typos don't silently break things.
# Bedrock: use our own discovery instead of HTTP /models endpoint.
# Bedrock's bedrock-runtime URL doesn't support /models — it uses the
# AWS SDK control plane (ListFoundationModels + ListInferenceProfiles).
if normalized == "bedrock":
try:
from agent.bedrock_adapter import discover_bedrock_models, resolve_bedrock_region
region = resolve_bedrock_region()
discovered = discover_bedrock_models(region)
discovered_ids = {m["id"] for m in discovered}
if requested in discovered_ids:
return {
"accepted": True,
"persist": True,
"recognized": True,
"message": None,
}
# Not in discovered list — still accept (user may have custom
# inference profiles or cross-account access), but warn.
suggestions = get_close_matches(requested, list(discovered_ids), n=3, cutoff=0.4)
suggestion_text = ""
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
return {
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Note: `{requested}` was not found in Bedrock model discovery for {region}. "
f"It may still work with custom inference profiles or cross-account access."
f"Note: `{requested}` was not found in this provider's model listing. "
f"It may still work if your plan supports it."
f"{suggestion_text}"
),
}
except Exception:
pass # Fall through to generic warning
# api_models is None — couldn't reach API. Accept and persist,
# but warn so typos don't silently break things.
provider_label = _PROVIDER_LABELS.get(normalized, normalized)
return {
"accepted": False,
"persist": False,
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Could not reach the {provider_label} API to validate `{requested}`. "
+30 -277
View File
@@ -143,7 +143,6 @@ def _tts_label(current_provider: str) -> str:
"openai": "OpenAI TTS",
"elevenlabs": "ElevenLabs",
"edge": "Edge TTS",
"xai": "xAI TTS",
"mistral": "Mistral Voxtral TTS",
"neutts": "NeuTTS",
}
@@ -258,15 +257,6 @@ def get_nous_subscription_features(
terminal_cfg.get("modal_mode")
)
# use_gateway flags — when True, the user explicitly opted into the
# Tool Gateway via `hermes model`, so direct credentials should NOT
# prevent gateway routing.
web_use_gateway = bool(web_cfg.get("use_gateway"))
tts_use_gateway = bool(tts_cfg.get("use_gateway"))
browser_use_gateway = bool(browser_cfg.get("use_gateway"))
image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}
image_use_gateway = bool(image_gen_cfg.get("use_gateway"))
direct_exa = bool(get_env_value("EXA_API_KEY"))
direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
@@ -279,21 +269,6 @@ def get_nous_subscription_features(
direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY"))
direct_modal = has_direct_modal_credentials()
# When use_gateway is set, suppress direct credentials for managed detection
if web_use_gateway:
direct_firecrawl = False
direct_exa = False
direct_parallel = False
direct_tavily = False
if image_use_gateway:
direct_fal = False
if tts_use_gateway:
direct_openai_tts = False
direct_elevenlabs = False
if browser_use_gateway:
direct_browser_use = False
direct_browserbase = False
managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
@@ -464,7 +439,37 @@ def get_nous_subscription_features(
)
def get_nous_subscription_explainer_lines() -> list[str]:
if not managed_nous_tools_enabled():
return []
return [
"Nous subscription enables managed web tools, image generation, OpenAI TTS, and browser automation by default.",
"Those managed tools bill to your Nous subscription. Modal execution is optional and can bill to your subscription too.",
"Change these later with: hermes setup tools, hermes setup terminal, or hermes status.",
]
def apply_nous_provider_defaults(config: Dict[str, object]) -> set[str]:
"""Apply provider-level Nous defaults shared by `hermes setup` and `hermes model`."""
if not managed_nous_tools_enabled():
return set()
features = get_nous_subscription_features(config)
if not features.provider_is_nous:
return set()
tts_cfg = config.get("tts")
if not isinstance(tts_cfg, dict):
tts_cfg = {}
config["tts"] = tts_cfg
current_tts = str(tts_cfg.get("provider") or "edge").strip().lower()
if current_tts not in {"", "edge"}:
return set()
tts_cfg["provider"] = "openai"
return {"tts"}
def apply_nous_managed_defaults(
@@ -524,255 +529,3 @@ def apply_nous_managed_defaults(
changed.add("image_gen")
return changed
# ---------------------------------------------------------------------------
# Tool Gateway offer — single Y/n prompt after model selection
# ---------------------------------------------------------------------------
_GATEWAY_TOOL_LABELS = {
"web": "Web search & extract (Firecrawl)",
"image_gen": "Image generation (FAL)",
"tts": "Text-to-speech (OpenAI TTS)",
"browser": "Browser automation (Browser Use)",
}
def _get_gateway_direct_credentials() -> Dict[str, bool]:
"""Return a dict of tool_key -> has_direct_credentials."""
return {
"web": bool(
get_env_value("FIRECRAWL_API_KEY")
or get_env_value("FIRECRAWL_API_URL")
or get_env_value("PARALLEL_API_KEY")
or get_env_value("TAVILY_API_KEY")
or get_env_value("EXA_API_KEY")
),
"image_gen": bool(get_env_value("FAL_KEY")),
"tts": bool(
resolve_openai_audio_api_key()
or get_env_value("ELEVENLABS_API_KEY")
),
"browser": bool(
get_env_value("BROWSER_USE_API_KEY")
or (get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID"))
),
}
_GATEWAY_DIRECT_LABELS = {
"web": "Firecrawl/Exa/Parallel/Tavily key",
"image_gen": "FAL key",
"tts": "OpenAI/ElevenLabs key",
"browser": "Browser Use/Browserbase key",
}
_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "browser")
def get_gateway_eligible_tools(
config: Optional[Dict[str, object]] = None,
) -> tuple[list[str], list[str], list[str]]:
"""Return (unconfigured, has_direct, already_managed) tool key lists.
- unconfigured: tools with no direct credentials (easy switch)
- has_direct: tools where the user has their own API keys
- already_managed: tools already routed through the gateway
All lists are empty when the user is not a paid Nous subscriber or
is not using Nous as their provider.
"""
if not managed_nous_tools_enabled():
return [], [], []
if config is None:
from hermes_cli.config import load_config
config = load_config() or {}
# Quick provider check without the heavy get_nous_subscription_features call
model_cfg = config.get("model")
if not isinstance(model_cfg, dict) or str(model_cfg.get("provider") or "").strip().lower() != "nous":
return [], [], []
direct = _get_gateway_direct_credentials()
# Check which tools the user has explicitly opted into the gateway for.
# This is distinct from managed_by_nous which fires implicitly when
# no direct keys exist — we only skip the prompt for tools where
# use_gateway was explicitly set.
opted_in = {
"web": bool((config.get("web") if isinstance(config.get("web"), dict) else {}).get("use_gateway")),
"image_gen": bool((config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}).get("use_gateway")),
"tts": bool((config.get("tts") if isinstance(config.get("tts"), dict) else {}).get("use_gateway")),
"browser": bool((config.get("browser") if isinstance(config.get("browser"), dict) else {}).get("use_gateway")),
}
unconfigured: list[str] = []
has_direct: list[str] = []
already_managed: list[str] = []
for key in _ALL_GATEWAY_KEYS:
if opted_in.get(key):
already_managed.append(key)
elif direct.get(key):
has_direct.append(key)
else:
unconfigured.append(key)
return unconfigured, has_direct, already_managed
def apply_gateway_defaults(
config: Dict[str, object],
tool_keys: list[str],
) -> set[str]:
"""Apply Tool Gateway config for the given tool keys.
Sets ``use_gateway: true`` in each tool's config section so the
runtime prefers the gateway even when direct API keys are present.
Returns the set of tools that were actually changed.
"""
changed: set[str] = set()
web_cfg = config.get("web")
if not isinstance(web_cfg, dict):
web_cfg = {}
config["web"] = web_cfg
tts_cfg = config.get("tts")
if not isinstance(tts_cfg, dict):
tts_cfg = {}
config["tts"] = tts_cfg
browser_cfg = config.get("browser")
if not isinstance(browser_cfg, dict):
browser_cfg = {}
config["browser"] = browser_cfg
if "web" in tool_keys:
web_cfg["backend"] = "firecrawl"
web_cfg["use_gateway"] = True
changed.add("web")
if "tts" in tool_keys:
tts_cfg["provider"] = "openai"
tts_cfg["use_gateway"] = True
changed.add("tts")
if "browser" in tool_keys:
browser_cfg["cloud_provider"] = "browser-use"
browser_cfg["use_gateway"] = True
changed.add("browser")
if "image_gen" in tool_keys:
image_cfg = config.get("image_gen")
if not isinstance(image_cfg, dict):
image_cfg = {}
config["image_gen"] = image_cfg
image_cfg["use_gateway"] = True
changed.add("image_gen")
return changed
def prompt_enable_tool_gateway(config: Dict[str, object]) -> set[str]:
"""If eligible tools exist, prompt the user to enable the Tool Gateway.
Uses prompt_choice() with a description parameter so the curses TUI
shows the tool context alongside the choices.
Returns the set of tools that were enabled, or empty set if the user
declined or no tools were eligible.
"""
unconfigured, has_direct, already_managed = get_gateway_eligible_tools(config)
if not unconfigured and not has_direct:
return set()
try:
from hermes_cli.setup import prompt_choice
except Exception:
return set()
# Build description lines showing full status of all gateway tools
desc_parts: list[str] = [
"",
" The Tool Gateway gives you access to web search, image generation,",
" text-to-speech, and browser automation through your Nous subscription.",
" No need to sign up for separate API keys — just pick the tools you want.",
"",
]
if already_managed:
for k in already_managed:
desc_parts.append(f"{_GATEWAY_TOOL_LABELS[k]} — using Tool Gateway")
if unconfigured:
for k in unconfigured:
desc_parts.append(f"{_GATEWAY_TOOL_LABELS[k]} — not configured")
if has_direct:
for k in has_direct:
desc_parts.append(f"{_GATEWAY_TOOL_LABELS[k]} — using {_GATEWAY_DIRECT_LABELS[k]}")
# Build short choice labels — detail is in the description above
choices: list[str] = []
choice_keys: list[str] = [] # maps choice index -> action
if unconfigured and has_direct:
choices.append("Enable for all tools (existing keys kept, not used)")
choice_keys.append("all")
choices.append("Enable only for tools without existing keys")
choice_keys.append("unconfigured")
choices.append("Skip")
choice_keys.append("skip")
elif unconfigured:
choices.append("Enable Tool Gateway")
choice_keys.append("unconfigured")
choices.append("Skip")
choice_keys.append("skip")
else:
choices.append("Enable Tool Gateway (existing keys kept, not used)")
choice_keys.append("all")
choices.append("Skip")
choice_keys.append("skip")
description = "\n".join(desc_parts) if desc_parts else None
# Default to "Enable" when user has no direct keys (new user),
# default to "Skip" when they have existing keys to preserve.
default_idx = 0 if not has_direct else len(choices) - 1
try:
idx = prompt_choice(
"Your Nous subscription includes the Tool Gateway.",
choices,
default_idx,
description=description,
)
except (KeyboardInterrupt, EOFError, OSError, SystemExit):
return set()
action = choice_keys[idx]
if action == "skip":
return set()
if action == "all":
# Apply to switchable tools + ensure already-managed tools also
# have use_gateway persisted in config for consistency.
to_apply = list(_ALL_GATEWAY_KEYS)
else:
to_apply = unconfigured
changed = apply_gateway_defaults(config, to_apply)
if changed:
from hermes_cli.config import save_config
save_config(config)
# Only report the tools that actually switched (not already-managed ones)
newly_switched = changed - set(already_managed)
for key in sorted(newly_switched):
label = _GATEWAY_TOOL_LABELS.get(key, key)
print(f"{label}: enabled via Nous subscription")
if already_managed and not newly_switched:
print(" (all tools already using Tool Gateway)")
return changed
-99
View File
@@ -112,7 +112,6 @@ class LoadedPlugin:
module: Optional[types.ModuleType] = None
tools_registered: List[str] = field(default_factory=list)
hooks_registered: List[str] = field(default_factory=list)
commands_registered: List[str] = field(default_factory=list)
enabled: bool = False
error: Optional[str] = None
@@ -212,84 +211,6 @@ class PluginContext:
}
logger.debug("Plugin %s registered CLI command: %s", self.manifest.name, name)
# -- slash command registration -------------------------------------------
def register_command(
self,
name: str,
handler: Callable,
description: str = "",
) -> None:
"""Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions.
The handler signature is ``fn(raw_args: str) -> str | None``.
It may also be an async callable the gateway dispatch handles both.
Unlike ``register_cli_command()`` (which creates ``hermes <subcommand>``
terminal commands), this registers in-session slash commands that users
invoke during a conversation.
Names conflicting with built-in commands are rejected with a warning.
"""
clean = name.lower().strip().lstrip("/").replace(" ", "-")
if not clean:
logger.warning(
"Plugin '%s' tried to register a command with an empty name.",
self.manifest.name,
)
return
# Reject if it conflicts with a built-in command
try:
from hermes_cli.commands import resolve_command
if resolve_command(clean) is not None:
logger.warning(
"Plugin '%s' tried to register command '/%s' which conflicts "
"with a built-in command. Skipping.",
self.manifest.name, clean,
)
return
except Exception:
pass # If commands module isn't available, skip the check
self._manager._plugin_commands[clean] = {
"handler": handler,
"description": description or "Plugin command",
"plugin": self.manifest.name,
}
logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean)
# -- tool dispatch -------------------------------------------------------
def dispatch_tool(self, tool_name: str, args: dict, **kwargs) -> str:
"""Dispatch a tool call through the registry, with parent agent context.
This is the public interface for plugin slash commands that need to call
tools like ``delegate_task`` without reaching into framework internals.
The parent agent (if available) is resolved automatically plugins never
need to access the agent directly.
Args:
tool_name: Registry name of the tool (e.g. ``"delegate_task"``).
args: Tool arguments dict (same as what the model would pass).
**kwargs: Extra keyword args forwarded to the registry dispatch.
Returns:
JSON string from the tool handler (same format as model tool calls).
"""
from tools.registry import registry
# Wire up parent agent context when available (CLI mode).
# In gateway mode _cli_ref is None — tools degrade gracefully
# (workspace hints fall back to TERMINAL_CWD, no spinner).
if "parent_agent" not in kwargs:
cli = self._manager._cli_ref
agent = getattr(cli, "agent", None) if cli else None
if agent is not None:
kwargs["parent_agent"] = agent
return registry.dispatch(tool_name, args, **kwargs)
# -- context engine registration -----------------------------------------
def register_context_engine(self, engine) -> None:
@@ -402,7 +323,6 @@ class PluginManager:
self._plugin_tool_names: Set[str] = set()
self._cli_commands: Dict[str, dict] = {}
self._context_engine = None # Set by a plugin via register_context_engine()
self._plugin_commands: Dict[str, dict] = {} # Slash commands registered by plugins
self._discovered: bool = False
self._cli_ref = None # Set by CLI after plugin discovery
# Plugin skill registry: qualified name → metadata dict.
@@ -565,10 +485,6 @@ class PluginManager:
for h in p.hooks_registered
}
)
loaded.commands_registered = [
c for c in self._plugin_commands
if self._plugin_commands[c].get("plugin") == manifest.name
]
loaded.enabled = True
except Exception as exc:
@@ -682,7 +598,6 @@ class PluginManager:
"enabled": loaded.enabled,
"tools": len(loaded.tools_registered),
"hooks": len(loaded.hooks_registered),
"commands": len(loaded.commands_registered),
"error": loaded.error,
}
)
@@ -784,20 +699,6 @@ def get_plugin_context_engine():
return get_plugin_manager()._context_engine
def get_plugin_command_handler(name: str) -> Optional[Callable]:
"""Return the handler for a plugin-registered slash command, or ``None``."""
entry = get_plugin_manager()._plugin_commands.get(name)
return entry["handler"] if entry else None
def get_plugin_commands() -> Dict[str, dict]:
"""Return the full plugin commands dict (name → {handler, description, plugin}).
Safe to call before discovery returns an empty dict if no plugins loaded.
"""
return get_plugin_manager()._plugin_commands
def get_plugin_toolsets() -> List[tuple]:
"""Return plugin toolsets as ``(key, label, description)`` tuples.
+12 -3
View File
@@ -300,10 +300,19 @@ def _read_config_model(profile_dir: Path) -> tuple:
def _check_gateway_running(profile_dir: Path) -> bool:
"""Check if a gateway is running for a given profile directory."""
pid_file = profile_dir / "gateway.pid"
if not pid_file.exists():
return False
try:
from gateway.status import get_running_pid
return get_running_pid(profile_dir / "gateway.pid", cleanup_stale=False) is not None
except Exception:
raw = pid_file.read_text().strip()
if not raw:
return False
data = json.loads(raw) if raw.startswith("{") else {"pid": int(raw)}
pid = int(data["pid"])
os.kill(pid, 0) # existence check
return True
except (json.JSONDecodeError, KeyError, ValueError, TypeError,
ProcessLookupError, PermissionError, OSError):
return False
+2 -43
View File
@@ -64,11 +64,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_override="https://portal.qwen.ai/v1",
base_url_env_var="HERMES_QWEN_BASE_URL",
),
"google-gemini-cli": HermesOverlay(
transport="openai_chat",
auth_type="oauth_external",
base_url_override="cloudcode-pa://google",
),
"copilot-acp": HermesOverlay(
transport="codex_responses",
auth_type="external_process",
@@ -133,15 +128,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_env_var="HF_BASE_URL",
),
"xai": HermesOverlay(
transport="codex_responses",
transport="openai_chat",
base_url_override="https://api.x.ai/v1",
base_url_env_var="XAI_BASE_URL",
),
"nvidia": HermesOverlay(
transport="openai_chat",
base_url_override="https://integrate.api.nvidia.com/v1",
base_url_env_var="NVIDIA_BASE_URL",
),
"xiaomi": HermesOverlay(
transport="openai_chat",
base_url_env_var="XIAOMI_BASE_URL",
@@ -151,10 +141,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_override="https://api.arcee.ai/api/v1",
base_url_env_var="ARCEE_BASE_URL",
),
"ollama-cloud": HermesOverlay(
transport="openai_chat",
base_url_env_var="OLLAMA_BASE_URL",
),
}
@@ -194,13 +180,6 @@ ALIASES: Dict[str, str] = {
# xai
"x-ai": "xai",
"x.ai": "xai",
"grok": "xai",
# nvidia
"nim": "nvidia",
"nvidia-nim": "nvidia",
"build-nvidia": "nvidia",
"nemotron": "nvidia",
# kimi-for-coding (models.dev ID)
"kimi": "kimi-for-coding",
@@ -248,11 +227,6 @@ ALIASES: Dict[str, str] = {
"qwen": "alibaba",
"alibaba-cloud": "alibaba",
# google-gemini-cli (OAuth + Code Assist)
"gemini-cli": "google-gemini-cli",
"gemini-oauth": "google-gemini-cli",
# huggingface
"hf": "huggingface",
"hugging-face": "huggingface",
@@ -262,12 +236,6 @@ ALIASES: Dict[str, str] = {
"mimo": "xiaomi",
"xiaomi-mimo": "xiaomi",
# bedrock
"aws": "bedrock",
"aws-bedrock": "bedrock",
"amazon-bedrock": "bedrock",
"amazon": "bedrock",
# arcee
"arcee-ai": "arcee",
"arceeai": "arcee",
@@ -276,7 +244,7 @@ ALIASES: Dict[str, str] = {
"lmstudio": "lmstudio",
"lm-studio": "lmstudio",
"lm_studio": "lmstudio",
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
"ollama": "ollama-cloud",
"vllm": "local",
"llamacpp": "local",
"llama.cpp": "local",
@@ -294,8 +262,6 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"copilot-acp": "GitHub Copilot ACP",
"xiaomi": "Xiaomi MiMo",
"local": "Local endpoint",
"bedrock": "AWS Bedrock",
"ollama-cloud": "Ollama Cloud",
}
@@ -305,7 +271,6 @@ TRANSPORT_TO_API_MODE: Dict[str, str] = {
"openai_chat": "chat_completions",
"anthropic_messages": "anthropic_messages",
"codex_responses": "codex_responses",
"bedrock_converse": "bedrock_converse",
}
@@ -423,10 +388,6 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
if pdef is not None:
return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")
# Direct provider checks for providers not in HERMES_OVERLAYS
if provider == "bedrock":
return "bedrock_converse"
# URL-based heuristics for custom / unknown providers
if base_url:
url_lower = base_url.rstrip("/").lower()
@@ -434,8 +395,6 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
return "anthropic_messages"
if "api.openai.com" in url_lower:
return "codex_responses"
if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
return "bedrock_converse"
return "chat_completions"
+1 -105
View File
@@ -22,7 +22,6 @@ from hermes_cli.auth import (
resolve_nous_runtime_credentials,
resolve_codex_runtime_credentials,
resolve_qwen_runtime_credentials,
resolve_gemini_oauth_runtime_credentials,
resolve_api_key_provider_credentials,
resolve_external_process_provider_credentials,
has_usable_secret,
@@ -42,8 +41,6 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
tool calls with reasoning (chat/completions returns 400).
"""
normalized = (base_url or "").strip().lower().rstrip("/")
if "api.x.ai" in normalized:
return "codex_responses"
if "api.openai.com" in normalized and "openrouter" not in normalized:
return "codex_responses"
return None
@@ -127,7 +124,7 @@ def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str:
return "chat_completions"
_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}
_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages"}
def _parse_api_mode(raw: Any) -> Optional[str]:
@@ -157,9 +154,6 @@ def _resolve_runtime_from_pool_entry(
elif provider == "qwen-oauth":
api_mode = "chat_completions"
base_url = base_url or DEFAULT_QWEN_BASE_URL
elif provider == "google-gemini-cli":
api_mode = "chat_completions"
base_url = base_url or "cloudcode-pa://google"
elif provider == "anthropic":
api_mode = "anthropic_messages"
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
@@ -169,13 +163,10 @@ def _resolve_runtime_from_pool_entry(
base_url = cfg_base_url or base_url or "https://api.anthropic.com"
elif provider == "openrouter":
base_url = base_url or OPENROUTER_BASE_URL
elif provider == "xai":
api_mode = "codex_responses"
elif provider == "nous":
api_mode = "chat_completions"
elif provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Honour model.base_url from config.yaml when the configured provider
@@ -636,8 +627,6 @@ def _resolve_explicit_runtime(
api_mode = "chat_completions"
if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
elif provider == "xai":
api_mode = "codex_responses"
else:
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode:
@@ -808,26 +797,6 @@ def resolve_runtime_provider(
logger.info("Qwen OAuth credentials failed; "
"falling through to next provider.")
if provider == "google-gemini-cli":
try:
creds = resolve_gemini_oauth_runtime_credentials()
return {
"provider": "google-gemini-cli",
"api_mode": "chat_completions",
"base_url": creds.get("base_url", ""),
"api_key": creds.get("api_key", ""),
"source": creds.get("source", "google-oauth"),
"expires_at_ms": creds.get("expires_at_ms"),
"email": creds.get("email", ""),
"project_id": creds.get("project_id", ""),
"requested_provider": requested_provider,
}
except AuthError:
if requested_provider != "auto":
raise
logger.info("Google Gemini OAuth credentials failed; "
"falling through to next provider.")
if provider == "copilot-acp":
creds = resolve_external_process_provider_credentials(provider)
return {
@@ -867,77 +836,6 @@ def resolve_runtime_provider(
"requested_provider": requested_provider,
}
# AWS Bedrock (native Converse API via boto3)
if provider == "bedrock":
from agent.bedrock_adapter import (
has_aws_credentials,
resolve_aws_auth_env_var,
resolve_bedrock_region,
is_anthropic_bedrock_model,
)
# When the user explicitly selected bedrock (not auto-detected),
# trust boto3's credential chain — it handles IMDS, ECS task roles,
# Lambda execution roles, SSO, and other implicit sources that our
# env-var check can't detect.
is_explicit = requested_provider in ("bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon")
if not is_explicit and not has_aws_credentials():
raise AuthError(
"No AWS credentials found for Bedrock. Configure one of:\n"
" - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY\n"
" - AWS_PROFILE (for SSO / named profiles)\n"
" - IAM instance role (EC2, ECS, Lambda)\n"
"Or run 'aws configure' to set up credentials.",
code="no_aws_credentials",
)
# Read bedrock-specific config from config.yaml
from hermes_cli.config import load_config as _load_bedrock_config
_bedrock_cfg = _load_bedrock_config().get("bedrock", {})
# Region priority: config.yaml bedrock.region → env var → us-east-1
region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
# Build guardrail config if configured
_gr = _bedrock_cfg.get("guardrail", {})
guardrail_config = None
if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"):
guardrail_config = {
"guardrailIdentifier": _gr["guardrail_identifier"],
"guardrailVersion": _gr["guardrail_version"],
}
if _gr.get("stream_processing_mode"):
guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"]
if _gr.get("trace"):
guardrail_config["trace"] = _gr["trace"]
# Dual-path routing: Claude models use AnthropicBedrock SDK for full
# feature parity (prompt caching, thinking budgets, adaptive thinking).
# Non-Claude models use the Converse API for multi-model support.
_current_model = str(model_cfg.get("default") or "").strip()
if is_anthropic_bedrock_model(_current_model):
# Claude on Bedrock → AnthropicBedrock SDK → anthropic_messages path
runtime = {
"provider": "bedrock",
"api_mode": "anthropic_messages",
"base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
"api_key": "aws-sdk",
"source": auth_source,
"region": region,
"bedrock_anthropic": True, # Signal to use AnthropicBedrock client
"requested_provider": requested_provider,
}
else:
# Non-Claude (Nova, DeepSeek, Llama, etc.) → Converse API
runtime = {
"provider": "bedrock",
"api_mode": "bedrock_converse",
"base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
"api_key": "aws-sdk",
"source": auth_source,
"region": region,
"requested_provider": requested_provider,
}
if guardrail_config:
runtime["guardrail_config"] = guardrail_config
return runtime
# API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
pconfig = PROVIDER_REGISTRY.get(provider)
if pconfig and pconfig.auth_type == "api_key":
@@ -954,8 +852,6 @@ def resolve_runtime_provider(
api_mode = "chat_completions"
if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
elif provider == "xai":
api_mode = "codex_responses"
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Only honor persisted api_mode when it belongs to the same provider family.
+75 -70
View File
@@ -20,7 +20,10 @@ import copy
from pathlib import Path
from typing import Optional, Dict, Any
from hermes_cli.nous_subscription import get_nous_subscription_features
from hermes_cli.nous_subscription import (
apply_nous_provider_defaults,
get_nous_subscription_features,
)
from tools.tool_backend_helpers import managed_nous_tools_enabled
from hermes_constants import get_optional_skills_dir
@@ -91,7 +94,7 @@ _DEFAULT_PROVIDER_MODELS = {
"gemini": [
"gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
"gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
"gemma-4-31b-it",
"gemma-4-31b-it", "gemma-4-26b-it",
],
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
@@ -102,7 +105,7 @@ _DEFAULT_PROVIDER_MODELS = {
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
"opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
"opencode-go": ["glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
"huggingface": [
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@@ -210,20 +213,20 @@ def prompt(question: str, default: str = None, password: bool = False) -> str:
sys.exit(1)
def _curses_prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int:
def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int:
"""Single-select menu using curses. Delegates to curses_radiolist."""
from hermes_cli.curses_ui import curses_radiolist
return curses_radiolist(question, choices, selected=default, cancel_returns=-1, description=description)
return curses_radiolist(question, choices, selected=default, cancel_returns=-1)
def prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int:
def prompt_choice(question: str, choices: list, default: int = 0) -> int:
"""Prompt for a choice from a list with arrow key navigation.
Escape keeps the current default (skips the question).
Ctrl+C exits the wizard.
"""
idx = _curses_prompt_choice(question, choices, default, description=description)
idx = _curses_prompt_choice(question, choices, default)
if idx >= 0:
if idx == default:
print_info(" Skipped (keeping current)")
@@ -430,8 +433,6 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status.append(("Text-to-Speech (MiniMax)", True, None))
elif tts_provider == "mistral" and get_env_value("MISTRAL_API_KEY"):
tool_status.append(("Text-to-Speech (Mistral Voxtral)", True, None))
elif tts_provider == "gemini" and (get_env_value("GEMINI_API_KEY") or get_env_value("GOOGLE_API_KEY")):
tool_status.append(("Text-to-Speech (Google Gemini)", True, None))
elif tts_provider == "neutts":
try:
import importlib.util
@@ -834,7 +835,14 @@ def setup_model_provider(config: dict, *, quick: bool = False):
print_info("Skipped — add later with 'hermes setup' or configure AUXILIARY_VISION_* settings")
# Tool Gateway prompt is already shown by _model_flow_nous() above.
if selected_provider == "nous" and nous_subscription_selected:
changed_defaults = apply_nous_provider_defaults(config)
current_tts = str(config.get("tts", {}).get("provider") or "edge")
if "tts" in changed_defaults:
print_success("TTS provider set to: OpenAI TTS via your Nous subscription")
else:
print_info(f"Keeping your existing TTS provider: {current_tts}")
save_config(config)
if not quick and selected_provider != "nous":
@@ -912,10 +920,8 @@ def _setup_tts_provider(config: dict):
"edge": "Edge TTS",
"elevenlabs": "ElevenLabs",
"openai": "OpenAI TTS",
"xai": "xAI TTS",
"minimax": "MiniMax TTS",
"mistral": "Mistral Voxtral TTS",
"gemini": "Google Gemini TTS",
"neutts": "NeuTTS",
}
current_label = provider_labels.get(current_provider, current_provider)
@@ -935,14 +941,12 @@ def _setup_tts_provider(config: dict):
"Edge TTS (free, cloud-based, no setup needed)",
"ElevenLabs (premium quality, needs API key)",
"OpenAI TTS (good quality, needs API key)",
"xAI TTS (Grok voices, needs API key)",
"MiniMax TTS (high quality with voice cloning, needs API key)",
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
"Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
"NeuTTS (local on-device, free, ~300MB model download)",
]
)
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"])
providers.extend(["edge", "elevenlabs", "openai", "minimax", "mistral", "neutts"])
choices.append(f"Keep current ({current_label})")
keep_current_idx = len(choices) - 1
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@@ -1008,23 +1012,6 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
elif selected == "xai":
existing = get_env_value("XAI_API_KEY")
if not existing:
print()
api_key = prompt("xAI API key for TTS", password=True)
if api_key:
save_env_value("XAI_API_KEY", api_key)
print_success("xAI TTS API key saved")
else:
from hermes_constants import display_hermes_home as _dhh
print_warning(
"No xAI API key provided for TTS. Configure XAI_API_KEY via "
f"hermes setup model or {_dhh()}/.env to use xAI TTS. "
"Falling back to Edge TTS."
)
selected = "edge"
elif selected == "minimax":
existing = get_env_value("MINIMAX_API_KEY")
if not existing:
@@ -1049,19 +1036,6 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
elif selected == "gemini":
existing = get_env_value("GEMINI_API_KEY") or get_env_value("GOOGLE_API_KEY")
if not existing:
print()
print_info("Get a free API key at https://aistudio.google.com/app/apikey")
api_key = prompt("Gemini API key for TTS", password=True)
if api_key:
save_env_value("GEMINI_API_KEY", api_key)
print_success("Gemini TTS API key saved")
else:
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
# Save the selection
if "tts" not in config:
config["tts"] = {}
@@ -1637,19 +1611,9 @@ def _setup_telegram():
return
print_info("Create a bot via @BotFather on Telegram")
import re
while True:
token = prompt("Telegram bot token", password=True)
if not token:
return
if not re.match(r"^\d+:[A-Za-z0-9_-]{30,}$", token):
print_error(
"Invalid token format. Expected: <numeric_id>:<alphanumeric_hash> "
"(e.g., 123456789:ABCdefGHI-jklMNOpqrSTUvwxYZ)"
)
continue
break
token = prompt("Telegram bot token", password=True)
if not token:
return
save_env_value("TELEGRAM_BOT_TOKEN", token)
print_success("Telegram token saved")
@@ -2005,6 +1969,52 @@ def _setup_wecom_callback():
_gw_setup()
def _setup_qqbot():
"""Configure QQ Bot gateway."""
print_header("QQ Bot")
existing = get_env_value("QQ_APP_ID")
if existing:
print_info("QQ Bot: already configured")
if not prompt_yes_no("Reconfigure QQ Bot?", False):
return
print_info("Connects Hermes to QQ via the Official QQ Bot API (v2).")
print_info(" Requires a QQ Bot application at q.qq.com")
print_info(" Reference: https://bot.q.qq.com/wiki/develop/api-v2/")
print()
app_id = prompt("QQ Bot App ID")
if not app_id:
print_warning("App ID is required — skipping QQ Bot setup")
return
save_env_value("QQ_APP_ID", app_id.strip())
client_secret = prompt("QQ Bot App Secret", password=True)
if not client_secret:
print_warning("App Secret is required — skipping QQ Bot setup")
return
save_env_value("QQ_CLIENT_SECRET", client_secret)
print_success("QQ Bot credentials saved")
print()
print_info("🔒 Security: Restrict who can DM your bot")
print_info(" Use QQ user OpenIDs (found in event payloads)")
print()
allowed_users = prompt("Allowed user OpenIDs (comma-separated, leave empty for open access)")
if allowed_users:
save_env_value("QQ_ALLOWED_USERS", allowed_users.replace(" ", ""))
print_success("QQ Bot allowlist configured")
else:
print_info("⚠️ No allowlist set — anyone can DM the bot!")
print()
print_info("📬 Home Channel: OpenID for cron job delivery and notifications.")
home_channel = prompt("Home channel OpenID (leave empty to set later)")
if home_channel:
save_env_value("QQ_HOME_CHANNEL", home_channel)
print()
print_success("QQ Bot configured!")
def _setup_bluebubbles():
@@ -2073,9 +2083,12 @@ def _setup_bluebubbles():
def _setup_qqbot():
"""Configure QQ Bot (Official API v2) via gateway setup."""
from hermes_cli.gateway import _setup_qqbot as _gateway_setup_qqbot
_gateway_setup_qqbot()
"""Configure QQ Bot (Official API v2) via standard platform setup."""
from hermes_cli.gateway import _PLATFORMS
qq_platform = next((p for p in _PLATFORMS if p["key"] == "qqbot"), None)
if qq_platform:
from hermes_cli.gateway import _setup_standard_platform
_setup_standard_platform(qq_platform)
def _setup_webhooks():
@@ -2215,9 +2228,7 @@ def setup_gateway(config: dict):
missing_home.append("Slack")
if get_env_value("BLUEBUBBLES_SERVER_URL") and not get_env_value("BLUEBUBBLES_HOME_CHANNEL"):
missing_home.append("BlueBubbles")
if get_env_value("QQ_APP_ID") and not (
get_env_value("QQBOT_HOME_CHANNEL") or get_env_value("QQ_HOME_CHANNEL")
):
if get_env_value("QQ_APP_ID") and not get_env_value("QQ_HOME_CHANNEL"):
missing_home.append("QQBot")
if missing_home:
@@ -2242,10 +2253,8 @@ def setup_gateway(config: dict):
_is_service_running,
supports_systemd_services,
has_conflicting_systemd_units,
has_legacy_hermes_units,
install_linux_gateway_from_setup,
print_systemd_scope_conflict_warning,
print_legacy_unit_warning,
systemd_start,
systemd_restart,
launchd_install,
@@ -2263,10 +2272,6 @@ def setup_gateway(config: dict):
print_systemd_scope_conflict_warning()
print()
if supports_systemd and has_legacy_hermes_units():
print_legacy_unit_warning()
print()
if service_running:
if prompt_yes_no(" Restart the gateway to pick up changes?", True):
try:
+1 -147
View File
@@ -515,90 +515,6 @@ def do_inspect(identifier: str, console: Optional[Console] = None) -> None:
c.print()
def browse_skills(page: int = 1, page_size: int = 20, source: str = "all") -> dict:
"""Paginated hub browse for programmatic callers (e.g. TUI gateway).
Returns ``{"items": [...], "page": int, "total_pages": int, "total": int}``.
"""
from tools.skills_hub import GitHubAuth, create_source_router
page_size = max(1, min(page_size, 100))
_TRUST_RANK = {"builtin": 3, "trusted": 2, "community": 1}
_PER_SOURCE_LIMIT = {"official": 100, "skills-sh": 100, "well-known": 25, "github": 100, "clawhub": 50,
"claude-marketplace": 50, "lobehub": 50}
auth = GitHubAuth()
sources = create_source_router(auth)
all_results: list = []
for src in sources:
sid = src.source_id()
if source != "all" and sid != source and sid != "official":
continue
try:
limit = _PER_SOURCE_LIMIT.get(sid, 50)
all_results.extend(src.search("", limit=limit))
except Exception:
continue
if not all_results:
return {"items": [], "page": 1, "total_pages": 1, "total": 0}
seen: dict = {}
for r in all_results:
rank = _TRUST_RANK.get(r.trust_level, 0)
if r.name not in seen or rank > _TRUST_RANK.get(seen[r.name].trust_level, 0):
seen[r.name] = r
deduped = list(seen.values())
deduped.sort(key=lambda r: (-_TRUST_RANK.get(r.trust_level, 0), r.source != "official", r.name.lower()))
total = len(deduped)
total_pages = max(1, (total + page_size - 1) // page_size)
page = max(1, min(page, total_pages))
start = (page - 1) * page_size
page_items = deduped[start : min(start + page_size, total)]
return {
"items": [{"name": r.name, "description": r.description, "source": r.source,
"trust": r.trust_level} for r in page_items],
"page": page,
"total_pages": total_pages,
"total": total,
}
def inspect_skill(identifier: str) -> Optional[dict]:
"""Skill metadata (+ SKILL.md preview) for programmatic callers."""
from tools.skills_hub import GitHubAuth, create_source_router
class _Q:
def print(self, *a, **k):
pass
c = _Q()
auth = GitHubAuth()
sources = create_source_router(auth)
ident = identifier
if "/" not in ident:
ident = _resolve_short_name(ident, sources, c)
if not ident:
return None
meta, bundle, _ = _resolve_source_meta_and_bundle(ident, sources)
if not meta:
return None
out: dict = {
"name": meta.name,
"description": meta.description,
"source": meta.source,
"identifier": meta.identifier,
"tags": list(meta.tags) if meta.tags else [],
}
if bundle and "SKILL.md" in bundle.files:
content = bundle.files["SKILL.md"]
if isinstance(content, bytes):
content = content.decode("utf-8", errors="replace")
lines = content.split("\n")
preview = "\n".join(lines[:50])
if len(lines) > 50:
preview += f"\n\n... ({len(lines) - 50} more lines)"
out["skill_md_preview"] = preview
return out
def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None:
"""List installed skills, distinguishing hub, builtin, and local skills."""
from tools.skills_hub import HubLockFile, ensure_hub_dirs
@@ -768,51 +684,6 @@ def do_uninstall(name: str, console: Optional[Console] = None,
c.print(f"[bold red]Error:[/] {msg}\n")
def do_reset(name: str, restore: bool = False,
console: Optional[Console] = None,
skip_confirm: bool = False,
invalidate_cache: bool = True) -> None:
"""Reset a bundled skill's manifest tracking (+ optionally restore from bundled)."""
from tools.skills_sync import reset_bundled_skill
c = console or _console
if not skip_confirm and restore:
c.print(f"\n[bold]Restore '{name}' from bundled source?[/]")
c.print("[dim]This will DELETE your current copy and re-copy the bundled version.[/]")
try:
answer = input("Confirm [y/N]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
answer = "n"
if answer not in ("y", "yes"):
c.print("[dim]Cancelled.[/]\n")
return
result = reset_bundled_skill(name, restore=restore)
if not result["ok"]:
c.print(f"[bold red]Error:[/] {result['message']}\n")
return
c.print(f"[bold green]{result['message']}[/]")
synced = result.get("synced") or {}
if synced.get("copied"):
c.print(f"[dim]Copied: {', '.join(synced['copied'])}[/]")
if synced.get("updated"):
c.print(f"[dim]Updated: {', '.join(synced['updated'])}[/]")
c.print()
if invalidate_cache:
try:
from agent.prompt_builder import clear_skills_system_prompt_cache
clear_skills_system_prompt_cache(clear_snapshot=True)
except Exception:
pass
else:
c.print("[dim]Change will take effect in your next session.[/]")
c.print("[dim]Use /reset to start a new session now, or --now to apply immediately (invalidates prompt cache).[/]\n")
def do_tap(action: str, repo: str = "", console: Optional[Console] = None) -> None:
"""Manage taps (custom GitHub repo sources)."""
from tools.skills_hub import TapsManager
@@ -1136,9 +1007,6 @@ def skills_command(args) -> None:
do_audit(name=getattr(args, "name", None))
elif action == "uninstall":
do_uninstall(args.name)
elif action == "reset":
do_reset(args.name, restore=getattr(args, "restore", False),
skip_confirm=getattr(args, "yes", False))
elif action == "publish":
do_publish(
args.skill_path,
@@ -1161,7 +1029,7 @@ def skills_command(args) -> None:
return
do_tap(tap_action, repo=repo)
else:
_console.print("Usage: hermes skills [browse|search|install|inspect|list|check|update|audit|uninstall|reset|publish|snapshot|tap]\n")
_console.print("Usage: hermes skills [browse|search|install|inspect|list|check|update|audit|uninstall|publish|snapshot|tap]\n")
_console.print("Run 'hermes skills <command> --help' for details.\n")
@@ -1307,19 +1175,6 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
do_uninstall(args[0], console=c, skip_confirm=skip_confirm,
invalidate_cache=invalidate_cache)
elif action == "reset":
if not args:
c.print("[bold red]Usage:[/] /skills reset <name> [--restore] [--now]\n")
c.print("[dim]Clears the bundled-skills manifest entry so future updates stop marking it as user-modified.[/]")
c.print("[dim]Pass --restore to also replace the current copy with the bundled version.[/]\n")
return
name = args[0]
restore = "--restore" in args
invalidate_cache = "--now" in args
# Slash commands can't prompt — --restore in slash mode is implicit consent.
do_reset(name, restore=restore, console=c, skip_confirm=True,
invalidate_cache=invalidate_cache)
elif action == "publish":
if not args:
c.print("[bold red]Usage:[/] /skills publish <skill-path> [--to github] [--repo owner/repo]\n")
@@ -1376,7 +1231,6 @@ def _print_skills_help(console: Console) -> None:
" [cyan]update[/] [name] Update hub skills with upstream changes\n"
" [cyan]audit[/] [name] Re-scan hub skills for security\n"
" [cyan]uninstall[/] <name> Remove a hub-installed skill\n"
" [cyan]reset[/] <name> [--restore] Reset bundled-skill tracking (fix 'user-modified' flag)\n"
" [cyan]publish[/] <path> --repo <r> Publish a skill to GitHub via PR\n"
" [cyan]snapshot[/] export|import Export/import skill configurations\n"
" [cyan]tap[/] list|add|remove Manage skill sources\n",

Some files were not shown because too many files have changed in this diff Show More