Compare commits

..

1 Commits

Author SHA1 Message Date
kshitijk4poor ed201cce9c fix(kimi): drop client-side temperature overrides for Kimi/Moonshot models
The Kimi gateway selects the correct temperature server-side based on the
active mode (thinking on → 1.0, thinking off → 0.6).  Client-side clamping
is no longer needed and would conflict if the gateway changes its defaults.

Removed:
- _FIXED_TEMPERATURE_MODELS, _KIMI_INSTANT_MODELS, _KIMI_THINKING_MODELS,
  _KIMI_PUBLIC_API_OVERRIDES maps from auxiliary_client.py
- All Kimi-specific branches in _fixed_temperature_for_model() — the
  function now always returns None (kept for future non-Kimi contracts)

Callers already guard with 'if fixed_temperature is not None:' so the
change is transparent — temperature is simply omitted from API calls,
letting the Kimi gateway use its own defaults.

Updated tests across 5 files to verify temperature is NOT forced.
2026-04-21 00:00:45 +05:30
290 changed files with 3089 additions and 21887 deletions
-8
View File
@@ -1,8 +0,0 @@
name: 'Setup Nix'
description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
runs:
using: composite
steps:
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
-68
View File
@@ -1,68 +0,0 @@
name: Nix Lockfile Check
on:
pull_request:
workflow_dispatch:
permissions:
contents: read
pull-requests: write
concurrency:
group: nix-lockfile-check-${{ github.ref }}
cancel-in-progress: true
jobs:
check:
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: ./.github/actions/nix-setup
- name: Resolve head SHA
id: sha
shell: bash
run: |
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
echo "full=$FULL" >> "$GITHUB_OUTPUT"
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
- name: Check lockfile hashes
id: check
continue-on-error: true
env:
LINK_SHA: ${{ steps.sha.outputs.full }}
run: nix run .#fix-lockfiles -- --check
- name: Post sticky PR comment (stale)
if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
message: |
### ⚠️ npm lockfile hash out of date
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
${{ steps.check.outputs.report }}
#### Apply the fix
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
- Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
- name: Clear sticky PR comment (resolved)
if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
delete: true
- name: Fail if stale
if: steps.check.outputs.stale == 'true'
run: exit 1
-149
View File
@@ -1,149 +0,0 @@
name: Nix Lockfile Fix
on:
workflow_dispatch:
inputs:
pr_number:
description: 'PR number to fix (leave empty to run on the selected branch)'
required: false
type: string
issue_comment:
types: [edited]
permissions:
contents: write
pull-requests: write
concurrency:
group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
cancel-in-progress: false
jobs:
fix:
# Run on manual dispatch OR when a task-list checkbox in the sticky
# lockfile-check comment flips from `[ ]` to `[x]`.
if: |
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'issue_comment'
&& github.event.issue.pull_request != null
&& contains(github.event.comment.body, '[x] **Apply lockfile fix**')
&& !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
runs-on: ubuntu-latest
timeout-minutes: 25
steps:
- name: Authorize & resolve PR
id: resolve
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
script: |
// 1. Verify the actor has write access — applies to both checkbox
// clicks and manual dispatch.
const { data: perm } =
await github.rest.repos.getCollaboratorPermissionLevel({
owner: context.repo.owner,
repo: context.repo.repo,
username: context.actor,
});
if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
core.setFailed(
`${context.actor} lacks write access (has: ${perm.permission})`
);
return;
}
// 2. Resolve which ref to check out.
let prNumber = '';
if (context.eventName === 'issue_comment') {
prNumber = String(context.payload.issue.number);
} else if (context.eventName === 'workflow_dispatch') {
prNumber = context.payload.inputs.pr_number || '';
}
if (!prNumber) {
core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
core.setOutput('repo', context.repo.repo);
core.setOutput('owner', context.repo.owner);
core.setOutput('pr', '');
return;
}
const { data: pr } = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: Number(prNumber),
});
core.setOutput('ref', pr.head.ref);
core.setOutput('repo', pr.head.repo.name);
core.setOutput('owner', pr.head.repo.owner.login);
core.setOutput('pr', String(pr.number));
# Wipe the sticky lockfile-check comment to a "running" state as soon
# as the job is authorized, so the user sees their click was picked up
# before the ~minute of nix build work.
- name: Mark sticky as running
if: steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### 🔄 Applying lockfile fix…
Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
ref: ${{ steps.resolve.outputs.ref }}
token: ${{ secrets.GITHUB_TOKEN }}
fetch-depth: 0
- uses: ./.github/actions/nix-setup
- name: Apply lockfile hashes
id: apply
run: nix run .#fix-lockfiles -- --apply
- name: Commit & push
if: steps.apply.outputs.changed == 'true'
shell: bash
run: |
set -euo pipefail
git config user.name 'github-actions[bot]'
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
git add nix/tui.nix nix/web.nix
git commit -m "fix(nix): refresh npm lockfile hashes"
git push
- name: Update sticky (applied)
if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ✅ Lockfile fix applied
Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- name: Update sticky (already current)
if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ✅ Lockfile hashes already current
Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- name: Update sticky (failed)
if: failure() && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ❌ Lockfile fix failed
See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
+12 -2
View File
@@ -4,6 +4,15 @@ on:
push:
branches: [main]
pull_request:
paths:
- 'flake.nix'
- 'flake.lock'
- 'nix/**'
- 'pyproject.toml'
- 'uv.lock'
- 'hermes_cli/**'
- 'run_agent.py'
- 'acp_adapter/**'
permissions:
contents: read
@@ -20,8 +29,9 @@ jobs:
runs-on: ${{ matrix.os }}
timeout-minutes: 30
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: ./.github/actions/nix-setup
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
- name: Check flake
if: runner.os == 'Linux'
run: nix flake check --print-build-logs
-49
View File
@@ -566,52 +566,3 @@ python -m pytest tests/ -q -n 4
Worker count above 4 will surface test-ordering flakes that CI never sees.
Always run the full suite before pushing changes.
### Don't write change-detector tests
A test is a **change-detector** if it fails whenever data that is **expected
to change** gets updated — model catalogs, config version numbers,
enumeration counts, hardcoded lists of provider models. These tests add no
behavioral coverage; they just guarantee that routine source updates break
CI and cost engineering time to "fix."
**Do not write:**
```python
# catalog snapshot — breaks every model release
assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
assert "MiniMax-M2.7" in models
# config version literal — breaks every schema bump
assert DEFAULT_CONFIG["_config_version"] == 21
# enumeration count — breaks every time a skill/provider is added
assert len(_PROVIDER_MODELS["huggingface"]) == 8
```
**Do write:**
```python
# behavior: does the catalog plumbing work at all?
assert "gemini" in _PROVIDER_MODELS
assert len(_PROVIDER_MODELS["gemini"]) >= 1
# behavior: does migration bump the user's version to current latest?
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
# invariant: no plan-only model leaks into the legacy list
assert not (set(moonshot_models) & coding_plan_only_models)
# invariant: every model in the catalog has a context-length entry
for m in _PROVIDER_MODELS["huggingface"]:
assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
```
The rule: if the test reads like a snapshot of current data, delete it. If
it reads like a contract about how two pieces of data must relate, keep it.
When a PR adds a new provider/model and you want a test, make the test
assert the relationship (e.g. "catalog entries all have context lengths"),
not the specific names.
Reviewers should reject new change-detector tests; authors should convert
them into invariants before re-requesting review.
-3
View File
@@ -63,9 +63,6 @@ def make_approval_callback(
logger.warning("Permission request timed out or failed: %s", exc)
return "deny"
if response is None:
return "deny"
outcome = response.outcome
if isinstance(outcome, AllowedOutcome):
option_id = outcome.option_id
+14 -74
View File
@@ -4,7 +4,6 @@ from __future__ import annotations
import asyncio
import logging
import os
from collections import defaultdict, deque
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Deque, Optional
@@ -52,7 +51,7 @@ try:
except ImportError:
from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined]
from acp_adapter.auth import detect_provider
from acp_adapter.auth import detect_provider, has_provider
from acp_adapter.events import (
make_message_cb,
make_step_cb,
@@ -72,11 +71,6 @@ except Exception:
# Thread pool for running AIAgent (synchronous) in parallel.
_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")
# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
# does not expose a client-side limit, so this is a fixed cap that clients
# paginate against using `cursor` / `next_cursor`.
_LIST_SESSIONS_PAGE_SIZE = 50
def _extract_text(
prompt: list[
@@ -357,18 +351,9 @@ class HermesACPAgent(acp.Agent):
)
async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
# Only accept authenticate() calls whose method_id matches the
# provider we advertised in initialize(). Without this check,
# authenticate() would acknowledge any method_id as long as the
# server has provider credentials configured — harmless under
# Hermes' threat model (ACP is stdio-only, local-trust), but poor
# API hygiene and confusing if ACP ever grows multi-method auth.
provider = detect_provider()
if not provider:
return None
if not isinstance(method_id, str) or method_id.strip().lower() != provider:
return None
return AuthenticateResponse()
if has_provider():
return AuthenticateResponse()
return None
# ---- Session management -------------------------------------------------
@@ -452,28 +437,7 @@ class HermesACPAgent(acp.Agent):
cwd: str | None = None,
**kwargs: Any,
) -> ListSessionsResponse:
"""List ACP sessions with optional ``cwd`` filtering and cursor pagination.
``cwd`` is passed through to ``SessionManager.list_sessions`` which already
normalizes and filters by working directory. ``cursor`` is a ``session_id``
previously returned as ``next_cursor``; results resume after that entry.
Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
results remain, ``next_cursor`` is set to the last returned ``session_id``.
"""
infos = self.session_manager.list_sessions(cwd=cwd)
if cursor:
for idx, s in enumerate(infos):
if s["session_id"] == cursor:
infos = infos[idx + 1:]
break
else:
# Unknown cursor -> empty page (do not fall back to full list).
infos = []
has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
sessions = []
for s in infos:
updated_at = s.get("updated_at")
@@ -487,9 +451,7 @@ class HermesACPAgent(acp.Agent):
updated_at=updated_at,
)
)
next_cursor = sessions[-1].session_id if has_more and sessions else None
return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)
return ListSessionsResponse(sessions=sessions)
# ---- Prompt (core) ------------------------------------------------------
@@ -555,32 +517,15 @@ class HermesACPAgent(acp.Agent):
agent.step_callback = step_cb
agent.message_callback = message_cb
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
# Set it INSIDE _run_agent so the TLS write happens in the executor
# thread — setting it here would write to the event-loop thread's TLS,
# not the executor's. Also set HERMES_INTERACTIVE so approval.py
# takes the CLI-interactive path (which calls the registered
# callback via prompt_dangerous_approval) instead of the
# non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
# ACP's conn.request_permission maps cleanly to the interactive
# callback shape — not the gateway-queue HERMES_EXEC_ASK path,
# which requires a notify_cb registered in _gateway_notify_cbs.
previous_approval_cb = None
previous_interactive = None
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
_terminal_tool.set_approval_callback(approval_cb)
except Exception:
logger.debug("Could not set ACP approval callback", exc_info=True)
def _run_agent() -> dict:
nonlocal previous_approval_cb, previous_interactive
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
previous_approval_cb = _terminal_tool._get_approval_callback()
_terminal_tool.set_approval_callback(approval_cb)
except Exception:
logger.debug("Could not set ACP approval callback", exc_info=True)
# Signal to tools.approval that we have an interactive callback
# and the non-interactive auto-approve path must not fire.
previous_interactive = os.environ.get("HERMES_INTERACTIVE")
os.environ["HERMES_INTERACTIVE"] = "1"
try:
result = agent.run_conversation(
user_message=user_text,
@@ -592,11 +537,6 @@ class HermesACPAgent(acp.Agent):
logger.exception("Agent error in session %s", session_id)
return {"final_response": f"Error: {e}", "messages": state.history}
finally:
# Restore HERMES_INTERACTIVE.
if previous_interactive is None:
os.environ.pop("HERMES_INTERACTIVE", None)
else:
os.environ["HERMES_INTERACTIVE"] = previous_interactive
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
@@ -673,8 +613,8 @@ class HermesACPAgent(acp.Agent):
await self._conn.session_update(
session_id=session_id,
update=AvailableCommandsUpdate(
session_update="available_commands_update",
available_commands=self._available_commands(),
sessionUpdate="available_commands_update",
availableCommands=self._available_commands(),
),
)
except Exception:
-326
View File
@@ -1,326 +0,0 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Optional
import httpx
from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
from hermes_cli.runtime_provider import resolve_runtime_provider
def _utc_now() -> datetime:
return datetime.now(timezone.utc)
@dataclass(frozen=True)
class AccountUsageWindow:
label: str
used_percent: Optional[float] = None
reset_at: Optional[datetime] = None
detail: Optional[str] = None
@dataclass(frozen=True)
class AccountUsageSnapshot:
provider: str
source: str
fetched_at: datetime
title: str = "Account limits"
plan: Optional[str] = None
windows: tuple[AccountUsageWindow, ...] = ()
details: tuple[str, ...] = ()
unavailable_reason: Optional[str] = None
@property
def available(self) -> bool:
return bool(self.windows or self.details) and not self.unavailable_reason
def _title_case_slug(value: Optional[str]) -> Optional[str]:
cleaned = str(value or "").strip()
if not cleaned:
return None
return cleaned.replace("_", " ").replace("-", " ").title()
def _parse_dt(value: Any) -> Optional[datetime]:
if value in (None, ""):
return None
if isinstance(value, (int, float)):
return datetime.fromtimestamp(float(value), tz=timezone.utc)
if isinstance(value, str):
text = value.strip()
if not text:
return None
if text.endswith("Z"):
text = text[:-1] + "+00:00"
try:
dt = datetime.fromisoformat(text)
return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
except ValueError:
return None
return None
def _format_reset(dt: Optional[datetime]) -> str:
if not dt:
return "unknown"
local_dt = dt.astimezone()
delta = dt - _utc_now()
total_seconds = int(delta.total_seconds())
if total_seconds <= 0:
return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
hours, rem = divmod(total_seconds, 3600)
minutes = rem // 60
if hours >= 24:
days, hours = divmod(hours, 24)
rel = f"in {days}d {hours}h"
elif hours > 0:
rel = f"in {hours}h {minutes}m"
else:
rel = f"in {minutes}m"
return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
if not snapshot:
return []
header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
lines = [header]
if snapshot.plan:
lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
else:
lines.append(f"Provider: {snapshot.provider}")
for window in snapshot.windows:
if window.used_percent is None:
base = f"{window.label}: unavailable"
else:
remaining = max(0, round(100 - float(window.used_percent)))
used = max(0, round(float(window.used_percent)))
base = f"{window.label}: {remaining}% remaining ({used}% used)"
if window.reset_at:
base += f" • resets {_format_reset(window.reset_at)}"
elif window.detail:
base += f"{window.detail}"
lines.append(base)
for detail in snapshot.details:
lines.append(detail)
if snapshot.unavailable_reason:
lines.append(f"Unavailable: {snapshot.unavailable_reason}")
return lines
def _resolve_codex_usage_url(base_url: str) -> str:
normalized = (base_url or "").strip().rstrip("/")
if not normalized:
normalized = "https://chatgpt.com/backend-api/codex"
if normalized.endswith("/codex"):
normalized = normalized[: -len("/codex")]
if "/backend-api" in normalized:
return normalized + "/wham/usage"
return normalized + "/api/codex/usage"
def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
token_data = _read_codex_tokens()
tokens = token_data.get("tokens") or {}
account_id = str(tokens.get("account_id", "") or "").strip() or None
headers = {
"Authorization": f"Bearer {creds['api_key']}",
"Accept": "application/json",
"User-Agent": "codex-cli",
}
if account_id:
headers["ChatGPT-Account-Id"] = account_id
with httpx.Client(timeout=15.0) as client:
response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
response.raise_for_status()
payload = response.json() or {}
rate_limit = payload.get("rate_limit") or {}
windows: list[AccountUsageWindow] = []
for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
window = rate_limit.get(key) or {}
used = window.get("used_percent")
if used is None:
continue
windows.append(
AccountUsageWindow(
label=label,
used_percent=float(used),
reset_at=_parse_dt(window.get("reset_at")),
)
)
details: list[str] = []
credits = payload.get("credits") or {}
if credits.get("has_credits"):
balance = credits.get("balance")
if isinstance(balance, (int, float)):
details.append(f"Credits balance: ${float(balance):.2f}")
elif credits.get("unlimited"):
details.append("Credits balance: unlimited")
return AccountUsageSnapshot(
provider="openai-codex",
source="usage_api",
fetched_at=_utc_now(),
plan=_title_case_slug(payload.get("plan_type")),
windows=tuple(windows),
details=tuple(details),
)
def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
token = (resolve_anthropic_token() or "").strip()
if not token:
return None
if not _is_oauth_token(token):
return AccountUsageSnapshot(
provider="anthropic",
source="oauth_usage_api",
fetched_at=_utc_now(),
unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
)
headers = {
"Authorization": f"Bearer {token}",
"Accept": "application/json",
"Content-Type": "application/json",
"anthropic-beta": "oauth-2025-04-20",
"User-Agent": "claude-code/2.1.0",
}
with httpx.Client(timeout=15.0) as client:
response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
response.raise_for_status()
payload = response.json() or {}
windows: list[AccountUsageWindow] = []
mapping = (
("five_hour", "Current session"),
("seven_day", "Current week"),
("seven_day_opus", "Opus week"),
("seven_day_sonnet", "Sonnet week"),
)
for key, label in mapping:
window = payload.get(key) or {}
util = window.get("utilization")
if util is None:
continue
used = float(util) * 100 if float(util) <= 1 else float(util)
windows.append(
AccountUsageWindow(
label=label,
used_percent=used,
reset_at=_parse_dt(window.get("resets_at")),
)
)
details: list[str] = []
extra = payload.get("extra_usage") or {}
if extra.get("is_enabled"):
used_credits = extra.get("used_credits")
monthly_limit = extra.get("monthly_limit")
currency = extra.get("currency") or "USD"
if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
details.append(
f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
)
return AccountUsageSnapshot(
provider="anthropic",
source="oauth_usage_api",
fetched_at=_utc_now(),
windows=tuple(windows),
details=tuple(details),
)
def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
runtime = resolve_runtime_provider(
requested="openrouter",
explicit_base_url=base_url,
explicit_api_key=api_key,
)
token = str(runtime.get("api_key", "") or "").strip()
if not token:
return None
normalized = str(runtime.get("base_url", "") or "").rstrip("/")
credits_url = f"{normalized}/credits"
key_url = f"{normalized}/key"
headers = {
"Authorization": f"Bearer {token}",
"Accept": "application/json",
}
with httpx.Client(timeout=10.0) as client:
credits_resp = client.get(credits_url, headers=headers)
credits_resp.raise_for_status()
credits = (credits_resp.json() or {}).get("data") or {}
try:
key_resp = client.get(key_url, headers=headers)
key_resp.raise_for_status()
key_data = (key_resp.json() or {}).get("data") or {}
except Exception:
key_data = {}
total_credits = float(credits.get("total_credits") or 0.0)
total_usage = float(credits.get("total_usage") or 0.0)
details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
windows: list[AccountUsageWindow] = []
limit = key_data.get("limit")
limit_remaining = key_data.get("limit_remaining")
limit_reset = str(key_data.get("limit_reset") or "").strip()
usage = key_data.get("usage")
if (
isinstance(limit, (int, float))
and float(limit) > 0
and isinstance(limit_remaining, (int, float))
and 0 <= float(limit_remaining) <= float(limit)
):
limit_value = float(limit)
remaining_value = float(limit_remaining)
used_percent = ((limit_value - remaining_value) / limit_value) * 100
detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
if limit_reset:
detail_parts.append(f"resets {limit_reset}")
windows.append(
AccountUsageWindow(
label="API key quota",
used_percent=used_percent,
detail="".join(detail_parts),
)
)
if isinstance(usage, (int, float)):
usage_parts = [f"API key usage: ${float(usage):.2f} total"]
for value, label in (
(key_data.get("usage_daily"), "today"),
(key_data.get("usage_weekly"), "this week"),
(key_data.get("usage_monthly"), "this month"),
):
if isinstance(value, (int, float)) and float(value) > 0:
usage_parts.append(f"${float(value):.2f} {label}")
details.append("".join(usage_parts))
return AccountUsageSnapshot(
provider="openrouter",
source="credits_api",
fetched_at=_utc_now(),
windows=tuple(windows),
details=tuple(details),
)
def fetch_account_usage(
provider: Optional[str],
*,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
) -> Optional[AccountUsageSnapshot]:
normalized = str(provider or "").strip().lower()
if normalized in {"", "auto", "custom"}:
return None
try:
if normalized == "openai-codex":
return _fetch_codex_account_usage()
if normalized == "anthropic":
return _fetch_anthropic_account_usage()
if normalized == "openrouter":
return _fetch_openrouter_account_usage(base_url, api_key)
except Exception:
return None
return None
-43
View File
@@ -19,7 +19,6 @@ from pathlib import Path
from hermes_constants import get_hermes_home
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Tuple
from utils import normalize_proxy_env_vars
try:
import anthropic as _anthropic_sdk
@@ -309,9 +308,6 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
"The 'anthropic' package is required for the Anthropic provider. "
"Install it with: pip install 'anthropic>=0.39.0'"
)
normalize_proxy_env_vars()
from httpx import Timeout
normalized_base_url = _normalize_base_url_text(base_url)
@@ -1529,42 +1525,3 @@ def normalize_anthropic_response(
),
finish_reason,
)
def normalize_anthropic_response_v2(
response,
strip_tool_prefix: bool = False,
) -> "NormalizedResponse":
"""Normalize Anthropic response to NormalizedResponse.
Wraps the existing normalize_anthropic_response() and maps its output
to the shared transport types. This allows incremental migration —
one call site at a time — without changing the original function.
"""
from agent.transports.types import NormalizedResponse, build_tool_call
assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
tool_calls = None
if assistant_msg.tool_calls:
tool_calls = [
build_tool_call(
id=tc.id,
name=tc.function.name,
arguments=tc.function.arguments,
)
for tc in assistant_msg.tool_calls
]
provider_data = {}
if getattr(assistant_msg, "reasoning_details", None):
provider_data["reasoning_details"] = assistant_msg.reasoning_details
return NormalizedResponse(
content=assistant_msg.content,
tool_calls=tool_calls,
finish_reason=finish_reason,
reasoning=getattr(assistant_msg, "reasoning", None),
usage=None, # Anthropic usage is on the raw response, not the normaliser
provider_data=provider_data or None,
)
+47 -259
View File
@@ -48,7 +48,6 @@ from openai import OpenAI
from agent.credential_pool import load_pool
from hermes_cli.config import get_hermes_home
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars
logger = logging.getLogger(__name__)
@@ -96,37 +95,21 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
return _PROVIDER_ALIASES.get(normalized, normalized)
# Sentinel: when returned by _fixed_temperature_for_model(), callers must
# strip the ``temperature`` key from API kwargs entirely so the provider's
# server-side default applies. Kimi/Moonshot models manage temperature
# internally — sending *any* value (even the "correct" one) can conflict
# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
OMIT_TEMPERATURE: object = object()
def _is_kimi_model(model: Optional[str]) -> bool:
"""True for any Kimi / Moonshot model that manages temperature server-side."""
bare = (model or "").strip().lower().rsplit("/", 1)[-1]
return bare.startswith("kimi-") or bare == "kimi"
def _fixed_temperature_for_model(
model: Optional[str],
base_url: Optional[str] = None,
) -> "Optional[float] | object":
"""Return a temperature directive for models with strict contracts.
) -> Optional[float]:
"""Return a required temperature override for models with strict contracts.
Returns:
``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the
provider chooses its own default. Used for all Kimi / Moonshot
models whose gateway selects temperature server-side.
``float`` — a specific value the caller must use (reserved for future
models with fixed-temperature contracts).
``None`` — no override; caller should use its own default.
Returns ``None`` for all models — callers should omit the ``temperature``
parameter so the provider's server-side defaults apply.
Kimi / Moonshot models previously had hardcoded temperature overrides here
(0.6 for non-thinking, 1.0 for thinking). As of July 2026 the Kimi gateway
selects the correct temperature server-side based on the active mode, so
client-side clamping is no longer needed (and would conflict if the gateway
changes its defaults).
"""
if _is_kimi_model(model):
logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
return OMIT_TEMPERATURE
return None
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
@@ -161,16 +144,6 @@ _OR_HEADERS = {
"X-OpenRouter-Categories": "productivity,cli-agent",
}
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
# referrerUrl and X-Title maps to appName in the gateway's analytics.
from hermes_cli import __version__ as _HERMES_VERSION
_AI_GATEWAY_HEADERS = {
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
"X-Title": "Hermes Agent",
"User-Agent": f"HermesAgent/{_HERMES_VERSION}",
}
# Nous Portal extra_body for product attribution.
# Callers should pass this as extra_body in chat.completions.create()
# when the auxiliary client is backed by Nous Portal.
@@ -728,33 +701,6 @@ def _nous_base_url() -> str:
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
"""Return fresh Nous runtime credentials when available.
This mirrors the main agent's 401 recovery path and keeps auxiliary
clients aligned with the singleton auth store + mint flow instead of
relying only on whatever raw tokens happen to be sitting in auth.json
or the credential pool.
"""
try:
from hermes_cli.auth import resolve_nous_runtime_credentials
creds = resolve_nous_runtime_credentials(
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
force_mint=force_refresh,
)
except Exception as exc:
logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
return None
api_key = str(creds.get("api_key") or "").strip()
base_url = str(creds.get("base_url") or "").strip().rstrip("/")
if not api_key or not base_url:
return None
return api_key, base_url
def _read_codex_access_token() -> Optional[str]:
"""Read a valid, non-expired Codex OAuth access token from Hermes auth store.
@@ -844,9 +790,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if is_native_gemini_base_url(base_url):
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
extra = {}
if base_url_host_matches(base_url, "api.kimi.com"):
if "api.kimi.com" in base_url.lower():
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
elif "api.githubcopilot.com" in base_url.lower():
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
@@ -870,9 +816,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if is_native_gemini_base_url(base_url):
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
extra = {}
if base_url_host_matches(base_url, "api.kimi.com"):
if "api.kimi.com" in base_url.lower():
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
elif "api.githubcopilot.com" in base_url.lower():
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
@@ -921,8 +867,7 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
pass
nous = _read_nous_auth()
runtime = _resolve_nous_runtime_api(force_refresh=False)
if runtime is None and not nous:
if not nous:
return None, None
global auxiliary_is_nous
auxiliary_is_nous = True
@@ -933,8 +878,6 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
model = _NOUS_MODEL
# Free-tier users can't use paid auxiliary models — use the free
# models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
# Paid accounts keep their tier-appropriate models: gemini-3-flash-preview
# for both text and vision tasks.
try:
from hermes_cli.models import check_nous_free_tier
if check_nous_free_tier():
@@ -943,15 +886,10 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
model, "vision" if vision else "text")
except Exception:
pass
if runtime is not None:
api_key, base_url = runtime
else:
api_key = _nous_api_key(nous or {})
base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
return (
OpenAI(
api_key=api_key,
base_url=base_url,
api_key=_nous_api_key(nous),
base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
),
model,
)
@@ -1029,7 +967,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st
return None, None, None
custom_base = custom_base.strip().rstrip("/")
if base_url_host_matches(custom_base, "openrouter.ai"):
if "openrouter.ai" in custom_base.lower():
# requested='custom' falls back to OpenRouter when no custom endpoint is
# configured. Treat that as "no custom endpoint" for auxiliary routing.
return None, None, None
@@ -1063,8 +1001,6 @@ def _validate_proxy_env_urls() -> None:
"""
from urllib.parse import urlparse
normalize_proxy_env_vars()
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = str(os.environ.get(key) or "").strip()
@@ -1295,15 +1231,6 @@ def _is_connection_error(exc: Exception) -> bool:
return False
def _is_auth_error(exc: Exception) -> bool:
"""Detect auth failures that should trigger provider-specific refresh."""
status = getattr(exc, "status_code", None)
if status == 401:
return True
err_lower = str(exc).lower()
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
def _try_payment_fallback(
failed_provider: str,
task: str = None,
@@ -1479,14 +1406,14 @@ def _to_async_client(sync_client, model: str):
"api_key": sync_client.api_key,
"base_url": str(sync_client.base_url),
}
sync_base_url = str(sync_client.base_url)
if base_url_host_matches(sync_base_url, "openrouter.ai"):
base_lower = str(sync_client.base_url).lower()
if "openrouter" in base_lower:
async_kwargs["default_headers"] = dict(_OR_HEADERS)
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
elif "api.githubcopilot.com" in base_lower:
from hermes_cli.models import copilot_default_headers
async_kwargs["default_headers"] = copilot_default_headers()
elif base_url_host_matches(sync_base_url, "api.kimi.com"):
elif "api.kimi.com" in base_lower:
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
return AsyncOpenAI(**async_kwargs), model
@@ -1563,7 +1490,8 @@ def resolve_provider_client(
# Auto-detect: api.openai.com + codex model name pattern
if api_mode and api_mode != "codex_responses":
return False # explicit non-codex mode
if base_url_hostname(base_url_str) == "api.openai.com":
normalized_base = (base_url_str or "").strip().lower()
if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
model_lower = (model_str or "").lower()
if "codex" in model_lower:
return True
@@ -1611,13 +1539,7 @@ def resolve_provider_client(
# ── Nous Portal (OAuth) ──────────────────────────────────────────
if provider == "nous":
# Detect vision tasks: either explicit model override from
# _PROVIDER_VISION_MODELS, or caller passed a known vision model.
_is_vision = (
model in _PROVIDER_VISION_MODELS.values()
or (model or "").strip().lower() == "mimo-v2-omni"
)
client, default = _try_nous(vision=_is_vision)
client, default = _try_nous()
if client is None:
logger.warning("resolve_provider_client: nous requested "
"but Nous Portal not configured (run: hermes auth)")
@@ -1673,9 +1595,9 @@ def resolve_provider_client(
provider,
)
extra = {}
if base_url_host_matches(custom_base, "api.kimi.com"):
if "api.kimi.com" in custom_base.lower():
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
elif "api.githubcopilot.com" in custom_base.lower():
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
@@ -1780,9 +1702,9 @@ def resolve_provider_client(
# Provider-specific headers
headers = {}
if base_url_host_matches(base_url, "api.kimi.com"):
if "api.kimi.com" in base_url.lower():
headers["User-Agent"] = "KimiCLI/1.30.0"
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
elif "api.githubcopilot.com" in base_url.lower():
from hermes_cli.models import copilot_default_headers
headers.update(copilot_default_headers())
@@ -2013,35 +1935,24 @@ def resolve_vision_provider_client(
# _PROVIDER_VISION_MODELS provides per-provider vision model
# overrides when the provider has a dedicated multimodal model
# that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
# zai → glm-5v-turbo). Nous is the exception: it has a dedicated
# strict vision backend with tier-aware defaults, so it must not
# fall through to the user's text chat model here.
# zai → glm-5v-turbo).
# 2. OpenRouter (vision-capable aggregator fallback)
# 3. Nous Portal (vision-capable aggregator fallback)
# 4. Stop
main_provider = _read_main_provider()
main_model = _read_main_model()
if main_provider and main_provider not in ("auto", ""):
if main_provider == "nous":
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
if sync_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, default_model or resolved_model or main_model,
)
return _finalize(main_provider, sync_client, default_model)
else:
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
# Fall back through aggregators (uses their dedicated vision model,
# not the user's main model) when main provider has no client.
@@ -2088,7 +1999,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
# Only use max_completion_tokens for direct OpenAI custom endpoints
if (not or_key
and _read_nous_auth() is None
and base_url_hostname(custom_base) == "api.openai.com"):
and "api.openai.com" in custom_base.lower()):
return {"max_completion_tokens": value}
return {"max_tokens": value}
@@ -2116,76 +2027,6 @@ _client_cache_lock = threading.Lock()
_CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded
def _client_cache_key(
provider: str,
*,
async_mode: bool,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
api_mode: Optional[str] = None,
main_runtime: Optional[Dict[str, Any]] = None,
) -> tuple:
runtime = _normalize_main_runtime(main_runtime)
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
with _client_cache_lock:
old_entry = _client_cache.get(cache_key)
if old_entry is not None and old_entry[0] is not client:
_force_close_async_httpx(old_entry[0])
try:
close_fn = getattr(old_entry[0], "close", None)
if callable(close_fn):
close_fn()
except Exception:
pass
_client_cache[cache_key] = (client, default_model, bound_loop)
def _refresh_nous_auxiliary_client(
*,
cache_provider: str,
model: Optional[str],
async_mode: bool,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
api_mode: Optional[str] = None,
main_runtime: Optional[Dict[str, Any]] = None,
) -> Tuple[Optional[Any], Optional[str]]:
"""Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
runtime = _resolve_nous_runtime_api(force_refresh=True)
if runtime is None:
return None, model
fresh_key, fresh_base_url = runtime
sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
final_model = model
current_loop = None
if async_mode:
try:
import asyncio as _aio
current_loop = _aio.get_event_loop()
except RuntimeError:
pass
client, final_model = _to_async_client(sync_client, final_model or "")
else:
client = sync_client
cache_key = _client_cache_key(
cache_provider,
async_mode=async_mode,
base_url=base_url,
api_key=api_key,
api_mode=api_mode,
main_runtime=main_runtime,
)
_store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
return client, final_model
def neuter_async_httpx_del() -> None:
"""Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.
@@ -2287,7 +2128,7 @@ def cleanup_stale_async_clients() -> None:
def _is_openrouter_client(client: Any) -> bool:
for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
if obj and base_url_host_matches(str(getattr(obj, "base_url", "") or ""), "openrouter.ai"):
if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
return True
return False
@@ -2339,14 +2180,8 @@ def _get_cached_client(
except RuntimeError:
pass
runtime = _normalize_main_runtime(main_runtime)
cache_key = _client_cache_key(
provider,
async_mode=async_mode,
base_url=base_url,
api_key=api_key,
api_mode=api_mode,
main_runtime=main_runtime,
)
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
with _client_cache_lock:
if cache_key in _client_cache:
cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -2577,9 +2412,7 @@ def _build_call_kwargs(
}
fixed_temperature = _fixed_temperature_for_model(model, base_url)
if fixed_temperature is OMIT_TEMPERATURE:
temperature = None # strip — let server choose
elif fixed_temperature is not None:
if fixed_temperature is not None:
temperature = fixed_temperature
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
@@ -2599,7 +2432,7 @@ def _build_call_kwargs(
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
if provider == "custom":
custom_base = base_url or _current_custom_base_url()
if base_url_hostname(custom_base) == "api.openai.com":
if "api.openai.com" in custom_base.lower():
kwargs["max_completion_tokens"] = max_tokens
else:
kwargs["max_tokens"] = max_tokens
@@ -2794,29 +2627,6 @@ def call_llm(
raise
first_err = retry_err
# ── Nous auth refresh parity with main agent ──────────────────
client_is_nous = (
resolved_provider == "nous"
or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
)
if _is_auth_error(first_err) and client_is_nous:
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
cache_provider=resolved_provider or "nous",
model=final_model,
async_mode=False,
base_url=resolved_base_url,
api_key=resolved_api_key,
api_mode=resolved_api_mode,
main_runtime=main_runtime,
)
if refreshed_client is not None:
logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
task or "call")
if refreshed_model and refreshed_model != kwargs.get("model"):
kwargs["model"] = refreshed_model
return _validate_llm_response(
refreshed_client.chat.completions.create(**kwargs), task)
# ── Payment / credit exhaustion fallback ──────────────────────
# When the resolved provider returns 402 or a credit-related error,
# try alternative providers instead of giving up. This handles the
@@ -3015,28 +2825,6 @@ async def async_call_llm(
raise
first_err = retry_err
# ── Nous auth refresh parity with main agent ──────────────────
client_is_nous = (
resolved_provider == "nous"
or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
)
if _is_auth_error(first_err) and client_is_nous:
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
cache_provider=resolved_provider or "nous",
model=final_model,
async_mode=True,
base_url=resolved_base_url,
api_key=resolved_api_key,
api_mode=resolved_api_mode,
)
if refreshed_client is not None:
logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
task or "call")
if refreshed_model and refreshed_model != kwargs.get("model"):
kwargs["model"] = refreshed_model
return _validate_llm_response(
await refreshed_client.chat.completions.create(**kwargs), task)
# ── Payment / connection fallback (mirrors sync call_llm) ─────
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
is_auto = resolved_provider in ("auto", "", None)
-813
View File
@@ -1,813 +0,0 @@
"""Codex Responses API adapter.
Pure format-conversion and normalization logic for the OpenAI Responses API
(used by OpenAI Codex, xAI, GitHub Models, and other Responses-compatible endpoints).
Extracted from run_agent.py to isolate Responses API-specific logic from the
core agent loop. All functions are stateless — they operate on the data passed
in and return transformed results.
"""
from __future__ import annotations
import hashlib
import json
import logging
import re
import uuid
from types import SimpleNamespace
from typing import Any, Dict, List, Optional
from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Multimodal content helpers
# ---------------------------------------------------------------------------
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
"""Convert chat-style multimodal content to Responses API input parts.
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
Returns an empty list when ``content`` is not a list or contains no
recognized parts — callers fall back to the string path.
"""
if not isinstance(content, list):
return []
converted: List[Dict[str, Any]] = []
for part in content:
if isinstance(part, str):
if part:
converted.append({"type": "input_text", "text": part})
continue
if not isinstance(part, dict):
continue
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"text", "input_text", "output_text"}:
text = part.get("text")
if isinstance(text, str) and text:
converted.append({"type": "input_text", "text": text})
continue
if ptype in {"image_url", "input_image"}:
image_ref = part.get("image_url")
detail = part.get("detail")
if isinstance(image_ref, dict):
url = image_ref.get("url")
detail = image_ref.get("detail", detail)
else:
url = image_ref
if not isinstance(url, str) or not url:
continue
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
if isinstance(detail, str) and detail.strip():
image_part["detail"] = detail.strip()
converted.append(image_part)
return converted
def _summarize_user_message_for_log(content: Any) -> str:
"""Return a short text summary of a user message for logging/trajectory.
Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
parts from the API server. Logging, spinner previews, and trajectory
files all want a plain string — this helper extracts the first chunk of
text and notes any attached images. Returns an empty string for empty
lists and ``str(content)`` for unexpected scalar types.
"""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
text_bits: List[str] = []
image_count = 0
for part in content:
if isinstance(part, str):
if part:
text_bits.append(part)
continue
if not isinstance(part, dict):
continue
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"text", "input_text", "output_text"}:
text = part.get("text")
if isinstance(text, str) and text:
text_bits.append(text)
elif ptype in {"image_url", "input_image"}:
image_count += 1
summary = " ".join(text_bits).strip()
if image_count:
note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
summary = f"{note} {summary}" if summary else note
return summary
try:
return str(content)
except Exception:
return ""
# ---------------------------------------------------------------------------
# ID helpers
# ---------------------------------------------------------------------------
def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
"""Generate a deterministic call_id from tool call content.
Used as a fallback when the API doesn't provide a call_id.
Deterministic IDs prevent cache invalidation — random UUIDs would
make every API call's prefix unique, breaking OpenAI's prompt cache.
"""
seed = f"{fn_name}:{arguments}:{index}"
digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12]
return f"call_{digest}"
def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]:
"""Split a stored tool id into (call_id, response_item_id)."""
if not isinstance(raw_id, str):
return None, None
value = raw_id.strip()
if not value:
return None, None
if "|" in value:
call_id, response_item_id = value.split("|", 1)
call_id = call_id.strip() or None
response_item_id = response_item_id.strip() or None
return call_id, response_item_id
if value.startswith("fc_"):
return None, value
return value, None
def _derive_responses_function_call_id(
call_id: str,
response_item_id: Optional[str] = None,
) -> str:
"""Build a valid Responses `function_call.id` (must start with `fc_`)."""
if isinstance(response_item_id, str):
candidate = response_item_id.strip()
if candidate.startswith("fc_"):
return candidate
source = (call_id or "").strip()
if source.startswith("fc_"):
return source
if source.startswith("call_") and len(source) > len("call_"):
return f"fc_{source[len('call_'):]}"
sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
if sanitized.startswith("fc_"):
return sanitized
if sanitized.startswith("call_") and len(sanitized) > len("call_"):
return f"fc_{sanitized[len('call_'):]}"
if sanitized:
return f"fc_{sanitized[:48]}"
seed = source or str(response_item_id or "") or uuid.uuid4().hex
digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
return f"fc_{digest}"
# ---------------------------------------------------------------------------
# Schema conversion
# ---------------------------------------------------------------------------
def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
"""Convert chat-completions tool schemas to Responses function-tool schemas."""
if not tools:
return None
converted: List[Dict[str, Any]] = []
for item in tools:
fn = item.get("function", {}) if isinstance(item, dict) else {}
name = fn.get("name")
if not isinstance(name, str) or not name.strip():
continue
converted.append({
"type": "function",
"name": name,
"description": fn.get("description", ""),
"strict": False,
"parameters": fn.get("parameters", {"type": "object", "properties": {}}),
})
return converted or None
# ---------------------------------------------------------------------------
# Message format conversion
# ---------------------------------------------------------------------------
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items."""
items: List[Dict[str, Any]] = []
seen_item_ids: set = set()
for msg in messages:
if not isinstance(msg, dict):
continue
role = msg.get("role")
if role == "system":
continue
if role in {"user", "assistant"}:
content = msg.get("content", "")
if isinstance(content, list):
content_parts = _chat_content_to_responses_parts(content)
content_text = "".join(
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
)
else:
content_parts = []
content_text = str(content) if content is not None else ""
if role == "assistant":
# Replay encrypted reasoning items from previous turns
# so the API can maintain coherent reasoning chains.
codex_reasoning = msg.get("codex_reasoning_items")
has_codex_reasoning = False
if isinstance(codex_reasoning, list):
for ri in codex_reasoning:
if isinstance(ri, dict) and ri.get("encrypted_content"):
item_id = ri.get("id")
if item_id and item_id in seen_item_ids:
continue
# Strip the "id" field — with store=False the
# Responses API cannot look up items by ID and
# returns 404. The encrypted_content blob is
# self-contained for reasoning chain continuity.
replay_item = {k: v for k, v in ri.items() if k != "id"}
items.append(replay_item)
if item_id:
seen_item_ids.add(item_id)
has_codex_reasoning = True
if content_parts:
items.append({"role": "assistant", "content": content_parts})
elif content_text.strip():
items.append({"role": "assistant", "content": content_text})
elif has_codex_reasoning:
# The Responses API requires a following item after each
# reasoning item (otherwise: missing_following_item error).
# When the assistant produced only reasoning with no visible
# content, emit an empty assistant message as the required
# following item.
items.append({"role": "assistant", "content": ""})
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
if not isinstance(tc, dict):
continue
fn = tc.get("function", {})
fn_name = fn.get("name")
if not isinstance(fn_name, str) or not fn_name.strip():
continue
embedded_call_id, embedded_response_item_id = _split_responses_tool_id(
tc.get("id")
)
call_id = tc.get("call_id")
if not isinstance(call_id, str) or not call_id.strip():
call_id = embedded_call_id
if not isinstance(call_id, str) or not call_id.strip():
if (
isinstance(embedded_response_item_id, str)
and embedded_response_item_id.startswith("fc_")
and len(embedded_response_item_id) > len("fc_")
):
call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
else:
_raw_args = str(fn.get("arguments", "{}"))
call_id = _deterministic_call_id(fn_name, _raw_args, len(items))
call_id = call_id.strip()
arguments = fn.get("arguments", "{}")
if isinstance(arguments, dict):
arguments = json.dumps(arguments, ensure_ascii=False)
elif not isinstance(arguments, str):
arguments = str(arguments)
arguments = arguments.strip() or "{}"
items.append({
"type": "function_call",
"call_id": call_id,
"name": fn_name,
"arguments": arguments,
})
continue
# Non-assistant (user) role: emit multimodal parts when present,
# otherwise fall back to the text payload.
if content_parts:
items.append({"role": role, "content": content_parts})
else:
items.append({"role": role, "content": content_text})
continue
if role == "tool":
raw_tool_call_id = msg.get("tool_call_id")
call_id, _ = _split_responses_tool_id(raw_tool_call_id)
if not isinstance(call_id, str) or not call_id.strip():
if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip():
call_id = raw_tool_call_id.strip()
if not isinstance(call_id, str) or not call_id.strip():
continue
items.append({
"type": "function_call_output",
"call_id": call_id,
"output": str(msg.get("content", "") or ""),
})
return items
# ---------------------------------------------------------------------------
# Input preflight / validation
# ---------------------------------------------------------------------------
def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
if not isinstance(raw_items, list):
raise ValueError("Codex Responses input must be a list of input items.")
normalized: List[Dict[str, Any]] = []
seen_ids: set = set()
for idx, item in enumerate(raw_items):
if not isinstance(item, dict):
raise ValueError(f"Codex Responses input[{idx}] must be an object.")
item_type = item.get("type")
if item_type == "function_call":
call_id = item.get("call_id")
name = item.get("name")
if not isinstance(call_id, str) or not call_id.strip():
raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
if not isinstance(name, str) or not name.strip():
raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
arguments = item.get("arguments", "{}")
if isinstance(arguments, dict):
arguments = json.dumps(arguments, ensure_ascii=False)
elif not isinstance(arguments, str):
arguments = str(arguments)
arguments = arguments.strip() or "{}"
normalized.append(
{
"type": "function_call",
"call_id": call_id.strip(),
"name": name.strip(),
"arguments": arguments,
}
)
continue
if item_type == "function_call_output":
call_id = item.get("call_id")
if not isinstance(call_id, str) or not call_id.strip():
raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
output = item.get("output", "")
if output is None:
output = ""
if not isinstance(output, str):
output = str(output)
normalized.append(
{
"type": "function_call_output",
"call_id": call_id.strip(),
"output": output,
}
)
continue
if item_type == "reasoning":
encrypted = item.get("encrypted_content")
if isinstance(encrypted, str) and encrypted:
item_id = item.get("id")
if isinstance(item_id, str) and item_id:
if item_id in seen_ids:
continue
seen_ids.add(item_id)
reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
# Do NOT include the "id" in the outgoing item — with
# store=False (our default) the API tries to resolve the
# id server-side and returns 404. The id is still used
# above for local deduplication via seen_ids.
summary = item.get("summary")
if isinstance(summary, list):
reasoning_item["summary"] = summary
else:
reasoning_item["summary"] = []
normalized.append(reasoning_item)
continue
role = item.get("role")
if role in {"user", "assistant"}:
content = item.get("content", "")
if content is None:
content = ""
if isinstance(content, list):
# Multimodal content from ``_chat_messages_to_responses_input``
# is already in Responses format (``input_text`` / ``input_image``).
# Validate each part and pass through.
validated: List[Dict[str, Any]] = []
for part_idx, part in enumerate(content):
if isinstance(part, str):
if part:
validated.append({"type": "input_text", "text": part})
continue
if not isinstance(part, dict):
raise ValueError(
f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
)
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"input_text", "text", "output_text"}:
text = part.get("text", "")
if not isinstance(text, str):
text = str(text or "")
validated.append({"type": "input_text", "text": text})
elif ptype in {"input_image", "image_url"}:
image_ref = part.get("image_url", "")
detail = part.get("detail")
if isinstance(image_ref, dict):
url = image_ref.get("url", "")
detail = image_ref.get("detail", detail)
else:
url = image_ref
if not isinstance(url, str):
url = str(url or "")
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
if isinstance(detail, str) and detail.strip():
image_part["detail"] = detail.strip()
validated.append(image_part)
else:
raise ValueError(
f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
)
normalized.append({"role": role, "content": validated})
continue
if not isinstance(content, str):
content = str(content)
normalized.append({"role": role, "content": content})
continue
raise ValueError(
f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})."
)
return normalized
def _preflight_codex_api_kwargs(
api_kwargs: Any,
*,
allow_stream: bool = False,
) -> Dict[str, Any]:
if not isinstance(api_kwargs, dict):
raise ValueError("Codex Responses request must be a dict.")
required = {"model", "instructions", "input"}
missing = [key for key in required if key not in api_kwargs]
if missing:
raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")
model = api_kwargs.get("model")
if not isinstance(model, str) or not model.strip():
raise ValueError("Codex Responses request 'model' must be a non-empty string.")
model = model.strip()
instructions = api_kwargs.get("instructions")
if instructions is None:
instructions = ""
if not isinstance(instructions, str):
instructions = str(instructions)
instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY
normalized_input = _preflight_codex_input_items(api_kwargs.get("input"))
tools = api_kwargs.get("tools")
normalized_tools = None
if tools is not None:
if not isinstance(tools, list):
raise ValueError("Codex Responses request 'tools' must be a list when provided.")
normalized_tools = []
for idx, tool in enumerate(tools):
if not isinstance(tool, dict):
raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
if tool.get("type") != "function":
raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
name = tool.get("name")
parameters = tool.get("parameters")
if not isinstance(name, str) or not name.strip():
raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
if not isinstance(parameters, dict):
raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")
description = tool.get("description", "")
if description is None:
description = ""
if not isinstance(description, str):
description = str(description)
strict = tool.get("strict", False)
if not isinstance(strict, bool):
strict = bool(strict)
normalized_tools.append(
{
"type": "function",
"name": name.strip(),
"description": description,
"strict": strict,
"parameters": parameters,
}
)
store = api_kwargs.get("store", False)
if store is not False:
raise ValueError("Codex Responses contract requires 'store' to be false.")
allowed_keys = {
"model", "instructions", "input", "tools", "store",
"reasoning", "include", "max_output_tokens", "temperature",
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
"extra_headers",
}
normalized: Dict[str, Any] = {
"model": model,
"instructions": instructions,
"input": normalized_input,
"store": False,
}
if normalized_tools is not None:
normalized["tools"] = normalized_tools
# Pass through reasoning config
reasoning = api_kwargs.get("reasoning")
if isinstance(reasoning, dict):
normalized["reasoning"] = reasoning
include = api_kwargs.get("include")
if isinstance(include, list):
normalized["include"] = include
service_tier = api_kwargs.get("service_tier")
if isinstance(service_tier, str) and service_tier.strip():
normalized["service_tier"] = service_tier.strip()
# Pass through max_output_tokens and temperature
max_output_tokens = api_kwargs.get("max_output_tokens")
if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
normalized["max_output_tokens"] = int(max_output_tokens)
temperature = api_kwargs.get("temperature")
if isinstance(temperature, (int, float)):
normalized["temperature"] = float(temperature)
# Pass through tool_choice, parallel_tool_calls, prompt_cache_key
for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
val = api_kwargs.get(passthrough_key)
if val is not None:
normalized[passthrough_key] = val
extra_headers = api_kwargs.get("extra_headers")
if extra_headers is not None:
if not isinstance(extra_headers, dict):
raise ValueError("Codex Responses request 'extra_headers' must be an object.")
normalized_headers: Dict[str, str] = {}
for key, value in extra_headers.items():
if not isinstance(key, str) or not key.strip():
raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.")
if value is None:
continue
normalized_headers[key.strip()] = str(value)
if normalized_headers:
normalized["extra_headers"] = normalized_headers
if allow_stream:
stream = api_kwargs.get("stream")
if stream is not None and stream is not True:
raise ValueError("Codex Responses 'stream' must be true when set.")
if stream is True:
normalized["stream"] = True
allowed_keys.add("stream")
elif "stream" in api_kwargs:
raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")
unexpected = sorted(key for key in api_kwargs if key not in allowed_keys)
if unexpected:
raise ValueError(
f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}."
)
return normalized
# ---------------------------------------------------------------------------
# Response extraction helpers
# ---------------------------------------------------------------------------
def _extract_responses_message_text(item: Any) -> str:
"""Extract assistant text from a Responses message output item."""
content = getattr(item, "content", None)
if not isinstance(content, list):
return ""
chunks: List[str] = []
for part in content:
ptype = getattr(part, "type", None)
if ptype not in {"output_text", "text"}:
continue
text = getattr(part, "text", None)
if isinstance(text, str) and text:
chunks.append(text)
return "".join(chunks).strip()
def _extract_responses_reasoning_text(item: Any) -> str:
"""Extract a compact reasoning text from a Responses reasoning item."""
summary = getattr(item, "summary", None)
if isinstance(summary, list):
chunks: List[str] = []
for part in summary:
text = getattr(part, "text", None)
if isinstance(text, str) and text:
chunks.append(text)
if chunks:
return "\n".join(chunks).strip()
text = getattr(item, "text", None)
if isinstance(text, str) and text:
return text.strip()
return ""
# ---------------------------------------------------------------------------
# Full response normalization
# ---------------------------------------------------------------------------
def _normalize_codex_response(response: Any) -> tuple[Any, str]:
"""Normalize a Responses API object to an assistant_message-like object."""
output = getattr(response, "output", None)
if not isinstance(output, list) or not output:
# The Codex backend can return empty output when the answer was
# delivered entirely via stream events. Check output_text as a
# last-resort fallback before raising.
out_text = getattr(response, "output_text", None)
if isinstance(out_text, str) and out_text.strip():
logger.debug(
"Codex response has empty output but output_text is present (%d chars); "
"synthesizing output item.", len(out_text.strip()),
)
output = [SimpleNamespace(
type="message", role="assistant", status="completed",
content=[SimpleNamespace(type="output_text", text=out_text.strip())],
)]
response.output = output
else:
raise RuntimeError("Responses API returned no output items")
response_status = getattr(response, "status", None)
if isinstance(response_status, str):
response_status = response_status.strip().lower()
else:
response_status = None
if response_status in {"failed", "cancelled"}:
error_obj = getattr(response, "error", None)
if isinstance(error_obj, dict):
error_msg = error_obj.get("message") or str(error_obj)
else:
error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
raise RuntimeError(error_msg)
content_parts: List[str] = []
reasoning_parts: List[str] = []
reasoning_items_raw: List[Dict[str, Any]] = []
tool_calls: List[Any] = []
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
saw_commentary_phase = False
saw_final_answer_phase = False
for item in output:
item_type = getattr(item, "type", None)
item_status = getattr(item, "status", None)
if isinstance(item_status, str):
item_status = item_status.strip().lower()
else:
item_status = None
if item_status in {"queued", "in_progress", "incomplete"}:
has_incomplete_items = True
if item_type == "message":
item_phase = getattr(item, "phase", None)
if isinstance(item_phase, str):
normalized_phase = item_phase.strip().lower()
if normalized_phase in {"commentary", "analysis"}:
saw_commentary_phase = True
elif normalized_phase in {"final_answer", "final"}:
saw_final_answer_phase = True
message_text = _extract_responses_message_text(item)
if message_text:
content_parts.append(message_text)
elif item_type == "reasoning":
reasoning_text = _extract_responses_reasoning_text(item)
if reasoning_text:
reasoning_parts.append(reasoning_text)
# Capture the full reasoning item for multi-turn continuity.
# encrypted_content is an opaque blob the API needs back on
# subsequent turns to maintain coherent reasoning chains.
encrypted = getattr(item, "encrypted_content", None)
if isinstance(encrypted, str) and encrypted:
raw_item = {"type": "reasoning", "encrypted_content": encrypted}
item_id = getattr(item, "id", None)
if isinstance(item_id, str) and item_id:
raw_item["id"] = item_id
# Capture summary — required by the API when replaying reasoning items
summary = getattr(item, "summary", None)
if isinstance(summary, list):
raw_summary = []
for part in summary:
text = getattr(part, "text", None)
if isinstance(text, str):
raw_summary.append({"type": "summary_text", "text": text})
raw_item["summary"] = raw_summary
reasoning_items_raw.append(raw_item)
elif item_type == "function_call":
if item_status in {"queued", "in_progress", "incomplete"}:
continue
fn_name = getattr(item, "name", "") or ""
arguments = getattr(item, "arguments", "{}")
if not isinstance(arguments, str):
arguments = json.dumps(arguments, ensure_ascii=False)
raw_call_id = getattr(item, "call_id", None)
raw_item_id = getattr(item, "id", None)
embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
if not isinstance(call_id, str) or not call_id.strip():
call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
call_id = call_id.strip()
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
tool_calls.append(SimpleNamespace(
id=call_id,
call_id=call_id,
response_item_id=response_item_id,
type="function",
function=SimpleNamespace(name=fn_name, arguments=arguments),
))
elif item_type == "custom_tool_call":
fn_name = getattr(item, "name", "") or ""
arguments = getattr(item, "input", "{}")
if not isinstance(arguments, str):
arguments = json.dumps(arguments, ensure_ascii=False)
raw_call_id = getattr(item, "call_id", None)
raw_item_id = getattr(item, "id", None)
embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
if not isinstance(call_id, str) or not call_id.strip():
call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
call_id = call_id.strip()
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
tool_calls.append(SimpleNamespace(
id=call_id,
call_id=call_id,
response_item_id=response_item_id,
type="function",
function=SimpleNamespace(name=fn_name, arguments=arguments),
))
final_text = "\n".join([p for p in content_parts if p]).strip()
if not final_text and hasattr(response, "output_text"):
out_text = getattr(response, "output_text", "")
if isinstance(out_text, str):
final_text = out_text.strip()
assistant_message = SimpleNamespace(
content=final_text,
tool_calls=tool_calls,
reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
reasoning_content=None,
reasoning_details=None,
codex_reasoning_items=reasoning_items_raw or None,
)
if tool_calls:
finish_reason = "tool_calls"
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
finish_reason = "incomplete"
elif reasoning_items_raw and not final_text:
# Response contains only reasoning (encrypted thinking state) with
# no visible content or tool calls. The model is still thinking and
# needs another turn to produce the actual answer. Marking this as
# "stop" would send it into the empty-content retry loop which burns
# 3 retries then fails — treat it as incomplete instead so the Codex
# continuation path handles it correctly.
finish_reason = "incomplete"
else:
finish_reason = "stop"
return assistant_message, finish_reason
+7 -18
View File
@@ -31,7 +31,6 @@ from agent.model_metadata import (
get_model_context_length,
estimate_messages_tokens_rough,
)
from agent.redact import redact_sensitive_text
logger = logging.getLogger(__name__)
@@ -551,15 +550,11 @@ class ContextCompressor(ContextEngine):
Includes tool call arguments and result content (up to
``_CONTENT_MAX`` chars per message) so the summarizer can preserve
specific details like file paths, commands, and outputs.
All content is redacted before serialization to prevent secrets
(API keys, tokens, passwords) from leaking into the summary that
gets sent to the auxiliary model and persisted across compactions.
"""
parts = []
for msg in turns:
role = msg.get("role", "unknown")
content = redact_sensitive_text(msg.get("content") or "")
content = msg.get("content") or ""
# Tool results: keep enough content for the summarizer
if role == "tool":
@@ -580,7 +575,7 @@ class ContextCompressor(ContextEngine):
if isinstance(tc, dict):
fn = tc.get("function", {})
name = fn.get("name", "?")
args = redact_sensitive_text(fn.get("arguments", ""))
args = fn.get("arguments", "")
# Truncate long arguments but keep enough for context
if len(args) > self._TOOL_ARGS_MAX:
args = args[:self._TOOL_ARGS_HEAD] + "..."
@@ -640,11 +635,7 @@ class ContextCompressor(ContextEngine):
"only output the structured summary. "
"Do NOT include any preamble, greeting, or prefix. "
"Write the summary in the same language the user was using in the "
"conversation — do not translate or switch to English. "
"NEVER include API keys, tokens, passwords, secrets, credentials, "
"or connection strings in the summary — replace any that appear "
"with [REDACTED]. Note that the user had credentials present, but "
"do not preserve their values."
"conversation — do not translate or switch to English."
)
# Shared structured template (used by both paths).
@@ -701,7 +692,7 @@ Be specific with file paths, commands, line numbers, and results.]
[What remains to be done — framed as context, not instructions]
## Critical Context
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.
@@ -741,7 +732,7 @@ Use this exact structure:
prompt += f"""
FOCUS TOPIC: "{focus_topic}"
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
try:
call_kwargs = {
@@ -764,9 +755,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Handle cases where content is not a string (e.g., dict from llama.cpp)
if not isinstance(content, str):
content = str(content) if content else ""
# Redact the summary output as well — the summarizer LLM may
# ignore prompt instructions and echo back secrets verbatim.
summary = redact_sensitive_text(content.strip())
summary = content.strip()
# Store for iterative updates on next compaction
self._previous_summary = summary
self._summary_failure_cooldown_until = 0.0
@@ -807,7 +796,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
)
self.summary_model = "" # empty = use main model
self._summary_failure_cooldown_until = 0.0 # no cooldown
return self._generate_summary(turns_to_summarize) # retry immediately
return self._generate_summary(messages, summary_budget) # retry immediately
# Transient errors (timeout, rate limit, network) — shorter cooldown
_transient_cooldown = 60
+9 -27
View File
@@ -21,9 +21,6 @@ from pathlib import Path
from types import SimpleNamespace
from typing import Any
from agent.file_safety import get_read_block_error, is_write_denied
from agent.redact import redact_sensitive_text
ACP_MARKER_BASE_URL = "acp://copilot"
_DEFAULT_TIMEOUT_SECONDS = 900.0
@@ -57,18 +54,6 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
}
def _permission_denied(message_id: Any) -> dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "cancelled",
}
},
}
def _format_messages_as_prompt(
messages: list[dict[str, Any]],
model: str | None = None,
@@ -401,8 +386,6 @@ class CopilotACPClient:
stderr_tail: deque[str] = deque(maxlen=40)
def _stdout_reader() -> None:
if proc.stdout is None:
return
for line in proc.stdout:
try:
inbox.put(json.loads(line))
@@ -550,13 +533,18 @@ class CopilotACPClient:
params = msg.get("params") or {}
if method == "session/request_permission":
response = _permission_denied(message_id)
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "allow_once",
}
},
}
elif method == "fs/read_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
block_error = get_read_block_error(str(path))
if block_error:
raise PermissionError(block_error)
content = path.read_text() if path.exists() else ""
line = params.get("line")
limit = params.get("limit")
@@ -565,8 +553,6 @@ class CopilotACPClient:
start = line - 1
end = start + limit if isinstance(limit, int) and limit > 0 else None
content = "".join(lines[start:end])
if content:
content = redact_sensitive_text(content)
response = {
"jsonrpc": "2.0",
"id": message_id,
@@ -579,10 +565,6 @@ class CopilotACPClient:
elif method == "fs/write_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
if is_write_denied(str(path)):
raise PermissionError(
f"Write denied: '{path}' is a protected system/credential file."
)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(str(params.get("content") or ""))
response = {
+69 -91
View File
@@ -983,14 +983,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
active_sources: Set[str] = set()
auth_store = _load_auth_store()
# Shared suppression gate — used at every upsert site so
# `hermes auth remove <provider> <N>` is stable across all source types.
try:
from hermes_cli.auth import is_source_suppressed as _is_suppressed
except ImportError:
def _is_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "anthropic":
# Only auto-discover external credentials (Claude Code, Hermes PKCE)
# when the user has explicitly configured anthropic as their provider.
@@ -1010,8 +1002,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
("claude_code", read_claude_code_credentials()),
):
if creds and creds.get("accessToken"):
if _is_suppressed(provider, source_name):
continue
# Check if user explicitly removed this source
try:
from hermes_cli.auth import is_source_suppressed
if is_source_suppressed(provider, source_name):
continue
except ImportError:
pass
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
@@ -1029,7 +1026,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
elif provider == "nous":
state = _load_provider_state(auth_store, "nous")
if state and not _is_suppressed(provider, "device_code"):
if state:
active_sources.add("device_code")
# Prefer a user-supplied label embedded in the singleton state
# (set by persist_nous_credentials(label=...) when the user ran
@@ -1070,21 +1067,20 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
token, source = resolve_copilot_token()
if token:
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
if not _is_suppressed(provider, source_name):
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)
except Exception as exc:
logger.debug("Copilot token seed failed: %s", exc)
@@ -1100,21 +1096,20 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
token = creds.get("api_key", "")
if token:
source_name = creds.get("source", "qwen-cli")
if not _is_suppressed(provider, source_name):
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_OAUTH,
"access_token": token,
"expires_at_ms": creds.get("expires_at_ms"),
"base_url": creds.get("base_url", ""),
"label": creds.get("auth_file", source_name),
},
)
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_OAUTH,
"access_token": token,
"expires_at_ms": creds.get("expires_at_ms"),
"base_url": creds.get("base_url", ""),
"label": creds.get("auth_file", source_name),
},
)
except Exception as exc:
logger.debug("Qwen OAuth token seed failed: %s", exc)
@@ -1123,7 +1118,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
# the device_code source as suppressed so it won't be re-seeded from
# the Hermes auth store. Without this gate the removal is instantly
# undone on the next load_pool() call.
if _is_suppressed(provider, "device_code"):
codex_suppressed = False
try:
from hermes_cli.auth import is_source_suppressed
codex_suppressed = is_source_suppressed(provider, "device_code")
except ImportError:
pass
if codex_suppressed:
return changed, active_sources
state = _load_provider_state(auth_store, "openai-codex")
@@ -1157,22 +1158,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
changed = False
active_sources: Set[str] = set()
# Honour user suppression — `hermes auth remove <provider> <N>` for an
# env-seeded credential marks the env:<VAR> source as suppressed so it
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
# Without this gate the removal is silently undone on the next
# load_pool() call whenever the var is still exported by the shell.
try:
from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
except ImportError:
def _is_source_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "openrouter":
token = os.getenv("OPENROUTER_API_KEY", "").strip()
if token:
source = "env:OPENROUTER_API_KEY"
if _is_source_suppressed(provider, source):
return changed, active_sources
active_sources.add(source)
changed |= _upsert_entry(
entries,
@@ -1209,8 +1198,6 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
if not token:
continue
source = f"env:{env_var}"
if _is_source_suppressed(provider, source):
continue
active_sources.add(source)
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
base_url = env_url or pconfig.inference_base_url
@@ -1255,13 +1242,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
changed = False
active_sources: Set[str] = set()
# Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
try:
from hermes_cli.auth import is_source_suppressed as _is_suppressed
except ImportError:
def _is_suppressed(_p, _s): # type: ignore[misc]
return False
# Seed from the custom_providers config entry's api_key field
cp_config = _get_custom_provider_config(pool_key)
if cp_config:
@@ -1270,20 +1250,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
name = str(cp_config.get("name") or "").strip()
if api_key:
source = f"config:{name}"
if not _is_suppressed(pool_key, source):
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": api_key,
"base_url": base_url,
"label": name or source,
},
)
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": api_key,
"base_url": base_url,
"label": name or source,
},
)
# Seed from model.api_key if model.provider=='custom' and model.base_url matches
try:
@@ -1303,20 +1282,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
matched_key = get_custom_provider_pool_key(model_base_url)
if matched_key == pool_key:
source = "model_config"
if not _is_suppressed(pool_key, source):
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": model_api_key,
"base_url": model_base_url,
"label": "model_config",
},
)
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": model_api_key,
"base_url": model_base_url,
"label": "model_config",
},
)
except Exception:
pass
-401
View File
@@ -1,401 +0,0 @@
"""Unified removal contract for every credential source Hermes reads from.
Hermes seeds its credential pool from many places:
env:<VAR> — os.environ / ~/.hermes/.env
claude_code — ~/.claude/.credentials.json
hermes_pkce — ~/.hermes/.anthropic_oauth.json
device_code — auth.json providers.<provider> (nous, openai-codex, ...)
qwen-cli — ~/.qwen/oauth_creds.json
gh_cli — gh auth token
config:<name> — custom_providers config entry
model_config — model.api_key when model.provider == "custom"
manual — user ran `hermes auth add`
Each source has its own reader inside ``agent.credential_pool._seed_from_*``
(which keep their existing shape — we haven't restructured them). What we
unify here is **removal**:
``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
Before this module, every source had an ad-hoc removal branch in
``auth_remove_command``, and several sources had no branch at all — so
``auth remove`` silently reverted on the next ``load_pool()`` call for
qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
custom-config sources.
Now every source registers a ``RemovalStep`` that does exactly three things
in the same shape:
1. Clean up whatever externally-readable state the source reads from
(.env line, auth.json block, OAuth file, etc.)
2. Suppress the ``(provider, source_id)`` in auth.json so the
corresponding ``_seed_from_*`` branch skips the upsert on re-load
3. Return ``RemovalResult`` describing what was cleaned and any
diagnostic hints the user should see (shell-exported env vars,
external credential files we deliberately don't delete, etc.)
Adding a new credential source is:
- wire up a reader branch in ``_seed_from_*`` (existing pattern)
- gate that reader behind ``is_source_suppressed(provider, source_id)``
- register a ``RemovalStep`` here
No more per-source if/elif chain in ``auth_remove_command``.
"""
from __future__ import annotations
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, List, Optional
@dataclass
class RemovalResult:
"""Outcome of removing a credential source.
Attributes:
cleaned: Short strings describing external state that was actually
mutated (``"Cleared XAI_API_KEY from .env"``,
``"Cleared openai-codex OAuth tokens from auth store"``).
Printed as plain lines to the user.
hints: Diagnostic lines ABOUT state the user may need to clean up
themselves or is deliberately left intact (shell-exported env
var, Claude Code credential file we don't delete, etc.).
Printed as plain lines to the user. Always non-destructive.
suppress: Whether to call ``suppress_credential_source`` after
cleanup so future ``load_pool`` calls skip this source.
Default True — almost every source needs this to stay sticky.
The only legitimate False is ``manual`` entries, which aren't
seeded from anywhere external.
"""
cleaned: List[str] = field(default_factory=list)
hints: List[str] = field(default_factory=list)
suppress: bool = True
@dataclass
class RemovalStep:
"""How to remove one specific credential source cleanly.
Attributes:
provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
Special value ``"*"`` means "matches any provider" — used for
sources like ``manual`` that aren't provider-specific.
source_id: Source identifier as it appears in
``PooledCredential.source``. May be a literal (``"claude_code"``)
or a prefix pattern matched via ``match_fn``.
match_fn: Optional predicate overriding literal ``source_id``
matching. Gets the removed entry's source string. Used for
``env:*`` (any env-seeded key), ``config:*`` (any custom
pool), and ``manual:*`` (any manual-source variant).
remove_fn: ``(provider, removed_entry) -> RemovalResult``. Does the
actual cleanup and returns what happened for the user.
description: One-line human-readable description for docs / tests.
"""
provider: str
source_id: str
remove_fn: Callable[..., RemovalResult]
match_fn: Optional[Callable[[str], bool]] = None
description: str = ""
def matches(self, provider: str, source: str) -> bool:
if self.provider != "*" and self.provider != provider:
return False
if self.match_fn is not None:
return self.match_fn(source)
return source == self.source_id
_REGISTRY: List[RemovalStep] = []
def register(step: RemovalStep) -> RemovalStep:
_REGISTRY.append(step)
return step
def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
"""Return the first matching RemovalStep, or None if unregistered.
Unregistered sources fall through to the default remove path in
``auth_remove_command``: the pool entry is already gone (that happens
before dispatch), no external cleanup, no suppression. This is the
correct behaviour for ``manual`` entries — they were only ever stored
in the pool, nothing external to clean up.
"""
for step in _REGISTRY:
if step.matches(provider, source):
return step
return None
# ---------------------------------------------------------------------------
# Individual RemovalStep implementations — one per source.
# ---------------------------------------------------------------------------
# Each remove_fn is intentionally small and single-purpose. Adding a new
# credential source means adding ONE entry here — no other changes to
# auth_remove_command.
def _remove_env_source(provider: str, removed) -> RemovalResult:
"""env:<VAR> — the most common case.
Handles three user situations:
1. Var lives only in ~/.hermes/.env → clear it
2. Var lives only in the user's shell (shell profile, systemd
EnvironmentFile, launchd plist) → hint them where to unset it
3. Var lives in both → clear from .env, hint about shell
"""
from hermes_cli.config import get_env_path, remove_env_value
result = RemovalResult()
env_var = removed.source[len("env:"):]
if not env_var:
return result
# Detect shell vs .env BEFORE remove_env_value pops os.environ.
env_in_process = bool(os.getenv(env_var))
env_in_dotenv = False
try:
env_path = get_env_path()
if env_path.exists():
env_in_dotenv = any(
line.strip().startswith(f"{env_var}=")
for line in env_path.read_text(errors="replace").splitlines()
)
except OSError:
pass
shell_exported = env_in_process and not env_in_dotenv
cleared = remove_env_value(env_var)
if cleared:
result.cleaned.append(f"Cleared {env_var} from .env")
if shell_exported:
result.hints.extend([
f"Note: {env_var} is still set in your shell environment "
f"(not in ~/.hermes/.env).",
" Unset it there (shell profile, systemd EnvironmentFile, "
"launchd plist, etc.) or it will keep being visible to Hermes.",
f" The pool entry is now suppressed — Hermes will ignore "
f"{env_var} until you run `hermes auth add {provider}`.",
])
else:
result.hints.append(
f"Suppressed env:{env_var} — it will not be re-seeded even "
f"if the variable is re-exported later."
)
return result
def _remove_claude_code(provider: str, removed) -> RemovalResult:
"""~/.claude/.credentials.json is owned by Claude Code itself.
We don't delete it — the user's Claude Code install still needs to
work. We just suppress it so Hermes stops reading it.
"""
return RemovalResult(hints=[
"Suppressed claude_code credential — it will not be re-seeded.",
"Note: Claude Code credentials still live in ~/.claude/.credentials.json",
"Run `hermes auth add anthropic` to re-enable if needed.",
])
def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
"""~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
from hermes_constants import get_hermes_home
result = RemovalResult()
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
if oauth_file.exists():
try:
oauth_file.unlink()
result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
except OSError as exc:
result.hints.append(f"Could not delete {oauth_file}: {exc}")
return result
def _clear_auth_store_provider(provider: str) -> bool:
"""Delete auth_store.providers[provider]. Returns True if deleted."""
from hermes_cli.auth import (
_auth_store_lock,
_load_auth_store,
_save_auth_store,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
return True
return False
def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
"""Nous OAuth lives in auth.json providers.nous — clear it and suppress.
We suppress in addition to clearing because nothing else stops the
user's next `hermes login` run from writing providers.nous again
before they decide to. Suppression forces them to go through
`hermes auth add nous` to re-engage, which is the documented re-add
path and clears the suppression atomically.
"""
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
return result
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
refresh_codex_oauth_pure() writes both every time, so clearing only
the Hermes auth store is not enough — _seed_from_singletons() would
re-import from ~/.codex/auth.json on the next load_pool() call and
the removal would be instantly undone. We suppress instead of
deleting Codex CLI's file, so the Codex CLI itself keeps working.
The canonical source name in ``_seed_from_singletons`` is
``"device_code"`` (no prefix). Entries may show up in the pool as
either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
via ``hermes auth add openai-codex``), but in both cases the re-seed
gate lives at the ``"device_code"`` suppression key. We suppress
that canonical key here; the central dispatcher also suppresses
``removed.source`` which is fine — belt-and-suspenders, idempotent.
"""
from hermes_cli.auth import suppress_credential_source
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
# Suppress the canonical re-seed source, not just whatever source the
# removed entry had. Otherwise `manual:device_code` removals wouldn't
# block the `device_code` re-seed path.
suppress_credential_source(provider, "device_code")
result.hints.extend([
"Suppressed openai-codex device_code source — it will not be re-seeded.",
"Note: Codex CLI credentials still live in ~/.codex/auth.json",
"Run `hermes auth add openai-codex` to re-enable if needed.",
])
return result
def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
"""~/.qwen/oauth_creds.json is owned by the Qwen CLI.
Same pattern as claude_code — suppress, don't delete. The user's
Qwen CLI install still reads from that file.
"""
return RemovalResult(hints=[
"Suppressed qwen-cli credential — it will not be re-seeded.",
"Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
"Run `hermes auth add qwen-oauth` to re-enable if needed.",
])
def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
"""Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
Copilot is special: the same token can be seeded as multiple source
entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
``_seed_from_env``), so removing one entry without suppressing the
others lets the duplicates resurrect. We suppress ALL known copilot
sources here so removal is stable regardless of which entry the
user clicked.
We don't touch the user's gh CLI or shell state — just suppress so
Hermes stops picking the token up.
"""
# Suppress ALL copilot source variants up-front so no path resurrects
# the pool entry. The central dispatcher in auth_remove_command will
# ALSO suppress removed.source, but it's idempotent so double-calling
# is harmless.
from hermes_cli.auth import suppress_credential_source
suppress_credential_source(provider, "gh_cli")
for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
suppress_credential_source(provider, f"env:{env_var}")
return RemovalResult(hints=[
"Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
"Note: Your gh CLI / shell environment is unchanged.",
"Run `hermes auth add copilot` to re-enable if needed.",
])
def _remove_custom_config(provider: str, removed) -> RemovalResult:
"""Custom provider pools are seeded from custom_providers config or
model.api_key. Both are in config.yaml — modifying that from here
is more invasive than suppression. We suppress; the user can edit
config.yaml if they want to remove the key from disk entirely.
"""
source_label = removed.source
return RemovalResult(hints=[
f"Suppressed {source_label} — it will not be re-seeded.",
"Note: The underlying value in config.yaml is unchanged. Edit it "
"directly if you want to remove the credential from disk.",
])
def _register_all_sources() -> None:
"""Called once on module import.
ORDER MATTERS — ``find_removal_step`` returns the first match. Put
provider-specific steps before the generic ``env:*`` step so that e.g.
copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
doesn't touch the user's shell), not the generic env-var removal
(which would try to clear .env).
"""
register(RemovalStep(
provider="copilot", source_id="gh_cli",
match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
remove_fn=_remove_copilot_gh,
description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
))
register(RemovalStep(
provider="*", source_id="env:",
match_fn=lambda src: src.startswith("env:"),
remove_fn=_remove_env_source,
description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
))
register(RemovalStep(
provider="anthropic", source_id="claude_code",
remove_fn=_remove_claude_code,
description="~/.claude/.credentials.json",
))
register(RemovalStep(
provider="anthropic", source_id="hermes_pkce",
remove_fn=_remove_hermes_pkce,
description="~/.hermes/.anthropic_oauth.json",
))
register(RemovalStep(
provider="nous", source_id="device_code",
remove_fn=_remove_nous_device_code,
description="auth.json providers.nous",
))
register(RemovalStep(
provider="openai-codex", source_id="device_code",
match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
remove_fn=_remove_codex_device_code,
description="auth.json providers.openai-codex + ~/.codex/auth.json",
))
register(RemovalStep(
provider="qwen-oauth", source_id="qwen-cli",
remove_fn=_remove_qwen_cli,
description="~/.qwen/oauth_creds.json",
))
register(RemovalStep(
provider="*", source_id="config:",
match_fn=lambda src: src.startswith("config:") or src == "model_config",
remove_fn=_remove_custom_config,
description="Custom provider config.yaml api_key field",
))
_register_all_sources()
-111
View File
@@ -1,111 +0,0 @@
"""Shared file safety rules used by both tools and ACP shims."""
from __future__ import annotations
import os
from pathlib import Path
from typing import Optional
def _hermes_home_path() -> Path:
"""Resolve the active HERMES_HOME (profile-aware) without circular imports."""
try:
from hermes_constants import get_hermes_home # local import to avoid cycles
return get_hermes_home()
except Exception:
return Path(os.path.expanduser("~/.hermes"))
def build_write_denied_paths(home: str) -> set[str]:
"""Return exact sensitive paths that must never be written."""
hermes_home = _hermes_home_path()
return {
os.path.realpath(p)
for p in [
os.path.join(home, ".ssh", "authorized_keys"),
os.path.join(home, ".ssh", "id_rsa"),
os.path.join(home, ".ssh", "id_ed25519"),
os.path.join(home, ".ssh", "config"),
str(hermes_home / ".env"),
os.path.join(home, ".bashrc"),
os.path.join(home, ".zshrc"),
os.path.join(home, ".profile"),
os.path.join(home, ".bash_profile"),
os.path.join(home, ".zprofile"),
os.path.join(home, ".netrc"),
os.path.join(home, ".pgpass"),
os.path.join(home, ".npmrc"),
os.path.join(home, ".pypirc"),
"/etc/sudoers",
"/etc/passwd",
"/etc/shadow",
]
}
def build_write_denied_prefixes(home: str) -> list[str]:
"""Return sensitive directory prefixes that must never be written."""
return [
os.path.realpath(p) + os.sep
for p in [
os.path.join(home, ".ssh"),
os.path.join(home, ".aws"),
os.path.join(home, ".gnupg"),
os.path.join(home, ".kube"),
"/etc/sudoers.d",
"/etc/systemd",
os.path.join(home, ".docker"),
os.path.join(home, ".azure"),
os.path.join(home, ".config", "gh"),
]
]
def get_safe_write_root() -> Optional[str]:
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
if not root:
return None
try:
return os.path.realpath(os.path.expanduser(root))
except Exception:
return None
def is_write_denied(path: str) -> bool:
"""Return True if path is blocked by the write denylist or safe root."""
home = os.path.realpath(os.path.expanduser("~"))
resolved = os.path.realpath(os.path.expanduser(str(path)))
if resolved in build_write_denied_paths(home):
return True
for prefix in build_write_denied_prefixes(home):
if resolved.startswith(prefix):
return True
safe_root = get_safe_write_root()
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
return True
return False
def get_read_block_error(path: str) -> Optional[str]:
"""Return an error message when a read targets internal Hermes cache files."""
resolved = Path(path).expanduser().resolve()
hermes_home = _hermes_home_path().resolve()
blocked_dirs = [
hermes_home / "skills" / ".hub" / "index-cache",
hermes_home / "skills" / ".hub",
]
for blocked in blocked_dirs:
try:
resolved.relative_to(blocked)
except ValueError:
continue
return (
f"Access denied: {path} is an internal Hermes cache file "
"and cannot be read directly to prevent prompt injection. "
"Use the skills_list or skill_view tools instead."
)
return None
+1 -2
View File
@@ -799,8 +799,7 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
err_obj = {}
err_status = str(err_obj.get("status") or "").strip()
err_message = str(err_obj.get("message") or "").strip()
_raw_details = err_obj.get("details")
err_details_list = _raw_details if isinstance(_raw_details, list) else []
err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
# Extract google.rpc.ErrorInfo reason + metadata. There may be more
# than one ErrorInfo (rare), so we pick the first one with a reason.
+1 -2
View File
@@ -613,8 +613,7 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
err_obj = {}
err_status = str(err_obj.get("status") or "").strip()
err_message = str(err_obj.get("message") or "").strip()
_raw_details = err_obj.get("details")
details_list = _raw_details if isinstance(_raw_details, list) else []
details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
reason = ""
retry_after: Optional[float] = None
+13 -86
View File
@@ -14,8 +14,6 @@ from urllib.parse import urlparse
import requests
import yaml
from utils import base_url_host_matches, base_url_hostname
from hermes_constants import OPENROUTER_MODELS_URL
logger = logging.getLogger(__name__)
@@ -170,7 +168,6 @@ DEFAULT_CONTEXT_LENGTHS = {
"Qwen/Qwen3.5-35B-A3B": 131072,
"deepseek-ai/DeepSeek-V3.2": 65536,
"moonshotai/Kimi-K2.5": 262144,
"moonshotai/Kimi-K2.6": 262144,
"moonshotai/Kimi-K2-Thinking": 262144,
"MiniMaxAI/MiniMax-M2.5": 204800,
"XiaomiMiMo/MiMo-V2-Flash": 256000,
@@ -213,15 +210,8 @@ def _normalize_base_url(base_url: str) -> str:
return (base_url or "").strip().rstrip("/")
def _auth_headers(api_key: str = "") -> Dict[str, str]:
token = str(api_key or "").strip()
if not token:
return {}
return {"Authorization": f"Bearer {token}"}
def _is_openrouter_base_url(base_url: str) -> bool:
return base_url_host_matches(base_url, "openrouter.ai")
return "openrouter.ai" in _normalize_base_url(base_url).lower()
def _is_custom_endpoint(base_url: str) -> bool:
@@ -319,7 +309,7 @@ def is_local_endpoint(base_url: str) -> bool:
return False
def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
def detect_local_server_type(base_url: str) -> Optional[str]:
"""Detect which local server is running at base_url by probing known endpoints.
Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
@@ -331,10 +321,8 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
if server_url.endswith("/v1"):
server_url = server_url[:-3]
headers = _auth_headers(api_key)
try:
with httpx.Client(timeout=2.0, headers=headers) as client:
with httpx.Client(timeout=2.0) as client:
# LM Studio exposes /api/v1/models — check first (most specific)
try:
r = client.get(f"{server_url}/api/v1/models")
@@ -521,59 +509,6 @@ def fetch_endpoint_model_metadata(
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
last_error: Optional[Exception] = None
if is_local_endpoint(normalized):
try:
if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
response = requests.get(
server_url.rstrip("/") + "/api/v1/models",
headers=headers,
timeout=10,
)
response.raise_for_status()
payload = response.json()
cache: Dict[str, Dict[str, Any]] = {}
for model in payload.get("models", []):
if not isinstance(model, dict):
continue
model_id = model.get("key") or model.get("id")
if not model_id:
continue
entry: Dict[str, Any] = {"name": model.get("name", model_id)}
context_length = None
for inst in model.get("loaded_instances", []) or []:
if not isinstance(inst, dict):
continue
cfg = inst.get("config", {})
ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
if isinstance(ctx, int) and ctx > 0:
context_length = ctx
break
if context_length is None:
context_length = _extract_context_length(model)
if context_length is not None:
entry["context_length"] = context_length
max_completion_tokens = _extract_max_completion_tokens(model)
if max_completion_tokens is not None:
entry["max_completion_tokens"] = max_completion_tokens
pricing = _extract_pricing(model)
if pricing:
entry["pricing"] = pricing
_add_model_aliases(cache, model_id, entry)
alt_id = model.get("id")
if isinstance(alt_id, str) and alt_id and alt_id != model_id:
_add_model_aliases(cache, alt_id, entry)
_endpoint_model_metadata_cache[normalized] = cache
_endpoint_model_metadata_cache_time[normalized] = time.time()
return cache
except Exception as exc:
last_error = exc
for candidate in candidates:
url = candidate.rstrip("/") + "/models"
try:
@@ -780,7 +715,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
return False
def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]:
def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
"""Query an Ollama server for the model's context length.
Returns the model's maximum context from GGUF metadata via ``/api/show``,
@@ -798,16 +733,14 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
server_url = server_url[:-3]
try:
server_type = detect_local_server_type(base_url, api_key=api_key)
server_type = detect_local_server_type(base_url)
except Exception:
return None
if server_type != "ollama":
return None
headers = _auth_headers(api_key)
try:
with httpx.Client(timeout=3.0, headers=headers) as client:
with httpx.Client(timeout=3.0) as client:
resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
if resp.status_code != 200:
return None
@@ -835,7 +768,7 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
return None
def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
"""Query a local server for the model's context length."""
import httpx
@@ -848,15 +781,13 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
if server_url.endswith("/v1"):
server_url = server_url[:-3]
headers = _auth_headers(api_key)
try:
server_type = detect_local_server_type(base_url, api_key=api_key)
server_type = detect_local_server_type(base_url)
except Exception:
server_type = None
try:
with httpx.Client(timeout=3.0, headers=headers) as client:
with httpx.Client(timeout=3.0) as client:
# Ollama: /api/show returns model details with context info
if server_type == "ollama":
resp = client.post(f"{server_url}/api/show", json={"name": model})
@@ -1067,7 +998,7 @@ def get_model_context_length(
if not _is_known_provider_base_url(base_url):
# 3. Try querying local server directly
if is_local_endpoint(base_url):
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
local_ctx = _query_local_context_length(model, base_url)
if local_ctx and local_ctx > 0:
save_context_length(model, base_url, local_ctx)
return local_ctx
@@ -1081,7 +1012,7 @@ def get_model_context_length(
# 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
if provider == "anthropic" or (
base_url and base_url_hostname(base_url) == "api.anthropic.com"
base_url and "api.anthropic.com" in base_url
):
ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
if ctx:
@@ -1090,11 +1021,7 @@ def get_model_context_length(
# 4b. AWS Bedrock — use static context length table.
# Bedrock's ListFoundationModels doesn't expose context window sizes,
# so we maintain a curated table in bedrock_adapter.py.
if provider == "bedrock" or (
base_url
and base_url_hostname(base_url).startswith("bedrock-runtime.")
and base_url_host_matches(base_url, "amazonaws.com")
):
if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
try:
from agent.bedrock_adapter import get_bedrock_context_length
return get_bedrock_context_length(model)
@@ -1141,7 +1068,7 @@ def get_model_context_length(
# 9. Query local server as last resort
if base_url and is_local_endpoint(base_url):
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
local_ctx = _query_local_context_length(model, base_url)
if local_ctx and local_ctx > 0:
save_context_length(model, base_url, local_ctx)
return local_ctx
-142
View File
@@ -13,48 +13,6 @@ import re
logger = logging.getLogger(__name__)
# Sensitive query-string parameter names (case-insensitive exact match).
# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match
# any known vendor prefix regex (e.g. opaque tokens, short OAuth codes).
_SENSITIVE_QUERY_PARAMS = frozenset({
"access_token",
"refresh_token",
"id_token",
"token",
"api_key",
"apikey",
"client_secret",
"password",
"auth",
"jwt",
"session",
"secret",
"key",
"code", # OAuth authorization codes
"signature", # pre-signed URL signatures
"x-amz-signature",
})
# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match).
# Exact match, NOT substring — "token_count" and "session_id" must NOT match.
# Ported from nearai/ironclaw#2529.
_SENSITIVE_BODY_KEYS = frozenset({
"access_token",
"refresh_token",
"id_token",
"token",
"api_key",
"apikey",
"client_secret",
"password",
"auth",
"jwt",
"secret",
"private_key",
"authorization",
"key",
})
# Snapshot at import time so runtime env mutations (e.g. LLM-generated
# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
@@ -150,30 +108,6 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
# Negative lookahead prevents matching hex strings or identifiers
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
# URLs containing query strings — matches `scheme://...?...[# or end]`.
# Used to scan text for URLs whose query params may contain secrets.
# Ported from nearai/ironclaw#2529.
_URL_WITH_QUERY_RE = re.compile(
r"(https?|wss?|ftp)://" # scheme
r"([^\s/?#]+)" # authority (may include userinfo)
r"([^\s?#]*)" # path
r"\?([^\s#]+)" # query (required)
r"(#\S*)?", # optional fragment
)
# URLs containing userinfo — `scheme://user:password@host` for ANY scheme
# (not just DB protocols already covered by _DB_CONNSTR_RE above).
# Catches things like `https://user:token@api.example.com/v1/foo`.
_URL_USERINFO_RE = re.compile(
r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
)
# Form-urlencoded body detection: conservative — only applies when the entire
# text looks like a query string (k=v&k=v pattern with no newlines).
_FORM_BODY_RE = re.compile(
r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$"
)
# Compile known prefix patterns into one alternation
_PREFIX_RE = re.compile(
r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
@@ -187,72 +121,6 @@ def _mask_token(token: str) -> str:
return f"{token[:6]}...{token[-4:]}"
def _redact_query_string(query: str) -> str:
"""Redact sensitive parameter values in a URL query string.
Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
replaced with `***`. Non-sensitive keys pass through unchanged.
Empty or malformed pairs are preserved as-is.
"""
if not query:
return query
parts = []
for pair in query.split("&"):
if "=" not in pair:
parts.append(pair)
continue
key, _, value = pair.partition("=")
if key.lower() in _SENSITIVE_QUERY_PARAMS:
parts.append(f"{key}=***")
else:
parts.append(pair)
return "&".join(parts)
def _redact_url_query_params(text: str) -> str:
"""Scan text for URLs with query strings and redact sensitive params.
Catches opaque tokens that don't match vendor prefix regexes, e.g.
`https://example.com/cb?code=ABC123&state=xyz` `...?code=***&state=xyz`.
"""
def _sub(m: re.Match) -> str:
scheme = m.group(1)
authority = m.group(2)
path = m.group(3)
query = _redact_query_string(m.group(4))
fragment = m.group(5) or ""
return f"{scheme}://{authority}{path}?{query}{fragment}"
return _URL_WITH_QUERY_RE.sub(_sub, text)
def _redact_url_userinfo(text: str) -> str:
"""Strip `user:password@` from HTTP/WS/FTP URLs.
DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
separately by `_DB_CONNSTR_RE`.
"""
return _URL_USERINFO_RE.sub(
lambda m: f"{m.group(1)}://{m.group(2)}:***@",
text,
)
def _redact_form_body(text: str) -> str:
"""Redact sensitive values in a form-urlencoded body.
Only applies when the entire input looks like a pure form body
(k=v&k=v with no newlines, no other text). Single-line non-form
text passes through unchanged. This is a conservative pass the
`_redact_url_query_params` function handles embedded query strings.
"""
if not text or "\n" in text or "&" not in text:
return text
# The body-body form check is strict: only trigger on clean k=v&k=v.
if not _FORM_BODY_RE.match(text.strip()):
return text
return _redact_query_string(text.strip())
def redact_sensitive_text(text: str) -> str:
"""Apply all redaction patterns to a block of text.
@@ -305,16 +173,6 @@ def redact_sensitive_text(text: str) -> str:
# JWT tokens (eyJ... — base64-encoded JSON headers)
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
# URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
# DB schemes are handled above by _DB_CONNSTR_RE.
text = _redact_url_userinfo(text)
# URL query params containing opaque tokens (?access_token=…&code=…)
text = _redact_url_query_params(text)
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
text = _redact_form_body(text)
# Discord user/role mentions (<@snowflake_id>)
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
-831
View File
@@ -1,831 +0,0 @@
"""
Shell-script hooks bridge.
Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for
consent on first use of each ``(event, command)`` pair, and registers
callbacks on the existing plugin hook manager so every existing
``invoke_hook()`` site dispatches to the configured shell scripts with
zero changes to call sites.
Design notes
------------
* Python plugins and shell hooks compose naturally: both flow through
:func:`hermes_cli.plugins.invoke_hook` and its aggregators. Python
plugins are registered first (via ``discover_and_load()``) so their
block decisions win ties over shell-hook blocks.
* Subprocess execution uses ``shlex.split(os.path.expanduser(command))``
with ``shell=False`` no shell injection footguns. Users that need
pipes/redirection wrap their logic in a script.
* First-use consent is gated by the allowlist under
``~/.hermes/shell-hooks-allowlist.json``. Non-TTY callers must pass
``accept_hooks=True`` (resolved from ``--accept-hooks``,
``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config)
for registration to succeed without a prompt.
* Registration is idempotent safe to invoke from both the CLI entry
point (``hermes_cli/main.py``) and the gateway entry point
(``gateway/run.py``).
Wire protocol
-------------
**stdin** (JSON, piped to the script)::
{
"hook_event_name": "pre_tool_call",
"tool_name": "terminal",
"tool_input": {"command": "rm -rf /"},
"session_id": "sess_abc123",
"cwd": "/home/user/project",
"extra": {...} # event-specific kwargs
}
**stdout** (JSON, optional anything else is ignored)::
# Block a pre_tool_call (either shape accepted; normalised internally):
{"decision": "block", "reason": "Forbidden command"} # Claude-Code-style
{"action": "block", "message": "Forbidden command"} # Hermes-canonical
# Inject context for pre_llm_call:
{"context": "Today is Friday"}
# Silent no-op:
<empty or any non-matching JSON object>
"""
from __future__ import annotations
import difflib
import json
import logging
import os
import re
import shlex
import subprocess
import sys
import tempfile
import threading
import time
from contextlib import contextmanager
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
try:
import fcntl # POSIX only; Windows falls back to best-effort without flock.
except ImportError: # pragma: no cover
fcntl = None # type: ignore[assignment]
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
DEFAULT_TIMEOUT_SECONDS = 60
MAX_TIMEOUT_SECONDS = 300
ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
# (event, matcher, command) triples that have been wired to the plugin
# manager in the current process. Matcher is part of the key because
# the same script can legitimately register for different matchers under
# the same event (e.g. one entry per tool the user wants to gate).
# Second registration attempts for the exact same triple become no-ops
# so the CLI and gateway can both call register_from_config() safely.
_registered: Set[Tuple[str, Optional[str], str]] = set()
_registered_lock = threading.Lock()
# Intra-process lock for allowlist read-modify-write on platforms that
# lack ``fcntl`` (non-POSIX). Kept separate from ``_registered_lock``
# because ``register_from_config`` already holds ``_registered_lock`` when
# it triggers ``_record_approval`` — reusing it here would self-deadlock
# (``threading.Lock`` is non-reentrant). POSIX callers use the sibling
# ``.lock`` file via ``fcntl.flock`` and bypass this.
_allowlist_write_lock = threading.Lock()
@dataclass
class ShellHookSpec:
"""Parsed and validated representation of a single ``hooks:`` entry."""
event: str
command: str
matcher: Optional[str] = None
timeout: int = DEFAULT_TIMEOUT_SECONDS
compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False)
def __post_init__(self) -> None:
# Strip whitespace introduced by YAML quirks (e.g. multi-line string
# folding) — a matcher of " terminal" would otherwise silently fail
# to match "terminal" without any diagnostic.
if isinstance(self.matcher, str):
stripped = self.matcher.strip()
self.matcher = stripped if stripped else None
if self.matcher:
try:
self.compiled_matcher = re.compile(self.matcher)
except re.error as exc:
logger.warning(
"shell hook matcher %r is invalid (%s) — treating as "
"literal equality", self.matcher, exc,
)
self.compiled_matcher = None
def matches_tool(self, tool_name: Optional[str]) -> bool:
if not self.matcher:
return True
if tool_name is None:
return False
if self.compiled_matcher is not None:
return self.compiled_matcher.fullmatch(tool_name) is not None
# compiled_matcher is None only when the regex failed to compile,
# in which case we already warned and fall back to literal equality.
return tool_name == self.matcher
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def register_from_config(
cfg: Optional[Dict[str, Any]],
*,
accept_hooks: bool = False,
) -> List[ShellHookSpec]:
"""Register every configured shell hook on the plugin manager.
``cfg`` is the full parsed config dict (``hermes_cli.config.load_config``
output). The ``hooks:`` key is read out of it. Missing, empty, or
non-dict ``hooks`` is treated as zero configured hooks.
``accept_hooks=True`` skips the TTY consent prompt the caller is
promising that the user has opted in via a flag, env var, or config
setting. ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are
also honored inside this function so either CLI or gateway call sites
pick them up.
Returns the list of :class:`ShellHookSpec` entries that ended up wired
up on the plugin manager. Skipped entries (unknown events, malformed,
not allowlisted, already registered) are logged but not returned.
"""
if not isinstance(cfg, dict):
return []
effective_accept = _resolve_effective_accept(cfg, accept_hooks)
specs = _parse_hooks_block(cfg.get("hooks"))
if not specs:
return []
registered: List[ShellHookSpec] = []
# Import lazily — avoids circular imports at module-load time.
from hermes_cli.plugins import get_plugin_manager
manager = get_plugin_manager()
# Idempotence + allowlist read happen under the lock; the TTY
# prompt runs outside so other threads aren't parked on a blocking
# input(). Mutation re-takes the lock with a defensive idempotence
# re-check in case two callers ever race through the prompt.
for spec in specs:
key = (spec.event, spec.matcher, spec.command)
with _registered_lock:
if key in _registered:
continue
already_allowlisted = _is_allowlisted(spec.event, spec.command)
if not already_allowlisted:
if not _prompt_and_record(
spec.event, spec.command, accept_hooks=effective_accept,
):
logger.warning(
"shell hook for %s (%s) not allowlisted — skipped. "
"Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / "
"hooks_auto_accept: true, or approve at the TTY "
"prompt next run.",
spec.event, spec.command,
)
continue
with _registered_lock:
if key in _registered:
continue
manager._hooks.setdefault(spec.event, []).append(_make_callback(spec))
_registered.add(key)
registered.append(spec)
logger.info(
"shell hook registered: %s -> %s (matcher=%s, timeout=%ds)",
spec.event, spec.command, spec.matcher, spec.timeout,
)
return registered
def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]:
"""Return the parsed ``ShellHookSpec`` entries from config without
registering anything. Used by ``hermes hooks list`` and ``doctor``."""
if not isinstance(cfg, dict):
return []
return _parse_hooks_block(cfg.get("hooks"))
def reset_for_tests() -> None:
"""Clear the idempotence set. Test-only helper."""
with _registered_lock:
_registered.clear()
# ---------------------------------------------------------------------------
# Config parsing
# ---------------------------------------------------------------------------
def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
"""Normalise the ``hooks:`` dict into a flat list of ``ShellHookSpec``.
Malformed entries warn-and-skip we never raise from config parsing
because a broken hook must not crash the agent.
"""
from hermes_cli.plugins import VALID_HOOKS
if not isinstance(hooks_cfg, dict):
return []
specs: List[ShellHookSpec] = []
for event_name, entries in hooks_cfg.items():
if event_name not in VALID_HOOKS:
suggestion = difflib.get_close_matches(
str(event_name), VALID_HOOKS, n=1, cutoff=0.6,
)
if suggestion:
logger.warning(
"unknown hook event %r in hooks: config — did you mean %r?",
event_name, suggestion[0],
)
else:
logger.warning(
"unknown hook event %r in hooks: config (valid: %s)",
event_name, ", ".join(sorted(VALID_HOOKS)),
)
continue
if entries is None:
continue
if not isinstance(entries, list):
logger.warning(
"hooks.%s must be a list of hook definitions; got %s",
event_name, type(entries).__name__,
)
continue
for i, raw in enumerate(entries):
spec = _parse_single_entry(event_name, i, raw)
if spec is not None:
specs.append(spec)
return specs
def _parse_single_entry(
event: str, index: int, raw: Any,
) -> Optional[ShellHookSpec]:
if not isinstance(raw, dict):
logger.warning(
"hooks.%s[%d] must be a mapping with a 'command' key; got %s",
event, index, type(raw).__name__,
)
return None
command = raw.get("command")
if not isinstance(command, str) or not command.strip():
logger.warning(
"hooks.%s[%d] is missing a non-empty 'command' field",
event, index,
)
return None
matcher = raw.get("matcher")
if matcher is not None and not isinstance(matcher, str):
logger.warning(
"hooks.%s[%d].matcher must be a string regex; ignoring",
event, index,
)
matcher = None
if matcher is not None and event not in ("pre_tool_call", "post_tool_call"):
logger.warning(
"hooks.%s[%d].matcher=%r will be ignored at runtime — the "
"matcher field is only honored for pre_tool_call / "
"post_tool_call. The hook will fire on every %s event.",
event, index, matcher, event,
)
matcher = None
timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS)
try:
timeout = int(timeout_raw)
except (TypeError, ValueError):
logger.warning(
"hooks.%s[%d].timeout must be an int (got %r); using default %ds",
event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS,
)
timeout = DEFAULT_TIMEOUT_SECONDS
if timeout < 1:
logger.warning(
"hooks.%s[%d].timeout must be >=1; using default %ds",
event, index, DEFAULT_TIMEOUT_SECONDS,
)
timeout = DEFAULT_TIMEOUT_SECONDS
if timeout > MAX_TIMEOUT_SECONDS:
logger.warning(
"hooks.%s[%d].timeout=%ds exceeds max %ds; clamping",
event, index, timeout, MAX_TIMEOUT_SECONDS,
)
timeout = MAX_TIMEOUT_SECONDS
return ShellHookSpec(
event=event,
command=command.strip(),
matcher=matcher,
timeout=timeout,
)
# ---------------------------------------------------------------------------
# Subprocess callback
# ---------------------------------------------------------------------------
_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"}
def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
"""Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin.
Returns a diagnostic dict with the same keys for every outcome
(``returncode``, ``stdout``, ``stderr``, ``timed_out``,
``elapsed_seconds``, ``error``). This is the single place the
subprocess is actually invoked both the live callback path
(:func:`_make_callback`) and the CLI test helper (:func:`run_once`)
go through it.
"""
result: Dict[str, Any] = {
"returncode": None,
"stdout": "",
"stderr": "",
"timed_out": False,
"elapsed_seconds": 0.0,
"error": None,
}
try:
argv = shlex.split(os.path.expanduser(spec.command))
except ValueError as exc:
result["error"] = f"command {spec.command!r} cannot be parsed: {exc}"
return result
if not argv:
result["error"] = "empty command"
return result
t0 = time.monotonic()
try:
proc = subprocess.run(
argv,
input=stdin_json,
capture_output=True,
timeout=spec.timeout,
text=True,
shell=False,
)
except subprocess.TimeoutExpired:
result["timed_out"] = True
result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
return result
except FileNotFoundError:
result["error"] = "command not found"
return result
except PermissionError:
result["error"] = "command not executable"
return result
except Exception as exc: # pragma: no cover — defensive
result["error"] = str(exc)
return result
result["returncode"] = proc.returncode
result["stdout"] = proc.stdout or ""
result["stderr"] = proc.stderr or ""
result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
return result
def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]:
"""Build the closure that ``invoke_hook()`` will call per firing."""
def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]:
# Matcher gate — only meaningful for tool-scoped events.
if spec.event in ("pre_tool_call", "post_tool_call"):
if not spec.matches_tool(kwargs.get("tool_name")):
return None
r = _spawn(spec, _serialize_payload(spec.event, kwargs))
if r["error"]:
logger.warning(
"shell hook failed (event=%s command=%s): %s",
spec.event, spec.command, r["error"],
)
return None
if r["timed_out"]:
logger.warning(
"shell hook timed out after %.2fs (event=%s command=%s)",
r["elapsed_seconds"], spec.event, spec.command,
)
return None
stderr = r["stderr"].strip()
if stderr:
logger.debug(
"shell hook stderr (event=%s command=%s): %s",
spec.event, spec.command, stderr[:400],
)
# Non-zero exits: log but still parse stdout so scripts that
# signal failure via exit code can also return a block directive.
if r["returncode"] != 0:
logger.warning(
"shell hook exited %d (event=%s command=%s); stderr=%s",
r["returncode"], spec.event, spec.command, stderr[:400],
)
return _parse_response(spec.event, r["stdout"])
_callback.__name__ = f"shell_hook[{spec.event}:{spec.command}]"
_callback.__qualname__ = _callback.__name__
return _callback
def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
"""Render the stdin JSON payload. Unserialisable values are
stringified via ``default=str`` rather than dropped."""
extras = {k: v for k, v in kwargs.items() if k not in _TOP_LEVEL_PAYLOAD_KEYS}
try:
cwd = str(Path.cwd())
except OSError:
cwd = ""
payload = {
"hook_event_name": event,
"tool_name": kwargs.get("tool_name"),
"tool_input": kwargs.get("args") if isinstance(kwargs.get("args"), dict) else None,
"session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "",
"cwd": cwd,
"extra": extras,
}
return json.dumps(payload, ensure_ascii=False, default=str)
def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
"""Translate stdout JSON into a Hermes wire-shape dict.
For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block",
"reason": "..."}`` payload is translated into the canonical Hermes
``{"action": "block", "message": "..."}`` shape expected by
:func:`hermes_cli.plugins.get_pre_tool_call_block_message`. This is
the single most important correctness invariant in this module
skipping the translation silently breaks every ``pre_tool_call``
block directive.
For ``pre_llm_call``, ``{"context": "..."}`` is passed through
unchanged to match the existing plugin-hook contract.
Anything else returns ``None``.
"""
stdout = (stdout or "").strip()
if not stdout:
return None
try:
data = json.loads(stdout)
except json.JSONDecodeError:
logger.warning(
"shell hook stdout was not valid JSON (event=%s): %s",
event, stdout[:200],
)
return None
if not isinstance(data, dict):
return None
if event == "pre_tool_call":
if data.get("action") == "block":
message = data.get("message") or data.get("reason") or ""
if isinstance(message, str) and message:
return {"action": "block", "message": message}
if data.get("decision") == "block":
message = data.get("reason") or data.get("message") or ""
if isinstance(message, str) and message:
return {"action": "block", "message": message}
return None
context = data.get("context")
if isinstance(context, str) and context.strip():
return {"context": context}
return None
# ---------------------------------------------------------------------------
# Allowlist / consent
# ---------------------------------------------------------------------------
def allowlist_path() -> Path:
"""Path to the per-user shell-hook allowlist file."""
return get_hermes_home() / ALLOWLIST_FILENAME
def load_allowlist() -> Dict[str, Any]:
"""Return the parsed allowlist, or an empty skeleton if absent."""
try:
raw = json.loads(allowlist_path().read_text())
except (FileNotFoundError, json.JSONDecodeError, OSError):
return {"approvals": []}
if not isinstance(raw, dict):
return {"approvals": []}
approvals = raw.get("approvals")
if not isinstance(approvals, list):
raw["approvals"] = []
return raw
def save_allowlist(data: Dict[str, Any]) -> None:
"""Atomically persist the allowlist via per-process ``mkstemp`` +
``os.replace``. Cross-process read-modify-write races are handled
by :func:`_locked_update_approvals` (``fcntl.flock``). On OSError
the failure is logged; the in-process hook still registers but
the approval won't survive across runs."""
p = allowlist_path()
try:
p.parent.mkdir(parents=True, exist_ok=True)
fd, tmp_path = tempfile.mkstemp(
prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent),
)
try:
with os.fdopen(fd, "w") as fh:
fh.write(json.dumps(data, indent=2, sort_keys=True))
os.replace(tmp_path, p)
except Exception:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
except OSError as exc:
logger.warning(
"Failed to persist shell hook allowlist to %s: %s. "
"The approval is in-memory for this run, but the next "
"startup will re-prompt (or skip registration on non-TTY "
"runs without --accept-hooks / HERMES_ACCEPT_HOOKS).",
p, exc,
)
def _is_allowlisted(event: str, command: str) -> bool:
data = load_allowlist()
return any(
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
for e in data.get("approvals", [])
)
@contextmanager
def _locked_update_approvals() -> Iterator[Dict[str, Any]]:
"""Serialise read-modify-write on the allowlist across processes.
Holds an exclusive ``flock`` on a sibling lock file for the duration
of the update so concurrent ``_record_approval``/``revoke`` callers
cannot clobber each other's changes (the race Codex reproduced with
2050 simultaneous writers). Falls back to an in-process lock on
platforms without ``fcntl``.
"""
p = allowlist_path()
p.parent.mkdir(parents=True, exist_ok=True)
lock_path = p.with_suffix(p.suffix + ".lock")
if fcntl is None: # pragma: no cover — non-POSIX fallback
with _allowlist_write_lock:
data = load_allowlist()
yield data
save_allowlist(data)
return
with open(lock_path, "a+") as lock_fh:
fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
try:
data = load_allowlist()
yield data
save_allowlist(data)
finally:
fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
def _prompt_and_record(
event: str, command: str, *, accept_hooks: bool,
) -> bool:
"""Decide whether to approve an unseen ``(event, command)`` pair.
Returns ``True`` iff the approval was granted and recorded.
"""
if accept_hooks:
_record_approval(event, command)
logger.info(
"shell hook auto-approved via --accept-hooks / env / config: "
"%s -> %s", event, command,
)
return True
if not sys.stdin.isatty():
return False
print(
f"\n⚠ Hermes is about to register a shell hook that will run a\n"
f" command on your behalf.\n\n"
f" Event: {event}\n"
f" Command: {command}\n\n"
f" Commands run with your full user credentials. Only approve\n"
f" commands you trust."
)
try:
answer = input("Allow this hook to run? [y/N]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
print() # keep the terminal tidy after ^C
return False
if answer in ("y", "yes"):
_record_approval(event, command)
return True
return False
def _record_approval(event: str, command: str) -> None:
entry = {
"event": event,
"command": command,
"approved_at": _utc_now_iso(),
"script_mtime_at_approval": script_mtime_iso(command),
}
with _locked_update_approvals() as data:
data["approvals"] = [
e for e in data.get("approvals", [])
if not (
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
)
] + [entry]
def _utc_now_iso() -> str:
return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z")
def revoke(command: str) -> int:
"""Remove every allowlist entry matching ``command``.
Returns the number of entries removed. Does not unregister any
callbacks that are already live on the plugin manager in the current
process restart the CLI / gateway to drop them.
"""
with _locked_update_approvals() as data:
before = len(data.get("approvals", []))
data["approvals"] = [
e for e in data.get("approvals", [])
if not (isinstance(e, dict) and e.get("command") == command)
]
after = len(data["approvals"])
return before - after
_SCRIPT_EXTENSIONS: Tuple[str, ...] = (
".sh", ".bash", ".zsh", ".fish",
".py", ".pyw",
".rb", ".pl", ".lua",
".js", ".mjs", ".cjs", ".ts",
)
def _command_script_path(command: str) -> str:
"""Return the script path from ``command`` for doctor / drift checks.
Prefers a token ending in a known script extension, then a token
containing ``/`` or leading ``~``, then the first token. Handles
``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``, and the
common bare-path form.
"""
try:
parts = shlex.split(command)
except ValueError:
return command
if not parts:
return command
for part in parts:
if part.lower().endswith(_SCRIPT_EXTENSIONS):
return part
for part in parts:
if "/" in part or part.startswith("~"):
return part
return parts[0]
# ---------------------------------------------------------------------------
# Helpers for accept-hooks resolution
# ---------------------------------------------------------------------------
def _resolve_effective_accept(
cfg: Dict[str, Any], accept_hooks_arg: bool,
) -> bool:
"""Combine all three opt-in channels into a single boolean.
Precedence (any truthy source flips us on):
1. ``--accept-hooks`` flag (CLI) / explicit argument
2. ``HERMES_ACCEPT_HOOKS`` env var
3. ``hooks_auto_accept: true`` in ``cli-config.yaml``
"""
if accept_hooks_arg:
return True
env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower()
if env in ("1", "true", "yes", "on"):
return True
cfg_val = cfg.get("hooks_auto_accept", False)
return bool(cfg_val)
# ---------------------------------------------------------------------------
# Introspection (used by `hermes hooks` CLI)
# ---------------------------------------------------------------------------
def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]:
"""Return the allowlist record for this pair, if any."""
for e in load_allowlist().get("approvals", []):
if (
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
):
return e
return None
def script_mtime_iso(command: str) -> Optional[str]:
"""ISO-8601 mtime of the resolved script path, or ``None`` if the
script is missing."""
path = _command_script_path(command)
if not path:
return None
try:
expanded = os.path.expanduser(path)
return datetime.fromtimestamp(
os.path.getmtime(expanded), tz=timezone.utc,
).isoformat().replace("+00:00", "Z")
except OSError:
return None
def script_is_executable(command: str) -> bool:
"""Return ``True`` iff ``command`` is runnable as configured.
For a bare invocation (``/path/hook.sh``) the script itself must be
executable. For interpreter-prefixed commands (``python3
/path/hook.py``, ``/usr/bin/env bash hook.sh``) the script just has
to be readable the interpreter doesn't care about the ``X_OK``
bit. Mirrors what ``_spawn`` would actually do at runtime."""
path = _command_script_path(command)
if not path:
return False
expanded = os.path.expanduser(path)
if not os.path.isfile(expanded):
return False
try:
argv = shlex.split(command)
except ValueError:
return False
is_bare_invocation = bool(argv) and argv[0] == path
required = os.X_OK if is_bare_invocation else os.R_OK
return os.access(expanded, required)
def run_once(
spec: ShellHookSpec, kwargs: Dict[str, Any],
) -> Dict[str, Any]:
"""Fire a single shell-hook invocation with a synthetic payload.
Used by ``hermes hooks test`` and ``hermes hooks doctor``.
``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook`
would pass at runtime. It is routed through :func:`_serialize_payload`
so the synthetic stdin exactly matches what a real hook firing would
produce otherwise scripts tested via ``hermes hooks test`` could
diverge silently from production behaviour.
Returns the :func:`_spawn` diagnostic dict plus a ``parsed`` field
holding the canonical Hermes-wire-shape response."""
stdin_json = _serialize_payload(spec.event, kwargs)
result = _spawn(spec, stdin_json)
result["parsed"] = _parse_response(spec.event, result["stdout"])
return result
+3 -134
View File
@@ -8,7 +8,6 @@ can invoke skills via /skill-name commands and prompt-only built-ins like
import json
import logging
import re
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
@@ -23,110 +22,6 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
# left as-is so the user can debug them.
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
# Matches inline shell snippets like: !`date +%Y-%m-%d`
# Non-greedy, single-line only — no newlines inside the backticks.
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
# Cap inline-shell output so a runaway command can't blow out the context.
_INLINE_SHELL_MAX_OUTPUT = 4000
def _load_skills_config() -> dict:
"""Load the ``skills`` section of config.yaml (best-effort)."""
try:
from hermes_cli.config import load_config
cfg = load_config() or {}
skills_cfg = cfg.get("skills")
if isinstance(skills_cfg, dict):
return skills_cfg
except Exception:
logger.debug("Could not read skills config", exc_info=True)
return {}
def _substitute_template_vars(
content: str,
skill_dir: Path | None,
session_id: str | None,
) -> str:
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
Only substitutes tokens for which a concrete value is available
unresolved tokens are left in place so the author can spot them.
"""
if not content:
return content
skill_dir_str = str(skill_dir) if skill_dir else None
def _replace(match: re.Match) -> str:
token = match.group(1)
if token == "HERMES_SKILL_DIR" and skill_dir_str:
return skill_dir_str
if token == "HERMES_SESSION_ID" and session_id:
return str(session_id)
return match.group(0)
return _SKILL_TEMPLATE_RE.sub(_replace, content)
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
"""Execute a single inline-shell snippet and return its stdout (trimmed).
Failures return a short ``[inline-shell error: ...]`` marker instead of
raising, so one bad snippet can't wreck the whole skill message.
"""
try:
completed = subprocess.run(
["bash", "-c", command],
cwd=str(cwd) if cwd else None,
capture_output=True,
text=True,
timeout=max(1, int(timeout)),
check=False,
)
except subprocess.TimeoutExpired:
return f"[inline-shell timeout after {timeout}s: {command}]"
except FileNotFoundError:
return f"[inline-shell error: bash not found]"
except Exception as exc:
return f"[inline-shell error: {exc}]"
output = (completed.stdout or "").rstrip("\n")
if not output and completed.stderr:
output = completed.stderr.rstrip("\n")
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
return output
def _expand_inline_shell(
content: str,
skill_dir: Path | None,
timeout: int,
) -> str:
"""Replace every !`cmd` snippet in ``content`` with its stdout.
Runs each snippet with the skill directory as CWD so relative paths in
the snippet work the way the author expects.
"""
if "!`" not in content:
return content
def _replace(match: re.Match) -> str:
cmd = match.group(1).strip()
if not cmd:
return ""
return _run_inline_shell(cmd, skill_dir, timeout)
return _INLINE_SHELL_RE.sub(_replace, content)
def build_plan_path(
user_instruction: str = "",
@@ -238,36 +133,14 @@ def _build_skill_message(
activation_note: str,
user_instruction: str = "",
runtime_note: str = "",
session_id: str | None = None,
) -> str:
"""Format a loaded skill into a user/system message payload."""
from tools.skills_tool import SKILLS_DIR
content = str(loaded_skill.get("content") or "")
# ── Template substitution and inline-shell expansion ──
# Done before anything else so downstream blocks (setup notes,
# supporting-file hints) see the expanded content.
skills_cfg = _load_skills_config()
if skills_cfg.get("template_vars", True):
content = _substitute_template_vars(content, skill_dir, session_id)
if skills_cfg.get("inline_shell", False):
timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
content = _expand_inline_shell(content, skill_dir, timeout)
parts = [activation_note, "", content.strip()]
# ── Inject the absolute skill directory so the agent can reference
# bundled scripts without an extra skill_view() round-trip. ──
if skill_dir:
parts.append("")
parts.append(f"[Skill directory: {skill_dir}]")
parts.append(
"Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
"`templates/config.yaml`) against that directory, then run them "
"with the terminal tool using the absolute path."
)
# ── Inject resolved skill config values ──
_inject_skill_config(loaded_skill, parts)
@@ -315,13 +188,11 @@ def _build_skill_message(
# Skill is from an external dir — use the skill name instead
skill_view_target = skill_dir.name
parts.append("")
parts.append("[This skill has supporting files:]")
parts.append("[This skill has supporting files you can load with the skill_view tool:]")
for sf in supporting:
parts.append(f"- {sf} -> {skill_dir / sf}")
parts.append(f"- {sf}")
parts.append(
f'\nLoad any of these with skill_view(name="{skill_view_target}", '
f'file_path="<path>"), or run scripts directly by absolute path '
f"(e.g. `node {skill_dir}/scripts/foo.js`)."
f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
)
if user_instruction:
@@ -461,7 +332,6 @@ def build_skill_invocation_message(
activation_note,
user_instruction=user_instruction,
runtime_note=runtime_note,
session_id=task_id,
)
@@ -500,7 +370,6 @@ def build_preloaded_skills_prompt(
loaded_skill,
skill_dir,
activation_note,
session_id=task_id,
)
)
loaded_names.append(skill_name)
-39
View File
@@ -1,39 +0,0 @@
"""Transport layer types and registry for provider response normalization.
Usage:
from agent.transports import get_transport
transport = get_transport("anthropic_messages")
result = transport.normalize_response(raw_response)
"""
from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
_REGISTRY: dict = {}
def register_transport(api_mode: str, transport_cls: type) -> None:
"""Register a transport class for an api_mode string."""
_REGISTRY[api_mode] = transport_cls
def get_transport(api_mode: str):
"""Get a transport instance for the given api_mode.
Returns None if no transport is registered for this api_mode.
This allows gradual migration call sites can check for None
and fall back to the legacy code path.
"""
if not _REGISTRY:
_discover_transports()
cls = _REGISTRY.get(api_mode)
if cls is None:
return None
return cls()
def _discover_transports() -> None:
"""Import all transport modules to trigger auto-registration."""
try:
import agent.transports.anthropic # noqa: F401
except ImportError:
pass
-129
View File
@@ -1,129 +0,0 @@
"""Anthropic Messages API transport.
Delegates to the existing adapter functions in agent/anthropic_adapter.py.
This transport owns format conversion and normalization NOT client lifecycle.
"""
from typing import Any, Dict, List, Optional
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse
class AnthropicTransport(ProviderTransport):
"""Transport for api_mode='anthropic_messages'.
Wraps the existing functions in anthropic_adapter.py behind the
ProviderTransport ABC. Each method delegates no logic is duplicated.
"""
@property
def api_mode(self) -> str:
return "anthropic_messages"
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI messages to Anthropic (system, messages) tuple.
kwargs:
base_url: Optional[str] affects thinking signature handling.
"""
from agent.anthropic_adapter import convert_messages_to_anthropic
base_url = kwargs.get("base_url")
return convert_messages_to_anthropic(messages, base_url=base_url)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI tool schemas to Anthropic input_schema format."""
from agent.anthropic_adapter import convert_tools_to_anthropic
return convert_tools_to_anthropic(tools)
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build Anthropic messages.create() kwargs.
Calls convert_messages and convert_tools internally.
params (all optional):
max_tokens: int
reasoning_config: dict | None
tool_choice: str | None
is_oauth: bool
preserve_dots: bool
context_length: int | None
base_url: str | None
fast_mode: bool
"""
from agent.anthropic_adapter import build_anthropic_kwargs
return build_anthropic_kwargs(
model=model,
messages=messages,
tools=tools,
max_tokens=params.get("max_tokens", 16384),
reasoning_config=params.get("reasoning_config"),
tool_choice=params.get("tool_choice"),
is_oauth=params.get("is_oauth", False),
preserve_dots=params.get("preserve_dots", False),
context_length=params.get("context_length"),
base_url=params.get("base_url"),
fast_mode=params.get("fast_mode", False),
)
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize Anthropic response to NormalizedResponse.
kwargs:
strip_tool_prefix: bool strip 'mcp_mcp_' prefixes from tool names.
"""
from agent.anthropic_adapter import normalize_anthropic_response_v2
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)
def validate_response(self, response: Any) -> bool:
"""Check Anthropic response structure is valid."""
if response is None:
return False
content_blocks = getattr(response, "content", None)
if not isinstance(content_blocks, list):
return False
if not content_blocks:
return False
return True
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Extract Anthropic cache_read and cache_creation token counts."""
usage = getattr(response, "usage", None)
if usage is None:
return None
cached = getattr(usage, "cache_read_input_tokens", 0) or 0
written = getattr(usage, "cache_creation_input_tokens", 0) or 0
if cached or written:
return {"cached_tokens": cached, "creation_tokens": written}
return None
# Promote the adapter's canonical mapping to module level so it's shared
_STOP_REASON_MAP = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"refusal": "content_filter",
"model_context_window_exceeded": "length",
}
def map_finish_reason(self, raw_reason: str) -> str:
"""Map Anthropic stop_reason to OpenAI finish_reason."""
return self._STOP_REASON_MAP.get(raw_reason, "stop")
# Auto-register on import
from agent.transports import register_transport # noqa: E402
register_transport("anthropic_messages", AnthropicTransport)
-89
View File
@@ -1,89 +0,0 @@
"""Abstract base for provider transports.
A transport owns the data path for one api_mode:
convert_messages convert_tools build_kwargs normalize_response
It does NOT own: client construction, streaming, credential refresh,
prompt caching, interrupt handling, or retry logic. Those stay on AIAgent.
"""
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
from agent.transports.types import NormalizedResponse
class ProviderTransport(ABC):
"""Base class for provider-specific format conversion and normalization."""
@property
@abstractmethod
def api_mode(self) -> str:
"""The api_mode string this transport handles (e.g. 'anthropic_messages')."""
...
@abstractmethod
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI-format messages to provider-native format.
Returns provider-specific structure (e.g. (system, messages) for Anthropic,
or the messages list unchanged for chat_completions).
"""
...
@abstractmethod
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI-format tool definitions to provider-native format.
Returns provider-specific tool list (e.g. Anthropic input_schema format).
"""
...
@abstractmethod
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build the complete API call kwargs dict.
This is the primary entry point it typically calls convert_messages()
and convert_tools() internally, then adds model-specific config.
Returns a dict ready to be passed to the provider's SDK client.
"""
...
@abstractmethod
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize a raw provider response to the shared NormalizedResponse type.
This is the only method that returns a transport-layer type.
"""
...
def validate_response(self, response: Any) -> bool:
"""Optional: check if the raw response is structurally valid.
Returns True if valid, False if the response should be treated as invalid.
Default implementation always returns True.
"""
return True
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Optional: extract provider-specific cache hit/creation stats.
Returns dict with 'cached_tokens' and 'creation_tokens', or None.
Default returns None.
"""
return None
def map_finish_reason(self, raw_reason: str) -> str:
"""Optional: map provider-specific stop reason to OpenAI equivalent.
Default returns the raw reason unchanged. Override for providers
with different stop reason vocabularies.
"""
return raw_reason
-100
View File
@@ -1,100 +0,0 @@
"""Shared types for normalized provider responses.
These dataclasses define the canonical shape that all provider adapters
normalize responses to. The shared surface is intentionally minimal
only fields that every downstream consumer reads are top-level.
Protocol-specific state goes in ``provider_data`` dicts (response-level
and per-tool-call) so that protocol-aware code paths can access it
without polluting the shared type.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class ToolCall:
"""A normalized tool call from any provider.
``id`` is the protocol's canonical identifier — what gets used in
``tool_call_id`` / ``tool_use_id`` when constructing tool result
messages. May be ``None`` when the provider omits it; the agent
fills it via ``_deterministic_call_id()`` before storing in history.
``provider_data`` carries per-tool-call protocol metadata that only
protocol-aware code reads:
* Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
* Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
* Others: ``None``
"""
id: Optional[str]
name: str
arguments: str # JSON string
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
@dataclass
class Usage:
"""Token usage from an API response."""
prompt_tokens: int = 0
completion_tokens: int = 0
total_tokens: int = 0
cached_tokens: int = 0
@dataclass
class NormalizedResponse:
"""Normalized API response from any provider.
Shared fields are truly cross-provider every caller can rely on
them without branching on api_mode. Protocol-specific state goes in
``provider_data`` so that only protocol-aware code paths read it.
Response-level ``provider_data`` examples:
* Anthropic: ``{"reasoning_details": [...]}``
* Codex: ``{"codex_reasoning_items": [...]}``
* Others: ``None``
"""
content: Optional[str]
tool_calls: Optional[List[ToolCall]]
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
reasoning: Optional[str] = None
usage: Optional[Usage] = None
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
# ---------------------------------------------------------------------------
# Factory helpers
# ---------------------------------------------------------------------------
def build_tool_call(
id: Optional[str],
name: str,
arguments: Any,
**provider_fields: Any,
) -> ToolCall:
"""Build a ``ToolCall``, auto-serialising *arguments* if it's a dict.
Any extra keyword arguments are collected into ``provider_data``.
"""
args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
pd = dict(provider_fields) if provider_fields else None
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
"""Translate a provider-specific stop reason to the normalised set.
Falls back to ``"stop"`` for unknown or ``None`` reasons.
"""
if reason is None:
return "stop"
return mapping.get(reason, "stop")
+1 -2
View File
@@ -6,7 +6,6 @@ from decimal import Decimal
from typing import Any, Dict, Literal, Optional
from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
from utils import base_url_host_matches
DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
@@ -394,7 +393,7 @@ def resolve_billing_route(
if provider_name == "openai-codex":
return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
if provider_name == "openrouter" or "openrouter.ai" in base:
return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
if provider_name == "anthropic":
return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
+4 -4
View File
@@ -1190,12 +1190,12 @@ def main(
"""
# Handle list distributions
if list_distributions:
from toolset_distributions import print_distribution_info
from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
print("📊 Available Toolset Distributions")
print("=" * 70)
all_dists = list_distributions()
all_dists = get_all_dists()
for dist_name in sorted(all_dists.keys()):
print_distribution_info(dist_name)
+2 -40
View File
@@ -770,12 +770,10 @@ code_execution:
# Subagent Delegation
# =============================================================================
# The delegate_task tool spawns child agents with isolated context.
# Supports single tasks and batch mode (default 3 parallel, configurable).
# Supports single tasks and batch mode (up to 3 parallel).
delegation:
max_iterations: 50 # Max tool-calling turns per child (default: 50)
# max_concurrent_children: 3 # Max parallel child agents (default: 3)
# max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents
# model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent)
# provider: "openrouter" # Override provider for subagents (empty = inherit parent)
# # Resolves full credentials (base_url, api_key) automatically.
@@ -919,39 +917,3 @@ display:
# # Names and usernames are NOT affected (user-chosen, publicly visible).
# # Routing/delivery still uses the original values internally.
# redact_pii: false
# =============================================================================
# Shell-script hooks
# =============================================================================
# Register shell scripts as plugin-hook callbacks. Each entry is executed as
# a subprocess (shell=False, shlex.split) with a JSON payload on stdin. On
# stdout the script may return JSON that either blocks the tool call or
# injects context into the next LLM call.
#
# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
# pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
# pre_api_request, post_api_request, on_session_start, on_session_end,
# on_session_finalize, on_session_reset, subagent_stop
#
# First-use consent: each (event, command) pair prompts once on a TTY, then
# is persisted to ~/.hermes/shell-hooks-allowlist.json. Non-interactive
# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
# hooks_auto_accept key below.
#
# See website/docs/user-guide/features/hooks.md for the full JSON wire
# protocol and worked examples.
#
# hooks:
# pre_tool_call:
# - matcher: "terminal"
# command: "~/.hermes/agent-hooks/block-rm-rf.sh"
# timeout: 10
# post_tool_call:
# - matcher: "write_file|patch"
# command: "~/.hermes/agent-hooks/auto-format.sh"
# pre_llm_call:
# - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
# subagent_stop:
# - command: "~/.hermes/agent-hooks/log-orchestration.sh"
#
# hooks_auto_accept: false
+98 -306
View File
@@ -19,14 +19,12 @@ import shutil
import sys
import json
import re
import concurrent.futures
import base64
import atexit
import tempfile
import time
import uuid
import textwrap
from urllib.parse import unquote, urlparse
from contextlib import contextmanager
from pathlib import Path
from datetime import datetime
@@ -67,7 +65,6 @@ from agent.usage_pricing import (
format_duration_compact,
format_token_count_compact,
)
from agent.account_usage import fetch_account_usage, render_account_usage_lines
from hermes_cli.banner import _format_context_length, format_banner_version_label
_COMMAND_SPINNER_FRAMES = ("", "", "", "", "", "", "", "", "", "")
@@ -77,7 +74,6 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧
# User-managed env files should override stale shell exports on restart.
from hermes_constants import get_hermes_home, display_hermes_home
from hermes_cli.env_loader import load_hermes_dotenv
from utils import base_url_host_matches
_hermes_home = get_hermes_home()
_project_env = Path(__file__).parent / '.env'
@@ -371,6 +367,7 @@ def load_cli_config() -> Dict[str, Any]:
},
"delegation": {
"max_iterations": 45, # Max tool-calling turns per child agent
"default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents
"model": "", # Subagent model override (empty = inherit parent model)
"provider": "", # Subagent provider override (empty = inherit parent provider)
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
@@ -531,6 +528,7 @@ def load_cli_config() -> Dict[str, Any]:
if _file_has_terminal_config or env_var not in os.environ:
val = terminal_config[config_key]
if isinstance(val, list):
import json
os.environ[env_var] = json.dumps(val)
else:
os.environ[env_var] = str(val)
@@ -1145,6 +1143,8 @@ def _rich_text_from_ansi(text: str) -> _RichText:
def _strip_markdown_syntax(text: str) -> str:
"""Best-effort markdown marker removal for plain-text display."""
import re
plain = _rich_text_from_ansi(text or "").plain
plain = re.sub(r"^\s{0,3}(?:[-*_]\s*){3,}$", "", plain, flags=re.MULTILINE)
plain = re.sub(r"^\s{0,3}#{1,6}\s+", "", plain, flags=re.MULTILINE)
@@ -1154,11 +1154,11 @@ def _strip_markdown_syntax(text: str) -> str:
plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain)
plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain)
plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain)
plain = re.sub(r"(?<!\w)___([^_]+)___(?!\w)", r"\1", plain)
plain = re.sub(r"___([^_]+)___", r"\1", plain)
plain = re.sub(r"\*\*([^*]+)\*\*", r"\1", plain)
plain = re.sub(r"(?<!\w)__([^_]+)__(?!\w)", r"\1", plain)
plain = re.sub(r"__([^_]+)__", r"\1", plain)
plain = re.sub(r"\*([^*]+)\*", r"\1", plain)
plain = re.sub(r"(?<!\w)_([^_]+)_(?!\w)", r"\1", plain)
plain = re.sub(r"_([^_]+)_", r"\1", plain)
plain = re.sub(r"~~([^~]+)~~", r"\1", plain)
plain = re.sub(r"\n{3,}", "\n\n", plain)
return plain.strip("\n")
@@ -1271,21 +1271,10 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:
if (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")):
token = token[1:-1].strip()
token = token.replace('\\ ', ' ')
if not token:
return None
expanded = token
if token.startswith("file://"):
try:
parsed = urlparse(token)
if parsed.scheme == "file":
expanded = unquote(parsed.path or "")
if parsed.netloc and os.name == "nt":
expanded = f"//{parsed.netloc}{expanded}"
except Exception:
expanded = token
expanded = os.path.expandvars(os.path.expanduser(expanded))
expanded = os.path.expandvars(os.path.expanduser(token))
if os.name != "nt":
normalized = expanded.replace("\\", "/")
if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha():
@@ -1372,7 +1361,6 @@ def _detect_file_drop(user_input: str) -> "dict | None":
or stripped.startswith("~")
or stripped.startswith("./")
or stripped.startswith("../")
or stripped.startswith("file://")
or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha())
or stripped.startswith('"/')
or stripped.startswith('"~')
@@ -1383,25 +1371,8 @@ def _detect_file_drop(user_input: str) -> "dict | None":
if not starts_like_path:
return None
direct_path = _resolve_attachment_path(stripped)
if direct_path is not None:
return {
"path": direct_path,
"is_image": direct_path.suffix.lower() in _IMAGE_EXTENSIONS,
"remainder": "",
}
first_token, remainder = _split_path_input(stripped)
drop_path = _resolve_attachment_path(first_token)
if drop_path is None and " " in stripped and stripped[0] not in {"'", '"'}:
space_positions = [idx for idx, ch in enumerate(stripped) if ch == " "]
for pos in reversed(space_positions):
candidate = stripped[:pos].rstrip()
resolved = _resolve_attachment_path(candidate)
if resolved is not None:
drop_path = resolved
remainder = stripped[pos + 1 :].strip()
break
if drop_path is None:
return None
@@ -1865,7 +1836,7 @@ class HermesCLI:
# Match key to resolved base_url: OpenRouter URL → prefer OPENROUTER_API_KEY,
# custom endpoint → prefer OPENAI_API_KEY (issue #560).
# Note: _ensure_runtime_credentials() re-resolves this before first use.
if self.base_url and base_url_host_matches(self.base_url, "openrouter.ai"):
if self.base_url and "openrouter.ai" in self.base_url:
self.api_key = api_key or os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY")
else:
self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
@@ -2030,7 +2001,8 @@ class HermesCLI:
def _invalidate(self, min_interval: float = 0.25) -> None:
"""Throttled UI repaint — prevents terminal blinking on slow/SSH connections."""
now = time.monotonic()
import time as _time
now = _time.monotonic()
if hasattr(self, "_app") and self._app and (now - self._last_invalidate) >= min_interval:
self._last_invalidate = now
self._app.invalidate()
@@ -2248,7 +2220,8 @@ class HermesCLI:
return ""
t0 = getattr(self, "_tool_start_time", 0) or 0
if t0 > 0:
elapsed = time.monotonic() - t0
import time as _time
elapsed = _time.monotonic() - t0
if elapsed >= 60:
_m, _s = int(elapsed // 60), int(elapsed % 60)
elapsed_str = f"{_m}m {_s}s"
@@ -2503,6 +2476,9 @@ class HermesCLI:
def _emit_reasoning_preview(self, reasoning_text: str) -> None:
"""Render a buffered reasoning preview as a single [thinking] block."""
import re
import textwrap
preview_text = reasoning_text.strip()
if not preview_text:
return
@@ -2621,7 +2597,9 @@ class HermesCLI:
"""Expand [Pasted text #N -> file] placeholders into file contents."""
if not isinstance(text, str) or "[Pasted text #" not in text:
return text or ""
paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
import re as _re
paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
def _expand_ref(match):
path = Path(match.group(1))
@@ -2944,7 +2922,9 @@ class HermesCLI:
def _command_spinner_frame(self) -> str:
"""Return the current spinner frame for slow slash commands."""
frame_idx = int(time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES)
import time as _time
frame_idx = int(_time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES)
return _COMMAND_SPINNER_FRAMES[frame_idx]
@contextmanager
@@ -3955,6 +3935,7 @@ class HermesCLI:
image later with ``vision_analyze`` if needed.
"""
import asyncio as _asyncio
import json as _json
from tools.vision_tools import vision_analyze_tool
analysis_prompt = (
@@ -3974,7 +3955,7 @@ class HermesCLI:
result_json = _asyncio.run(
vision_analyze_tool(image_url=str(img_path), user_prompt=analysis_prompt)
)
result = json.loads(result_json)
result = _json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "")
enriched_parts.append(
@@ -4229,37 +4210,8 @@ class HermesCLI:
"""
import shlex
from argparse import Namespace
from contextlib import redirect_stdout
from io import StringIO
from hermes_cli.tools_config import tools_disable_enable_command
def _run_capture(ns: Namespace) -> None:
"""Run tools_disable_enable_command, routing its ANSI-colored
print() output through _cprint when inside the interactive TUI
so escapes aren't mangled by patch_stdout's StdoutProxy into
garbled '?[32m...?[0m' text.
Outside the TUI (standalone mode, tests), call straight through
so real stdout / pytest capture works as expected.
"""
# Standalone/tests, run as usual
if getattr(self, "_app", None) is None:
tools_disable_enable_command(ns)
return
# Buffer reports isatty()=True so color() in hermes_cli/colors.py
# still emits ANSI escapes. StringIO.isatty() is False, which
# would otherwise strip all colors before we re-render them.
class _TTYBuf(StringIO):
def isatty(self) -> bool:
return True
buf = _TTYBuf()
with redirect_stdout(buf):
tools_disable_enable_command(ns)
for line in buf.getvalue().splitlines():
_cprint(line)
try:
parts = shlex.split(cmd)
except ValueError:
@@ -4271,7 +4223,8 @@ class HermesCLI:
return
if subcommand == "list":
_run_capture(Namespace(tools_action="list", platform="cli"))
tools_disable_enable_command(
Namespace(tools_action="list", platform="cli"))
return
names = parts[2:]
@@ -4288,7 +4241,8 @@ class HermesCLI:
label = ", ".join(names)
_cprint(f"{_ACCENT}{verb} {label}...{_RST}")
_run_capture(Namespace(tools_action=subcommand, names=names, platform="cli"))
tools_disable_enable_command(
Namespace(tools_action=subcommand, names=names, platform="cli"))
# Reset session so the new tool config is picked up from a clean state
from hermes_cli.tools_config import _get_platform_tools
@@ -5015,7 +4969,7 @@ class HermesCLI:
pass
cache_enabled = (
(base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
or result.api_mode == "anthropic_messages"
)
if cache_enabled:
@@ -5243,7 +5197,7 @@ class HermesCLI:
# Cache notice
cache_enabled = (
(base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
or result.api_mode == "anthropic_messages"
)
if cache_enabled:
@@ -5274,30 +5228,6 @@ class HermesCLI:
except Exception:
return False
def _should_handle_steer_command_inline(self, text: str, has_images: bool = False) -> bool:
"""Return True when /steer should be dispatched immediately while the agent is running.
/steer MUST bypass the normal _pending_input process_loop path when
the agent is active, because process_loop is blocked inside
self.chat() for the duration of the run. By the time the queued
command is pulled from _pending_input, _agent_running has already
flipped back to False, and process_command() takes the idle
fallback delivering the steer as a next-turn message instead of
injecting it mid-run. Dispatching inline on the UI thread calls
agent.steer() directly, which is thread-safe (uses _pending_steer_lock).
"""
if not text or has_images or not _looks_like_slash_command(text):
return False
if not getattr(self, "_agent_running", False):
return False
try:
from hermes_cli.commands import resolve_command
base = text.split(None, 1)[0].lower().lstrip('/')
cmd = resolve_command(base)
return bool(cmd and cmd.name == "steer")
except Exception:
return False
def _show_model_and_providers(self):
"""Show current model + provider and list all authenticated providers.
@@ -6300,7 +6230,8 @@ class HermesCLI:
# with the output (fixes #2718).
if self._app:
self._app.invalidate()
time.sleep(0.05) # brief pause for refresh
import time as _tmod
_tmod.sleep(0.05) # brief pause for refresh
print()
ChatConsole().print(f"[{_accent_hex()}]{'' * 40}[/]")
_cprint(f" ✅ Background task #{task_num} complete")
@@ -6340,7 +6271,8 @@ class HermesCLI:
# Same TUI refresh pattern as success path (#2718)
if self._app:
self._app.invalidate()
time.sleep(0.05)
import time as _tmod
_tmod.sleep(0.05)
print()
_cprint(f" ❌ Background task #{task_num} failed: {e}")
finally:
@@ -6560,6 +6492,7 @@ class HermesCLI:
_launched = self._try_launch_chrome_debug(_port, _plat.system())
if _launched:
# Wait for the port to come up
import time as _time
for _wait in range(10):
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -6569,7 +6502,7 @@ class HermesCLI:
_already_open = True
break
except (OSError, socket.timeout):
time.sleep(0.5)
_time.sleep(0.5)
if _already_open:
print(f" ✓ Chrome launched and listening on port {_port}")
else:
@@ -7049,27 +6982,6 @@ class HermesCLI:
if cost_result.status == "unknown":
print(f" Note: Pricing unknown for {agent.model}")
# Account limits -- fetched off-thread with a hard timeout so slow
# provider APIs don't hang the prompt.
provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
account_snapshot = None
if provider:
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
try:
account_snapshot = _pool.submit(
fetch_account_usage, provider,
base_url=base_url, api_key=api_key,
).result(timeout=10.0)
except (concurrent.futures.TimeoutError, Exception):
account_snapshot = None
account_lines = [f" {line}" for line in render_account_usage_lines(account_snapshot)]
if account_lines:
print()
for line in account_lines:
print(line)
if self.verbose:
logging.getLogger().setLevel(logging.DEBUG)
for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
@@ -7120,6 +7032,7 @@ class HermesCLI:
known state. When a change is detected, triggers _reload_mcp() and
informs the user so they know the tool list has been refreshed.
"""
import time
import yaml as _yaml
CONFIG_WATCH_INTERVAL = 5.0 # seconds between config.yaml stat() calls
@@ -7211,6 +7124,7 @@ class HermesCLI:
# Refresh the agent's tool list so the model can call new tools
if self.agent is not None:
from model_tools import get_tool_definitions
self.agent.tools = get_tool_definitions(
enabled_toolsets=self.agent.enabled_toolsets
if hasattr(self.agent, "enabled_toolsets") else None,
@@ -7293,6 +7207,7 @@ class HermesCLI:
full history of tool calls (not just the current one in the spinner).
"""
if event_type == "tool.completed":
import time as _time
self._tool_start_time = 0.0
# Print stacked scrollback line for "all" / "new" modes
if function_name and self.tool_progress_mode in ("all", "new"):
@@ -7321,6 +7236,7 @@ class HermesCLI:
if event_type != "tool.started":
return
if function_name and not function_name.startswith("_"):
import time as _time
from agent.display import get_tool_emoji
emoji = get_tool_emoji(function_name)
label = preview or function_name
@@ -7329,7 +7245,7 @@ class HermesCLI:
if _pl > 0 and len(label) > _pl:
label = label[:_pl - 3] + "..."
self._spinner_text = f"{emoji} {label}"
self._tool_start_time = time.monotonic()
self._tool_start_time = _time.monotonic()
# Store args for stacked scrollback line on completion
self._pending_tool_info.setdefault(function_name, []).append(
function_args if function_args is not None else {}
@@ -7446,12 +7362,11 @@ class HermesCLI:
self._voice_stop_and_transcribe()
# Audio cue: single beep BEFORE starting stream (avoid CoreAudio conflict)
if self._voice_beeps_enabled():
try:
from tools.voice_mode import play_beep
play_beep(frequency=880, count=1)
except Exception:
pass
try:
from tools.voice_mode import play_beep
play_beep(frequency=880, count=1)
except Exception:
pass
try:
self._voice_recorder.start(on_silence_stop=_on_silence)
@@ -7499,12 +7414,11 @@ class HermesCLI:
wav_path = self._voice_recorder.stop()
# Audio cue: double beep after stream stopped (no CoreAudio conflict)
if self._voice_beeps_enabled():
try:
from tools.voice_mode import play_beep
play_beep(frequency=660, count=2)
except Exception:
pass
try:
from tools.voice_mode import play_beep
play_beep(frequency=660, count=2)
except Exception:
pass
if wav_path is None:
_cprint(f"{_DIM}No speech detected.{_RST}")
@@ -7587,6 +7501,7 @@ class HermesCLI:
try:
from tools.tts_tool import text_to_speech_tool
from tools.voice_mode import play_audio_file
import re
# Strip markdown and non-speech content for cleaner TTS
tts_text = text[:4000] if len(text) > 4000 else text
@@ -7654,17 +7569,6 @@ class HermesCLI:
_cprint(f"Unknown voice subcommand: {subcommand}")
_cprint("Usage: /voice [on|off|tts|status]")
def _voice_beeps_enabled(self) -> bool:
"""Return whether CLI voice mode should play record start/stop beeps."""
try:
from hermes_cli.config import load_config
voice_cfg = load_config().get("voice", {})
if isinstance(voice_cfg, dict):
return bool(voice_cfg.get("beep_enabled", True))
except Exception:
pass
return True
def _enable_voice_mode(self):
"""Enable voice mode after checking requirements."""
if self._voice_mode:
@@ -7974,9 +7878,7 @@ class HermesCLI:
return
selected = state.get("selected", 0)
choices = state.get("choices")
if not isinstance(choices, list):
choices = []
choices = state.get("choices") or []
if not (0 <= selected < len(choices)):
return
@@ -8068,18 +7970,8 @@ class HermesCLI:
choice_wrapped: list[tuple[int, str]] = []
for i, choice in enumerate(choices):
label = choice_labels.get(choice, choice)
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
if i < 9:
num_prefix = str(i + 1)
elif i == 9:
num_prefix = '0'
else:
num_prefix = ' ' # No number for items beyond 10th
if i == selected:
prefix = f' {num_prefix}. '
else:
prefix = f' {num_prefix}. '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
prefix = ' ' if i == selected else ' '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
choice_wrapped.append((i, wrapped))
# Budget vertical space so HSplit never clips the command or choices.
@@ -8370,17 +8262,6 @@ class HermesCLI:
def run_agent():
nonlocal result
# Set callbacks inside the agent thread so thread-local storage
# in terminal_tool is populated for this thread. The main thread
# registration (run() line ~9046) is invisible here because
# _callback_tls is threading.local(). Matches the pattern used
# by acp_adapter/server.py for ACP sessions.
set_sudo_password_callback(self._sudo_password_callback)
set_approval_callback(self._approval_callback)
try:
set_secret_capture_callback(self._secret_capture_callback)
except Exception:
pass
agent_message = _voice_prefix + message if _voice_prefix else message
# Prepend pending model switch note so the model knows about the switch
_msn = getattr(self, '_pending_model_switch_note', None)
@@ -8406,15 +8287,6 @@ class HermesCLI:
"failed": True,
"error": _summary,
}
finally:
# Clear thread-local callbacks so a reused thread doesn't
# hold stale references to a disposed CLI instance.
try:
set_sudo_password_callback(None)
set_approval_callback(None)
set_secret_capture_callback(None)
except Exception:
pass
# Start agent in background thread (daemon so it cannot keep the
# process alive when the user closes the terminal tab — SIGHUP
@@ -8452,7 +8324,8 @@ class HermesCLI:
try:
_dbg = _hermes_home / "interrupt_debug.log"
with open(_dbg, "a") as _f:
_f.write(f"{time.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, "
import time as _t
_f.write(f"{_t.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, "
f"children={len(self.agent._active_children)}, "
f"parent._interrupt={self.agent._interrupt_requested}\n")
for _ci, _ch in enumerate(self.agent._active_children):
@@ -8528,8 +8401,9 @@ class HermesCLI:
# buffer so tool/status lines render ABOVE our response box.
# The flush pushes data into the renderer queue; the short
# sleep lets the renderer actually paint it before we draw.
import time as _time
sys.stdout.flush()
time.sleep(0.15)
_time.sleep(0.15)
# Update history with full conversation
self.conversation_history = result.get("messages", self.conversation_history) if result else self.conversation_history
@@ -9166,17 +9040,6 @@ class HermesCLI:
event.app.current_buffer.reset(append_to_history=True)
return
# Handle /steer while the agent is running immediately on the
# UI thread. Queuing through _pending_input would deadlock the
# steer until after the agent loop finishes (process_loop is
# blocked inside self.chat()), which turns /steer into a
# post-run next-turn message — defeating mid-run injection.
# agent.steer() is thread-safe (holds _pending_steer_lock).
if self._should_handle_steer_command_inline(text, has_images=has_images):
self.process_command(text)
event.app.current_buffer.reset(append_to_history=True)
return
# Snapshot and clear attached images
images = list(self._attached_images)
self._attached_images.clear()
@@ -9195,7 +9058,8 @@ class HermesCLI:
try:
_dbg = _hermes_home / "interrupt_debug.log"
with open(_dbg, "a") as _f:
_f.write(f"{time.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, "
import time as _t
_f.write(f"{_t.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, "
f"agent_running={self._agent_running}\n")
except Exception:
pass
@@ -9274,29 +9138,6 @@ class HermesCLI:
self._clarify_state["selected"] = min(max_idx, self._clarify_state["selected"] + 1)
event.app.invalidate()
# Number keys for quick clarify selection (1-9, 0 for 10th item)
def _make_clarify_number_handler(idx):
def handler(event):
if self._clarify_state and not self._clarify_freetext:
choices = self._clarify_state.get("choices") or []
# Map index to choice (treating "Other" as the last option)
if idx < len(choices):
# Select a numbered choice
self._clarify_state["response_queue"].put(choices[idx])
self._clarify_state = None
self._clarify_freetext = False
event.app.invalidate()
elif idx == len(choices):
# Select "Other" option
self._clarify_freetext = True
event.app.invalidate()
return handler
for _num in range(10):
# 1-9 select items 0-8, 0 selects item 9 (10thitem)
_idx = 9 if _num == 0 else _num - 1
kb.add(str(_num), filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))(_make_clarify_number_handler(_idx))
# --- Dangerous command approval: arrow-key navigation ---
@kb.add('up', filter=Condition(lambda: bool(self._approval_state)))
@@ -9338,20 +9179,6 @@ class HermesCLI:
event.app.current_buffer.reset()
event.app.invalidate()
# Number keys for quick approval selection (1-9, 0 for 10th item)
def _make_approval_number_handler(idx):
def handler(event):
if self._approval_state and idx < len(self._approval_state["choices"]):
self._approval_state["selected"] = idx
self._handle_approval_selection()
event.app.invalidate()
return handler
for _num in range(10):
# 1-9 select items 0-8, 0 selects item 9 (10th item)
_idx = 9 if _num == 0 else _num - 1
kb.add(str(_num), filter=Condition(lambda: bool(self._approval_state)))(_make_approval_number_handler(_idx))
# --- History navigation: up/down browse history in normal input mode ---
# The TextArea is multiline, so by default up/down only move the cursor.
# Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
@@ -9380,7 +9207,8 @@ class HermesCLI:
2. Interrupt the running agent (first press)
3. Force exit (second press within 2s, or when idle)
"""
now = time.time()
import time as _time
now = _time.time()
# Cancel active voice recording.
# Run cancel() in a background thread to prevent blocking the
@@ -9488,11 +9316,12 @@ class HermesCLI:
@kb.add('c-z')
def handle_ctrl_z(event):
"""Handle Ctrl+Z - suspend process to background (Unix only)."""
import sys
if sys.platform == 'win32':
_cprint(f"\n{_DIM}Suspend (Ctrl+Z) is not supported on Windows.{_RST}")
event.app.invalidate()
return
import signal as _sig
import os, signal as _sig
from prompt_toolkit.application import run_in_terminal
from hermes_cli.skin_engine import get_active_skin
agent_name = get_active_skin().get_branding("agent_name", "Hermes Agent")
@@ -9806,29 +9635,31 @@ class HermesCLI:
# extra instructions (sudo countdown, approval navigation, clarify).
# The agent-running interrupt hint is now an inline placeholder above.
def get_hint_text():
import time as _time
if cli_ref._sudo_state:
remaining = max(0, int(cli_ref._sudo_deadline - time.monotonic()))
remaining = max(0, int(cli_ref._sudo_deadline - _time.monotonic()))
return [
('class:hint', ' password hidden · Enter to skip'),
('class:clarify-countdown', f' ({remaining}s)'),
]
if cli_ref._secret_state:
remaining = max(0, int(cli_ref._secret_deadline - time.monotonic()))
remaining = max(0, int(cli_ref._secret_deadline - _time.monotonic()))
return [
('class:hint', ' secret hidden · Enter to skip'),
('class:clarify-countdown', f' ({remaining}s)'),
]
if cli_ref._approval_state:
remaining = max(0, int(cli_ref._approval_deadline - time.monotonic()))
remaining = max(0, int(cli_ref._approval_deadline - _time.monotonic()))
return [
('class:hint', ' ↑/↓ to select, Enter to confirm'),
('class:clarify-countdown', f' ({remaining}s)'),
]
if cli_ref._clarify_state:
remaining = max(0, int(cli_ref._clarify_deadline - time.monotonic()))
remaining = max(0, int(cli_ref._clarify_deadline - _time.monotonic()))
countdown = f' ({remaining}s)' if cli_ref._clarify_deadline else ''
if cli_ref._clarify_freetext:
return [
@@ -9920,32 +9751,14 @@ class HermesCLI:
selected = state.get("selected", 0)
preview_lines = _wrap_panel_text(question, 60)
for i, choice in enumerate(choices):
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
if i < 9:
num_prefix = str(i + 1)
elif i == 9:
num_prefix = '0'
else:
num_prefix = ' '
if i == selected and not cli_ref._clarify_freetext:
prefix = f" {num_prefix}. "
else:
prefix = f" {num_prefix}. "
preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" "))
# "Other" option in preview
other_num = len(choices) + 1
if other_num < 10:
other_num_prefix = str(other_num)
elif other_num == 10:
other_num_prefix = '0'
else:
other_num_prefix = ' '
prefix = " " if i == selected and not cli_ref._clarify_freetext else " "
preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" "))
other_label = (
f" {other_num_prefix}. Other (type below)" if cli_ref._clarify_freetext
else f" {other_num_prefix}. Other (type your answer)" if selected == len(choices)
else f" {other_num_prefix}. Other (type your answer)"
" Other (type below)" if cli_ref._clarify_freetext
else " Other (type your answer)" if selected == len(choices)
else " Other (type your answer)"
)
preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" "))
preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" "))
box_width = _panel_box_width("Hermes needs your input", preview_lines)
inner_text_width = max(8, box_width - 2)
@@ -9953,35 +9766,18 @@ class HermesCLI:
choice_wrapped: list[tuple[int, str]] = []
if choices:
for i, choice in enumerate(choices):
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
if i < 9:
num_prefix = str(i + 1)
elif i == 9:
num_prefix = '0'
else:
num_prefix = ' '
if i == selected and not cli_ref._clarify_freetext:
prefix = f' {num_prefix}. '
else:
prefix = f' {num_prefix}. '
for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "):
prefix = ' ' if i == selected and not cli_ref._clarify_freetext else ' '
for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "):
choice_wrapped.append((i, wrapped))
# Trailing Other row(s)
other_idx = len(choices)
other_num = other_idx + 1
if other_num < 10:
other_num_prefix = str(other_num)
elif other_num == 10:
other_num_prefix = '0'
else:
other_num_prefix = ' '
if selected == other_idx and not cli_ref._clarify_freetext:
other_label_mand = f' {other_num_prefix}. Other (type your answer)'
other_label_mand = ' Other (type your answer)'
elif cli_ref._clarify_freetext:
other_label_mand = f' {other_num_prefix}. Other (type below)'
other_label_mand = ' Other (type below)'
else:
other_label_mand = f' {other_num_prefix}. Other (type your answer)'
other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ")
other_label_mand = ' Other (type your answer)'
other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ")
elif cli_ref._clarify_freetext:
# Freetext-only mode: the guidance line takes the place of choices.
other_wrapped = _wrap_panel_text(
@@ -10046,15 +9842,6 @@ class HermesCLI:
# "Other" option (trailing row(s), only shown when choices exist)
other_idx = len(choices)
# Calculate number prefix for "Other" option
other_num = other_idx + 1
if other_num < 10:
other_num_prefix = str(other_num)
elif other_num == 10:
other_num_prefix = '0'
else:
other_num_prefix = ' '
if selected == other_idx and not cli_ref._clarify_freetext:
other_style = 'class:clarify-selected'
elif cli_ref._clarify_freetext:
@@ -10162,8 +9949,7 @@ class HermesCLI:
if stage == "provider":
title = "⚙ Model Picker — Select Provider"
choices = []
_providers = state.get("providers")
for p in _providers if isinstance(_providers, list) else []:
for p in state.get("providers") or []:
count = p.get("total_models", len(p.get("models", [])))
label = f"{p['name']} ({count} model{'s' if count != 1 else ''})"
if p.get("is_current"):
@@ -10420,20 +10206,22 @@ class HermesCLI:
app._on_resize = _resize_clear_ghosts
def spinner_loop():
import time as _time
last_idle_refresh = 0.0
while not self._should_exit:
if not self._app:
time.sleep(0.1)
_time.sleep(0.1)
continue
if self._command_running:
self._invalidate(min_interval=0.1)
time.sleep(0.1)
_time.sleep(0.1)
else:
now = time.monotonic()
now = _time.monotonic()
if now - last_idle_refresh >= 1.0:
last_idle_refresh = now
self._invalidate(min_interval=1.0)
time.sleep(0.2)
_time.sleep(0.2)
spinner_thread = threading.Thread(target=spinner_loop, daemon=True)
spinner_thread.start()
@@ -10502,7 +10290,8 @@ class HermesCLI:
continue
# Expand paste references back to full content
_paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
import re as _re
_paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
paste_refs = list(_paste_ref_re.finditer(user_input)) if isinstance(user_input, str) else []
if paste_refs:
user_input = self._expand_paste_references(user_input)
@@ -10594,12 +10383,13 @@ class HermesCLI:
try:
if getattr(self, "agent", None) and getattr(self, "_agent_running", False):
self.agent.interrupt(f"received signal {signum}")
import time as _t
try:
_grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
except (TypeError, ValueError):
_grace = 1.5
if _grace > 0:
time.sleep(_grace)
_t.sleep(_grace)
except Exception:
pass # never block signal handling
raise KeyboardInterrupt()
@@ -10632,7 +10422,8 @@ class HermesCLI:
# uv-managed Python, fd 0 can be invalid or unregisterable with the
# asyncio selector, causing "KeyError: '0 is not registered'" (#6393).
try:
os.fstat(0)
import os as _os
_os.fstat(0)
except OSError:
print(
"Error: stdin (fd 0) is not available.\n"
@@ -10925,12 +10716,13 @@ def main(
_agent = getattr(cli, "agent", None)
if _agent is not None:
_agent.interrupt(f"received signal {signum}")
import time as _t
try:
_grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
except (TypeError, ValueError):
_grace = 1.5
if _grace > 0:
time.sleep(_grace)
_t.sleep(_grace)
except Exception:
pass # never block signal handling
raise KeyboardInterrupt()
+46 -54
View File
@@ -9,7 +9,6 @@ import copy
import json
import logging
import tempfile
import threading
import os
import re
import uuid
@@ -35,11 +34,6 @@ except ImportError:
HERMES_DIR = get_hermes_home().resolve()
CRON_DIR = HERMES_DIR / "cron"
JOBS_FILE = CRON_DIR / "jobs.json"
# In-process lock protecting load_jobs→modify→save_jobs cycles.
# Required when tick() runs jobs in parallel threads — without this,
# concurrent mark_job_run / advance_next_run calls can clobber each other.
_jobs_file_lock = threading.Lock()
OUTPUT_DIR = CRON_DIR / "output"
ONESHOT_GRACE_SECONDS = 120
@@ -600,44 +594,43 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
``delivery_error`` is tracked separately from the agent error a job
can succeed (agent produced output) but fail delivery (platform down).
"""
with _jobs_file_lock:
jobs = load_jobs()
for i, job in enumerate(jobs):
if job["id"] == job_id:
now = _hermes_now().isoformat()
job["last_run_at"] = now
job["last_status"] = "ok" if success else "error"
job["last_error"] = error if not success else None
# Track delivery failures separately — cleared on successful delivery
job["last_delivery_error"] = delivery_error
jobs = load_jobs()
for i, job in enumerate(jobs):
if job["id"] == job_id:
now = _hermes_now().isoformat()
job["last_run_at"] = now
job["last_status"] = "ok" if success else "error"
job["last_error"] = error if not success else None
# Track delivery failures separately — cleared on successful delivery
job["last_delivery_error"] = delivery_error
# Increment completed count
if job.get("repeat"):
job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
# Increment completed count
if job.get("repeat"):
job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
# Check if we've hit the repeat limit
times = job["repeat"].get("times")
completed = job["repeat"]["completed"]
if times is not None and times > 0 and completed >= times:
# Remove the job (limit reached)
jobs.pop(i)
save_jobs(jobs)
return
# Compute next run
job["next_run_at"] = compute_next_run(job["schedule"], now)
# Check if we've hit the repeat limit
times = job["repeat"].get("times")
completed = job["repeat"]["completed"]
if times is not None and times > 0 and completed >= times:
# Remove the job (limit reached)
jobs.pop(i)
save_jobs(jobs)
return
# Compute next run
job["next_run_at"] = compute_next_run(job["schedule"], now)
# If no next run (one-shot completed), disable
if job["next_run_at"] is None:
job["enabled"] = False
job["state"] = "completed"
elif job.get("state") != "paused":
job["state"] = "scheduled"
# If no next run (one-shot completed), disable
if job["next_run_at"] is None:
job["enabled"] = False
job["state"] = "completed"
elif job.get("state") != "paused":
job["state"] = "scheduled"
save_jobs(jobs)
return
save_jobs(jobs)
return
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
def advance_next_run(job_id: str) -> bool:
@@ -652,21 +645,20 @@ def advance_next_run(job_id: str) -> bool:
Returns True if next_run_at was advanced, False otherwise.
"""
with _jobs_file_lock:
jobs = load_jobs()
for job in jobs:
if job["id"] == job_id:
kind = job.get("schedule", {}).get("kind")
if kind not in ("cron", "interval"):
return False
now = _hermes_now().isoformat()
new_next = compute_next_run(job["schedule"], now)
if new_next and new_next != job.get("next_run_at"):
job["next_run_at"] = new_next
save_jobs(jobs)
return True
jobs = load_jobs()
for job in jobs:
if job["id"] == job_id:
kind = job.get("schedule", {}).get("kind")
if kind not in ("cron", "interval"):
return False
return False
now = _hermes_now().isoformat()
new_next = compute_next_run(job["schedule"], now)
if new_next and new_next != job.get("next_run_at"):
job["next_run_at"] = new_next
save_jobs(jobs)
return True
return False
return False
def get_due_jobs() -> List[Dict[str, Any]]:
+33 -70
View File
@@ -252,11 +252,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
future = asyncio.run_coroutine_threadsafe(coro, loop)
try:
result = future.result(timeout=30)
except TimeoutError:
future.cancel()
raise
result = future.result(timeout=30)
if result and not getattr(result, "success", True):
logger.warning(
"Job '%s': media send failed for %s: %s",
@@ -386,11 +382,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
loop,
)
try:
send_result = future.result(timeout=60)
except TimeoutError:
future.cancel()
raise
send_result = future.result(timeout=60)
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(
@@ -430,6 +422,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
# prevent "coroutine was never awaited" RuntimeWarning, then retry in a
# fresh thread that has no running loop.
coro.close()
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
result = future.result(timeout=30)
@@ -754,17 +747,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
# scheduler process — every job this process runs is a cron job.
os.environ["HERMES_CRON_SESSION"] = "1"
# Use ContextVars for per-job session/delivery state so parallel jobs
# don't clobber each other's targets (os.environ is process-global).
from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP
_ctx_tokens = set_session_vars(
platform=origin["platform"] if origin else "",
chat_id=str(origin["chat_id"]) if origin else "",
chat_name=origin.get("chat_name", "") if origin else "",
)
try:
# Inject origin context so the agent's send_message tool knows the chat.
# Must be INSIDE the try block so the finally cleanup always runs.
if origin:
os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
if origin.get("chat_name"):
os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
# Re-read .env and config.yaml fresh every run so provider/key
# changes take effect without a gateway restart.
from dotenv import load_dotenv
@@ -775,10 +765,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
delivery_target = _resolve_delivery_target(job)
if delivery_target:
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
if delivery_target.get("thread_id") is not None:
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
model = job.get("model") or os.getenv("HERMES_MODEL") or ""
@@ -817,13 +807,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
prefill_messages = None
prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
if prefill_file:
import json as _json
pfpath = Path(prefill_file).expanduser()
if not pfpath.is_absolute():
pfpath = _hermes_home / pfpath
if pfpath.exists():
try:
with open(pfpath, "r", encoding="utf-8") as _pf:
prefill_messages = json.load(_pf)
prefill_messages = _json.load(_pf)
if not isinstance(prefill_messages, list):
prefill_messages = None
except Exception as e:
@@ -1021,8 +1012,16 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
return False, output, "", error_msg
finally:
# Clean up ContextVar session/delivery state for this job.
clear_session_vars(_ctx_tokens)
# Clean up injected env vars so they don't leak to other jobs
for key in (
"HERMES_SESSION_PLATFORM",
"HERMES_SESSION_CHAT_ID",
"HERMES_SESSION_CHAT_NAME",
"HERMES_CRON_AUTO_DELIVER_PLATFORM",
"HERMES_CRON_AUTO_DELIVER_CHAT_ID",
"HERMES_CRON_AUTO_DELIVER_THREAD_ID",
):
os.environ.pop(key, None)
if _session_db:
try:
_session_db.end_session(_cron_session_id, "cron_complete")
@@ -1075,41 +1074,15 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
if verbose:
logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))
# Advance next_run_at for all recurring jobs FIRST, under the file lock,
# before any execution begins. This preserves at-most-once semantics.
executed = 0
for job in due_jobs:
advance_next_run(job["id"])
# Resolve max parallel workers: env var > config.yaml > unbounded.
# Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour.
_max_workers: Optional[int] = None
try:
_env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip()
if _env_par:
_max_workers = int(_env_par) or None
except (ValueError, TypeError):
logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded")
if _max_workers is None:
try:
_ucfg = load_config() or {}
_cfg_par = (
_ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {}
).get("max_parallel_jobs")
if _cfg_par is not None:
_max_workers = int(_cfg_par) or None
except Exception:
pass
# For recurring jobs (cron/interval), advance next_run_at to the
# next future occurrence BEFORE execution. This way, if the
# process crashes mid-run, the job won't re-fire on restart.
# One-shot jobs are left alone so they can retry on restart.
advance_next_run(job["id"])
if verbose:
logger.info(
"Running %d job(s) in parallel (max_workers=%s)",
len(due_jobs),
_max_workers if _max_workers else "unbounded",
)
def _process_job(job: dict) -> bool:
"""Run one due job end-to-end: execute, save, deliver, mark."""
try:
success, output, final_response, error = run_job(job)
output_file = save_job_output(job["id"], output)
@@ -1141,23 +1114,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
mark_job_run(job["id"], success, error, delivery_error=delivery_error)
return True
executed += 1
except Exception as e:
logger.error("Error processing job %s: %s", job['id'], e)
mark_job_run(job["id"], False, str(e))
return False
# Run all due jobs concurrently, each in its own ContextVar copy
# so session/delivery state stays isolated per-thread.
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
_futures = []
for job in due_jobs:
_ctx = contextvars.copy_context()
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
_results = [f.result() for f in _futures]
return sum(_results)
return executed
finally:
if fcntl:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
+1
View File
@@ -53,6 +53,7 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str)
try:
loop = asyncio.get_running_loop()
# We're in an async context -- need to run in thread
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(
handle_function_call, tool_name, arguments, task_id
+3 -23
View File
@@ -576,14 +576,6 @@ def load_gateway_config() -> GatewayConfig:
bridged["free_response_channels"] = platform_cfg["free_response_channels"]
if "mention_patterns" in platform_cfg:
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
if "dm_policy" in platform_cfg:
bridged["dm_policy"] = platform_cfg["dm_policy"]
if "allow_from" in platform_cfg:
bridged["allow_from"] = platform_cfg["allow_from"]
if "group_policy" in platform_cfg:
bridged["group_policy"] = platform_cfg["group_policy"]
if "group_allow_from" in platform_cfg:
bridged["group_allow_from"] = platform_cfg["group_allow_from"]
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
if "channel_prompts" in platform_cfg:
@@ -670,7 +662,8 @@ def load_gateway_config() -> GatewayConfig:
if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
import json as _json
os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"])
frc = telegram_cfg.get("free_response_chats")
if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
if isinstance(frc, list):
@@ -707,20 +700,6 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(frc, list):
frc = ",".join(str(v) for v in frc)
os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"):
os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower()
af = whatsapp_cfg.get("allow_from")
if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"):
if isinstance(af, list):
af = ",".join(str(v) for v in af)
os.environ["WHATSAPP_ALLOWED_USERS"] = str(af)
if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"):
os.environ["WHATSAPP_GROUP_POLICY"] = str(whatsapp_cfg["group_policy"]).lower()
gaf = whatsapp_cfg.get("group_allow_from")
if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"):
if isinstance(gaf, list):
gaf = ",".join(str(v) for v in gaf)
os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf)
# DingTalk settings → env vars (env vars take precedence)
dingtalk_cfg = yaml_cfg.get("dingtalk", {})
@@ -1258,6 +1237,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
if legacy_home:
qq_home = legacy_home
qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
import logging
logging.getLogger(__name__).warning(
"QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
"in your .env for consistency with the platform key."
+48 -58
View File
@@ -323,6 +323,7 @@ class ResponseStore:
).fetchone()
if row is None:
return None
import time
self._conn.execute(
"UPDATE responses SET accessed_at = ? WHERE response_id = ?",
(time.time(), response_id),
@@ -332,6 +333,7 @@ class ResponseStore:
def put(self, response_id: str, data: Dict[str, Any]) -> None:
"""Store a response, evicting the oldest if at capacity."""
import time
self._conn.execute(
"INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)",
(response_id, json.dumps(data, default=str), time.time()),
@@ -467,12 +469,12 @@ class _IdempotencyCache:
def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
from collections import OrderedDict
self._store = OrderedDict()
self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {}
self._ttl = ttl_seconds
self._max = max_items
def _purge(self):
now = time.time()
import time as _t
now = _t.time()
expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
for k in expired:
self._store.pop(k, None)
@@ -484,27 +486,11 @@ class _IdempotencyCache:
item = self._store.get(key)
if item and item["fp"] == fingerprint:
return item["resp"]
inflight_key = (key, fingerprint)
task = self._inflight.get(inflight_key)
if task is None:
async def _compute_and_store():
resp = await compute_coro()
import time as _t
self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
self._purge()
return resp
task = asyncio.create_task(_compute_and_store())
self._inflight[inflight_key] = task
def _clear_inflight(done_task: "asyncio.Task[Any]") -> None:
if self._inflight.get(inflight_key) is done_task:
self._inflight.pop(inflight_key, None)
task.add_done_callback(_clear_inflight)
return await asyncio.shield(task)
resp = await compute_coro()
import time as _t
self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
self._purge()
return resp
_idem_cache = _IdempotencyCache()
@@ -534,30 +520,6 @@ def _derive_chat_session_id(
return f"api-{digest}"
_CRON_AVAILABLE = False
try:
from cron.jobs import (
list_jobs as _cron_list,
get_job as _cron_get,
create_job as _cron_create,
update_job as _cron_update,
remove_job as _cron_remove,
pause_job as _cron_pause,
resume_job as _cron_resume,
trigger_job as _cron_trigger,
)
_CRON_AVAILABLE = True
except ImportError:
_cron_list = None
_cron_get = None
_cron_create = None
_cron_update = None
_cron_remove = None
_cron_pause = None
_cron_resume = None
_cron_trigger = None
class APIServerAdapter(BasePlatformAdapter):
"""
OpenAI-compatible HTTP API server adapter.
@@ -1887,16 +1849,44 @@ class APIServerAdapter(BasePlatformAdapter):
# Cron jobs API
# ------------------------------------------------------------------
# Check cron module availability once (not per-request)
_CRON_AVAILABLE = False
try:
from cron.jobs import (
list_jobs as _cron_list,
get_job as _cron_get,
create_job as _cron_create,
update_job as _cron_update,
remove_job as _cron_remove,
pause_job as _cron_pause,
resume_job as _cron_resume,
trigger_job as _cron_trigger,
)
# Wrap as staticmethod to prevent descriptor binding — these are plain
# module functions, not instance methods. Without this, self._cron_*()
# injects ``self`` as the first positional argument and every call
# raises TypeError.
_cron_list = staticmethod(_cron_list)
_cron_get = staticmethod(_cron_get)
_cron_create = staticmethod(_cron_create)
_cron_update = staticmethod(_cron_update)
_cron_remove = staticmethod(_cron_remove)
_cron_pause = staticmethod(_cron_pause)
_cron_resume = staticmethod(_cron_resume)
_cron_trigger = staticmethod(_cron_trigger)
_CRON_AVAILABLE = True
except ImportError:
pass
_JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}")
# Allowed fields for update — prevents clients injecting arbitrary keys
_UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"}
_MAX_NAME_LENGTH = 200
_MAX_PROMPT_LENGTH = 5000
@staticmethod
def _check_jobs_available() -> Optional["web.Response"]:
def _check_jobs_available(self) -> Optional["web.Response"]:
"""Return error response if cron module isn't available."""
if not _CRON_AVAILABLE:
if not self._CRON_AVAILABLE:
return web.json_response(
{"error": "Cron module not available"}, status=501,
)
@@ -1921,7 +1911,7 @@ class APIServerAdapter(BasePlatformAdapter):
return cron_err
try:
include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1")
jobs = _cron_list(include_disabled=include_disabled)
jobs = self._cron_list(include_disabled=include_disabled)
return web.json_response({"jobs": jobs})
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
@@ -1969,7 +1959,7 @@ class APIServerAdapter(BasePlatformAdapter):
if repeat is not None:
kwargs["repeat"] = repeat
job = _cron_create(**kwargs)
job = self._cron_create(**kwargs)
return web.json_response({"job": job})
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
@@ -1986,7 +1976,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_get(job_id)
job = self._cron_get(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2019,7 +2009,7 @@ class APIServerAdapter(BasePlatformAdapter):
return web.json_response(
{"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400,
)
job = _cron_update(job_id, sanitized)
job = self._cron_update(job_id, sanitized)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2038,7 +2028,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
success = _cron_remove(job_id)
success = self._cron_remove(job_id)
if not success:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"ok": True})
@@ -2057,7 +2047,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_pause(job_id)
job = self._cron_pause(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2076,7 +2066,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_resume(job_id)
job = self._cron_resume(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2095,7 +2085,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_trigger(job_id)
job = self._cron_trigger(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
+18 -8
View File
@@ -19,8 +19,6 @@ import uuid
from abc import ABC, abstractmethod
from urllib.parse import urlsplit
from utils import normalize_proxy_url
logger = logging.getLogger(__name__)
@@ -161,13 +159,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
if platform_env_var:
value = (os.environ.get(platform_env_var) or "").strip()
if value:
return normalize_proxy_url(value)
return value
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = (os.environ.get(key) or "").strip()
if value:
return normalize_proxy_url(value)
return normalize_proxy_url(_detect_macos_system_proxy())
return value
return _detect_macos_system_proxy()
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -393,9 +391,12 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
if not is_safe_url(url):
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
import asyncio
import httpx
_log = logging.getLogger(__name__)
import logging as _logging
_log = _logging.getLogger(__name__)
last_exc = None
async with httpx.AsyncClient(
timeout=30.0,
follow_redirects=True,
@@ -413,6 +414,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
response.raise_for_status()
return cache_image_from_bytes(response.content, ext)
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < retries:
@@ -428,6 +430,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
await asyncio.sleep(wait)
continue
raise
raise last_exc
def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -507,9 +510,12 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
if not is_safe_url(url):
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
import asyncio
import httpx
_log = logging.getLogger(__name__)
import logging as _logging
_log = _logging.getLogger(__name__)
last_exc = None
async with httpx.AsyncClient(
timeout=30.0,
follow_redirects=True,
@@ -527,6 +533,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
response.raise_for_status()
return cache_audio_from_bytes(response.content, ext)
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < retries:
@@ -542,6 +549,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
await asyncio.sleep(wait)
continue
raise
raise last_exc
# ---------------------------------------------------------------------------
@@ -1343,7 +1351,7 @@ class BasePlatformAdapter(ABC):
# Extract MEDIA:<path> tags, allowing optional whitespace after the colon
# and quoted/backticked paths for LLM-formatted outputs.
media_pattern = re.compile(
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
)
for match in media_pattern.finditer(content):
path = match.group("path").strip()
@@ -1779,6 +1787,8 @@ class BasePlatformAdapter(ABC):
HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
"""
import random
mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
if mode == "off":
return 0.0
+1 -1
View File
@@ -75,7 +75,7 @@ def _redact(text: str) -> str:
def check_bluebubbles_requirements() -> bool:
try:
import aiohttp # noqa: F401
import httpx # noqa: F401
import httpx as _httpx # noqa: F401
except ImportError:
return False
return True
+11 -17
View File
@@ -541,6 +541,7 @@ class DiscordAdapter(BasePlatformAdapter):
# ctypes.util.find_library fails on macOS with Homebrew-installed libs,
# so fall back to known Homebrew paths if needed.
if not opus_path:
import sys
_homebrew_paths = (
"/opt/homebrew/lib/libopus.dylib", # Apple Silicon
"/usr/local/lib/libopus.dylib", # Intel Mac
@@ -1421,7 +1422,8 @@ class DiscordAdapter(BasePlatformAdapter):
speaking_user_ids: set = set()
receiver = self._voice_receivers.get(guild_id)
if receiver:
now = time.monotonic()
import time as _time
now = _time.monotonic()
with receiver._lock:
for ssrc, last_t in receiver._last_packet_time.items():
# Consider "speaking" if audio received within last 2 seconds
@@ -2960,17 +2962,6 @@ class DiscordAdapter(BasePlatformAdapter):
parent_channel_id = self._get_parent_channel_id(message.channel)
is_voice_linked_channel = False
# Save mention-stripped text before auto-threading since create_thread()
# can clobber message.content, breaking /command detection in channels.
raw_content = message.content.strip()
normalized_content = raw_content
mention_prefix = False
if self._client.user and self._client.user in message.mentions:
mention_prefix = True
normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip()
normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip()
message.content = normalized_content
if not isinstance(message.channel, discord.DMChannel):
channel_ids = {str(message.channel.id)}
if parent_channel_id:
@@ -3008,8 +2999,13 @@ class DiscordAdapter(BasePlatformAdapter):
in_bot_thread = is_thread and thread_id in self._threads
if require_mention and not is_free_channel and not in_bot_thread:
if self._client.user not in message.mentions and not mention_prefix:
if self._client.user not in message.mentions:
return
if self._client.user and self._client.user in message.mentions:
message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip()
message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
# Auto-thread: when enabled, automatically create a thread for every
# @mention in a text channel so each conversation is isolated (like Slack).
# Messages already inside threads or DMs are unaffected.
@@ -3031,7 +3027,7 @@ class DiscordAdapter(BasePlatformAdapter):
# Determine message type
msg_type = MessageType.TEXT
if normalized_content.startswith("/"):
if message.content.startswith("/"):
msg_type = MessageType.COMMAND
elif message.attachments:
# Check attachment types
@@ -3171,9 +3167,7 @@ class DiscordAdapter(BasePlatformAdapter):
att.filename, e, exc_info=True,
)
# Use normalized_content (saved before auto-threading) instead of message.content,
# to detect /slash commands in channel messages.
event_text = normalized_content
event_text = message.content
if pending_text_injection:
event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection
+1
View File
@@ -410,6 +410,7 @@ class MattermostAdapter(BasePlatformAdapter):
logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)
import asyncio
import aiohttp
last_exc = None
+8 -3
View File
@@ -1086,8 +1086,11 @@ class QQAdapter(BasePlatformAdapter):
return MessageType.VIDEO
if "image" in first_type or "photo" in first_type:
return MessageType.PHOTO
# Unknown content type with an attachment — don't assume PHOTO
# to prevent non-image files from being sent to vision analysis.
logger.debug(
"Unknown media content_type '%s', defaulting to TEXT",
"[%s] Unknown media content_type '%s', defaulting to TEXT",
self._log_tag,
first_type,
)
return MessageType.TEXT
@@ -1823,12 +1826,14 @@ class QQAdapter(BasePlatformAdapter):
body["file_name"] = file_name
# Retry transient upload failures
last_exc = None
for attempt in range(3):
try:
return await self._api_request(
"POST", path, body, timeout=FILE_UPLOAD_TIMEOUT
)
except RuntimeError as exc:
last_exc = exc
err_msg = str(exc)
if any(
kw in err_msg
@@ -1837,8 +1842,8 @@ class QQAdapter(BasePlatformAdapter):
raise
if attempt < 2:
await asyncio.sleep(1.5 * (attempt + 1))
else:
raise
raise last_exc # type: ignore[misc]
# Maximum time (seconds) to wait for reconnection before giving up on send.
_RECONNECT_WAIT_SECONDS = 15.0
+8
View File
@@ -1600,9 +1600,11 @@ class SlackAdapter(BasePlatformAdapter):
async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
"""Download a Slack file using the bot token for auth, with retry."""
import asyncio
import httpx
bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
last_exc = None
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
for attempt in range(3):
@@ -1632,6 +1634,7 @@ class SlackAdapter(BasePlatformAdapter):
from gateway.platforms.base import cache_image_from_bytes
return cache_image_from_bytes(response.content, ext)
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < 2:
@@ -1640,12 +1643,15 @@ class SlackAdapter(BasePlatformAdapter):
await asyncio.sleep(1.5 * (attempt + 1))
continue
raise
raise last_exc
async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
"""Download a Slack file and return raw bytes, with retry."""
import asyncio
import httpx
bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
last_exc = None
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
for attempt in range(3):
@@ -1657,6 +1663,7 @@ class SlackAdapter(BasePlatformAdapter):
response.raise_for_status()
return response.content
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < 2:
@@ -1665,6 +1672,7 @@ class SlackAdapter(BasePlatformAdapter):
await asyncio.sleep(1.5 * (attempt + 1))
continue
raise
raise last_exc
# ── Channel mention gating ─────────────────────────────────────────────
+10 -37
View File
@@ -496,13 +496,6 @@ class TelegramAdapter(BasePlatformAdapter):
"[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)",
self.name, name, chat_id,
)
elif "not a forum" in error_text or "forums_disabled" in error_text:
logger.warning(
"[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. "
"The user must open the DM with this bot in Telegram, tap the bot name "
"at the top, and enable 'Topics' in chat settings before topics can be created.",
self.name, name, chat_id,
)
else:
logger.warning(
"[%s] Failed to create DM topic '%s' in chat %s: %s",
@@ -794,28 +787,8 @@ class TelegramAdapter(BasePlatformAdapter):
# Telegram pushes updates to our HTTP endpoint. This
# enables cloud platforms (Fly.io, Railway) to auto-wake
# suspended machines on inbound HTTP traffic.
#
# SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
# python-telegram-bot passes secret_token=None and the
# webhook endpoint accepts any HTTP POST — attackers can
# inject forged updates as if from Telegram. Refuse to
# start rather than silently run in fail-open mode.
# See GHSA-3vpc-7q5r-276h.
webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
if not webhook_secret:
raise RuntimeError(
"TELEGRAM_WEBHOOK_SECRET is required when "
"TELEGRAM_WEBHOOK_URL is set. Without it, the "
"webhook endpoint accepts forged updates from "
"anyone who can reach it — see "
"https://github.com/NousResearch/hermes-agent/"
"security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
"Generate a secret and set it in your .env:\n"
" export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
"Then register it with Telegram when setting the "
"webhook via setWebhook's secret_token parameter."
)
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
from urllib.parse import urlparse
webhook_path = urlparse(webhook_url).path or "/telegram"
@@ -1733,6 +1706,7 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
import os
if not os.path.exists(audio_path):
return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
@@ -1781,6 +1755,7 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
import os
if not os.path.exists(image_path):
return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))
@@ -2093,7 +2068,7 @@ class TelegramAdapter(BasePlatformAdapter):
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
return _ph(f'[{display}]({url})')
text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text)
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
# 4) Convert markdown headers (## Title) → bold *Title*
def _convert_header(m):
@@ -2353,16 +2328,10 @@ class TelegramAdapter(BasePlatformAdapter):
DMs remain unrestricted. Group/supergroup messages are accepted when:
- the chat is explicitly allowlisted in ``free_response_chats``
- ``require_mention`` is disabled
- the message is a command
- the message replies to the bot
- the bot is @mentioned
- the text/caption matches a configured regex wake-word pattern
When ``require_mention`` is enabled, slash commands are not given
special treatment they must pass the same mention/reply checks
as any other group message. Users can still trigger commands via
the Telegram bot menu (``/command@botname``) or by explicitly
mentioning the bot (``@botname /command``), both of which are
recognised as mentions by :meth:`_message_mentions_bot`.
"""
if not self._is_group_chat(message):
return True
@@ -2377,6 +2346,8 @@ class TelegramAdapter(BasePlatformAdapter):
return True
if not self._telegram_require_mention():
return True
if is_command:
return True
if self._is_reply_to_bot(message):
return True
if self._message_mentions_bot(message):
@@ -2845,11 +2816,13 @@ class TelegramAdapter(BasePlatformAdapter):
logger.info("[Telegram] Analyzing sticker at %s", cached_path)
from tools.vision_tools import vision_analyze_tool
import json as _json
result_json = await vision_analyze_tool(
image_url=cached_path,
user_prompt=STICKER_VISION_PROMPT,
)
result = json.loads(result_json)
result = _json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "a sticker")
+5 -10
View File
@@ -624,16 +624,13 @@ class WeComAdapter(BasePlatformAdapter):
msgtype = str(body.get("msgtype") or "").lower()
if msgtype == "mixed":
_raw_mixed = body.get("mixed")
mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
_raw_items = mixed.get("msg_item")
items = _raw_items if isinstance(_raw_items, list) else []
mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
for item in items:
if not isinstance(item, dict):
continue
if str(item.get("msgtype") or "").lower() == "text":
_raw_text = item.get("text")
text_block = _raw_text if isinstance(_raw_text, dict) else {}
text_block = item.get("text") if isinstance(item.get("text"), dict) else {}
content = str(text_block.get("content") or "").strip()
if content:
text_parts.append(content)
@@ -675,10 +672,8 @@ class WeComAdapter(BasePlatformAdapter):
msgtype = str(body.get("msgtype") or "").lower()
if msgtype == "mixed":
_raw_mixed = body.get("mixed")
mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
_raw_items = mixed.get("msg_item")
items = _raw_items if isinstance(_raw_items, list) else []
mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
for item in items:
if not isinstance(item, dict):
continue
+12 -79
View File
@@ -66,37 +66,6 @@ def _kill_port_process(port: int) -> None:
except Exception:
pass
def _terminate_bridge_process(proc, *, force: bool = False) -> None:
"""Terminate the bridge process using process-tree semantics where possible."""
if _IS_WINDOWS:
cmd = ["taskkill", "/PID", str(proc.pid), "/T"]
if force:
cmd.append("/F")
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=10,
)
except FileNotFoundError:
if force:
proc.kill()
else:
proc.terminate()
return
if result.returncode != 0:
details = (result.stderr or result.stdout or "").strip()
raise OSError(details or f"taskkill failed for PID {proc.pid}")
return
import signal
sig = signal.SIGTERM if not force else signal.SIGKILL
os.killpg(os.getpgid(proc.pid), sig)
import sys
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
@@ -149,10 +118,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
- bridge_script: Path to the Node.js bridge script
- bridge_port: Port for HTTP communication (default: 3000)
- session_path: Path to store WhatsApp session data
- dm_policy: "open" | "allowlist" | "disabled" how DMs are handled (default: "open")
- allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist")
- group_policy: "open" | "allowlist" | "disabled" which groups are processed (default: "open")
- group_allow_from: List of group JIDs allowed (when group_policy="allowlist")
"""
# WhatsApp message limits — practical UX limit, not protocol max.
@@ -175,10 +140,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
))
self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
self._dm_policy = str(config.extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")).strip().lower()
self._allow_from = self._coerce_allow_list(config.extra.get("allow_from") or config.extra.get("allowFrom"))
self._group_policy = str(config.extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")).strip().lower()
self._group_allow_from = self._coerce_allow_list(config.extra.get("group_allow_from") or config.extra.get("groupAllowFrom"))
self._mention_patterns = self._compile_mention_patterns()
self._message_queue: asyncio.Queue = asyncio.Queue()
self._bridge_log_fh = None
@@ -202,33 +163,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
return {str(part).strip() for part in raw if str(part).strip()}
return {part.strip() for part in str(raw).split(",") if part.strip()}
@staticmethod
def _coerce_allow_list(raw) -> set[str]:
"""Parse allow_from / group_allow_from from config or env var."""
if raw is None:
return set()
if isinstance(raw, list):
return {str(part).strip() for part in raw if str(part).strip()}
return {part.strip() for part in str(raw).split(",") if part.strip()}
def _is_dm_allowed(self, sender_id: str) -> bool:
"""Check whether a DM from the given sender should be processed."""
if self._dm_policy == "disabled":
return False
if self._dm_policy == "allowlist":
return sender_id in self._allow_from
# "open" — all DMs allowed
return True
def _is_group_allowed(self, chat_id: str) -> bool:
"""Check whether a group chat should be processed."""
if self._group_policy == "disabled":
return False
if self._group_policy == "allowlist":
return chat_id in self._group_allow_from
# "open" — all groups allowed
return True
def _compile_mention_patterns(self):
patterns = self.config.extra.get("mention_patterns")
if patterns is None:
@@ -321,18 +255,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
return cleaned.strip() or text
def _should_process_message(self, data: Dict[str, Any]) -> bool:
is_group = data.get("isGroup", False)
if is_group:
chat_id = str(data.get("chatId") or "")
if not self._is_group_allowed(chat_id):
return False
else:
sender_id = str(data.get("senderId") or data.get("from") or "")
if not self._is_dm_allowed(sender_id):
return False
# DMs that pass the policy gate are always processed
if not data.get("isGroup"):
return True
# Group messages: check mention / free-response settings
chat_id = str(data.get("chatId") or "")
if chat_id in self._whatsapp_free_response_chats():
return True
@@ -399,6 +323,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
# Check if bridge is already running and connected
import aiohttp
import asyncio
try:
async with aiohttp.ClientSession() as session:
async with session.get(
@@ -567,14 +492,22 @@ class WhatsAppAdapter(BasePlatformAdapter):
"""Stop the WhatsApp bridge and clean up any orphaned processes."""
if self._bridge_process:
try:
# Kill the entire process group so child node processes die too
import signal
try:
_terminate_bridge_process(self._bridge_process, force=False)
if _IS_WINDOWS:
self._bridge_process.terminate()
else:
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
except (ProcessLookupError, PermissionError):
self._bridge_process.terminate()
await asyncio.sleep(1)
if self._bridge_process.poll() is None:
try:
_terminate_bridge_process(self._bridge_process, force=True)
if _IS_WINDOWS:
self._bridge_process.kill()
else:
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
except (ProcessLookupError, PermissionError):
self._bridge_process.kill()
except Exception as e:
+87 -157
View File
@@ -30,8 +30,6 @@ from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List
from agent.account_usage import fetch_account_usage, render_account_usage_lines
# --- Agent cache tuning ---------------------------------------------------
# Bounds the per-session AIAgent cache to prevent unbounded growth in
# long-lived gateways (each AIAgent holds LLM clients, tool schemas,
@@ -88,7 +86,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
# Resolve Hermes home directory (respects HERMES_HOME override)
from hermes_constants import get_hermes_home
from utils import atomic_yaml_write, base_url_host_matches, is_truthy_value
from utils import atomic_yaml_write, is_truthy_value
_hermes_home = get_hermes_home()
# Load environment variables from ~/.hermes/.env first.
@@ -281,7 +279,6 @@ from gateway.session import (
build_session_context,
build_session_context_prompt,
build_session_key,
is_shared_multi_user_session,
)
from gateway.delivery import DeliveryRouter
from gateway.platforms.base import (
@@ -1269,6 +1266,7 @@ class GatewayRunner:
the prefill_messages_file key in ~/.hermes/config.yaml.
Relative paths are resolved from ~/.hermes/.
"""
import json as _json
file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
if not file_path:
try:
@@ -1290,7 +1288,7 @@ class GatewayRunner:
return []
try:
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
data = _json.load(f)
if not isinstance(data, list):
logger.warning("Prefill messages file must contain a JSON array: %s", path)
return []
@@ -1962,39 +1960,6 @@ class GatewayRunner:
"or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)."
)
# Discover Python plugins before shell hooks so plugin block
# decisions take precedence in tie cases. The CLI startup path
# does this via an explicit call in hermes_cli/main.py; the
# gateway lazily imports run_agent inside per-request handlers,
# so the discover_plugins() side-effect in model_tools.py is NOT
# guaranteed to have run by the time we reach this point.
try:
from hermes_cli.plugins import discover_plugins
discover_plugins()
except Exception:
logger.debug(
"plugin discovery failed at gateway startup", exc_info=True,
)
# Register declarative shell hooks from cli-config.yaml. Gateway
# has no TTY, so consent has to come from one of the three opt-in
# channels (--accept-hooks on launch, HERMES_ACCEPT_HOOKS env var,
# or hooks_auto_accept: true in config.yaml). We pass
# accept_hooks=False here and let register_from_config resolve
# the effective value from env + config itself — the CLI-side
# registration already honored --accept-hooks, and re-reading
# hooks_auto_accept here would just duplicate that lookup.
# Failures are logged but must never block gateway startup.
try:
from hermes_cli.config import load_config
from agent.shell_hooks import register_from_config
register_from_config(load_config(), accept_hooks=False)
except Exception:
logger.debug(
"shell-hook registration failed at gateway startup",
exc_info=True,
)
# Discover and load event hooks
self.hooks.discover_and_load()
@@ -3277,9 +3242,10 @@ class GatewayRunner:
return "Usage: /queue <prompt>"
adapter = self.adapters.get(source.platform)
if adapter:
queued_event = MessageEvent(
from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
queued_event = _ME(
text=queued_text,
message_type=MessageType.TEXT,
message_type=_MT.TEXT,
source=event.source,
message_id=event.message_id,
channel_prompt=event.channel_prompt,
@@ -3301,9 +3267,10 @@ class GatewayRunner:
# Agent hasn't started yet — queue as turn-boundary fallback.
adapter = self.adapters.get(source.platform)
if adapter:
queued_event = MessageEvent(
from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
queued_event = _ME(
text=steer_text,
message_type=MessageType.TEXT,
message_type=_MT.TEXT,
source=event.source,
message_id=event.message_id,
channel_prompt=event.channel_prompt,
@@ -3323,9 +3290,10 @@ class GatewayRunner:
# Running agent is missing or lacks steer() — fall back to queue.
adapter = self.adapters.get(source.platform)
if adapter:
queued_event = MessageEvent(
from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
queued_event = _ME(
text=steer_text,
message_type=MessageType.TEXT,
message_type=_MT.TEXT,
source=event.source,
message_id=event.message_id,
channel_prompt=event.channel_prompt,
@@ -3674,8 +3642,9 @@ class GatewayRunner:
plugin_handler = get_plugin_command_handler(command.replace("_", "-"))
if plugin_handler:
user_args = event.get_command_args().strip()
import asyncio as _aio
result = plugin_handler(user_args)
if asyncio.iscoroutine(result):
if _aio.iscoroutine(result):
result = await result
return str(result) if result else None
except Exception as e:
@@ -3792,12 +3761,12 @@ class GatewayRunner:
history = history or []
message_text = event.text or ""
_is_shared_multi_user = is_shared_multi_user_session(
source,
group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False),
_is_shared_thread = (
source.chat_type != "dm"
and source.thread_id
and not getattr(self.config, "thread_sessions_per_user", False)
)
if _is_shared_multi_user and source.user_name:
if _is_shared_thread and source.user_name:
message_text = f"[{source.user_name}] {message_text}"
if event.media_urls:
@@ -3857,7 +3826,9 @@ class GatewayRunner:
for i, path in enumerate(event.media_urls):
mtype = event.media_types[i] if i < len(event.media_types) else ""
if mtype in ("", "application/octet-stream"):
_ext = os.path.splitext(path)[1].lower()
import os as _os2
_ext = _os2.path.splitext(path)[1].lower()
if _ext in _TEXT_EXTENSIONS:
mtype = "text/plain"
else:
@@ -3867,10 +3838,13 @@ class GatewayRunner:
if not mtype.startswith(("application/", "text/")):
continue
basename = os.path.basename(path)
import os as _os
import re as _re
basename = _os.path.basename(path)
parts = basename.split("_", 2)
display_name = parts[2] if len(parts) >= 3 else basename
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
display_name = _re.sub(r'[^\w.\- ]', '_', display_name)
if mtype.startswith("text/"):
context_note = (
@@ -3887,14 +3861,14 @@ class GatewayRunner:
message_text = f"{context_note}\n\n{message_text}"
if getattr(event, "reply_to_text", None) and event.reply_to_message_id:
# Always inject the reply-to pointer — even when the quoted text
# already appears in history. The prefix isn't deduplication, it's
# disambiguation: it tells the agent *which* prior message the user
# is referencing. History can contain the same or similar text
# multiple times, and without an explicit pointer the agent has to
# guess (or answer for both subjects). Token overhead is minimal.
reply_snippet = event.reply_to_text[:500]
message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'
found_in_history = any(
reply_snippet[:200] in (msg.get("content") or "")
for msg in history
if msg.get("role") in ("assistant", "user", "tool")
)
if not found_in_history:
message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'
if "@" in message_text:
try:
@@ -3902,11 +3876,9 @@ class GatewayRunner:
from agent.model_metadata import get_model_context_length
_msg_cwd = os.environ.get("TERMINAL_CWD", os.path.expanduser("~"))
_msg_runtime = _resolve_runtime_agent_kwargs()
_msg_ctx_len = get_model_context_length(
self._model,
base_url=self._base_url or _msg_runtime.get("base_url") or "",
api_key=_msg_runtime.get("api_key") or "",
base_url=self._base_url or "",
)
_ctx_result = await preprocess_context_references_async(
message_text,
@@ -5168,6 +5140,7 @@ class GatewayRunner:
# Save the requester's routing info so the new gateway process can
# notify them once it comes back online.
try:
import json as _json
notify_data = {
"platform": event.source.platform.value if event.source.platform else None,
"chat_id": event.source.chat_id,
@@ -5175,7 +5148,7 @@ class GatewayRunner:
if event.source.thread_id:
notify_data["thread_id"] = event.source.thread_id
(_hermes_home / ".restart_notify.json").write_text(
json.dumps(notify_data)
_json.dumps(notify_data)
)
except Exception as e:
logger.debug("Failed to write restart notify file: %s", e)
@@ -5186,14 +5159,16 @@ class GatewayRunner:
# marker persists so the new gateway can still detect a delayed
# /restart redelivery from Telegram. Overwritten on every /restart.
try:
import json as _json
import time as _time
dedup_data = {
"platform": event.source.platform.value if event.source.platform else None,
"requested_at": time.time(),
"requested_at": _time.time(),
}
if event.platform_update_id is not None:
dedup_data["update_id"] = event.platform_update_id
(_hermes_home / ".restart_last_processed.json").write_text(
json.dumps(dedup_data)
_json.dumps(dedup_data)
)
except Exception as e:
logger.debug("Failed to write restart dedup marker: %s", e)
@@ -5241,10 +5216,12 @@ class GatewayRunner:
return False
try:
import json as _json
import time as _time
marker_path = _hermes_home / ".restart_last_processed.json"
if not marker_path.exists():
return False
data = json.loads(marker_path.read_text())
data = _json.loads(marker_path.read_text())
except Exception:
return False
@@ -5258,7 +5235,7 @@ class GatewayRunner:
# swallow a fresh /restart from the user.
requested_at = data.get("requested_at")
if isinstance(requested_at, (int, float)):
if time.time() - requested_at > 300:
if _time.time() - requested_at > 300:
return False
return event.platform_update_id <= recorded_uid
@@ -5649,7 +5626,7 @@ class GatewayRunner:
# Cache notice
cache_enabled = (
(base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
or result.api_mode == "anthropic_messages"
)
if cache_enabled:
@@ -7264,38 +7241,6 @@ class GatewayRunner:
if cached:
agent = cached[0]
# Resolve provider/base_url/api_key for the account-usage fetch.
# Prefer the live agent; fall back to persisted billing data on the
# SessionDB row so `/usage` still returns account info between turns
# when no agent is resident.
provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
if not provider and getattr(self, "_session_db", None) is not None:
try:
_entry_for_billing = self.session_store.get_or_create_session(source)
persisted = self._session_db.get_session(_entry_for_billing.session_id) or {}
except Exception:
persisted = {}
provider = provider or persisted.get("billing_provider")
base_url = base_url or persisted.get("billing_base_url")
# Fetch account usage off the event loop so slow provider APIs don't
# block the gateway. Failures are non-fatal -- account_lines stays [].
account_lines: list[str] = []
if provider:
try:
account_snapshot = await asyncio.to_thread(
fetch_account_usage,
provider,
base_url=base_url,
api_key=api_key,
)
except Exception:
account_snapshot = None
if account_snapshot:
account_lines = render_account_usage_lines(account_snapshot, markdown=True)
if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
lines = []
@@ -7353,10 +7298,6 @@ class GatewayRunner:
if ctx.compression_count:
lines.append(f"Compressions: {ctx.compression_count}")
if account_lines:
lines.append("")
lines.extend(account_lines)
return "\n".join(lines)
# No agent at all -- check session history for a rough count
@@ -7366,26 +7307,23 @@ class GatewayRunner:
from agent.model_metadata import estimate_messages_tokens_rough
msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
approx = estimate_messages_tokens_rough(msgs)
lines = [
"📊 **Session Info**",
f"Messages: {len(msgs)}",
f"Estimated context: ~{approx:,} tokens",
"_(Detailed usage available after the first agent response)_",
]
if account_lines:
lines.append("")
lines.extend(account_lines)
return "\n".join(lines)
if account_lines:
return "\n".join(account_lines)
return (
f"📊 **Session Info**\n"
f"Messages: {len(msgs)}\n"
f"Estimated context: ~{approx:,} tokens\n"
f"_(Detailed usage available after the first agent response)_"
)
return "No usage data available for this session."
async def _handle_insights_command(self, event: MessageEvent) -> str:
"""Handle /insights command -- show usage insights and analytics."""
import asyncio as _asyncio
args = event.get_command_args().strip()
# Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
args = re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args)
import re as _re
args = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args)
days = 30
source = None
@@ -7414,7 +7352,7 @@ class GatewayRunner:
from hermes_state import SessionDB
from agent.insights import InsightsEngine
loop = asyncio.get_running_loop()
loop = _asyncio.get_running_loop()
def _run_insights():
db = SessionDB()
@@ -7772,6 +7710,9 @@ class GatewayRunner:
the messenger. The user's next message is intercepted by
``_handle_message`` and written to ``.update_response``.
"""
import json
import re as _re
pending_path = _hermes_home / ".update_pending.json"
claimed_path = _hermes_home / ".update_pending.claimed.json"
output_path = _hermes_home / ".update_output.txt"
@@ -7816,7 +7757,7 @@ class GatewayRunner:
return
def _strip_ansi(text: str) -> str:
return re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)
return _re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)
bytes_sent = 0
last_stream_time = loop.time()
@@ -7964,6 +7905,9 @@ class GatewayRunner:
cannot resolve the adapter (e.g. after a gateway restart where the
platform hasn't reconnected yet).
"""
import json
import re as _re
pending_path = _hermes_home / ".update_pending.json"
claimed_path = _hermes_home / ".update_pending.claimed.json"
output_path = _hermes_home / ".update_output.txt"
@@ -8009,7 +7953,7 @@ class GatewayRunner:
if adapter and chat_id:
# Strip ANSI escape codes for clean display
output = re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
if output:
if len(output) > 3500:
output = "" + output[-3500:]
@@ -8042,12 +7986,14 @@ class GatewayRunner:
async def _send_restart_notification(self) -> None:
"""Notify the chat that initiated /restart that the gateway is back."""
import json as _json
notify_path = _hermes_home / ".restart_notify.json"
if not notify_path.exists():
return
try:
data = json.loads(notify_path.read_text())
data = _json.loads(notify_path.read_text())
platform_str = data.get("platform")
chat_id = data.get("chat_id")
thread_id = data.get("thread_id")
@@ -8133,6 +8079,7 @@ class GatewayRunner:
The enriched message string with vision descriptions prepended.
"""
from tools.vision_tools import vision_analyze_tool
import json as _json
analysis_prompt = (
"Describe everything visible in this image in thorough detail. "
@@ -8148,7 +8095,7 @@ class GatewayRunner:
image_url=path,
user_prompt=analysis_prompt,
)
result = json.loads(result_json)
result = _json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "")
enriched_parts.append(
@@ -8207,6 +8154,7 @@ class GatewayRunner:
return disabled_note
from tools.transcription_tools import transcribe_audio
import asyncio
enriched_parts = []
for path in audio_paths:
@@ -8342,6 +8290,7 @@ class GatewayRunner:
if not adapter:
return
try:
from gateway.platforms.base import MessageEvent, MessageType
synth_event = MessageEvent(
text=synth_text,
message_type=MessageType.TEXT,
@@ -8446,6 +8395,7 @@ class GatewayRunner:
break
if adapter and source.chat_id:
try:
from gateway.platforms.base import MessageEvent, MessageType
synth_event = MessageEvent(
text=synth_text,
message_type=MessageType.TEXT,
@@ -8967,6 +8917,7 @@ class GatewayRunner:
if _streaming_enabled:
try:
from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
from gateway.config import Platform
_adapter = self.adapters.get(source.platform)
if _adapter:
_adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
@@ -9250,7 +9201,8 @@ class GatewayRunner:
if args:
from agent.display import get_tool_preview_max_len
_pl = get_tool_preview_max_len()
args_str = json.dumps(args, ensure_ascii=False, default=str)
import json as _json
args_str = _json.dumps(args, ensure_ascii=False, default=str)
# When tool_preview_length is 0 (default), don't truncate
# in verbose mode — the user explicitly asked for full
# detail. Platform message-length limits handle the rest.
@@ -9316,7 +9268,8 @@ class GatewayRunner:
# Skip tool progress for platforms that don't support message
# editing (e.g. iMessage/BlueBubbles) — each progress update
# would become a separate message bubble, which is noisy.
if type(adapter).edit_message is BasePlatformAdapter.edit_message:
from gateway.platforms.base import BasePlatformAdapter as _BaseAdapter
if type(adapter).edit_message is _BaseAdapter.edit_message:
while not progress_queue.empty():
try:
progress_queue.get_nowait()
@@ -10764,6 +10717,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
# The PID file is scoped to HERMES_HOME, so future multi-profile
# setups (each profile using a distinct HERMES_HOME) will naturally
# allow concurrent instances without tripping this guard.
import time as _time
from gateway.status import get_running_pid, remove_pid_file, terminate_pid
existing_pid = get_running_pid()
if existing_pid is not None and existing_pid != os.getpid():
@@ -10803,7 +10757,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
for _ in range(20):
try:
os.kill(existing_pid, 0)
time.sleep(0.5)
_time.sleep(0.5)
except (ProcessLookupError, PermissionError):
break # Process is gone
else:
@@ -10814,16 +10768,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
)
try:
terminate_pid(existing_pid, force=True)
time.sleep(0.5)
_time.sleep(0.5)
except (ProcessLookupError, PermissionError, OSError):
pass
remove_pid_file()
# remove_pid_file() is a no-op when the PID doesn't match.
# Force-unlink to cover the old-process-crashed case.
try:
(get_hermes_home() / "gateway.pid").unlink(missing_ok=True)
except Exception:
pass
# Clean up any takeover marker the old process didn't consume
# (e.g. SIGKILL'd before its shutdown handler could read it).
try:
@@ -10962,30 +10910,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
else:
logger.info("Skipping signal handlers (not running in main thread).")
# Claim the PID file BEFORE bringing up any platform adapters.
# This closes the --replace race window: two concurrent `gateway run
# --replace` invocations both pass the termination-wait above, but
# only the winner of the O_CREAT|O_EXCL race below will ever open
# Telegram polling, Discord gateway sockets, etc. The loser exits
# cleanly before touching any external service.
import atexit
from gateway.status import write_pid_file, remove_pid_file, get_running_pid
_current_pid = get_running_pid()
if _current_pid is not None and _current_pid != os.getpid():
logger.error(
"Another gateway instance (PID %d) started during our startup. "
"Exiting to avoid double-running.", _current_pid
)
return False
try:
write_pid_file()
except FileExistsError:
logger.error(
"PID file race lost to another gateway instance. Exiting."
)
return False
atexit.register(remove_pid_file)
# Start the gateway
success = await runner.start()
if not success:
@@ -10995,6 +10919,12 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
return True
# Write PID file so CLI can detect gateway is running
import atexit
from gateway.status import write_pid_file, remove_pid_file
write_pid_file()
atexit.register(remove_pid_file)
# Start background cron ticker so scheduled jobs fire automatically.
# Pass the event loop so cron delivery can use live adapters (E2EE support).
cron_stop = threading.Event()
+11 -37
View File
@@ -152,7 +152,6 @@ class SessionContext:
source: SessionSource
connected_platforms: List[Platform]
home_channels: Dict[Platform, HomeChannel]
shared_multi_user_session: bool = False
# Session metadata
session_key: str = ""
@@ -167,7 +166,6 @@ class SessionContext:
"home_channels": {
p.value: hc.to_dict() for p, hc in self.home_channels.items()
},
"shared_multi_user_session": self.shared_multi_user_session,
"session_key": self.session_key,
"session_id": self.session_id,
"created_at": self.created_at.isoformat() if self.created_at else None,
@@ -242,16 +240,18 @@ def build_session_context_prompt(
lines.append(f"**Channel Topic:** {context.source.chat_topic}")
# User identity.
# In shared multi-user sessions (shared threads OR shared non-thread groups
# when group_sessions_per_user=False), multiple users contribute to the same
# conversation. Don't pin a single user name in the system prompt — it
# changes per-turn and would bust the prompt cache. Instead, note that
# this is a multi-user session; individual sender names are prefixed on
# each user message by the gateway.
if context.shared_multi_user_session:
session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
# In shared thread sessions (non-DM with thread_id), multiple users
# contribute to the same conversation. Don't pin a single user name
# in the system prompt — it changes per-turn and would bust the prompt
# cache. Instead, note that this is a multi-user thread; individual
# sender names are prefixed on each user message by the gateway.
_is_shared_thread = (
context.source.chat_type != "dm"
and context.source.thread_id
)
if _is_shared_thread:
lines.append(
f"**Session type:** {session_label} — messages are prefixed "
"**Session type:** Multi-user thread — messages are prefixed "
"with [sender name]. Multiple users may participate."
)
elif context.source.user_name:
@@ -467,27 +467,6 @@ class SessionEntry:
)
def is_shared_multi_user_session(
source: SessionSource,
*,
group_sessions_per_user: bool = True,
thread_sessions_per_user: bool = False,
) -> bool:
"""Return True when a non-DM session is shared across participants.
Mirrors the isolation rules in :func:`build_session_key`:
- DMs are never shared.
- Threads are shared unless ``thread_sessions_per_user`` is True.
- Non-thread group/channel sessions are shared unless
``group_sessions_per_user`` is True (default: True = isolated).
"""
if source.chat_type == "dm":
return False
if source.thread_id:
return not thread_sessions_per_user
return not group_sessions_per_user
def build_session_key(
source: SessionSource,
group_sessions_per_user: bool = True,
@@ -1259,11 +1238,6 @@ def build_session_context(
source=source,
connected_platforms=connected,
home_channels=home_channels,
shared_multi_user_session=is_shared_multi_user_session(
source,
group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
),
)
if session_entry:
-9
View File
@@ -56,12 +56,6 @@ _SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNS
_SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
_SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)
# Cron auto-delivery vars — set per-job in run_job() so concurrent jobs
# don't clobber each other's delivery targets.
_CRON_AUTO_DELIVER_PLATFORM: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_PLATFORM", default=_UNSET)
_CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET)
_CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET)
_VAR_MAP = {
"HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
"HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
@@ -70,9 +64,6 @@ _VAR_MAP = {
"HERMES_SESSION_USER_ID": _SESSION_USER_ID,
"HERMES_SESSION_USER_NAME": _SESSION_USER_NAME,
"HERMES_SESSION_KEY": _SESSION_KEY,
"HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM,
"HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID,
"HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID,
}
+2 -22
View File
@@ -225,28 +225,8 @@ def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
def write_pid_file() -> None:
"""Write the current process PID and metadata to the gateway PID file.
Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace
invocations race: exactly one process wins and the rest get
FileExistsError.
"""
path = _get_pid_path()
path.parent.mkdir(parents=True, exist_ok=True)
record = json.dumps(_build_pid_record())
try:
fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
except FileExistsError:
raise # Let caller decide: another gateway is racing us
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
f.write(record)
except Exception:
try:
path.unlink(missing_ok=True)
except OSError:
pass
raise
"""Write the current process PID and metadata to the gateway PID file."""
_write_json_file(_get_pid_path(), _build_pid_record())
def write_runtime_status(
+63 -37
View File
@@ -152,23 +152,6 @@ def auth_add_command(args) -> None:
pool = load_pool(provider)
# Clear ALL suppressions for this provider — re-adding a credential is
# a strong signal the user wants auth re-enabled. This covers env:*
# (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli,
# device_code (codex), etc. One consistent re-engagement pattern.
# Matches the Codex device_code re-link pattern that predates this.
if not provider.startswith(CUSTOM_POOL_PREFIX):
try:
from hermes_cli.auth import (
_load_auth_store,
unsuppress_credential_source,
)
suppressed = _load_auth_store().get("suppressed_sources", {})
for src in list(suppressed.get(provider, []) or []):
unsuppress_credential_source(provider, src)
except Exception:
pass
if requested_type == AUTH_TYPE_API_KEY:
token = (getattr(args, "api_key", None) or "").strip()
if not token:
@@ -355,28 +338,71 @@ def auth_remove_command(args) -> None:
raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
print(f"Removed {provider} credential #{index} ({removed.label})")
# Unified removal dispatch. Every credential source Hermes reads from
# (env vars, external OAuth files, auth.json blocks, custom config)
# has a RemovalStep registered in agent.credential_sources. The step
# handles its source-specific cleanup and we centralise suppression +
# user-facing output here so every source behaves identically from
# the user's perspective.
from agent.credential_sources import find_removal_step
from hermes_cli.auth import suppress_credential_source
# If this was an env-seeded credential, also clear the env var from .env
# so it doesn't get re-seeded on the next load_pool() call.
if removed.source.startswith("env:"):
env_var = removed.source[len("env:"):]
if env_var:
from hermes_cli.config import remove_env_value
cleared = remove_env_value(env_var)
if cleared:
print(f"Cleared {env_var} from .env")
step = find_removal_step(provider, removed.source)
if step is None:
# Unregistered source — e.g. "manual", which has nothing external
# to clean up. The pool entry is already gone; we're done.
return
# If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
# clear the underlying auth store / credential file so it doesn't get
# re-seeded on the next load_pool() call.
elif provider == "openai-codex" and (
removed.source == "device_code" or removed.source.endswith(":device_code")
):
# Codex tokens live in TWO places: the Hermes auth store and
# ~/.codex/auth.json (the Codex CLI shared file). On every refresh,
# refresh_codex_oauth_pure() writes to both. So clearing only the
# Hermes auth store is not enough — _seed_from_singletons() will
# auto-import from ~/.codex/auth.json on the next load_pool() and
# the removal is instantly undone. Mark the source as suppressed
# so auto-import is skipped; leave ~/.codex/auth.json untouched so
# the Codex CLI itself keeps working.
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
suppress_credential_source,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
print(f"Cleared {provider} OAuth tokens from auth store")
suppress_credential_source(provider, "device_code")
print("Suppressed openai-codex device_code source — it will not be re-seeded.")
print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
print("Run `hermes auth add openai-codex` to re-enable if needed.")
result = step.remove_fn(provider, removed)
for line in result.cleaned:
print(line)
if result.suppress:
suppress_credential_source(provider, removed.source)
for line in result.hints:
print(line)
elif removed.source == "device_code" and provider == "nous":
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
print(f"Cleared {provider} OAuth tokens from auth store")
elif removed.source == "hermes_pkce" and provider == "anthropic":
from hermes_constants import get_hermes_home
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
if oauth_file.exists():
oauth_file.unlink()
print("Cleared Hermes Anthropic OAuth credentials")
elif removed.source == "claude_code" and provider == "anthropic":
from hermes_cli.auth import suppress_credential_source
suppress_credential_source(provider, "claude_code")
print("Suppressed claude_code credential — it will not be re-seeded.")
print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
print("Run `hermes auth add anthropic` to re-enable if needed.")
def auth_reset_command(args) -> None:
+8 -24
View File
@@ -924,22 +924,12 @@ class SlashCommandCompleter(Completer):
display_meta=meta,
)
# If the user typed @file: / @folder: (or just @file / @folder with
# no colon yet), delegate to path completions. Accepting the bare
# form lets the picker surface directories as soon as the user has
# typed `@folder`, without requiring them to first accept the static
# `@folder:` hint and re-trigger completion.
# If the user typed @file: or @folder:, delegate to path completions
for prefix in ("@file:", "@folder:"):
bare = prefix[:-1]
if word == bare or word.startswith(prefix):
want_dir = prefix == "@folder:"
path_part = '' if word == bare else word[len(prefix):]
if word.startswith(prefix):
path_part = word[len(prefix):] or "."
expanded = os.path.expanduser(path_part)
if not expanded or expanded == ".":
search_dir, match_prefix = ".", ""
elif expanded.endswith("/"):
if expanded.endswith("/"):
search_dir, match_prefix = expanded, ""
else:
search_dir = os.path.dirname(expanded) or "."
@@ -955,21 +945,15 @@ class SlashCommandCompleter(Completer):
for entry in sorted(entries):
if match_prefix and not entry.lower().startswith(prefix_lower):
continue
full_path = os.path.join(search_dir, entry)
is_dir = os.path.isdir(full_path)
# `@folder:` must only surface directories; `@file:` only
# regular files. Without this filter `@folder:` listed
# every .env / .gitignore in the cwd, defeating the
# explicit prefix and confusing users expecting a
# directory picker.
if want_dir != is_dir:
continue
if count >= limit:
break
full_path = os.path.join(search_dir, entry)
is_dir = os.path.isdir(full_path)
display_path = os.path.relpath(full_path)
suffix = "/" if is_dir else ""
kind = "folder" if is_dir else "file"
meta = "dir" if is_dir else _file_size_label(full_path)
completion = f"{prefix}{display_path}{suffix}"
completion = f"@{kind}:{display_path}{suffix}"
yield Completion(
completion,
start_position=-len(word),
+6 -63
View File
@@ -387,26 +387,6 @@ DEFAULT_CONFIG = {
# (terminal and execute_code). Skill-declared required_environment_variables
# are passed through automatically; this list is for non-skill use cases.
"env_passthrough": [],
# Extra files to source in the login shell when building the
# per-session environment snapshot. Use this when tools like nvm,
# pyenv, asdf, or custom PATH entries are registered by files that
# a bash login shell would skip — most commonly ``~/.bashrc``
# (bash doesn't source bashrc in non-interactive login mode) or
# zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
# Paths support ``~`` / ``${VAR}``. Missing files are silently
# skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
# snapshot shell is bash (this is the ``auto_source_bashrc``
# behaviour — disable with that key if you want strict login-only
# semantics).
"shell_init_files": [],
# When true (default), Hermes sources ``~/.bashrc`` in the login
# shell used to build the environment snapshot. This captures
# PATH additions, shell functions, and aliases defined in the
# user's bashrc — which a plain ``bash -l -c`` would otherwise
# miss because bash skips bashrc in non-interactive login mode.
# Turn this off if you have a bashrc that misbehaves when sourced
# non-interactively (e.g. one that hard-exits on TTY checks).
"auto_source_bashrc": True,
"docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
"docker_forward_env": [],
# Explicit environment variables to set inside Docker containers.
@@ -665,7 +645,6 @@ DEFAULT_CONFIG = {
"record_key": "ctrl+b",
"max_recording_seconds": 120,
"auto_tts": False,
"beep_enabled": True, # Play record start/stop beeps in CLI voice mode
"silence_threshold": 200, # RMS below this = silence (0-32767)
"silence_duration": 3.0, # Seconds of silence before auto-stop
},
@@ -712,12 +691,6 @@ DEFAULT_CONFIG = {
# independent of the parent's max_iterations)
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
# "low", "minimal", "none" (empty = inherit parent's level)
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
# Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
# and _get_orchestrator_enabled). Values are clamped to [1, 3] with a
# warning log if out of range.
"max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
"orchestrator_enabled": True, # kill switch for role="orchestrator"
},
# Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -730,20 +703,6 @@ DEFAULT_CONFIG = {
# always goes to ~/.hermes/skills/.
"skills": {
"external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"]
# Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md
# content with the absolute skill directory and the active session id
# before the agent sees it. Lets skill authors reference bundled
# scripts without the agent having to join paths.
"template_vars": True,
# Pre-execute inline shell snippets written as !`cmd` in SKILL.md
# body. Their stdout is inlined into the skill message before the
# agent reads it, so skills can inject dynamic context (dates, git
# state, detected tool versions, …). Off by default because any
# content from the skill author runs on the host without approval;
# only enable for skill sources you trust.
"inline_shell": False,
# Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
"inline_shell_timeout": 10,
},
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
@@ -814,21 +773,6 @@ DEFAULT_CONFIG = {
"command_allowlist": [],
# User-defined quick commands that bypass the agent loop (type: exec only)
"quick_commands": {},
# Shell-script hooks — declarative bridge that invokes shell scripts
# on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call,
# subagent_stop, etc.). Each entry maps an event name to a list of
# {matcher, command, timeout} dicts. First registration of a new
# command prompts the user for consent; subsequent runs reuse the
# stored approval from ~/.hermes/shell-hooks-allowlist.json.
# See `website/docs/user-guide/features/hooks.md` for schema + examples.
"hooks": {},
# Auto-accept shell-hook registrations without a TTY prompt. Also
# toggleable per-invocation via --accept-hooks or HERMES_ACCEPT_HOOKS=1.
# Gateway / cron / non-interactive runs need this (or one of the other
# channels) to pick up newly-added hooks.
"hooks_auto_accept": False,
# Custom personalities — add your own entries here
# Supports string format: {"name": "system prompt"}
# Or dict format: {"name": {"description": "...", "system_prompt": "...", "tone": "...", "style": "..."}}
@@ -852,11 +796,6 @@ DEFAULT_CONFIG = {
# Wrap delivered cron responses with a header (task name) and footer
# ("The agent cannot see this message"). Set to false for clean output.
"wrap_response": True,
# Maximum number of due jobs to run in parallel per tick.
# null/0 = unbounded (limited only by thread count).
# 1 = serial (pre-v0.9 behaviour).
# Also overridable via HERMES_CRON_MAX_PARALLEL env var.
"max_parallel_jobs": None,
},
# execute_code settings — controls the tool used for programmatic tool calls.
@@ -890,7 +829,7 @@ DEFAULT_CONFIG = {
},
# Config schema version - bump this when adding new required fields
"_config_version": 22,
"_config_version": 21,
}
# =============================================================================
@@ -2255,6 +2194,7 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
if not issues:
return
import sys
lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"]
for ci in issues:
marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m"
@@ -2269,6 +2209,7 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non
These env vars are deprecated the canonical setting is terminal.cwd
in config.yaml. Prints a migration hint to stderr.
"""
import os, sys
messaging_cwd = os.environ.get("MESSAGING_CWD")
terminal_cwd_env = os.environ.get("TERMINAL_CWD")
@@ -2611,7 +2552,8 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
# Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins.
grandfathered: List[str] = []
try:
user_plugins_dir = get_hermes_home() / "plugins"
from hermes_constants import get_hermes_home as _ghome
user_plugins_dir = _ghome() / "plugins"
if user_plugins_dir.is_dir():
for child in sorted(user_plugins_dir.iterdir()):
if not child.is_dir():
@@ -3276,6 +3218,7 @@ def _check_non_ascii_credential(key: str, value: str) -> str:
bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})")
sanitized = value.encode("ascii", errors="ignore").decode("ascii")
import sys
print(
f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n"
f" This usually happens when copy-pasting from a PDF, rich-text editor,\n"
+1 -2
View File
@@ -30,7 +30,6 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")
from hermes_cli.colors import Colors, color
from hermes_constants import OPENROUTER_MODELS_URL
from utils import base_url_host_matches
_PROVIDER_ENV_HINTS = (
@@ -953,7 +952,7 @@ def run_doctor(args):
_base = _to_openai_base_url(_base)
_url = (_base.rstrip("/") + "/models") if _base else _default_url
_headers = {"Authorization": f"Bearer {_key}"}
if base_url_host_matches(_base, "api.kimi.com"):
if "api.kimi.com" in _url.lower():
_headers["User-Agent"] = "KimiCLI/1.30.0"
_resp = httpx.get(
_url,
+1 -50
View File
@@ -3,7 +3,6 @@
from __future__ import annotations
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
@@ -15,26 +14,6 @@ from dotenv import load_dotenv
# pure ASCII (they become HTTP header values).
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
# Names we've already warned about during this process, so repeated
# load_hermes_dotenv() calls (user env + project env, gateway hot-reload,
# tests) don't spam the same warning multiple times.
_WARNED_KEYS: set[str] = set()
def _format_offending_chars(value: str, limit: int = 3) -> str:
"""Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
seen: list[str] = []
for ch in value:
if ord(ch) > 127:
label = f"U+{ord(ch):04X}"
if ch.isprintable():
label += f" ({ch!r})"
if label not in seen:
seen.append(label)
if len(seen) >= limit:
break
return ", ".join(seen)
def _sanitize_loaded_credentials() -> None:
"""Strip non-ASCII characters from credential env vars in os.environ.
@@ -42,42 +21,14 @@ def _sanitize_loaded_credentials() -> None:
Called after dotenv loads so the rest of the codebase never sees
non-ASCII API keys. Only touches env vars whose names end with
known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
Emits a one-line warning to stderr when characters are stripped.
Silent stripping would mask copy-paste corruption (Unicode lookalike
glyphs from PDFs / rich-text editors, ZWSP from web pages) as opaque
provider-side "invalid API key" errors (see #6843).
"""
for key, value in list(os.environ.items()):
if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
continue
try:
value.encode("ascii")
continue
except UnicodeEncodeError:
pass
cleaned = value.encode("ascii", errors="ignore").decode("ascii")
os.environ[key] = cleaned
if key in _WARNED_KEYS:
continue
_WARNED_KEYS.add(key)
stripped = len(value) - len(cleaned)
detail = _format_offending_chars(value) or "non-printable"
print(
f" Warning: {key} contained {stripped} non-ASCII character"
f"{'s' if stripped != 1 else ''} ({detail}) — stripped so the "
f"key can be sent as an HTTP header.",
file=sys.stderr,
)
print(
" This usually means the key was copy-pasted from a PDF, "
"rich-text editor, or web page that substituted lookalike\n"
" Unicode glyphs for ASCII letters. If authentication fails "
"(e.g. \"API key not valid\"), re-copy the key from the\n"
" provider's dashboard and run `hermes setup` (or edit the "
".env file in a plain-text editor).",
file=sys.stderr,
)
os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
+4
View File
@@ -994,6 +994,8 @@ def get_systemd_linger_status() -> tuple[bool | None, str]:
if not is_linux():
return None, "not supported on this platform"
import shutil
if not shutil.which("loginctl"):
return None, "loginctl not found"
@@ -1345,6 +1347,7 @@ def _ensure_linger_enabled() -> None:
return
import getpass
import shutil
username = getpass.getuser()
linger_file = Path(f"/var/lib/systemd/linger/{username}")
@@ -1653,6 +1656,7 @@ def get_launchd_label() -> str:
def _launchd_domain() -> str:
import os
return f"gui/{os.getuid()}"
-385
View File
@@ -1,385 +0,0 @@
"""hermes hooks — inspect and manage shell-script hooks.
Usage::
hermes hooks list
hermes hooks test <event> [--for-tool X] [--payload-file F]
hermes hooks revoke <command>
hermes hooks doctor
Consent records live under ``~/.hermes/shell-hooks-allowlist.json`` and
hook definitions come from the ``hooks:`` block in ``~/.hermes/config.yaml``
(the same config read by the CLI / gateway at startup).
This module is a thin CLI shell over :mod:`agent.shell_hooks`; every
shared concern (payload serialisation, response parsing, allowlist
format) lives there.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
def hooks_command(args) -> None:
"""Entry point for ``hermes hooks`` — dispatches to the requested action."""
sub = getattr(args, "hooks_action", None)
if not sub:
print("Usage: hermes hooks {list|test|revoke|doctor}")
print("Run 'hermes hooks --help' for details.")
return
if sub in ("list", "ls"):
_cmd_list(args)
elif sub == "test":
_cmd_test(args)
elif sub in ("revoke", "remove", "rm"):
_cmd_revoke(args)
elif sub == "doctor":
_cmd_doctor(args)
else:
print(f"Unknown hooks subcommand: {sub}")
# ---------------------------------------------------------------------------
# list
# ---------------------------------------------------------------------------
def _cmd_list(_args) -> None:
from hermes_cli.config import load_config
from agent import shell_hooks
specs = shell_hooks.iter_configured_hooks(load_config())
if not specs:
print("No shell hooks configured in ~/.hermes/config.yaml.")
print("See `hermes hooks --help` or")
print(" website/docs/user-guide/features/hooks.md")
print("for the config schema and worked examples.")
return
by_event: Dict[str, List] = {}
for spec in specs:
by_event.setdefault(spec.event, []).append(spec)
allowlist = shell_hooks.load_allowlist()
approved = {
(e.get("event"), e.get("command"))
for e in allowlist.get("approvals", [])
if isinstance(e, dict)
}
print(f"Configured shell hooks ({len(specs)} total):\n")
for event in sorted(by_event.keys()):
print(f" [{event}]")
for spec in by_event[event]:
is_approved = (spec.event, spec.command) in approved
status = "✓ allowed" if is_approved else "✗ not allowlisted"
matcher_part = f" matcher={spec.matcher!r}" if spec.matcher else ""
print(
f" - {spec.command}{matcher_part} "
f"(timeout={spec.timeout}s, {status})"
)
if is_approved:
entry = shell_hooks.allowlist_entry_for(spec.event, spec.command)
if entry and entry.get("approved_at"):
print(f" approved_at: {entry['approved_at']}")
mtime_now = shell_hooks.script_mtime_iso(spec.command)
mtime_at = entry.get("script_mtime_at_approval")
if mtime_now and mtime_at and mtime_now > mtime_at:
print(
f" ⚠ script modified since approval "
f"(was {mtime_at}, now {mtime_now}) — "
f"run `hermes hooks doctor` to re-validate"
)
print()
# ---------------------------------------------------------------------------
# test
# ---------------------------------------------------------------------------
# Synthetic kwargs matching the real invoke_hook() call sites — these are
# passed verbatim to agent.shell_hooks.run_once(), which routes them through
# the same _serialize_payload() that production firings use. That way the
# stdin a script sees under `hermes hooks test` and `hermes hooks doctor`
# is identical in shape to what it will see at runtime.
_DEFAULT_PAYLOADS = {
"pre_tool_call": {
"tool_name": "terminal",
"args": {"command": "echo hello"},
"session_id": "test-session",
"task_id": "test-task",
"tool_call_id": "test-call",
},
"post_tool_call": {
"tool_name": "terminal",
"args": {"command": "echo hello"},
"session_id": "test-session",
"task_id": "test-task",
"tool_call_id": "test-call",
"result": '{"output": "hello"}',
},
"pre_llm_call": {
"session_id": "test-session",
"user_message": "What is the weather?",
"conversation_history": [],
"is_first_turn": True,
"model": "gpt-4",
"platform": "cli",
},
"post_llm_call": {
"session_id": "test-session",
"model": "gpt-4",
"platform": "cli",
},
"on_session_start": {"session_id": "test-session"},
"on_session_end": {"session_id": "test-session"},
"on_session_finalize": {"session_id": "test-session"},
"on_session_reset": {"session_id": "test-session"},
"pre_api_request": {
"session_id": "test-session",
"task_id": "test-task",
"platform": "cli",
"model": "claude-sonnet-4-6",
"provider": "anthropic",
"base_url": "https://api.anthropic.com",
"api_mode": "anthropic_messages",
"api_call_count": 1,
"message_count": 4,
"tool_count": 12,
"approx_input_tokens": 2048,
"request_char_count": 8192,
"max_tokens": 4096,
},
"post_api_request": {
"session_id": "test-session",
"task_id": "test-task",
"platform": "cli",
"model": "claude-sonnet-4-6",
"provider": "anthropic",
"base_url": "https://api.anthropic.com",
"api_mode": "anthropic_messages",
"api_call_count": 1,
"api_duration": 1.234,
"finish_reason": "stop",
"message_count": 4,
"response_model": "claude-sonnet-4-6",
"usage": {"input_tokens": 2048, "output_tokens": 512},
"assistant_content_chars": 1200,
"assistant_tool_call_count": 0,
},
"subagent_stop": {
"parent_session_id": "parent-sess",
"child_role": None,
"child_summary": "Synthetic summary for hooks test",
"child_status": "completed",
"duration_ms": 1234,
},
}
def _cmd_test(args) -> None:
from hermes_cli.config import load_config
from hermes_cli.plugins import VALID_HOOKS
from agent import shell_hooks
event = args.event
if event not in VALID_HOOKS:
print(f"Unknown event: {event!r}")
print(f"Valid events: {', '.join(sorted(VALID_HOOKS))}")
return
# Synthetic kwargs in the same shape invoke_hook() would pass. Merged
# with --for-tool (overrides tool_name) and --payload-file (extra kwargs).
payload = dict(_DEFAULT_PAYLOADS.get(event, {"session_id": "test-session"}))
if getattr(args, "for_tool", None):
payload["tool_name"] = args.for_tool
if getattr(args, "payload_file", None):
try:
custom = json.loads(Path(args.payload_file).read_text())
if isinstance(custom, dict):
payload.update(custom)
else:
print(f"Warning: {args.payload_file} is not a JSON object; ignoring")
except Exception as exc:
print(f"Error reading payload file: {exc}")
return
specs = shell_hooks.iter_configured_hooks(load_config())
specs = [s for s in specs if s.event == event]
if getattr(args, "for_tool", None):
specs = [
s for s in specs
if s.event not in ("pre_tool_call", "post_tool_call")
or s.matches_tool(args.for_tool)
]
if not specs:
print(f"No shell hooks configured for event: {event}")
if getattr(args, "for_tool", None):
print(f"(with matcher filter --for-tool={args.for_tool})")
return
print(f"Firing {len(specs)} hook(s) for event '{event}':\n")
for spec in specs:
print(f"{spec.command}")
result = shell_hooks.run_once(spec, payload)
_print_run_result(result)
print()
def _print_run_result(result: Dict[str, Any]) -> None:
if result.get("error"):
print(f" ✗ error: {result['error']}")
return
if result.get("timed_out"):
print(f" ✗ timed out after {result['elapsed_seconds']}s")
return
rc = result.get("returncode")
elapsed = result.get("elapsed_seconds", 0)
print(f" exit={rc} elapsed={elapsed}s")
stdout = (result.get("stdout") or "").strip()
stderr = (result.get("stderr") or "").strip()
if stdout:
print(f" stdout: {_truncate(stdout, 400)}")
if stderr:
print(f" stderr: {_truncate(stderr, 400)}")
parsed = result.get("parsed")
if parsed:
print(f" parsed (Hermes wire shape): {json.dumps(parsed)}")
else:
print(" parsed: <none — hook contributed nothing to the dispatcher>")
def _truncate(s: str, n: int) -> str:
return s if len(s) <= n else s[: n - 3] + "..."
# ---------------------------------------------------------------------------
# revoke
# ---------------------------------------------------------------------------
def _cmd_revoke(args) -> None:
from agent import shell_hooks
removed = shell_hooks.revoke(args.command)
if removed == 0:
print(f"No allowlist entry found for command: {args.command}")
return
print(f"Removed {removed} allowlist entry/entries for: {args.command}")
print(
"Note: currently running CLI / gateway processes keep their "
"already-registered callbacks until they restart."
)
# ---------------------------------------------------------------------------
# doctor
# ---------------------------------------------------------------------------
def _cmd_doctor(_args) -> None:
from hermes_cli.config import load_config
from agent import shell_hooks
specs = shell_hooks.iter_configured_hooks(load_config())
if not specs:
print("No shell hooks configured — nothing to check.")
return
print(f"Checking {len(specs)} configured shell hook(s)...\n")
problems = 0
for spec in specs:
print(f" [{spec.event}] {spec.command}")
problems += _doctor_one(spec, shell_hooks)
print()
if problems:
print(f"{problems} issue(s) found. Fix before relying on these hooks.")
else:
print("All shell hooks look healthy.")
def _doctor_one(spec, shell_hooks) -> int:
problems = 0
# 1. Script exists and is executable
if shell_hooks.script_is_executable(spec.command):
print(" ✓ script exists and is executable")
else:
problems += 1
print(" ✗ script missing or not executable "
"(chmod +x the file, or fix the path)")
# 2. Allowlist status
entry = shell_hooks.allowlist_entry_for(spec.event, spec.command)
if entry:
print(f" ✓ allowlisted (approved {entry.get('approved_at', '?')})")
else:
problems += 1
print(" ✗ not allowlisted — hook will NOT fire at runtime "
"(run with --accept-hooks once, or confirm at the TTY prompt)")
# 3. Mtime drift
if entry and entry.get("script_mtime_at_approval"):
mtime_now = shell_hooks.script_mtime_iso(spec.command)
mtime_at = entry["script_mtime_at_approval"]
if mtime_now and mtime_at and mtime_now > mtime_at:
problems += 1
print(f" ⚠ script modified since approval "
f"(was {mtime_at}, now {mtime_now}) — review changes, "
f"then `hermes hooks revoke` + re-approve to refresh")
elif mtime_now and mtime_at and mtime_now == mtime_at:
print(" ✓ script unchanged since approval")
# 4. Produces valid JSON for a synthetic payload — only when the entry
# is already allowlisted. Otherwise `hermes hooks doctor` would execute
# every script listed in a freshly-pulled config before the user has
# reviewed them, which directly contradicts the documented workflow
# ("spot newly-added hooks *before they register*").
if not entry:
print(" skipped JSON smoke test — not allowlisted yet. "
"Approve the hook first (via TTY prompt or --accept-hooks), "
"then re-run `hermes hooks doctor`.")
elif shell_hooks.script_is_executable(spec.command):
payload = _DEFAULT_PAYLOADS.get(spec.event, {"extra": {}})
result = shell_hooks.run_once(spec, payload)
if result.get("timed_out"):
problems += 1
print(f" ✗ timed out after {result['elapsed_seconds']}s "
f"on synthetic payload (timeout={spec.timeout}s)")
elif result.get("error"):
problems += 1
print(f" ✗ execution error: {result['error']}")
else:
rc = result.get("returncode")
elapsed = result.get("elapsed_seconds", 0)
stdout = (result.get("stdout") or "").strip()
if stdout:
try:
json.loads(stdout)
print(f" ✓ produced valid JSON on synthetic payload "
f"(exit={rc}, {elapsed}s)")
except json.JSONDecodeError:
problems += 1
print(f" ✗ stdout was not valid JSON (exit={rc}, "
f"{elapsed}s): {_truncate(stdout, 120)}")
else:
print(f" ✓ ran clean with empty stdout "
f"(exit={rc}, {elapsed}s) — hook is observer-only")
return problems
+42 -246
View File
@@ -51,19 +51,6 @@ import sys
from pathlib import Path
from typing import Optional
def _add_accept_hooks_flag(parser) -> None:
"""Attach the ``--accept-hooks`` flag. Shared across every agent
subparser so the flag works regardless of CLI position."""
parser.add_argument(
"--accept-hooks",
action="store_true",
default=argparse.SUPPRESS,
help=(
"Auto-approve unseen shell hooks without a TTY prompt "
"(equivalent to HERMES_ACCEPT_HOOKS=1 / hooks_auto_accept: true)."
),
)
def _require_tty(command_name: str) -> None:
"""Exit with a clear error if stdin is not a terminal.
@@ -193,7 +180,7 @@ import time as _time
from datetime import datetime
from hermes_cli import __version__, __release_date__
from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
@@ -618,6 +605,7 @@ def _exec_in_container(container_info: dict, cli_args: list):
container_info: dict with backend, container_name, exec_user, hermes_bin
cli_args: the original CLI arguments (everything after 'hermes')
"""
import shutil
backend = container_info["backend"]
container_name = container_info["container_name"]
@@ -1015,17 +1003,6 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
)
env.setdefault("HERMES_PYTHON", sys.executable)
env.setdefault("HERMES_CWD", os.getcwd())
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
# ~1.54GB depending on version and can fatal-OOM on long sessions with
# large transcripts / reasoning blobs. Token-level merge: respect any
# user-supplied --max-old-space-size (they may have set it higher) and
# avoid duplicating --expose-gc.
_tokens = env.get("NODE_OPTIONS", "").split()
if not any(t.startswith("--max-old-space-size=") for t in _tokens):
_tokens.append("--max-old-space-size=8192")
if "--expose-gc" not in _tokens:
_tokens.append("--expose-gc")
env["NODE_OPTIONS"] = " ".join(_tokens)
if resume_session_id:
env["HERMES_TUI_RESUME"] = resume_session_id
@@ -1180,6 +1157,8 @@ def cmd_gateway(args):
def cmd_whatsapp(args):
"""Set up WhatsApp: choose mode, configure, install bridge, pair via QR."""
_require_tty("whatsapp")
import subprocess
from pathlib import Path
from hermes_cli.config import get_env_value, save_env_value
print()
@@ -1288,27 +1267,16 @@ def cmd_whatsapp(args):
return
if not (bridge_dir / "node_modules").exists():
print("\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)...")
npm = shutil.which("npm")
if not npm:
print(" ✗ npm not found on PATH — install Node.js first")
return
try:
result = subprocess.run(
[npm, "install", "--no-fund", "--no-audit", "--progress=false"],
cwd=str(bridge_dir),
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
text=True,
)
except KeyboardInterrupt:
print("\n ✗ Install cancelled")
return
print("\n→ Installing WhatsApp bridge dependencies...")
result = subprocess.run(
["npm", "install"],
cwd=str(bridge_dir),
capture_output=True,
text=True,
timeout=120,
)
if result.returncode != 0:
err = (result.stderr or "").strip()
preview = "\n".join(err.splitlines()[-30:]) if err else "(no output)"
print(" ✗ npm install failed:")
print(preview)
print(f" ✗ npm install failed: {result.stderr}")
return
print(" ✓ Dependencies installed")
else:
@@ -1327,6 +1295,8 @@ def cmd_whatsapp(args):
except (EOFError, KeyboardInterrupt):
response = "n"
if response.lower() in ("y", "yes"):
import shutil
shutil.rmtree(session_dir, ignore_errors=True)
session_dir.mkdir(parents=True, exist_ok=True)
print(" ✓ Session cleared")
@@ -1422,6 +1392,8 @@ def select_provider_and_model(args=None):
# Read effective provider the same way the CLI does at startup:
# config.yaml model.provider > env var > auto-detect
import os
config_provider = None
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
@@ -1532,8 +1504,6 @@ def select_provider_and_model(args=None):
# Step 2: Provider-specific setup + model selection
if selected_provider == "openrouter":
_model_flow_openrouter(config, current_model)
elif selected_provider == "ai-gateway":
_model_flow_ai_gateway(config, current_model)
elif selected_provider == "nous":
_model_flow_nous(config, current_model, args=args)
elif selected_provider == "openai-codex":
@@ -1579,6 +1549,7 @@ def select_provider_and_model(args=None):
"kilocode",
"opencode-zen",
"opencode-go",
"ai-gateway",
"alibaba",
"huggingface",
"xiaomi",
@@ -2050,63 +2021,6 @@ def _model_flow_openrouter(config, current_model=""):
print("No change.")
def _model_flow_ai_gateway(config, current_model=""):
"""Vercel AI Gateway provider: ensure API key, then pick model with pricing."""
from hermes_cli.auth import (
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import get_env_value, save_env_value
api_key = get_env_value("AI_GATEWAY_API_KEY")
if not api_key:
print("No Vercel AI Gateway API key configured.")
print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway")
print("Add a payment method to get $5 in free credits.")
print()
try:
import getpass
key = getpass.getpass("AI Gateway API key (or Enter to cancel): ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if not key:
print("Cancelled.")
return
save_env_value("AI_GATEWAY_API_KEY", key)
print("API key saved.")
print()
from hermes_cli.models import ai_gateway_model_ids, get_pricing_for_provider
models_list = ai_gateway_model_ids(force_refresh=True)
pricing = get_pricing_for_provider("ai-gateway", force_refresh=True)
selected = _prompt_model_selection(
models_list, current_model=current_model, pricing=pricing
)
if selected:
_save_model_choice(selected)
from hermes_cli.config import load_config, save_config
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = "ai-gateway"
model["base_url"] = AI_GATEWAY_BASE_URL
model["api_mode"] = "chat_completions"
save_config(cfg)
deactivate_provider()
print(f"Default model set to: {selected} (via Vercel AI Gateway)")
else:
print("No change.")
def _model_flow_nous(config, current_model="", args=None):
"""Nous Portal provider: ensure logged in, then pick model."""
from hermes_cli.auth import (
@@ -2127,6 +2041,7 @@ def _model_flow_nous(config, current_model="", args=None):
save_env_value,
)
from hermes_cli.nous_subscription import prompt_enable_tool_gateway
import argparse
state = get_provider_auth_state("nous")
if not state or not state.get("access_token"):
@@ -2294,6 +2209,7 @@ def _model_flow_openai_codex(config, current_model=""):
DEFAULT_CODEX_BASE_URL,
)
from hermes_cli.codex_models import get_codex_model_ids
import argparse
status = get_codex_auth_status()
if not status.get("logged_in"):
@@ -3424,9 +3340,8 @@ def _model_flow_kimi(config, current_model=""):
# Step 3: Model selection — show appropriate models for the endpoint
if is_coding_plan:
# Coding Plan models (kimi-k2.6 first)
# Coding Plan models (kimi-k2.5 first)
model_list = [
"kimi-k2.6",
"kimi-k2.5",
"kimi-for-coding",
"kimi-k2-thinking",
@@ -4165,12 +4080,6 @@ def cmd_webhook(args):
webhook_command(args)
def cmd_hooks(args):
"""Shell-hook inspection and management."""
from hermes_cli.hooks import hooks_command
hooks_command(args)
def cmd_doctor(args):
"""Check configuration and dependencies."""
from hermes_cli.doctor import run_doctor
@@ -4280,7 +4189,9 @@ def _clear_bytecode_cache(root: Path) -> int:
]
if os.path.basename(dirpath) == "__pycache__":
try:
shutil.rmtree(dirpath)
import shutil as _shutil
_shutil.rmtree(dirpath)
removed += 1
except OSError:
pass
@@ -4319,6 +4230,8 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0)
tmp.replace(prompt_path)
# Poll for response
import time as _time
deadline = _time.monotonic() + timeout
while _time.monotonic() < deadline:
if response_path.exists():
@@ -4350,6 +4263,7 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
"""
if not (web_dir / "package.json").exists():
return True
import shutil
npm = shutil.which("npm")
if not npm:
@@ -4386,6 +4300,7 @@ def _update_via_zip(args):
Used on Windows when git file I/O is broken (antivirus, NTFS filter
drivers causing 'Invalid argument' errors on file creation).
"""
import shutil
import tempfile
import zipfile
from urllib.request import urlretrieve
@@ -4462,6 +4377,7 @@ def _update_via_zip(args):
# breaks on this machine, keep base deps and reinstall the remaining extras
# individually so update does not silently strip working capabilities.
print("→ Updating Python dependencies...")
import subprocess
uv_bin = shutil.which("uv")
if uv_bin:
@@ -5212,11 +5128,9 @@ def _install_hangup_protection(gateway_mode: bool = False):
# (2) Mirror output to update.log and wrap stdio for broken-pipe
# tolerance. Any failure here is non-fatal; we just skip the wrap.
try:
# Late-bound import so tests can monkeypatch
# hermes_cli.config.get_hermes_home to simulate setup failure.
from hermes_cli.config import get_hermes_home as _get_hermes_home
from hermes_cli.config import get_hermes_home
logs_dir = _get_hermes_home() / "logs"
logs_dir = get_hermes_home() / "logs"
logs_dir.mkdir(parents=True, exist_ok=True)
log_path = logs_dir / "update.log"
log_file = open(log_path, "a", buffering=1, encoding="utf-8")
@@ -5791,6 +5705,8 @@ def _cmd_update_impl(args, gateway_mode: bool):
# Verify the service actually survived the
# restart. systemctl restart returns 0 even
# if the new process crashes immediately.
import time as _time
_time.sleep(3)
verify = subprocess.run(
scope_cmd + ["is-active", svc_name],
@@ -6443,17 +6359,6 @@ For more help on a command:
default=False,
help="Run in an isolated git worktree (for parallel agents)",
)
parser.add_argument(
"--accept-hooks",
action="store_true",
default=False,
help=(
"Auto-approve any unseen shell hooks declared in config.yaml "
"without a TTY prompt. Equivalent to HERMES_ACCEPT_HOOKS=1 or "
"hooks_auto_accept: true in config.yaml. Use on CI / headless "
"runs that can't prompt."
),
)
parser.add_argument(
"--skills",
"-s",
@@ -6576,16 +6481,6 @@ For more help on a command:
default=argparse.SUPPRESS,
help="Run in an isolated git worktree (for parallel agents on the same repo)",
)
chat_parser.add_argument(
"--accept-hooks",
action="store_true",
default=argparse.SUPPRESS,
help=(
"Auto-approve any unseen shell hooks declared in config.yaml "
"without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and "
"hooks_auto_accept: in config.yaml)."
),
)
chat_parser.add_argument(
"--checkpoints",
action="store_true",
@@ -6705,8 +6600,6 @@ For more help on a command:
action="store_true",
help="Replace any existing gateway instance (useful for systemd)",
)
_add_accept_hooks_flag(gateway_run)
_add_accept_hooks_flag(gateway_parser)
# gateway start
gateway_start = gateway_subparsers.add_parser(
@@ -7071,7 +6964,6 @@ For more help on a command:
"run", help="Run a job on the next scheduler tick"
)
cron_run.add_argument("job_id", help="Job ID to trigger")
_add_accept_hooks_flag(cron_run)
cron_remove = cron_subparsers.add_parser(
"remove", aliases=["rm", "delete"], help="Remove a scheduled job"
@@ -7082,9 +6974,8 @@ For more help on a command:
cron_subparsers.add_parser("status", help="Check if cron scheduler is running")
# cron tick (mostly for debugging)
cron_tick = cron_subparsers.add_parser("tick", help="Run due jobs once and exit")
_add_accept_hooks_flag(cron_tick)
_add_accept_hooks_flag(cron_parser)
cron_subparsers.add_parser("tick", help="Run due jobs once and exit")
cron_parser.set_defaults(func=cmd_cron)
# =========================================================================
@@ -7151,67 +7042,6 @@ For more help on a command:
webhook_parser.set_defaults(func=cmd_webhook)
# =========================================================================
# hooks command — shell-hook inspection and management
# =========================================================================
hooks_parser = subparsers.add_parser(
"hooks",
help="Inspect and manage shell-script hooks",
description=(
"Inspect shell-script hooks declared in ~/.hermes/config.yaml, "
"test them against synthetic payloads, and manage the first-use "
"consent allowlist at ~/.hermes/shell-hooks-allowlist.json."
),
)
hooks_subparsers = hooks_parser.add_subparsers(dest="hooks_action")
hooks_subparsers.add_parser(
"list", aliases=["ls"],
help="List configured hooks with matcher, timeout, and consent status",
)
_hk_test = hooks_subparsers.add_parser(
"test",
help="Fire every hook matching <event> against a synthetic payload",
)
_hk_test.add_argument(
"event",
help="Hook event name (e.g. pre_tool_call, pre_llm_call, subagent_stop)",
)
_hk_test.add_argument(
"--for-tool", dest="for_tool", default=None,
help=(
"Only fire hooks whose matcher matches this tool name "
"(used for pre_tool_call / post_tool_call)"
),
)
_hk_test.add_argument(
"--payload-file", dest="payload_file", default=None,
help=(
"Path to a JSON file whose contents are merged into the "
"synthetic payload before execution"
),
)
_hk_revoke = hooks_subparsers.add_parser(
"revoke", aliases=["remove", "rm"],
help="Remove a command's allowlist entries (takes effect on next restart)",
)
_hk_revoke.add_argument(
"command",
help="The exact command string to revoke (as declared in config.yaml)",
)
hooks_subparsers.add_parser(
"doctor",
help=(
"Check each configured hook: exec bit, allowlist, mtime drift, "
"JSON validity, and synthetic run timing"
),
)
hooks_parser.set_defaults(func=cmd_hooks)
# =========================================================================
# doctor command
# =========================================================================
@@ -7677,7 +7507,9 @@ Examples:
)
cmd_info["setup_fn"](plugin_parser)
except Exception as _exc:
logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)
import logging as _log
_log.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)
# =========================================================================
# memory command
@@ -7883,7 +7715,6 @@ Examples:
action="store_true",
help="Enable verbose logging on stderr",
)
_add_accept_hooks_flag(mcp_serve_p)
mcp_add_p = mcp_sub.add_parser(
"add", help="Add an MCP server (discovery-first install)"
@@ -7922,8 +7753,6 @@ Examples:
)
mcp_login_p.add_argument("name", help="Server name to re-authenticate")
_add_accept_hooks_flag(mcp_parser)
def cmd_mcp(args):
from hermes_cli.mcp_config import mcp_command
@@ -8062,6 +7891,7 @@ Examples:
return
line = _json.dumps(data, ensure_ascii=False) + "\n"
if args.output == "-":
import sys
sys.stdout.write(line)
else:
@@ -8071,6 +7901,7 @@ Examples:
else:
sessions = db.export_all(source=args.source)
if args.output == "-":
import sys
for s in sessions:
sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n")
@@ -8141,6 +7972,8 @@ Examples:
# Launch hermes --resume <id> by replacing the current process
print(f"Resuming session: {selected_id}")
import shutil
hermes_bin = shutil.which("hermes")
if hermes_bin:
os.execvp(hermes_bin, ["hermes", "--resume", selected_id])
@@ -8331,7 +8164,6 @@ Examples:
help="Run Hermes Agent as an ACP (Agent Client Protocol) server",
description="Start Hermes Agent in ACP mode for editor integration (VS Code, Zed, JetBrains)",
)
_add_accept_hooks_flag(acp_parser)
def cmd_acp(args):
"""Launch Hermes Agent as an ACP server."""
@@ -8605,42 +8437,6 @@ Examples:
cmd_version(args)
return
# Discover Python plugins and register shell hooks once, before any
# command that can fire lifecycle hooks. Both are idempotent; gated
# so introspection/management commands (hermes hooks list, cron
# list, gateway status, mcp add, ...) don't pay discovery cost or
# trigger consent prompts for hooks the user is still inspecting.
# Groups with mixed admin/CRUD vs. agent-running entries narrow via
# the nested subcommand (dest varies by parser).
_AGENT_COMMANDS = {None, "chat", "acp", "rl"}
_AGENT_SUBCOMMANDS = {
"cron": ("cron_command", {"run", "tick"}),
"gateway": ("gateway_command", {"run"}),
"mcp": ("mcp_action", {"serve"}),
}
_sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
if (
args.command in _AGENT_COMMANDS
or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set)
):
_accept_hooks = bool(getattr(args, "accept_hooks", False))
try:
from hermes_cli.plugins import discover_plugins
discover_plugins()
except Exception:
logger.debug(
"plugin discovery failed at CLI startup", exc_info=True,
)
try:
from hermes_cli.config import load_config
from agent.shell_hooks import register_from_config
register_from_config(load_config(), accept_hooks=_accept_hooks)
except Exception:
logger.debug(
"shell-hook registration failed at CLI startup",
exc_info=True,
)
# Handle top-level --resume / --continue as shortcut to chat
if (args.resume or args.continue_last) and args.command is None:
args.command = "chat"
-1
View File
@@ -1095,7 +1095,6 @@ def list_authenticated_providers(
"api_url": api_url,
})
seen_slugs.add(ep_name.lower())
seen_slugs.add(custom_provider_slug(display_name).lower())
_pair = (
str(display_name).strip().lower(),
str(api_url).strip().rstrip("/").lower(),
+25 -272
View File
@@ -32,7 +32,7 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
# Fallback OpenRouter snapshot used when the live catalog is unavailable.
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
("moonshotai/kimi-k2.6", "recommended"),
("moonshotai/kimi-k2.5", "recommended"),
("anthropic/claude-opus-4.7", ""),
("anthropic/claude-opus-4.6", ""),
("anthropic/claude-sonnet-4.6", ""),
@@ -68,31 +68,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
_openrouter_catalog_cache: list[tuple[str, str]] | None = None
# Fallback Vercel AI Gateway snapshot used when the live catalog is unavailable.
# OSS / open-weight models prioritized first, then closed-source by family.
# Slugs match Vercel's actual /v1/models catalog (e.g. alibaba/ for Qwen,
# zai/ and xai/ without hyphens).
VERCEL_AI_GATEWAY_MODELS: list[tuple[str, str]] = [
("moonshotai/kimi-k2.6", "recommended"),
("alibaba/qwen3.6-plus", ""),
("zai/glm-5.1", ""),
("minimax/minimax-m2.7", ""),
("anthropic/claude-sonnet-4.6", ""),
("anthropic/claude-opus-4.7", ""),
("anthropic/claude-opus-4.6", ""),
("anthropic/claude-haiku-4.5", ""),
("openai/gpt-5.4", ""),
("openai/gpt-5.4-mini", ""),
("openai/gpt-5.3-codex", ""),
("google/gemini-3.1-pro-preview", ""),
("google/gemini-3-flash", ""),
("google/gemini-3.1-flash-lite-preview", ""),
("xai/grok-4.20-reasoning", ""),
]
_ai_gateway_catalog_cache: list[tuple[str, str]] | None = None
def _codex_curated_models() -> list[str]:
"""Derive the openai-codex curated list from codex_models.py.
@@ -106,7 +81,7 @@ def _codex_curated_models() -> list[str]:
_PROVIDER_MODELS: dict[str, list[str]] = {
"nous": [
"moonshotai/kimi-k2.6",
"moonshotai/kimi-k2.5",
"xiaomi/mimo-v2-pro",
"anthropic/claude-opus-4.7",
"anthropic/claude-opus-4.6",
@@ -190,13 +165,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
# (map to OpenRouter defaults — users get familiar picks on NIM)
"qwen/qwen3.5-397b-a17b",
"deepseek-ai/deepseek-v3.2",
"moonshotai/kimi-k2.6",
"moonshotai/kimi-k2.5",
"minimaxai/minimax-m2.5",
"z-ai/glm5",
"openai/gpt-oss-120b",
],
"kimi-coding": [
"kimi-k2.6",
"kimi-k2.5",
"kimi-for-coding",
"kimi-k2-thinking",
@@ -205,14 +179,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"kimi-k2-0905-preview",
],
"kimi-coding-cn": [
"kimi-k2.6",
"kimi-k2.5",
"kimi-k2-thinking",
"kimi-k2-turbo-preview",
"kimi-k2-0905-preview",
],
"moonshot": [
"kimi-k2.6",
"kimi-k2.5",
"kimi-k2-thinking",
"kimi-k2-turbo-preview",
@@ -292,7 +264,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"big-pickle",
],
"opencode-go": [
"kimi-k2.6",
"kimi-k2.5",
"glm-5.1",
"glm-5",
@@ -300,8 +271,20 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"mimo-v2-omni",
"minimax-m2.7",
"minimax-m2.5",
"qwen3.6-plus",
"qwen3.5-plus",
],
"ai-gateway": [
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-haiku-4.5",
"openai/gpt-5",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
"google/gemini-3-pro-preview",
"google/gemini-3-flash",
"google/gemini-2.5-pro",
"google/gemini-2.5-flash",
"deepseek/deepseek-v3.2",
],
"kilocode": [
"anthropic/claude-opus-4.6",
@@ -335,7 +318,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"zai-org/GLM-5",
"XiaomiMiMo/MiMo-V2-Flash",
"moonshotai/Kimi-K2-Thinking",
"moonshotai/Kimi-K2.6",
],
# AWS Bedrock — static fallback list used when dynamic discovery is
# unavailable (no boto3, no credentials, or API error). The agent
@@ -355,12 +337,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
],
}
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
# ``VERCEL_AI_GATEWAY_MODELS`` snapshot so both the picker (tuples with descriptions)
# and the static fallback catalog (bare ids) stay in sync from a single
# source of truth.
_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
# ---------------------------------------------------------------------------
# Nous Portal free-model filtering
# ---------------------------------------------------------------------------
@@ -518,6 +494,8 @@ def check_nous_free_tier() -> bool:
Returns False (assume paid) on any error never blocks paying users.
"""
global _free_tier_cache
import time
now = time.monotonic()
if _free_tier_cache is not None:
cached_result, cached_at = _free_tier_cache
@@ -569,7 +547,6 @@ class ProviderEntry(NamedTuple):
CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
@@ -593,6 +570,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"),
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
]
@@ -688,31 +666,6 @@ def _openrouter_model_is_free(pricing: Any) -> bool:
return False
def _openrouter_model_supports_tools(item: Any) -> bool:
"""Return True when the model's ``supported_parameters`` advertise tool calling.
hermes-agent is tool-calling-first every provider path assumes the model
can invoke tools. Models that don't advertise ``tools`` in their
``supported_parameters`` (e.g. image-only or completion-only models) cannot
be driven by the agent loop and would fail at the first tool call.
**Permissive when the field is missing.** Some OpenRouter-compatible gateways
(Nous Portal, private mirrors, older catalog snapshots) don't populate
``supported_parameters`` at all. Treat that as "unknown capability → allow"
so the picker doesn't silently empty for those users. Only hide models
whose ``supported_parameters`` is an explicit list that omits ``tools``.
Ported from Kilo-Org/kilocode#9068.
"""
if not isinstance(item, dict):
return True
params = item.get("supported_parameters")
if not isinstance(params, list):
# Field absent / malformed / None — be permissive.
return True
return "tools" in params
def fetch_openrouter_models(
timeout: float = 8.0,
*,
@@ -755,11 +708,6 @@ def fetch_openrouter_models(
live_item = live_by_id.get(preferred_id)
if live_item is None:
continue
# Hide models that don't advertise tool-calling support — hermes-agent
# requires it and surfacing them leads to immediate runtime failures
# when the user selects them. Ported from Kilo-Org/kilocode#9068.
if not _openrouter_model_supports_tools(live_item):
continue
desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else ""
curated.append((preferred_id, desc))
@@ -777,93 +725,6 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
def _ai_gateway_model_is_free(pricing: Any) -> bool:
"""Return True if an AI Gateway model has $0 input AND output pricing."""
if not isinstance(pricing, dict):
return False
try:
return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0
except (TypeError, ValueError):
return False
def fetch_ai_gateway_models(
timeout: float = 8.0,
*,
force_refresh: bool = False,
) -> list[tuple[str, str]]:
"""Return the curated AI Gateway picker list, refreshed from the live catalog when possible."""
global _ai_gateway_catalog_cache
if _ai_gateway_catalog_cache is not None and not force_refresh:
return list(_ai_gateway_catalog_cache)
from hermes_constants import AI_GATEWAY_BASE_URL
fallback = list(VERCEL_AI_GATEWAY_MODELS)
preferred_ids = [mid for mid, _ in fallback]
try:
req = urllib.request.Request(
f"{AI_GATEWAY_BASE_URL.rstrip('/')}/models",
headers={"Accept": "application/json"},
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
payload = json.loads(resp.read().decode())
except Exception:
return list(_ai_gateway_catalog_cache or fallback)
live_items = payload.get("data", [])
if not isinstance(live_items, list):
return list(_ai_gateway_catalog_cache or fallback)
live_by_id: dict[str, dict[str, Any]] = {}
for item in live_items:
if not isinstance(item, dict):
continue
mid = str(item.get("id") or "").strip()
if not mid:
continue
live_by_id[mid] = item
curated: list[tuple[str, str]] = []
for preferred_id in preferred_ids:
live_item = live_by_id.get(preferred_id)
if live_item is None:
continue
desc = "free" if _ai_gateway_model_is_free(live_item.get("pricing")) else ""
curated.append((preferred_id, desc))
if not curated:
return list(_ai_gateway_catalog_cache or fallback)
# If the live catalog offers a free Moonshot model, auto-promote it to
# position #1 as "recommended" — dynamic discovery without a PR.
free_moonshot = next(
(
mid
for mid, item in live_by_id.items()
if mid.startswith("moonshotai/")
and _ai_gateway_model_is_free(item.get("pricing"))
),
None,
)
if free_moonshot:
curated = [(mid, desc) for mid, desc in curated if mid != free_moonshot]
curated.insert(0, (free_moonshot, "recommended"))
else:
first_id, _ = curated[0]
curated[0] = (first_id, "recommended")
_ai_gateway_catalog_cache = curated
return list(curated)
def ai_gateway_model_ids(*, force_refresh: bool = False) -> list[str]:
"""Return just the AI Gateway model-id strings."""
return [mid for mid, _ in fetch_ai_gateway_models(force_refresh=force_refresh)]
# ---------------------------------------------------------------------------
@@ -1008,56 +869,6 @@ def fetch_models_with_pricing(
return result
def fetch_ai_gateway_pricing(
timeout: float = 8.0,
*,
force_refresh: bool = False,
) -> dict[str, dict[str, str]]:
"""Fetch Vercel AI Gateway /v1/models and return hermes-shaped pricing.
Vercel uses ``input`` / ``output`` field names; hermes's picker expects
``prompt`` / ``completion``. This translates. Cache read/write field names
already match.
"""
from hermes_constants import AI_GATEWAY_BASE_URL
cache_key = AI_GATEWAY_BASE_URL.rstrip("/")
if not force_refresh and cache_key in _pricing_cache:
return _pricing_cache[cache_key]
try:
req = urllib.request.Request(
f"{cache_key}/models",
headers={"Accept": "application/json"},
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
payload = json.loads(resp.read().decode())
except Exception:
_pricing_cache[cache_key] = {}
return {}
result: dict[str, dict[str, str]] = {}
for item in payload.get("data", []):
if not isinstance(item, dict):
continue
mid = item.get("id")
pricing = item.get("pricing")
if not (mid and isinstance(pricing, dict)):
continue
entry: dict[str, str] = {
"prompt": str(pricing.get("input", "")),
"completion": str(pricing.get("output", "")),
}
if pricing.get("input_cache_read"):
entry["input_cache_read"] = str(pricing["input_cache_read"])
if pricing.get("input_cache_write"):
entry["input_cache_write"] = str(pricing["input_cache_write"])
result[mid] = entry
_pricing_cache[cache_key] = result
return result
def _resolve_openrouter_api_key() -> str:
"""Best-effort OpenRouter API key for pricing fetch."""
return os.getenv("OPENROUTER_API_KEY", "").strip()
@@ -1076,7 +887,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:
def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
"""Return live pricing for providers that support it (openrouter, nous, ai-gateway)."""
"""Return live pricing for providers that support it (openrouter, nous)."""
normalized = normalize_provider(provider)
if normalized == "openrouter":
return fetch_models_with_pricing(
@@ -1084,8 +895,6 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d
base_url="https://openrouter.ai/api",
force_refresh=force_refresh,
)
if normalized == "ai-gateway":
return fetch_ai_gateway_pricing(force_refresh=force_refresh)
if normalized == "nous":
api_key, base_url = _resolve_nous_pricing_credentials()
if base_url:
@@ -1290,6 +1099,7 @@ def detect_provider_for_model(
from hermes_cli.auth import PROVIDER_REGISTRY
pconfig = PROVIDER_REGISTRY.get(direct_match)
if pconfig:
import os
for env_var in pconfig.api_key_env_vars:
if os.getenv(env_var, "").strip():
has_creds = True
@@ -2426,70 +2236,13 @@ def validate_requested_model(
except Exception:
pass # Fall through to generic warning
# Static-catalog fallback: when the /models probe was unreachable,
# validate against the curated list from provider_model_ids() — same
# pattern as the openai-codex and minimax branches above. This fixes
# /model switches in the gateway for providers like opencode-go and
# opencode-zen whose /models endpoint returns 404 against the HTML
# marketing site. Without this block, validate_requested_model would
# reject every model on such providers, switch_model() would return
# success=False, and the gateway would never write to
# _session_model_overrides.
provider_label = _PROVIDER_LABELS.get(normalized, normalized)
try:
catalog_models = provider_model_ids(normalized)
except Exception:
catalog_models = []
if catalog_models:
catalog_lower = {m.lower(): m for m in catalog_models}
if requested_for_lookup.lower() in catalog_lower:
return {
"accepted": True,
"persist": True,
"recognized": True,
"message": None,
}
catalog_lower_list = list(catalog_lower.keys())
auto = get_close_matches(
requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9
)
if auto:
corrected = catalog_lower[auto[0]]
return {
"accepted": True,
"persist": True,
"recognized": True,
"corrected_model": corrected,
"message": f"Auto-corrected `{requested}` → `{corrected}`",
}
suggestions = get_close_matches(
requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5
)
suggestion_text = ""
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(
f"`{catalog_lower[s]}`" for s in suggestions
)
return {
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Note: `{requested}` was not found in the {provider_label} curated catalog "
f"and the /models endpoint was unreachable.{suggestion_text}"
f"\n The model may still work if it exists on the provider."
),
}
# No catalog available — accept with a warning, matching the comment's
# stated intent ("Accept and persist, but warn").
return {
"accepted": True,
"persist": True,
"accepted": False,
"persist": False,
"recognized": False,
"message": (
f"Note: could not reach the {provider_label} API to validate `{requested}`. "
f"Could not reach the {provider_label} API to validate `{requested}`. "
f"If the service isn't down, this model may not be valid."
),
}
+4 -4
View File
@@ -10,7 +10,6 @@ from hermes_cli.auth import get_nous_auth_status
from hermes_cli.config import get_env_value, load_config
from tools.managed_tool_gateway import is_managed_tool_gateway_ready
from tools.tool_backend_helpers import (
fal_key_is_configured,
has_direct_modal_credentials,
managed_nous_tools_enabled,
normalize_browser_cloud_provider,
@@ -272,7 +271,7 @@ def get_nous_subscription_features(
direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
direct_fal = fal_key_is_configured()
direct_fal = bool(get_env_value("FAL_KEY"))
direct_openai_tts = bool(resolve_openai_audio_api_key())
direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
direct_camofox = bool(get_env_value("CAMOFOX_URL"))
@@ -521,7 +520,7 @@ def apply_nous_managed_defaults(
browser_cfg["cloud_provider"] = "browser-use"
changed.add("browser")
if "image_gen" in selected_toolsets and not fal_key_is_configured():
if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):
changed.add("image_gen")
return changed
@@ -549,7 +548,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
or get_env_value("TAVILY_API_KEY")
or get_env_value("EXA_API_KEY")
),
"image_gen": fal_key_is_configured(),
"image_gen": bool(get_env_value("FAL_KEY")),
"tts": bool(
resolve_openai_audio_api_key()
or get_env_value("ELEVENLABS_API_KEY")
@@ -587,6 +586,7 @@ def get_gateway_eligible_tools(
return [], [], []
if config is None:
from hermes_cli.config import load_config
config = load_config() or {}
# Quick provider check without the heavy get_nous_subscription_features call
-1
View File
@@ -70,7 +70,6 @@ VALID_HOOKS: Set[str] = {
"on_session_end",
"on_session_finalize",
"on_session_reset",
"subagent_stop",
}
ENTRY_POINTS_GROUP = "hermes_agent.plugins"
+3 -6
View File
@@ -23,8 +23,6 @@ import logging
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
from utils import base_url_host_matches, base_url_hostname
logger = logging.getLogger(__name__)
@@ -436,12 +434,11 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
# URL-based heuristics for custom / unknown providers
if base_url:
url_lower = base_url.rstrip("/").lower()
hostname = base_url_hostname(base_url)
if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com":
if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
return "anthropic_messages"
if hostname == "api.openai.com":
if "api.openai.com" in url_lower:
return "codex_responses"
if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"):
if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
return "bedrock_converse"
return "chat_completions"
+7 -12
View File
@@ -29,7 +29,6 @@ from hermes_cli.auth import (
)
from hermes_cli.config import get_compatible_custom_providers, load_config
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_host_matches, base_url_hostname
def _normalize_custom_provider_name(value: str) -> str:
@@ -48,10 +47,9 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
``chat_completions``.
"""
normalized = (base_url or "").strip().lower().rstrip("/")
hostname = base_url_hostname(base_url)
if hostname == "api.x.ai":
if "api.x.ai" in normalized:
return "codex_responses"
if hostname == "api.openai.com":
if "api.openai.com" in normalized and "openrouter" not in normalized:
return "codex_responses"
if normalized.endswith("/anthropic"):
return "anthropic_messages"
@@ -482,7 +480,7 @@ def _resolve_openrouter_runtime(
# When hitting a custom endpoint (e.g. Z.ai, local LLM), prefer
# OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
# provider (issues #420, #560).
_is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
_is_openrouter_url = "openrouter.ai" in base_url
if _is_openrouter_url:
api_key_candidates = [
explicit_api_key,
@@ -492,12 +490,8 @@ def _resolve_openrouter_runtime(
else:
# Custom endpoint: use api_key from config when using config base_url (#1760).
# When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's
# the canonical env var for ollama.com authentication. Match on
# HOST, not substring — a custom base_url whose path contains
# "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
# hostname is a look-alike (ollama.com.attacker.test) must not
# receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
_is_ollama_url = base_url_host_matches(base_url, "ollama.com")
# the canonical env var for ollama.com authentication.
_is_ollama_url = "ollama.com" in base_url.lower()
api_key_candidates = [
explicit_api_key,
(cfg_api_key if use_config_base_url else ""),
@@ -910,7 +904,8 @@ def resolve_runtime_provider(
code="no_aws_credentials",
)
# Read bedrock-specific config from config.yaml
_bedrock_cfg = load_config().get("bedrock", {})
from hermes_cli.config import load_config as _load_bedrock_config
_bedrock_cfg = _load_bedrock_config().get("bedrock", {})
# Region priority: config.yaml bedrock.region → env var → us-east-1
region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
+56 -142
View File
@@ -22,7 +22,6 @@ from typing import Optional, Dict, Any
from hermes_cli.nous_subscription import get_nous_subscription_features
from tools.tool_backend_helpers import managed_nous_tools_enabled
from utils import base_url_hostname
from hermes_constants import get_optional_skills_dir
logger = logging.getLogger(__name__)
@@ -94,15 +93,15 @@ _DEFAULT_PROVIDER_MODELS = {
"gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
],
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
"kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"kimi-coding-cn": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"],
"minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
"minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
"opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
"huggingface": [
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@@ -434,6 +433,7 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status.append(("Text-to-Speech (Google Gemini)", True, None))
elif tts_provider == "neutts":
try:
import importlib.util
neutts_ok = importlib.util.find_spec("neutts") is not None
except Exception:
neutts_ok = False
@@ -441,16 +441,6 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status.append(("Text-to-Speech (NeuTTS local)", True, None))
else:
tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'"))
elif tts_provider == "kittentts":
try:
import importlib.util
kittentts_ok = importlib.util.find_spec("kittentts") is not None
except Exception:
kittentts_ok = False
if kittentts_ok:
tool_status.append(("Text-to-Speech (KittenTTS local)", True, None))
else:
tool_status.append(("Text-to-Speech (KittenTTS — not installed)", False, "run 'hermes setup tts'"))
else:
tool_status.append(("Text-to-Speech (Edge TTS)", True, None))
@@ -813,8 +803,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
elif _vision_idx == 1: # OpenAI-compatible endpoint
_base_url = prompt(" Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
_api_key_label = " API key"
_is_native_openai = base_url_hostname(_base_url) == "api.openai.com"
if _is_native_openai:
if "api.openai.com" in _base_url.lower():
_api_key_label = " OpenAI API key"
_oai_key = prompt(_api_key_label, password=True).strip()
if _oai_key:
@@ -822,7 +811,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
# Save vision base URL to config (not .env — only secrets go there)
_vaux = config.setdefault("auxiliary", {}).setdefault("vision", {})
_vaux["base_url"] = _base_url
if _is_native_openai:
if "api.openai.com" in _base_url.lower():
_oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
_vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
_vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
@@ -858,6 +847,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
def _check_espeak_ng() -> bool:
"""Check if espeak-ng is installed."""
import shutil
return shutil.which("espeak-ng") is not None or shutil.which("espeak") is not None
@@ -911,31 +901,6 @@ def _install_neutts_deps() -> bool:
return False
def _install_kittentts_deps() -> bool:
"""Install KittenTTS dependencies with user approval. Returns True on success."""
import subprocess
import sys
wheel_url = (
"https://github.com/KittenML/KittenTTS/releases/download/"
"0.8.1/kittentts-0.8.1-py3-none-any.whl"
)
print()
print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...")
print()
try:
subprocess.run(
[sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
check=True, timeout=300,
)
print_success("kittentts installed successfully")
return True
except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
print_error(f"Failed to install kittentts: {e}")
print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile")
return False
def _setup_tts_provider(config: dict):
"""Interactive TTS provider selection with install flow for NeuTTS."""
tts_config = config.get("tts", {})
@@ -951,7 +916,6 @@ def _setup_tts_provider(config: dict):
"mistral": "Mistral Voxtral TTS",
"gemini": "Google Gemini TTS",
"neutts": "NeuTTS",
"kittentts": "KittenTTS",
}
current_label = provider_labels.get(current_provider, current_provider)
@@ -975,10 +939,9 @@ def _setup_tts_provider(config: dict):
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
"Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
"NeuTTS (local on-device, free, ~300MB model download)",
"KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)",
]
)
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"])
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"])
choices.append(f"Keep current ({current_label})")
keep_current_idx = len(choices) - 1
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@@ -999,6 +962,7 @@ def _setup_tts_provider(config: dict):
if selected == "neutts":
# Check if already installed
try:
import importlib.util
already_installed = importlib.util.find_spec("neutts") is not None
except Exception:
already_installed = False
@@ -1097,29 +1061,6 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
elif selected == "kittentts":
# Check if already installed
try:
import importlib.util
already_installed = importlib.util.find_spec("kittentts") is not None
except Exception:
already_installed = False
if already_installed:
print_success("KittenTTS is already installed")
else:
print()
print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).")
print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
print()
if prompt_yes_no("Install KittenTTS now?", True):
if not _install_kittentts_deps():
print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.")
selected = "edge"
else:
print_info("Skipping install. Set tts.provider to 'kittentts' after installing manually.")
selected = "edge"
# Save the selection
if "tts" not in config:
config["tts"] = {}
@@ -1141,6 +1082,8 @@ def setup_tts(config: dict):
def setup_terminal_backend(config: dict):
"""Configure the terminal execution backend."""
import platform as _platform
import shutil
print_header("Terminal Backend")
print_info("Choose where Hermes runs shell commands and code.")
print_info("This affects tool execution, file access, and isolation.")
@@ -2415,74 +2358,6 @@ def setup_tools(config: dict, first_install: bool = False):
# =============================================================================
def _model_section_has_credentials(config: dict) -> bool:
"""Return True when any known inference provider has usable credentials.
Sources of truth:
* ``PROVIDER_REGISTRY`` in ``hermes_cli.auth`` lists every supported
provider along with its ``api_key_env_vars``.
* ``active_provider`` in the auth store covers OAuth device-code /
external-OAuth providers (Nous, Codex, Qwen, Gemini CLI, ...).
* The legacy OpenRouter aggregator env vars, which route generic
``OPENAI_API_KEY`` / ``OPENROUTER_API_KEY`` values through OpenRouter.
"""
try:
from hermes_cli.auth import get_active_provider
if get_active_provider():
return True
except Exception:
pass
try:
from hermes_cli.auth import PROVIDER_REGISTRY
except Exception:
PROVIDER_REGISTRY = {} # type: ignore[assignment]
def _has_key(pconfig) -> bool:
for env_var in pconfig.api_key_env_vars:
# CLAUDE_CODE_OAUTH_TOKEN is set by Claude Code itself, not by
# the user — mirrors is_provider_explicitly_configured in auth.py.
if env_var == "CLAUDE_CODE_OAUTH_TOKEN":
continue
if get_env_value(env_var):
return True
return False
# Prefer the provider declared in config.yaml, avoids false positives
# from stray env vars (GH_TOKEN, etc.) when the user has already picked
# a different provider.
model_cfg = config.get("model") if isinstance(config, dict) else None
if isinstance(model_cfg, dict):
provider_id = (model_cfg.get("provider") or "").strip().lower()
if provider_id in PROVIDER_REGISTRY:
if _has_key(PROVIDER_REGISTRY[provider_id]):
return True
if provider_id == "openrouter":
for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"):
if get_env_value(env_var):
return True
# OpenRouter aggregator fallback (no provider declared in config).
for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"):
if get_env_value(env_var):
return True
for pid, pconfig in PROVIDER_REGISTRY.items():
# Skip copilot in auto-detect: GH_TOKEN / GITHUB_TOKEN are
# commonly set for git tooling. Mirrors resolve_provider in auth.py.
if pid == "copilot":
continue
if _has_key(pconfig):
return True
return False
def _gateway_platform_short_label(label: str) -> str:
"""Strip trailing parenthetical qualifiers from a gateway platform label."""
base = label.split("(", 1)[0].strip()
return base or label
def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]:
"""Return a short summary if a setup section is already configured, else None.
@@ -2491,7 +2366,20 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
so that test patches on ``setup_mod.get_env_value`` take effect.
"""
if section_key == "model":
if not _model_section_has_credentials(config):
has_key = bool(
get_env_value("OPENROUTER_API_KEY")
or get_env_value("OPENAI_API_KEY")
or get_env_value("ANTHROPIC_API_KEY")
)
if not has_key:
# Check for OAuth providers
try:
from hermes_cli.auth import get_active_provider
if get_active_provider():
has_key = True
except Exception:
pass
if not has_key:
return None
model = config.get("model")
if isinstance(model, str) and model.strip():
@@ -2509,11 +2397,37 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
return f"max turns: {max_turns}"
elif section_key == "gateway":
platforms = [
_gateway_platform_short_label(label)
for label, env_var, _ in _GATEWAY_PLATFORMS
if get_env_value(env_var)
]
platforms = []
if get_env_value("TELEGRAM_BOT_TOKEN"):
platforms.append("Telegram")
if get_env_value("DISCORD_BOT_TOKEN"):
platforms.append("Discord")
if get_env_value("SLACK_BOT_TOKEN"):
platforms.append("Slack")
if get_env_value("SIGNAL_ACCOUNT"):
platforms.append("Signal")
if get_env_value("EMAIL_ADDRESS"):
platforms.append("Email")
if get_env_value("TWILIO_ACCOUNT_SID"):
platforms.append("SMS")
if get_env_value("MATRIX_ACCESS_TOKEN") or get_env_value("MATRIX_PASSWORD"):
platforms.append("Matrix")
if get_env_value("MATTERMOST_TOKEN"):
platforms.append("Mattermost")
if get_env_value("WHATSAPP_PHONE_NUMBER_ID"):
platforms.append("WhatsApp")
if get_env_value("DINGTALK_CLIENT_ID"):
platforms.append("DingTalk")
if get_env_value("FEISHU_APP_ID"):
platforms.append("Feishu")
if get_env_value("WECOM_BOT_ID"):
platforms.append("WeCom")
if get_env_value("WEIXIN_ACCOUNT_ID"):
platforms.append("Weixin")
if get_env_value("BLUEBUBBLES_SERVER_URL"):
platforms.append("BlueBubbles")
if get_env_value("WEBHOOK_ENABLED"):
platforms.append("Webhooks")
if platforms:
return ", ".join(platforms)
return None # No platforms configured — section must run
+1 -1
View File
@@ -127,7 +127,7 @@ TIPS = [
# --- Tools & Capabilities ---
"execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
"delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.",
"delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.",
"web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
"search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
"patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",
+5 -48
View File
@@ -24,8 +24,7 @@ from hermes_cli.nous_subscription import (
apply_nous_managed_defaults,
get_nous_subscription_features,
)
from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled
from utils import base_url_hostname
from tools.tool_backend_helpers import managed_nous_tools_enabled
logger = logging.getLogger(__name__)
@@ -182,14 +181,6 @@ TOOL_CATEGORIES = {
],
"tts_provider": "gemini",
},
{
"name": "KittenTTS",
"badge": "local · free",
"tag": "Lightweight local ONNX TTS (~25MB), no API key",
"env_vars": [],
"tts_provider": "kittentts",
"post_setup": "kittentts",
},
],
},
"web": {
@@ -431,36 +422,6 @@ def _run_post_setup(post_setup_key: str):
_print_warning(" Node.js not found. Install Camofox via Docker:")
_print_info(" docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
elif post_setup_key == "kittentts":
try:
__import__("kittentts")
_print_success(" kittentts is already installed")
return
except ImportError:
pass
import subprocess
_print_info(" Installing kittentts (~25-80MB model, CPU-only)...")
wheel_url = (
"https://github.com/KittenML/KittenTTS/releases/download/"
"0.8.1/kittentts-0.8.1-py3-none-any.whl"
)
try:
result = subprocess.run(
[sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
capture_output=True, text=True, timeout=300,
)
if result.returncode == 0:
_print_success(" kittentts installed")
_print_info(" Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
_print_info(" Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)")
else:
_print_warning(" kittentts install failed:")
_print_info(f" {result.stderr.strip()[:300]}")
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
except subprocess.TimeoutExpired:
_print_warning(" kittentts install timed out (>5min)")
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
elif post_setup_key == "rl_training":
try:
__import__("tinker_atropos")
@@ -585,10 +546,6 @@ def _get_platform_tools(
ts_tools = set(resolve_toolset(ts_key))
if ts_tools and ts_tools.issubset(all_tool_names):
enabled_toolsets.add(ts_key)
default_off = set(_DEFAULT_OFF_TOOLSETS)
if platform in default_off:
default_off.remove(platform)
enabled_toolsets -= default_off
# Plugin toolsets: enabled by default unless explicitly disabled.
# A plugin toolset is "known" for a platform once `hermes tools`
@@ -876,7 +833,7 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
browser_cfg = config.get("browser", {})
return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg
if ts_key == "image_gen":
return not fal_key_is_configured()
return not get_env_value("FAL_KEY")
return not _toolset_has_keys(ts_key, config)
@@ -1218,17 +1175,17 @@ def _configure_simple_requirements(ts_key: str):
_print_warning(" Skipped")
elif idx == 1:
base_url = _prompt(" OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
is_native_openai = base_url_hostname(base_url) == "api.openai.com"
key_label = " OPENAI_API_KEY" if is_native_openai else " API key"
key_label = " OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else " API key"
api_key = _prompt(key_label, password=True)
if api_key and api_key.strip():
save_env_value("OPENAI_API_KEY", api_key.strip())
# Save vision base URL to config (not .env — only secrets go there)
from hermes_cli.config import load_config, save_config
_cfg = load_config()
_aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {})
_aux["base_url"] = base_url
save_config(_cfg)
if is_native_openai:
if "api.openai.com" in base_url.lower():
save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini")
_print_success(" Saved")
else:
+3 -223
View File
@@ -16,7 +16,6 @@ import json
import logging
import os
import secrets
import subprocess
import sys
import threading
import time
@@ -115,91 +114,6 @@ def _require_token(request: Request) -> None:
raise HTTPException(status_code=401, detail="Unauthorized")
# Accepted Host header values for loopback binds. DNS rebinding attacks
# point a victim browser at an attacker-controlled hostname (evil.test)
# which resolves to 127.0.0.1 after a TTL flip — bypassing same-origin
# checks because the browser now considers evil.test and our dashboard
# "same origin". Validating the Host header at the app layer rejects any
# request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7.
_LOOPBACK_HOST_VALUES: frozenset = frozenset({
"localhost", "127.0.0.1", "::1",
})
def _is_accepted_host(host_header: str, bound_host: str) -> bool:
"""True if the Host header targets the interface we bound to.
Accepts:
- Exact bound host (with or without port suffix)
- Loopback aliases when bound to loopback
- Any host when bound to 0.0.0.0 (explicit opt-in to non-loopback,
no protection possible at this layer)
"""
if not host_header:
return False
# Strip port suffix. IPv6 addresses use bracket notation:
# [::1] — no port
# [::1]:9119 — with port
# Plain hosts/v4:
# localhost:9119
# 127.0.0.1:9119
h = host_header.strip()
if h.startswith("["):
# IPv6 bracketed — port (if any) follows "]:"
close = h.find("]")
if close != -1:
host_only = h[1:close] # strip brackets
else:
host_only = h.strip("[]")
else:
host_only = h.rsplit(":", 1)[0] if ":" in h else h
host_only = host_only.lower()
# 0.0.0.0 bind means operator explicitly opted into all-interfaces
# (requires --insecure per web_server.start_server). No Host-layer
# defence can protect that mode; rely on operator network controls.
if bound_host in ("0.0.0.0", "::"):
return True
# Loopback bind: accept the loopback names
bound_lc = bound_host.lower()
if bound_lc in _LOOPBACK_HOST_VALUES:
return host_only in _LOOPBACK_HOST_VALUES
# Explicit non-loopback bind: require exact host match
return host_only == bound_lc
@app.middleware("http")
async def host_header_middleware(request: Request, call_next):
"""Reject requests whose Host header doesn't match the bound interface.
Defends against DNS rebinding: a victim browser on a localhost
dashboard is tricked into fetching from an attacker hostname that
TTL-flips to 127.0.0.1. CORS and same-origin checks don't help —
the browser now treats the attacker origin as same-origin with the
dashboard. Host-header validation at the app layer catches it.
See GHSA-ppp5-vxwm-4cf7.
"""
# Store the bound host on app.state so this middleware can read it —
# set by start_server() at listen time.
bound_host = getattr(app.state, "bound_host", None)
if bound_host:
host_header = request.headers.get("host", "")
if not _is_accepted_host(host_header, bound_host):
return JSONResponse(
status_code=400,
content={
"detail": (
"Invalid Host header. Dashboard requests must use "
"the hostname the server was bound to."
),
},
)
return await call_next(request)
@app.middleware("http")
async def auth_middleware(request: Request, call_next):
"""Require the session token on all /api/ routes except the public list."""
@@ -562,138 +476,6 @@ async def get_status():
}
# ---------------------------------------------------------------------------
# Gateway + update actions (invoked from the Status page).
#
# Both commands are spawned as detached subprocesses so the HTTP request
# returns immediately. stdin is closed (``DEVNULL``) so any stray ``input()``
# calls fail fast with EOF rather than hanging forever. stdout/stderr are
# streamed to a per-action log file under ``~/.hermes/logs/<action>.log`` so
# the dashboard can tail them back to the user.
# ---------------------------------------------------------------------------
_ACTION_LOG_DIR: Path = get_hermes_home() / "logs"
# Short ``name`` (from the URL) → absolute log file path.
_ACTION_LOG_FILES: Dict[str, str] = {
"gateway-restart": "gateway-restart.log",
"hermes-update": "hermes-update.log",
}
# ``name`` → most recently spawned Popen handle. Used so ``status`` can
# report liveness and exit code without shelling out to ``ps``.
_ACTION_PROCS: Dict[str, subprocess.Popen] = {}
def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen:
"""Spawn ``hermes <subcommand>`` detached and record the Popen handle.
Uses the running interpreter's ``hermes_cli.main`` module so the action
inherits the same venv/PYTHONPATH the web server is using.
"""
log_file_name = _ACTION_LOG_FILES[name]
_ACTION_LOG_DIR.mkdir(parents=True, exist_ok=True)
log_path = _ACTION_LOG_DIR / log_file_name
log_file = open(log_path, "ab", buffering=0)
log_file.write(
f"\n=== {name} started {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n".encode()
)
cmd = [sys.executable, "-m", "hermes_cli.main", *subcommand]
popen_kwargs: Dict[str, Any] = {
"cwd": str(PROJECT_ROOT),
"stdin": subprocess.DEVNULL,
"stdout": log_file,
"stderr": subprocess.STDOUT,
"env": {**os.environ, "HERMES_NONINTERACTIVE": "1"},
}
if sys.platform == "win32":
popen_kwargs["creationflags"] = (
subprocess.CREATE_NEW_PROCESS_GROUP # type: ignore[attr-defined]
| getattr(subprocess, "DETACHED_PROCESS", 0)
)
else:
popen_kwargs["start_new_session"] = True
proc = subprocess.Popen(cmd, **popen_kwargs)
_ACTION_PROCS[name] = proc
return proc
def _tail_lines(path: Path, n: int) -> List[str]:
"""Return the last ``n`` lines of ``path``. Reads the whole file — fine
for our small per-action logs. Binary-decoded with ``errors='replace'``
so log corruption doesn't 500 the endpoint."""
if not path.exists():
return []
try:
text = path.read_text(errors="replace")
except OSError:
return []
lines = text.splitlines()
return lines[-n:] if n > 0 else lines
@app.post("/api/gateway/restart")
async def restart_gateway():
"""Kick off a ``hermes gateway restart`` in the background."""
try:
proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart")
except Exception as exc:
_log.exception("Failed to spawn gateway restart")
raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}")
return {
"ok": True,
"pid": proc.pid,
"name": "gateway-restart",
}
@app.post("/api/hermes/update")
async def update_hermes():
"""Kick off ``hermes update`` in the background."""
try:
proc = _spawn_hermes_action(["update"], "hermes-update")
except Exception as exc:
_log.exception("Failed to spawn hermes update")
raise HTTPException(status_code=500, detail=f"Failed to start update: {exc}")
return {
"ok": True,
"pid": proc.pid,
"name": "hermes-update",
}
@app.get("/api/actions/{name}/status")
async def get_action_status(name: str, lines: int = 200):
"""Tail an action log and report whether the process is still running."""
log_file_name = _ACTION_LOG_FILES.get(name)
if log_file_name is None:
raise HTTPException(status_code=404, detail=f"Unknown action: {name}")
log_path = _ACTION_LOG_DIR / log_file_name
tail = _tail_lines(log_path, min(max(lines, 1), 2000))
proc = _ACTION_PROCS.get(name)
if proc is None:
running = False
exit_code: Optional[int] = None
pid: Optional[int] = None
else:
exit_code = proc.poll()
running = exit_code is None
pid = proc.pid
return {
"name": name,
"running": running,
"exit_code": exit_code,
"pid": pid,
"lines": tail,
}
@app.get("/api/sessions")
async def get_sessions(limit: int = 20, offset: int = 0):
try:
@@ -2541,15 +2323,13 @@ def start_server(
"authentication. Only use on trusted networks.", host,
)
# Record the bound host so host_header_middleware can validate incoming
# Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
app.state.bound_host = host
if open_browser:
import threading
import webbrowser
def _open():
time.sleep(1.0)
import time as _t
_t.sleep(1.0)
webbrowser.open(f"http://{host}:{port}")
threading.Thread(target=_open, daemon=True).start()
+3 -10
View File
@@ -47,19 +47,12 @@ def _effective_temperature_for_model(
model: str,
base_url: Optional[str] = None,
) -> Optional[float]:
"""Return a fixed temperature for models with strict sampling contracts.
Returns ``None`` when the model manages temperature server-side (Kimi);
callers must omit the ``temperature`` kwarg entirely in that case.
"""
"""Return a fixed temperature for models with strict sampling contracts."""
try:
from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE
from agent.auxiliary_client import _fixed_temperature_for_model
except Exception:
return None
result = _fixed_temperature_for_model(model, base_url)
if result is OMIT_TEMPERATURE:
return None # caller must omit temperature
return result
return _fixed_temperature_for_model(model, base_url)
+1 -2
View File
@@ -7,8 +7,7 @@
let
hermes-agent = inputs.self.packages.${system}.default;
hermes-tui = inputs.self.packages.${system}.tui;
hermes-web = inputs.self.packages.${system}.web;
packages = [ hermes-agent hermes-tui hermes-web ];
packages = [ hermes-agent hermes-tui ];
in {
devShells.default = pkgs.mkShell {
inputsFrom = packages;
-217
View File
@@ -1,217 +0,0 @@
# nix/lib.nix — Shared helpers for nix stuff
{ pkgs, npm-lockfile-fix }:
{
# Returns a buildNpmPackage-compatible attrs set that provides:
# patchPhase — ensures lockfile has exactly one trailing newline
# nativeBuildInputs — [ updateLockfileScript ] (list, prepend with ++ for more)
# passthru.devShellHook — stamp-checked npm install + hash auto-update
# passthru.npmLockfile — metadata for mkFixLockfiles
#
# NOTE: npmConfigHook runs `diff` between the source lockfile and the
# npm-deps cache lockfile. fetchNpmDeps preserves whatever trailing
# newlines the lockfile has. The patchPhase normalizes to exactly one
# trailing newline so both sides always match.
#
# Usage:
# npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
# pkgs.buildNpmPackage (npm // { ... } # or:
# pkgs.buildNpmPackage ({ ... } // npm)
mkNpmPassthru =
{
folder, # repo-relative folder with package.json, e.g. "ui-tui"
attr, # flake package attr, e.g. "tui"
pname, # e.g. "hermes-tui"
nixFile ? "nix/${attr}.nix", # defaults to nix/<attr>.nix
}:
{
patchPhase = ''
runHook prePatch
# Normalize trailing newlines so source and npm-deps always match,
# regardless of what fetchNpmDeps preserves.
sed -i -z 's/\n*$/\n/' package-lock.json
# Make npmConfigHook's byte-for-byte diff newline-agnostic by
# replacing its hardcoded /nix/store/.../diff with a wrapper that
# normalizes trailing newlines on both sides before comparing.
mkdir -p "$TMPDIR/bin"
cat > "$TMPDIR/bin/diff" << DIFFWRAP
#!/bin/sh
f1=\$(mktemp) && sed -z 's/\n*$/\n/' "\$1" > "\$f1"
f2=\$(mktemp) && sed -z 's/\n*$/\n/' "\$2" > "\$f2"
${pkgs.diffutils}/bin/diff "\$f1" "\$f2" && rc=0 || rc=\$?
rm -f "\$f1" "\$f2"
exit \$rc
DIFFWRAP
chmod +x "$TMPDIR/bin/diff"
export PATH="$TMPDIR/bin:$PATH"
runHook postPatch
'';
nativeBuildInputs = [
(pkgs.writeShellScriptBin "update_${attr}_lockfile" ''
set -euox pipefail
REPO_ROOT=$(git rev-parse --show-toplevel)
cd "$REPO_ROOT/${folder}"
rm -rf node_modules/
npm cache clean --force
CI=true npm install
${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
NIX_FILE="$REPO_ROOT/${nixFile}"
sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE
NIX_OUTPUT=$(nix build .#${attr} 2>&1 || true)
NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}')
echo got new hash $NEW_HASH
sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE
nix build .#${attr}
echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
'')
];
passthru = {
devShellHook = pkgs.writeShellScript "npm-dev-hook-${pname}" ''
REPO_ROOT=$(git rev-parse --show-toplevel)
_hermes_npm_stamp() {
sha256sum "${folder}/package.json" "${folder}/package-lock.json" \
2>/dev/null | sha256sum | awk '{print $1}'
}
STAMP=".nix-stamps/${pname}"
STAMP_VALUE="$(_hermes_npm_stamp)"
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
echo "${pname}: installing npm dependencies..."
( cd ${folder} && CI=true npm install --silent --no-fund --no-audit 2>/dev/null )
# Auto-update the nix hash so it stays in sync with the lockfile
echo "${pname}: prefetching npm deps..."
NIX_FILE="$REPO_ROOT/${nixFile}"
if NEW_HASH=$(${pkgs.lib.getExe pkgs.prefetch-npm-deps} "${folder}/package-lock.json" 2>/dev/null); then
sed -i "s|hash = \"sha256-[A-Za-z0-9+/=]+\"|hash = \"$NEW_HASH\";|" "$NIX_FILE"
echo "${pname}: updated hash to $NEW_HASH"
else
echo "${pname}: warning: prefetch failed, run 'nix run .#fix-lockfiles -- --apply' manually" >&2
fi
mkdir -p .nix-stamps
_hermes_npm_stamp > "$STAMP"
fi
unset -f _hermes_npm_stamp
'';
npmLockfile = {
inherit attr folder nixFile;
};
};
};
# Aggregate `fix-lockfiles` bin from a list of packages carrying
# passthru.npmLockfile = { attr; folder; nixFile; };
# Invocations:
# fix-lockfiles --check # exit 1 if any hash is stale
# fix-lockfiles --apply # rewrite stale hashes in place
# Writes machine-readable fields (stale, changed, report) to $GITHUB_OUTPUT
# when set, so CI workflows can post a sticky PR comment directly.
mkFixLockfiles =
{
packages, # list of packages with passthru.npmLockfile
}:
let
entries = map (p: p.passthru.npmLockfile) packages;
entryArgs = pkgs.lib.concatMapStringsSep " " (e: "\"${e.attr}:${e.folder}:${e.nixFile}\"") entries;
in
pkgs.writeShellScriptBin "fix-lockfiles" ''
set -uox pipefail
MODE="''${1:---check}"
case "$MODE" in
--check|--apply) ;;
-h|--help)
echo "usage: fix-lockfiles [--check|--apply]"
exit 0 ;;
*)
echo "usage: fix-lockfiles [--check|--apply]" >&2
exit 2 ;;
esac
ENTRIES=(${entryArgs})
REPO_ROOT="$(git rev-parse --show-toplevel)"
cd "$REPO_ROOT"
# When running in GH Actions, emit Markdown links in the report pointing
# at the offending line of the nix file (and the lockfile) at the exact
# commit that was checked. LINK_SHA should be set by the workflow to the
# PR head SHA; falls back to GITHUB_SHA (which on pull_request is the
# test-merge commit, still browseable).
LINK_SERVER="''${GITHUB_SERVER_URL:-https://github.com}"
LINK_REPO="''${GITHUB_REPOSITORY:-}"
LINK_SHA="''${LINK_SHA:-''${GITHUB_SHA:-}}"
STALE=0
FIXED=0
REPORT=""
for entry in "''${ENTRIES[@]}"; do
IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
STATUS=$?
if [ "$STATUS" -eq 0 ]; then
echo " ok"
continue
fi
NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
if [ -z "$NEW_HASH" ]; then
echo " build failed with no hash mismatch:" >&2
echo "$OUTPUT" | tail -40 >&2
exit 1
fi
HASH_LINE=$(grep -n 'hash = "sha256-' "$NIX_FILE" | head -1 | cut -d: -f1)
OLD_HASH=$(grep -oE 'hash = "sha256-[^"]+"' "$NIX_FILE" | head -1 \
| sed -E 's/hash = "(.*)"/\1/')
LOCK_FILE="$FOLDER/package-lock.json"
echo " stale: $NIX_FILE:$HASH_LINE $OLD_HASH -> $NEW_HASH"
STALE=1
if [ -n "$LINK_REPO" ] && [ -n "$LINK_SHA" ]; then
NIX_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$NIX_FILE#L$HASH_LINE"
LOCK_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$LOCK_FILE"
REPORT+="- [\`$NIX_FILE:$HASH_LINE\`]($NIX_URL) (\`.#$ATTR\`): \`$OLD_HASH\` \`$NEW_HASH\` lockfile: [\`$LOCK_FILE\`]($LOCK_URL)"$'\n'
else
REPORT+="- \`$NIX_FILE:$HASH_LINE\` (\`.#$ATTR\`): \`$OLD_HASH\` \`$NEW_HASH\`"$'\n'
fi
if [ "$MODE" = "--apply" ]; then
sed -i "s|hash = \"sha256-[^\"]*\";|hash = \"$NEW_HASH\";|" "$NIX_FILE"
nix build ".#$ATTR.npmDeps" --no-link --print-build-logs
FIXED=1
echo " fixed"
fi
done
if [ -n "''${GITHUB_OUTPUT:-}" ]; then
{
[ "$STALE" -eq 1 ] && echo "stale=true" || echo "stale=false"
[ "$FIXED" -eq 1 ] && echo "changed=true" || echo "changed=false"
if [ -n "$REPORT" ]; then
echo "report<<REPORT_EOF"
printf "%s" "$REPORT"
echo "REPORT_EOF"
fi
} >> "$GITHUB_OUTPUT"
fi
if [ "$STALE" -eq 1 ] && [ "$MODE" = "--check" ]; then
echo
echo "Stale lockfile hashes detected. Run:"
echo " nix run .#fix-lockfiles -- --apply"
exit 1
fi
exit 0
'';
}
+2 -10
View File
@@ -8,12 +8,8 @@
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
};
hermesNpmLib = pkgs.callPackage ./lib.nix {
npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
};
hermesTui = pkgs.callPackage ./tui.nix {
inherit hermesNpmLib;
npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
};
# Import bundled skills, excluding runtime caches
@@ -23,7 +19,7 @@
};
hermesWeb = pkgs.callPackage ./web.nix {
inherit hermesNpmLib;
npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
};
runtimeDeps = with pkgs; [
@@ -115,10 +111,6 @@
tui = hermesTui;
web = hermesWeb;
fix-lockfiles = hermesNpmLib.mkFixLockfiles {
packages = [ hermesTui hermesWeb ];
};
};
};
}
+43 -6
View File
@@ -1,18 +1,18 @@
# nix/tui.nix — Hermes TUI (Ink/React) compiled with tsc and bundled
{ pkgs, hermesNpmLib, ... }:
{ pkgs, npm-lockfile-fix, ... }:
let
src = ../ui-tui;
npmDeps = pkgs.fetchNpmDeps {
inherit src;
hash = "sha256-RU4qSHgJPMyfRSEJDzkG4+MReDZDc6QbTD2wisa5QE0=";
hash = "sha256-mG3vpgGi4ljt4X3XIf3I/5mIcm+rVTUAmx2DQ6YVA90=";
};
npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
packageJson = builtins.fromJSON (builtins.readFile (src + "/package.json"));
version = packageJson.version;
npmLockHash = builtins.hashString "sha256" (builtins.readFile ../ui-tui/package-lock.json);
in
pkgs.buildNpmPackage (npm // {
pkgs.buildNpmPackage {
pname = "hermes-tui";
inherit src npmDeps version;
@@ -37,4 +37,41 @@ pkgs.buildNpmPackage (npm // {
runHook postInstall
'';
})
nativeBuildInputs = [
(pkgs.writeShellScriptBin "update_tui_lockfile" ''
set -euox pipefail
# get root of repo
REPO_ROOT=$(git rev-parse --show-toplevel)
# cd into ui-tui and reinstall
cd "$REPO_ROOT/ui-tui"
rm -rf node_modules/
npm cache clean --force
CI=true npm install # ci env var to suppress annoying unicode install banner lag
${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
NIX_FILE="$REPO_ROOT/nix/tui.nix"
# compute the new hash
sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE
NIX_OUTPUT=$(nix build .#tui 2>&1 || true)
NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}')
echo got new hash $NEW_HASH
sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE
nix build .#tui
echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
'')
];
passthru.devShellHook = ''
STAMP=".nix-stamps/hermes-tui"
STAMP_VALUE="${npmLockHash}"
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
echo "hermes-tui: installing npm dependencies..."
cd ui-tui && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd ..
mkdir -p .nix-stamps
echo "$STAMP_VALUE" > "$STAMP"
fi
'';
}
+39 -5
View File
@@ -1,15 +1,15 @@
# nix/web.nix — Hermes Web Dashboard (Vite/React) frontend build
{ pkgs, hermesNpmLib, ... }:
{ pkgs, npm-lockfile-fix, ... }:
let
src = ../web;
npmDeps = pkgs.fetchNpmDeps {
inherit src;
hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg=";
hash = "sha256-Y0pOzdFG8BLjfvCLmsvqYpjxFjAQabXp1i7X9W/cCU4=";
};
npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
npmLockHash = builtins.hashString "sha256" (builtins.readFile ../web/package-lock.json);
in
pkgs.buildNpmPackage (npm // {
pkgs.buildNpmPackage {
pname = "hermes-web";
version = "0.0.0";
inherit src npmDeps;
@@ -26,4 +26,38 @@ pkgs.buildNpmPackage (npm // {
cp -r dist $out
runHook postInstall
'';
})
nativeBuildInputs = [
(pkgs.writeShellScriptBin "update_web_lockfile" ''
set -euox pipefail
REPO_ROOT=$(git rev-parse --show-toplevel)
cd "$REPO_ROOT/web"
rm -rf node_modules/
npm cache clean --force
CI=true npm install
${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
NIX_FILE="$REPO_ROOT/nix/web.nix"
sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE
NIX_OUTPUT=$(nix build .#web 2>&1 || true)
NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}')
echo got new hash $NEW_HASH
sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE
nix build .#web
echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
'')
];
passthru.devShellHook = ''
STAMP=".nix-stamps/hermes-web"
STAMP_VALUE="${npmLockHash}"
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
echo "hermes-web: installing npm dependencies..."
cd web && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd ..
mkdir -p .nix-stamps
echo "$STAMP_VALUE" > "$STAMP"
fi
'';
}
-3
View File
@@ -1,3 +0,0 @@
# Dogfood — Advanced QA & Testing Skills
Specialized QA workflows that go beyond basic bug-finding. These skills use structured methodologies to surface UX friction, accessibility issues, and product-level problems that standard testing misses.
@@ -1,190 +0,0 @@
---
name: adversarial-ux-test
description: Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable tickets from genuine issues only.
version: 1.0.0
author: Omni @ Comelse
license: MIT
metadata:
hermes:
tags: [qa, ux, testing, adversarial, dogfood, personas, user-testing]
related_skills: [dogfood]
---
# Adversarial UX Test
Roleplay the worst-case user for your product — the person who hates technology, doesn't want your software, and will find every reason to complain. Then filter their feedback through a pragmatism layer to separate real UX problems from "I hate computers" noise.
Think of it as an automated "mom test" — but angry.
## Why This Works
Most QA finds bugs. This finds **friction**. A technically correct app can still be unusable for real humans. The adversarial persona catches:
- Confusing terminology that makes sense to developers but not users
- Too many steps to accomplish basic tasks
- Missing onboarding or "aha moments"
- Accessibility issues (font size, contrast, click targets)
- Cold-start problems (empty states, no demo content)
- Paywall/signup friction that kills conversion
The **pragmatism filter** (Phase 3) is what makes this useful instead of just entertaining. Without it, you'd add a "print this page" button to every screen because Grandpa can't figure out PDFs.
## How to Use
Tell the agent:
```
"Run an adversarial UX test on [URL]"
"Be a grumpy [persona type] and test [app name]"
"Do an asshole user test on my staging site"
```
You can provide a persona or let the agent generate one based on your product's target audience.
## Step 1: Define the Persona
If no persona is provided, generate one by answering:
1. **Who is the HARDEST user for this product?** (age 50+, non-technical role, decades of experience doing it "the old way")
2. **What is their tech comfort level?** (the lower the better — WhatsApp-only, paper notebooks, wife set up their email)
3. **What is the ONE thing they need to accomplish?** (their core job, not your feature list)
4. **What would make them give up?** (too many clicks, jargon, slow, confusing)
5. **How do they talk when frustrated?** (blunt, sweary, dismissive, sighing)
### Good Persona Example
> **"Big Mick" McAllister** — 58-year-old S&C coach. Uses WhatsApp and that's it. His "spreadsheet" is a paper notebook. "If I can't figure it out in 10 seconds I'm going back to my notebook." Needs to log session results for 25 players. Hates small text, jargon, and passwords.
### Bad Persona Example
> "A user who doesn't like the app" — too vague, no constraints, no voice.
The persona must be **specific enough to stay in character** for 20 minutes of testing.
## Step 2: Become the Asshole (Browse as the Persona)
1. Read any available project docs for app context and URLs
2. **Fully inhabit the persona** — their frustrations, limitations, goals
3. Navigate to the app using browser tools
4. **Attempt the persona's ACTUAL TASKS** (not a feature tour):
- Can they do what they came to do?
- How many clicks/screens to accomplish it?
- What confuses them?
- What makes them angry?
- Where do they get lost?
- What would make them give up and go back to their old way?
5. Test these friction categories:
- **First impression** — would they even bother past the landing page?
- **Core workflow** — the ONE thing they need to do most often
- **Error recovery** — what happens when they do something wrong?
- **Readability** — text size, contrast, information density
- **Speed** — does it feel faster than their current method?
- **Terminology** — any jargon they wouldn't understand?
- **Navigation** — can they find their way back? do they know where they are?
6. Take screenshots of every pain point
7. Check browser console for JS errors on every page
## Step 3: The Rant (Write Feedback in Character)
Write the feedback AS THE PERSONA — in their voice, with their frustrations. This is not a bug report. This is a real human venting.
```
[PERSONA NAME]'s Review of [PRODUCT]
Overall: [Would they keep using it? Yes/No/Maybe with conditions]
THE GOOD (grudging admission):
- [things even they have to admit work]
THE BAD (legitimate UX issues):
- [real problems that would stop them from using the product]
THE UGLY (showstoppers):
- [things that would make them uninstall/cancel immediately]
SPECIFIC COMPLAINTS:
1. [Page/feature]: "[quote in persona voice]" — [what happened, expected]
2. ...
VERDICT: "[one-line persona quote summarizing their experience]"
```
## Step 4: The Pragmatism Filter (Critical — Do Not Skip)
Step OUT of the persona. Evaluate each complaint as a product person:
- **RED: REAL UX BUG** — Any user would have this problem, not just grumpy ones. Fix it.
- **YELLOW: VALID BUT LOW PRIORITY** — Real issue but only for extreme users. Note it.
- **WHITE: PERSONA NOISE** — "I hate computers" talking, not a product problem. Skip it.
- **GREEN: FEATURE REQUEST** — Good idea hidden in the complaint. Consider it.
### Filter Criteria
1. Would a 35-year-old competent-but-busy user have the same complaint? → RED
2. Is this a genuine accessibility issue (font size, contrast, click targets)? → RED
3. Is this "I want it to work like paper" resistance to digital? → WHITE
4. Is this a real workflow inefficiency the persona stumbled on? → YELLOW or RED
5. Would fixing this add complexity for the 80% who are fine? → WHITE
6. Does the complaint reveal a missing onboarding moment? → GREEN
**This filter is MANDATORY.** Never ship raw persona complaints as tickets.
## Step 5: Create Tickets
For **RED** and **GREEN** items only:
- Clear, actionable title
- Include the persona's verbatim quote (entertaining + memorable)
- The real UX issue underneath (objective)
- A suggested fix (actionable)
- Tag/label: "ux-review"
For **YELLOW** items: one catch-all ticket with all notes.
**WHITE** items appear in the report only. No tickets.
**Max 10 tickets per session** — focus on the worst issues.
## Step 6: Report
Deliver:
1. The persona rant (Step 3) — entertaining and visceral
2. The filtered assessment (Step 4) — pragmatic and actionable
3. Tickets created (Step 5) — with links
4. Screenshots of key issues
## Tips
- **One persona per session.** Don't mix perspectives.
- **Stay in character during Steps 2-3.** Break character only at Step 4.
- **Test the CORE WORKFLOW first.** Don't get distracted by settings pages.
- **Empty states are gold.** New user experience reveals the most friction.
- **The best findings are RED items the persona found accidentally** while trying to do something else.
- **If the persona has zero complaints, your persona is too tech-savvy.** Make them older, less patient, more set in their ways.
- **Run this before demos, launches, or after shipping a batch of features.**
- **Register as a NEW user when possible.** Don't use pre-seeded admin accounts — the cold start experience is where most friction lives.
- **Zero WHITE items is a signal, not a failure.** If the pragmatism filter finds no noise, your product has real UX problems, not just a grumpy persona.
- **Check known issues in project docs AFTER the test.** If the persona found a bug that's already in the known issues list, that's actually the most damning finding — it means the team knew about it but never felt the user's pain.
- **Subscription/paywall testing is critical.** Test with expired accounts, not just active ones. The "what happens when you can't pay" experience reveals whether the product respects users or holds their data hostage.
- **Count the clicks to accomplish the persona's ONE task.** If it's more than 5, that's almost always a RED finding regardless of persona tech level.
## Example Personas by Industry
These are starting points — customize for your specific product:
| Product Type | Persona | Age | Key Trait |
|-------------|---------|-----|-----------|
| CRM | Retirement home director | 68 | Filing cabinet is the current CRM |
| Photography SaaS | Rural wedding photographer | 62 | Books clients by phone, invoices on paper |
| AI/ML Tool | Department store buyer | 55 | Burned by 3 failed tech startups |
| Fitness App | Old-school gym coach | 58 | Paper notebook, thick fingers, bad eyes |
| Accounting | Family bakery owner | 64 | Shoebox of receipts, hates subscriptions |
| E-commerce | Market stall vendor | 60 | Cash only, smartphone is for calls |
| Healthcare | Senior GP | 63 | Dictates notes, nurse handles the computer |
| Education | Veteran teacher | 57 | Chalk and talk, worksheets in ring binders |
## Rules
- Stay in character during Steps 2-3
- Be genuinely mean but fair — find real problems, not manufactured ones
- The pragmatism filter (Step 4) is **MANDATORY**
- Screenshots required for every complaint
- Max 10 tickets per session
- Test on staging/deployed app, not local dev
- One persona, one session, one report
-3
View File
@@ -1069,7 +1069,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"baseline-browser-mapping": "^2.10.12",
"caniuse-lite": "^1.0.30001782",
@@ -3912,7 +3911,6 @@
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz",
"integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==",
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"playwright-core": "1.59.1"
},
@@ -3931,7 +3929,6 @@
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz",
"integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==",
"license": "Apache-2.0",
"peer": true,
"bin": {
"playwright-core": "cli.js"
},
+36
View File
@@ -0,0 +1,36 @@
# NOTE: This file is maintained for convenience only.
# The canonical dependency list is in pyproject.toml.
# Preferred install: pip install -e ".[all]"
# Core dependencies
openai
python-dotenv
fire
httpx
rich
tenacity
prompt_toolkit
pyyaml
requests
jinja2
pydantic>=2.0
PyJWT[crypto]
debugpy
# Web tools
firecrawl-py
parallel-web>=0.4.2
# Image generation
fal-client
# Text-to-speech (Edge TTS is free, no API key needed)
edge-tts
# Optional: For cron expression parsing (cronjob scheduling)
croniter
# Optional: For messaging platform integrations (gateway)
python-telegram-bot[webhooks]>=22.6
discord.py>=2.0
aiohttp>=3.9.0
+860 -390
View File
File diff suppressed because it is too large Load Diff
-23
View File
@@ -46,8 +46,6 @@ AUTHOR_MAP = {
# contributors (from noreply pattern)
"snreynolds2506@gmail.com": "snreynolds",
"35742124+0xbyt4@users.noreply.github.com": "0xbyt4",
"71184274+MassiveMassimo@users.noreply.github.com": "MassiveMassimo",
"massivemassimo@users.noreply.github.com": "MassiveMassimo",
"82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
"kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
"kshitijk4poor@gmail.com": "kshitijk4poor",
@@ -55,9 +53,6 @@ AUTHOR_MAP = {
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
"101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
"valdi.jorge@gmail.com": "jvcl",
"francip@gmail.com": "francip",
"omni@comelse.com": "omnissiah-comelse",
"oussama.redcode@gmail.com": "mavrickdeveloper",
"126368201+vilkasdev@users.noreply.github.com": "vilkasdev",
"137614867+cutepawss@users.noreply.github.com": "cutepawss",
"96793918+memosr@users.noreply.github.com": "memosr",
@@ -96,25 +91,19 @@ AUTHOR_MAP = {
"i@troy-y.org": "TroyMitchell911",
"mygamez@163.com": "zhongyueming1121",
"hansnow@users.noreply.github.com": "hansnow",
"134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY",
"ben.burtenshaw@gmail.com": "burtenshaw",
# contributors (manual mapping from git names)
"ahmedsherif95@gmail.com": "asheriif",
"liujinkun@bytedance.com": "liujinkun2025",
"dmayhem93@gmail.com": "dmahan93",
"fr@tecompanytea.com": "ifrederico",
"cdanis@gmail.com": "cdanis",
"samherring99@gmail.com": "samherring99",
"desaiaum08@gmail.com": "Aum08Desai",
"shannon.sands.1979@gmail.com": "shannonsands",
"shannon@nousresearch.com": "shannonsands",
"abdi.moya@gmail.com": "AxDSan",
"eri@plasticlabs.ai": "Erosika",
"hjcpuro@gmail.com": "hjc-puro",
"xaydinoktay@gmail.com": "aydnOktay",
"abdullahfarukozden@gmail.com": "Farukest",
"lovre.pesut@gmail.com": "rovle",
"xjtumj@gmail.com": "mengjian-github",
"kevinskysunny@gmail.com": "kevinskysunny",
"xiewenxuan462@gmail.com": "yule975",
"yiweimeng.dlut@hotmail.com": "meng93",
@@ -124,12 +113,10 @@ AUTHOR_MAP = {
"alexazzjjtt@163.com": "alexzhu0",
"1180176+Swift42@users.noreply.github.com": "Swift42",
"ruzzgarcn@gmail.com": "Ruzzgar",
"yukipukikedy@gmail.com": "Yukipukii1",
"alireza78.crypto@gmail.com": "alireza78a",
"brooklyn.bb.nicholson@gmail.com": "brooklynnicholson",
"withapurpose37@gmail.com": "StefanIsMe",
"4317663+helix4u@users.noreply.github.com": "helix4u",
"ifkellx@users.noreply.github.com": "Ifkellx",
"331214+counterposition@users.noreply.github.com": "counterposition",
"blspear@gmail.com": "BrennerSpear",
"akhater@gmail.com": "akhater",
@@ -186,7 +173,6 @@ AUTHOR_MAP = {
"simon@simonmarcus.org": "simon-marcus",
"xowiekk@gmail.com": "Xowiek",
"1243352777@qq.com": "zons-zhaozhy",
"e.silacandmr@gmail.com": "Es1la",
# ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply
# crossref, and GH contributor list matching (April 2026 audit) ──
"1115117931@qq.com": "aaronagent",
@@ -215,8 +201,6 @@ AUTHOR_MAP = {
"don.rhm@gmail.com": "donrhmexe",
"dorukardahan@hotmail.com": "dorukardahan",
"dsocolobsky@gmail.com": "dsocolobsky",
"dylan.socolobsky@lambdaclass.com": "dsocolobsky",
"ignacio.avecilla@lambdaclass.com": "IAvecilla",
"duerzy@gmail.com": "duerzy",
"emozilla@nousresearch.com": "emozilla",
"fancydirty@gmail.com": "fancydirty",
@@ -304,7 +288,6 @@ AUTHOR_MAP = {
"ywt000818@gmail.com": "OwenYWT",
"dhandhalyabhavik@gmail.com": "v1k22",
"rucchizhao@zhaochenfeideMacBook-Pro.local": "RucchiZ",
"tannerfokkens@Mac.attlocal.net": "tannerfokkens-maker",
"lehaolin98@outlook.com": "LehaoLin",
"yuewang1@microsoft.com": "imink",
"1736355688@qq.com": "hedgeho9X",
@@ -315,7 +298,6 @@ AUTHOR_MAP = {
"anthhub@163.com": "anthhub",
"shenuu@gmail.com": "shenuu",
"xiayh17@gmail.com": "xiayh0107",
"zhujianxyz@gmail.com": "opriz",
"asurla@nvidia.com": "anniesurla",
"limkuan24@gmail.com": "WideLee",
"aviralarora002@gmail.com": "AviArora02-commits",
@@ -328,11 +310,6 @@ AUTHOR_MAP = {
"261797239+lumenradley@users.noreply.github.com": "lumenradley",
"166376523+sjz-ks@users.noreply.github.com": "sjz-ks",
"haileymarshall005@gmail.com": "haileymarshall",
"aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80",
"zheng.jerilyn@gmail.com": "jerilynzheng",
"asslaenn5@gmail.com": "Aslaaen",
"shalompmc0505@naver.com": "pinion05",
"105142614+VTRiot@users.noreply.github.com": "VTRiot",
}
-31
View File
@@ -372,37 +372,6 @@ async function startSocket() {
const app = express();
app.use(express.json());
// Host-header validation — defends against DNS rebinding.
// The bridge binds loopback-only (127.0.0.1) but a victim browser on
// the same machine could be tricked into fetching from an attacker
// hostname that TTL-flips to 127.0.0.1. Reject any request whose Host
// header doesn't resolve to a loopback alias.
// See GHSA-ppp5-vxwm-4cf7.
const _ACCEPTED_HOST_VALUES = new Set([
'localhost',
'127.0.0.1',
'[::1]',
'::1',
]);
app.use((req, res, next) => {
const raw = (req.headers.host || '').trim();
if (!raw) {
return res.status(400).json({ error: 'Missing Host header' });
}
// Strip port suffix: "localhost:3000" → "localhost"
const hostOnly = (raw.includes(':')
? raw.substring(0, raw.lastIndexOf(':'))
: raw
).replace(/^\[|\]$/g, '').toLowerCase();
if (!_ACCEPTED_HOST_VALUES.has(hostOnly)) {
return res.status(400).json({
error: 'Invalid Host header. Bridge accepts loopback hosts only.',
});
}
next();
});
// Poll for new messages (long-poll style)
app.get('/messages', (req, res) => {
const msgs = messageQueue.splice(0, messageQueue.length);
+324 -134
View File
@@ -1,112 +1,217 @@
---
name: llama-cpp
description: llama.cpp local GGUF inference + HF Hub model discovery.
version: 2.1.2
description: Run LLM inference with llama.cpp on CPU, Apple Silicon, AMD/Intel GPUs, or NVIDIA — plus GGUF model conversion and quantization (28 bit with K-quants and imatrix). Covers CLI, Python bindings, OpenAI-compatible server, and Ollama/LM Studio integration. Use for edge deployment, M1/M2/M3/M4 Macs, CUDA-less environments, or flexible local quantization.
version: 2.0.0
author: Orchestra Research
license: MIT
dependencies: [llama-cpp-python>=0.2.0]
metadata:
hermes:
tags: [llama.cpp, GGUF, Quantization, Hugging Face Hub, CPU Inference, Apple Silicon, Edge Deployment, AMD GPUs, Intel GPUs, NVIDIA, URL-first]
tags: [llama.cpp, GGUF, Quantization, CPU Inference, Apple Silicon, Edge Deployment, Non-NVIDIA, AMD GPUs, Intel GPUs, Embedded, Model Compression]
---
# llama.cpp + GGUF
Use this skill for local GGUF inference, quant selection, or Hugging Face repo discovery for llama.cpp.
Pure C/C++ LLM inference with minimal dependencies, plus the GGUF (GPT-Generated Unified Format) standard used for quantized weights. One toolchain covers conversion, quantization, and serving.
## When to use
- Run local models on CPU, Apple Silicon, CUDA, ROCm, or Intel GPUs
- Find the right GGUF for a specific Hugging Face repo
- Build a `llama-server` or `llama-cli` command from the Hub
- Search the Hub for models that already support llama.cpp
- Enumerate available `.gguf` files and sizes for a repo
- Decide between Q4/Q5/Q6/IQ variants for the user's RAM or VRAM
**Use llama.cpp + GGUF when:**
- Running on CPU-only machines or Apple Silicon (M1/M2/M3/M4) with Metal acceleration
- Using AMD (ROCm) or Intel GPUs where CUDA isn't available
- Edge deployment (Raspberry Pi, embedded systems, consumer laptops)
- Need flexible quantization (28 bit with K-quants)
- Want local AI tools (LM Studio, Ollama, text-generation-webui, koboldcpp)
- Want a single binary deploy without Docker/Python
## Model Discovery workflow
**Key advantages:**
- Universal hardware: CPU, Apple Silicon, NVIDIA, AMD, Intel
- No Python runtime required (pure C/C++)
- K-quants + imatrix for better low-bit quality
- OpenAI-compatible server built in
- Rich ecosystem (Ollama, LM Studio, llama-cpp-python)
Prefer URL workflows before asking for `hf`, Python, or custom scripts.
1. Search for candidate repos on the Hub:
- Base: `https://huggingface.co/models?apps=llama.cpp&sort=trending`
- Add `search=<term>` for a model family
- Add `num_parameters=min:0,max:24B` or similar when the user has size constraints
2. Open the repo with the llama.cpp local-app view:
- `https://huggingface.co/<repo>?local-app=llama.cpp`
3. Treat the local-app snippet as the source of truth when it is visible:
- copy the exact `llama-server` or `llama-cli` command
- report the recommended quant exactly as HF shows it
4. Read the same `?local-app=llama.cpp` URL as page text or HTML and extract the section under `Hardware compatibility`:
- prefer its exact quant labels and sizes over generic tables
- keep repo-specific labels such as `UD-Q4_K_M` or `IQ4_NL_XL`
- if that section is not visible in the fetched page source, say so and fall back to the tree API plus generic quant guidance
5. Query the tree API to confirm what actually exists:
- `https://huggingface.co/api/models/<repo>/tree/main?recursive=true`
- keep entries where `type` is `file` and `path` ends with `.gguf`
- use `path` and `size` as the source of truth for filenames and byte sizes
- separate quantized checkpoints from `mmproj-*.gguf` projector files and `BF16/` shard files
- use `https://huggingface.co/<repo>/tree/main` only as a human fallback
6. If the local-app snippet is not text-visible, reconstruct the command from the repo plus the chosen quant:
- shorthand quant selection: `llama-server -hf <repo>:<QUANT>`
- exact-file fallback: `llama-server --hf-repo <repo> --hf-file <filename.gguf>`
7. Only suggest conversion from Transformers weights if the repo does not already expose GGUF files.
**Use alternatives instead:**
- **vLLM** — NVIDIA GPUs, PagedAttention, Python-first, max throughput
- **TensorRT-LLM** — Production NVIDIA (A100/H100), maximum speed
- **AWQ/GPTQ** — Calibrated quantization for NVIDIA-only deployments
- **bitsandbytes** — Simple HuggingFace transformers integration
- **HQQ** — Fast calibration-free quantization
## Quick start
### Install llama.cpp
### Install
```bash
# macOS / Linux (simplest)
brew install llama.cpp
```
```bash
winget install llama.cpp
```
```bash
# Or build from source
git clone https://github.com/ggml-org/llama.cpp
cd llama.cpp
cmake -B build
cmake --build build --config Release
make # CPU
make GGML_METAL=1 # Apple Silicon
make GGML_CUDA=1 # NVIDIA CUDA
make LLAMA_HIP=1 # AMD ROCm
# Python bindings (optional)
pip install llama-cpp-python
# With CUDA: CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir
# With Metal: CMAKE_ARGS="-DGGML_METAL=on" pip install llama-cpp-python --force-reinstall --no-cache-dir
```
### Run directly from the Hugging Face Hub
### Download a pre-quantized GGUF
```bash
llama-cli -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0
# TheBloke hosts most popular models pre-quantized
huggingface-cli download \
TheBloke/Llama-2-7B-Chat-GGUF \
llama-2-7b-chat.Q4_K_M.gguf \
--local-dir models/
```
### Or convert a HuggingFace model to GGUF
```bash
llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0
# 1. Download HF model
huggingface-cli download meta-llama/Llama-3.1-8B --local-dir ./llama-3.1-8b
# 2. Convert to FP16 GGUF
python convert_hf_to_gguf.py ./llama-3.1-8b \
--outfile llama-3.1-8b-f16.gguf \
--outtype f16
# 3. Quantize to Q4_K_M
./llama-quantize llama-3.1-8b-f16.gguf llama-3.1-8b-q4_k_m.gguf Q4_K_M
```
### Run an exact GGUF file from the Hub
Use this when the tree API shows custom file naming or the exact HF snippet is missing.
### Run inference
```bash
llama-server \
--hf-repo microsoft/Phi-3-mini-4k-instruct-gguf \
--hf-file Phi-3-mini-4k-instruct-q4.gguf \
-c 4096
# One-shot prompt
./llama-cli -m model.Q4_K_M.gguf -p "Explain quantum computing" -n 256
# Interactive chat
./llama-cli -m model.Q4_K_M.gguf --interactive
# With GPU offload
./llama-cli -m model.Q4_K_M.gguf -ngl 35 -p "Hello!"
```
### OpenAI-compatible server check
### Serve an OpenAI-compatible API
```bash
./llama-server \
-m model.Q4_K_M.gguf \
--host 0.0.0.0 \
--port 8080 \
-ngl 35 \
-c 4096 \
--parallel 4 \
--cont-batching
```
```bash
curl http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"messages": [
{"role": "user", "content": "Write a limerick about Python exceptions"}
]
"model": "local",
"messages": [{"role": "user", "content": "Hello!"}],
"temperature": 0.7,
"max_tokens": 100
}'
```
## Python bindings (llama-cpp-python)
## Quantization formats (GGUF)
`pip install llama-cpp-python` (CUDA: `CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir`; Metal: `CMAKE_ARGS="-DGGML_METAL=on" ...`).
### K-quant methods (recommended)
| Type | Bits | Size (7B) | Quality | Use Case |
|------|------|-----------|---------|----------|
| Q2_K | 2.5 | ~2.8 GB | Low | Extreme compression (testing only) |
| Q3_K_S | 3.0 | ~3.0 GB | Low-Med | Memory constrained |
| Q3_K_M | 3.3 | ~3.3 GB | Medium | Fits small devices |
| Q4_K_S | 4.0 | ~3.8 GB | Med-High | Speed critical |
| **Q4_K_M** | 4.5 | ~4.1 GB | High | **Recommended default** |
| Q5_K_S | 5.0 | ~4.6 GB | High | Quality focused |
| Q5_K_M | 5.5 | ~4.8 GB | Very High | High quality |
| Q6_K | 6.0 | ~5.5 GB | Excellent | Near-original |
| Q8_0 | 8.0 | ~7.2 GB | Best | Maximum quality, minimal degradation |
**Variant suffixes** — `_S` (Small, faster, lower quality), `_M` (Medium, balanced), `_L` (Large, better quality).
**Legacy (Q4_0/Q4_1/Q5_0/Q5_1) exist** but always prefer K-quants for better quality/size ratio.
**IQ quantization** — ultra-low-bit with importance-aware methods: IQ2_XXS, IQ2_XS, IQ2_S, IQ3_XXS, IQ3_XS, IQ3_S, IQ4_XS. Require `--imatrix`.
**Task-specific defaults:**
- General chat / assistants: Q4_K_M, or Q5_K_M if RAM allows
- Code generation: Q5_K_M or Q6_K (higher precision helps)
- Technical / medical: Q6_K or Q8_0
- Very large (70B, 405B) on consumer hardware: Q3_K_M or Q4_K_S
- Raspberry Pi / edge: Q2_K or Q3_K_S
## Conversion workflows
### Basic: HF → GGUF → quantized
```bash
python convert_hf_to_gguf.py ./model --outfile model-f16.gguf --outtype f16
./llama-quantize model-f16.gguf model-q4_k_m.gguf Q4_K_M
./llama-cli -m model-q4_k_m.gguf -p "Hello!" -n 50
```
### With importance matrix (imatrix) — better low-bit quality
`imatrix` gives 1020% perplexity improvement at Q4, essential at Q3 and below.
```bash
# 1. Convert to FP16 GGUF
python convert_hf_to_gguf.py ./model --outfile model-f16.gguf
# 2. Prepare calibration data (diverse text, ~100MB is ideal)
cat > calibration.txt << 'EOF'
The quick brown fox jumps over the lazy dog.
Machine learning is a subset of artificial intelligence.
# Add more diverse text samples...
EOF
# 3. Generate importance matrix
./llama-imatrix -m model-f16.gguf \
-f calibration.txt \
--chunk 512 \
-o model.imatrix \
-ngl 35
# 4. Quantize with imatrix
./llama-quantize --imatrix model.imatrix \
model-f16.gguf model-q4_k_m.gguf Q4_K_M
```
### Multi-quant batch
```bash
#!/bin/bash
MODEL="llama-3.1-8b-f16.gguf"
IMATRIX="llama-3.1-8b.imatrix"
./llama-imatrix -m $MODEL -f wiki.txt -o $IMATRIX -ngl 35
for QUANT in Q4_K_M Q5_K_M Q6_K Q8_0; do
OUTPUT="llama-3.1-8b-${QUANT,,}.gguf"
./llama-quantize --imatrix $IMATRIX $MODEL $OUTPUT $QUANT
echo "Created: $OUTPUT ($(du -h $OUTPUT | cut -f1))"
done
```
### Quality testing (perplexity)
```bash
./llama-perplexity -m model.gguf -f wikitext-2-raw/wiki.test.raw -c 512
# Baseline FP16: ~5.96 | Q4_K_M: ~6.06 (+1.7%) | Q2_K: ~6.87 (+15.3%)
```
## Python bindings (llama-cpp-python)
### Basic generation
@@ -116,32 +221,39 @@ from llama_cpp import Llama
llm = Llama(
model_path="./model-q4_k_m.gguf",
n_ctx=4096,
n_gpu_layers=35, # 0 for CPU, 99 to offload everything
n_gpu_layers=35, # 0 for CPU only, 99 to offload everything
n_threads=8,
)
out = llm("What is machine learning?", max_tokens=256, temperature=0.7)
print(out["choices"][0]["text"])
output = llm(
"What is machine learning?",
max_tokens=256,
temperature=0.7,
stop=["</s>", "\n\n"],
)
print(output["choices"][0]["text"])
```
### Chat + streaming
### Chat completion + streaming
```python
llm = Llama(
model_path="./model-q4_k_m.gguf",
n_ctx=4096,
n_gpu_layers=35,
chat_format="llama-3", # or "chatml", "mistral", etc.
chat_format="llama-3", # Or "chatml", "mistral", etc.
)
resp = llm.create_chat_completion(
# Non-streaming
response = llm.create_chat_completion(
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is Python?"},
],
max_tokens=256,
temperature=0.7,
)
print(resp["choices"][0]["message"]["content"])
print(response["choices"][0]["message"]["content"])
# Streaming
for chunk in llm("Explain quantum computing:", max_tokens=256, stream=True):
@@ -156,93 +268,171 @@ vec = llm.embed("This is a test sentence.")
print(f"Embedding dimension: {len(vec)}")
```
You can also load a GGUF straight from the Hub:
## Hardware acceleration
### Apple Silicon (Metal)
```bash
make clean && make GGML_METAL=1
./llama-cli -m model.gguf -ngl 99 -p "Hello" # offload all layers
```
```python
llm = Llama.from_pretrained(
repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
filename="*Q4_K_M.gguf",
n_gpu_layers=35,
llm = Llama(
model_path="model.gguf",
n_gpu_layers=99, # Offload everything
n_threads=1, # Metal handles parallelism
)
```
## Choosing a quant
Performance: M3 Max ~4060 tok/s on Llama 2-7B Q4_K_M.
Use the Hub page first, generic heuristics second.
### NVIDIA (CUDA)
- Prefer the exact quant that HF marks as compatible for the user's hardware profile.
- For general chat, start with `Q4_K_M`.
- For code or technical work, prefer `Q5_K_M` or `Q6_K` if memory allows.
- For very tight RAM budgets, consider `Q3_K_M`, `IQ` variants, or `Q2` variants only if the user explicitly prioritizes fit over quality.
- For multimodal repos, mention `mmproj-*.gguf` separately. The projector is not the main model file.
- Do not normalize repo-native labels. If the page says `UD-Q4_K_M`, report `UD-Q4_K_M`.
```bash
make clean && make GGML_CUDA=1
./llama-cli -m model.gguf -ngl 35 -p "Hello"
## Extracting available GGUFs from a repo
# Hybrid for large models
./llama-cli -m llama-70b.Q4_K_M.gguf -ngl 20 # GPU: 20 layers, CPU: rest
When the user asks what GGUFs exist, return:
- filename
- file size
- quant label
- whether it is a main model or an auxiliary projector
Ignore unless requested:
- README
- BF16 shard files
- imatrix blobs or calibration artifacts
Use the tree API for this step:
- `https://huggingface.co/api/models/<repo>/tree/main?recursive=true`
For a repo like `unsloth/Qwen3.6-35B-A3B-GGUF`, the local-app page can show quant chips such as `UD-Q4_K_M`, `UD-Q5_K_M`, `UD-Q6_K`, and `Q8_0`, while the tree API exposes exact file paths such as `Qwen3.6-35B-A3B-UD-Q4_K_M.gguf` and `Qwen3.6-35B-A3B-Q8_0.gguf` with byte sizes. Use the tree API to turn a quant label into an exact filename.
## Search patterns
Use these URL shapes directly:
```text
https://huggingface.co/models?apps=llama.cpp&sort=trending
https://huggingface.co/models?search=<term>&apps=llama.cpp&sort=trending
https://huggingface.co/models?search=<term>&apps=llama.cpp&num_parameters=min:0,max:24B&sort=trending
https://huggingface.co/<repo>?local-app=llama.cpp
https://huggingface.co/api/models/<repo>/tree/main?recursive=true
https://huggingface.co/<repo>/tree/main
# Multi-GPU split
./llama-cli -m large-model.gguf --tensor-split 0.5,0.5 -ngl 60
```
## Output format
### AMD (ROCm)
When answering discovery requests, prefer a compact structured result like:
```text
Repo: <repo>
Recommended quant from HF: <label> (<size>)
llama-server: <command>
Other GGUFs:
- <filename> - <size>
- <filename> - <size>
Source URLs:
- <local-app URL>
- <tree API URL>
```bash
make LLAMA_HIP=1
./llama-cli -m model.gguf -ngl 999
```
### CPU
```bash
# Match PHYSICAL cores, not logical
./llama-cli -m model.gguf -t 8 -p "Hello"
# BLAS acceleration (23× speedup)
make LLAMA_OPENBLAS=1
```
```python
llm = Llama(
model_path="model.gguf",
n_gpu_layers=0,
n_threads=8,
n_batch=512, # Larger batch = faster prompt processing
)
```
## Performance benchmarks
### CPU (Llama 2-7B Q4_K_M)
| CPU | Threads | Speed |
|-----|---------|-------|
| Apple M3 Max (Metal) | 16 | 50 tok/s |
| AMD Ryzen 9 7950X | 32 | 35 tok/s |
| Intel i9-13900K | 32 | 30 tok/s |
### GPU offloading on RTX 4090
| Layers GPU | Speed | VRAM |
|------------|-------|------|
| 0 (CPU only) | 30 tok/s | 0 GB |
| 20 (hybrid) | 80 tok/s | 8 GB |
| 35 (all) | 120 tok/s | 12 GB |
## Supported models
- **LLaMA family**: Llama 2 (7B/13B/70B), Llama 3 (8B/70B/405B), Code Llama
- **Mistral family**: Mistral 7B, Mixtral 8x7B/8x22B
- **Other**: Falcon, BLOOM, GPT-J, Phi-3, Gemma, Qwen, LLaVA (vision), Whisper (audio)
Find GGUF models: https://huggingface.co/models?library=gguf
## Ecosystem integrations
### Ollama
```bash
cat > Modelfile << 'EOF'
FROM ./model-q4_k_m.gguf
TEMPLATE """{{ .System }}
{{ .Prompt }}"""
PARAMETER temperature 0.7
PARAMETER num_ctx 4096
EOF
ollama create mymodel -f Modelfile
ollama run mymodel "Hello!"
```
### LM Studio
1. Place GGUF file in `~/.cache/lm-studio/models/`
2. Open LM Studio and select the model
3. Configure context length and GPU offload, start inference
### text-generation-webui
```bash
cp model-q4_k_m.gguf text-generation-webui/models/
python server.py --model model-q4_k_m.gguf --loader llama.cpp --n-gpu-layers 35
```
### OpenAI client → llama-server
```python
from openai import OpenAI
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")
response = client.chat.completions.create(
model="local-model",
messages=[{"role": "user", "content": "Hello!"}],
max_tokens=256,
)
print(response.choices[0].message.content)
```
## Best practices
1. **Use K-quants** — Q4_K_M is the recommended default
2. **Use imatrix** for Q4 and below (calibration improves quality substantially)
3. **Offload as many layers as VRAM allows** — start high, reduce by 5 on OOM
4. **Thread count** — match physical cores, not logical
5. **Batch size** — increase `n_batch` (e.g. 512) for faster prompt processing
6. **Context** — start at 4096, grow only as needed (memory scales with ctx)
7. **Flash Attention** — add `--flash-attn` if your build supports it
## Common issues (quick fixes)
**Model loads slowly** — use `--mmap` for memory-mapped loading.
**Out of memory (GPU)** — reduce `-ngl`, use a smaller quant (Q4_K_S / Q3_K_M), or quantize the KV cache:
```python
Llama(model_path="...", type_k=2, type_v=2, n_gpu_layers=35) # Q4_0 KV cache
```
**Garbage output** — wrong `chat_format`, temperature too high, or model file corrupted. Test with `temperature=0.1` and verify FP16 baseline works.
**Connection refused (server)** — bind to `--host 0.0.0.0`, check `lsof -i :8080`.
See `references/troubleshooting.md` for the full playbook.
## References
- **[hub-discovery.md](references/hub-discovery.md)** - URL-only Hugging Face workflows, search patterns, GGUF extraction, and command reconstruction
- **[advanced-usage.md](references/advanced-usage.md)** — speculative decoding, batched inference, grammar-constrained generation, LoRA, multi-GPU, custom builds, benchmark scripts
- **[quantization.md](references/quantization.md)** — quant quality tradeoffs, when to use Q4/Q5/Q6/IQ, model size scaling, imatrix
- **[server.md](references/server.md)** — direct-from-Hub server launch, OpenAI API endpoints, Docker deployment, NGINX load balancing, monitoring
- **[quantization.md](references/quantization.md)** — perplexity tables, use-case guide, model size scaling (7B/13B/70B RAM needs), imatrix deep dive
- **[server.md](references/server.md)** — OpenAI API endpoints, Docker deployment, NGINX load balancing, monitoring
- **[optimization.md](references/optimization.md)** — CPU threading, BLAS, GPU offload heuristics, batch tuning, benchmarks
- **[troubleshooting.md](references/troubleshooting.md)** — install/convert/quantize/inference/server issues, Apple Silicon, debugging
## Resources
- **GitHub**: https://github.com/ggml-org/llama.cpp
- **Hugging Face GGUF + llama.cpp docs**: https://huggingface.co/docs/hub/gguf-llamacpp
- **Hugging Face Local Apps docs**: https://huggingface.co/docs/hub/main/local-apps
- **Hugging Face Local Agents docs**: https://huggingface.co/docs/hub/agents-local
- **Example local-app page**: https://huggingface.co/unsloth/Qwen3.6-35B-A3B-GGUF?local-app=llama.cpp
- **Example tree API**: https://huggingface.co/api/models/unsloth/Qwen3.6-35B-A3B-GGUF/tree/main?recursive=true
- **Example llama.cpp search**: https://huggingface.co/models?num_parameters=min:0,max:24B&apps=llama.cpp&sort=trending
- **Python bindings**: https://github.com/abetlen/llama-cpp-python
- **Pre-quantized models**: https://huggingface.co/TheBloke
- **GGUF converter Space**: https://huggingface.co/spaces/ggml-org/gguf-my-repo
- **License**: MIT
@@ -1,168 +0,0 @@
# Hugging Face URL Workflows for llama.cpp
Use URL-only workflows first. Do not require `hf` or API clients just to find GGUF files, choose a quant, or build a `llama-server` command.
## Core URLs
```text
Search:
https://huggingface.co/models?apps=llama.cpp&sort=trending
Search with text:
https://huggingface.co/models?search=<term>&apps=llama.cpp&sort=trending
Search with size bounds:
https://huggingface.co/models?search=<term>&apps=llama.cpp&num_parameters=min:0,max:24B&sort=trending
Repo local-app view:
https://huggingface.co/<repo>?local-app=llama.cpp
Repo tree API:
https://huggingface.co/api/models/<repo>/tree/main?recursive=true
Repo file tree:
https://huggingface.co/<repo>/tree/main
```
## 1. Search for llama.cpp-compatible models
Start from the models page with `apps=llama.cpp`.
Use:
- `search=<term>` for model family names such as `Qwen`, `Gemma`, `Phi`, or `Mistral`
- `num_parameters=min:0,max:24B` or similar if the user has hardware limits
- `sort=trending` when the user wants popular repos right now
Do not start with random GGUF repos if the user has not chosen a model family yet. Search first, shortlist second.
Example: https://huggingface.co/models?search=Qwen&apps=llama.cpp&num_parameters=min:0,max:24B&sort=trending
## 2. Use the local-app page for the recommended quant
Open:
```text
https://huggingface.co/<repo>?local-app=llama.cpp
```
Extract, in order:
1. The exact `Use this model` snippet, if it is visible as text
2. The `Hardware compatibility` section from the fetched page text or HTML:
- quant label
- file size
- bit-depth grouping
3. Any extra launch flags shown in the snippet, such as `--jinja`
Treat the HF local-app snippet as the source of truth when it is visible.
Do this by reading the URL itself, not by assuming the UI rendered in a browser. If the fetched page source does not expose `Hardware compatibility`, say that the section was not text-visible and fall back to the tree API plus generic guidance from `quantization.md`.
## 3. Confirm exact files from the tree API
Open:
```text
https://huggingface.co/api/models/<repo>/tree/main?recursive=true
```
Treat the JSON response as the source of truth for repo inventory.
Keep entries where:
- `type` is `file`
- `path` ends with `.gguf`
Use these fields:
- `path` for the filename and subdirectory
- `size` for the byte size
- optionally `lfs.size` to confirm the LFS payload size
Separate files into:
- quantized single-file checkpoints, for example `Qwen3.6-35B-A3B-UD-Q4_K_M.gguf`
- projector weights, usually `mmproj-*.gguf`
- BF16 shard files, usually under `BF16/`
- everything else
Ignore unless the user asks:
- `README.md`
- imatrix or calibration blobs
Use `https://huggingface.co/<repo>/tree/main` only as a human fallback if the API endpoint fails or the user wants the web view.
## 4. Build the command
Preferred order:
1. Copy the exact HF snippet from the local-app page
2. If the page gives a clean quant label, use shorthand selection:
```bash
llama-server -hf <repo>:<QUANT>
```
3. If you need an exact file from the tree API, use the file-specific form:
```bash
llama-server --hf-repo <repo> --hf-file <filename.gguf>
```
4. For CLI usage instead of a server, use:
```bash
llama-cli -hf <repo>:<QUANT>
```
Use the exact-file form when the repo uses custom labels or nonstandard naming that could make `:<QUANT>` ambiguous.
## 5. Example: `unsloth/Qwen3.6-35B-A3B-GGUF`
Use these URLs:
```text
https://huggingface.co/unsloth/Qwen3.6-35B-A3B-GGUF?local-app=llama.cpp
https://huggingface.co/api/models/unsloth/Qwen3.6-35B-A3B-GGUF/tree/main?recursive=true
https://huggingface.co/unsloth/Qwen3.6-35B-A3B-GGUF/tree/main
```
On the local-app page, the hardware compatibility section can expose entries such as:
- `UD-IQ4_XS` - 17.7 GB
- `UD-Q4_K_S` - 20.9 GB
- `UD-Q4_K_M` - 22.1 GB
- `UD-Q5_K_M` - 26.5 GB
- `UD-Q6_K` - 29.3 GB
- `Q8_0` - 36.9 GB
On the tree API, you can confirm exact filenames such as:
- `Qwen3.6-35B-A3B-UD-Q4_K_M.gguf`
- `Qwen3.6-35B-A3B-UD-Q5_K_M.gguf`
- `Qwen3.6-35B-A3B-UD-Q6_K.gguf`
- `Qwen3.6-35B-A3B-Q8_0.gguf`
- `mmproj-F16.gguf`
Good final output for this repo:
```text
Repo: unsloth/Qwen3.6-35B-A3B-GGUF
Recommended quant from HF: UD-Q4_K_M (22.1 GB)
llama-server: llama-server --hf-repo unsloth/Qwen3.6-35B-A3B-GGUF --hf-file Qwen3.6-35B-A3B-UD-Q4_K_M.gguf
Other GGUFs:
- Qwen3.6-35B-A3B-UD-Q5_K_M.gguf - 26.5 GB
- Qwen3.6-35B-A3B-UD-Q6_K.gguf - 29.3 GB
- Qwen3.6-35B-A3B-Q8_0.gguf - 36.9 GB
Projector:
- mmproj-F16.gguf - 899 MB
```
## Notes
- Repo-specific quant labels matter. Do not rewrite `UD-Q4_K_M` to `Q4_K_M` unless the page itself does.
- `mmproj` files are projector weights for multimodal models, not the main language model checkpoint.
- If the HF hardware compatibility panel is missing because the user has no hardware profile configured, or because the fetched page source did not expose it, still use the tree API plus generic quant guidance from `quantization.md`.
- If the repo already has GGUFs, do not jump straight to conversion workflows.
@@ -2,22 +2,6 @@
Complete guide to GGUF quantization formats and model conversion.
## Hub-first quant selection
Before using generic tables, open the model repo with:
```text
https://huggingface.co/<repo>?local-app=llama.cpp
```
Prefer the exact quant labels and sizes shown in the `Hardware compatibility` section of the fetched `?local-app=llama.cpp` page text or HTML. Then confirm the matching filenames in:
```text
https://huggingface.co/api/models/<repo>/tree/main?recursive=true
```
Use the Hub page first, and only fall back to the generic heuristics below when the repo page does not expose a clear recommendation.
## Quantization Overview
**GGUF** (GPT-Generated Unified Format) - Standard format for llama.cpp models.
@@ -39,11 +23,11 @@ Use the Hub page first, and only fall back to the generic heuristics below when
## Converting Models
### Hugging Face to GGUF
### HuggingFace to GGUF
```bash
# 1. Download Hugging Face model
hf download meta-llama/Llama-2-7b-chat-hf \
# 1. Download HuggingFace model
huggingface-cli download meta-llama/Llama-2-7b-chat-hf \
--local-dir models/llama-2-7b-chat/
# 2. Convert to FP16 GGUF
@@ -168,32 +152,18 @@ Q2_K or Q3_K_S - Fit in limited RAM
## Finding Pre-Quantized Models
Use the Hub search with the llama.cpp app filter:
```text
https://huggingface.co/models?apps=llama.cpp&sort=trending
https://huggingface.co/models?search=<term>&apps=llama.cpp&sort=trending
https://huggingface.co/models?search=<term>&apps=llama.cpp&num_parameters=min:0,max:24B&sort=trending
```
For a specific repo, open:
```text
https://huggingface.co/<repo>?local-app=llama.cpp
https://huggingface.co/api/models/<repo>/tree/main?recursive=true
```
Then launch directly from the Hub without extra Hub tooling:
**TheBloke** on HuggingFace:
- https://huggingface.co/TheBloke
- Most models available in all GGUF formats
- No conversion needed
**Example**:
```bash
llama-cli -hf <repo>:Q4_K_M
llama-server -hf <repo>:Q4_K_M
```
If you need the exact file name from the tree API:
```bash
llama-server --hf-repo <repo> --hf-file <filename.gguf>
# Download pre-quantized Llama 2-7B
huggingface-cli download \
TheBloke/Llama-2-7B-Chat-GGUF \
llama-2-7b-chat.Q4_K_M.gguf \
--local-dir models/
```
## Importance Matrices (imatrix)
@@ -2,31 +2,6 @@
Production deployment of llama.cpp server with OpenAI-compatible API.
## Direct from Hugging Face Hub
Prefer the model repo's local-app page first:
```text
https://huggingface.co/<repo>?local-app=llama.cpp
```
If the page shows an exact snippet, copy it. If not, use one of these forms:
```bash
# Choose a quant label directly from the Hub repo
llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0
```
```bash
# Pin an exact GGUF file from the repo tree
llama-server \
--hf-repo microsoft/Phi-3-mini-4k-instruct-gguf \
--hf-file Phi-3-mini-4k-instruct-q4.gguf \
-c 4096
```
Use the file-specific form when the repo has custom naming or when you already extracted the exact filename from the tree API.
## Server Modes
### llama-server
+7 -8
View File
@@ -2,7 +2,7 @@
name: maps
description: >
Location intelligence — geocode a place, reverse-geocode coordinates,
find nearby places (46 POI categories), driving/walking/cycling
find nearby places (44 POI categories), driving/walking/cycling
distance + time, turn-by-turn directions, timezone lookup, bounding
box + area for a named place, and POI search within a rectangle.
Uses OpenStreetMap + Overpass + OSRM. Free, no API key.
@@ -83,13 +83,12 @@ python3 $MAPS nearby --near "90210" --category pharmacy
python3 $MAPS nearby --near "downtown austin" --category restaurant --category bar --limit 10
```
46 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, guest_house,
camp_site, supermarket, atm, gas_station, parking, museum, park, school,
university, bank, police, fire_station, library, airport, train_station,
bus_stop, church, mosque, synagogue, dentist, doctor, cinema, theatre, gym,
swimming_pool, post_office, convenience_store, bakery, bookshop, laundry,
car_wash, car_rental, bicycle_rental, taxi, veterinary, zoo, playground,
stadium, nightclub.
44 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, supermarket,
atm, gas_station, parking, museum, park, school, university, bank, police,
fire_station, library, airport, train_station, bus_stop, church, mosque,
synagogue, dentist, doctor, cinema, theatre, gym, swimming_pool, post_office,
convenience_store, bakery, bookshop, laundry, car_wash, car_rental,
bicycle_rental, taxi, veterinary, zoo, playground, stadium, nightclub.
Each result includes: `name`, `address`, `lat`/`lon`, `distance_m`,
`maps_url` (clickable Google Maps link), `directions_url` (Google Maps
+19 -63
View File
@@ -58,9 +58,7 @@ CATEGORY_TAGS = {
"restaurant": ("amenity", "restaurant"),
"cafe": ("amenity", "cafe"),
"bar": ("amenity", "bar"),
# bakery is tagged as shop=bakery in the OSM wiki, but some mappers use
# amenity=bakery. Search both so small indie bakeries aren't missed.
"bakery": [("shop", "bakery"), ("amenity", "bakery")],
"bakery": ("shop", "bakery"),
"convenience_store": ("shop", "convenience"),
# Health
"hospital": ("amenity", "hospital"),
@@ -70,8 +68,6 @@ CATEGORY_TAGS = {
"veterinary": ("amenity", "veterinary"),
# Accommodation
"hotel": ("tourism", "hotel"),
"guest_house": ("tourism", "guest_house"),
"camp_site": ("tourism", "camp_site"),
# Shopping & Services
"supermarket": ("shop", "supermarket"),
"bookshop": ("shop", "books"),
@@ -124,19 +120,6 @@ RELIGION_FILTER = {
VALID_CATEGORIES = sorted(CATEGORY_TAGS.keys())
def _tags_for(category):
"""Return the CATEGORY_TAGS entry as a list of (key, value) pairs.
Most categories map to a single (tag_key, tag_val) tuple, but some
(e.g. ``bakery``) are tagged under more than one OSM key and are
represented as a list of tuples. Normalise both forms to a list.
"""
entry = CATEGORY_TAGS[category]
if isinstance(entry, list):
return list(entry)
return [entry]
OSRM_PROFILES = {
"driving": "driving",
"walking": "foot",
@@ -355,63 +338,36 @@ def geocode_single(query):
# ---------------------------------------------------------------------------
def build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
religion=None, tag_pairs=None):
"""Build an Overpass QL query for nearby POIs around a point.
If ``tag_pairs`` is provided, the query unions across every
``(key, value)`` pair (used for categories like ``bakery`` that are
tagged under more than one OSM key). Otherwise falls back to the
single ``tag_key``/``tag_val`` pair for back-compat.
"""
pairs = tag_pairs if tag_pairs else [(tag_key, tag_val)]
religion=None):
"""Build an Overpass QL query for nearby POIs around a point."""
religion_filter = ""
if religion:
religion_filter = f'["religion"="{religion}"]'
body_lines = []
for k, v in pairs:
body_lines.append(
f' node["{k}"="{v}"]{religion_filter}'
f'(around:{radius},{lat},{lon});'
)
body_lines.append(
f' way["{k}"="{v}"]{religion_filter}'
f'(around:{radius},{lat},{lon});'
)
body = "\n".join(body_lines)
return (
f'[out:json][timeout:25];\n'
f'(\n'
f'{body}\n'
f' node["{tag_key}"="{tag_val}"]{religion_filter}'
f'(around:{radius},{lat},{lon});\n'
f' way["{tag_key}"="{tag_val}"]{religion_filter}'
f'(around:{radius},{lat},{lon});\n'
f');\n'
f'out center {limit};\n'
)
def build_overpass_bbox(tag_key, tag_val, south, west, north, east, limit,
religion=None, tag_pairs=None):
"""Build an Overpass QL query for POIs within a bounding box.
See ``build_overpass_nearby`` for ``tag_pairs`` semantics.
"""
pairs = tag_pairs if tag_pairs else [(tag_key, tag_val)]
religion=None):
"""Build an Overpass QL query for POIs within a bounding box."""
religion_filter = ""
if religion:
religion_filter = f'["religion"="{religion}"]'
body_lines = []
for k, v in pairs:
body_lines.append(
f' node["{k}"="{v}"]{religion_filter}'
f'({south},{west},{north},{east});'
)
body_lines.append(
f' way["{k}"="{v}"]{religion_filter}'
f'({south},{west},{north},{east});'
)
body = "\n".join(body_lines)
return (
f'[out:json][timeout:25];\n'
f'(\n'
f'{body}\n'
f' node["{tag_key}"="{tag_val}"]{religion_filter}'
f'({south},{west},{north},{east});\n'
f' way["{tag_key}"="{tag_val}"]{religion_filter}'
f'({south},{west},{north},{east});\n'
f');\n'
f'out center {limit};\n'
)
@@ -649,10 +605,10 @@ def cmd_nearby(args):
# appear twice.
merged = {}
for category in categories:
tag_pairs = _tags_for(category)
tag_key, tag_val = CATEGORY_TAGS[category]
religion = RELIGION_FILTER.get(category)
query = build_overpass_nearby(None, None, lat, lon, radius, limit,
religion=religion, tag_pairs=tag_pairs)
query = build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
religion=religion)
raw = overpass_query(query)
elements = raw.get("elements", [])
for place in parse_overpass_elements(elements, ref_lat=lat, ref_lon=lon):
@@ -989,10 +945,10 @@ def cmd_bbox(args):
if limit <= 0:
error_exit("Limit must be a positive integer.")
tag_pairs = _tags_for(category)
tag_key, tag_val = CATEGORY_TAGS[category]
religion = RELIGION_FILTER.get(category)
query = build_overpass_bbox(None, None, south, west, north, east,
limit, religion=religion, tag_pairs=tag_pairs)
query = build_overpass_bbox(tag_key, tag_val, south, west, north, east,
limit, religion=religion)
raw = overpass_query(query)
+8 -64
View File
@@ -1,7 +1,7 @@
---
name: llm-wiki
description: "Karpathy's LLM Wiki — build and maintain a persistent, interlinked markdown knowledge base. Ingest sources, query compiled knowledge, and lint for consistency."
version: 2.1.0
version: 2.0.0
author: Hermes Agent
license: MIT
metadata:
@@ -122,10 +122,6 @@ Adapt to the user's domain. The schema constrains agent behavior and ensures con
- When updating a page, always bump the `updated` date
- Every new page must be added to `index.md` under the correct section
- Every action must be appended to `log.md`
- **Provenance markers:** On pages that synthesize 3+ sources, append `^[raw/articles/source-file.md]`
at the end of paragraphs whose claims come from a specific source. This lets a reader trace each
claim back without re-reading the whole raw file. Optional on single-source pages where the
`sources:` frontmatter is enough.
## Frontmatter
```yaml
@@ -136,33 +132,9 @@ Adapt to the user's domain. The schema constrains agent behavior and ensures con
type: entity | concept | comparison | query | summary
tags: [from taxonomy below]
sources: [raw/articles/source-name.md]
# Optional quality signals:
confidence: high | medium | low # how well-supported the claims are
contested: true # set when the page has unresolved contradictions
contradictions: [other-page-slug] # pages this one conflicts with
---
```
`confidence` and `contested` are optional but recommended for opinion-heavy or fast-moving
topics. Lint surfaces `contested: true` and `confidence: low` pages for review so weak claims
don't silently harden into accepted wiki fact.
### raw/ Frontmatter
Raw sources ALSO get a small frontmatter block so re-ingests can detect drift:
```yaml
---
source_url: https://example.com/article # original URL, if applicable
ingested: YYYY-MM-DD
sha256: <hex digest of the raw content below the frontmatter>
---
```
The `sha256:` lets a future re-ingest of the same URL skip processing when content is unchanged,
and flag drift when it has changed. Compute over the body only (everything after the closing
`---`), not the frontmatter itself.
## Tag Taxonomy
[Define 10-20 top-level tags for the domain. Add new tags here BEFORE using them.]
@@ -262,10 +234,6 @@ When the user provides a source (URL, file, paste), integrate it into the wiki:
- PDF → use `web_extract` (handles PDFs), save to `raw/papers/`
- Pasted text → save to appropriate `raw/` subdirectory
- Name the file descriptively: `raw/articles/karpathy-llm-wiki-2026.md`
- **Add raw frontmatter** (`source_url`, `ingested`, `sha256` of the body).
On re-ingest of the same URL: recompute the sha256, compare to the stored value —
skip if identical, flag drift and update if different. This is cheap enough to
do on every re-ingest and catches silent source changes.
**Discuss takeaways** with the user — what's interesting, what matters for
the domain. (Skip this in automated/cron contexts — proceed directly.)
@@ -282,11 +250,6 @@ When the user provides a source (URL, file, paste), integrate it into the wiki:
- **Cross-reference:** Every new or updated page must link to at least 2 other
pages via `[[wikilinks]]`. Check that existing pages link back.
- **Tags:** Only use tags from the taxonomy in SCHEMA.md
- **Provenance:** On pages synthesizing 3+ sources, append `^[raw/articles/source.md]`
markers to paragraphs whose claims trace to a specific source.
- **Confidence:** For opinion-heavy, fast-moving, or single-source claims, set
`confidence: medium` or `low` in frontmatter. Don't mark `high` unless the
claim is well-supported across multiple sources.
⑤ **Update navigation:**
- Add new pages to `index.md` under the correct section, alphabetically
@@ -341,28 +304,18 @@ wiki = "<WIKI_PATH>"
recent source that mentions the same entities.
**Contradictions:** Pages on the same topic with conflicting claims. Look for
pages that share tags/entities but state different facts. Surface all pages
with `contested: true` or `contradictions:` frontmatter for user review.
pages that share tags/entities but state different facts.
**Quality signals:** List pages with `confidence: low` and any page that cites
only a single source but has no confidence field set — these are candidates
for either finding corroboration or demoting to `confidence: medium`.
**Page size:** Flag pages over 200 lines — candidates for splitting.
**Source drift:** For each file in `raw/` with a `sha256:` frontmatter, recompute
the hash and flag mismatches. Mismatches indicate the raw file was edited
(shouldn't happen — raw/ is immutable) or ingested from a URL that has since
changed. Not a hard error, but worth reporting.
**Tag audit:** List all tags in use, flag any not in the SCHEMA.md taxonomy.
**Page size:** Flag pages over 200 lines — candidates for splitting.
**Log rotation:** If log.md exceeds 500 entries, rotate it.
**Tag audit:** List all tags in use, flag any not in the SCHEMA.md taxonomy.
**Report findings** with specific file paths and suggested actions, grouped by
severity (broken links > orphans > stale content > style issues).
**Log rotation:** If log.md exceeds 500 entries, rotate it.
**Report findings** with specific file paths and suggested actions, grouped by
severity (broken links > orphans > source drift > contested pages > stale content > style issues).
**Append to log.md:** `## [YYYY-MM-DD] lint | N issues found`
**Append to log.md:** `## [YYYY-MM-DD] lint | N issues found`
## Working with the Wiki
@@ -495,12 +448,3 @@ vault in Obsidian on your laptop/phone — changes appear within seconds.
The agent should check log size during lint.
- **Handle contradictions explicitly** — don't silently overwrite. Note both claims with dates,
mark in frontmatter, flag for user review.
## Related Tools
[llm-wiki-compiler](https://github.com/atomicmemory/llm-wiki-compiler) is a Node.js CLI that
compiles sources into a concept wiki with the same Karpathy inspiration. It's Obsidian-compatible,
so users who want a scheduled/CLI-driven compile pipeline can point it at the same vault this
skill maintains. Trade-offs: it owns page generation (replaces the agent's judgment on page
creation) and is tuned for small corpora. Use this skill when you want agent-in-the-loop curation;
use llmwiki when you want batch compile of a source directory.
+1 -8
View File
@@ -1,7 +1,7 @@
---
name: xurl
description: Interact with X/Twitter via xurl, the official X API CLI. Use for posting, replying, quoting, searching, timelines, mentions, likes, reposts, bookmarks, follows, DMs, media upload, and raw v2 endpoint access.
version: 1.1.1
version: 1.1.0
author: xdevplatform + openclaw + Hermes Agent
license: MIT
platforms: [linux, macos]
@@ -95,12 +95,6 @@ These steps must be performed by the user directly, NOT by the agent, because th
xurl auth oauth2 --app my-app
```
(This opens a browser for the OAuth 2.0 PKCE flow.)
If X returns a `UsernameNotFound` error or 403 on the post-OAuth `/2/users/me` lookup, pass your handle explicitly (xurl v1.1.0+):
```bash
xurl auth oauth2 --app my-app YOUR_USERNAME
```
This binds the token to your handle and skips the broken `/2/users/me` call.
6. Set the app as default so all commands use it:
```bash
xurl auth default my-app
@@ -386,7 +380,6 @@ xurl --app staging /2/users/me # one-off against staging
| --- | --- | --- |
| Auth errors after successful OAuth flow | Token saved to `default` app (no client-id/secret) instead of your named app | `xurl auth oauth2 --app my-app` then `xurl auth default my-app` |
| `unauthorized_client` during OAuth | App type set to "Native App" in X dashboard | Change to "Web app, automated app or bot" in User Authentication Settings |
| `UsernameNotFound` or 403 on `/2/users/me` right after OAuth | X not returning username reliably from `/2/users/me` | Re-run `xurl auth oauth2 --app my-app YOUR_USERNAME` (xurl v1.1.0+) to pass the handle explicitly |
| 401 on every request | Token expired or wrong default app | Check `xurl auth status` — verify `▸` points to an app with oauth2 tokens |
| `client-forbidden` / `client-not-enrolled` | X platform enrollment issue | Dashboard → Apps → Manage → Move to "Pay-per-use" package → Production environment |
| `CreditsDepleted` | $0 balance on X API | Buy credits (min $5) in Developer Console → Billing |
-170
View File
@@ -1,170 +0,0 @@
"""Tests for GHSA-96vc-wcxf-jjff and GHSA-qg5c-hvr5-hjgr.
Two related ACP approval-flow issues:
- 96vc: ACP didn't set HERMES_EXEC_ASK, so `check_all_command_guards`
took the non-interactive auto-approve path and never consulted the
ACP-supplied callback.
- qg5c: `_approval_callback` was a module-global in terminal_tool;
overlapping ACP sessions overwrote each other's callback slot.
Both fixed together by:
1. Setting HERMES_EXEC_ASK inside _run_agent (wraps the agent call).
2. Storing the callback in thread-local state so concurrent executor
threads don't collide.
"""
import os
import threading
from unittest.mock import MagicMock
import pytest
class TestThreadLocalApprovalCallback:
"""GHSA-qg5c-hvr5-hjgr: set_approval_callback must be per-thread so
concurrent ACP sessions don't stomp on each other's handlers."""
def test_set_and_get_in_same_thread(self):
from tools.terminal_tool import (
set_approval_callback,
_get_approval_callback,
)
cb1 = lambda cmd, desc: "once" # noqa: E731
set_approval_callback(cb1)
assert _get_approval_callback() is cb1
def test_callback_not_visible_in_different_thread(self):
"""Thread A's callback is NOT visible to Thread B."""
from tools.terminal_tool import (
set_approval_callback,
_get_approval_callback,
)
cb_a = lambda cmd, desc: "thread_a" # noqa: E731
cb_b = lambda cmd, desc: "thread_b" # noqa: E731
seen_in_a = []
seen_in_b = []
def thread_a():
set_approval_callback(cb_a)
# Pause so thread B has time to set its own callback
import time
time.sleep(0.05)
seen_in_a.append(_get_approval_callback())
def thread_b():
set_approval_callback(cb_b)
import time
time.sleep(0.05)
seen_in_b.append(_get_approval_callback())
ta = threading.Thread(target=thread_a)
tb = threading.Thread(target=thread_b)
ta.start()
tb.start()
ta.join()
tb.join()
# Each thread must see ONLY its own callback — not the other's
assert seen_in_a == [cb_a]
assert seen_in_b == [cb_b]
def test_main_thread_callback_not_leaked_to_worker(self):
"""A callback set in the main thread does NOT leak into a
freshly-spawned worker thread."""
from tools.terminal_tool import (
set_approval_callback,
_get_approval_callback,
)
cb_main = lambda cmd, desc: "main" # noqa: E731
set_approval_callback(cb_main)
worker_saw = []
def worker():
worker_saw.append(_get_approval_callback())
t = threading.Thread(target=worker)
t.start()
t.join()
# Worker thread has no callback set — TLS is empty for it
assert worker_saw == [None]
# Main thread still has its callback
assert _get_approval_callback() is cb_main
def test_sudo_password_callback_also_thread_local(self):
"""Same protection applies to the sudo password callback."""
from tools.terminal_tool import (
set_sudo_password_callback,
_get_sudo_password_callback,
)
cb_main = lambda: "main-password" # noqa: E731
set_sudo_password_callback(cb_main)
worker_saw = []
def worker():
worker_saw.append(_get_sudo_password_callback())
t = threading.Thread(target=worker)
t.start()
t.join()
assert worker_saw == [None]
assert _get_sudo_password_callback() is cb_main
class TestAcpExecAskGate:
"""GHSA-96vc-wcxf-jjff: ACP's _run_agent must set HERMES_INTERACTIVE so
that tools.approval.check_all_command_guards takes the CLI-interactive
path (consults the registered callback via prompt_dangerous_approval)
instead of the non-interactive auto-approve shortcut.
(HERMES_EXEC_ASK takes the gateway-queue path which requires a
notify_cb registered in _gateway_notify_cbs not applicable to ACP,
which uses a direct callback shape.)"""
def test_interactive_env_var_routes_to_callback(self, monkeypatch):
"""When HERMES_INTERACTIVE is set and an approval callback is
registered, a dangerous command must route through the callback."""
# Clean env
monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
from tools.approval import check_all_command_guards
called_with = []
def fake_cb(command, description, *, allow_permanent=True):
called_with.append((command, description))
return "once"
# Without HERMES_INTERACTIVE: takes auto-approve path, callback NOT called
result = check_all_command_guards(
"rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
)
assert result["approved"] is True
assert called_with == [], (
"without HERMES_INTERACTIVE the non-interactive auto-approve "
"path should fire without consulting the callback"
)
# With HERMES_INTERACTIVE: callback IS called, approval flows through it
monkeypatch.setenv("HERMES_INTERACTIVE", "1")
called_with.clear()
result = check_all_command_guards(
"rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
)
assert called_with, (
"with HERMES_INTERACTIVE the approval path should consult the "
"registered callback — this was the ACP bypass in "
"GHSA-96vc-wcxf-jjff"
)
assert result["approved"] is True
-14
View File
@@ -73,17 +73,3 @@ class TestApprovalMapping:
result = cb("rm -rf /", "dangerous")
assert result == "deny"
def test_approval_none_response_returns_deny(self):
"""When request_permission resolves to None, the callback should return 'deny'."""
loop = MagicMock(spec=asyncio.AbstractEventLoop)
mock_rp = MagicMock(name="request_permission")
future = MagicMock(spec=Future)
future.result.return_value = None
with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
cb = make_approval_callback(mock_rp, loop, session_id="s1", timeout=1.0)
result = cb("echo hi", "demo")
assert result == "deny"
+5 -74
View File
@@ -95,37 +95,19 @@ class TestInitialize:
class TestAuthenticate:
@pytest.mark.asyncio
async def test_authenticate_with_matching_method_id(self, agent, monkeypatch):
async def test_authenticate_with_provider_configured(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.detect_provider",
lambda: "openrouter",
"acp_adapter.server.has_provider",
lambda: True,
)
resp = await agent.authenticate(method_id="openrouter")
assert isinstance(resp, AuthenticateResponse)
@pytest.mark.asyncio
async def test_authenticate_is_case_insensitive(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.detect_provider",
lambda: "openrouter",
)
resp = await agent.authenticate(method_id="OpenRouter")
assert isinstance(resp, AuthenticateResponse)
@pytest.mark.asyncio
async def test_authenticate_rejects_mismatched_method_id(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.detect_provider",
lambda: "openrouter",
)
resp = await agent.authenticate(method_id="totally-invalid-method")
assert resp is None
@pytest.mark.asyncio
async def test_authenticate_without_provider(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.detect_provider",
lambda: None,
"acp_adapter.server.has_provider",
lambda: False,
)
resp = await agent.authenticate(method_id="openrouter")
assert resp is None
@@ -270,57 +252,6 @@ class TestListAndFork:
mock_list.assert_called_once_with(cwd="/mnt/e/Projects/AI/browser-link-3")
@pytest.mark.asyncio
async def test_list_sessions_pagination_first_page(self, agent):
from acp_adapter import server as acp_server
infos = [
{"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
for i in range(acp_server._LIST_SESSIONS_PAGE_SIZE + 5)
]
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
resp = await agent.list_sessions()
assert len(resp.sessions) == acp_server._LIST_SESSIONS_PAGE_SIZE
assert resp.next_cursor == resp.sessions[-1].session_id
@pytest.mark.asyncio
async def test_list_sessions_pagination_no_more(self, agent):
infos = [
{"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
for i in range(3)
]
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
resp = await agent.list_sessions()
assert len(resp.sessions) == 3
assert resp.next_cursor is None
@pytest.mark.asyncio
async def test_list_sessions_cursor_resumes_after_match(self, agent):
infos = [
{"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0},
{"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0},
{"session_id": "s3", "cwd": "/tmp", "title": None, "updated_at": 0.0},
]
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
resp = await agent.list_sessions(cursor="s1")
assert [s.session_id for s in resp.sessions] == ["s2", "s3"]
assert resp.next_cursor is None
@pytest.mark.asyncio
async def test_list_sessions_unknown_cursor_returns_empty(self, agent):
infos = [
{"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0},
{"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0},
]
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
resp = await agent.list_sessions(cursor="does-not-exist")
assert resp.sessions == []
assert resp.next_cursor is None
# ---------------------------------------------------------------------------
# session configuration / model routing
# ---------------------------------------------------------------------------
+1 -5
View File
@@ -414,11 +414,7 @@ class TestRunOauthSetupToken:
token = run_oauth_setup_token()
assert token == "from-cred-file"
# Don't assert exact call count — the contract is "credentials flow
# through", not "exactly one subprocess call". xdist cross-test
# pollution (other tests shimming subprocess via plugins) has flaked
# assert_called_once() in CI.
assert mock_run.called
mock_run.assert_called_once()
def test_returns_token_from_env_var(self, monkeypatch, tmp_path):
"""Falls back to CLAUDE_CODE_OAUTH_TOKEN env var when no cred files."""
-238
View File
@@ -1,238 +0,0 @@
"""Regression tests: normalize_anthropic_response_v2 vs v1.
Constructs mock Anthropic responses and asserts that the v2 function
(returning NormalizedResponse) produces identical field values to the
original v1 function (returning SimpleNamespace + finish_reason).
"""
import json
import pytest
from types import SimpleNamespace
from agent.anthropic_adapter import (
normalize_anthropic_response,
normalize_anthropic_response_v2,
)
from agent.transports.types import NormalizedResponse, ToolCall
# ---------------------------------------------------------------------------
# Helpers to build mock Anthropic SDK responses
# ---------------------------------------------------------------------------
def _text_block(text: str):
return SimpleNamespace(type="text", text=text)
def _thinking_block(thinking: str, signature: str = "sig_abc"):
return SimpleNamespace(type="thinking", thinking=thinking, signature=signature)
def _tool_use_block(id: str, name: str, input: dict):
return SimpleNamespace(type="tool_use", id=id, name=name, input=input)
def _response(content_blocks, stop_reason="end_turn"):
return SimpleNamespace(
content=content_blocks,
stop_reason=stop_reason,
usage=SimpleNamespace(
input_tokens=10,
output_tokens=5,
),
)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestTextOnly:
"""Text-only response — no tools, no thinking."""
def setup_method(self):
self.resp = _response([_text_block("Hello world")])
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
self.v2 = normalize_anthropic_response_v2(self.resp)
def test_type(self):
assert isinstance(self.v2, NormalizedResponse)
def test_content_matches(self):
assert self.v2.content == self.v1_msg.content
def test_finish_reason_matches(self):
assert self.v2.finish_reason == self.v1_finish
def test_no_tool_calls(self):
assert self.v2.tool_calls is None
assert self.v1_msg.tool_calls is None
def test_no_reasoning(self):
assert self.v2.reasoning is None
assert self.v1_msg.reasoning is None
class TestWithToolCalls:
"""Response with tool calls."""
def setup_method(self):
self.resp = _response(
[
_text_block("I'll check that"),
_tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
_tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
],
stop_reason="tool_use",
)
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
self.v2 = normalize_anthropic_response_v2(self.resp)
def test_finish_reason(self):
assert self.v2.finish_reason == "tool_calls"
assert self.v1_finish == "tool_calls"
def test_tool_call_count(self):
assert len(self.v2.tool_calls) == 2
assert len(self.v1_msg.tool_calls) == 2
def test_tool_call_ids_match(self):
for i in range(2):
assert self.v2.tool_calls[i].id == self.v1_msg.tool_calls[i].id
def test_tool_call_names_match(self):
assert self.v2.tool_calls[0].name == "terminal"
assert self.v2.tool_calls[1].name == "read_file"
for i in range(2):
assert self.v2.tool_calls[i].name == self.v1_msg.tool_calls[i].function.name
def test_tool_call_arguments_match(self):
for i in range(2):
assert self.v2.tool_calls[i].arguments == self.v1_msg.tool_calls[i].function.arguments
def test_content_preserved(self):
assert self.v2.content == self.v1_msg.content
assert "check that" in self.v2.content
class TestWithThinking:
"""Response with thinking blocks (Claude 3.5+ extended thinking)."""
def setup_method(self):
self.resp = _response([
_thinking_block("Let me think about this carefully..."),
_text_block("The answer is 42."),
])
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
self.v2 = normalize_anthropic_response_v2(self.resp)
def test_reasoning_matches(self):
assert self.v2.reasoning == self.v1_msg.reasoning
assert "think about this" in self.v2.reasoning
def test_reasoning_details_in_provider_data(self):
v1_details = self.v1_msg.reasoning_details
v2_details = self.v2.provider_data.get("reasoning_details") if self.v2.provider_data else None
assert v1_details is not None
assert v2_details is not None
assert len(v2_details) == len(v1_details)
def test_content_excludes_thinking(self):
assert self.v2.content == "The answer is 42."
class TestMixed:
"""Response with thinking + text + tool calls."""
def setup_method(self):
self.resp = _response(
[
_thinking_block("Planning my approach..."),
_text_block("I'll run the command"),
_tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
],
stop_reason="tool_use",
)
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
self.v2 = normalize_anthropic_response_v2(self.resp)
def test_all_fields_present(self):
assert self.v2.content is not None
assert self.v2.tool_calls is not None
assert self.v2.reasoning is not None
assert self.v2.finish_reason == "tool_calls"
def test_content_matches(self):
assert self.v2.content == self.v1_msg.content
def test_reasoning_matches(self):
assert self.v2.reasoning == self.v1_msg.reasoning
def test_tool_call_matches(self):
assert self.v2.tool_calls[0].id == self.v1_msg.tool_calls[0].id
assert self.v2.tool_calls[0].name == self.v1_msg.tool_calls[0].function.name
class TestStopReasons:
"""Verify finish_reason mapping matches between v1 and v2."""
@pytest.mark.parametrize("stop_reason,expected", [
("end_turn", "stop"),
("tool_use", "tool_calls"),
("max_tokens", "length"),
("stop_sequence", "stop"),
("refusal", "content_filter"),
("model_context_window_exceeded", "length"),
("unknown_future_reason", "stop"),
])
def test_stop_reason_mapping(self, stop_reason, expected):
resp = _response([_text_block("x")], stop_reason=stop_reason)
v1_msg, v1_finish = normalize_anthropic_response(resp)
v2 = normalize_anthropic_response_v2(resp)
assert v2.finish_reason == v1_finish == expected
class TestStripToolPrefix:
"""Verify mcp_ prefix stripping works identically."""
def test_prefix_stripped(self):
resp = _response(
[_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
stop_reason="tool_use",
)
v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True)
v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True)
assert v1_msg.tool_calls[0].function.name == "terminal"
assert v2.tool_calls[0].name == "terminal"
def test_prefix_kept(self):
resp = _response(
[_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
stop_reason="tool_use",
)
v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False)
v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False)
assert v1_msg.tool_calls[0].function.name == "mcp_terminal"
assert v2.tool_calls[0].name == "mcp_terminal"
class TestEdgeCases:
"""Edge cases: empty content, no blocks, etc."""
def test_empty_content_blocks(self):
resp = _response([])
v1_msg, v1_finish = normalize_anthropic_response(resp)
v2 = normalize_anthropic_response_v2(resp)
assert v2.content == v1_msg.content
assert v2.content is None
def test_no_reasoning_details_means_none_provider_data(self):
resp = _response([_text_block("hi")])
v2 = normalize_anthropic_response_v2(resp)
assert v2.provider_data is None
def test_v2_returns_dataclass_not_namespace(self):
resp = _response([_text_block("hi")])
v2 = normalize_anthropic_response_v2(resp)
assert isinstance(v2, NormalizedResponse)
assert not isinstance(v2, SimpleNamespace)
+18 -97
View File
@@ -476,82 +476,6 @@ class TestGetTextAuxiliaryClient:
assert isinstance(client, CodexAuxiliaryClient)
assert model == "gpt-5.2-codex"
class TestNousAuxiliaryRefresh:
def test_try_nous_prefers_runtime_credentials(self):
fresh_base = "https://inference-api.nousresearch.com/v1"
with (
patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
patch("agent.auxiliary_client.OpenAI") as mock_openai,
):
from agent.auxiliary_client import _try_nous
mock_openai.return_value = MagicMock()
client, model = _try_nous()
assert client is not None
assert model == "google/gemini-3-flash-preview"
assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
assert mock_openai.call_args.kwargs["base_url"] == fresh_base
def test_call_llm_retries_nous_after_401(self):
class _Auth401(Exception):
status_code = 401
stale_client = MagicMock()
stale_client.base_url = "https://inference-api.nousresearch.com/v1"
stale_client.chat.completions.create.side_effect = _Auth401("stale nous key")
fresh_client = MagicMock()
fresh_client.base_url = "https://inference-api.nousresearch.com/v1"
fresh_client.chat.completions.create.return_value = {"ok": True}
with (
patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
patch("agent.auxiliary_client.OpenAI", return_value=fresh_client),
patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
):
result = call_llm(
task="compression",
messages=[{"role": "user", "content": "hi"}],
)
assert result == {"ok": True}
assert stale_client.chat.completions.create.call_count == 1
assert fresh_client.chat.completions.create.call_count == 1
@pytest.mark.asyncio
async def test_async_call_llm_retries_nous_after_401(self):
class _Auth401(Exception):
status_code = 401
stale_client = MagicMock()
stale_client.base_url = "https://inference-api.nousresearch.com/v1"
stale_client.chat.completions.create = AsyncMock(side_effect=_Auth401("stale nous key"))
fresh_async_client = MagicMock()
fresh_async_client.base_url = "https://inference-api.nousresearch.com/v1"
fresh_async_client.chat.completions.create = AsyncMock(return_value={"ok": True})
with (
patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
patch("agent.auxiliary_client._to_async_client", return_value=(fresh_async_client, "nous-model")),
patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
):
result = await async_call_llm(
task="session_search",
messages=[{"role": "user", "content": "hi"}],
)
assert result == {"ok": True}
assert stale_client.chat.completions.create.await_count == 1
assert fresh_async_client.chat.completions.create.await_count == 1
# ── Payment / credit exhaustion fallback ─────────────────────────────────
@@ -772,12 +696,12 @@ class TestIsConnectionError:
assert _is_connection_error(err) is False
class TestKimiTemperatureOmitted:
"""Kimi/Moonshot models should have temperature OMITTED from API kwargs.
class TestKimiTemperatureNotForced:
"""Kimi/Moonshot models should NOT have client-side temperature overrides.
The Kimi gateway selects the correct temperature server-side based on the
active mode (thinking 1.0, non-thinking 0.6). Sending any temperature
value conflicts with gateway-managed defaults.
active mode (thinking on 1.0, thinking off 0.6). Client-side clamping
was removed so we don't conflict with gateway-managed defaults.
"""
@pytest.mark.parametrize(
@@ -785,20 +709,16 @@ class TestKimiTemperatureOmitted:
[
"kimi-for-coding",
"kimi-k2.5",
"kimi-k2.6",
"kimi-k2-turbo-preview",
"kimi-k2-0905-preview",
"kimi-k2-thinking",
"kimi-k2-thinking-turbo",
"kimi-k2-instruct",
"kimi-k2-instruct-0905",
"moonshotai/kimi-k2.5",
"moonshotai/Kimi-K2-Thinking",
"moonshotai/Kimi-K2-Instruct",
],
)
def test_kimi_models_omit_temperature(self, model):
"""No kimi model should have a temperature key in kwargs."""
def test_kimi_models_preserve_caller_temperature(self, model):
"""No kimi model should have its temperature overridden client-side."""
from agent.auxiliary_client import _build_call_kwargs
kwargs = _build_call_kwargs(
@@ -808,10 +728,10 @@ class TestKimiTemperatureOmitted:
temperature=0.3,
)
assert "temperature" not in kwargs
assert kwargs["temperature"] == 0.3
def test_kimi_for_coding_no_temperature_when_none(self):
"""When caller passes temperature=None, still no temperature key."""
"""When caller passes temperature=None, no temperature key is emitted."""
from agent.auxiliary_client import _build_call_kwargs
kwargs = _build_call_kwargs(
@@ -823,7 +743,7 @@ class TestKimiTemperatureOmitted:
assert "temperature" not in kwargs
def test_sync_call_omits_temperature(self):
def test_sync_call_preserves_caller_temperature(self):
client = MagicMock()
client.base_url = "https://api.kimi.com/coding/v1"
response = MagicMock()
@@ -845,10 +765,10 @@ class TestKimiTemperatureOmitted:
assert result is response
kwargs = client.chat.completions.create.call_args.kwargs
assert kwargs["model"] == "kimi-for-coding"
assert "temperature" not in kwargs
assert kwargs["temperature"] == 0.1
@pytest.mark.asyncio
async def test_async_call_omits_temperature(self):
async def test_async_call_preserves_caller_temperature(self):
client = MagicMock()
client.base_url = "https://api.kimi.com/coding/v1"
response = MagicMock()
@@ -870,17 +790,18 @@ class TestKimiTemperatureOmitted:
assert result is response
kwargs = client.chat.completions.create.call_args.kwargs
assert kwargs["model"] == "kimi-for-coding"
assert "temperature" not in kwargs
assert kwargs["temperature"] == 0.1
@pytest.mark.parametrize(
"model",
[
"anthropic/claude-sonnet-4-6",
"gpt-5.4",
"deepseek-chat",
"kimi-k2-instruct",
"moonshotai/Kimi-K2-Instruct",
],
)
def test_non_kimi_models_preserve_temperature(self, model):
def test_non_kimi_models_still_preserve_temperature(self, model):
from agent.auxiliary_client import _build_call_kwargs
kwargs = _build_call_kwargs(
@@ -900,8 +821,8 @@ class TestKimiTemperatureOmitted:
"https://api.kimi.com/coding/v1",
],
)
def test_kimi_k2_5_omits_temperature_regardless_of_endpoint(self, base_url):
"""Temperature is omitted regardless of which Kimi endpoint is used."""
def test_kimi_k2_5_no_override_regardless_of_endpoint(self, base_url):
"""Temperature is preserved regardless of which Kimi endpoint is used."""
from agent.auxiliary_client import _build_call_kwargs
kwargs = _build_call_kwargs(
@@ -912,7 +833,7 @@ class TestKimiTemperatureOmitted:
base_url=base_url,
)
assert "temperature" not in kwargs
assert kwargs["temperature"] == 0.1
# ---------------------------------------------------------------------------
+9 -30
View File
@@ -167,7 +167,7 @@ class TestResolveAutoMainFirst:
class TestResolveVisionMainFirst:
"""Vision auto-detection prefers the main provider first."""
"""Vision auto-detection prefers main provider + main model first."""
def test_openrouter_main_vision_uses_main_model(self, monkeypatch):
"""OpenRouter main with vision-capable model → aux vision uses main model."""
@@ -200,49 +200,28 @@ class TestResolveVisionMainFirst:
assert mock_resolve.call_args.args[0] == "openrouter"
assert mock_resolve.call_args.args[1] == "anthropic/claude-sonnet-4.6"
def test_nous_main_vision_uses_paid_nous_vision_backend(self):
"""Paid Nous main → aux vision uses the dedicated Nous vision backend."""
def test_nous_main_vision_uses_main_model(self):
"""Nous Portal main → aux vision uses main model, not free-tier MiMo-V2-Omni."""
with patch(
"agent.auxiliary_client._read_main_provider", return_value="nous",
), patch(
"agent.auxiliary_client._read_main_model",
return_value="openai/gpt-5",
), patch(
"agent.auxiliary_client.resolve_provider_client"
) as mock_resolve, patch(
"agent.auxiliary_client._resolve_task_provider_model",
return_value=("auto", None, None, None, None),
), patch(
"agent.auxiliary_client._resolve_strict_vision_backend",
return_value=(MagicMock(), "google/gemini-3-flash-preview"),
):
mock_client = MagicMock()
mock_resolve.return_value = (mock_client, "openai/gpt-5")
from agent.auxiliary_client import resolve_vision_provider_client
provider, client, model = resolve_vision_provider_client()
assert provider == "nous"
assert client is not None
assert model == "google/gemini-3-flash-preview"
def test_nous_main_vision_uses_free_tier_nous_vision_backend(self):
"""Free-tier Nous main → aux vision uses MiMo omni, not the text main model."""
with patch(
"agent.auxiliary_client._read_main_provider", return_value="nous",
), patch(
"agent.auxiliary_client._read_main_model",
return_value="xiaomi/mimo-v2-pro",
), patch(
"agent.auxiliary_client._resolve_task_provider_model",
return_value=("auto", None, None, None, None),
), patch(
"agent.auxiliary_client._resolve_strict_vision_backend",
return_value=(MagicMock(), "xiaomi/mimo-v2-omni"),
):
from agent.auxiliary_client import resolve_vision_provider_client
provider, client, model = resolve_vision_provider_client()
assert provider == "nous"
assert client is not None
assert model == "xiaomi/mimo-v2-omni"
assert model == "openai/gpt-5"
def test_exotic_provider_with_vision_override_preserved(self):
"""xiaomi → mimo-v2-omni override still wins over main_model."""
-146
View File
@@ -1,146 +0,0 @@
"""Focused regressions for the Copilot ACP shim safety layer."""
from __future__ import annotations
import io
import json
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from agent.copilot_acp_client import CopilotACPClient
class _FakeProcess:
def __init__(self) -> None:
self.stdin = io.StringIO()
class CopilotACPClientSafetyTests(unittest.TestCase):
def setUp(self) -> None:
self.client = CopilotACPClient(acp_cwd="/tmp")
def _dispatch(self, message: dict, *, cwd: str) -> dict:
process = _FakeProcess()
handled = self.client._handle_server_message(
message,
process=process,
cwd=cwd,
text_parts=[],
reasoning_parts=[],
)
self.assertTrue(handled)
payload = process.stdin.getvalue().strip()
self.assertTrue(payload)
return json.loads(payload)
def test_request_permission_is_not_auto_allowed(self) -> None:
response = self._dispatch(
{
"jsonrpc": "2.0",
"id": 1,
"method": "session/request_permission",
"params": {},
},
cwd="/tmp",
)
outcome = (((response.get("result") or {}).get("outcome") or {}).get("outcome"))
self.assertEqual(outcome, "cancelled")
def test_read_text_file_blocks_internal_hermes_hub_files(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
home = Path(tmpdir) / "home"
blocked = home / ".hermes" / "skills" / ".hub" / "index-cache" / "entry.json"
blocked.parent.mkdir(parents=True, exist_ok=True)
blocked.write_text('{"token":"sk-test-secret-1234567890"}')
with patch.dict(
os.environ,
{"HOME": str(home), "HERMES_HOME": str(home / ".hermes")},
clear=False,
):
response = self._dispatch(
{
"jsonrpc": "2.0",
"id": 2,
"method": "fs/read_text_file",
"params": {"path": str(blocked)},
},
cwd=str(home),
)
self.assertIn("error", response)
def test_read_text_file_redacts_sensitive_content(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
root = Path(tmpdir)
secret_file = root / "config.env"
secret_file.write_text("OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012")
response = self._dispatch(
{
"jsonrpc": "2.0",
"id": 3,
"method": "fs/read_text_file",
"params": {"path": str(secret_file)},
},
cwd=str(root),
)
content = ((response.get("result") or {}).get("content") or "")
self.assertNotIn("abc123def456", content)
self.assertIn("OPENAI_API_KEY=", content)
def test_write_text_file_reuses_write_denylist(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
home = Path(tmpdir) / "home"
target = home / ".ssh" / "id_rsa"
target.parent.mkdir(parents=True, exist_ok=True)
with patch("agent.copilot_acp_client.is_write_denied", return_value=True, create=True):
response = self._dispatch(
{
"jsonrpc": "2.0",
"id": 4,
"method": "fs/write_text_file",
"params": {
"path": str(target),
"content": "fake-private-key",
},
},
cwd=str(home),
)
self.assertIn("error", response)
self.assertFalse(target.exists())
def test_write_text_file_respects_safe_root(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
root = Path(tmpdir)
safe_root = root / "workspace"
safe_root.mkdir()
outside = root / "outside.txt"
with patch.dict(os.environ, {"HERMES_WRITE_SAFE_ROOT": str(safe_root)}, clear=False):
response = self._dispatch(
{
"jsonrpc": "2.0",
"id": 5,
"method": "fs/write_text_file",
"params": {
"path": str(outside),
"content": "should-not-write",
},
},
cwd=str(root),
)
self.assertIn("error", response)
self.assertFalse(outside.exists())
if __name__ == "__main__":
unittest.main()
@@ -1,27 +0,0 @@
from __future__ import annotations
from run_agent import AIAgent
def _agent_with_base_url(base_url: str) -> AIAgent:
agent = object.__new__(AIAgent)
agent.base_url = base_url
return agent
def test_direct_openai_url_requires_openai_host():
agent = _agent_with_base_url("https://api.openai.com.example/v1")
assert agent._is_direct_openai_url() is False
def test_direct_openai_url_ignores_path_segment_match():
agent = _agent_with_base_url("https://proxy.example.test/api.openai.com/v1")
assert agent._is_direct_openai_url() is False
def test_direct_openai_url_accepts_native_host():
agent = _agent_with_base_url("https://api.openai.com/v1")
assert agent._is_direct_openai_url() is True
+3 -2
View File
@@ -516,12 +516,13 @@ class TestGatewayFormatting:
assert "**" in text # Markdown bold
def test_gateway_format_hides_cost(self, populated_db):
"""Gateway format omits dollar figures and internal cache details."""
engine = InsightsEngine(populated_db)
report = engine.generate(days=30)
text = engine.format_gateway(report)
assert "$" not in text
assert "$" in text
assert "Top Skills" in text
assert "Est. cost" in text
assert "cache" not in text.lower()
def test_gateway_format_shows_models(self, populated_db):
+32
View File
@@ -84,6 +84,38 @@ class TestMinimaxAuxModel:
assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"]
class TestMinimaxModelCatalog:
"""Verify the model catalog matches official Anthropic-compat endpoint models.
Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api
"""
def test_catalog_includes_current_models(self):
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M2.7" in models
assert "MiniMax-M2.5" in models
assert "MiniMax-M2.1" in models
assert "MiniMax-M2" in models
def test_catalog_excludes_m1_family(self):
"""M1 models are not available on the /anthropic endpoint."""
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M1" not in models
def test_catalog_excludes_highspeed(self):
"""Highspeed variants are available but not shown in default catalog
(users can still specify them manually)."""
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M2.7-highspeed" not in models
assert "MiniMax-M2.5-highspeed" not in models
class TestMinimaxBetaHeaders:
"""MiniMax Anthropic-compat endpoints reject fine-grained-tool-streaming beta.
@@ -424,68 +424,6 @@ class TestQueryLocalContextLengthLmStudio:
)
class TestDetectLocalServerTypeAuth:
def test_passes_bearer_token_to_probe_requests(self):
from agent.model_metadata import detect_local_server_type
resp = MagicMock()
resp.status_code = 200
client_mock = MagicMock()
client_mock.__enter__ = lambda s: client_mock
client_mock.__exit__ = MagicMock(return_value=False)
client_mock.get.return_value = resp
with patch("httpx.Client", return_value=client_mock) as mock_client:
result = detect_local_server_type("http://localhost:1234/v1", api_key="lm-token")
assert result == "lm-studio"
assert mock_client.call_args.kwargs["headers"] == {
"Authorization": "Bearer lm-token"
}
class TestFetchEndpointModelMetadataLmStudio:
"""fetch_endpoint_model_metadata should use LM Studio's native models endpoint."""
def _make_resp(self, body):
resp = MagicMock()
resp.raise_for_status.return_value = None
resp.json.return_value = body
return resp
def test_uses_native_models_endpoint_only(self):
from agent.model_metadata import fetch_endpoint_model_metadata
native_resp = self._make_resp(
{
"models": [
{
"key": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
"id": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
"max_context_length": 131072,
}
]
}
)
with patch("agent.model_metadata.detect_local_server_type", return_value="lm-studio"), \
patch("agent.model_metadata.requests.get", return_value=native_resp) as mock_get:
result = fetch_endpoint_model_metadata(
"http://localhost:1234/v1",
api_key="lm-token",
force_refresh=True,
)
assert mock_get.call_count == 1
assert mock_get.call_args[0][0] == "http://localhost:1234/api/v1/models"
assert mock_get.call_args.kwargs["headers"] == {
"Authorization": "Bearer lm-token"
}
assert result["lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf"]["context_length"] == 131072
assert result["Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf"]["context_length"] == 131072
class TestQueryLocalContextLengthNetworkError:
"""_query_local_context_length handles network failures gracefully."""
@@ -6,8 +6,6 @@ when proxy env vars or custom endpoint URLs are malformed.
"""
from __future__ import annotations
import os
import pytest
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
@@ -33,12 +31,6 @@ def test_proxy_env_accepts_empty(monkeypatch):
_validate_proxy_env_urls() # should not raise
def test_proxy_env_normalizes_socks_alias(monkeypatch):
monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
_validate_proxy_env_urls()
assert os.environ["ALL_PROXY"] == "socks5://127.0.0.1:1080/"
@pytest.mark.parametrize("key", [
"HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
"http_proxy", "https_proxy", "all_proxy",

Some files were not shown because too many files have changed in this diff Show More