Compare commits
78 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f90afa03cc | |||
| c6974fd108 | |||
| c6dba918b3 | |||
| 3eade90b39 | |||
| 34d06a9802 | |||
| 2772d99085 | |||
| ee16416c7b | |||
| 3007174a61 | |||
| 2f0a83dd12 | |||
| 110cdd573a | |||
| 4d1b988070 | |||
| 019c11d07e | |||
| fce23e8024 | |||
| 1ec1f6a68a | |||
| 637ad443bf | |||
| a8b85bb887 | |||
| d9753720f3 | |||
| dbc11abcb6 | |||
| 268ee6bdce | |||
| 173289b64f | |||
| 1a3ae6ac6e | |||
| 78e6b06518 | |||
| b650957b40 | |||
| ad06bfccf0 | |||
| 8dfc96dbbb | |||
| 3c8ec7037c | |||
| 161c2c4da4 | |||
| e22416dd9b | |||
| a94099908a | |||
| 851857e413 | |||
| b408379e9d | |||
| e1b0b135cb | |||
| 1eabbe905e | |||
| b962801f6a | |||
| 5cf4fac2aa | |||
| 894e8c8a8f | |||
| 18140199c3 | |||
| 7120d6cdd6 | |||
| d40264d53b | |||
| c506126123 | |||
| d12f8db0b8 | |||
| 25757d631b | |||
| d97f6cec7f | |||
| 241bd4fc7e | |||
| 30a0fcaec8 | |||
| 5449c01d26 | |||
| 1d8d4f28ae | |||
| e94008c404 | |||
| e7d3e9d767 | |||
| 54db7cbbe1 | |||
| ffeaf6ffae | |||
| 989d4ea43d | |||
| 8567031433 | |||
| af4abd2f22 | |||
| 092061711e | |||
| 980fadfea9 | |||
| ae4a884e8d | |||
| 6e3f7f3610 | |||
| 42e366f27b | |||
| 3baafea380 | |||
| e26393ffc2 | |||
| e19252afc4 | |||
| d684d7ee7e | |||
| 7d26feb9a3 | |||
| 875a72e4c8 | |||
| 20a5e589c6 | |||
| 7156f8d866 | |||
| 8de91ce9d2 | |||
| 8385f54e98 | |||
| 105caa001b | |||
| d46db0a1b4 | |||
| 5f4b93c20f | |||
| 5d2fc6d928 | |||
| 3377017eb4 | |||
| a1213d06bd | |||
| 1631895d5a | |||
| 4f467700d4 | |||
| ff6a86cb52 |
@@ -81,6 +81,14 @@
|
||||
# HF_TOKEN=
|
||||
# OPENCODE_GO_BASE_URL=https://opencode.ai/zen/go/v1 # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Qwen OAuth)
|
||||
# =============================================================================
|
||||
# Qwen OAuth reuses your local Qwen CLI login (qwen auth qwen-oauth).
|
||||
# No API key needed — credentials come from ~/.qwen/oauth_creds.json.
|
||||
# Optional base URL override:
|
||||
# HERMES_QWEN_BASE_URL=https://portal.qwen.ai/v1
|
||||
|
||||
# =============================================================================
|
||||
# TOOL API KEYS
|
||||
# =============================================================================
|
||||
|
||||
@@ -8,6 +8,9 @@ on:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: docker-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
@@ -17,22 +20,29 @@ jobs:
|
||||
# Only run on the upstream repository, not on forks
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build image
|
||||
# Build amd64 only so we can `load` the image for smoke testing.
|
||||
# `load: true` cannot export a multi-arch manifest to the local daemon.
|
||||
# The multi-arch build follows on push to main / release.
|
||||
- name: Build image (amd64, smoke test)
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/amd64
|
||||
tags: nousresearch/hermes-agent:test
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
@@ -51,26 +61,28 @@ jobs:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Push image (main branch)
|
||||
- name: Push multi-arch image (main branch)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: |
|
||||
nousresearch/hermes-agent:latest
|
||||
nousresearch/hermes-agent:${{ github.sha }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Push image (release)
|
||||
- name: Push multi-arch image (release)
|
||||
if: github.event_name == 'release'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: |
|
||||
nousresearch/hermes-agent:latest
|
||||
nousresearch/hermes-agent:${{ github.event.release.tag_name }}
|
||||
|
||||
@@ -27,8 +27,8 @@ jobs:
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: python -m pip install ascii-guard pyyaml
|
||||
- name: Install ascii-guard
|
||||
run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
|
||||
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
@@ -27,8 +27,8 @@ jobs:
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: DeterminateSystems/nix-installer-action@main
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@main
|
||||
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
|
||||
- name: Check flake
|
||||
if: runner.os == 'Linux'
|
||||
run: nix flake check --print-build-logs
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
FROM debian:13.4
|
||||
|
||||
# Disable Python stdout buffering to ensure logs are printed immediately
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# Install system dependencies in one layer, clear APT cache
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
|
||||
+6
-2
@@ -2,18 +2,22 @@
|
||||
|
||||
**Release Date:** April 8, 2026
|
||||
|
||||
> The intelligence release — native Google AI Studio provider, live model switching across all platforms, self-optimized GPT/Codex guidance, smart inactivity timeouts, approval buttons, interactive model pickers, MCP OAuth 2.1, and 209 merged PRs with 82 resolved issues.
|
||||
> The intelligence release — background task auto-notifications, free MiMo v2 Pro on Nous Portal, live model switching across all platforms, self-optimized GPT/Codex guidance, native Google AI Studio, smart inactivity timeouts, approval buttons, MCP OAuth 2.1, and 209 merged PRs with 82 resolved issues.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Google AI Studio (Gemini) Native Provider** — Direct access to Gemini models through Google's AI Studio API. Includes automatic models.dev registry integration for real-time context length detection across any provider. ([#5577](https://github.com/NousResearch/hermes-agent/pull/5577))
|
||||
- **Background Process Auto-Notifications (`notify_on_complete`)** — Background tasks can now automatically notify the agent when they finish. Start a long-running process (AI model training, test suites, deployments, builds) and the agent gets notified on completion — no polling needed. The agent can keep working on other things and pick up results when they land. ([#5779](https://github.com/NousResearch/hermes-agent/pull/5779))
|
||||
|
||||
- **Free Xiaomi MiMo v2 Pro on Nous Portal** — Nous Portal now supports the free-tier Xiaomi MiMo v2 Pro model for auxiliary tasks (compression, vision, summarization), with free-tier model gating and pricing display in model selection. ([#6018](https://github.com/NousResearch/hermes-agent/pull/6018), [#5880](https://github.com/NousResearch/hermes-agent/pull/5880))
|
||||
|
||||
- **Live Model Switching (`/model` Command)** — Switch models and providers mid-session from CLI, Telegram, Discord, Slack, or any gateway platform. Aggregator-aware resolution keeps you on OpenRouter/Nous when possible, with automatic cross-provider fallback when needed. Interactive model pickers on Telegram and Discord with inline buttons. ([#5181](https://github.com/NousResearch/hermes-agent/pull/5181), [#5742](https://github.com/NousResearch/hermes-agent/pull/5742))
|
||||
|
||||
- **Self-Optimized GPT/Codex Tool-Use Guidance** — The agent diagnosed and patched 5 failure modes in GPT and Codex tool calling through automated behavioral benchmarking, dramatically improving reliability on OpenAI models. Includes execution discipline guidance and thinking-only prefill continuation for structured reasoning. ([#6120](https://github.com/NousResearch/hermes-agent/pull/6120), [#5414](https://github.com/NousResearch/hermes-agent/pull/5414), [#5931](https://github.com/NousResearch/hermes-agent/pull/5931))
|
||||
|
||||
- **Google AI Studio (Gemini) Native Provider** — Direct access to Gemini models through Google's AI Studio API. Includes automatic models.dev registry integration for real-time context length detection across any provider. ([#5577](https://github.com/NousResearch/hermes-agent/pull/5577))
|
||||
|
||||
- **Inactivity-Based Agent Timeouts** — Gateway and cron timeouts now track actual tool activity instead of wall-clock time. Long-running tasks that are actively working will never be killed — only truly idle agents time out. ([#5389](https://github.com/NousResearch/hermes-agent/pull/5389), [#5440](https://github.com/NousResearch/hermes-agent/pull/5440))
|
||||
|
||||
- **Approval Buttons on Slack & Telegram** — Dangerous command approval via native platform buttons instead of typing `/approve`. Slack gets thread context preservation; Telegram gets emoji reactions for approval status. ([#5890](https://github.com/NousResearch/hermes-agent/pull/5890), [#5975](https://github.com/NousResearch/hermes-agent/pull/5975))
|
||||
|
||||
+76
-20
@@ -163,6 +163,17 @@ def _is_oauth_token(key: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _normalize_base_url_text(base_url) -> str:
|
||||
"""Normalize SDK/base transport URL values to a plain string for inspection.
|
||||
|
||||
Some client objects expose ``base_url`` as an ``httpx.URL`` instead of a raw
|
||||
string. Provider/auth detection should accept either shape.
|
||||
"""
|
||||
if not base_url:
|
||||
return ""
|
||||
return str(base_url).strip()
|
||||
|
||||
|
||||
def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
|
||||
"""Return True for non-Anthropic endpoints using the Anthropic Messages API.
|
||||
|
||||
@@ -170,9 +181,10 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
|
||||
with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth
|
||||
detection should be skipped for these endpoints.
|
||||
"""
|
||||
if not base_url:
|
||||
normalized = _normalize_base_url_text(base_url)
|
||||
if not normalized:
|
||||
return False # No base_url = direct Anthropic API
|
||||
normalized = base_url.rstrip("/").lower()
|
||||
normalized = normalized.rstrip("/").lower()
|
||||
if "anthropic.com" in normalized:
|
||||
return False # Direct Anthropic API — OAuth applies
|
||||
return True # Any other endpoint is a third-party proxy
|
||||
@@ -182,12 +194,13 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||
"""Return True for Anthropic-compatible providers that require Bearer auth.
|
||||
|
||||
Some third-party /anthropic endpoints implement Anthropic's Messages API but
|
||||
require Authorization: Bearer instead of Anthropic's native x-api-key header.
|
||||
require Authorization: Bearer *** of Anthropic's native x-api-key header.
|
||||
MiniMax's global and China Anthropic-compatible endpoints follow this pattern.
|
||||
"""
|
||||
if not base_url:
|
||||
normalized = _normalize_base_url_text(base_url)
|
||||
if not normalized:
|
||||
return False
|
||||
normalized = base_url.rstrip("/").lower()
|
||||
normalized = normalized.rstrip("/").lower()
|
||||
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
|
||||
|
||||
|
||||
@@ -203,13 +216,14 @@ def build_anthropic_client(api_key: str, base_url: str = None):
|
||||
)
|
||||
from httpx import Timeout
|
||||
|
||||
normalized_base_url = _normalize_base_url_text(base_url)
|
||||
kwargs = {
|
||||
"timeout": Timeout(timeout=900.0, connect=10.0),
|
||||
}
|
||||
if base_url:
|
||||
kwargs["base_url"] = base_url
|
||||
if normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
|
||||
if _requires_bearer_auth(base_url):
|
||||
if _requires_bearer_auth(normalized_base_url):
|
||||
# Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
|
||||
# Authorization: Bearer even for regular API keys. Route those endpoints
|
||||
# through auth_token so the SDK sends Bearer auth instead of x-api-key.
|
||||
@@ -942,12 +956,18 @@ def _convert_content_to_anthropic(content: Any) -> Any:
|
||||
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
|
||||
Returns (system_prompt, anthropic_messages).
|
||||
System messages are extracted since Anthropic takes them as a separate param.
|
||||
system_prompt is a string or list of content blocks (when cache_control present).
|
||||
|
||||
When *base_url* is provided and points to a third-party Anthropic-compatible
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
"""
|
||||
system = None
|
||||
result = []
|
||||
@@ -1134,7 +1154,14 @@ def convert_messages_to_anthropic(
|
||||
# orphan stripping, message merging) invalidates the signature,
|
||||
# causing HTTP 400 "Invalid signature in thinking block".
|
||||
#
|
||||
# Strategy (following clawdbot/OpenClaw pattern):
|
||||
# Signatures are Anthropic-proprietary. Third-party endpoints
|
||||
# (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
|
||||
# them and will reject them outright. When targeting a third-party
|
||||
# endpoint, strip ALL thinking/redacted_thinking blocks from every
|
||||
# assistant message — the third-party will generate its own
|
||||
# thinking blocks if it supports extended thinking.
|
||||
#
|
||||
# For direct Anthropic (strategy following clawdbot/OpenClaw):
|
||||
# 1. Strip thinking/redacted_thinking from all assistant messages
|
||||
# EXCEPT the last one — preserves reasoning continuity on the
|
||||
# current tool-use chain while avoiding stale signature errors.
|
||||
@@ -1143,6 +1170,7 @@ def convert_messages_to_anthropic(
|
||||
# 3. Strip cache_control from thinking/redacted_thinking blocks —
|
||||
# cache markers can interfere with signature validation.
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
|
||||
last_assistant_idx = None
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
@@ -1154,16 +1182,19 @@ def convert_messages_to_anthropic(
|
||||
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
|
||||
continue
|
||||
|
||||
if idx != last_assistant_idx:
|
||||
# Strip ALL thinking blocks from non-latest assistant messages
|
||||
if _is_third_party or idx != last_assistant_idx:
|
||||
# Third-party endpoint: strip ALL thinking blocks from every
|
||||
# assistant message — signatures are Anthropic-proprietary.
|
||||
# Direct Anthropic: strip from non-latest assistant messages only.
|
||||
stripped = [
|
||||
b for b in m["content"]
|
||||
if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES)
|
||||
]
|
||||
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
|
||||
else:
|
||||
# Latest assistant: keep signed thinking blocks for reasoning
|
||||
# continuity; downgrade unsigned ones to plain text.
|
||||
# Latest assistant on direct Anthropic: keep signed thinking
|
||||
# blocks for reasoning continuity; downgrade unsigned ones to
|
||||
# plain text.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
@@ -1203,28 +1234,53 @@ def build_anthropic_kwargs(
|
||||
is_oauth: bool = False,
|
||||
preserve_dots: bool = False,
|
||||
context_length: Optional[int] = None,
|
||||
base_url: str | None = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build kwargs for anthropic.messages.create().
|
||||
|
||||
When *max_tokens* is None, the model's native output limit is used
|
||||
(e.g. 128K for Opus 4.6, 64K for Sonnet 4.6). If *context_length*
|
||||
is provided, the effective limit is clamped so it doesn't exceed
|
||||
the context window.
|
||||
Naming note — two distinct concepts, easily confused:
|
||||
max_tokens = OUTPUT token cap for a single response.
|
||||
Anthropic's API calls this "max_tokens" but it only
|
||||
limits the *output*. Anthropic's own native SDK
|
||||
renamed it "max_output_tokens" for clarity.
|
||||
context_length = TOTAL context window (input tokens + output tokens).
|
||||
The API enforces: input_tokens + max_tokens ≤ context_length.
|
||||
Stored on the ContextCompressor; reduced on overflow errors.
|
||||
|
||||
When *max_tokens* is None the model's native output ceiling is used
|
||||
(e.g. 128K for Opus 4.6, 64K for Sonnet 4.6).
|
||||
|
||||
When *context_length* is provided and the model's native output ceiling
|
||||
exceeds it (e.g. a local endpoint with an 8K window), the output cap is
|
||||
clamped to context_length − 1. This only kicks in for unusually small
|
||||
context windows; for full-size models the native output cap is always
|
||||
smaller than the context window so no clamping happens.
|
||||
NOTE: this clamping does not account for prompt size — if the prompt is
|
||||
large, Anthropic may still reject the request. The caller must detect
|
||||
"max_tokens too large given prompt" errors and retry with a smaller cap
|
||||
(see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).
|
||||
|
||||
When *is_oauth* is True, applies Claude Code compatibility transforms:
|
||||
system prompt prefix, tool name prefixing, and prompt sanitization.
|
||||
|
||||
When *preserve_dots* is True, model name dots are not converted to hyphens
|
||||
(for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).
|
||||
|
||||
When *base_url* points to a third-party Anthropic-compatible endpoint,
|
||||
thinking block signatures are stripped (they are Anthropic-proprietary).
|
||||
"""
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages)
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||
|
||||
model = normalize_model_name(model, preserve_dots=preserve_dots)
|
||||
# effective_max_tokens = output cap for this call (≠ total context window)
|
||||
effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
|
||||
|
||||
# Clamp to context window if the user set a lower context_length
|
||||
# (e.g. custom endpoint with limited capacity).
|
||||
# Clamp output cap to fit inside the total context window.
|
||||
# Only matters for small custom endpoints where context_length < native
|
||||
# output ceiling. For standard Anthropic models context_length (e.g.
|
||||
# 200K) is always larger than the output ceiling (e.g. 128K), so this
|
||||
# branch is not taken.
|
||||
if context_length and effective_max_tokens > context_length:
|
||||
effective_max_tokens = max(context_length - 1, 1)
|
||||
|
||||
|
||||
+111
-41
@@ -629,11 +629,19 @@ def _nous_base_url() -> str:
|
||||
|
||||
|
||||
def _read_codex_access_token() -> Optional[str]:
|
||||
"""Read a valid, non-expired Codex OAuth access token from Hermes auth store."""
|
||||
"""Read a valid, non-expired Codex OAuth access token from Hermes auth store.
|
||||
|
||||
If a credential pool exists but currently has no selectable runtime entry
|
||||
(for example all pool slots are marked exhausted), fall back to the
|
||||
profile's auth.json token instead of hard-failing. This keeps explicit
|
||||
fallback-to-Codex working when the pool state is stale but the stored OAuth
|
||||
token is still valid.
|
||||
"""
|
||||
pool_present, entry = _select_pool_entry("openai-codex")
|
||||
if pool_present:
|
||||
token = _pool_runtime_api_key(entry)
|
||||
return token or None
|
||||
if token:
|
||||
return token
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import _read_codex_tokens
|
||||
@@ -694,7 +702,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
|
||||
extra = {}
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
@@ -713,7 +721,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
|
||||
extra = {}
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
@@ -834,7 +842,7 @@ def _read_main_provider() -> str:
|
||||
if isinstance(model_cfg, dict):
|
||||
provider = model_cfg.get("provider", "")
|
||||
if isinstance(provider, str) and provider.strip():
|
||||
return _normalize_aux_provider(provider)
|
||||
return provider.strip().lower()
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
@@ -894,9 +902,13 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
|
||||
pool_present, entry = _select_pool_entry("openai-codex")
|
||||
if pool_present:
|
||||
codex_token = _pool_runtime_api_key(entry)
|
||||
if not codex_token:
|
||||
return None, None
|
||||
base_url = _pool_runtime_base_url(entry, _CODEX_AUX_BASE_URL) or _CODEX_AUX_BASE_URL
|
||||
if codex_token:
|
||||
base_url = _pool_runtime_base_url(entry, _CODEX_AUX_BASE_URL) or _CODEX_AUX_BASE_URL
|
||||
else:
|
||||
codex_token = _read_codex_access_token()
|
||||
if not codex_token:
|
||||
return None, None
|
||||
base_url = _CODEX_AUX_BASE_URL
|
||||
else:
|
||||
codex_token = _read_codex_access_token()
|
||||
if not codex_token:
|
||||
@@ -1035,6 +1047,32 @@ def _is_payment_error(exc: Exception) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _is_connection_error(exc: Exception) -> bool:
|
||||
"""Detect connection/network errors that warrant provider fallback.
|
||||
|
||||
Returns True for errors indicating the provider endpoint is unreachable
|
||||
(DNS failure, connection refused, TLS errors, timeouts). These are
|
||||
distinct from API errors (4xx/5xx) which indicate the provider IS
|
||||
reachable but returned an error.
|
||||
"""
|
||||
from openai import APIConnectionError, APITimeoutError
|
||||
|
||||
if isinstance(exc, (APIConnectionError, APITimeoutError)):
|
||||
return True
|
||||
# urllib3 / httpx / httpcore connection errors
|
||||
err_type = type(exc).__name__
|
||||
if any(kw in err_type for kw in ("Connection", "Timeout", "DNS", "SSL")):
|
||||
return True
|
||||
err_lower = str(exc).lower()
|
||||
if any(kw in err_lower for kw in (
|
||||
"connection refused", "name or service not known",
|
||||
"no route to host", "network is unreachable",
|
||||
"timed out", "connection reset",
|
||||
)):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _try_payment_fallback(
|
||||
failed_provider: str,
|
||||
task: str = None,
|
||||
@@ -1099,7 +1137,7 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
main_model = _read_main_model()
|
||||
if (main_provider and main_model
|
||||
and main_provider not in _AGGREGATOR_PROVIDERS
|
||||
and main_provider not in ("auto", "custom", "")):
|
||||
and main_provider not in ("auto", "")):
|
||||
client, resolved = resolve_provider_client(main_provider, main_model)
|
||||
if client is not None:
|
||||
logger.info("Auxiliary auto-detect: using main provider %s (%s)",
|
||||
@@ -1157,7 +1195,7 @@ def _to_async_client(sync_client, model: str):
|
||||
|
||||
async_kwargs["default_headers"] = copilot_default_headers()
|
||||
elif "api.kimi.com" in base_lower:
|
||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
return AsyncOpenAI(**async_kwargs), model
|
||||
|
||||
|
||||
@@ -1277,7 +1315,13 @@ def resolve_provider_client(
|
||||
)
|
||||
return None, None
|
||||
final_model = model or _read_main_model() or "gpt-4o-mini"
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
extra = {}
|
||||
if "api.kimi.com" in custom_base.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
elif "api.githubcopilot.com" in custom_base.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
# Try custom first, then codex, then API-key providers
|
||||
@@ -1356,7 +1400,7 @@ def resolve_provider_client(
|
||||
# Provider-specific headers
|
||||
headers = {}
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
headers["User-Agent"] = "KimiCLI/1.0"
|
||||
headers["User-Agent"] = "KimiCLI/1.3"
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
@@ -1470,19 +1514,25 @@ def _preferred_main_vision_provider() -> Optional[str]:
|
||||
def get_available_vision_backends() -> List[str]:
|
||||
"""Return the currently available vision backends in auto-selection order.
|
||||
|
||||
Order: OpenRouter → Nous → active provider. This is the single source
|
||||
of truth for setup, tool gating, and runtime auto-routing of vision tasks.
|
||||
Order: active provider → OpenRouter → Nous → stop. This is the single
|
||||
source of truth for setup, tool gating, and runtime auto-routing of
|
||||
vision tasks.
|
||||
"""
|
||||
available = [p for p in _VISION_AUTO_PROVIDER_ORDER
|
||||
if _strict_vision_backend_available(p)]
|
||||
# Also check the user's active provider (may be DeepSeek, Alibaba, named
|
||||
# custom, etc.) — resolve_provider_client handles all provider types.
|
||||
available: List[str] = []
|
||||
# 1. Active provider — if the user configured a provider, try it first.
|
||||
main_provider = _read_main_provider()
|
||||
if (main_provider and main_provider not in ("auto", "")
|
||||
and main_provider not in available):
|
||||
client, _ = resolve_provider_client(main_provider, _read_main_model())
|
||||
if client is not None:
|
||||
available.append(main_provider)
|
||||
if main_provider and main_provider not in ("auto", ""):
|
||||
if main_provider in _VISION_AUTO_PROVIDER_ORDER:
|
||||
if _strict_vision_backend_available(main_provider):
|
||||
available.append(main_provider)
|
||||
else:
|
||||
client, _ = resolve_provider_client(main_provider, _read_main_model())
|
||||
if client is not None:
|
||||
available.append(main_provider)
|
||||
# 2. OpenRouter, 3. Nous — skip if already covered by main provider.
|
||||
for p in _VISION_AUTO_PROVIDER_ORDER:
|
||||
if p not in available and _strict_vision_backend_available(p):
|
||||
available.append(p)
|
||||
return available
|
||||
|
||||
|
||||
@@ -1529,28 +1579,37 @@ def resolve_vision_provider_client(
|
||||
|
||||
if requested == "auto":
|
||||
# Vision auto-detection order:
|
||||
# 1. OpenRouter (known vision-capable default model)
|
||||
# 2. Nous Portal (known vision-capable default model)
|
||||
# 3. Active provider + model (user's main chat config)
|
||||
# 1. Active provider + model (user's main chat config)
|
||||
# 2. OpenRouter (known vision-capable default model)
|
||||
# 3. Nous Portal (known vision-capable default model)
|
||||
# 4. Stop
|
||||
for candidate in _VISION_AUTO_PROVIDER_ORDER:
|
||||
sync_client, default_model = _resolve_strict_vision_backend(candidate)
|
||||
if sync_client is not None:
|
||||
return _finalize(candidate, sync_client, default_model)
|
||||
|
||||
# Fall back to the user's active provider + model.
|
||||
main_provider = _read_main_provider()
|
||||
main_model = _read_main_model()
|
||||
if main_provider and main_provider not in ("auto", ""):
|
||||
sync_client, resolved_model = resolve_provider_client(
|
||||
main_provider, main_model)
|
||||
if main_provider in _VISION_AUTO_PROVIDER_ORDER:
|
||||
# Known strict backend — use its defaults.
|
||||
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
|
||||
if sync_client is not None:
|
||||
return _finalize(main_provider, sync_client, default_model)
|
||||
else:
|
||||
# Exotic provider (DeepSeek, Alibaba, named custom, etc.)
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, main_model)
|
||||
if rpc_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using active provider %s (%s)",
|
||||
main_provider, rpc_model or main_model,
|
||||
)
|
||||
return _finalize(
|
||||
main_provider, rpc_client, rpc_model or main_model)
|
||||
|
||||
# Fall back through aggregators.
|
||||
for candidate in _VISION_AUTO_PROVIDER_ORDER:
|
||||
if candidate == main_provider:
|
||||
continue # already tried above
|
||||
sync_client, default_model = _resolve_strict_vision_backend(candidate)
|
||||
if sync_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using active provider %s (%s)",
|
||||
main_provider, resolved_model or main_model,
|
||||
)
|
||||
return _finalize(
|
||||
main_provider, sync_client, resolved_model or main_model)
|
||||
return _finalize(candidate, sync_client, default_model)
|
||||
|
||||
logger.debug("Auxiliary vision client: none available")
|
||||
return None, None, None
|
||||
@@ -2066,7 +2125,18 @@ def call_llm(
|
||||
# try alternative providers instead of giving up. This handles the
|
||||
# common case where a user runs out of OpenRouter credits but has
|
||||
# Codex OAuth or another provider available.
|
||||
if _is_payment_error(first_err):
|
||||
#
|
||||
# ── Connection error fallback ────────────────────────────────
|
||||
# When a provider endpoint is unreachable (DNS failure, connection
|
||||
# refused, timeout), try alternative providers. This handles stale
|
||||
# Codex/OAuth tokens that authenticate but whose endpoint is down,
|
||||
# and providers the user never configured that got picked up by
|
||||
# the auto-detection chain.
|
||||
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
if should_fallback:
|
||||
reason = "payment error" if _is_payment_error(first_err) else "connection error"
|
||||
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
|
||||
task or "call", reason, resolved_provider, first_err)
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task)
|
||||
if fb_client is not None:
|
||||
|
||||
+77
-28
@@ -154,12 +154,15 @@ class ContextCompressor:
|
||||
|
||||
def _prune_old_tool_results(
|
||||
self, messages: List[Dict[str, Any]], protect_tail_count: int,
|
||||
protect_tail_tokens: int | None = None,
|
||||
) -> tuple[List[Dict[str, Any]], int]:
|
||||
"""Replace old tool result contents with a short placeholder.
|
||||
|
||||
Walks backward from the end, protecting the most recent
|
||||
``protect_tail_count`` messages. Older tool results get their
|
||||
content replaced with a placeholder string.
|
||||
Walks backward from the end, protecting the most recent messages that
|
||||
fall within ``protect_tail_tokens`` (when provided) OR the last
|
||||
``protect_tail_count`` messages (backward-compatible default).
|
||||
When both are given, the token budget takes priority and the message
|
||||
count acts as a hard minimum floor.
|
||||
|
||||
Returns (pruned_messages, pruned_count).
|
||||
"""
|
||||
@@ -168,7 +171,29 @@ class ContextCompressor:
|
||||
|
||||
result = [m.copy() for m in messages]
|
||||
pruned = 0
|
||||
prune_boundary = len(result) - protect_tail_count
|
||||
|
||||
# Determine the prune boundary
|
||||
if protect_tail_tokens is not None and protect_tail_tokens > 0:
|
||||
# Token-budget approach: walk backward accumulating tokens
|
||||
accumulated = 0
|
||||
boundary = len(result)
|
||||
min_protect = min(protect_tail_count, len(result) - 1)
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
msg = result[i]
|
||||
content_len = len(msg.get("content") or "")
|
||||
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
if isinstance(tc, dict):
|
||||
args = tc.get("function", {}).get("arguments", "")
|
||||
msg_tokens += len(args) // _CHARS_PER_TOKEN
|
||||
if accumulated + msg_tokens > protect_tail_tokens and (len(result) - i) >= min_protect:
|
||||
boundary = i
|
||||
break
|
||||
accumulated += msg_tokens
|
||||
boundary = i
|
||||
prune_boundary = max(boundary, len(result) - min_protect)
|
||||
else:
|
||||
prune_boundary = len(result) - protect_tail_count
|
||||
|
||||
for i in range(prune_boundary):
|
||||
msg = result[i]
|
||||
@@ -199,30 +224,39 @@ class ContextCompressor:
|
||||
budget = int(content_tokens * _SUMMARY_RATIO)
|
||||
return max(_MIN_SUMMARY_TOKENS, min(budget, self.max_summary_tokens))
|
||||
|
||||
# Truncation limits for the summarizer input. These bound how much of
|
||||
# each message the summary model sees — the budget is the *summary*
|
||||
# model's context window, not the main model's.
|
||||
_CONTENT_MAX = 6000 # total chars per message body
|
||||
_CONTENT_HEAD = 4000 # chars kept from the start
|
||||
_CONTENT_TAIL = 1500 # chars kept from the end
|
||||
_TOOL_ARGS_MAX = 1500 # tool call argument chars
|
||||
_TOOL_ARGS_HEAD = 1200 # kept from the start of tool args
|
||||
|
||||
def _serialize_for_summary(self, turns: List[Dict[str, Any]]) -> str:
|
||||
"""Serialize conversation turns into labeled text for the summarizer.
|
||||
|
||||
Includes tool call arguments and result content (up to 3000 chars
|
||||
per message) so the summarizer can preserve specific details like
|
||||
file paths, commands, and outputs.
|
||||
Includes tool call arguments and result content (up to
|
||||
``_CONTENT_MAX`` chars per message) so the summarizer can preserve
|
||||
specific details like file paths, commands, and outputs.
|
||||
"""
|
||||
parts = []
|
||||
for msg in turns:
|
||||
role = msg.get("role", "unknown")
|
||||
content = msg.get("content") or ""
|
||||
|
||||
# Tool results: keep more content than before (3000 chars)
|
||||
# Tool results: keep enough content for the summarizer
|
||||
if role == "tool":
|
||||
tool_id = msg.get("tool_call_id", "")
|
||||
if len(content) > 3000:
|
||||
content = content[:2000] + "\n...[truncated]...\n" + content[-800:]
|
||||
if len(content) > self._CONTENT_MAX:
|
||||
content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
|
||||
parts.append(f"[TOOL RESULT {tool_id}]: {content}")
|
||||
continue
|
||||
|
||||
# Assistant messages: include tool call names AND arguments
|
||||
if role == "assistant":
|
||||
if len(content) > 3000:
|
||||
content = content[:2000] + "\n...[truncated]...\n" + content[-800:]
|
||||
if len(content) > self._CONTENT_MAX:
|
||||
content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
|
||||
tool_calls = msg.get("tool_calls", [])
|
||||
if tool_calls:
|
||||
tc_parts = []
|
||||
@@ -232,8 +266,8 @@ class ContextCompressor:
|
||||
name = fn.get("name", "?")
|
||||
args = fn.get("arguments", "")
|
||||
# Truncate long arguments but keep enough for context
|
||||
if len(args) > 500:
|
||||
args = args[:400] + "..."
|
||||
if len(args) > self._TOOL_ARGS_MAX:
|
||||
args = args[:self._TOOL_ARGS_HEAD] + "..."
|
||||
tc_parts.append(f" {name}({args})")
|
||||
else:
|
||||
fn = getattr(tc, "function", None)
|
||||
@@ -244,8 +278,8 @@ class ContextCompressor:
|
||||
continue
|
||||
|
||||
# User and other roles
|
||||
if len(content) > 3000:
|
||||
content = content[:2000] + "\n...[truncated]...\n" + content[-800:]
|
||||
if len(content) > self._CONTENT_MAX:
|
||||
content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
|
||||
parts.append(f"[{role.upper()}]: {content}")
|
||||
|
||||
return "\n\n".join(parts)
|
||||
@@ -310,6 +344,9 @@ Update the summary using this exact structure. PRESERVE all existing information
|
||||
## Critical Context
|
||||
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
|
||||
|
||||
## Tools & Patterns
|
||||
[Which tools were used, how they were used effectively, and any tool-specific discoveries. Accumulate across compactions.]
|
||||
|
||||
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
|
||||
|
||||
Write only the summary body. Do not include any preamble or prefix."""
|
||||
@@ -348,6 +385,9 @@ Use this exact structure:
|
||||
## Critical Context
|
||||
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
|
||||
|
||||
## Tools & Patterns
|
||||
[Which tools were used, how they were used effectively, and any tool-specific discoveries (e.g., preferred flags, working invocations, successful command patterns)]
|
||||
|
||||
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions. The goal is to prevent the next assistant from repeating work or losing important details.
|
||||
|
||||
Write only the summary body. Do not include any preamble or prefix."""
|
||||
@@ -518,13 +558,20 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
derived from ``summary_target_ratio * context_length``, so it
|
||||
scales automatically with the model's context window.
|
||||
|
||||
Never cuts inside a tool_call/result group. Falls back to the old
|
||||
``protect_last_n`` if the budget would protect fewer messages.
|
||||
Token budget is the primary criterion. A hard minimum of 3 messages
|
||||
is always protected, but the budget is allowed to exceed by up to
|
||||
1.5x to avoid cutting inside an oversized message (tool output, file
|
||||
read, etc.). If even the minimum 3 messages exceed 1.5x the budget
|
||||
the cut is placed right after the head so compression still runs.
|
||||
|
||||
Never cuts inside a tool_call/result group.
|
||||
"""
|
||||
if token_budget is None:
|
||||
token_budget = self.tail_token_budget
|
||||
n = len(messages)
|
||||
min_tail = self.protect_last_n
|
||||
# Hard minimum: always keep at least 3 messages in the tail
|
||||
min_tail = min(3, n - head_end - 1) if n - head_end > 1 else 0
|
||||
soft_ceiling = int(token_budget * 1.5)
|
||||
accumulated = 0
|
||||
cut_idx = n # start from beyond the end
|
||||
|
||||
@@ -537,21 +584,21 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
if isinstance(tc, dict):
|
||||
args = tc.get("function", {}).get("arguments", "")
|
||||
msg_tokens += len(args) // _CHARS_PER_TOKEN
|
||||
if accumulated + msg_tokens > token_budget and (n - i) >= min_tail:
|
||||
# Stop once we exceed the soft ceiling (unless we haven't hit min_tail yet)
|
||||
if accumulated + msg_tokens > soft_ceiling and (n - i) >= min_tail:
|
||||
break
|
||||
accumulated += msg_tokens
|
||||
cut_idx = i
|
||||
|
||||
# Ensure we protect at least protect_last_n messages
|
||||
# Ensure we protect at least min_tail messages
|
||||
fallback_cut = n - min_tail
|
||||
if cut_idx > fallback_cut:
|
||||
cut_idx = fallback_cut
|
||||
|
||||
# If the token budget would protect everything (small conversations),
|
||||
# fall back to the fixed protect_last_n approach so compression can
|
||||
# still remove middle turns.
|
||||
# force a cut after the head so compression can still remove middle turns.
|
||||
if cut_idx <= head_end:
|
||||
cut_idx = fallback_cut
|
||||
cut_idx = max(fallback_cut, head_end + 1)
|
||||
|
||||
# Align to avoid splitting tool groups
|
||||
cut_idx = self._align_boundary_backward(messages, cut_idx)
|
||||
@@ -576,12 +623,13 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
up so the API never receives mismatched IDs.
|
||||
"""
|
||||
n_messages = len(messages)
|
||||
if n_messages <= self.protect_first_n + self.protect_last_n + 1:
|
||||
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
|
||||
_min_for_compress = self.protect_first_n + 3 + 1
|
||||
if n_messages <= _min_for_compress:
|
||||
if not self.quiet_mode:
|
||||
logger.warning(
|
||||
"Cannot compress: only %d messages (need > %d)",
|
||||
n_messages,
|
||||
self.protect_first_n + self.protect_last_n + 1,
|
||||
n_messages, _min_for_compress,
|
||||
)
|
||||
return messages
|
||||
|
||||
@@ -589,7 +637,8 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
|
||||
# Phase 1: Prune old tool results (cheap, no LLM call)
|
||||
messages, pruned_count = self._prune_old_tool_results(
|
||||
messages, protect_tail_count=self.protect_last_n * 3,
|
||||
messages, protect_tail_count=self.protect_last_n,
|
||||
protect_tail_tokens=self.tail_token_budget,
|
||||
)
|
||||
if pruned_count and not self.quiet_mode:
|
||||
logger.info("Pre-compression: pruned %d old tool result(s)", pruned_count)
|
||||
|
||||
@@ -18,12 +18,14 @@ import hermes_cli.auth as auth_mod
|
||||
from hermes_cli.auth import (
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
|
||||
KIMI_CODE_BASE_URL,
|
||||
PROVIDER_REGISTRY,
|
||||
_codex_access_token_is_expiring,
|
||||
_decode_jwt_claims,
|
||||
_import_codex_cli_tokens,
|
||||
_load_auth_store,
|
||||
_load_provider_state,
|
||||
_resolve_kimi_base_url,
|
||||
_resolve_zai_base_url,
|
||||
read_credential_pool,
|
||||
write_credential_pool,
|
||||
@@ -64,10 +66,10 @@ SUPPORTED_POOL_STRATEGIES = {
|
||||
}
|
||||
|
||||
# Cooldown before retrying an exhausted credential.
|
||||
# 429 (rate-limited) cools down faster since quotas reset frequently.
|
||||
# 402 (billing/quota) and other codes use a longer default.
|
||||
# 429 (rate-limited) and 402 (billing/quota) both cool down after 1 hour.
|
||||
# Provider-supplied reset_at timestamps override these defaults.
|
||||
EXHAUSTED_TTL_429_SECONDS = 60 * 60 # 1 hour
|
||||
EXHAUSTED_TTL_DEFAULT_SECONDS = 24 * 60 * 60 # 24 hours
|
||||
EXHAUSTED_TTL_DEFAULT_SECONDS = 60 * 60 # 1 hour
|
||||
|
||||
# Pool key prefix for custom OpenAI-compatible endpoints.
|
||||
# Custom endpoints all share provider='custom' but are keyed by their
|
||||
@@ -511,6 +513,13 @@ class CredentialPool:
|
||||
except Exception as wexc:
|
||||
logger.debug("Failed to write refreshed token to credentials file: %s", wexc)
|
||||
elif self.provider == "openai-codex":
|
||||
# Proactively sync from ~/.codex/auth.json before refresh.
|
||||
# The Codex CLI (or another Hermes profile) may have already
|
||||
# consumed our refresh_token. Syncing first avoids a
|
||||
# "refresh_token_reused" error when the CLI has a newer pair.
|
||||
synced = self._sync_codex_entry_from_cli(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
refreshed = auth_mod.refresh_codex_oauth_pure(
|
||||
entry.access_token,
|
||||
entry.refresh_token,
|
||||
@@ -596,6 +605,35 @@ class CredentialPool:
|
||||
# Credentials file had a valid (non-expired) token — use it directly
|
||||
logger.debug("Credentials file has valid token, using without refresh")
|
||||
return synced
|
||||
# For openai-codex: the refresh_token may have been consumed by
|
||||
# the Codex CLI between our proactive sync and the refresh call.
|
||||
# Re-sync and retry once.
|
||||
if self.provider == "openai-codex":
|
||||
synced = self._sync_codex_entry_from_cli(entry)
|
||||
if synced.refresh_token != entry.refresh_token:
|
||||
logger.debug("Retrying Codex refresh with synced token from ~/.codex/auth.json")
|
||||
try:
|
||||
refreshed = auth_mod.refresh_codex_oauth_pure(
|
||||
synced.access_token,
|
||||
synced.refresh_token,
|
||||
)
|
||||
updated = replace(
|
||||
synced,
|
||||
access_token=refreshed["access_token"],
|
||||
refresh_token=refreshed["refresh_token"],
|
||||
last_refresh=refreshed.get("last_refresh"),
|
||||
last_status=STATUS_OK,
|
||||
last_status_at=None,
|
||||
last_error_code=None,
|
||||
)
|
||||
self._replace_entry(synced, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
except Exception as retry_exc:
|
||||
logger.debug("Codex retry refresh also failed: %s", retry_exc)
|
||||
elif not self._entry_needs_refresh(synced):
|
||||
logger.debug("Codex CLI has valid token, using without refresh")
|
||||
return synced
|
||||
self._mark_exhausted(entry, None)
|
||||
return None
|
||||
|
||||
@@ -1084,7 +1122,9 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
active_sources.add(source)
|
||||
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
|
||||
base_url = env_url or pconfig.inference_base_url
|
||||
if provider == "zai":
|
||||
if provider == "kimi-coding":
|
||||
base_url = _resolve_kimi_base_url(token, pconfig.inference_base_url, env_url)
|
||||
elif provider == "zai":
|
||||
base_url = _resolve_zai_base_url(token, pconfig.inference_base_url, env_url)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
|
||||
@@ -0,0 +1,792 @@
|
||||
"""API error classification for smart failover and recovery.
|
||||
|
||||
Provides a structured taxonomy of API errors and a priority-ordered
|
||||
classification pipeline that determines the correct recovery action
|
||||
(retry, rotate credential, fallback to another provider, compress
|
||||
context, or abort).
|
||||
|
||||
Replaces scattered inline string-matching with a centralized classifier
|
||||
that the main retry loop in run_agent.py consults for every API failure.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── Error taxonomy ──────────────────────────────────────────────────────
|
||||
|
||||
class FailoverReason(enum.Enum):
|
||||
"""Why an API call failed — determines recovery strategy."""
|
||||
|
||||
# Authentication / authorization
|
||||
auth = "auth" # Transient auth (401/403) — refresh/rotate
|
||||
auth_permanent = "auth_permanent" # Auth failed after refresh — abort
|
||||
|
||||
# Billing / quota
|
||||
billing = "billing" # 402 or confirmed credit exhaustion — rotate immediately
|
||||
rate_limit = "rate_limit" # 429 or quota-based throttling — backoff then rotate
|
||||
|
||||
# Server-side
|
||||
overloaded = "overloaded" # 503/529 — provider overloaded, backoff
|
||||
server_error = "server_error" # 500/502 — internal server error, retry
|
||||
|
||||
# Transport
|
||||
timeout = "timeout" # Connection/read timeout — rebuild client + retry
|
||||
|
||||
# Context / payload
|
||||
context_overflow = "context_overflow" # Context too large — compress, not failover
|
||||
payload_too_large = "payload_too_large" # 413 — compress payload
|
||||
|
||||
# Model
|
||||
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
|
||||
# Provider-specific
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
long_context_tier = "long_context_tier" # Anthropic "extra usage" tier gate
|
||||
|
||||
# Catch-all
|
||||
unknown = "unknown" # Unclassifiable — retry with backoff
|
||||
|
||||
|
||||
# ── Classification result ───────────────────────────────────────────────
|
||||
|
||||
@dataclass
|
||||
class ClassifiedError:
|
||||
"""Structured classification of an API error with recovery hints."""
|
||||
|
||||
reason: FailoverReason
|
||||
status_code: Optional[int] = None
|
||||
provider: Optional[str] = None
|
||||
model: Optional[str] = None
|
||||
message: str = ""
|
||||
error_context: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Recovery action hints — the retry loop checks these instead of
|
||||
# re-classifying the error itself.
|
||||
retryable: bool = True
|
||||
should_compress: bool = False
|
||||
should_rotate_credential: bool = False
|
||||
should_fallback: bool = False
|
||||
|
||||
@property
|
||||
def is_auth(self) -> bool:
|
||||
return self.reason in (FailoverReason.auth, FailoverReason.auth_permanent)
|
||||
|
||||
@property
|
||||
def is_transient(self) -> bool:
|
||||
"""Error is expected to resolve on retry (with or without backoff)."""
|
||||
return self.reason in (
|
||||
FailoverReason.rate_limit,
|
||||
FailoverReason.overloaded,
|
||||
FailoverReason.server_error,
|
||||
FailoverReason.timeout,
|
||||
FailoverReason.unknown,
|
||||
)
|
||||
|
||||
|
||||
# ── Provider-specific patterns ──────────────────────────────────────────
|
||||
|
||||
# Patterns that indicate billing exhaustion (not transient rate limit)
|
||||
_BILLING_PATTERNS = [
|
||||
"insufficient credits",
|
||||
"insufficient_quota",
|
||||
"credit balance",
|
||||
"credits have been exhausted",
|
||||
"top up your credits",
|
||||
"payment required",
|
||||
"billing hard limit",
|
||||
"exceeded your current quota",
|
||||
"account is deactivated",
|
||||
"plan does not include",
|
||||
]
|
||||
|
||||
# Patterns that indicate rate limiting (transient, will resolve)
|
||||
_RATE_LIMIT_PATTERNS = [
|
||||
"rate limit",
|
||||
"rate_limit",
|
||||
"too many requests",
|
||||
"throttled",
|
||||
"requests per minute",
|
||||
"tokens per minute",
|
||||
"requests per day",
|
||||
"try again in",
|
||||
"please retry after",
|
||||
"resource_exhausted",
|
||||
]
|
||||
|
||||
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
|
||||
_USAGE_LIMIT_PATTERNS = [
|
||||
"usage limit",
|
||||
"quota",
|
||||
"limit exceeded",
|
||||
"key limit exceeded",
|
||||
]
|
||||
|
||||
# Patterns confirming usage limit is transient (not billing)
|
||||
_USAGE_LIMIT_TRANSIENT_SIGNALS = [
|
||||
"try again",
|
||||
"retry",
|
||||
"resets at",
|
||||
"reset in",
|
||||
"wait",
|
||||
"requests remaining",
|
||||
"periodic",
|
||||
"window",
|
||||
]
|
||||
|
||||
# Payload-too-large patterns detected from message text (no status_code attr).
|
||||
# Proxies and some backends embed the HTTP status in the error message.
|
||||
_PAYLOAD_TOO_LARGE_PATTERNS = [
|
||||
"request entity too large",
|
||||
"payload too large",
|
||||
"error code: 413",
|
||||
]
|
||||
|
||||
# Context overflow patterns
|
||||
_CONTEXT_OVERFLOW_PATTERNS = [
|
||||
"context length",
|
||||
"context size",
|
||||
"maximum context",
|
||||
"token limit",
|
||||
"too many tokens",
|
||||
"reduce the length",
|
||||
"exceeds the limit",
|
||||
"context window",
|
||||
"prompt is too long",
|
||||
"prompt exceeds max length",
|
||||
"max_tokens",
|
||||
"maximum number of tokens",
|
||||
# Chinese error messages (some providers return these)
|
||||
"超过最大长度",
|
||||
"上下文长度",
|
||||
]
|
||||
|
||||
# Model not found patterns
|
||||
_MODEL_NOT_FOUND_PATTERNS = [
|
||||
"is not a valid model",
|
||||
"invalid model",
|
||||
"model not found",
|
||||
"model_not_found",
|
||||
"does not exist",
|
||||
"no such model",
|
||||
"unknown model",
|
||||
"unsupported model",
|
||||
]
|
||||
|
||||
# Auth patterns (non-status-code signals)
|
||||
_AUTH_PATTERNS = [
|
||||
"invalid api key",
|
||||
"invalid_api_key",
|
||||
"authentication",
|
||||
"unauthorized",
|
||||
"forbidden",
|
||||
"invalid token",
|
||||
"token expired",
|
||||
"token revoked",
|
||||
"access denied",
|
||||
]
|
||||
|
||||
# Anthropic thinking block signature patterns
|
||||
_THINKING_SIG_PATTERNS = [
|
||||
"signature", # Combined with "thinking" check
|
||||
]
|
||||
|
||||
# Transport error type names
|
||||
_TRANSPORT_ERROR_TYPES = frozenset({
|
||||
"ReadTimeout", "ConnectTimeout", "PoolTimeout",
|
||||
"ConnectError", "RemoteProtocolError",
|
||||
"ConnectionError", "ConnectionResetError",
|
||||
"ConnectionAbortedError", "BrokenPipeError",
|
||||
"TimeoutError", "ReadError",
|
||||
"ServerDisconnectedError",
|
||||
# OpenAI SDK errors (not subclasses of Python builtins)
|
||||
"APIConnectionError",
|
||||
"APITimeoutError",
|
||||
})
|
||||
|
||||
# Server disconnect patterns (no status code, but transport-level)
|
||||
_SERVER_DISCONNECT_PATTERNS = [
|
||||
"server disconnected",
|
||||
"peer closed connection",
|
||||
"connection reset by peer",
|
||||
"connection was closed",
|
||||
"network connection lost",
|
||||
"unexpected eof",
|
||||
"incomplete chunked read",
|
||||
]
|
||||
|
||||
|
||||
# ── Classification pipeline ─────────────────────────────────────────────
|
||||
|
||||
def classify_api_error(
|
||||
error: Exception,
|
||||
*,
|
||||
provider: str = "",
|
||||
model: str = "",
|
||||
approx_tokens: int = 0,
|
||||
context_length: int = 200000,
|
||||
num_messages: int = 0,
|
||||
) -> ClassifiedError:
|
||||
"""Classify an API error into a structured recovery recommendation.
|
||||
|
||||
Priority-ordered pipeline:
|
||||
1. Special-case provider-specific patterns (thinking sigs, tier gates)
|
||||
2. HTTP status code + message-aware refinement
|
||||
3. Error code classification (from body)
|
||||
4. Message pattern matching (billing vs rate_limit vs context vs auth)
|
||||
5. Transport error heuristics
|
||||
6. Server disconnect + large session → context overflow
|
||||
7. Fallback: unknown (retryable with backoff)
|
||||
|
||||
Args:
|
||||
error: The exception from the API call.
|
||||
provider: Current provider name (e.g. "openrouter", "anthropic").
|
||||
model: Current model slug.
|
||||
approx_tokens: Approximate token count of the current context.
|
||||
context_length: Maximum context length for the current model.
|
||||
|
||||
Returns:
|
||||
ClassifiedError with reason and recovery action hints.
|
||||
"""
|
||||
status_code = _extract_status_code(error)
|
||||
error_type = type(error).__name__
|
||||
body = _extract_error_body(error)
|
||||
error_code = _extract_error_code(body)
|
||||
|
||||
# Build a comprehensive error message string for pattern matching.
|
||||
# str(error) alone may not include the body message (e.g. OpenAI SDK's
|
||||
# APIStatusError.__str__ returns the first arg, not the body). Append
|
||||
# the body message so patterns like "try again" in 402 disambiguation
|
||||
# are detected even when only present in the structured body.
|
||||
#
|
||||
# Also extract metadata.raw — OpenRouter wraps upstream provider errors
|
||||
# inside {"error": {"message": "Provider returned error", "metadata":
|
||||
# {"raw": "<actual error JSON>"}}} and the real error message (e.g.
|
||||
# "context length exceeded") is only in the inner JSON.
|
||||
_raw_msg = str(error).lower()
|
||||
_body_msg = ""
|
||||
_metadata_msg = ""
|
||||
if isinstance(body, dict):
|
||||
_err_obj = body.get("error", {})
|
||||
if isinstance(_err_obj, dict):
|
||||
_body_msg = (_err_obj.get("message") or "").lower()
|
||||
# Parse metadata.raw for wrapped provider errors
|
||||
_metadata = _err_obj.get("metadata", {})
|
||||
if isinstance(_metadata, dict):
|
||||
_raw_json = _metadata.get("raw") or ""
|
||||
if isinstance(_raw_json, str) and _raw_json.strip():
|
||||
try:
|
||||
import json
|
||||
_inner = json.loads(_raw_json)
|
||||
if isinstance(_inner, dict):
|
||||
_inner_err = _inner.get("error", {})
|
||||
if isinstance(_inner_err, dict):
|
||||
_metadata_msg = (_inner_err.get("message") or "").lower()
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
if not _body_msg:
|
||||
_body_msg = (body.get("message") or "").lower()
|
||||
# Combine all message sources for pattern matching
|
||||
parts = [_raw_msg]
|
||||
if _body_msg and _body_msg not in _raw_msg:
|
||||
parts.append(_body_msg)
|
||||
if _metadata_msg and _metadata_msg not in _raw_msg and _metadata_msg not in _body_msg:
|
||||
parts.append(_metadata_msg)
|
||||
error_msg = " ".join(parts)
|
||||
provider_lower = (provider or "").strip().lower()
|
||||
model_lower = (model or "").strip().lower()
|
||||
|
||||
def _result(reason: FailoverReason, **overrides) -> ClassifiedError:
|
||||
defaults = {
|
||||
"reason": reason,
|
||||
"status_code": status_code,
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"message": _extract_message(error, body),
|
||||
}
|
||||
defaults.update(overrides)
|
||||
return ClassifiedError(**defaults)
|
||||
|
||||
# ── 1. Provider-specific patterns (highest priority) ────────────
|
||||
|
||||
# Anthropic thinking block signature invalid (400).
|
||||
# Don't gate on provider — OpenRouter proxies Anthropic errors, so the
|
||||
# provider may be "openrouter" even though the error is Anthropic-specific.
|
||||
# The message pattern ("signature" + "thinking") is unique enough.
|
||||
if (
|
||||
status_code == 400
|
||||
and "signature" in error_msg
|
||||
and "thinking" in error_msg
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.thinking_signature,
|
||||
retryable=True,
|
||||
should_compress=False,
|
||||
)
|
||||
|
||||
# Anthropic long-context tier gate (429 "extra usage" + "long context")
|
||||
if (
|
||||
status_code == 429
|
||||
and "extra usage" in error_msg
|
||||
and "long context" in error_msg
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.long_context_tier,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# ── 2. HTTP status code classification ──────────────────────────
|
||||
|
||||
if status_code is not None:
|
||||
classified = _classify_by_status(
|
||||
status_code, error_msg, error_code, body,
|
||||
provider=provider_lower, model=model_lower,
|
||||
approx_tokens=approx_tokens, context_length=context_length,
|
||||
num_messages=num_messages,
|
||||
result_fn=_result,
|
||||
)
|
||||
if classified is not None:
|
||||
return classified
|
||||
|
||||
# ── 3. Error code classification ────────────────────────────────
|
||||
|
||||
if error_code:
|
||||
classified = _classify_by_error_code(error_code, error_msg, _result)
|
||||
if classified is not None:
|
||||
return classified
|
||||
|
||||
# ── 4. Message pattern matching (no status code) ────────────────
|
||||
|
||||
classified = _classify_by_message(
|
||||
error_msg, error_type,
|
||||
approx_tokens=approx_tokens,
|
||||
context_length=context_length,
|
||||
result_fn=_result,
|
||||
)
|
||||
if classified is not None:
|
||||
return classified
|
||||
|
||||
# ── 5. Server disconnect + large session → context overflow ─────
|
||||
# Must come BEFORE generic transport error catch — a disconnect on
|
||||
# a large session is more likely context overflow than a transient
|
||||
# transport hiccup. Without this ordering, RemoteProtocolError
|
||||
# always maps to timeout regardless of session size.
|
||||
|
||||
is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
|
||||
if is_disconnect and not status_code:
|
||||
is_large = approx_tokens > context_length * 0.6 or approx_tokens > 120000 or num_messages > 200
|
||||
if is_large:
|
||||
return _result(
|
||||
FailoverReason.context_overflow,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 6. Transport / timeout heuristics ───────────────────────────
|
||||
|
||||
if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 7. Fallback: unknown ────────────────────────────────────────
|
||||
|
||||
return _result(FailoverReason.unknown, retryable=True)
|
||||
|
||||
|
||||
# ── Status code classification ──────────────────────────────────────────
|
||||
|
||||
def _classify_by_status(
|
||||
status_code: int,
|
||||
error_msg: str,
|
||||
error_code: str,
|
||||
body: dict,
|
||||
*,
|
||||
provider: str,
|
||||
model: str,
|
||||
approx_tokens: int,
|
||||
context_length: int,
|
||||
num_messages: int = 0,
|
||||
result_fn,
|
||||
) -> Optional[ClassifiedError]:
|
||||
"""Classify based on HTTP status code with message-aware refinement."""
|
||||
|
||||
if status_code == 401:
|
||||
# Not retryable on its own — credential pool rotation and
|
||||
# provider-specific refresh (Codex, Anthropic, Nous) run before
|
||||
# the retryability check in run_agent.py. If those succeed, the
|
||||
# loop `continue`s. If they fail, retryable=False ensures we
|
||||
# hit the client-error abort path (which tries fallback first).
|
||||
return result_fn(
|
||||
FailoverReason.auth,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if status_code == 403:
|
||||
# OpenRouter 403 "key limit exceeded" is actually billing
|
||||
if "key limit exceeded" in error_msg or "spending limit" in error_msg:
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
return result_fn(
|
||||
FailoverReason.auth,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if status_code == 402:
|
||||
return _classify_402(error_msg, result_fn)
|
||||
|
||||
if status_code == 404:
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
# Generic 404 — could be model or endpoint
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if status_code == 413:
|
||||
return result_fn(
|
||||
FailoverReason.payload_too_large,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
if status_code == 429:
|
||||
# Already checked long_context_tier above; this is a normal rate limit
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if status_code == 400:
|
||||
return _classify_400(
|
||||
error_msg, error_code, body,
|
||||
provider=provider, model=model,
|
||||
approx_tokens=approx_tokens,
|
||||
context_length=context_length,
|
||||
num_messages=num_messages,
|
||||
result_fn=result_fn,
|
||||
)
|
||||
|
||||
if status_code in (500, 502):
|
||||
return result_fn(FailoverReason.server_error, retryable=True)
|
||||
|
||||
if status_code in (503, 529):
|
||||
return result_fn(FailoverReason.overloaded, retryable=True)
|
||||
|
||||
# Other 4xx — non-retryable
|
||||
if 400 <= status_code < 500:
|
||||
return result_fn(
|
||||
FailoverReason.format_error,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Other 5xx — retryable
|
||||
if 500 <= status_code < 600:
|
||||
return result_fn(FailoverReason.server_error, retryable=True)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _classify_402(error_msg: str, result_fn) -> ClassifiedError:
|
||||
"""Disambiguate 402: billing exhaustion vs transient usage limit.
|
||||
|
||||
The key insight from OpenClaw: some 402s are transient rate limits
|
||||
disguised as payment errors. "Usage limit, try again in 5 minutes"
|
||||
is NOT a billing problem — it's a periodic quota that resets.
|
||||
"""
|
||||
# Check for transient usage-limit signals first
|
||||
has_usage_limit = any(p in error_msg for p in _USAGE_LIMIT_PATTERNS)
|
||||
has_transient_signal = any(p in error_msg for p in _USAGE_LIMIT_TRANSIENT_SIGNALS)
|
||||
|
||||
if has_usage_limit and has_transient_signal:
|
||||
# Transient quota — treat as rate limit, not billing
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Confirmed billing exhaustion
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
|
||||
def _classify_400(
|
||||
error_msg: str,
|
||||
error_code: str,
|
||||
body: dict,
|
||||
*,
|
||||
provider: str,
|
||||
model: str,
|
||||
approx_tokens: int,
|
||||
context_length: int,
|
||||
num_messages: int = 0,
|
||||
result_fn,
|
||||
) -> ClassifiedError:
|
||||
"""Classify 400 Bad Request — context overflow, format error, or generic."""
|
||||
|
||||
# Context overflow from 400
|
||||
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.context_overflow,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Some providers return rate limit / billing errors as 400 instead of 429/402.
|
||||
# Check these patterns before falling through to format_error.
|
||||
if any(p in error_msg for p in _RATE_LIMIT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
if any(p in error_msg for p in _BILLING_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Generic 400 + large session → probable context overflow
|
||||
# Anthropic sometimes returns a bare "Error" message when context is too large
|
||||
err_body_msg = ""
|
||||
if isinstance(body, dict):
|
||||
err_obj = body.get("error", {})
|
||||
if isinstance(err_obj, dict):
|
||||
err_body_msg = (err_obj.get("message") or "").strip().lower()
|
||||
# Responses API (and some providers) use flat body: {"message": "..."}
|
||||
if not err_body_msg:
|
||||
err_body_msg = (body.get("message") or "").strip().lower()
|
||||
is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
|
||||
is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
|
||||
|
||||
if is_generic and is_large:
|
||||
return result_fn(
|
||||
FailoverReason.context_overflow,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Non-retryable format error
|
||||
return result_fn(
|
||||
FailoverReason.format_error,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
|
||||
# ── Error code classification ───────────────────────────────────────────
|
||||
|
||||
def _classify_by_error_code(
|
||||
error_code: str, error_msg: str, result_fn,
|
||||
) -> Optional[ClassifiedError]:
|
||||
"""Classify by structured error codes from the response body."""
|
||||
code_lower = error_code.lower()
|
||||
|
||||
if code_lower in ("resource_exhausted", "throttled", "rate_limit_exceeded"):
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
)
|
||||
|
||||
if code_lower in ("insufficient_quota", "billing_not_active", "payment_required"):
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if code_lower in ("model_not_found", "model_not_available", "invalid_model"):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if code_lower in ("context_length_exceeded", "max_tokens_exceeded"):
|
||||
return result_fn(
|
||||
FailoverReason.context_overflow,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ── Message pattern classification ──────────────────────────────────────
|
||||
|
||||
def _classify_by_message(
|
||||
error_msg: str,
|
||||
error_type: str,
|
||||
*,
|
||||
approx_tokens: int,
|
||||
context_length: int,
|
||||
result_fn,
|
||||
) -> Optional[ClassifiedError]:
|
||||
"""Classify based on error message patterns when no status code is available."""
|
||||
|
||||
# Payload-too-large patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _PAYLOAD_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.payload_too_large,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Billing patterns
|
||||
if any(p in error_msg for p in _BILLING_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Rate limit patterns
|
||||
if any(p in error_msg for p in _RATE_LIMIT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Context overflow patterns
|
||||
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.context_overflow,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Auth patterns
|
||||
if any(p in error_msg for p in _AUTH_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.auth,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
)
|
||||
|
||||
# Model not found patterns
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ── Helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
def _extract_status_code(error: Exception) -> Optional[int]:
|
||||
"""Walk the error and its cause chain to find an HTTP status code."""
|
||||
current = error
|
||||
for _ in range(5): # Max depth to prevent infinite loops
|
||||
code = getattr(current, "status_code", None)
|
||||
if isinstance(code, int):
|
||||
return code
|
||||
# Some SDKs use .status instead of .status_code
|
||||
code = getattr(current, "status", None)
|
||||
if isinstance(code, int) and 100 <= code < 600:
|
||||
return code
|
||||
# Walk cause chain
|
||||
cause = getattr(current, "__cause__", None) or getattr(current, "__context__", None)
|
||||
if cause is None or cause is current:
|
||||
break
|
||||
current = cause
|
||||
return None
|
||||
|
||||
|
||||
def _extract_error_body(error: Exception) -> dict:
|
||||
"""Extract the structured error body from an SDK exception."""
|
||||
body = getattr(error, "body", None)
|
||||
if isinstance(body, dict):
|
||||
return body
|
||||
# Some errors have .response.json()
|
||||
response = getattr(error, "response", None)
|
||||
if response is not None:
|
||||
try:
|
||||
json_body = response.json()
|
||||
if isinstance(json_body, dict):
|
||||
return json_body
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
|
||||
def _extract_error_code(body: dict) -> str:
|
||||
"""Extract an error code string from the response body."""
|
||||
if not body:
|
||||
return ""
|
||||
error_obj = body.get("error", {})
|
||||
if isinstance(error_obj, dict):
|
||||
code = error_obj.get("code") or error_obj.get("type") or ""
|
||||
if isinstance(code, str) and code.strip():
|
||||
return code.strip()
|
||||
# Top-level code
|
||||
code = body.get("code") or body.get("error_code") or ""
|
||||
if isinstance(code, (str, int)):
|
||||
return str(code).strip()
|
||||
return ""
|
||||
|
||||
|
||||
def _extract_message(error: Exception, body: dict) -> str:
|
||||
"""Extract the most informative error message."""
|
||||
# Try structured body first
|
||||
if body:
|
||||
error_obj = body.get("error", {})
|
||||
if isinstance(error_obj, dict):
|
||||
msg = error_obj.get("message", "")
|
||||
if isinstance(msg, str) and msg.strip():
|
||||
return msg.strip()[:500]
|
||||
msg = body.get("message", "")
|
||||
if isinstance(msg, str) and msg.strip():
|
||||
return msg.strip()[:500]
|
||||
# Fallback to str(error)
|
||||
return str(error)[:500]
|
||||
+48
-1
@@ -26,12 +26,14 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||
"qwen-oauth",
|
||||
"custom", "local",
|
||||
# Common aliases
|
||||
"google", "google-gemini", "google-ai-studio",
|
||||
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
|
||||
"github-models", "kimi", "moonshot", "claude", "deep-seek",
|
||||
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"qwen-portal",
|
||||
})
|
||||
|
||||
|
||||
@@ -134,7 +136,7 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"deepseek-ai/DeepSeek-V3.2": 65536,
|
||||
"moonshotai/Kimi-K2.5": 262144,
|
||||
"moonshotai/Kimi-K2-Thinking": 262144,
|
||||
"minimaxai/minimax-m2.5": 1048576,
|
||||
"MiniMaxAI/MiniMax-M2.5": 1048576,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 32768,
|
||||
"mimo-v2-pro": 1048576,
|
||||
"mimo-v2-omni": 1048576,
|
||||
@@ -187,6 +189,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"api.minimax": "minimax",
|
||||
"dashscope.aliyuncs.com": "alibaba",
|
||||
"dashscope-intl.aliyuncs.com": "alibaba",
|
||||
"portal.qwen.ai": "qwen-oauth",
|
||||
"openrouter.ai": "openrouter",
|
||||
"generativelanguage.googleapis.com": "gemini",
|
||||
"inference-api.nousresearch.com": "nous",
|
||||
@@ -194,6 +197,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"api.githubcopilot.com": "copilot",
|
||||
"models.github.ai": "copilot",
|
||||
"api.fireworks.ai": "fireworks",
|
||||
"opencode.ai": "opencode-go",
|
||||
}
|
||||
|
||||
|
||||
@@ -599,6 +603,49 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
|
||||
"""Detect an "output cap too large" error and return how many output tokens are available.
|
||||
|
||||
Background — two distinct context errors exist:
|
||||
1. "Prompt too long" — the INPUT itself exceeds the context window.
|
||||
Fix: compress history and/or halve context_length.
|
||||
2. "max_tokens too large" — input is fine, but input + requested_output > window.
|
||||
Fix: reduce max_tokens (the output cap) for this call.
|
||||
Do NOT touch context_length — the window hasn't shrunk.
|
||||
|
||||
Anthropic's API returns errors like:
|
||||
"max_tokens: 32768 > context_window: 200000 - input_tokens: 190000 = available_tokens: 10000"
|
||||
|
||||
Returns the number of output tokens that would fit (e.g. 10000 above), or None if
|
||||
the error does not look like a max_tokens-too-large error.
|
||||
"""
|
||||
error_lower = error_msg.lower()
|
||||
|
||||
# Must look like an output-cap error, not a prompt-length error.
|
||||
is_output_cap_error = (
|
||||
"max_tokens" in error_lower
|
||||
and ("available_tokens" in error_lower or "available tokens" in error_lower)
|
||||
)
|
||||
if not is_output_cap_error:
|
||||
return None
|
||||
|
||||
# Extract the available_tokens figure.
|
||||
# Anthropic format: "… = available_tokens: 10000"
|
||||
patterns = [
|
||||
r'available_tokens[:\s]+(\d+)',
|
||||
r'available\s+tokens[:\s]+(\d+)',
|
||||
# fallback: last number after "=" in expressions like "200000 - 190000 = 10000"
|
||||
r'=\s*(\d+)\s*$',
|
||||
]
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, error_lower)
|
||||
if match:
|
||||
tokens = int(match.group(1))
|
||||
if tokens >= 1:
|
||||
return tokens
|
||||
return None
|
||||
|
||||
|
||||
def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
|
||||
"""Return True if *candidate_id* (from server) matches *lookup_model* (configured).
|
||||
|
||||
|
||||
@@ -153,6 +153,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"minimax-cn": "minimax-cn",
|
||||
"deepseek": "deepseek",
|
||||
"alibaba": "alibaba",
|
||||
"qwen-oauth": "alibaba",
|
||||
"copilot": "github-copilot",
|
||||
"ai-gateway": "vercel",
|
||||
"opencode-zen": "opencode",
|
||||
|
||||
@@ -349,6 +349,13 @@ PLATFORM_HINTS = {
|
||||
"only — no markdown, no formatting. SMS messages are limited to ~1600 "
|
||||
"characters, so be brief and direct."
|
||||
),
|
||||
"bluebubbles": (
|
||||
"You are chatting via iMessage (BlueBubbles). iMessage does not render "
|
||||
"markdown formatting — use plain text. Keep responses concise as they "
|
||||
"appear as text messages. You can send media files natively: include "
|
||||
"MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
|
||||
".heic) appear as photos and other files arrive as attachments."
|
||||
),
|
||||
}
|
||||
|
||||
CONTEXT_FILE_MAX_CHARS = 20_000
|
||||
|
||||
@@ -0,0 +1,242 @@
|
||||
"""Rate limit tracking for inference API responses.
|
||||
|
||||
Captures x-ratelimit-* headers from provider responses and provides
|
||||
formatted display for the /usage slash command. Currently supports
|
||||
the Nous Portal header format (also used by OpenRouter and OpenAI-compatible
|
||||
APIs that follow the same convention).
|
||||
|
||||
Header schema (12 headers total):
|
||||
x-ratelimit-limit-requests RPM cap
|
||||
x-ratelimit-limit-requests-1h RPH cap
|
||||
x-ratelimit-limit-tokens TPM cap
|
||||
x-ratelimit-limit-tokens-1h TPH cap
|
||||
x-ratelimit-remaining-requests requests left in minute window
|
||||
x-ratelimit-remaining-requests-1h requests left in hour window
|
||||
x-ratelimit-remaining-tokens tokens left in minute window
|
||||
x-ratelimit-remaining-tokens-1h tokens left in hour window
|
||||
x-ratelimit-reset-requests seconds until minute request window resets
|
||||
x-ratelimit-reset-requests-1h seconds until hour request window resets
|
||||
x-ratelimit-reset-tokens seconds until minute token window resets
|
||||
x-ratelimit-reset-tokens-1h seconds until hour token window resets
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Mapping, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class RateLimitBucket:
|
||||
"""One rate-limit window (e.g. requests per minute)."""
|
||||
|
||||
limit: int = 0
|
||||
remaining: int = 0
|
||||
reset_seconds: float = 0.0
|
||||
captured_at: float = 0.0 # time.time() when this was captured
|
||||
|
||||
@property
|
||||
def used(self) -> int:
|
||||
return max(0, self.limit - self.remaining)
|
||||
|
||||
@property
|
||||
def usage_pct(self) -> float:
|
||||
if self.limit <= 0:
|
||||
return 0.0
|
||||
return (self.used / self.limit) * 100.0
|
||||
|
||||
@property
|
||||
def remaining_seconds_now(self) -> float:
|
||||
"""Estimated seconds remaining until reset, adjusted for elapsed time."""
|
||||
elapsed = time.time() - self.captured_at
|
||||
return max(0.0, self.reset_seconds - elapsed)
|
||||
|
||||
|
||||
@dataclass
|
||||
class RateLimitState:
|
||||
"""Full rate-limit state parsed from response headers."""
|
||||
|
||||
requests_min: RateLimitBucket = field(default_factory=RateLimitBucket)
|
||||
requests_hour: RateLimitBucket = field(default_factory=RateLimitBucket)
|
||||
tokens_min: RateLimitBucket = field(default_factory=RateLimitBucket)
|
||||
tokens_hour: RateLimitBucket = field(default_factory=RateLimitBucket)
|
||||
captured_at: float = 0.0 # when the headers were captured
|
||||
provider: str = ""
|
||||
|
||||
@property
|
||||
def has_data(self) -> bool:
|
||||
return self.captured_at > 0
|
||||
|
||||
@property
|
||||
def age_seconds(self) -> float:
|
||||
if not self.has_data:
|
||||
return float("inf")
|
||||
return time.time() - self.captured_at
|
||||
|
||||
|
||||
def _safe_int(value: Any, default: int = 0) -> int:
|
||||
try:
|
||||
return int(float(value))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _safe_float(value: Any, default: float = 0.0) -> float:
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def parse_rate_limit_headers(
|
||||
headers: Mapping[str, str],
|
||||
provider: str = "",
|
||||
) -> Optional[RateLimitState]:
|
||||
"""Parse x-ratelimit-* headers into a RateLimitState.
|
||||
|
||||
Returns None if no rate limit headers are present.
|
||||
"""
|
||||
# Quick check: at least one rate limit header must exist
|
||||
has_any = any(k.lower().startswith("x-ratelimit-") for k in headers)
|
||||
if not has_any:
|
||||
return None
|
||||
|
||||
now = time.time()
|
||||
|
||||
def _bucket(resource: str, suffix: str = "") -> RateLimitBucket:
|
||||
# e.g. resource="requests", suffix="" -> per-minute
|
||||
# resource="tokens", suffix="-1h" -> per-hour
|
||||
tag = f"{resource}{suffix}"
|
||||
return RateLimitBucket(
|
||||
limit=_safe_int(headers.get(f"x-ratelimit-limit-{tag}")),
|
||||
remaining=_safe_int(headers.get(f"x-ratelimit-remaining-{tag}")),
|
||||
reset_seconds=_safe_float(headers.get(f"x-ratelimit-reset-{tag}")),
|
||||
captured_at=now,
|
||||
)
|
||||
|
||||
return RateLimitState(
|
||||
requests_min=_bucket("requests"),
|
||||
requests_hour=_bucket("requests", "-1h"),
|
||||
tokens_min=_bucket("tokens"),
|
||||
tokens_hour=_bucket("tokens", "-1h"),
|
||||
captured_at=now,
|
||||
provider=provider,
|
||||
)
|
||||
|
||||
|
||||
# ── Formatting ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _fmt_count(n: int) -> str:
|
||||
"""Human-friendly number: 7999856 -> '8.0M', 33599 -> '33.6K', 799 -> '799'."""
|
||||
if n >= 1_000_000:
|
||||
return f"{n / 1_000_000:.1f}M"
|
||||
if n >= 10_000:
|
||||
return f"{n / 1_000:.1f}K"
|
||||
if n >= 1_000:
|
||||
return f"{n / 1_000:.1f}K"
|
||||
return str(n)
|
||||
|
||||
|
||||
def _fmt_seconds(seconds: float) -> str:
|
||||
"""Seconds -> human-friendly duration: '58s', '2m 14s', '58m 57s', '1h 2m'."""
|
||||
s = max(0, int(seconds))
|
||||
if s < 60:
|
||||
return f"{s}s"
|
||||
if s < 3600:
|
||||
m, sec = divmod(s, 60)
|
||||
return f"{m}m {sec}s" if sec else f"{m}m"
|
||||
h, remainder = divmod(s, 3600)
|
||||
m = remainder // 60
|
||||
return f"{h}h {m}m" if m else f"{h}h"
|
||||
|
||||
|
||||
def _bar(pct: float, width: int = 20) -> str:
|
||||
"""ASCII progress bar: [████████░░░░░░░░░░░░] 40%."""
|
||||
filled = int(pct / 100.0 * width)
|
||||
filled = max(0, min(width, filled))
|
||||
empty = width - filled
|
||||
return f"[{'█' * filled}{'░' * empty}]"
|
||||
|
||||
|
||||
def _bucket_line(label: str, bucket: RateLimitBucket, label_width: int = 14) -> str:
|
||||
"""Format one bucket as a single line."""
|
||||
if bucket.limit <= 0:
|
||||
return f" {label:<{label_width}} (no data)"
|
||||
|
||||
pct = bucket.usage_pct
|
||||
used = _fmt_count(bucket.used)
|
||||
limit = _fmt_count(bucket.limit)
|
||||
remaining = _fmt_count(bucket.remaining)
|
||||
reset = _fmt_seconds(bucket.remaining_seconds_now)
|
||||
|
||||
bar = _bar(pct)
|
||||
return f" {label:<{label_width}} {bar} {pct:5.1f}% {used}/{limit} used ({remaining} left, resets in {reset})"
|
||||
|
||||
|
||||
def format_rate_limit_display(state: RateLimitState) -> str:
|
||||
"""Format rate limit state for terminal/chat display."""
|
||||
if not state.has_data:
|
||||
return "No rate limit data yet — make an API request first."
|
||||
|
||||
age = state.age_seconds
|
||||
if age < 5:
|
||||
freshness = "just now"
|
||||
elif age < 60:
|
||||
freshness = f"{int(age)}s ago"
|
||||
else:
|
||||
freshness = f"{_fmt_seconds(age)} ago"
|
||||
|
||||
provider_label = state.provider.title() if state.provider else "Provider"
|
||||
|
||||
lines = [
|
||||
f"{provider_label} Rate Limits (captured {freshness}):",
|
||||
"",
|
||||
_bucket_line("Requests/min", state.requests_min),
|
||||
_bucket_line("Requests/hr", state.requests_hour),
|
||||
"",
|
||||
_bucket_line("Tokens/min", state.tokens_min),
|
||||
_bucket_line("Tokens/hr", state.tokens_hour),
|
||||
]
|
||||
|
||||
# Add warnings if any bucket is getting hot
|
||||
warnings = []
|
||||
for label, bucket in [
|
||||
("requests/min", state.requests_min),
|
||||
("requests/hr", state.requests_hour),
|
||||
("tokens/min", state.tokens_min),
|
||||
("tokens/hr", state.tokens_hour),
|
||||
]:
|
||||
if bucket.limit > 0 and bucket.usage_pct >= 80:
|
||||
reset = _fmt_seconds(bucket.remaining_seconds_now)
|
||||
warnings.append(f" ⚠ {label} at {bucket.usage_pct:.0f}% — resets in {reset}")
|
||||
|
||||
if warnings:
|
||||
lines.append("")
|
||||
lines.extend(warnings)
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def format_rate_limit_compact(state: RateLimitState) -> str:
|
||||
"""One-line compact summary for status bars / gateway messages."""
|
||||
if not state.has_data:
|
||||
return "No rate limit data."
|
||||
|
||||
rm = state.requests_min
|
||||
tm = state.tokens_min
|
||||
rh = state.requests_hour
|
||||
th = state.tokens_hour
|
||||
|
||||
parts = []
|
||||
if rm.limit > 0:
|
||||
parts.append(f"RPM: {rm.remaining}/{rm.limit}")
|
||||
if rh.limit > 0:
|
||||
parts.append(f"RPH: {_fmt_count(rh.remaining)}/{_fmt_count(rh.limit)} (resets {_fmt_seconds(rh.remaining_seconds_now)})")
|
||||
if tm.limit > 0:
|
||||
parts.append(f"TPM: {_fmt_count(tm.remaining)}/{_fmt_count(tm.limit)}")
|
||||
if th.limit > 0:
|
||||
parts.append(f"TPH: {_fmt_count(th.remaining)}/{_fmt_count(th.limit)} (resets {_fmt_seconds(th.remaining_seconds_now)})")
|
||||
|
||||
return " | ".join(parts)
|
||||
@@ -159,7 +159,10 @@ class SubdirectoryHintTracker:
|
||||
|
||||
def _is_valid_subdir(self, path: Path) -> bool:
|
||||
"""Check if path is a valid directory to scan for hints."""
|
||||
if not path.is_dir():
|
||||
try:
|
||||
if not path.is_dir():
|
||||
return False
|
||||
except OSError:
|
||||
return False
|
||||
if path in self._loaded_dirs:
|
||||
return False
|
||||
@@ -172,7 +175,10 @@ class SubdirectoryHintTracker:
|
||||
found_hints = []
|
||||
for filename in _HINT_FILENAMES:
|
||||
hint_path = directory / filename
|
||||
if not hint_path.is_file():
|
||||
try:
|
||||
if not hint_path.is_file():
|
||||
continue
|
||||
except OSError:
|
||||
continue
|
||||
try:
|
||||
content = hint_path.read_text(encoding="utf-8").strip()
|
||||
|
||||
+42
-3
@@ -48,6 +48,25 @@ model:
|
||||
# api_key: "your-key-here" # Uncomment to set here instead of .env
|
||||
base_url: "https://openrouter.ai/api/v1"
|
||||
|
||||
# ── Token limits — two settings, easy to confuse ──────────────────────────
|
||||
#
|
||||
# context_length: TOTAL context window (input + output tokens combined).
|
||||
# Controls when Hermes compresses history and validates requests.
|
||||
# Leave unset — Hermes auto-detects the correct value from the provider.
|
||||
# Set manually only when auto-detection is wrong (e.g. a local server with
|
||||
# a custom num_ctx, or a proxy that doesn't expose /v1/models).
|
||||
#
|
||||
# context_length: 131072
|
||||
#
|
||||
# max_tokens: OUTPUT cap — maximum tokens the model may generate per response.
|
||||
# Unrelated to how long your conversation history can be.
|
||||
# The OpenAI-standard name "max_tokens" is a misnomer; Anthropic's native
|
||||
# API has since renamed it "max_output_tokens" for clarity.
|
||||
# Leave unset to use the model's native output ceiling (recommended).
|
||||
# Set only if you want to deliberately limit individual response length.
|
||||
#
|
||||
# max_tokens: 8192
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Provider Routing (only applies when using OpenRouter)
|
||||
# =============================================================================
|
||||
@@ -117,7 +136,8 @@ terminal:
|
||||
timeout: 180
|
||||
docker_mount_cwd_to_workspace: false # SECURITY: off by default. Opt in to mount the launch cwd into Docker /workspace.
|
||||
lifetime_seconds: 300
|
||||
# sudo_password: "" # Enable sudo commands (pipes via sudo -S) - SECURITY WARNING: plaintext!
|
||||
# sudo_password: "hunter2" # Optional: pipe a sudo password via sudo -S. SECURITY WARNING: plaintext.
|
||||
# sudo_password: "" # Explicit empty password: try empty and never open the interactive sudo prompt.
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# OPTION 2: SSH remote execution
|
||||
@@ -208,13 +228,18 @@ terminal:
|
||||
#
|
||||
# SECURITY WARNING: Password stored in plaintext!
|
||||
#
|
||||
# INTERACTIVE PROMPT: If no sudo_password is set and the CLI is running,
|
||||
# INTERACTIVE PROMPT: If sudo_password is unset and the CLI is running,
|
||||
# you'll be prompted to enter your password when sudo is needed:
|
||||
# - 45-second timeout (auto-skips if no input)
|
||||
# - Press Enter to skip (command fails gracefully)
|
||||
# - Password is hidden while typing
|
||||
# - Password is cached for the session
|
||||
#
|
||||
# EMPTY PASSWORDS: Setting sudo_password to an explicit empty string is different
|
||||
# from leaving it unset. Hermes will try an empty password via `sudo -S` and
|
||||
# will not open the interactive prompt. This is useful for passwordless sudo,
|
||||
# Touch ID sudo setups, and environments where prompting is just noise.
|
||||
#
|
||||
# ALTERNATIVES:
|
||||
# - SSH backend: Configure passwordless sudo on the remote server
|
||||
# - Containers: Run as root inside the container (no sudo needed)
|
||||
@@ -445,6 +470,16 @@ agent:
|
||||
# Higher = more room for complex tasks, but costs more tokens
|
||||
# Recommended: 20-30 for focused tasks, 50-100 for open exploration
|
||||
max_turns: 60
|
||||
|
||||
# Inactivity timeout for gateway agent runs (seconds, 0 = unlimited).
|
||||
# The agent can run indefinitely when actively calling tools or receiving
|
||||
# API responses. Only fires after the agent has been idle for this duration.
|
||||
# gateway_timeout: 1800
|
||||
|
||||
# Staged warning: send a warning before escalating to full timeout.
|
||||
# Fires once per run when inactivity reaches this threshold (seconds).
|
||||
# Set to 0 to disable the warning.
|
||||
# gateway_timeout_warning: 900
|
||||
|
||||
# Enable verbose logging
|
||||
verbose: false
|
||||
@@ -644,10 +679,14 @@ platform_toolsets:
|
||||
# Voice Transcription (Speech-to-Text)
|
||||
# =============================================================================
|
||||
# Automatically transcribe voice messages on messaging platforms.
|
||||
# Requires OPENAI_API_KEY in .env (uses OpenAI Whisper API directly).
|
||||
# Providers: local (free, faster-whisper) | groq (free tier) | openai (Whisper API) | mistral (Voxtral Transcribe)
|
||||
# Set the corresponding API key in .env: GROQ_API_KEY, OPENAI_API_KEY, or MISTRAL_API_KEY.
|
||||
stt:
|
||||
enabled: true
|
||||
# provider: "local" # auto-detected if omitted
|
||||
model: "whisper-1" # whisper-1 (cheapest) | gpt-4o-mini-transcribe | gpt-4o-transcribe
|
||||
# mistral:
|
||||
# model: "voxtral-mini-latest" # voxtral-mini-latest | voxtral-mini-2602
|
||||
|
||||
# =============================================================================
|
||||
# Response Pacing (Messaging Platforms)
|
||||
|
||||
@@ -1546,6 +1546,7 @@ class HermesCLI:
|
||||
self._clarify_deadline = 0
|
||||
self._sudo_state = None
|
||||
self._sudo_deadline = 0
|
||||
self._modal_input_snapshot = None
|
||||
self._approval_state = None
|
||||
self._approval_deadline = 0
|
||||
self._approval_lock = threading.Lock()
|
||||
@@ -1602,7 +1603,12 @@ class HermesCLI:
|
||||
return f"[{('█' * filled) + ('░' * max(0, width - filled))}]"
|
||||
|
||||
def _get_status_bar_snapshot(self) -> Dict[str, Any]:
|
||||
model_name = self.model or "unknown"
|
||||
# Prefer the agent's model name — it updates on fallback.
|
||||
# self.model reflects the originally configured model and never
|
||||
# changes mid-session, so the TUI would show a stale name after
|
||||
# _try_activate_fallback() switches provider/model.
|
||||
agent = getattr(self, "agent", None)
|
||||
model_name = (getattr(agent, "model", None) or self.model or "unknown")
|
||||
model_short = model_name.split("/")[-1] if "/" in model_name else model_name
|
||||
if model_short.endswith(".gguf"):
|
||||
model_short = model_short[:-5]
|
||||
@@ -1628,7 +1634,6 @@ class HermesCLI:
|
||||
"compressions": 0,
|
||||
}
|
||||
|
||||
agent = getattr(self, "agent", None)
|
||||
if not agent:
|
||||
return snapshot
|
||||
|
||||
@@ -4003,59 +4008,7 @@ class HermesCLI:
|
||||
|
||||
print(" To change model or provider, use: hermes model")
|
||||
|
||||
def _handle_prompt_command(self, cmd: str):
|
||||
"""Handle the /prompt command to view or set system prompt."""
|
||||
parts = cmd.split(maxsplit=1)
|
||||
|
||||
if len(parts) > 1:
|
||||
# Set new prompt
|
||||
new_prompt = parts[1].strip()
|
||||
|
||||
if new_prompt.lower() == "clear":
|
||||
self.system_prompt = ""
|
||||
self.agent = None # Force re-init
|
||||
if save_config_value("agent.system_prompt", ""):
|
||||
print("(^_^)b System prompt cleared (saved to config)")
|
||||
else:
|
||||
print("(^_^) System prompt cleared (session only)")
|
||||
else:
|
||||
self.system_prompt = new_prompt
|
||||
self.agent = None # Force re-init
|
||||
if save_config_value("agent.system_prompt", new_prompt):
|
||||
print("(^_^)b System prompt set (saved to config)")
|
||||
else:
|
||||
print("(^_^) System prompt set (session only)")
|
||||
print(f" \"{new_prompt[:60]}{'...' if len(new_prompt) > 60 else ''}\"")
|
||||
else:
|
||||
# Show current prompt
|
||||
print()
|
||||
print("+" + "-" * 50 + "+")
|
||||
print("|" + " " * 15 + "(^_^) System Prompt" + " " * 15 + "|")
|
||||
print("+" + "-" * 50 + "+")
|
||||
print()
|
||||
if self.system_prompt:
|
||||
# Word wrap the prompt for display
|
||||
words = self.system_prompt.split()
|
||||
lines = []
|
||||
current_line = ""
|
||||
for word in words:
|
||||
if len(current_line) + len(word) + 1 <= 50:
|
||||
current_line += (" " if current_line else "") + word
|
||||
else:
|
||||
lines.append(current_line)
|
||||
current_line = word
|
||||
if current_line:
|
||||
lines.append(current_line)
|
||||
for line in lines:
|
||||
print(f" {line}")
|
||||
else:
|
||||
print(" (no custom prompt set - using default)")
|
||||
print()
|
||||
print(" Usage:")
|
||||
print(" /prompt <text> - Set a custom system prompt")
|
||||
print(" /prompt clear - Remove custom prompt")
|
||||
print(" /personality - Use a predefined personality")
|
||||
print()
|
||||
|
||||
|
||||
|
||||
@staticmethod
|
||||
@@ -4555,9 +4508,7 @@ class HermesCLI:
|
||||
self._handle_model_switch(cmd_original)
|
||||
elif canonical == "provider":
|
||||
self._show_model_and_providers()
|
||||
elif canonical == "prompt":
|
||||
# Use original case so prompt text isn't lowercased
|
||||
self._handle_prompt_command(cmd_original)
|
||||
|
||||
elif canonical == "personality":
|
||||
# Use original case (handler lowercases the personality name itself)
|
||||
self._handle_personality_command(cmd_original)
|
||||
@@ -4668,13 +4619,13 @@ class HermesCLI:
|
||||
if output:
|
||||
self.console.print(_rich_text_from_ansi(output))
|
||||
else:
|
||||
ChatConsole().print("[dim]Command returned no output[/]")
|
||||
self.console.print("[dim]Command returned no output[/]")
|
||||
except subprocess.TimeoutExpired:
|
||||
ChatConsole().print("[bold red]Quick command timed out (30s)[/]")
|
||||
self.console.print("[bold red]Quick command timed out (30s)[/]")
|
||||
except Exception as e:
|
||||
ChatConsole().print(f"[bold red]Quick command error: {e}[/]")
|
||||
self.console.print(f"[bold red]Quick command error: {e}[/]")
|
||||
else:
|
||||
ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]")
|
||||
self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]")
|
||||
elif qcmd.get("type") == "alias":
|
||||
target = qcmd.get("target", "").strip()
|
||||
if target:
|
||||
@@ -4683,9 +4634,9 @@ class HermesCLI:
|
||||
aliased_command = f"{target} {user_args}".strip()
|
||||
return self.process_command(aliased_command)
|
||||
else:
|
||||
ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]")
|
||||
self.console.print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]")
|
||||
else:
|
||||
ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]")
|
||||
self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]")
|
||||
# Check for plugin-registered slash commands
|
||||
elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names():
|
||||
from hermes_cli.plugins import get_plugin_command_handler
|
||||
@@ -5408,12 +5359,27 @@ class HermesCLI:
|
||||
print(f" ❌ Compression failed: {e}")
|
||||
|
||||
def _show_usage(self):
|
||||
"""Show cumulative token usage for the current session."""
|
||||
"""Show rate limits (if available) and session token usage."""
|
||||
if not self.agent:
|
||||
print("(._.) No active agent -- send a message first.")
|
||||
return
|
||||
|
||||
agent = self.agent
|
||||
calls = agent.session_api_calls
|
||||
|
||||
if calls == 0:
|
||||
print("(._.) No API calls made yet in this session.")
|
||||
return
|
||||
|
||||
# ── Rate limits (shown first when available) ────────────────
|
||||
rl_state = agent.get_rate_limit_state()
|
||||
if rl_state and rl_state.has_data:
|
||||
from agent.rate_limit_tracker import format_rate_limit_display
|
||||
print()
|
||||
print(format_rate_limit_display(rl_state))
|
||||
print()
|
||||
|
||||
# ── Session token usage ─────────────────────────────────────
|
||||
input_tokens = getattr(agent, "session_input_tokens", 0) or 0
|
||||
output_tokens = getattr(agent, "session_output_tokens", 0) or 0
|
||||
cache_read_tokens = getattr(agent, "session_cache_read_tokens", 0) or 0
|
||||
@@ -5421,13 +5387,7 @@ class HermesCLI:
|
||||
prompt = agent.session_prompt_tokens
|
||||
completion = agent.session_completion_tokens
|
||||
total = agent.session_total_tokens
|
||||
calls = agent.session_api_calls
|
||||
|
||||
if calls == 0:
|
||||
print("(._.) No API calls made yet in this session.")
|
||||
return
|
||||
|
||||
# Current context window state
|
||||
compressor = agent.context_compressor
|
||||
last_prompt = compressor.last_prompt_tokens
|
||||
ctx_len = compressor.context_length
|
||||
@@ -6205,6 +6165,7 @@ class HermesCLI:
|
||||
timeout = 45
|
||||
response_queue = queue.Queue()
|
||||
|
||||
self._capture_modal_input_snapshot()
|
||||
self._sudo_state = {
|
||||
"response_queue": response_queue,
|
||||
}
|
||||
@@ -6217,6 +6178,7 @@ class HermesCLI:
|
||||
result = response_queue.get(timeout=1)
|
||||
self._sudo_state = None
|
||||
self._sudo_deadline = 0
|
||||
self._restore_modal_input_snapshot()
|
||||
self._invalidate()
|
||||
if result:
|
||||
_cprint(f"\n{_DIM} ✓ Password received (cached for session){_RST}")
|
||||
@@ -6231,6 +6193,7 @@ class HermesCLI:
|
||||
|
||||
self._sudo_state = None
|
||||
self._sudo_deadline = 0
|
||||
self._restore_modal_input_snapshot()
|
||||
self._invalidate()
|
||||
_cprint(f"\n{_DIM} ⏱ Timeout — continuing without sudo{_RST}")
|
||||
return ""
|
||||
@@ -6403,6 +6366,33 @@ class HermesCLI:
|
||||
def _secret_capture_callback(self, var_name: str, prompt: str, metadata=None) -> dict:
|
||||
return prompt_for_secret(self, var_name, prompt, metadata)
|
||||
|
||||
def _capture_modal_input_snapshot(self) -> None:
|
||||
"""Temporarily clear the input buffer and save the user's in-progress draft."""
|
||||
if self._modal_input_snapshot is not None or not getattr(self, "_app", None):
|
||||
return
|
||||
try:
|
||||
buf = self._app.current_buffer
|
||||
self._modal_input_snapshot = {
|
||||
"text": buf.text,
|
||||
"cursor_position": buf.cursor_position,
|
||||
}
|
||||
buf.reset()
|
||||
except Exception:
|
||||
self._modal_input_snapshot = None
|
||||
|
||||
def _restore_modal_input_snapshot(self) -> None:
|
||||
"""Restore any draft text that was present before a modal prompt opened."""
|
||||
snapshot = self._modal_input_snapshot
|
||||
self._modal_input_snapshot = None
|
||||
if not snapshot or not getattr(self, "_app", None):
|
||||
return
|
||||
try:
|
||||
buf = self._app.current_buffer
|
||||
buf.text = snapshot.get("text", "")
|
||||
buf.cursor_position = min(snapshot.get("cursor_position", 0), len(buf.text))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _submit_secret_response(self, value: str) -> None:
|
||||
if not self._secret_state:
|
||||
return
|
||||
@@ -7130,6 +7120,7 @@ class HermesCLI:
|
||||
# Sudo password prompt state (similar mechanism to clarify)
|
||||
self._sudo_state = None # dict with response_queue when active
|
||||
self._sudo_deadline = 0
|
||||
self._modal_input_snapshot = None
|
||||
|
||||
# Dangerous command approval state (similar mechanism to clarify)
|
||||
self._approval_state = None # dict with command, description, choices, selected, response_queue
|
||||
@@ -7201,7 +7192,6 @@ class HermesCLI:
|
||||
text = event.app.current_buffer.text
|
||||
self._sudo_state["response_queue"].put(text)
|
||||
self._sudo_state = None
|
||||
event.app.current_buffer.reset()
|
||||
event.app.invalidate()
|
||||
return
|
||||
|
||||
@@ -7406,7 +7396,6 @@ class HermesCLI:
|
||||
if self._sudo_state:
|
||||
self._sudo_state["response_queue"].put("")
|
||||
self._sudo_state = None
|
||||
event.app.current_buffer.reset()
|
||||
event.app.invalidate()
|
||||
return
|
||||
|
||||
|
||||
+3
-2
@@ -44,7 +44,7 @@ logger = logging.getLogger(__name__)
|
||||
_KNOWN_DELIVERY_PLATFORMS = frozenset({
|
||||
"telegram", "discord", "slack", "whatsapp", "signal",
|
||||
"matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
|
||||
"wecom", "sms", "email", "webhook",
|
||||
"wecom", "sms", "email", "webhook", "bluebubbles",
|
||||
})
|
||||
|
||||
from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run
|
||||
@@ -91,7 +91,7 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
|
||||
}
|
||||
# Origin missing (e.g. job created via API/script) — try each
|
||||
# platform's home channel as a fallback instead of silently dropping.
|
||||
for platform_name in ("matrix", "telegram", "discord", "slack"):
|
||||
for platform_name in ("matrix", "telegram", "discord", "slack", "bluebubbles"):
|
||||
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
|
||||
if chat_id:
|
||||
logger.info(
|
||||
@@ -236,6 +236,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
"wecom": Platform.WECOM,
|
||||
"email": Platform.EMAIL,
|
||||
"sms": Platform.SMS,
|
||||
"bluebubbles": Platform.BLUEBUBBLES,
|
||||
}
|
||||
platform = platform_map.get(platform_name.lower())
|
||||
if not platform:
|
||||
|
||||
Generated
+4
-4
@@ -22,16 +22,16 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1751274312,
|
||||
"narHash": "sha256-/bVBlRpECLVzjV19t5KMdMFWSwKLtb5RyXdjz3LJT+g=",
|
||||
"lastModified": 1775036866,
|
||||
"narHash": "sha256-ZojAnPuCdy657PbTq5V0Y+AHKhZAIwSIT2cb8UgAz/U=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "50ab793786d9de88ee30ec4e4c24fb4236fc2674",
|
||||
"rev": "6201e203d09599479a3b3450ed24fa81537ebc4e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-24.11",
|
||||
"ref": "nixos-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
description = "Hermes Agent - AI agent framework by Nous Research";
|
||||
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||
flake-parts = {
|
||||
url = "github:hercules-ci/flake-parts";
|
||||
inputs.nixpkgs-lib.follows = "nixpkgs";
|
||||
|
||||
@@ -77,7 +77,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
|
||||
|
||||
# Telegram, WhatsApp & Signal can't enumerate chats -- pull from session history
|
||||
for plat_name in ("telegram", "whatsapp", "signal", "email", "sms"):
|
||||
for plat_name in ("telegram", "whatsapp", "signal", "email", "sms", "bluebubbles"):
|
||||
if plat_name not in platforms:
|
||||
platforms[plat_name] = _build_from_sessions(plat_name)
|
||||
|
||||
|
||||
@@ -63,6 +63,7 @@ class Platform(Enum):
|
||||
WEBHOOK = "webhook"
|
||||
FEISHU = "feishu"
|
||||
WECOM = "wecom"
|
||||
BLUEBUBBLES = "bluebubbles"
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -287,6 +288,9 @@ class GatewayConfig:
|
||||
# WeCom uses extra dict for bot credentials
|
||||
elif platform == Platform.WECOM and config.extra.get("bot_id"):
|
||||
connected.append(platform)
|
||||
# BlueBubbles uses extra dict for local server config
|
||||
elif platform == Platform.BLUEBUBBLES and config.extra.get("server_url") and config.extra.get("password"):
|
||||
connected.append(platform)
|
||||
return connected
|
||||
|
||||
def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
|
||||
@@ -712,6 +716,13 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
name=os.getenv("DISCORD_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
|
||||
# Reply threading mode for Discord (off/first/all)
|
||||
discord_reply_mode = os.getenv("DISCORD_REPLY_TO_MODE", "").lower()
|
||||
if discord_reply_mode in ("off", "first", "all"):
|
||||
if Platform.DISCORD not in config.platforms:
|
||||
config.platforms[Platform.DISCORD] = PlatformConfig()
|
||||
config.platforms[Platform.DISCORD].reply_to_mode = discord_reply_mode
|
||||
|
||||
# WhatsApp (typically uses different auth mechanism)
|
||||
whatsapp_enabled = os.getenv("WHATSAPP_ENABLED", "").lower() in ("true", "1", "yes")
|
||||
if whatsapp_enabled:
|
||||
@@ -941,6 +952,29 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
name=os.getenv("WECOM_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
|
||||
# BlueBubbles (iMessage)
|
||||
bluebubbles_server_url = os.getenv("BLUEBUBBLES_SERVER_URL")
|
||||
bluebubbles_password = os.getenv("BLUEBUBBLES_PASSWORD")
|
||||
if bluebubbles_server_url and bluebubbles_password:
|
||||
if Platform.BLUEBUBBLES not in config.platforms:
|
||||
config.platforms[Platform.BLUEBUBBLES] = PlatformConfig()
|
||||
config.platforms[Platform.BLUEBUBBLES].enabled = True
|
||||
config.platforms[Platform.BLUEBUBBLES].extra.update({
|
||||
"server_url": bluebubbles_server_url.rstrip("/"),
|
||||
"password": bluebubbles_password,
|
||||
"webhook_host": os.getenv("BLUEBUBBLES_WEBHOOK_HOST", "127.0.0.1"),
|
||||
"webhook_port": int(os.getenv("BLUEBUBBLES_WEBHOOK_PORT", "8645")),
|
||||
"webhook_path": os.getenv("BLUEBUBBLES_WEBHOOK_PATH", "/bluebubbles-webhook"),
|
||||
"send_read_receipts": os.getenv("BLUEBUBBLES_SEND_READ_RECEIPTS", "true").lower() in ("true", "1", "yes"),
|
||||
})
|
||||
bluebubbles_home = os.getenv("BLUEBUBBLES_HOME_CHANNEL")
|
||||
if bluebubbles_home and Platform.BLUEBUBBLES in config.platforms:
|
||||
config.platforms[Platform.BLUEBUBBLES].home_channel = HomeChannel(
|
||||
platform=Platform.BLUEBUBBLES,
|
||||
chat_id=bluebubbles_home,
|
||||
name=os.getenv("BLUEBUBBLES_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
|
||||
# Session settings
|
||||
idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
|
||||
if idle_minutes:
|
||||
|
||||
@@ -298,6 +298,7 @@ SUPPORTED_DOCUMENT_TYPES = {
|
||||
".pdf": "application/pdf",
|
||||
".md": "text/markdown",
|
||||
".txt": "text/plain",
|
||||
".log": "text/plain",
|
||||
".zip": "application/zip",
|
||||
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
@@ -407,6 +408,10 @@ class MessageEvent:
|
||||
# Auto-loaded skill for topic/channel bindings (e.g., Telegram DM Topics)
|
||||
auto_skill: Optional[str] = None
|
||||
|
||||
# Internal flag — set for synthetic events (e.g. background process
|
||||
# completion notifications) that must bypass user authorization checks.
|
||||
internal: bool = False
|
||||
|
||||
# Timestamps
|
||||
timestamp: datetime = field(default_factory=datetime.now)
|
||||
|
||||
|
||||
@@ -0,0 +1,828 @@
|
||||
"""BlueBubbles iMessage platform adapter.
|
||||
|
||||
Uses the local BlueBubbles macOS server for outbound REST sends and inbound
|
||||
webhooks. Supports text messaging, media attachments (images, voice, video,
|
||||
documents), tapback reactions, typing indicators, and read receipts.
|
||||
|
||||
Architecture based on PR #5869 (benjaminsehl) with inbound attachment
|
||||
downloading from PR #4588 (YuhangLin).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
import httpx
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_bytes,
|
||||
cache_audio_from_bytes,
|
||||
cache_document_from_bytes,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DEFAULT_WEBHOOK_HOST = "127.0.0.1"
|
||||
DEFAULT_WEBHOOK_PORT = 8645
|
||||
DEFAULT_WEBHOOK_PATH = "/bluebubbles-webhook"
|
||||
MAX_TEXT_LENGTH = 4000
|
||||
|
||||
# Tapback reaction codes (BlueBubbles associatedMessageType values)
|
||||
_TAPBACK_ADDED = {
|
||||
2000: "love", 2001: "like", 2002: "dislike",
|
||||
2003: "laugh", 2004: "emphasize", 2005: "question",
|
||||
}
|
||||
_TAPBACK_REMOVED = {
|
||||
3000: "love", 3001: "like", 3002: "dislike",
|
||||
3003: "laugh", 3004: "emphasize", 3005: "question",
|
||||
}
|
||||
|
||||
# Webhook event types that carry user messages
|
||||
_MESSAGE_EVENTS = {"new-message", "message", "updated-message"}
|
||||
|
||||
# Log redaction patterns
|
||||
_PHONE_RE = re.compile(r"\+?\d{7,15}")
|
||||
_EMAIL_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.]+")
|
||||
|
||||
|
||||
def _redact(text: str) -> str:
|
||||
"""Redact phone numbers and emails from log output."""
|
||||
text = _PHONE_RE.sub("[REDACTED]", text)
|
||||
text = _EMAIL_RE.sub("[REDACTED]", text)
|
||||
return text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def check_bluebubbles_requirements() -> bool:
|
||||
try:
|
||||
import aiohttp # noqa: F401
|
||||
import httpx as _httpx # noqa: F401
|
||||
except ImportError:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _normalize_server_url(raw: str) -> str:
|
||||
value = (raw or "").strip()
|
||||
if not value:
|
||||
return ""
|
||||
if not re.match(r"^https?://", value, flags=re.I):
|
||||
value = f"http://{value}"
|
||||
return value.rstrip("/")
|
||||
|
||||
|
||||
def _strip_markdown(text: str) -> str:
|
||||
"""Strip common markdown formatting for iMessage plain-text delivery."""
|
||||
text = re.sub(r"\*\*(.+?)\*\*", r"\1", text, flags=re.DOTALL)
|
||||
text = re.sub(r"\*(.+?)\*", r"\1", text, flags=re.DOTALL)
|
||||
text = re.sub(r"__(.+?)__", r"\1", text, flags=re.DOTALL)
|
||||
text = re.sub(r"_(.+?)_", r"\1", text, flags=re.DOTALL)
|
||||
text = re.sub(r"```[a-zA-Z0-9_+-]*\n?", "", text)
|
||||
text = re.sub(r"`(.+?)`", r"\1", text)
|
||||
text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
|
||||
text = re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r"\1", text)
|
||||
text = re.sub(r"\n{3,}", "\n\n", text)
|
||||
return text.strip()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Adapter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
platform = Platform.BLUEBUBBLES
|
||||
MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.BLUEBUBBLES)
|
||||
extra = config.extra or {}
|
||||
self.server_url = _normalize_server_url(
|
||||
extra.get("server_url") or os.getenv("BLUEBUBBLES_SERVER_URL", "")
|
||||
)
|
||||
self.password = extra.get("password") or os.getenv("BLUEBUBBLES_PASSWORD", "")
|
||||
self.webhook_host = (
|
||||
extra.get("webhook_host")
|
||||
or os.getenv("BLUEBUBBLES_WEBHOOK_HOST", DEFAULT_WEBHOOK_HOST)
|
||||
)
|
||||
self.webhook_port = int(
|
||||
extra.get("webhook_port")
|
||||
or os.getenv("BLUEBUBBLES_WEBHOOK_PORT", str(DEFAULT_WEBHOOK_PORT))
|
||||
)
|
||||
self.webhook_path = (
|
||||
extra.get("webhook_path")
|
||||
or os.getenv("BLUEBUBBLES_WEBHOOK_PATH", DEFAULT_WEBHOOK_PATH)
|
||||
)
|
||||
if not str(self.webhook_path).startswith("/"):
|
||||
self.webhook_path = f"/{self.webhook_path}"
|
||||
self.send_read_receipts = bool(extra.get("send_read_receipts", True))
|
||||
self.client: Optional[httpx.AsyncClient] = None
|
||||
self._runner = None
|
||||
self._private_api_enabled: Optional[bool] = None
|
||||
self._helper_connected: bool = False
|
||||
self._guid_cache: Dict[str, str] = {}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# API helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _api_url(self, path: str) -> str:
|
||||
sep = "&" if "?" in path else "?"
|
||||
return f"{self.server_url}{path}{sep}password={quote(self.password, safe='')}"
|
||||
|
||||
async def _api_get(self, path: str) -> Dict[str, Any]:
|
||||
assert self.client is not None
|
||||
res = await self.client.get(self._api_url(path))
|
||||
res.raise_for_status()
|
||||
return res.json()
|
||||
|
||||
async def _api_post(self, path: str, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
assert self.client is not None
|
||||
res = await self.client.post(self._api_url(path), json=payload)
|
||||
res.raise_for_status()
|
||||
return res.json()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def connect(self) -> bool:
|
||||
if not self.server_url or not self.password:
|
||||
logger.error(
|
||||
"[bluebubbles] BLUEBUBBLES_SERVER_URL and BLUEBUBBLES_PASSWORD are required"
|
||||
)
|
||||
return False
|
||||
from aiohttp import web
|
||||
|
||||
self.client = httpx.AsyncClient(timeout=30.0)
|
||||
try:
|
||||
await self._api_get("/api/v1/ping")
|
||||
info = await self._api_get("/api/v1/server/info")
|
||||
server_data = (info or {}).get("data", {})
|
||||
self._private_api_enabled = bool(server_data.get("private_api"))
|
||||
self._helper_connected = bool(server_data.get("helper_connected"))
|
||||
logger.info(
|
||||
"[bluebubbles] connected to %s (private_api=%s, helper=%s)",
|
||||
self.server_url,
|
||||
self._private_api_enabled,
|
||||
self._helper_connected,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error(
|
||||
"[bluebubbles] cannot reach server at %s: %s", self.server_url, exc
|
||||
)
|
||||
if self.client:
|
||||
await self.client.aclose()
|
||||
self.client = None
|
||||
return False
|
||||
|
||||
app = web.Application()
|
||||
app.router.add_get("/health", lambda _: web.Response(text="ok"))
|
||||
app.router.add_post(self.webhook_path, self._handle_webhook)
|
||||
self._runner = web.AppRunner(app)
|
||||
await self._runner.setup()
|
||||
site = web.TCPSite(self._runner, self.webhook_host, self.webhook_port)
|
||||
await site.start()
|
||||
self._mark_connected()
|
||||
logger.info(
|
||||
"[bluebubbles] webhook listening on http://%s:%s%s",
|
||||
self.webhook_host,
|
||||
self.webhook_port,
|
||||
self.webhook_path,
|
||||
)
|
||||
return True
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
if self.client:
|
||||
await self.client.aclose()
|
||||
self.client = None
|
||||
if self._runner:
|
||||
await self._runner.cleanup()
|
||||
self._runner = None
|
||||
self._mark_disconnected()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Chat GUID resolution
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _resolve_chat_guid(self, target: str) -> Optional[str]:
|
||||
"""Resolve an email/phone to a BlueBubbles chat GUID.
|
||||
|
||||
If *target* already contains a semicolon (raw GUID format like
|
||||
``iMessage;-;user@example.com``), it is returned as-is. Otherwise
|
||||
the adapter queries the BlueBubbles chat list and matches on
|
||||
``chatIdentifier`` or participant address.
|
||||
"""
|
||||
target = (target or "").strip()
|
||||
if not target:
|
||||
return None
|
||||
# Already a raw GUID
|
||||
if ";" in target:
|
||||
return target
|
||||
if target in self._guid_cache:
|
||||
return self._guid_cache[target]
|
||||
try:
|
||||
payload = await self._api_post(
|
||||
"/api/v1/chat/query",
|
||||
{"limit": 100, "offset": 0, "with": ["participants"]},
|
||||
)
|
||||
for chat in payload.get("data", []) or []:
|
||||
guid = chat.get("guid") or chat.get("chatGuid")
|
||||
identifier = chat.get("chatIdentifier") or chat.get("identifier")
|
||||
if identifier == target:
|
||||
if guid:
|
||||
self._guid_cache[target] = guid
|
||||
return guid
|
||||
for part in chat.get("participants", []) or []:
|
||||
if (part.get("address") or "").strip() == target and guid:
|
||||
self._guid_cache[target] = guid
|
||||
return guid
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
async def _create_chat_for_handle(
|
||||
self, address: str, message: str
|
||||
) -> SendResult:
|
||||
"""Create a new chat by sending the first message to *address*."""
|
||||
payload = {
|
||||
"addresses": [address],
|
||||
"message": message,
|
||||
"tempGuid": f"temp-{datetime.utcnow().timestamp()}",
|
||||
}
|
||||
try:
|
||||
res = await self._api_post("/api/v1/chat/new", payload)
|
||||
data = res.get("data") or {}
|
||||
msg_id = data.get("guid") or data.get("messageGuid") or "ok"
|
||||
return SendResult(success=True, message_id=str(msg_id), raw_response=res)
|
||||
except Exception as exc:
|
||||
return SendResult(success=False, error=str(exc))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text sending
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
content: str,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
text = _strip_markdown(content or "")
|
||||
if not text:
|
||||
return SendResult(success=False, error="BlueBubbles send requires text")
|
||||
chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
|
||||
last = SendResult(success=True)
|
||||
for chunk in chunks:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if not guid:
|
||||
# If the target looks like an address, try creating a new chat
|
||||
if self._private_api_enabled and (
|
||||
"@" in chat_id or re.match(r"^\+\d+", chat_id)
|
||||
):
|
||||
return await self._create_chat_for_handle(chat_id, chunk)
|
||||
return SendResult(
|
||||
success=False,
|
||||
error=f"BlueBubbles chat not found for target: {chat_id}",
|
||||
)
|
||||
payload: Dict[str, Any] = {
|
||||
"chatGuid": guid,
|
||||
"tempGuid": f"temp-{datetime.utcnow().timestamp()}",
|
||||
"message": chunk,
|
||||
}
|
||||
if reply_to and self._private_api_enabled and self._helper_connected:
|
||||
payload["method"] = "private-api"
|
||||
payload["selectedMessageGuid"] = reply_to
|
||||
payload["partIndex"] = 0
|
||||
try:
|
||||
res = await self._api_post("/api/v1/message/text", payload)
|
||||
data = res.get("data") or {}
|
||||
msg_id = data.get("guid") or data.get("messageGuid") or "ok"
|
||||
last = SendResult(
|
||||
success=True, message_id=str(msg_id), raw_response=res
|
||||
)
|
||||
except Exception as exc:
|
||||
return SendResult(success=False, error=str(exc))
|
||||
return last
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Media sending (outbound)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _send_attachment(
|
||||
self,
|
||||
chat_id: str,
|
||||
file_path: str,
|
||||
filename: Optional[str] = None,
|
||||
caption: Optional[str] = None,
|
||||
is_audio_message: bool = False,
|
||||
) -> SendResult:
|
||||
"""Send a file attachment via BlueBubbles multipart upload."""
|
||||
if not self.client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
if not os.path.isfile(file_path):
|
||||
return SendResult(success=False, error=f"File not found: {file_path}")
|
||||
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if not guid:
|
||||
return SendResult(success=False, error=f"Chat not found: {chat_id}")
|
||||
|
||||
fname = filename or os.path.basename(file_path)
|
||||
try:
|
||||
with open(file_path, "rb") as f:
|
||||
files = {"attachment": (fname, f, "application/octet-stream")}
|
||||
data: Dict[str, str] = {
|
||||
"chatGuid": guid,
|
||||
"name": fname,
|
||||
"tempGuid": uuid.uuid4().hex,
|
||||
}
|
||||
if is_audio_message:
|
||||
data["isAudioMessage"] = "true"
|
||||
res = await self.client.post(
|
||||
self._api_url("/api/v1/message/attachment"),
|
||||
files=files,
|
||||
data=data,
|
||||
timeout=120,
|
||||
)
|
||||
res.raise_for_status()
|
||||
result = res.json()
|
||||
|
||||
if caption:
|
||||
await self.send(chat_id, caption)
|
||||
|
||||
if result.get("status") == 200:
|
||||
rdata = result.get("data") or {}
|
||||
msg_id = rdata.get("guid") if isinstance(rdata, dict) else None
|
||||
return SendResult(
|
||||
success=True, message_id=msg_id, raw_response=result
|
||||
)
|
||||
return SendResult(
|
||||
success=False,
|
||||
error=result.get("message", "Attachment upload failed"),
|
||||
)
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
try:
|
||||
from gateway.platforms.base import cache_image_from_url
|
||||
|
||||
local_path = await cache_image_from_url(image_url)
|
||||
return await self._send_attachment(chat_id, local_path, caption=caption)
|
||||
except Exception:
|
||||
return await super().send_image(chat_id, image_url, caption, reply_to)
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
return await self._send_attachment(chat_id, image_path, caption=caption)
|
||||
|
||||
async def send_voice(
|
||||
self,
|
||||
chat_id: str,
|
||||
audio_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
return await self._send_attachment(
|
||||
chat_id, audio_path, caption=caption, is_audio_message=True
|
||||
)
|
||||
|
||||
async def send_video(
|
||||
self,
|
||||
chat_id: str,
|
||||
video_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
return await self._send_attachment(chat_id, video_path, caption=caption)
|
||||
|
||||
async def send_document(
|
||||
self,
|
||||
chat_id: str,
|
||||
file_path: str,
|
||||
caption: Optional[str] = None,
|
||||
file_name: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
return await self._send_attachment(
|
||||
chat_id, file_path, filename=file_name, caption=caption
|
||||
)
|
||||
|
||||
async def send_animation(
|
||||
self,
|
||||
chat_id: str,
|
||||
animation_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
return await self.send_image(
|
||||
chat_id, animation_url, caption, reply_to, metadata
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Typing indicators
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def send_typing(self, chat_id: str, metadata=None) -> None:
|
||||
if not self._private_api_enabled or not self._helper_connected or not self.client:
|
||||
return
|
||||
try:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if guid:
|
||||
encoded = quote(guid, safe="")
|
||||
await self.client.post(
|
||||
self._api_url(f"/api/v1/chat/{encoded}/typing"), timeout=5
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def stop_typing(self, chat_id: str) -> None:
|
||||
if not self._private_api_enabled or not self._helper_connected or not self.client:
|
||||
return
|
||||
try:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if guid:
|
||||
encoded = quote(guid, safe="")
|
||||
await self.client.delete(
|
||||
self._api_url(f"/api/v1/chat/{encoded}/typing"), timeout=5
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Read receipts
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def mark_read(self, chat_id: str) -> bool:
|
||||
if not self._private_api_enabled or not self._helper_connected or not self.client:
|
||||
return False
|
||||
try:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if guid:
|
||||
encoded = quote(guid, safe="")
|
||||
await self.client.post(
|
||||
self._api_url(f"/api/v1/chat/{encoded}/read"), timeout=5
|
||||
)
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tapback reactions
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def send_reaction(
|
||||
self,
|
||||
chat_id: str,
|
||||
message_guid: str,
|
||||
reaction: str,
|
||||
part_index: int = 0,
|
||||
) -> SendResult:
|
||||
"""Send a tapback reaction (requires Private API helper)."""
|
||||
if not self._private_api_enabled or not self._helper_connected:
|
||||
return SendResult(
|
||||
success=False, error="Private API helper not connected"
|
||||
)
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if not guid:
|
||||
return SendResult(success=False, error=f"Chat not found: {chat_id}")
|
||||
try:
|
||||
res = await self._api_post(
|
||||
"/api/v1/message/react",
|
||||
{
|
||||
"chatGuid": guid,
|
||||
"selectedMessageGuid": message_guid,
|
||||
"reaction": reaction,
|
||||
"partIndex": part_index,
|
||||
},
|
||||
)
|
||||
return SendResult(success=True, raw_response=res)
|
||||
except Exception as exc:
|
||||
return SendResult(success=False, error=str(exc))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Chat info
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
is_group = ";+;" in (chat_id or "")
|
||||
info: Dict[str, Any] = {
|
||||
"name": chat_id,
|
||||
"type": "group" if is_group else "dm",
|
||||
}
|
||||
try:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if guid:
|
||||
encoded = quote(guid, safe="")
|
||||
res = await self._api_get(
|
||||
f"/api/v1/chat/{encoded}?with=participants"
|
||||
)
|
||||
data = (res or {}).get("data", {})
|
||||
display_name = (
|
||||
data.get("displayName")
|
||||
or data.get("chatIdentifier")
|
||||
or chat_id
|
||||
)
|
||||
participants = []
|
||||
for p in data.get("participants", []) or []:
|
||||
addr = (p.get("address") or "").strip()
|
||||
if addr:
|
||||
participants.append(addr)
|
||||
info["name"] = display_name
|
||||
if participants:
|
||||
info["participants"] = participants
|
||||
except Exception:
|
||||
pass
|
||||
return info
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
return _strip_markdown(content)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Inbound attachment downloading (from #4588)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _download_attachment(
|
||||
self, att_guid: str, att_meta: Dict[str, Any]
|
||||
) -> Optional[str]:
|
||||
"""Download an attachment from BlueBubbles and cache it locally.
|
||||
|
||||
Returns the local file path on success, None on failure.
|
||||
"""
|
||||
if not self.client:
|
||||
return None
|
||||
try:
|
||||
encoded = quote(att_guid, safe="")
|
||||
resp = await self.client.get(
|
||||
self._api_url(f"/api/v1/attachment/{encoded}/download"),
|
||||
timeout=60,
|
||||
follow_redirects=True,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.content
|
||||
|
||||
mime = (att_meta.get("mimeType") or "").lower()
|
||||
transfer_name = att_meta.get("transferName", "")
|
||||
|
||||
if mime.startswith("image/"):
|
||||
ext_map = {
|
||||
"image/jpeg": ".jpg",
|
||||
"image/png": ".png",
|
||||
"image/gif": ".gif",
|
||||
"image/webp": ".webp",
|
||||
"image/heic": ".jpg",
|
||||
"image/heif": ".jpg",
|
||||
"image/tiff": ".jpg",
|
||||
}
|
||||
ext = ext_map.get(mime, ".jpg")
|
||||
return cache_image_from_bytes(data, ext)
|
||||
|
||||
if mime.startswith("audio/"):
|
||||
ext_map = {
|
||||
"audio/mp3": ".mp3",
|
||||
"audio/mpeg": ".mp3",
|
||||
"audio/ogg": ".ogg",
|
||||
"audio/wav": ".wav",
|
||||
"audio/x-caf": ".mp3",
|
||||
"audio/mp4": ".m4a",
|
||||
"audio/aac": ".m4a",
|
||||
}
|
||||
ext = ext_map.get(mime, ".mp3")
|
||||
return cache_audio_from_bytes(data, ext)
|
||||
|
||||
# Videos, documents, and everything else
|
||||
filename = transfer_name or f"file_{uuid.uuid4().hex[:8]}"
|
||||
return cache_document_from_bytes(data, filename)
|
||||
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"[bluebubbles] failed to download attachment %s: %s",
|
||||
_redact(att_guid),
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Webhook handling
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _extract_payload_record(
|
||||
self, payload: Dict[str, Any]
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
data = payload.get("data")
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
if isinstance(data, list):
|
||||
for item in data:
|
||||
if isinstance(item, dict):
|
||||
return item
|
||||
if isinstance(payload.get("message"), dict):
|
||||
return payload.get("message")
|
||||
return payload if isinstance(payload, dict) else None
|
||||
|
||||
@staticmethod
|
||||
def _value(*candidates: Any) -> Optional[str]:
|
||||
for candidate in candidates:
|
||||
if isinstance(candidate, str) and candidate.strip():
|
||||
return candidate.strip()
|
||||
return None
|
||||
|
||||
async def _handle_webhook(self, request):
|
||||
from aiohttp import web
|
||||
|
||||
token = (
|
||||
request.query.get("password")
|
||||
or request.query.get("guid")
|
||||
or request.headers.get("x-password")
|
||||
or request.headers.get("x-guid")
|
||||
or request.headers.get("x-bluebubbles-guid")
|
||||
)
|
||||
if token != self.password:
|
||||
return web.json_response({"error": "unauthorized"}, status=401)
|
||||
try:
|
||||
raw = await request.read()
|
||||
body = raw.decode("utf-8", errors="replace")
|
||||
try:
|
||||
payload = json.loads(body)
|
||||
except Exception:
|
||||
from urllib.parse import parse_qs
|
||||
|
||||
form = parse_qs(body)
|
||||
payload_str = (
|
||||
form.get("payload")
|
||||
or form.get("data")
|
||||
or form.get("message")
|
||||
or [""]
|
||||
)[0]
|
||||
payload = json.loads(payload_str) if payload_str else {}
|
||||
except Exception as exc:
|
||||
logger.error("[bluebubbles] webhook parse error: %s", exc)
|
||||
return web.json_response({"error": "invalid payload"}, status=400)
|
||||
|
||||
event_type = self._value(payload.get("type"), payload.get("event")) or ""
|
||||
# Only process message events; silently acknowledge everything else
|
||||
if event_type and event_type not in _MESSAGE_EVENTS:
|
||||
return web.Response(text="ok")
|
||||
|
||||
record = self._extract_payload_record(payload) or {}
|
||||
is_from_me = bool(
|
||||
record.get("isFromMe")
|
||||
or record.get("fromMe")
|
||||
or record.get("is_from_me")
|
||||
)
|
||||
if is_from_me:
|
||||
return web.Response(text="ok")
|
||||
|
||||
# Skip tapback reactions delivered as messages
|
||||
assoc_type = record.get("associatedMessageType")
|
||||
if isinstance(assoc_type, int) and assoc_type in {
|
||||
**_TAPBACK_ADDED,
|
||||
**_TAPBACK_REMOVED,
|
||||
}:
|
||||
return web.Response(text="ok")
|
||||
|
||||
text = (
|
||||
self._value(
|
||||
record.get("text"), record.get("message"), record.get("body")
|
||||
)
|
||||
or ""
|
||||
)
|
||||
|
||||
# --- Inbound attachment handling ---
|
||||
attachments = record.get("attachments") or []
|
||||
media_urls: List[str] = []
|
||||
media_types: List[str] = []
|
||||
msg_type = MessageType.TEXT
|
||||
|
||||
for att in attachments:
|
||||
att_guid = att.get("guid", "")
|
||||
if not att_guid:
|
||||
continue
|
||||
cached = await self._download_attachment(att_guid, att)
|
||||
if cached:
|
||||
mime = (att.get("mimeType") or "").lower()
|
||||
media_urls.append(cached)
|
||||
media_types.append(mime)
|
||||
if mime.startswith("image/"):
|
||||
msg_type = MessageType.PHOTO
|
||||
elif mime.startswith("audio/") or (att.get("uti") or "").endswith(
|
||||
"caf"
|
||||
):
|
||||
msg_type = MessageType.VOICE
|
||||
elif mime.startswith("video/"):
|
||||
msg_type = MessageType.VIDEO
|
||||
else:
|
||||
msg_type = MessageType.DOCUMENT
|
||||
|
||||
# With multiple attachments, prefer PHOTO if any images present
|
||||
if len(media_urls) > 1:
|
||||
mime_prefixes = {(m or "").split("/")[0] for m in media_types}
|
||||
if "image" in mime_prefixes:
|
||||
msg_type = MessageType.PHOTO
|
||||
|
||||
if not text and media_urls:
|
||||
text = "(attachment)"
|
||||
# --- End attachment handling ---
|
||||
|
||||
chat_guid = self._value(
|
||||
record.get("chatGuid"),
|
||||
payload.get("chatGuid"),
|
||||
record.get("chat_guid"),
|
||||
payload.get("chat_guid"),
|
||||
payload.get("guid"),
|
||||
)
|
||||
chat_identifier = self._value(
|
||||
record.get("chatIdentifier"),
|
||||
record.get("identifier"),
|
||||
payload.get("chatIdentifier"),
|
||||
payload.get("identifier"),
|
||||
)
|
||||
sender = (
|
||||
self._value(
|
||||
record.get("handle", {}).get("address")
|
||||
if isinstance(record.get("handle"), dict)
|
||||
else None,
|
||||
record.get("sender"),
|
||||
record.get("from"),
|
||||
record.get("address"),
|
||||
)
|
||||
or chat_identifier
|
||||
or chat_guid
|
||||
)
|
||||
if not (chat_guid or chat_identifier) and sender:
|
||||
chat_identifier = sender
|
||||
if not sender or not (chat_guid or chat_identifier) or not text:
|
||||
return web.json_response({"error": "missing message fields"}, status=400)
|
||||
|
||||
session_chat_id = chat_guid or chat_identifier
|
||||
is_group = bool(record.get("isGroup")) or (";+;" in (chat_guid or ""))
|
||||
source = self.build_source(
|
||||
chat_id=session_chat_id,
|
||||
chat_name=chat_identifier or sender,
|
||||
chat_type="group" if is_group else "dm",
|
||||
user_id=sender,
|
||||
user_name=sender,
|
||||
chat_id_alt=chat_identifier,
|
||||
)
|
||||
event = MessageEvent(
|
||||
text=text,
|
||||
message_type=msg_type,
|
||||
source=source,
|
||||
raw_message=payload,
|
||||
message_id=self._value(
|
||||
record.get("guid"),
|
||||
record.get("messageGuid"),
|
||||
record.get("id"),
|
||||
),
|
||||
reply_to_message_id=self._value(
|
||||
record.get("threadOriginatorGuid"),
|
||||
record.get("associatedMessageGuid"),
|
||||
),
|
||||
media_urls=media_urls,
|
||||
media_types=media_types,
|
||||
)
|
||||
task = asyncio.create_task(self.handle_message(event))
|
||||
self._background_tasks.add(task)
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
|
||||
# Fire-and-forget read receipt
|
||||
if self.send_read_receipts and session_chat_id:
|
||||
asyncio.create_task(self.mark_read(session_chat_id))
|
||||
|
||||
return web.Response(text="ok")
|
||||
@@ -455,6 +455,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
self._seen_messages: Dict[str, float] = {}
|
||||
self._SEEN_TTL = 300 # 5 minutes
|
||||
self._SEEN_MAX = 2000 # prune threshold
|
||||
# Reply threading mode: "off" (no replies), "first" (reply on first
|
||||
# chunk only, default), "all" (reply-reference on every chunk).
|
||||
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Discord and start receiving events."""
|
||||
@@ -774,7 +777,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
message_ids = []
|
||||
reference = None
|
||||
|
||||
if reply_to:
|
||||
if reply_to and self._reply_to_mode != "off":
|
||||
try:
|
||||
ref_msg = await channel.fetch_message(int(reply_to))
|
||||
reference = ref_msg
|
||||
@@ -782,7 +785,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
logger.debug("Could not fetch reply-to message: %s", e)
|
||||
|
||||
for i, chunk in enumerate(chunks):
|
||||
chunk_reference = reference if i == 0 else None
|
||||
if self._reply_to_mode == "all":
|
||||
chunk_reference = reference
|
||||
else: # "first" (default) or "off"
|
||||
chunk_reference = reference if i == 0 else None
|
||||
try:
|
||||
msg = await channel.send(
|
||||
content=chunk,
|
||||
@@ -1761,8 +1767,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if hasattr(interaction.channel, "guild") and interaction.channel.guild:
|
||||
chat_name = f"{interaction.channel.guild.name} / #{chat_name}"
|
||||
|
||||
# Get channel topic (if available)
|
||||
chat_topic = getattr(interaction.channel, "topic", None)
|
||||
# Get channel topic (if available).
|
||||
# For forum threads, inherit the parent forum's topic.
|
||||
chat_topic = self._get_effective_topic(interaction.channel, is_thread=is_thread)
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=str(interaction.channel_id),
|
||||
@@ -1836,6 +1843,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
chat_name = f"{guild_name} / {thread_name}" if guild_name else thread_name
|
||||
|
||||
# Inherit forum topic when the thread was created inside a forum channel.
|
||||
_chan = getattr(interaction, "channel", None)
|
||||
chat_topic = self._get_effective_topic(_chan, is_thread=True) if _chan else None
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=thread_id,
|
||||
chat_name=chat_name,
|
||||
@@ -1843,6 +1854,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
user_id=str(interaction.user.id),
|
||||
user_name=interaction.user.display_name,
|
||||
thread_id=thread_id,
|
||||
chat_topic=chat_topic,
|
||||
)
|
||||
|
||||
event = MessageEvent(
|
||||
@@ -2128,6 +2140,15 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _get_effective_topic(self, channel: Any, is_thread: bool = False) -> Optional[str]:
|
||||
"""Return the channel topic, falling back to the parent forum's topic for forum threads."""
|
||||
topic = getattr(channel, "topic", None)
|
||||
if not topic and is_thread:
|
||||
parent = getattr(channel, "parent", None)
|
||||
if parent and self._is_forum_parent(parent):
|
||||
topic = getattr(parent, "topic", None)
|
||||
return topic
|
||||
|
||||
def _format_thread_chat_name(self, thread: Any) -> str:
|
||||
"""Build a readable chat name for thread-like Discord channels, including forum context when available."""
|
||||
thread_name = getattr(thread, "name", None) or str(getattr(thread, "id", "thread"))
|
||||
@@ -2295,8 +2316,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if hasattr(message.channel, "guild") and message.channel.guild:
|
||||
chat_name = f"{message.channel.guild.name} / #{chat_name}"
|
||||
|
||||
# Get channel topic (if available - TextChannels have topics, DMs/threads don't)
|
||||
chat_topic = getattr(message.channel, "topic", None)
|
||||
# Get channel topic (if available - TextChannels have topics, DMs/threads don't).
|
||||
# For threads whose parent is a forum channel, inherit the parent's topic
|
||||
# so forum descriptions (e.g. project instructions) appear in the session context.
|
||||
chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)
|
||||
|
||||
# Build source
|
||||
source = self.build_source(
|
||||
@@ -2359,7 +2382,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
ext or "unknown", content_type,
|
||||
)
|
||||
else:
|
||||
MAX_DOC_BYTES = 20 * 1024 * 1024
|
||||
MAX_DOC_BYTES = 32 * 1024 * 1024
|
||||
if att.size and att.size > MAX_DOC_BYTES:
|
||||
logger.warning(
|
||||
"[Discord] Document too large (%s bytes), skipping: %s",
|
||||
@@ -2383,9 +2406,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(doc_mime)
|
||||
logger.info("[Discord] Cached user document: %s", cached_path)
|
||||
# Inject text content for .txt/.md files (capped at 100 KB)
|
||||
# Inject text content for plain-text documents (capped at 100 KB)
|
||||
MAX_TEXT_INJECT_BYTES = 100 * 1024
|
||||
if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
if ext in (".md", ".txt", ".log") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
try:
|
||||
text_content = raw_bytes.decode("utf-8")
|
||||
display_name = att.filename or f"document{ext}"
|
||||
|
||||
@@ -647,7 +647,11 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
|
||||
if result is not None:
|
||||
self._track_sent_timestamp(result)
|
||||
return SendResult(success=True)
|
||||
# Use the timestamp from the RPC result as a pseudo message_id.
|
||||
# Signal doesn't have real message IDs, but the stream consumer
|
||||
# needs a truthy value to follow its edit→fallback path correctly.
|
||||
_msg_id = str(result.get("timestamp", "")) if isinstance(result, dict) else None
|
||||
return SendResult(success=True, message_id=_msg_id or None)
|
||||
return SendResult(success=False, error="RPC send failed")
|
||||
|
||||
def _track_sent_timestamp(self, rpc_result) -> None:
|
||||
@@ -837,6 +841,11 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
async def stop_typing(self, chat_id: str) -> None:
|
||||
"""Public interface for stopping typing — called by base adapter's
|
||||
_keep_typing finally block to clean up platform-level typing tasks."""
|
||||
await self._stop_typing_indicator(chat_id)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Chat Info
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
+160
-4
@@ -14,7 +14,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from typing import Dict, Optional, Any
|
||||
from typing import Dict, Optional, Any, Tuple
|
||||
|
||||
try:
|
||||
from slack_bolt.async_app import AsyncApp
|
||||
@@ -95,6 +95,12 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# respond to ALL subsequent messages in that thread automatically.
|
||||
self._mentioned_threads: set = set()
|
||||
self._MENTIONED_THREADS_MAX = 5000
|
||||
# Assistant thread metadata keyed by (channel_id, thread_ts). Slack's
|
||||
# AI Assistant lifecycle events can arrive before/alongside message
|
||||
# events, and they carry the user/thread identity needed for stable
|
||||
# session + memory scoping.
|
||||
self._assistant_threads: Dict[Tuple[str, str], Dict[str, str]] = {}
|
||||
self._ASSISTANT_THREADS_MAX = 5000
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Slack via Socket Mode."""
|
||||
@@ -181,6 +187,14 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
async def handle_app_mention(event, say):
|
||||
pass
|
||||
|
||||
@self._app.event("assistant_thread_started")
|
||||
async def handle_assistant_thread_started(event, say):
|
||||
await self._handle_assistant_thread_lifecycle_event(event)
|
||||
|
||||
@self._app.event("assistant_thread_context_changed")
|
||||
async def handle_assistant_thread_context_changed(event, say):
|
||||
await self._handle_assistant_thread_lifecycle_event(event)
|
||||
|
||||
# Register slash command handler
|
||||
@self._app.command("/hermes")
|
||||
async def handle_hermes_command(ack, command):
|
||||
@@ -755,6 +769,135 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
|
||||
# ----- Internal handlers -----
|
||||
|
||||
def _assistant_thread_key(self, channel_id: str, thread_ts: str) -> Optional[Tuple[str, str]]:
|
||||
"""Return a stable cache key for Slack assistant thread metadata."""
|
||||
if not channel_id or not thread_ts:
|
||||
return None
|
||||
return (str(channel_id), str(thread_ts))
|
||||
|
||||
def _extract_assistant_thread_metadata(self, event: dict) -> Dict[str, str]:
|
||||
"""Extract Slack Assistant thread identity data from an event payload."""
|
||||
assistant_thread = event.get("assistant_thread") or {}
|
||||
context = assistant_thread.get("context") or event.get("context") or {}
|
||||
|
||||
channel_id = (
|
||||
assistant_thread.get("channel_id")
|
||||
or event.get("channel")
|
||||
or context.get("channel_id")
|
||||
or ""
|
||||
)
|
||||
thread_ts = (
|
||||
assistant_thread.get("thread_ts")
|
||||
or event.get("thread_ts")
|
||||
or event.get("message_ts")
|
||||
or ""
|
||||
)
|
||||
user_id = (
|
||||
assistant_thread.get("user_id")
|
||||
or event.get("user")
|
||||
or context.get("user_id")
|
||||
or ""
|
||||
)
|
||||
team_id = (
|
||||
event.get("team")
|
||||
or event.get("team_id")
|
||||
or assistant_thread.get("team_id")
|
||||
or ""
|
||||
)
|
||||
context_channel_id = context.get("channel_id") or ""
|
||||
|
||||
return {
|
||||
"channel_id": str(channel_id) if channel_id else "",
|
||||
"thread_ts": str(thread_ts) if thread_ts else "",
|
||||
"user_id": str(user_id) if user_id else "",
|
||||
"team_id": str(team_id) if team_id else "",
|
||||
"context_channel_id": str(context_channel_id) if context_channel_id else "",
|
||||
}
|
||||
|
||||
def _cache_assistant_thread_metadata(self, metadata: Dict[str, str]) -> None:
|
||||
"""Remember assistant thread identity data for later message events."""
|
||||
channel_id = metadata.get("channel_id", "")
|
||||
thread_ts = metadata.get("thread_ts", "")
|
||||
key = self._assistant_thread_key(channel_id, thread_ts)
|
||||
if not key:
|
||||
return
|
||||
|
||||
existing = self._assistant_threads.get(key, {})
|
||||
merged = dict(existing)
|
||||
merged.update({k: v for k, v in metadata.items() if v})
|
||||
self._assistant_threads[key] = merged
|
||||
|
||||
# Evict oldest entries when the cache exceeds the limit
|
||||
if len(self._assistant_threads) > self._ASSISTANT_THREADS_MAX:
|
||||
excess = len(self._assistant_threads) - self._ASSISTANT_THREADS_MAX // 2
|
||||
for old_key in list(self._assistant_threads)[:excess]:
|
||||
del self._assistant_threads[old_key]
|
||||
|
||||
team_id = merged.get("team_id", "")
|
||||
if team_id and channel_id:
|
||||
self._channel_team[channel_id] = team_id
|
||||
|
||||
def _lookup_assistant_thread_metadata(
|
||||
self,
|
||||
event: dict,
|
||||
channel_id: str = "",
|
||||
thread_ts: str = "",
|
||||
) -> Dict[str, str]:
|
||||
"""Load cached assistant-thread metadata that matches the current event."""
|
||||
metadata = self._extract_assistant_thread_metadata(event)
|
||||
if channel_id and not metadata.get("channel_id"):
|
||||
metadata["channel_id"] = channel_id
|
||||
if thread_ts and not metadata.get("thread_ts"):
|
||||
metadata["thread_ts"] = thread_ts
|
||||
|
||||
key = self._assistant_thread_key(
|
||||
metadata.get("channel_id", ""),
|
||||
metadata.get("thread_ts", ""),
|
||||
)
|
||||
cached = self._assistant_threads.get(key, {}) if key else {}
|
||||
if cached:
|
||||
merged = dict(cached)
|
||||
merged.update({k: v for k, v in metadata.items() if v})
|
||||
return merged
|
||||
return metadata
|
||||
|
||||
def _seed_assistant_thread_session(self, metadata: Dict[str, str]) -> None:
|
||||
"""Prime the session store so assistant threads get stable user scoping."""
|
||||
session_store = getattr(self, "_session_store", None)
|
||||
if not session_store:
|
||||
return
|
||||
|
||||
channel_id = metadata.get("channel_id", "")
|
||||
thread_ts = metadata.get("thread_ts", "")
|
||||
user_id = metadata.get("user_id", "")
|
||||
if not channel_id or not thread_ts or not user_id:
|
||||
return
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=channel_id,
|
||||
chat_name=channel_id,
|
||||
chat_type="dm",
|
||||
user_id=user_id,
|
||||
thread_id=thread_ts,
|
||||
chat_topic=metadata.get("context_channel_id") or None,
|
||||
)
|
||||
|
||||
try:
|
||||
session_store.get_or_create_session(source)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[Slack] Failed to seed assistant thread session for %s/%s",
|
||||
channel_id,
|
||||
thread_ts,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
async def _handle_assistant_thread_lifecycle_event(self, event: dict) -> None:
|
||||
"""Handle Slack Assistant lifecycle events that carry user/thread identity."""
|
||||
metadata = self._extract_assistant_thread_metadata(event)
|
||||
self._cache_assistant_thread_metadata(metadata)
|
||||
self._seed_assistant_thread_session(metadata)
|
||||
|
||||
async def _handle_slack_message(self, event: dict) -> None:
|
||||
"""Handle an incoming Slack message event."""
|
||||
# Dedup: Slack Socket Mode can redeliver events after reconnects (#4777)
|
||||
@@ -781,10 +924,21 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
return
|
||||
|
||||
text = event.get("text", "")
|
||||
user_id = event.get("user", "")
|
||||
channel_id = event.get("channel", "")
|
||||
ts = event.get("ts", "")
|
||||
team_id = event.get("team", "")
|
||||
assistant_meta = self._lookup_assistant_thread_metadata(
|
||||
event,
|
||||
channel_id=channel_id,
|
||||
thread_ts=event.get("thread_ts", ""),
|
||||
)
|
||||
user_id = event.get("user") or assistant_meta.get("user_id", "")
|
||||
if not channel_id:
|
||||
channel_id = assistant_meta.get("channel_id", "")
|
||||
team_id = (
|
||||
event.get("team")
|
||||
or event.get("team_id")
|
||||
or assistant_meta.get("team_id", "")
|
||||
)
|
||||
|
||||
# Track which workspace owns this channel
|
||||
if team_id and channel_id:
|
||||
@@ -792,6 +946,8 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
|
||||
# Determine if this is a DM or channel message
|
||||
channel_type = event.get("channel_type", "")
|
||||
if not channel_type and channel_id.startswith("D"):
|
||||
channel_type = "im"
|
||||
is_dm = channel_type == "im"
|
||||
|
||||
# Build thread_ts for session keying.
|
||||
@@ -800,7 +956,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# In DMs: only use the real thread_ts — top-level DMs should share
|
||||
# one continuous session, threaded DMs get their own session.
|
||||
if is_dm:
|
||||
thread_ts = event.get("thread_ts") # None for top-level DMs
|
||||
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts") # None for top-level DMs
|
||||
else:
|
||||
thread_ts = event.get("thread_ts") or ts # ts fallback for channels
|
||||
|
||||
|
||||
+80
-12
@@ -184,6 +184,8 @@ if _config_path.exists():
|
||||
# Env var from .env takes precedence (already in os.environ).
|
||||
if "gateway_timeout" in _agent_cfg and "HERMES_AGENT_TIMEOUT" not in os.environ:
|
||||
os.environ["HERMES_AGENT_TIMEOUT"] = str(_agent_cfg["gateway_timeout"])
|
||||
if "gateway_timeout_warning" in _agent_cfg and "HERMES_AGENT_TIMEOUT_WARNING" not in os.environ:
|
||||
os.environ["HERMES_AGENT_TIMEOUT_WARNING"] = str(_agent_cfg["gateway_timeout_warning"])
|
||||
# Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
|
||||
# HERMES_TIMEZONE from .env takes precedence (already in os.environ).
|
||||
_tz_cfg = _cfg.get("timezone", "")
|
||||
@@ -1073,6 +1075,7 @@ class GatewayRunner:
|
||||
"MATRIX_ALLOWED_USERS", "DINGTALK_ALLOWED_USERS",
|
||||
"FEISHU_ALLOWED_USERS",
|
||||
"WECOM_ALLOWED_USERS",
|
||||
"BLUEBUBBLES_ALLOWED_USERS",
|
||||
"GATEWAY_ALLOWED_USERS")
|
||||
)
|
||||
_allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes") or any(
|
||||
@@ -1083,7 +1086,8 @@ class GatewayRunner:
|
||||
"SMS_ALLOW_ALL_USERS", "MATTERMOST_ALLOW_ALL_USERS",
|
||||
"MATRIX_ALLOW_ALL_USERS", "DINGTALK_ALLOW_ALL_USERS",
|
||||
"FEISHU_ALLOW_ALL_USERS",
|
||||
"WECOM_ALLOW_ALL_USERS")
|
||||
"WECOM_ALLOW_ALL_USERS",
|
||||
"BLUEBUBBLES_ALLOW_ALL_USERS")
|
||||
)
|
||||
if not _any_allowlist and not _allow_all:
|
||||
logger.warning(
|
||||
@@ -1654,6 +1658,13 @@ class GatewayRunner:
|
||||
adapter.gateway_runner = self # For cross-platform delivery
|
||||
return adapter
|
||||
|
||||
elif platform == Platform.BLUEBUBBLES:
|
||||
from gateway.platforms.bluebubbles import BlueBubblesAdapter, check_bluebubbles_requirements
|
||||
if not check_bluebubbles_requirements():
|
||||
logger.warning("BlueBubbles: aiohttp/httpx missing or BLUEBUBBLES_SERVER_URL/BLUEBUBBLES_PASSWORD not configured")
|
||||
return None
|
||||
return BlueBubblesAdapter(config)
|
||||
|
||||
return None
|
||||
|
||||
def _is_user_authorized(self, source: SessionSource) -> bool:
|
||||
@@ -1692,6 +1703,7 @@ class GatewayRunner:
|
||||
Platform.DINGTALK: "DINGTALK_ALLOWED_USERS",
|
||||
Platform.FEISHU: "FEISHU_ALLOWED_USERS",
|
||||
Platform.WECOM: "WECOM_ALLOWED_USERS",
|
||||
Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOWED_USERS",
|
||||
}
|
||||
platform_allow_all_map = {
|
||||
Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
|
||||
@@ -1706,6 +1718,7 @@ class GatewayRunner:
|
||||
Platform.DINGTALK: "DINGTALK_ALLOW_ALL_USERS",
|
||||
Platform.FEISHU: "FEISHU_ALLOW_ALL_USERS",
|
||||
Platform.WECOM: "WECOM_ALLOW_ALL_USERS",
|
||||
Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOW_ALL_USERS",
|
||||
}
|
||||
|
||||
# Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
|
||||
@@ -1779,8 +1792,11 @@ class GatewayRunner:
|
||||
"""
|
||||
source = event.source
|
||||
|
||||
# Check if user is authorized
|
||||
if not self._is_user_authorized(source):
|
||||
# Internal events (e.g. background-process completion notifications)
|
||||
# are system-generated and must skip user authorization.
|
||||
if getattr(event, "internal", False):
|
||||
pass
|
||||
elif not self._is_user_authorized(source):
|
||||
logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
|
||||
# In DMs: offer pairing code. In groups: silently ignore.
|
||||
if source.chat_type == "dm" and self._get_unauthorized_dm_behavior(source.platform) == "pair":
|
||||
@@ -5264,19 +5280,28 @@ class GatewayRunner:
|
||||
|
||||
agent = self._running_agents.get(session_key)
|
||||
if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
|
||||
lines = [
|
||||
"📊 **Session Token Usage**",
|
||||
f"Prompt (input): {agent.session_prompt_tokens:,}",
|
||||
f"Completion (output): {agent.session_completion_tokens:,}",
|
||||
f"Total: {agent.session_total_tokens:,}",
|
||||
f"API calls: {agent.session_api_calls}",
|
||||
]
|
||||
lines = []
|
||||
|
||||
# Rate limits first (when available from provider headers)
|
||||
rl_state = agent.get_rate_limit_state()
|
||||
if rl_state and rl_state.has_data:
|
||||
from agent.rate_limit_tracker import format_rate_limit_compact
|
||||
lines.append(f"⏱️ **Rate Limits:** {format_rate_limit_compact(rl_state)}")
|
||||
lines.append("")
|
||||
|
||||
# Session token usage
|
||||
lines.append("📊 **Session Token Usage**")
|
||||
lines.append(f"Prompt (input): {agent.session_prompt_tokens:,}")
|
||||
lines.append(f"Completion (output): {agent.session_completion_tokens:,}")
|
||||
lines.append(f"Total: {agent.session_total_tokens:,}")
|
||||
lines.append(f"API calls: {agent.session_api_calls}")
|
||||
ctx = agent.context_compressor
|
||||
if ctx.last_prompt_tokens:
|
||||
pct = min(100, ctx.last_prompt_tokens / ctx.context_length * 100) if ctx.context_length else 0
|
||||
lines.append(f"Context: {ctx.last_prompt_tokens:,} / {ctx.context_length:,} ({pct:.0f}%)")
|
||||
if ctx.compression_count:
|
||||
lines.append(f"Compressions: {ctx.compression_count}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
# No running agent -- check session history for a rough count
|
||||
@@ -5518,7 +5543,7 @@ class GatewayRunner:
|
||||
Platform.TELEGRAM, Platform.DISCORD, Platform.SLACK, Platform.WHATSAPP,
|
||||
Platform.SIGNAL, Platform.MATTERMOST, Platform.MATRIX,
|
||||
Platform.HOMEASSISTANT, Platform.EMAIL, Platform.SMS, Platform.DINGTALK,
|
||||
Platform.FEISHU, Platform.WECOM, Platform.LOCAL,
|
||||
Platform.FEISHU, Platform.WECOM, Platform.BLUEBUBBLES, Platform.LOCAL,
|
||||
})
|
||||
|
||||
async def _handle_update_command(self, event: MessageEvent) -> str:
|
||||
@@ -6158,6 +6183,7 @@ class GatewayRunner:
|
||||
text=synth_text,
|
||||
message_type=MessageType.TEXT,
|
||||
source=_source,
|
||||
internal=True,
|
||||
)
|
||||
logger.info(
|
||||
"Process %s finished — injecting agent notification for session %s",
|
||||
@@ -6308,7 +6334,15 @@ class GatewayRunner:
|
||||
# Falls back to env vars for backward compatibility.
|
||||
# YAML 1.1 parses bare `off` as boolean False — normalise before
|
||||
# the `or` chain so it doesn't silently fall through to "all".
|
||||
_raw_tp = user_config.get("display", {}).get("tool_progress")
|
||||
#
|
||||
# Per-platform overrides (display.tool_progress_overrides) take
|
||||
# priority over the global setting — e.g. Signal users can set
|
||||
# tool_progress to "off" while keeping Telegram on "all".
|
||||
_display_cfg = user_config.get("display", {})
|
||||
_overrides = _display_cfg.get("tool_progress_overrides", {})
|
||||
_raw_tp = _overrides.get(platform_key)
|
||||
if _raw_tp is None:
|
||||
_raw_tp = _display_cfg.get("tool_progress")
|
||||
if _raw_tp is False:
|
||||
_raw_tp = "off"
|
||||
progress_mode = (
|
||||
@@ -6412,6 +6446,18 @@ class GatewayRunner:
|
||||
if not adapter:
|
||||
return
|
||||
|
||||
# Skip tool progress for platforms that don't support message
|
||||
# editing (e.g. iMessage/BlueBubbles) — each progress update
|
||||
# would become a separate message bubble, which is noisy.
|
||||
from gateway.platforms.base import BasePlatformAdapter as _BaseAdapter
|
||||
if type(adapter).edit_message is _BaseAdapter.edit_message:
|
||||
while not progress_queue.empty():
|
||||
try:
|
||||
progress_queue.get_nowait()
|
||||
except Exception:
|
||||
break
|
||||
return
|
||||
|
||||
progress_lines = [] # Accumulated tool lines
|
||||
progress_msg_id = None # ID of the progress message to edit
|
||||
can_edit = True # False once an edit fails (platform doesn't support it)
|
||||
@@ -7106,6 +7152,9 @@ class GatewayRunner:
|
||||
# Default 1800s (30 min inactivity). 0 = unlimited.
|
||||
_agent_timeout_raw = float(os.getenv("HERMES_AGENT_TIMEOUT", 1800))
|
||||
_agent_timeout = _agent_timeout_raw if _agent_timeout_raw > 0 else None
|
||||
_agent_warning_raw = float(os.getenv("HERMES_AGENT_TIMEOUT_WARNING", 900))
|
||||
_agent_warning = _agent_warning_raw if _agent_warning_raw > 0 else None
|
||||
_warning_fired = False
|
||||
loop = asyncio.get_event_loop()
|
||||
_executor_task = asyncio.ensure_future(
|
||||
loop.run_in_executor(None, run_sync)
|
||||
@@ -7138,6 +7187,25 @@ class GatewayRunner:
|
||||
_idle_secs = _act.get("seconds_since_activity", 0.0)
|
||||
except Exception:
|
||||
pass
|
||||
# Staged warning: fire once before escalating to full timeout.
|
||||
if (not _warning_fired and _agent_warning is not None
|
||||
and _idle_secs >= _agent_warning):
|
||||
_warning_fired = True
|
||||
_warn_adapter = self.adapters.get(source.platform)
|
||||
if _warn_adapter:
|
||||
_elapsed_warn = int(_agent_warning // 60) or 1
|
||||
_remaining_mins = int((_agent_timeout - _agent_warning) // 60) or 1
|
||||
try:
|
||||
await _warn_adapter.send(
|
||||
source.chat_id,
|
||||
f"⚠️ No activity for {_elapsed_warn} min. "
|
||||
f"If the agent does not respond soon, it will "
|
||||
f"be timed out in {_remaining_mins} min. "
|
||||
f"You can continue waiting or use /reset.",
|
||||
metadata=_status_thread_metadata,
|
||||
)
|
||||
except Exception as _warn_err:
|
||||
logger.debug("Inactivity warning send error: %s", _warn_err)
|
||||
if _idle_secs >= _agent_timeout:
|
||||
_inactivity_timeout = True
|
||||
break
|
||||
|
||||
@@ -193,6 +193,7 @@ _PII_SAFE_PLATFORMS = frozenset({
|
||||
Platform.WHATSAPP,
|
||||
Platform.SIGNAL,
|
||||
Platform.TELEGRAM,
|
||||
Platform.BLUEBUBBLES,
|
||||
})
|
||||
"""Platforms where user IDs can be safely redacted (no in-message mention system
|
||||
that requires raw IDs). Discord is excluded because mentions use ``<@user_id>``
|
||||
|
||||
@@ -353,6 +353,17 @@ class GatewayStreamConsumer:
|
||||
self._message_id = result.message_id
|
||||
self._already_sent = True
|
||||
self._last_sent_text = text
|
||||
elif result.success:
|
||||
# Platform accepted the message but returned no message_id
|
||||
# (e.g. Signal). Can't edit without an ID — switch to
|
||||
# fallback mode: suppress intermediate deltas, send only
|
||||
# the missing tail once the final response is ready.
|
||||
self._already_sent = True
|
||||
self._edit_supported = False
|
||||
self._fallback_prefix = self._clean_for_display(text)
|
||||
self._fallback_final_send = True
|
||||
# Sentinel prevents re-entering this branch on every delta
|
||||
self._message_id = "__no_edit__"
|
||||
else:
|
||||
# Initial send failed — disable streaming for this session
|
||||
self._edit_supported = False
|
||||
|
||||
+198
-6
@@ -67,12 +67,16 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes
|
||||
ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry
|
||||
DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s
|
||||
DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
||||
DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
|
||||
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
|
||||
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
|
||||
DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai"
|
||||
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
|
||||
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
|
||||
QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56"
|
||||
QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token"
|
||||
QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -112,6 +116,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
auth_type="oauth_external",
|
||||
inference_base_url=DEFAULT_CODEX_BASE_URL,
|
||||
),
|
||||
"qwen-oauth": ProviderConfig(
|
||||
id="qwen-oauth",
|
||||
name="Qwen OAuth",
|
||||
auth_type="oauth_external",
|
||||
inference_base_url=DEFAULT_QWEN_BASE_URL,
|
||||
),
|
||||
"copilot": ProviderConfig(
|
||||
id="copilot",
|
||||
name="GitHub Copilot",
|
||||
@@ -240,7 +250,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
# Kimi Code Endpoint Detection
|
||||
# =============================================================================
|
||||
|
||||
# Kimi Code (platform.kimi.ai) issues keys prefixed "sk-kimi-" that only work
|
||||
# Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work
|
||||
# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on
|
||||
# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set
|
||||
# KIMI_BASE_URL explicitly.
|
||||
@@ -817,6 +827,7 @@ def resolve_provider(
|
||||
"github-copilot-acp": "copilot-acp", "copilot-acp-agent": "copilot-acp",
|
||||
"aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway",
|
||||
"opencode": "opencode-zen", "zen": "opencode-zen",
|
||||
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth",
|
||||
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
||||
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||
@@ -946,6 +957,176 @@ def _codex_access_token_is_expiring(access_token: Any, skew_seconds: int) -> boo
|
||||
return float(exp) <= (time.time() + max(0, int(skew_seconds)))
|
||||
|
||||
|
||||
def _qwen_cli_auth_path() -> Path:
|
||||
return Path.home() / ".qwen" / "oauth_creds.json"
|
||||
|
||||
|
||||
def _read_qwen_cli_tokens() -> Dict[str, Any]:
|
||||
auth_path = _qwen_cli_auth_path()
|
||||
if not auth_path.exists():
|
||||
raise AuthError(
|
||||
"Qwen CLI credentials not found. Run 'qwen auth qwen-oauth' first.",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_auth_missing",
|
||||
)
|
||||
try:
|
||||
data = json.loads(auth_path.read_text(encoding="utf-8"))
|
||||
except Exception as exc:
|
||||
raise AuthError(
|
||||
f"Failed to read Qwen CLI credentials from {auth_path}: {exc}",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_auth_read_failed",
|
||||
) from exc
|
||||
if not isinstance(data, dict):
|
||||
raise AuthError(
|
||||
f"Invalid Qwen CLI credentials in {auth_path}.",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_auth_invalid",
|
||||
)
|
||||
return data
|
||||
|
||||
|
||||
def _save_qwen_cli_tokens(tokens: Dict[str, Any]) -> Path:
|
||||
auth_path = _qwen_cli_auth_path()
|
||||
auth_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp_path = auth_path.with_suffix(".tmp")
|
||||
tmp_path.write_text(json.dumps(tokens, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
||||
os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
|
||||
tmp_path.replace(auth_path)
|
||||
return auth_path
|
||||
|
||||
|
||||
def _qwen_access_token_is_expiring(expiry_date_ms: Any, skew_seconds: int = QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS) -> bool:
|
||||
try:
|
||||
expiry_ms = int(expiry_date_ms)
|
||||
except Exception:
|
||||
return True
|
||||
return (time.time() + max(0, int(skew_seconds))) * 1000 >= expiry_ms
|
||||
|
||||
|
||||
def _refresh_qwen_cli_tokens(tokens: Dict[str, Any], timeout_seconds: float = 20.0) -> Dict[str, Any]:
|
||||
refresh_token = str(tokens.get("refresh_token", "") or "").strip()
|
||||
if not refresh_token:
|
||||
raise AuthError(
|
||||
"Qwen OAuth refresh token missing. Re-run 'qwen auth qwen-oauth'.",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_refresh_token_missing",
|
||||
)
|
||||
|
||||
try:
|
||||
response = httpx.post(
|
||||
QWEN_OAUTH_TOKEN_URL,
|
||||
headers={
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"Accept": "application/json",
|
||||
},
|
||||
data={
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token,
|
||||
"client_id": QWEN_OAUTH_CLIENT_ID,
|
||||
},
|
||||
timeout=timeout_seconds,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise AuthError(
|
||||
f"Qwen OAuth refresh failed: {exc}",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_refresh_failed",
|
||||
) from exc
|
||||
|
||||
if response.status_code >= 400:
|
||||
body = response.text.strip()
|
||||
raise AuthError(
|
||||
"Qwen OAuth refresh failed. Re-run 'qwen auth qwen-oauth'."
|
||||
+ (f" Response: {body}" if body else ""),
|
||||
provider="qwen-oauth",
|
||||
code="qwen_refresh_failed",
|
||||
)
|
||||
|
||||
try:
|
||||
payload = response.json()
|
||||
except Exception as exc:
|
||||
raise AuthError(
|
||||
f"Qwen OAuth refresh returned invalid JSON: {exc}",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_refresh_invalid_json",
|
||||
) from exc
|
||||
|
||||
if not isinstance(payload, dict) or not str(payload.get("access_token", "") or "").strip():
|
||||
raise AuthError(
|
||||
"Qwen OAuth refresh response missing access_token.",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_refresh_invalid_response",
|
||||
)
|
||||
|
||||
expires_in = payload.get("expires_in")
|
||||
try:
|
||||
expires_in_seconds = int(expires_in)
|
||||
except Exception:
|
||||
expires_in_seconds = 6 * 60 * 60
|
||||
|
||||
refreshed = {
|
||||
"access_token": str(payload.get("access_token", "") or "").strip(),
|
||||
"refresh_token": str(payload.get("refresh_token", refresh_token) or refresh_token).strip(),
|
||||
"token_type": str(payload.get("token_type", tokens.get("token_type", "Bearer")) or "Bearer").strip() or "Bearer",
|
||||
"resource_url": str(payload.get("resource_url", tokens.get("resource_url", "portal.qwen.ai")) or "portal.qwen.ai").strip(),
|
||||
"expiry_date": int(time.time() * 1000) + max(1, expires_in_seconds) * 1000,
|
||||
}
|
||||
_save_qwen_cli_tokens(refreshed)
|
||||
return refreshed
|
||||
|
||||
|
||||
def resolve_qwen_runtime_credentials(
|
||||
*,
|
||||
force_refresh: bool = False,
|
||||
refresh_if_expiring: bool = True,
|
||||
refresh_skew_seconds: int = QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
) -> Dict[str, Any]:
|
||||
tokens = _read_qwen_cli_tokens()
|
||||
access_token = str(tokens.get("access_token", "") or "").strip()
|
||||
should_refresh = bool(force_refresh)
|
||||
if not should_refresh and refresh_if_expiring:
|
||||
should_refresh = _qwen_access_token_is_expiring(tokens.get("expiry_date"), refresh_skew_seconds)
|
||||
if should_refresh:
|
||||
tokens = _refresh_qwen_cli_tokens(tokens)
|
||||
access_token = str(tokens.get("access_token", "") or "").strip()
|
||||
if not access_token:
|
||||
raise AuthError(
|
||||
"Qwen OAuth access token missing. Re-run 'qwen auth qwen-oauth'.",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_access_token_missing",
|
||||
)
|
||||
|
||||
base_url = os.getenv("HERMES_QWEN_BASE_URL", "").strip().rstrip("/") or DEFAULT_QWEN_BASE_URL
|
||||
return {
|
||||
"provider": "qwen-oauth",
|
||||
"base_url": base_url,
|
||||
"api_key": access_token,
|
||||
"source": "qwen-cli",
|
||||
"expires_at_ms": tokens.get("expiry_date"),
|
||||
"auth_file": str(_qwen_cli_auth_path()),
|
||||
}
|
||||
|
||||
|
||||
def get_qwen_auth_status() -> Dict[str, Any]:
|
||||
auth_path = _qwen_cli_auth_path()
|
||||
try:
|
||||
creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False)
|
||||
return {
|
||||
"logged_in": True,
|
||||
"auth_file": str(auth_path),
|
||||
"source": creds.get("source"),
|
||||
"api_key": creds.get("api_key"),
|
||||
"expires_at_ms": creds.get("expires_at_ms"),
|
||||
}
|
||||
except AuthError as exc:
|
||||
return {
|
||||
"logged_in": False,
|
||||
"auth_file": str(auth_path),
|
||||
"error": str(exc),
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SSH / remote session detection
|
||||
# =============================================================================
|
||||
@@ -2072,6 +2253,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
|
||||
return get_nous_auth_status()
|
||||
if target == "openai-codex":
|
||||
return get_codex_auth_status()
|
||||
if target == "qwen-oauth":
|
||||
return get_qwen_auth_status()
|
||||
if target == "copilot-acp":
|
||||
return get_external_process_provider_status(target)
|
||||
# API-key providers
|
||||
@@ -2834,12 +3017,15 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
_save_provider_state(auth_store, "nous", auth_state)
|
||||
saved_to = _save_auth_store(auth_store)
|
||||
|
||||
config_path = _update_config_for_provider("nous", inference_base_url)
|
||||
print()
|
||||
print("Login successful!")
|
||||
print(f" Auth state: {saved_to}")
|
||||
print(f" Config updated: {config_path} (model.provider=nous)")
|
||||
|
||||
# Resolve model BEFORE writing provider to config.yaml so we never
|
||||
# leave the config in a half-updated state (provider=nous but model
|
||||
# still set to the previous provider's model, e.g. opus from
|
||||
# OpenRouter). The auth.json active_provider was already set above.
|
||||
selected_model = None
|
||||
try:
|
||||
runtime_key = auth_state.get("agent_key") or auth_state.get("access_token")
|
||||
if not isinstance(runtime_key, str) or not runtime_key:
|
||||
@@ -2873,9 +3059,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
unavailable_models=unavailable_models,
|
||||
portal_url=_portal,
|
||||
)
|
||||
if selected_model:
|
||||
_save_model_choice(selected_model)
|
||||
print(f"Default model set to: {selected_model}")
|
||||
elif unavailable_models:
|
||||
_url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
|
||||
print("No free models currently available.")
|
||||
@@ -2887,6 +3070,15 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
print()
|
||||
print(f"Login succeeded, but could not fetch available models. Reason: {message}")
|
||||
|
||||
# Write provider + model atomically so config is never mismatched.
|
||||
config_path = _update_config_for_provider(
|
||||
"nous", inference_base_url, default_model=selected_model,
|
||||
)
|
||||
if selected_model:
|
||||
_save_model_choice(selected_model)
|
||||
print(f"Default model set to: {selected_model}")
|
||||
print(f" Config updated: {config_path} (model.provider=nous)")
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\nLogin cancelled.")
|
||||
raise SystemExit(130)
|
||||
|
||||
@@ -32,7 +32,7 @@ from hermes_constants import OPENROUTER_BASE_URL
|
||||
|
||||
|
||||
# Providers that support OAuth login in addition to API keys.
|
||||
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex"}
|
||||
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth"}
|
||||
|
||||
|
||||
def _get_custom_provider_names() -> list:
|
||||
@@ -147,7 +147,7 @@ def auth_add_command(args) -> None:
|
||||
if provider.startswith(CUSTOM_POOL_PREFIX):
|
||||
requested_type = AUTH_TYPE_API_KEY
|
||||
else:
|
||||
requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex"} else AUTH_TYPE_API_KEY
|
||||
requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth"} else AUTH_TYPE_API_KEY
|
||||
|
||||
pool = load_pool(provider)
|
||||
|
||||
@@ -250,6 +250,26 @@ def auth_add_command(args) -> None:
|
||||
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
|
||||
return
|
||||
|
||||
if provider == "qwen-oauth":
|
||||
creds = auth_mod.resolve_qwen_runtime_credentials(refresh_if_expiring=False)
|
||||
label = (getattr(args, "label", None) or "").strip() or label_from_token(
|
||||
creds["api_key"],
|
||||
_oauth_default_label(provider, len(pool.entries()) + 1),
|
||||
)
|
||||
entry = PooledCredential(
|
||||
provider=provider,
|
||||
id=uuid.uuid4().hex[:6],
|
||||
label=label,
|
||||
auth_type=AUTH_TYPE_OAUTH,
|
||||
priority=0,
|
||||
source=f"{SOURCE_MANUAL}:qwen_cli",
|
||||
access_token=creds["api_key"],
|
||||
base_url=creds.get("base_url"),
|
||||
)
|
||||
pool.add_entry(entry)
|
||||
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
|
||||
return
|
||||
|
||||
raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.")
|
||||
|
||||
|
||||
|
||||
@@ -295,10 +295,16 @@ def _format_context_length(tokens: int) -> str:
|
||||
"""Format a token count for display (e.g. 128000 → '128K', 1048576 → '1M')."""
|
||||
if tokens >= 1_000_000:
|
||||
val = tokens / 1_000_000
|
||||
return f"{val:g}M"
|
||||
rounded = round(val)
|
||||
if abs(val - rounded) < 0.05:
|
||||
return f"{rounded}M"
|
||||
return f"{val:.1f}M"
|
||||
elif tokens >= 1_000:
|
||||
val = tokens / 1_000
|
||||
return f"{val:g}K"
|
||||
rounded = round(val)
|
||||
if abs(val - rounded) < 0.05:
|
||||
return f"{rounded}K"
|
||||
return f"{val:.1f}K"
|
||||
return str(tokens)
|
||||
|
||||
|
||||
|
||||
@@ -87,8 +87,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--global]"),
|
||||
CommandDef("provider", "Show available providers and current provider",
|
||||
"Configuration"),
|
||||
CommandDef("prompt", "View/set custom system prompt", "Configuration",
|
||||
cli_only=True, args_hint="[text]", subcommands=("clear",)),
|
||||
|
||||
CommandDef("personality", "Set a predefined personality", "Configuration",
|
||||
args_hint="[name]"),
|
||||
CommandDef("statusbar", "Toggle the context/model status bar", "Configuration",
|
||||
@@ -129,7 +128,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("commands", "Browse all commands and skills (paginated)", "Info",
|
||||
gateway_only=True, args_hint="[page]"),
|
||||
CommandDef("help", "Show available commands", "Info"),
|
||||
CommandDef("usage", "Show token usage for the current session", "Info"),
|
||||
CommandDef("usage", "Show token usage and rate limits for the current session", "Info"),
|
||||
CommandDef("insights", "Show usage insights and analytics", "Info",
|
||||
args_hint="[days]"),
|
||||
CommandDef("platforms", "Show gateway/messaging platform status", "Info",
|
||||
|
||||
+78
-5
@@ -39,6 +39,7 @@ _EXTRA_ENV_KEYS = frozenset({
|
||||
"DINGTALK_CLIENT_ID", "DINGTALK_CLIENT_SECRET",
|
||||
"FEISHU_APP_ID", "FEISHU_APP_SECRET", "FEISHU_ENCRYPT_KEY", "FEISHU_VERIFICATION_TOKEN",
|
||||
"WECOM_BOT_ID", "WECOM_SECRET",
|
||||
"BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
|
||||
"TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
|
||||
"WHATSAPP_MODE", "WHATSAPP_ENABLED",
|
||||
"MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
|
||||
@@ -157,7 +158,14 @@ def get_project_root() -> Path:
|
||||
return Path(__file__).parent.parent.resolve()
|
||||
|
||||
def _secure_dir(path):
|
||||
"""Set directory to owner-only access (0700). No-op on Windows."""
|
||||
"""Set directory to owner-only access (0700). No-op on Windows.
|
||||
|
||||
Skipped in managed mode — the NixOS module sets group-readable
|
||||
permissions (0750) so interactive users in the hermes group can
|
||||
share state with the gateway service.
|
||||
"""
|
||||
if is_managed():
|
||||
return
|
||||
try:
|
||||
os.chmod(path, 0o700)
|
||||
except (OSError, NotImplementedError):
|
||||
@@ -165,7 +173,13 @@ def _secure_dir(path):
|
||||
|
||||
|
||||
def _secure_file(path):
|
||||
"""Set file to owner-only read/write (0600). No-op on Windows."""
|
||||
"""Set file to owner-only read/write (0600). No-op on Windows.
|
||||
|
||||
Skipped in managed mode — the NixOS activation script sets
|
||||
group-readable permissions (0640) on config files.
|
||||
"""
|
||||
if is_managed():
|
||||
return
|
||||
try:
|
||||
if os.path.exists(str(path)):
|
||||
os.chmod(path, 0o600)
|
||||
@@ -217,6 +231,10 @@ DEFAULT_CONFIG = {
|
||||
# (force on/off for all models), or a list of model-name substrings
|
||||
# to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
|
||||
"tool_use_enforcement": "auto",
|
||||
# Staged inactivity warning: send a warning to the user at this
|
||||
# threshold before escalating to a full timeout. The warning fires
|
||||
# once per run and does not interrupt the agent. 0 = disable warning.
|
||||
"gateway_timeout_warning": 900,
|
||||
},
|
||||
|
||||
"terminal": {
|
||||
@@ -379,6 +397,7 @@ DEFAULT_CONFIG = {
|
||||
"show_cost": False, # Show $ cost in the status bar (off by default)
|
||||
"skin": "default",
|
||||
"tool_progress_command": False, # Enable /verbose command in messaging gateway
|
||||
"tool_progress_overrides": {}, # Per-platform overrides: {"signal": "off", "telegram": "all"}
|
||||
"tool_preview_length": 0, # Max chars for tool call previews (0 = no limit, show full paths/commands)
|
||||
},
|
||||
|
||||
@@ -413,7 +432,7 @@ DEFAULT_CONFIG = {
|
||||
|
||||
"stt": {
|
||||
"enabled": True,
|
||||
"provider": "local", # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API)
|
||||
"provider": "local", # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API) | "mistral" (Voxtral Transcribe)
|
||||
"local": {
|
||||
"model": "base", # tiny, base, small, medium, large-v3
|
||||
"language": "", # auto-detect by default; set to "en", "es", "fr", etc. to force
|
||||
@@ -421,6 +440,9 @@ DEFAULT_CONFIG = {
|
||||
"openai": {
|
||||
"model": "whisper-1", # whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe
|
||||
},
|
||||
"mistral": {
|
||||
"model": "voxtral-mini-latest", # voxtral-mini-latest, voxtral-mini-2602
|
||||
},
|
||||
},
|
||||
|
||||
"voice": {
|
||||
@@ -547,7 +569,7 @@ DEFAULT_CONFIG = {
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 12,
|
||||
"_config_version": 13,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
@@ -724,6 +746,14 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"HERMES_QWEN_BASE_URL": {
|
||||
"description": "Qwen Portal base URL override (default: https://portal.qwen.ai/v1)",
|
||||
"prompt": "Qwen Portal base URL (leave empty for default)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"OPENCODE_ZEN_API_KEY": {
|
||||
"description": "OpenCode Zen API key (pay-as-you-go access to curated models)",
|
||||
"prompt": "OpenCode Zen API key",
|
||||
@@ -975,6 +1005,13 @@ OPTIONAL_ENV_VARS = {
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"DISCORD_REPLY_TO_MODE": {
|
||||
"description": "Discord reply threading mode: 'off' (no reply references), 'first' (reply on first message only, default), 'all' (reply on every chunk)",
|
||||
"prompt": "Discord reply mode (off/first/all)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"SLACK_BOT_TOKEN": {
|
||||
"description": "Slack bot token (xoxb-). Get from OAuth & Permissions after installing your app. "
|
||||
"Required scopes: chat:write, app_mentions:read, channels:history, groups:history, "
|
||||
@@ -1088,6 +1125,27 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"BLUEBUBBLES_SERVER_URL": {
|
||||
"description": "BlueBubbles server URL for iMessage integration (e.g. http://192.168.1.10:1234)",
|
||||
"prompt": "BlueBubbles server URL",
|
||||
"url": "https://bluebubbles.app/",
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"BLUEBUBBLES_PASSWORD": {
|
||||
"description": "BlueBubbles server password (from BlueBubbles Server → Settings → API)",
|
||||
"prompt": "BlueBubbles server password",
|
||||
"url": None,
|
||||
"password": True,
|
||||
"category": "messaging",
|
||||
},
|
||||
"BLUEBUBBLES_ALLOWED_USERS": {
|
||||
"description": "Comma-separated iMessage addresses (email or phone) allowed to use the bot",
|
||||
"prompt": "Allowed iMessage addresses (comma-separated)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"GATEWAY_ALLOW_ALL_USERS": {
|
||||
"description": "Allow all users to interact with messaging bots (true/false). Default: false.",
|
||||
"prompt": "Allow all users (true/false)",
|
||||
@@ -1159,7 +1217,7 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "setting",
|
||||
},
|
||||
"SUDO_PASSWORD": {
|
||||
"description": "Sudo password for terminal commands requiring root access",
|
||||
"description": "Sudo password for terminal commands requiring root access; set to an explicit empty string to try empty without prompting",
|
||||
"prompt": "Sudo password",
|
||||
"url": None,
|
||||
"password": True,
|
||||
@@ -1643,6 +1701,21 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
|
||||
ep = providers_dict[key]
|
||||
print(f" → {key}: {ep.get('api', '')}")
|
||||
|
||||
# ── Version 12 → 13: clear dead LLM_MODEL / OPENAI_MODEL from .env ──
|
||||
# These env vars were written by the old setup wizard but nothing reads
|
||||
# them anymore (config.yaml is the sole source of truth since March 2026).
|
||||
# Stale entries cause user confusion — see issue report.
|
||||
if current_ver < 13:
|
||||
for dead_var in ("LLM_MODEL", "OPENAI_MODEL"):
|
||||
try:
|
||||
old_val = get_env_value(dead_var)
|
||||
if old_val:
|
||||
save_env_value(dead_var, "")
|
||||
if not quiet:
|
||||
print(f" ✓ Cleared {dead_var} from .env (no longer used — config.yaml is source of truth)")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if current_ver < latest_ver and not quiet:
|
||||
print(f"Config version: {current_ver} → {latest_ver}")
|
||||
|
||||
|
||||
+70
-56
@@ -812,69 +812,83 @@ def run_doctor(args):
|
||||
check_warn("No GITHUB_TOKEN", f"(60 req/hr rate limit — set in {_DHH}/.env for better rates)")
|
||||
|
||||
# =========================================================================
|
||||
# Honcho memory
|
||||
# Memory Provider (only check the active provider, if any)
|
||||
# =========================================================================
|
||||
print()
|
||||
print(color("◆ Honcho Memory", Colors.CYAN, Colors.BOLD))
|
||||
print(color("◆ Memory Provider", Colors.CYAN, Colors.BOLD))
|
||||
|
||||
_active_memory_provider = ""
|
||||
try:
|
||||
from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path
|
||||
hcfg = HonchoClientConfig.from_global_config()
|
||||
_honcho_cfg_path = resolve_config_path()
|
||||
import yaml as _yaml
|
||||
_mem_cfg_path = HERMES_HOME / "config.yaml"
|
||||
if _mem_cfg_path.exists():
|
||||
with open(_mem_cfg_path) as _f:
|
||||
_raw_cfg = _yaml.safe_load(_f) or {}
|
||||
_active_memory_provider = (_raw_cfg.get("memory") or {}).get("provider", "")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not _honcho_cfg_path.exists():
|
||||
check_warn("Honcho config not found", "run: hermes memory setup")
|
||||
elif not hcfg.enabled:
|
||||
check_info(f"Honcho disabled (set enabled: true in {_honcho_cfg_path} to activate)")
|
||||
elif not (hcfg.api_key or hcfg.base_url):
|
||||
check_fail("Honcho API key or base URL not set", "run: hermes memory setup")
|
||||
issues.append("No Honcho API key — run 'hermes memory setup'")
|
||||
else:
|
||||
from plugins.memory.honcho.client import get_honcho_client, reset_honcho_client
|
||||
reset_honcho_client()
|
||||
try:
|
||||
get_honcho_client(hcfg)
|
||||
check_ok(
|
||||
"Honcho connected",
|
||||
f"workspace={hcfg.workspace_id} mode={hcfg.recall_mode} freq={hcfg.write_frequency}",
|
||||
)
|
||||
except Exception as _e:
|
||||
check_fail("Honcho connection failed", str(_e))
|
||||
issues.append(f"Honcho unreachable: {_e}")
|
||||
except ImportError:
|
||||
check_warn("honcho-ai not installed", "pip install honcho-ai")
|
||||
except Exception as _e:
|
||||
check_warn("Honcho check failed", str(_e))
|
||||
if not _active_memory_provider:
|
||||
check_ok("Built-in memory active", "(no external provider configured — this is fine)")
|
||||
elif _active_memory_provider == "honcho":
|
||||
try:
|
||||
from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path
|
||||
hcfg = HonchoClientConfig.from_global_config()
|
||||
_honcho_cfg_path = resolve_config_path()
|
||||
|
||||
# =========================================================================
|
||||
# Mem0 memory
|
||||
# =========================================================================
|
||||
print()
|
||||
print(color("◆ Mem0 Memory", Colors.CYAN, Colors.BOLD))
|
||||
|
||||
try:
|
||||
from plugins.memory.mem0 import _load_config as _load_mem0_config
|
||||
mem0_cfg = _load_mem0_config()
|
||||
mem0_key = mem0_cfg.get("api_key", "")
|
||||
if mem0_key:
|
||||
check_ok("Mem0 API key configured")
|
||||
check_info(f"user_id={mem0_cfg.get('user_id', '?')} agent_id={mem0_cfg.get('agent_id', '?')}")
|
||||
# Check if mem0.json exists but is missing api_key (the bug we fixed)
|
||||
mem0_json = HERMES_HOME / "mem0.json"
|
||||
if mem0_json.exists():
|
||||
if not _honcho_cfg_path.exists():
|
||||
check_warn("Honcho config not found", "run: hermes memory setup")
|
||||
elif not hcfg.enabled:
|
||||
check_info(f"Honcho disabled (set enabled: true in {_honcho_cfg_path} to activate)")
|
||||
elif not (hcfg.api_key or hcfg.base_url):
|
||||
check_fail("Honcho API key or base URL not set", "run: hermes memory setup")
|
||||
issues.append("No Honcho API key — run 'hermes memory setup'")
|
||||
else:
|
||||
from plugins.memory.honcho.client import get_honcho_client, reset_honcho_client
|
||||
reset_honcho_client()
|
||||
try:
|
||||
import json as _json
|
||||
file_cfg = _json.loads(mem0_json.read_text())
|
||||
if not file_cfg.get("api_key") and mem0_key:
|
||||
check_info("api_key from .env (not in mem0.json) — this is fine")
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
check_warn("Mem0 not configured", "(set MEM0_API_KEY in .env or run hermes memory setup)")
|
||||
except ImportError:
|
||||
check_warn("Mem0 plugin not loadable", "(optional)")
|
||||
except Exception as _e:
|
||||
check_warn("Mem0 check failed", str(_e))
|
||||
get_honcho_client(hcfg)
|
||||
check_ok(
|
||||
"Honcho connected",
|
||||
f"workspace={hcfg.workspace_id} mode={hcfg.recall_mode} freq={hcfg.write_frequency}",
|
||||
)
|
||||
except Exception as _e:
|
||||
check_fail("Honcho connection failed", str(_e))
|
||||
issues.append(f"Honcho unreachable: {_e}")
|
||||
except ImportError:
|
||||
check_fail("honcho-ai not installed", "pip install honcho-ai")
|
||||
issues.append("Honcho is set as memory provider but honcho-ai is not installed")
|
||||
except Exception as _e:
|
||||
check_warn("Honcho check failed", str(_e))
|
||||
elif _active_memory_provider == "mem0":
|
||||
try:
|
||||
from plugins.memory.mem0 import _load_config as _load_mem0_config
|
||||
mem0_cfg = _load_mem0_config()
|
||||
mem0_key = mem0_cfg.get("api_key", "")
|
||||
if mem0_key:
|
||||
check_ok("Mem0 API key configured")
|
||||
check_info(f"user_id={mem0_cfg.get('user_id', '?')} agent_id={mem0_cfg.get('agent_id', '?')}")
|
||||
else:
|
||||
check_fail("Mem0 API key not set", "(set MEM0_API_KEY in .env or run hermes memory setup)")
|
||||
issues.append("Mem0 is set as memory provider but API key is missing")
|
||||
except ImportError:
|
||||
check_fail("Mem0 plugin not loadable", "pip install mem0ai")
|
||||
issues.append("Mem0 is set as memory provider but mem0ai is not installed")
|
||||
except Exception as _e:
|
||||
check_warn("Mem0 check failed", str(_e))
|
||||
else:
|
||||
# Generic check for other memory providers (openviking, hindsight, etc.)
|
||||
try:
|
||||
from plugins.memory import load_memory_provider
|
||||
_provider = load_memory_provider(_active_memory_provider)
|
||||
if _provider and _provider.is_available():
|
||||
check_ok(f"{_active_memory_provider} provider active")
|
||||
elif _provider:
|
||||
check_warn(f"{_active_memory_provider} configured but not available", "run: hermes memory status")
|
||||
else:
|
||||
check_warn(f"{_active_memory_provider} plugin not found", "run: hermes memory setup")
|
||||
except Exception as _e:
|
||||
check_warn(f"{_active_memory_provider} check failed", str(_e))
|
||||
|
||||
# =========================================================================
|
||||
# Profiles
|
||||
|
||||
@@ -0,0 +1,337 @@
|
||||
"""
|
||||
Dump command for hermes CLI.
|
||||
|
||||
Outputs a compact, plain-text summary of the user's Hermes setup
|
||||
that can be copy-pasted into Discord/GitHub/Telegram for support context.
|
||||
No ANSI colors, no checkmarks — just data.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
|
||||
from hermes_constants import display_hermes_home
|
||||
|
||||
|
||||
def _get_git_commit(project_root: Path) -> str:
|
||||
"""Return short git commit hash, or '(unknown)'."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-parse", "--short=8", "HEAD"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
cwd=str(project_root),
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return result.stdout.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return "(unknown)"
|
||||
|
||||
|
||||
def _key_present(name: str) -> str:
|
||||
"""Return 'set' or 'not set' for an env var."""
|
||||
return "set" if os.getenv(name) else "not set"
|
||||
|
||||
|
||||
def _redact(value: str) -> str:
|
||||
"""Redact all but first 4 and last 4 chars."""
|
||||
if not value:
|
||||
return ""
|
||||
if len(value) < 12:
|
||||
return "***"
|
||||
return value[:4] + "..." + value[-4:]
|
||||
|
||||
|
||||
def _gateway_status() -> str:
|
||||
"""Return a short gateway status string."""
|
||||
if sys.platform.startswith("linux"):
|
||||
try:
|
||||
from hermes_cli.gateway import get_service_name
|
||||
svc = get_service_name()
|
||||
except Exception:
|
||||
svc = "hermes-gateway"
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["systemctl", "--user", "is-active", svc],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
return "running (systemd)" if r.stdout.strip() == "active" else "stopped"
|
||||
except Exception:
|
||||
return "unknown"
|
||||
elif sys.platform == "darwin":
|
||||
try:
|
||||
from hermes_cli.gateway import get_launchd_label
|
||||
r = subprocess.run(
|
||||
["launchctl", "list", get_launchd_label()],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
return "loaded (launchd)" if r.returncode == 0 else "not loaded"
|
||||
except Exception:
|
||||
return "unknown"
|
||||
return "N/A"
|
||||
|
||||
|
||||
def _count_skills(hermes_home: Path) -> int:
|
||||
"""Count installed skills."""
|
||||
skills_dir = hermes_home / "skills"
|
||||
if not skills_dir.is_dir():
|
||||
return 0
|
||||
count = 0
|
||||
for item in skills_dir.rglob("SKILL.md"):
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
def _count_mcp_servers(config: dict) -> int:
|
||||
"""Count configured MCP servers."""
|
||||
mcp = config.get("mcp", {})
|
||||
servers = mcp.get("servers", {})
|
||||
return len(servers)
|
||||
|
||||
|
||||
def _cron_summary(hermes_home: Path) -> str:
|
||||
"""Return cron jobs summary."""
|
||||
jobs_file = hermes_home / "cron" / "jobs.json"
|
||||
if not jobs_file.exists():
|
||||
return "0"
|
||||
try:
|
||||
with open(jobs_file, encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
jobs = data.get("jobs", [])
|
||||
active = sum(1 for j in jobs if j.get("enabled", True))
|
||||
return f"{active} active / {len(jobs)} total"
|
||||
except Exception:
|
||||
return "(error reading)"
|
||||
|
||||
|
||||
def _configured_platforms() -> list[str]:
|
||||
"""Return list of configured messaging platform names."""
|
||||
checks = {
|
||||
"telegram": "TELEGRAM_BOT_TOKEN",
|
||||
"discord": "DISCORD_BOT_TOKEN",
|
||||
"slack": "SLACK_BOT_TOKEN",
|
||||
"whatsapp": "WHATSAPP_ENABLED",
|
||||
"signal": "SIGNAL_HTTP_URL",
|
||||
"email": "EMAIL_ADDRESS",
|
||||
"sms": "TWILIO_ACCOUNT_SID",
|
||||
"matrix": "MATRIX_HOMESERVER_URL",
|
||||
"mattermost": "MATTERMOST_URL",
|
||||
"homeassistant": "HASS_TOKEN",
|
||||
"dingtalk": "DINGTALK_CLIENT_ID",
|
||||
"feishu": "FEISHU_APP_ID",
|
||||
"wecom": "WECOM_BOT_ID",
|
||||
}
|
||||
return [name for name, env in checks.items() if os.getenv(env)]
|
||||
|
||||
|
||||
def _memory_provider(config: dict) -> str:
|
||||
"""Return the active memory provider name."""
|
||||
mem = config.get("memory", {})
|
||||
provider = mem.get("provider", "")
|
||||
return provider if provider else "built-in"
|
||||
|
||||
|
||||
def _get_model_and_provider(config: dict) -> tuple[str, str]:
|
||||
"""Extract model and provider from config."""
|
||||
model_cfg = config.get("model", "")
|
||||
if isinstance(model_cfg, dict):
|
||||
model = model_cfg.get("default") or model_cfg.get("model") or model_cfg.get("name") or "(not set)"
|
||||
provider = model_cfg.get("provider") or "(auto)"
|
||||
elif isinstance(model_cfg, str):
|
||||
model = model_cfg or "(not set)"
|
||||
provider = "(auto)"
|
||||
else:
|
||||
model = "(not set)"
|
||||
provider = "(auto)"
|
||||
return model, provider
|
||||
|
||||
|
||||
def _config_overrides(config: dict) -> dict[str, str]:
|
||||
"""Find non-default config values worth reporting.
|
||||
|
||||
Returns a flat dict of dotpath -> value for interesting overrides.
|
||||
"""
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
overrides = {}
|
||||
|
||||
# Sections with interesting user-facing overrides
|
||||
interesting_paths = [
|
||||
("agent", "max_turns"),
|
||||
("agent", "gateway_timeout"),
|
||||
("agent", "tool_use_enforcement"),
|
||||
("terminal", "backend"),
|
||||
("terminal", "docker_image"),
|
||||
("terminal", "persistent_shell"),
|
||||
("browser", "allow_private_urls"),
|
||||
("compression", "enabled"),
|
||||
("compression", "threshold"),
|
||||
("display", "streaming"),
|
||||
("display", "skin"),
|
||||
("display", "show_reasoning"),
|
||||
("smart_model_routing", "enabled"),
|
||||
("privacy", "redact_pii"),
|
||||
("tts", "provider"),
|
||||
]
|
||||
|
||||
for section, key in interesting_paths:
|
||||
default_section = DEFAULT_CONFIG.get(section, {})
|
||||
user_section = config.get(section, {})
|
||||
if not isinstance(default_section, dict) or not isinstance(user_section, dict):
|
||||
continue
|
||||
default_val = default_section.get(key)
|
||||
user_val = user_section.get(key)
|
||||
if user_val is not None and user_val != default_val:
|
||||
overrides[f"{section}.{key}"] = str(user_val)
|
||||
|
||||
# Toolsets (if different from default)
|
||||
default_toolsets = DEFAULT_CONFIG.get("toolsets", [])
|
||||
user_toolsets = config.get("toolsets", [])
|
||||
if user_toolsets != default_toolsets:
|
||||
overrides["toolsets"] = str(user_toolsets)
|
||||
|
||||
# Fallback providers
|
||||
fallbacks = config.get("fallback_providers", [])
|
||||
if fallbacks:
|
||||
overrides["fallback_providers"] = str(fallbacks)
|
||||
|
||||
return overrides
|
||||
|
||||
|
||||
def run_dump(args):
|
||||
"""Output a compact, copy-pasteable setup summary."""
|
||||
show_keys = getattr(args, "show_keys", False)
|
||||
|
||||
# Load env from .env file so key checks work
|
||||
from dotenv import load_dotenv
|
||||
env_path = get_env_path()
|
||||
if env_path.exists():
|
||||
try:
|
||||
load_dotenv(env_path, encoding="utf-8")
|
||||
except UnicodeDecodeError:
|
||||
load_dotenv(env_path, encoding="latin-1")
|
||||
# Also try project .env as dev fallback
|
||||
load_dotenv(get_project_root() / ".env", override=False, encoding="utf-8")
|
||||
|
||||
project_root = get_project_root()
|
||||
hermes_home = get_hermes_home()
|
||||
|
||||
try:
|
||||
from hermes_cli import __version__, __release_date__
|
||||
except ImportError:
|
||||
__version__ = "(unknown)"
|
||||
__release_date__ = ""
|
||||
|
||||
commit = _get_git_commit(project_root)
|
||||
|
||||
try:
|
||||
config = load_config()
|
||||
except Exception:
|
||||
config = {}
|
||||
|
||||
model, provider = _get_model_and_provider(config)
|
||||
|
||||
# Profile
|
||||
try:
|
||||
from hermes_cli.profiles import get_active_profile_name
|
||||
profile = get_active_profile_name() or "(default)"
|
||||
except Exception:
|
||||
profile = "(default)"
|
||||
|
||||
# Terminal backend
|
||||
terminal_cfg = config.get("terminal", {})
|
||||
backend = terminal_cfg.get("backend", "local")
|
||||
|
||||
# OpenAI SDK version
|
||||
try:
|
||||
import openai
|
||||
openai_ver = openai.__version__
|
||||
except ImportError:
|
||||
openai_ver = "not installed"
|
||||
|
||||
# OS info
|
||||
os_info = f"{platform.system()} {platform.release()} {platform.machine()}"
|
||||
|
||||
lines = []
|
||||
lines.append("--- hermes dump ---")
|
||||
ver_str = f"{__version__}"
|
||||
if __release_date__:
|
||||
ver_str += f" ({__release_date__})"
|
||||
ver_str += f" [{commit}]"
|
||||
lines.append(f"version: {ver_str}")
|
||||
lines.append(f"os: {os_info}")
|
||||
lines.append(f"python: {sys.version.split()[0]}")
|
||||
lines.append(f"openai_sdk: {openai_ver}")
|
||||
lines.append(f"profile: {profile}")
|
||||
lines.append(f"hermes_home: {display_hermes_home()}")
|
||||
lines.append(f"model: {model}")
|
||||
lines.append(f"provider: {provider}")
|
||||
lines.append(f"terminal: {backend}")
|
||||
|
||||
# API keys
|
||||
lines.append("")
|
||||
lines.append("api_keys:")
|
||||
api_keys = [
|
||||
("OPENROUTER_API_KEY", "openrouter"),
|
||||
("OPENAI_API_KEY", "openai"),
|
||||
("ANTHROPIC_API_KEY", "anthropic"),
|
||||
("ANTHROPIC_TOKEN", "anthropic_token"),
|
||||
("NOUS_API_KEY", "nous"),
|
||||
("GLM_API_KEY", "glm/zai"),
|
||||
("ZAI_API_KEY", "zai"),
|
||||
("KIMI_API_KEY", "kimi"),
|
||||
("MINIMAX_API_KEY", "minimax"),
|
||||
("DEEPSEEK_API_KEY", "deepseek"),
|
||||
("DASHSCOPE_API_KEY", "dashscope"),
|
||||
("HF_TOKEN", "huggingface"),
|
||||
("AI_GATEWAY_API_KEY", "ai_gateway"),
|
||||
("OPENCODE_ZEN_API_KEY", "opencode_zen"),
|
||||
("OPENCODE_GO_API_KEY", "opencode_go"),
|
||||
("KILOCODE_API_KEY", "kilocode"),
|
||||
("FIRECRAWL_API_KEY", "firecrawl"),
|
||||
("TAVILY_API_KEY", "tavily"),
|
||||
("BROWSERBASE_API_KEY", "browserbase"),
|
||||
("FAL_KEY", "fal"),
|
||||
("ELEVENLABS_API_KEY", "elevenlabs"),
|
||||
("GITHUB_TOKEN", "github"),
|
||||
]
|
||||
|
||||
for env_var, label in api_keys:
|
||||
val = os.getenv(env_var, "")
|
||||
if show_keys and val:
|
||||
display = _redact(val)
|
||||
else:
|
||||
display = "set" if val else "not set"
|
||||
lines.append(f" {label:<20} {display}")
|
||||
|
||||
# Features summary
|
||||
lines.append("")
|
||||
lines.append("features:")
|
||||
|
||||
toolsets = config.get("toolsets", ["hermes-cli"])
|
||||
lines.append(f" toolsets: {', '.join(toolsets) if toolsets else '(default)'}")
|
||||
lines.append(f" mcp_servers: {_count_mcp_servers(config)}")
|
||||
lines.append(f" memory_provider: {_memory_provider(config)}")
|
||||
lines.append(f" gateway: {_gateway_status()}")
|
||||
|
||||
platforms = _configured_platforms()
|
||||
lines.append(f" platforms: {', '.join(platforms) if platforms else 'none'}")
|
||||
lines.append(f" cron_jobs: {_cron_summary(hermes_home)}")
|
||||
lines.append(f" skills: {_count_skills(hermes_home)}")
|
||||
|
||||
# Config overrides (non-default values)
|
||||
overrides = _config_overrides(config)
|
||||
if overrides:
|
||||
lines.append("")
|
||||
lines.append("config_overrides:")
|
||||
for key, val in overrides.items():
|
||||
lines.append(f" {key}: {val}")
|
||||
|
||||
lines.append("--- end dump ---")
|
||||
|
||||
output = "\n".join(lines)
|
||||
print(output)
|
||||
@@ -1588,6 +1588,34 @@ _PLATFORMS = [
|
||||
"help": "Chat ID for scheduled results and notifications."},
|
||||
],
|
||||
},
|
||||
{
|
||||
"key": "bluebubbles",
|
||||
"label": "BlueBubbles (iMessage)",
|
||||
"emoji": "💬",
|
||||
"token_var": "BLUEBUBBLES_SERVER_URL",
|
||||
"setup_instructions": [
|
||||
"1. Install BlueBubbles on a Mac that will act as your iMessage server:",
|
||||
" https://bluebubbles.app/",
|
||||
"2. Complete the BlueBubbles setup wizard — sign in with your Apple ID",
|
||||
"3. In BlueBubbles Settings → API, note the Server URL and password",
|
||||
"4. The server URL is typically http://<your-mac-ip>:1234",
|
||||
"5. Hermes connects via the BlueBubbles REST API and receives",
|
||||
" incoming messages via a local webhook",
|
||||
"6. To authorize users, use DM pairing: hermes pairing generate bluebubbles",
|
||||
" Share the code — the user sends it via iMessage to get approved",
|
||||
],
|
||||
"vars": [
|
||||
{"name": "BLUEBUBBLES_SERVER_URL", "prompt": "BlueBubbles server URL (e.g. http://192.168.1.10:1234)", "password": False,
|
||||
"help": "The URL shown in BlueBubbles Settings → API."},
|
||||
{"name": "BLUEBUBBLES_PASSWORD", "prompt": "BlueBubbles server password", "password": True,
|
||||
"help": "The password shown in BlueBubbles Settings → API."},
|
||||
{"name": "BLUEBUBBLES_ALLOWED_USERS", "prompt": "Pre-authorized phone numbers or iMessage IDs (comma-separated, or leave empty for DM pairing)", "password": False,
|
||||
"is_allowlist": True,
|
||||
"help": "Optional — pre-authorize specific users. Leave empty to use DM pairing instead (recommended)."},
|
||||
{"name": "BLUEBUBBLES_HOME_CHANNEL", "prompt": "Home channel (phone number or iMessage ID for cron/notifications, or empty)", "password": False,
|
||||
"help": "Phone number or Apple ID to deliver cron results and notifications to."},
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
|
||||
+81
-1
@@ -918,6 +918,7 @@ def select_provider_and_model(args=None):
|
||||
"openrouter": "OpenRouter",
|
||||
"nous": "Nous Portal",
|
||||
"openai-codex": "OpenAI Codex",
|
||||
"qwen-oauth": "Qwen OAuth",
|
||||
"copilot-acp": "GitHub Copilot ACP",
|
||||
"copilot": "GitHub Copilot",
|
||||
"anthropic": "Anthropic",
|
||||
@@ -947,6 +948,7 @@ def select_provider_and_model(args=None):
|
||||
("openrouter", "OpenRouter (100+ models, pay-per-use)"),
|
||||
("anthropic", "Anthropic (Claude models — API key or Claude Code)"),
|
||||
("openai-codex", "OpenAI Codex"),
|
||||
("qwen-oauth", "Qwen OAuth (reuses local Qwen CLI login)"),
|
||||
("copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
|
||||
("huggingface", "Hugging Face Inference Providers (20+ open models)"),
|
||||
]
|
||||
@@ -1043,6 +1045,8 @@ def select_provider_and_model(args=None):
|
||||
_model_flow_nous(config, current_model, args=args)
|
||||
elif selected_provider == "openai-codex":
|
||||
_model_flow_openai_codex(config, current_model)
|
||||
elif selected_provider == "qwen-oauth":
|
||||
_model_flow_qwen_oauth(config, current_model)
|
||||
elif selected_provider == "copilot-acp":
|
||||
_model_flow_copilot_acp(config, current_model)
|
||||
elif selected_provider == "copilot":
|
||||
@@ -1359,6 +1363,56 @@ def _model_flow_openai_codex(config, current_model=""):
|
||||
|
||||
|
||||
|
||||
_DEFAULT_QWEN_PORTAL_MODELS = [
|
||||
"qwen3-coder-plus",
|
||||
"qwen3-coder",
|
||||
]
|
||||
|
||||
|
||||
def _model_flow_qwen_oauth(_config, current_model=""):
|
||||
"""Qwen OAuth provider: reuse local Qwen CLI login, then pick model."""
|
||||
from hermes_cli.auth import (
|
||||
get_qwen_auth_status,
|
||||
resolve_qwen_runtime_credentials,
|
||||
_prompt_model_selection,
|
||||
_save_model_choice,
|
||||
_update_config_for_provider,
|
||||
DEFAULT_QWEN_BASE_URL,
|
||||
)
|
||||
from hermes_cli.models import fetch_api_models
|
||||
|
||||
status = get_qwen_auth_status()
|
||||
if not status.get("logged_in"):
|
||||
print("Not logged into Qwen CLI OAuth.")
|
||||
print("Run: qwen auth qwen-oauth")
|
||||
auth_file = status.get("auth_file")
|
||||
if auth_file:
|
||||
print(f"Expected credentials file: {auth_file}")
|
||||
if status.get("error"):
|
||||
print(f"Error: {status.get('error')}")
|
||||
return
|
||||
|
||||
# Try live model discovery, fall back to curated list.
|
||||
models = None
|
||||
try:
|
||||
creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True)
|
||||
models = fetch_api_models(creds["api_key"], creds["base_url"])
|
||||
except Exception:
|
||||
pass
|
||||
if not models:
|
||||
models = list(_DEFAULT_QWEN_PORTAL_MODELS)
|
||||
|
||||
default = current_model or (models[0] if models else "qwen3-coder-plus")
|
||||
selected = _prompt_model_selection(models, current_model=default)
|
||||
if selected:
|
||||
_save_model_choice(selected)
|
||||
_update_config_for_provider("qwen-oauth", DEFAULT_QWEN_BASE_URL)
|
||||
print(f"Default model set to: {selected} (via Qwen OAuth)")
|
||||
else:
|
||||
print("No change.")
|
||||
|
||||
|
||||
|
||||
def _model_flow_custom(config):
|
||||
"""Custom endpoint: collect URL, API key, and model name.
|
||||
|
||||
@@ -1420,7 +1474,11 @@ def _model_flow_custom(config):
|
||||
f"Hermes will still save it."
|
||||
)
|
||||
if probe.get("suggested_base_url"):
|
||||
print(f" If this server expects /v1, try base URL: {probe['suggested_base_url']}")
|
||||
suggested = probe["suggested_base_url"]
|
||||
if suggested.endswith("/v1"):
|
||||
print(f" If this server expects /v1 in the path, try base URL: {suggested}")
|
||||
else:
|
||||
print(f" If /v1 should not be in the base URL, try: {suggested}")
|
||||
|
||||
# Select model — use probe results when available, fall back to manual input
|
||||
model_name = ""
|
||||
@@ -2585,6 +2643,12 @@ def cmd_doctor(args):
|
||||
run_doctor(args)
|
||||
|
||||
|
||||
def cmd_dump(args):
|
||||
"""Dump setup summary for support/debugging."""
|
||||
from hermes_cli.dump import run_dump
|
||||
run_dump(args)
|
||||
|
||||
|
||||
def cmd_config(args):
|
||||
"""Configuration management."""
|
||||
from hermes_cli.config import config_command
|
||||
@@ -4666,6 +4730,22 @@ For more help on a command:
|
||||
help="Attempt to fix issues automatically"
|
||||
)
|
||||
doctor_parser.set_defaults(func=cmd_doctor)
|
||||
|
||||
# =========================================================================
|
||||
# dump command
|
||||
# =========================================================================
|
||||
dump_parser = subparsers.add_parser(
|
||||
"dump",
|
||||
help="Dump setup summary for support/debugging",
|
||||
description="Output a compact, plain-text summary of your Hermes setup "
|
||||
"that can be copy-pasted into Discord/GitHub for support context"
|
||||
)
|
||||
dump_parser.add_argument(
|
||||
"--show-keys",
|
||||
action="store_true",
|
||||
help="Show redacted API key prefixes (first/last 4 chars) instead of just set/not set"
|
||||
)
|
||||
dump_parser.set_defaults(func=cmd_dump)
|
||||
|
||||
# =========================================================================
|
||||
# config command
|
||||
|
||||
@@ -84,6 +84,7 @@ _PASSTHROUGH_PROVIDERS: frozenset[str] = frozenset({
|
||||
"minimax",
|
||||
"minimax-cn",
|
||||
"alibaba",
|
||||
"qwen-oauth",
|
||||
"huggingface",
|
||||
"openai-codex",
|
||||
"custom",
|
||||
|
||||
@@ -537,8 +537,11 @@ def switch_model(
|
||||
)
|
||||
else:
|
||||
# --- Step c: On aggregator, convert vendor:model to vendor/model ---
|
||||
# Only convert when there's no slash — a slash means the name
|
||||
# is already in vendor/model format and the colon is a variant
|
||||
# tag (:free, :extended, :fast) that must be preserved.
|
||||
colon_pos = raw_input.find(":")
|
||||
if colon_pos > 0 and is_aggregator(current_provider):
|
||||
if colon_pos > 0 and "/" not in raw_input and is_aggregator(current_provider):
|
||||
left = raw_input[:colon_pos].strip().lower()
|
||||
right = raw_input[colon_pos + 1:].strip()
|
||||
if left and right:
|
||||
@@ -730,6 +733,7 @@ def list_authenticated_providers(
|
||||
fetch_models_dev,
|
||||
get_provider_info as _mdev_pinfo,
|
||||
)
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
|
||||
|
||||
results: List[dict] = []
|
||||
@@ -750,9 +754,16 @@ def list_authenticated_providers(
|
||||
if not isinstance(pdata, dict):
|
||||
continue
|
||||
|
||||
env_vars = pdata.get("env", [])
|
||||
if not isinstance(env_vars, list):
|
||||
continue
|
||||
# Prefer auth.py PROVIDER_REGISTRY for env var names — it's our
|
||||
# source of truth. models.dev can have wrong mappings (e.g.
|
||||
# minimax-cn → MINIMAX_API_KEY instead of MINIMAX_CN_API_KEY).
|
||||
pconfig = PROVIDER_REGISTRY.get(hermes_id)
|
||||
if pconfig and pconfig.api_key_env_vars:
|
||||
env_vars = list(pconfig.api_key_env_vars)
|
||||
else:
|
||||
env_vars = pdata.get("env", [])
|
||||
if not isinstance(env_vars, list):
|
||||
continue
|
||||
|
||||
# Check if any env var is set
|
||||
has_creds = any(os.environ.get(ev) for ev in env_vars)
|
||||
|
||||
@@ -483,6 +483,7 @@ _PROVIDER_LABELS = {
|
||||
"ai-gateway": "AI Gateway",
|
||||
"kilocode": "Kilo Code",
|
||||
"alibaba": "Alibaba Cloud (DashScope)",
|
||||
"qwen-oauth": "Qwen OAuth (Portal)",
|
||||
"huggingface": "Hugging Face",
|
||||
"custom": "Custom endpoint",
|
||||
}
|
||||
@@ -522,6 +523,7 @@ _PROVIDER_ALIASES = {
|
||||
"aliyun": "alibaba",
|
||||
"qwen": "alibaba",
|
||||
"alibaba-cloud": "alibaba",
|
||||
"qwen-portal": "qwen-oauth",
|
||||
"hf": "huggingface",
|
||||
"hugging-face": "huggingface",
|
||||
"huggingface-hub": "huggingface",
|
||||
@@ -767,6 +769,7 @@ def list_available_providers() -> list[dict[str, str]]:
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "huggingface",
|
||||
"zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
|
||||
"qwen-oauth",
|
||||
"opencode-zen", "opencode-go",
|
||||
"ai-gateway", "deepseek", "custom",
|
||||
]
|
||||
@@ -1529,7 +1532,7 @@ def probe_api_models(
|
||||
|
||||
return {
|
||||
"models": None,
|
||||
"probed_url": tried[-1] if tried else normalized.rstrip("/") + "/models",
|
||||
"probed_url": tried[0] if tried else normalized.rstrip("/") + "/models",
|
||||
"resolved_base_url": normalized,
|
||||
"suggested_base_url": alternate_base if alternate_base != normalized else None,
|
||||
"used_fallback": False,
|
||||
|
||||
@@ -102,7 +102,7 @@ _RESERVED_NAMES = frozenset({
|
||||
# Hermes subcommands that cannot be used as profile names/aliases
|
||||
_HERMES_SUBCOMMANDS = frozenset({
|
||||
"chat", "model", "gateway", "setup", "whatsapp", "login", "logout",
|
||||
"status", "cron", "doctor", "config", "pairing", "skills", "tools",
|
||||
"status", "cron", "doctor", "dump", "config", "pairing", "skills", "tools",
|
||||
"mcp", "sessions", "insights", "version", "update", "uninstall",
|
||||
"profile", "plugins", "honcho", "acp",
|
||||
})
|
||||
@@ -1007,7 +1007,7 @@ _hermes_completion() {
|
||||
|
||||
# Top-level subcommands
|
||||
if [[ "$COMP_CWORD" == 1 ]]; then
|
||||
local commands="chat model gateway setup status cron doctor config skills tools mcp sessions profile update version"
|
||||
local commands="chat model gateway setup status cron doctor dump config skills tools mcp sessions profile update version"
|
||||
COMPREPLY=($(compgen -W "$commands" -- "$cur"))
|
||||
fi
|
||||
}
|
||||
@@ -1032,7 +1032,7 @@ _hermes() {
|
||||
_arguments \\
|
||||
'-p[Profile name]:profile:($profiles)' \\
|
||||
'--profile[Profile name]:profile:($profiles)' \\
|
||||
'1:command:(chat model gateway setup status cron doctor config skills tools mcp sessions profile update version)' \\
|
||||
'1:command:(chat model gateway setup status cron doctor dump config skills tools mcp sessions profile update version)' \\
|
||||
'*::arg:->args'
|
||||
|
||||
case $words[1] in
|
||||
|
||||
@@ -58,6 +58,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
auth_type="oauth_external",
|
||||
base_url_override="https://chatgpt.com/backend-api/codex",
|
||||
),
|
||||
"qwen-oauth": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
auth_type="oauth_external",
|
||||
base_url_override="https://portal.qwen.ai/v1",
|
||||
base_url_env_var="HERMES_QWEN_BASE_URL",
|
||||
),
|
||||
"copilot-acp": HermesOverlay(
|
||||
transport="codex_responses",
|
||||
auth_type="external_process",
|
||||
|
||||
@@ -14,11 +14,13 @@ from agent.credential_pool import CredentialPool, PooledCredential, get_custom_p
|
||||
from hermes_cli.auth import (
|
||||
AuthError,
|
||||
DEFAULT_CODEX_BASE_URL,
|
||||
DEFAULT_QWEN_BASE_URL,
|
||||
PROVIDER_REGISTRY,
|
||||
format_auth_error,
|
||||
resolve_provider,
|
||||
resolve_nous_runtime_credentials,
|
||||
resolve_codex_runtime_credentials,
|
||||
resolve_qwen_runtime_credentials,
|
||||
resolve_api_key_provider_credentials,
|
||||
resolve_external_process_provider_credentials,
|
||||
has_usable_secret,
|
||||
@@ -148,6 +150,9 @@ def _resolve_runtime_from_pool_entry(
|
||||
if provider == "openai-codex":
|
||||
api_mode = "codex_responses"
|
||||
base_url = base_url or DEFAULT_CODEX_BASE_URL
|
||||
elif provider == "qwen-oauth":
|
||||
api_mode = "chat_completions"
|
||||
base_url = base_url or DEFAULT_QWEN_BASE_URL
|
||||
elif provider == "anthropic":
|
||||
api_mode = "anthropic_messages"
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
@@ -691,6 +696,24 @@ def resolve_runtime_provider(
|
||||
logger.info("Auto-detected Codex provider but credentials failed; "
|
||||
"falling through to next provider.")
|
||||
|
||||
if provider == "qwen-oauth":
|
||||
try:
|
||||
creds = resolve_qwen_runtime_credentials()
|
||||
return {
|
||||
"provider": "qwen-oauth",
|
||||
"api_mode": "chat_completions",
|
||||
"base_url": creds.get("base_url", "").rstrip("/"),
|
||||
"api_key": creds.get("api_key", ""),
|
||||
"source": creds.get("source", "qwen-cli"),
|
||||
"expires_at_ms": creds.get("expires_at_ms"),
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
except AuthError:
|
||||
if requested_provider != "auto":
|
||||
raise
|
||||
logger.info("Qwen OAuth credentials failed; "
|
||||
"falling through to next provider.")
|
||||
|
||||
if provider == "copilot-acp":
|
||||
creds = resolve_external_process_provider_credentials(provider)
|
||||
return {
|
||||
|
||||
+242
-23
@@ -2167,6 +2167,71 @@ def _setup_whatsapp():
|
||||
print_info("or personal self-chat) and pair via QR code.")
|
||||
|
||||
|
||||
def _setup_bluebubbles():
|
||||
"""Configure BlueBubbles iMessage gateway."""
|
||||
print_header("BlueBubbles (iMessage)")
|
||||
existing = get_env_value("BLUEBUBBLES_SERVER_URL")
|
||||
if existing:
|
||||
print_info("BlueBubbles: already configured")
|
||||
if not prompt_yes_no("Reconfigure BlueBubbles?", False):
|
||||
return
|
||||
|
||||
print_info("Connects Hermes to iMessage via BlueBubbles — a free, open-source")
|
||||
print_info("macOS server that bridges iMessage to any device.")
|
||||
print_info(" Requires a Mac running BlueBubbles Server v1.0.0+")
|
||||
print_info(" Download: https://bluebubbles.app/")
|
||||
print()
|
||||
print_info("In BlueBubbles Server → Settings → API, note your Server URL and Password.")
|
||||
print()
|
||||
|
||||
server_url = prompt("BlueBubbles server URL (e.g. http://192.168.1.10:1234)")
|
||||
if not server_url:
|
||||
print_warning("Server URL is required — skipping BlueBubbles setup")
|
||||
return
|
||||
save_env_value("BLUEBUBBLES_SERVER_URL", server_url.rstrip("/"))
|
||||
|
||||
password = prompt("BlueBubbles server password", password=True)
|
||||
if not password:
|
||||
print_warning("Password is required — skipping BlueBubbles setup")
|
||||
return
|
||||
save_env_value("BLUEBUBBLES_PASSWORD", password)
|
||||
print_success("BlueBubbles credentials saved")
|
||||
|
||||
print()
|
||||
print_info("🔒 Security: Restrict who can message your bot")
|
||||
print_info(" Use iMessage addresses: email (user@icloud.com) or phone (+15551234567)")
|
||||
print()
|
||||
allowed_users = prompt("Allowed iMessage addresses (comma-separated, leave empty for open access)")
|
||||
if allowed_users:
|
||||
save_env_value("BLUEBUBBLES_ALLOWED_USERS", allowed_users.replace(" ", ""))
|
||||
print_success("BlueBubbles allowlist configured")
|
||||
else:
|
||||
print_info("⚠️ No allowlist set — anyone who can iMessage you can use the bot!")
|
||||
|
||||
print()
|
||||
print_info("📬 Home Channel: phone or email for cron job delivery and notifications.")
|
||||
print_info(" You can also set this later with /set-home in your iMessage chat.")
|
||||
home_channel = prompt("Home channel address (leave empty to set later)")
|
||||
if home_channel:
|
||||
save_env_value("BLUEBUBBLES_HOME_CHANNEL", home_channel)
|
||||
|
||||
print()
|
||||
print_info("Advanced settings (defaults are fine for most setups):")
|
||||
if prompt_yes_no("Configure webhook listener settings?", False):
|
||||
webhook_port = prompt("Webhook listener port (default: 8645)")
|
||||
if webhook_port:
|
||||
try:
|
||||
save_env_value("BLUEBUBBLES_WEBHOOK_PORT", str(int(webhook_port)))
|
||||
print_success(f"Webhook port set to {webhook_port}")
|
||||
except ValueError:
|
||||
print_warning("Invalid port number, using default 8645")
|
||||
|
||||
print()
|
||||
print_info("Requires the BlueBubbles Private API helper for typing indicators,")
|
||||
print_info("read receipts, and tapback reactions. Basic messaging works without it.")
|
||||
print_info(" Install: https://docs.bluebubbles.app/helper-bundle/installation")
|
||||
|
||||
|
||||
def _setup_webhooks():
|
||||
"""Configure webhook integration."""
|
||||
print_header("Webhooks")
|
||||
@@ -2221,6 +2286,7 @@ _GATEWAY_PLATFORMS = [
|
||||
("Matrix", "MATRIX_ACCESS_TOKEN", _setup_matrix),
|
||||
("Mattermost", "MATTERMOST_TOKEN", _setup_mattermost),
|
||||
("WhatsApp", "WHATSAPP_ENABLED", _setup_whatsapp),
|
||||
("BlueBubbles (iMessage)", "BLUEBUBBLES_SERVER_URL", _setup_bluebubbles),
|
||||
("Webhooks (GitHub, GitLab, etc.)", "WEBHOOK_ENABLED", _setup_webhooks),
|
||||
]
|
||||
|
||||
@@ -2264,6 +2330,7 @@ def setup_gateway(config: dict):
|
||||
or get_env_value("MATRIX_ACCESS_TOKEN")
|
||||
or get_env_value("MATRIX_PASSWORD")
|
||||
or get_env_value("WHATSAPP_ENABLED")
|
||||
or get_env_value("BLUEBUBBLES_SERVER_URL")
|
||||
or get_env_value("WEBHOOK_ENABLED")
|
||||
)
|
||||
if any_messaging:
|
||||
@@ -2283,6 +2350,8 @@ def setup_gateway(config: dict):
|
||||
missing_home.append("Discord")
|
||||
if get_env_value("SLACK_BOT_TOKEN") and not get_env_value("SLACK_HOME_CHANNEL"):
|
||||
missing_home.append("Slack")
|
||||
if get_env_value("BLUEBUBBLES_SERVER_URL") and not get_env_value("BLUEBUBBLES_HOME_CHANNEL"):
|
||||
missing_home.append("BlueBubbles")
|
||||
|
||||
if missing_home:
|
||||
print()
|
||||
@@ -2453,6 +2522,8 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
|
||||
platforms.append("WhatsApp")
|
||||
if get_env_value("SIGNAL_ACCOUNT"):
|
||||
platforms.append("Signal")
|
||||
if get_env_value("BLUEBUBBLES_SERVER_URL"):
|
||||
platforms.append("BlueBubbles")
|
||||
if platforms:
|
||||
return ", ".join(platforms)
|
||||
return None # No platforms configured — section must run
|
||||
@@ -2501,9 +2572,120 @@ _OPENCLAW_SCRIPT = (
|
||||
)
|
||||
|
||||
|
||||
def _load_openclaw_migration_module():
|
||||
"""Load the openclaw_to_hermes migration script as a module.
|
||||
|
||||
Returns the loaded module, or None if the script can't be loaded.
|
||||
"""
|
||||
if not _OPENCLAW_SCRIPT.exists():
|
||||
return None
|
||||
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
"openclaw_to_hermes", _OPENCLAW_SCRIPT
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
return None
|
||||
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
# Register in sys.modules so @dataclass can resolve the module
|
||||
# (Python 3.11+ requires this for dynamically loaded modules)
|
||||
import sys as _sys
|
||||
_sys.modules[spec.name] = mod
|
||||
try:
|
||||
spec.loader.exec_module(mod)
|
||||
except Exception:
|
||||
_sys.modules.pop(spec.name, None)
|
||||
raise
|
||||
return mod
|
||||
|
||||
|
||||
# Item kinds that represent high-impact changes warranting explicit warnings.
|
||||
# Gateway tokens/channels can hijack messaging platforms from the old agent.
|
||||
# Config values may have different semantics between OpenClaw and Hermes.
|
||||
# Instruction/context files (.md) can contain incompatible setup procedures.
|
||||
_HIGH_IMPACT_KIND_KEYWORDS = {
|
||||
"gateway": "⚠ Gateway/messaging — this will configure Hermes to use your OpenClaw messaging channels",
|
||||
"telegram": "⚠ Telegram — this will point Hermes at your OpenClaw Telegram bot",
|
||||
"slack": "⚠ Slack — this will point Hermes at your OpenClaw Slack workspace",
|
||||
"discord": "⚠ Discord — this will point Hermes at your OpenClaw Discord bot",
|
||||
"whatsapp": "⚠ WhatsApp — this will point Hermes at your OpenClaw WhatsApp connection",
|
||||
"config": "⚠ Config values — OpenClaw settings may not map 1:1 to Hermes equivalents",
|
||||
"soul": "⚠ Instruction file — may contain OpenClaw-specific setup/restart procedures",
|
||||
"memory": "⚠ Memory/context file — may reference OpenClaw-specific infrastructure",
|
||||
"context": "⚠ Context file — may contain OpenClaw-specific instructions",
|
||||
}
|
||||
|
||||
|
||||
def _print_migration_preview(report: dict):
|
||||
"""Print a detailed dry-run preview of what migration would do.
|
||||
|
||||
Groups items by category and adds explicit warnings for high-impact
|
||||
changes like gateway token takeover and config value differences.
|
||||
"""
|
||||
items = report.get("items", [])
|
||||
if not items:
|
||||
print_info("Nothing to migrate.")
|
||||
return
|
||||
|
||||
migrated_items = [i for i in items if i.get("status") == "migrated"]
|
||||
conflict_items = [i for i in items if i.get("status") == "conflict"]
|
||||
skipped_items = [i for i in items if i.get("status") == "skipped"]
|
||||
|
||||
warnings_shown = set()
|
||||
|
||||
if migrated_items:
|
||||
print(color(" Would import:", Colors.GREEN))
|
||||
for item in migrated_items:
|
||||
kind = item.get("kind", "unknown")
|
||||
dest = item.get("destination", "")
|
||||
if dest:
|
||||
dest_short = str(dest).replace(str(Path.home()), "~")
|
||||
print(f" {kind:<22s} → {dest_short}")
|
||||
else:
|
||||
print(f" {kind}")
|
||||
|
||||
# Check for high-impact items and collect warnings
|
||||
kind_lower = kind.lower()
|
||||
dest_lower = str(dest).lower()
|
||||
for keyword, warning in _HIGH_IMPACT_KIND_KEYWORDS.items():
|
||||
if keyword in kind_lower or keyword in dest_lower:
|
||||
warnings_shown.add(warning)
|
||||
print()
|
||||
|
||||
if conflict_items:
|
||||
print(color(" Would overwrite (conflicts with existing Hermes config):", Colors.YELLOW))
|
||||
for item in conflict_items:
|
||||
kind = item.get("kind", "unknown")
|
||||
reason = item.get("reason", "already exists")
|
||||
print(f" {kind:<22s} {reason}")
|
||||
print()
|
||||
|
||||
if skipped_items:
|
||||
print(color(" Would skip:", Colors.DIM))
|
||||
for item in skipped_items:
|
||||
kind = item.get("kind", "unknown")
|
||||
reason = item.get("reason", "")
|
||||
print(f" {kind:<22s} {reason}")
|
||||
print()
|
||||
|
||||
# Print collected warnings
|
||||
if warnings_shown:
|
||||
print(color(" ── Warnings ──", Colors.YELLOW))
|
||||
for warning in sorted(warnings_shown):
|
||||
print(color(f" {warning}", Colors.YELLOW))
|
||||
print()
|
||||
print(color(" Note: OpenClaw config values may have different semantics in Hermes.", Colors.YELLOW))
|
||||
print(color(" For example, OpenClaw's tool_call_execution: \"auto\" ≠ Hermes's yolo mode.", Colors.YELLOW))
|
||||
print(color(" Instruction files (.md) from OpenClaw may contain incompatible procedures.", Colors.YELLOW))
|
||||
print()
|
||||
|
||||
|
||||
def _offer_openclaw_migration(hermes_home: Path) -> bool:
|
||||
"""Detect ~/.openclaw and offer to migrate during first-time setup.
|
||||
|
||||
Runs a dry-run first to show the user exactly what would be imported,
|
||||
overwritten, or taken over. Only executes after explicit confirmation.
|
||||
|
||||
Returns True if migration ran successfully, False otherwise.
|
||||
"""
|
||||
openclaw_dir = Path.home() / ".openclaw"
|
||||
@@ -2516,12 +2698,12 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool:
|
||||
print()
|
||||
print_header("OpenClaw Installation Detected")
|
||||
print_info(f"Found OpenClaw data at {openclaw_dir}")
|
||||
print_info("Hermes can import your settings, memories, skills, and API keys.")
|
||||
print_info("Hermes can preview what would be imported before making any changes.")
|
||||
print()
|
||||
|
||||
if not prompt_yes_no("Would you like to import from OpenClaw?", default=True):
|
||||
if not prompt_yes_no("Would you like to see what can be imported?", default=True):
|
||||
print_info(
|
||||
"Skipping migration. You can run it later via the openclaw-migration skill."
|
||||
"Skipping migration. You can run it later with: hermes claw migrate --dry-run"
|
||||
)
|
||||
return False
|
||||
|
||||
@@ -2530,34 +2712,71 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool:
|
||||
if not config_path.exists():
|
||||
save_config(load_config())
|
||||
|
||||
# Dynamically load the migration script
|
||||
# Load the migration module
|
||||
try:
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
"openclaw_to_hermes", _OPENCLAW_SCRIPT
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
mod = _load_openclaw_migration_module()
|
||||
if mod is None:
|
||||
print_warning("Could not load migration script.")
|
||||
return False
|
||||
except Exception as e:
|
||||
print_warning(f"Could not load migration script: {e}")
|
||||
logger.debug("OpenClaw migration module load error", exc_info=True)
|
||||
return False
|
||||
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
# Register in sys.modules so @dataclass can resolve the module
|
||||
# (Python 3.11+ requires this for dynamically loaded modules)
|
||||
import sys as _sys
|
||||
_sys.modules[spec.name] = mod
|
||||
try:
|
||||
spec.loader.exec_module(mod)
|
||||
except Exception:
|
||||
_sys.modules.pop(spec.name, None)
|
||||
raise
|
||||
|
||||
# Run migration with the "full" preset, execute mode, no overwrite
|
||||
# ── Phase 1: Dry-run preview ──
|
||||
try:
|
||||
selected = mod.resolve_selected_options(None, None, preset="full")
|
||||
dry_migrator = mod.Migrator(
|
||||
source_root=openclaw_dir.resolve(),
|
||||
target_root=hermes_home.resolve(),
|
||||
execute=False, # dry-run — no files modified
|
||||
workspace_target=None,
|
||||
overwrite=True, # show everything including conflicts
|
||||
migrate_secrets=True,
|
||||
output_dir=None,
|
||||
selected_options=selected,
|
||||
preset_name="full",
|
||||
)
|
||||
preview_report = dry_migrator.migrate()
|
||||
except Exception as e:
|
||||
print_warning(f"Migration preview failed: {e}")
|
||||
logger.debug("OpenClaw migration preview error", exc_info=True)
|
||||
return False
|
||||
|
||||
# Display the full preview
|
||||
preview_summary = preview_report.get("summary", {})
|
||||
preview_count = preview_summary.get("migrated", 0)
|
||||
|
||||
if preview_count == 0:
|
||||
print()
|
||||
print_info("Nothing to import from OpenClaw.")
|
||||
return False
|
||||
|
||||
print()
|
||||
print_header(f"Migration Preview — {preview_count} item(s) would be imported")
|
||||
print_info("No changes have been made yet. Review the list below:")
|
||||
print()
|
||||
_print_migration_preview(preview_report)
|
||||
|
||||
# ── Phase 2: Confirm and execute ──
|
||||
if not prompt_yes_no("Proceed with migration?", default=False):
|
||||
print_info(
|
||||
"Migration cancelled. You can run it later with: hermes claw migrate"
|
||||
)
|
||||
print_info(
|
||||
"Use --dry-run to preview again, or --preset minimal for a lighter import."
|
||||
)
|
||||
return False
|
||||
|
||||
# Execute the migration — overwrite=False so existing Hermes configs are
|
||||
# preserved. The user saw the preview; conflicts are skipped by default.
|
||||
try:
|
||||
migrator = mod.Migrator(
|
||||
source_root=openclaw_dir.resolve(),
|
||||
target_root=hermes_home.resolve(),
|
||||
execute=True,
|
||||
workspace_target=None,
|
||||
overwrite=True,
|
||||
overwrite=False, # preserve existing Hermes config
|
||||
migrate_secrets=True,
|
||||
output_dir=None,
|
||||
selected_options=selected,
|
||||
@@ -2569,7 +2788,7 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool:
|
||||
logger.debug("OpenClaw migration error", exc_info=True)
|
||||
return False
|
||||
|
||||
# Print summary
|
||||
# Print final summary
|
||||
summary = report.get("summary", {})
|
||||
migrated = summary.get("migrated", 0)
|
||||
skipped = summary.get("skipped", 0)
|
||||
@@ -2580,7 +2799,7 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool:
|
||||
if migrated:
|
||||
print_success(f"Imported {migrated} item(s) from OpenClaw.")
|
||||
if conflicts:
|
||||
print_info(f"Skipped {conflicts} item(s) that already exist in Hermes.")
|
||||
print_info(f"Skipped {conflicts} item(s) that already exist in Hermes (use hermes claw migrate --overwrite to force).")
|
||||
if skipped:
|
||||
print_info(f"Skipped {skipped} item(s) (not found or unchanged).")
|
||||
if errors:
|
||||
|
||||
@@ -23,6 +23,7 @@ PLATFORMS = {
|
||||
"slack": "💼 Slack",
|
||||
"whatsapp": "📱 WhatsApp",
|
||||
"signal": "📡 Signal",
|
||||
"bluebubbles": "💬 BlueBubbles",
|
||||
"email": "📧 Email",
|
||||
"homeassistant": "🏠 Home Assistant",
|
||||
"mattermost": "💬 Mattermost",
|
||||
|
||||
+19
-1
@@ -153,12 +153,14 @@ def show_status(args):
|
||||
print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD))
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status
|
||||
from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status, get_qwen_auth_status
|
||||
nous_status = get_nous_auth_status()
|
||||
codex_status = get_codex_auth_status()
|
||||
qwen_status = get_qwen_auth_status()
|
||||
except Exception:
|
||||
nous_status = {}
|
||||
codex_status = {}
|
||||
qwen_status = {}
|
||||
|
||||
nous_logged_in = bool(nous_status.get("logged_in"))
|
||||
print(
|
||||
@@ -189,6 +191,21 @@ def show_status(args):
|
||||
if codex_status.get("error") and not codex_logged_in:
|
||||
print(f" Error: {codex_status.get('error')}")
|
||||
|
||||
qwen_logged_in = bool(qwen_status.get("logged_in"))
|
||||
print(
|
||||
f" {'Qwen OAuth':<12} {check_mark(qwen_logged_in)} "
|
||||
f"{'logged in' if qwen_logged_in else 'not logged in (run: qwen auth qwen-oauth)'}"
|
||||
)
|
||||
qwen_auth_file = qwen_status.get("auth_file")
|
||||
if qwen_auth_file:
|
||||
print(f" Auth file: {qwen_auth_file}")
|
||||
qwen_exp = qwen_status.get("expires_at_ms")
|
||||
if qwen_exp:
|
||||
from datetime import datetime, timezone
|
||||
print(f" Access exp: {datetime.fromtimestamp(int(qwen_exp) / 1000, tz=timezone.utc).isoformat()}")
|
||||
if qwen_status.get("error") and not qwen_logged_in:
|
||||
print(f" Error: {qwen_status.get('error')}")
|
||||
|
||||
# =========================================================================
|
||||
# Nous Subscription Features
|
||||
# =========================================================================
|
||||
@@ -285,6 +302,7 @@ def show_status(args):
|
||||
"DingTalk": ("DINGTALK_CLIENT_ID", None),
|
||||
"Feishu": ("FEISHU_APP_ID", "FEISHU_HOME_CHANNEL"),
|
||||
"WeCom": ("WECOM_BOT_ID", "WECOM_HOME_CHANNEL"),
|
||||
"BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"),
|
||||
}
|
||||
|
||||
for name, (token_var, home_var) in platforms.items():
|
||||
|
||||
@@ -126,6 +126,7 @@ PLATFORMS = {
|
||||
"slack": {"label": "💼 Slack", "default_toolset": "hermes-slack"},
|
||||
"whatsapp": {"label": "📱 WhatsApp", "default_toolset": "hermes-whatsapp"},
|
||||
"signal": {"label": "📡 Signal", "default_toolset": "hermes-signal"},
|
||||
"bluebubbles": {"label": "💙 BlueBubbles", "default_toolset": "hermes-bluebubbles"},
|
||||
"homeassistant": {"label": "🏠 Home Assistant", "default_toolset": "hermes-homeassistant"},
|
||||
"email": {"label": "📧 Email", "default_toolset": "hermes-email"},
|
||||
"matrix": {"label": "💬 Matrix", "default_toolset": "hermes-matrix"},
|
||||
|
||||
+21
-25
@@ -1235,10 +1235,10 @@ class SessionDB:
|
||||
self._execute_write(_do)
|
||||
|
||||
def delete_session(self, session_id: str) -> bool:
|
||||
"""Delete a session, its child sessions, and all their messages.
|
||||
"""Delete a session and all its messages.
|
||||
|
||||
Child sessions (subagent runs, compression continuations) are deleted
|
||||
first to satisfy the ``parent_session_id`` foreign key constraint.
|
||||
Child sessions are orphaned (parent_session_id set to NULL) rather
|
||||
than cascade-deleted, so they remain accessible independently.
|
||||
Returns True if the session was found and deleted.
|
||||
"""
|
||||
def _do(conn):
|
||||
@@ -1247,15 +1247,12 @@ class SessionDB:
|
||||
)
|
||||
if cursor.fetchone()[0] == 0:
|
||||
return False
|
||||
# Delete child sessions first (FK constraint)
|
||||
child_ids = [r[0] for r in conn.execute(
|
||||
"SELECT id FROM sessions WHERE parent_session_id = ?",
|
||||
# Orphan child sessions so FK constraint is satisfied
|
||||
conn.execute(
|
||||
"UPDATE sessions SET parent_session_id = NULL "
|
||||
"WHERE parent_session_id = ?",
|
||||
(session_id,),
|
||||
).fetchall()]
|
||||
for cid in child_ids:
|
||||
conn.execute("DELETE FROM messages WHERE session_id = ?", (cid,))
|
||||
conn.execute("DELETE FROM sessions WHERE id = ?", (cid,))
|
||||
# Delete the session itself
|
||||
)
|
||||
conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
|
||||
conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
|
||||
return True
|
||||
@@ -1264,9 +1261,9 @@ class SessionDB:
|
||||
def prune_sessions(self, older_than_days: int = 90, source: str = None) -> int:
|
||||
"""Delete sessions older than N days. Returns count of deleted sessions.
|
||||
|
||||
Only prunes ended sessions (not active ones). Child sessions whose
|
||||
parents are being pruned are deleted first to satisfy the
|
||||
``parent_session_id`` foreign key constraint.
|
||||
Only prunes ended sessions (not active ones). Child sessions outside
|
||||
the prune window are orphaned (parent_session_id set to NULL) rather
|
||||
than cascade-deleted.
|
||||
"""
|
||||
cutoff = time.time() - (older_than_days * 86400)
|
||||
|
||||
@@ -1284,17 +1281,16 @@ class SessionDB:
|
||||
)
|
||||
session_ids = set(row["id"] for row in cursor.fetchall())
|
||||
|
||||
# Delete children first whose parents are in the prune set
|
||||
# (avoids FK constraint errors)
|
||||
for sid in list(session_ids):
|
||||
child_ids = [r[0] for r in conn.execute(
|
||||
"SELECT id FROM sessions WHERE parent_session_id = ?",
|
||||
(sid,),
|
||||
).fetchall()]
|
||||
for cid in child_ids:
|
||||
conn.execute("DELETE FROM messages WHERE session_id = ?", (cid,))
|
||||
conn.execute("DELETE FROM sessions WHERE id = ?", (cid,))
|
||||
session_ids.discard(cid) # don't double-delete
|
||||
if not session_ids:
|
||||
return 0
|
||||
|
||||
# Orphan any sessions whose parent is about to be deleted
|
||||
placeholders = ",".join("?" * len(session_ids))
|
||||
conn.execute(
|
||||
f"UPDATE sessions SET parent_session_id = NULL "
|
||||
f"WHERE parent_session_id IN ({placeholders})",
|
||||
list(session_ids),
|
||||
)
|
||||
|
||||
for sid in session_ids:
|
||||
conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
|
||||
|
||||
+11
-3
@@ -464,7 +464,11 @@
|
||||
addToSystemPackages = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Add hermes CLI to environment.systemPackages.";
|
||||
description = ''
|
||||
Add the hermes CLI to environment.systemPackages and export
|
||||
HERMES_HOME system-wide (via environment.variables) so interactive
|
||||
shells share state with the gateway service.
|
||||
'';
|
||||
};
|
||||
|
||||
# ── OCI Container (opt-in) ──────────────────────────────────────────
|
||||
@@ -545,8 +549,12 @@
|
||||
})
|
||||
|
||||
# ── Host CLI ──────────────────────────────────────────────────────
|
||||
# Add the hermes CLI to system PATH and export HERMES_HOME system-wide
|
||||
# so interactive shells share state (sessions, skills, cron) with the
|
||||
# gateway service instead of creating a separate ~/.hermes/.
|
||||
(lib.mkIf cfg.addToSystemPackages {
|
||||
environment.systemPackages = [ cfg.package ];
|
||||
environment.variables.HERMES_HOME = "${cfg.stateDir}/.hermes";
|
||||
})
|
||||
|
||||
# ── Directories ───────────────────────────────────────────────────
|
||||
@@ -561,7 +569,7 @@
|
||||
|
||||
# ── Activation: link config + auth + documents ────────────────────
|
||||
{
|
||||
system.activationScripts."hermes-agent-setup" = lib.stringAfter [ "users" "setupSecrets" ] ''
|
||||
system.activationScripts."hermes-agent-setup" = lib.stringAfter ([ "users" ] ++ lib.optional (config.system.activationScripts ? setupSecrets) "setupSecrets") ''
|
||||
# Ensure directories exist (activation runs before tmpfiles)
|
||||
mkdir -p ${cfg.stateDir}/.hermes
|
||||
mkdir -p ${cfg.stateDir}/home
|
||||
@@ -601,7 +609,7 @@
|
||||
# so this is the single source of truth for both native and container mode.
|
||||
${lib.optionalString (cfg.environment != {} || cfg.environmentFiles != []) ''
|
||||
ENV_FILE="${cfg.stateDir}/.hermes/.env"
|
||||
install -o ${cfg.user} -g ${cfg.group} -m 0600 /dev/null "$ENV_FILE"
|
||||
install -o ${cfg.user} -g ${cfg.group} -m 0640 /dev/null "$ENV_FILE"
|
||||
cat > "$ENV_FILE" <<'HERMES_NIX_ENV_EOF'
|
||||
${envFileContent}
|
||||
HERMES_NIX_ENV_EOF
|
||||
|
||||
+1
-1
@@ -14,7 +14,7 @@
|
||||
};
|
||||
|
||||
runtimeDeps = with pkgs; [
|
||||
nodejs_20 ripgrep git openssh ffmpeg
|
||||
nodejs_20 ripgrep git openssh ffmpeg tirith
|
||||
];
|
||||
|
||||
runtimePath = pkgs.lib.makeBinPath runtimeDeps;
|
||||
|
||||
@@ -6,14 +6,68 @@
|
||||
uv2nix,
|
||||
pyproject-nix,
|
||||
pyproject-build-systems,
|
||||
stdenv,
|
||||
}:
|
||||
let
|
||||
workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./..; };
|
||||
hacks = callPackage pyproject-nix.build.hacks { };
|
||||
|
||||
overlay = workspace.mkPyprojectOverlay {
|
||||
sourcePreference = "wheel";
|
||||
};
|
||||
|
||||
isAarch64Darwin = stdenv.hostPlatform.system == "aarch64-darwin";
|
||||
|
||||
# Keep the workspace locked through uv2nix, but supply the local voice stack
|
||||
# from nixpkgs so wheel-only transitive artifacts do not break evaluation.
|
||||
mkPrebuiltPassthru = dependencies: {
|
||||
inherit dependencies;
|
||||
optional-dependencies = { };
|
||||
dependency-groups = { };
|
||||
};
|
||||
|
||||
mkPrebuiltOverride = final: from: dependencies:
|
||||
hacks.nixpkgsPrebuilt {
|
||||
inherit from;
|
||||
prev = {
|
||||
nativeBuildInputs = [ final.pyprojectHook ];
|
||||
passthru = mkPrebuiltPassthru dependencies;
|
||||
};
|
||||
};
|
||||
|
||||
pythonPackageOverrides = final: _prev:
|
||||
if isAarch64Darwin then {
|
||||
numpy = mkPrebuiltOverride final python311.pkgs.numpy { };
|
||||
|
||||
av = mkPrebuiltOverride final python311.pkgs.av { };
|
||||
|
||||
humanfriendly = mkPrebuiltOverride final python311.pkgs.humanfriendly { };
|
||||
|
||||
coloredlogs = mkPrebuiltOverride final python311.pkgs.coloredlogs {
|
||||
humanfriendly = [ ];
|
||||
};
|
||||
|
||||
onnxruntime = mkPrebuiltOverride final python311.pkgs.onnxruntime {
|
||||
coloredlogs = [ ];
|
||||
numpy = [ ];
|
||||
packaging = [ ];
|
||||
};
|
||||
|
||||
ctranslate2 = mkPrebuiltOverride final python311.pkgs.ctranslate2 {
|
||||
numpy = [ ];
|
||||
pyyaml = [ ];
|
||||
};
|
||||
|
||||
faster-whisper = mkPrebuiltOverride final python311.pkgs.faster-whisper {
|
||||
av = [ ];
|
||||
ctranslate2 = [ ];
|
||||
huggingface-hub = [ ];
|
||||
onnxruntime = [ ];
|
||||
tokenizers = [ ];
|
||||
tqdm = [ ];
|
||||
};
|
||||
} else {};
|
||||
|
||||
pythonSet =
|
||||
(callPackage pyproject-nix.build.packages {
|
||||
python = python311;
|
||||
@@ -21,6 +75,7 @@ let
|
||||
(lib.composeManyExtensions [
|
||||
pyproject-build-systems.overlays.default
|
||||
overlay
|
||||
pythonPackageOverrides
|
||||
]);
|
||||
in
|
||||
pythonSet.mkVirtualEnv "hermes-agent-env" {
|
||||
|
||||
@@ -1803,30 +1803,34 @@ class Migrator:
|
||||
def migrate_cron_jobs(self, config: Optional[Dict[str, Any]] = None) -> None:
|
||||
config = config or self.load_openclaw_config()
|
||||
cron = config.get("cron") or {}
|
||||
if not cron:
|
||||
self.record("cron-jobs", None, None, "skipped", "No cron configuration found")
|
||||
return
|
||||
|
||||
# Archive the full cron config
|
||||
if self.archive_dir and self.execute:
|
||||
self.archive_dir.mkdir(parents=True, exist_ok=True)
|
||||
dest = self.archive_dir / "cron-config.json"
|
||||
dest.write_text(json.dumps(cron, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
|
||||
self.record("cron-jobs", "openclaw.json cron.*", str(dest), "archived",
|
||||
"Cron config archived. Use 'hermes cron' to recreate jobs manually.")
|
||||
else:
|
||||
self.record("cron-jobs", "openclaw.json cron.*", "archive/cron-config.json",
|
||||
"archived", "Would archive cron config")
|
||||
|
||||
# Also check for cron store files
|
||||
cron_store = self.source_root / "cron"
|
||||
found_any = False
|
||||
|
||||
# Archive the full cron config when present
|
||||
if cron:
|
||||
found_any = True
|
||||
if self.archive_dir and self.execute:
|
||||
self.archive_dir.mkdir(parents=True, exist_ok=True)
|
||||
dest = self.archive_dir / "cron-config.json"
|
||||
dest.write_text(json.dumps(cron, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
|
||||
self.record("cron-jobs", "openclaw.json cron.*", str(dest), "archived",
|
||||
"Cron config archived. Use 'hermes cron' to recreate jobs manually.")
|
||||
else:
|
||||
self.record("cron-jobs", "openclaw.json cron.*", "archive/cron-config.json",
|
||||
"archived", "Would archive cron config")
|
||||
|
||||
# Also check for cron store files even when config.cron is missing
|
||||
if cron_store.is_dir() and self.archive_dir:
|
||||
found_any = True
|
||||
dest_cron = self.archive_dir / "cron-store"
|
||||
if self.execute:
|
||||
shutil.copytree(cron_store, dest_cron, dirs_exist_ok=True)
|
||||
self.record("cron-jobs", str(cron_store), str(dest_cron), "archived",
|
||||
"Cron job store archived")
|
||||
|
||||
if not found_any:
|
||||
self.record("cron-jobs", None, None, "skipped", "No cron configuration found")
|
||||
|
||||
# ── Hooks ─────────────────────────────────────────────────
|
||||
def migrate_hooks_config(self, config: Optional[Dict[str, Any]] = None) -> None:
|
||||
config = config or self.load_openclaw_config()
|
||||
@@ -2454,6 +2458,15 @@ class Migrator:
|
||||
notes.append(f"- **{item.kind}**: {item.reason}")
|
||||
notes.append("")
|
||||
|
||||
has_cron_config_archive = any(
|
||||
i.kind == "cron-jobs" and i.status == "archived" and i.destination and i.destination.endswith("cron-config.json")
|
||||
for i in self.items
|
||||
)
|
||||
has_cron_store_archive = any(
|
||||
i.kind == "cron-jobs" and i.status == "archived" and i.destination and i.destination.endswith("cron-store")
|
||||
for i in self.items
|
||||
)
|
||||
|
||||
notes.extend([
|
||||
"## IMPORTANT: Archive the OpenClaw Directory",
|
||||
"",
|
||||
@@ -2475,7 +2488,14 @@ class Migrator:
|
||||
"- Run `hermes claw cleanup` to archive the OpenClaw directory (prevents state confusion)",
|
||||
"- Run `hermes setup` to configure any remaining settings",
|
||||
"- Run `hermes mcp list` to verify MCP servers were imported correctly",
|
||||
"- Run `hermes cron` to recreate scheduled tasks (see archive/cron-config.json)",
|
||||
])
|
||||
|
||||
if has_cron_config_archive:
|
||||
notes.append("- Run `hermes cron` to recreate scheduled tasks (see archive/cron-config.json)")
|
||||
elif has_cron_store_archive:
|
||||
notes.append("- Run `hermes cron` to recreate scheduled tasks (see archived cron-store)")
|
||||
|
||||
notes.extend([
|
||||
"- Run `hermes gateway install` if you need the gateway service",
|
||||
"- Review `~/.hermes/config.yaml` for any adjustments",
|
||||
"",
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
# Hindsight Memory Provider
|
||||
|
||||
Long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval. Supports cloud and local (embedded) modes.
|
||||
Long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval. Supports cloud, local embedded, and local external modes.
|
||||
|
||||
## Requirements
|
||||
|
||||
- **Cloud:** API key from [ui.hindsight.vectorize.io](https://ui.hindsight.vectorize.io)
|
||||
- **Local:** API key for a supported LLM provider (OpenAI, Anthropic, Gemini, Groq, MiniMax, or Ollama). Embeddings and reranking run locally — no additional API keys needed.
|
||||
- **Local Embedded:** API key for a supported LLM provider (OpenAI, Anthropic, Gemini, Groq, OpenRouter, MiniMax, Ollama, or any OpenAI-compatible endpoint). Embeddings and reranking run locally — no additional API keys needed.
|
||||
- **Local External:** A running Hindsight instance (Docker or self-hosted) reachable over HTTP.
|
||||
|
||||
## Setup
|
||||
|
||||
@@ -21,17 +22,28 @@ hermes config set memory.provider hindsight
|
||||
echo "HINDSIGHT_API_KEY=your-key" >> ~/.hermes/.env
|
||||
```
|
||||
|
||||
### Cloud Mode
|
||||
### Cloud
|
||||
|
||||
Connects to the Hindsight Cloud API. Requires an API key from [ui.hindsight.vectorize.io](https://ui.hindsight.vectorize.io).
|
||||
|
||||
### Local Mode
|
||||
### Local Embedded
|
||||
|
||||
Runs an embedded Hindsight server with built-in PostgreSQL. Requires an LLM API key (e.g. Groq, OpenAI, Anthropic) for memory extraction and synthesis. The daemon starts automatically in the background on first use and stops after 5 minutes of inactivity.
|
||||
Hermes spins up a local Hindsight daemon with built-in PostgreSQL. Requires an LLM API key for memory extraction and synthesis. The daemon starts automatically in the background on first use and stops after 5 minutes of inactivity.
|
||||
|
||||
Supports any OpenAI-compatible LLM endpoint (llama.cpp, vLLM, LM Studio, etc.) — pick `openai_compatible` as the provider and enter the base URL.
|
||||
|
||||
Daemon startup logs: `~/.hermes/logs/hindsight-embed.log`
|
||||
Daemon runtime logs: `~/.hindsight/profiles/<profile>.log`
|
||||
|
||||
To open the Hindsight web UI (local embedded mode only):
|
||||
```bash
|
||||
hindsight-embed -p hermes ui start
|
||||
```
|
||||
|
||||
### Local External
|
||||
|
||||
Points the plugin at an existing Hindsight instance you're already running (Docker, self-hosted, etc.). No daemon management — just a URL and an optional API key.
|
||||
|
||||
## Config
|
||||
|
||||
Config file: `~/.hermes/hindsight/config.json`
|
||||
@@ -40,39 +52,58 @@ Config file: `~/.hermes/hindsight/config.json`
|
||||
|
||||
| Key | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `mode` | `cloud` | `cloud` or `local` |
|
||||
| `api_url` | `https://api.hindsight.vectorize.io` | API URL (cloud mode) |
|
||||
| `api_url` | `http://localhost:8888` | API URL (local mode, unused — daemon manages its own port) |
|
||||
| `mode` | `cloud` | `cloud`, `local_embedded`, or `local_external` |
|
||||
| `api_url` | `https://api.hindsight.vectorize.io` | API URL (cloud and local_external modes) |
|
||||
|
||||
### Memory
|
||||
### Memory Bank
|
||||
|
||||
| Key | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `bank_id` | `hermes` | Memory bank name |
|
||||
| `budget` | `mid` | Recall thoroughness: `low` / `mid` / `high` |
|
||||
| `bank_mission` | — | Reflect mission (identity/framing for reflect reasoning). Applied via Banks API. |
|
||||
| `bank_retain_mission` | — | Retain mission (steers what gets extracted). Applied via Banks API. |
|
||||
|
||||
### Recall
|
||||
|
||||
| Key | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `recall_budget` | `mid` | Recall thoroughness: `low` / `mid` / `high` |
|
||||
| `recall_prefetch_method` | `recall` | Auto-recall method: `recall` (raw facts) or `reflect` (LLM synthesis) |
|
||||
| `recall_max_tokens` | `4096` | Maximum tokens for recall results |
|
||||
| `recall_max_input_chars` | `800` | Maximum input query length for auto-recall |
|
||||
| `recall_prompt_preamble` | — | Custom preamble for recalled memories in context |
|
||||
| `recall_tags` | — | Tags to filter when searching memories |
|
||||
| `recall_tags_match` | `any` | Tag matching mode: `any` / `all` / `any_strict` / `all_strict` |
|
||||
| `auto_recall` | `true` | Automatically recall memories before each turn |
|
||||
|
||||
### Retain
|
||||
|
||||
| Key | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `auto_retain` | `true` | Automatically retain conversation turns |
|
||||
| `retain_async` | `true` | Process retain asynchronously on the Hindsight server |
|
||||
| `retain_every_n_turns` | `1` | Retain every N turns (1 = every turn) |
|
||||
| `retain_context` | `conversation between Hermes Agent and the User` | Context label for retained memories |
|
||||
| `tags` | — | Tags applied when storing memories |
|
||||
|
||||
### Integration
|
||||
|
||||
| Key | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `memory_mode` | `hybrid` | How memories are integrated into the agent |
|
||||
| `prefetch_method` | `recall` | Method for automatic context injection |
|
||||
|
||||
**memory_mode:**
|
||||
- `hybrid` — automatic context injection + tools available to the LLM
|
||||
- `context` — automatic injection only, no tools exposed
|
||||
- `tools` — tools only, no automatic injection
|
||||
|
||||
**prefetch_method:**
|
||||
- `recall` — injects raw memory facts (fast)
|
||||
- `reflect` — injects LLM-synthesized summary (slower, more coherent)
|
||||
|
||||
### Local Mode LLM
|
||||
### Local Embedded LLM
|
||||
|
||||
| Key | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `llm_provider` | `openai` | LLM provider: `openai`, `anthropic`, `gemini`, `groq`, `minimax`, `ollama` |
|
||||
| `llm_model` | per-provider | Model name (e.g. `gpt-4o-mini`, `openai/gpt-oss-120b`) |
|
||||
| `llm_provider` | `openai` | `openai`, `anthropic`, `gemini`, `groq`, `openrouter`, `minimax`, `ollama`, `lmstudio`, `openai_compatible` |
|
||||
| `llm_model` | per-provider | Model name (e.g. `gpt-4o-mini`, `qwen/qwen3.5-9b`) |
|
||||
| `llm_base_url` | — | Endpoint URL for `openai_compatible` (e.g. `http://192.168.1.10:8080/v1`) |
|
||||
|
||||
The LLM API key is stored in `~/.hermes/.env` as `HINDSIGHT_LLM_API_KEY`.
|
||||
|
||||
@@ -92,7 +123,12 @@ Available in `hybrid` and `tools` memory modes:
|
||||
|----------|-------------|
|
||||
| `HINDSIGHT_API_KEY` | API key for Hindsight Cloud |
|
||||
| `HINDSIGHT_LLM_API_KEY` | LLM API key for local mode |
|
||||
| `HINDSIGHT_API_LLM_BASE_URL` | LLM Base URL for local mode (e.g. OpenRouter) |
|
||||
| `HINDSIGHT_API_URL` | Override API endpoint |
|
||||
| `HINDSIGHT_BANK_ID` | Override bank name |
|
||||
| `HINDSIGHT_BUDGET` | Override recall budget |
|
||||
| `HINDSIGHT_MODE` | Override mode (`cloud` / `local`) |
|
||||
| `HINDSIGHT_MODE` | Override mode (`cloud`, `local_embedded`, `local_external`) |
|
||||
|
||||
## Client Version
|
||||
|
||||
Requires `hindsight-client >= 0.4.22`. The plugin auto-upgrades on session start if an older version is detected.
|
||||
|
||||
@@ -28,21 +28,25 @@ from hermes_constants import get_hermes_home
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from agent.memory_provider import MemoryProvider
|
||||
from hermes_constants import get_hermes_home
|
||||
from tools.registry import tool_error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
|
||||
_DEFAULT_LOCAL_URL = "http://localhost:8888"
|
||||
_MIN_CLIENT_VERSION = "0.4.22"
|
||||
_VALID_BUDGETS = {"low", "mid", "high"}
|
||||
_PROVIDER_DEFAULT_MODELS = {
|
||||
"openai": "gpt-4o-mini",
|
||||
"anthropic": "claude-haiku-4-5",
|
||||
"gemini": "gemini-2.5-flash",
|
||||
"groq": "openai/gpt-oss-120b",
|
||||
"openrouter": "qwen/qwen3.5-9b",
|
||||
"minimax": "MiniMax-M2.7",
|
||||
"ollama": "gemma3:12b",
|
||||
"lmstudio": "local-model",
|
||||
"openai_compatible": "your-model-name",
|
||||
}
|
||||
|
||||
|
||||
@@ -188,6 +192,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
self._bank_id = "hermes"
|
||||
self._budget = "mid"
|
||||
self._mode = "cloud"
|
||||
self._llm_base_url = ""
|
||||
self._memory_mode = "hybrid" # "context", "tools", or "hybrid"
|
||||
self._prefetch_method = "recall" # "recall" or "reflect"
|
||||
self._client = None
|
||||
@@ -195,6 +200,31 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
self._prefetch_lock = threading.Lock()
|
||||
self._prefetch_thread = None
|
||||
self._sync_thread = None
|
||||
self._session_id = ""
|
||||
|
||||
# Tags
|
||||
self._tags: list[str] | None = None
|
||||
self._recall_tags: list[str] | None = None
|
||||
self._recall_tags_match = "any"
|
||||
|
||||
# Retain controls
|
||||
self._auto_retain = True
|
||||
self._retain_every_n_turns = 1
|
||||
self._retain_context = "conversation between Hermes Agent and the User"
|
||||
self._turn_counter = 0
|
||||
self._session_turns: list[str] = [] # accumulates ALL turns for the session
|
||||
|
||||
# Recall controls
|
||||
self._auto_recall = True
|
||||
self._recall_max_tokens = 4096
|
||||
self._recall_types: list[str] | None = None
|
||||
self._recall_prompt_preamble = ""
|
||||
self._recall_max_input_chars = 800
|
||||
|
||||
# Bank
|
||||
self._bank_mission = ""
|
||||
self._bank_retain_mission: str | None = None
|
||||
self._retain_async = True
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
@@ -204,7 +234,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
try:
|
||||
cfg = _load_config()
|
||||
mode = cfg.get("mode", "cloud")
|
||||
if mode == "local":
|
||||
if mode in ("local", "local_embedded", "local_external"):
|
||||
return True
|
||||
has_key = bool(cfg.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", ""))
|
||||
has_url = bool(cfg.get("api_url") or os.environ.get("HINDSIGHT_API_URL", ""))
|
||||
@@ -228,68 +258,306 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
existing.update(values)
|
||||
config_path.write_text(json.dumps(existing, indent=2))
|
||||
|
||||
def post_setup(self, hermes_home: str, config: dict) -> None:
|
||||
"""Custom setup wizard — installs only the deps needed for the selected mode."""
|
||||
import getpass
|
||||
import subprocess
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_cli.config import save_config
|
||||
|
||||
from hermes_cli.memory_setup import _curses_select
|
||||
|
||||
print("\n Configuring Hindsight memory:\n")
|
||||
|
||||
# Step 1: Mode selection
|
||||
mode_items = [
|
||||
("Cloud", "Hindsight Cloud API (lightweight, just needs an API key)"),
|
||||
("Local Embedded", "Run Hindsight locally (downloads ~200MB, needs LLM key)"),
|
||||
("Local External", "Connect to an existing Hindsight instance"),
|
||||
]
|
||||
mode_idx = _curses_select(" Select mode", mode_items, default=0)
|
||||
mode = ["cloud", "local_embedded", "local_external"][mode_idx]
|
||||
|
||||
provider_config: dict = {"mode": mode}
|
||||
env_writes: dict = {}
|
||||
|
||||
# Step 2: Install/upgrade deps for selected mode
|
||||
_MIN_CLIENT_VERSION = "0.4.22"
|
||||
cloud_dep = f"hindsight-client>={_MIN_CLIENT_VERSION}"
|
||||
local_dep = "hindsight-all"
|
||||
if mode == "local_embedded":
|
||||
deps_to_install = [local_dep]
|
||||
elif mode == "local_external":
|
||||
deps_to_install = [cloud_dep]
|
||||
else:
|
||||
deps_to_install = [cloud_dep]
|
||||
|
||||
print(f"\n Checking dependencies...")
|
||||
uv_path = shutil.which("uv")
|
||||
if not uv_path:
|
||||
print(" ⚠ uv not found — install it: curl -LsSf https://astral.sh/uv/install.sh | sh")
|
||||
print(f" Then run manually: uv pip install --python {sys.executable} {' '.join(deps_to_install)}")
|
||||
else:
|
||||
try:
|
||||
subprocess.run(
|
||||
[uv_path, "pip", "install", "--python", sys.executable, "--quiet", "--upgrade"] + deps_to_install,
|
||||
check=True, timeout=120, capture_output=True,
|
||||
)
|
||||
print(f" ✓ Dependencies up to date")
|
||||
except Exception as e:
|
||||
print(f" ⚠ Install failed: {e}")
|
||||
print(f" Run manually: uv pip install --python {sys.executable} {' '.join(deps_to_install)}")
|
||||
|
||||
# Step 3: Mode-specific config
|
||||
if mode == "cloud":
|
||||
print(f"\n Get your API key at https://ui.hindsight.vectorize.io\n")
|
||||
existing_key = os.environ.get("HINDSIGHT_API_KEY", "")
|
||||
if existing_key:
|
||||
masked = f"...{existing_key[-4:]}" if len(existing_key) > 4 else "set"
|
||||
sys.stdout.write(f" API key (current: {masked}, blank to keep): ")
|
||||
sys.stdout.flush()
|
||||
api_key = getpass.getpass(prompt="") if sys.stdin.isatty() else sys.stdin.readline().strip()
|
||||
else:
|
||||
sys.stdout.write(" API key: ")
|
||||
sys.stdout.flush()
|
||||
api_key = getpass.getpass(prompt="") if sys.stdin.isatty() else sys.stdin.readline().strip()
|
||||
if api_key:
|
||||
env_writes["HINDSIGHT_API_KEY"] = api_key
|
||||
|
||||
val = input(f" API URL [{_DEFAULT_API_URL}]: ").strip()
|
||||
if val:
|
||||
provider_config["api_url"] = val
|
||||
|
||||
elif mode == "local_external":
|
||||
val = input(f" Hindsight API URL [{_DEFAULT_LOCAL_URL}]: ").strip()
|
||||
provider_config["api_url"] = val or _DEFAULT_LOCAL_URL
|
||||
|
||||
sys.stdout.write(" API key (optional, blank to skip): ")
|
||||
sys.stdout.flush()
|
||||
api_key = getpass.getpass(prompt="") if sys.stdin.isatty() else sys.stdin.readline().strip()
|
||||
if api_key:
|
||||
env_writes["HINDSIGHT_API_KEY"] = api_key
|
||||
|
||||
else: # local_embedded
|
||||
providers_list = list(_PROVIDER_DEFAULT_MODELS.keys())
|
||||
llm_items = [
|
||||
(p, f"default model: {_PROVIDER_DEFAULT_MODELS[p]}")
|
||||
for p in providers_list
|
||||
]
|
||||
llm_idx = _curses_select(" Select LLM provider", llm_items, default=0)
|
||||
llm_provider = providers_list[llm_idx]
|
||||
|
||||
provider_config["llm_provider"] = llm_provider
|
||||
|
||||
if llm_provider == "openai_compatible":
|
||||
val = input(" LLM endpoint URL (e.g. http://192.168.1.10:8080/v1): ").strip()
|
||||
if val:
|
||||
provider_config["llm_base_url"] = val
|
||||
elif llm_provider == "openrouter":
|
||||
provider_config["llm_base_url"] = "https://openrouter.ai/api/v1"
|
||||
|
||||
default_model = _PROVIDER_DEFAULT_MODELS.get(llm_provider, "gpt-4o-mini")
|
||||
val = input(f" LLM model [{default_model}]: ").strip()
|
||||
provider_config["llm_model"] = val or default_model
|
||||
|
||||
sys.stdout.write(" LLM API key: ")
|
||||
sys.stdout.flush()
|
||||
llm_key = getpass.getpass(prompt="") if sys.stdin.isatty() else sys.stdin.readline().strip()
|
||||
if llm_key:
|
||||
env_writes["HINDSIGHT_LLM_API_KEY"] = llm_key
|
||||
|
||||
# Step 4: Save everything
|
||||
provider_config["bank_id"] = "hermes"
|
||||
provider_config["recall_budget"] = "mid"
|
||||
bank_id = "hermes"
|
||||
config["memory"]["provider"] = "hindsight"
|
||||
save_config(config)
|
||||
|
||||
self.save_config(provider_config, hermes_home)
|
||||
|
||||
if env_writes:
|
||||
env_path = Path(hermes_home) / ".env"
|
||||
env_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
existing_lines = []
|
||||
if env_path.exists():
|
||||
existing_lines = env_path.read_text().splitlines()
|
||||
updated_keys = set()
|
||||
new_lines = []
|
||||
for line in existing_lines:
|
||||
key_match = line.split("=", 1)[0].strip() if "=" in line and not line.startswith("#") else None
|
||||
if key_match and key_match in env_writes:
|
||||
new_lines.append(f"{key_match}={env_writes[key_match]}")
|
||||
updated_keys.add(key_match)
|
||||
else:
|
||||
new_lines.append(line)
|
||||
for k, v in env_writes.items():
|
||||
if k not in updated_keys:
|
||||
new_lines.append(f"{k}={v}")
|
||||
env_path.write_text("\n".join(new_lines) + "\n")
|
||||
|
||||
print(f"\n ✓ Hindsight memory configured ({mode} mode)")
|
||||
if env_writes:
|
||||
print(f" API keys saved to .env")
|
||||
print(f"\n Start a new session to activate.\n")
|
||||
|
||||
def get_config_schema(self):
|
||||
return [
|
||||
{"key": "mode", "description": "Cloud API or local embedded mode", "default": "cloud", "choices": ["cloud", "local"]},
|
||||
{"key": "api_url", "description": "Hindsight API URL", "default": _DEFAULT_API_URL, "when": {"mode": "cloud"}},
|
||||
{"key": "mode", "description": "Connection mode", "default": "cloud", "choices": ["cloud", "local_embedded", "local_external"]},
|
||||
# Cloud mode
|
||||
{"key": "api_url", "description": "Hindsight Cloud API URL", "default": _DEFAULT_API_URL, "when": {"mode": "cloud"}},
|
||||
{"key": "api_key", "description": "Hindsight Cloud API key", "secret": True, "env_var": "HINDSIGHT_API_KEY", "url": "https://ui.hindsight.vectorize.io", "when": {"mode": "cloud"}},
|
||||
{"key": "llm_provider", "description": "LLM provider for local mode", "default": "openai", "choices": ["openai", "anthropic", "gemini", "groq", "minimax", "ollama"], "when": {"mode": "local"}},
|
||||
{"key": "llm_api_key", "description": "LLM API key for local Hindsight", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY", "when": {"mode": "local"}},
|
||||
{"key": "llm_model", "description": "LLM model for local mode", "default": "gpt-4o-mini", "default_from": {"field": "llm_provider", "map": _PROVIDER_DEFAULT_MODELS}, "when": {"mode": "local"}},
|
||||
# Local external mode
|
||||
{"key": "api_url", "description": "Hindsight API URL", "default": _DEFAULT_LOCAL_URL, "when": {"mode": "local_external"}},
|
||||
{"key": "api_key", "description": "API key (optional)", "secret": True, "env_var": "HINDSIGHT_API_KEY", "when": {"mode": "local_external"}},
|
||||
# Local embedded mode
|
||||
{"key": "llm_provider", "description": "LLM provider", "default": "openai", "choices": ["openai", "anthropic", "gemini", "groq", "openrouter", "minimax", "ollama", "lmstudio", "openai_compatible"], "when": {"mode": "local_embedded"}},
|
||||
{"key": "llm_base_url", "description": "Endpoint URL (e.g. http://192.168.1.10:8080/v1)", "default": "", "when": {"mode": "local_embedded", "llm_provider": "openai_compatible"}},
|
||||
{"key": "llm_api_key", "description": "LLM API key (optional for openai_compatible)", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY", "when": {"mode": "local_embedded"}},
|
||||
{"key": "llm_model", "description": "LLM model", "default": "gpt-4o-mini", "default_from": {"field": "llm_provider", "map": _PROVIDER_DEFAULT_MODELS}, "when": {"mode": "local_embedded"}},
|
||||
{"key": "bank_id", "description": "Memory bank name", "default": "hermes"},
|
||||
{"key": "budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
|
||||
{"key": "bank_mission", "description": "Mission/purpose description for the memory bank"},
|
||||
{"key": "bank_retain_mission", "description": "Custom extraction prompt for memory retention"},
|
||||
{"key": "recall_budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
|
||||
{"key": "memory_mode", "description": "Memory integration mode", "default": "hybrid", "choices": ["hybrid", "context", "tools"]},
|
||||
{"key": "prefetch_method", "description": "Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]},
|
||||
{"key": "recall_prefetch_method", "description": "Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]},
|
||||
{"key": "tags", "description": "Tags applied when storing memories (comma-separated)", "default": ""},
|
||||
{"key": "recall_tags", "description": "Tags to filter when searching memories (comma-separated)", "default": ""},
|
||||
{"key": "recall_tags_match", "description": "Tag matching mode for recall", "default": "any", "choices": ["any", "all", "any_strict", "all_strict"]},
|
||||
{"key": "auto_recall", "description": "Automatically recall memories before each turn", "default": True},
|
||||
{"key": "auto_retain", "description": "Automatically retain conversation turns", "default": True},
|
||||
{"key": "retain_every_n_turns", "description": "Retain every N turns (1 = every turn)", "default": 1},
|
||||
{"key": "retain_async","description": "Process retain asynchronously on the Hindsight server", "default": True},
|
||||
{"key": "retain_context", "description": "Context label for retained memories", "default": "conversation between Hermes Agent and the User"},
|
||||
{"key": "recall_max_tokens", "description": "Maximum tokens for recall results", "default": 4096},
|
||||
{"key": "recall_max_input_chars", "description": "Maximum input query length for auto-recall", "default": 800},
|
||||
{"key": "recall_prompt_preamble", "description": "Custom preamble for recalled memories in context"},
|
||||
]
|
||||
|
||||
def _get_client(self):
|
||||
"""Return the cached Hindsight client (created once, reused)."""
|
||||
if self._client is None:
|
||||
if self._mode == "local":
|
||||
if self._mode == "local_embedded":
|
||||
from hindsight import HindsightEmbedded
|
||||
# Disable __del__ on the class to prevent "attached to a
|
||||
# different loop" errors during GC — we handle cleanup in
|
||||
# shutdown() instead.
|
||||
HindsightEmbedded.__del__ = lambda self: None
|
||||
self._client = HindsightEmbedded(
|
||||
llm_provider = self._config.get("llm_provider", "")
|
||||
if llm_provider in ("openai_compatible", "openrouter"):
|
||||
llm_provider = "openai"
|
||||
logger.debug("Creating HindsightEmbedded client (profile=%s, provider=%s)",
|
||||
self._config.get("profile", "hermes"), llm_provider)
|
||||
kwargs = dict(
|
||||
profile=self._config.get("profile", "hermes"),
|
||||
llm_provider=self._config.get("llm_provider", ""),
|
||||
llm_api_key=self._config.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY", ""),
|
||||
llm_provider=llm_provider,
|
||||
llm_api_key=self._config.get("llmApiKey") or self._config.get("llm_api_key") or os.environ.get("HINDSIGHT_LLM_API_KEY", ""),
|
||||
llm_model=self._config.get("llm_model", ""),
|
||||
)
|
||||
if self._llm_base_url:
|
||||
kwargs["llm_base_url"] = self._llm_base_url
|
||||
self._client = HindsightEmbedded(**kwargs)
|
||||
else:
|
||||
from hindsight_client import Hindsight
|
||||
kwargs = {"base_url": self._api_url, "timeout": 30.0}
|
||||
if self._api_key:
|
||||
kwargs["api_key"] = self._api_key
|
||||
logger.debug("Creating Hindsight cloud client (url=%s, has_key=%s)",
|
||||
self._api_url, bool(self._api_key))
|
||||
self._client = Hindsight(**kwargs)
|
||||
return self._client
|
||||
|
||||
def initialize(self, session_id: str, **kwargs) -> None:
|
||||
self._session_id = session_id
|
||||
|
||||
# Check client version and auto-upgrade if needed
|
||||
try:
|
||||
from importlib.metadata import version as pkg_version
|
||||
from packaging.version import Version
|
||||
installed = pkg_version("hindsight-client")
|
||||
if Version(installed) < Version(_MIN_CLIENT_VERSION):
|
||||
logger.warning("hindsight-client %s is outdated (need >=%s), attempting upgrade...",
|
||||
installed, _MIN_CLIENT_VERSION)
|
||||
import shutil, subprocess, sys
|
||||
uv_path = shutil.which("uv")
|
||||
if uv_path:
|
||||
try:
|
||||
subprocess.run(
|
||||
[uv_path, "pip", "install", "--python", sys.executable,
|
||||
"--quiet", "--upgrade", f"hindsight-client>={_MIN_CLIENT_VERSION}"],
|
||||
check=True, timeout=120, capture_output=True,
|
||||
)
|
||||
logger.info("hindsight-client upgraded to >=%s", _MIN_CLIENT_VERSION)
|
||||
except Exception as e:
|
||||
logger.warning("Auto-upgrade failed: %s. Run: uv pip install 'hindsight-client>=%s'",
|
||||
e, _MIN_CLIENT_VERSION)
|
||||
else:
|
||||
logger.warning("uv not found. Run: pip install 'hindsight-client>=%s'", _MIN_CLIENT_VERSION)
|
||||
except Exception:
|
||||
pass # packaging not available or other issue — proceed anyway
|
||||
|
||||
self._config = _load_config()
|
||||
self._mode = self._config.get("mode", "cloud")
|
||||
self._api_key = self._config.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", "")
|
||||
default_url = _DEFAULT_LOCAL_URL if self._mode == "local" else _DEFAULT_API_URL
|
||||
# "local" is a legacy alias for "local_embedded"
|
||||
if self._mode == "local":
|
||||
self._mode = "local_embedded"
|
||||
self._api_key = self._config.get("apiKey") or self._config.get("api_key") or os.environ.get("HINDSIGHT_API_KEY", "")
|
||||
default_url = _DEFAULT_LOCAL_URL if self._mode in ("local_embedded", "local_external") else _DEFAULT_API_URL
|
||||
self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
|
||||
self._llm_base_url = self._config.get("llm_base_url", "")
|
||||
|
||||
banks = self._config.get("banks", {}).get("hermes", {})
|
||||
self._bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes")
|
||||
budget = self._config.get("budget") or banks.get("budget", "mid")
|
||||
budget = self._config.get("recall_budget") or self._config.get("budget") or banks.get("budget", "mid")
|
||||
self._budget = budget if budget in _VALID_BUDGETS else "mid"
|
||||
|
||||
memory_mode = self._config.get("memory_mode", "hybrid")
|
||||
self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid"
|
||||
|
||||
prefetch_method = self._config.get("prefetch_method", "recall")
|
||||
prefetch_method = self._config.get("recall_prefetch_method", "recall")
|
||||
self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall"
|
||||
|
||||
logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s",
|
||||
self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method)
|
||||
# Bank options
|
||||
self._bank_mission = self._config.get("bank_mission", "")
|
||||
self._bank_retain_mission = self._config.get("bank_retain_mission") or None
|
||||
|
||||
# Tags
|
||||
self._tags = self._config.get("tags") or None
|
||||
self._recall_tags = self._config.get("recall_tags") or None
|
||||
self._recall_tags_match = self._config.get("recall_tags_match", "any")
|
||||
|
||||
# Retain controls
|
||||
self._auto_retain = self._config.get("auto_retain", True)
|
||||
self._retain_every_n_turns = max(1, int(self._config.get("retain_every_n_turns", 1)))
|
||||
self._retain_context = self._config.get("retain_context", "conversation between Hermes Agent and the User")
|
||||
|
||||
# Recall controls
|
||||
self._auto_recall = self._config.get("auto_recall", True)
|
||||
self._recall_max_tokens = int(self._config.get("recall_max_tokens", 4096))
|
||||
self._recall_types = self._config.get("recall_types") or None
|
||||
self._recall_prompt_preamble = self._config.get("recall_prompt_preamble", "")
|
||||
self._recall_max_input_chars = int(self._config.get("recall_max_input_chars", 800))
|
||||
self._retain_async = self._config.get("retain_async", True)
|
||||
|
||||
_client_version = "unknown"
|
||||
try:
|
||||
from importlib.metadata import version as pkg_version
|
||||
_client_version = pkg_version("hindsight-client")
|
||||
except Exception:
|
||||
pass
|
||||
logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s, client=%s",
|
||||
self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method, _client_version)
|
||||
logger.debug("Hindsight config: auto_retain=%s, auto_recall=%s, retain_every_n=%d, "
|
||||
"retain_async=%s, retain_context=%s, "
|
||||
"recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s",
|
||||
self._auto_retain, self._auto_recall, self._retain_every_n_turns,
|
||||
self._retain_async, self._retain_context,
|
||||
self._recall_max_tokens, self._recall_max_input_chars,
|
||||
self._tags, self._recall_tags)
|
||||
|
||||
# For local mode, start the embedded daemon in the background so it
|
||||
# doesn't block the chat. Redirect stdout/stderr to a log file to
|
||||
# prevent rich startup output from spamming the terminal.
|
||||
if self._mode == "local":
|
||||
if self._mode == "local_embedded":
|
||||
def _start_daemon():
|
||||
import traceback
|
||||
log_dir = get_hermes_home() / "logs"
|
||||
@@ -311,9 +579,12 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
# If the config changed and the daemon is running, stop it.
|
||||
from pathlib import Path as _Path
|
||||
profile_env = _Path.home() / ".hindsight" / "profiles" / f"{profile}.env"
|
||||
current_key = self._config.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY", "")
|
||||
current_key = self._config.get("llm_api_key") or os.environ.get("HINDSIGHT_LLM_API_KEY", "")
|
||||
current_provider = self._config.get("llm_provider", "")
|
||||
current_model = self._config.get("llm_model", "")
|
||||
current_base_url = self._config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "")
|
||||
# Map openai_compatible/openrouter → openai for the daemon (OpenAI wire format)
|
||||
daemon_provider = "openai" if current_provider in ("openai_compatible", "openrouter") else current_provider
|
||||
|
||||
# Read saved profile config
|
||||
saved = {}
|
||||
@@ -324,20 +595,24 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
saved[k.strip()] = v.strip()
|
||||
|
||||
config_changed = (
|
||||
saved.get("HINDSIGHT_API_LLM_PROVIDER") != current_provider or
|
||||
saved.get("HINDSIGHT_API_LLM_PROVIDER") != daemon_provider or
|
||||
saved.get("HINDSIGHT_API_LLM_MODEL") != current_model or
|
||||
saved.get("HINDSIGHT_API_LLM_API_KEY") != current_key
|
||||
saved.get("HINDSIGHT_API_LLM_API_KEY") != current_key or
|
||||
saved.get("HINDSIGHT_API_LLM_BASE_URL", "") != current_base_url
|
||||
)
|
||||
|
||||
if config_changed:
|
||||
# Write updated profile .env
|
||||
profile_env.parent.mkdir(parents=True, exist_ok=True)
|
||||
profile_env.write_text(
|
||||
f"HINDSIGHT_API_LLM_PROVIDER={current_provider}\n"
|
||||
env_lines = (
|
||||
f"HINDSIGHT_API_LLM_PROVIDER={daemon_provider}\n"
|
||||
f"HINDSIGHT_API_LLM_API_KEY={current_key}\n"
|
||||
f"HINDSIGHT_API_LLM_MODEL={current_model}\n"
|
||||
f"HINDSIGHT_API_LOG_LEVEL=info\n"
|
||||
)
|
||||
if current_base_url:
|
||||
env_lines += f"HINDSIGHT_API_LLM_BASE_URL={current_base_url}\n"
|
||||
profile_env.write_text(env_lines)
|
||||
if client._manager.is_running(profile):
|
||||
with open(log_path, "a") as f:
|
||||
f.write("\n=== Config changed, restarting daemon ===\n")
|
||||
@@ -378,47 +653,118 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
|
||||
def prefetch(self, query: str, *, session_id: str = "") -> str:
|
||||
if self._prefetch_thread and self._prefetch_thread.is_alive():
|
||||
logger.debug("Prefetch: waiting for background thread to complete")
|
||||
self._prefetch_thread.join(timeout=3.0)
|
||||
with self._prefetch_lock:
|
||||
result = self._prefetch_result
|
||||
self._prefetch_result = ""
|
||||
if not result:
|
||||
logger.debug("Prefetch: no results available")
|
||||
return ""
|
||||
return f"## Hindsight Memory\n{result}"
|
||||
logger.debug("Prefetch: returning %d chars of context", len(result))
|
||||
header = self._recall_prompt_preamble or (
|
||||
"# Hindsight Memory (persistent cross-session context)\n"
|
||||
"Use this to answer questions about the user and prior sessions. "
|
||||
"Do not call tools to look up information that is already present here."
|
||||
)
|
||||
return f"{header}\n\n{result}"
|
||||
|
||||
def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
|
||||
if self._memory_mode == "tools":
|
||||
logger.debug("Prefetch: skipped (tools-only mode)")
|
||||
return
|
||||
if not self._auto_recall:
|
||||
logger.debug("Prefetch: skipped (auto_recall disabled)")
|
||||
return
|
||||
# Truncate query to max chars
|
||||
if self._recall_max_input_chars and len(query) > self._recall_max_input_chars:
|
||||
query = query[:self._recall_max_input_chars]
|
||||
|
||||
def _run():
|
||||
try:
|
||||
client = self._get_client()
|
||||
if self._prefetch_method == "reflect":
|
||||
logger.debug("Prefetch: calling reflect (bank=%s, query_len=%d)", self._bank_id, len(query))
|
||||
resp = _run_sync(client.areflect(bank_id=self._bank_id, query=query, budget=self._budget))
|
||||
text = resp.text or ""
|
||||
else:
|
||||
resp = _run_sync(client.arecall(bank_id=self._bank_id, query=query, budget=self._budget))
|
||||
text = "\n".join(r.text for r in resp.results if r.text) if resp.results else ""
|
||||
recall_kwargs: dict = {
|
||||
"bank_id": self._bank_id, "query": query,
|
||||
"budget": self._budget, "max_tokens": self._recall_max_tokens,
|
||||
}
|
||||
if self._recall_tags:
|
||||
recall_kwargs["tags"] = self._recall_tags
|
||||
recall_kwargs["tags_match"] = self._recall_tags_match
|
||||
if self._recall_types:
|
||||
recall_kwargs["types"] = self._recall_types
|
||||
logger.debug("Prefetch: calling recall (bank=%s, query_len=%d, budget=%s)",
|
||||
self._bank_id, len(query), self._budget)
|
||||
resp = _run_sync(client.arecall(**recall_kwargs))
|
||||
num_results = len(resp.results) if resp.results else 0
|
||||
logger.debug("Prefetch: recall returned %d results", num_results)
|
||||
text = "\n".join(f"- {r.text}" for r in resp.results if r.text) if resp.results else ""
|
||||
if text:
|
||||
with self._prefetch_lock:
|
||||
self._prefetch_result = text
|
||||
except Exception as e:
|
||||
logger.debug("Hindsight prefetch failed: %s", e)
|
||||
logger.debug("Hindsight prefetch failed: %s", e, exc_info=True)
|
||||
|
||||
self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="hindsight-prefetch")
|
||||
self._prefetch_thread.start()
|
||||
|
||||
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
|
||||
"""Retain conversation turn in background (non-blocking)."""
|
||||
combined = f"User: {user_content}\nAssistant: {assistant_content}"
|
||||
"""Retain conversation turn in background (non-blocking).
|
||||
|
||||
Respects retain_every_n_turns for batching.
|
||||
"""
|
||||
if not self._auto_retain:
|
||||
logger.debug("sync_turn: skipped (auto_retain disabled)")
|
||||
return
|
||||
|
||||
from datetime import datetime, timezone
|
||||
now = datetime.now(timezone.utc).isoformat()
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": user_content, "timestamp": now},
|
||||
{"role": "assistant", "content": assistant_content, "timestamp": now},
|
||||
]
|
||||
|
||||
turn = json.dumps(messages)
|
||||
self._session_turns.append(turn)
|
||||
self._turn_counter += 1
|
||||
|
||||
# Only retain every N turns
|
||||
if self._turn_counter % self._retain_every_n_turns != 0:
|
||||
logger.debug("sync_turn: buffered turn %d (will retain at turn %d)",
|
||||
self._turn_counter, self._turn_counter + (self._retain_every_n_turns - self._turn_counter % self._retain_every_n_turns))
|
||||
return
|
||||
|
||||
logger.debug("sync_turn: retaining %d turns, total session content %d chars",
|
||||
len(self._session_turns), sum(len(t) for t in self._session_turns))
|
||||
# Send the ENTIRE session as a single JSON array (document_id deduplicates).
|
||||
# Each element in _session_turns is a JSON string of that turn's messages.
|
||||
content = "[" + ",".join(self._session_turns) + "]"
|
||||
|
||||
def _sync():
|
||||
try:
|
||||
client = self._get_client()
|
||||
_run_sync(client.aretain(
|
||||
bank_id=self._bank_id, content=combined, context="conversation"
|
||||
item: dict = {
|
||||
"content": content,
|
||||
"context": self._retain_context,
|
||||
}
|
||||
if self._tags:
|
||||
item["tags"] = self._tags
|
||||
logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d",
|
||||
self._bank_id, self._session_id, self._retain_async, len(content), len(self._session_turns))
|
||||
_run_sync(client.aretain_batch(
|
||||
bank_id=self._bank_id,
|
||||
items=[item],
|
||||
document_id=self._session_id,
|
||||
retain_async=self._retain_async,
|
||||
))
|
||||
logger.debug("Hindsight retain succeeded")
|
||||
except Exception as e:
|
||||
logger.warning("Hindsight sync failed: %s", e)
|
||||
logger.warning("Hindsight sync failed: %s", e, exc_info=True)
|
||||
|
||||
if self._sync_thread and self._sync_thread.is_alive():
|
||||
self._sync_thread.join(timeout=5.0)
|
||||
@@ -443,12 +789,18 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
return tool_error("Missing required parameter: content")
|
||||
context = args.get("context")
|
||||
try:
|
||||
_run_sync(client.aretain(
|
||||
bank_id=self._bank_id, content=content, context=context
|
||||
))
|
||||
retain_kwargs: dict = {
|
||||
"bank_id": self._bank_id, "content": content, "context": context,
|
||||
}
|
||||
if self._tags:
|
||||
retain_kwargs["tags"] = self._tags
|
||||
logger.debug("Tool hindsight_retain: bank=%s, content_len=%d, context=%s",
|
||||
self._bank_id, len(content), context)
|
||||
_run_sync(client.aretain(**retain_kwargs))
|
||||
logger.debug("Tool hindsight_retain: success")
|
||||
return json.dumps({"result": "Memory stored successfully."})
|
||||
except Exception as e:
|
||||
logger.warning("hindsight_retain failed: %s", e)
|
||||
logger.warning("hindsight_retain failed: %s", e, exc_info=True)
|
||||
return tool_error(f"Failed to store memory: {e}")
|
||||
|
||||
elif tool_name == "hindsight_recall":
|
||||
@@ -456,15 +808,26 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
if not query:
|
||||
return tool_error("Missing required parameter: query")
|
||||
try:
|
||||
resp = _run_sync(client.arecall(
|
||||
bank_id=self._bank_id, query=query, budget=self._budget
|
||||
))
|
||||
recall_kwargs: dict = {
|
||||
"bank_id": self._bank_id, "query": query, "budget": self._budget,
|
||||
"max_tokens": self._recall_max_tokens,
|
||||
}
|
||||
if self._recall_tags:
|
||||
recall_kwargs["tags"] = self._recall_tags
|
||||
recall_kwargs["tags_match"] = self._recall_tags_match
|
||||
if self._recall_types:
|
||||
recall_kwargs["types"] = self._recall_types
|
||||
logger.debug("Tool hindsight_recall: bank=%s, query_len=%d, budget=%s",
|
||||
self._bank_id, len(query), self._budget)
|
||||
resp = _run_sync(client.arecall(**recall_kwargs))
|
||||
num_results = len(resp.results) if resp.results else 0
|
||||
logger.debug("Tool hindsight_recall: %d results", num_results)
|
||||
if not resp.results:
|
||||
return json.dumps({"result": "No relevant memories found."})
|
||||
lines = [f"{i}. {r.text}" for i, r in enumerate(resp.results, 1)]
|
||||
return json.dumps({"result": "\n".join(lines)})
|
||||
except Exception as e:
|
||||
logger.warning("hindsight_recall failed: %s", e)
|
||||
logger.warning("hindsight_recall failed: %s", e, exc_info=True)
|
||||
return tool_error(f"Failed to search memory: {e}")
|
||||
|
||||
elif tool_name == "hindsight_reflect":
|
||||
@@ -472,24 +835,28 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
if not query:
|
||||
return tool_error("Missing required parameter: query")
|
||||
try:
|
||||
logger.debug("Tool hindsight_reflect: bank=%s, query_len=%d, budget=%s",
|
||||
self._bank_id, len(query), self._budget)
|
||||
resp = _run_sync(client.areflect(
|
||||
bank_id=self._bank_id, query=query, budget=self._budget
|
||||
))
|
||||
logger.debug("Tool hindsight_reflect: response_len=%d", len(resp.text or ""))
|
||||
return json.dumps({"result": resp.text or "No relevant memories found."})
|
||||
except Exception as e:
|
||||
logger.warning("hindsight_reflect failed: %s", e)
|
||||
logger.warning("hindsight_reflect failed: %s", e, exc_info=True)
|
||||
return tool_error(f"Failed to reflect: {e}")
|
||||
|
||||
return tool_error(f"Unknown tool: {tool_name}")
|
||||
|
||||
def shutdown(self) -> None:
|
||||
logger.debug("Hindsight shutdown: waiting for background threads")
|
||||
global _loop, _loop_thread
|
||||
for t in (self._prefetch_thread, self._sync_thread):
|
||||
if t and t.is_alive():
|
||||
t.join(timeout=5.0)
|
||||
if self._client is not None:
|
||||
try:
|
||||
if self._mode == "local":
|
||||
if self._mode == "local_embedded":
|
||||
# Use the public close() API. The RuntimeError from
|
||||
# aiohttp's "attached to a different loop" is expected
|
||||
# and harmless — the daemon keeps running independently.
|
||||
|
||||
@@ -2,9 +2,7 @@ name: hindsight
|
||||
version: 1.0.0
|
||||
description: "Hindsight — long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval."
|
||||
pip_dependencies:
|
||||
- hindsight-client
|
||||
- hindsight-all
|
||||
requires_env:
|
||||
- HINDSIGHT_API_KEY
|
||||
- "hindsight-client>=0.4.22"
|
||||
requires_env: []
|
||||
hooks:
|
||||
- on_session_end
|
||||
|
||||
@@ -62,6 +62,7 @@ mcp = ["mcp>=1.2.0,<2"]
|
||||
homeassistant = ["aiohttp>=3.9.0,<4"]
|
||||
sms = ["aiohttp>=3.9.0,<4"]
|
||||
acp = ["agent-client-protocol>=0.9.0,<1.0"]
|
||||
mistral = ["mistralai>=2.3.0,<3"]
|
||||
dingtalk = ["dingtalk-stream>=0.1.0,<1"]
|
||||
feishu = ["lark-oapi>=1.5.3,<2"]
|
||||
rl = [
|
||||
@@ -94,6 +95,7 @@ all = [
|
||||
"hermes-agent[voice]",
|
||||
"hermes-agent[dingtalk]",
|
||||
"hermes-agent[feishu]",
|
||||
"hermes-agent[mistral]",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
+443
-152
@@ -66,7 +66,7 @@ from model_tools import (
|
||||
handle_function_call,
|
||||
check_toolset_requirements,
|
||||
)
|
||||
from tools.terminal_tool import cleanup_vm, get_active_env
|
||||
from tools.terminal_tool import cleanup_vm, get_active_env, is_persistent_env
|
||||
from tools.tool_result_storage import maybe_persist_tool_result, enforce_turn_budget
|
||||
from tools.interrupt import set_interrupt as _set_interrupt
|
||||
from tools.browser_tool import cleanup_browser
|
||||
@@ -77,6 +77,7 @@ from hermes_constants import OPENROUTER_BASE_URL
|
||||
# Agent internals extracted to agent/ package for modularity
|
||||
from agent.memory_manager import build_memory_context_block
|
||||
from agent.retry_utils import jittered_backoff
|
||||
from agent.error_classifier import classify_api_error, FailoverReason
|
||||
from agent.prompt_builder import (
|
||||
DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
|
||||
MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
|
||||
@@ -86,6 +87,7 @@ from agent.model_metadata import (
|
||||
fetch_model_metadata,
|
||||
estimate_tokens_rough, estimate_messages_tokens_rough, estimate_request_tokens_rough,
|
||||
get_next_probe_tier, parse_context_limit_from_error,
|
||||
parse_available_output_tokens_from_error,
|
||||
save_context_length, is_local_endpoint,
|
||||
query_ollama_num_ctx,
|
||||
)
|
||||
@@ -413,6 +415,27 @@ def _strip_budget_warnings_from_history(messages: list) -> None:
|
||||
# =========================================================================
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Qwen Portal headers — mimics QwenCode CLI for portal.qwen.ai compatibility.
|
||||
# Extracted as a module-level helper so both __init__ and
|
||||
# _apply_client_headers_for_base_url can share it.
|
||||
# =========================================================================
|
||||
_QWEN_CODE_VERSION = "0.14.1"
|
||||
|
||||
|
||||
def _qwen_portal_headers() -> dict:
|
||||
"""Return default HTTP headers required by Qwen Portal API."""
|
||||
import platform as _plat
|
||||
|
||||
_ua = f"QwenCode/{_QWEN_CODE_VERSION} ({_plat.system().lower()}; {_plat.machine()})"
|
||||
return {
|
||||
"User-Agent": _ua,
|
||||
"X-DashScope-CacheControl": "enable",
|
||||
"X-DashScope-UserAgent": _ua,
|
||||
"X-DashScope-AuthType": "qwen-oauth",
|
||||
}
|
||||
|
||||
|
||||
class AIAgent:
|
||||
"""
|
||||
AI Agent with tool calling capabilities.
|
||||
@@ -421,6 +444,13 @@ class AIAgent:
|
||||
for AI models that support function calling.
|
||||
"""
|
||||
|
||||
# ── Class-level context pressure dedup (survives across instances) ──
|
||||
# The gateway creates a new AIAgent per message, so instance-level flags
|
||||
# reset every time. This dict tracks {session_id: (warn_level, timestamp)}
|
||||
# to suppress duplicate warnings within a cooldown window.
|
||||
_context_pressure_last_warned: dict = {}
|
||||
_CONTEXT_PRESSURE_COOLDOWN = 300 # seconds between re-warning same session
|
||||
|
||||
@property
|
||||
def base_url(self) -> str:
|
||||
return self._base_url
|
||||
@@ -652,7 +682,8 @@ class AIAgent:
|
||||
# Context pressure warnings: notify the USER (not the LLM) as context
|
||||
# fills up. Purely informational — displayed in CLI output and sent via
|
||||
# status_callback for gateway platforms. Does NOT inject into messages.
|
||||
self._context_pressure_warned = False
|
||||
# Tiered: fires at 85% and again at 95% of compaction threshold.
|
||||
self._context_pressure_warned_at = 0.0 # highest tier already shown
|
||||
|
||||
# Activity tracking — updated on each API call, tool execution, and
|
||||
# stream chunk. Used by the gateway timeout handler to report what the
|
||||
@@ -663,6 +694,10 @@ class AIAgent:
|
||||
self._current_tool: str | None = None
|
||||
self._api_call_count: int = 0
|
||||
|
||||
# Rate limit tracking — updated from x-ratelimit-* response headers
|
||||
# after each API call. Accessed by /usage slash command.
|
||||
self._rate_limit_state: Optional["RateLimitState"] = None
|
||||
|
||||
# Centralized logging — agent.log (INFO+) and errors.log (WARNING+)
|
||||
# both live under ~/.hermes/logs/. Idempotent, so gateway mode
|
||||
# (which creates a new AIAgent per message) won't duplicate handlers.
|
||||
@@ -756,6 +791,8 @@ class AIAgent:
|
||||
client_kwargs["default_headers"] = {
|
||||
"User-Agent": "KimiCLI/1.3",
|
||||
}
|
||||
elif "portal.qwen.ai" in effective_base.lower():
|
||||
client_kwargs["default_headers"] = _qwen_portal_headers()
|
||||
else:
|
||||
# No explicit creds — use the centralized provider router
|
||||
from agent.auxiliary_client import resolve_provider_client
|
||||
@@ -1664,9 +1701,25 @@ class AIAgent:
|
||||
return None
|
||||
|
||||
def _cleanup_task_resources(self, task_id: str) -> None:
|
||||
"""Clean up VM and browser resources for a given task."""
|
||||
"""Clean up VM and browser resources for a given task.
|
||||
|
||||
Skips ``cleanup_vm`` when the active terminal environment is marked
|
||||
persistent (``persistent_filesystem=True``) so that long-lived sandbox
|
||||
containers survive between turns. The idle reaper in
|
||||
``terminal_tool._cleanup_inactive_envs`` still tears them down once
|
||||
``terminal.lifetime_seconds`` is exceeded. Non-persistent backends are
|
||||
torn down per-turn as before to prevent resource leakage (the original
|
||||
intent of this hook for the Morph backend, see commit fbd3a2fd).
|
||||
"""
|
||||
try:
|
||||
cleanup_vm(task_id)
|
||||
if is_persistent_env(task_id):
|
||||
if self.verbose_logging:
|
||||
logging.debug(
|
||||
f"Skipping per-turn cleanup_vm for persistent env {task_id}; "
|
||||
f"idle reaper will handle it."
|
||||
)
|
||||
else:
|
||||
cleanup_vm(task_id)
|
||||
except Exception as e:
|
||||
if self.verbose_logging:
|
||||
logging.warning(f"Failed to cleanup VM for task {task_id}: {e}")
|
||||
@@ -2498,6 +2551,29 @@ class AIAgent:
|
||||
self._last_activity_ts = time.time()
|
||||
self._last_activity_desc = desc
|
||||
|
||||
def _capture_rate_limits(self, http_response: Any) -> None:
|
||||
"""Parse x-ratelimit-* headers from an HTTP response and cache the state.
|
||||
|
||||
Called after each streaming API call. The httpx Response object is
|
||||
available on the OpenAI SDK Stream via ``stream.response``.
|
||||
"""
|
||||
if http_response is None:
|
||||
return
|
||||
headers = getattr(http_response, "headers", None)
|
||||
if not headers:
|
||||
return
|
||||
try:
|
||||
from agent.rate_limit_tracker import parse_rate_limit_headers
|
||||
state = parse_rate_limit_headers(headers, provider=self.provider)
|
||||
if state is not None:
|
||||
self._rate_limit_state = state
|
||||
except Exception:
|
||||
pass # Never let header parsing break the agent loop
|
||||
|
||||
def get_rate_limit_state(self):
|
||||
"""Return the last captured RateLimitState, or None."""
|
||||
return self._rate_limit_state
|
||||
|
||||
def get_activity_summary(self) -> dict:
|
||||
"""Return a snapshot of the agent's current activity for diagnostics.
|
||||
|
||||
@@ -4080,6 +4156,8 @@ class AIAgent:
|
||||
self._client_kwargs["default_headers"] = copilot_default_headers()
|
||||
elif "api.kimi.com" in normalized:
|
||||
self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
elif "portal.qwen.ai" in normalized:
|
||||
self._client_kwargs["default_headers"] = _qwen_portal_headers()
|
||||
else:
|
||||
self._client_kwargs.pop("default_headers", None)
|
||||
|
||||
@@ -4350,6 +4428,11 @@ class AIAgent:
|
||||
self._touch_activity("waiting for provider response (streaming)")
|
||||
stream = request_client_holder["client"].chat.completions.create(**stream_kwargs)
|
||||
|
||||
# Capture rate limit headers from the initial HTTP response.
|
||||
# The OpenAI SDK Stream object exposes the underlying httpx
|
||||
# response via .response before any chunks are consumed.
|
||||
self._capture_rate_limits(getattr(stream, "response", None))
|
||||
|
||||
content_parts: list = []
|
||||
tool_calls_acc: dict = {}
|
||||
tool_gen_notified: set = set()
|
||||
@@ -4703,18 +4786,25 @@ class AIAgent:
|
||||
self._close_request_openai_client(request_client, reason="stream_request_complete")
|
||||
|
||||
_stream_stale_timeout_base = float(os.getenv("HERMES_STREAM_STALE_TIMEOUT", 180.0))
|
||||
# Scale the stale timeout for large contexts: slow models (like Opus)
|
||||
# can legitimately think for minutes before producing the first token
|
||||
# when the context is large. Without this, the stale detector kills
|
||||
# healthy connections during the model's thinking phase, producing
|
||||
# spurious RemoteProtocolError ("peer closed connection").
|
||||
_est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
|
||||
if _est_tokens > 100_000:
|
||||
_stream_stale_timeout = max(_stream_stale_timeout_base, 300.0)
|
||||
elif _est_tokens > 50_000:
|
||||
_stream_stale_timeout = max(_stream_stale_timeout_base, 240.0)
|
||||
# Local providers (Ollama, oMLX, llama-cpp) can take 300+ seconds
|
||||
# for prefill on large contexts. Disable the stale detector unless
|
||||
# the user explicitly set HERMES_STREAM_STALE_TIMEOUT.
|
||||
if _stream_stale_timeout_base == 180.0 and self.base_url and is_local_endpoint(self.base_url):
|
||||
_stream_stale_timeout = float("inf")
|
||||
logger.debug("Local provider detected (%s) — stale stream timeout disabled", self.base_url)
|
||||
else:
|
||||
_stream_stale_timeout = _stream_stale_timeout_base
|
||||
# Scale the stale timeout for large contexts: slow models (like Opus)
|
||||
# can legitimately think for minutes before producing the first token
|
||||
# when the context is large. Without this, the stale detector kills
|
||||
# healthy connections during the model's thinking phase, producing
|
||||
# spurious RemoteProtocolError ("peer closed connection").
|
||||
_est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
|
||||
if _est_tokens > 100_000:
|
||||
_stream_stale_timeout = max(_stream_stale_timeout_base, 300.0)
|
||||
elif _est_tokens > 50_000:
|
||||
_stream_stale_timeout = max(_stream_stale_timeout_base, 240.0)
|
||||
else:
|
||||
_stream_stale_timeout = _stream_stale_timeout_base
|
||||
|
||||
t = threading.Thread(target=_call, daemon=True)
|
||||
t.start()
|
||||
@@ -4870,7 +4960,7 @@ class AIAgent:
|
||||
effective_key = (fb_client.api_key or resolve_anthropic_token() or "") if fb_provider == "anthropic" else (fb_client.api_key or "")
|
||||
self.api_key = effective_key
|
||||
self._anthropic_api_key = effective_key
|
||||
self._anthropic_base_url = getattr(fb_client, "base_url", None)
|
||||
self._anthropic_base_url = fb_base_url
|
||||
self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url)
|
||||
self._is_anthropic_oauth = _is_oauth_token(effective_key)
|
||||
self.client = None
|
||||
@@ -4879,9 +4969,21 @@ class AIAgent:
|
||||
# Swap OpenAI client and config in-place
|
||||
self.api_key = fb_client.api_key
|
||||
self.client = fb_client
|
||||
# Preserve provider-specific headers that
|
||||
# resolve_provider_client() may have baked into
|
||||
# fb_client via the default_headers kwarg. The OpenAI
|
||||
# SDK stores these in _custom_headers. Without this,
|
||||
# subsequent request-client rebuilds (via
|
||||
# _create_request_openai_client) drop the headers,
|
||||
# causing 403s from providers like Kimi Coding that
|
||||
# require a User-Agent sentinel.
|
||||
fb_headers = getattr(fb_client, "_custom_headers", None)
|
||||
if not fb_headers:
|
||||
fb_headers = getattr(fb_client, "default_headers", None)
|
||||
self._client_kwargs = {
|
||||
"api_key": fb_client.api_key,
|
||||
"base_url": fb_base_url,
|
||||
**({"default_headers": dict(fb_headers)} if fb_headers else {}),
|
||||
}
|
||||
|
||||
# Re-evaluate prompt caching for the new provider/model
|
||||
@@ -5226,24 +5328,97 @@ class AIAgent:
|
||||
base = (getattr(self, "base_url", "") or "").lower()
|
||||
return "dashscope" in base or "aliyuncs" in base or "opencode.ai/zen/go" in base
|
||||
|
||||
def _is_qwen_portal(self) -> bool:
|
||||
"""Return True when the base URL targets Qwen Portal."""
|
||||
return "portal.qwen.ai" in self._base_url_lower
|
||||
|
||||
def _qwen_prepare_chat_messages(self, api_messages: list) -> list:
|
||||
prepared = copy.deepcopy(api_messages)
|
||||
if not prepared:
|
||||
return prepared
|
||||
|
||||
for msg in prepared:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if isinstance(content, str):
|
||||
msg["content"] = [{"type": "text", "text": content}]
|
||||
elif isinstance(content, list):
|
||||
# Normalize: convert bare strings to text dicts, keep dicts as-is.
|
||||
# deepcopy already created independent copies, no need for dict().
|
||||
normalized_parts = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
normalized_parts.append({"type": "text", "text": part})
|
||||
elif isinstance(part, dict):
|
||||
normalized_parts.append(part)
|
||||
if normalized_parts:
|
||||
msg["content"] = normalized_parts
|
||||
|
||||
# Inject cache_control on the last part of the system message.
|
||||
for msg in prepared:
|
||||
if isinstance(msg, dict) and msg.get("role") == "system":
|
||||
content = msg.get("content")
|
||||
if isinstance(content, list) and content and isinstance(content[-1], dict):
|
||||
content[-1]["cache_control"] = {"type": "ephemeral"}
|
||||
break
|
||||
|
||||
return prepared
|
||||
|
||||
def _qwen_prepare_chat_messages_inplace(self, messages: list) -> None:
|
||||
"""In-place variant — mutates an already-copied message list."""
|
||||
if not messages:
|
||||
return
|
||||
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if isinstance(content, str):
|
||||
msg["content"] = [{"type": "text", "text": content}]
|
||||
elif isinstance(content, list):
|
||||
normalized_parts = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
normalized_parts.append({"type": "text", "text": part})
|
||||
elif isinstance(part, dict):
|
||||
normalized_parts.append(part)
|
||||
if normalized_parts:
|
||||
msg["content"] = normalized_parts
|
||||
|
||||
for msg in messages:
|
||||
if isinstance(msg, dict) and msg.get("role") == "system":
|
||||
content = msg.get("content")
|
||||
if isinstance(content, list) and content and isinstance(content[-1], dict):
|
||||
content[-1]["cache_control"] = {"type": "ephemeral"}
|
||||
break
|
||||
|
||||
def _build_api_kwargs(self, api_messages: list) -> dict:
|
||||
"""Build the keyword arguments dict for the active API mode."""
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages)
|
||||
# Pass context_length so the adapter can clamp max_tokens if the
|
||||
# user configured a smaller context window than the model's output limit.
|
||||
# Pass context_length (total input+output window) so the adapter can
|
||||
# clamp max_tokens (output cap) when the user configured a smaller
|
||||
# context window than the model's native output limit.
|
||||
ctx_len = getattr(self, "context_compressor", None)
|
||||
ctx_len = ctx_len.context_length if ctx_len else None
|
||||
# _ephemeral_max_output_tokens is set for one call when the API
|
||||
# returns "max_tokens too large given prompt" — it caps output to
|
||||
# the available window space without touching context_length.
|
||||
ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
|
||||
if ephemeral_out is not None:
|
||||
self._ephemeral_max_output_tokens = None # consume immediately
|
||||
return build_anthropic_kwargs(
|
||||
model=self.model,
|
||||
messages=anthropic_messages,
|
||||
tools=self.tools,
|
||||
max_tokens=self.max_tokens,
|
||||
max_tokens=ephemeral_out if ephemeral_out is not None else self.max_tokens,
|
||||
reasoning_config=self.reasoning_config,
|
||||
is_oauth=self._is_anthropic_oauth,
|
||||
preserve_dots=self._anthropic_preserve_dots(),
|
||||
context_length=ctx_len,
|
||||
base_url=getattr(self, "_anthropic_base_url", None),
|
||||
)
|
||||
|
||||
if self.api_mode == "codex_responses":
|
||||
@@ -5337,6 +5512,17 @@ class AIAgent:
|
||||
tool_call.pop("call_id", None)
|
||||
tool_call.pop("response_item_id", None)
|
||||
|
||||
# Qwen portal: normalize content to list-of-dicts, inject cache_control.
|
||||
# Must run AFTER codex sanitization so we transform the final messages.
|
||||
# If sanitization already deepcopied, reuse that copy (in-place).
|
||||
if self._is_qwen_portal():
|
||||
if sanitized_messages is api_messages:
|
||||
# No sanitization was done — we need our own copy.
|
||||
sanitized_messages = self._qwen_prepare_chat_messages(sanitized_messages)
|
||||
else:
|
||||
# Already a deepcopy — transform in place to avoid a second deepcopy.
|
||||
self._qwen_prepare_chat_messages_inplace(sanitized_messages)
|
||||
|
||||
# GPT-5 and Codex models respond better to 'developer' than 'system'
|
||||
# for instruction-following. Swap the role at the API boundary so
|
||||
# internal message representation stays uniform ("system").
|
||||
@@ -5369,11 +5555,17 @@ class AIAgent:
|
||||
"messages": sanitized_messages,
|
||||
"timeout": float(os.getenv("HERMES_API_TIMEOUT", 1800.0)),
|
||||
}
|
||||
if self._is_qwen_portal():
|
||||
api_kwargs["metadata"] = {
|
||||
"sessionId": self.session_id or "hermes",
|
||||
"promptId": str(uuid.uuid4()),
|
||||
}
|
||||
if self.tools:
|
||||
api_kwargs["tools"] = self.tools
|
||||
|
||||
if self.max_tokens is not None:
|
||||
api_kwargs.update(self._max_tokens_param(self.max_tokens))
|
||||
if not self._is_qwen_portal():
|
||||
api_kwargs.update(self._max_tokens_param(self.max_tokens))
|
||||
elif self._is_openrouter_url() and "claude" in (self.model or "").lower():
|
||||
# OpenRouter translates requests to Anthropic's Messages API,
|
||||
# which requires max_tokens as a mandatory field. When we omit
|
||||
@@ -5438,6 +5630,9 @@ class AIAgent:
|
||||
options["num_ctx"] = self._ollama_num_ctx
|
||||
extra_body["options"] = options
|
||||
|
||||
if self._is_qwen_portal():
|
||||
extra_body["vl_high_resolution_images"] = True
|
||||
|
||||
if extra_body:
|
||||
api_kwargs["extra_body"] = extra_body
|
||||
|
||||
@@ -5753,7 +5948,7 @@ class AIAgent:
|
||||
tools=[memory_tool_def],
|
||||
temperature=0.3,
|
||||
max_tokens=5120,
|
||||
timeout=30.0,
|
||||
# timeout resolved from auxiliary.flush_memories.timeout config
|
||||
)
|
||||
except RuntimeError:
|
||||
_aux_available = False
|
||||
@@ -5785,7 +5980,10 @@ class AIAgent:
|
||||
"temperature": 0.3,
|
||||
**self._max_tokens_param(5120),
|
||||
}
|
||||
response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(**api_kwargs, timeout=30.0)
|
||||
from agent.auxiliary_client import _get_task_timeout
|
||||
response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(
|
||||
**api_kwargs, timeout=_get_task_timeout("flush_memories")
|
||||
)
|
||||
|
||||
# Extract tool calls from the response, handling all API formats
|
||||
tool_calls = []
|
||||
@@ -5892,6 +6090,15 @@ class AIAgent:
|
||||
except Exception as e:
|
||||
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
|
||||
|
||||
# Warn on repeated compressions (quality degrades with each pass)
|
||||
_cc = self.context_compressor.compression_count
|
||||
if _cc >= 2:
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Session compressed {_cc} times — "
|
||||
f"accuracy may degrade. Consider /new to start fresh.",
|
||||
force=True,
|
||||
)
|
||||
|
||||
# Update token estimate after compaction so pressure calculations
|
||||
# use the post-compression count, not the stale pre-compression one.
|
||||
_compressed_est = (
|
||||
@@ -5904,12 +6111,16 @@ class AIAgent:
|
||||
# Only reset the pressure warning if compression actually brought
|
||||
# us below the warning level (85% of threshold). When compression
|
||||
# can't reduce enough (e.g. threshold is very low, or system prompt
|
||||
# alone exceeds the warning level), keep the flag set to prevent
|
||||
# alone exceeds the warning level), keep the tier set to prevent
|
||||
# spamming the user with repeated warnings every loop iteration.
|
||||
if self.context_compressor.threshold_tokens > 0:
|
||||
_post_progress = _compressed_est / self.context_compressor.threshold_tokens
|
||||
if _post_progress < 0.85:
|
||||
self._context_pressure_warned = False
|
||||
self._context_pressure_warned_at = 0.0
|
||||
# Clear class-level dedup for this session so a fresh
|
||||
# warning cycle can start if context grows again.
|
||||
_sid = self.session_id or "default"
|
||||
AIAgent._context_pressure_last_warned.pop(_sid, None)
|
||||
|
||||
# Clear the file-read dedup cache. After compression the original
|
||||
# read content is summarised away — if the model re-reads the same
|
||||
@@ -7091,6 +7302,7 @@ class AIAgent:
|
||||
length_continue_retries = 0
|
||||
truncated_response_prefix = ""
|
||||
compression_attempts = 0
|
||||
_turn_exit_reason = "unknown" # Diagnostic: why the loop ended
|
||||
|
||||
# Clear any stale interrupt state at start
|
||||
self.clear_interrupt()
|
||||
@@ -7115,6 +7327,7 @@ class AIAgent:
|
||||
# Check for interrupt request (e.g., user sent new message)
|
||||
if self._interrupt_requested:
|
||||
interrupted = True
|
||||
_turn_exit_reason = "interrupted_by_user"
|
||||
if not self.quiet_mode:
|
||||
self._safe_print("\n⚡ Breaking out of tool loop due to interrupt...")
|
||||
break
|
||||
@@ -7123,6 +7336,7 @@ class AIAgent:
|
||||
self._api_call_count = api_call_count
|
||||
self._touch_activity(f"starting API call #{api_call_count}")
|
||||
if not self.iteration_budget.consume():
|
||||
_turn_exit_reason = "budget_exhausted"
|
||||
if not self.quiet_mode:
|
||||
self._safe_print(f"\n⚠️ Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)")
|
||||
break
|
||||
@@ -7827,6 +8041,25 @@ class AIAgent:
|
||||
|
||||
status_code = getattr(api_error, "status_code", None)
|
||||
error_context = self._extract_api_error_context(api_error)
|
||||
|
||||
# ── Classify the error for structured recovery decisions ──
|
||||
_compressor = getattr(self, "context_compressor", None)
|
||||
_ctx_len = getattr(_compressor, "context_length", 200000) if _compressor else 200000
|
||||
classified = classify_api_error(
|
||||
api_error,
|
||||
provider=getattr(self, "provider", "") or "",
|
||||
model=getattr(self, "model", "") or "",
|
||||
approx_tokens=approx_tokens,
|
||||
context_length=_ctx_len,
|
||||
num_messages=len(api_messages) if api_messages else 0,
|
||||
)
|
||||
logger.debug(
|
||||
"Error classified: reason=%s status=%s retryable=%s compress=%s rotate=%s fallback=%s",
|
||||
classified.reason.value, classified.status_code,
|
||||
classified.retryable, classified.should_compress,
|
||||
classified.should_rotate_credential, classified.should_fallback,
|
||||
)
|
||||
|
||||
recovered_with_pool, has_retried_429 = self._recover_with_credential_pool(
|
||||
status_code=status_code,
|
||||
has_retried_429=has_retried_429,
|
||||
@@ -7889,27 +8122,24 @@ class AIAgent:
|
||||
# from all messages so the next retry sends no thinking
|
||||
# blocks at all. One-shot — don't retry infinitely.
|
||||
if (
|
||||
self.api_mode == "anthropic_messages"
|
||||
and status_code == 400
|
||||
classified.reason == FailoverReason.thinking_signature
|
||||
and not thinking_sig_retry_attempted
|
||||
):
|
||||
_err_msg_lower = str(api_error).lower()
|
||||
if "signature" in _err_msg_lower and "thinking" in _err_msg_lower:
|
||||
thinking_sig_retry_attempted = True
|
||||
for _m in messages:
|
||||
if isinstance(_m, dict):
|
||||
_m.pop("reasoning_details", None)
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Thinking block signature invalid — "
|
||||
f"stripped all thinking blocks, retrying...",
|
||||
force=True,
|
||||
)
|
||||
logging.warning(
|
||||
"%sThinking block signature recovery: stripped "
|
||||
"reasoning_details from %d messages",
|
||||
self.log_prefix, len(messages),
|
||||
)
|
||||
continue
|
||||
thinking_sig_retry_attempted = True
|
||||
for _m in messages:
|
||||
if isinstance(_m, dict):
|
||||
_m.pop("reasoning_details", None)
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Thinking block signature invalid — "
|
||||
f"stripped all thinking blocks, retrying...",
|
||||
force=True,
|
||||
)
|
||||
logging.warning(
|
||||
"%sThinking block signature recovery: stripped "
|
||||
"reasoning_details from %d messages",
|
||||
self.log_prefix, len(messages),
|
||||
)
|
||||
continue
|
||||
|
||||
retry_count += 1
|
||||
elapsed_time = time.time() - api_start_time
|
||||
@@ -7966,14 +8196,7 @@ class AIAgent:
|
||||
# is NOT a transient rate limit — retrying or switching
|
||||
# credentials won't help. Reduce context to 200k (the
|
||||
# standard tier) and compress.
|
||||
# Only applies to Sonnet — Opus 1M is general access.
|
||||
_is_long_context_tier_error = (
|
||||
status_code == 429
|
||||
and "extra usage" in error_msg
|
||||
and "long context" in error_msg
|
||||
and "sonnet" in self.model.lower()
|
||||
)
|
||||
if _is_long_context_tier_error:
|
||||
if classified.reason == FailoverReason.long_context_tier:
|
||||
_reduced_ctx = 200000
|
||||
compressor = self.context_compressor
|
||||
old_ctx = compressor.context_length
|
||||
@@ -8018,13 +8241,9 @@ class AIAgent:
|
||||
# When a fallback model is configured, switch immediately instead
|
||||
# of burning through retries with exponential backoff -- the
|
||||
# primary provider won't recover within the retry window.
|
||||
is_rate_limited = (
|
||||
status_code == 429
|
||||
or "rate limit" in error_msg
|
||||
or "too many requests" in error_msg
|
||||
or "rate_limit" in error_msg
|
||||
or "usage limit" in error_msg
|
||||
or "quota" in error_msg
|
||||
is_rate_limited = classified.reason in (
|
||||
FailoverReason.rate_limit,
|
||||
FailoverReason.billing,
|
||||
)
|
||||
if is_rate_limited and self._fallback_index < len(self._fallback_chain):
|
||||
# Don't eagerly fallback if credential pool rotation may
|
||||
@@ -8040,10 +8259,7 @@ class AIAgent:
|
||||
continue
|
||||
|
||||
is_payload_too_large = (
|
||||
status_code == 413
|
||||
or 'request entity too large' in error_msg
|
||||
or 'payload too large' in error_msg
|
||||
or 'error code: 413' in error_msg
|
||||
classified.reason == FailoverReason.payload_too_large
|
||||
)
|
||||
|
||||
if is_payload_too_large:
|
||||
@@ -8087,69 +8303,59 @@ class AIAgent:
|
||||
}
|
||||
|
||||
# Check for context-length errors BEFORE generic 4xx handler.
|
||||
# Local backends (LM Studio, Ollama, llama.cpp) often return
|
||||
# HTTP 400 with messages like "Context size has been exceeded"
|
||||
# which must trigger compression, not an immediate abort.
|
||||
is_context_length_error = any(phrase in error_msg for phrase in [
|
||||
'context length', 'context size', 'maximum context',
|
||||
'token limit', 'too many tokens', 'reduce the length',
|
||||
'exceeds the limit', 'context window',
|
||||
'request entity too large', # OpenRouter/Nous 413 safety net
|
||||
'prompt is too long', # Anthropic: "prompt is too long: N tokens > M maximum"
|
||||
'prompt exceeds max length', # Z.AI / GLM: generic 400 overflow wording
|
||||
])
|
||||
|
||||
# Fallback heuristic: Anthropic sometimes returns a generic
|
||||
# 400 invalid_request_error with just "Error" as the message
|
||||
# when the context is too large. If the error message is very
|
||||
# short/generic AND the session is large, treat it as a
|
||||
# probable context-length error and attempt compression rather
|
||||
# than aborting. This prevents an infinite failure loop where
|
||||
# each failed message gets persisted, making the session even
|
||||
# larger. (#1630)
|
||||
if not is_context_length_error and status_code == 400:
|
||||
ctx_len = getattr(getattr(self, 'context_compressor', None), 'context_length', 200000)
|
||||
is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
|
||||
is_generic_error = len(error_msg.strip()) < 30 # e.g. just "error"
|
||||
if is_large_session and is_generic_error:
|
||||
is_context_length_error = True
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Generic 400 with large session "
|
||||
f"(~{approx_tokens:,} tokens, {len(api_messages)} msgs) — "
|
||||
f"treating as probable context overflow.",
|
||||
force=True,
|
||||
)
|
||||
|
||||
# Server disconnects on large sessions are often caused by
|
||||
# the request exceeding the provider's context/payload limit
|
||||
# without a proper HTTP error response. Treat these as
|
||||
# context-length errors to trigger compression rather than
|
||||
# burning through retries that will all fail the same way.
|
||||
# This breaks the death spiral: disconnect → no token data
|
||||
# → no compression → bigger session → more disconnects.
|
||||
# (#2153)
|
||||
if not is_context_length_error and not status_code:
|
||||
_is_server_disconnect = (
|
||||
'server disconnected' in error_msg
|
||||
or 'peer closed connection' in error_msg
|
||||
or error_type in ('ReadError', 'RemoteProtocolError', 'ServerDisconnectedError')
|
||||
)
|
||||
if _is_server_disconnect:
|
||||
ctx_len = getattr(getattr(self, 'context_compressor', None), 'context_length', 200000)
|
||||
_is_large = approx_tokens > ctx_len * 0.6 or len(api_messages) > 200
|
||||
if _is_large:
|
||||
is_context_length_error = True
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Server disconnected with large session "
|
||||
f"(~{approx_tokens:,} tokens, {len(api_messages)} msgs) — "
|
||||
f"treating as context-length error, attempting compression.",
|
||||
force=True,
|
||||
)
|
||||
# The classifier detects context overflow from: explicit error
|
||||
# messages, generic 400 + large session heuristic (#1630), and
|
||||
# server disconnect + large session pattern (#2153).
|
||||
is_context_length_error = (
|
||||
classified.reason == FailoverReason.context_overflow
|
||||
)
|
||||
|
||||
if is_context_length_error:
|
||||
compressor = self.context_compressor
|
||||
old_ctx = compressor.context_length
|
||||
|
||||
# ── Distinguish two very different errors ───────────
|
||||
# 1. "Prompt too long": the INPUT exceeds the context window.
|
||||
# Fix: reduce context_length + compress history.
|
||||
# 2. "max_tokens too large": input is fine, but
|
||||
# input_tokens + requested max_tokens > context_window.
|
||||
# Fix: reduce max_tokens (the OUTPUT cap) for this call.
|
||||
# Do NOT shrink context_length — the window is unchanged.
|
||||
#
|
||||
# Note: max_tokens = output token cap (one response).
|
||||
# context_length = total window (input + output combined).
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
if available_out is not None:
|
||||
# Error is purely about the output cap being too large.
|
||||
# Cap output to the available space and retry without
|
||||
# touching context_length or triggering compression.
|
||||
safe_out = max(1, available_out - 64) # small safety margin
|
||||
self._ephemeral_max_output_tokens = safe_out
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Output cap too large for current prompt — "
|
||||
f"retrying with max_tokens={safe_out:,} "
|
||||
f"(available_tokens={available_out:,}; context_length unchanged at {old_ctx:,})",
|
||||
force=True,
|
||||
)
|
||||
# Still count against compression_attempts so we don't
|
||||
# loop forever if the error keeps recurring.
|
||||
compression_attempts += 1
|
||||
if compression_attempts > max_compression_attempts:
|
||||
self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
|
||||
self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
|
||||
"partial": True
|
||||
}
|
||||
restart_with_compressed_messages = True
|
||||
break
|
||||
|
||||
# Error is about the INPUT being too large — reduce context_length.
|
||||
# Try to parse the actual limit from the error message
|
||||
parsed_limit = parse_context_limit_from_error(error_msg)
|
||||
if parsed_limit and parsed_limit < old_ctx:
|
||||
@@ -8216,35 +8422,30 @@ class AIAgent:
|
||||
"partial": True
|
||||
}
|
||||
|
||||
# Check for non-retryable client errors (4xx HTTP status codes).
|
||||
# These indicate a problem with the request itself (bad model ID,
|
||||
# invalid API key, forbidden, etc.) and will never succeed on retry.
|
||||
# Note: 413 and context-length errors are excluded — handled above.
|
||||
# 429 (rate limit) is transient and MUST be retried with backoff.
|
||||
# 529 (Anthropic overloaded) is also transient.
|
||||
# Also catch local validation errors (ValueError, TypeError) — these
|
||||
# are programming bugs, not transient failures.
|
||||
# Exclude UnicodeEncodeError — it's a ValueError subclass but is
|
||||
# handled separately by the surrogate sanitization path above.
|
||||
_RETRYABLE_STATUS_CODES = {413, 429, 529}
|
||||
# Check for non-retryable client errors. The classifier
|
||||
# already accounts for 413, 429, 529 (transient), context
|
||||
# overflow, and generic-400 heuristics. Local validation
|
||||
# errors (ValueError, TypeError) are programming bugs.
|
||||
is_local_validation_error = (
|
||||
isinstance(api_error, (ValueError, TypeError))
|
||||
and not isinstance(api_error, UnicodeEncodeError)
|
||||
)
|
||||
# Detect generic 400s from Anthropic OAuth (transient server-side failures).
|
||||
# Real invalid_request_error responses include a descriptive message;
|
||||
# transient ones contain only "Error" or are empty. (ref: issue #1608)
|
||||
_err_body = getattr(api_error, "body", None) or {}
|
||||
_err_message = (_err_body.get("error", {}).get("message", "") if isinstance(_err_body, dict) else "")
|
||||
_is_generic_400 = (status_code == 400 and _err_message.strip().lower() in ("error", ""))
|
||||
is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code not in _RETRYABLE_STATUS_CODES and not _is_generic_400
|
||||
is_client_error = (is_local_validation_error or is_client_status_error or any(phrase in error_msg for phrase in [
|
||||
'error code: 401', 'error code: 403',
|
||||
'error code: 404', 'error code: 422',
|
||||
'is not a valid model', 'invalid model', 'model not found',
|
||||
'invalid api key', 'invalid_api_key', 'authentication',
|
||||
'unauthorized', 'forbidden', 'not found',
|
||||
])) and not is_context_length_error
|
||||
is_client_error = (
|
||||
is_local_validation_error
|
||||
or (
|
||||
not classified.retryable
|
||||
and not classified.should_compress
|
||||
and classified.reason not in (
|
||||
FailoverReason.rate_limit,
|
||||
FailoverReason.billing,
|
||||
FailoverReason.overloaded,
|
||||
FailoverReason.context_overflow,
|
||||
FailoverReason.payload_too_large,
|
||||
FailoverReason.long_context_tier,
|
||||
FailoverReason.thinking_signature,
|
||||
)
|
||||
)
|
||||
) and not is_context_length_error
|
||||
|
||||
if is_client_error:
|
||||
# Try fallback before aborting — a different provider
|
||||
@@ -8264,7 +8465,7 @@ class AIAgent:
|
||||
self._vprint(f"{self.log_prefix} 🔌 Provider: {_provider} Model: {_model}", force=True)
|
||||
self._vprint(f"{self.log_prefix} 🌐 Endpoint: {_base}", force=True)
|
||||
# Actionable guidance for common auth errors
|
||||
if status_code in (401, 403) or "unauthorized" in error_msg or "forbidden" in error_msg or "permission" in error_msg:
|
||||
if classified.is_auth or classified.reason == FailoverReason.billing:
|
||||
if _provider == "openai-codex" and status_code == 401:
|
||||
self._vprint(f"{self.log_prefix} 💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True)
|
||||
self._vprint(f"{self.log_prefix} refreshed by another client (Codex CLI, VS Code). To fix:", force=True)
|
||||
@@ -8424,6 +8625,7 @@ class AIAgent:
|
||||
|
||||
# If the API call was interrupted, skip response processing
|
||||
if interrupted:
|
||||
_turn_exit_reason = "interrupted_during_api_call"
|
||||
break
|
||||
|
||||
if restart_with_compressed_messages:
|
||||
@@ -8443,6 +8645,7 @@ class AIAgent:
|
||||
# (e.g. repeated context-length errors that exhausted retry_count),
|
||||
# the `response` variable is still None. Break out cleanly.
|
||||
if response is None:
|
||||
_turn_exit_reason = "all_retries_exhausted_no_response"
|
||||
print(f"{self.log_prefix}❌ All API retries exhausted with no successful response.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
break
|
||||
@@ -8849,13 +9052,34 @@ class AIAgent:
|
||||
# compaction fires, not the raw context window.
|
||||
# Does not inject into messages — just prints to CLI output
|
||||
# and fires status_callback for gateway platforms.
|
||||
# Tiered: 85% (orange) and 95% (red/critical).
|
||||
if _compressor.threshold_tokens > 0:
|
||||
_compaction_progress = _real_tokens / _compressor.threshold_tokens
|
||||
if _compaction_progress >= 0.85 and not self._context_pressure_warned:
|
||||
self._context_pressure_warned = True
|
||||
self._emit_context_pressure(_compaction_progress, _compressor)
|
||||
# Determine the warning tier for this progress level
|
||||
_warn_tier = 0.0
|
||||
if _compaction_progress >= 0.95:
|
||||
_warn_tier = 0.95
|
||||
elif _compaction_progress >= 0.85:
|
||||
_warn_tier = 0.85
|
||||
if _warn_tier > self._context_pressure_warned_at:
|
||||
# Class-level dedup: check if this session was already
|
||||
# warned at this tier within the cooldown window.
|
||||
_sid = self.session_id or "default"
|
||||
_last = AIAgent._context_pressure_last_warned.get(_sid)
|
||||
_now = time.time()
|
||||
if _last is None or _last[0] < _warn_tier or (_now - _last[1]) >= self._CONTEXT_PRESSURE_COOLDOWN:
|
||||
self._context_pressure_warned_at = _warn_tier
|
||||
AIAgent._context_pressure_last_warned[_sid] = (_warn_tier, _now)
|
||||
self._emit_context_pressure(_compaction_progress, _compressor)
|
||||
# Evict stale entries (older than 2x cooldown)
|
||||
_cutoff = _now - self._CONTEXT_PRESSURE_COOLDOWN * 2
|
||||
AIAgent._context_pressure_last_warned = {
|
||||
k: v for k, v in AIAgent._context_pressure_last_warned.items()
|
||||
if v[1] > _cutoff
|
||||
}
|
||||
|
||||
if self.compression_enabled and _compressor.should_compress(_real_tokens):
|
||||
self._safe_print(" ⟳ compacting context…")
|
||||
messages, active_system_prompt = self._compress_context(
|
||||
messages, system_message,
|
||||
approx_tokens=self.context_compressor.last_prompt_tokens,
|
||||
@@ -8885,6 +9109,7 @@ class AIAgent:
|
||||
# instead of wasting API calls on retries that won't help.
|
||||
fallback = getattr(self, '_last_content_with_tools', None)
|
||||
if fallback:
|
||||
_turn_exit_reason = "fallback_prior_turn_content"
|
||||
logger.debug("Empty follow-up after tool calls — using prior turn content as final response")
|
||||
self._last_content_with_tools = None
|
||||
self._empty_content_retries = 0
|
||||
@@ -8930,8 +9155,28 @@ class AIAgent:
|
||||
self._save_session_log(messages)
|
||||
continue
|
||||
|
||||
# Exhausted prefill attempts or no structured
|
||||
# reasoning — fall through to "(empty)" terminal.
|
||||
# ── Empty response retry (no reasoning) ──────
|
||||
# Model returned nothing — no content, no
|
||||
# structured reasoning, no tool calls. Common
|
||||
# with open models (transient provider issues,
|
||||
# rate limits, sampling flukes). Silently retry
|
||||
# up to 3 times before giving up. Skip when
|
||||
# content has inline <think> tags (model chose
|
||||
# to reason, just no visible text).
|
||||
_truly_empty = not final_response.strip()
|
||||
if _truly_empty and not _has_structured and self._empty_content_retries < 3:
|
||||
self._empty_content_retries += 1
|
||||
self._vprint(
|
||||
f"{self.log_prefix}↻ Empty response (no content or reasoning) "
|
||||
f"— retrying ({self._empty_content_retries}/3)",
|
||||
force=True,
|
||||
)
|
||||
continue
|
||||
|
||||
# Exhausted prefill attempts, empty retries, or
|
||||
# structured reasoning with no content —
|
||||
# fall through to "(empty)" terminal.
|
||||
_turn_exit_reason = "empty_response_exhausted"
|
||||
reasoning_text = self._extract_reasoning(assistant_message)
|
||||
assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
|
||||
assistant_msg["content"] = "(empty)"
|
||||
@@ -8941,7 +9186,7 @@ class AIAgent:
|
||||
reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
|
||||
self._vprint(f"{self.log_prefix}ℹ️ Reasoning-only response (no visible content). Reasoning: {reasoning_preview}")
|
||||
else:
|
||||
self._vprint(f"{self.log_prefix}ℹ️ Empty response (no content or reasoning).")
|
||||
self._vprint(f"{self.log_prefix}ℹ️ Empty response (no content or reasoning) after 3 retries.")
|
||||
|
||||
final_response = "(empty)"
|
||||
break
|
||||
@@ -9003,6 +9248,7 @@ class AIAgent:
|
||||
|
||||
messages.append(final_msg)
|
||||
|
||||
_turn_exit_reason = f"text_response(finish_reason={finish_reason})"
|
||||
if not self.quiet_mode:
|
||||
self._safe_print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
|
||||
break
|
||||
@@ -9052,6 +9298,7 @@ class AIAgent:
|
||||
|
||||
# If we're near the limit, break to avoid infinite loops
|
||||
if api_call_count >= self.max_iterations - 1:
|
||||
_turn_exit_reason = f"error_near_max_iterations({error_msg[:80]})"
|
||||
final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
|
||||
# Append as assistant so the history stays valid for
|
||||
# session resume (avoids consecutive user messages).
|
||||
@@ -9062,6 +9309,7 @@ class AIAgent:
|
||||
api_call_count >= self.max_iterations
|
||||
or self.iteration_budget.remaining <= 0
|
||||
):
|
||||
_turn_exit_reason = f"max_iterations_reached({api_call_count}/{self.max_iterations})"
|
||||
if self.iteration_budget.remaining <= 0 and not self.quiet_mode:
|
||||
print(f"\n⚠️ Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)")
|
||||
final_response = self._handle_max_iterations(messages, api_call_count)
|
||||
@@ -9078,6 +9326,49 @@ class AIAgent:
|
||||
# Persist session to both JSON log and SQLite
|
||||
self._persist_session(messages, conversation_history)
|
||||
|
||||
# ── Turn-exit diagnostic log ─────────────────────────────────────
|
||||
# Always logged at INFO so agent.log captures WHY every turn ended.
|
||||
# When the last message is a tool result (agent was mid-work), log
|
||||
# at WARNING — this is the "just stops" scenario users report.
|
||||
_last_msg_role = messages[-1].get("role") if messages else None
|
||||
_last_tool_name = None
|
||||
if _last_msg_role == "tool":
|
||||
# Walk back to find the assistant message with the tool call
|
||||
for _m in reversed(messages):
|
||||
if _m.get("role") == "assistant" and _m.get("tool_calls"):
|
||||
_tcs = _m["tool_calls"]
|
||||
if _tcs and isinstance(_tcs[0], dict):
|
||||
_last_tool_name = _tcs[-1].get("function", {}).get("name")
|
||||
break
|
||||
|
||||
_turn_tool_count = sum(
|
||||
1 for m in messages
|
||||
if isinstance(m, dict) and m.get("role") == "assistant" and m.get("tool_calls")
|
||||
)
|
||||
_resp_len = len(final_response) if final_response else 0
|
||||
_budget_used = self.iteration_budget.used if self.iteration_budget else 0
|
||||
_budget_max = self.iteration_budget.max_total if self.iteration_budget else 0
|
||||
|
||||
_diag_msg = (
|
||||
"Turn ended: reason=%s model=%s api_calls=%d/%d budget=%d/%d "
|
||||
"tool_turns=%d last_msg_role=%s response_len=%d session=%s"
|
||||
)
|
||||
_diag_args = (
|
||||
_turn_exit_reason, self.model, api_call_count, self.max_iterations,
|
||||
_budget_used, _budget_max,
|
||||
_turn_tool_count, _last_msg_role, _resp_len,
|
||||
self.session_id or "none",
|
||||
)
|
||||
|
||||
if _last_msg_role == "tool" and not interrupted:
|
||||
# Agent was mid-work — this is the "just stops" case.
|
||||
logger.warning(
|
||||
"Turn ended with pending tool result (agent may appear stuck). "
|
||||
+ _diag_msg + " last_tool=%s",
|
||||
*_diag_args, _last_tool_name,
|
||||
)
|
||||
else:
|
||||
logger.info(_diag_msg, *_diag_args)
|
||||
|
||||
# Plugin hook: post_llm_call
|
||||
# Fired once per turn after the tool-calling loop completes.
|
||||
|
||||
@@ -249,7 +249,6 @@ Type these during an interactive chat session.
|
||||
/config Show config (CLI)
|
||||
/model [name] Show or change model
|
||||
/provider Show provider info
|
||||
/prompt [text] View/set system prompt (CLI)
|
||||
/personality [name] Set personality
|
||||
/reasoning [level] Set reasoning (none|low|medium|high|xhigh|show|hide)
|
||||
/verbose Cycle: off → new → all → verbose
|
||||
|
||||
@@ -77,6 +77,20 @@ class TestReadCodexAccessToken:
|
||||
result = _read_codex_access_token()
|
||||
assert result == "tok-123"
|
||||
|
||||
def test_pool_without_selected_entry_falls_back_to_auth_store(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir(parents=True, exist_ok=True)
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
|
||||
valid_jwt = "eyJhbGciOiJSUzI1NiJ9.eyJleHAiOjk5OTk5OTk5OTl9.sig"
|
||||
with patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)), \
|
||||
patch("hermes_cli.auth._read_codex_tokens", return_value={
|
||||
"tokens": {"access_token": valid_jwt, "refresh_token": "refresh"}
|
||||
}):
|
||||
result = _read_codex_access_token()
|
||||
|
||||
assert result == valid_jwt
|
||||
|
||||
def test_missing_returns_none(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir(parents=True, exist_ok=True)
|
||||
@@ -238,6 +252,24 @@ class TestAnthropicOAuthFlag:
|
||||
assert mock_build.call_args.args[0] == "sk-ant-oat01-pooled"
|
||||
|
||||
|
||||
class TestTryCodex:
|
||||
def test_pool_without_selected_entry_falls_back_to_auth_store(self):
|
||||
with (
|
||||
patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)),
|
||||
patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-auth-token"),
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
||||
):
|
||||
mock_openai.return_value = MagicMock()
|
||||
from agent.auxiliary_client import _try_codex
|
||||
|
||||
client, model = _try_codex()
|
||||
|
||||
assert client is not None
|
||||
assert model == "gpt-5.2-codex"
|
||||
assert mock_openai.call_args.kwargs["api_key"] == "codex-auth-token"
|
||||
assert mock_openai.call_args.kwargs["base_url"] == "https://chatgpt.com/backend-api/codex"
|
||||
|
||||
|
||||
class TestExpiredCodexFallback:
|
||||
"""Test that expired Codex tokens don't block the auto chain."""
|
||||
|
||||
@@ -737,8 +769,8 @@ class TestAuxiliaryPoolAwareness:
|
||||
assert client is not None
|
||||
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
|
||||
|
||||
def test_vision_auto_prefers_openrouter_over_active_provider(self, monkeypatch):
|
||||
"""OpenRouter is tried before the active provider in vision auto."""
|
||||
def test_vision_auto_prefers_active_provider_over_openrouter(self, monkeypatch):
|
||||
"""Active provider is tried before OpenRouter in vision auto."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
|
||||
|
||||
@@ -746,12 +778,13 @@ class TestAuxiliaryPoolAwareness:
|
||||
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
|
||||
patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"),
|
||||
patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"),
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
||||
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
|
||||
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
|
||||
):
|
||||
provider, client, model = resolve_vision_provider_client()
|
||||
|
||||
# OpenRouter should win over anthropic active provider
|
||||
assert provider == "openrouter"
|
||||
# Active provider should win over OpenRouter
|
||||
assert provider == "anthropic"
|
||||
|
||||
def test_vision_auto_uses_named_custom_as_active_provider(self, monkeypatch):
|
||||
"""Named custom provider works as active provider fallback in vision auto."""
|
||||
|
||||
@@ -324,7 +324,10 @@ class TestCompressWithClient:
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
||||
|
||||
# Last head message (index 1) is "assistant" → summary should be "user"
|
||||
# Last head message (index 1) is "assistant" → summary should be "user".
|
||||
# With min_tail=3, tail = last 3 messages (indices 5-7).
|
||||
# head_last=assistant, tail_first=assistant → summary_role="user", no collision.
|
||||
# Need 8 messages: min_for_compress = 2+3+1 = 6, must have > 6.
|
||||
msgs = [
|
||||
{"role": "user", "content": "msg 0"},
|
||||
{"role": "assistant", "content": "msg 1"},
|
||||
@@ -332,6 +335,8 @@ class TestCompressWithClient:
|
||||
{"role": "assistant", "content": "msg 3"},
|
||||
{"role": "user", "content": "msg 4"},
|
||||
{"role": "assistant", "content": "msg 5"},
|
||||
{"role": "user", "content": "msg 6"},
|
||||
{"role": "assistant", "content": "msg 7"},
|
||||
]
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
result = c.compress(msgs)
|
||||
@@ -460,8 +465,10 @@ class TestCompressWithClient:
|
||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
||||
|
||||
# Head: [system, user] → last head = user
|
||||
# Tail: [assistant, user] → first tail = assistant
|
||||
# Tail: [assistant, user, assistant] → first tail = assistant
|
||||
# summary_role="assistant" collides with tail, "user" collides with head → merge
|
||||
# With min_tail=3, tail = last 3 messages (indices 5-7).
|
||||
# Need 8 messages: min_for_compress = 2+3+1 = 6, must have > 6.
|
||||
msgs = [
|
||||
{"role": "system", "content": "system prompt"},
|
||||
{"role": "user", "content": "msg 1"},
|
||||
@@ -470,6 +477,7 @@ class TestCompressWithClient:
|
||||
{"role": "assistant", "content": "msg 4"}, # compressed
|
||||
{"role": "assistant", "content": "msg 5"}, # tail start
|
||||
{"role": "user", "content": "msg 6"},
|
||||
{"role": "assistant", "content": "msg 7"},
|
||||
]
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
result = c.compress(msgs)
|
||||
@@ -481,7 +489,7 @@ class TestCompressWithClient:
|
||||
if r1 in ("user", "assistant") and r2 in ("user", "assistant"):
|
||||
assert r1 != r2, f"consecutive {r1} at indices {i-1},{i}"
|
||||
|
||||
# The summary should be merged into the first tail message (assistant)
|
||||
# The summary should be merged into the first tail message (assistant at index 5)
|
||||
first_tail = [m for m in result if "msg 5" in (m.get("content") or "")]
|
||||
assert len(first_tail) == 1
|
||||
assert "summary text" in first_tail[0]["content"]
|
||||
@@ -496,14 +504,18 @@ class TestCompressWithClient:
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
||||
|
||||
# Head=assistant, Tail=assistant → summary_role="user", no collision
|
||||
# Head=assistant, Tail=assistant → summary_role="user", no collision.
|
||||
# With min_tail=3, tail = last 3 messages (indices 5-7).
|
||||
# Need 8 messages: min_for_compress = 2+3+1 = 6, must have > 6.
|
||||
msgs = [
|
||||
{"role": "user", "content": "msg 0"},
|
||||
{"role": "assistant", "content": "msg 1"},
|
||||
{"role": "user", "content": "msg 2"},
|
||||
{"role": "assistant", "content": "msg 3"},
|
||||
{"role": "assistant", "content": "msg 4"},
|
||||
{"role": "user", "content": "msg 5"},
|
||||
{"role": "user", "content": "msg 4"},
|
||||
{"role": "assistant", "content": "msg 5"},
|
||||
{"role": "user", "content": "msg 6"},
|
||||
{"role": "assistant", "content": "msg 7"},
|
||||
]
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
result = c.compress(msgs)
|
||||
@@ -600,3 +612,158 @@ class TestSummaryTargetRatio:
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True)
|
||||
assert c.protect_last_n == 20
|
||||
|
||||
|
||||
class TestTokenBudgetTailProtection:
|
||||
"""Tests for token-budget-based tail protection (PR #6240).
|
||||
|
||||
The core change: tail protection is now based on a token budget rather
|
||||
than a fixed message count. This prevents large tool outputs from
|
||||
blocking compaction.
|
||||
"""
|
||||
|
||||
@pytest.fixture()
|
||||
def budget_compressor(self):
|
||||
"""Compressor with known token budget for tail protection tests."""
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
|
||||
c = ContextCompressor(
|
||||
model="test/model",
|
||||
threshold_percent=0.50, # 100K threshold
|
||||
protect_first_n=2,
|
||||
protect_last_n=20,
|
||||
quiet_mode=True,
|
||||
)
|
||||
return c
|
||||
|
||||
def test_large_tool_outputs_no_longer_block_compaction(self, budget_compressor):
|
||||
"""The motivating scenario: 20 messages with large tool outputs should
|
||||
NOT prevent compaction. With message-count tail protection they would
|
||||
all be protected, leaving nothing to summarize."""
|
||||
c = budget_compressor
|
||||
messages = [
|
||||
{"role": "user", "content": "Start task"},
|
||||
{"role": "assistant", "content": "On it"},
|
||||
]
|
||||
# Add 20 messages with large tool outputs (~5K chars each ≈ 1250 tokens)
|
||||
for i in range(10):
|
||||
messages.append({
|
||||
"role": "assistant", "content": None,
|
||||
"tool_calls": [{"function": {"name": f"tool_{i}", "arguments": "{}"}}],
|
||||
})
|
||||
messages.append({
|
||||
"role": "tool", "content": "x" * 5000,
|
||||
"tool_call_id": f"call_{i}",
|
||||
})
|
||||
# Add 3 recent small messages
|
||||
messages.append({"role": "user", "content": "What's the status?"})
|
||||
messages.append({"role": "assistant", "content": "Here's what I found..."})
|
||||
messages.append({"role": "user", "content": "Continue"})
|
||||
|
||||
# The tail cut should NOT protect all 20 tool messages
|
||||
head_end = c.protect_first_n
|
||||
cut = c._find_tail_cut_by_tokens(messages, head_end)
|
||||
tail_size = len(messages) - cut
|
||||
# With token budget, the tail should be much smaller than 20+
|
||||
assert tail_size < 20, f"Tail {tail_size} messages — large tool outputs are blocking compaction"
|
||||
# But at least 3 (hard minimum)
|
||||
assert tail_size >= 3
|
||||
|
||||
def test_min_tail_always_3_messages(self, budget_compressor):
|
||||
"""Even with a tiny token budget, at least 3 messages are protected."""
|
||||
c = budget_compressor
|
||||
# Override to a tiny budget
|
||||
c.tail_token_budget = 10
|
||||
messages = [
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi"},
|
||||
{"role": "user", "content": "do something"},
|
||||
{"role": "assistant", "content": "working on it"},
|
||||
{"role": "user", "content": "more work"},
|
||||
{"role": "assistant", "content": "done"},
|
||||
{"role": "user", "content": "thanks"},
|
||||
]
|
||||
head_end = 2
|
||||
cut = c._find_tail_cut_by_tokens(messages, head_end)
|
||||
tail_size = len(messages) - cut
|
||||
assert tail_size >= 3, f"Tail is only {tail_size} messages, min should be 3"
|
||||
|
||||
def test_soft_ceiling_allows_oversized_message(self, budget_compressor):
|
||||
"""The 1.5x soft ceiling allows an oversized message to be included
|
||||
rather than splitting it."""
|
||||
c = budget_compressor
|
||||
# Set a small budget — 500 tokens
|
||||
c.tail_token_budget = 500
|
||||
messages = [
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi"},
|
||||
{"role": "user", "content": "read the file"},
|
||||
# This message is ~600 tokens (> budget of 500, but < 1.5x = 750)
|
||||
{"role": "assistant", "content": "a" * 2400},
|
||||
{"role": "user", "content": "short"},
|
||||
{"role": "assistant", "content": "short reply"},
|
||||
{"role": "user", "content": "continue"},
|
||||
]
|
||||
head_end = 2
|
||||
cut = c._find_tail_cut_by_tokens(messages, head_end)
|
||||
# The oversized message at index 3 should NOT be the cut point
|
||||
# because 1.5x ceiling = 750 tokens and accumulated would be ~610
|
||||
# (short msgs + oversized msg) which is < 750
|
||||
tail_size = len(messages) - cut
|
||||
assert tail_size >= 3
|
||||
|
||||
def test_small_conversation_still_compresses(self, budget_compressor):
|
||||
"""With the new min of 8 messages (head=2 + 3 + 1 guard + 2 middle),
|
||||
a small but compressible conversation should still compress."""
|
||||
c = budget_compressor
|
||||
# 9 messages: head(2) + 4 middle + 3 tail = compressible
|
||||
messages = []
|
||||
for i in range(9):
|
||||
role = "user" if i % 2 == 0 else "assistant"
|
||||
messages.append({"role": role, "content": f"Message {i}"})
|
||||
|
||||
# Should not early-return (needs > protect_first_n + 3 + 1 = 6)
|
||||
# Mock the summary generation to avoid real API call
|
||||
with patch.object(c, "_generate_summary", return_value="Summary of conversation"):
|
||||
result = c.compress(messages, current_tokens=90_000)
|
||||
# Should have compressed (fewer messages than original)
|
||||
assert len(result) < len(messages)
|
||||
|
||||
def test_prune_with_token_budget(self, budget_compressor):
|
||||
"""_prune_old_tool_results with protect_tail_tokens respects the budget."""
|
||||
c = budget_compressor
|
||||
messages = [
|
||||
{"role": "user", "content": "start"},
|
||||
{"role": "assistant", "content": None,
|
||||
"tool_calls": [{"function": {"name": "read_file", "arguments": '{"path": "big.txt"}'}}]},
|
||||
{"role": "tool", "content": "x" * 10000, "tool_call_id": "c1"}, # ~2500 tokens
|
||||
{"role": "assistant", "content": None,
|
||||
"tool_calls": [{"function": {"name": "read_file", "arguments": '{"path": "small.txt"}'}}]},
|
||||
{"role": "tool", "content": "y" * 10000, "tool_call_id": "c2"}, # ~2500 tokens
|
||||
{"role": "user", "content": "short recent message"},
|
||||
{"role": "assistant", "content": "short reply"},
|
||||
]
|
||||
# With a 1000-token budget, only the last couple messages should be protected
|
||||
result, pruned = c._prune_old_tool_results(
|
||||
messages, protect_tail_count=2, protect_tail_tokens=1000,
|
||||
)
|
||||
# At least one old tool result should have been pruned
|
||||
assert pruned >= 1
|
||||
|
||||
def test_prune_without_token_budget_uses_message_count(self, budget_compressor):
|
||||
"""Without protect_tail_tokens, falls back to message-count behavior."""
|
||||
c = budget_compressor
|
||||
messages = [
|
||||
{"role": "user", "content": "start"},
|
||||
{"role": "assistant", "content": None,
|
||||
"tool_calls": [{"function": {"name": "tool", "arguments": "{}"}}]},
|
||||
{"role": "tool", "content": "x" * 5000, "tool_call_id": "c1"},
|
||||
{"role": "user", "content": "recent"},
|
||||
{"role": "assistant", "content": "reply"},
|
||||
]
|
||||
# protect_tail_count=3 means last 3 messages protected
|
||||
result, pruned = c._prune_old_tool_results(
|
||||
messages, protect_tail_count=3,
|
||||
)
|
||||
# Tool at index 2 is outside the protected tail (last 3 = indices 2,3,4)
|
||||
# so it might or might not be pruned depending on boundary
|
||||
assert isinstance(pruned, int)
|
||||
|
||||
@@ -214,6 +214,42 @@ def test_exhausted_entry_resets_after_ttl(tmp_path, monkeypatch):
|
||||
assert entry.last_status == "ok"
|
||||
|
||||
|
||||
def test_exhausted_402_entry_resets_after_one_hour(tmp_path, monkeypatch):
|
||||
"""402-exhausted credentials recover after 1 hour, not 24."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"credential_pool": {
|
||||
"openrouter": [
|
||||
{
|
||||
"id": "cred-1",
|
||||
"label": "primary",
|
||||
"auth_type": "api_key",
|
||||
"priority": 0,
|
||||
"source": "manual",
|
||||
"access_token": "***",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"last_status": "exhausted",
|
||||
"last_status_at": time.time() - 3700, # ~1h2m ago
|
||||
"last_error_code": 402,
|
||||
}
|
||||
]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from agent.credential_pool import load_pool
|
||||
|
||||
pool = load_pool("openrouter")
|
||||
entry = pool.select()
|
||||
|
||||
assert entry is not None
|
||||
assert entry.id == "cred-1"
|
||||
assert entry.last_status == "ok"
|
||||
|
||||
|
||||
def test_explicit_reset_timestamp_overrides_default_429_ttl(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
_write_auth_store(
|
||||
|
||||
@@ -0,0 +1,782 @@
|
||||
"""Tests for agent.error_classifier — structured API error classification."""
|
||||
|
||||
import pytest
|
||||
from agent.error_classifier import (
|
||||
ClassifiedError,
|
||||
FailoverReason,
|
||||
classify_api_error,
|
||||
_extract_status_code,
|
||||
_extract_error_body,
|
||||
_extract_error_code,
|
||||
_classify_402,
|
||||
)
|
||||
|
||||
|
||||
# ── Helper: mock API errors ────────────────────────────────────────────
|
||||
|
||||
class MockAPIError(Exception):
|
||||
"""Simulates an OpenAI SDK APIStatusError."""
|
||||
def __init__(self, message, status_code=None, body=None):
|
||||
super().__init__(message)
|
||||
self.status_code = status_code
|
||||
self.body = body or {}
|
||||
|
||||
|
||||
class MockTransportError(Exception):
|
||||
"""Simulates a transport-level error with a specific type name."""
|
||||
pass
|
||||
|
||||
|
||||
class ReadTimeout(MockTransportError):
|
||||
pass
|
||||
|
||||
|
||||
class ConnectError(MockTransportError):
|
||||
pass
|
||||
|
||||
|
||||
class RemoteProtocolError(MockTransportError):
|
||||
pass
|
||||
|
||||
|
||||
class ServerDisconnectedError(MockTransportError):
|
||||
pass
|
||||
|
||||
|
||||
# ── Test: FailoverReason enum ──────────────────────────────────────────
|
||||
|
||||
class TestFailoverReason:
|
||||
def test_all_reasons_have_string_values(self):
|
||||
for reason in FailoverReason:
|
||||
assert isinstance(reason.value, str)
|
||||
|
||||
def test_enum_members_exist(self):
|
||||
expected = {
|
||||
"auth", "auth_permanent", "billing", "rate_limit",
|
||||
"overloaded", "server_error", "timeout",
|
||||
"context_overflow", "payload_too_large",
|
||||
"model_not_found", "format_error",
|
||||
"thinking_signature", "long_context_tier", "unknown",
|
||||
}
|
||||
actual = {r.value for r in FailoverReason}
|
||||
assert expected == actual
|
||||
|
||||
|
||||
# ── Test: ClassifiedError ──────────────────────────────────────────────
|
||||
|
||||
class TestClassifiedError:
|
||||
def test_is_auth_property(self):
|
||||
e1 = ClassifiedError(reason=FailoverReason.auth)
|
||||
assert e1.is_auth is True
|
||||
|
||||
e2 = ClassifiedError(reason=FailoverReason.auth_permanent)
|
||||
assert e2.is_auth is True
|
||||
|
||||
e3 = ClassifiedError(reason=FailoverReason.billing)
|
||||
assert e3.is_auth is False
|
||||
|
||||
def test_is_transient_property(self):
|
||||
transient_reasons = [
|
||||
FailoverReason.rate_limit,
|
||||
FailoverReason.overloaded,
|
||||
FailoverReason.server_error,
|
||||
FailoverReason.timeout,
|
||||
FailoverReason.unknown,
|
||||
]
|
||||
for reason in transient_reasons:
|
||||
e = ClassifiedError(reason=reason)
|
||||
assert e.is_transient is True, f"{reason} should be transient"
|
||||
|
||||
non_transient = [
|
||||
FailoverReason.auth,
|
||||
FailoverReason.billing,
|
||||
FailoverReason.model_not_found,
|
||||
FailoverReason.format_error,
|
||||
]
|
||||
for reason in non_transient:
|
||||
e = ClassifiedError(reason=reason)
|
||||
assert e.is_transient is False, f"{reason} should NOT be transient"
|
||||
|
||||
def test_defaults(self):
|
||||
e = ClassifiedError(reason=FailoverReason.unknown)
|
||||
assert e.retryable is True
|
||||
assert e.should_compress is False
|
||||
assert e.should_rotate_credential is False
|
||||
assert e.should_fallback is False
|
||||
assert e.status_code is None
|
||||
assert e.message == ""
|
||||
|
||||
|
||||
# ── Test: Status code extraction ───────────────────────────────────────
|
||||
|
||||
class TestExtractStatusCode:
|
||||
def test_from_status_code_attr(self):
|
||||
e = MockAPIError("fail", status_code=429)
|
||||
assert _extract_status_code(e) == 429
|
||||
|
||||
def test_from_status_attr(self):
|
||||
class ErrWithStatus(Exception):
|
||||
status = 503
|
||||
assert _extract_status_code(ErrWithStatus()) == 503
|
||||
|
||||
def test_from_cause_chain(self):
|
||||
inner = MockAPIError("inner", status_code=401)
|
||||
outer = Exception("outer")
|
||||
outer.__cause__ = inner
|
||||
assert _extract_status_code(outer) == 401
|
||||
|
||||
def test_none_when_missing(self):
|
||||
assert _extract_status_code(Exception("generic")) is None
|
||||
|
||||
def test_rejects_non_http_status(self):
|
||||
"""Integers outside 100-599 on .status should be ignored."""
|
||||
class ErrWeirdStatus(Exception):
|
||||
status = 42
|
||||
assert _extract_status_code(ErrWeirdStatus()) is None
|
||||
|
||||
|
||||
# ── Test: Error body extraction ────────────────────────────────────────
|
||||
|
||||
class TestExtractErrorBody:
|
||||
def test_from_body_attr(self):
|
||||
e = MockAPIError("fail", body={"error": {"message": "bad"}})
|
||||
assert _extract_error_body(e) == {"error": {"message": "bad"}}
|
||||
|
||||
def test_empty_when_no_body(self):
|
||||
assert _extract_error_body(Exception("generic")) == {}
|
||||
|
||||
|
||||
# ── Test: Error code extraction ────────────────────────────────────────
|
||||
|
||||
class TestExtractErrorCode:
|
||||
def test_from_nested_error_code(self):
|
||||
body = {"error": {"code": "rate_limit_exceeded"}}
|
||||
assert _extract_error_code(body) == "rate_limit_exceeded"
|
||||
|
||||
def test_from_nested_error_type(self):
|
||||
body = {"error": {"type": "invalid_request_error"}}
|
||||
assert _extract_error_code(body) == "invalid_request_error"
|
||||
|
||||
def test_from_top_level_code(self):
|
||||
body = {"code": "model_not_found"}
|
||||
assert _extract_error_code(body) == "model_not_found"
|
||||
|
||||
def test_empty_when_no_code(self):
|
||||
assert _extract_error_code({}) == ""
|
||||
assert _extract_error_code({"error": {"message": "oops"}}) == ""
|
||||
|
||||
|
||||
# ── Test: 402 disambiguation ───────────────────────────────────────────
|
||||
|
||||
class TestClassify402:
|
||||
"""The critical 402 billing vs rate_limit disambiguation."""
|
||||
|
||||
def test_billing_exhaustion(self):
|
||||
"""Plain 402 = billing."""
|
||||
result = _classify_402(
|
||||
"payment required",
|
||||
lambda reason, **kw: ClassifiedError(reason=reason, **kw),
|
||||
)
|
||||
assert result.reason == FailoverReason.billing
|
||||
assert result.should_rotate_credential is True
|
||||
|
||||
def test_transient_usage_limit(self):
|
||||
"""402 with 'usage limit' + 'try again' = rate limit, not billing."""
|
||||
result = _classify_402(
|
||||
"usage limit exceeded. try again in 5 minutes",
|
||||
lambda reason, **kw: ClassifiedError(reason=reason, **kw),
|
||||
)
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
assert result.should_rotate_credential is True
|
||||
|
||||
def test_quota_with_retry(self):
|
||||
"""402 with 'quota' + 'retry' = rate limit."""
|
||||
result = _classify_402(
|
||||
"quota exceeded, please retry after the window resets",
|
||||
lambda reason, **kw: ClassifiedError(reason=reason, **kw),
|
||||
)
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
|
||||
def test_quota_without_retry(self):
|
||||
"""402 with just 'quota' but no transient signal = billing."""
|
||||
result = _classify_402(
|
||||
"quota exceeded",
|
||||
lambda reason, **kw: ClassifiedError(reason=reason, **kw),
|
||||
)
|
||||
assert result.reason == FailoverReason.billing
|
||||
|
||||
def test_insufficient_credits(self):
|
||||
result = _classify_402(
|
||||
"insufficient credits to complete request",
|
||||
lambda reason, **kw: ClassifiedError(reason=reason, **kw),
|
||||
)
|
||||
assert result.reason == FailoverReason.billing
|
||||
|
||||
|
||||
# ── Test: Full classification pipeline ─────────────────────────────────
|
||||
|
||||
class TestClassifyApiError:
|
||||
"""End-to-end classification tests."""
|
||||
|
||||
# ── Auth errors ──
|
||||
|
||||
def test_401_classified_as_auth(self):
|
||||
e = MockAPIError("Unauthorized", status_code=401)
|
||||
result = classify_api_error(e, provider="openrouter")
|
||||
assert result.reason == FailoverReason.auth
|
||||
assert result.should_rotate_credential is True
|
||||
# 401 is non-retryable on its own — credential rotation runs
|
||||
# before the retryability check in the agent loop.
|
||||
assert result.retryable is False
|
||||
assert result.should_fallback is True
|
||||
|
||||
def test_403_classified_as_auth(self):
|
||||
e = MockAPIError("Forbidden", status_code=403)
|
||||
result = classify_api_error(e, provider="anthropic")
|
||||
assert result.reason == FailoverReason.auth
|
||||
assert result.should_fallback is True
|
||||
|
||||
def test_403_key_limit_classified_as_billing(self):
|
||||
"""OpenRouter 403 'key limit exceeded' is billing, not auth."""
|
||||
e = MockAPIError("Key limit exceeded for this key", status_code=403)
|
||||
result = classify_api_error(e, provider="openrouter")
|
||||
assert result.reason == FailoverReason.billing
|
||||
assert result.should_rotate_credential is True
|
||||
assert result.should_fallback is True
|
||||
|
||||
def test_403_spending_limit_classified_as_billing(self):
|
||||
e = MockAPIError("spending limit reached", status_code=403)
|
||||
result = classify_api_error(e, provider="openrouter")
|
||||
assert result.reason == FailoverReason.billing
|
||||
|
||||
# ── Billing ──
|
||||
|
||||
def test_402_plain_billing(self):
|
||||
e = MockAPIError("Payment Required", status_code=402)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.billing
|
||||
assert result.retryable is False
|
||||
|
||||
def test_402_transient_usage_limit(self):
|
||||
e = MockAPIError("usage limit exceeded, try again later", status_code=402)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
assert result.retryable is True
|
||||
|
||||
# ── Rate limit ──
|
||||
|
||||
def test_429_rate_limit(self):
|
||||
e = MockAPIError("Too Many Requests", status_code=429)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
assert result.should_fallback is True
|
||||
|
||||
# ── Server errors ──
|
||||
|
||||
def test_500_server_error(self):
|
||||
e = MockAPIError("Internal Server Error", status_code=500)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.server_error
|
||||
assert result.retryable is True
|
||||
|
||||
def test_502_server_error(self):
|
||||
e = MockAPIError("Bad Gateway", status_code=502)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.server_error
|
||||
|
||||
def test_503_overloaded(self):
|
||||
e = MockAPIError("Service Unavailable", status_code=503)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.overloaded
|
||||
|
||||
def test_529_anthropic_overloaded(self):
|
||||
e = MockAPIError("Overloaded", status_code=529)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.overloaded
|
||||
|
||||
# ── Model not found ──
|
||||
|
||||
def test_404_model_not_found(self):
|
||||
e = MockAPIError("model not found", status_code=404)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.model_not_found
|
||||
assert result.should_fallback is True
|
||||
assert result.retryable is False
|
||||
|
||||
def test_404_generic(self):
|
||||
e = MockAPIError("Not Found", status_code=404)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.model_not_found
|
||||
|
||||
# ── Payload too large ──
|
||||
|
||||
def test_413_payload_too_large(self):
|
||||
e = MockAPIError("Request Entity Too Large", status_code=413)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.payload_too_large
|
||||
assert result.should_compress is True
|
||||
|
||||
# ── Context overflow ──
|
||||
|
||||
def test_400_context_length(self):
|
||||
e = MockAPIError("context length exceeded: 250000 > 200000", status_code=400)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
assert result.should_compress is True
|
||||
|
||||
def test_400_too_many_tokens(self):
|
||||
e = MockAPIError("This model's maximum context is 128000 tokens, too many tokens", status_code=400)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
def test_400_prompt_too_long(self):
|
||||
e = MockAPIError("prompt is too long: 300000 tokens > 200000 maximum", status_code=400)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
def test_400_generic_large_session(self):
|
||||
"""Generic 400 with large session → context overflow heuristic."""
|
||||
e = MockAPIError(
|
||||
"Error",
|
||||
status_code=400,
|
||||
body={"error": {"message": "Error"}},
|
||||
)
|
||||
result = classify_api_error(e, approx_tokens=100000, context_length=200000)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
def test_400_generic_small_session_is_format_error(self):
|
||||
"""Generic 400 with small session → format error, not context overflow."""
|
||||
e = MockAPIError(
|
||||
"Error",
|
||||
status_code=400,
|
||||
body={"error": {"message": "Error"}},
|
||||
)
|
||||
result = classify_api_error(e, approx_tokens=1000, context_length=200000)
|
||||
assert result.reason == FailoverReason.format_error
|
||||
|
||||
# ── Server disconnect + large session ──
|
||||
|
||||
def test_disconnect_large_session_context_overflow(self):
|
||||
"""Server disconnect with large session → context overflow."""
|
||||
e = Exception("server disconnected without sending complete message")
|
||||
result = classify_api_error(e, approx_tokens=150000, context_length=200000)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
assert result.should_compress is True
|
||||
|
||||
def test_disconnect_small_session_timeout(self):
|
||||
"""Server disconnect with small session → timeout."""
|
||||
e = Exception("server disconnected without sending complete message")
|
||||
result = classify_api_error(e, approx_tokens=5000, context_length=200000)
|
||||
assert result.reason == FailoverReason.timeout
|
||||
|
||||
# ── Provider-specific: Anthropic thinking signature ──
|
||||
|
||||
def test_anthropic_thinking_signature(self):
|
||||
e = MockAPIError(
|
||||
"thinking block has invalid signature",
|
||||
status_code=400,
|
||||
)
|
||||
result = classify_api_error(e, provider="anthropic")
|
||||
assert result.reason == FailoverReason.thinking_signature
|
||||
assert result.retryable is True
|
||||
|
||||
def test_non_anthropic_400_with_signature_not_classified_as_thinking(self):
|
||||
"""400 with 'signature' but from non-Anthropic → format error."""
|
||||
e = MockAPIError("invalid signature", status_code=400)
|
||||
result = classify_api_error(e, provider="openrouter", approx_tokens=0)
|
||||
# Without "thinking" in the message, it shouldn't be thinking_signature
|
||||
assert result.reason != FailoverReason.thinking_signature
|
||||
|
||||
# ── Provider-specific: Anthropic long-context tier ──
|
||||
|
||||
def test_anthropic_long_context_tier(self):
|
||||
e = MockAPIError(
|
||||
"Extra usage is required for long context requests over 200k tokens",
|
||||
status_code=429,
|
||||
)
|
||||
result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4")
|
||||
assert result.reason == FailoverReason.long_context_tier
|
||||
assert result.should_compress is True
|
||||
|
||||
def test_normal_429_not_long_context(self):
|
||||
"""Normal 429 without 'extra usage' + 'long context' → rate_limit."""
|
||||
e = MockAPIError("Too Many Requests", status_code=429)
|
||||
result = classify_api_error(e, provider="anthropic")
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
|
||||
# ── Transport errors ──
|
||||
|
||||
def test_read_timeout(self):
|
||||
e = ReadTimeout("Read timed out")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.timeout
|
||||
assert result.retryable is True
|
||||
|
||||
def test_connect_error(self):
|
||||
e = ConnectError("Connection refused")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.timeout
|
||||
|
||||
def test_connection_error_builtin(self):
|
||||
e = ConnectionError("Connection reset by peer")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.timeout
|
||||
|
||||
def test_timeout_error_builtin(self):
|
||||
e = TimeoutError("timed out")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.timeout
|
||||
|
||||
# ── Error code classification ──
|
||||
|
||||
def test_error_code_resource_exhausted(self):
|
||||
e = MockAPIError(
|
||||
"Resource exhausted",
|
||||
body={"error": {"code": "resource_exhausted", "message": "Too many requests"}},
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
|
||||
def test_error_code_model_not_found(self):
|
||||
e = MockAPIError(
|
||||
"Model not available",
|
||||
body={"error": {"code": "model_not_found"}},
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.model_not_found
|
||||
|
||||
def test_error_code_context_length_exceeded(self):
|
||||
e = MockAPIError(
|
||||
"Context too large",
|
||||
body={"error": {"code": "context_length_exceeded"}},
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
# ── Message-only patterns (no status code) ──
|
||||
|
||||
def test_message_billing_pattern(self):
|
||||
e = Exception("insufficient credits to complete this request")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.billing
|
||||
|
||||
def test_message_rate_limit_pattern(self):
|
||||
e = Exception("rate limit reached for this model")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
|
||||
def test_message_auth_pattern(self):
|
||||
e = Exception("invalid api key provided")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.auth
|
||||
|
||||
def test_message_model_not_found_pattern(self):
|
||||
e = Exception("gpt-99 is not a valid model")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.model_not_found
|
||||
|
||||
def test_message_context_overflow_pattern(self):
|
||||
e = Exception("maximum context length exceeded")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
# ── Unknown / fallback ──
|
||||
|
||||
def test_generic_exception_is_unknown(self):
|
||||
e = Exception("something weird happened")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.unknown
|
||||
assert result.retryable is True
|
||||
|
||||
# ── Format error ──
|
||||
|
||||
def test_400_descriptive_format_error(self):
|
||||
"""400 with descriptive message (not context overflow) → format error."""
|
||||
e = MockAPIError(
|
||||
"Invalid value for parameter 'temperature': must be between 0 and 2",
|
||||
status_code=400,
|
||||
body={"error": {"message": "Invalid value for parameter 'temperature': must be between 0 and 2"}},
|
||||
)
|
||||
result = classify_api_error(e, approx_tokens=1000)
|
||||
assert result.reason == FailoverReason.format_error
|
||||
assert result.retryable is False
|
||||
|
||||
def test_422_format_error(self):
|
||||
e = MockAPIError("Unprocessable Entity", status_code=422)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.format_error
|
||||
assert result.retryable is False
|
||||
|
||||
def test_400_flat_body_descriptive_not_context_overflow(self):
|
||||
"""Responses API flat body with descriptive error + large session → format error.
|
||||
|
||||
The Codex Responses API returns errors in flat body format:
|
||||
{"message": "...", "type": "..."} without an "error" wrapper.
|
||||
A descriptive 400 must NOT be misclassified as context overflow
|
||||
just because the session is large.
|
||||
"""
|
||||
e = MockAPIError(
|
||||
"Invalid 'input[index].name': string does not match pattern.",
|
||||
status_code=400,
|
||||
body={"message": "Invalid 'input[index].name': string does not match pattern.",
|
||||
"type": "invalid_request_error"},
|
||||
)
|
||||
result = classify_api_error(e, approx_tokens=200000, context_length=400000, num_messages=500)
|
||||
assert result.reason == FailoverReason.format_error
|
||||
assert result.retryable is False
|
||||
|
||||
def test_400_flat_body_generic_large_session_still_context_overflow(self):
|
||||
"""Flat body with generic 'Error' message + large session → context overflow.
|
||||
|
||||
Regression: the flat-body fallback must not break the existing heuristic
|
||||
for genuinely generic errors from providers that use flat bodies.
|
||||
"""
|
||||
e = MockAPIError(
|
||||
"Error",
|
||||
status_code=400,
|
||||
body={"message": "Error"},
|
||||
)
|
||||
result = classify_api_error(e, approx_tokens=100000, context_length=200000)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
# ── Peer closed + large session ──
|
||||
|
||||
def test_peer_closed_large_session(self):
|
||||
e = Exception("peer closed connection without sending complete message")
|
||||
result = classify_api_error(e, approx_tokens=130000, context_length=200000)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
# ── Chinese error messages ──
|
||||
|
||||
def test_chinese_context_overflow(self):
|
||||
e = MockAPIError("超过最大长度限制", status_code=400)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
# ── Result metadata ──
|
||||
|
||||
def test_provider_and_model_in_result(self):
|
||||
e = MockAPIError("fail", status_code=500)
|
||||
result = classify_api_error(e, provider="openrouter", model="gpt-5")
|
||||
assert result.provider == "openrouter"
|
||||
assert result.model == "gpt-5"
|
||||
assert result.status_code == 500
|
||||
|
||||
def test_message_extracted(self):
|
||||
e = MockAPIError(
|
||||
"outer",
|
||||
status_code=500,
|
||||
body={"error": {"message": "Internal server error occurred"}},
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.message == "Internal server error occurred"
|
||||
|
||||
|
||||
# ── Test: Adversarial / edge cases (from live testing) ─────────────────
|
||||
|
||||
class TestAdversarialEdgeCases:
|
||||
"""Edge cases discovered during live testing with real SDK objects."""
|
||||
|
||||
def test_empty_exception_message(self):
|
||||
result = classify_api_error(Exception(""))
|
||||
assert result.reason == FailoverReason.unknown
|
||||
assert result.retryable is True
|
||||
|
||||
def test_500_with_none_body(self):
|
||||
e = MockAPIError("fail", status_code=500, body=None)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.server_error
|
||||
|
||||
def test_non_dict_body(self):
|
||||
"""Some providers return strings instead of JSON."""
|
||||
class StringBodyError(Exception):
|
||||
status_code = 400
|
||||
body = "just a string"
|
||||
result = classify_api_error(StringBodyError("bad"))
|
||||
assert result.reason == FailoverReason.format_error
|
||||
|
||||
def test_list_body(self):
|
||||
class ListBodyError(Exception):
|
||||
status_code = 500
|
||||
body = [{"error": "something"}]
|
||||
result = classify_api_error(ListBodyError("server error"))
|
||||
assert result.reason == FailoverReason.server_error
|
||||
|
||||
def test_circular_cause_chain(self):
|
||||
"""Must not infinite-loop on circular __cause__."""
|
||||
e = Exception("circular")
|
||||
e.__cause__ = e
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.unknown
|
||||
|
||||
def test_three_level_cause_chain(self):
|
||||
inner = MockAPIError("inner", status_code=429)
|
||||
middle = Exception("middle")
|
||||
middle.__cause__ = inner
|
||||
outer = RuntimeError("outer")
|
||||
outer.__cause__ = middle
|
||||
result = classify_api_error(outer)
|
||||
assert result.status_code == 429
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
|
||||
def test_400_with_rate_limit_text(self):
|
||||
"""Some providers send rate limits as 400 instead of 429."""
|
||||
e = MockAPIError(
|
||||
"rate limit policy",
|
||||
status_code=400,
|
||||
body={"error": {"message": "rate limit exceeded on this model"}},
|
||||
)
|
||||
result = classify_api_error(e, provider="openrouter")
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
|
||||
def test_400_with_billing_text(self):
|
||||
"""Some providers send billing errors as 400."""
|
||||
e = MockAPIError(
|
||||
"billing",
|
||||
status_code=400,
|
||||
body={"error": {"message": "insufficient credits for this request"}},
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.billing
|
||||
|
||||
def test_200_with_error_body(self):
|
||||
"""200 status with error in body — should be unknown, not crash."""
|
||||
class WeirdSuccess(Exception):
|
||||
status_code = 200
|
||||
body = {"error": {"message": "loading"}}
|
||||
result = classify_api_error(WeirdSuccess("model loading"))
|
||||
assert result.reason == FailoverReason.unknown
|
||||
|
||||
def test_ollama_context_size_exceeded(self):
|
||||
e = MockAPIError(
|
||||
"Error",
|
||||
status_code=400,
|
||||
body={"error": {"message": "context size has been exceeded"}},
|
||||
)
|
||||
result = classify_api_error(e, provider="ollama")
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
def test_connection_refused_error(self):
|
||||
e = ConnectionRefusedError("Connection refused: localhost:11434")
|
||||
result = classify_api_error(e, provider="ollama")
|
||||
assert result.reason == FailoverReason.timeout
|
||||
|
||||
def test_body_message_enrichment(self):
|
||||
"""Body message must be included in pattern matching even when
|
||||
str(error) doesn't contain it (OpenAI SDK APIStatusError)."""
|
||||
e = MockAPIError(
|
||||
"Usage limit", # str(e) = "usage limit"
|
||||
status_code=402,
|
||||
body={"error": {"message": "Usage limit reached, try again in 5 minutes"}},
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
# "try again" is only in body, not in str(e)
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
|
||||
def test_disconnect_pattern_ordering(self):
|
||||
"""Disconnect + large session must beat generic transport catch."""
|
||||
class FakeRemoteProtocol(Exception):
|
||||
pass
|
||||
# Type name isn't in _TRANSPORT_ERROR_TYPES but message has disconnect pattern
|
||||
e = Exception("peer closed connection without sending complete message")
|
||||
result = classify_api_error(e, approx_tokens=150000, context_length=200000)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
assert result.should_compress is True
|
||||
|
||||
def test_credit_balance_too_low(self):
|
||||
e = MockAPIError(
|
||||
"Credits low",
|
||||
status_code=402,
|
||||
body={"error": {"message": "Your credit balance is too low"}},
|
||||
)
|
||||
result = classify_api_error(e, provider="anthropic")
|
||||
assert result.reason == FailoverReason.billing
|
||||
|
||||
def test_deepseek_402_chinese(self):
|
||||
"""Chinese billing message should still match billing patterns."""
|
||||
# "余额不足" doesn't match English billing patterns, but 402 defaults to billing
|
||||
e = MockAPIError("余额不足", status_code=402)
|
||||
result = classify_api_error(e, provider="deepseek")
|
||||
assert result.reason == FailoverReason.billing
|
||||
|
||||
def test_openrouter_wrapped_context_overflow_in_metadata_raw(self):
|
||||
"""OpenRouter wraps provider errors in metadata.raw JSON string."""
|
||||
e = MockAPIError(
|
||||
"Provider returned error",
|
||||
status_code=400,
|
||||
body={
|
||||
"error": {
|
||||
"message": "Provider returned error",
|
||||
"code": 400,
|
||||
"metadata": {
|
||||
"raw": '{"error":{"message":"context length exceeded: 50000 > 32768"}}'
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
result = classify_api_error(e, provider="openrouter", approx_tokens=10000)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
assert result.should_compress is True
|
||||
|
||||
def test_openrouter_wrapped_rate_limit_in_metadata_raw(self):
|
||||
e = MockAPIError(
|
||||
"Provider returned error",
|
||||
status_code=400,
|
||||
body={
|
||||
"error": {
|
||||
"message": "Provider returned error",
|
||||
"metadata": {
|
||||
"raw": '{"error":{"message":"Rate limit exceeded. Please retry after 30s."}}'
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
result = classify_api_error(e, provider="openrouter")
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
|
||||
def test_thinking_signature_via_openrouter(self):
|
||||
"""Thinking signature errors proxied through OpenRouter must be caught."""
|
||||
e = MockAPIError(
|
||||
"thinking block has invalid signature",
|
||||
status_code=400,
|
||||
)
|
||||
# provider is openrouter, not anthropic — old code missed this
|
||||
result = classify_api_error(e, provider="openrouter", model="anthropic/claude-sonnet-4")
|
||||
assert result.reason == FailoverReason.thinking_signature
|
||||
|
||||
def test_generic_400_large_by_message_count(self):
|
||||
"""Many small messages (>80) should trigger context overflow heuristic."""
|
||||
e = MockAPIError(
|
||||
"Error",
|
||||
status_code=400,
|
||||
body={"error": {"message": "Error"}},
|
||||
)
|
||||
# Low token count but high message count
|
||||
result = classify_api_error(
|
||||
e, approx_tokens=5000, context_length=200000, num_messages=100,
|
||||
)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
def test_disconnect_large_by_message_count(self):
|
||||
"""Server disconnect with 200+ messages should trigger context overflow."""
|
||||
e = Exception("server disconnected without sending complete message")
|
||||
result = classify_api_error(
|
||||
e, approx_tokens=5000, context_length=200000, num_messages=250,
|
||||
)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
def test_openrouter_wrapped_model_not_found_in_metadata_raw(self):
|
||||
e = MockAPIError(
|
||||
"Provider returned error",
|
||||
status_code=400,
|
||||
body={
|
||||
"error": {
|
||||
"message": "Provider returned error",
|
||||
"metadata": {
|
||||
"raw": '{"error":{"message":"The model gpt-99 does not exist"}}'
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
result = classify_api_error(e, provider="openrouter")
|
||||
assert result.reason == FailoverReason.model_not_found
|
||||
@@ -0,0 +1,212 @@
|
||||
"""Tests for agent.rate_limit_tracker — header parsing and formatting."""
|
||||
|
||||
import time
|
||||
import pytest
|
||||
from agent.rate_limit_tracker import (
|
||||
RateLimitBucket,
|
||||
RateLimitState,
|
||||
parse_rate_limit_headers,
|
||||
format_rate_limit_display,
|
||||
format_rate_limit_compact,
|
||||
_fmt_count,
|
||||
_fmt_seconds,
|
||||
_bar,
|
||||
)
|
||||
|
||||
|
||||
# ── Sample headers from Nous inference API ──────────────────────────────
|
||||
|
||||
NOUS_HEADERS = {
|
||||
"x-ratelimit-limit-requests": "800",
|
||||
"x-ratelimit-limit-requests-1h": "33600",
|
||||
"x-ratelimit-limit-tokens": "8000000",
|
||||
"x-ratelimit-limit-tokens-1h": "336000000",
|
||||
"x-ratelimit-remaining-requests": "795",
|
||||
"x-ratelimit-remaining-requests-1h": "33590",
|
||||
"x-ratelimit-remaining-tokens": "7999500",
|
||||
"x-ratelimit-remaining-tokens-1h": "335999000",
|
||||
"x-ratelimit-reset-requests": "45.5",
|
||||
"x-ratelimit-reset-requests-1h": "3500.0",
|
||||
"x-ratelimit-reset-tokens": "42.3",
|
||||
"x-ratelimit-reset-tokens-1h": "3490.0",
|
||||
}
|
||||
|
||||
|
||||
class TestParseHeaders:
|
||||
def test_basic_parsing(self):
|
||||
state = parse_rate_limit_headers(NOUS_HEADERS, provider="nous")
|
||||
assert state is not None
|
||||
assert state.provider == "nous"
|
||||
assert state.has_data
|
||||
|
||||
assert state.requests_min.limit == 800
|
||||
assert state.requests_min.remaining == 795
|
||||
assert state.requests_min.reset_seconds == 45.5
|
||||
|
||||
assert state.requests_hour.limit == 33600
|
||||
assert state.requests_hour.remaining == 33590
|
||||
|
||||
assert state.tokens_min.limit == 8000000
|
||||
assert state.tokens_min.remaining == 7999500
|
||||
|
||||
assert state.tokens_hour.limit == 336000000
|
||||
assert state.tokens_hour.remaining == 335999000
|
||||
assert state.tokens_hour.reset_seconds == 3490.0
|
||||
|
||||
def test_no_headers(self):
|
||||
state = parse_rate_limit_headers({})
|
||||
assert state is None
|
||||
|
||||
def test_partial_headers(self):
|
||||
headers = {
|
||||
"x-ratelimit-limit-requests": "100",
|
||||
"x-ratelimit-remaining-requests": "50",
|
||||
}
|
||||
state = parse_rate_limit_headers(headers)
|
||||
assert state is not None
|
||||
assert state.requests_min.limit == 100
|
||||
assert state.requests_min.remaining == 50
|
||||
# Missing fields default to 0
|
||||
assert state.tokens_min.limit == 0
|
||||
|
||||
def test_non_rate_limit_headers_ignored(self):
|
||||
headers = {
|
||||
"content-type": "application/json",
|
||||
"server": "nginx",
|
||||
}
|
||||
state = parse_rate_limit_headers(headers)
|
||||
assert state is None
|
||||
|
||||
def test_malformed_values(self):
|
||||
headers = {
|
||||
"x-ratelimit-limit-requests": "not-a-number",
|
||||
"x-ratelimit-remaining-requests": "",
|
||||
"x-ratelimit-reset-requests": "abc",
|
||||
}
|
||||
state = parse_rate_limit_headers(headers)
|
||||
assert state is not None
|
||||
assert state.requests_min.limit == 0
|
||||
assert state.requests_min.remaining == 0
|
||||
assert state.requests_min.reset_seconds == 0.0
|
||||
|
||||
|
||||
class TestBucket:
|
||||
def test_used(self):
|
||||
b = RateLimitBucket(limit=800, remaining=795, reset_seconds=45.0, captured_at=time.time())
|
||||
assert b.used == 5
|
||||
|
||||
def test_usage_pct(self):
|
||||
b = RateLimitBucket(limit=100, remaining=20, reset_seconds=30.0, captured_at=time.time())
|
||||
assert b.usage_pct == pytest.approx(80.0)
|
||||
|
||||
def test_usage_pct_zero_limit(self):
|
||||
b = RateLimitBucket(limit=0, remaining=0)
|
||||
assert b.usage_pct == 0.0
|
||||
|
||||
def test_remaining_seconds_now(self):
|
||||
now = time.time()
|
||||
b = RateLimitBucket(limit=800, remaining=795, reset_seconds=60.0, captured_at=now - 10)
|
||||
# ~50 seconds should remain
|
||||
assert 49 <= b.remaining_seconds_now <= 51
|
||||
|
||||
def test_remaining_seconds_expired(self):
|
||||
b = RateLimitBucket(limit=800, remaining=795, reset_seconds=30.0, captured_at=time.time() - 60)
|
||||
assert b.remaining_seconds_now == 0.0
|
||||
|
||||
|
||||
class TestFormatting:
|
||||
def test_fmt_count_millions(self):
|
||||
assert _fmt_count(8000000) == "8.0M"
|
||||
assert _fmt_count(336000000) == "336.0M"
|
||||
|
||||
def test_fmt_count_thousands(self):
|
||||
assert _fmt_count(33600) == "33.6K"
|
||||
assert _fmt_count(1500) == "1.5K"
|
||||
|
||||
def test_fmt_count_small(self):
|
||||
assert _fmt_count(800) == "800"
|
||||
assert _fmt_count(0) == "0"
|
||||
|
||||
def test_fmt_seconds_short(self):
|
||||
assert _fmt_seconds(45) == "45s"
|
||||
assert _fmt_seconds(0) == "0s"
|
||||
|
||||
def test_fmt_seconds_minutes(self):
|
||||
assert _fmt_seconds(125) == "2m 5s"
|
||||
assert _fmt_seconds(120) == "2m"
|
||||
|
||||
def test_fmt_seconds_hours(self):
|
||||
assert _fmt_seconds(3660) == "1h 1m"
|
||||
assert _fmt_seconds(3600) == "1h"
|
||||
|
||||
def test_bar(self):
|
||||
bar = _bar(50.0, width=10)
|
||||
assert bar == "[█████░░░░░]"
|
||||
assert _bar(0.0, width=10) == "[░░░░░░░░░░]"
|
||||
assert _bar(100.0, width=10) == "[██████████]"
|
||||
|
||||
def test_format_display_no_data(self):
|
||||
state = RateLimitState()
|
||||
result = format_rate_limit_display(state)
|
||||
assert "No rate limit data" in result
|
||||
|
||||
def test_format_display_with_data(self):
|
||||
state = parse_rate_limit_headers(NOUS_HEADERS, provider="nous")
|
||||
result = format_rate_limit_display(state)
|
||||
assert "Nous" in result
|
||||
assert "Requests/min" in result
|
||||
assert "Requests/hr" in result
|
||||
assert "Tokens/min" in result
|
||||
assert "Tokens/hr" in result
|
||||
assert "resets in" in result
|
||||
|
||||
def test_format_display_warning_on_high_usage(self):
|
||||
headers = {
|
||||
**NOUS_HEADERS,
|
||||
"x-ratelimit-remaining-requests": "50", # 750/800 used = 93.75%
|
||||
}
|
||||
state = parse_rate_limit_headers(headers)
|
||||
result = format_rate_limit_display(state)
|
||||
assert "⚠" in result
|
||||
|
||||
def test_format_compact(self):
|
||||
state = parse_rate_limit_headers(NOUS_HEADERS, provider="nous")
|
||||
result = format_rate_limit_compact(state)
|
||||
assert "RPM:" in result
|
||||
assert "RPH:" in result
|
||||
assert "TPM:" in result
|
||||
assert "TPH:" in result
|
||||
assert "resets" in result
|
||||
|
||||
def test_format_compact_no_data(self):
|
||||
state = RateLimitState()
|
||||
result = format_rate_limit_compact(state)
|
||||
assert "No rate limit data" in result
|
||||
|
||||
|
||||
class TestAgentIntegration:
|
||||
"""Test that AIAgent captures rate limit state correctly."""
|
||||
|
||||
def test_capture_rate_limits_from_headers(self):
|
||||
"""Simulate the header capture path without a real API call."""
|
||||
import sys
|
||||
import os
|
||||
# Use a mock httpx-like response
|
||||
class MockResponse:
|
||||
headers = NOUS_HEADERS
|
||||
|
||||
# Import AIAgent minimally
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
# Test the parsing directly
|
||||
state = parse_rate_limit_headers(MockResponse.headers, provider="nous")
|
||||
assert state is not None
|
||||
assert state.requests_min.limit == 800
|
||||
assert state.tokens_hour.limit == 336000000
|
||||
|
||||
def test_capture_rate_limits_none_response(self):
|
||||
"""_capture_rate_limits should handle None gracefully."""
|
||||
from agent.rate_limit_tracker import parse_rate_limit_headers
|
||||
# None should not crash
|
||||
result = parse_rate_limit_headers({})
|
||||
assert result is None
|
||||
@@ -3,6 +3,7 @@
|
||||
import os
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from agent.subdirectory_hints import SubdirectoryHintTracker
|
||||
|
||||
@@ -189,3 +190,45 @@ class TestSubdirectoryHintTracker:
|
||||
"terminal", {"command": "curl https://example.com/frontend/api"}
|
||||
)
|
||||
assert result is None
|
||||
|
||||
|
||||
class TestPermissionErrorHandling:
|
||||
"""Regression tests for PermissionError in filesystem checks (ref #6214)."""
|
||||
|
||||
def test_is_valid_subdir_permission_error(self, tmp_path):
|
||||
"""_is_valid_subdir should return False when is_dir() raises PermissionError."""
|
||||
tracker = SubdirectoryHintTracker(working_dir=str(tmp_path))
|
||||
restricted = tmp_path / "restricted"
|
||||
restricted.mkdir()
|
||||
with patch.object(Path, "is_dir", side_effect=PermissionError("Permission denied")):
|
||||
assert tracker._is_valid_subdir(restricted) is False
|
||||
|
||||
def test_load_hints_permission_error_on_is_file(self, tmp_path):
|
||||
"""_load_hints_for_directory should skip files when is_file() raises PermissionError."""
|
||||
tracker = SubdirectoryHintTracker(working_dir=str(tmp_path))
|
||||
restricted = tmp_path / "restricted"
|
||||
restricted.mkdir()
|
||||
original_is_file = Path.is_file
|
||||
def patched_is_file(self):
|
||||
if "restricted" in str(self):
|
||||
raise PermissionError("Permission denied")
|
||||
return original_is_file(self)
|
||||
with patch.object(Path, "is_file", patched_is_file):
|
||||
result = tracker._load_hints_for_directory(restricted)
|
||||
assert result is None
|
||||
|
||||
def test_check_tool_call_survives_inaccessible_path(self, project):
|
||||
"""Full check_tool_call should not crash when a path is inaccessible."""
|
||||
tracker = SubdirectoryHintTracker(working_dir=str(project))
|
||||
original_is_dir = Path.is_dir
|
||||
def patched_is_dir(self):
|
||||
if "backend" in str(self) and "src" not in str(self):
|
||||
raise PermissionError("Permission denied")
|
||||
return original_is_dir(self)
|
||||
with patch.object(Path, "is_dir", patched_is_dir):
|
||||
# Should not raise — gracefully skip the inaccessible directory
|
||||
result = tracker.check_tool_call(
|
||||
"read_file", {"path": str(project / "backend" / "src" / "main.py")}
|
||||
)
|
||||
# Result may be None (backend skipped) — the key point is no crash
|
||||
assert result is None or isinstance(result, str)
|
||||
|
||||
@@ -2,22 +2,65 @@ import queue
|
||||
import threading
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import cli as cli_module
|
||||
from cli import HermesCLI
|
||||
|
||||
|
||||
class _FakeBuffer:
|
||||
def __init__(self, text="", cursor_position=None):
|
||||
self.text = text
|
||||
self.cursor_position = len(text) if cursor_position is None else cursor_position
|
||||
|
||||
def reset(self, append_to_history=False):
|
||||
self.text = ""
|
||||
self.cursor_position = 0
|
||||
|
||||
|
||||
def _make_cli_stub():
|
||||
cli = HermesCLI.__new__(HermesCLI)
|
||||
cli._approval_state = None
|
||||
cli._approval_deadline = 0
|
||||
cli._approval_lock = threading.Lock()
|
||||
cli._sudo_state = None
|
||||
cli._sudo_deadline = 0
|
||||
cli._modal_input_snapshot = None
|
||||
cli._invalidate = MagicMock()
|
||||
cli._app = SimpleNamespace(invalidate=MagicMock())
|
||||
cli._app = SimpleNamespace(invalidate=MagicMock(), current_buffer=_FakeBuffer())
|
||||
return cli
|
||||
|
||||
|
||||
class TestCliApprovalUi:
|
||||
def test_sudo_prompt_restores_existing_draft_after_response(self):
|
||||
cli = _make_cli_stub()
|
||||
cli._app.current_buffer = _FakeBuffer("draft command", cursor_position=5)
|
||||
result = {}
|
||||
|
||||
def _run_callback():
|
||||
result["value"] = cli._sudo_password_callback()
|
||||
|
||||
with patch.object(cli_module, "_cprint"):
|
||||
thread = threading.Thread(target=_run_callback, daemon=True)
|
||||
thread.start()
|
||||
|
||||
deadline = time.time() + 2
|
||||
while cli._sudo_state is None and time.time() < deadline:
|
||||
time.sleep(0.01)
|
||||
|
||||
assert cli._sudo_state is not None
|
||||
assert cli._app.current_buffer.text == ""
|
||||
|
||||
cli._app.current_buffer.text = "secret"
|
||||
cli._app.current_buffer.cursor_position = len("secret")
|
||||
cli._sudo_state["response_queue"].put("secret")
|
||||
|
||||
thread.join(timeout=2)
|
||||
|
||||
assert result["value"] == "secret"
|
||||
assert cli._app.current_buffer.text == "draft command"
|
||||
assert cli._app.current_buffer.cursor_position == 5
|
||||
|
||||
def test_approval_callback_includes_view_for_long_commands(self):
|
||||
cli = _make_cli_stub()
|
||||
command = "sudo dd if=/tmp/githubcli-keyring.gpg of=/usr/share/keyrings/githubcli-archive-keyring.gpg bs=4M status=progress"
|
||||
|
||||
@@ -41,6 +41,7 @@ def _attach_agent(
|
||||
session_completion_tokens=completion_tokens,
|
||||
session_total_tokens=total_tokens,
|
||||
session_api_calls=api_calls,
|
||||
get_rate_limit_state=lambda: None,
|
||||
context_compressor=SimpleNamespace(
|
||||
last_prompt_tokens=context_tokens,
|
||||
context_length=context_length,
|
||||
|
||||
@@ -38,6 +38,8 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
|
||||
monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
|
||||
monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
|
||||
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
|
||||
# Avoid making real calls during tests if this key is set in the env files
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
|
||||
@@ -0,0 +1,361 @@
|
||||
"""Tests for the BlueBubbles iMessage gateway adapter."""
|
||||
import pytest
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
|
||||
|
||||
def _make_adapter(monkeypatch, **extra):
|
||||
monkeypatch.setenv("BLUEBUBBLES_SERVER_URL", "http://localhost:1234")
|
||||
monkeypatch.setenv("BLUEBUBBLES_PASSWORD", "secret")
|
||||
from gateway.platforms.bluebubbles import BlueBubblesAdapter
|
||||
|
||||
cfg = PlatformConfig(
|
||||
enabled=True,
|
||||
extra={
|
||||
"server_url": "http://localhost:1234",
|
||||
"password": "secret",
|
||||
**extra,
|
||||
},
|
||||
)
|
||||
return BlueBubblesAdapter(cfg)
|
||||
|
||||
|
||||
class TestBlueBubblesPlatformEnum:
|
||||
def test_bluebubbles_enum_exists(self):
|
||||
assert Platform.BLUEBUBBLES.value == "bluebubbles"
|
||||
|
||||
|
||||
class TestBlueBubblesConfigLoading:
|
||||
def test_apply_env_overrides_bluebubbles(self, monkeypatch):
|
||||
monkeypatch.setenv("BLUEBUBBLES_SERVER_URL", "http://localhost:1234")
|
||||
monkeypatch.setenv("BLUEBUBBLES_PASSWORD", "secret")
|
||||
monkeypatch.setenv("BLUEBUBBLES_WEBHOOK_PORT", "9999")
|
||||
from gateway.config import GatewayConfig, _apply_env_overrides
|
||||
|
||||
config = GatewayConfig()
|
||||
_apply_env_overrides(config)
|
||||
assert Platform.BLUEBUBBLES in config.platforms
|
||||
bc = config.platforms[Platform.BLUEBUBBLES]
|
||||
assert bc.enabled is True
|
||||
assert bc.extra["server_url"] == "http://localhost:1234"
|
||||
assert bc.extra["password"] == "secret"
|
||||
assert bc.extra["webhook_port"] == 9999
|
||||
|
||||
def test_connected_platforms_includes_bluebubbles(self, monkeypatch):
|
||||
monkeypatch.setenv("BLUEBUBBLES_SERVER_URL", "http://localhost:1234")
|
||||
monkeypatch.setenv("BLUEBUBBLES_PASSWORD", "secret")
|
||||
from gateway.config import GatewayConfig, _apply_env_overrides
|
||||
|
||||
config = GatewayConfig()
|
||||
_apply_env_overrides(config)
|
||||
assert Platform.BLUEBUBBLES in config.get_connected_platforms()
|
||||
|
||||
def test_home_channel_set_from_env(self, monkeypatch):
|
||||
monkeypatch.setenv("BLUEBUBBLES_SERVER_URL", "http://localhost:1234")
|
||||
monkeypatch.setenv("BLUEBUBBLES_PASSWORD", "secret")
|
||||
monkeypatch.setenv("BLUEBUBBLES_HOME_CHANNEL", "user@example.com")
|
||||
from gateway.config import GatewayConfig, _apply_env_overrides
|
||||
|
||||
config = GatewayConfig()
|
||||
_apply_env_overrides(config)
|
||||
hc = config.platforms[Platform.BLUEBUBBLES].home_channel
|
||||
assert hc is not None
|
||||
assert hc.chat_id == "user@example.com"
|
||||
|
||||
def test_not_connected_without_password(self, monkeypatch):
|
||||
monkeypatch.setenv("BLUEBUBBLES_SERVER_URL", "http://localhost:1234")
|
||||
monkeypatch.delenv("BLUEBUBBLES_PASSWORD", raising=False)
|
||||
from gateway.config import GatewayConfig, _apply_env_overrides
|
||||
|
||||
config = GatewayConfig()
|
||||
_apply_env_overrides(config)
|
||||
assert Platform.BLUEBUBBLES not in config.get_connected_platforms()
|
||||
|
||||
|
||||
class TestBlueBubblesHelpers:
|
||||
def test_check_requirements(self, monkeypatch):
|
||||
monkeypatch.setenv("BLUEBUBBLES_SERVER_URL", "http://localhost:1234")
|
||||
monkeypatch.setenv("BLUEBUBBLES_PASSWORD", "secret")
|
||||
from gateway.platforms.bluebubbles import check_bluebubbles_requirements
|
||||
|
||||
assert check_bluebubbles_requirements() is True
|
||||
|
||||
def test_format_message_strips_markdown(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
assert adapter.format_message("**Hello** `world`") == "Hello world"
|
||||
|
||||
def test_strip_markdown_headers(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
assert adapter.format_message("## Heading\ntext") == "Heading\ntext"
|
||||
|
||||
def test_strip_markdown_links(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
assert adapter.format_message("[click here](http://example.com)") == "click here"
|
||||
|
||||
def test_init_normalizes_webhook_path(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch, webhook_path="bluebubbles-webhook")
|
||||
assert adapter.webhook_path == "/bluebubbles-webhook"
|
||||
|
||||
def test_init_preserves_leading_slash(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch, webhook_path="/my-hook")
|
||||
assert adapter.webhook_path == "/my-hook"
|
||||
|
||||
def test_server_url_normalized(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch, server_url="http://localhost:1234/")
|
||||
assert adapter.server_url == "http://localhost:1234"
|
||||
|
||||
def test_server_url_adds_scheme(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch, server_url="localhost:1234")
|
||||
assert adapter.server_url == "http://localhost:1234"
|
||||
|
||||
|
||||
class TestBlueBubblesWebhookParsing:
|
||||
def test_webhook_prefers_chat_guid_over_message_guid(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
payload = {
|
||||
"guid": "MESSAGE-GUID",
|
||||
"chatGuid": "iMessage;-;user@example.com",
|
||||
"chatIdentifier": "user@example.com",
|
||||
}
|
||||
record = adapter._extract_payload_record(payload) or {}
|
||||
chat_guid = adapter._value(
|
||||
record.get("chatGuid"),
|
||||
payload.get("chatGuid"),
|
||||
record.get("chat_guid"),
|
||||
payload.get("chat_guid"),
|
||||
payload.get("guid"),
|
||||
)
|
||||
assert chat_guid == "iMessage;-;user@example.com"
|
||||
|
||||
def test_webhook_can_fall_back_to_sender_when_chat_fields_missing(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
payload = {
|
||||
"data": {
|
||||
"guid": "MESSAGE-GUID",
|
||||
"text": "hello",
|
||||
"handle": {"address": "user@example.com"},
|
||||
"isFromMe": False,
|
||||
}
|
||||
}
|
||||
record = adapter._extract_payload_record(payload) or {}
|
||||
chat_guid = adapter._value(
|
||||
record.get("chatGuid"),
|
||||
payload.get("chatGuid"),
|
||||
record.get("chat_guid"),
|
||||
payload.get("chat_guid"),
|
||||
payload.get("guid"),
|
||||
)
|
||||
chat_identifier = adapter._value(
|
||||
record.get("chatIdentifier"),
|
||||
record.get("identifier"),
|
||||
payload.get("chatIdentifier"),
|
||||
payload.get("identifier"),
|
||||
)
|
||||
sender = (
|
||||
adapter._value(
|
||||
record.get("handle", {}).get("address")
|
||||
if isinstance(record.get("handle"), dict)
|
||||
else None,
|
||||
record.get("sender"),
|
||||
record.get("from"),
|
||||
record.get("address"),
|
||||
)
|
||||
or chat_identifier
|
||||
or chat_guid
|
||||
)
|
||||
if not (chat_guid or chat_identifier) and sender:
|
||||
chat_identifier = sender
|
||||
assert chat_identifier == "user@example.com"
|
||||
|
||||
def test_extract_payload_record_accepts_list_data(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
payload = {
|
||||
"type": "new-message",
|
||||
"data": [
|
||||
{
|
||||
"text": "hello",
|
||||
"chatGuid": "iMessage;-;user@example.com",
|
||||
"chatIdentifier": "user@example.com",
|
||||
}
|
||||
],
|
||||
}
|
||||
record = adapter._extract_payload_record(payload)
|
||||
assert record == payload["data"][0]
|
||||
|
||||
def test_extract_payload_record_dict_data(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
payload = {"data": {"text": "hello", "chatGuid": "iMessage;-;+1234"}}
|
||||
record = adapter._extract_payload_record(payload)
|
||||
assert record["text"] == "hello"
|
||||
|
||||
def test_extract_payload_record_fallback_to_message(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
payload = {"message": {"text": "hello"}}
|
||||
record = adapter._extract_payload_record(payload)
|
||||
assert record["text"] == "hello"
|
||||
|
||||
|
||||
class TestBlueBubblesGuidResolution:
|
||||
def test_raw_guid_returned_as_is(self, monkeypatch):
|
||||
"""If target already contains ';' it's a raw GUID — return unchanged."""
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
import asyncio
|
||||
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
adapter._resolve_chat_guid("iMessage;-;user@example.com")
|
||||
)
|
||||
assert result == "iMessage;-;user@example.com"
|
||||
|
||||
def test_empty_target_returns_none(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
import asyncio
|
||||
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
adapter._resolve_chat_guid("")
|
||||
)
|
||||
assert result is None
|
||||
|
||||
|
||||
class TestBlueBubblesToolsetIntegration:
|
||||
def test_toolset_exists(self):
|
||||
from toolsets import TOOLSETS
|
||||
|
||||
assert "hermes-bluebubbles" in TOOLSETS
|
||||
|
||||
def test_toolset_in_gateway_composite(self):
|
||||
from toolsets import TOOLSETS
|
||||
|
||||
gateway = TOOLSETS["hermes-gateway"]
|
||||
assert "hermes-bluebubbles" in gateway["includes"]
|
||||
|
||||
|
||||
class TestBlueBubblesPromptHint:
|
||||
def test_platform_hint_exists(self):
|
||||
from agent.prompt_builder import PLATFORM_HINTS
|
||||
|
||||
assert "bluebubbles" in PLATFORM_HINTS
|
||||
hint = PLATFORM_HINTS["bluebubbles"]
|
||||
assert "iMessage" in hint
|
||||
assert "plain text" in hint
|
||||
|
||||
|
||||
class TestBlueBubblesAttachmentDownload:
|
||||
"""Verify _download_attachment routes to the correct cache helper."""
|
||||
|
||||
def test_download_image_uses_image_cache(self, monkeypatch):
|
||||
"""Image MIME routes to cache_image_from_bytes."""
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
import asyncio
|
||||
import httpx
|
||||
|
||||
# Mock the HTTP client response
|
||||
class MockResponse:
|
||||
status_code = 200
|
||||
content = b"\x89PNG\r\n\x1a\n"
|
||||
|
||||
def raise_for_status(self):
|
||||
pass
|
||||
|
||||
async def mock_get(*args, **kwargs):
|
||||
return MockResponse()
|
||||
|
||||
adapter.client = type("MockClient", (), {"get": mock_get})()
|
||||
|
||||
cached_path = None
|
||||
|
||||
def mock_cache_image(data, ext):
|
||||
nonlocal cached_path
|
||||
cached_path = f"/tmp/test_image{ext}"
|
||||
return cached_path
|
||||
|
||||
monkeypatch.setattr(
|
||||
"gateway.platforms.bluebubbles.cache_image_from_bytes",
|
||||
mock_cache_image,
|
||||
)
|
||||
|
||||
att_meta = {"mimeType": "image/png", "transferName": "photo.png"}
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
adapter._download_attachment("att-guid-123", att_meta)
|
||||
)
|
||||
assert result == "/tmp/test_image.png"
|
||||
|
||||
def test_download_audio_uses_audio_cache(self, monkeypatch):
|
||||
"""Audio MIME routes to cache_audio_from_bytes."""
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
import asyncio
|
||||
|
||||
class MockResponse:
|
||||
status_code = 200
|
||||
content = b"fake-audio-data"
|
||||
|
||||
def raise_for_status(self):
|
||||
pass
|
||||
|
||||
async def mock_get(*args, **kwargs):
|
||||
return MockResponse()
|
||||
|
||||
adapter.client = type("MockClient", (), {"get": mock_get})()
|
||||
|
||||
cached_path = None
|
||||
|
||||
def mock_cache_audio(data, ext):
|
||||
nonlocal cached_path
|
||||
cached_path = f"/tmp/test_audio{ext}"
|
||||
return cached_path
|
||||
|
||||
monkeypatch.setattr(
|
||||
"gateway.platforms.bluebubbles.cache_audio_from_bytes",
|
||||
mock_cache_audio,
|
||||
)
|
||||
|
||||
att_meta = {"mimeType": "audio/mpeg", "transferName": "voice.mp3"}
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
adapter._download_attachment("att-guid-456", att_meta)
|
||||
)
|
||||
assert result == "/tmp/test_audio.mp3"
|
||||
|
||||
def test_download_document_uses_document_cache(self, monkeypatch):
|
||||
"""Non-image/audio MIME routes to cache_document_from_bytes."""
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
import asyncio
|
||||
|
||||
class MockResponse:
|
||||
status_code = 200
|
||||
content = b"fake-doc-data"
|
||||
|
||||
def raise_for_status(self):
|
||||
pass
|
||||
|
||||
async def mock_get(*args, **kwargs):
|
||||
return MockResponse()
|
||||
|
||||
adapter.client = type("MockClient", (), {"get": mock_get})()
|
||||
|
||||
cached_path = None
|
||||
|
||||
def mock_cache_doc(data, filename):
|
||||
nonlocal cached_path
|
||||
cached_path = f"/tmp/{filename}"
|
||||
return cached_path
|
||||
|
||||
monkeypatch.setattr(
|
||||
"gateway.platforms.bluebubbles.cache_document_from_bytes",
|
||||
mock_cache_doc,
|
||||
)
|
||||
|
||||
att_meta = {"mimeType": "application/pdf", "transferName": "report.pdf"}
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
adapter._download_attachment("att-guid-789", att_meta)
|
||||
)
|
||||
assert result == "/tmp/report.pdf"
|
||||
|
||||
def test_download_returns_none_without_client(self, monkeypatch):
|
||||
"""No client → returns None gracefully."""
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
adapter.client = None
|
||||
import asyncio
|
||||
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
adapter._download_attachment("att-guid", {"mimeType": "image/png"})
|
||||
)
|
||||
assert result is None
|
||||
@@ -209,14 +209,31 @@ class TestIncomingDocumentHandling:
|
||||
assert "[Content of readme.md]:" in event.text
|
||||
assert "# Title" in event.text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_log_content_injected(self, adapter):
|
||||
""".log file under 100KB should be treated as text/plain and injected."""
|
||||
file_content = b"BLE trace line 1\nBLE trace line 2"
|
||||
|
||||
with _mock_aiohttp_download(file_content):
|
||||
msg = make_message(
|
||||
attachments=[make_attachment(filename="btsnoop_hci.log", content_type="text/plain")],
|
||||
content="please inspect this",
|
||||
)
|
||||
await adapter._handle_message(msg)
|
||||
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert "[Content of btsnoop_hci.log]:" in event.text
|
||||
assert "BLE trace line 1" in event.text
|
||||
assert "please inspect this" in event.text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_oversized_document_skipped(self, adapter):
|
||||
"""A document over 20MB should be skipped — media_urls stays empty."""
|
||||
"""A document over 32MB should be skipped — media_urls stays empty."""
|
||||
msg = make_message([
|
||||
make_attachment(
|
||||
filename="huge.pdf",
|
||||
content_type="application/pdf",
|
||||
size=25 * 1024 * 1024,
|
||||
size=33 * 1024 * 1024,
|
||||
)
|
||||
])
|
||||
await adapter._handle_message(msg)
|
||||
@@ -226,6 +243,24 @@ class TestIncomingDocumentHandling:
|
||||
# handler must still be called
|
||||
adapter.handle_message.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_mid_sized_zip_under_32mb_is_cached(self, adapter):
|
||||
"""A 25MB .zip should be accepted now that Discord documents allow up to 32MB."""
|
||||
msg = make_message([
|
||||
make_attachment(
|
||||
filename="bugreport.zip",
|
||||
content_type="application/zip",
|
||||
size=25 * 1024 * 1024,
|
||||
)
|
||||
])
|
||||
|
||||
with _mock_aiohttp_download(b"PK\x03\x04test"):
|
||||
await adapter._handle_message(msg)
|
||||
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
assert len(event.media_urls) == 1
|
||||
assert event.media_types == ["application/zip"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_zip_document_cached(self, adapter):
|
||||
"""A .zip file should be cached as a supported document."""
|
||||
|
||||
@@ -0,0 +1,277 @@
|
||||
"""Tests for Discord reply_to_mode functionality.
|
||||
|
||||
Covers the threading behavior control for multi-chunk replies:
|
||||
- "off": Never reply-reference to original message
|
||||
- "first": Only first chunk uses reply reference (default)
|
||||
- "all": All chunks reply-reference the original message
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import PlatformConfig, GatewayConfig, Platform, _apply_env_overrides
|
||||
|
||||
|
||||
def _ensure_discord_mock():
|
||||
"""Install a mock discord module when discord.py isn't available."""
|
||||
if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
|
||||
return
|
||||
|
||||
discord_mod = MagicMock()
|
||||
discord_mod.Intents.default.return_value = MagicMock()
|
||||
discord_mod.Client = MagicMock
|
||||
discord_mod.File = MagicMock
|
||||
discord_mod.DMChannel = type("DMChannel", (), {})
|
||||
discord_mod.Thread = type("Thread", (), {})
|
||||
discord_mod.ForumChannel = type("ForumChannel", (), {})
|
||||
discord_mod.ui = SimpleNamespace(View=object, button=lambda *a, **k: (lambda fn: fn), Button=object)
|
||||
discord_mod.ButtonStyle = SimpleNamespace(success=1, primary=2, secondary=2, danger=3, green=1, grey=2, blurple=2, red=3)
|
||||
discord_mod.Color = SimpleNamespace(orange=lambda: 1, green=lambda: 2, blue=lambda: 3, red=lambda: 4, purple=lambda: 5)
|
||||
discord_mod.Interaction = object
|
||||
discord_mod.Embed = MagicMock
|
||||
discord_mod.app_commands = SimpleNamespace(
|
||||
describe=lambda **kwargs: (lambda fn: fn),
|
||||
choices=lambda **kwargs: (lambda fn: fn),
|
||||
Choice=lambda **kwargs: SimpleNamespace(**kwargs),
|
||||
)
|
||||
|
||||
ext_mod = MagicMock()
|
||||
commands_mod = MagicMock()
|
||||
commands_mod.Bot = MagicMock
|
||||
ext_mod.commands = commands_mod
|
||||
|
||||
sys.modules.setdefault("discord", discord_mod)
|
||||
sys.modules.setdefault("discord.ext", ext_mod)
|
||||
sys.modules.setdefault("discord.ext.commands", commands_mod)
|
||||
|
||||
|
||||
_ensure_discord_mock()
|
||||
|
||||
from gateway.platforms.discord import DiscordAdapter # noqa: E402
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def adapter_factory():
|
||||
"""Factory to create DiscordAdapter with custom reply_to_mode."""
|
||||
def create(reply_to_mode: str = "first"):
|
||||
config = PlatformConfig(enabled=True, token="test-token", reply_to_mode=reply_to_mode)
|
||||
return DiscordAdapter(config)
|
||||
return create
|
||||
|
||||
|
||||
class TestReplyToModeConfig:
|
||||
"""Tests for reply_to_mode configuration loading."""
|
||||
|
||||
def test_default_mode_is_first(self, adapter_factory):
|
||||
adapter = adapter_factory()
|
||||
assert adapter._reply_to_mode == "first"
|
||||
|
||||
def test_off_mode(self, adapter_factory):
|
||||
adapter = adapter_factory(reply_to_mode="off")
|
||||
assert adapter._reply_to_mode == "off"
|
||||
|
||||
def test_first_mode(self, adapter_factory):
|
||||
adapter = adapter_factory(reply_to_mode="first")
|
||||
assert adapter._reply_to_mode == "first"
|
||||
|
||||
def test_all_mode(self, adapter_factory):
|
||||
adapter = adapter_factory(reply_to_mode="all")
|
||||
assert adapter._reply_to_mode == "all"
|
||||
|
||||
def test_invalid_mode_stored_as_is(self, adapter_factory):
|
||||
"""Invalid modes are stored but send() handles them gracefully."""
|
||||
adapter = adapter_factory(reply_to_mode="invalid")
|
||||
assert adapter._reply_to_mode == "invalid"
|
||||
|
||||
def test_none_mode_defaults_to_first(self):
|
||||
config = PlatformConfig(enabled=True, token="test-token")
|
||||
adapter = DiscordAdapter(config)
|
||||
assert adapter._reply_to_mode == "first"
|
||||
|
||||
def test_empty_string_mode_defaults_to_first(self):
|
||||
config = PlatformConfig(enabled=True, token="test-token", reply_to_mode="")
|
||||
adapter = DiscordAdapter(config)
|
||||
assert adapter._reply_to_mode == "first"
|
||||
|
||||
|
||||
def _make_discord_adapter(reply_to_mode: str = "first"):
|
||||
"""Create a DiscordAdapter with mocked client and channel for send() tests."""
|
||||
config = PlatformConfig(enabled=True, token="test-token", reply_to_mode=reply_to_mode)
|
||||
adapter = DiscordAdapter(config)
|
||||
|
||||
# Mock the Discord client and channel
|
||||
mock_channel = AsyncMock()
|
||||
ref_message = MagicMock()
|
||||
mock_channel.fetch_message = AsyncMock(return_value=ref_message)
|
||||
|
||||
sent_msg = MagicMock()
|
||||
sent_msg.id = 42
|
||||
mock_channel.send = AsyncMock(return_value=sent_msg)
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.get_channel = MagicMock(return_value=mock_channel)
|
||||
|
||||
adapter._client = mock_client
|
||||
return adapter, mock_channel, ref_message
|
||||
|
||||
|
||||
class TestSendWithReplyToMode:
|
||||
"""Tests for send() method respecting reply_to_mode."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_off_mode_no_reply_reference(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("off")
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
# Should never try to fetch the reference message
|
||||
channel.fetch_message.assert_not_called()
|
||||
# All chunks sent without reference
|
||||
for call in channel.send.call_args_list:
|
||||
assert call.kwargs.get("reference") is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_first_mode_only_first_chunk_references(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("first")
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
# Should fetch the reference message
|
||||
channel.fetch_message.assert_called_once_with(999)
|
||||
calls = channel.send.call_args_list
|
||||
assert len(calls) == 3
|
||||
assert calls[0].kwargs.get("reference") is ref_msg
|
||||
assert calls[1].kwargs.get("reference") is None
|
||||
assert calls[2].kwargs.get("reference") is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_all_mode_all_chunks_reference(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("all")
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
channel.fetch_message.assert_called_once_with(999)
|
||||
calls = channel.send.call_args_list
|
||||
assert len(calls) == 3
|
||||
for call in calls:
|
||||
assert call.kwargs.get("reference") is ref_msg
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_reply_to_param_no_reference(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("all")
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to=None)
|
||||
|
||||
channel.fetch_message.assert_not_called()
|
||||
for call in channel.send.call_args_list:
|
||||
assert call.kwargs.get("reference") is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_single_chunk_respects_first_mode(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("first")
|
||||
adapter.truncate_message = lambda content, max_len: ["single chunk"]
|
||||
|
||||
await adapter.send("12345", "test", reply_to="999")
|
||||
|
||||
calls = channel.send.call_args_list
|
||||
assert len(calls) == 1
|
||||
assert calls[0].kwargs.get("reference") is ref_msg
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_single_chunk_off_mode(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("off")
|
||||
adapter.truncate_message = lambda content, max_len: ["single chunk"]
|
||||
|
||||
await adapter.send("12345", "test", reply_to="999")
|
||||
|
||||
channel.fetch_message.assert_not_called()
|
||||
calls = channel.send.call_args_list
|
||||
assert len(calls) == 1
|
||||
assert calls[0].kwargs.get("reference") is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_invalid_mode_falls_back_to_first_behavior(self):
|
||||
"""Invalid mode behaves like 'first' — only first chunk gets reference."""
|
||||
adapter, channel, ref_msg = _make_discord_adapter("banana")
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
|
||||
|
||||
await adapter.send("12345", "test", reply_to="999")
|
||||
|
||||
calls = channel.send.call_args_list
|
||||
assert len(calls) == 2
|
||||
assert calls[0].kwargs.get("reference") is ref_msg
|
||||
assert calls[1].kwargs.get("reference") is None
|
||||
|
||||
|
||||
class TestConfigSerialization:
|
||||
"""Tests for reply_to_mode serialization (shared with Telegram)."""
|
||||
|
||||
def test_to_dict_includes_reply_to_mode(self):
|
||||
config = PlatformConfig(enabled=True, token="test", reply_to_mode="all")
|
||||
result = config.to_dict()
|
||||
assert result["reply_to_mode"] == "all"
|
||||
|
||||
def test_from_dict_loads_reply_to_mode(self):
|
||||
data = {"enabled": True, "token": "***", "reply_to_mode": "off"}
|
||||
config = PlatformConfig.from_dict(data)
|
||||
assert config.reply_to_mode == "off"
|
||||
|
||||
def test_from_dict_defaults_to_first(self):
|
||||
data = {"enabled": True, "token": "***"}
|
||||
config = PlatformConfig.from_dict(data)
|
||||
assert config.reply_to_mode == "first"
|
||||
|
||||
|
||||
class TestEnvVarOverride:
|
||||
"""Tests for DISCORD_REPLY_TO_MODE environment variable override."""
|
||||
|
||||
def _make_config(self):
|
||||
config = GatewayConfig()
|
||||
config.platforms[Platform.DISCORD] = PlatformConfig(enabled=True, token="test")
|
||||
return config
|
||||
|
||||
def test_env_var_sets_off_mode(self):
|
||||
config = self._make_config()
|
||||
with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": "off"}, clear=False):
|
||||
_apply_env_overrides(config)
|
||||
assert config.platforms[Platform.DISCORD].reply_to_mode == "off"
|
||||
|
||||
def test_env_var_sets_all_mode(self):
|
||||
config = self._make_config()
|
||||
with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": "all"}, clear=False):
|
||||
_apply_env_overrides(config)
|
||||
assert config.platforms[Platform.DISCORD].reply_to_mode == "all"
|
||||
|
||||
def test_env_var_case_insensitive(self):
|
||||
config = self._make_config()
|
||||
with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": "ALL"}, clear=False):
|
||||
_apply_env_overrides(config)
|
||||
assert config.platforms[Platform.DISCORD].reply_to_mode == "all"
|
||||
|
||||
def test_env_var_invalid_value_ignored(self):
|
||||
config = self._make_config()
|
||||
with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": "banana"}, clear=False):
|
||||
_apply_env_overrides(config)
|
||||
assert config.platforms[Platform.DISCORD].reply_to_mode == "first"
|
||||
|
||||
def test_env_var_empty_value_ignored(self):
|
||||
config = self._make_config()
|
||||
with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": ""}, clear=False):
|
||||
_apply_env_overrides(config)
|
||||
assert config.platforms[Platform.DISCORD].reply_to_mode == "first"
|
||||
|
||||
def test_env_var_creates_platform_config_if_missing(self):
|
||||
"""DISCORD_REPLY_TO_MODE creates PlatformConfig even without DISCORD_BOT_TOKEN."""
|
||||
config = GatewayConfig()
|
||||
assert Platform.DISCORD not in config.platforms
|
||||
with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": "off"}, clear=False):
|
||||
_apply_env_overrides(config)
|
||||
assert Platform.DISCORD in config.platforms
|
||||
assert config.platforms[Platform.DISCORD].reply_to_mode == "off"
|
||||
@@ -0,0 +1,315 @@
|
||||
"""Tests for staged inactivity timeout in gateway agent runs.
|
||||
|
||||
Tests cover:
|
||||
- Warning fires once when inactivity reaches gateway_timeout_warning threshold
|
||||
- Warning does not fire when gateway_timeout is 0 (unlimited)
|
||||
- Warning fires only once per run, not on every poll
|
||||
- Full timeout still fires at gateway_timeout threshold
|
||||
- Warning respects HERMES_AGENT_TIMEOUT_WARNING env var
|
||||
- Warning disabled when gateway_timeout_warning is 0
|
||||
"""
|
||||
|
||||
import concurrent.futures
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
|
||||
class FakeAgent:
|
||||
"""Mock agent with controllable activity summary for timeout tests."""
|
||||
|
||||
def __init__(self, idle_seconds=0.0, activity_desc="tool_call",
|
||||
current_tool=None, api_call_count=5, max_iterations=90):
|
||||
self._idle_seconds = idle_seconds
|
||||
self._activity_desc = activity_desc
|
||||
self._current_tool = current_tool
|
||||
self._api_call_count = api_call_count
|
||||
self._max_iterations = max_iterations
|
||||
self._interrupted = False
|
||||
self._interrupt_msg = None
|
||||
|
||||
def get_activity_summary(self):
|
||||
return {
|
||||
"last_activity_ts": time.time() - self._idle_seconds,
|
||||
"last_activity_desc": self._activity_desc,
|
||||
"seconds_since_activity": self._idle_seconds,
|
||||
"current_tool": self._current_tool,
|
||||
"api_call_count": self._api_call_count,
|
||||
"max_iterations": self._max_iterations,
|
||||
}
|
||||
|
||||
def interrupt(self, msg):
|
||||
self._interrupted = True
|
||||
self._interrupt_msg = msg
|
||||
|
||||
def run_conversation(self, prompt):
|
||||
return {"final_response": "Done", "messages": []}
|
||||
|
||||
|
||||
class SlowFakeAgent(FakeAgent):
|
||||
"""Agent that runs for a while, then goes idle."""
|
||||
|
||||
def __init__(self, run_duration=0.5, idle_after=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self._run_duration = run_duration
|
||||
self._idle_after = idle_after
|
||||
self._start_time = None
|
||||
|
||||
def get_activity_summary(self):
|
||||
summary = super().get_activity_summary()
|
||||
if self._idle_after is not None and self._start_time:
|
||||
elapsed = time.time() - self._start_time
|
||||
if elapsed > self._idle_after:
|
||||
idle_time = elapsed - self._idle_after
|
||||
summary["seconds_since_activity"] = idle_time
|
||||
summary["last_activity_desc"] = "api_call_streaming"
|
||||
else:
|
||||
summary["seconds_since_activity"] = 0.0
|
||||
return summary
|
||||
|
||||
def run_conversation(self, prompt):
|
||||
self._start_time = time.time()
|
||||
time.sleep(self._run_duration)
|
||||
return {"final_response": "Completed after work", "messages": []}
|
||||
|
||||
|
||||
class TestStagedInactivityWarning:
|
||||
"""Test the staged inactivity warning before full timeout."""
|
||||
|
||||
def test_warning_fires_once_before_timeout(self):
|
||||
"""Warning fires when inactivity reaches warning threshold."""
|
||||
agent = SlowFakeAgent(
|
||||
run_duration=10.0,
|
||||
idle_after=0.1,
|
||||
activity_desc="api_call_streaming",
|
||||
)
|
||||
|
||||
_agent_timeout = 20.0
|
||||
_agent_warning = 5.0
|
||||
_POLL_INTERVAL = 0.1
|
||||
|
||||
pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
||||
future = pool.submit(agent.run_conversation, "test prompt")
|
||||
_inactivity_timeout = False
|
||||
_warning_fired = False
|
||||
_warning_send_count = 0
|
||||
|
||||
while True:
|
||||
done, _ = concurrent.futures.wait({future}, timeout=_POLL_INTERVAL)
|
||||
if done:
|
||||
result = future.result()
|
||||
break
|
||||
_idle_secs = 0.0
|
||||
if hasattr(agent, "get_activity_summary"):
|
||||
try:
|
||||
_act = agent.get_activity_summary()
|
||||
_idle_secs = _act.get("seconds_since_activity", 0.0)
|
||||
except Exception:
|
||||
pass
|
||||
if (not _warning_fired and _agent_warning > 0
|
||||
and _idle_secs >= _agent_warning):
|
||||
_warning_fired = True
|
||||
_warning_send_count += 1
|
||||
if _idle_secs >= _agent_timeout:
|
||||
_inactivity_timeout = True
|
||||
break
|
||||
|
||||
pool.shutdown(wait=False, cancel_futures=True)
|
||||
|
||||
assert _warning_fired
|
||||
assert _warning_send_count == 1
|
||||
assert not _inactivity_timeout
|
||||
|
||||
def test_warning_disabled_when_zero(self):
|
||||
"""No warning fires when gateway_timeout_warning is 0."""
|
||||
agent = SlowFakeAgent(
|
||||
run_duration=5.0,
|
||||
idle_after=0.1,
|
||||
)
|
||||
|
||||
_agent_timeout = 20.0
|
||||
_agent_warning = 0.0
|
||||
_POLL_INTERVAL = 0.1
|
||||
|
||||
pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
||||
future = pool.submit(agent.run_conversation, "test")
|
||||
_warning_fired = False
|
||||
|
||||
while True:
|
||||
done, _ = concurrent.futures.wait({future}, timeout=_POLL_INTERVAL)
|
||||
if done:
|
||||
future.result()
|
||||
break
|
||||
_idle_secs = 0.0
|
||||
if hasattr(agent, "get_activity_summary"):
|
||||
try:
|
||||
_act = agent.get_activity_summary()
|
||||
_idle_secs = _act.get("seconds_since_activity", 0.0)
|
||||
except Exception:
|
||||
pass
|
||||
if (not _warning_fired and _agent_warning > 0
|
||||
and _idle_secs >= _agent_warning):
|
||||
_warning_fired = True
|
||||
if _idle_secs >= _agent_timeout:
|
||||
break
|
||||
|
||||
pool.shutdown(wait=False, cancel_futures=True)
|
||||
assert not _warning_fired
|
||||
|
||||
def test_warning_fires_only_once(self):
|
||||
"""Warning fires exactly once even if agent remains idle."""
|
||||
agent = SlowFakeAgent(
|
||||
run_duration=10.0,
|
||||
idle_after=0.05,
|
||||
)
|
||||
|
||||
_agent_timeout = 20.0
|
||||
_agent_warning = 0.2
|
||||
_POLL_INTERVAL = 0.05
|
||||
|
||||
pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
||||
future = pool.submit(agent.run_conversation, "test")
|
||||
_warning_count = 0
|
||||
|
||||
while True:
|
||||
done, _ = concurrent.futures.wait({future}, timeout=_POLL_INTERVAL)
|
||||
if done:
|
||||
future.result()
|
||||
break
|
||||
_idle_secs = 0.0
|
||||
if hasattr(agent, "get_activity_summary"):
|
||||
try:
|
||||
_act = agent.get_activity_summary()
|
||||
_idle_secs = _act.get("seconds_since_activity", 0.0)
|
||||
except Exception:
|
||||
pass
|
||||
if (not _warning_count and _agent_warning > 0
|
||||
and _idle_secs >= _agent_warning):
|
||||
_warning_count += 1
|
||||
if _idle_secs >= _agent_timeout:
|
||||
break
|
||||
|
||||
pool.shutdown(wait=False, cancel_futures=True)
|
||||
assert _warning_count == 1
|
||||
|
||||
def test_full_timeout_still_fires_after_warning(self):
|
||||
"""Full timeout fires even after warning was sent."""
|
||||
agent = SlowFakeAgent(
|
||||
run_duration=15.0,
|
||||
idle_after=0.1,
|
||||
activity_desc="waiting for provider response (streaming)",
|
||||
)
|
||||
|
||||
_agent_timeout = 1.0
|
||||
_agent_warning = 0.3
|
||||
_POLL_INTERVAL = 0.05
|
||||
|
||||
pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
||||
future = pool.submit(agent.run_conversation, "test")
|
||||
_inactivity_timeout = False
|
||||
_warning_fired = False
|
||||
|
||||
while True:
|
||||
done, _ = concurrent.futures.wait({future}, timeout=_POLL_INTERVAL)
|
||||
if done:
|
||||
future.result()
|
||||
break
|
||||
_idle_secs = 0.0
|
||||
if hasattr(agent, "get_activity_summary"):
|
||||
try:
|
||||
_act = agent.get_activity_summary()
|
||||
_idle_secs = _act.get("seconds_since_activity", 0.0)
|
||||
except Exception:
|
||||
pass
|
||||
if (not _warning_fired and _agent_warning > 0
|
||||
and _idle_secs >= _agent_warning):
|
||||
_warning_fired = True
|
||||
if _idle_secs >= _agent_timeout:
|
||||
_inactivity_timeout = True
|
||||
break
|
||||
|
||||
pool.shutdown(wait=False, cancel_futures=True)
|
||||
assert _warning_fired
|
||||
assert _inactivity_timeout
|
||||
|
||||
def test_warning_env_var_respected(self, monkeypatch):
|
||||
"""HERMES_AGENT_TIMEOUT_WARNING env var is parsed correctly."""
|
||||
monkeypatch.setenv("HERMES_AGENT_TIMEOUT_WARNING", "600")
|
||||
_warning = float(os.getenv("HERMES_AGENT_TIMEOUT_WARNING", 900))
|
||||
assert _warning == 600.0
|
||||
|
||||
def test_warning_zero_means_disabled(self, monkeypatch):
|
||||
"""HERMES_AGENT_TIMEOUT_WARNING=0 disables the warning."""
|
||||
monkeypatch.setenv("HERMES_AGENT_TIMEOUT_WARNING", "0")
|
||||
_raw = float(os.getenv("HERMES_AGENT_TIMEOUT_WARNING", 900))
|
||||
_warning = _raw if _raw > 0 else None
|
||||
assert _warning is None
|
||||
|
||||
def test_unlimited_timeout_no_warning(self):
|
||||
"""When timeout is unlimited (0), no warning fires either."""
|
||||
agent = SlowFakeAgent(
|
||||
run_duration=0.5,
|
||||
idle_after=0.0,
|
||||
)
|
||||
|
||||
_agent_timeout = None
|
||||
_agent_warning = 5.0
|
||||
_POLL_INTERVAL = 0.05
|
||||
|
||||
pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
||||
future = pool.submit(agent.run_conversation, "test")
|
||||
|
||||
result = future.result(timeout=2.0)
|
||||
pool.shutdown(wait=False)
|
||||
|
||||
assert result["final_response"] == "Completed after work"
|
||||
|
||||
|
||||
class TestWarningThresholdBelowTimeout:
|
||||
"""Test that warning threshold must be less than timeout threshold."""
|
||||
|
||||
def test_warning_at_half_timeout(self):
|
||||
"""Warning fires at half the timeout duration."""
|
||||
agent = SlowFakeAgent(
|
||||
run_duration=10.0,
|
||||
idle_after=0.1,
|
||||
activity_desc="receiving stream response",
|
||||
)
|
||||
|
||||
_agent_timeout = 2.0
|
||||
_agent_warning = 1.0
|
||||
_POLL_INTERVAL = 0.05
|
||||
|
||||
pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
||||
future = pool.submit(agent.run_conversation, "test")
|
||||
_warning_fired = False
|
||||
_timeout_fired = False
|
||||
|
||||
while True:
|
||||
done, _ = concurrent.futures.wait({future}, timeout=_POLL_INTERVAL)
|
||||
if done:
|
||||
future.result()
|
||||
break
|
||||
_idle_secs = 0.0
|
||||
if hasattr(agent, "get_activity_summary"):
|
||||
try:
|
||||
_act = agent.get_activity_summary()
|
||||
_idle_secs = _act.get("seconds_since_activity", 0.0)
|
||||
except Exception:
|
||||
pass
|
||||
if (not _warning_fired and _agent_warning > 0
|
||||
and _idle_secs >= _agent_warning):
|
||||
_warning_fired = True
|
||||
if _idle_secs >= _agent_timeout:
|
||||
_timeout_fired = True
|
||||
break
|
||||
|
||||
pool.shutdown(wait=False, cancel_futures=True)
|
||||
assert _warning_fired
|
||||
assert _timeout_fired
|
||||
@@ -0,0 +1,226 @@
|
||||
"""Tests that internal synthetic events (e.g. background process completion)
|
||||
bypass user authorization and do not trigger DM pairing.
|
||||
|
||||
Regression test for the bug where ``_run_process_watcher`` with
|
||||
``notify_on_complete=True`` injected a ``MessageEvent`` without ``user_id``,
|
||||
causing ``_is_user_authorized`` to reject it and the gateway to send a
|
||||
pairing code to the chat.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import GatewayConfig, Platform
|
||||
from gateway.platforms.base import MessageEvent
|
||||
from gateway.run import GatewayRunner
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _FakeRegistry:
|
||||
"""Return pre-canned sessions, then None once exhausted."""
|
||||
|
||||
def __init__(self, sessions):
|
||||
self._sessions = list(sessions)
|
||||
|
||||
def get(self, session_id):
|
||||
if self._sessions:
|
||||
return self._sessions.pop(0)
|
||||
return None
|
||||
|
||||
|
||||
def _build_runner(monkeypatch, tmp_path) -> GatewayRunner:
|
||||
"""Create a GatewayRunner with notifications set to 'all'."""
|
||||
(tmp_path / "config.yaml").write_text(
|
||||
"display:\n background_process_notifications: all\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
import gateway.run as gateway_run
|
||||
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
|
||||
runner = GatewayRunner(GatewayConfig())
|
||||
adapter = SimpleNamespace(send=AsyncMock(), handle_message=AsyncMock())
|
||||
runner.adapters[Platform.DISCORD] = adapter
|
||||
return runner
|
||||
|
||||
|
||||
def _watcher_dict_with_notify():
|
||||
return {
|
||||
"session_id": "proc_test_internal",
|
||||
"check_interval": 0,
|
||||
"session_key": "agent:main:discord:dm:123",
|
||||
"platform": "discord",
|
||||
"chat_id": "123",
|
||||
"thread_id": "",
|
||||
"notify_on_complete": True,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_notify_on_complete_sets_internal_flag(monkeypatch, tmp_path):
|
||||
"""Synthetic completion event must have internal=True."""
|
||||
import tools.process_registry as pr_module
|
||||
|
||||
sessions = [
|
||||
SimpleNamespace(
|
||||
output_buffer="done\n", exited=True, exit_code=0, command="echo test"
|
||||
),
|
||||
]
|
||||
monkeypatch.setattr(pr_module, "process_registry", _FakeRegistry(sessions))
|
||||
|
||||
async def _instant_sleep(*_a, **_kw):
|
||||
pass
|
||||
monkeypatch.setattr(asyncio, "sleep", _instant_sleep)
|
||||
|
||||
runner = _build_runner(monkeypatch, tmp_path)
|
||||
adapter = runner.adapters[Platform.DISCORD]
|
||||
|
||||
await runner._run_process_watcher(_watcher_dict_with_notify())
|
||||
|
||||
assert adapter.handle_message.await_count == 1
|
||||
event = adapter.handle_message.await_args.args[0]
|
||||
assert isinstance(event, MessageEvent)
|
||||
assert event.internal is True, "Synthetic completion event must be marked internal"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_internal_event_bypasses_authorization(monkeypatch, tmp_path):
|
||||
"""An internal event should skip _is_user_authorized entirely."""
|
||||
import gateway.run as gateway_run
|
||||
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
||||
|
||||
runner = GatewayRunner(GatewayConfig())
|
||||
|
||||
# Create an internal event with no user_id (simulates the bug scenario)
|
||||
source = SessionSource(
|
||||
platform=Platform.DISCORD,
|
||||
chat_id="123",
|
||||
chat_type="dm",
|
||||
)
|
||||
event = MessageEvent(
|
||||
text="[SYSTEM: Background process completed]",
|
||||
source=source,
|
||||
internal=True,
|
||||
)
|
||||
|
||||
# Track if _is_user_authorized is called
|
||||
auth_called = False
|
||||
original_auth = GatewayRunner._is_user_authorized
|
||||
|
||||
def tracking_auth(self, src):
|
||||
nonlocal auth_called
|
||||
auth_called = True
|
||||
return original_auth(self, src)
|
||||
|
||||
monkeypatch.setattr(GatewayRunner, "_is_user_authorized", tracking_auth)
|
||||
|
||||
# _handle_message will proceed past auth check and eventually fail on
|
||||
# downstream logic. We just need to verify auth is skipped.
|
||||
try:
|
||||
await runner._handle_message(event)
|
||||
except Exception:
|
||||
pass # Expected — downstream code needs more setup
|
||||
|
||||
assert not auth_called, (
|
||||
"_is_user_authorized should NOT be called for internal events"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_internal_event_does_not_trigger_pairing(monkeypatch, tmp_path):
|
||||
"""An internal event with no user_id must not generate a pairing code."""
|
||||
import gateway.run as gateway_run
|
||||
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
||||
|
||||
runner = GatewayRunner(GatewayConfig())
|
||||
# Add adapter so pairing would have somewhere to send
|
||||
adapter = SimpleNamespace(send=AsyncMock())
|
||||
runner.adapters[Platform.DISCORD] = adapter
|
||||
|
||||
source = SessionSource(
|
||||
platform=Platform.DISCORD,
|
||||
chat_id="123",
|
||||
chat_type="dm", # DM would normally trigger pairing
|
||||
)
|
||||
event = MessageEvent(
|
||||
text="[SYSTEM: Background process completed]",
|
||||
source=source,
|
||||
internal=True,
|
||||
)
|
||||
|
||||
# Track pairing code generation
|
||||
generate_called = False
|
||||
original_generate = runner.pairing_store.generate_code
|
||||
|
||||
def tracking_generate(*args, **kwargs):
|
||||
nonlocal generate_called
|
||||
generate_called = True
|
||||
return original_generate(*args, **kwargs)
|
||||
|
||||
runner.pairing_store.generate_code = tracking_generate
|
||||
|
||||
try:
|
||||
await runner._handle_message(event)
|
||||
except Exception:
|
||||
pass # Expected — downstream code needs more setup
|
||||
|
||||
assert not generate_called, (
|
||||
"Pairing code should NOT be generated for internal events"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_non_internal_event_without_user_triggers_pairing(monkeypatch, tmp_path):
|
||||
"""Verify the normal (non-internal) path still triggers pairing for unknown users."""
|
||||
import gateway.run as gateway_run
|
||||
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
||||
|
||||
# Clear env vars that could let all users through (loaded by
|
||||
# module-level dotenv in gateway/run.py from the real ~/.hermes/.env).
|
||||
monkeypatch.delenv("DISCORD_ALLOW_ALL_USERS", raising=False)
|
||||
monkeypatch.delenv("DISCORD_ALLOWED_USERS", raising=False)
|
||||
monkeypatch.delenv("GATEWAY_ALLOW_ALL_USERS", raising=False)
|
||||
monkeypatch.delenv("GATEWAY_ALLOWED_USERS", raising=False)
|
||||
|
||||
runner = GatewayRunner(GatewayConfig())
|
||||
adapter = SimpleNamespace(send=AsyncMock())
|
||||
runner.adapters[Platform.DISCORD] = adapter
|
||||
|
||||
source = SessionSource(
|
||||
platform=Platform.DISCORD,
|
||||
chat_id="123",
|
||||
chat_type="dm",
|
||||
user_id="unknown_user_999",
|
||||
)
|
||||
# Normal event (not internal)
|
||||
event = MessageEvent(
|
||||
text="hello",
|
||||
source=source,
|
||||
internal=False,
|
||||
)
|
||||
|
||||
result = await runner._handle_message(event)
|
||||
|
||||
# Should return None (unauthorized) and send pairing message
|
||||
assert result is None
|
||||
assert adapter.send.await_count == 1
|
||||
sent_text = adapter.send.await_args.args[1]
|
||||
assert "don't recognize you" in sent_text
|
||||
@@ -38,10 +38,11 @@ def _make_timeout_error() -> httpx.TimeoutException:
|
||||
# cache_image_from_url (base.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@patch("tools.url_safety.is_safe_url", return_value=True)
|
||||
class TestCacheImageFromUrl:
|
||||
"""Tests for gateway.platforms.base.cache_image_from_url"""
|
||||
|
||||
def test_success_on_first_attempt(self, tmp_path, monkeypatch):
|
||||
def test_success_on_first_attempt(self, _mock_safe, tmp_path, monkeypatch):
|
||||
"""A clean 200 response caches the image and returns a path."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
@@ -65,7 +66,7 @@ class TestCacheImageFromUrl:
|
||||
assert path.endswith(".jpg")
|
||||
mock_client.get.assert_called_once()
|
||||
|
||||
def test_retries_on_timeout_then_succeeds(self, tmp_path, monkeypatch):
|
||||
def test_retries_on_timeout_then_succeeds(self, _mock_safe, tmp_path, monkeypatch):
|
||||
"""A timeout on the first attempt is retried; second attempt succeeds."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
@@ -95,7 +96,7 @@ class TestCacheImageFromUrl:
|
||||
assert mock_client.get.call_count == 2
|
||||
mock_sleep.assert_called_once()
|
||||
|
||||
def test_retries_on_429_then_succeeds(self, tmp_path, monkeypatch):
|
||||
def test_retries_on_429_then_succeeds(self, _mock_safe, tmp_path, monkeypatch):
|
||||
"""A 429 response on the first attempt is retried; second attempt succeeds."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
@@ -122,7 +123,7 @@ class TestCacheImageFromUrl:
|
||||
assert path.endswith(".jpg")
|
||||
assert mock_client.get.call_count == 2
|
||||
|
||||
def test_raises_after_max_retries_exhausted(self, tmp_path, monkeypatch):
|
||||
def test_raises_after_max_retries_exhausted(self, _mock_safe, tmp_path, monkeypatch):
|
||||
"""Timeout on every attempt raises after all retries are consumed."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
@@ -145,7 +146,7 @@ class TestCacheImageFromUrl:
|
||||
# 3 total calls: initial + 2 retries
|
||||
assert mock_client.get.call_count == 3
|
||||
|
||||
def test_non_retryable_4xx_raises_immediately(self, tmp_path, monkeypatch):
|
||||
def test_non_retryable_4xx_raises_immediately(self, _mock_safe, tmp_path, monkeypatch):
|
||||
"""A 404 (non-retryable) is raised immediately without any retry."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
@@ -175,10 +176,11 @@ class TestCacheImageFromUrl:
|
||||
# cache_audio_from_url (base.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@patch("tools.url_safety.is_safe_url", return_value=True)
|
||||
class TestCacheAudioFromUrl:
|
||||
"""Tests for gateway.platforms.base.cache_audio_from_url"""
|
||||
|
||||
def test_success_on_first_attempt(self, tmp_path, monkeypatch):
|
||||
def test_success_on_first_attempt(self, _mock_safe, tmp_path, monkeypatch):
|
||||
"""A clean 200 response caches the audio and returns a path."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
@@ -202,7 +204,7 @@ class TestCacheAudioFromUrl:
|
||||
assert path.endswith(".ogg")
|
||||
mock_client.get.assert_called_once()
|
||||
|
||||
def test_retries_on_timeout_then_succeeds(self, tmp_path, monkeypatch):
|
||||
def test_retries_on_timeout_then_succeeds(self, _mock_safe, tmp_path, monkeypatch):
|
||||
"""A timeout on the first attempt is retried; second attempt succeeds."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
@@ -232,7 +234,7 @@ class TestCacheAudioFromUrl:
|
||||
assert mock_client.get.call_count == 2
|
||||
mock_sleep.assert_called_once()
|
||||
|
||||
def test_retries_on_429_then_succeeds(self, tmp_path, monkeypatch):
|
||||
def test_retries_on_429_then_succeeds(self, _mock_safe, tmp_path, monkeypatch):
|
||||
"""A 429 response on the first attempt is retried; second attempt succeeds."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
@@ -259,7 +261,7 @@ class TestCacheAudioFromUrl:
|
||||
assert path.endswith(".ogg")
|
||||
assert mock_client.get.call_count == 2
|
||||
|
||||
def test_retries_on_500_then_succeeds(self, tmp_path, monkeypatch):
|
||||
def test_retries_on_500_then_succeeds(self, _mock_safe, tmp_path, monkeypatch):
|
||||
"""A 500 response on the first attempt is retried; second attempt succeeds."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
@@ -286,7 +288,7 @@ class TestCacheAudioFromUrl:
|
||||
assert path.endswith(".ogg")
|
||||
assert mock_client.get.call_count == 2
|
||||
|
||||
def test_raises_after_max_retries_exhausted(self, tmp_path, monkeypatch):
|
||||
def test_raises_after_max_retries_exhausted(self, _mock_safe, tmp_path, monkeypatch):
|
||||
"""Timeout on every attempt raises after all retries are consumed."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
@@ -309,7 +311,7 @@ class TestCacheAudioFromUrl:
|
||||
# 3 total calls: initial + 2 retries
|
||||
assert mock_client.get.call_count == 3
|
||||
|
||||
def test_non_retryable_4xx_raises_immediately(self, tmp_path, monkeypatch):
|
||||
def test_non_retryable_4xx_raises_immediately(self, _mock_safe, tmp_path, monkeypatch):
|
||||
"""A 404 (non-retryable) is raised immediately without any retry."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
|
||||
@@ -707,3 +707,66 @@ class TestSignalSendDocumentViaHelper:
|
||||
|
||||
assert result.success is False
|
||||
assert "/nonexistent.pdf" in result.error
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# send() returns message_id from timestamp (#4647)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSignalSendReturnsMessageId:
|
||||
"""Signal send() must return a timestamp-based message_id so the stream
|
||||
consumer can follow its edit→fallback path correctly."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_returns_timestamp_as_message_id(self, monkeypatch):
|
||||
adapter = _make_signal_adapter(monkeypatch)
|
||||
mock_rpc, _ = _stub_rpc({"timestamp": 1712345678000})
|
||||
adapter._rpc = mock_rpc
|
||||
adapter._stop_typing_indicator = AsyncMock()
|
||||
|
||||
result = await adapter.send(chat_id="+155****4567", content="hello")
|
||||
|
||||
assert result.success is True
|
||||
assert result.message_id == "1712345678000"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_returns_none_message_id_when_no_timestamp(self, monkeypatch):
|
||||
adapter = _make_signal_adapter(monkeypatch)
|
||||
mock_rpc, _ = _stub_rpc({}) # No timestamp key
|
||||
adapter._rpc = mock_rpc
|
||||
adapter._stop_typing_indicator = AsyncMock()
|
||||
|
||||
result = await adapter.send(chat_id="+155****4567", content="hello")
|
||||
|
||||
assert result.success is True
|
||||
assert result.message_id is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_returns_none_message_id_for_non_dict(self, monkeypatch):
|
||||
adapter = _make_signal_adapter(monkeypatch)
|
||||
mock_rpc, _ = _stub_rpc("ok") # Non-dict result
|
||||
adapter._rpc = mock_rpc
|
||||
adapter._stop_typing_indicator = AsyncMock()
|
||||
|
||||
result = await adapter.send(chat_id="+155****4567", content="hello")
|
||||
|
||||
assert result.success is True
|
||||
assert result.message_id is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# stop_typing() delegates to _stop_typing_indicator (#4647)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSignalStopTyping:
|
||||
"""Signal must expose a public stop_typing() so base adapter's
|
||||
_keep_typing finally block can clean up platform-level typing tasks."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stop_typing_calls_private_method(self, monkeypatch):
|
||||
adapter = _make_signal_adapter(monkeypatch)
|
||||
adapter._stop_typing_indicator = AsyncMock()
|
||||
|
||||
await adapter.stop_typing("+155****4567")
|
||||
|
||||
adapter._stop_typing_indicator.assert_awaited_once_with("+155****4567")
|
||||
|
||||
+111
-1
@@ -96,7 +96,7 @@ class TestAppMentionHandler:
|
||||
"""Verify that the app_mention event handler is registered."""
|
||||
|
||||
def test_app_mention_registered_on_connect(self):
|
||||
"""connect() should register both 'message' and 'app_mention' handlers."""
|
||||
"""connect() should register message + assistant lifecycle handlers."""
|
||||
config = PlatformConfig(enabled=True, token="xoxb-fake")
|
||||
adapter = SlackAdapter(config)
|
||||
|
||||
@@ -145,6 +145,8 @@ class TestAppMentionHandler:
|
||||
|
||||
assert "message" in registered_events
|
||||
assert "app_mention" in registered_events
|
||||
assert "assistant_thread_started" in registered_events
|
||||
assert "assistant_thread_context_changed" in registered_events
|
||||
assert "/hermes" in registered_commands
|
||||
|
||||
|
||||
@@ -840,6 +842,114 @@ class TestThreadReplyHandling:
|
||||
adapter.handle_message.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestAssistantThreadLifecycle
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAssistantThreadLifecycle:
|
||||
"""Slack Assistant lifecycle events should seed session/user context."""
|
||||
|
||||
@pytest.fixture()
|
||||
def mock_session_store(self):
|
||||
store = MagicMock()
|
||||
store._entries = {}
|
||||
store._ensure_loaded = MagicMock()
|
||||
store.config = MagicMock()
|
||||
store.config.group_sessions_per_user = True
|
||||
store.get_or_create_session = MagicMock()
|
||||
return store
|
||||
|
||||
@pytest.fixture()
|
||||
def assistant_adapter(self, mock_session_store):
|
||||
config = PlatformConfig(enabled=True, token="***")
|
||||
a = SlackAdapter(config)
|
||||
a._app = MagicMock()
|
||||
a._app.client = AsyncMock()
|
||||
a._bot_user_id = "U_BOT"
|
||||
a._team_bot_user_ids = {"T_TEAM": "U_BOT"}
|
||||
a._running = True
|
||||
a.handle_message = AsyncMock()
|
||||
a.set_session_store(mock_session_store)
|
||||
return a
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_lifecycle_event_seeds_session_store(self, assistant_adapter, mock_session_store):
|
||||
event = {
|
||||
"type": "assistant_thread_started",
|
||||
"team_id": "T_TEAM",
|
||||
"assistant_thread": {
|
||||
"channel_id": "D123",
|
||||
"thread_ts": "171.000",
|
||||
"user_id": "U_USER",
|
||||
"context": {"channel_id": "C_ORIGIN"},
|
||||
},
|
||||
}
|
||||
|
||||
await assistant_adapter._handle_assistant_thread_lifecycle_event(event)
|
||||
|
||||
assert assistant_adapter._assistant_threads[("D123", "171.000")]["user_id"] == "U_USER"
|
||||
mock_session_store.get_or_create_session.assert_called_once()
|
||||
source = mock_session_store.get_or_create_session.call_args[0][0]
|
||||
assert source.chat_id == "D123"
|
||||
assert source.chat_type == "dm"
|
||||
assert source.user_id == "U_USER"
|
||||
assert source.thread_id == "171.000"
|
||||
assert source.chat_topic == "C_ORIGIN"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_uses_cached_assistant_thread_identity(self, assistant_adapter):
|
||||
assistant_adapter._assistant_threads[("D123", "171.000")] = {
|
||||
"channel_id": "D123",
|
||||
"thread_ts": "171.000",
|
||||
"user_id": "U_USER",
|
||||
"team_id": "T_TEAM",
|
||||
}
|
||||
assistant_adapter._app.client.users_info = AsyncMock(return_value={
|
||||
"user": {"profile": {"display_name": "Tyler"}}
|
||||
})
|
||||
assistant_adapter._app.client.reactions_add = AsyncMock()
|
||||
assistant_adapter._app.client.reactions_remove = AsyncMock()
|
||||
|
||||
event = {
|
||||
"text": "hello from assistant dm",
|
||||
"channel": "D123",
|
||||
"channel_type": "im",
|
||||
"thread_ts": "171.000",
|
||||
"ts": "171.111",
|
||||
"team": "T_TEAM",
|
||||
}
|
||||
|
||||
await assistant_adapter._handle_slack_message(event)
|
||||
|
||||
msg_event = assistant_adapter.handle_message.call_args[0][0]
|
||||
assert msg_event.source.user_id == "U_USER"
|
||||
assert msg_event.source.thread_id == "171.000"
|
||||
assert msg_event.source.user_name == "Tyler"
|
||||
|
||||
def test_assistant_threads_cache_eviction(self, assistant_adapter):
|
||||
"""Cache should evict oldest entries when exceeding the size limit."""
|
||||
assistant_adapter._ASSISTANT_THREADS_MAX = 10
|
||||
# Fill to the limit
|
||||
for i in range(10):
|
||||
assistant_adapter._cache_assistant_thread_metadata({
|
||||
"channel_id": f"D{i}",
|
||||
"thread_ts": f"{i}.000",
|
||||
"user_id": f"U{i}",
|
||||
})
|
||||
assert len(assistant_adapter._assistant_threads) == 10
|
||||
|
||||
# Adding one more should trigger eviction (down to max // 2 = 5)
|
||||
assistant_adapter._cache_assistant_thread_metadata({
|
||||
"channel_id": "D999",
|
||||
"thread_ts": "999.000",
|
||||
"user_id": "U999",
|
||||
})
|
||||
assert len(assistant_adapter._assistant_threads) <= 10
|
||||
# The newest entry must survive eviction
|
||||
assert ("D999", "999.000") in assistant_adapter._assistant_threads
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestUserNameResolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -383,6 +383,60 @@ class TestSegmentBreakOnToolBoundary:
|
||||
sent_texts = [call[1]["content"] for call in adapter.send.call_args_list]
|
||||
assert sent_texts == ["Hello ▉", "Next segment"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_message_id_enters_fallback_mode(self):
|
||||
"""Platform returns success but no message_id (Signal) — must not
|
||||
re-send on every delta. Should enter fallback mode and send only
|
||||
the continuation at finish."""
|
||||
adapter = MagicMock()
|
||||
# First send succeeds but returns no message_id (Signal behavior)
|
||||
send_result_no_id = SimpleNamespace(success=True, message_id=None)
|
||||
# Fallback final send succeeds
|
||||
send_result_final = SimpleNamespace(success=True, message_id="msg_final")
|
||||
adapter.send = AsyncMock(side_effect=[send_result_no_id, send_result_final])
|
||||
adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=True))
|
||||
adapter.MAX_MESSAGE_LENGTH = 4096
|
||||
|
||||
config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
|
||||
consumer = GatewayStreamConsumer(adapter, "chat_123", config)
|
||||
|
||||
consumer.on_delta("Hello")
|
||||
task = asyncio.create_task(consumer.run())
|
||||
await asyncio.sleep(0.08)
|
||||
consumer.on_delta(" world, this is a longer response.")
|
||||
await asyncio.sleep(0.08)
|
||||
consumer.finish()
|
||||
await task
|
||||
|
||||
# Should send exactly 2 messages: initial chunk + fallback continuation
|
||||
# NOT one message per delta
|
||||
assert adapter.send.call_count == 2
|
||||
assert consumer.already_sent
|
||||
# edit_message should NOT have been called (no valid message_id to edit)
|
||||
adapter.edit_message.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_message_id_single_delta_marks_already_sent(self):
|
||||
"""When the entire response fits in one delta and platform returns no
|
||||
message_id, already_sent must still be True to prevent the gateway
|
||||
from re-sending the full response."""
|
||||
adapter = MagicMock()
|
||||
send_result = SimpleNamespace(success=True, message_id=None)
|
||||
adapter.send = AsyncMock(return_value=send_result)
|
||||
adapter.MAX_MESSAGE_LENGTH = 4096
|
||||
|
||||
config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
|
||||
consumer = GatewayStreamConsumer(adapter, "chat_123", config)
|
||||
|
||||
consumer.on_delta("Short response.")
|
||||
consumer.finish()
|
||||
|
||||
await consumer.run()
|
||||
|
||||
assert consumer.already_sent
|
||||
# Only one send call (the initial message)
|
||||
assert adapter.send.call_count == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fallback_final_splits_long_continuation_without_dropping_text(self):
|
||||
"""Long continuation tails should be chunked when fallback final-send runs."""
|
||||
|
||||
@@ -4,7 +4,7 @@ import base64
|
||||
import os
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -355,7 +355,8 @@ class TestMediaUpload:
|
||||
assert calls[3][1]["chunk_index"] == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_download_remote_bytes_rejects_large_content_length(self):
|
||||
@patch("tools.url_safety.is_safe_url", return_value=True)
|
||||
async def test_download_remote_bytes_rejects_large_content_length(self, _mock_safe):
|
||||
from gateway.platforms.wecom import WeComAdapter
|
||||
|
||||
class FakeResponse:
|
||||
|
||||
@@ -628,14 +628,21 @@ class TestHasAnyProviderConfigured:
|
||||
def test_claude_code_creds_ignored_on_fresh_install(self, monkeypatch, tmp_path):
|
||||
"""Claude Code credentials should NOT skip the wizard when Hermes is unconfigured."""
|
||||
from hermes_cli import config as config_module
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env")
|
||||
monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
|
||||
# Clear all provider env vars so earlier checks don't short-circuit
|
||||
for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
|
||||
"ANTHROPIC_TOKEN", "OPENAI_BASE_URL"):
|
||||
_all_vars = {"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
|
||||
"ANTHROPIC_TOKEN", "OPENAI_BASE_URL"}
|
||||
for pconfig in PROVIDER_REGISTRY.values():
|
||||
if pconfig.auth_type == "api_key":
|
||||
_all_vars.update(pconfig.api_key_env_vars)
|
||||
for var in _all_vars:
|
||||
monkeypatch.delenv(var, raising=False)
|
||||
# Prevent gh-cli / copilot auth fallback from leaking in
|
||||
monkeypatch.setattr("hermes_cli.auth.get_auth_status", lambda _pid: {})
|
||||
# Simulate valid Claude Code credentials
|
||||
monkeypatch.setattr(
|
||||
"agent.anthropic_adapter.read_claude_code_credentials",
|
||||
@@ -710,6 +717,7 @@ class TestHasAnyProviderConfigured:
|
||||
"""config.yaml model dict with empty default and no creds stays false."""
|
||||
import yaml
|
||||
from hermes_cli import config as config_module
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
config_file = hermes_home / "config.yaml"
|
||||
@@ -719,9 +727,15 @@ class TestHasAnyProviderConfigured:
|
||||
monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env")
|
||||
monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
|
||||
"ANTHROPIC_TOKEN", "OPENAI_BASE_URL"):
|
||||
_all_vars = {"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
|
||||
"ANTHROPIC_TOKEN", "OPENAI_BASE_URL"}
|
||||
for pconfig in PROVIDER_REGISTRY.values():
|
||||
if pconfig.auth_type == "api_key":
|
||||
_all_vars.update(pconfig.api_key_env_vars)
|
||||
for var in _all_vars:
|
||||
monkeypatch.delenv(var, raising=False)
|
||||
# Prevent gh-cli / copilot auth fallback from leaking in
|
||||
monkeypatch.setattr("hermes_cli.auth.get_auth_status", lambda _pid: {})
|
||||
from hermes_cli.main import _has_any_provider_configured
|
||||
assert _has_any_provider_configured() is False
|
||||
|
||||
@@ -941,9 +955,10 @@ class TestHuggingFaceModels:
|
||||
"""Every HF model should have a context length entry."""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
|
||||
lower_keys = {k.lower() for k in DEFAULT_CONTEXT_LENGTHS}
|
||||
hf_models = _PROVIDER_MODELS["huggingface"]
|
||||
for model in hf_models:
|
||||
assert model in DEFAULT_CONTEXT_LENGTHS, (
|
||||
assert model.lower() in lower_keys, (
|
||||
f"HF model {model!r} missing from DEFAULT_CONTEXT_LENGTHS"
|
||||
)
|
||||
|
||||
|
||||
@@ -0,0 +1,399 @@
|
||||
"""Tests for Qwen OAuth provider authentication (hermes_cli/auth.py).
|
||||
|
||||
Covers: _qwen_cli_auth_path, _read_qwen_cli_tokens, _save_qwen_cli_tokens,
|
||||
_qwen_access_token_is_expiring, _refresh_qwen_cli_tokens,
|
||||
resolve_qwen_runtime_credentials, get_qwen_auth_status.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import stat
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli.auth import (
|
||||
AuthError,
|
||||
DEFAULT_QWEN_BASE_URL,
|
||||
QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
_qwen_cli_auth_path,
|
||||
_read_qwen_cli_tokens,
|
||||
_save_qwen_cli_tokens,
|
||||
_qwen_access_token_is_expiring,
|
||||
_refresh_qwen_cli_tokens,
|
||||
resolve_qwen_runtime_credentials,
|
||||
get_qwen_auth_status,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_qwen_tokens(
|
||||
access_token="test-access-token",
|
||||
refresh_token="test-refresh-token",
|
||||
expiry_date=None,
|
||||
**extra,
|
||||
):
|
||||
"""Create a minimal Qwen CLI OAuth credential dict."""
|
||||
if expiry_date is None:
|
||||
# 1 hour from now in milliseconds
|
||||
expiry_date = int((time.time() + 3600) * 1000)
|
||||
data = {
|
||||
"access_token": access_token,
|
||||
"refresh_token": refresh_token,
|
||||
"token_type": "Bearer",
|
||||
"expiry_date": expiry_date,
|
||||
"resource_url": "portal.qwen.ai",
|
||||
}
|
||||
data.update(extra)
|
||||
return data
|
||||
|
||||
|
||||
def _write_qwen_creds(tmp_path, tokens=None):
|
||||
"""Write tokens to the Qwen CLI credentials file and return the path."""
|
||||
qwen_dir = tmp_path / ".qwen"
|
||||
qwen_dir.mkdir(parents=True, exist_ok=True)
|
||||
creds_path = qwen_dir / "oauth_creds.json"
|
||||
if tokens is None:
|
||||
tokens = _make_qwen_tokens()
|
||||
creds_path.write_text(json.dumps(tokens), encoding="utf-8")
|
||||
return creds_path
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def qwen_env(tmp_path, monkeypatch):
|
||||
"""Redirect _qwen_cli_auth_path to tmp_path/.qwen/oauth_creds.json."""
|
||||
creds_path = tmp_path / ".qwen" / "oauth_creds.json"
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.auth._qwen_cli_auth_path", lambda: creds_path
|
||||
)
|
||||
return tmp_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _qwen_cli_auth_path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_qwen_cli_auth_path_returns_expected_location():
|
||||
path = _qwen_cli_auth_path()
|
||||
assert path == Path.home() / ".qwen" / "oauth_creds.json"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _read_qwen_cli_tokens
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_read_qwen_cli_tokens_success(qwen_env):
|
||||
tokens = _make_qwen_tokens(access_token="my-access")
|
||||
_write_qwen_creds(qwen_env, tokens)
|
||||
result = _read_qwen_cli_tokens()
|
||||
assert result["access_token"] == "my-access"
|
||||
assert result["refresh_token"] == "test-refresh-token"
|
||||
|
||||
|
||||
def test_read_qwen_cli_tokens_missing_file(qwen_env):
|
||||
with pytest.raises(AuthError) as exc:
|
||||
_read_qwen_cli_tokens()
|
||||
assert exc.value.code == "qwen_auth_missing"
|
||||
|
||||
|
||||
def test_read_qwen_cli_tokens_invalid_json(qwen_env):
|
||||
creds_path = qwen_env / ".qwen" / "oauth_creds.json"
|
||||
creds_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
creds_path.write_text("not json{{{", encoding="utf-8")
|
||||
with pytest.raises(AuthError) as exc:
|
||||
_read_qwen_cli_tokens()
|
||||
assert exc.value.code == "qwen_auth_read_failed"
|
||||
|
||||
|
||||
def test_read_qwen_cli_tokens_non_dict(qwen_env):
|
||||
creds_path = qwen_env / ".qwen" / "oauth_creds.json"
|
||||
creds_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
creds_path.write_text(json.dumps(["a", "b"]), encoding="utf-8")
|
||||
with pytest.raises(AuthError) as exc:
|
||||
_read_qwen_cli_tokens()
|
||||
assert exc.value.code == "qwen_auth_invalid"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _save_qwen_cli_tokens
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_save_qwen_cli_tokens_roundtrip(qwen_env):
|
||||
tokens = _make_qwen_tokens(access_token="saved-token")
|
||||
saved_path = _save_qwen_cli_tokens(tokens)
|
||||
assert saved_path.exists()
|
||||
loaded = json.loads(saved_path.read_text(encoding="utf-8"))
|
||||
assert loaded["access_token"] == "saved-token"
|
||||
|
||||
|
||||
def test_save_qwen_cli_tokens_creates_parent(qwen_env):
|
||||
tokens = _make_qwen_tokens()
|
||||
saved_path = _save_qwen_cli_tokens(tokens)
|
||||
assert saved_path.parent.exists()
|
||||
|
||||
|
||||
def test_save_qwen_cli_tokens_permissions(qwen_env):
|
||||
tokens = _make_qwen_tokens()
|
||||
saved_path = _save_qwen_cli_tokens(tokens)
|
||||
mode = saved_path.stat().st_mode
|
||||
assert mode & stat.S_IRUSR # owner read
|
||||
assert mode & stat.S_IWUSR # owner write
|
||||
assert not (mode & stat.S_IRGRP) # no group read
|
||||
assert not (mode & stat.S_IROTH) # no other read
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _qwen_access_token_is_expiring
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_expiring_token_not_expired():
|
||||
# 1 hour from now in milliseconds
|
||||
future_ms = int((time.time() + 3600) * 1000)
|
||||
assert not _qwen_access_token_is_expiring(future_ms)
|
||||
|
||||
|
||||
def test_expiring_token_already_expired():
|
||||
# 1 hour ago in milliseconds
|
||||
past_ms = int((time.time() - 3600) * 1000)
|
||||
assert _qwen_access_token_is_expiring(past_ms)
|
||||
|
||||
|
||||
def test_expiring_token_within_skew():
|
||||
# Just inside the default skew window
|
||||
near_ms = int((time.time() + QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS - 5) * 1000)
|
||||
assert _qwen_access_token_is_expiring(near_ms)
|
||||
|
||||
|
||||
def test_expiring_token_none_returns_true():
|
||||
assert _qwen_access_token_is_expiring(None)
|
||||
|
||||
|
||||
def test_expiring_token_non_numeric_returns_true():
|
||||
assert _qwen_access_token_is_expiring("not-a-number")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _refresh_qwen_cli_tokens
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_refresh_qwen_cli_tokens_success(qwen_env):
|
||||
tokens = _make_qwen_tokens(refresh_token="old-refresh")
|
||||
|
||||
resp = MagicMock()
|
||||
resp.status_code = 200
|
||||
resp.json.return_value = {
|
||||
"access_token": "new-access",
|
||||
"refresh_token": "new-refresh",
|
||||
"expires_in": 7200,
|
||||
}
|
||||
|
||||
with patch("hermes_cli.auth.httpx") as mock_httpx:
|
||||
mock_httpx.post.return_value = resp
|
||||
result = _refresh_qwen_cli_tokens(tokens)
|
||||
|
||||
assert result["access_token"] == "new-access"
|
||||
assert result["refresh_token"] == "new-refresh"
|
||||
assert "expiry_date" in result
|
||||
|
||||
|
||||
def test_refresh_qwen_cli_tokens_preserves_old_refresh_if_not_in_response(qwen_env):
|
||||
tokens = _make_qwen_tokens(refresh_token="keep-me")
|
||||
|
||||
resp = MagicMock()
|
||||
resp.status_code = 200
|
||||
resp.json.return_value = {
|
||||
"access_token": "new-access",
|
||||
# No refresh_token in response — should keep old one
|
||||
"expires_in": 3600,
|
||||
}
|
||||
|
||||
with patch("hermes_cli.auth.httpx") as mock_httpx:
|
||||
mock_httpx.post.return_value = resp
|
||||
result = _refresh_qwen_cli_tokens(tokens)
|
||||
|
||||
assert result["refresh_token"] == "keep-me"
|
||||
|
||||
|
||||
def test_refresh_qwen_cli_tokens_missing_refresh_token():
|
||||
tokens = {"access_token": "at", "refresh_token": ""}
|
||||
with pytest.raises(AuthError) as exc:
|
||||
_refresh_qwen_cli_tokens(tokens)
|
||||
assert exc.value.code == "qwen_refresh_token_missing"
|
||||
|
||||
|
||||
def test_refresh_qwen_cli_tokens_http_error(qwen_env):
|
||||
tokens = _make_qwen_tokens()
|
||||
|
||||
resp = MagicMock()
|
||||
resp.status_code = 401
|
||||
resp.text = "unauthorized"
|
||||
|
||||
with patch("hermes_cli.auth.httpx") as mock_httpx:
|
||||
mock_httpx.post.return_value = resp
|
||||
with pytest.raises(AuthError) as exc:
|
||||
_refresh_qwen_cli_tokens(tokens)
|
||||
assert exc.value.code == "qwen_refresh_failed"
|
||||
|
||||
|
||||
def test_refresh_qwen_cli_tokens_network_error(qwen_env):
|
||||
tokens = _make_qwen_tokens()
|
||||
|
||||
with patch("hermes_cli.auth.httpx") as mock_httpx:
|
||||
mock_httpx.post.side_effect = ConnectionError("timeout")
|
||||
with pytest.raises(AuthError) as exc:
|
||||
_refresh_qwen_cli_tokens(tokens)
|
||||
assert exc.value.code == "qwen_refresh_failed"
|
||||
|
||||
|
||||
def test_refresh_qwen_cli_tokens_invalid_json_response(qwen_env):
|
||||
tokens = _make_qwen_tokens()
|
||||
|
||||
resp = MagicMock()
|
||||
resp.status_code = 200
|
||||
resp.json.side_effect = ValueError("bad json")
|
||||
|
||||
with patch("hermes_cli.auth.httpx") as mock_httpx:
|
||||
mock_httpx.post.return_value = resp
|
||||
with pytest.raises(AuthError) as exc:
|
||||
_refresh_qwen_cli_tokens(tokens)
|
||||
assert exc.value.code == "qwen_refresh_invalid_json"
|
||||
|
||||
|
||||
def test_refresh_qwen_cli_tokens_missing_access_token_in_response(qwen_env):
|
||||
tokens = _make_qwen_tokens()
|
||||
|
||||
resp = MagicMock()
|
||||
resp.status_code = 200
|
||||
resp.json.return_value = {"something": "but no access_token"}
|
||||
|
||||
with patch("hermes_cli.auth.httpx") as mock_httpx:
|
||||
mock_httpx.post.return_value = resp
|
||||
with pytest.raises(AuthError) as exc:
|
||||
_refresh_qwen_cli_tokens(tokens)
|
||||
assert exc.value.code == "qwen_refresh_invalid_response"
|
||||
|
||||
|
||||
def test_refresh_qwen_cli_tokens_default_expires_in(qwen_env):
|
||||
"""When expires_in is missing, default to 6 hours."""
|
||||
tokens = _make_qwen_tokens()
|
||||
|
||||
resp = MagicMock()
|
||||
resp.status_code = 200
|
||||
resp.json.return_value = {"access_token": "new"}
|
||||
|
||||
with patch("hermes_cli.auth.httpx") as mock_httpx:
|
||||
mock_httpx.post.return_value = resp
|
||||
result = _refresh_qwen_cli_tokens(tokens)
|
||||
|
||||
# Verify expiry_date is roughly now + 6h (within 60s tolerance)
|
||||
expected_ms = int(time.time() * 1000) + 6 * 60 * 60 * 1000
|
||||
assert abs(result["expiry_date"] - expected_ms) < 60_000
|
||||
|
||||
|
||||
def test_refresh_qwen_cli_tokens_saves_to_disk(qwen_env):
|
||||
tokens = _make_qwen_tokens()
|
||||
|
||||
resp = MagicMock()
|
||||
resp.status_code = 200
|
||||
resp.json.return_value = {
|
||||
"access_token": "disk-check",
|
||||
"expires_in": 3600,
|
||||
}
|
||||
|
||||
with patch("hermes_cli.auth.httpx") as mock_httpx:
|
||||
mock_httpx.post.return_value = resp
|
||||
_refresh_qwen_cli_tokens(tokens)
|
||||
|
||||
# Verify it was persisted
|
||||
creds_path = qwen_env / ".qwen" / "oauth_creds.json"
|
||||
assert creds_path.exists()
|
||||
saved = json.loads(creds_path.read_text(encoding="utf-8"))
|
||||
assert saved["access_token"] == "disk-check"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# resolve_qwen_runtime_credentials
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_resolve_qwen_runtime_credentials_fresh_token(qwen_env):
|
||||
tokens = _make_qwen_tokens(access_token="fresh-at")
|
||||
_write_qwen_creds(qwen_env, tokens)
|
||||
|
||||
creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False)
|
||||
assert creds["provider"] == "qwen-oauth"
|
||||
assert creds["api_key"] == "fresh-at"
|
||||
assert creds["base_url"] == DEFAULT_QWEN_BASE_URL
|
||||
assert creds["source"] == "qwen-cli"
|
||||
|
||||
|
||||
def test_resolve_qwen_runtime_credentials_triggers_refresh(qwen_env):
|
||||
# Write an expired token
|
||||
expired_ms = int((time.time() - 3600) * 1000)
|
||||
tokens = _make_qwen_tokens(access_token="old", expiry_date=expired_ms)
|
||||
_write_qwen_creds(qwen_env, tokens)
|
||||
|
||||
refreshed = _make_qwen_tokens(access_token="refreshed-at")
|
||||
|
||||
with patch(
|
||||
"hermes_cli.auth._refresh_qwen_cli_tokens", return_value=refreshed
|
||||
) as mock_refresh:
|
||||
creds = resolve_qwen_runtime_credentials()
|
||||
mock_refresh.assert_called_once()
|
||||
assert creds["api_key"] == "refreshed-at"
|
||||
|
||||
|
||||
def test_resolve_qwen_runtime_credentials_force_refresh(qwen_env):
|
||||
tokens = _make_qwen_tokens(access_token="old-at")
|
||||
_write_qwen_creds(qwen_env, tokens)
|
||||
|
||||
refreshed = _make_qwen_tokens(access_token="force-refreshed")
|
||||
|
||||
with patch(
|
||||
"hermes_cli.auth._refresh_qwen_cli_tokens", return_value=refreshed
|
||||
) as mock_refresh:
|
||||
creds = resolve_qwen_runtime_credentials(force_refresh=True)
|
||||
mock_refresh.assert_called_once()
|
||||
assert creds["api_key"] == "force-refreshed"
|
||||
|
||||
|
||||
def test_resolve_qwen_runtime_credentials_missing_access_token(qwen_env):
|
||||
tokens = _make_qwen_tokens(access_token="")
|
||||
_write_qwen_creds(qwen_env, tokens)
|
||||
|
||||
with pytest.raises(AuthError) as exc:
|
||||
resolve_qwen_runtime_credentials(refresh_if_expiring=False)
|
||||
assert exc.value.code == "qwen_access_token_missing"
|
||||
|
||||
|
||||
def test_resolve_qwen_runtime_credentials_base_url_env_override(qwen_env, monkeypatch):
|
||||
tokens = _make_qwen_tokens(access_token="at")
|
||||
_write_qwen_creds(qwen_env, tokens)
|
||||
monkeypatch.setenv("HERMES_QWEN_BASE_URL", "https://custom.qwen.ai/v1")
|
||||
|
||||
creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False)
|
||||
assert creds["base_url"] == "https://custom.qwen.ai/v1"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_qwen_auth_status
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_get_qwen_auth_status_logged_in(qwen_env):
|
||||
tokens = _make_qwen_tokens(access_token="status-at")
|
||||
_write_qwen_creds(qwen_env, tokens)
|
||||
|
||||
status = get_qwen_auth_status()
|
||||
assert status["logged_in"] is True
|
||||
assert status["api_key"] == "status-at"
|
||||
|
||||
|
||||
def test_get_qwen_auth_status_not_logged_in(qwen_env):
|
||||
# No credentials file
|
||||
status = get_qwen_auth_status()
|
||||
assert status["logged_in"] is False
|
||||
assert "error" in status
|
||||
@@ -425,8 +425,8 @@ class TestSlashCommandCompleter:
|
||||
class TestSubcommands:
|
||||
def test_explicit_subcommands_extracted(self):
|
||||
"""Commands with explicit subcommands on CommandDef are extracted."""
|
||||
assert "/prompt" in SUBCOMMANDS
|
||||
assert "clear" in SUBCOMMANDS["/prompt"]
|
||||
assert "/skills" in SUBCOMMANDS
|
||||
assert "install" in SUBCOMMANDS["/skills"]
|
||||
|
||||
def test_reasoning_has_subcommands(self):
|
||||
assert "/reasoning" in SUBCOMMANDS
|
||||
|
||||
@@ -136,3 +136,73 @@ def test_check_gateway_service_linger_skips_when_service_not_installed(monkeypat
|
||||
out = capsys.readouterr().out
|
||||
assert out == ""
|
||||
assert issues == []
|
||||
|
||||
|
||||
# ── Memory provider section (doctor should only check the *active* provider) ──
|
||||
|
||||
|
||||
class TestDoctorMemoryProviderSection:
|
||||
"""The ◆ Memory Provider section should respect memory.provider config."""
|
||||
|
||||
def _make_hermes_home(self, tmp_path, provider=""):
|
||||
"""Create a minimal HERMES_HOME with config.yaml."""
|
||||
home = tmp_path / ".hermes"
|
||||
home.mkdir(parents=True, exist_ok=True)
|
||||
import yaml
|
||||
config = {"memory": {"provider": provider}} if provider else {"memory": {}}
|
||||
(home / "config.yaml").write_text(yaml.dump(config))
|
||||
return home
|
||||
|
||||
def _run_doctor_and_capture(self, monkeypatch, tmp_path, provider=""):
|
||||
"""Run doctor and capture stdout."""
|
||||
home = self._make_hermes_home(tmp_path, provider)
|
||||
monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
|
||||
monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", tmp_path / "project")
|
||||
monkeypatch.setattr(doctor_mod, "_DHH", str(home))
|
||||
(tmp_path / "project").mkdir(exist_ok=True)
|
||||
|
||||
# Stub tool availability (returns empty) so doctor runs past it
|
||||
fake_model_tools = types.SimpleNamespace(
|
||||
check_tool_availability=lambda *a, **kw: ([], []),
|
||||
TOOLSET_REQUIREMENTS={},
|
||||
)
|
||||
monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
|
||||
|
||||
# Stub auth checks to avoid real API calls
|
||||
try:
|
||||
from hermes_cli import auth as _auth_mod
|
||||
monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
|
||||
monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
import io, contextlib
|
||||
buf = io.StringIO()
|
||||
with contextlib.redirect_stdout(buf):
|
||||
doctor_mod.run_doctor(Namespace(fix=False))
|
||||
return buf.getvalue()
|
||||
|
||||
def test_no_provider_shows_builtin_ok(self, monkeypatch, tmp_path):
|
||||
out = self._run_doctor_and_capture(monkeypatch, tmp_path, provider="")
|
||||
assert "Memory Provider" in out
|
||||
assert "Built-in memory active" in out
|
||||
# Should NOT mention Honcho or Mem0 errors
|
||||
assert "Honcho API key" not in out
|
||||
assert "Mem0" not in out
|
||||
|
||||
def test_honcho_provider_not_installed_shows_fail(self, monkeypatch, tmp_path):
|
||||
# Make honcho import fail
|
||||
monkeypatch.setitem(
|
||||
sys.modules, "plugins.memory.honcho.client", None
|
||||
)
|
||||
out = self._run_doctor_and_capture(monkeypatch, tmp_path, provider="honcho")
|
||||
assert "Memory Provider" in out
|
||||
# Should show failure since honcho is set but not importable
|
||||
assert "Built-in memory active" not in out
|
||||
|
||||
def test_mem0_provider_not_installed_shows_fail(self, monkeypatch, tmp_path):
|
||||
# Make mem0 import fail
|
||||
monkeypatch.setitem(sys.modules, "plugins.memory.mem0", None)
|
||||
out = self._run_doctor_and_capture(monkeypatch, tmp_path, provider="mem0")
|
||||
assert "Memory Provider" in out
|
||||
assert "Built-in memory active" not in out
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
"""Tests for OpenRouter variant tag preservation in model switching.
|
||||
|
||||
Regression test for GitHub PR #6088 / Discord report: OpenRouter model IDs
|
||||
with variant suffixes like ``:free``, ``:extended``, ``:fast`` were being
|
||||
mangled by the colon-to-slash conversion in model_switch.py Step c.
|
||||
|
||||
The fix: Step c now skips colon→slash conversion when the model name already
|
||||
contains a forward slash (i.e. is already in ``vendor/model`` format), since
|
||||
the colon is a variant tag, not a vendor separator.
|
||||
"""
|
||||
import pytest
|
||||
from unittest.mock import patch
|
||||
|
||||
from hermes_cli.model_switch import switch_model
|
||||
|
||||
|
||||
# Shared mock context — skip network calls, credential resolution, catalog lookups
|
||||
_MOCK_VALIDATION = {"accepted": True, "persist": True, "recognized": True, "message": None}
|
||||
|
||||
|
||||
def _run_switch(raw_input: str, current_provider: str = "openrouter") -> str:
|
||||
"""Run switch_model with mocked dependencies, return the resolved model name."""
|
||||
with patch("hermes_cli.model_switch.resolve_alias", return_value=None), \
|
||||
patch("hermes_cli.model_switch.list_provider_models", return_value=[]), \
|
||||
patch("hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
return_value={"api_key": "test", "base_url": "", "api_mode": "chat_completions"}), \
|
||||
patch("hermes_cli.models.validate_requested_model", return_value=_MOCK_VALIDATION), \
|
||||
patch("hermes_cli.model_switch.get_model_info", return_value=None), \
|
||||
patch("hermes_cli.model_switch.get_model_capabilities", return_value=None), \
|
||||
patch("hermes_cli.models.detect_provider_for_model", return_value=None):
|
||||
result = switch_model(
|
||||
raw_input=raw_input,
|
||||
current_provider=current_provider,
|
||||
current_model="anthropic/claude-sonnet-4.6",
|
||||
)
|
||||
assert result.success, f"switch_model failed: {result.error_message}"
|
||||
return result.new_model
|
||||
|
||||
|
||||
class TestVariantTagPreservation:
|
||||
"""OpenRouter variant tags (:free, :extended, :fast) must survive model switching."""
|
||||
|
||||
@pytest.mark.parametrize("model,expected", [
|
||||
("nvidia/nemotron-3-super-120b-a12b:free", "nvidia/nemotron-3-super-120b-a12b:free"),
|
||||
("anthropic/claude-sonnet-4.6:extended", "anthropic/claude-sonnet-4.6:extended"),
|
||||
("meta-llama/llama-4-maverick:fast", "meta-llama/llama-4-maverick:fast"),
|
||||
])
|
||||
def test_slash_format_preserves_variant_tag(self, model, expected):
|
||||
"""Models already in vendor/model:tag format must not have their tag mangled."""
|
||||
assert _run_switch(model) == expected
|
||||
|
||||
def test_legacy_colon_format_converts_to_slash(self):
|
||||
"""Legacy vendor:model (no slash) should still be converted to vendor/model."""
|
||||
result = _run_switch("nvidia:nemotron-3-super-120b-a12b")
|
||||
assert result == "nvidia/nemotron-3-super-120b-a12b"
|
||||
|
||||
def test_legacy_colon_format_with_tag_converts_first_colon_only(self):
|
||||
"""vendor:model:free (no slash) → vendor/model:free — first colon becomes slash."""
|
||||
result = _run_switch("nvidia:nemotron-3-super-120b-a12b:free")
|
||||
assert result == "nvidia/nemotron-3-super-120b-a12b:free"
|
||||
|
||||
def test_bare_model_name_unaffected(self):
|
||||
"""Bare model names without colons or slashes should work normally."""
|
||||
result = _run_switch("claude-sonnet-4.6")
|
||||
assert result == "anthropic/claude-sonnet-4.6"
|
||||
|
||||
def test_already_correct_slug_no_tag(self):
|
||||
"""Standard vendor/model slugs without tags pass through unchanged."""
|
||||
result = _run_switch("anthropic/claude-sonnet-4.6")
|
||||
assert result == "anthropic/claude-sonnet-4.6"
|
||||
@@ -143,6 +143,82 @@ def test_resolve_runtime_provider_codex(monkeypatch):
|
||||
assert resolved["requested_provider"] == "openai-codex"
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_qwen_oauth(monkeypatch):
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "qwen-oauth")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"resolve_qwen_runtime_credentials",
|
||||
lambda: {
|
||||
"provider": "qwen-oauth",
|
||||
"base_url": "https://portal.qwen.ai/v1",
|
||||
"api_key": "qwen-token",
|
||||
"source": "qwen-cli",
|
||||
"expires_at_ms": 1775640710946,
|
||||
},
|
||||
)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="qwen-oauth")
|
||||
|
||||
assert resolved["provider"] == "qwen-oauth"
|
||||
assert resolved["api_mode"] == "chat_completions"
|
||||
assert resolved["base_url"] == "https://portal.qwen.ai/v1"
|
||||
assert resolved["api_key"] == "qwen-token"
|
||||
assert resolved["requested_provider"] == "qwen-oauth"
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_uses_qwen_pool_entry(monkeypatch):
|
||||
class _Entry:
|
||||
access_token = "pool-qwen-token"
|
||||
source = "manual:qwen_cli"
|
||||
base_url = "https://portal.qwen.ai/v1"
|
||||
|
||||
class _Pool:
|
||||
def has_credentials(self):
|
||||
return True
|
||||
|
||||
def select(self):
|
||||
return _Entry()
|
||||
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "qwen-oauth")
|
||||
monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool())
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "qwen-oauth", "default": "coder-model"})
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="qwen-oauth")
|
||||
|
||||
assert resolved["provider"] == "qwen-oauth"
|
||||
assert resolved["api_mode"] == "chat_completions"
|
||||
assert resolved["base_url"] == "https://portal.qwen.ai/v1"
|
||||
assert resolved["api_key"] == "pool-qwen-token"
|
||||
assert resolved["source"] == "manual:qwen_cli"
|
||||
|
||||
|
||||
def test_resolve_provider_alias_qwen(monkeypatch):
|
||||
monkeypatch.setattr(rp.auth_mod, "_load_auth_store", lambda: {})
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
assert rp.resolve_provider("qwen-portal") == "qwen-oauth"
|
||||
assert rp.resolve_provider("qwen-cli") == "qwen-oauth"
|
||||
|
||||
|
||||
def test_qwen_oauth_auto_fallthrough_on_auth_failure(monkeypatch):
|
||||
"""When requested_provider is 'auto' and Qwen creds fail, fall through."""
|
||||
from hermes_cli.auth import AuthError
|
||||
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "qwen-oauth")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"resolve_qwen_runtime_credentials",
|
||||
lambda **kw: (_ for _ in ()).throw(AuthError("stale", provider="qwen-oauth", code="qwen_auth_missing")),
|
||||
)
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "test-or-key")
|
||||
|
||||
# Should NOT raise — falls through to OpenRouter
|
||||
resolved = rp.resolve_runtime_provider(requested="auto")
|
||||
# The fallthrough means it won't be qwen-oauth
|
||||
assert resolved["provider"] != "qwen-oauth"
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_ai_gateway(monkeypatch):
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "ai-gateway")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
|
||||
|
||||
@@ -44,7 +44,7 @@ class TestOfferOpenclawMigration:
|
||||
assert setup_mod._offer_openclaw_migration(tmp_path / ".hermes") is False
|
||||
|
||||
def test_runs_migration_when_user_accepts(self, tmp_path):
|
||||
"""Should dynamically load the script and run the Migrator."""
|
||||
"""Should run dry-run preview first, then execute after confirmation."""
|
||||
openclaw_dir = tmp_path / ".openclaw"
|
||||
openclaw_dir.mkdir()
|
||||
|
||||
@@ -60,6 +60,7 @@ class TestOfferOpenclawMigration:
|
||||
fake_migrator = MagicMock()
|
||||
fake_migrator.migrate.return_value = {
|
||||
"summary": {"migrated": 3, "skipped": 1, "conflict": 0, "error": 0},
|
||||
"items": [{"kind": "config", "status": "migrated", "destination": "/tmp/x"}],
|
||||
"output_dir": str(hermes_home / "migration"),
|
||||
}
|
||||
fake_mod.Migrator = MagicMock(return_value=fake_migrator)
|
||||
@@ -70,6 +71,7 @@ class TestOfferOpenclawMigration:
|
||||
with (
|
||||
patch("hermes_cli.setup.Path.home", return_value=tmp_path),
|
||||
patch.object(setup_mod, "_OPENCLAW_SCRIPT", script),
|
||||
# Both prompts answered Yes: preview offer + proceed confirmation
|
||||
patch.object(setup_mod, "prompt_yes_no", return_value=True),
|
||||
patch.object(setup_mod, "get_config_path", return_value=config_path),
|
||||
patch("importlib.util.spec_from_file_location") as mock_spec_fn,
|
||||
@@ -91,13 +93,75 @@ class TestOfferOpenclawMigration:
|
||||
fake_mod.resolve_selected_options.assert_called_once_with(
|
||||
None, None, preset="full"
|
||||
)
|
||||
fake_mod.Migrator.assert_called_once()
|
||||
call_kwargs = fake_mod.Migrator.call_args[1]
|
||||
assert call_kwargs["execute"] is True
|
||||
assert call_kwargs["overwrite"] is True
|
||||
assert call_kwargs["migrate_secrets"] is True
|
||||
assert call_kwargs["preset_name"] == "full"
|
||||
fake_migrator.migrate.assert_called_once()
|
||||
# Migrator called twice: once for dry-run preview, once for execution
|
||||
assert fake_mod.Migrator.call_count == 2
|
||||
|
||||
# First call: dry-run preview (execute=False, overwrite=True to show all)
|
||||
preview_kwargs = fake_mod.Migrator.call_args_list[0][1]
|
||||
assert preview_kwargs["execute"] is False
|
||||
assert preview_kwargs["overwrite"] is True
|
||||
assert preview_kwargs["migrate_secrets"] is True
|
||||
assert preview_kwargs["preset_name"] == "full"
|
||||
|
||||
# Second call: actual execution (execute=True, overwrite=False to preserve)
|
||||
exec_kwargs = fake_mod.Migrator.call_args_list[1][1]
|
||||
assert exec_kwargs["execute"] is True
|
||||
assert exec_kwargs["overwrite"] is False
|
||||
assert exec_kwargs["migrate_secrets"] is True
|
||||
assert exec_kwargs["preset_name"] == "full"
|
||||
|
||||
# migrate() called twice (once per Migrator instance)
|
||||
assert fake_migrator.migrate.call_count == 2
|
||||
|
||||
def test_user_declines_after_preview(self, tmp_path):
|
||||
"""Should return False when user sees preview but declines to proceed."""
|
||||
openclaw_dir = tmp_path / ".openclaw"
|
||||
openclaw_dir.mkdir()
|
||||
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
config_path = hermes_home / "config.yaml"
|
||||
config_path.write_text("agent:\n max_turns: 90\n")
|
||||
|
||||
fake_mod = ModuleType("openclaw_to_hermes")
|
||||
fake_mod.resolve_selected_options = MagicMock(return_value={"soul", "memory"})
|
||||
fake_migrator = MagicMock()
|
||||
fake_migrator.migrate.return_value = {
|
||||
"summary": {"migrated": 3, "skipped": 0, "conflict": 0, "error": 0},
|
||||
"items": [{"kind": "config", "status": "migrated", "destination": "/tmp/x"}],
|
||||
}
|
||||
fake_mod.Migrator = MagicMock(return_value=fake_migrator)
|
||||
|
||||
script = tmp_path / "openclaw_to_hermes.py"
|
||||
script.write_text("# placeholder")
|
||||
|
||||
# First prompt (preview): Yes, Second prompt (proceed): No
|
||||
prompt_responses = iter([True, False])
|
||||
|
||||
with (
|
||||
patch("hermes_cli.setup.Path.home", return_value=tmp_path),
|
||||
patch.object(setup_mod, "_OPENCLAW_SCRIPT", script),
|
||||
patch.object(setup_mod, "prompt_yes_no", side_effect=prompt_responses),
|
||||
patch.object(setup_mod, "get_config_path", return_value=config_path),
|
||||
patch("importlib.util.spec_from_file_location") as mock_spec_fn,
|
||||
):
|
||||
mock_spec = MagicMock()
|
||||
mock_spec.loader = MagicMock()
|
||||
mock_spec_fn.return_value = mock_spec
|
||||
|
||||
def exec_module(mod):
|
||||
mod.resolve_selected_options = fake_mod.resolve_selected_options
|
||||
mod.Migrator = fake_mod.Migrator
|
||||
|
||||
mock_spec.loader.exec_module = exec_module
|
||||
|
||||
result = setup_mod._offer_openclaw_migration(hermes_home)
|
||||
|
||||
assert result is False
|
||||
# Only dry-run Migrator was created, not the execute one
|
||||
assert fake_mod.Migrator.call_count == 1
|
||||
preview_kwargs = fake_mod.Migrator.call_args[1]
|
||||
assert preview_kwargs["execute"] is False
|
||||
|
||||
def test_handles_migration_error_gracefully(self, tmp_path):
|
||||
"""Should catch exceptions and return False."""
|
||||
|
||||
@@ -354,6 +354,14 @@ def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
|
||||
lambda *args, **kwargs: {"web", "image_gen", "tts", "browser"},
|
||||
)
|
||||
monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None)
|
||||
# Prevent leaked platform tokens (e.g. DISCORD_BOT_TOKEN from gateway.run
|
||||
# import) from adding extra platforms. The loop in tools_command runs
|
||||
# apply_nous_managed_defaults per platform; a second iteration sees values
|
||||
# set by the first as "explicit" and skips them.
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.tools_config._get_enabled_platforms",
|
||||
lambda: ["cli"],
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.nous_subscription.get_nous_auth_status",
|
||||
lambda: {"logged_in": True},
|
||||
|
||||
@@ -368,6 +368,9 @@ class TestCmdUpdateLaunchdRestart:
|
||||
monkeypatch.setattr(
|
||||
gateway_cli, "is_macos", lambda: False,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli, "is_linux", lambda: True,
|
||||
)
|
||||
|
||||
mock_run.side_effect = _make_run_side_effect(
|
||||
commit_count="3",
|
||||
|
||||
@@ -0,0 +1,598 @@
|
||||
"""Tests for the Hindsight memory provider plugin.
|
||||
|
||||
Tests cover config loading, tool handlers (tags, max_tokens, types),
|
||||
prefetch (auto_recall, preamble, query truncation), sync_turn (auto_retain,
|
||||
turn counting, tags), and schema completeness.
|
||||
"""
|
||||
|
||||
import json
|
||||
import threading
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from plugins.memory.hindsight import (
|
||||
HindsightMemoryProvider,
|
||||
RECALL_SCHEMA,
|
||||
REFLECT_SCHEMA,
|
||||
RETAIN_SCHEMA,
|
||||
_load_config,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _clean_env(monkeypatch):
|
||||
"""Ensure no stale env vars leak between tests."""
|
||||
for key in (
|
||||
"HINDSIGHT_API_KEY", "HINDSIGHT_API_URL", "HINDSIGHT_BANK_ID",
|
||||
"HINDSIGHT_BUDGET", "HINDSIGHT_MODE", "HINDSIGHT_LLM_API_KEY",
|
||||
):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
|
||||
|
||||
def _make_mock_client():
|
||||
"""Create a mock Hindsight client with async methods."""
|
||||
client = MagicMock()
|
||||
client.aretain = AsyncMock()
|
||||
client.arecall = AsyncMock(
|
||||
return_value=SimpleNamespace(
|
||||
results=[
|
||||
SimpleNamespace(text="Memory 1"),
|
||||
SimpleNamespace(text="Memory 2"),
|
||||
]
|
||||
)
|
||||
)
|
||||
client.areflect = AsyncMock(
|
||||
return_value=SimpleNamespace(text="Synthesized answer")
|
||||
)
|
||||
client.aretain_batch = AsyncMock()
|
||||
client.aclose = AsyncMock()
|
||||
return client
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def provider(tmp_path, monkeypatch):
|
||||
"""Create an initialized HindsightMemoryProvider with a mock client."""
|
||||
config = {
|
||||
"mode": "cloud",
|
||||
"apiKey": "test-key",
|
||||
"api_url": "http://localhost:9999",
|
||||
"bank_id": "test-bank",
|
||||
"budget": "mid",
|
||||
"memory_mode": "hybrid",
|
||||
}
|
||||
config_path = tmp_path / "hindsight" / "config.json"
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
config_path.write_text(json.dumps(config))
|
||||
|
||||
monkeypatch.setattr(
|
||||
"plugins.memory.hindsight.get_hermes_home", lambda: tmp_path
|
||||
)
|
||||
|
||||
p = HindsightMemoryProvider()
|
||||
p.initialize(session_id="test-session", hermes_home=str(tmp_path), platform="cli")
|
||||
p._client = _make_mock_client()
|
||||
return p
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def provider_with_config(tmp_path, monkeypatch):
|
||||
"""Create a provider factory that accepts custom config overrides."""
|
||||
def _make(**overrides):
|
||||
config = {
|
||||
"mode": "cloud",
|
||||
"apiKey": "test-key",
|
||||
"api_url": "http://localhost:9999",
|
||||
"bank_id": "test-bank",
|
||||
"budget": "mid",
|
||||
"memory_mode": "hybrid",
|
||||
}
|
||||
config.update(overrides)
|
||||
config_path = tmp_path / "hindsight" / "config.json"
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
config_path.write_text(json.dumps(config))
|
||||
|
||||
monkeypatch.setattr(
|
||||
"plugins.memory.hindsight.get_hermes_home", lambda: tmp_path
|
||||
)
|
||||
|
||||
p = HindsightMemoryProvider()
|
||||
p.initialize(session_id="test-session", hermes_home=str(tmp_path), platform="cli")
|
||||
p._client = _make_mock_client()
|
||||
return p
|
||||
return _make
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSchemas:
|
||||
def test_retain_schema_has_content(self):
|
||||
assert RETAIN_SCHEMA["name"] == "hindsight_retain"
|
||||
assert "content" in RETAIN_SCHEMA["parameters"]["properties"]
|
||||
assert "content" in RETAIN_SCHEMA["parameters"]["required"]
|
||||
|
||||
def test_recall_schema_has_query(self):
|
||||
assert RECALL_SCHEMA["name"] == "hindsight_recall"
|
||||
assert "query" in RECALL_SCHEMA["parameters"]["properties"]
|
||||
assert "query" in RECALL_SCHEMA["parameters"]["required"]
|
||||
|
||||
def test_reflect_schema_has_query(self):
|
||||
assert REFLECT_SCHEMA["name"] == "hindsight_reflect"
|
||||
assert "query" in REFLECT_SCHEMA["parameters"]["properties"]
|
||||
|
||||
def test_get_tool_schemas_returns_three(self, provider):
|
||||
schemas = provider.get_tool_schemas()
|
||||
assert len(schemas) == 3
|
||||
names = {s["name"] for s in schemas}
|
||||
assert names == {"hindsight_retain", "hindsight_recall", "hindsight_reflect"}
|
||||
|
||||
def test_context_mode_returns_no_tools(self, provider_with_config):
|
||||
p = provider_with_config(memory_mode="context")
|
||||
assert p.get_tool_schemas() == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestConfig:
|
||||
def test_default_values(self, provider):
|
||||
assert provider._auto_retain is True
|
||||
assert provider._auto_recall is True
|
||||
assert provider._retain_every_n_turns == 1
|
||||
assert provider._recall_max_tokens == 4096
|
||||
assert provider._recall_max_input_chars == 800
|
||||
assert provider._tags is None
|
||||
assert provider._recall_tags is None
|
||||
assert provider._bank_mission == ""
|
||||
assert provider._bank_retain_mission is None
|
||||
assert provider._retain_context == "conversation between Hermes Agent and the User"
|
||||
|
||||
def test_custom_config_values(self, provider_with_config):
|
||||
p = provider_with_config(
|
||||
tags=["tag1", "tag2"],
|
||||
recall_tags=["recall-tag"],
|
||||
recall_tags_match="all",
|
||||
auto_retain=False,
|
||||
auto_recall=False,
|
||||
retain_every_n_turns=3,
|
||||
retain_context="custom-ctx",
|
||||
bank_retain_mission="Extract key facts",
|
||||
recall_max_tokens=2048,
|
||||
recall_types=["world", "experience"],
|
||||
recall_prompt_preamble="Custom preamble:",
|
||||
recall_max_input_chars=500,
|
||||
bank_mission="Test agent mission",
|
||||
)
|
||||
assert p._tags == ["tag1", "tag2"]
|
||||
assert p._recall_tags == ["recall-tag"]
|
||||
assert p._recall_tags_match == "all"
|
||||
assert p._auto_retain is False
|
||||
assert p._auto_recall is False
|
||||
assert p._retain_every_n_turns == 3
|
||||
assert p._retain_context == "custom-ctx"
|
||||
assert p._bank_retain_mission == "Extract key facts"
|
||||
assert p._recall_max_tokens == 2048
|
||||
assert p._recall_types == ["world", "experience"]
|
||||
assert p._recall_prompt_preamble == "Custom preamble:"
|
||||
assert p._recall_max_input_chars == 500
|
||||
assert p._bank_mission == "Test agent mission"
|
||||
|
||||
def test_config_from_env_fallback(self, tmp_path, monkeypatch):
|
||||
"""When no config file exists, falls back to env vars."""
|
||||
monkeypatch.setattr(
|
||||
"plugins.memory.hindsight.get_hermes_home",
|
||||
lambda: tmp_path / "nonexistent",
|
||||
)
|
||||
monkeypatch.setenv("HINDSIGHT_MODE", "cloud")
|
||||
monkeypatch.setenv("HINDSIGHT_API_KEY", "env-key")
|
||||
monkeypatch.setenv("HINDSIGHT_BANK_ID", "env-bank")
|
||||
monkeypatch.setenv("HINDSIGHT_BUDGET", "high")
|
||||
|
||||
cfg = _load_config()
|
||||
assert cfg["apiKey"] == "env-key"
|
||||
assert cfg["banks"]["hermes"]["bankId"] == "env-bank"
|
||||
assert cfg["banks"]["hermes"]["budget"] == "high"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool handler tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestToolHandlers:
|
||||
def test_retain_success(self, provider):
|
||||
result = json.loads(provider.handle_tool_call(
|
||||
"hindsight_retain", {"content": "user likes dark mode"}
|
||||
))
|
||||
assert result["result"] == "Memory stored successfully."
|
||||
provider._client.aretain.assert_called_once()
|
||||
call_kwargs = provider._client.aretain.call_args.kwargs
|
||||
assert call_kwargs["bank_id"] == "test-bank"
|
||||
assert call_kwargs["content"] == "user likes dark mode"
|
||||
|
||||
def test_retain_with_tags(self, provider_with_config):
|
||||
p = provider_with_config(tags=["pref", "ui"])
|
||||
p.handle_tool_call("hindsight_retain", {"content": "likes dark mode"})
|
||||
call_kwargs = p._client.aretain.call_args.kwargs
|
||||
assert call_kwargs["tags"] == ["pref", "ui"]
|
||||
|
||||
def test_retain_without_tags(self, provider):
|
||||
provider.handle_tool_call("hindsight_retain", {"content": "hello"})
|
||||
call_kwargs = provider._client.aretain.call_args.kwargs
|
||||
assert "tags" not in call_kwargs
|
||||
|
||||
def test_retain_missing_content(self, provider):
|
||||
result = json.loads(provider.handle_tool_call(
|
||||
"hindsight_retain", {}
|
||||
))
|
||||
assert "error" in result
|
||||
|
||||
def test_recall_success(self, provider):
|
||||
result = json.loads(provider.handle_tool_call(
|
||||
"hindsight_recall", {"query": "dark mode"}
|
||||
))
|
||||
assert "Memory 1" in result["result"]
|
||||
assert "Memory 2" in result["result"]
|
||||
|
||||
def test_recall_passes_max_tokens(self, provider_with_config):
|
||||
p = provider_with_config(recall_max_tokens=2048)
|
||||
p.handle_tool_call("hindsight_recall", {"query": "test"})
|
||||
call_kwargs = p._client.arecall.call_args.kwargs
|
||||
assert call_kwargs["max_tokens"] == 2048
|
||||
|
||||
def test_recall_passes_tags(self, provider_with_config):
|
||||
p = provider_with_config(recall_tags=["tag1"], recall_tags_match="all")
|
||||
p.handle_tool_call("hindsight_recall", {"query": "test"})
|
||||
call_kwargs = p._client.arecall.call_args.kwargs
|
||||
assert call_kwargs["tags"] == ["tag1"]
|
||||
assert call_kwargs["tags_match"] == "all"
|
||||
|
||||
def test_recall_passes_types(self, provider_with_config):
|
||||
p = provider_with_config(recall_types=["world", "experience"])
|
||||
p.handle_tool_call("hindsight_recall", {"query": "test"})
|
||||
call_kwargs = p._client.arecall.call_args.kwargs
|
||||
assert call_kwargs["types"] == ["world", "experience"]
|
||||
|
||||
def test_recall_no_results(self, provider):
|
||||
provider._client.arecall.return_value = SimpleNamespace(results=[])
|
||||
result = json.loads(provider.handle_tool_call(
|
||||
"hindsight_recall", {"query": "test"}
|
||||
))
|
||||
assert result["result"] == "No relevant memories found."
|
||||
|
||||
def test_recall_missing_query(self, provider):
|
||||
result = json.loads(provider.handle_tool_call(
|
||||
"hindsight_recall", {}
|
||||
))
|
||||
assert "error" in result
|
||||
|
||||
def test_reflect_success(self, provider):
|
||||
result = json.loads(provider.handle_tool_call(
|
||||
"hindsight_reflect", {"query": "summarize"}
|
||||
))
|
||||
assert result["result"] == "Synthesized answer"
|
||||
|
||||
def test_reflect_missing_query(self, provider):
|
||||
result = json.loads(provider.handle_tool_call(
|
||||
"hindsight_reflect", {}
|
||||
))
|
||||
assert "error" in result
|
||||
|
||||
def test_unknown_tool(self, provider):
|
||||
result = json.loads(provider.handle_tool_call(
|
||||
"hindsight_unknown", {}
|
||||
))
|
||||
assert "error" in result
|
||||
|
||||
def test_retain_error_handling(self, provider):
|
||||
provider._client.aretain.side_effect = RuntimeError("connection failed")
|
||||
result = json.loads(provider.handle_tool_call(
|
||||
"hindsight_retain", {"content": "test"}
|
||||
))
|
||||
assert "error" in result
|
||||
assert "connection failed" in result["error"]
|
||||
|
||||
def test_recall_error_handling(self, provider):
|
||||
provider._client.arecall.side_effect = RuntimeError("timeout")
|
||||
result = json.loads(provider.handle_tool_call(
|
||||
"hindsight_recall", {"query": "test"}
|
||||
))
|
||||
assert "error" in result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prefetch tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPrefetch:
|
||||
def test_prefetch_returns_empty_when_no_result(self, provider):
|
||||
assert provider.prefetch("test") == ""
|
||||
|
||||
def test_prefetch_default_preamble(self, provider):
|
||||
provider._prefetch_result = "- some memory"
|
||||
result = provider.prefetch("test")
|
||||
assert "Hindsight Memory" in result
|
||||
assert "- some memory" in result
|
||||
|
||||
def test_prefetch_custom_preamble(self, provider_with_config):
|
||||
p = provider_with_config(recall_prompt_preamble="Custom header:")
|
||||
p._prefetch_result = "- memory line"
|
||||
result = p.prefetch("test")
|
||||
assert result.startswith("Custom header:")
|
||||
assert "- memory line" in result
|
||||
|
||||
def test_queue_prefetch_skipped_in_tools_mode(self, provider_with_config):
|
||||
p = provider_with_config(memory_mode="tools")
|
||||
p.queue_prefetch("test")
|
||||
# Should not start a thread
|
||||
assert p._prefetch_thread is None
|
||||
|
||||
def test_queue_prefetch_skipped_when_auto_recall_off(self, provider_with_config):
|
||||
p = provider_with_config(auto_recall=False)
|
||||
p.queue_prefetch("test")
|
||||
assert p._prefetch_thread is None
|
||||
|
||||
def test_queue_prefetch_truncates_query(self, provider_with_config):
|
||||
p = provider_with_config(recall_max_input_chars=10)
|
||||
# Mock _run_sync to capture the query
|
||||
original_query = None
|
||||
|
||||
def _capture_recall(**kwargs):
|
||||
nonlocal original_query
|
||||
original_query = kwargs.get("query", "")
|
||||
return SimpleNamespace(results=[])
|
||||
|
||||
p._client.arecall = AsyncMock(side_effect=_capture_recall)
|
||||
|
||||
long_query = "a" * 100
|
||||
p.queue_prefetch(long_query)
|
||||
if p._prefetch_thread:
|
||||
p._prefetch_thread.join(timeout=5.0)
|
||||
|
||||
# The query passed to arecall should be truncated
|
||||
if original_query is not None:
|
||||
assert len(original_query) <= 10
|
||||
|
||||
def test_queue_prefetch_passes_recall_params(self, provider_with_config):
|
||||
p = provider_with_config(
|
||||
recall_tags=["t1"],
|
||||
recall_tags_match="all",
|
||||
recall_max_tokens=1024,
|
||||
recall_types=["world"],
|
||||
)
|
||||
p.queue_prefetch("test query")
|
||||
if p._prefetch_thread:
|
||||
p._prefetch_thread.join(timeout=5.0)
|
||||
|
||||
call_kwargs = p._client.arecall.call_args.kwargs
|
||||
assert call_kwargs["max_tokens"] == 1024
|
||||
assert call_kwargs["tags"] == ["t1"]
|
||||
assert call_kwargs["tags_match"] == "all"
|
||||
assert call_kwargs["types"] == ["world"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# sync_turn tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSyncTurn:
|
||||
def _get_retain_kwargs(self, provider):
|
||||
"""Helper to get the kwargs from the aretain_batch call."""
|
||||
return provider._client.aretain_batch.call_args.kwargs
|
||||
|
||||
def _get_retain_content(self, provider):
|
||||
"""Helper to get the raw content string from the first item."""
|
||||
kwargs = self._get_retain_kwargs(provider)
|
||||
return kwargs["items"][0]["content"]
|
||||
|
||||
def _get_retain_messages(self, provider):
|
||||
"""Helper to parse the first turn's messages from retained content.
|
||||
|
||||
Content is a JSON array of turns: [[msgs...], [msgs...], ...]
|
||||
For single-turn tests, returns the first turn's messages.
|
||||
"""
|
||||
content = self._get_retain_content(provider)
|
||||
turns = json.loads(content)
|
||||
return turns[0] if len(turns) == 1 else turns
|
||||
|
||||
def test_sync_turn_retains(self, provider):
|
||||
provider.sync_turn("hello", "hi there")
|
||||
if provider._sync_thread:
|
||||
provider._sync_thread.join(timeout=5.0)
|
||||
provider._client.aretain_batch.assert_called_once()
|
||||
messages = self._get_retain_messages(provider)
|
||||
assert len(messages) == 2
|
||||
assert messages[0]["role"] == "user"
|
||||
assert messages[0]["content"] == "hello"
|
||||
assert "timestamp" in messages[0]
|
||||
assert messages[1]["role"] == "assistant"
|
||||
assert messages[1]["content"] == "hi there"
|
||||
assert "timestamp" in messages[1]
|
||||
|
||||
def test_sync_turn_skipped_when_auto_retain_off(self, provider_with_config):
|
||||
p = provider_with_config(auto_retain=False)
|
||||
p.sync_turn("hello", "hi")
|
||||
assert p._sync_thread is None
|
||||
p._client.aretain_batch.assert_not_called()
|
||||
|
||||
def test_sync_turn_with_tags(self, provider_with_config):
|
||||
p = provider_with_config(tags=["conv", "session1"])
|
||||
p.sync_turn("hello", "hi")
|
||||
if p._sync_thread:
|
||||
p._sync_thread.join(timeout=5.0)
|
||||
item = p._client.aretain_batch.call_args.kwargs["items"][0]
|
||||
assert item["tags"] == ["conv", "session1"]
|
||||
|
||||
def test_sync_turn_uses_aretain_batch(self, provider):
|
||||
"""sync_turn should use aretain_batch with retain_async."""
|
||||
provider.sync_turn("hello", "hi")
|
||||
if provider._sync_thread:
|
||||
provider._sync_thread.join(timeout=5.0)
|
||||
provider._client.aretain_batch.assert_called_once()
|
||||
call_kwargs = provider._client.aretain_batch.call_args.kwargs
|
||||
assert call_kwargs["document_id"] == "test-session"
|
||||
assert call_kwargs["retain_async"] is True
|
||||
assert len(call_kwargs["items"]) == 1
|
||||
assert call_kwargs["items"][0]["context"] == "conversation between Hermes Agent and the User"
|
||||
|
||||
def test_sync_turn_custom_context(self, provider_with_config):
|
||||
p = provider_with_config(retain_context="my-agent")
|
||||
p.sync_turn("hello", "hi")
|
||||
if p._sync_thread:
|
||||
p._sync_thread.join(timeout=5.0)
|
||||
item = p._client.aretain_batch.call_args.kwargs["items"][0]
|
||||
assert item["context"] == "my-agent"
|
||||
|
||||
def test_sync_turn_every_n_turns(self, provider_with_config):
|
||||
"""With retain_every_n_turns=3, only retains on every 3rd turn."""
|
||||
p = provider_with_config(retain_every_n_turns=3)
|
||||
|
||||
p.sync_turn("turn1-user", "turn1-asst")
|
||||
assert p._sync_thread is None # not retained yet
|
||||
|
||||
p.sync_turn("turn2-user", "turn2-asst")
|
||||
assert p._sync_thread is None # not retained yet
|
||||
|
||||
p.sync_turn("turn3-user", "turn3-asst")
|
||||
assert p._sync_thread is not None # retained!
|
||||
p._sync_thread.join(timeout=5.0)
|
||||
|
||||
p._client.aretain_batch.assert_called_once()
|
||||
content = p._client.aretain_batch.call_args.kwargs["items"][0]["content"]
|
||||
# Should contain all 3 turns
|
||||
assert "turn1-user" in content
|
||||
assert "turn2-user" in content
|
||||
assert "turn3-user" in content
|
||||
|
||||
def test_sync_turn_accumulates_full_session(self, provider_with_config):
|
||||
"""Each retain sends the ENTIRE session, not just the latest batch."""
|
||||
p = provider_with_config(retain_every_n_turns=2)
|
||||
|
||||
p.sync_turn("turn1-user", "turn1-asst")
|
||||
p.sync_turn("turn2-user", "turn2-asst")
|
||||
if p._sync_thread:
|
||||
p._sync_thread.join(timeout=5.0)
|
||||
|
||||
p._client.aretain_batch.reset_mock()
|
||||
|
||||
p.sync_turn("turn3-user", "turn3-asst")
|
||||
p.sync_turn("turn4-user", "turn4-asst")
|
||||
if p._sync_thread:
|
||||
p._sync_thread.join(timeout=5.0)
|
||||
|
||||
content = p._client.aretain_batch.call_args.kwargs["items"][0]["content"]
|
||||
# Should contain ALL turns from the session
|
||||
assert "turn1-user" in content
|
||||
assert "turn2-user" in content
|
||||
assert "turn3-user" in content
|
||||
assert "turn4-user" in content
|
||||
|
||||
def test_sync_turn_passes_document_id(self, provider):
|
||||
"""sync_turn should pass session_id as document_id for dedup."""
|
||||
provider.sync_turn("hello", "hi")
|
||||
if provider._sync_thread:
|
||||
provider._sync_thread.join(timeout=5.0)
|
||||
call_kwargs = provider._client.aretain_batch.call_args.kwargs
|
||||
assert call_kwargs["document_id"] == "test-session"
|
||||
|
||||
def test_sync_turn_error_does_not_raise(self, provider):
|
||||
"""Errors in sync_turn should be swallowed (non-blocking)."""
|
||||
provider._client.aretain_batch.side_effect = RuntimeError("network error")
|
||||
provider.sync_turn("hello", "hi")
|
||||
if provider._sync_thread:
|
||||
provider._sync_thread.join(timeout=5.0)
|
||||
# Should not raise
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# System prompt tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSystemPrompt:
|
||||
def test_hybrid_mode_prompt(self, provider):
|
||||
block = provider.system_prompt_block()
|
||||
assert "Hindsight Memory" in block
|
||||
assert "hindsight_recall" in block
|
||||
assert "automatically injected" in block
|
||||
|
||||
def test_context_mode_prompt(self, provider_with_config):
|
||||
p = provider_with_config(memory_mode="context")
|
||||
block = p.system_prompt_block()
|
||||
assert "context mode" in block
|
||||
assert "hindsight_recall" not in block
|
||||
|
||||
def test_tools_mode_prompt(self, provider_with_config):
|
||||
p = provider_with_config(memory_mode="tools")
|
||||
block = p.system_prompt_block()
|
||||
assert "tools mode" in block
|
||||
assert "hindsight_recall" in block
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config schema tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestConfigSchema:
|
||||
def test_schema_has_all_new_fields(self, provider):
|
||||
schema = provider.get_config_schema()
|
||||
keys = {f["key"] for f in schema}
|
||||
expected_keys = {
|
||||
"mode", "api_url", "api_key", "llm_provider", "llm_api_key",
|
||||
"llm_model", "bank_id", "bank_mission", "bank_retain_mission",
|
||||
"recall_budget", "memory_mode", "recall_prefetch_method",
|
||||
"tags", "recall_tags", "recall_tags_match",
|
||||
"auto_recall", "auto_retain",
|
||||
"retain_every_n_turns", "retain_async",
|
||||
"retain_context",
|
||||
"recall_max_tokens", "recall_max_input_chars",
|
||||
"recall_prompt_preamble",
|
||||
}
|
||||
assert expected_keys.issubset(keys), f"Missing: {expected_keys - keys}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Availability tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAvailability:
|
||||
def test_available_with_api_key(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"plugins.memory.hindsight.get_hermes_home",
|
||||
lambda: tmp_path / "nonexistent",
|
||||
)
|
||||
monkeypatch.setenv("HINDSIGHT_API_KEY", "test-key")
|
||||
p = HindsightMemoryProvider()
|
||||
assert p.is_available()
|
||||
|
||||
def test_not_available_without_config(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"plugins.memory.hindsight.get_hermes_home",
|
||||
lambda: tmp_path / "nonexistent",
|
||||
)
|
||||
p = HindsightMemoryProvider()
|
||||
assert not p.is_available()
|
||||
|
||||
def test_available_in_local_mode(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"plugins.memory.hindsight.get_hermes_home",
|
||||
lambda: tmp_path / "nonexistent",
|
||||
)
|
||||
monkeypatch.setenv("HINDSIGHT_MODE", "local")
|
||||
p = HindsightMemoryProvider()
|
||||
assert p.is_available()
|
||||
@@ -150,8 +150,8 @@ def agent():
|
||||
class TestContextPressureFlags:
|
||||
"""Context pressure warning flag tracking on AIAgent."""
|
||||
|
||||
def test_flag_initialized_false(self, agent):
|
||||
assert agent._context_pressure_warned is False
|
||||
def test_flag_initialized_zero(self, agent):
|
||||
assert agent._context_pressure_warned_at == 0.0
|
||||
|
||||
def test_emit_calls_status_callback(self, agent):
|
||||
"""status_callback should be invoked with event type and message."""
|
||||
@@ -210,7 +210,7 @@ class TestContextPressureFlags:
|
||||
|
||||
def test_flag_reset_on_compression(self, agent):
|
||||
"""After _compress_context, context pressure flag should reset."""
|
||||
agent._context_pressure_warned = True
|
||||
agent._context_pressure_warned_at = 0.85
|
||||
agent.compression_enabled = True
|
||||
|
||||
agent.context_compressor = MagicMock()
|
||||
@@ -219,6 +219,7 @@ class TestContextPressureFlags:
|
||||
]
|
||||
agent.context_compressor.context_length = 200_000
|
||||
agent.context_compressor.threshold_tokens = 100_000
|
||||
agent.context_compressor.compression_count = 1
|
||||
|
||||
agent._todo_store = MagicMock()
|
||||
agent._todo_store.format_for_injection.return_value = None
|
||||
@@ -233,7 +234,7 @@ class TestContextPressureFlags:
|
||||
]
|
||||
agent._compress_context(messages, "system prompt")
|
||||
|
||||
assert agent._context_pressure_warned is False
|
||||
assert agent._context_pressure_warned_at == 0.0
|
||||
|
||||
def test_emit_callback_error_handled(self, agent):
|
||||
"""If status_callback raises, it should be caught gracefully."""
|
||||
@@ -246,3 +247,115 @@ class TestContextPressureFlags:
|
||||
|
||||
# Should not raise
|
||||
agent._emit_context_pressure(0.85, compressor)
|
||||
|
||||
def test_tiered_reemits_at_95(self, agent):
|
||||
"""Warning fires at 85%, then fires again when crossing 95%."""
|
||||
agent._context_pressure_warned_at = 0.85
|
||||
# Simulate crossing 95%: the tier (0.95) > warned_at (0.85)
|
||||
assert 0.95 > agent._context_pressure_warned_at
|
||||
# After emission at 95%, the tier should update
|
||||
agent._context_pressure_warned_at = 0.95
|
||||
assert agent._context_pressure_warned_at == 0.95
|
||||
|
||||
def test_tiered_no_double_emit_at_same_level(self, agent):
|
||||
"""Once warned at 85%, further 85%+ readings don't re-warn."""
|
||||
agent._context_pressure_warned_at = 0.85
|
||||
# At 88%, tier is 0.85, which is NOT > warned_at (0.85)
|
||||
_warn_tier = 0.85 if 0.88 >= 0.85 else 0.0
|
||||
assert not (_warn_tier > agent._context_pressure_warned_at)
|
||||
|
||||
def test_flag_not_reset_when_compression_insufficient(self, agent):
|
||||
"""When compression can't drop below 85%, keep the flag set."""
|
||||
agent._context_pressure_warned_at = 0.85
|
||||
agent.compression_enabled = True
|
||||
|
||||
agent.context_compressor = MagicMock()
|
||||
agent.context_compressor.compress.return_value = [
|
||||
{"role": "user", "content": "Summary of conversation so far."}
|
||||
]
|
||||
agent.context_compressor.context_length = 200
|
||||
# Use a small threshold so the tiny compressed output still
|
||||
# represents >= 85% of it (prevents flag reset).
|
||||
agent.context_compressor.threshold_tokens = 10
|
||||
agent.context_compressor.compression_count = 1
|
||||
agent.context_compressor.last_prompt_tokens = 0
|
||||
|
||||
agent._todo_store = MagicMock()
|
||||
agent._todo_store.format_for_injection.return_value = None
|
||||
agent._build_system_prompt = MagicMock(return_value="system prompt")
|
||||
agent._cached_system_prompt = "old system prompt"
|
||||
agent._session_db = None
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi there"},
|
||||
]
|
||||
agent._compress_context(messages, "system prompt")
|
||||
|
||||
# Post-compression is ~90% of threshold — flag should NOT reset
|
||||
assert agent._context_pressure_warned_at == 0.85
|
||||
|
||||
|
||||
class TestContextPressureGatewayDedup:
|
||||
"""Class-level dedup prevents warning spam across AIAgent instances."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Clear class-level dedup state between tests."""
|
||||
AIAgent._context_pressure_last_warned.clear()
|
||||
|
||||
def test_second_instance_within_cooldown_suppressed(self):
|
||||
"""Same session, same tier, within cooldown — should be suppressed."""
|
||||
import time
|
||||
sid = "test_session_dedup"
|
||||
# Simulate first warning
|
||||
AIAgent._context_pressure_last_warned[sid] = (0.85, time.time())
|
||||
# Second instance checking same tier within cooldown
|
||||
_last = AIAgent._context_pressure_last_warned.get(sid)
|
||||
_should_warn = _last is None or _last[0] < 0.85 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN
|
||||
assert not _should_warn
|
||||
|
||||
def test_higher_tier_fires_despite_cooldown(self):
|
||||
"""Same session, higher tier — should fire even within cooldown."""
|
||||
import time
|
||||
sid = "test_session_tier"
|
||||
AIAgent._context_pressure_last_warned[sid] = (0.85, time.time())
|
||||
_last = AIAgent._context_pressure_last_warned.get(sid)
|
||||
# 0.95 > 0.85 stored tier → should warn
|
||||
_should_warn = _last is None or _last[0] < 0.95 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN
|
||||
assert _should_warn
|
||||
|
||||
def test_warning_fires_after_cooldown_expires(self):
|
||||
"""Same session, same tier, after cooldown — should fire again."""
|
||||
import time
|
||||
sid = "test_session_expired"
|
||||
# Set a timestamp far in the past
|
||||
AIAgent._context_pressure_last_warned[sid] = (0.85, time.time() - AIAgent._CONTEXT_PRESSURE_COOLDOWN - 1)
|
||||
_last = AIAgent._context_pressure_last_warned.get(sid)
|
||||
_should_warn = _last is None or _last[0] < 0.85 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN
|
||||
assert _should_warn
|
||||
|
||||
def test_compression_clears_dedup(self):
|
||||
"""After compression drops below 85%, dedup entry should be cleared."""
|
||||
import time
|
||||
sid = "test_session_clear"
|
||||
AIAgent._context_pressure_last_warned[sid] = (0.85, time.time())
|
||||
assert sid in AIAgent._context_pressure_last_warned
|
||||
# Simulate what _compress_context does on reset
|
||||
AIAgent._context_pressure_last_warned.pop(sid, None)
|
||||
assert sid not in AIAgent._context_pressure_last_warned
|
||||
|
||||
def test_eviction_removes_stale_entries(self):
|
||||
"""Stale entries older than 2x cooldown should be evicted."""
|
||||
import time
|
||||
_now = time.time()
|
||||
AIAgent._context_pressure_last_warned = {
|
||||
"fresh": (0.85, _now),
|
||||
"stale": (0.85, _now - AIAgent._CONTEXT_PRESSURE_COOLDOWN * 3),
|
||||
}
|
||||
_cutoff = _now - AIAgent._CONTEXT_PRESSURE_COOLDOWN * 2
|
||||
AIAgent._context_pressure_last_warned = {
|
||||
k: v for k, v in AIAgent._context_pressure_last_warned.items()
|
||||
if v[1] > _cutoff
|
||||
}
|
||||
assert "fresh" in AIAgent._context_pressure_last_warned
|
||||
assert "stale" not in AIAgent._context_pressure_last_warned
|
||||
|
||||
@@ -91,6 +91,61 @@ def _chat_response_with_memory_call():
|
||||
)
|
||||
|
||||
|
||||
class TestFlushMemoriesRespectsConfigTimeout:
|
||||
"""flush_memories() must NOT hardcode timeout=30.0 — it should defer
|
||||
to the config value via auxiliary.flush_memories.timeout."""
|
||||
|
||||
def test_auxiliary_path_omits_explicit_timeout(self, monkeypatch):
|
||||
"""When calling _call_llm, timeout should NOT be passed so that
|
||||
_get_task_timeout('flush_memories') reads from config."""
|
||||
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
||||
|
||||
mock_response = _chat_response_with_memory_call()
|
||||
|
||||
with patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_call:
|
||||
messages = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi"},
|
||||
{"role": "user", "content": "Note this"},
|
||||
]
|
||||
with patch("tools.memory_tool.memory_tool", return_value="Saved."):
|
||||
agent.flush_memories(messages)
|
||||
|
||||
mock_call.assert_called_once()
|
||||
call_kwargs = mock_call.call_args
|
||||
# timeout must NOT be explicitly passed (so _get_task_timeout resolves it)
|
||||
assert "timeout" not in call_kwargs.kwargs, (
|
||||
"flush_memories should not pass explicit timeout to _call_llm; "
|
||||
"let _get_task_timeout('flush_memories') resolve from config"
|
||||
)
|
||||
|
||||
def test_fallback_path_uses_config_timeout(self, monkeypatch):
|
||||
"""When auxiliary client is unavailable and we fall back to direct
|
||||
OpenAI client, timeout should come from _get_task_timeout, not hardcoded."""
|
||||
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
||||
agent.client = MagicMock()
|
||||
agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
|
||||
|
||||
custom_timeout = 180.0
|
||||
|
||||
with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")), \
|
||||
patch("agent.auxiliary_client._get_task_timeout", return_value=custom_timeout) as mock_gtt, \
|
||||
patch("tools.memory_tool.memory_tool", return_value="Saved."):
|
||||
messages = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi"},
|
||||
{"role": "user", "content": "Save this"},
|
||||
]
|
||||
agent.flush_memories(messages)
|
||||
|
||||
mock_gtt.assert_called_once_with("flush_memories")
|
||||
agent.client.chat.completions.create.assert_called_once()
|
||||
call_kwargs = agent.client.chat.completions.create.call_args
|
||||
assert call_kwargs.kwargs.get("timeout") == custom_timeout, (
|
||||
f"Expected timeout={custom_timeout} from config, got {call_kwargs.kwargs.get('timeout')}"
|
||||
)
|
||||
|
||||
|
||||
class TestFlushMemoriesUsesAuxiliaryClient:
|
||||
"""When an auxiliary client is available, flush_memories should use it
|
||||
instead of self.client -- especially critical in Codex mode."""
|
||||
|
||||
@@ -872,6 +872,52 @@ class TestBuildApiKwargs:
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert kwargs["max_tokens"] == 4096
|
||||
|
||||
def test_qwen_portal_formats_messages_and_metadata(self, agent):
|
||||
agent.base_url = "https://portal.qwen.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.session_id = "sess-123"
|
||||
messages = [
|
||||
{"role": "system", "content": "You are helpful"},
|
||||
{"role": "assistant", "content": "Got it"},
|
||||
{"role": "user", "content": "hi"},
|
||||
]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert kwargs["metadata"]["sessionId"] == "sess-123"
|
||||
assert kwargs["extra_body"]["vl_high_resolution_images"] is True
|
||||
assert isinstance(kwargs["messages"][0]["content"], list)
|
||||
assert kwargs["messages"][0]["content"][0]["cache_control"] == {"type": "ephemeral"}
|
||||
assert kwargs["messages"][2]["content"][0]["text"] == "hi"
|
||||
|
||||
def test_qwen_portal_normalizes_bare_string_content_parts(self, agent):
|
||||
agent.base_url = "https://portal.qwen.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
messages = [
|
||||
{"role": "system", "content": [{"type": "text", "text": "system"}]},
|
||||
{"role": "user", "content": ["hello", {"type": "text", "text": "world"}]},
|
||||
]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
user_content = kwargs["messages"][1]["content"]
|
||||
assert user_content[0] == {"type": "text", "text": "hello"}
|
||||
assert user_content[1] == {"type": "text", "text": "world"}
|
||||
|
||||
def test_qwen_portal_no_system_message(self, agent):
|
||||
agent.base_url = "https://portal.qwen.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
# Should not crash even without a system message
|
||||
assert kwargs["messages"][0]["content"][0]["text"] == "hi"
|
||||
assert "cache_control" not in kwargs["messages"][0]["content"][0]
|
||||
|
||||
def test_qwen_portal_omits_max_tokens(self, agent):
|
||||
agent.base_url = "https://portal.qwen.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.max_tokens = 4096
|
||||
messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert "max_tokens" not in kwargs
|
||||
assert "max_completion_tokens" not in kwargs
|
||||
|
||||
|
||||
class TestBuildAssistantMessage:
|
||||
def test_basic_message(self, agent):
|
||||
@@ -1622,12 +1668,15 @@ class TestRunConversation:
|
||||
if roles[i] == "assistant" and roles[i + 1] == "assistant":
|
||||
raise AssertionError("Consecutive assistant messages found in history")
|
||||
|
||||
def test_truly_empty_response_accepted_without_retry(self, agent):
|
||||
"""Truly empty response (no content, no reasoning) should still complete with (empty)."""
|
||||
def test_truly_empty_response_retries_3_times_then_empty(self, agent):
|
||||
"""Truly empty response (no content, no reasoning) retries 3 times then falls through to (empty)."""
|
||||
self._setup_agent(agent)
|
||||
agent.base_url = "http://127.0.0.1:1234/v1"
|
||||
empty_resp = _mock_response(content=None, finish_reason="stop")
|
||||
agent.client.chat.completions.create.side_effect = [empty_resp]
|
||||
# 4 responses: 1 original + 3 nudge retries, all empty
|
||||
agent.client.chat.completions.create.side_effect = [
|
||||
empty_resp, empty_resp, empty_resp, empty_resp,
|
||||
]
|
||||
with (
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
@@ -1636,7 +1685,28 @@ class TestRunConversation:
|
||||
result = agent.run_conversation("answer me")
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "(empty)"
|
||||
assert result["api_calls"] == 1 # no retries
|
||||
assert result["api_calls"] == 4 # 1 original + 3 retries
|
||||
|
||||
def test_truly_empty_response_succeeds_on_nudge(self, agent):
|
||||
"""Model produces content after being nudged for empty response."""
|
||||
self._setup_agent(agent)
|
||||
agent.base_url = "http://127.0.0.1:1234/v1"
|
||||
empty_resp = _mock_response(content=None, finish_reason="stop")
|
||||
content_resp = _mock_response(
|
||||
content="Here is the actual answer.",
|
||||
finish_reason="stop",
|
||||
)
|
||||
# 1 empty response, then model produces content on nudge
|
||||
agent.client.chat.completions.create.side_effect = [empty_resp, content_resp]
|
||||
with (
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
patch.object(agent, "_cleanup_task_resources"),
|
||||
):
|
||||
result = agent.run_conversation("answer me")
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "Here is the actual answer."
|
||||
assert result["api_calls"] == 2 # 1 original + 1 nudge retry
|
||||
|
||||
def test_nous_401_refreshes_after_remint_and_retries(self, agent):
|
||||
self._setup_agent(agent)
|
||||
|
||||
@@ -658,6 +658,47 @@ def test_workspace_agents_records_skip_when_missing(tmp_path: Path):
|
||||
assert wa_items[0]["status"] == "skipped"
|
||||
|
||||
|
||||
def test_cron_store_is_archived_without_config_cron_section(tmp_path: Path):
|
||||
"""Bug fix: archive cron store even when openclaw.json has no top-level cron config."""
|
||||
mod = load_module()
|
||||
source = tmp_path / ".openclaw"
|
||||
target = tmp_path / ".hermes"
|
||||
output_dir = target / "migration-report"
|
||||
source.mkdir()
|
||||
target.mkdir()
|
||||
|
||||
(source / "openclaw.json").write_text(json.dumps({"channels": {}}), encoding="utf-8")
|
||||
(source / "cron").mkdir(parents=True)
|
||||
(source / "cron" / "jobs.json").write_text(
|
||||
json.dumps({"version": 1, "jobs": [{"id": "job-1", "name": "demo"}]}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
migrator = mod.Migrator(
|
||||
source_root=source,
|
||||
target_root=target,
|
||||
execute=True,
|
||||
workspace_target=None,
|
||||
overwrite=False,
|
||||
migrate_secrets=False,
|
||||
output_dir=output_dir,
|
||||
selected_options={"cron-jobs"},
|
||||
)
|
||||
report = migrator.migrate()
|
||||
|
||||
cron_items = [item for item in report["items"] if item["kind"] == "cron-jobs"]
|
||||
archived_store = next(
|
||||
(item for item in cron_items if item["destination"] and item["destination"].endswith("archive/cron-store")),
|
||||
None,
|
||||
)
|
||||
assert archived_store is not None
|
||||
assert Path(archived_store["destination"]).joinpath("jobs.json").exists()
|
||||
|
||||
notes_text = (output_dir / "MIGRATION_NOTES.md").read_text(encoding="utf-8")
|
||||
assert "Run `hermes cron` to recreate scheduled tasks" in notes_text
|
||||
assert "archive/cron-config.json" not in notes_text
|
||||
|
||||
|
||||
def test_skill_installs_cleanly_under_skills_guard():
|
||||
skills_guard = load_skills_guard()
|
||||
result = skills_guard.scan_skill(
|
||||
|
||||
@@ -0,0 +1,319 @@
|
||||
"""Tests for the context-halving bugfix.
|
||||
|
||||
Background
|
||||
----------
|
||||
When the API returns "max_tokens too large given prompt" (input is fine,
|
||||
but input_tokens + requested max_tokens > context_window), the old code
|
||||
incorrectly halved context_length via get_next_probe_tier().
|
||||
|
||||
The fix introduces:
|
||||
* parse_available_output_tokens_from_error() — detects this specific
|
||||
error class and returns the available output token budget.
|
||||
* _ephemeral_max_output_tokens on AIAgent — a one-shot override that
|
||||
caps the output for one retry without touching context_length.
|
||||
|
||||
Naming note
|
||||
-----------
|
||||
max_tokens = OUTPUT token cap (a single response).
|
||||
context_length = TOTAL context window (input + output combined).
|
||||
These are different and the old code conflated them; the fix keeps them
|
||||
separate.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
from unittest.mock import MagicMock, patch, PropertyMock
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# parse_available_output_tokens_from_error — unit tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestParseAvailableOutputTokens:
|
||||
"""Pure-function tests; no I/O required."""
|
||||
|
||||
def _parse(self, msg):
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
return parse_available_output_tokens_from_error(msg)
|
||||
|
||||
# ── Should detect and extract ────────────────────────────────────────
|
||||
|
||||
def test_anthropic_canonical_format(self):
|
||||
"""Canonical Anthropic error: max_tokens: X > context_window: Y - input_tokens: Z = available_tokens: W"""
|
||||
msg = (
|
||||
"max_tokens: 32768 > context_window: 200000 "
|
||||
"- input_tokens: 190000 = available_tokens: 10000"
|
||||
)
|
||||
assert self._parse(msg) == 10000
|
||||
|
||||
def test_anthropic_format_large_numbers(self):
|
||||
msg = (
|
||||
"max_tokens: 128000 > context_window: 200000 "
|
||||
"- input_tokens: 180000 = available_tokens: 20000"
|
||||
)
|
||||
assert self._parse(msg) == 20000
|
||||
|
||||
def test_available_tokens_variant_spacing(self):
|
||||
"""Handles extra spaces around the colon."""
|
||||
msg = "max_tokens: 32768 > 200000 available_tokens : 5000"
|
||||
assert self._parse(msg) == 5000
|
||||
|
||||
def test_available_tokens_natural_language(self):
|
||||
"""'available tokens: N' wording (no underscore)."""
|
||||
msg = "max_tokens must be at most 10000 given your prompt (available tokens: 10000)"
|
||||
assert self._parse(msg) == 10000
|
||||
|
||||
def test_single_token_available(self):
|
||||
"""Edge case: only 1 token left."""
|
||||
msg = "max_tokens: 9999 > context_window: 10000 - input_tokens: 9999 = available_tokens: 1"
|
||||
assert self._parse(msg) == 1
|
||||
|
||||
# ── Should NOT detect (returns None) ─────────────────────────────────
|
||||
|
||||
def test_prompt_too_long_is_not_output_cap_error(self):
|
||||
"""'prompt is too long' errors must NOT be caught — they need context halving."""
|
||||
msg = "prompt is too long: 205000 tokens > 200000 maximum"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
def test_generic_context_window_exceeded(self):
|
||||
"""Generic context window errors without available_tokens should not match."""
|
||||
msg = "context window exceeded: maximum is 32768 tokens"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
def test_context_length_exceeded(self):
|
||||
msg = "context_length_exceeded: prompt has 131073 tokens, limit is 131072"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
def test_no_max_tokens_keyword(self):
|
||||
"""Error not related to max_tokens at all."""
|
||||
msg = "invalid_api_key: the API key is invalid"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
def test_empty_string(self):
|
||||
assert self._parse("") is None
|
||||
|
||||
def test_rate_limit_error(self):
|
||||
msg = "rate_limit_error: too many requests per minute"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_anthropic_kwargs — output cap clamping
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBuildAnthropicKwargsClamping:
|
||||
"""The context_length clamp only fires when output ceiling > window.
|
||||
For standard Anthropic models (output ceiling < window) it must not fire.
|
||||
"""
|
||||
|
||||
def _build(self, model, max_tokens=None, context_length=None):
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
return build_anthropic_kwargs(
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
tools=None,
|
||||
max_tokens=max_tokens,
|
||||
reasoning_config=None,
|
||||
context_length=context_length,
|
||||
)
|
||||
|
||||
def test_no_clamping_when_output_ceiling_fits_in_window(self):
|
||||
"""Opus 4.6 native output (128K) < context window (200K) — no clamping."""
|
||||
kwargs = self._build("claude-opus-4-6", context_length=200_000)
|
||||
assert kwargs["max_tokens"] == 128_000
|
||||
|
||||
def test_clamping_fires_for_tiny_custom_window(self):
|
||||
"""When context_length is 8K (local model), output cap is clamped to 7999."""
|
||||
kwargs = self._build("claude-opus-4-6", context_length=8_000)
|
||||
assert kwargs["max_tokens"] == 7_999
|
||||
|
||||
def test_explicit_max_tokens_respected_when_within_window(self):
|
||||
"""Explicit max_tokens smaller than window passes through unchanged."""
|
||||
kwargs = self._build("claude-opus-4-6", max_tokens=4096, context_length=200_000)
|
||||
assert kwargs["max_tokens"] == 4096
|
||||
|
||||
def test_explicit_max_tokens_clamped_when_exceeds_window(self):
|
||||
"""Explicit max_tokens larger than a small window is clamped."""
|
||||
kwargs = self._build("claude-opus-4-6", max_tokens=32_768, context_length=16_000)
|
||||
assert kwargs["max_tokens"] == 15_999
|
||||
|
||||
def test_no_context_length_uses_native_ceiling(self):
|
||||
"""Without context_length the native output ceiling is used directly."""
|
||||
kwargs = self._build("claude-sonnet-4-6")
|
||||
assert kwargs["max_tokens"] == 64_000
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ephemeral max_tokens mechanism — _build_api_kwargs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEphemeralMaxOutputTokens:
|
||||
"""_build_api_kwargs consumes _ephemeral_max_output_tokens exactly once
|
||||
and falls back to self.max_tokens on subsequent calls.
|
||||
"""
|
||||
|
||||
def _make_agent(self):
|
||||
"""Return a minimal AIAgent with api_mode='anthropic_messages' and
|
||||
a stubbed context_compressor, bypassing full __init__ cost."""
|
||||
from run_agent import AIAgent
|
||||
agent = object.__new__(AIAgent)
|
||||
# Minimal attributes used by _build_api_kwargs
|
||||
agent.api_mode = "anthropic_messages"
|
||||
agent.model = "claude-opus-4-6"
|
||||
agent.tools = []
|
||||
agent.max_tokens = None
|
||||
agent.reasoning_config = None
|
||||
agent._is_anthropic_oauth = False
|
||||
agent._ephemeral_max_output_tokens = None
|
||||
|
||||
compressor = MagicMock()
|
||||
compressor.context_length = 200_000
|
||||
agent.context_compressor = compressor
|
||||
|
||||
# Stub out the internal message-preparation helper
|
||||
agent._prepare_anthropic_messages_for_api = MagicMock(
|
||||
return_value=[{"role": "user", "content": "hi"}]
|
||||
)
|
||||
agent._anthropic_preserve_dots = MagicMock(return_value=False)
|
||||
return agent
|
||||
|
||||
def test_ephemeral_override_is_used_on_first_call(self):
|
||||
"""When _ephemeral_max_output_tokens is set, it overrides self.max_tokens."""
|
||||
agent = self._make_agent()
|
||||
agent._ephemeral_max_output_tokens = 5_000
|
||||
|
||||
kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
assert kwargs["max_tokens"] == 5_000
|
||||
|
||||
def test_ephemeral_override_is_consumed_after_one_call(self):
|
||||
"""After one call the ephemeral override is cleared to None."""
|
||||
agent = self._make_agent()
|
||||
agent._ephemeral_max_output_tokens = 5_000
|
||||
|
||||
agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
assert agent._ephemeral_max_output_tokens is None
|
||||
|
||||
def test_subsequent_call_uses_self_max_tokens(self):
|
||||
"""A second _build_api_kwargs call uses the normal max_tokens path."""
|
||||
agent = self._make_agent()
|
||||
agent._ephemeral_max_output_tokens = 5_000
|
||||
agent.max_tokens = None # will resolve to native ceiling (128K for Opus 4.6)
|
||||
|
||||
agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
# Second call — ephemeral is gone
|
||||
kwargs2 = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
assert kwargs2["max_tokens"] == 128_000 # Opus 4.6 native ceiling
|
||||
|
||||
def test_no_ephemeral_uses_self_max_tokens_directly(self):
|
||||
"""Without an ephemeral override, self.max_tokens is used normally."""
|
||||
agent = self._make_agent()
|
||||
agent.max_tokens = 8_192
|
||||
|
||||
kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
assert kwargs["max_tokens"] == 8_192
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration: error handler does NOT halve context_length for output-cap errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestContextNotHalvedOnOutputCapError:
|
||||
"""When the API returns 'max_tokens too large given prompt', the handler
|
||||
must set _ephemeral_max_output_tokens and NOT modify context_length.
|
||||
"""
|
||||
|
||||
def _make_agent_with_compressor(self, context_length=200_000):
|
||||
from run_agent import AIAgent
|
||||
from agent.context_compressor import ContextCompressor
|
||||
|
||||
agent = object.__new__(AIAgent)
|
||||
agent.api_mode = "anthropic_messages"
|
||||
agent.model = "claude-opus-4-6"
|
||||
agent.base_url = "https://api.anthropic.com"
|
||||
agent.tools = []
|
||||
agent.max_tokens = None
|
||||
agent.reasoning_config = None
|
||||
agent._is_anthropic_oauth = False
|
||||
agent._ephemeral_max_output_tokens = None
|
||||
agent.log_prefix = ""
|
||||
agent.quiet_mode = True
|
||||
agent.verbose_logging = False
|
||||
|
||||
compressor = MagicMock(spec=ContextCompressor)
|
||||
compressor.context_length = context_length
|
||||
compressor.threshold_percent = 0.75
|
||||
agent.context_compressor = compressor
|
||||
|
||||
agent._prepare_anthropic_messages_for_api = MagicMock(
|
||||
return_value=[{"role": "user", "content": "hi"}]
|
||||
)
|
||||
agent._anthropic_preserve_dots = MagicMock(return_value=False)
|
||||
agent._vprint = MagicMock()
|
||||
return agent
|
||||
|
||||
def test_output_cap_error_sets_ephemeral_not_context_length(self):
|
||||
"""On 'max_tokens too large' error, _ephemeral_max_output_tokens is set
|
||||
and compressor.context_length is left unchanged."""
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
from agent.model_metadata import get_next_probe_tier
|
||||
|
||||
error_msg = (
|
||||
"max_tokens: 128000 > context_window: 200000 "
|
||||
"- input_tokens: 180000 = available_tokens: 20000"
|
||||
)
|
||||
|
||||
# Simulate the handler logic from run_agent.py
|
||||
agent = self._make_agent_with_compressor(context_length=200_000)
|
||||
old_ctx = agent.context_compressor.context_length
|
||||
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
assert available_out == 20_000, "parser must detect the error"
|
||||
|
||||
# The fix: set ephemeral, skip context_length modification
|
||||
agent._ephemeral_max_output_tokens = max(1, available_out - 64)
|
||||
|
||||
# context_length must be untouched
|
||||
assert agent.context_compressor.context_length == old_ctx
|
||||
assert agent._ephemeral_max_output_tokens == 19_936
|
||||
|
||||
def test_prompt_too_long_still_triggers_probe_tier(self):
|
||||
"""Genuine prompt-too-long errors must still use get_next_probe_tier."""
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
from agent.model_metadata import get_next_probe_tier
|
||||
|
||||
error_msg = "prompt is too long: 205000 tokens > 200000 maximum"
|
||||
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
assert available_out is None, "prompt-too-long must not be caught by output-cap parser"
|
||||
|
||||
# The old halving path is still used for this class of error
|
||||
new_ctx = get_next_probe_tier(200_000)
|
||||
assert new_ctx == 128_000
|
||||
|
||||
def test_output_cap_error_safety_margin(self):
|
||||
"""The ephemeral value includes a 64-token safety margin below available_out."""
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
|
||||
error_msg = (
|
||||
"max_tokens: 32768 > context_window: 200000 "
|
||||
"- input_tokens: 190000 = available_tokens: 10000"
|
||||
)
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
safe_out = max(1, available_out - 64)
|
||||
assert safe_out == 9_936
|
||||
|
||||
def test_safety_margin_never_goes_below_one(self):
|
||||
"""When available_out is very small, safe_out must be at least 1."""
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
|
||||
error_msg = (
|
||||
"max_tokens: 10 > context_window: 200000 "
|
||||
"- input_tokens: 199990 = available_tokens: 1"
|
||||
)
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
safe_out = max(1, available_out - 64)
|
||||
assert safe_out == 1
|
||||
@@ -663,6 +663,84 @@ class TestPruneSessions:
|
||||
assert db.get_session("old_cli") is None
|
||||
assert db.get_session("old_tg") is not None
|
||||
|
||||
def test_prune_with_multilevel_chain(self, db):
|
||||
"""Pruning old sessions orphans newer children instead of crashing on FK."""
|
||||
old_ts = time.time() - 200 * 86400
|
||||
recent_ts = time.time() - 10 * 86400
|
||||
|
||||
# Chain: A (old) -> B (old) -> C (recent) -> D (recent)
|
||||
db.create_session(session_id="A", source="cli")
|
||||
db.end_session("A", end_reason="compressed")
|
||||
db.create_session(session_id="B", source="cli", parent_session_id="A")
|
||||
db.end_session("B", end_reason="compressed")
|
||||
db.create_session(session_id="C", source="cli", parent_session_id="B")
|
||||
db.end_session("C", end_reason="compressed")
|
||||
db.create_session(session_id="D", source="cli", parent_session_id="C")
|
||||
db.end_session("D", end_reason="done")
|
||||
|
||||
# Backdate A and B to be old; C and D stay recent
|
||||
for sid, ts in [("A", old_ts), ("B", old_ts), ("C", recent_ts), ("D", recent_ts)]:
|
||||
db._conn.execute(
|
||||
"UPDATE sessions SET started_at = ? WHERE id = ?", (ts, sid)
|
||||
)
|
||||
db._conn.commit()
|
||||
|
||||
# Should not raise IntegrityError
|
||||
pruned = db.prune_sessions(older_than_days=90)
|
||||
assert pruned == 2 # only A and B
|
||||
assert db.get_session("A") is None
|
||||
assert db.get_session("B") is None
|
||||
# C and D survive, C is orphaned (parent_session_id NULL)
|
||||
c = db.get_session("C")
|
||||
assert c is not None
|
||||
assert c["parent_session_id"] is None
|
||||
d = db.get_session("D")
|
||||
assert d is not None
|
||||
assert d["parent_session_id"] == "C"
|
||||
|
||||
def test_prune_entire_old_chain(self, db):
|
||||
"""All sessions in a chain are old — entire chain is pruned."""
|
||||
old_ts = time.time() - 200 * 86400
|
||||
|
||||
db.create_session(session_id="X", source="cli")
|
||||
db.end_session("X", end_reason="compressed")
|
||||
db.create_session(session_id="Y", source="cli", parent_session_id="X")
|
||||
db.end_session("Y", end_reason="compressed")
|
||||
db.create_session(session_id="Z", source="cli", parent_session_id="Y")
|
||||
db.end_session("Z", end_reason="done")
|
||||
|
||||
for sid in ("X", "Y", "Z"):
|
||||
db._conn.execute(
|
||||
"UPDATE sessions SET started_at = ? WHERE id = ?", (old_ts, sid)
|
||||
)
|
||||
db._conn.commit()
|
||||
|
||||
pruned = db.prune_sessions(older_than_days=90)
|
||||
assert pruned == 3
|
||||
for sid in ("X", "Y", "Z"):
|
||||
assert db.get_session(sid) is None
|
||||
|
||||
|
||||
class TestDeleteSessionOrphansChildren:
|
||||
def test_delete_orphans_children(self, db):
|
||||
"""Deleting a parent session orphans its children."""
|
||||
db.create_session(session_id="parent", source="cli")
|
||||
db.create_session(session_id="child", source="cli", parent_session_id="parent")
|
||||
db.create_session(session_id="grandchild", source="cli", parent_session_id="child")
|
||||
|
||||
# Should not raise IntegrityError
|
||||
result = db.delete_session("parent")
|
||||
assert result is True
|
||||
assert db.get_session("parent") is None
|
||||
# Child is orphaned, not deleted
|
||||
child = db.get_session("child")
|
||||
assert child is not None
|
||||
assert child["parent_session_id"] is None
|
||||
# Grandchild is untouched
|
||||
grandchild = db.get_session("grandchild")
|
||||
assert grandchild is not None
|
||||
assert grandchild["parent_session_id"] == "child"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Schema and WAL mode
|
||||
|
||||
@@ -0,0 +1,174 @@
|
||||
"""Tests for BaseEnvironment unified execution model.
|
||||
|
||||
Tests _wrap_command(), _extract_cwd_from_output(), _embed_stdin_heredoc(),
|
||||
init_session() failure handling, and the CWD marker contract.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from tools.environments.base import BaseEnvironment, _cwd_marker
|
||||
|
||||
|
||||
class _TestableEnv(BaseEnvironment):
|
||||
"""Concrete subclass for testing base class methods."""
|
||||
|
||||
def __init__(self, cwd="/tmp", timeout=10):
|
||||
super().__init__(cwd=cwd, timeout=timeout)
|
||||
|
||||
def _run_bash(self, cmd_string, *, login=False, timeout=120, stdin_data=None):
|
||||
raise NotImplementedError("Use mock")
|
||||
|
||||
def cleanup(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestWrapCommand:
|
||||
def test_basic_shape(self):
|
||||
env = _TestableEnv()
|
||||
env._snapshot_ready = True
|
||||
wrapped = env._wrap_command("echo hello", "/tmp")
|
||||
|
||||
assert "source" in wrapped
|
||||
assert "cd /tmp" in wrapped or "cd '/tmp'" in wrapped
|
||||
assert "eval 'echo hello'" in wrapped
|
||||
assert "__hermes_ec=$?" in wrapped
|
||||
assert "export -p >" in wrapped
|
||||
assert "pwd -P >" in wrapped
|
||||
assert env._cwd_marker in wrapped
|
||||
assert "exit $__hermes_ec" in wrapped
|
||||
|
||||
def test_no_snapshot_skips_source(self):
|
||||
env = _TestableEnv()
|
||||
env._snapshot_ready = False
|
||||
wrapped = env._wrap_command("echo hello", "/tmp")
|
||||
|
||||
assert "source" not in wrapped
|
||||
|
||||
def test_single_quote_escaping(self):
|
||||
env = _TestableEnv()
|
||||
env._snapshot_ready = True
|
||||
wrapped = env._wrap_command("echo 'hello world'", "/tmp")
|
||||
|
||||
assert "eval 'echo '\\''hello world'\\'''" in wrapped
|
||||
|
||||
def test_tilde_not_quoted(self):
|
||||
env = _TestableEnv()
|
||||
env._snapshot_ready = True
|
||||
wrapped = env._wrap_command("ls", "~")
|
||||
|
||||
assert "cd ~" in wrapped
|
||||
assert "cd '~'" not in wrapped
|
||||
|
||||
def test_cd_failure_exit_126(self):
|
||||
env = _TestableEnv()
|
||||
env._snapshot_ready = True
|
||||
wrapped = env._wrap_command("ls", "/nonexistent")
|
||||
|
||||
assert "exit 126" in wrapped
|
||||
|
||||
|
||||
class TestExtractCwdFromOutput:
|
||||
def test_happy_path(self):
|
||||
env = _TestableEnv()
|
||||
marker = env._cwd_marker
|
||||
result = {
|
||||
"output": f"hello\n{marker}/home/user{marker}\n",
|
||||
}
|
||||
env._extract_cwd_from_output(result)
|
||||
|
||||
assert env.cwd == "/home/user"
|
||||
assert marker not in result["output"]
|
||||
|
||||
def test_missing_marker(self):
|
||||
env = _TestableEnv()
|
||||
result = {"output": "hello world\n"}
|
||||
env._extract_cwd_from_output(result)
|
||||
|
||||
assert env.cwd == "/tmp" # unchanged
|
||||
|
||||
def test_marker_in_command_output(self):
|
||||
"""If the marker appears in command output AND as the real marker,
|
||||
rfind grabs the last (real) one."""
|
||||
env = _TestableEnv()
|
||||
marker = env._cwd_marker
|
||||
result = {
|
||||
"output": f"user typed {marker} in their output\nreal output\n{marker}/correct/path{marker}\n",
|
||||
}
|
||||
env._extract_cwd_from_output(result)
|
||||
|
||||
assert env.cwd == "/correct/path"
|
||||
|
||||
def test_output_cleaned(self):
|
||||
env = _TestableEnv()
|
||||
marker = env._cwd_marker
|
||||
result = {
|
||||
"output": f"hello\n{marker}/tmp{marker}\n",
|
||||
}
|
||||
env._extract_cwd_from_output(result)
|
||||
|
||||
assert "hello" in result["output"]
|
||||
assert marker not in result["output"]
|
||||
|
||||
|
||||
class TestEmbedStdinHeredoc:
|
||||
def test_heredoc_format(self):
|
||||
result = BaseEnvironment._embed_stdin_heredoc("cat", "hello world")
|
||||
|
||||
assert result.startswith("cat << '")
|
||||
assert "hello world" in result
|
||||
assert "HERMES_STDIN_" in result
|
||||
|
||||
def test_unique_delimiter_each_call(self):
|
||||
r1 = BaseEnvironment._embed_stdin_heredoc("cat", "data")
|
||||
r2 = BaseEnvironment._embed_stdin_heredoc("cat", "data")
|
||||
|
||||
# Extract delimiters
|
||||
d1 = r1.split("'")[1]
|
||||
d2 = r2.split("'")[1]
|
||||
assert d1 != d2 # UUID-based, should be unique
|
||||
|
||||
|
||||
class TestInitSessionFailure:
|
||||
def test_snapshot_ready_false_on_failure(self):
|
||||
env = _TestableEnv()
|
||||
|
||||
def failing_run_bash(*args, **kwargs):
|
||||
raise RuntimeError("bash not found")
|
||||
|
||||
env._run_bash = failing_run_bash
|
||||
env.init_session()
|
||||
|
||||
assert env._snapshot_ready is False
|
||||
|
||||
def test_login_flag_when_snapshot_not_ready(self):
|
||||
"""When _snapshot_ready=False, execute() should pass login=True to _run_bash."""
|
||||
env = _TestableEnv()
|
||||
env._snapshot_ready = False
|
||||
|
||||
calls = []
|
||||
def mock_run_bash(cmd, *, login=False, timeout=120, stdin_data=None):
|
||||
calls.append({"login": login})
|
||||
# Return a mock process handle
|
||||
mock = MagicMock()
|
||||
mock.poll.return_value = 0
|
||||
mock.returncode = 0
|
||||
mock.stdout = iter([])
|
||||
return mock
|
||||
|
||||
env._run_bash = mock_run_bash
|
||||
env.execute("echo test")
|
||||
|
||||
assert len(calls) == 1
|
||||
assert calls[0]["login"] is True
|
||||
|
||||
|
||||
class TestCwdMarker:
|
||||
def test_marker_contains_session_id(self):
|
||||
env = _TestableEnv()
|
||||
assert env._session_id in env._cwd_marker
|
||||
|
||||
def test_unique_per_instance(self):
|
||||
env1 = _TestableEnv()
|
||||
env2 = _TestableEnv()
|
||||
assert env1._cwd_marker != env2._cwd_marker
|
||||
@@ -16,6 +16,7 @@ from tools.browser_camofox import (
|
||||
_managed_persistence_enabled,
|
||||
camofox_close,
|
||||
camofox_navigate,
|
||||
camofox_soft_cleanup,
|
||||
check_camofox_available,
|
||||
cleanup_all_camofox_sessions,
|
||||
get_vnc_url,
|
||||
@@ -240,3 +241,50 @@ class TestVncUrlDiscovery:
|
||||
|
||||
assert result["vnc_url"] == "http://localhost:6080"
|
||||
assert "vnc_hint" in result
|
||||
|
||||
|
||||
class TestCamofoxSoftCleanup:
|
||||
"""camofox_soft_cleanup drops local state only when managed persistence is on."""
|
||||
|
||||
def test_returns_true_and_drops_session_when_enabled(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
with _enable_persistence():
|
||||
_get_session("task-1")
|
||||
result = camofox_soft_cleanup("task-1")
|
||||
|
||||
assert result is True
|
||||
# Session should have been dropped from in-memory store
|
||||
import tools.browser_camofox as mod
|
||||
with mod._sessions_lock:
|
||||
assert "task-1" not in mod._sessions
|
||||
|
||||
def test_returns_false_when_disabled(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
_get_session("task-1")
|
||||
config = {"browser": {"camofox": {"managed_persistence": False}}}
|
||||
with patch("tools.browser_camofox.load_config", return_value=config):
|
||||
result = camofox_soft_cleanup("task-1")
|
||||
|
||||
assert result is False
|
||||
# Session should still be present — not dropped
|
||||
import tools.browser_camofox as mod
|
||||
with mod._sessions_lock:
|
||||
assert "task-1" in mod._sessions
|
||||
|
||||
def test_does_not_call_server_delete(self, tmp_path, monkeypatch):
|
||||
"""Soft cleanup must never hit the Camofox /sessions DELETE endpoint."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
with (
|
||||
_enable_persistence(),
|
||||
patch("tools.browser_camofox.requests.delete") as mock_delete,
|
||||
):
|
||||
_get_session("task-1")
|
||||
camofox_soft_cleanup("task-1")
|
||||
|
||||
mock_delete.assert_not_called()
|
||||
|
||||
@@ -63,4 +63,4 @@ class TestCamofoxConfigDefaults:
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
# managed_persistence is auto-merged by _deep_merge, no version bump needed
|
||||
assert DEFAULT_CONFIG["_config_version"] == 12
|
||||
assert DEFAULT_CONFIG["_config_version"] == 13
|
||||
|
||||
@@ -65,6 +65,62 @@ class TestBrowserCleanup:
|
||||
mock_stop.assert_called_once_with("task-1")
|
||||
mock_run.assert_called_once_with("task-1", "close", [], timeout=10)
|
||||
|
||||
def test_cleanup_camofox_managed_persistence_skips_close(self):
|
||||
"""When camofox mode + managed persistence, soft_cleanup fires instead of close."""
|
||||
browser_tool = self.browser_tool
|
||||
browser_tool._active_sessions["task-1"] = {
|
||||
"session_name": "sess-1",
|
||||
"bb_session_id": None,
|
||||
}
|
||||
browser_tool._session_last_activity["task-1"] = 123.0
|
||||
|
||||
with (
|
||||
patch("tools.browser_tool._is_camofox_mode", return_value=True),
|
||||
patch("tools.browser_tool._maybe_stop_recording") as mock_stop,
|
||||
patch(
|
||||
"tools.browser_tool._run_browser_command",
|
||||
return_value={"success": True},
|
||||
),
|
||||
patch("tools.browser_tool.os.path.exists", return_value=False),
|
||||
patch(
|
||||
"tools.browser_camofox.camofox_soft_cleanup",
|
||||
return_value=True,
|
||||
) as mock_soft,
|
||||
patch("tools.browser_camofox.camofox_close") as mock_close,
|
||||
):
|
||||
browser_tool.cleanup_browser("task-1")
|
||||
|
||||
mock_soft.assert_called_once_with("task-1")
|
||||
mock_close.assert_not_called()
|
||||
|
||||
def test_cleanup_camofox_no_persistence_calls_close(self):
|
||||
"""When camofox mode but managed persistence is off, camofox_close fires."""
|
||||
browser_tool = self.browser_tool
|
||||
browser_tool._active_sessions["task-1"] = {
|
||||
"session_name": "sess-1",
|
||||
"bb_session_id": None,
|
||||
}
|
||||
browser_tool._session_last_activity["task-1"] = 123.0
|
||||
|
||||
with (
|
||||
patch("tools.browser_tool._is_camofox_mode", return_value=True),
|
||||
patch("tools.browser_tool._maybe_stop_recording") as mock_stop,
|
||||
patch(
|
||||
"tools.browser_tool._run_browser_command",
|
||||
return_value={"success": True},
|
||||
),
|
||||
patch("tools.browser_tool.os.path.exists", return_value=False),
|
||||
patch(
|
||||
"tools.browser_camofox.camofox_soft_cleanup",
|
||||
return_value=False,
|
||||
) as mock_soft,
|
||||
patch("tools.browser_camofox.camofox_close") as mock_close,
|
||||
):
|
||||
browser_tool.cleanup_browser("task-1")
|
||||
|
||||
mock_soft.assert_called_once_with("task-1")
|
||||
mock_close.assert_called_once_with("task-1")
|
||||
|
||||
def test_emergency_cleanup_clears_all_tracking_state(self):
|
||||
browser_tool = self.browser_tool
|
||||
browser_tool._cleanup_done = False
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user