Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| add160fd1b | |||
| 51cf4e0bbc | |||
| 72bd14e09d | |||
| 2fe8fd8720 | |||
| ab35753c52 | |||
| 51bd4aecff | |||
| bead55bbcb | |||
| 5c491734f8 | |||
| 454edc7771 | |||
| 49d1390b40 | |||
| 7046d9b834 | |||
| acbbdc05d4 | |||
| 9cbfa1309b | |||
| 3dfce74099 | |||
| 431262f9a5 | |||
| ec43496d5a | |||
| ef8a985ee3 | |||
| 0482133559 | |||
| 637bbbee7e | |||
| cd814392ae | |||
| fe2e1c16c6 |
@@ -81,14 +81,6 @@
|
||||
# HF_TOKEN=
|
||||
# OPENCODE_GO_BASE_URL=https://opencode.ai/zen/go/v1 # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Qwen OAuth)
|
||||
# =============================================================================
|
||||
# Qwen OAuth reuses your local Qwen CLI login (qwen auth qwen-oauth).
|
||||
# No API key needed — credentials come from ~/.qwen/oauth_creds.json.
|
||||
# Optional base URL override:
|
||||
# HERMES_QWEN_BASE_URL=https://portal.qwen.ai/v1
|
||||
|
||||
# =============================================================================
|
||||
# TOOL API KEYS
|
||||
# =============================================================================
|
||||
|
||||
@@ -8,9 +8,6 @@ on:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: docker-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
@@ -20,29 +17,22 @@ jobs:
|
||||
# Only run on the upstream repository, not on forks
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 60
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
# Build amd64 only so we can `load` the image for smoke testing.
|
||||
# `load: true` cannot export a multi-arch manifest to the local daemon.
|
||||
# The multi-arch build follows on push to main / release.
|
||||
- name: Build image (amd64, smoke test)
|
||||
- name: Build image
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/amd64
|
||||
tags: nousresearch/hermes-agent:test
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
@@ -61,28 +51,26 @@ jobs:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Push multi-arch image (main branch)
|
||||
- name: Push image (main branch)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: |
|
||||
nousresearch/hermes-agent:latest
|
||||
nousresearch/hermes-agent:${{ github.sha }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Push multi-arch image (release)
|
||||
- name: Push image (release)
|
||||
if: github.event_name == 'release'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: |
|
||||
nousresearch/hermes-agent:latest
|
||||
nousresearch/hermes-agent:${{ github.event.release.tag_name }}
|
||||
|
||||
@@ -27,8 +27,8 @@ jobs:
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install ascii-guard
|
||||
run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
|
||||
- name: Install Python dependencies
|
||||
run: python -m pip install ascii-guard pyyaml
|
||||
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
@@ -27,8 +27,8 @@ jobs:
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
|
||||
- uses: DeterminateSystems/nix-installer-action@main
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@main
|
||||
- name: Check flake
|
||||
if: runner.os == 'Linux'
|
||||
run: nix flake check --print-build-logs
|
||||
|
||||
@@ -19,9 +19,6 @@ jobs:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
|
||||
|
||||
+2
-6
@@ -1,20 +1,16 @@
|
||||
FROM debian:13.4
|
||||
|
||||
# Disable Python stdout buffering to ensure logs are printed immediately
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# Install system dependencies in one layer, clear APT cache
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev procps && \
|
||||
build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY . /opt/hermes
|
||||
WORKDIR /opt/hermes
|
||||
|
||||
# Install Python and Node dependencies in one layer, no cache
|
||||
RUN pip install --no-cache-dir uv --break-system-packages && \
|
||||
uv pip install --system --break-system-packages --no-cache -e ".[all]" && \
|
||||
RUN pip install --no-cache-dir -e ".[all]" --break-system-packages && \
|
||||
npm install --prefer-offline --no-audit && \
|
||||
npx playwright install --with-deps chromium --only-shell && \
|
||||
cd /opt/hermes/scripts/whatsapp-bridge && \
|
||||
|
||||
@@ -33,10 +33,8 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open
|
||||
curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
|
||||
```
|
||||
|
||||
Works on Linux, macOS, WSL2, and Android via Termux. The installer handles the platform-specific setup for you.
|
||||
Works on Linux, macOS, and WSL2. The installer handles everything — Python, Node.js, dependencies, and the `hermes` command. No prerequisites except git.
|
||||
|
||||
> **Android / Termux:** The tested manual path is documented in the [Termux guide](https://hermes-agent.nousresearch.com/docs/getting-started/termux). On Termux, Hermes installs a curated `.[termux]` extra because the full `.[all]` extra currently pulls Android-incompatible voice dependencies.
|
||||
>
|
||||
> **Windows:** Native Windows is not supported. Please install [WSL2](https://learn.microsoft.com/en-us/windows/wsl/install) and run the command above.
|
||||
|
||||
After installation:
|
||||
|
||||
@@ -1,346 +0,0 @@
|
||||
# Hermes Agent v0.8.0 (v2026.4.8)
|
||||
|
||||
**Release Date:** April 8, 2026
|
||||
|
||||
> The intelligence release — background task auto-notifications, free MiMo v2 Pro on Nous Portal, live model switching across all platforms, self-optimized GPT/Codex guidance, native Google AI Studio, smart inactivity timeouts, approval buttons, MCP OAuth 2.1, and 209 merged PRs with 82 resolved issues.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Background Process Auto-Notifications (`notify_on_complete`)** — Background tasks can now automatically notify the agent when they finish. Start a long-running process (AI model training, test suites, deployments, builds) and the agent gets notified on completion — no polling needed. The agent can keep working on other things and pick up results when they land. ([#5779](https://github.com/NousResearch/hermes-agent/pull/5779))
|
||||
|
||||
- **Free Xiaomi MiMo v2 Pro on Nous Portal** — Nous Portal now supports the free-tier Xiaomi MiMo v2 Pro model for auxiliary tasks (compression, vision, summarization), with free-tier model gating and pricing display in model selection. ([#6018](https://github.com/NousResearch/hermes-agent/pull/6018), [#5880](https://github.com/NousResearch/hermes-agent/pull/5880))
|
||||
|
||||
- **Live Model Switching (`/model` Command)** — Switch models and providers mid-session from CLI, Telegram, Discord, Slack, or any gateway platform. Aggregator-aware resolution keeps you on OpenRouter/Nous when possible, with automatic cross-provider fallback when needed. Interactive model pickers on Telegram and Discord with inline buttons. ([#5181](https://github.com/NousResearch/hermes-agent/pull/5181), [#5742](https://github.com/NousResearch/hermes-agent/pull/5742))
|
||||
|
||||
- **Self-Optimized GPT/Codex Tool-Use Guidance** — The agent diagnosed and patched 5 failure modes in GPT and Codex tool calling through automated behavioral benchmarking, dramatically improving reliability on OpenAI models. Includes execution discipline guidance and thinking-only prefill continuation for structured reasoning. ([#6120](https://github.com/NousResearch/hermes-agent/pull/6120), [#5414](https://github.com/NousResearch/hermes-agent/pull/5414), [#5931](https://github.com/NousResearch/hermes-agent/pull/5931))
|
||||
|
||||
- **Google AI Studio (Gemini) Native Provider** — Direct access to Gemini models through Google's AI Studio API. Includes automatic models.dev registry integration for real-time context length detection across any provider. ([#5577](https://github.com/NousResearch/hermes-agent/pull/5577))
|
||||
|
||||
- **Inactivity-Based Agent Timeouts** — Gateway and cron timeouts now track actual tool activity instead of wall-clock time. Long-running tasks that are actively working will never be killed — only truly idle agents time out. ([#5389](https://github.com/NousResearch/hermes-agent/pull/5389), [#5440](https://github.com/NousResearch/hermes-agent/pull/5440))
|
||||
|
||||
- **Approval Buttons on Slack & Telegram** — Dangerous command approval via native platform buttons instead of typing `/approve`. Slack gets thread context preservation; Telegram gets emoji reactions for approval status. ([#5890](https://github.com/NousResearch/hermes-agent/pull/5890), [#5975](https://github.com/NousResearch/hermes-agent/pull/5975))
|
||||
|
||||
- **MCP OAuth 2.1 PKCE + OSV Malware Scanning** — Full standards-compliant OAuth for MCP server authentication, plus automatic malware scanning of MCP extension packages via the OSV vulnerability database. ([#5420](https://github.com/NousResearch/hermes-agent/pull/5420), [#5305](https://github.com/NousResearch/hermes-agent/pull/5305))
|
||||
|
||||
- **Centralized Logging & Config Validation** — Structured logging to `~/.hermes/logs/` (agent.log + errors.log) with the `hermes logs` command for tailing and filtering. Config structure validation catches malformed YAML at startup before it causes cryptic failures. ([#5430](https://github.com/NousResearch/hermes-agent/pull/5430), [#5426](https://github.com/NousResearch/hermes-agent/pull/5426))
|
||||
|
||||
- **Plugin System Expansion** — Plugins can now register CLI subcommands, receive request-scoped API hooks with correlation IDs, prompt for required env vars during install, and hook into session lifecycle events (finalize/reset). ([#5295](https://github.com/NousResearch/hermes-agent/pull/5295), [#5427](https://github.com/NousResearch/hermes-agent/pull/5427), [#5470](https://github.com/NousResearch/hermes-agent/pull/5470), [#6129](https://github.com/NousResearch/hermes-agent/pull/6129))
|
||||
|
||||
- **Matrix Tier 1 & Platform Hardening** — Matrix gets reactions, read receipts, rich formatting, and room management. Discord adds channel controls and ignored channels. Signal gets full MEDIA: tag delivery. Mattermost gets file attachments. Comprehensive reliability fixes across all platforms. ([#5275](https://github.com/NousResearch/hermes-agent/pull/5275), [#5975](https://github.com/NousResearch/hermes-agent/pull/5975), [#5602](https://github.com/NousResearch/hermes-agent/pull/5602))
|
||||
|
||||
- **Security Hardening Pass** — Consolidated SSRF protections, timing attack mitigations, tar traversal prevention, credential leakage guards, cron path traversal hardening, and cross-session isolation. Terminal workdir sanitization across all backends. ([#5944](https://github.com/NousResearch/hermes-agent/pull/5944), [#5613](https://github.com/NousResearch/hermes-agent/pull/5613), [#5629](https://github.com/NousResearch/hermes-agent/pull/5629))
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Provider & Model Support
|
||||
- **Native Google AI Studio (Gemini) provider** with models.dev integration for automatic context length detection ([#5577](https://github.com/NousResearch/hermes-agent/pull/5577))
|
||||
- **`/model` command — full provider+model system overhaul** — live switching across CLI and all gateway platforms with aggregator-aware resolution ([#5181](https://github.com/NousResearch/hermes-agent/pull/5181))
|
||||
- **Interactive model picker for Telegram and Discord** — inline button-based model selection ([#5742](https://github.com/NousResearch/hermes-agent/pull/5742))
|
||||
- **Nous Portal free-tier model gating** with pricing display in model selection ([#5880](https://github.com/NousResearch/hermes-agent/pull/5880))
|
||||
- **Model pricing display** for OpenRouter and Nous Portal providers ([#5416](https://github.com/NousResearch/hermes-agent/pull/5416))
|
||||
- **xAI (Grok) prompt caching** via `x-grok-conv-id` header ([#5604](https://github.com/NousResearch/hermes-agent/pull/5604))
|
||||
- **Grok added to tool-use enforcement models** for direct xAI usage ([#5595](https://github.com/NousResearch/hermes-agent/pull/5595))
|
||||
- **MiniMax TTS provider** (speech-2.8) ([#4963](https://github.com/NousResearch/hermes-agent/pull/4963))
|
||||
- **Non-agentic model warning** — warns users when loading Hermes LLM models not designed for tool use ([#5378](https://github.com/NousResearch/hermes-agent/pull/5378))
|
||||
- **Ollama Cloud auth, /model switch persistence**, and alias tab completion ([#5269](https://github.com/NousResearch/hermes-agent/pull/5269))
|
||||
- **Preserve dots in OpenCode Go model names** (minimax-m2.7, glm-4.5, kimi-k2.5) ([#5597](https://github.com/NousResearch/hermes-agent/pull/5597))
|
||||
- **MiniMax models 404 fix** — strip /v1 from Anthropic base URL for OpenCode Go ([#4918](https://github.com/NousResearch/hermes-agent/pull/4918))
|
||||
- **Provider credential reset windows** honored in pooled failover ([#5188](https://github.com/NousResearch/hermes-agent/pull/5188))
|
||||
- **OAuth token sync** between credential pool and credentials file ([#4981](https://github.com/NousResearch/hermes-agent/pull/4981))
|
||||
- **Stale OAuth credentials** no longer block OpenRouter users on auto-detect ([#5746](https://github.com/NousResearch/hermes-agent/pull/5746))
|
||||
- **Codex OAuth credential pool disconnect** + expired token import fix ([#5681](https://github.com/NousResearch/hermes-agent/pull/5681))
|
||||
- **Codex pool entry sync** from `~/.codex/auth.json` on exhaustion — @GratefulDave ([#5610](https://github.com/NousResearch/hermes-agent/pull/5610))
|
||||
- **Auxiliary client payment fallback** — retry with next provider on 402 ([#5599](https://github.com/NousResearch/hermes-agent/pull/5599))
|
||||
- **Auxiliary client resolves named custom providers** and 'main' alias ([#5978](https://github.com/NousResearch/hermes-agent/pull/5978))
|
||||
- **Use mimo-v2-pro** for non-vision auxiliary tasks on Nous free tier ([#6018](https://github.com/NousResearch/hermes-agent/pull/6018))
|
||||
- **Vision auto-detection** tries main provider first ([#6041](https://github.com/NousResearch/hermes-agent/pull/6041))
|
||||
- **Provider re-ordering and Quick Install** — @austinpickett ([#4664](https://github.com/NousResearch/hermes-agent/pull/4664))
|
||||
- **Nous OAuth access_token** no longer used as inference API key — @SHL0MS ([#5564](https://github.com/NousResearch/hermes-agent/pull/5564))
|
||||
- **HERMES_PORTAL_BASE_URL env var** respected during Nous login — @benbarclay ([#5745](https://github.com/NousResearch/hermes-agent/pull/5745))
|
||||
- **Env var overrides** for Nous portal/inference URLs ([#5419](https://github.com/NousResearch/hermes-agent/pull/5419))
|
||||
- **Z.AI endpoint auto-detect** via probe and cache ([#5763](https://github.com/NousResearch/hermes-agent/pull/5763))
|
||||
- **MiniMax context lengths, model catalog, thinking guard, aux model, and config base_url** corrections ([#6082](https://github.com/NousResearch/hermes-agent/pull/6082))
|
||||
- **Community provider/model resolution fixes** — salvaged 4 community PRs + MiniMax aux URL ([#5983](https://github.com/NousResearch/hermes-agent/pull/5983))
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **Self-optimized GPT/Codex tool-use guidance** via automated behavioral benchmarking — agent self-diagnosed and patched 5 failure modes ([#6120](https://github.com/NousResearch/hermes-agent/pull/6120))
|
||||
- **GPT/Codex execution discipline guidance** in system prompts ([#5414](https://github.com/NousResearch/hermes-agent/pull/5414))
|
||||
- **Thinking-only prefill continuation** for structured reasoning responses ([#5931](https://github.com/NousResearch/hermes-agent/pull/5931))
|
||||
- **Accept reasoning-only responses** without retries — set content to "(empty)" instead of infinite retry ([#5278](https://github.com/NousResearch/hermes-agent/pull/5278))
|
||||
- **Jittered retry backoff** — exponential backoff with jitter for API retries ([#6048](https://github.com/NousResearch/hermes-agent/pull/6048))
|
||||
- **Smart thinking block signature management** — preserve and manage Anthropic thinking signatures across turns ([#6112](https://github.com/NousResearch/hermes-agent/pull/6112))
|
||||
- **Coerce tool call arguments** to match JSON Schema types — fixes models that send strings instead of numbers/booleans ([#5265](https://github.com/NousResearch/hermes-agent/pull/5265))
|
||||
- **Save oversized tool results to file** instead of destructive truncation ([#5210](https://github.com/NousResearch/hermes-agent/pull/5210))
|
||||
- **Sandbox-aware tool result persistence** ([#6085](https://github.com/NousResearch/hermes-agent/pull/6085))
|
||||
- **Streaming fallback** improved after edit failures ([#6110](https://github.com/NousResearch/hermes-agent/pull/6110))
|
||||
- **Codex empty-output gaps** covered in fallback + normalizer + auxiliary client ([#5724](https://github.com/NousResearch/hermes-agent/pull/5724), [#5730](https://github.com/NousResearch/hermes-agent/pull/5730), [#5734](https://github.com/NousResearch/hermes-agent/pull/5734))
|
||||
- **Codex stream output backfill** from output_item.done events ([#5689](https://github.com/NousResearch/hermes-agent/pull/5689))
|
||||
- **Stream consumer creates new message** after tool boundaries ([#5739](https://github.com/NousResearch/hermes-agent/pull/5739))
|
||||
- **Codex validation aligned** with normalization for empty stream output ([#5940](https://github.com/NousResearch/hermes-agent/pull/5940))
|
||||
- **Bridge tool-calls** in copilot-acp adapter ([#5460](https://github.com/NousResearch/hermes-agent/pull/5460))
|
||||
- **Filter transcript-only roles** from chat-completions payload ([#4880](https://github.com/NousResearch/hermes-agent/pull/4880))
|
||||
- **Context compaction failures fixed** on temperature-restricted models — @MadKangYu ([#5608](https://github.com/NousResearch/hermes-agent/pull/5608))
|
||||
- **Sanitize tool_calls for all strict APIs** (Fireworks, Mistral, etc.) — @lumethegreat ([#5183](https://github.com/NousResearch/hermes-agent/pull/5183))
|
||||
|
||||
### Memory & Sessions
|
||||
- **Supermemory memory provider** — new memory plugin with multi-container, search_mode, identity template, and env var override ([#5737](https://github.com/NousResearch/hermes-agent/pull/5737), [#5933](https://github.com/NousResearch/hermes-agent/pull/5933))
|
||||
- **Shared thread sessions** by default — multi-user thread support across gateway platforms ([#5391](https://github.com/NousResearch/hermes-agent/pull/5391))
|
||||
- **Subagent sessions linked to parent** and hidden from session list ([#5309](https://github.com/NousResearch/hermes-agent/pull/5309))
|
||||
- **Profile-scoped memory isolation** and clone support ([#4845](https://github.com/NousResearch/hermes-agent/pull/4845))
|
||||
- **Thread gateway user_id to memory plugins** for per-user scoping ([#5895](https://github.com/NousResearch/hermes-agent/pull/5895))
|
||||
- **Honcho plugin drift overhaul** + plugin CLI registration system ([#5295](https://github.com/NousResearch/hermes-agent/pull/5295))
|
||||
- **Honcho holographic prompt and trust score** rendering preserved ([#4872](https://github.com/NousResearch/hermes-agent/pull/4872))
|
||||
- **Honcho doctor fix** — use recall_mode instead of memory_mode — @techguysimon ([#5645](https://github.com/NousResearch/hermes-agent/pull/5645))
|
||||
- **RetainDB** — API routes, write queue, dialectic, agent model, file tools fixes ([#5461](https://github.com/NousResearch/hermes-agent/pull/5461))
|
||||
- **Hindsight memory plugin overhaul** + memory setup wizard fixes ([#5094](https://github.com/NousResearch/hermes-agent/pull/5094))
|
||||
- **mem0 API v2 compat**, prefetch context fencing, secret redaction ([#5423](https://github.com/NousResearch/hermes-agent/pull/5423))
|
||||
- **mem0 env vars merged** with mem0.json instead of either/or ([#4939](https://github.com/NousResearch/hermes-agent/pull/4939))
|
||||
- **Clean user message** used for all memory provider operations ([#4940](https://github.com/NousResearch/hermes-agent/pull/4940))
|
||||
- **Silent memory flush failure** on /new and /resume fixed — @ryanautomated ([#5640](https://github.com/NousResearch/hermes-agent/pull/5640))
|
||||
- **OpenViking atexit safety net** for session commit ([#5664](https://github.com/NousResearch/hermes-agent/pull/5664))
|
||||
- **OpenViking tenant-scoping headers** for multi-tenant servers ([#4936](https://github.com/NousResearch/hermes-agent/pull/4936))
|
||||
- **ByteRover brv query** runs synchronously before LLM call ([#4831](https://github.com/NousResearch/hermes-agent/pull/4831))
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### Gateway Core
|
||||
- **Inactivity-based agent timeout** — replaces wall-clock timeout with smart activity tracking; long-running active tasks never killed ([#5389](https://github.com/NousResearch/hermes-agent/pull/5389))
|
||||
- **Approval buttons for Slack & Telegram** + Slack thread context preservation ([#5890](https://github.com/NousResearch/hermes-agent/pull/5890))
|
||||
- **Live-stream /update output** + forward interactive prompts to user ([#5180](https://github.com/NousResearch/hermes-agent/pull/5180))
|
||||
- **Infinite timeout support** + periodic notifications + actionable error messages ([#4959](https://github.com/NousResearch/hermes-agent/pull/4959))
|
||||
- **Duplicate message prevention** — gateway dedup + partial stream guard ([#4878](https://github.com/NousResearch/hermes-agent/pull/4878))
|
||||
- **Webhook delivery_info persistence** + full session id in /status ([#5942](https://github.com/NousResearch/hermes-agent/pull/5942))
|
||||
- **Tool preview truncation** respects tool_preview_length in all/new progress modes ([#5937](https://github.com/NousResearch/hermes-agent/pull/5937))
|
||||
- **Short preview truncation** restored for all/new tool progress modes ([#4935](https://github.com/NousResearch/hermes-agent/pull/4935))
|
||||
- **Update-pending state** written atomically to prevent corruption ([#4923](https://github.com/NousResearch/hermes-agent/pull/4923))
|
||||
- **Approval session key isolated** per turn ([#4884](https://github.com/NousResearch/hermes-agent/pull/4884))
|
||||
- **Active-session guard bypass** for /approve, /deny, /stop, /new ([#4926](https://github.com/NousResearch/hermes-agent/pull/4926), [#5765](https://github.com/NousResearch/hermes-agent/pull/5765))
|
||||
- **Typing indicator paused** during approval waits ([#5893](https://github.com/NousResearch/hermes-agent/pull/5893))
|
||||
- **Caption check** uses exact line-by-line match instead of substring (all platforms) ([#5939](https://github.com/NousResearch/hermes-agent/pull/5939))
|
||||
- **MEDIA: tags stripped** from streamed gateway messages ([#5152](https://github.com/NousResearch/hermes-agent/pull/5152))
|
||||
- **MEDIA: tags extracted** from cron delivery before sending ([#5598](https://github.com/NousResearch/hermes-agent/pull/5598))
|
||||
- **Profile-aware service units** + voice transcription cleanup ([#5972](https://github.com/NousResearch/hermes-agent/pull/5972))
|
||||
- **Thread-safe PairingStore** with atomic writes — @CharlieKerfoot ([#5656](https://github.com/NousResearch/hermes-agent/pull/5656))
|
||||
- **Sanitize media URLs** in base platform logs — @WAXLYY ([#5631](https://github.com/NousResearch/hermes-agent/pull/5631))
|
||||
- **Reduce Telegram fallback IP activation log noise** — @MadKangYu ([#5615](https://github.com/NousResearch/hermes-agent/pull/5615))
|
||||
- **Cron static method wrappers** to prevent self-binding ([#5299](https://github.com/NousResearch/hermes-agent/pull/5299))
|
||||
- **Stale 'hermes login' replaced** with 'hermes auth' + credential removal re-seeding fix ([#5670](https://github.com/NousResearch/hermes-agent/pull/5670))
|
||||
|
||||
### Telegram
|
||||
- **Group topics skill binding** for supergroup forum topics ([#4886](https://github.com/NousResearch/hermes-agent/pull/4886))
|
||||
- **Emoji reactions** for approval status and notifications ([#5975](https://github.com/NousResearch/hermes-agent/pull/5975))
|
||||
- **Duplicate message delivery prevented** on send timeout ([#5153](https://github.com/NousResearch/hermes-agent/pull/5153))
|
||||
- **Command names sanitized** to strip invalid characters ([#5596](https://github.com/NousResearch/hermes-agent/pull/5596))
|
||||
- **Per-platform disabled skills** respected in Telegram menu and gateway dispatch ([#4799](https://github.com/NousResearch/hermes-agent/pull/4799))
|
||||
- **/approve and /deny** routed through running-agent guard ([#4798](https://github.com/NousResearch/hermes-agent/pull/4798))
|
||||
|
||||
### Discord
|
||||
- **Channel controls** — ignored_channels and no_thread_channels config options ([#5975](https://github.com/NousResearch/hermes-agent/pull/5975))
|
||||
- **Skills registered as native slash commands** via shared gateway logic ([#5603](https://github.com/NousResearch/hermes-agent/pull/5603))
|
||||
- **/approve, /deny, /queue, /background, /btw** registered as native slash commands ([#4800](https://github.com/NousResearch/hermes-agent/pull/4800), [#5477](https://github.com/NousResearch/hermes-agent/pull/5477))
|
||||
- **Unnecessary members intent** removed on startup + token lock leak fix ([#5302](https://github.com/NousResearch/hermes-agent/pull/5302))
|
||||
|
||||
### Slack
|
||||
- **Thread engagement** — auto-respond in bot-started and mentioned threads ([#5897](https://github.com/NousResearch/hermes-agent/pull/5897))
|
||||
- **mrkdwn in edit_message** + thread replies without @mentions ([#5733](https://github.com/NousResearch/hermes-agent/pull/5733))
|
||||
|
||||
### Matrix
|
||||
- **Tier 1 feature parity** — reactions, read receipts, rich formatting, room management ([#5275](https://github.com/NousResearch/hermes-agent/pull/5275))
|
||||
- **MATRIX_REQUIRE_MENTION and MATRIX_AUTO_THREAD** support ([#5106](https://github.com/NousResearch/hermes-agent/pull/5106))
|
||||
- **Comprehensive reliability** — encrypted media, auth recovery, cron E2EE, Synapse compat ([#5271](https://github.com/NousResearch/hermes-agent/pull/5271))
|
||||
- **CJK input, E2EE, and reconnect** fixes ([#5665](https://github.com/NousResearch/hermes-agent/pull/5665))
|
||||
|
||||
### Signal
|
||||
- **Full MEDIA: tag delivery** — send_image_file, send_voice, and send_video implemented ([#5602](https://github.com/NousResearch/hermes-agent/pull/5602))
|
||||
|
||||
### Mattermost
|
||||
- **File attachments** — set message type to DOCUMENT when post has file attachments — @nericervin ([#5609](https://github.com/NousResearch/hermes-agent/pull/5609))
|
||||
|
||||
### Feishu
|
||||
- **Interactive card approval buttons** ([#6043](https://github.com/NousResearch/hermes-agent/pull/6043))
|
||||
- **Reconnect and ACL** fixes ([#5665](https://github.com/NousResearch/hermes-agent/pull/5665))
|
||||
|
||||
### Webhooks
|
||||
- **`{__raw__}` template token** and thread_id passthrough for forum topics ([#5662](https://github.com/NousResearch/hermes-agent/pull/5662))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ CLI & User Experience
|
||||
|
||||
### Interactive CLI
|
||||
- **Defer response content** until reasoning block completes ([#5773](https://github.com/NousResearch/hermes-agent/pull/5773))
|
||||
- **Ghost status-bar lines cleared** on terminal resize ([#4960](https://github.com/NousResearch/hermes-agent/pull/4960))
|
||||
- **Normalise \r\n and \r line endings** in pasted text ([#4849](https://github.com/NousResearch/hermes-agent/pull/4849))
|
||||
- **ChatConsole errors, curses scroll, skin-aware banner, git state** banner fixes ([#5974](https://github.com/NousResearch/hermes-agent/pull/5974))
|
||||
- **Native Windows image paste** support ([#5917](https://github.com/NousResearch/hermes-agent/pull/5917))
|
||||
- **--yolo and other flags** no longer silently dropped when placed before 'chat' subcommand ([#5145](https://github.com/NousResearch/hermes-agent/pull/5145))
|
||||
|
||||
### Setup & Configuration
|
||||
- **Config structure validation** — detect malformed YAML at startup with actionable error messages ([#5426](https://github.com/NousResearch/hermes-agent/pull/5426))
|
||||
- **Centralized logging** to `~/.hermes/logs/` — agent.log (INFO+), errors.log (WARNING+) with `hermes logs` command ([#5430](https://github.com/NousResearch/hermes-agent/pull/5430))
|
||||
- **Docs links added** to setup wizard sections ([#5283](https://github.com/NousResearch/hermes-agent/pull/5283))
|
||||
- **Doctor diagnostics** — sync provider checks, config migration, WAL and mem0 diagnostics ([#5077](https://github.com/NousResearch/hermes-agent/pull/5077))
|
||||
- **Timeout debug logging** and user-facing diagnostics improved ([#5370](https://github.com/NousResearch/hermes-agent/pull/5370))
|
||||
- **Reasoning effort unified** to config.yaml only ([#6118](https://github.com/NousResearch/hermes-agent/pull/6118))
|
||||
- **Permanent command allowlist** loaded on startup ([#5076](https://github.com/NousResearch/hermes-agent/pull/5076))
|
||||
- **`hermes auth remove`** now clears env-seeded credentials permanently ([#5285](https://github.com/NousResearch/hermes-agent/pull/5285))
|
||||
- **Bundled skills synced to all profiles** during update ([#5795](https://github.com/NousResearch/hermes-agent/pull/5795))
|
||||
- **`hermes update` no longer kills** freshly-restarted gateway service ([#5448](https://github.com/NousResearch/hermes-agent/pull/5448))
|
||||
- **Subprocess.run() timeouts** added to all gateway CLI commands ([#5424](https://github.com/NousResearch/hermes-agent/pull/5424))
|
||||
- **Actionable error message** when Codex refresh token is reused — @tymrtn ([#5612](https://github.com/NousResearch/hermes-agent/pull/5612))
|
||||
- **Google-workspace skill scripts** can now run directly — @xinbenlv ([#5624](https://github.com/NousResearch/hermes-agent/pull/5624))
|
||||
|
||||
### Cron System
|
||||
- **Inactivity-based cron timeout** — replaces wall-clock; active tasks run indefinitely ([#5440](https://github.com/NousResearch/hermes-agent/pull/5440))
|
||||
- **Pre-run script injection** for data collection and change detection ([#5082](https://github.com/NousResearch/hermes-agent/pull/5082))
|
||||
- **Delivery failure tracking** in job status ([#6042](https://github.com/NousResearch/hermes-agent/pull/6042))
|
||||
- **Delivery guidance** in cron prompts — stops send_message thrashing ([#5444](https://github.com/NousResearch/hermes-agent/pull/5444))
|
||||
- **MEDIA files delivered** as native platform attachments ([#5921](https://github.com/NousResearch/hermes-agent/pull/5921))
|
||||
- **[SILENT] suppression** works anywhere in response — @auspic7 ([#5654](https://github.com/NousResearch/hermes-agent/pull/5654))
|
||||
- **Cron path traversal** hardening ([#5147](https://github.com/NousResearch/hermes-agent/pull/5147))
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Terminal & Execution
|
||||
- **Execute_code on remote backends** — code execution now works on Docker, SSH, Modal, and other remote terminal backends ([#5088](https://github.com/NousResearch/hermes-agent/pull/5088))
|
||||
- **Exit code context** for common CLI tools in terminal results — helps agent understand what went wrong ([#5144](https://github.com/NousResearch/hermes-agent/pull/5144))
|
||||
- **Progressive subdirectory hint discovery** — agent learns project structure as it navigates ([#5291](https://github.com/NousResearch/hermes-agent/pull/5291))
|
||||
- **notify_on_complete for background processes** — get notified when long-running tasks finish ([#5779](https://github.com/NousResearch/hermes-agent/pull/5779))
|
||||
- **Docker env config** — explicit container environment variables via docker_env config ([#4738](https://github.com/NousResearch/hermes-agent/pull/4738))
|
||||
- **Approval metadata included** in terminal tool results ([#5141](https://github.com/NousResearch/hermes-agent/pull/5141))
|
||||
- **Workdir parameter sanitized** in terminal tool across all backends ([#5629](https://github.com/NousResearch/hermes-agent/pull/5629))
|
||||
- **Detached process crash recovery** state corrected ([#6101](https://github.com/NousResearch/hermes-agent/pull/6101))
|
||||
- **Agent-browser paths with spaces** preserved — @Vasanthdev2004 ([#6077](https://github.com/NousResearch/hermes-agent/pull/6077))
|
||||
- **Portable base64 encoding** for image reading on macOS — @CharlieKerfoot ([#5657](https://github.com/NousResearch/hermes-agent/pull/5657))
|
||||
|
||||
### Browser
|
||||
- **Switch managed browser provider** from Browserbase to Browser Use — @benbarclay ([#5750](https://github.com/NousResearch/hermes-agent/pull/5750))
|
||||
- **Firecrawl cloud browser** provider — @alt-glitch ([#5628](https://github.com/NousResearch/hermes-agent/pull/5628))
|
||||
- **JS evaluation** via browser_console expression parameter ([#5303](https://github.com/NousResearch/hermes-agent/pull/5303))
|
||||
- **Windows browser** fixes ([#5665](https://github.com/NousResearch/hermes-agent/pull/5665))
|
||||
|
||||
### MCP
|
||||
- **MCP OAuth 2.1 PKCE** — full standards-compliant OAuth client support ([#5420](https://github.com/NousResearch/hermes-agent/pull/5420))
|
||||
- **OSV malware check** for MCP extension packages ([#5305](https://github.com/NousResearch/hermes-agent/pull/5305))
|
||||
- **Prefer structuredContent over text** + no_mcp sentinel ([#5979](https://github.com/NousResearch/hermes-agent/pull/5979))
|
||||
- **Unknown toolsets warning suppressed** for MCP server names ([#5279](https://github.com/NousResearch/hermes-agent/pull/5279))
|
||||
|
||||
### Web & Files
|
||||
- **.zip document support** + auto-mount cache dirs into remote backends ([#4846](https://github.com/NousResearch/hermes-agent/pull/4846))
|
||||
- **Redact query secrets** in send_message errors — @WAXLYY ([#5650](https://github.com/NousResearch/hermes-agent/pull/5650))
|
||||
|
||||
### Delegation
|
||||
- **Credential pool sharing** + workspace path hints for subagents ([#5748](https://github.com/NousResearch/hermes-agent/pull/5748))
|
||||
|
||||
### ACP (VS Code / Zed / JetBrains)
|
||||
- **Aggregate ACP improvements** — auth compat, protocol fixes, command ads, delegation, SSE events ([#5292](https://github.com/NousResearch/hermes-agent/pull/5292))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skills System
|
||||
- **Skill config interface** — skills can declare required config.yaml settings, prompted during setup, injected at load time ([#5635](https://github.com/NousResearch/hermes-agent/pull/5635))
|
||||
- **Plugin CLI registration system** — plugins register their own CLI subcommands without touching main.py ([#5295](https://github.com/NousResearch/hermes-agent/pull/5295))
|
||||
- **Request-scoped API hooks** with tool call correlation IDs for plugins ([#5427](https://github.com/NousResearch/hermes-agent/pull/5427))
|
||||
- **Session lifecycle hooks** — on_session_finalize and on_session_reset for CLI + gateway ([#6129](https://github.com/NousResearch/hermes-agent/pull/6129))
|
||||
- **Prompt for required env vars** during plugin install — @kshitijk4poor ([#5470](https://github.com/NousResearch/hermes-agent/pull/5470))
|
||||
- **Plugin name validation** — reject names that resolve to plugins root ([#5368](https://github.com/NousResearch/hermes-agent/pull/5368))
|
||||
- **pre_llm_call plugin context** moved to user message to preserve prompt cache ([#5146](https://github.com/NousResearch/hermes-agent/pull/5146))
|
||||
|
||||
### New & Updated Skills
|
||||
- **popular-web-designs** — 54 production website design systems ([#5194](https://github.com/NousResearch/hermes-agent/pull/5194))
|
||||
- **p5js creative coding** — @SHL0MS ([#5600](https://github.com/NousResearch/hermes-agent/pull/5600))
|
||||
- **manim-video** — mathematical and technical animations — @SHL0MS ([#4930](https://github.com/NousResearch/hermes-agent/pull/4930))
|
||||
- **llm-wiki** — Karpathy's LLM Wiki skill ([#5635](https://github.com/NousResearch/hermes-agent/pull/5635))
|
||||
- **gitnexus-explorer** — codebase indexing and knowledge serving ([#5208](https://github.com/NousResearch/hermes-agent/pull/5208))
|
||||
- **research-paper-writing** — AI-Scientist & GPT-Researcher patterns — @SHL0MS ([#5421](https://github.com/NousResearch/hermes-agent/pull/5421))
|
||||
- **blogwatcher** updated to JulienTant's fork ([#5759](https://github.com/NousResearch/hermes-agent/pull/5759))
|
||||
- **claude-code skill** comprehensive rewrite v2.0 + v2.2 ([#5155](https://github.com/NousResearch/hermes-agent/pull/5155), [#5158](https://github.com/NousResearch/hermes-agent/pull/5158))
|
||||
- **Code verification skills** consolidated into one ([#4854](https://github.com/NousResearch/hermes-agent/pull/4854))
|
||||
- **Manim CE reference docs** expanded — geometry, animations, LaTeX — @leotrs ([#5791](https://github.com/NousResearch/hermes-agent/pull/5791))
|
||||
- **Manim-video references** — design thinking, updaters, paper explainer, decorations, production quality — @SHL0MS ([#5588](https://github.com/NousResearch/hermes-agent/pull/5588), [#5408](https://github.com/NousResearch/hermes-agent/pull/5408))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
### Security Hardening
|
||||
- **Consolidated security** — SSRF protections, timing attack mitigations, tar traversal prevention, credential leakage guards ([#5944](https://github.com/NousResearch/hermes-agent/pull/5944))
|
||||
- **Cross-session isolation** + cron path traversal hardening ([#5613](https://github.com/NousResearch/hermes-agent/pull/5613))
|
||||
- **Workdir parameter sanitized** in terminal tool across all backends ([#5629](https://github.com/NousResearch/hermes-agent/pull/5629))
|
||||
- **Approval 'once' session escalation** prevented + cron delivery platform validation ([#5280](https://github.com/NousResearch/hermes-agent/pull/5280))
|
||||
- **Profile-scoped Google Workspace OAuth tokens** protected ([#4910](https://github.com/NousResearch/hermes-agent/pull/4910))
|
||||
|
||||
### Reliability
|
||||
- **Aggressive worktree and branch cleanup** to prevent accumulation ([#6134](https://github.com/NousResearch/hermes-agent/pull/6134))
|
||||
- **O(n²) catastrophic backtracking** in redact regex fixed — 100x improvement on large outputs ([#4962](https://github.com/NousResearch/hermes-agent/pull/4962))
|
||||
- **Runtime stability fixes** across core, web, delegate, and browser tools ([#4843](https://github.com/NousResearch/hermes-agent/pull/4843))
|
||||
- **API server streaming fix** + conversation history support ([#5977](https://github.com/NousResearch/hermes-agent/pull/5977))
|
||||
- **OpenViking API endpoint paths** and response parsing corrected ([#5078](https://github.com/NousResearch/hermes-agent/pull/5078))
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
- **9 community bugfixes salvaged** — gateway, cron, deps, macOS launchd in one batch ([#5288](https://github.com/NousResearch/hermes-agent/pull/5288))
|
||||
- **Batch core bug fixes** — model config, session reset, alias fallback, launchctl, delegation, atomic writes ([#5630](https://github.com/NousResearch/hermes-agent/pull/5630))
|
||||
- **Batch gateway/platform fixes** — matrix E2EE, CJK input, Windows browser, Feishu reconnect + ACL ([#5665](https://github.com/NousResearch/hermes-agent/pull/5665))
|
||||
- **Stale test skips removed**, regex backtracking, file search bug, and test flakiness ([#4969](https://github.com/NousResearch/hermes-agent/pull/4969))
|
||||
- **Nix flake** — read version, regen uv.lock, add hermes_logging — @alt-glitch ([#5651](https://github.com/NousResearch/hermes-agent/pull/5651))
|
||||
- **Lowercase variable redaction** regression tests ([#5185](https://github.com/NousResearch/hermes-agent/pull/5185))
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
- **57 failing CI tests repaired** across 14 files ([#5823](https://github.com/NousResearch/hermes-agent/pull/5823))
|
||||
- **Test suite re-architecture** + CI failure fixes — @alt-glitch ([#5946](https://github.com/NousResearch/hermes-agent/pull/5946))
|
||||
- **Codebase-wide lint cleanup** — unused imports, dead code, and inefficient patterns ([#5821](https://github.com/NousResearch/hermes-agent/pull/5821))
|
||||
- **browser_close tool removed** — auto-cleanup handles it ([#5792](https://github.com/NousResearch/hermes-agent/pull/5792))
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- **Comprehensive documentation audit** — fix stale info, expand thin pages, add depth ([#5393](https://github.com/NousResearch/hermes-agent/pull/5393))
|
||||
- **40+ discrepancies fixed** between documentation and codebase ([#5818](https://github.com/NousResearch/hermes-agent/pull/5818))
|
||||
- **13 features documented** from last week's PRs ([#5815](https://github.com/NousResearch/hermes-agent/pull/5815))
|
||||
- **Guides section overhaul** — fix existing + add 3 new tutorials ([#5735](https://github.com/NousResearch/hermes-agent/pull/5735))
|
||||
- **Salvaged 4 docs PRs** — docker setup, post-update validation, local LLM guide, signal-cli install ([#5727](https://github.com/NousResearch/hermes-agent/pull/5727))
|
||||
- **Discord configuration reference** ([#5386](https://github.com/NousResearch/hermes-agent/pull/5386))
|
||||
- **Community FAQ entries** for common workflows and troubleshooting ([#4797](https://github.com/NousResearch/hermes-agent/pull/4797))
|
||||
- **WSL2 networking guide** for local model servers ([#5616](https://github.com/NousResearch/hermes-agent/pull/5616))
|
||||
- **Honcho CLI reference** + plugin CLI registration docs ([#5308](https://github.com/NousResearch/hermes-agent/pull/5308))
|
||||
- **Obsidian Headless setup** for servers in llm-wiki ([#5660](https://github.com/NousResearch/hermes-agent/pull/5660))
|
||||
- **Hermes Mod visual skin editor** added to skins page ([#6095](https://github.com/NousResearch/hermes-agent/pull/6095))
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** — 179 PRs
|
||||
|
||||
### Top Community Contributors
|
||||
- **@SHL0MS** (7 PRs) — p5js creative coding skill, manim-video skill + 5 reference expansions, research-paper-writing, Nous OAuth fix, manim font fix
|
||||
- **@alt-glitch** (3 PRs) — Firecrawl cloud browser provider, test re-architecture + CI fixes, Nix flake fixes
|
||||
- **@benbarclay** (2 PRs) — Browser Use managed provider switch, Nous portal base URL fix
|
||||
- **@CharlieKerfoot** (2 PRs) — macOS portable base64 encoding, thread-safe PairingStore
|
||||
- **@WAXLYY** (2 PRs) — send_message secret redaction, gateway media URL sanitization
|
||||
- **@MadKangYu** (2 PRs) — Telegram log noise reduction, context compaction fix for temperature-restricted models
|
||||
|
||||
### All Contributors
|
||||
@alt-glitch, @austinpickett, @auspic7, @benbarclay, @CharlieKerfoot, @GratefulDave, @kshitijk4poor, @leotrs, @lumethegreat, @MadKangYu, @nericervin, @ryanautomated, @SHL0MS, @techguysimon, @tymrtn, @Vasanthdev2004, @WAXLYY, @xinbenlv
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.3...v2026.4.8](https://github.com/NousResearch/hermes-agent/compare/v2026.4.3...v2026.4.8)
|
||||
@@ -15,6 +15,7 @@ Usage::
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
@@ -36,7 +36,6 @@ from acp.schema import (
|
||||
SessionCapabilities,
|
||||
SessionForkCapabilities,
|
||||
SessionListCapabilities,
|
||||
SessionResumeCapabilities,
|
||||
SessionInfo,
|
||||
TextContentBlock,
|
||||
UnstructuredCommandInput,
|
||||
@@ -246,11 +245,9 @@ class HermesACPAgent(acp.Agent):
|
||||
protocol_version=acp.PROTOCOL_VERSION,
|
||||
agent_info=Implementation(name="hermes-agent", version=HERMES_VERSION),
|
||||
agent_capabilities=AgentCapabilities(
|
||||
load_session=True,
|
||||
session_capabilities=SessionCapabilities(
|
||||
fork=SessionForkCapabilities(),
|
||||
list=SessionListCapabilities(),
|
||||
resume=SessionResumeCapabilities(),
|
||||
),
|
||||
),
|
||||
auth_methods=auth_methods,
|
||||
@@ -454,13 +451,14 @@ class HermesACPAgent(acp.Agent):
|
||||
await conn.session_update(session_id, update)
|
||||
|
||||
usage = None
|
||||
if any(result.get(key) is not None for key in ("prompt_tokens", "completion_tokens", "total_tokens")):
|
||||
usage_data = result.get("usage")
|
||||
if usage_data and isinstance(usage_data, dict):
|
||||
usage = Usage(
|
||||
input_tokens=result.get("prompt_tokens", 0),
|
||||
output_tokens=result.get("completion_tokens", 0),
|
||||
total_tokens=result.get("total_tokens", 0),
|
||||
thought_tokens=result.get("reasoning_tokens"),
|
||||
cached_read_tokens=result.get("cache_read_tokens"),
|
||||
input_tokens=usage_data.get("prompt_tokens", 0),
|
||||
output_tokens=usage_data.get("completion_tokens", 0),
|
||||
total_tokens=usage_data.get("total_tokens", 0),
|
||||
thought_tokens=usage_data.get("reasoning_tokens"),
|
||||
cached_read_tokens=usage_data.get("cached_tokens"),
|
||||
)
|
||||
|
||||
stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn"
|
||||
|
||||
@@ -262,6 +262,8 @@ class SessionManager:
|
||||
if self._db_instance is not None:
|
||||
return self._db_instance
|
||||
try:
|
||||
import os
|
||||
from pathlib import Path
|
||||
from hermes_state import SessionDB
|
||||
hermes_home = get_hermes_home()
|
||||
self._db_instance = SessionDB(db_path=hermes_home / "state.db")
|
||||
|
||||
@@ -39,6 +39,7 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
|
||||
"browser_scroll": "execute",
|
||||
"browser_press": "execute",
|
||||
"browser_back": "execute",
|
||||
"browser_close": "execute",
|
||||
"browser_get_images": "read",
|
||||
# Agent internals
|
||||
"delegate_task": "execute",
|
||||
|
||||
+196
-216
@@ -60,8 +60,6 @@ _ANTHROPIC_OUTPUT_LIMITS = {
|
||||
"claude-3-opus": 4_096,
|
||||
"claude-3-sonnet": 4_096,
|
||||
"claude-3-haiku": 4_096,
|
||||
# Third-party Anthropic-compatible providers
|
||||
"minimax": 131_072,
|
||||
}
|
||||
|
||||
# For any model not in the table, assume the highest current limit.
|
||||
@@ -76,11 +74,8 @@ def _get_anthropic_max_output(model: str) -> int:
|
||||
model IDs (claude-sonnet-4-5-20250929) and variant suffixes (:1m, :fast)
|
||||
resolve correctly. Longest-prefix match wins to avoid e.g. "claude-3-5"
|
||||
matching before "claude-3-5-sonnet".
|
||||
|
||||
Normalizes dots to hyphens so that model names like
|
||||
``anthropic/claude-opus-4.6`` match the ``claude-opus-4-6`` table key.
|
||||
"""
|
||||
m = model.lower().replace(".", "-")
|
||||
m = model.lower()
|
||||
best_key = ""
|
||||
best_val = _ANTHROPIC_DEFAULT_OUTPUT_LIMIT
|
||||
for key, val in _ANTHROPIC_OUTPUT_LIMITS.items():
|
||||
@@ -100,15 +95,6 @@ _COMMON_BETAS = [
|
||||
"interleaved-thinking-2025-05-14",
|
||||
"fine-grained-tool-streaming-2025-05-14",
|
||||
]
|
||||
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
|
||||
# the fine-grained tool streaming beta is present. Omit it so tool calls
|
||||
# fall back to the provider's default response path.
|
||||
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
||||
|
||||
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
||||
# significantly higher output token throughput on Opus 4.6 (~2.5x).
|
||||
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
|
||||
_FAST_MODE_BETA = "fast-mode-2026-02-01"
|
||||
|
||||
# Additional beta headers required for OAuth/subscription auth.
|
||||
# Matches what Claude Code (and pi-ai / OpenCode) send.
|
||||
@@ -163,38 +149,18 @@ def _get_claude_code_version() -> str:
|
||||
|
||||
|
||||
def _is_oauth_token(key: str) -> bool:
|
||||
"""Check if the key is an Anthropic OAuth/setup token.
|
||||
"""Check if the key is an OAuth/setup token (not a regular Console API key).
|
||||
|
||||
Positively identifies Anthropic OAuth tokens by their key format:
|
||||
- ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
|
||||
- ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
|
||||
|
||||
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern
|
||||
and correctly return False.
|
||||
Regular API keys start with 'sk-ant-api'. Everything else (setup-tokens
|
||||
starting with 'sk-ant-oat', managed keys, JWTs, etc.) needs Bearer auth.
|
||||
"""
|
||||
if not key:
|
||||
return False
|
||||
# Regular Anthropic Console API keys — x-api-key auth, never OAuth
|
||||
# Regular Console API keys use x-api-key header
|
||||
if key.startswith("sk-ant-api"):
|
||||
return False
|
||||
# Anthropic-issued tokens (setup-tokens sk-ant-oat-*, managed keys)
|
||||
if key.startswith("sk-ant-"):
|
||||
return True
|
||||
# JWTs from Anthropic OAuth flow
|
||||
if key.startswith("eyJ"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _normalize_base_url_text(base_url) -> str:
|
||||
"""Normalize SDK/base transport URL values to a plain string for inspection.
|
||||
|
||||
Some client objects expose ``base_url`` as an ``httpx.URL`` instead of a raw
|
||||
string. Provider/auth detection should accept either shape.
|
||||
"""
|
||||
if not base_url:
|
||||
return ""
|
||||
return str(base_url).strip()
|
||||
# Everything else (setup-tokens, managed keys, JWTs) uses Bearer auth
|
||||
return True
|
||||
|
||||
|
||||
def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
|
||||
@@ -204,10 +170,9 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
|
||||
with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth
|
||||
detection should be skipped for these endpoints.
|
||||
"""
|
||||
normalized = _normalize_base_url_text(base_url)
|
||||
if not normalized:
|
||||
if not base_url:
|
||||
return False # No base_url = direct Anthropic API
|
||||
normalized = normalized.rstrip("/").lower()
|
||||
normalized = base_url.rstrip("/").lower()
|
||||
if "anthropic.com" in normalized:
|
||||
return False # Direct Anthropic API — OAuth applies
|
||||
return True # Any other endpoint is a third-party proxy
|
||||
@@ -217,27 +182,15 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||
"""Return True for Anthropic-compatible providers that require Bearer auth.
|
||||
|
||||
Some third-party /anthropic endpoints implement Anthropic's Messages API but
|
||||
require Authorization: Bearer *** of Anthropic's native x-api-key header.
|
||||
require Authorization: Bearer instead of Anthropic's native x-api-key header.
|
||||
MiniMax's global and China Anthropic-compatible endpoints follow this pattern.
|
||||
"""
|
||||
normalized = _normalize_base_url_text(base_url)
|
||||
if not normalized:
|
||||
if not base_url:
|
||||
return False
|
||||
normalized = normalized.rstrip("/").lower()
|
||||
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
|
||||
|
||||
|
||||
def _common_betas_for_base_url(base_url: str | None) -> list[str]:
|
||||
"""Return the beta headers that are safe for the configured endpoint.
|
||||
|
||||
MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
|
||||
that include Anthropic's ``fine-grained-tool-streaming`` beta — every
|
||||
tool-use message triggers a connection error. Strip that beta for
|
||||
Bearer-auth endpoints while keeping all other betas intact.
|
||||
"""
|
||||
if _requires_bearer_auth(base_url):
|
||||
return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
|
||||
return _COMMON_BETAS
|
||||
normalized = base_url.rstrip("/").lower()
|
||||
return normalized.startswith("https://api.minimax.io/anthropic") or normalized.startswith(
|
||||
"https://api.minimaxi.com/anthropic"
|
||||
)
|
||||
|
||||
|
||||
def build_anthropic_client(api_key: str, base_url: str = None):
|
||||
@@ -252,15 +205,13 @@ def build_anthropic_client(api_key: str, base_url: str = None):
|
||||
)
|
||||
from httpx import Timeout
|
||||
|
||||
normalized_base_url = _normalize_base_url_text(base_url)
|
||||
kwargs = {
|
||||
"timeout": Timeout(timeout=900.0, connect=10.0),
|
||||
}
|
||||
if normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
if base_url:
|
||||
kwargs["base_url"] = base_url
|
||||
|
||||
if _requires_bearer_auth(normalized_base_url):
|
||||
if _requires_bearer_auth(base_url):
|
||||
# Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
|
||||
# Authorization: Bearer even for regular API keys. Route those endpoints
|
||||
# through auth_token so the SDK sends Bearer auth instead of x-api-key.
|
||||
@@ -268,21 +219,21 @@ def build_anthropic_client(api_key: str, base_url: str = None):
|
||||
# not use Anthropic's sk-ant-api prefix and would otherwise be misread as
|
||||
# Anthropic OAuth/setup tokens.
|
||||
kwargs["auth_token"] = api_key
|
||||
if common_betas:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
|
||||
if _COMMON_BETAS:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
|
||||
elif _is_third_party_anthropic_endpoint(base_url):
|
||||
# Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their
|
||||
# own API keys with x-api-key auth. Skip OAuth detection — their keys
|
||||
# don't follow Anthropic's sk-ant-* prefix convention and would be
|
||||
# misclassified as OAuth tokens.
|
||||
kwargs["api_key"] = api_key
|
||||
if common_betas:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
|
||||
if _COMMON_BETAS:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
|
||||
elif _is_oauth_token(api_key):
|
||||
# OAuth access token / setup-token → Bearer auth + Claude Code identity.
|
||||
# Anthropic routes OAuth requests based on user-agent and headers;
|
||||
# without Claude Code's fingerprint, requests get intermittent 500s.
|
||||
all_betas = common_betas + _OAUTH_ONLY_BETAS
|
||||
all_betas = _COMMON_BETAS + _OAUTH_ONLY_BETAS
|
||||
kwargs["auth_token"] = api_key
|
||||
kwargs["default_headers"] = {
|
||||
"anthropic-beta": ",".join(all_betas),
|
||||
@@ -292,8 +243,8 @@ def build_anthropic_client(api_key: str, base_url: str = None):
|
||||
else:
|
||||
# Regular API key → x-api-key header + common betas
|
||||
kwargs["api_key"] = api_key
|
||||
if common_betas:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
|
||||
if _COMMON_BETAS:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
|
||||
|
||||
return _anthropic_sdk.Anthropic(**kwargs)
|
||||
|
||||
@@ -522,6 +473,35 @@ def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[s
|
||||
return None
|
||||
|
||||
|
||||
def get_anthropic_token_source(token: Optional[str] = None) -> str:
|
||||
"""Best-effort source classification for an Anthropic credential token."""
|
||||
token = (token or "").strip()
|
||||
if not token:
|
||||
return "none"
|
||||
|
||||
env_token = os.getenv("ANTHROPIC_TOKEN", "").strip()
|
||||
if env_token and env_token == token:
|
||||
return "anthropic_token_env"
|
||||
|
||||
cc_env_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
|
||||
if cc_env_token and cc_env_token == token:
|
||||
return "claude_code_oauth_token_env"
|
||||
|
||||
creds = read_claude_code_credentials()
|
||||
if creds and creds.get("accessToken") == token:
|
||||
return str(creds.get("source") or "claude_code_credentials")
|
||||
|
||||
managed_key = read_claude_managed_key()
|
||||
if managed_key and managed_key == token:
|
||||
return "claude_json_primary_api_key"
|
||||
|
||||
api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
if api_key and api_key == token:
|
||||
return "anthropic_api_key_env"
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def resolve_anthropic_token() -> Optional[str]:
|
||||
"""Resolve an Anthropic token from all available sources.
|
||||
|
||||
@@ -728,6 +708,44 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
|
||||
}
|
||||
|
||||
|
||||
def run_hermes_oauth_login() -> Optional[str]:
|
||||
"""Run Hermes-native OAuth PKCE flow for Claude Pro/Max subscription.
|
||||
|
||||
Opens a browser to claude.ai for authorization, prompts for the code,
|
||||
exchanges it for tokens, and stores them in ~/.hermes/.anthropic_oauth.json.
|
||||
|
||||
Returns the access token on success, None on failure.
|
||||
"""
|
||||
result = run_hermes_oauth_login_pure()
|
||||
if not result:
|
||||
return None
|
||||
|
||||
access_token = result["access_token"]
|
||||
refresh_token = result["refresh_token"]
|
||||
expires_at_ms = result["expires_at_ms"]
|
||||
|
||||
_save_hermes_oauth_credentials(access_token, refresh_token, expires_at_ms)
|
||||
_write_claude_code_credentials(access_token, refresh_token, expires_at_ms)
|
||||
|
||||
print("Authentication successful!")
|
||||
return access_token
|
||||
|
||||
|
||||
def _save_hermes_oauth_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
|
||||
"""Save OAuth credentials to ~/.hermes/.anthropic_oauth.json."""
|
||||
data = {
|
||||
"accessToken": access_token,
|
||||
"refreshToken": refresh_token,
|
||||
"expiresAt": expires_at_ms,
|
||||
}
|
||||
try:
|
||||
_HERMES_OAUTH_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
_HERMES_OAUTH_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
||||
_HERMES_OAUTH_FILE.chmod(0o600)
|
||||
except (OSError, IOError) as e:
|
||||
logger.debug("Failed to save Hermes OAuth credentials: %s", e)
|
||||
|
||||
|
||||
def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
|
||||
"""Read Hermes-managed OAuth credentials from ~/.hermes/.anthropic_oauth.json."""
|
||||
if _HERMES_OAUTH_FILE.exists():
|
||||
@@ -740,6 +758,38 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
|
||||
return None
|
||||
|
||||
|
||||
def refresh_hermes_oauth_token() -> Optional[str]:
|
||||
"""Refresh the Hermes-managed OAuth token using the stored refresh token.
|
||||
|
||||
Returns the new access token, or None if refresh fails.
|
||||
"""
|
||||
creds = read_hermes_oauth_credentials()
|
||||
if not creds or not creds.get("refreshToken"):
|
||||
return None
|
||||
|
||||
try:
|
||||
refreshed = refresh_anthropic_oauth_pure(
|
||||
creds["refreshToken"],
|
||||
use_json=True,
|
||||
)
|
||||
_save_hermes_oauth_credentials(
|
||||
refreshed["access_token"],
|
||||
refreshed["refresh_token"],
|
||||
refreshed["expires_at_ms"],
|
||||
)
|
||||
_write_claude_code_credentials(
|
||||
refreshed["access_token"],
|
||||
refreshed["refresh_token"],
|
||||
refreshed["expires_at_ms"],
|
||||
)
|
||||
logger.debug("Successfully refreshed Hermes OAuth token")
|
||||
return refreshed["access_token"]
|
||||
except Exception as e:
|
||||
logger.debug("Failed to refresh Hermes OAuth token: %s", e)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Message / tool / response format conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -776,6 +826,68 @@ def _sanitize_tool_id(tool_id: str) -> str:
|
||||
return sanitized or "tool_0"
|
||||
|
||||
|
||||
def _convert_openai_image_part_to_anthropic(part: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Convert an OpenAI-style image block to Anthropic's image source format."""
|
||||
image_data = part.get("image_url", {})
|
||||
url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data)
|
||||
if not isinstance(url, str) or not url.strip():
|
||||
return None
|
||||
url = url.strip()
|
||||
|
||||
if url.startswith("data:"):
|
||||
header, sep, data = url.partition(",")
|
||||
if sep and ";base64" in header:
|
||||
media_type = header[5:].split(";", 1)[0] or "image/png"
|
||||
return {
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": media_type,
|
||||
"data": data,
|
||||
},
|
||||
}
|
||||
|
||||
if url.startswith("http://") or url.startswith("https://"):
|
||||
return {
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "url",
|
||||
"url": url,
|
||||
},
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _convert_user_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
|
||||
if isinstance(part, dict):
|
||||
ptype = part.get("type")
|
||||
if ptype == "text":
|
||||
block = {"type": "text", "text": part.get("text", "")}
|
||||
if isinstance(part.get("cache_control"), dict):
|
||||
block["cache_control"] = dict(part["cache_control"])
|
||||
return block
|
||||
if ptype == "image_url":
|
||||
return _convert_openai_image_part_to_anthropic(part)
|
||||
if ptype == "image" and part.get("source"):
|
||||
return dict(part)
|
||||
if ptype == "image" and part.get("data"):
|
||||
media_type = part.get("mimeType") or part.get("media_type") or "image/png"
|
||||
return {
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": media_type,
|
||||
"data": part.get("data", ""),
|
||||
},
|
||||
}
|
||||
if ptype == "tool_result":
|
||||
return dict(part)
|
||||
elif part is not None:
|
||||
return {"type": "text", "text": str(part)}
|
||||
return None
|
||||
|
||||
|
||||
def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||
"""Convert OpenAI tool definitions to Anthropic format."""
|
||||
if not tools:
|
||||
@@ -916,18 +1028,12 @@ def _convert_content_to_anthropic(content: Any) -> Any:
|
||||
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
|
||||
Returns (system_prompt, anthropic_messages).
|
||||
System messages are extracted since Anthropic takes them as a separate param.
|
||||
system_prompt is a string or list of content blocks (when cache_control present).
|
||||
|
||||
When *base_url* is provided and points to a third-party Anthropic-compatible
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
"""
|
||||
system = None
|
||||
result = []
|
||||
@@ -1082,15 +1188,7 @@ def convert_messages_to_anthropic(
|
||||
curr_content = [{"type": "text", "text": curr_content}]
|
||||
fixed[-1]["content"] = prev_content + curr_content
|
||||
else:
|
||||
# Consecutive assistant messages — merge text content.
|
||||
# Drop thinking blocks from the *second* message: their
|
||||
# signature was computed against a different turn boundary
|
||||
# and becomes invalid once merged.
|
||||
if isinstance(m["content"], list):
|
||||
m["content"] = [
|
||||
b for b in m["content"]
|
||||
if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"))
|
||||
]
|
||||
# Consecutive assistant messages — merge text content
|
||||
prev_blocks = fixed[-1]["content"]
|
||||
curr_blocks = m["content"]
|
||||
if isinstance(prev_blocks, list) and isinstance(curr_blocks, list):
|
||||
@@ -1108,79 +1206,6 @@ def convert_messages_to_anthropic(
|
||||
fixed.append(m)
|
||||
result = fixed
|
||||
|
||||
# ── Thinking block signature management ──────────────────────────
|
||||
# Anthropic signs thinking blocks against the full turn content.
|
||||
# Any upstream mutation (context compression, session truncation,
|
||||
# orphan stripping, message merging) invalidates the signature,
|
||||
# causing HTTP 400 "Invalid signature in thinking block".
|
||||
#
|
||||
# Signatures are Anthropic-proprietary. Third-party endpoints
|
||||
# (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
|
||||
# them and will reject them outright. When targeting a third-party
|
||||
# endpoint, strip ALL thinking/redacted_thinking blocks from every
|
||||
# assistant message — the third-party will generate its own
|
||||
# thinking blocks if it supports extended thinking.
|
||||
#
|
||||
# For direct Anthropic (strategy following clawdbot/OpenClaw):
|
||||
# 1. Strip thinking/redacted_thinking from all assistant messages
|
||||
# EXCEPT the last one — preserves reasoning continuity on the
|
||||
# current tool-use chain while avoiding stale signature errors.
|
||||
# 2. Downgrade unsigned thinking blocks (no signature) to text —
|
||||
# Anthropic can't validate them and will reject them.
|
||||
# 3. Strip cache_control from thinking/redacted_thinking blocks —
|
||||
# cache markers can interfere with signature validation.
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
|
||||
last_assistant_idx = None
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
if result[i].get("role") == "assistant":
|
||||
last_assistant_idx = i
|
||||
break
|
||||
|
||||
for idx, m in enumerate(result):
|
||||
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
|
||||
continue
|
||||
|
||||
if _is_third_party or idx != last_assistant_idx:
|
||||
# Third-party endpoint: strip ALL thinking blocks from every
|
||||
# assistant message — signatures are Anthropic-proprietary.
|
||||
# Direct Anthropic: strip from non-latest assistant messages only.
|
||||
stripped = [
|
||||
b for b in m["content"]
|
||||
if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES)
|
||||
]
|
||||
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
|
||||
else:
|
||||
# Latest assistant on direct Anthropic: keep signed thinking
|
||||
# blocks for reasoning continuity; downgrade unsigned ones to
|
||||
# plain text.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("type") == "redacted_thinking":
|
||||
# Redacted blocks use 'data' for the signature payload
|
||||
if b.get("data"):
|
||||
new_content.append(b)
|
||||
# else: drop — no data means it can't be validated
|
||||
elif b.get("signature"):
|
||||
# Signed thinking block — keep it
|
||||
new_content.append(b)
|
||||
else:
|
||||
# Unsigned thinking — downgrade to text so it's not lost
|
||||
thinking_text = b.get("thinking", "")
|
||||
if thinking_text:
|
||||
new_content.append({"type": "text", "text": thinking_text})
|
||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||
|
||||
# Strip cache_control from any remaining thinking/redacted_thinking
|
||||
# blocks — cache markers interfere with signature validation.
|
||||
for b in m["content"]:
|
||||
if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
|
||||
b.pop("cache_control", None)
|
||||
|
||||
return system, result
|
||||
|
||||
|
||||
@@ -1194,58 +1219,28 @@ def build_anthropic_kwargs(
|
||||
is_oauth: bool = False,
|
||||
preserve_dots: bool = False,
|
||||
context_length: Optional[int] = None,
|
||||
base_url: str | None = None,
|
||||
fast_mode: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build kwargs for anthropic.messages.create().
|
||||
|
||||
Naming note — two distinct concepts, easily confused:
|
||||
max_tokens = OUTPUT token cap for a single response.
|
||||
Anthropic's API calls this "max_tokens" but it only
|
||||
limits the *output*. Anthropic's own native SDK
|
||||
renamed it "max_output_tokens" for clarity.
|
||||
context_length = TOTAL context window (input tokens + output tokens).
|
||||
The API enforces: input_tokens + max_tokens ≤ context_length.
|
||||
Stored on the ContextCompressor; reduced on overflow errors.
|
||||
|
||||
When *max_tokens* is None the model's native output ceiling is used
|
||||
(e.g. 128K for Opus 4.6, 64K for Sonnet 4.6).
|
||||
|
||||
When *context_length* is provided and the model's native output ceiling
|
||||
exceeds it (e.g. a local endpoint with an 8K window), the output cap is
|
||||
clamped to context_length − 1. This only kicks in for unusually small
|
||||
context windows; for full-size models the native output cap is always
|
||||
smaller than the context window so no clamping happens.
|
||||
NOTE: this clamping does not account for prompt size — if the prompt is
|
||||
large, Anthropic may still reject the request. The caller must detect
|
||||
"max_tokens too large given prompt" errors and retry with a smaller cap
|
||||
(see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).
|
||||
When *max_tokens* is None, the model's native output limit is used
|
||||
(e.g. 128K for Opus 4.6, 64K for Sonnet 4.6). If *context_length*
|
||||
is provided, the effective limit is clamped so it doesn't exceed
|
||||
the context window.
|
||||
|
||||
When *is_oauth* is True, applies Claude Code compatibility transforms:
|
||||
system prompt prefix, tool name prefixing, and prompt sanitization.
|
||||
|
||||
When *preserve_dots* is True, model name dots are not converted to hyphens
|
||||
(for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).
|
||||
|
||||
When *base_url* points to a third-party Anthropic-compatible endpoint,
|
||||
thinking block signatures are stripped (they are Anthropic-proprietary).
|
||||
|
||||
When *fast_mode* is True, adds ``speed: "fast"`` and the fast-mode beta
|
||||
header for ~2.5x faster output throughput on Opus 4.6. Currently only
|
||||
supported on native Anthropic endpoints (not third-party compatible ones).
|
||||
"""
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages)
|
||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||
|
||||
model = normalize_model_name(model, preserve_dots=preserve_dots)
|
||||
# effective_max_tokens = output cap for this call (≠ total context window)
|
||||
effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
|
||||
|
||||
# Clamp output cap to fit inside the total context window.
|
||||
# Only matters for small custom endpoints where context_length < native
|
||||
# output ceiling. For standard Anthropic models context_length (e.g.
|
||||
# 200K) is always larger than the output ceiling (e.g. 128K), so this
|
||||
# branch is not taken.
|
||||
# Clamp to context window if the user set a lower context_length
|
||||
# (e.g. custom endpoint with limited capacity).
|
||||
if context_length and effective_max_tokens > context_length:
|
||||
effective_max_tokens = max(context_length - 1, 1)
|
||||
|
||||
@@ -1315,8 +1310,7 @@ def build_anthropic_kwargs(
|
||||
# Map reasoning_config to Anthropic's thinking parameter.
|
||||
# Claude 4.6 models use adaptive thinking + output_config.effort.
|
||||
# Older models use manual thinking with budget_tokens.
|
||||
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
|
||||
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
|
||||
# Haiku models do NOT support extended thinking at all — skip entirely.
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
|
||||
effort = str(reasoning_config.get("effort", "medium")).lower()
|
||||
@@ -1332,20 +1326,6 @@ def build_anthropic_kwargs(
|
||||
kwargs["temperature"] = 1
|
||||
kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
|
||||
|
||||
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
|
||||
# Adds speed:"fast" + the fast-mode beta header for ~2.5x output speed.
|
||||
# Only for native Anthropic endpoints — third-party providers would
|
||||
# reject the unknown beta header and speed parameter.
|
||||
if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
|
||||
kwargs["speed"] = "fast"
|
||||
# Build extra_headers with ALL applicable betas (the per-request
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
betas = list(_common_betas_for_base_url(base_url))
|
||||
if is_oauth:
|
||||
betas.extend(_OAUTH_ONLY_BETAS)
|
||||
betas.append(_FAST_MODE_BETA)
|
||||
kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
|
||||
|
||||
return kwargs
|
||||
|
||||
|
||||
@@ -1407,4 +1387,4 @@ def normalize_anthropic_response(
|
||||
reasoning_details=reasoning_details or None,
|
||||
),
|
||||
finish_reason,
|
||||
)
|
||||
)
|
||||
+181
-575
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,113 @@
|
||||
"""BuiltinMemoryProvider — wraps MEMORY.md / USER.md as a MemoryProvider.
|
||||
|
||||
Always registered as the first provider. Cannot be disabled or removed.
|
||||
This is the existing Hermes memory system exposed through the provider
|
||||
interface for compatibility with the MemoryManager.
|
||||
|
||||
The actual storage logic lives in tools/memory_tool.py (MemoryStore).
|
||||
This provider is a thin adapter that delegates to MemoryStore and
|
||||
exposes the memory tool schema.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.memory_provider import MemoryProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BuiltinMemoryProvider(MemoryProvider):
|
||||
"""Built-in file-backed memory (MEMORY.md + USER.md).
|
||||
|
||||
Always active, never disabled by other providers. The `memory` tool
|
||||
is handled by run_agent.py's agent-level tool interception (not through
|
||||
the normal registry), so get_tool_schemas() returns an empty list —
|
||||
the memory tool is already wired separately.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
memory_store=None,
|
||||
memory_enabled: bool = False,
|
||||
user_profile_enabled: bool = False,
|
||||
):
|
||||
self._store = memory_store
|
||||
self._memory_enabled = memory_enabled
|
||||
self._user_profile_enabled = user_profile_enabled
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "builtin"
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""Built-in memory is always available."""
|
||||
return True
|
||||
|
||||
def initialize(self, session_id: str, **kwargs) -> None:
|
||||
"""Load memory from disk if not already loaded."""
|
||||
if self._store is not None:
|
||||
self._store.load_from_disk()
|
||||
|
||||
def system_prompt_block(self) -> str:
|
||||
"""Return MEMORY.md and USER.md content for the system prompt.
|
||||
|
||||
Uses the frozen snapshot captured at load time. This ensures the
|
||||
system prompt stays stable throughout a session (preserving the
|
||||
prompt cache), even though the live entries may change via tool calls.
|
||||
"""
|
||||
if not self._store:
|
||||
return ""
|
||||
|
||||
parts = []
|
||||
if self._memory_enabled:
|
||||
mem_block = self._store.format_for_system_prompt("memory")
|
||||
if mem_block:
|
||||
parts.append(mem_block)
|
||||
if self._user_profile_enabled:
|
||||
user_block = self._store.format_for_system_prompt("user")
|
||||
if user_block:
|
||||
parts.append(user_block)
|
||||
|
||||
return "\n\n".join(parts)
|
||||
|
||||
def prefetch(self, query: str, *, session_id: str = "") -> str:
|
||||
"""Built-in memory doesn't do query-based recall — it's injected via system_prompt_block."""
|
||||
return ""
|
||||
|
||||
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
|
||||
"""Built-in memory doesn't auto-sync turns — writes happen via the memory tool."""
|
||||
|
||||
def get_tool_schemas(self) -> List[Dict[str, Any]]:
|
||||
"""Return empty list.
|
||||
|
||||
The `memory` tool is an agent-level intercepted tool, handled
|
||||
specially in run_agent.py before normal tool dispatch. It's not
|
||||
part of the standard tool registry. We don't duplicate it here.
|
||||
"""
|
||||
return []
|
||||
|
||||
def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
|
||||
"""Not used — the memory tool is intercepted in run_agent.py."""
|
||||
return json.dumps({"error": "Built-in memory tool is handled by the agent loop"})
|
||||
|
||||
def shutdown(self) -> None:
|
||||
"""No cleanup needed — files are saved on every write."""
|
||||
|
||||
# -- Property access for backward compatibility --------------------------
|
||||
|
||||
@property
|
||||
def store(self):
|
||||
"""Access the underlying MemoryStore for legacy code paths."""
|
||||
return self._store
|
||||
|
||||
@property
|
||||
def memory_enabled(self) -> bool:
|
||||
return self._memory_enabled
|
||||
|
||||
@property
|
||||
def user_profile_enabled(self) -> bool:
|
||||
return self._user_profile_enabled
|
||||
+72
-142
@@ -18,7 +18,6 @@ import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.auxiliary_client import call_llm
|
||||
from agent.context_engine import ContextEngine
|
||||
from agent.model_metadata import (
|
||||
get_model_context_length,
|
||||
estimate_messages_tokens_rough,
|
||||
@@ -51,8 +50,8 @@ _CHARS_PER_TOKEN = 4
|
||||
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
|
||||
|
||||
|
||||
class ContextCompressor(ContextEngine):
|
||||
"""Default context engine — compresses conversation context via lossy summarization.
|
||||
class ContextCompressor:
|
||||
"""Compresses conversation context when approaching the model's context limit.
|
||||
|
||||
Algorithm:
|
||||
1. Prune old tool results (cheap, no LLM call)
|
||||
@@ -62,33 +61,6 @@ class ContextCompressor(ContextEngine):
|
||||
5. On subsequent compactions, iteratively update the previous summary
|
||||
"""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "compressor"
|
||||
|
||||
def on_session_reset(self) -> None:
|
||||
"""Reset all per-session state for /new or /reset."""
|
||||
super().on_session_reset()
|
||||
self._context_probed = False
|
||||
self._context_probe_persistable = False
|
||||
self._previous_summary = None
|
||||
|
||||
def update_model(
|
||||
self,
|
||||
model: str,
|
||||
context_length: int,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
provider: str = "",
|
||||
) -> None:
|
||||
"""Update model info after a model switch or fallback activation."""
|
||||
self.model = model
|
||||
self.base_url = base_url
|
||||
self.api_key = api_key
|
||||
self.provider = provider
|
||||
self.context_length = context_length
|
||||
self.threshold_tokens = int(context_length * self.threshold_percent)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: str,
|
||||
@@ -142,6 +114,7 @@ class ContextCompressor(ContextEngine):
|
||||
|
||||
self.last_prompt_tokens = 0
|
||||
self.last_completion_tokens = 0
|
||||
self.last_total_tokens = 0
|
||||
|
||||
self.summary_model = summary_model_override or ""
|
||||
|
||||
@@ -153,27 +126,40 @@ class ContextCompressor(ContextEngine):
|
||||
"""Update tracked token usage from API response."""
|
||||
self.last_prompt_tokens = usage.get("prompt_tokens", 0)
|
||||
self.last_completion_tokens = usage.get("completion_tokens", 0)
|
||||
self.last_total_tokens = usage.get("total_tokens", 0)
|
||||
|
||||
def should_compress(self, prompt_tokens: int = None) -> bool:
|
||||
"""Check if context exceeds the compression threshold."""
|
||||
tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
|
||||
return tokens >= self.threshold_tokens
|
||||
|
||||
def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick pre-flight check using rough estimate (before API call)."""
|
||||
rough_estimate = estimate_messages_tokens_rough(messages)
|
||||
return rough_estimate >= self.threshold_tokens
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
|
||||
"""Get current compression status for display/logging."""
|
||||
return {
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"threshold_tokens": self.threshold_tokens,
|
||||
"context_length": self.context_length,
|
||||
"usage_percent": min(100, (self.last_prompt_tokens / self.context_length * 100)) if self.context_length else 0,
|
||||
"compression_count": self.compression_count,
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tool output pruning (cheap pre-pass, no LLM call)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _prune_old_tool_results(
|
||||
self, messages: List[Dict[str, Any]], protect_tail_count: int,
|
||||
protect_tail_tokens: int | None = None,
|
||||
) -> tuple[List[Dict[str, Any]], int]:
|
||||
"""Replace old tool result contents with a short placeholder.
|
||||
|
||||
Walks backward from the end, protecting the most recent messages that
|
||||
fall within ``protect_tail_tokens`` (when provided) OR the last
|
||||
``protect_tail_count`` messages (backward-compatible default).
|
||||
When both are given, the token budget takes priority and the message
|
||||
count acts as a hard minimum floor.
|
||||
Walks backward from the end, protecting the most recent
|
||||
``protect_tail_count`` messages. Older tool results get their
|
||||
content replaced with a placeholder string.
|
||||
|
||||
Returns (pruned_messages, pruned_count).
|
||||
"""
|
||||
@@ -182,29 +168,7 @@ class ContextCompressor(ContextEngine):
|
||||
|
||||
result = [m.copy() for m in messages]
|
||||
pruned = 0
|
||||
|
||||
# Determine the prune boundary
|
||||
if protect_tail_tokens is not None and protect_tail_tokens > 0:
|
||||
# Token-budget approach: walk backward accumulating tokens
|
||||
accumulated = 0
|
||||
boundary = len(result)
|
||||
min_protect = min(protect_tail_count, len(result) - 1)
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
msg = result[i]
|
||||
content_len = len(msg.get("content") or "")
|
||||
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
if isinstance(tc, dict):
|
||||
args = tc.get("function", {}).get("arguments", "")
|
||||
msg_tokens += len(args) // _CHARS_PER_TOKEN
|
||||
if accumulated + msg_tokens > protect_tail_tokens and (len(result) - i) >= min_protect:
|
||||
boundary = i
|
||||
break
|
||||
accumulated += msg_tokens
|
||||
boundary = i
|
||||
prune_boundary = max(boundary, len(result) - min_protect)
|
||||
else:
|
||||
prune_boundary = len(result) - protect_tail_count
|
||||
prune_boundary = len(result) - protect_tail_count
|
||||
|
||||
for i in range(prune_boundary):
|
||||
msg = result[i]
|
||||
@@ -235,39 +199,30 @@ class ContextCompressor(ContextEngine):
|
||||
budget = int(content_tokens * _SUMMARY_RATIO)
|
||||
return max(_MIN_SUMMARY_TOKENS, min(budget, self.max_summary_tokens))
|
||||
|
||||
# Truncation limits for the summarizer input. These bound how much of
|
||||
# each message the summary model sees — the budget is the *summary*
|
||||
# model's context window, not the main model's.
|
||||
_CONTENT_MAX = 6000 # total chars per message body
|
||||
_CONTENT_HEAD = 4000 # chars kept from the start
|
||||
_CONTENT_TAIL = 1500 # chars kept from the end
|
||||
_TOOL_ARGS_MAX = 1500 # tool call argument chars
|
||||
_TOOL_ARGS_HEAD = 1200 # kept from the start of tool args
|
||||
|
||||
def _serialize_for_summary(self, turns: List[Dict[str, Any]]) -> str:
|
||||
"""Serialize conversation turns into labeled text for the summarizer.
|
||||
|
||||
Includes tool call arguments and result content (up to
|
||||
``_CONTENT_MAX`` chars per message) so the summarizer can preserve
|
||||
specific details like file paths, commands, and outputs.
|
||||
Includes tool call arguments and result content (up to 3000 chars
|
||||
per message) so the summarizer can preserve specific details like
|
||||
file paths, commands, and outputs.
|
||||
"""
|
||||
parts = []
|
||||
for msg in turns:
|
||||
role = msg.get("role", "unknown")
|
||||
content = msg.get("content") or ""
|
||||
|
||||
# Tool results: keep enough content for the summarizer
|
||||
# Tool results: keep more content than before (3000 chars)
|
||||
if role == "tool":
|
||||
tool_id = msg.get("tool_call_id", "")
|
||||
if len(content) > self._CONTENT_MAX:
|
||||
content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
|
||||
if len(content) > 3000:
|
||||
content = content[:2000] + "\n...[truncated]...\n" + content[-800:]
|
||||
parts.append(f"[TOOL RESULT {tool_id}]: {content}")
|
||||
continue
|
||||
|
||||
# Assistant messages: include tool call names AND arguments
|
||||
if role == "assistant":
|
||||
if len(content) > self._CONTENT_MAX:
|
||||
content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
|
||||
if len(content) > 3000:
|
||||
content = content[:2000] + "\n...[truncated]...\n" + content[-800:]
|
||||
tool_calls = msg.get("tool_calls", [])
|
||||
if tool_calls:
|
||||
tc_parts = []
|
||||
@@ -277,8 +232,8 @@ class ContextCompressor(ContextEngine):
|
||||
name = fn.get("name", "?")
|
||||
args = fn.get("arguments", "")
|
||||
# Truncate long arguments but keep enough for context
|
||||
if len(args) > self._TOOL_ARGS_MAX:
|
||||
args = args[:self._TOOL_ARGS_HEAD] + "..."
|
||||
if len(args) > 500:
|
||||
args = args[:400] + "..."
|
||||
tc_parts.append(f" {name}({args})")
|
||||
else:
|
||||
fn = getattr(tc, "function", None)
|
||||
@@ -289,8 +244,8 @@ class ContextCompressor(ContextEngine):
|
||||
continue
|
||||
|
||||
# User and other roles
|
||||
if len(content) > self._CONTENT_MAX:
|
||||
content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
|
||||
if len(content) > 3000:
|
||||
content = content[:2000] + "\n...[truncated]...\n" + content[-800:]
|
||||
parts.append(f"[{role.upper()}]: {content}")
|
||||
|
||||
return "\n\n".join(parts)
|
||||
@@ -355,9 +310,6 @@ Update the summary using this exact structure. PRESERVE all existing information
|
||||
## Critical Context
|
||||
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
|
||||
|
||||
## Tools & Patterns
|
||||
[Which tools were used, how they were used effectively, and any tool-specific discoveries. Accumulate across compactions.]
|
||||
|
||||
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
|
||||
|
||||
Write only the summary body. Do not include any preamble or prefix."""
|
||||
@@ -396,9 +348,6 @@ Use this exact structure:
|
||||
## Critical Context
|
||||
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
|
||||
|
||||
## Tools & Patterns
|
||||
[Which tools were used, how they were used effectively, and any tool-specific discoveries (e.g., preferred flags, working invocations, successful command patterns)]
|
||||
|
||||
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions. The goal is to prevent the next assistant from repeating work or losing important details.
|
||||
|
||||
Write only the summary body. Do not include any preamble or prefix."""
|
||||
@@ -569,20 +518,13 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
derived from ``summary_target_ratio * context_length``, so it
|
||||
scales automatically with the model's context window.
|
||||
|
||||
Token budget is the primary criterion. A hard minimum of 3 messages
|
||||
is always protected, but the budget is allowed to exceed by up to
|
||||
1.5x to avoid cutting inside an oversized message (tool output, file
|
||||
read, etc.). If even the minimum 3 messages exceed 1.5x the budget
|
||||
the cut is placed right after the head so compression still runs.
|
||||
|
||||
Never cuts inside a tool_call/result group.
|
||||
Never cuts inside a tool_call/result group. Falls back to the old
|
||||
``protect_last_n`` if the budget would protect fewer messages.
|
||||
"""
|
||||
if token_budget is None:
|
||||
token_budget = self.tail_token_budget
|
||||
n = len(messages)
|
||||
# Hard minimum: always keep at least 3 messages in the tail
|
||||
min_tail = min(3, n - head_end - 1) if n - head_end > 1 else 0
|
||||
soft_ceiling = int(token_budget * 1.5)
|
||||
min_tail = self.protect_last_n
|
||||
accumulated = 0
|
||||
cut_idx = n # start from beyond the end
|
||||
|
||||
@@ -595,21 +537,21 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
if isinstance(tc, dict):
|
||||
args = tc.get("function", {}).get("arguments", "")
|
||||
msg_tokens += len(args) // _CHARS_PER_TOKEN
|
||||
# Stop once we exceed the soft ceiling (unless we haven't hit min_tail yet)
|
||||
if accumulated + msg_tokens > soft_ceiling and (n - i) >= min_tail:
|
||||
if accumulated + msg_tokens > token_budget and (n - i) >= min_tail:
|
||||
break
|
||||
accumulated += msg_tokens
|
||||
cut_idx = i
|
||||
|
||||
# Ensure we protect at least min_tail messages
|
||||
# Ensure we protect at least protect_last_n messages
|
||||
fallback_cut = n - min_tail
|
||||
if cut_idx > fallback_cut:
|
||||
cut_idx = fallback_cut
|
||||
|
||||
# If the token budget would protect everything (small conversations),
|
||||
# force a cut after the head so compression can still remove middle turns.
|
||||
# fall back to the fixed protect_last_n approach so compression can
|
||||
# still remove middle turns.
|
||||
if cut_idx <= head_end:
|
||||
cut_idx = max(fallback_cut, head_end + 1)
|
||||
cut_idx = fallback_cut
|
||||
|
||||
# Align to avoid splitting tool groups
|
||||
cut_idx = self._align_boundary_backward(messages, cut_idx)
|
||||
@@ -634,13 +576,12 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
up so the API never receives mismatched IDs.
|
||||
"""
|
||||
n_messages = len(messages)
|
||||
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
|
||||
_min_for_compress = self.protect_first_n + 3 + 1
|
||||
if n_messages <= _min_for_compress:
|
||||
if n_messages <= self.protect_first_n + self.protect_last_n + 1:
|
||||
if not self.quiet_mode:
|
||||
logger.warning(
|
||||
"Cannot compress: only %d messages (need > %d)",
|
||||
n_messages, _min_for_compress,
|
||||
n_messages,
|
||||
self.protect_first_n + self.protect_last_n + 1,
|
||||
)
|
||||
return messages
|
||||
|
||||
@@ -648,8 +589,7 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
|
||||
# Phase 1: Prune old tool results (cheap, no LLM call)
|
||||
messages, pruned_count = self._prune_old_tool_results(
|
||||
messages, protect_tail_count=self.protect_last_n,
|
||||
protect_tail_tokens=self.tail_token_budget,
|
||||
messages, protect_tail_count=self.protect_last_n * 3,
|
||||
)
|
||||
if pruned_count and not self.quiet_mode:
|
||||
logger.info("Pre-compression: pruned %d old tool result(s)", pruned_count)
|
||||
@@ -702,43 +642,33 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
)
|
||||
compressed.append(msg)
|
||||
|
||||
# If LLM summary failed, insert a static fallback so the model
|
||||
# knows context was lost rather than silently dropping everything.
|
||||
if not summary:
|
||||
if not self.quiet_mode:
|
||||
logger.warning("Summary generation failed — inserting static fallback context marker")
|
||||
n_dropped = compress_end - compress_start
|
||||
summary = (
|
||||
f"{SUMMARY_PREFIX}\n"
|
||||
f"Summary generation was unavailable. {n_dropped} conversation turns were "
|
||||
f"removed to free context space but could not be summarized. The removed "
|
||||
f"turns contained earlier work in this session. Continue based on the "
|
||||
f"recent messages below and the current state of any files or resources."
|
||||
)
|
||||
|
||||
_merge_summary_into_tail = False
|
||||
last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user"
|
||||
first_tail_role = messages[compress_end].get("role", "user") if compress_end < n_messages else "user"
|
||||
# Pick a role that avoids consecutive same-role with both neighbors.
|
||||
# Priority: avoid colliding with head (already committed), then tail.
|
||||
if last_head_role in ("assistant", "tool"):
|
||||
summary_role = "user"
|
||||
else:
|
||||
summary_role = "assistant"
|
||||
# If the chosen role collides with the tail AND flipping wouldn't
|
||||
# collide with the head, flip it.
|
||||
if summary_role == first_tail_role:
|
||||
flipped = "assistant" if summary_role == "user" else "user"
|
||||
if flipped != last_head_role:
|
||||
summary_role = flipped
|
||||
if summary:
|
||||
last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user"
|
||||
first_tail_role = messages[compress_end].get("role", "user") if compress_end < n_messages else "user"
|
||||
# Pick a role that avoids consecutive same-role with both neighbors.
|
||||
# Priority: avoid colliding with head (already committed), then tail.
|
||||
if last_head_role in ("assistant", "tool"):
|
||||
summary_role = "user"
|
||||
else:
|
||||
# Both roles would create consecutive same-role messages
|
||||
# (e.g. head=assistant, tail=user — neither role works).
|
||||
# Merge the summary into the first tail message instead
|
||||
# of inserting a standalone message that breaks alternation.
|
||||
_merge_summary_into_tail = True
|
||||
if not _merge_summary_into_tail:
|
||||
compressed.append({"role": summary_role, "content": summary})
|
||||
summary_role = "assistant"
|
||||
# If the chosen role collides with the tail AND flipping wouldn't
|
||||
# collide with the head, flip it.
|
||||
if summary_role == first_tail_role:
|
||||
flipped = "assistant" if summary_role == "user" else "user"
|
||||
if flipped != last_head_role:
|
||||
summary_role = flipped
|
||||
else:
|
||||
# Both roles would create consecutive same-role messages
|
||||
# (e.g. head=assistant, tail=user — neither role works).
|
||||
# Merge the summary into the first tail message instead
|
||||
# of inserting a standalone message that breaks alternation.
|
||||
_merge_summary_into_tail = True
|
||||
if not _merge_summary_into_tail:
|
||||
compressed.append({"role": summary_role, "content": summary})
|
||||
else:
|
||||
if not self.quiet_mode:
|
||||
logger.debug("No summary model available — middle turns dropped without summary")
|
||||
|
||||
for i in range(compress_end, n_messages):
|
||||
msg = messages[i].copy()
|
||||
|
||||
@@ -1,184 +0,0 @@
|
||||
"""Abstract base class for pluggable context engines.
|
||||
|
||||
A context engine controls how conversation context is managed when
|
||||
approaching the model's token limit. The built-in ContextCompressor
|
||||
is the default implementation. Third-party engines (e.g. LCM) can
|
||||
replace it via the plugin system or by being placed in the
|
||||
``plugins/context_engine/<name>/`` directory.
|
||||
|
||||
Selection is config-driven: ``context.engine`` in config.yaml.
|
||||
Default is ``"compressor"`` (the built-in). Only one engine is active.
|
||||
|
||||
The engine is responsible for:
|
||||
- Deciding when compaction should fire
|
||||
- Performing compaction (summarization, DAG construction, etc.)
|
||||
- Optionally exposing tools the agent can call (e.g. lcm_grep)
|
||||
- Tracking token usage from API responses
|
||||
|
||||
Lifecycle:
|
||||
1. Engine is instantiated and registered (plugin register() or default)
|
||||
2. on_session_start() called when a conversation begins
|
||||
3. update_from_response() called after each API response with usage data
|
||||
4. should_compress() checked after each turn
|
||||
5. compress() called when should_compress() returns True
|
||||
6. on_session_end() called at real session boundaries (CLI exit, /reset,
|
||||
gateway session expiry) — NOT per-turn
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
class ContextEngine(ABC):
|
||||
"""Base class all context engines must implement."""
|
||||
|
||||
# -- Identity ----------------------------------------------------------
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Short identifier (e.g. 'compressor', 'lcm')."""
|
||||
|
||||
# -- Token state (read by run_agent.py for display/logging) ------------
|
||||
#
|
||||
# Engines MUST maintain these. run_agent.py reads them directly.
|
||||
|
||||
last_prompt_tokens: int = 0
|
||||
last_completion_tokens: int = 0
|
||||
last_total_tokens: int = 0
|
||||
threshold_tokens: int = 0
|
||||
context_length: int = 0
|
||||
compression_count: int = 0
|
||||
|
||||
# -- Compaction parameters (read by run_agent.py for preflight) --------
|
||||
#
|
||||
# These control the preflight compression check. Subclasses may
|
||||
# override via __init__ or property; defaults are sensible for most
|
||||
# engines.
|
||||
|
||||
threshold_percent: float = 0.75
|
||||
protect_first_n: int = 3
|
||||
protect_last_n: int = 6
|
||||
|
||||
# -- Core interface ----------------------------------------------------
|
||||
|
||||
@abstractmethod
|
||||
def update_from_response(self, usage: Dict[str, Any]) -> None:
|
||||
"""Update tracked token usage from an API response.
|
||||
|
||||
Called after every LLM call with the usage dict from the response.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def should_compress(self, prompt_tokens: int = None) -> bool:
|
||||
"""Return True if compaction should fire this turn."""
|
||||
|
||||
@abstractmethod
|
||||
def compress(
|
||||
self,
|
||||
messages: List[Dict[str, Any]],
|
||||
current_tokens: int = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Compact the message list and return the new message list.
|
||||
|
||||
This is the main entry point. The engine receives the full message
|
||||
list and returns a (possibly shorter) list that fits within the
|
||||
context budget. The implementation is free to summarize, build a
|
||||
DAG, or do anything else — as long as the returned list is a valid
|
||||
OpenAI-format message sequence.
|
||||
"""
|
||||
|
||||
# -- Optional: pre-flight check ----------------------------------------
|
||||
|
||||
def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick rough check before the API call (no real token count yet).
|
||||
|
||||
Default returns False (skip pre-flight). Override if your engine
|
||||
can do a cheap estimate.
|
||||
"""
|
||||
return False
|
||||
|
||||
# -- Optional: session lifecycle ---------------------------------------
|
||||
|
||||
def on_session_start(self, session_id: str, **kwargs) -> None:
|
||||
"""Called when a new conversation session begins.
|
||||
|
||||
Use this to load persisted state (DAG, store) for the session.
|
||||
kwargs may include hermes_home, platform, model, etc.
|
||||
"""
|
||||
|
||||
def on_session_end(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
|
||||
"""Called at real session boundaries (CLI exit, /reset, gateway expiry).
|
||||
|
||||
Use this to flush state, close DB connections, etc.
|
||||
NOT called per-turn — only when the session truly ends.
|
||||
"""
|
||||
|
||||
def on_session_reset(self) -> None:
|
||||
"""Called on /new or /reset. Reset per-session state.
|
||||
|
||||
Default resets compression_count and token tracking.
|
||||
"""
|
||||
self.last_prompt_tokens = 0
|
||||
self.last_completion_tokens = 0
|
||||
self.last_total_tokens = 0
|
||||
self.compression_count = 0
|
||||
|
||||
# -- Optional: tools ---------------------------------------------------
|
||||
|
||||
def get_tool_schemas(self) -> List[Dict[str, Any]]:
|
||||
"""Return tool schemas this engine provides to the agent.
|
||||
|
||||
Default returns empty list (no tools). LCM would return schemas
|
||||
for lcm_grep, lcm_describe, lcm_expand here.
|
||||
"""
|
||||
return []
|
||||
|
||||
def handle_tool_call(self, name: str, args: Dict[str, Any], **kwargs) -> str:
|
||||
"""Handle a tool call from the agent.
|
||||
|
||||
Only called for tool names returned by get_tool_schemas().
|
||||
Must return a JSON string.
|
||||
|
||||
kwargs may include:
|
||||
messages: the current in-memory message list (for live ingestion)
|
||||
"""
|
||||
import json
|
||||
return json.dumps({"error": f"Unknown context engine tool: {name}"})
|
||||
|
||||
# -- Optional: status / display ----------------------------------------
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
|
||||
"""Return status dict for display/logging.
|
||||
|
||||
Default returns the standard fields run_agent.py expects.
|
||||
"""
|
||||
return {
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"threshold_tokens": self.threshold_tokens,
|
||||
"context_length": self.context_length,
|
||||
"usage_percent": (
|
||||
min(100, self.last_prompt_tokens / self.context_length * 100)
|
||||
if self.context_length else 0
|
||||
),
|
||||
"compression_count": self.compression_count,
|
||||
}
|
||||
|
||||
# -- Optional: model switch support ------------------------------------
|
||||
|
||||
def update_model(
|
||||
self,
|
||||
model: str,
|
||||
context_length: int,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
provider: str = "",
|
||||
) -> None:
|
||||
"""Called when the user switches models or on fallback activation.
|
||||
|
||||
Default updates context_length and recalculates threshold_tokens
|
||||
from threshold_percent. Override if your engine needs more
|
||||
(e.g. recalculate DAG budgets, switch summary models).
|
||||
"""
|
||||
self.context_length = context_length
|
||||
self.threshold_tokens = int(context_length * self.threshold_percent)
|
||||
+10
-38
@@ -13,9 +13,8 @@ from typing import Awaitable, Callable
|
||||
|
||||
from agent.model_metadata import estimate_tokens_rough
|
||||
|
||||
_QUOTED_REFERENCE_VALUE = r'(?:`[^`\n]+`|"[^"\n]+"|\'[^\'\n]+\')'
|
||||
REFERENCE_PATTERN = re.compile(
|
||||
rf"(?<![\w/])@(?:(?P<simple>diff|staged)\b|(?P<kind>file|folder|git|url):(?P<value>{_QUOTED_REFERENCE_VALUE}(?::\d+(?:-\d+)?)?|\S+))"
|
||||
r"(?<![\w/])@(?:(?P<simple>diff|staged)\b|(?P<kind>file|folder|git|url):(?P<value>\S+))"
|
||||
)
|
||||
TRAILING_PUNCTUATION = ",.;!?"
|
||||
_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube", ".docker", ".azure", ".config/gh")
|
||||
@@ -82,10 +81,14 @@ def parse_context_references(message: str) -> list[ContextReference]:
|
||||
value = _strip_trailing_punctuation(match.group("value") or "")
|
||||
line_start = None
|
||||
line_end = None
|
||||
target = _strip_reference_wrappers(value)
|
||||
target = value
|
||||
|
||||
if kind == "file":
|
||||
target, line_start, line_end = _parse_file_reference_value(value)
|
||||
range_match = re.match(r"^(?P<path>.+?):(?P<start>\d+)(?:-(?P<end>\d+))?$", value)
|
||||
if range_match:
|
||||
target = range_match.group("path")
|
||||
line_start = int(range_match.group("start"))
|
||||
line_end = int(range_match.group("end") or range_match.group("start"))
|
||||
|
||||
refs.append(
|
||||
ContextReference(
|
||||
@@ -340,9 +343,10 @@ def _resolve_path(cwd: Path, target: str, *, allowed_root: Path | None = None) -
|
||||
|
||||
|
||||
def _ensure_reference_path_allowed(path: Path) -> None:
|
||||
from hermes_constants import get_hermes_home
|
||||
home = Path(os.path.expanduser("~")).resolve()
|
||||
hermes_home = get_hermes_home().resolve()
|
||||
hermes_home = Path(
|
||||
os.getenv("HERMES_HOME", str(home / ".hermes"))
|
||||
).expanduser().resolve()
|
||||
|
||||
blocked_exact = {home / rel for rel in _SENSITIVE_HOME_FILES}
|
||||
blocked_exact.add(hermes_home / ".env")
|
||||
@@ -372,38 +376,6 @@ def _strip_trailing_punctuation(value: str) -> str:
|
||||
return stripped
|
||||
|
||||
|
||||
def _strip_reference_wrappers(value: str) -> str:
|
||||
if len(value) >= 2 and value[0] == value[-1] and value[0] in "`\"'":
|
||||
return value[1:-1]
|
||||
return value
|
||||
|
||||
|
||||
def _parse_file_reference_value(value: str) -> tuple[str, int | None, int | None]:
|
||||
quoted_match = re.match(
|
||||
r'^(?P<quote>`|"|\')(?P<path>.+?)(?P=quote)(?::(?P<start>\d+)(?:-(?P<end>\d+))?)?$',
|
||||
value,
|
||||
)
|
||||
if quoted_match:
|
||||
line_start = quoted_match.group("start")
|
||||
line_end = quoted_match.group("end")
|
||||
return (
|
||||
quoted_match.group("path"),
|
||||
int(line_start) if line_start is not None else None,
|
||||
int(line_end or line_start) if line_start is not None else None,
|
||||
)
|
||||
|
||||
range_match = re.match(r"^(?P<path>.+?):(?P<start>\d+)(?:-(?P<end>\d+))?$", value)
|
||||
if range_match:
|
||||
line_start = int(range_match.group("start"))
|
||||
return (
|
||||
range_match.group("path"),
|
||||
line_start,
|
||||
int(range_match.group("end") or range_match.group("start")),
|
||||
)
|
||||
|
||||
return _strip_reference_wrappers(value), None, None
|
||||
|
||||
|
||||
def _remove_reference_tokens(message: str, refs: list[ContextReference]) -> str:
|
||||
pieces: list[str] = []
|
||||
cursor = 0
|
||||
|
||||
+18
-180
@@ -10,26 +10,23 @@ import uuid
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, fields, replace
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
import hermes_cli.auth as auth_mod
|
||||
from hermes_cli.auth import (
|
||||
ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
|
||||
KIMI_CODE_BASE_URL,
|
||||
PROVIDER_REGISTRY,
|
||||
_auth_store_lock,
|
||||
_agent_key_is_usable,
|
||||
_codex_access_token_is_expiring,
|
||||
_decode_jwt_claims,
|
||||
_import_codex_cli_tokens,
|
||||
_is_expiring,
|
||||
_load_auth_store,
|
||||
_load_provider_state,
|
||||
_resolve_kimi_base_url,
|
||||
_resolve_zai_base_url,
|
||||
_save_auth_store,
|
||||
_save_provider_state,
|
||||
read_credential_pool,
|
||||
write_credential_pool,
|
||||
)
|
||||
@@ -69,10 +66,10 @@ SUPPORTED_POOL_STRATEGIES = {
|
||||
}
|
||||
|
||||
# Cooldown before retrying an exhausted credential.
|
||||
# 429 (rate-limited) and 402 (billing/quota) both cool down after 1 hour.
|
||||
# Provider-supplied reset_at timestamps override these defaults.
|
||||
# 429 (rate-limited) cools down faster since quotas reset frequently.
|
||||
# 402 (billing/quota) and other codes use a longer default.
|
||||
EXHAUSTED_TTL_429_SECONDS = 60 * 60 # 1 hour
|
||||
EXHAUSTED_TTL_DEFAULT_SECONDS = 60 * 60 # 1 hour
|
||||
EXHAUSTED_TTL_DEFAULT_SECONDS = 24 * 60 * 60 # 24 hours
|
||||
|
||||
# Pool key prefix for custom OpenAI-compatible endpoints.
|
||||
# Custom endpoints all share provider='custom' but are keyed by their
|
||||
@@ -351,9 +348,6 @@ def get_pool_strategy(provider: str) -> str:
|
||||
return STRATEGY_FILL_FIRST
|
||||
|
||||
|
||||
DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL = 1
|
||||
|
||||
|
||||
class CredentialPool:
|
||||
def __init__(self, provider: str, entries: List[PooledCredential]):
|
||||
self.provider = provider
|
||||
@@ -361,8 +355,6 @@ class CredentialPool:
|
||||
self._current_id: Optional[str] = None
|
||||
self._strategy = get_pool_strategy(provider)
|
||||
self._lock = threading.Lock()
|
||||
self._active_leases: Dict[str, int] = {}
|
||||
self._max_concurrent = DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL
|
||||
|
||||
def has_credentials(self) -> bool:
|
||||
return bool(self._entries)
|
||||
@@ -482,67 +474,6 @@ class CredentialPool:
|
||||
logger.debug("Failed to sync from ~/.codex/auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
|
||||
"""Write refreshed pool entry tokens back to auth.json providers.
|
||||
|
||||
After a pool-level refresh, the pool entry has fresh tokens but
|
||||
auth.json's ``providers.<id>`` still holds the pre-refresh state.
|
||||
On the next ``load_pool()``, ``_seed_from_singletons()`` reads that
|
||||
stale state and can overwrite the fresh pool entry — potentially
|
||||
re-seeding a consumed single-use refresh token.
|
||||
|
||||
Applies to any OAuth provider whose singleton lives in auth.json
|
||||
(currently Nous and OpenAI Codex).
|
||||
"""
|
||||
if entry.source != "device_code":
|
||||
return
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
if self.provider == "nous":
|
||||
state = _load_provider_state(auth_store, "nous")
|
||||
if state is None:
|
||||
return
|
||||
state["access_token"] = entry.access_token
|
||||
if entry.refresh_token:
|
||||
state["refresh_token"] = entry.refresh_token
|
||||
if entry.expires_at:
|
||||
state["expires_at"] = entry.expires_at
|
||||
if entry.agent_key:
|
||||
state["agent_key"] = entry.agent_key
|
||||
if entry.agent_key_expires_at:
|
||||
state["agent_key_expires_at"] = entry.agent_key_expires_at
|
||||
for extra_key in ("obtained_at", "expires_in", "agent_key_id",
|
||||
"agent_key_expires_in", "agent_key_reused",
|
||||
"agent_key_obtained_at"):
|
||||
val = entry.extra.get(extra_key)
|
||||
if val is not None:
|
||||
state[extra_key] = val
|
||||
if entry.inference_base_url:
|
||||
state["inference_base_url"] = entry.inference_base_url
|
||||
_save_provider_state(auth_store, "nous", state)
|
||||
|
||||
elif self.provider == "openai-codex":
|
||||
state = _load_provider_state(auth_store, "openai-codex")
|
||||
if not isinstance(state, dict):
|
||||
return
|
||||
tokens = state.get("tokens")
|
||||
if not isinstance(tokens, dict):
|
||||
return
|
||||
tokens["access_token"] = entry.access_token
|
||||
if entry.refresh_token:
|
||||
tokens["refresh_token"] = entry.refresh_token
|
||||
if entry.last_refresh:
|
||||
state["last_refresh"] = entry.last_refresh
|
||||
_save_provider_state(auth_store, "openai-codex", state)
|
||||
|
||||
else:
|
||||
return
|
||||
|
||||
_save_auth_store(auth_store)
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to sync %s pool entry back to auth store: %s", self.provider, exc)
|
||||
|
||||
def _refresh_entry(self, entry: PooledCredential, *, force: bool) -> Optional[PooledCredential]:
|
||||
if entry.auth_type != AUTH_TYPE_OAUTH or not entry.refresh_token:
|
||||
if force:
|
||||
@@ -577,13 +508,6 @@ class CredentialPool:
|
||||
except Exception as wexc:
|
||||
logger.debug("Failed to write refreshed token to credentials file: %s", wexc)
|
||||
elif self.provider == "openai-codex":
|
||||
# Proactively sync from ~/.codex/auth.json before refresh.
|
||||
# The Codex CLI (or another Hermes profile) may have already
|
||||
# consumed our refresh_token. Syncing first avoids a
|
||||
# "refresh_token_reused" error when the CLI has a newer pair.
|
||||
synced = self._sync_codex_entry_from_cli(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
refreshed = auth_mod.refresh_codex_oauth_pure(
|
||||
entry.access_token,
|
||||
entry.refresh_token,
|
||||
@@ -669,37 +593,6 @@ class CredentialPool:
|
||||
# Credentials file had a valid (non-expired) token — use it directly
|
||||
logger.debug("Credentials file has valid token, using without refresh")
|
||||
return synced
|
||||
# For openai-codex: the refresh_token may have been consumed by
|
||||
# the Codex CLI between our proactive sync and the refresh call.
|
||||
# Re-sync and retry once.
|
||||
if self.provider == "openai-codex":
|
||||
synced = self._sync_codex_entry_from_cli(entry)
|
||||
if synced.refresh_token != entry.refresh_token:
|
||||
logger.debug("Retrying Codex refresh with synced token from ~/.codex/auth.json")
|
||||
try:
|
||||
refreshed = auth_mod.refresh_codex_oauth_pure(
|
||||
synced.access_token,
|
||||
synced.refresh_token,
|
||||
)
|
||||
updated = replace(
|
||||
synced,
|
||||
access_token=refreshed["access_token"],
|
||||
refresh_token=refreshed["refresh_token"],
|
||||
last_refresh=refreshed.get("last_refresh"),
|
||||
last_status=STATUS_OK,
|
||||
last_status_at=None,
|
||||
last_error_code=None,
|
||||
)
|
||||
self._replace_entry(synced, updated)
|
||||
self._persist()
|
||||
self._sync_device_code_entry_to_auth_store(updated)
|
||||
return updated
|
||||
except Exception as retry_exc:
|
||||
logger.debug("Codex retry refresh also failed: %s", retry_exc)
|
||||
elif not self._entry_needs_refresh(synced):
|
||||
logger.debug("Codex CLI has valid token, using without refresh")
|
||||
self._sync_device_code_entry_to_auth_store(synced)
|
||||
return synced
|
||||
self._mark_exhausted(entry, None)
|
||||
return None
|
||||
|
||||
@@ -714,10 +607,6 @@ class CredentialPool:
|
||||
)
|
||||
self._replace_entry(entry, updated)
|
||||
self._persist()
|
||||
# Sync refreshed tokens back to auth.json providers so that
|
||||
# _seed_from_singletons() on the next load_pool() sees fresh state
|
||||
# instead of re-seeding stale/consumed tokens.
|
||||
self._sync_device_code_entry_to_auth_store(updated)
|
||||
return updated
|
||||
|
||||
def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
|
||||
@@ -739,6 +628,17 @@ class CredentialPool:
|
||||
return False
|
||||
return False
|
||||
|
||||
def mark_used(self, entry_id: Optional[str] = None) -> None:
|
||||
"""Increment request_count for tracking. Used by least_used strategy."""
|
||||
target_id = entry_id or self._current_id
|
||||
if not target_id:
|
||||
return
|
||||
with self._lock:
|
||||
for idx, entry in enumerate(self._entries):
|
||||
if entry.id == target_id:
|
||||
self._entries[idx] = replace(entry, request_count=entry.request_count + 1)
|
||||
return
|
||||
|
||||
def select(self) -> Optional[PooledCredential]:
|
||||
with self._lock:
|
||||
return self._select_unlocked()
|
||||
@@ -860,46 +760,6 @@ class CredentialPool:
|
||||
logger.info("credential pool: rotated to %s", _next_label)
|
||||
return next_entry
|
||||
|
||||
def acquire_lease(self, credential_id: Optional[str] = None) -> Optional[str]:
|
||||
"""Acquire a soft lease on a credential.
|
||||
|
||||
If a specific credential_id is provided, lease that entry directly.
|
||||
Otherwise prefer the least-leased available credential, using priority as
|
||||
a stable tie-breaker. When every credential is already at the soft cap,
|
||||
still return the least-leased one instead of blocking.
|
||||
"""
|
||||
with self._lock:
|
||||
if credential_id:
|
||||
self._active_leases[credential_id] = self._active_leases.get(credential_id, 0) + 1
|
||||
self._current_id = credential_id
|
||||
return credential_id
|
||||
|
||||
available = self._available_entries(clear_expired=True, refresh=True)
|
||||
if not available:
|
||||
return None
|
||||
|
||||
below_cap = [
|
||||
entry for entry in available
|
||||
if self._active_leases.get(entry.id, 0) < self._max_concurrent
|
||||
]
|
||||
candidates = below_cap if below_cap else available
|
||||
chosen = min(
|
||||
candidates,
|
||||
key=lambda entry: (self._active_leases.get(entry.id, 0), entry.priority),
|
||||
)
|
||||
self._active_leases[chosen.id] = self._active_leases.get(chosen.id, 0) + 1
|
||||
self._current_id = chosen.id
|
||||
return chosen.id
|
||||
|
||||
def release_lease(self, credential_id: str) -> None:
|
||||
"""Release a previously acquired credential lease."""
|
||||
with self._lock:
|
||||
count = self._active_leases.get(credential_id, 0)
|
||||
if count <= 1:
|
||||
self._active_leases.pop(credential_id, None)
|
||||
else:
|
||||
self._active_leases[credential_id] = count - 1
|
||||
|
||||
def try_refresh_current(self) -> Optional[PooledCredential]:
|
||||
with self._lock:
|
||||
return self._try_refresh_current_unlocked()
|
||||
@@ -1059,17 +919,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
auth_store = _load_auth_store()
|
||||
|
||||
if provider == "anthropic":
|
||||
# Only auto-discover external credentials (Claude Code, Hermes PKCE)
|
||||
# when the user has explicitly configured anthropic as their provider.
|
||||
# Without this gate, auxiliary client fallback chains silently read
|
||||
# ~/.claude/.credentials.json without user consent. See PR #4210.
|
||||
try:
|
||||
from hermes_cli.auth import is_provider_explicitly_configured
|
||||
if not is_provider_explicitly_configured("anthropic"):
|
||||
return changed, active_sources
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
from agent.anthropic_adapter import read_claude_code_credentials, read_hermes_oauth_credentials
|
||||
|
||||
for source_name, creds in (
|
||||
@@ -1077,13 +926,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
("claude_code", read_claude_code_credentials()),
|
||||
):
|
||||
if creds and creds.get("accessToken"):
|
||||
# Check if user explicitly removed this source
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed
|
||||
if is_source_suppressed(provider, source_name):
|
||||
continue
|
||||
except ImportError:
|
||||
pass
|
||||
active_sources.add(source_name)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
@@ -1194,10 +1036,6 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
active_sources.add(source)
|
||||
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
|
||||
base_url = env_url or pconfig.inference_base_url
|
||||
if provider == "kimi-coding":
|
||||
base_url = _resolve_kimi_base_url(token, pconfig.inference_base_url, env_url)
|
||||
elif provider == "zai":
|
||||
base_url = _resolve_zai_base_url(token, pconfig.inference_base_url, env_url)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
|
||||
+107
-73
@@ -21,73 +21,11 @@ _RESET = "\033[0m"
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_ANSI_RESET = "\033[0m"
|
||||
|
||||
# Diff colors — resolved lazily from the skin engine so they adapt
|
||||
# to light/dark themes. Falls back to sensible defaults on import
|
||||
# failure. We cache after first resolution for performance.
|
||||
_diff_colors_cached: dict[str, str] | None = None
|
||||
|
||||
|
||||
def _diff_ansi() -> dict[str, str]:
|
||||
"""Return ANSI escapes for diff display, resolved from the active skin."""
|
||||
global _diff_colors_cached
|
||||
if _diff_colors_cached is not None:
|
||||
return _diff_colors_cached
|
||||
|
||||
# Defaults that work on dark terminals
|
||||
dim = "\033[38;2;150;150;150m"
|
||||
file_c = "\033[38;2;180;160;255m"
|
||||
hunk = "\033[38;2;120;120;140m"
|
||||
minus = "\033[38;2;255;255;255;48;2;120;20;20m"
|
||||
plus = "\033[38;2;255;255;255;48;2;20;90;20m"
|
||||
|
||||
try:
|
||||
from hermes_cli.skin_engine import get_active_skin
|
||||
skin = get_active_skin()
|
||||
|
||||
def _hex_fg(key: str, fallback_rgb: tuple[int, int, int]) -> str:
|
||||
h = skin.get_color(key, "")
|
||||
if h and len(h) == 7 and h[0] == "#":
|
||||
r, g, b = int(h[1:3], 16), int(h[3:5], 16), int(h[5:7], 16)
|
||||
return f"\033[38;2;{r};{g};{b}m"
|
||||
r, g, b = fallback_rgb
|
||||
return f"\033[38;2;{r};{g};{b}m"
|
||||
|
||||
dim = _hex_fg("banner_dim", (150, 150, 150))
|
||||
file_c = _hex_fg("session_label", (180, 160, 255))
|
||||
hunk = _hex_fg("session_border", (120, 120, 140))
|
||||
# minus/plus use background colors — derive from ui_error/ui_ok
|
||||
err_h = skin.get_color("ui_error", "#ef5350")
|
||||
ok_h = skin.get_color("ui_ok", "#4caf50")
|
||||
if err_h and len(err_h) == 7:
|
||||
er, eg, eb = int(err_h[1:3], 16), int(err_h[3:5], 16), int(err_h[5:7], 16)
|
||||
# Use a dark tinted version as background
|
||||
minus = f"\033[38;2;255;255;255;48;2;{max(er//2,20)};{max(eg//4,10)};{max(eb//4,10)}m"
|
||||
if ok_h and len(ok_h) == 7:
|
||||
or_, og, ob = int(ok_h[1:3], 16), int(ok_h[3:5], 16), int(ok_h[5:7], 16)
|
||||
plus = f"\033[38;2;255;255;255;48;2;{max(or_//4,10)};{max(og//2,20)};{max(ob//4,10)}m"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_diff_colors_cached = {
|
||||
"dim": dim, "file": file_c, "hunk": hunk,
|
||||
"minus": minus, "plus": plus,
|
||||
}
|
||||
return _diff_colors_cached
|
||||
|
||||
|
||||
def reset_diff_colors() -> None:
|
||||
"""Reset cached diff colors (call after /skin switch)."""
|
||||
global _diff_colors_cached
|
||||
_diff_colors_cached = None
|
||||
|
||||
|
||||
# Module-level helpers — each call resolves from the active skin lazily.
|
||||
def _diff_dim(): return _diff_ansi()["dim"]
|
||||
def _diff_file(): return _diff_ansi()["file"]
|
||||
def _diff_hunk(): return _diff_ansi()["hunk"]
|
||||
def _diff_minus(): return _diff_ansi()["minus"]
|
||||
def _diff_plus(): return _diff_ansi()["plus"]
|
||||
_ANSI_DIM = "\033[38;2;150;150;150m"
|
||||
_ANSI_FILE = "\033[38;2;180;160;255m"
|
||||
_ANSI_HUNK = "\033[38;2;120;120;140m"
|
||||
_ANSI_MINUS = "\033[38;2;255;255;255;48;2;120;20;20m"
|
||||
_ANSI_PLUS = "\033[38;2;255;255;255;48;2;20;90;20m"
|
||||
_MAX_INLINE_DIFF_FILES = 6
|
||||
_MAX_INLINE_DIFF_LINES = 80
|
||||
|
||||
@@ -129,6 +67,26 @@ def _get_skin():
|
||||
return None
|
||||
|
||||
|
||||
def get_skin_faces(key: str, default: list) -> list:
|
||||
"""Get spinner face list from active skin, falling back to default."""
|
||||
skin = _get_skin()
|
||||
if skin:
|
||||
faces = skin.get_spinner_list(key)
|
||||
if faces:
|
||||
return faces
|
||||
return default
|
||||
|
||||
|
||||
def get_skin_verbs() -> list:
|
||||
"""Get thinking verbs from active skin."""
|
||||
skin = _get_skin()
|
||||
if skin:
|
||||
verbs = skin.get_spinner_list("thinking_verbs")
|
||||
if verbs:
|
||||
return verbs
|
||||
return KawaiiSpinner.THINKING_VERBS
|
||||
|
||||
|
||||
def get_skin_tool_prefix() -> str:
|
||||
"""Get tool output prefix character from active skin."""
|
||||
skin = _get_skin()
|
||||
@@ -465,19 +423,19 @@ def _render_inline_unified_diff(diff: str) -> list[str]:
|
||||
if raw_line.startswith("+++ "):
|
||||
to_file = raw_line[4:].strip()
|
||||
if from_file or to_file:
|
||||
rendered.append(f"{_diff_file()}{from_file or 'a/?'} → {to_file or 'b/?'}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_FILE}{from_file or 'a/?'} → {to_file or 'b/?'}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith("@@"):
|
||||
rendered.append(f"{_diff_hunk()}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_HUNK}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith("-"):
|
||||
rendered.append(f"{_diff_minus()}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_MINUS}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith("+"):
|
||||
rendered.append(f"{_diff_plus()}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_PLUS}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith(" "):
|
||||
rendered.append(f"{_diff_dim()}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_DIM}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line:
|
||||
rendered.append(raw_line)
|
||||
@@ -543,7 +501,7 @@ def _summarize_rendered_diff_sections(
|
||||
summary = f"… omitted {omitted_lines} diff line(s)"
|
||||
if omitted_files:
|
||||
summary += f" across {omitted_files} additional file(s)/section(s)"
|
||||
rendered.append(f"{_diff_hunk()}{summary}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_HUNK}{summary}{_ANSI_RESET}")
|
||||
|
||||
return rendered
|
||||
|
||||
@@ -765,6 +723,46 @@ class KawaiiSpinner:
|
||||
return False
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Kawaii face arrays (used by AIAgent._execute_tool_calls for spinner text)
|
||||
# =========================================================================
|
||||
|
||||
KAWAII_SEARCH = [
|
||||
"♪(´ε` )", "(。◕‿◕。)", "ヾ(^∇^)", "(◕ᴗ◕✿)", "( ˘▽˘)っ",
|
||||
"٩(◕‿◕。)۶", "(✿◠‿◠)", "♪~(´ε` )", "(ノ´ヮ`)ノ*:・゚✧", "\(◎o◎)/",
|
||||
]
|
||||
KAWAII_READ = [
|
||||
"φ(゜▽゜*)♪", "( ˘▽˘)っ", "(⌐■_■)", "٩(。•́‿•̀。)۶", "(◕‿◕✿)",
|
||||
"ヾ(@⌒ー⌒@)ノ", "(✧ω✧)", "♪(๑ᴖ◡ᴖ๑)♪", "(≧◡≦)", "( ´ ▽ ` )ノ",
|
||||
]
|
||||
KAWAII_TERMINAL = [
|
||||
"ヽ(>∀<☆)ノ", "(ノ°∀°)ノ", "٩(^ᴗ^)۶", "ヾ(⌐■_■)ノ♪", "(•̀ᴗ•́)و",
|
||||
"┗(^0^)┓", "(`・ω・´)", "\( ̄▽ ̄)/", "(ง •̀_•́)ง", "ヽ(´▽`)/",
|
||||
]
|
||||
KAWAII_BROWSER = [
|
||||
"(ノ°∀°)ノ", "(☞゚ヮ゚)☞", "( ͡° ͜ʖ ͡°)", "┌( ಠ_ಠ)┘", "(⊙_⊙)?",
|
||||
"ヾ(•ω•`)o", "( ̄ω ̄)", "( ˇωˇ )", "(ᵔᴥᵔ)", "\(◎o◎)/",
|
||||
]
|
||||
KAWAII_CREATE = [
|
||||
"✧*。٩(ˊᗜˋ*)و✧", "(ノ◕ヮ◕)ノ*:・゚✧", "ヽ(>∀<☆)ノ", "٩(♡ε♡)۶", "(◕‿◕)♡",
|
||||
"✿◕ ‿ ◕✿", "(*≧▽≦)", "ヾ(^-^)ノ", "(☆▽☆)", "°˖✧◝(⁰▿⁰)◜✧˖°",
|
||||
]
|
||||
KAWAII_SKILL = [
|
||||
"ヾ(@⌒ー⌒@)ノ", "(๑˃ᴗ˂)ﻭ", "٩(◕‿◕。)۶", "(✿╹◡╹)", "ヽ(・∀・)ノ",
|
||||
"(ノ´ヮ`)ノ*:・゚✧", "♪(๑ᴖ◡ᴖ๑)♪", "(◠‿◠)", "٩(ˊᗜˋ*)و", "(^▽^)",
|
||||
"ヾ(^∇^)", "(★ω★)/", "٩(。•́‿•̀。)۶", "(◕ᴗ◕✿)", "\(◎o◎)/",
|
||||
"(✧ω✧)", "ヽ(>∀<☆)ノ", "( ˘▽˘)っ", "(≧◡≦) ♡", "ヾ( ̄▽ ̄)",
|
||||
]
|
||||
KAWAII_THINK = [
|
||||
"(っ°Д°;)っ", "(;′⌒`)", "(・_・ヾ", "( ´_ゝ`)", "( ̄ヘ ̄)",
|
||||
"(。-`ω´-)", "( ˘︹˘ )", "(¬_¬)", "ヽ(ー_ー )ノ", "(;一_一)",
|
||||
]
|
||||
KAWAII_GENERIC = [
|
||||
"♪(´ε` )", "(◕‿◕✿)", "ヾ(^∇^)", "٩(◕‿◕。)۶", "(✿◠‿◠)",
|
||||
"(ノ´ヮ`)ノ*:・゚✧", "ヽ(>∀<☆)ノ", "(☆▽☆)", "( ˘▽˘)っ", "(≧◡≦)",
|
||||
]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Cute tool message (completion line that replaces the spinner)
|
||||
# =========================================================================
|
||||
@@ -892,6 +890,8 @@ def get_cute_tool_message(
|
||||
return _wrap(f"┊ ◀️ back {dur}")
|
||||
if tool_name == "browser_press":
|
||||
return _wrap(f"┊ ⌨️ press {args.get('key', '?')} {dur}")
|
||||
if tool_name == "browser_close":
|
||||
return _wrap(f"┊ 🚪 close browser {dur}")
|
||||
if tool_name == "browser_get_images":
|
||||
return _wrap(f"┊ 🖼️ images extracting {dur}")
|
||||
if tool_name == "browser_vision":
|
||||
@@ -972,6 +972,40 @@ _SKY_BLUE = "\033[38;5;117m"
|
||||
_ANSI_RESET = "\033[0m"
|
||||
|
||||
|
||||
def honcho_session_url(workspace: str, session_name: str) -> str:
|
||||
"""Build a Honcho app URL for a session."""
|
||||
from urllib.parse import quote
|
||||
return (
|
||||
f"https://app.honcho.dev/explore"
|
||||
f"?workspace={quote(workspace, safe='')}"
|
||||
f"&view=sessions"
|
||||
f"&session={quote(session_name, safe='')}"
|
||||
)
|
||||
|
||||
|
||||
def _osc8_link(url: str, text: str) -> str:
|
||||
"""OSC 8 terminal hyperlink (clickable in iTerm2, Ghostty, WezTerm, etc.)."""
|
||||
return f"\033]8;;{url}\033\\{text}\033]8;;\033\\"
|
||||
|
||||
|
||||
def honcho_session_line(workspace: str, session_name: str) -> str:
|
||||
"""One-line session indicator: `Honcho session: <clickable name>`."""
|
||||
url = honcho_session_url(workspace, session_name)
|
||||
linked_name = _osc8_link(url, f"{_SKY_BLUE}{session_name}{_ANSI_RESET}")
|
||||
return f"{_DIM}Honcho session:{_ANSI_RESET} {linked_name}"
|
||||
|
||||
|
||||
def write_tty(text: str) -> None:
|
||||
"""Write directly to /dev/tty, bypassing stdout capture."""
|
||||
try:
|
||||
fd = os.open("/dev/tty", os.O_WRONLY)
|
||||
os.write(fd, text.encode("utf-8"))
|
||||
os.close(fd)
|
||||
except OSError:
|
||||
sys.stdout.write(text)
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Context pressure display (CLI user-facing warnings)
|
||||
# =========================================================================
|
||||
|
||||
@@ -1,809 +0,0 @@
|
||||
"""API error classification for smart failover and recovery.
|
||||
|
||||
Provides a structured taxonomy of API errors and a priority-ordered
|
||||
classification pipeline that determines the correct recovery action
|
||||
(retry, rotate credential, fallback to another provider, compress
|
||||
context, or abort).
|
||||
|
||||
Replaces scattered inline string-matching with a centralized classifier
|
||||
that the main retry loop in run_agent.py consults for every API failure.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── Error taxonomy ──────────────────────────────────────────────────────
|
||||
|
||||
class FailoverReason(enum.Enum):
|
||||
"""Why an API call failed — determines recovery strategy."""
|
||||
|
||||
# Authentication / authorization
|
||||
auth = "auth" # Transient auth (401/403) — refresh/rotate
|
||||
auth_permanent = "auth_permanent" # Auth failed after refresh — abort
|
||||
|
||||
# Billing / quota
|
||||
billing = "billing" # 402 or confirmed credit exhaustion — rotate immediately
|
||||
rate_limit = "rate_limit" # 429 or quota-based throttling — backoff then rotate
|
||||
|
||||
# Server-side
|
||||
overloaded = "overloaded" # 503/529 — provider overloaded, backoff
|
||||
server_error = "server_error" # 500/502 — internal server error, retry
|
||||
|
||||
# Transport
|
||||
timeout = "timeout" # Connection/read timeout — rebuild client + retry
|
||||
|
||||
# Context / payload
|
||||
context_overflow = "context_overflow" # Context too large — compress, not failover
|
||||
payload_too_large = "payload_too_large" # 413 — compress payload
|
||||
|
||||
# Model
|
||||
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
|
||||
# Provider-specific
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
long_context_tier = "long_context_tier" # Anthropic "extra usage" tier gate
|
||||
|
||||
# Catch-all
|
||||
unknown = "unknown" # Unclassifiable — retry with backoff
|
||||
|
||||
|
||||
# ── Classification result ───────────────────────────────────────────────
|
||||
|
||||
@dataclass
|
||||
class ClassifiedError:
|
||||
"""Structured classification of an API error with recovery hints."""
|
||||
|
||||
reason: FailoverReason
|
||||
status_code: Optional[int] = None
|
||||
provider: Optional[str] = None
|
||||
model: Optional[str] = None
|
||||
message: str = ""
|
||||
error_context: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Recovery action hints — the retry loop checks these instead of
|
||||
# re-classifying the error itself.
|
||||
retryable: bool = True
|
||||
should_compress: bool = False
|
||||
should_rotate_credential: bool = False
|
||||
should_fallback: bool = False
|
||||
|
||||
@property
|
||||
def is_auth(self) -> bool:
|
||||
return self.reason in (FailoverReason.auth, FailoverReason.auth_permanent)
|
||||
|
||||
|
||||
|
||||
# ── Provider-specific patterns ──────────────────────────────────────────
|
||||
|
||||
# Patterns that indicate billing exhaustion (not transient rate limit)
|
||||
_BILLING_PATTERNS = [
|
||||
"insufficient credits",
|
||||
"insufficient_quota",
|
||||
"credit balance",
|
||||
"credits have been exhausted",
|
||||
"top up your credits",
|
||||
"payment required",
|
||||
"billing hard limit",
|
||||
"exceeded your current quota",
|
||||
"account is deactivated",
|
||||
"plan does not include",
|
||||
]
|
||||
|
||||
# Patterns that indicate rate limiting (transient, will resolve)
|
||||
_RATE_LIMIT_PATTERNS = [
|
||||
"rate limit",
|
||||
"rate_limit",
|
||||
"too many requests",
|
||||
"throttled",
|
||||
"requests per minute",
|
||||
"tokens per minute",
|
||||
"requests per day",
|
||||
"try again in",
|
||||
"please retry after",
|
||||
"resource_exhausted",
|
||||
"rate increased too quickly", # Alibaba/DashScope throttling
|
||||
]
|
||||
|
||||
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
|
||||
_USAGE_LIMIT_PATTERNS = [
|
||||
"usage limit",
|
||||
"quota",
|
||||
"limit exceeded",
|
||||
"key limit exceeded",
|
||||
]
|
||||
|
||||
# Patterns confirming usage limit is transient (not billing)
|
||||
_USAGE_LIMIT_TRANSIENT_SIGNALS = [
|
||||
"try again",
|
||||
"retry",
|
||||
"resets at",
|
||||
"reset in",
|
||||
"wait",
|
||||
"requests remaining",
|
||||
"periodic",
|
||||
"window",
|
||||
]
|
||||
|
||||
# Payload-too-large patterns detected from message text (no status_code attr).
|
||||
# Proxies and some backends embed the HTTP status in the error message.
|
||||
_PAYLOAD_TOO_LARGE_PATTERNS = [
|
||||
"request entity too large",
|
||||
"payload too large",
|
||||
"error code: 413",
|
||||
]
|
||||
|
||||
# Context overflow patterns
|
||||
_CONTEXT_OVERFLOW_PATTERNS = [
|
||||
"context length",
|
||||
"context size",
|
||||
"maximum context",
|
||||
"token limit",
|
||||
"too many tokens",
|
||||
"reduce the length",
|
||||
"exceeds the limit",
|
||||
"context window",
|
||||
"prompt is too long",
|
||||
"prompt exceeds max length",
|
||||
"max_tokens",
|
||||
"maximum number of tokens",
|
||||
# Chinese error messages (some providers return these)
|
||||
"超过最大长度",
|
||||
"上下文长度",
|
||||
]
|
||||
|
||||
# Model not found patterns
|
||||
_MODEL_NOT_FOUND_PATTERNS = [
|
||||
"is not a valid model",
|
||||
"invalid model",
|
||||
"model not found",
|
||||
"model_not_found",
|
||||
"does not exist",
|
||||
"no such model",
|
||||
"unknown model",
|
||||
"unsupported model",
|
||||
]
|
||||
|
||||
# Auth patterns (non-status-code signals)
|
||||
_AUTH_PATTERNS = [
|
||||
"invalid api key",
|
||||
"invalid_api_key",
|
||||
"authentication",
|
||||
"unauthorized",
|
||||
"forbidden",
|
||||
"invalid token",
|
||||
"token expired",
|
||||
"token revoked",
|
||||
"access denied",
|
||||
]
|
||||
|
||||
# Anthropic thinking block signature patterns
|
||||
_THINKING_SIG_PATTERNS = [
|
||||
"signature", # Combined with "thinking" check
|
||||
]
|
||||
|
||||
# Transport error type names
|
||||
_TRANSPORT_ERROR_TYPES = frozenset({
|
||||
"ReadTimeout", "ConnectTimeout", "PoolTimeout",
|
||||
"ConnectError", "RemoteProtocolError",
|
||||
"ConnectionError", "ConnectionResetError",
|
||||
"ConnectionAbortedError", "BrokenPipeError",
|
||||
"TimeoutError", "ReadError",
|
||||
"ServerDisconnectedError",
|
||||
# OpenAI SDK errors (not subclasses of Python builtins)
|
||||
"APIConnectionError",
|
||||
"APITimeoutError",
|
||||
})
|
||||
|
||||
# Server disconnect patterns (no status code, but transport-level)
|
||||
_SERVER_DISCONNECT_PATTERNS = [
|
||||
"server disconnected",
|
||||
"peer closed connection",
|
||||
"connection reset by peer",
|
||||
"connection was closed",
|
||||
"network connection lost",
|
||||
"unexpected eof",
|
||||
"incomplete chunked read",
|
||||
]
|
||||
|
||||
|
||||
# ── Classification pipeline ─────────────────────────────────────────────
|
||||
|
||||
def classify_api_error(
|
||||
error: Exception,
|
||||
*,
|
||||
provider: str = "",
|
||||
model: str = "",
|
||||
approx_tokens: int = 0,
|
||||
context_length: int = 200000,
|
||||
num_messages: int = 0,
|
||||
) -> ClassifiedError:
|
||||
"""Classify an API error into a structured recovery recommendation.
|
||||
|
||||
Priority-ordered pipeline:
|
||||
1. Special-case provider-specific patterns (thinking sigs, tier gates)
|
||||
2. HTTP status code + message-aware refinement
|
||||
3. Error code classification (from body)
|
||||
4. Message pattern matching (billing vs rate_limit vs context vs auth)
|
||||
5. Transport error heuristics
|
||||
6. Server disconnect + large session → context overflow
|
||||
7. Fallback: unknown (retryable with backoff)
|
||||
|
||||
Args:
|
||||
error: The exception from the API call.
|
||||
provider: Current provider name (e.g. "openrouter", "anthropic").
|
||||
model: Current model slug.
|
||||
approx_tokens: Approximate token count of the current context.
|
||||
context_length: Maximum context length for the current model.
|
||||
|
||||
Returns:
|
||||
ClassifiedError with reason and recovery action hints.
|
||||
"""
|
||||
status_code = _extract_status_code(error)
|
||||
error_type = type(error).__name__
|
||||
body = _extract_error_body(error)
|
||||
error_code = _extract_error_code(body)
|
||||
|
||||
# Build a comprehensive error message string for pattern matching.
|
||||
# str(error) alone may not include the body message (e.g. OpenAI SDK's
|
||||
# APIStatusError.__str__ returns the first arg, not the body). Append
|
||||
# the body message so patterns like "try again" in 402 disambiguation
|
||||
# are detected even when only present in the structured body.
|
||||
#
|
||||
# Also extract metadata.raw — OpenRouter wraps upstream provider errors
|
||||
# inside {"error": {"message": "Provider returned error", "metadata":
|
||||
# {"raw": "<actual error JSON>"}}} and the real error message (e.g.
|
||||
# "context length exceeded") is only in the inner JSON.
|
||||
_raw_msg = str(error).lower()
|
||||
_body_msg = ""
|
||||
_metadata_msg = ""
|
||||
if isinstance(body, dict):
|
||||
_err_obj = body.get("error", {})
|
||||
if isinstance(_err_obj, dict):
|
||||
_body_msg = (_err_obj.get("message") or "").lower()
|
||||
# Parse metadata.raw for wrapped provider errors
|
||||
_metadata = _err_obj.get("metadata", {})
|
||||
if isinstance(_metadata, dict):
|
||||
_raw_json = _metadata.get("raw") or ""
|
||||
if isinstance(_raw_json, str) and _raw_json.strip():
|
||||
try:
|
||||
import json
|
||||
_inner = json.loads(_raw_json)
|
||||
if isinstance(_inner, dict):
|
||||
_inner_err = _inner.get("error", {})
|
||||
if isinstance(_inner_err, dict):
|
||||
_metadata_msg = (_inner_err.get("message") or "").lower()
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
if not _body_msg:
|
||||
_body_msg = (body.get("message") or "").lower()
|
||||
# Combine all message sources for pattern matching
|
||||
parts = [_raw_msg]
|
||||
if _body_msg and _body_msg not in _raw_msg:
|
||||
parts.append(_body_msg)
|
||||
if _metadata_msg and _metadata_msg not in _raw_msg and _metadata_msg not in _body_msg:
|
||||
parts.append(_metadata_msg)
|
||||
error_msg = " ".join(parts)
|
||||
provider_lower = (provider or "").strip().lower()
|
||||
model_lower = (model or "").strip().lower()
|
||||
|
||||
def _result(reason: FailoverReason, **overrides) -> ClassifiedError:
|
||||
defaults = {
|
||||
"reason": reason,
|
||||
"status_code": status_code,
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
"message": _extract_message(error, body),
|
||||
}
|
||||
defaults.update(overrides)
|
||||
return ClassifiedError(**defaults)
|
||||
|
||||
# ── 1. Provider-specific patterns (highest priority) ────────────
|
||||
|
||||
# Anthropic thinking block signature invalid (400).
|
||||
# Don't gate on provider — OpenRouter proxies Anthropic errors, so the
|
||||
# provider may be "openrouter" even though the error is Anthropic-specific.
|
||||
# The message pattern ("signature" + "thinking") is unique enough.
|
||||
if (
|
||||
status_code == 400
|
||||
and "signature" in error_msg
|
||||
and "thinking" in error_msg
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.thinking_signature,
|
||||
retryable=True,
|
||||
should_compress=False,
|
||||
)
|
||||
|
||||
# Anthropic long-context tier gate (429 "extra usage" + "long context")
|
||||
if (
|
||||
status_code == 429
|
||||
and "extra usage" in error_msg
|
||||
and "long context" in error_msg
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.long_context_tier,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# ── 2. HTTP status code classification ──────────────────────────
|
||||
|
||||
if status_code is not None:
|
||||
classified = _classify_by_status(
|
||||
status_code, error_msg, error_code, body,
|
||||
provider=provider_lower, model=model_lower,
|
||||
approx_tokens=approx_tokens, context_length=context_length,
|
||||
num_messages=num_messages,
|
||||
result_fn=_result,
|
||||
)
|
||||
if classified is not None:
|
||||
return classified
|
||||
|
||||
# ── 3. Error code classification ────────────────────────────────
|
||||
|
||||
if error_code:
|
||||
classified = _classify_by_error_code(error_code, error_msg, _result)
|
||||
if classified is not None:
|
||||
return classified
|
||||
|
||||
# ── 4. Message pattern matching (no status code) ────────────────
|
||||
|
||||
classified = _classify_by_message(
|
||||
error_msg, error_type,
|
||||
approx_tokens=approx_tokens,
|
||||
context_length=context_length,
|
||||
result_fn=_result,
|
||||
)
|
||||
if classified is not None:
|
||||
return classified
|
||||
|
||||
# ── 5. Server disconnect + large session → context overflow ─────
|
||||
# Must come BEFORE generic transport error catch — a disconnect on
|
||||
# a large session is more likely context overflow than a transient
|
||||
# transport hiccup. Without this ordering, RemoteProtocolError
|
||||
# always maps to timeout regardless of session size.
|
||||
|
||||
is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
|
||||
if is_disconnect and not status_code:
|
||||
is_large = approx_tokens > context_length * 0.6 or approx_tokens > 120000 or num_messages > 200
|
||||
if is_large:
|
||||
return _result(
|
||||
FailoverReason.context_overflow,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 6. Transport / timeout heuristics ───────────────────────────
|
||||
|
||||
if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 7. Fallback: unknown ────────────────────────────────────────
|
||||
|
||||
return _result(FailoverReason.unknown, retryable=True)
|
||||
|
||||
|
||||
# ── Status code classification ──────────────────────────────────────────
|
||||
|
||||
def _classify_by_status(
|
||||
status_code: int,
|
||||
error_msg: str,
|
||||
error_code: str,
|
||||
body: dict,
|
||||
*,
|
||||
provider: str,
|
||||
model: str,
|
||||
approx_tokens: int,
|
||||
context_length: int,
|
||||
num_messages: int = 0,
|
||||
result_fn,
|
||||
) -> Optional[ClassifiedError]:
|
||||
"""Classify based on HTTP status code with message-aware refinement."""
|
||||
|
||||
if status_code == 401:
|
||||
# Not retryable on its own — credential pool rotation and
|
||||
# provider-specific refresh (Codex, Anthropic, Nous) run before
|
||||
# the retryability check in run_agent.py. If those succeed, the
|
||||
# loop `continue`s. If they fail, retryable=False ensures we
|
||||
# hit the client-error abort path (which tries fallback first).
|
||||
return result_fn(
|
||||
FailoverReason.auth,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if status_code == 403:
|
||||
# OpenRouter 403 "key limit exceeded" is actually billing
|
||||
if "key limit exceeded" in error_msg or "spending limit" in error_msg:
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
return result_fn(
|
||||
FailoverReason.auth,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if status_code == 402:
|
||||
return _classify_402(error_msg, result_fn)
|
||||
|
||||
if status_code == 404:
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
# Generic 404 — could be model or endpoint
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if status_code == 413:
|
||||
return result_fn(
|
||||
FailoverReason.payload_too_large,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
if status_code == 429:
|
||||
# Already checked long_context_tier above; this is a normal rate limit
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if status_code == 400:
|
||||
return _classify_400(
|
||||
error_msg, error_code, body,
|
||||
provider=provider, model=model,
|
||||
approx_tokens=approx_tokens,
|
||||
context_length=context_length,
|
||||
num_messages=num_messages,
|
||||
result_fn=result_fn,
|
||||
)
|
||||
|
||||
if status_code in (500, 502):
|
||||
return result_fn(FailoverReason.server_error, retryable=True)
|
||||
|
||||
if status_code in (503, 529):
|
||||
return result_fn(FailoverReason.overloaded, retryable=True)
|
||||
|
||||
# Other 4xx — non-retryable
|
||||
if 400 <= status_code < 500:
|
||||
return result_fn(
|
||||
FailoverReason.format_error,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Other 5xx — retryable
|
||||
if 500 <= status_code < 600:
|
||||
return result_fn(FailoverReason.server_error, retryable=True)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _classify_402(error_msg: str, result_fn) -> ClassifiedError:
|
||||
"""Disambiguate 402: billing exhaustion vs transient usage limit.
|
||||
|
||||
The key insight from OpenClaw: some 402s are transient rate limits
|
||||
disguised as payment errors. "Usage limit, try again in 5 minutes"
|
||||
is NOT a billing problem — it's a periodic quota that resets.
|
||||
"""
|
||||
# Check for transient usage-limit signals first
|
||||
has_usage_limit = any(p in error_msg for p in _USAGE_LIMIT_PATTERNS)
|
||||
has_transient_signal = any(p in error_msg for p in _USAGE_LIMIT_TRANSIENT_SIGNALS)
|
||||
|
||||
if has_usage_limit and has_transient_signal:
|
||||
# Transient quota — treat as rate limit, not billing
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Confirmed billing exhaustion
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
|
||||
def _classify_400(
|
||||
error_msg: str,
|
||||
error_code: str,
|
||||
body: dict,
|
||||
*,
|
||||
provider: str,
|
||||
model: str,
|
||||
approx_tokens: int,
|
||||
context_length: int,
|
||||
num_messages: int = 0,
|
||||
result_fn,
|
||||
) -> ClassifiedError:
|
||||
"""Classify 400 Bad Request — context overflow, format error, or generic."""
|
||||
|
||||
# Context overflow from 400
|
||||
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.context_overflow,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Some providers return rate limit / billing errors as 400 instead of 429/402.
|
||||
# Check these patterns before falling through to format_error.
|
||||
if any(p in error_msg for p in _RATE_LIMIT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
if any(p in error_msg for p in _BILLING_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Generic 400 + large session → probable context overflow
|
||||
# Anthropic sometimes returns a bare "Error" message when context is too large
|
||||
err_body_msg = ""
|
||||
if isinstance(body, dict):
|
||||
err_obj = body.get("error", {})
|
||||
if isinstance(err_obj, dict):
|
||||
err_body_msg = (err_obj.get("message") or "").strip().lower()
|
||||
# Responses API (and some providers) use flat body: {"message": "..."}
|
||||
if not err_body_msg:
|
||||
err_body_msg = (body.get("message") or "").strip().lower()
|
||||
is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
|
||||
is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
|
||||
|
||||
if is_generic and is_large:
|
||||
return result_fn(
|
||||
FailoverReason.context_overflow,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Non-retryable format error
|
||||
return result_fn(
|
||||
FailoverReason.format_error,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
|
||||
# ── Error code classification ───────────────────────────────────────────
|
||||
|
||||
def _classify_by_error_code(
|
||||
error_code: str, error_msg: str, result_fn,
|
||||
) -> Optional[ClassifiedError]:
|
||||
"""Classify by structured error codes from the response body."""
|
||||
code_lower = error_code.lower()
|
||||
|
||||
if code_lower in ("resource_exhausted", "throttled", "rate_limit_exceeded"):
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
)
|
||||
|
||||
if code_lower in ("insufficient_quota", "billing_not_active", "payment_required"):
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if code_lower in ("model_not_found", "model_not_available", "invalid_model"):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if code_lower in ("context_length_exceeded", "max_tokens_exceeded"):
|
||||
return result_fn(
|
||||
FailoverReason.context_overflow,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ── Message pattern classification ──────────────────────────────────────
|
||||
|
||||
def _classify_by_message(
|
||||
error_msg: str,
|
||||
error_type: str,
|
||||
*,
|
||||
approx_tokens: int,
|
||||
context_length: int,
|
||||
result_fn,
|
||||
) -> Optional[ClassifiedError]:
|
||||
"""Classify based on error message patterns when no status code is available."""
|
||||
|
||||
# Payload-too-large patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _PAYLOAD_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.payload_too_large,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Usage-limit patterns need the same disambiguation as 402: some providers
|
||||
# surface "usage limit" errors without an HTTP status code. A transient
|
||||
# signal ("try again", "resets at", …) means it's a periodic quota, not
|
||||
# billing exhaustion.
|
||||
has_usage_limit = any(p in error_msg for p in _USAGE_LIMIT_PATTERNS)
|
||||
if has_usage_limit:
|
||||
has_transient_signal = any(p in error_msg for p in _USAGE_LIMIT_TRANSIENT_SIGNALS)
|
||||
if has_transient_signal:
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Billing patterns
|
||||
if any(p in error_msg for p in _BILLING_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Rate limit patterns
|
||||
if any(p in error_msg for p in _RATE_LIMIT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Context overflow patterns
|
||||
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.context_overflow,
|
||||
retryable=True,
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Auth patterns
|
||||
# Auth errors should NOT be retried directly — the credential is invalid and
|
||||
# retrying with the same key will always fail. Set retryable=False so the
|
||||
# caller triggers credential rotation (should_rotate_credential=True) or
|
||||
# provider fallback rather than an immediate retry loop.
|
||||
if any(p in error_msg for p in _AUTH_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.auth,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Model not found patterns
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ── Helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
def _extract_status_code(error: Exception) -> Optional[int]:
|
||||
"""Walk the error and its cause chain to find an HTTP status code."""
|
||||
current = error
|
||||
for _ in range(5): # Max depth to prevent infinite loops
|
||||
code = getattr(current, "status_code", None)
|
||||
if isinstance(code, int):
|
||||
return code
|
||||
# Some SDKs use .status instead of .status_code
|
||||
code = getattr(current, "status", None)
|
||||
if isinstance(code, int) and 100 <= code < 600:
|
||||
return code
|
||||
# Walk cause chain
|
||||
cause = getattr(current, "__cause__", None) or getattr(current, "__context__", None)
|
||||
if cause is None or cause is current:
|
||||
break
|
||||
current = cause
|
||||
return None
|
||||
|
||||
|
||||
def _extract_error_body(error: Exception) -> dict:
|
||||
"""Extract the structured error body from an SDK exception."""
|
||||
body = getattr(error, "body", None)
|
||||
if isinstance(body, dict):
|
||||
return body
|
||||
# Some errors have .response.json()
|
||||
response = getattr(error, "response", None)
|
||||
if response is not None:
|
||||
try:
|
||||
json_body = response.json()
|
||||
if isinstance(json_body, dict):
|
||||
return json_body
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
|
||||
def _extract_error_code(body: dict) -> str:
|
||||
"""Extract an error code string from the response body."""
|
||||
if not body:
|
||||
return ""
|
||||
error_obj = body.get("error", {})
|
||||
if isinstance(error_obj, dict):
|
||||
code = error_obj.get("code") or error_obj.get("type") or ""
|
||||
if isinstance(code, str) and code.strip():
|
||||
return code.strip()
|
||||
# Top-level code
|
||||
code = body.get("code") or body.get("error_code") or ""
|
||||
if isinstance(code, (str, int)):
|
||||
return str(code).strip()
|
||||
return ""
|
||||
|
||||
|
||||
def _extract_message(error: Exception, body: dict) -> str:
|
||||
"""Extract the most informative error message."""
|
||||
# Try structured body first
|
||||
if body:
|
||||
error_obj = body.get("error", {})
|
||||
if isinstance(error_obj, dict):
|
||||
msg = error_obj.get("message", "")
|
||||
if isinstance(msg, str) and msg.strip():
|
||||
return msg.strip()[:500]
|
||||
msg = body.get("message", "")
|
||||
if isinstance(msg, str) and msg.strip():
|
||||
return msg.strip()[:500]
|
||||
# Fallback to str(error)
|
||||
return str(error)[:500]
|
||||
@@ -39,6 +39,15 @@ def _has_known_pricing(model_name: str, provider: str = None, base_url: str = No
|
||||
return has_known_pricing(model_name, provider=provider, base_url=base_url)
|
||||
|
||||
|
||||
def _get_pricing(model_name: str) -> Dict[str, float]:
|
||||
"""Look up pricing for a model. Uses fuzzy matching on model name.
|
||||
|
||||
Returns _DEFAULT_PRICING (zero cost) for unknown/custom models —
|
||||
we can't assume costs for self-hosted endpoints, local inference, etc.
|
||||
"""
|
||||
return get_pricing(model_name)
|
||||
|
||||
|
||||
def _estimate_cost(
|
||||
session_or_model: Dict[str, Any] | str,
|
||||
input_tokens: int = 0,
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
"""User-facing summaries for manual compression commands."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Sequence
|
||||
|
||||
|
||||
def summarize_manual_compression(
|
||||
before_messages: Sequence[dict[str, Any]],
|
||||
after_messages: Sequence[dict[str, Any]],
|
||||
before_tokens: int,
|
||||
after_tokens: int,
|
||||
) -> dict[str, Any]:
|
||||
"""Return consistent user-facing feedback for manual compression."""
|
||||
before_count = len(before_messages)
|
||||
after_count = len(after_messages)
|
||||
noop = list(after_messages) == list(before_messages)
|
||||
|
||||
if noop:
|
||||
headline = f"No changes from compression: {before_count} messages"
|
||||
if after_tokens == before_tokens:
|
||||
token_line = (
|
||||
f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)"
|
||||
)
|
||||
else:
|
||||
token_line = (
|
||||
f"Rough transcript estimate: ~{before_tokens:,} → "
|
||||
f"~{after_tokens:,} tokens"
|
||||
)
|
||||
else:
|
||||
headline = f"Compressed: {before_count} → {after_count} messages"
|
||||
token_line = (
|
||||
f"Rough transcript estimate: ~{before_tokens:,} → "
|
||||
f"~{after_tokens:,} tokens"
|
||||
)
|
||||
|
||||
note = None
|
||||
if not noop and after_count < before_count and after_tokens > before_tokens:
|
||||
note = (
|
||||
"Note: fewer messages can still raise this rough transcript estimate "
|
||||
"when compression rewrites the transcript into denser summaries."
|
||||
)
|
||||
|
||||
return {
|
||||
"noop": noop,
|
||||
"headline": headline,
|
||||
"token_line": token_line,
|
||||
"note": note,
|
||||
}
|
||||
@@ -34,7 +34,6 @@ import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.memory_provider import MemoryProvider
|
||||
from tools.registry import tool_error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -134,6 +133,11 @@ class MemoryManager:
|
||||
"""All registered providers in order."""
|
||||
return list(self._providers)
|
||||
|
||||
@property
|
||||
def provider_names(self) -> List[str]:
|
||||
"""Names of all registered providers."""
|
||||
return [p.name for p in self._providers]
|
||||
|
||||
def get_provider(self, name: str) -> Optional[MemoryProvider]:
|
||||
"""Get a provider by name, or None if not registered."""
|
||||
for p in self._providers:
|
||||
@@ -245,7 +249,7 @@ class MemoryManager:
|
||||
"""
|
||||
provider = self._tool_to_provider.get(tool_name)
|
||||
if provider is None:
|
||||
return tool_error(f"No memory provider handles tool '{tool_name}'")
|
||||
return json.dumps({"error": f"No memory provider handles tool '{tool_name}'"})
|
||||
try:
|
||||
return provider.handle_tool_call(tool_name, args, **kwargs)
|
||||
except Exception as e:
|
||||
@@ -253,7 +257,7 @@ class MemoryManager:
|
||||
"Memory provider '%s' handle_tool_call(%s) failed: %s",
|
||||
provider.name, tool_name, e,
|
||||
)
|
||||
return tool_error(f"Memory tool '{tool_name}' failed: {e}")
|
||||
return json.dumps({"error": f"Memory tool '{tool_name}' failed: {e}"})
|
||||
|
||||
# -- Lifecycle hooks -----------------------------------------------------
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
+4
-124
@@ -26,14 +26,12 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||
"qwen-oauth",
|
||||
"custom", "local",
|
||||
# Common aliases
|
||||
"google", "google-gemini", "google-ai-studio",
|
||||
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
|
||||
"github-models", "kimi", "moonshot", "claude", "deep-seek",
|
||||
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"qwen-portal",
|
||||
})
|
||||
|
||||
|
||||
@@ -113,31 +111,12 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"deepseek": 128000,
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
# Qwen — specific model families before the catch-all.
|
||||
# Official docs: https://help.aliyun.com/zh/model-studio/developer-reference/
|
||||
"qwen3-coder-plus": 1000000, # 1M context
|
||||
"qwen3-coder": 262144, # 256K context
|
||||
# Qwen
|
||||
"qwen": 131072,
|
||||
# MiniMax — official docs: 204,800 context for all models
|
||||
# https://platform.minimax.io/docs/api-reference/text-anthropic-api
|
||||
# MiniMax
|
||||
"minimax": 204800,
|
||||
# GLM
|
||||
"glm": 202752,
|
||||
# xAI Grok — xAI /v1/models does not return context_length metadata,
|
||||
# so these hardcoded fallbacks prevent Hermes from probing-down to
|
||||
# the default 128k when the user points at https://api.x.ai/v1
|
||||
# via a custom provider. Values sourced from models.dev (2026-04).
|
||||
# Keys use substring matching (longest-first), so e.g. "grok-4.20"
|
||||
# matches "grok-4.20-0309-reasoning" / "-non-reasoning" / "-multi-agent-0309".
|
||||
"grok-code-fast": 256000, # grok-code-fast-1
|
||||
"grok-4-1-fast": 2000000, # grok-4-1-fast-(non-)reasoning
|
||||
"grok-2-vision": 8192, # grok-2-vision, -1212, -latest
|
||||
"grok-4-fast": 2000000, # grok-4-fast-(non-)reasoning
|
||||
"grok-4.20": 2000000, # grok-4.20-0309-(non-)reasoning, -multi-agent-0309
|
||||
"grok-4": 256000, # grok-4, grok-4-0709
|
||||
"grok-3": 131072, # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast
|
||||
"grok-2": 131072, # grok-2, grok-2-1212, grok-2-latest
|
||||
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
|
||||
# Kimi
|
||||
"kimi": 262144,
|
||||
# Arcee
|
||||
@@ -201,7 +180,6 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"api.minimax": "minimax",
|
||||
"dashscope.aliyuncs.com": "alibaba",
|
||||
"dashscope-intl.aliyuncs.com": "alibaba",
|
||||
"portal.qwen.ai": "qwen-oauth",
|
||||
"openrouter.ai": "openrouter",
|
||||
"generativelanguage.googleapis.com": "gemini",
|
||||
"inference-api.nousresearch.com": "nous",
|
||||
@@ -209,8 +187,6 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"api.githubcopilot.com": "copilot",
|
||||
"models.github.ai": "copilot",
|
||||
"api.fireworks.ai": "fireworks",
|
||||
"opencode.ai": "opencode-go",
|
||||
"api.x.ai": "xai",
|
||||
}
|
||||
|
||||
|
||||
@@ -534,8 +510,8 @@ def fetch_endpoint_model_metadata(
|
||||
|
||||
def _get_context_cache_path() -> Path:
|
||||
"""Return path to the persistent context length cache file."""
|
||||
from hermes_constants import get_hermes_home
|
||||
return get_hermes_home() / "context_length_cache.yaml"
|
||||
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
||||
return hermes_home / "context_length_cache.yaml"
|
||||
|
||||
|
||||
def _load_context_cache() -> Dict[str, int]:
|
||||
@@ -616,49 +592,6 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
|
||||
"""Detect an "output cap too large" error and return how many output tokens are available.
|
||||
|
||||
Background — two distinct context errors exist:
|
||||
1. "Prompt too long" — the INPUT itself exceeds the context window.
|
||||
Fix: compress history and/or halve context_length.
|
||||
2. "max_tokens too large" — input is fine, but input + requested_output > window.
|
||||
Fix: reduce max_tokens (the output cap) for this call.
|
||||
Do NOT touch context_length — the window hasn't shrunk.
|
||||
|
||||
Anthropic's API returns errors like:
|
||||
"max_tokens: 32768 > context_window: 200000 - input_tokens: 190000 = available_tokens: 10000"
|
||||
|
||||
Returns the number of output tokens that would fit (e.g. 10000 above), or None if
|
||||
the error does not look like a max_tokens-too-large error.
|
||||
"""
|
||||
error_lower = error_msg.lower()
|
||||
|
||||
# Must look like an output-cap error, not a prompt-length error.
|
||||
is_output_cap_error = (
|
||||
"max_tokens" in error_lower
|
||||
and ("available_tokens" in error_lower or "available tokens" in error_lower)
|
||||
)
|
||||
if not is_output_cap_error:
|
||||
return None
|
||||
|
||||
# Extract the available_tokens figure.
|
||||
# Anthropic format: "… = available_tokens: 10000"
|
||||
patterns = [
|
||||
r'available_tokens[:\s]+(\d+)',
|
||||
r'available\s+tokens[:\s]+(\d+)',
|
||||
# fallback: last number after "=" in expressions like "200000 - 190000 = 10000"
|
||||
r'=\s*(\d+)\s*$',
|
||||
]
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, error_lower)
|
||||
if match:
|
||||
tokens = int(match.group(1))
|
||||
if tokens >= 1:
|
||||
return tokens
|
||||
return None
|
||||
|
||||
|
||||
def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
|
||||
"""Return True if *candidate_id* (from server) matches *lookup_model* (configured).
|
||||
|
||||
@@ -678,59 +611,6 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
|
||||
"""Query an Ollama server for the model's context length.
|
||||
|
||||
Returns the model's maximum context from GGUF metadata via ``/api/show``,
|
||||
or the explicit ``num_ctx`` from the Modelfile if set. Returns None if
|
||||
the server is unreachable or not Ollama.
|
||||
|
||||
This is the value that should be passed as ``num_ctx`` in Ollama chat
|
||||
requests to override the default 2048.
|
||||
"""
|
||||
import httpx
|
||||
|
||||
bare_model = _strip_provider_prefix(model)
|
||||
server_url = base_url.rstrip("/")
|
||||
if server_url.endswith("/v1"):
|
||||
server_url = server_url[:-3]
|
||||
|
||||
try:
|
||||
server_type = detect_local_server_type(base_url)
|
||||
except Exception:
|
||||
return None
|
||||
if server_type != "ollama":
|
||||
return None
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=3.0) as client:
|
||||
resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
data = resp.json()
|
||||
|
||||
# Prefer explicit num_ctx from Modelfile parameters (user override)
|
||||
params = data.get("parameters", "")
|
||||
if "num_ctx" in params:
|
||||
for line in params.split("\n"):
|
||||
if "num_ctx" in line:
|
||||
parts = line.strip().split()
|
||||
if len(parts) >= 2:
|
||||
try:
|
||||
return int(parts[-1])
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# Fall back to GGUF model_info context_length (training max)
|
||||
model_info = data.get("model_info", {})
|
||||
for key, value in model_info.items():
|
||||
if "context_length" in key and isinstance(value, (int, float)):
|
||||
return int(value)
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
|
||||
"""Query a local server for the model's context length."""
|
||||
import httpx
|
||||
|
||||
+117
-6
@@ -23,9 +23,9 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from utils import atomic_json_write
|
||||
|
||||
@@ -135,6 +135,9 @@ class ProviderInfo:
|
||||
doc: str = "" # documentation URL
|
||||
model_count: int = 0
|
||||
|
||||
def has_api_url(self) -> bool:
|
||||
return bool(self.api)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider ID mapping: Hermes ↔ models.dev
|
||||
@@ -150,7 +153,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"minimax-cn": "minimax-cn",
|
||||
"deepseek": "deepseek",
|
||||
"alibaba": "alibaba",
|
||||
"qwen-oauth": "alibaba",
|
||||
"copilot": "github-copilot",
|
||||
"ai-gateway": "vercel",
|
||||
"opencode-zen": "opencode",
|
||||
@@ -183,8 +185,9 @@ def _get_reverse_mapping() -> Dict[str, str]:
|
||||
|
||||
def _get_cache_path() -> Path:
|
||||
"""Return path to disk cache file."""
|
||||
from hermes_constants import get_hermes_home
|
||||
return get_hermes_home() / "models_dev_cache.json"
|
||||
env_val = os.environ.get("HERMES_HOME", "")
|
||||
hermes_home = Path(env_val) if env_val else Path.home() / ".hermes"
|
||||
return hermes_home / "models_dev_cache.json"
|
||||
|
||||
|
||||
def _load_disk_cache() -> Dict[str, Any]:
|
||||
@@ -228,7 +231,7 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
|
||||
response = requests.get(MODELS_DEV_URL, timeout=15)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
if isinstance(data, dict) and data:
|
||||
if isinstance(data, dict) and len(data) > 0:
|
||||
_models_dev_cache = data
|
||||
_models_dev_cache_time = time.time()
|
||||
_save_disk_cache(data)
|
||||
@@ -631,6 +634,43 @@ def get_provider_info(provider_id: str) -> Optional[ProviderInfo]:
|
||||
return _parse_provider_info(mdev_id, raw)
|
||||
|
||||
|
||||
def list_all_providers() -> Dict[str, ProviderInfo]:
|
||||
"""Return all providers from models.dev as {provider_id: ProviderInfo}.
|
||||
|
||||
Returns the full catalog — 109+ providers. For providers that have
|
||||
a Hermes alias, both the models.dev ID and the Hermes ID are included.
|
||||
"""
|
||||
data = fetch_models_dev()
|
||||
result: Dict[str, ProviderInfo] = {}
|
||||
|
||||
for pid, pdata in data.items():
|
||||
if isinstance(pdata, dict):
|
||||
info = _parse_provider_info(pid, pdata)
|
||||
result[pid] = info
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_providers_for_env_var(env_var: str) -> List[str]:
|
||||
"""Reverse lookup: find all providers that use a given env var.
|
||||
|
||||
Useful for auto-detection: "user has ANTHROPIC_API_KEY set, which
|
||||
providers does that enable?"
|
||||
|
||||
Returns list of models.dev provider IDs.
|
||||
"""
|
||||
data = fetch_models_dev()
|
||||
matches: List[str] = []
|
||||
|
||||
for pid, pdata in data.items():
|
||||
if isinstance(pdata, dict):
|
||||
env = pdata.get("env", [])
|
||||
if isinstance(env, list) and env_var in env:
|
||||
matches.append(pid)
|
||||
|
||||
return matches
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model-level queries (rich ModelInfo)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -668,3 +708,74 @@ def get_model_info(
|
||||
return None
|
||||
|
||||
|
||||
def get_model_info_any_provider(model_id: str) -> Optional[ModelInfo]:
|
||||
"""Search all providers for a model by ID.
|
||||
|
||||
Useful when you have a full slug like "anthropic/claude-sonnet-4.6" or
|
||||
a bare name and want to find it anywhere. Checks Hermes-mapped providers
|
||||
first, then falls back to all models.dev providers.
|
||||
"""
|
||||
data = fetch_models_dev()
|
||||
|
||||
# Try Hermes-mapped providers first (more likely what the user wants)
|
||||
for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
|
||||
pdata = data.get(mdev_id)
|
||||
if not isinstance(pdata, dict):
|
||||
continue
|
||||
models = pdata.get("models", {})
|
||||
if not isinstance(models, dict):
|
||||
continue
|
||||
|
||||
raw = models.get(model_id)
|
||||
if isinstance(raw, dict):
|
||||
return _parse_model_info(model_id, raw, mdev_id)
|
||||
|
||||
# Case-insensitive
|
||||
model_lower = model_id.lower()
|
||||
for mid, mdata in models.items():
|
||||
if mid.lower() == model_lower and isinstance(mdata, dict):
|
||||
return _parse_model_info(mid, mdata, mdev_id)
|
||||
|
||||
# Fall back to ALL providers
|
||||
for pid, pdata in data.items():
|
||||
if pid in _get_reverse_mapping():
|
||||
continue # already checked
|
||||
if not isinstance(pdata, dict):
|
||||
continue
|
||||
models = pdata.get("models", {})
|
||||
if not isinstance(models, dict):
|
||||
continue
|
||||
|
||||
raw = models.get(model_id)
|
||||
if isinstance(raw, dict):
|
||||
return _parse_model_info(model_id, raw, pid)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def list_provider_model_infos(provider_id: str) -> List[ModelInfo]:
|
||||
"""Return all models for a provider as ModelInfo objects.
|
||||
|
||||
Filters out deprecated models by default.
|
||||
"""
|
||||
mdev_id = PROVIDER_TO_MODELS_DEV.get(provider_id, provider_id)
|
||||
|
||||
data = fetch_models_dev()
|
||||
pdata = data.get(mdev_id)
|
||||
if not isinstance(pdata, dict):
|
||||
return []
|
||||
|
||||
models = pdata.get("models", {})
|
||||
if not isinstance(models, dict):
|
||||
return []
|
||||
|
||||
result: List[ModelInfo] = []
|
||||
for mid, mdata in models.items():
|
||||
if not isinstance(mdata, dict):
|
||||
continue
|
||||
status = mdata.get("status", "")
|
||||
if status == "deprecated":
|
||||
continue
|
||||
result.append(_parse_model_info(mid, mdata, mdev_id))
|
||||
|
||||
return result
|
||||
|
||||
+18
-46
@@ -40,7 +40,7 @@ _CONTEXT_THREAT_PATTERNS = [
|
||||
(r'disregard\s+(your|all|any)\s+(instructions|rules|guidelines)', "disregard_rules"),
|
||||
(r'act\s+as\s+(if|though)\s+you\s+(have\s+no|don\'t\s+have)\s+(restrictions|limits|rules)', "bypass_restrictions"),
|
||||
(r'<!--[^>]*(?:ignore|override|system|secret|hidden)[^>]*-->', "html_comment_injection"),
|
||||
(r'<\s*div\s+style\s*=\s*["\'][\s\S]*?display\s*:\s*none', "hidden_div"),
|
||||
(r'<\s*div\s+style\s*=\s*["\'].*display\s*:\s*none', "hidden_div"),
|
||||
(r'translate\s+.*\s+into\s+.*\s+and\s+(execute|run|eval)', "translate_execute"),
|
||||
(r'curl\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)', "exfil_curl"),
|
||||
(r'cat\s+[^\n]*(\.env|credentials|\.netrc|\.pgpass)', "read_secrets"),
|
||||
@@ -204,30 +204,6 @@ OPENAI_MODEL_EXECUTION_GUIDANCE = (
|
||||
"the result.\n"
|
||||
"</tool_persistence>\n"
|
||||
"\n"
|
||||
"<mandatory_tool_use>\n"
|
||||
"NEVER answer these from memory or mental computation — ALWAYS use a tool:\n"
|
||||
"- Arithmetic, math, calculations → use terminal or execute_code\n"
|
||||
"- Hashes, encodings, checksums → use terminal (e.g. sha256sum, base64)\n"
|
||||
"- Current time, date, timezone → use terminal (e.g. date)\n"
|
||||
"- System state: OS, CPU, memory, disk, ports, processes → use terminal\n"
|
||||
"- File contents, sizes, line counts → use read_file, search_files, or terminal\n"
|
||||
"- Git history, branches, diffs → use terminal\n"
|
||||
"- Current facts (weather, news, versions) → use web_search\n"
|
||||
"Your memory and user profile describe the USER, not the system you are "
|
||||
"running on. The execution environment may differ from what the user profile "
|
||||
"says about their personal setup.\n"
|
||||
"</mandatory_tool_use>\n"
|
||||
"\n"
|
||||
"<act_dont_ask>\n"
|
||||
"When a question has an obvious default interpretation, act on it immediately "
|
||||
"instead of asking for clarification. Examples:\n"
|
||||
"- 'Is port 443 open?' → check THIS machine (don't ask 'open where?')\n"
|
||||
"- 'What OS am I running?' → check the live system (don't use user profile)\n"
|
||||
"- 'What time is it?' → run `date` (don't guess)\n"
|
||||
"Only ask for clarification when the ambiguity genuinely changes what tool "
|
||||
"you would call.\n"
|
||||
"</act_dont_ask>\n"
|
||||
"\n"
|
||||
"<prerequisite_checks>\n"
|
||||
"- Before taking an action, check whether prerequisite discovery, lookup, or "
|
||||
"context-gathering steps are needed.\n"
|
||||
@@ -349,21 +325,6 @@ PLATFORM_HINTS = {
|
||||
"only — no markdown, no formatting. SMS messages are limited to ~1600 "
|
||||
"characters, so be brief and direct."
|
||||
),
|
||||
"bluebubbles": (
|
||||
"You are chatting via iMessage (BlueBubbles). iMessage does not render "
|
||||
"markdown formatting — use plain text. Keep responses concise as they "
|
||||
"appear as text messages. You can send media files natively: include "
|
||||
"MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
|
||||
".heic) appear as photos and other files arrive as attachments."
|
||||
),
|
||||
"weixin": (
|
||||
"You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
|
||||
"it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
|
||||
"include MEDIA:/absolute/path/to/file in your response. Images are sent as native "
|
||||
"photos, videos play inline when supported, and other files arrive as downloadable "
|
||||
"documents. You can also include image URLs in markdown format  and they "
|
||||
"will be downloaded and sent as native media when possible."
|
||||
),
|
||||
}
|
||||
|
||||
CONTEXT_FILE_MAX_CHARS = 20_000
|
||||
@@ -487,7 +448,7 @@ def _parse_skill_file(skill_file: Path) -> tuple[bool, dict, str]:
|
||||
(True, {}, "") to err on the side of showing the skill.
|
||||
"""
|
||||
try:
|
||||
raw = skill_file.read_text(encoding="utf-8")
|
||||
raw = skill_file.read_text(encoding="utf-8")[:2000]
|
||||
frontmatter, _ = parse_frontmatter(raw)
|
||||
|
||||
if not skill_matches_platform(frontmatter):
|
||||
@@ -495,10 +456,21 @@ def _parse_skill_file(skill_file: Path) -> tuple[bool, dict, str]:
|
||||
|
||||
return True, frontmatter, extract_skill_description(frontmatter)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to parse skill file %s: %s", skill_file, e)
|
||||
logger.debug("Failed to parse skill file %s: %s", skill_file, e)
|
||||
return True, {}, ""
|
||||
|
||||
|
||||
def _read_skill_conditions(skill_file: Path) -> dict:
|
||||
"""Extract conditional activation fields from SKILL.md frontmatter."""
|
||||
try:
|
||||
raw = skill_file.read_text(encoding="utf-8")[:2000]
|
||||
frontmatter, _ = parse_frontmatter(raw)
|
||||
return extract_skill_conditions(frontmatter)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to read skill conditions from %s: %s", skill_file, e)
|
||||
return {}
|
||||
|
||||
|
||||
def _skill_should_show(
|
||||
conditions: dict,
|
||||
available_tools: "set[str] | None",
|
||||
@@ -558,10 +530,9 @@ def build_skills_system_prompt(
|
||||
# ── Layer 1: in-process LRU cache ─────────────────────────────────
|
||||
# Include the resolved platform so per-platform disabled-skill lists
|
||||
# produce distinct cache entries (gateway serves multiple platforms).
|
||||
from gateway.session_context import get_session_env
|
||||
_platform_hint = (
|
||||
os.environ.get("HERMES_PLATFORM")
|
||||
or get_session_env("HERMES_SESSION_PLATFORM")
|
||||
or os.environ.get("HERMES_SESSION_PLATFORM")
|
||||
or ""
|
||||
)
|
||||
cache_key = (
|
||||
@@ -773,6 +744,7 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
"browser_type",
|
||||
"browser_scroll",
|
||||
"browser_console",
|
||||
"browser_close",
|
||||
"browser_press",
|
||||
"browser_get_images",
|
||||
"browser_vision",
|
||||
@@ -802,13 +774,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
|
||||
lines = [
|
||||
"# Nous Subscription",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browserbase) by default. Modal execution is optional.",
|
||||
"Current capability status:",
|
||||
]
|
||||
lines.extend(_status_line(feature) for feature in features.items())
|
||||
lines.extend(
|
||||
[
|
||||
"When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.",
|
||||
"When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys.",
|
||||
"If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
|
||||
"Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
|
||||
"Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
|
||||
|
||||
@@ -1,246 +0,0 @@
|
||||
"""Rate limit tracking for inference API responses.
|
||||
|
||||
Captures x-ratelimit-* headers from provider responses and provides
|
||||
formatted display for the /usage slash command. Currently supports
|
||||
the Nous Portal header format (also used by OpenRouter and OpenAI-compatible
|
||||
APIs that follow the same convention).
|
||||
|
||||
Header schema (12 headers total):
|
||||
x-ratelimit-limit-requests RPM cap
|
||||
x-ratelimit-limit-requests-1h RPH cap
|
||||
x-ratelimit-limit-tokens TPM cap
|
||||
x-ratelimit-limit-tokens-1h TPH cap
|
||||
x-ratelimit-remaining-requests requests left in minute window
|
||||
x-ratelimit-remaining-requests-1h requests left in hour window
|
||||
x-ratelimit-remaining-tokens tokens left in minute window
|
||||
x-ratelimit-remaining-tokens-1h tokens left in hour window
|
||||
x-ratelimit-reset-requests seconds until minute request window resets
|
||||
x-ratelimit-reset-requests-1h seconds until hour request window resets
|
||||
x-ratelimit-reset-tokens seconds until minute token window resets
|
||||
x-ratelimit-reset-tokens-1h seconds until hour token window resets
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Mapping, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class RateLimitBucket:
|
||||
"""One rate-limit window (e.g. requests per minute)."""
|
||||
|
||||
limit: int = 0
|
||||
remaining: int = 0
|
||||
reset_seconds: float = 0.0
|
||||
captured_at: float = 0.0 # time.time() when this was captured
|
||||
|
||||
@property
|
||||
def used(self) -> int:
|
||||
return max(0, self.limit - self.remaining)
|
||||
|
||||
@property
|
||||
def usage_pct(self) -> float:
|
||||
if self.limit <= 0:
|
||||
return 0.0
|
||||
return (self.used / self.limit) * 100.0
|
||||
|
||||
@property
|
||||
def remaining_seconds_now(self) -> float:
|
||||
"""Estimated seconds remaining until reset, adjusted for elapsed time."""
|
||||
elapsed = time.time() - self.captured_at
|
||||
return max(0.0, self.reset_seconds - elapsed)
|
||||
|
||||
|
||||
@dataclass
|
||||
class RateLimitState:
|
||||
"""Full rate-limit state parsed from response headers."""
|
||||
|
||||
requests_min: RateLimitBucket = field(default_factory=RateLimitBucket)
|
||||
requests_hour: RateLimitBucket = field(default_factory=RateLimitBucket)
|
||||
tokens_min: RateLimitBucket = field(default_factory=RateLimitBucket)
|
||||
tokens_hour: RateLimitBucket = field(default_factory=RateLimitBucket)
|
||||
captured_at: float = 0.0 # when the headers were captured
|
||||
provider: str = ""
|
||||
|
||||
@property
|
||||
def has_data(self) -> bool:
|
||||
return self.captured_at > 0
|
||||
|
||||
@property
|
||||
def age_seconds(self) -> float:
|
||||
if not self.has_data:
|
||||
return float("inf")
|
||||
return time.time() - self.captured_at
|
||||
|
||||
|
||||
def _safe_int(value: Any, default: int = 0) -> int:
|
||||
try:
|
||||
return int(float(value))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _safe_float(value: Any, default: float = 0.0) -> float:
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def parse_rate_limit_headers(
|
||||
headers: Mapping[str, str],
|
||||
provider: str = "",
|
||||
) -> Optional[RateLimitState]:
|
||||
"""Parse x-ratelimit-* headers into a RateLimitState.
|
||||
|
||||
Returns None if no rate limit headers are present.
|
||||
"""
|
||||
# Normalize to lowercase so lookups work regardless of how the server
|
||||
# capitalises headers (HTTP header names are case-insensitive per RFC 7230).
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
|
||||
# Quick check: at least one rate limit header must exist
|
||||
has_any = any(k.startswith("x-ratelimit-") for k in lowered)
|
||||
if not has_any:
|
||||
return None
|
||||
|
||||
now = time.time()
|
||||
|
||||
def _bucket(resource: str, suffix: str = "") -> RateLimitBucket:
|
||||
# e.g. resource="requests", suffix="" -> per-minute
|
||||
# resource="tokens", suffix="-1h" -> per-hour
|
||||
tag = f"{resource}{suffix}"
|
||||
return RateLimitBucket(
|
||||
limit=_safe_int(lowered.get(f"x-ratelimit-limit-{tag}")),
|
||||
remaining=_safe_int(lowered.get(f"x-ratelimit-remaining-{tag}")),
|
||||
reset_seconds=_safe_float(lowered.get(f"x-ratelimit-reset-{tag}")),
|
||||
captured_at=now,
|
||||
)
|
||||
|
||||
return RateLimitState(
|
||||
requests_min=_bucket("requests"),
|
||||
requests_hour=_bucket("requests", "-1h"),
|
||||
tokens_min=_bucket("tokens"),
|
||||
tokens_hour=_bucket("tokens", "-1h"),
|
||||
captured_at=now,
|
||||
provider=provider,
|
||||
)
|
||||
|
||||
|
||||
# ── Formatting ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _fmt_count(n: int) -> str:
|
||||
"""Human-friendly number: 7999856 -> '8.0M', 33599 -> '33.6K', 799 -> '799'."""
|
||||
if n >= 1_000_000:
|
||||
return f"{n / 1_000_000:.1f}M"
|
||||
if n >= 10_000:
|
||||
return f"{n / 1_000:.1f}K"
|
||||
if n >= 1_000:
|
||||
return f"{n / 1_000:.1f}K"
|
||||
return str(n)
|
||||
|
||||
|
||||
def _fmt_seconds(seconds: float) -> str:
|
||||
"""Seconds -> human-friendly duration: '58s', '2m 14s', '58m 57s', '1h 2m'."""
|
||||
s = max(0, int(seconds))
|
||||
if s < 60:
|
||||
return f"{s}s"
|
||||
if s < 3600:
|
||||
m, sec = divmod(s, 60)
|
||||
return f"{m}m {sec}s" if sec else f"{m}m"
|
||||
h, remainder = divmod(s, 3600)
|
||||
m = remainder // 60
|
||||
return f"{h}h {m}m" if m else f"{h}h"
|
||||
|
||||
|
||||
def _bar(pct: float, width: int = 20) -> str:
|
||||
"""ASCII progress bar: [████████░░░░░░░░░░░░] 40%."""
|
||||
filled = int(pct / 100.0 * width)
|
||||
filled = max(0, min(width, filled))
|
||||
empty = width - filled
|
||||
return f"[{'█' * filled}{'░' * empty}]"
|
||||
|
||||
|
||||
def _bucket_line(label: str, bucket: RateLimitBucket, label_width: int = 14) -> str:
|
||||
"""Format one bucket as a single line."""
|
||||
if bucket.limit <= 0:
|
||||
return f" {label:<{label_width}} (no data)"
|
||||
|
||||
pct = bucket.usage_pct
|
||||
used = _fmt_count(bucket.used)
|
||||
limit = _fmt_count(bucket.limit)
|
||||
remaining = _fmt_count(bucket.remaining)
|
||||
reset = _fmt_seconds(bucket.remaining_seconds_now)
|
||||
|
||||
bar = _bar(pct)
|
||||
return f" {label:<{label_width}} {bar} {pct:5.1f}% {used}/{limit} used ({remaining} left, resets in {reset})"
|
||||
|
||||
|
||||
def format_rate_limit_display(state: RateLimitState) -> str:
|
||||
"""Format rate limit state for terminal/chat display."""
|
||||
if not state.has_data:
|
||||
return "No rate limit data yet — make an API request first."
|
||||
|
||||
age = state.age_seconds
|
||||
if age < 5:
|
||||
freshness = "just now"
|
||||
elif age < 60:
|
||||
freshness = f"{int(age)}s ago"
|
||||
else:
|
||||
freshness = f"{_fmt_seconds(age)} ago"
|
||||
|
||||
provider_label = state.provider.title() if state.provider else "Provider"
|
||||
|
||||
lines = [
|
||||
f"{provider_label} Rate Limits (captured {freshness}):",
|
||||
"",
|
||||
_bucket_line("Requests/min", state.requests_min),
|
||||
_bucket_line("Requests/hr", state.requests_hour),
|
||||
"",
|
||||
_bucket_line("Tokens/min", state.tokens_min),
|
||||
_bucket_line("Tokens/hr", state.tokens_hour),
|
||||
]
|
||||
|
||||
# Add warnings if any bucket is getting hot
|
||||
warnings = []
|
||||
for label, bucket in [
|
||||
("requests/min", state.requests_min),
|
||||
("requests/hr", state.requests_hour),
|
||||
("tokens/min", state.tokens_min),
|
||||
("tokens/hr", state.tokens_hour),
|
||||
]:
|
||||
if bucket.limit > 0 and bucket.usage_pct >= 80:
|
||||
reset = _fmt_seconds(bucket.remaining_seconds_now)
|
||||
warnings.append(f" ⚠ {label} at {bucket.usage_pct:.0f}% — resets in {reset}")
|
||||
|
||||
if warnings:
|
||||
lines.append("")
|
||||
lines.extend(warnings)
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def format_rate_limit_compact(state: RateLimitState) -> str:
|
||||
"""One-line compact summary for status bars / gateway messages."""
|
||||
if not state.has_data:
|
||||
return "No rate limit data."
|
||||
|
||||
rm = state.requests_min
|
||||
tm = state.tokens_min
|
||||
rh = state.requests_hour
|
||||
th = state.tokens_hour
|
||||
|
||||
parts = []
|
||||
if rm.limit > 0:
|
||||
parts.append(f"RPM: {rm.remaining}/{rm.limit}")
|
||||
if rh.limit > 0:
|
||||
parts.append(f"RPH: {_fmt_count(rh.remaining)}/{_fmt_count(rh.limit)} (resets {_fmt_seconds(rh.remaining_seconds_now)})")
|
||||
if tm.limit > 0:
|
||||
parts.append(f"TPM: {_fmt_count(tm.remaining)}/{_fmt_count(tm.limit)}")
|
||||
if th.limit > 0:
|
||||
parts.append(f"TPH: {_fmt_count(th.remaining)}/{_fmt_count(th.limit)} (resets {_fmt_seconds(th.remaining_seconds_now)})")
|
||||
|
||||
return " | ".join(parts)
|
||||
@@ -1,57 +0,0 @@
|
||||
"""Retry utilities — jittered backoff for decorrelated retries.
|
||||
|
||||
Replaces fixed exponential backoff with jittered delays to prevent
|
||||
thundering-herd retry spikes when multiple sessions hit the same
|
||||
rate-limited provider concurrently.
|
||||
"""
|
||||
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
|
||||
# Monotonic counter for jitter seed uniqueness within the same process.
|
||||
# Protected by a lock to avoid race conditions in concurrent retry paths
|
||||
# (e.g. multiple gateway sessions retrying simultaneously).
|
||||
_jitter_counter = 0
|
||||
_jitter_lock = threading.Lock()
|
||||
|
||||
|
||||
def jittered_backoff(
|
||||
attempt: int,
|
||||
*,
|
||||
base_delay: float = 5.0,
|
||||
max_delay: float = 120.0,
|
||||
jitter_ratio: float = 0.5,
|
||||
) -> float:
|
||||
"""Compute a jittered exponential backoff delay.
|
||||
|
||||
Args:
|
||||
attempt: 1-based retry attempt number.
|
||||
base_delay: Base delay in seconds for attempt 1.
|
||||
max_delay: Maximum delay cap in seconds.
|
||||
jitter_ratio: Fraction of computed delay to use as random jitter
|
||||
range. 0.5 means jitter is uniform in [0, 0.5 * delay].
|
||||
|
||||
Returns:
|
||||
Delay in seconds: min(base * 2^(attempt-1), max_delay) + jitter.
|
||||
|
||||
The jitter decorrelates concurrent retries so multiple sessions
|
||||
hitting the same provider don't all retry at the same instant.
|
||||
"""
|
||||
global _jitter_counter
|
||||
with _jitter_lock:
|
||||
_jitter_counter += 1
|
||||
tick = _jitter_counter
|
||||
|
||||
exponent = max(0, attempt - 1)
|
||||
if exponent >= 63 or base_delay <= 0:
|
||||
delay = max_delay
|
||||
else:
|
||||
delay = min(base_delay * (2 ** exponent), max_delay)
|
||||
|
||||
# Seed from time + counter for decorrelation even with coarse clocks.
|
||||
seed = (time.time_ns() ^ (tick * 0x9E3779B9)) & 0xFFFFFFFF
|
||||
rng = random.Random(seed)
|
||||
jitter = rng.uniform(0, jitter_ratio * delay)
|
||||
|
||||
return delay + jitter
|
||||
@@ -168,7 +168,7 @@ def _build_skill_message(
|
||||
subdir_path = skill_dir / subdir
|
||||
if subdir_path.exists():
|
||||
for f in sorted(subdir_path.rglob("*")):
|
||||
if f.is_file() and not f.is_symlink():
|
||||
if f.is_file():
|
||||
rel = str(f.relative_to(skill_dir))
|
||||
supporting.append(rel)
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Set, Tuple
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
@@ -145,11 +145,10 @@ def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
|
||||
if not isinstance(skills_cfg, dict):
|
||||
return set()
|
||||
|
||||
from gateway.session_context import get_session_env
|
||||
resolved_platform = (
|
||||
platform
|
||||
or os.getenv("HERMES_PLATFORM")
|
||||
or get_session_env("HERMES_SESSION_PLATFORM")
|
||||
or os.getenv("HERMES_SESSION_PLATFORM")
|
||||
)
|
||||
if resolved_platform:
|
||||
platform_disabled = (skills_cfg.get("platform_disabled") or {}).get(
|
||||
|
||||
@@ -181,7 +181,6 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any
|
||||
"api_mode": runtime.get("api_mode"),
|
||||
"command": runtime.get("command"),
|
||||
"args": list(runtime.get("args") or []),
|
||||
"credential_pool": runtime.get("credential_pool"),
|
||||
},
|
||||
"label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
|
||||
"signature": (
|
||||
|
||||
@@ -15,6 +15,7 @@ Inspired by Block/goose's SubdirectoryHintTracker.
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, Set
|
||||
@@ -159,10 +160,7 @@ class SubdirectoryHintTracker:
|
||||
|
||||
def _is_valid_subdir(self, path: Path) -> bool:
|
||||
"""Check if path is a valid directory to scan for hints."""
|
||||
try:
|
||||
if not path.is_dir():
|
||||
return False
|
||||
except OSError:
|
||||
if not path.is_dir():
|
||||
return False
|
||||
if path in self._loaded_dirs:
|
||||
return False
|
||||
@@ -175,10 +173,7 @@ class SubdirectoryHintTracker:
|
||||
found_hints = []
|
||||
for filename in _HINT_FILENAMES:
|
||||
hint_path = directory / filename
|
||||
try:
|
||||
if not hint_path.is_file():
|
||||
continue
|
||||
except OSError:
|
||||
if not hint_path.is_file():
|
||||
continue
|
||||
try:
|
||||
content = hint_path.read_text(encoding="utf-8").strip()
|
||||
|
||||
@@ -595,6 +595,30 @@ def get_pricing(
|
||||
}
|
||||
|
||||
|
||||
def estimate_cost_usd(
|
||||
model: str,
|
||||
input_tokens: int,
|
||||
output_tokens: int,
|
||||
*,
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
) -> float:
|
||||
"""Backward-compatible helper for legacy callers.
|
||||
|
||||
This uses non-cached input/output only. New code should call
|
||||
`estimate_usage_cost()` with canonical usage buckets.
|
||||
"""
|
||||
result = estimate_usage_cost(
|
||||
model,
|
||||
CanonicalUsage(input_tokens=input_tokens, output_tokens=output_tokens),
|
||||
provider=provider,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
)
|
||||
return float(result.amount_usd or _ZERO)
|
||||
|
||||
|
||||
def format_duration_compact(seconds: float) -> str:
|
||||
if seconds < 60:
|
||||
return f"{seconds:.0f}s"
|
||||
|
||||
+3
-5
@@ -31,8 +31,6 @@ from multiprocessing import Pool, Lock
|
||||
import traceback
|
||||
from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeRemainingColumn, MofNCompleteColumn
|
||||
from rich.console import Console
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
import fire
|
||||
|
||||
from run_agent import AIAgent
|
||||
@@ -1018,7 +1016,7 @@ class BatchRunner:
|
||||
tool_stats = data.get('tool_stats', {})
|
||||
|
||||
# Check for invalid tool names (model hallucinations)
|
||||
invalid_tools = [k for k in tool_stats if k not in VALID_TOOLS]
|
||||
invalid_tools = [k for k in tool_stats.keys() if k not in VALID_TOOLS]
|
||||
|
||||
if invalid_tools:
|
||||
filtered_entries += 1
|
||||
@@ -1158,7 +1156,7 @@ def main(
|
||||
providers_order (str): Comma-separated list of OpenRouter providers to try in order (e.g. "anthropic,openai,google")
|
||||
provider_sort (str): Sort providers by "price", "throughput", or "latency" (OpenRouter only)
|
||||
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
|
||||
reasoning_effort (str): OpenRouter reasoning effort level: "none", "minimal", "low", "medium", "high", "xhigh" (default: "medium")
|
||||
reasoning_effort (str): OpenRouter reasoning effort level: "xhigh", "high", "medium", "low", "minimal", "none" (default: "medium")
|
||||
reasoning_disabled (bool): Completely disable reasoning/thinking tokens (default: False)
|
||||
prefill_messages_file (str): Path to JSON file containing prefill messages (list of {role, content} dicts)
|
||||
max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set)
|
||||
@@ -1227,7 +1225,7 @@ def main(
|
||||
print("🧠 Reasoning: DISABLED (effort=none)")
|
||||
elif reasoning_effort:
|
||||
# Use specified effort level
|
||||
valid_efforts = ["none", "minimal", "low", "medium", "high", "xhigh"]
|
||||
valid_efforts = ["xhigh", "high", "medium", "low", "minimal", "none"]
|
||||
if reasoning_effort not in valid_efforts:
|
||||
print(f"❌ Error: --reasoning_effort must be one of: {', '.join(valid_efforts)}")
|
||||
return
|
||||
|
||||
+7
-56
@@ -48,25 +48,6 @@ model:
|
||||
# api_key: "your-key-here" # Uncomment to set here instead of .env
|
||||
base_url: "https://openrouter.ai/api/v1"
|
||||
|
||||
# ── Token limits — two settings, easy to confuse ──────────────────────────
|
||||
#
|
||||
# context_length: TOTAL context window (input + output tokens combined).
|
||||
# Controls when Hermes compresses history and validates requests.
|
||||
# Leave unset — Hermes auto-detects the correct value from the provider.
|
||||
# Set manually only when auto-detection is wrong (e.g. a local server with
|
||||
# a custom num_ctx, or a proxy that doesn't expose /v1/models).
|
||||
#
|
||||
# context_length: 131072
|
||||
#
|
||||
# max_tokens: OUTPUT cap — maximum tokens the model may generate per response.
|
||||
# Unrelated to how long your conversation history can be.
|
||||
# The OpenAI-standard name "max_tokens" is a misnomer; Anthropic's native
|
||||
# API has since renamed it "max_output_tokens" for clarity.
|
||||
# Leave unset to use the model's native output ceiling (recommended).
|
||||
# Set only if you want to deliberately limit individual response length.
|
||||
#
|
||||
# max_tokens: 8192
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Provider Routing (only applies when using OpenRouter)
|
||||
# =============================================================================
|
||||
@@ -136,8 +117,7 @@ terminal:
|
||||
timeout: 180
|
||||
docker_mount_cwd_to_workspace: false # SECURITY: off by default. Opt in to mount the launch cwd into Docker /workspace.
|
||||
lifetime_seconds: 300
|
||||
# sudo_password: "hunter2" # Optional: pipe a sudo password via sudo -S. SECURITY WARNING: plaintext.
|
||||
# sudo_password: "" # Explicit empty password: try empty and never open the interactive sudo prompt.
|
||||
# sudo_password: "" # Enable sudo commands (pipes via sudo -S) - SECURITY WARNING: plaintext!
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# OPTION 2: SSH remote execution
|
||||
@@ -228,18 +208,13 @@ terminal:
|
||||
#
|
||||
# SECURITY WARNING: Password stored in plaintext!
|
||||
#
|
||||
# INTERACTIVE PROMPT: If sudo_password is unset and the CLI is running,
|
||||
# INTERACTIVE PROMPT: If no sudo_password is set and the CLI is running,
|
||||
# you'll be prompted to enter your password when sudo is needed:
|
||||
# - 45-second timeout (auto-skips if no input)
|
||||
# - Press Enter to skip (command fails gracefully)
|
||||
# - Password is hidden while typing
|
||||
# - Password is cached for the session
|
||||
#
|
||||
# EMPTY PASSWORDS: Setting sudo_password to an explicit empty string is different
|
||||
# from leaving it unset. Hermes will try an empty password via `sudo -S` and
|
||||
# will not open the interactive prompt. This is useful for passwordless sudo,
|
||||
# Touch ID sudo setups, and environments where prompting is just noise.
|
||||
#
|
||||
# ALTERNATIVES:
|
||||
# - SSH backend: Configure passwordless sudo on the remote server
|
||||
# - Containers: Run as root inside the container (no sudo needed)
|
||||
@@ -470,22 +445,6 @@ agent:
|
||||
# Higher = more room for complex tasks, but costs more tokens
|
||||
# Recommended: 20-30 for focused tasks, 50-100 for open exploration
|
||||
max_turns: 60
|
||||
|
||||
# Inactivity timeout for gateway agent runs (seconds, 0 = unlimited).
|
||||
# The agent can run indefinitely when actively calling tools or receiving
|
||||
# API responses. Only fires after the agent has been idle for this duration.
|
||||
# gateway_timeout: 1800
|
||||
|
||||
# Staged warning: send a warning before escalating to full timeout.
|
||||
# Fires once per run when inactivity reaches this threshold (seconds).
|
||||
# Set to 0 to disable the warning.
|
||||
# gateway_timeout_warning: 900
|
||||
|
||||
# Graceful drain timeout for gateway stop/restart (seconds).
|
||||
# The gateway stops accepting new work, waits for in-flight agents to
|
||||
# finish, then interrupts anything still running after this timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
# restart_drain_timeout: 60
|
||||
|
||||
# Enable verbose logging
|
||||
verbose: false
|
||||
@@ -580,7 +539,7 @@ platform_toolsets:
|
||||
# terminal - terminal, process
|
||||
# file - read_file, write_file, patch, search
|
||||
# browser - browser_navigate, browser_snapshot, browser_click, browser_type,
|
||||
# browser_scroll, browser_back, browser_press,
|
||||
# browser_scroll, browser_back, browser_press, browser_close,
|
||||
# browser_get_images, browser_vision (requires BROWSERBASE_API_KEY)
|
||||
# vision - vision_analyze (requires OPENROUTER_API_KEY)
|
||||
# image_gen - image_generate (requires FAL_KEY)
|
||||
@@ -588,7 +547,7 @@ platform_toolsets:
|
||||
# skills_hub - skill_hub (search/install/manage from online registries — user-driven only)
|
||||
# moa - mixture_of_agents (requires OPENROUTER_API_KEY)
|
||||
# todo - todo (in-memory task planning, no deps)
|
||||
# tts - text_to_speech (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX/MISTRAL key)
|
||||
# tts - text_to_speech (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX key)
|
||||
# cronjob - cronjob (create/list/update/pause/resume/run/remove scheduled tasks)
|
||||
# rl - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
|
||||
#
|
||||
@@ -617,7 +576,7 @@ platform_toolsets:
|
||||
# todo - Task planning and tracking for multi-step work
|
||||
# memory - Persistent memory across sessions (personal notes + user profile)
|
||||
# session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization)
|
||||
# tts - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax, Mistral)
|
||||
# tts - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax)
|
||||
# cronjob - Schedule and manage automated tasks (CLI-only)
|
||||
# rl - RL training tools (Tinker-Atropos)
|
||||
#
|
||||
@@ -685,18 +644,10 @@ platform_toolsets:
|
||||
# Voice Transcription (Speech-to-Text)
|
||||
# =============================================================================
|
||||
# Automatically transcribe voice messages on messaging platforms.
|
||||
# Providers: local (free, faster-whisper) | groq (free tier) | openai (Whisper API) | mistral (Voxtral Transcribe)
|
||||
# Set the corresponding API key in .env: GROQ_API_KEY, OPENAI_API_KEY, or MISTRAL_API_KEY.
|
||||
# Requires OPENAI_API_KEY in .env (uses OpenAI Whisper API directly).
|
||||
stt:
|
||||
enabled: true
|
||||
# provider: "local" # auto-detected if omitted
|
||||
local:
|
||||
model: "base" # tiny | base | small | medium | large-v3 | turbo
|
||||
# language: "" # auto-detect; set to "en", "es", "fr", etc. to force
|
||||
openai:
|
||||
model: "whisper-1" # whisper-1 | gpt-4o-mini-transcribe | gpt-4o-transcribe
|
||||
# mistral:
|
||||
# model: "voxtral-mini-latest" # voxtral-mini-latest | voxtral-mini-2602
|
||||
model: "whisper-1" # whisper-1 (cheapest) | gpt-4o-mini-transcribe | gpt-4o-transcribe
|
||||
|
||||
# =============================================================================
|
||||
# Response Pacing (Messaging Platforms)
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
# Termux / Android dependency constraints for Hermes Agent.
|
||||
#
|
||||
# Usage:
|
||||
# python -m pip install -e '.[termux]' -c constraints-termux.txt
|
||||
#
|
||||
# These pins keep the tested Android install path stable when upstream packages
|
||||
# move faster than Termux-compatible wheels / sdists.
|
||||
|
||||
ipython<10
|
||||
jedi>=0.18.1,<0.20
|
||||
parso>=0.8.4,<0.9
|
||||
stack-data>=0.6,<0.7
|
||||
pexpect>4.3,<5
|
||||
matplotlib-inline>=0.1.7,<0.2
|
||||
asttokens>=2.1,<3
|
||||
+8
-17
@@ -31,7 +31,7 @@ except ImportError:
|
||||
# Configuration
|
||||
# =============================================================================
|
||||
|
||||
HERMES_DIR = get_hermes_home().resolve()
|
||||
HERMES_DIR = get_hermes_home()
|
||||
CRON_DIR = HERMES_DIR / "cron"
|
||||
JOBS_FILE = CRON_DIR / "jobs.json"
|
||||
OUTPUT_DIR = CRON_DIR / "output"
|
||||
@@ -338,12 +338,10 @@ def load_jobs() -> List[Dict[str, Any]]:
|
||||
save_jobs(jobs)
|
||||
logger.warning("Auto-repaired jobs.json (had invalid control characters)")
|
||||
return jobs
|
||||
except Exception as e:
|
||||
logger.error("Failed to auto-repair jobs.json: %s", e)
|
||||
raise RuntimeError(f"Cron database corrupted and unrepairable: {e}") from e
|
||||
except IOError as e:
|
||||
logger.error("IOError reading jobs.json: %s", e)
|
||||
raise RuntimeError(f"Failed to read cron database: {e}") from e
|
||||
except Exception:
|
||||
return []
|
||||
except IOError:
|
||||
return []
|
||||
|
||||
|
||||
def save_jobs(jobs: List[Dict[str, Any]]):
|
||||
@@ -454,7 +452,6 @@ def create_job(
|
||||
"last_run_at": None,
|
||||
"last_status": None,
|
||||
"last_error": None,
|
||||
"last_delivery_error": None,
|
||||
# Delivery configuration
|
||||
"deliver": deliver,
|
||||
"origin": origin, # Tracks where job was created for "origin" delivery
|
||||
@@ -577,16 +574,12 @@ def remove_job(job_id: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
|
||||
delivery_error: Optional[str] = None):
|
||||
def mark_job_run(job_id: str, success: bool, error: Optional[str] = None):
|
||||
"""
|
||||
Mark a job as having been run.
|
||||
|
||||
Updates last_run_at, last_status, increments completed count,
|
||||
computes next_run_at, and auto-deletes if repeat limit reached.
|
||||
|
||||
``delivery_error`` is tracked separately from the agent error — a job
|
||||
can succeed (agent produced output) but fail delivery (platform down).
|
||||
"""
|
||||
jobs = load_jobs()
|
||||
for i, job in enumerate(jobs):
|
||||
@@ -595,8 +588,6 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
|
||||
job["last_run_at"] = now
|
||||
job["last_status"] = "ok" if success else "error"
|
||||
job["last_error"] = error if not success else None
|
||||
# Track delivery failures separately — cleared on successful delivery
|
||||
job["last_delivery_error"] = delivery_error
|
||||
|
||||
# Increment completed count
|
||||
if job.get("repeat"):
|
||||
@@ -623,8 +614,8 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
|
||||
|
||||
save_jobs(jobs)
|
||||
return
|
||||
|
||||
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
|
||||
|
||||
save_jobs(jobs)
|
||||
|
||||
|
||||
def advance_next_run(job_id: str) -> bool:
|
||||
|
||||
+50
-164
@@ -25,6 +25,7 @@ except ImportError:
|
||||
import msvcrt
|
||||
except ImportError:
|
||||
msvcrt = None
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
@@ -44,7 +45,7 @@ logger = logging.getLogger(__name__)
|
||||
_KNOWN_DELIVERY_PLATFORMS = frozenset({
|
||||
"telegram", "discord", "slack", "whatsapp", "signal",
|
||||
"matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
|
||||
"wecom", "weixin", "sms", "email", "webhook", "bluebubbles",
|
||||
"wecom", "sms", "email", "webhook",
|
||||
})
|
||||
|
||||
from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run
|
||||
@@ -91,7 +92,7 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
|
||||
}
|
||||
# Origin missing (e.g. job created via API/script) — try each
|
||||
# platform's home channel as a fallback instead of silently dropping.
|
||||
for platform_name in ("matrix", "telegram", "discord", "slack", "bluebubbles"):
|
||||
for platform_name in ("matrix", "telegram", "discord", "slack"):
|
||||
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
|
||||
if chat_id:
|
||||
logger.info(
|
||||
@@ -158,45 +159,7 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
|
||||
}
|
||||
|
||||
|
||||
# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background
|
||||
_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'})
|
||||
_VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'})
|
||||
_IMAGE_EXTS = frozenset({'.jpg', '.jpeg', '.png', '.webp', '.gif'})
|
||||
|
||||
|
||||
def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: dict | None, loop, job: dict) -> None:
|
||||
"""Send extracted MEDIA files as native platform attachments via a live adapter.
|
||||
|
||||
Routes each file to the appropriate adapter method (send_voice, send_image_file,
|
||||
send_video, send_document) based on file extension — mirroring the routing logic
|
||||
in ``BasePlatformAdapter._process_message_background``.
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
for media_path, _is_voice in media_files:
|
||||
try:
|
||||
ext = Path(media_path).suffix.lower()
|
||||
if ext in _AUDIO_EXTS:
|
||||
coro = adapter.send_voice(chat_id=chat_id, audio_path=media_path, metadata=metadata)
|
||||
elif ext in _VIDEO_EXTS:
|
||||
coro = adapter.send_video(chat_id=chat_id, video_path=media_path, metadata=metadata)
|
||||
elif ext in _IMAGE_EXTS:
|
||||
coro = adapter.send_image_file(chat_id=chat_id, image_path=media_path, metadata=metadata)
|
||||
else:
|
||||
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
|
||||
|
||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
||||
result = future.result(timeout=30)
|
||||
if result and not getattr(result, "success", True):
|
||||
logger.warning(
|
||||
"Job '%s': media send failed for %s: %s",
|
||||
job.get("id", "?"), media_path, getattr(result, "error", "unknown"),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("Job '%s': failed to send media %s: %s", job.get("id", "?"), media_path, e)
|
||||
|
||||
|
||||
def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Optional[str]:
|
||||
def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
|
||||
"""
|
||||
Deliver job output to the configured target (origin chat, specific platform, etc.).
|
||||
|
||||
@@ -204,16 +167,16 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
use the live adapter first — this supports E2EE rooms (e.g. Matrix) where
|
||||
the standalone HTTP path cannot encrypt. Falls back to standalone send if
|
||||
the adapter path fails or is unavailable.
|
||||
|
||||
Returns None on success, or an error string on failure.
|
||||
"""
|
||||
target = _resolve_delivery_target(job)
|
||||
if not target:
|
||||
if job.get("deliver", "local") != "local":
|
||||
msg = f"no delivery target resolved for deliver={job.get('deliver', 'local')}"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
return None # local-only jobs don't deliver — not a failure
|
||||
logger.warning(
|
||||
"Job '%s' deliver=%s but no concrete delivery target could be resolved",
|
||||
job["id"],
|
||||
job.get("deliver", "local"),
|
||||
)
|
||||
return
|
||||
|
||||
platform_name = target["platform"]
|
||||
chat_id = target["chat_id"]
|
||||
@@ -234,29 +197,24 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
"dingtalk": Platform.DINGTALK,
|
||||
"feishu": Platform.FEISHU,
|
||||
"wecom": Platform.WECOM,
|
||||
"weixin": Platform.WEIXIN,
|
||||
"email": Platform.EMAIL,
|
||||
"sms": Platform.SMS,
|
||||
"bluebubbles": Platform.BLUEBUBBLES,
|
||||
}
|
||||
platform = platform_map.get(platform_name.lower())
|
||||
if not platform:
|
||||
msg = f"unknown platform '{platform_name}'"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
logger.warning("Job '%s': unknown platform '%s' for delivery", job["id"], platform_name)
|
||||
return
|
||||
|
||||
try:
|
||||
config = load_gateway_config()
|
||||
except Exception as e:
|
||||
msg = f"failed to load gateway config: {e}"
|
||||
logger.error("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
logger.error("Job '%s': failed to load gateway config for delivery: %s", job["id"], e)
|
||||
return
|
||||
|
||||
pconfig = config.platforms.get(platform)
|
||||
if not pconfig or not pconfig.enabled:
|
||||
msg = f"platform '{platform_name}' not configured/enabled"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
logger.warning("Job '%s': platform '%s' not configured/enabled", job["id"], platform_name)
|
||||
return
|
||||
|
||||
# Optionally wrap the content with a header/footer so the user knows this
|
||||
# is a cron delivery. Wrapping is on by default; set cron.wrap_response: false
|
||||
@@ -289,30 +247,20 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)():
|
||||
send_metadata = {"thread_id": thread_id} if thread_id else None
|
||||
try:
|
||||
# Send cleaned text (MEDIA tags stripped) — not the raw content
|
||||
text_to_send = cleaned_delivery_content.strip()
|
||||
adapter_ok = True
|
||||
if text_to_send:
|
||||
future = asyncio.run_coroutine_threadsafe(
|
||||
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
|
||||
loop,
|
||||
future = asyncio.run_coroutine_threadsafe(
|
||||
runtime_adapter.send(chat_id, delivery_content, metadata=send_metadata),
|
||||
loop,
|
||||
)
|
||||
send_result = future.result(timeout=60)
|
||||
if send_result and not getattr(send_result, "success", True):
|
||||
err = getattr(send_result, "error", "unknown")
|
||||
logger.warning(
|
||||
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
|
||||
job["id"], platform_name, chat_id, err,
|
||||
)
|
||||
send_result = future.result(timeout=60)
|
||||
if send_result and not getattr(send_result, "success", True):
|
||||
err = getattr(send_result, "error", "unknown")
|
||||
logger.warning(
|
||||
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
|
||||
job["id"], platform_name, chat_id, err,
|
||||
)
|
||||
adapter_ok = False # fall through to standalone path
|
||||
|
||||
# Send extracted media files as native attachments via the live adapter
|
||||
if adapter_ok and media_files:
|
||||
_send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
|
||||
|
||||
if adapter_ok:
|
||||
else:
|
||||
logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
|
||||
return None
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone",
|
||||
@@ -334,55 +282,16 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
|
||||
result = future.result(timeout=30)
|
||||
except Exception as e:
|
||||
msg = f"delivery to {platform_name}:{chat_id} failed: {e}"
|
||||
logger.error("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
logger.error("Job '%s': delivery to %s:%s failed: %s", job["id"], platform_name, chat_id, e)
|
||||
return
|
||||
|
||||
if result and result.get("error"):
|
||||
msg = f"delivery error: {result['error']}"
|
||||
logger.error("Job '%s': %s", job["id"], msg)
|
||||
return msg
|
||||
|
||||
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
|
||||
return None
|
||||
logger.error("Job '%s': delivery error: %s", job["id"], result["error"])
|
||||
else:
|
||||
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
|
||||
|
||||
|
||||
_DEFAULT_SCRIPT_TIMEOUT = 120 # seconds
|
||||
# Backward-compatible module override used by tests and emergency monkeypatches.
|
||||
_SCRIPT_TIMEOUT = _DEFAULT_SCRIPT_TIMEOUT
|
||||
|
||||
|
||||
def _get_script_timeout() -> int:
|
||||
"""Resolve cron pre-run script timeout from module/env/config with a safe default."""
|
||||
if _SCRIPT_TIMEOUT != _DEFAULT_SCRIPT_TIMEOUT:
|
||||
try:
|
||||
timeout = int(float(_SCRIPT_TIMEOUT))
|
||||
if timeout > 0:
|
||||
return timeout
|
||||
except Exception:
|
||||
logger.warning("Invalid patched _SCRIPT_TIMEOUT=%r; using env/config/default", _SCRIPT_TIMEOUT)
|
||||
|
||||
env_value = os.getenv("HERMES_CRON_SCRIPT_TIMEOUT", "").strip()
|
||||
if env_value:
|
||||
try:
|
||||
timeout = int(float(env_value))
|
||||
if timeout > 0:
|
||||
return timeout
|
||||
except Exception:
|
||||
logger.warning("Invalid HERMES_CRON_SCRIPT_TIMEOUT=%r; using config/default", env_value)
|
||||
|
||||
try:
|
||||
cfg = load_config() or {}
|
||||
cron_cfg = cfg.get("cron", {}) if isinstance(cfg, dict) else {}
|
||||
configured = cron_cfg.get("script_timeout_seconds")
|
||||
if configured is not None:
|
||||
timeout = int(float(configured))
|
||||
if timeout > 0:
|
||||
return timeout
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to load cron script timeout from config: %s", exc)
|
||||
|
||||
return _DEFAULT_SCRIPT_TIMEOUT
|
||||
_SCRIPT_TIMEOUT = 120 # seconds
|
||||
|
||||
|
||||
def _run_job_script(script_path: str) -> tuple[bool, str]:
|
||||
@@ -429,27 +338,17 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
|
||||
if not path.is_file():
|
||||
return False, f"Script path is not a file: {path}"
|
||||
|
||||
script_timeout = _get_script_timeout()
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[sys.executable, str(path)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=script_timeout,
|
||||
timeout=_SCRIPT_TIMEOUT,
|
||||
cwd=str(path.parent),
|
||||
)
|
||||
stdout = (result.stdout or "").strip()
|
||||
stderr = (result.stderr or "").strip()
|
||||
|
||||
# Redact secrets from both stdout and stderr before any return path.
|
||||
try:
|
||||
from agent.redact import redact_sensitive_text
|
||||
stdout = redact_sensitive_text(stdout)
|
||||
stderr = redact_sensitive_text(stderr)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if result.returncode != 0:
|
||||
parts = [f"Script exited with code {result.returncode}"]
|
||||
if stderr:
|
||||
@@ -458,10 +357,17 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
|
||||
parts.append(f"stdout:\n{stdout}")
|
||||
return False, "\n".join(parts)
|
||||
|
||||
# Redact any secrets that may appear in script output before
|
||||
# they are injected into the LLM prompt context.
|
||||
try:
|
||||
from agent.redact import redact_sensitive_text
|
||||
stdout = redact_sensitive_text(stdout)
|
||||
except Exception:
|
||||
pass
|
||||
return True, stdout
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
return False, f"Script timed out after {script_timeout}s: {path}"
|
||||
return False, f"Script timed out after {_SCRIPT_TIMEOUT}s: {path}"
|
||||
except Exception as exc:
|
||||
return False, f"Script execution failed: {exc}"
|
||||
|
||||
@@ -625,9 +531,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
except Exception as e:
|
||||
logger.warning("Job '%s': failed to load config.yaml, using defaults: %s", job_id, e)
|
||||
|
||||
# Reasoning config from config.yaml
|
||||
# Reasoning config from env or config.yaml
|
||||
from hermes_constants import parse_reasoning_effort
|
||||
effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()
|
||||
effort = os.getenv("HERMES_REASONING_EFFORT", "")
|
||||
if not effort:
|
||||
effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()
|
||||
reasoning_config = parse_reasoning_effort(effort)
|
||||
|
||||
# Prefill messages from env or config.yaml
|
||||
@@ -685,24 +593,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
},
|
||||
)
|
||||
|
||||
fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None
|
||||
credential_pool = None
|
||||
runtime_provider = str(turn_route["runtime"].get("provider") or "").strip().lower()
|
||||
if runtime_provider:
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool(runtime_provider)
|
||||
if pool.has_credentials():
|
||||
credential_pool = pool
|
||||
logger.info(
|
||||
"Job '%s': loaded credential pool for provider %s with %d entries",
|
||||
job_id,
|
||||
runtime_provider,
|
||||
len(pool.entries()),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e)
|
||||
|
||||
agent = AIAgent(
|
||||
model=turn_route["model"],
|
||||
api_key=turn_route["runtime"].get("api_key"),
|
||||
@@ -714,8 +604,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
max_iterations=max_iterations,
|
||||
reasoning_config=reasoning_config,
|
||||
prefill_messages=prefill_messages,
|
||||
fallback_model=fallback_model,
|
||||
credential_pool=credential_pool,
|
||||
providers_allowed=pr.get("only"),
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
@@ -770,7 +658,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
_cron_pool.shutdown(wait=False, cancel_futures=True)
|
||||
raise
|
||||
finally:
|
||||
_cron_pool.shutdown(wait=False, cancel_futures=True)
|
||||
_cron_pool.shutdown(wait=False)
|
||||
|
||||
if _inactivity_timeout:
|
||||
# Build diagnostic summary from the agent's activity tracker.
|
||||
@@ -933,15 +821,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
logger.info("Job '%s': agent returned %s — skipping delivery", job["id"], SILENT_MARKER)
|
||||
should_deliver = False
|
||||
|
||||
delivery_error = None
|
||||
if should_deliver:
|
||||
try:
|
||||
delivery_error = _deliver_result(job, deliver_content, adapters=adapters, loop=loop)
|
||||
_deliver_result(job, deliver_content, adapters=adapters, loop=loop)
|
||||
except Exception as de:
|
||||
delivery_error = str(de)
|
||||
logger.error("Delivery failed for job %s: %s", job["id"], de)
|
||||
|
||||
mark_job_run(job["id"], success, error, delivery_error=delivery_error)
|
||||
mark_job_run(job["id"], success, error)
|
||||
executed += 1
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -9,10 +9,7 @@ INSTALL_DIR="/opt/hermes"
|
||||
# (cache/images, cache/audio, platforms/whatsapp, etc.) are created on
|
||||
# demand by the application — don't pre-create them here so new installs
|
||||
# get the consolidated layout from get_hermes_dir().
|
||||
# The "home/" subdirectory is a per-profile HOME for subprocesses (git,
|
||||
# ssh, gh, npm …). Without it those tools write to /root which is
|
||||
# ephemeral and shared across profiles. See issue #4426.
|
||||
mkdir -p "$HERMES_HOME"/{cron,sessions,logs,hooks,memories,skills,skins,plans,workspace,home}
|
||||
mkdir -p "$HERMES_HOME"/{cron,sessions,logs,hooks,memories,skills}
|
||||
|
||||
# .env
|
||||
if [ ! -f "$HERMES_HOME/.env" ]; then
|
||||
|
||||
+26
-23
@@ -21,8 +21,6 @@ from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
|
||||
from model_tools import handle_function_call
|
||||
from tools.terminal_tool import get_active_env
|
||||
from tools.tool_result_storage import maybe_persist_tool_result, enforce_turn_budget
|
||||
|
||||
# Thread pool for running sync tool calls that internally use asyncio.run()
|
||||
# (e.g., the Modal/Docker/Daytona terminal backends). Running them in a separate
|
||||
@@ -140,7 +138,6 @@ class HermesAgentLoop:
|
||||
temperature: float = 1.0,
|
||||
max_tokens: Optional[int] = None,
|
||||
extra_body: Optional[Dict[str, Any]] = None,
|
||||
budget_config: Optional["BudgetConfig"] = None,
|
||||
):
|
||||
"""
|
||||
Initialize the agent loop.
|
||||
@@ -157,11 +154,7 @@ class HermesAgentLoop:
|
||||
extra_body: Extra parameters passed to the OpenAI client's create() call.
|
||||
Used for OpenRouter provider preferences, transforms, etc.
|
||||
e.g. {"provider": {"ignore": ["DeepInfra"]}}
|
||||
budget_config: Tool result persistence budget. Controls per-tool
|
||||
thresholds, per-turn aggregate budget, and preview size.
|
||||
If None, uses DEFAULT_BUDGET (current hardcoded values).
|
||||
"""
|
||||
from tools.budget_config import DEFAULT_BUDGET
|
||||
self.server = server
|
||||
self.tool_schemas = tool_schemas
|
||||
self.valid_tool_names = valid_tool_names
|
||||
@@ -170,7 +163,11 @@ class HermesAgentLoop:
|
||||
self.temperature = temperature
|
||||
self.max_tokens = max_tokens
|
||||
self.extra_body = extra_body
|
||||
self.budget_config = budget_config or DEFAULT_BUDGET
|
||||
|
||||
# Per-result and per-turn output persistence (see tools/tool_result_storage.py)
|
||||
from pathlib import Path
|
||||
self._tool_result_storage_dir = Path(f"/tmp/hermes_tool_results/{self.task_id}")
|
||||
self._tool_result_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
async def run(self, messages: List[Dict[str, Any]]) -> AgentResult:
|
||||
"""
|
||||
@@ -454,15 +451,18 @@ class HermesAgentLoop:
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
# Persist oversized results to disk
|
||||
tc_id = tc.get("id", "") if isinstance(tc, dict) else tc.id
|
||||
tool_result = maybe_persist_tool_result(
|
||||
content=tool_result,
|
||||
tool_name=tool_name,
|
||||
tool_use_id=tc_id,
|
||||
env=get_active_env(self.task_id),
|
||||
config=self.budget_config,
|
||||
)
|
||||
|
||||
try:
|
||||
from tools.tool_result_storage import maybe_persist_tool_result
|
||||
tool_result = maybe_persist_tool_result(
|
||||
content=tool_result,
|
||||
tool_name=tool_name,
|
||||
tool_use_id=tc_id,
|
||||
storage_dir=self._tool_result_storage_dir,
|
||||
)
|
||||
except Exception:
|
||||
pass # Persistence is best-effort in eval path
|
||||
messages.append(
|
||||
{
|
||||
"role": "tool",
|
||||
@@ -471,13 +471,16 @@ class HermesAgentLoop:
|
||||
}
|
||||
)
|
||||
|
||||
num_tcs = len(assistant_msg.tool_calls)
|
||||
if num_tcs > 0:
|
||||
enforce_turn_budget(
|
||||
messages[-num_tcs:],
|
||||
env=get_active_env(self.task_id),
|
||||
config=self.budget_config,
|
||||
)
|
||||
# Per-turn aggregate budget enforcement
|
||||
try:
|
||||
from tools.tool_result_storage import enforce_turn_budget
|
||||
num_tcs = len(assistant_msg.tool_calls)
|
||||
if num_tcs > 0:
|
||||
turn_msgs = [m for m in messages[-num_tcs * 2:]
|
||||
if m.get("role") == "tool"]
|
||||
enforce_turn_budget(turn_msgs, self._tool_result_storage_dir)
|
||||
except Exception:
|
||||
pass # Best-effort in eval path
|
||||
|
||||
turn_elapsed = _time.monotonic() - turn_start
|
||||
logger.info(
|
||||
|
||||
@@ -1048,7 +1048,6 @@ class AgenticOPDEnv(HermesAgentBaseEnv):
|
||||
temperature=0.0,
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
budget_config=self.config.build_budget_config(),
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ import tempfile
|
||||
import time
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from pathlib import Path, PurePosixPath, PureWindowsPath
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
# Ensure repo root is on sys.path for imports
|
||||
@@ -148,62 +148,6 @@ MODAL_INCOMPATIBLE_TASKS = {
|
||||
# Tar extraction helper
|
||||
# =============================================================================
|
||||
|
||||
def _normalize_tar_member_parts(member_name: str) -> list:
|
||||
"""Return safe path components for a tar member or raise ValueError."""
|
||||
normalized_name = member_name.replace("\\", "/")
|
||||
posix_path = PurePosixPath(normalized_name)
|
||||
windows_path = PureWindowsPath(member_name)
|
||||
|
||||
if (
|
||||
not normalized_name
|
||||
or posix_path.is_absolute()
|
||||
or windows_path.is_absolute()
|
||||
or windows_path.drive
|
||||
):
|
||||
raise ValueError(f"Unsafe archive member path: {member_name}")
|
||||
|
||||
parts = [part for part in posix_path.parts if part not in ("", ".")]
|
||||
if not parts or any(part == ".." for part in parts):
|
||||
raise ValueError(f"Unsafe archive member path: {member_name}")
|
||||
return parts
|
||||
|
||||
|
||||
def _safe_extract_tar(tar: tarfile.TarFile, target_dir: Path) -> None:
|
||||
"""Extract a tar archive without allowing traversal or link entries."""
|
||||
target_dir.mkdir(parents=True, exist_ok=True)
|
||||
target_root = target_dir.resolve()
|
||||
|
||||
for member in tar.getmembers():
|
||||
parts = _normalize_tar_member_parts(member.name)
|
||||
target = target_dir.joinpath(*parts)
|
||||
target_real = target.resolve(strict=False)
|
||||
|
||||
try:
|
||||
target_real.relative_to(target_root)
|
||||
except ValueError as exc:
|
||||
raise ValueError(f"Unsafe archive member path: {member.name}") from exc
|
||||
|
||||
if member.isdir():
|
||||
target_real.mkdir(parents=True, exist_ok=True)
|
||||
continue
|
||||
|
||||
if not member.isfile():
|
||||
raise ValueError(f"Unsupported archive member type: {member.name}")
|
||||
|
||||
target_real.parent.mkdir(parents=True, exist_ok=True)
|
||||
extracted = tar.extractfile(member)
|
||||
if extracted is None:
|
||||
raise ValueError(f"Cannot read archive member: {member.name}")
|
||||
|
||||
with extracted, open(target_real, "wb") as dst:
|
||||
shutil.copyfileobj(extracted, dst)
|
||||
|
||||
try:
|
||||
os.chmod(target_real, member.mode & 0o777)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def _extract_base64_tar(b64_data: str, target_dir: Path):
|
||||
"""Extract a base64-encoded tar.gz archive into target_dir."""
|
||||
if not b64_data:
|
||||
@@ -211,7 +155,7 @@ def _extract_base64_tar(b64_data: str, target_dir: Path):
|
||||
raw = base64.b64decode(b64_data)
|
||||
buf = io.BytesIO(raw)
|
||||
with tarfile.open(fileobj=buf, mode="r:gz") as tar:
|
||||
_safe_extract_tar(tar, target_dir)
|
||||
tar.extractall(path=str(target_dir))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -541,7 +485,6 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
|
||||
temperature=self.config.agent_temperature,
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
budget_config=self.config.build_budget_config(),
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
else:
|
||||
@@ -554,7 +497,6 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
|
||||
temperature=self.config.agent_temperature,
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
budget_config=self.config.build_budget_config(),
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
|
||||
|
||||
@@ -549,7 +549,6 @@ class YCBenchEvalEnv(HermesAgentBaseEnv):
|
||||
temperature=self.config.agent_temperature,
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
budget_config=self.config.build_budget_config(),
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
|
||||
|
||||
@@ -62,11 +62,6 @@ from atroposlib.type_definitions import Item
|
||||
|
||||
from environments.agent_loop import AgentResult, HermesAgentLoop
|
||||
from environments.tool_context import ToolContext
|
||||
from tools.budget_config import (
|
||||
DEFAULT_RESULT_SIZE_CHARS,
|
||||
DEFAULT_TURN_BUDGET_CHARS,
|
||||
DEFAULT_PREVIEW_SIZE_CHARS,
|
||||
)
|
||||
|
||||
# Import hermes-agent toolset infrastructure
|
||||
from model_tools import get_tool_definitions
|
||||
@@ -165,32 +160,6 @@ class HermesAgentEnvConfig(BaseEnvConfig):
|
||||
"Options: hermes, mistral, llama3_json, qwen, deepseek_v3, etc.",
|
||||
)
|
||||
|
||||
# --- Tool result budget ---
|
||||
# Defaults imported from tools.budget_config (single source of truth).
|
||||
default_result_size_chars: int = Field(
|
||||
default=DEFAULT_RESULT_SIZE_CHARS,
|
||||
description="Default per-tool threshold (chars) for persisting large results "
|
||||
"to sandbox. Results exceeding this are written to /tmp/hermes-results/ "
|
||||
"and replaced with a preview. Per-tool registry values take precedence "
|
||||
"unless overridden via tool_result_overrides.",
|
||||
)
|
||||
turn_budget_chars: int = Field(
|
||||
default=DEFAULT_TURN_BUDGET_CHARS,
|
||||
description="Aggregate char budget per assistant turn. If all tool results "
|
||||
"in a single turn exceed this, the largest are persisted to disk first.",
|
||||
)
|
||||
preview_size_chars: int = Field(
|
||||
default=DEFAULT_PREVIEW_SIZE_CHARS,
|
||||
description="Size of the inline preview shown after a tool result is persisted.",
|
||||
)
|
||||
tool_result_overrides: Optional[Dict[str, int]] = Field(
|
||||
default=None,
|
||||
description="Per-tool threshold overrides (chars). Keys are tool names, "
|
||||
"values are char thresholds. Overrides both the default and registry "
|
||||
"per-tool values. Example: {'terminal': 10000, 'search_files': 5000}. "
|
||||
"Note: read_file is pinned to infinity and cannot be overridden.",
|
||||
)
|
||||
|
||||
# --- Provider-specific parameters ---
|
||||
# Passed as extra_body to the OpenAI client's chat.completions.create() call.
|
||||
# Useful for OpenRouter provider preferences, transforms, route settings, etc.
|
||||
@@ -207,16 +176,6 @@ class HermesAgentEnvConfig(BaseEnvConfig):
|
||||
"transforms, and other provider-specific settings.",
|
||||
)
|
||||
|
||||
def build_budget_config(self):
|
||||
"""Build a BudgetConfig from env config fields."""
|
||||
from tools.budget_config import BudgetConfig
|
||||
return BudgetConfig(
|
||||
default_result_size=self.default_result_size_chars,
|
||||
turn_budget=self.turn_budget_chars,
|
||||
preview_size=self.preview_size_chars,
|
||||
tool_overrides=dict(self.tool_result_overrides) if self.tool_result_overrides else {},
|
||||
)
|
||||
|
||||
|
||||
class HermesAgentBaseEnv(BaseEnv):
|
||||
"""
|
||||
@@ -531,7 +490,6 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
temperature=self.config.agent_temperature,
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
budget_config=self.config.build_budget_config(),
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
except NotImplementedError:
|
||||
@@ -549,7 +507,6 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
temperature=self.config.agent_temperature,
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
budget_config=self.config.build_budget_config(),
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
else:
|
||||
@@ -563,7 +520,6 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
temperature=self.config.agent_temperature,
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
budget_config=self.config.build_budget_config(),
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
|
||||
|
||||
@@ -49,8 +49,6 @@ class HermesToolCallParser(ToolCallParser):
|
||||
continue
|
||||
|
||||
tc_data = json.loads(raw_json)
|
||||
if "name" not in tc_data:
|
||||
continue
|
||||
tool_calls.append(
|
||||
ChatCompletionMessageToolCall(
|
||||
id=f"call_{uuid.uuid4().hex[:8]}",
|
||||
|
||||
@@ -89,8 +89,6 @@ class MistralToolCallParser(ToolCallParser):
|
||||
parsed = [parsed]
|
||||
|
||||
for tc in parsed:
|
||||
if "name" not in tc:
|
||||
continue
|
||||
args = tc.get("arguments", {})
|
||||
if isinstance(args, dict):
|
||||
args = json.dumps(args, ensure_ascii=False)
|
||||
|
||||
@@ -472,7 +472,6 @@ class WebResearchEnv(HermesAgentBaseEnv):
|
||||
temperature=0.0, # Deterministic for eval
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
budget_config=self.config.build_budget_config(),
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
|
||||
|
||||
Generated
+4
-4
@@ -22,16 +22,16 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1775036866,
|
||||
"narHash": "sha256-ZojAnPuCdy657PbTq5V0Y+AHKhZAIwSIT2cb8UgAz/U=",
|
||||
"lastModified": 1751274312,
|
||||
"narHash": "sha256-/bVBlRpECLVzjV19t5KMdMFWSwKLtb5RyXdjz3LJT+g=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "6201e203d09599479a3b3450ed24fa81537ebc4e",
|
||||
"rev": "50ab793786d9de88ee30ec4e4c24fb4236fc2674",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-unstable",
|
||||
"ref": "nixos-24.11",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
description = "Hermes Agent - AI agent framework by Nous Research";
|
||||
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
|
||||
flake-parts = {
|
||||
url = "github:hercules-ci/flake-parts";
|
||||
inputs.nixpkgs-lib.follows = "nixpkgs";
|
||||
|
||||
@@ -24,8 +24,7 @@ from pathlib import Path
|
||||
|
||||
logger = logging.getLogger("hooks.boot-md")
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
HERMES_HOME = get_hermes_home()
|
||||
HERMES_HOME = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
||||
BOOT_FILE = HERMES_HOME / "BOOT.md"
|
||||
|
||||
|
||||
|
||||
@@ -76,15 +76,10 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
except Exception as e:
|
||||
logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
|
||||
|
||||
# Platforms that don't support direct channel enumeration get session-based
|
||||
# discovery automatically. Skip infrastructure entries that aren't messaging
|
||||
# platforms — everything else falls through to _build_from_sessions().
|
||||
_SKIP_SESSION_DISCOVERY = frozenset({"local", "api_server", "webhook"})
|
||||
for plat in Platform:
|
||||
plat_name = plat.value
|
||||
if plat_name in _SKIP_SESSION_DISCOVERY or plat_name in platforms:
|
||||
continue
|
||||
platforms[plat_name] = _build_from_sessions(plat_name)
|
||||
# Telegram, WhatsApp & Signal can't enumerate chats -- pull from session history
|
||||
for plat_name in ("telegram", "whatsapp", "signal", "email", "sms"):
|
||||
if plat_name not in platforms:
|
||||
platforms[plat_name] = _build_from_sessions(plat_name)
|
||||
|
||||
directory = {
|
||||
"updated_at": datetime.now().isoformat(),
|
||||
@@ -129,6 +124,7 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
|
||||
|
||||
def _build_slack(adapter) -> List[Dict[str, str]]:
|
||||
"""List Slack channels the bot has joined."""
|
||||
channels = []
|
||||
# Slack adapter may expose a web client
|
||||
client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
|
||||
if not client:
|
||||
|
||||
@@ -63,8 +63,6 @@ class Platform(Enum):
|
||||
WEBHOOK = "webhook"
|
||||
FEISHU = "feishu"
|
||||
WECOM = "wecom"
|
||||
WEIXIN = "weixin"
|
||||
BLUEBUBBLES = "bluebubbles"
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -262,11 +260,6 @@ class GatewayConfig:
|
||||
for platform, config in self.platforms.items():
|
||||
if not config.enabled:
|
||||
continue
|
||||
# Weixin requires both a token and an account_id
|
||||
if platform == Platform.WEIXIN:
|
||||
if config.extra.get("account_id") and (config.token or config.extra.get("token")):
|
||||
connected.append(platform)
|
||||
continue
|
||||
# Platforms that use token/api_key auth
|
||||
if config.token or config.api_key:
|
||||
connected.append(platform)
|
||||
@@ -294,9 +287,6 @@ class GatewayConfig:
|
||||
# WeCom uses extra dict for bot credentials
|
||||
elif platform == Platform.WECOM and config.extra.get("bot_id"):
|
||||
connected.append(platform)
|
||||
# BlueBubbles uses extra dict for local server config
|
||||
elif platform == Platform.BLUEBUBBLES and config.extra.get("server_url") and config.extra.get("password"):
|
||||
connected.append(platform)
|
||||
return connected
|
||||
|
||||
def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
|
||||
@@ -538,12 +528,8 @@ def load_gateway_config() -> GatewayConfig:
|
||||
bridged["reply_prefix"] = platform_cfg["reply_prefix"]
|
||||
if "require_mention" in platform_cfg:
|
||||
bridged["require_mention"] = platform_cfg["require_mention"]
|
||||
if "free_response_channels" in platform_cfg:
|
||||
bridged["free_response_channels"] = platform_cfg["free_response_channels"]
|
||||
if "mention_patterns" in platform_cfg:
|
||||
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
|
||||
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
|
||||
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
|
||||
if not bridged:
|
||||
continue
|
||||
plat_data = platforms_data.setdefault(plat.value, {})
|
||||
@@ -556,19 +542,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
plat_data["extra"] = extra
|
||||
extra.update(bridged)
|
||||
|
||||
# Slack settings → env vars (env vars take precedence)
|
||||
slack_cfg = yaml_cfg.get("slack", {})
|
||||
if isinstance(slack_cfg, dict):
|
||||
if "require_mention" in slack_cfg and not os.getenv("SLACK_REQUIRE_MENTION"):
|
||||
os.environ["SLACK_REQUIRE_MENTION"] = str(slack_cfg["require_mention"]).lower()
|
||||
if "allow_bots" in slack_cfg and not os.getenv("SLACK_ALLOW_BOTS"):
|
||||
os.environ["SLACK_ALLOW_BOTS"] = str(slack_cfg["allow_bots"]).lower()
|
||||
frc = slack_cfg.get("free_response_channels")
|
||||
if frc is not None and not os.getenv("SLACK_FREE_RESPONSE_CHANNELS"):
|
||||
if isinstance(frc, list):
|
||||
frc = ",".join(str(v) for v in frc)
|
||||
os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc)
|
||||
|
||||
# Discord settings → env vars (env vars take precedence)
|
||||
discord_cfg = yaml_cfg.get("discord", {})
|
||||
if isinstance(discord_cfg, dict):
|
||||
@@ -583,24 +556,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
|
||||
if "reactions" in discord_cfg and not os.getenv("DISCORD_REACTIONS"):
|
||||
os.environ["DISCORD_REACTIONS"] = str(discord_cfg["reactions"]).lower()
|
||||
# ignored_channels: channels where bot never responds (even when mentioned)
|
||||
ic = discord_cfg.get("ignored_channels")
|
||||
if ic is not None and not os.getenv("DISCORD_IGNORED_CHANNELS"):
|
||||
if isinstance(ic, list):
|
||||
ic = ",".join(str(v) for v in ic)
|
||||
os.environ["DISCORD_IGNORED_CHANNELS"] = str(ic)
|
||||
# allowed_channels: if set, bot ONLY responds in these channels (whitelist)
|
||||
ac = discord_cfg.get("allowed_channels")
|
||||
if ac is not None and not os.getenv("DISCORD_ALLOWED_CHANNELS"):
|
||||
if isinstance(ac, list):
|
||||
ac = ",".join(str(v) for v in ac)
|
||||
os.environ["DISCORD_ALLOWED_CHANNELS"] = str(ac)
|
||||
# no_thread_channels: channels where bot responds directly without creating thread
|
||||
ntc = discord_cfg.get("no_thread_channels")
|
||||
if ntc is not None and not os.getenv("DISCORD_NO_THREAD_CHANNELS"):
|
||||
if isinstance(ntc, list):
|
||||
ntc = ",".join(str(v) for v in ntc)
|
||||
os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc)
|
||||
|
||||
# Telegram settings → env vars (env vars take precedence)
|
||||
telegram_cfg = yaml_cfg.get("telegram", {})
|
||||
@@ -615,8 +570,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if isinstance(frc, list):
|
||||
frc = ",".join(str(v) for v in frc)
|
||||
os.environ["TELEGRAM_FREE_RESPONSE_CHATS"] = str(frc)
|
||||
if "reactions" in telegram_cfg and not os.getenv("TELEGRAM_REACTIONS"):
|
||||
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
|
||||
|
||||
whatsapp_cfg = yaml_cfg.get("whatsapp", {})
|
||||
if isinstance(whatsapp_cfg, dict):
|
||||
@@ -642,8 +595,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["MATRIX_FREE_RESPONSE_ROOMS"] = str(frc)
|
||||
if "auto_thread" in matrix_cfg and not os.getenv("MATRIX_AUTO_THREAD"):
|
||||
os.environ["MATRIX_AUTO_THREAD"] = str(matrix_cfg["auto_thread"]).lower()
|
||||
if "dm_mention_threads" in matrix_cfg and not os.getenv("MATRIX_DM_MENTION_THREADS"):
|
||||
os.environ["MATRIX_DM_MENTION_THREADS"] = str(matrix_cfg["dm_mention_threads"]).lower()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
@@ -682,7 +633,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
Platform.SLACK: "SLACK_BOT_TOKEN",
|
||||
Platform.MATTERMOST: "MATTERMOST_TOKEN",
|
||||
Platform.MATRIX: "MATRIX_ACCESS_TOKEN",
|
||||
Platform.WEIXIN: "WEIXIN_TOKEN",
|
||||
}
|
||||
for platform, pconfig in config.platforms.items():
|
||||
if not pconfig.enabled:
|
||||
@@ -748,13 +698,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
name=os.getenv("DISCORD_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
|
||||
# Reply threading mode for Discord (off/first/all)
|
||||
discord_reply_mode = os.getenv("DISCORD_REPLY_TO_MODE", "").lower()
|
||||
if discord_reply_mode in ("off", "first", "all"):
|
||||
if Platform.DISCORD not in config.platforms:
|
||||
config.platforms[Platform.DISCORD] = PlatformConfig()
|
||||
config.platforms[Platform.DISCORD].reply_to_mode = discord_reply_mode
|
||||
|
||||
# WhatsApp (typically uses different auth mechanism)
|
||||
whatsapp_enabled = os.getenv("WHATSAPP_ENABLED", "").lower() in ("true", "1", "yes")
|
||||
if whatsapp_enabled:
|
||||
@@ -918,9 +861,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
pass
|
||||
if api_server_host:
|
||||
config.platforms[Platform.API_SERVER].extra["host"] = api_server_host
|
||||
api_server_model_name = os.getenv("API_SERVER_MODEL_NAME", "")
|
||||
if api_server_model_name:
|
||||
config.platforms[Platform.API_SERVER].extra["model_name"] = api_server_model_name
|
||||
|
||||
# Webhook platform
|
||||
webhook_enabled = os.getenv("WEBHOOK_ENABLED", "").lower() in ("true", "1", "yes")
|
||||
@@ -987,67 +927,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
name=os.getenv("WECOM_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
|
||||
# Weixin (personal WeChat via iLink Bot API)
|
||||
weixin_token = os.getenv("WEIXIN_TOKEN")
|
||||
weixin_account_id = os.getenv("WEIXIN_ACCOUNT_ID")
|
||||
if weixin_token or weixin_account_id:
|
||||
if Platform.WEIXIN not in config.platforms:
|
||||
config.platforms[Platform.WEIXIN] = PlatformConfig()
|
||||
config.platforms[Platform.WEIXIN].enabled = True
|
||||
if weixin_token:
|
||||
config.platforms[Platform.WEIXIN].token = weixin_token
|
||||
extra = config.platforms[Platform.WEIXIN].extra
|
||||
if weixin_account_id:
|
||||
extra["account_id"] = weixin_account_id
|
||||
weixin_base_url = os.getenv("WEIXIN_BASE_URL", "").strip()
|
||||
if weixin_base_url:
|
||||
extra["base_url"] = weixin_base_url.rstrip("/")
|
||||
weixin_cdn_base_url = os.getenv("WEIXIN_CDN_BASE_URL", "").strip()
|
||||
if weixin_cdn_base_url:
|
||||
extra["cdn_base_url"] = weixin_cdn_base_url.rstrip("/")
|
||||
weixin_dm_policy = os.getenv("WEIXIN_DM_POLICY", "").strip().lower()
|
||||
if weixin_dm_policy:
|
||||
extra["dm_policy"] = weixin_dm_policy
|
||||
weixin_group_policy = os.getenv("WEIXIN_GROUP_POLICY", "").strip().lower()
|
||||
if weixin_group_policy:
|
||||
extra["group_policy"] = weixin_group_policy
|
||||
weixin_allowed_users = os.getenv("WEIXIN_ALLOWED_USERS", "").strip()
|
||||
if weixin_allowed_users:
|
||||
extra["allow_from"] = weixin_allowed_users
|
||||
weixin_group_allowed_users = os.getenv("WEIXIN_GROUP_ALLOWED_USERS", "").strip()
|
||||
if weixin_group_allowed_users:
|
||||
extra["group_allow_from"] = weixin_group_allowed_users
|
||||
weixin_home = os.getenv("WEIXIN_HOME_CHANNEL", "").strip()
|
||||
if weixin_home:
|
||||
config.platforms[Platform.WEIXIN].home_channel = HomeChannel(
|
||||
platform=Platform.WEIXIN,
|
||||
chat_id=weixin_home,
|
||||
name=os.getenv("WEIXIN_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
|
||||
# BlueBubbles (iMessage)
|
||||
bluebubbles_server_url = os.getenv("BLUEBUBBLES_SERVER_URL")
|
||||
bluebubbles_password = os.getenv("BLUEBUBBLES_PASSWORD")
|
||||
if bluebubbles_server_url and bluebubbles_password:
|
||||
if Platform.BLUEBUBBLES not in config.platforms:
|
||||
config.platforms[Platform.BLUEBUBBLES] = PlatformConfig()
|
||||
config.platforms[Platform.BLUEBUBBLES].enabled = True
|
||||
config.platforms[Platform.BLUEBUBBLES].extra.update({
|
||||
"server_url": bluebubbles_server_url.rstrip("/"),
|
||||
"password": bluebubbles_password,
|
||||
"webhook_host": os.getenv("BLUEBUBBLES_WEBHOOK_HOST", "127.0.0.1"),
|
||||
"webhook_port": int(os.getenv("BLUEBUBBLES_WEBHOOK_PORT", "8645")),
|
||||
"webhook_path": os.getenv("BLUEBUBBLES_WEBHOOK_PATH", "/bluebubbles-webhook"),
|
||||
"send_read_receipts": os.getenv("BLUEBUBBLES_SEND_READ_RECEIPTS", "true").lower() in ("true", "1", "yes"),
|
||||
})
|
||||
bluebubbles_home = os.getenv("BLUEBUBBLES_HOME_CHANNEL")
|
||||
if bluebubbles_home and Platform.BLUEBUBBLES in config.platforms:
|
||||
config.platforms[Platform.BLUEBUBBLES].home_channel = HomeChannel(
|
||||
platform=Platform.BLUEBUBBLES,
|
||||
chat_id=bluebubbles_home,
|
||||
name=os.getenv("BLUEBUBBLES_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
|
||||
# Session settings
|
||||
idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
|
||||
if idle_minutes:
|
||||
|
||||
@@ -124,6 +124,53 @@ class DeliveryRouter:
|
||||
self.adapters = adapters or {}
|
||||
self.output_dir = get_hermes_home() / "cron" / "output"
|
||||
|
||||
def resolve_targets(
|
||||
self,
|
||||
deliver: Union[str, List[str]],
|
||||
origin: Optional[SessionSource] = None
|
||||
) -> List[DeliveryTarget]:
|
||||
"""
|
||||
Resolve delivery specification to concrete targets.
|
||||
|
||||
Args:
|
||||
deliver: Delivery spec - "origin", "telegram", ["local", "discord"], etc.
|
||||
origin: The source where the request originated (for "origin" target)
|
||||
|
||||
Returns:
|
||||
List of resolved delivery targets
|
||||
"""
|
||||
if isinstance(deliver, str):
|
||||
deliver = [deliver]
|
||||
|
||||
targets = []
|
||||
seen_platforms = set()
|
||||
|
||||
for target_str in deliver:
|
||||
target = DeliveryTarget.parse(target_str, origin)
|
||||
|
||||
# Resolve home channel if needed
|
||||
if target.chat_id is None and target.platform != Platform.LOCAL:
|
||||
home = self.config.get_home_channel(target.platform)
|
||||
if home:
|
||||
target.chat_id = home.chat_id
|
||||
else:
|
||||
# No home channel configured, skip this platform
|
||||
continue
|
||||
|
||||
# Deduplicate
|
||||
key = (target.platform, target.chat_id, target.thread_id)
|
||||
if key not in seen_platforms:
|
||||
seen_platforms.add(key)
|
||||
targets.append(target)
|
||||
|
||||
# Always include local if configured
|
||||
if self.config.always_log_local:
|
||||
local_key = (Platform.LOCAL, None, None)
|
||||
if local_key not in seen_platforms:
|
||||
targets.append(DeliveryTarget(platform=Platform.LOCAL))
|
||||
|
||||
return targets
|
||||
|
||||
async def deliver(
|
||||
self,
|
||||
content: str,
|
||||
@@ -252,5 +299,53 @@ class DeliveryRouter:
|
||||
return await adapter.send(target.chat_id, content, metadata=send_metadata or None)
|
||||
|
||||
|
||||
def parse_deliver_spec(
|
||||
deliver: Optional[Union[str, List[str]]],
|
||||
origin: Optional[SessionSource] = None,
|
||||
default: str = "origin"
|
||||
) -> Union[str, List[str]]:
|
||||
"""
|
||||
Normalize a delivery specification.
|
||||
|
||||
If None or empty, returns the default.
|
||||
"""
|
||||
if not deliver:
|
||||
return default
|
||||
return deliver
|
||||
|
||||
|
||||
def build_delivery_context_for_tool(
|
||||
config: GatewayConfig,
|
||||
origin: Optional[SessionSource] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Build context for the unified cronjob tool to understand delivery options.
|
||||
|
||||
This is passed to the tool so it can validate and explain delivery targets.
|
||||
"""
|
||||
connected = config.get_connected_platforms()
|
||||
|
||||
options = {
|
||||
"origin": {
|
||||
"description": "Back to where this job was created",
|
||||
"available": origin is not None,
|
||||
},
|
||||
"local": {
|
||||
"description": "Save to local files only",
|
||||
"available": True,
|
||||
}
|
||||
}
|
||||
|
||||
for platform in connected:
|
||||
home = config.get_home_channel(platform)
|
||||
options[platform.value] = {
|
||||
"description": f"{platform.value.title()} home channel",
|
||||
"available": True,
|
||||
"home_channel": home.to_dict() if home else None,
|
||||
}
|
||||
|
||||
return {
|
||||
"origin": origin.to_dict() if origin else None,
|
||||
"options": options,
|
||||
"always_log_local": config.always_log_local,
|
||||
}
|
||||
|
||||
+29
-217
@@ -20,13 +20,9 @@ Requires:
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import socket as _socket
|
||||
import re
|
||||
import sqlite3
|
||||
import time
|
||||
import uuid
|
||||
@@ -43,7 +39,6 @@ from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
SendResult,
|
||||
is_network_accessible,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -286,24 +281,6 @@ def _make_request_fingerprint(body: Dict[str, Any], keys: List[str]) -> str:
|
||||
return sha256(repr(subset).encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _derive_chat_session_id(
|
||||
system_prompt: Optional[str],
|
||||
first_user_message: str,
|
||||
) -> str:
|
||||
"""Derive a stable session ID from the conversation's first user message.
|
||||
|
||||
OpenAI-compatible frontends (Open WebUI, LibreChat, etc.) send the full
|
||||
conversation history with every request. The system prompt and first user
|
||||
message are constant across all turns of the same conversation, so hashing
|
||||
them produces a deterministic session ID that lets the API server reuse
|
||||
the same Hermes session (and therefore the same Docker container sandbox
|
||||
directory) across turns.
|
||||
"""
|
||||
seed = f"{system_prompt or ''}\n{first_user_message}"
|
||||
digest = hashlib.sha256(seed.encode("utf-8")).hexdigest()[:16]
|
||||
return f"api-{digest}"
|
||||
|
||||
|
||||
class APIServerAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
OpenAI-compatible HTTP API server adapter.
|
||||
@@ -321,9 +298,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._cors_origins: tuple[str, ...] = self._parse_cors_origins(
|
||||
extra.get("cors_origins", os.getenv("API_SERVER_CORS_ORIGINS", "")),
|
||||
)
|
||||
self._model_name: str = self._resolve_model_name(
|
||||
extra.get("model_name", os.getenv("API_SERVER_MODEL_NAME", "")),
|
||||
)
|
||||
self._app: Optional["web.Application"] = None
|
||||
self._runner: Optional["web.AppRunner"] = None
|
||||
self._site: Optional["web.TCPSite"] = None
|
||||
@@ -349,26 +323,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
return tuple(str(item).strip() for item in items if str(item).strip())
|
||||
|
||||
@staticmethod
|
||||
def _resolve_model_name(explicit: str) -> str:
|
||||
"""Derive the advertised model name for /v1/models.
|
||||
|
||||
Priority:
|
||||
1. Explicit override (config extra or API_SERVER_MODEL_NAME env var)
|
||||
2. Active profile name (so each profile advertises a distinct model)
|
||||
3. Fallback: "hermes-agent"
|
||||
"""
|
||||
if explicit and explicit.strip():
|
||||
return explicit.strip()
|
||||
try:
|
||||
from hermes_cli.profiles import get_active_profile_name
|
||||
profile = get_active_profile_name()
|
||||
if profile and profile not in ("default", "custom"):
|
||||
return profile
|
||||
except Exception:
|
||||
pass
|
||||
return "hermes-agent"
|
||||
|
||||
def _cors_headers_for_origin(self, origin: str) -> Optional[Dict[str, str]]:
|
||||
"""Return CORS headers for an allowed browser origin."""
|
||||
if not origin or not self._cors_origins:
|
||||
@@ -408,8 +362,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
Validate Bearer token from Authorization header.
|
||||
|
||||
Returns None if auth is OK, or a 401 web.Response on failure.
|
||||
If no API key is configured, all requests are allowed (only when API
|
||||
server is local).
|
||||
If no API key is configured, all requests are allowed.
|
||||
"""
|
||||
if not self._api_key:
|
||||
return None # No key configured — allow all (local-only use)
|
||||
@@ -417,7 +370,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
auth_header = request.headers.get("Authorization", "")
|
||||
if auth_header.startswith("Bearer "):
|
||||
token = auth_header[7:].strip()
|
||||
if hmac.compare_digest(token, self._api_key):
|
||||
if token == self._api_key:
|
||||
return None # Auth OK
|
||||
|
||||
return web.json_response(
|
||||
@@ -514,12 +467,12 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"object": "list",
|
||||
"data": [
|
||||
{
|
||||
"id": self._model_name,
|
||||
"id": "hermes-agent",
|
||||
"object": "model",
|
||||
"created": int(time.time()),
|
||||
"owned_by": "hermes",
|
||||
"permission": [],
|
||||
"root": self._model_name,
|
||||
"root": "hermes-agent",
|
||||
"parent": None,
|
||||
}
|
||||
],
|
||||
@@ -577,32 +530,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
# Allow caller to continue an existing session by passing X-Hermes-Session-Id.
|
||||
# When provided, history is loaded from state.db instead of from the request body.
|
||||
#
|
||||
# Security: session continuation exposes conversation history, so it is
|
||||
# only allowed when the API key is configured and the request is
|
||||
# authenticated. Without this gate, any unauthenticated client could
|
||||
# read arbitrary session history by guessing/enumerating session IDs.
|
||||
provided_session_id = request.headers.get("X-Hermes-Session-Id", "").strip()
|
||||
if provided_session_id:
|
||||
if not self._api_key:
|
||||
logger.warning(
|
||||
"Session continuation via X-Hermes-Session-Id rejected: "
|
||||
"no API key configured. Set API_SERVER_KEY to enable "
|
||||
"session continuity."
|
||||
)
|
||||
return web.json_response(
|
||||
_openai_error(
|
||||
"Session continuation requires API key authentication. "
|
||||
"Configure API_SERVER_KEY to enable this feature."
|
||||
),
|
||||
status=403,
|
||||
)
|
||||
# Sanitize: reject control characters that could enable header injection.
|
||||
if re.search(r'[\r\n\x00]', provided_session_id):
|
||||
return web.json_response(
|
||||
{"error": {"message": "Invalid session ID", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
session_id = provided_session_id
|
||||
try:
|
||||
db = self._ensure_session_db()
|
||||
@@ -612,20 +541,11 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
logger.warning("Failed to load session history for %s: %s", session_id, e)
|
||||
history = []
|
||||
else:
|
||||
# Derive a stable session ID from the conversation fingerprint so
|
||||
# that consecutive messages from the same Open WebUI (or similar)
|
||||
# conversation map to the same Hermes session. The first user
|
||||
# message + system prompt are constant across all turns.
|
||||
first_user = ""
|
||||
for cm in conversation_messages:
|
||||
if cm.get("role") == "user":
|
||||
first_user = cm.get("content", "")
|
||||
break
|
||||
session_id = _derive_chat_session_id(system_prompt, first_user)
|
||||
session_id = str(uuid.uuid4())
|
||||
# history already set from request body above
|
||||
|
||||
completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
|
||||
model_name = body.get("model", self._model_name)
|
||||
model_name = body.get("model", "hermes-agent")
|
||||
created = int(time.time())
|
||||
|
||||
if stream:
|
||||
@@ -643,36 +563,14 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if delta is not None:
|
||||
_stream_q.put(delta)
|
||||
|
||||
def _on_tool_progress(event_type, name, preview, args, **kwargs):
|
||||
"""Send tool progress as a separate SSE event.
|
||||
|
||||
Previously, progress markers like ``⏰ list`` were injected
|
||||
directly into ``delta.content``. OpenAI-compatible frontends
|
||||
(Open WebUI, LobeChat, …) store ``delta.content`` verbatim as
|
||||
the assistant message and send it back on subsequent requests.
|
||||
After enough turns the model learns to *emit* the markers as
|
||||
plain text instead of issuing real tool calls — silently
|
||||
hallucinating tool results. See #6972.
|
||||
|
||||
The fix: push a tagged tuple ``("__tool_progress__", payload)``
|
||||
onto the stream queue. The SSE writer emits it as a custom
|
||||
``event: hermes.tool.progress`` line that compliant frontends
|
||||
can render for UX but will *not* persist into conversation
|
||||
history. Clients that don't understand the custom event type
|
||||
silently ignore it per the SSE specification.
|
||||
"""
|
||||
if event_type != "tool.started":
|
||||
return
|
||||
def _on_tool_progress(name, preview, args):
|
||||
"""Inject tool progress into the SSE stream for Open WebUI."""
|
||||
if name.startswith("_"):
|
||||
return
|
||||
return # Skip internal events (_thinking)
|
||||
from agent.display import get_tool_emoji
|
||||
emoji = get_tool_emoji(name)
|
||||
label = preview or name
|
||||
_stream_q.put(("__tool_progress__", {
|
||||
"tool": name,
|
||||
"emoji": emoji,
|
||||
"label": label,
|
||||
}))
|
||||
_stream_q.put(f"\n`{emoji} {label}`\n")
|
||||
|
||||
# Start agent in background. agent_ref is a mutable container
|
||||
# so the SSE writer can interrupt the agent on client disconnect.
|
||||
@@ -783,29 +681,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
}
|
||||
await response.write(f"data: {json.dumps(role_chunk)}\n\n".encode())
|
||||
|
||||
# Helper — route a queue item to the correct SSE event.
|
||||
async def _emit(item):
|
||||
"""Write a single queue item to the SSE stream.
|
||||
|
||||
Plain strings are sent as normal ``delta.content`` chunks.
|
||||
Tagged tuples ``("__tool_progress__", payload)`` are sent
|
||||
as a custom ``event: hermes.tool.progress`` SSE event so
|
||||
frontends can display them without storing the markers in
|
||||
conversation history. See #6972.
|
||||
"""
|
||||
if isinstance(item, tuple) and len(item) == 2 and item[0] == "__tool_progress__":
|
||||
event_data = json.dumps(item[1])
|
||||
await response.write(
|
||||
f"event: hermes.tool.progress\ndata: {event_data}\n\n".encode()
|
||||
)
|
||||
else:
|
||||
content_chunk = {
|
||||
"id": completion_id, "object": "chat.completion.chunk",
|
||||
"created": created, "model": model,
|
||||
"choices": [{"index": 0, "delta": {"content": item}, "finish_reason": None}],
|
||||
}
|
||||
await response.write(f"data: {json.dumps(content_chunk)}\n\n".encode())
|
||||
|
||||
# Stream content chunks as they arrive from the agent
|
||||
loop = asyncio.get_event_loop()
|
||||
while True:
|
||||
@@ -819,7 +694,12 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
delta = stream_q.get_nowait()
|
||||
if delta is None:
|
||||
break
|
||||
await _emit(delta)
|
||||
content_chunk = {
|
||||
"id": completion_id, "object": "chat.completion.chunk",
|
||||
"created": created, "model": model,
|
||||
"choices": [{"index": 0, "delta": {"content": delta}, "finish_reason": None}],
|
||||
}
|
||||
await response.write(f"data: {json.dumps(content_chunk)}\n\n".encode())
|
||||
except _q.Empty:
|
||||
break
|
||||
break
|
||||
@@ -828,7 +708,12 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if delta is None: # End of stream sentinel
|
||||
break
|
||||
|
||||
await _emit(delta)
|
||||
content_chunk = {
|
||||
"id": completion_id, "object": "chat.completion.chunk",
|
||||
"created": created, "model": model,
|
||||
"choices": [{"index": 0, "delta": {"content": delta}, "finish_reason": None}],
|
||||
}
|
||||
await response.write(f"data: {json.dumps(content_chunk)}\n\n".encode())
|
||||
|
||||
# Get usage from completed agent
|
||||
usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
|
||||
@@ -930,29 +815,9 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
else:
|
||||
return web.json_response(_openai_error("'input' must be a string or array"), status=400)
|
||||
|
||||
# Accept explicit conversation_history from the request body.
|
||||
# This lets stateless clients supply their own history instead of
|
||||
# relying on server-side response chaining via previous_response_id.
|
||||
# Precedence: explicit conversation_history > previous_response_id.
|
||||
# Reconstruct conversation history from previous_response_id
|
||||
conversation_history: List[Dict[str, str]] = []
|
||||
raw_history = body.get("conversation_history")
|
||||
if raw_history:
|
||||
if not isinstance(raw_history, list):
|
||||
return web.json_response(
|
||||
_openai_error("'conversation_history' must be an array of message objects"),
|
||||
status=400,
|
||||
)
|
||||
for i, entry in enumerate(raw_history):
|
||||
if not isinstance(entry, dict) or "role" not in entry or "content" not in entry:
|
||||
return web.json_response(
|
||||
_openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
|
||||
status=400,
|
||||
)
|
||||
conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
|
||||
if previous_response_id:
|
||||
logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
|
||||
|
||||
if not conversation_history and previous_response_id:
|
||||
if previous_response_id:
|
||||
stored = self._response_store.get(previous_response_id)
|
||||
if stored is None:
|
||||
return web.json_response(_openai_error(f"Previous response not found: {previous_response_id}"), status=404)
|
||||
@@ -1035,7 +900,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"object": "response",
|
||||
"status": "completed",
|
||||
"created_at": created_at,
|
||||
"model": body.get("model", self._model_name),
|
||||
"model": body.get("model", "hermes-agent"),
|
||||
"output": output_items,
|
||||
"usage": {
|
||||
"input_tokens": usage.get("input_tokens", 0),
|
||||
@@ -1430,7 +1295,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
result = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
conversation_history=conversation_history,
|
||||
task_id="default",
|
||||
)
|
||||
usage = {
|
||||
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
|
||||
@@ -1539,49 +1403,14 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
instructions = body.get("instructions")
|
||||
previous_response_id = body.get("previous_response_id")
|
||||
|
||||
# Accept explicit conversation_history from the request body.
|
||||
# Precedence: explicit conversation_history > previous_response_id.
|
||||
conversation_history: List[Dict[str, str]] = []
|
||||
raw_history = body.get("conversation_history")
|
||||
if raw_history:
|
||||
if not isinstance(raw_history, list):
|
||||
return web.json_response(
|
||||
_openai_error("'conversation_history' must be an array of message objects"),
|
||||
status=400,
|
||||
)
|
||||
for i, entry in enumerate(raw_history):
|
||||
if not isinstance(entry, dict) or "role" not in entry or "content" not in entry:
|
||||
return web.json_response(
|
||||
_openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
|
||||
status=400,
|
||||
)
|
||||
conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
|
||||
if previous_response_id:
|
||||
logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
|
||||
|
||||
if not conversation_history and previous_response_id:
|
||||
if previous_response_id:
|
||||
stored = self._response_store.get(previous_response_id)
|
||||
if stored:
|
||||
conversation_history = list(stored.get("conversation_history", []))
|
||||
if instructions is None:
|
||||
instructions = stored.get("instructions")
|
||||
|
||||
# When input is a multi-message array, extract all but the last
|
||||
# message as conversation history (the last becomes user_message).
|
||||
# Only fires when no explicit history was provided.
|
||||
if not conversation_history and isinstance(raw_input, list) and len(raw_input) > 1:
|
||||
for msg in raw_input[:-1]:
|
||||
if isinstance(msg, dict) and msg.get("role") and msg.get("content"):
|
||||
content = msg["content"]
|
||||
if isinstance(content, list):
|
||||
# Flatten multi-part content blocks to text
|
||||
content = " ".join(
|
||||
part.get("text", "") for part in content
|
||||
if isinstance(part, dict) and part.get("type") == "text"
|
||||
)
|
||||
conversation_history.append({"role": msg["role"], "content": str(content)})
|
||||
|
||||
session_id = body.get("session_id") or run_id
|
||||
ephemeral_system_prompt = instructions
|
||||
|
||||
@@ -1597,7 +1426,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
r = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
conversation_history=conversation_history,
|
||||
task_id="default",
|
||||
)
|
||||
u = {
|
||||
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
|
||||
@@ -1749,16 +1577,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if hasattr(sweep_task, "add_done_callback"):
|
||||
sweep_task.add_done_callback(self._background_tasks.discard)
|
||||
|
||||
# Refuse to start network-accessible without authentication
|
||||
if is_network_accessible(self._host) and not self._api_key:
|
||||
logger.error(
|
||||
"[%s] Refusing to start: binding to %s requires API_SERVER_KEY. "
|
||||
"Set API_SERVER_KEY or use the default 127.0.0.1.",
|
||||
self.name, self._host,
|
||||
)
|
||||
return False
|
||||
|
||||
# Port conflict detection — fail fast if port is already in use
|
||||
import socket as _socket
|
||||
try:
|
||||
with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as _s:
|
||||
_s.settimeout(1)
|
||||
@@ -1774,17 +1594,9 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
await self._site.start()
|
||||
|
||||
self._mark_connected()
|
||||
if not self._api_key:
|
||||
logger.warning(
|
||||
"[%s] ⚠️ No API key configured (API_SERVER_KEY / platforms.api_server.key). "
|
||||
"All requests will be accepted without authentication. "
|
||||
"Set an API key for production deployments to prevent "
|
||||
"unauthorized access to sessions, responses, and cron jobs.",
|
||||
self.name,
|
||||
)
|
||||
logger.info(
|
||||
"[%s] API server listening on http://%s:%d (model: %s)",
|
||||
self.name, self._host, self._port, self._model_name,
|
||||
"[%s] API server listening on http://%s:%d",
|
||||
self.name, self._host, self._port,
|
||||
)
|
||||
return True
|
||||
|
||||
|
||||
+55
-368
@@ -6,188 +6,28 @@ and implement the required methods.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import ipaddress
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import socket as _socket
|
||||
import subprocess
|
||||
import sys
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def is_network_accessible(host: str) -> bool:
|
||||
"""Return True if *host* would expose the server beyond loopback.
|
||||
|
||||
Loopback addresses (127.0.0.1, ::1, IPv4-mapped ::ffff:127.0.0.1)
|
||||
are local-only. Unspecified addresses (0.0.0.0, ::) bind all
|
||||
interfaces. Hostnames are resolved; DNS failure fails closed.
|
||||
"""
|
||||
try:
|
||||
addr = ipaddress.ip_address(host)
|
||||
if addr.is_loopback:
|
||||
return False
|
||||
# ::ffff:127.0.0.1 — Python reports is_loopback=False for mapped
|
||||
# addresses, so check the underlying IPv4 explicitly.
|
||||
if getattr(addr, "ipv4_mapped", None) and addr.ipv4_mapped.is_loopback:
|
||||
return False
|
||||
return True
|
||||
except ValueError:
|
||||
# when host variable is a hostname, we should try to resolve below
|
||||
pass
|
||||
|
||||
try:
|
||||
resolved = _socket.getaddrinfo(
|
||||
host, None, _socket.AF_UNSPEC, _socket.SOCK_STREAM,
|
||||
)
|
||||
# if the hostname resolves into at least one non-loopback address,
|
||||
# then we consider it to be network accessible
|
||||
for _family, _type, _proto, _canonname, sockaddr in resolved:
|
||||
addr = ipaddress.ip_address(sockaddr[0])
|
||||
if not addr.is_loopback:
|
||||
return True
|
||||
return False
|
||||
except (_socket.gaierror, OSError):
|
||||
return True
|
||||
|
||||
|
||||
def _detect_macos_system_proxy() -> str | None:
|
||||
"""Read the macOS system HTTP(S) proxy via ``scutil --proxy``.
|
||||
|
||||
Returns an ``http://host:port`` URL string if an HTTP or HTTPS proxy is
|
||||
enabled, otherwise *None*. Falls back silently on non-macOS or on any
|
||||
subprocess error.
|
||||
"""
|
||||
if sys.platform != "darwin":
|
||||
return None
|
||||
try:
|
||||
out = subprocess.check_output(
|
||||
["scutil", "--proxy"], timeout=3, text=True, stderr=subprocess.DEVNULL,
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
props: dict[str, str] = {}
|
||||
for line in out.splitlines():
|
||||
line = line.strip()
|
||||
if " : " in line:
|
||||
key, _, val = line.partition(" : ")
|
||||
props[key.strip()] = val.strip()
|
||||
|
||||
# Prefer HTTPS, fall back to HTTP
|
||||
for enable_key, host_key, port_key in (
|
||||
("HTTPSEnable", "HTTPSProxy", "HTTPSPort"),
|
||||
("HTTPEnable", "HTTPProxy", "HTTPPort"),
|
||||
):
|
||||
if props.get(enable_key) == "1":
|
||||
host = props.get(host_key)
|
||||
port = props.get(port_key)
|
||||
if host and port:
|
||||
return f"http://{host}:{port}"
|
||||
return None
|
||||
|
||||
|
||||
def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
||||
"""Return a proxy URL from env vars, or macOS system proxy.
|
||||
|
||||
Check order:
|
||||
0. *platform_env_var* (e.g. ``DISCORD_PROXY``) — highest priority
|
||||
1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
|
||||
2. macOS system proxy via ``scutil --proxy`` (auto-detect)
|
||||
|
||||
Returns *None* if no proxy is found.
|
||||
"""
|
||||
if platform_env_var:
|
||||
value = (os.environ.get(platform_env_var) or "").strip()
|
||||
if value:
|
||||
return value
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = (os.environ.get(key) or "").strip()
|
||||
if value:
|
||||
return value
|
||||
return _detect_macos_system_proxy()
|
||||
|
||||
|
||||
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
|
||||
"""Build kwargs for ``commands.Bot()`` / ``discord.Client()`` with proxy.
|
||||
|
||||
Returns:
|
||||
- SOCKS URL → ``{"connector": ProxyConnector(..., rdns=True)}``
|
||||
- HTTP URL → ``{"proxy": url}``
|
||||
- *None* → ``{}``
|
||||
|
||||
``rdns=True`` forces remote DNS resolution through the proxy — required
|
||||
by many SOCKS implementations (Shadowrocket, Clash) and essential for
|
||||
bypassing DNS pollution behind the GFW.
|
||||
"""
|
||||
if not proxy_url:
|
||||
return {}
|
||||
if proxy_url.lower().startswith("socks"):
|
||||
try:
|
||||
from aiohttp_socks import ProxyConnector
|
||||
|
||||
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
||||
return {"connector": connector}
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"aiohttp_socks not installed — SOCKS proxy %s ignored. "
|
||||
"Run: pip install aiohttp-socks",
|
||||
proxy_url,
|
||||
)
|
||||
return {}
|
||||
return {"proxy": proxy_url}
|
||||
|
||||
|
||||
def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
|
||||
"""Build kwargs for standalone ``aiohttp.ClientSession`` with proxy.
|
||||
|
||||
Returns ``(session_kwargs, request_kwargs)`` where:
|
||||
- SOCKS → ``({"connector": ProxyConnector(...)}, {})``
|
||||
- HTTP → ``({}, {"proxy": url})``
|
||||
- None → ``({}, {})``
|
||||
|
||||
Usage::
|
||||
|
||||
sess_kw, req_kw = proxy_kwargs_for_aiohttp(proxy_url)
|
||||
async with aiohttp.ClientSession(**sess_kw) as session:
|
||||
async with session.get(url, **req_kw) as resp:
|
||||
...
|
||||
"""
|
||||
if not proxy_url:
|
||||
return {}, {}
|
||||
if proxy_url.lower().startswith("socks"):
|
||||
try:
|
||||
from aiohttp_socks import ProxyConnector
|
||||
|
||||
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
||||
return {"connector": connector}, {}
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"aiohttp_socks not installed — SOCKS proxy %s ignored. "
|
||||
"Run: pip install aiohttp-socks",
|
||||
proxy_url,
|
||||
)
|
||||
return {}, {}
|
||||
return {}, {"proxy": proxy_url}
|
||||
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple
|
||||
from enum import Enum
|
||||
|
||||
import sys
|
||||
from pathlib import Path as _Path
|
||||
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.session import SessionSource, build_session_key
|
||||
from hermes_cli.config import get_hermes_home
|
||||
from hermes_constants import get_hermes_dir
|
||||
|
||||
|
||||
@@ -197,7 +37,7 @@ GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = (
|
||||
)
|
||||
|
||||
|
||||
def safe_url_for_log(url: str, max_len: int = 80) -> str:
|
||||
def _safe_url_for_log(url: str, max_len: int = 80) -> str:
|
||||
"""Return a URL string safe for logs (no query/fragment/userinfo)."""
|
||||
if max_len <= 0:
|
||||
return ""
|
||||
@@ -234,23 +74,6 @@ def safe_url_for_log(url: str, max_len: int = 80) -> str:
|
||||
return f"{safe[:max_len - 3]}..."
|
||||
|
||||
|
||||
async def _ssrf_redirect_guard(response):
|
||||
"""Re-validate each redirect target to prevent redirect-based SSRF.
|
||||
|
||||
Without this, an attacker can host a public URL that 302-redirects to
|
||||
http://169.254.169.254/ and bypass the pre-flight is_safe_url() check.
|
||||
|
||||
Must be async because httpx.AsyncClient awaits response event hooks.
|
||||
"""
|
||||
if response.is_redirect and response.next_request:
|
||||
redirect_url = str(response.next_request.url)
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(redirect_url):
|
||||
raise ValueError(
|
||||
f"Blocked redirect to private/internal address: {safe_url_for_log(redirect_url)}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Image cache utilities
|
||||
#
|
||||
@@ -270,23 +93,6 @@ def get_image_cache_dir() -> Path:
|
||||
return IMAGE_CACHE_DIR
|
||||
|
||||
|
||||
def _looks_like_image(data: bytes) -> bool:
|
||||
"""Return True if *data* starts with a known image magic-byte sequence."""
|
||||
if len(data) < 4:
|
||||
return False
|
||||
if data[:8] == b"\x89PNG\r\n\x1a\n":
|
||||
return True
|
||||
if data[:3] == b"\xff\xd8\xff":
|
||||
return True
|
||||
if data[:6] in (b"GIF87a", b"GIF89a"):
|
||||
return True
|
||||
if data[:2] == b"BM":
|
||||
return True
|
||||
if data[:4] == b"RIFF" and len(data) >= 12 and data[8:12] == b"WEBP":
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
|
||||
"""
|
||||
Save raw image bytes to the cache and return the absolute file path.
|
||||
@@ -297,17 +103,7 @@ def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached image file as a string.
|
||||
|
||||
Raises:
|
||||
ValueError: If *data* does not look like a valid image (e.g. an HTML
|
||||
error page returned by the upstream server).
|
||||
"""
|
||||
if not _looks_like_image(data):
|
||||
snippet = data[:80].decode("utf-8", errors="replace")
|
||||
raise ValueError(
|
||||
f"Refusing to cache non-image data as {ext} "
|
||||
f"(starts with: {snippet!r})"
|
||||
)
|
||||
cache_dir = get_image_cache_dir()
|
||||
filename = f"img_{uuid.uuid4().hex[:12]}{ext}"
|
||||
filepath = cache_dir / filename
|
||||
@@ -329,25 +125,14 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached image file as a string.
|
||||
|
||||
Raises:
|
||||
ValueError: If the URL targets a private/internal network (SSRF protection).
|
||||
"""
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(url):
|
||||
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging as _logging
|
||||
_log = _logging.getLogger(__name__)
|
||||
|
||||
last_exc = None
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
follow_redirects=True,
|
||||
event_hooks={"response": [_ssrf_redirect_guard]},
|
||||
) as client:
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
for attempt in range(retries + 1):
|
||||
try:
|
||||
response = await client.get(
|
||||
@@ -369,7 +154,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
|
||||
"Media cache retry %d/%d for %s (%.1fs): %s",
|
||||
attempt + 1,
|
||||
retries,
|
||||
safe_url_for_log(url),
|
||||
_safe_url_for_log(url),
|
||||
wait,
|
||||
exc,
|
||||
)
|
||||
@@ -448,25 +233,14 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached audio file as a string.
|
||||
|
||||
Raises:
|
||||
ValueError: If the URL targets a private/internal network (SSRF protection).
|
||||
"""
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(url):
|
||||
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging as _logging
|
||||
_log = _logging.getLogger(__name__)
|
||||
|
||||
last_exc = None
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
follow_redirects=True,
|
||||
event_hooks={"response": [_ssrf_redirect_guard]},
|
||||
) as client:
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
for attempt in range(retries + 1):
|
||||
try:
|
||||
response = await client.get(
|
||||
@@ -488,7 +262,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
|
||||
"Audio cache retry %d/%d for %s (%.1fs): %s",
|
||||
attempt + 1,
|
||||
retries,
|
||||
safe_url_for_log(url),
|
||||
_safe_url_for_log(url),
|
||||
wait,
|
||||
exc,
|
||||
)
|
||||
@@ -511,7 +285,6 @@ SUPPORTED_DOCUMENT_TYPES = {
|
||||
".pdf": "application/pdf",
|
||||
".md": "text/markdown",
|
||||
".txt": "text/plain",
|
||||
".log": "text/plain",
|
||||
".zip": "application/zip",
|
||||
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
@@ -591,14 +364,6 @@ class MessageType(Enum):
|
||||
COMMAND = "command" # /command style
|
||||
|
||||
|
||||
class ProcessingOutcome(Enum):
|
||||
"""Result classification for message-processing lifecycle hooks."""
|
||||
|
||||
SUCCESS = "success"
|
||||
FAILURE = "failure"
|
||||
CANCELLED = "cancelled"
|
||||
|
||||
|
||||
@dataclass
|
||||
class MessageEvent:
|
||||
"""
|
||||
@@ -626,14 +391,9 @@ class MessageEvent:
|
||||
reply_to_message_id: Optional[str] = None
|
||||
reply_to_text: Optional[str] = None # Text of the replied-to message (for context injection)
|
||||
|
||||
# Auto-loaded skill(s) for topic/channel bindings (e.g., Telegram DM Topics,
|
||||
# Discord channel_skill_bindings). A single name or ordered list.
|
||||
auto_skill: Optional[str | list[str]] = None
|
||||
# Auto-loaded skill for topic/channel bindings (e.g., Telegram DM Topics)
|
||||
auto_skill: Optional[str] = None
|
||||
|
||||
# Internal flag — set for synthetic events (e.g. background process
|
||||
# completion notifications) that must bypass user authorization checks.
|
||||
internal: bool = False
|
||||
|
||||
# Timestamps
|
||||
timestamp: datetime = field(default_factory=datetime.now)
|
||||
|
||||
@@ -650,9 +410,6 @@ class MessageEvent:
|
||||
raw = parts[0][1:].lower() if parts else None
|
||||
if raw and "@" in raw:
|
||||
raw = raw.split("@", 1)[0]
|
||||
# Reject file paths: valid command names never contain /
|
||||
if raw and "/" in raw:
|
||||
return None
|
||||
return raw
|
||||
|
||||
def get_command_args(self) -> str:
|
||||
@@ -673,32 +430,6 @@ class SendResult:
|
||||
retryable: bool = False # True for transient connection errors — base will retry automatically
|
||||
|
||||
|
||||
def merge_pending_message_event(
|
||||
pending_messages: Dict[str, MessageEvent],
|
||||
session_key: str,
|
||||
event: MessageEvent,
|
||||
) -> None:
|
||||
"""Store or merge a pending event for a session.
|
||||
|
||||
Photo bursts/albums often arrive as multiple near-simultaneous PHOTO
|
||||
events. Merge those into the existing queued event so the next turn sees
|
||||
the whole burst, while non-photo follow-ups still replace the pending
|
||||
event normally.
|
||||
"""
|
||||
existing = pending_messages.get(session_key)
|
||||
if (
|
||||
existing
|
||||
and getattr(existing, "message_type", None) == MessageType.PHOTO
|
||||
and event.message_type == MessageType.PHOTO
|
||||
):
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
existing.media_types.extend(event.media_types)
|
||||
if event.text:
|
||||
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
|
||||
return
|
||||
pending_messages[session_key] = event
|
||||
|
||||
|
||||
# Error substrings that indicate a transient *connection* failure worth retrying.
|
||||
# "timeout" / "timed out" / "readtimeout" / "writetimeout" are intentionally
|
||||
# excluded: a read/write timeout on a non-idempotent call (e.g. send_message)
|
||||
@@ -752,13 +483,8 @@ class BasePlatformAdapter(ABC):
|
||||
# Gateway shutdown cancels these so an old gateway instance doesn't keep
|
||||
# working on a task after --replace or manual restarts.
|
||||
self._background_tasks: set[asyncio.Task] = set()
|
||||
self._expected_cancelled_tasks: set[asyncio.Task] = set()
|
||||
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
|
||||
# Chats where auto-TTS on voice input is disabled (set by /voice off)
|
||||
self._auto_tts_disabled_chats: set = set()
|
||||
# Chats where typing indicator is paused (e.g. during approval waits).
|
||||
# _keep_typing skips send_typing when the chat_id is in this set.
|
||||
self._typing_paused: set = set()
|
||||
|
||||
@property
|
||||
def has_fatal_error(self) -> bool:
|
||||
@@ -842,20 +568,6 @@ class BasePlatformAdapter(ABC):
|
||||
an optional response string.
|
||||
"""
|
||||
self._message_handler = handler
|
||||
|
||||
def set_busy_session_handler(self, handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]]) -> None:
|
||||
"""Set an optional handler for messages arriving during active sessions."""
|
||||
self._busy_session_handler = handler
|
||||
|
||||
def set_session_store(self, session_store: Any) -> None:
|
||||
"""
|
||||
Set the session store for checking active sessions.
|
||||
|
||||
Used by adapters that need to check if a thread/conversation
|
||||
has an active session before processing messages (e.g., Slack
|
||||
thread replies without explicit mentions).
|
||||
"""
|
||||
self._session_store = session_store
|
||||
|
||||
@abstractmethod
|
||||
async def connect(self) -> bool:
|
||||
@@ -1222,16 +934,10 @@ class BasePlatformAdapter(ABC):
|
||||
|
||||
Telegram/Discord typing status expires after ~5 seconds, so we refresh every 2
|
||||
to recover quickly after progress messages interrupt it.
|
||||
|
||||
Skips send_typing when the chat is in ``_typing_paused`` (e.g. while
|
||||
the agent is waiting for dangerous-command approval). This is critical
|
||||
for Slack's Assistant API where ``assistant_threads_setStatus`` disables
|
||||
the compose box — pausing lets the user type ``/approve`` or ``/deny``.
|
||||
"""
|
||||
try:
|
||||
while True:
|
||||
if chat_id not in self._typing_paused:
|
||||
await self.send_typing(chat_id, metadata=metadata)
|
||||
await self.send_typing(chat_id, metadata=metadata)
|
||||
await asyncio.sleep(interval)
|
||||
except asyncio.CancelledError:
|
||||
pass # Normal cancellation when handler completes
|
||||
@@ -1245,20 +951,7 @@ class BasePlatformAdapter(ABC):
|
||||
await self.stop_typing(chat_id)
|
||||
except Exception:
|
||||
pass
|
||||
self._typing_paused.discard(chat_id)
|
||||
|
||||
def pause_typing_for_chat(self, chat_id: str) -> None:
|
||||
"""Pause typing indicator for a chat (e.g. during approval waits).
|
||||
|
||||
Thread-safe (CPython GIL) — can be called from the sync agent thread
|
||||
while ``_keep_typing`` runs on the async event loop.
|
||||
"""
|
||||
self._typing_paused.add(chat_id)
|
||||
|
||||
def resume_typing_for_chat(self, chat_id: str) -> None:
|
||||
"""Resume typing indicator for a chat after approval resolves."""
|
||||
self._typing_paused.discard(chat_id)
|
||||
|
||||
|
||||
# ── Processing lifecycle hooks ──────────────────────────────────────────
|
||||
# Subclasses override these to react to message processing events
|
||||
# (e.g. Discord adds 👀/✅/❌ reactions).
|
||||
@@ -1266,7 +959,7 @@ class BasePlatformAdapter(ABC):
|
||||
async def on_processing_start(self, event: MessageEvent) -> None:
|
||||
"""Hook called when background processing begins."""
|
||||
|
||||
async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
|
||||
async def on_processing_complete(self, event: MessageEvent, success: bool) -> None:
|
||||
"""Hook called when background processing completes."""
|
||||
|
||||
async def _run_processing_hook(self, hook_name: str, *args: Any, **kwargs: Any) -> None:
|
||||
@@ -1381,22 +1074,6 @@ class BasePlatformAdapter(ABC):
|
||||
logger.error("[%s] Fallback send also failed: %s", self.name, fallback_result.error)
|
||||
return fallback_result
|
||||
|
||||
@staticmethod
|
||||
def _merge_caption(existing_text: Optional[str], new_text: str) -> str:
|
||||
"""Merge a new caption into existing text, avoiding duplicates.
|
||||
|
||||
Uses line-by-line exact match (not substring) to prevent false positives
|
||||
where a shorter caption is silently dropped because it appears as a
|
||||
substring of a longer one (e.g. "Meeting" inside "Meeting agenda").
|
||||
Whitespace is normalised for comparison.
|
||||
"""
|
||||
if not existing_text:
|
||||
return new_text
|
||||
existing_captions = [c.strip() for c in existing_text.split("\n\n")]
|
||||
if new_text.strip() not in existing_captions:
|
||||
return f"{existing_text}\n\n{new_text}".strip()
|
||||
return existing_text
|
||||
|
||||
async def handle_message(self, event: MessageEvent) -> None:
|
||||
"""
|
||||
Process an incoming message.
|
||||
@@ -1416,20 +1093,16 @@ class BasePlatformAdapter(ABC):
|
||||
|
||||
# Check if there's already an active handler for this session
|
||||
if session_key in self._active_sessions:
|
||||
# Certain commands must bypass the active-session guard and be
|
||||
# dispatched directly to the gateway runner. Without this, they
|
||||
# are queued as pending messages and either:
|
||||
# - leak into the conversation as user text (/stop, /new), or
|
||||
# - deadlock (/approve, /deny — agent is blocked on Event.wait)
|
||||
#
|
||||
# Dispatch inline: call the message handler directly and send the
|
||||
# response. Do NOT use _process_message_background — it manages
|
||||
# session lifecycle and its cleanup races with the running task
|
||||
# (see PR #4926).
|
||||
# /approve and /deny must bypass the active-session guard.
|
||||
# The agent thread is blocked on threading.Event.wait() inside
|
||||
# tools/approval.py — queuing these commands creates a deadlock:
|
||||
# the agent waits for approval, approval waits for agent to finish.
|
||||
# Dispatch directly to the message handler without touching session
|
||||
# lifecycle (no competing background task, no session guard removal).
|
||||
cmd = event.get_command()
|
||||
if cmd in ("approve", "deny", "status", "stop", "new", "reset", "background", "restart"):
|
||||
if cmd in ("approve", "deny"):
|
||||
logger.debug(
|
||||
"[%s] Command '/%s' bypassing active-session guard for %s",
|
||||
"[%s] Approval command '/%s' bypassing active-session guard for %s",
|
||||
self.name, cmd, session_key,
|
||||
)
|
||||
try:
|
||||
@@ -1443,22 +1116,47 @@ class BasePlatformAdapter(ABC):
|
||||
metadata=_thread_meta,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
|
||||
logger.error("[%s] Approval dispatch failed: %s", self.name, e, exc_info=True)
|
||||
return
|
||||
|
||||
if self._busy_session_handler is not None:
|
||||
# /status must also bypass the active-session guard so it always
|
||||
# returns a system-generated response instead of being queued as
|
||||
# user text and passed to the agent (#5046).
|
||||
if cmd == "status":
|
||||
logger.debug(
|
||||
"[%s] Status command bypassing active-session guard for %s",
|
||||
self.name, session_key,
|
||||
)
|
||||
try:
|
||||
if await self._busy_session_handler(event, session_key):
|
||||
return
|
||||
_thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
response = await self._message_handler(event)
|
||||
if response:
|
||||
await self._send_with_retry(
|
||||
chat_id=event.source.chat_id,
|
||||
content=response,
|
||||
reply_to=event.message_id,
|
||||
metadata=_thread_meta,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("[%s] Busy-session handler failed: %s", self.name, e, exc_info=True)
|
||||
logger.error("[%s] Status dispatch failed: %s", self.name, e, exc_info=True)
|
||||
return
|
||||
|
||||
# Special case: photo bursts/albums frequently arrive as multiple near-
|
||||
# simultaneous messages. Queue them without interrupting the active run,
|
||||
# then process them immediately after the current task finishes.
|
||||
if event.message_type == MessageType.PHOTO:
|
||||
logger.debug("[%s] Queuing photo follow-up for session %s without interrupt", self.name, session_key)
|
||||
merge_pending_message_event(self._pending_messages, session_key, event)
|
||||
existing = self._pending_messages.get(session_key)
|
||||
if existing and existing.message_type == MessageType.PHOTO:
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
existing.media_types.extend(event.media_types)
|
||||
if event.text:
|
||||
if not existing.text:
|
||||
existing.text = event.text
|
||||
elif event.text not in existing.text:
|
||||
existing.text = f"{existing.text}\n\n{event.text}".strip()
|
||||
else:
|
||||
self._pending_messages[session_key] = event
|
||||
return # Don't interrupt now - will run after current task completes
|
||||
|
||||
# Default behavior for non-photo follow-ups: interrupt the running agent
|
||||
@@ -1485,7 +1183,6 @@ class BasePlatformAdapter(ABC):
|
||||
return
|
||||
if hasattr(task, "add_done_callback"):
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
task.add_done_callback(self._expected_cancelled_tasks.discard)
|
||||
|
||||
@staticmethod
|
||||
def _get_human_delay() -> float:
|
||||
@@ -1622,7 +1319,7 @@ class BasePlatformAdapter(ABC):
|
||||
logger.info(
|
||||
"[%s] Sending image: %s (alt=%s)",
|
||||
self.name,
|
||||
safe_url_for_log(image_url),
|
||||
_safe_url_for_log(image_url),
|
||||
alt_text[:30] if alt_text else "",
|
||||
)
|
||||
# Route animated GIFs through send_animation for proper playback
|
||||
@@ -1714,11 +1411,7 @@ class BasePlatformAdapter(ABC):
|
||||
|
||||
# Determine overall success for the processing hook
|
||||
processing_ok = delivery_succeeded if delivery_attempted else not bool(response)
|
||||
await self._run_processing_hook(
|
||||
"on_processing_complete",
|
||||
event,
|
||||
ProcessingOutcome.SUCCESS if processing_ok else ProcessingOutcome.FAILURE,
|
||||
)
|
||||
await self._run_processing_hook("on_processing_complete", event, processing_ok)
|
||||
|
||||
# Check if there's a pending message that was queued during our processing
|
||||
if session_key in self._pending_messages:
|
||||
@@ -1737,14 +1430,10 @@ class BasePlatformAdapter(ABC):
|
||||
return # Already cleaned up
|
||||
|
||||
except asyncio.CancelledError:
|
||||
current_task = asyncio.current_task()
|
||||
outcome = ProcessingOutcome.CANCELLED
|
||||
if current_task is None or current_task not in self._expected_cancelled_tasks:
|
||||
outcome = ProcessingOutcome.FAILURE
|
||||
await self._run_processing_hook("on_processing_complete", event, outcome)
|
||||
await self._run_processing_hook("on_processing_complete", event, False)
|
||||
raise
|
||||
except Exception as e:
|
||||
await self._run_processing_hook("on_processing_complete", event, ProcessingOutcome.FAILURE)
|
||||
await self._run_processing_hook("on_processing_complete", event, False)
|
||||
logger.error("[%s] Error handling message: %s", self.name, e, exc_info=True)
|
||||
# Send the error to the user so they aren't left with radio silence
|
||||
try:
|
||||
@@ -1788,12 +1477,10 @@ class BasePlatformAdapter(ABC):
|
||||
"""
|
||||
tasks = [task for task in self._background_tasks if not task.done()]
|
||||
for task in tasks:
|
||||
self._expected_cancelled_tasks.add(task)
|
||||
task.cancel()
|
||||
if tasks:
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
self._background_tasks.clear()
|
||||
self._expected_cancelled_tasks.clear()
|
||||
self._pending_messages.clear()
|
||||
self._active_sessions.clear()
|
||||
|
||||
|
||||
@@ -1,936 +0,0 @@
|
||||
"""BlueBubbles iMessage platform adapter.
|
||||
|
||||
Uses the local BlueBubbles macOS server for outbound REST sends and inbound
|
||||
webhooks. Supports text messaging, media attachments (images, voice, video,
|
||||
documents), tapback reactions, typing indicators, and read receipts.
|
||||
|
||||
Architecture based on PR #5869 (benjaminsehl) with inbound attachment
|
||||
downloading from PR #4588 (YuhangLin).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
import httpx
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_bytes,
|
||||
cache_audio_from_bytes,
|
||||
cache_document_from_bytes,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DEFAULT_WEBHOOK_HOST = "127.0.0.1"
|
||||
DEFAULT_WEBHOOK_PORT = 8645
|
||||
DEFAULT_WEBHOOK_PATH = "/bluebubbles-webhook"
|
||||
MAX_TEXT_LENGTH = 4000
|
||||
|
||||
# Tapback reaction codes (BlueBubbles associatedMessageType values)
|
||||
_TAPBACK_ADDED = {
|
||||
2000: "love", 2001: "like", 2002: "dislike",
|
||||
2003: "laugh", 2004: "emphasize", 2005: "question",
|
||||
}
|
||||
_TAPBACK_REMOVED = {
|
||||
3000: "love", 3001: "like", 3002: "dislike",
|
||||
3003: "laugh", 3004: "emphasize", 3005: "question",
|
||||
}
|
||||
|
||||
# Webhook event types that carry user messages
|
||||
_MESSAGE_EVENTS = {"new-message", "message", "updated-message"}
|
||||
|
||||
# Log redaction patterns
|
||||
_PHONE_RE = re.compile(r"\+?\d{7,15}")
|
||||
_EMAIL_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.]+")
|
||||
|
||||
|
||||
def _redact(text: str) -> str:
|
||||
"""Redact phone numbers and emails from log output."""
|
||||
text = _PHONE_RE.sub("[REDACTED]", text)
|
||||
text = _EMAIL_RE.sub("[REDACTED]", text)
|
||||
return text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def check_bluebubbles_requirements() -> bool:
|
||||
try:
|
||||
import aiohttp # noqa: F401
|
||||
import httpx as _httpx # noqa: F401
|
||||
except ImportError:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _normalize_server_url(raw: str) -> str:
|
||||
value = (raw or "").strip()
|
||||
if not value:
|
||||
return ""
|
||||
if not re.match(r"^https?://", value, flags=re.I):
|
||||
value = f"http://{value}"
|
||||
return value.rstrip("/")
|
||||
|
||||
|
||||
def _strip_markdown(text: str) -> str:
|
||||
"""Strip common markdown formatting for iMessage plain-text delivery."""
|
||||
text = re.sub(r"\*\*(.+?)\*\*", r"\1", text, flags=re.DOTALL)
|
||||
text = re.sub(r"\*(.+?)\*", r"\1", text, flags=re.DOTALL)
|
||||
text = re.sub(r"__(.+?)__", r"\1", text, flags=re.DOTALL)
|
||||
text = re.sub(r"_(.+?)_", r"\1", text, flags=re.DOTALL)
|
||||
text = re.sub(r"```[a-zA-Z0-9_+-]*\n?", "", text)
|
||||
text = re.sub(r"`(.+?)`", r"\1", text)
|
||||
text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
|
||||
text = re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r"\1", text)
|
||||
text = re.sub(r"\n{3,}", "\n\n", text)
|
||||
return text.strip()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Adapter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
platform = Platform.BLUEBUBBLES
|
||||
MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.BLUEBUBBLES)
|
||||
extra = config.extra or {}
|
||||
self.server_url = _normalize_server_url(
|
||||
extra.get("server_url") or os.getenv("BLUEBUBBLES_SERVER_URL", "")
|
||||
)
|
||||
self.password = extra.get("password") or os.getenv("BLUEBUBBLES_PASSWORD", "")
|
||||
self.webhook_host = (
|
||||
extra.get("webhook_host")
|
||||
or os.getenv("BLUEBUBBLES_WEBHOOK_HOST", DEFAULT_WEBHOOK_HOST)
|
||||
)
|
||||
self.webhook_port = int(
|
||||
extra.get("webhook_port")
|
||||
or os.getenv("BLUEBUBBLES_WEBHOOK_PORT", str(DEFAULT_WEBHOOK_PORT))
|
||||
)
|
||||
self.webhook_path = (
|
||||
extra.get("webhook_path")
|
||||
or os.getenv("BLUEBUBBLES_WEBHOOK_PATH", DEFAULT_WEBHOOK_PATH)
|
||||
)
|
||||
if not str(self.webhook_path).startswith("/"):
|
||||
self.webhook_path = f"/{self.webhook_path}"
|
||||
self.send_read_receipts = bool(extra.get("send_read_receipts", True))
|
||||
self.client: Optional[httpx.AsyncClient] = None
|
||||
self._runner = None
|
||||
self._private_api_enabled: Optional[bool] = None
|
||||
self._helper_connected: bool = False
|
||||
self._guid_cache: Dict[str, str] = {}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# API helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _api_url(self, path: str) -> str:
|
||||
sep = "&" if "?" in path else "?"
|
||||
return f"{self.server_url}{path}{sep}password={quote(self.password, safe='')}"
|
||||
|
||||
async def _api_get(self, path: str) -> Dict[str, Any]:
|
||||
assert self.client is not None
|
||||
res = await self.client.get(self._api_url(path))
|
||||
res.raise_for_status()
|
||||
return res.json()
|
||||
|
||||
async def _api_post(self, path: str, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
assert self.client is not None
|
||||
res = await self.client.post(self._api_url(path), json=payload)
|
||||
res.raise_for_status()
|
||||
return res.json()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def connect(self) -> bool:
|
||||
if not self.server_url or not self.password:
|
||||
logger.error(
|
||||
"[bluebubbles] BLUEBUBBLES_SERVER_URL and BLUEBUBBLES_PASSWORD are required"
|
||||
)
|
||||
return False
|
||||
from aiohttp import web
|
||||
|
||||
self.client = httpx.AsyncClient(timeout=30.0)
|
||||
try:
|
||||
await self._api_get("/api/v1/ping")
|
||||
info = await self._api_get("/api/v1/server/info")
|
||||
server_data = (info or {}).get("data", {})
|
||||
self._private_api_enabled = bool(server_data.get("private_api"))
|
||||
self._helper_connected = bool(server_data.get("helper_connected"))
|
||||
logger.info(
|
||||
"[bluebubbles] connected to %s (private_api=%s, helper=%s)",
|
||||
self.server_url,
|
||||
self._private_api_enabled,
|
||||
self._helper_connected,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error(
|
||||
"[bluebubbles] cannot reach server at %s: %s", self.server_url, exc
|
||||
)
|
||||
if self.client:
|
||||
await self.client.aclose()
|
||||
self.client = None
|
||||
return False
|
||||
|
||||
app = web.Application()
|
||||
app.router.add_get("/health", lambda _: web.Response(text="ok"))
|
||||
app.router.add_post(self.webhook_path, self._handle_webhook)
|
||||
self._runner = web.AppRunner(app)
|
||||
await self._runner.setup()
|
||||
site = web.TCPSite(self._runner, self.webhook_host, self.webhook_port)
|
||||
await site.start()
|
||||
self._mark_connected()
|
||||
logger.info(
|
||||
"[bluebubbles] webhook listening on http://%s:%s%s",
|
||||
self.webhook_host,
|
||||
self.webhook_port,
|
||||
self.webhook_path,
|
||||
)
|
||||
|
||||
# Register webhook with BlueBubbles server
|
||||
# This is required for the server to know where to send events
|
||||
await self._register_webhook()
|
||||
|
||||
return True
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
# Unregister webhook before cleaning up
|
||||
await self._unregister_webhook()
|
||||
|
||||
if self.client:
|
||||
await self.client.aclose()
|
||||
self.client = None
|
||||
if self._runner:
|
||||
await self._runner.cleanup()
|
||||
self._runner = None
|
||||
self._mark_disconnected()
|
||||
|
||||
@property
|
||||
def _webhook_url(self) -> str:
|
||||
"""Compute the external webhook URL for BlueBubbles registration."""
|
||||
host = self.webhook_host
|
||||
if host in ("0.0.0.0", "127.0.0.1", "localhost", "::"):
|
||||
host = "localhost"
|
||||
return f"http://{host}:{self.webhook_port}{self.webhook_path}"
|
||||
|
||||
async def _find_registered_webhooks(self, url: str) -> list:
|
||||
"""Return list of BB webhook entries matching *url*."""
|
||||
try:
|
||||
res = await self._api_get("/api/v1/webhook")
|
||||
data = res.get("data")
|
||||
if isinstance(data, list):
|
||||
return [wh for wh in data if wh.get("url") == url]
|
||||
except Exception:
|
||||
pass
|
||||
return []
|
||||
|
||||
async def _register_webhook(self) -> bool:
|
||||
"""Register this webhook URL with the BlueBubbles server.
|
||||
|
||||
BlueBubbles requires webhooks to be registered via API before
|
||||
it will send events. Checks for an existing registration first
|
||||
to avoid duplicates (e.g. after a crash without clean shutdown).
|
||||
"""
|
||||
if not self.client:
|
||||
return False
|
||||
|
||||
webhook_url = self._webhook_url
|
||||
|
||||
# Crash resilience — reuse an existing registration if present
|
||||
existing = await self._find_registered_webhooks(webhook_url)
|
||||
if existing:
|
||||
logger.info(
|
||||
"[bluebubbles] webhook already registered: %s", webhook_url
|
||||
)
|
||||
return True
|
||||
|
||||
payload = {
|
||||
"url": webhook_url,
|
||||
"events": ["new-message", "updated-message", "message"],
|
||||
}
|
||||
|
||||
try:
|
||||
res = await self._api_post("/api/v1/webhook", payload)
|
||||
status = res.get("status", 0)
|
||||
if 200 <= status < 300:
|
||||
logger.info(
|
||||
"[bluebubbles] webhook registered with server: %s",
|
||||
webhook_url,
|
||||
)
|
||||
return True
|
||||
else:
|
||||
logger.warning(
|
||||
"[bluebubbles] webhook registration returned status %s: %s",
|
||||
status,
|
||||
res.get("message"),
|
||||
)
|
||||
return False
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"[bluebubbles] failed to register webhook with server: %s",
|
||||
exc,
|
||||
)
|
||||
return False
|
||||
|
||||
async def _unregister_webhook(self) -> bool:
|
||||
"""Unregister this webhook URL from the BlueBubbles server.
|
||||
|
||||
Removes *all* matching registrations to clean up any duplicates
|
||||
left by prior crashes.
|
||||
"""
|
||||
if not self.client:
|
||||
return False
|
||||
|
||||
webhook_url = self._webhook_url
|
||||
removed = False
|
||||
|
||||
try:
|
||||
for wh in await self._find_registered_webhooks(webhook_url):
|
||||
wh_id = wh.get("id")
|
||||
if wh_id:
|
||||
res = await self.client.delete(
|
||||
self._api_url(f"/api/v1/webhook/{wh_id}")
|
||||
)
|
||||
res.raise_for_status()
|
||||
removed = True
|
||||
if removed:
|
||||
logger.info(
|
||||
"[bluebubbles] webhook unregistered: %s", webhook_url
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"[bluebubbles] failed to unregister webhook (non-critical): %s",
|
||||
exc,
|
||||
)
|
||||
return removed
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Chat GUID resolution
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _resolve_chat_guid(self, target: str) -> Optional[str]:
|
||||
"""Resolve an email/phone to a BlueBubbles chat GUID.
|
||||
|
||||
If *target* already contains a semicolon (raw GUID format like
|
||||
``iMessage;-;user@example.com``), it is returned as-is. Otherwise
|
||||
the adapter queries the BlueBubbles chat list and matches on
|
||||
``chatIdentifier`` or participant address.
|
||||
"""
|
||||
target = (target or "").strip()
|
||||
if not target:
|
||||
return None
|
||||
# Already a raw GUID
|
||||
if ";" in target:
|
||||
return target
|
||||
if target in self._guid_cache:
|
||||
return self._guid_cache[target]
|
||||
try:
|
||||
payload = await self._api_post(
|
||||
"/api/v1/chat/query",
|
||||
{"limit": 100, "offset": 0, "with": ["participants"]},
|
||||
)
|
||||
for chat in payload.get("data", []) or []:
|
||||
guid = chat.get("guid") or chat.get("chatGuid")
|
||||
identifier = chat.get("chatIdentifier") or chat.get("identifier")
|
||||
if identifier == target:
|
||||
if guid:
|
||||
self._guid_cache[target] = guid
|
||||
return guid
|
||||
for part in chat.get("participants", []) or []:
|
||||
if (part.get("address") or "").strip() == target and guid:
|
||||
self._guid_cache[target] = guid
|
||||
return guid
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
async def _create_chat_for_handle(
|
||||
self, address: str, message: str
|
||||
) -> SendResult:
|
||||
"""Create a new chat by sending the first message to *address*."""
|
||||
payload = {
|
||||
"addresses": [address],
|
||||
"message": message,
|
||||
"tempGuid": f"temp-{datetime.utcnow().timestamp()}",
|
||||
}
|
||||
try:
|
||||
res = await self._api_post("/api/v1/chat/new", payload)
|
||||
data = res.get("data") or {}
|
||||
msg_id = data.get("guid") or data.get("messageGuid") or "ok"
|
||||
return SendResult(success=True, message_id=str(msg_id), raw_response=res)
|
||||
except Exception as exc:
|
||||
return SendResult(success=False, error=str(exc))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text sending
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
content: str,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
text = _strip_markdown(content or "")
|
||||
if not text:
|
||||
return SendResult(success=False, error="BlueBubbles send requires text")
|
||||
chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
|
||||
last = SendResult(success=True)
|
||||
for chunk in chunks:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if not guid:
|
||||
# If the target looks like an address, try creating a new chat
|
||||
if self._private_api_enabled and (
|
||||
"@" in chat_id or re.match(r"^\+\d+", chat_id)
|
||||
):
|
||||
return await self._create_chat_for_handle(chat_id, chunk)
|
||||
return SendResult(
|
||||
success=False,
|
||||
error=f"BlueBubbles chat not found for target: {chat_id}",
|
||||
)
|
||||
payload: Dict[str, Any] = {
|
||||
"chatGuid": guid,
|
||||
"tempGuid": f"temp-{datetime.utcnow().timestamp()}",
|
||||
"message": chunk,
|
||||
}
|
||||
if reply_to and self._private_api_enabled and self._helper_connected:
|
||||
payload["method"] = "private-api"
|
||||
payload["selectedMessageGuid"] = reply_to
|
||||
payload["partIndex"] = 0
|
||||
try:
|
||||
res = await self._api_post("/api/v1/message/text", payload)
|
||||
data = res.get("data") or {}
|
||||
msg_id = data.get("guid") or data.get("messageGuid") or "ok"
|
||||
last = SendResult(
|
||||
success=True, message_id=str(msg_id), raw_response=res
|
||||
)
|
||||
except Exception as exc:
|
||||
return SendResult(success=False, error=str(exc))
|
||||
return last
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Media sending (outbound)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _send_attachment(
|
||||
self,
|
||||
chat_id: str,
|
||||
file_path: str,
|
||||
filename: Optional[str] = None,
|
||||
caption: Optional[str] = None,
|
||||
is_audio_message: bool = False,
|
||||
) -> SendResult:
|
||||
"""Send a file attachment via BlueBubbles multipart upload."""
|
||||
if not self.client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
if not os.path.isfile(file_path):
|
||||
return SendResult(success=False, error=f"File not found: {file_path}")
|
||||
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if not guid:
|
||||
return SendResult(success=False, error=f"Chat not found: {chat_id}")
|
||||
|
||||
fname = filename or os.path.basename(file_path)
|
||||
try:
|
||||
with open(file_path, "rb") as f:
|
||||
files = {"attachment": (fname, f, "application/octet-stream")}
|
||||
data: Dict[str, str] = {
|
||||
"chatGuid": guid,
|
||||
"name": fname,
|
||||
"tempGuid": uuid.uuid4().hex,
|
||||
}
|
||||
if is_audio_message:
|
||||
data["isAudioMessage"] = "true"
|
||||
res = await self.client.post(
|
||||
self._api_url("/api/v1/message/attachment"),
|
||||
files=files,
|
||||
data=data,
|
||||
timeout=120,
|
||||
)
|
||||
res.raise_for_status()
|
||||
result = res.json()
|
||||
|
||||
if caption:
|
||||
await self.send(chat_id, caption)
|
||||
|
||||
if result.get("status") == 200:
|
||||
rdata = result.get("data") or {}
|
||||
msg_id = rdata.get("guid") if isinstance(rdata, dict) else None
|
||||
return SendResult(
|
||||
success=True, message_id=msg_id, raw_response=result
|
||||
)
|
||||
return SendResult(
|
||||
success=False,
|
||||
error=result.get("message", "Attachment upload failed"),
|
||||
)
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
try:
|
||||
from gateway.platforms.base import cache_image_from_url
|
||||
|
||||
local_path = await cache_image_from_url(image_url)
|
||||
return await self._send_attachment(chat_id, local_path, caption=caption)
|
||||
except Exception:
|
||||
return await super().send_image(chat_id, image_url, caption, reply_to)
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
return await self._send_attachment(chat_id, image_path, caption=caption)
|
||||
|
||||
async def send_voice(
|
||||
self,
|
||||
chat_id: str,
|
||||
audio_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
return await self._send_attachment(
|
||||
chat_id, audio_path, caption=caption, is_audio_message=True
|
||||
)
|
||||
|
||||
async def send_video(
|
||||
self,
|
||||
chat_id: str,
|
||||
video_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
return await self._send_attachment(chat_id, video_path, caption=caption)
|
||||
|
||||
async def send_document(
|
||||
self,
|
||||
chat_id: str,
|
||||
file_path: str,
|
||||
caption: Optional[str] = None,
|
||||
file_name: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
return await self._send_attachment(
|
||||
chat_id, file_path, filename=file_name, caption=caption
|
||||
)
|
||||
|
||||
async def send_animation(
|
||||
self,
|
||||
chat_id: str,
|
||||
animation_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
return await self.send_image(
|
||||
chat_id, animation_url, caption, reply_to, metadata
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Typing indicators
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def send_typing(self, chat_id: str, metadata=None) -> None:
|
||||
if not self._private_api_enabled or not self._helper_connected or not self.client:
|
||||
return
|
||||
try:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if guid:
|
||||
encoded = quote(guid, safe="")
|
||||
await self.client.post(
|
||||
self._api_url(f"/api/v1/chat/{encoded}/typing"), timeout=5
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def stop_typing(self, chat_id: str) -> None:
|
||||
if not self._private_api_enabled or not self._helper_connected or not self.client:
|
||||
return
|
||||
try:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if guid:
|
||||
encoded = quote(guid, safe="")
|
||||
await self.client.delete(
|
||||
self._api_url(f"/api/v1/chat/{encoded}/typing"), timeout=5
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Read receipts
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def mark_read(self, chat_id: str) -> bool:
|
||||
if not self._private_api_enabled or not self._helper_connected or not self.client:
|
||||
return False
|
||||
try:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if guid:
|
||||
encoded = quote(guid, safe="")
|
||||
await self.client.post(
|
||||
self._api_url(f"/api/v1/chat/{encoded}/read"), timeout=5
|
||||
)
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tapback reactions
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def send_reaction(
|
||||
self,
|
||||
chat_id: str,
|
||||
message_guid: str,
|
||||
reaction: str,
|
||||
part_index: int = 0,
|
||||
) -> SendResult:
|
||||
"""Send a tapback reaction (requires Private API helper)."""
|
||||
if not self._private_api_enabled or not self._helper_connected:
|
||||
return SendResult(
|
||||
success=False, error="Private API helper not connected"
|
||||
)
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if not guid:
|
||||
return SendResult(success=False, error=f"Chat not found: {chat_id}")
|
||||
try:
|
||||
res = await self._api_post(
|
||||
"/api/v1/message/react",
|
||||
{
|
||||
"chatGuid": guid,
|
||||
"selectedMessageGuid": message_guid,
|
||||
"reaction": reaction,
|
||||
"partIndex": part_index,
|
||||
},
|
||||
)
|
||||
return SendResult(success=True, raw_response=res)
|
||||
except Exception as exc:
|
||||
return SendResult(success=False, error=str(exc))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Chat info
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
is_group = ";+;" in (chat_id or "")
|
||||
info: Dict[str, Any] = {
|
||||
"name": chat_id,
|
||||
"type": "group" if is_group else "dm",
|
||||
}
|
||||
try:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
if guid:
|
||||
encoded = quote(guid, safe="")
|
||||
res = await self._api_get(
|
||||
f"/api/v1/chat/{encoded}?with=participants"
|
||||
)
|
||||
data = (res or {}).get("data", {})
|
||||
display_name = (
|
||||
data.get("displayName")
|
||||
or data.get("chatIdentifier")
|
||||
or chat_id
|
||||
)
|
||||
participants = []
|
||||
for p in data.get("participants", []) or []:
|
||||
addr = (p.get("address") or "").strip()
|
||||
if addr:
|
||||
participants.append(addr)
|
||||
info["name"] = display_name
|
||||
if participants:
|
||||
info["participants"] = participants
|
||||
except Exception:
|
||||
pass
|
||||
return info
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
return _strip_markdown(content)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Inbound attachment downloading (from #4588)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _download_attachment(
|
||||
self, att_guid: str, att_meta: Dict[str, Any]
|
||||
) -> Optional[str]:
|
||||
"""Download an attachment from BlueBubbles and cache it locally.
|
||||
|
||||
Returns the local file path on success, None on failure.
|
||||
"""
|
||||
if not self.client:
|
||||
return None
|
||||
try:
|
||||
encoded = quote(att_guid, safe="")
|
||||
resp = await self.client.get(
|
||||
self._api_url(f"/api/v1/attachment/{encoded}/download"),
|
||||
timeout=60,
|
||||
follow_redirects=True,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.content
|
||||
|
||||
mime = (att_meta.get("mimeType") or "").lower()
|
||||
transfer_name = att_meta.get("transferName", "")
|
||||
|
||||
if mime.startswith("image/"):
|
||||
ext_map = {
|
||||
"image/jpeg": ".jpg",
|
||||
"image/png": ".png",
|
||||
"image/gif": ".gif",
|
||||
"image/webp": ".webp",
|
||||
"image/heic": ".jpg",
|
||||
"image/heif": ".jpg",
|
||||
"image/tiff": ".jpg",
|
||||
}
|
||||
ext = ext_map.get(mime, ".jpg")
|
||||
return cache_image_from_bytes(data, ext)
|
||||
|
||||
if mime.startswith("audio/"):
|
||||
ext_map = {
|
||||
"audio/mp3": ".mp3",
|
||||
"audio/mpeg": ".mp3",
|
||||
"audio/ogg": ".ogg",
|
||||
"audio/wav": ".wav",
|
||||
"audio/x-caf": ".mp3",
|
||||
"audio/mp4": ".m4a",
|
||||
"audio/aac": ".m4a",
|
||||
}
|
||||
ext = ext_map.get(mime, ".mp3")
|
||||
return cache_audio_from_bytes(data, ext)
|
||||
|
||||
# Videos, documents, and everything else
|
||||
filename = transfer_name or f"file_{uuid.uuid4().hex[:8]}"
|
||||
return cache_document_from_bytes(data, filename)
|
||||
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"[bluebubbles] failed to download attachment %s: %s",
|
||||
_redact(att_guid),
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Webhook handling
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _extract_payload_record(
|
||||
self, payload: Dict[str, Any]
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
data = payload.get("data")
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
if isinstance(data, list):
|
||||
for item in data:
|
||||
if isinstance(item, dict):
|
||||
return item
|
||||
if isinstance(payload.get("message"), dict):
|
||||
return payload.get("message")
|
||||
return payload if isinstance(payload, dict) else None
|
||||
|
||||
@staticmethod
|
||||
def _value(*candidates: Any) -> Optional[str]:
|
||||
for candidate in candidates:
|
||||
if isinstance(candidate, str) and candidate.strip():
|
||||
return candidate.strip()
|
||||
return None
|
||||
|
||||
async def _handle_webhook(self, request):
|
||||
from aiohttp import web
|
||||
|
||||
token = (
|
||||
request.query.get("password")
|
||||
or request.query.get("guid")
|
||||
or request.headers.get("x-password")
|
||||
or request.headers.get("x-guid")
|
||||
or request.headers.get("x-bluebubbles-guid")
|
||||
)
|
||||
if token != self.password:
|
||||
return web.json_response({"error": "unauthorized"}, status=401)
|
||||
try:
|
||||
raw = await request.read()
|
||||
body = raw.decode("utf-8", errors="replace")
|
||||
try:
|
||||
payload = json.loads(body)
|
||||
except Exception:
|
||||
from urllib.parse import parse_qs
|
||||
|
||||
form = parse_qs(body)
|
||||
payload_str = (
|
||||
form.get("payload")
|
||||
or form.get("data")
|
||||
or form.get("message")
|
||||
or [""]
|
||||
)[0]
|
||||
payload = json.loads(payload_str) if payload_str else {}
|
||||
except Exception as exc:
|
||||
logger.error("[bluebubbles] webhook parse error: %s", exc)
|
||||
return web.json_response({"error": "invalid payload"}, status=400)
|
||||
|
||||
event_type = self._value(payload.get("type"), payload.get("event")) or ""
|
||||
# Only process message events; silently acknowledge everything else
|
||||
if event_type and event_type not in _MESSAGE_EVENTS:
|
||||
return web.Response(text="ok")
|
||||
|
||||
record = self._extract_payload_record(payload) or {}
|
||||
is_from_me = bool(
|
||||
record.get("isFromMe")
|
||||
or record.get("fromMe")
|
||||
or record.get("is_from_me")
|
||||
)
|
||||
if is_from_me:
|
||||
return web.Response(text="ok")
|
||||
|
||||
# Skip tapback reactions delivered as messages
|
||||
assoc_type = record.get("associatedMessageType")
|
||||
if isinstance(assoc_type, int) and assoc_type in {
|
||||
**_TAPBACK_ADDED,
|
||||
**_TAPBACK_REMOVED,
|
||||
}:
|
||||
return web.Response(text="ok")
|
||||
|
||||
text = (
|
||||
self._value(
|
||||
record.get("text"), record.get("message"), record.get("body")
|
||||
)
|
||||
or ""
|
||||
)
|
||||
|
||||
# --- Inbound attachment handling ---
|
||||
attachments = record.get("attachments") or []
|
||||
media_urls: List[str] = []
|
||||
media_types: List[str] = []
|
||||
msg_type = MessageType.TEXT
|
||||
|
||||
for att in attachments:
|
||||
att_guid = att.get("guid", "")
|
||||
if not att_guid:
|
||||
continue
|
||||
cached = await self._download_attachment(att_guid, att)
|
||||
if cached:
|
||||
mime = (att.get("mimeType") or "").lower()
|
||||
media_urls.append(cached)
|
||||
media_types.append(mime)
|
||||
if mime.startswith("image/"):
|
||||
msg_type = MessageType.PHOTO
|
||||
elif mime.startswith("audio/") or (att.get("uti") or "").endswith(
|
||||
"caf"
|
||||
):
|
||||
msg_type = MessageType.VOICE
|
||||
elif mime.startswith("video/"):
|
||||
msg_type = MessageType.VIDEO
|
||||
else:
|
||||
msg_type = MessageType.DOCUMENT
|
||||
|
||||
# With multiple attachments, prefer PHOTO if any images present
|
||||
if len(media_urls) > 1:
|
||||
mime_prefixes = {(m or "").split("/")[0] for m in media_types}
|
||||
if "image" in mime_prefixes:
|
||||
msg_type = MessageType.PHOTO
|
||||
|
||||
if not text and media_urls:
|
||||
text = "(attachment)"
|
||||
# --- End attachment handling ---
|
||||
|
||||
chat_guid = self._value(
|
||||
record.get("chatGuid"),
|
||||
payload.get("chatGuid"),
|
||||
record.get("chat_guid"),
|
||||
payload.get("chat_guid"),
|
||||
payload.get("guid"),
|
||||
)
|
||||
chat_identifier = self._value(
|
||||
record.get("chatIdentifier"),
|
||||
record.get("identifier"),
|
||||
payload.get("chatIdentifier"),
|
||||
payload.get("identifier"),
|
||||
)
|
||||
sender = (
|
||||
self._value(
|
||||
record.get("handle", {}).get("address")
|
||||
if isinstance(record.get("handle"), dict)
|
||||
else None,
|
||||
record.get("sender"),
|
||||
record.get("from"),
|
||||
record.get("address"),
|
||||
)
|
||||
or chat_identifier
|
||||
or chat_guid
|
||||
)
|
||||
if not (chat_guid or chat_identifier) and sender:
|
||||
chat_identifier = sender
|
||||
if not sender or not (chat_guid or chat_identifier) or not text:
|
||||
return web.json_response({"error": "missing message fields"}, status=400)
|
||||
|
||||
session_chat_id = chat_guid or chat_identifier
|
||||
is_group = bool(record.get("isGroup")) or (";+;" in (chat_guid or ""))
|
||||
source = self.build_source(
|
||||
chat_id=session_chat_id,
|
||||
chat_name=chat_identifier or sender,
|
||||
chat_type="group" if is_group else "dm",
|
||||
user_id=sender,
|
||||
user_name=sender,
|
||||
chat_id_alt=chat_identifier,
|
||||
)
|
||||
event = MessageEvent(
|
||||
text=text,
|
||||
message_type=msg_type,
|
||||
source=source,
|
||||
raw_message=payload,
|
||||
message_id=self._value(
|
||||
record.get("guid"),
|
||||
record.get("messageGuid"),
|
||||
record.get("id"),
|
||||
),
|
||||
reply_to_message_id=self._value(
|
||||
record.get("threadOriginatorGuid"),
|
||||
record.get("associatedMessageGuid"),
|
||||
),
|
||||
media_urls=media_urls,
|
||||
media_types=media_types,
|
||||
)
|
||||
task = asyncio.create_task(self.handle_message(event))
|
||||
self._background_tasks.add(task)
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
|
||||
# Fire-and-forget read receipt
|
||||
if self.send_read_receipts and session_chat_id:
|
||||
asyncio.create_task(self.mark_read(session_chat_id))
|
||||
|
||||
return web.Response(text="ok")
|
||||
|
||||
@@ -20,7 +20,6 @@ Configuration in config.yaml:
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
@@ -55,8 +54,6 @@ MAX_MESSAGE_LENGTH = 20000
|
||||
DEDUP_WINDOW_SECONDS = 300
|
||||
DEDUP_MAX_SIZE = 1000
|
||||
RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
|
||||
_SESSION_WEBHOOKS_MAX = 500
|
||||
_DINGTALK_WEBHOOK_RE = re.compile(r'^https://api\.dingtalk\.com/')
|
||||
|
||||
|
||||
def check_dingtalk_requirements() -> bool:
|
||||
@@ -198,15 +195,9 @@ class DingTalkAdapter(BasePlatformAdapter):
|
||||
chat_id = conversation_id or sender_id
|
||||
chat_type = "group" if is_group else "dm"
|
||||
|
||||
# Store session webhook for reply routing (validate origin to prevent SSRF)
|
||||
# Store session webhook for reply routing
|
||||
session_webhook = getattr(message, "session_webhook", None) or ""
|
||||
if session_webhook and chat_id and _DINGTALK_WEBHOOK_RE.match(session_webhook):
|
||||
if len(self._session_webhooks) >= _SESSION_WEBHOOKS_MAX:
|
||||
# Evict oldest entry to cap memory growth
|
||||
try:
|
||||
self._session_webhooks.pop(next(iter(self._session_webhooks)))
|
||||
except StopIteration:
|
||||
pass
|
||||
if session_webhook and chat_id:
|
||||
self._session_webhooks[chat_id] = session_webhook
|
||||
|
||||
source = self.build_source(
|
||||
|
||||
+43
-529
@@ -49,14 +49,12 @@ from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
ProcessingOutcome,
|
||||
SendResult,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
cache_document_from_bytes,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
)
|
||||
from tools.url_safety import is_safe_url
|
||||
|
||||
|
||||
def _clean_discord_id(entry: str) -> str:
|
||||
@@ -423,7 +421,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
# Discord message limits
|
||||
MAX_MESSAGE_LENGTH = 2000
|
||||
_SPLIT_THRESHOLD = 1900 # near the 2000-char split point
|
||||
|
||||
# Auto-disconnect from voice channel after this many seconds of inactivity
|
||||
VOICE_TIMEOUT = 300
|
||||
@@ -435,11 +432,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
self._allowed_user_ids: set = set() # For button approval authorization
|
||||
# Voice channel state (per-guild)
|
||||
self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient
|
||||
# Text batching: merge rapid successive messages (Telegram-style)
|
||||
self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6"))
|
||||
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
|
||||
self._pending_text_batches: Dict[str, MessageEvent] = {}
|
||||
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
|
||||
self._voice_text_channels: Dict[int, int] = {} # guild_id -> text_channel_id
|
||||
self._voice_timeout_tasks: Dict[int, asyncio.Task] = {} # guild_id -> timeout task
|
||||
# Phase 2: voice listening
|
||||
@@ -462,9 +454,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
self._seen_messages: Dict[str, float] = {}
|
||||
self._SEEN_TTL = 300 # 5 minutes
|
||||
self._SEEN_MAX = 2000 # prune threshold
|
||||
# Reply threading mode: "off" (no replies), "first" (reply on first
|
||||
# chunk only, default), "all" (reply-reference on every chunk).
|
||||
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Discord and start receiving events."""
|
||||
@@ -536,17 +525,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
intents.members = any(not entry.isdigit() for entry in self._allowed_user_ids)
|
||||
intents.voice_states = True
|
||||
|
||||
# Resolve proxy (DISCORD_PROXY > generic env vars > macOS system proxy)
|
||||
from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_bot
|
||||
proxy_url = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
|
||||
if proxy_url:
|
||||
logger.info("[%s] Using proxy for Discord: %s", self.name, proxy_url)
|
||||
|
||||
# Create bot — proxy= for HTTP, connector= for SOCKS
|
||||
# Create bot
|
||||
self._client = commands.Bot(
|
||||
command_prefix="!", # Not really used, we handle raw messages
|
||||
intents=intents,
|
||||
**proxy_kwargs_for_bot(proxy_url),
|
||||
)
|
||||
adapter_self = self # capture for closure
|
||||
|
||||
@@ -606,35 +588,22 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if not self._client.user or self._client.user not in message.mentions:
|
||||
return
|
||||
# "all" falls through to handle_message
|
||||
|
||||
# Multi-agent filtering: if the message mentions specific bots
|
||||
# but NOT this bot, the sender is talking to another agent —
|
||||
# stay silent. Messages with no bot mentions (general chat)
|
||||
# still fall through to _handle_message for the existing
|
||||
# DISCORD_REQUIRE_MENTION check.
|
||||
#
|
||||
# This replaces the older DISCORD_IGNORE_NO_MENTION logic
|
||||
# with bot-aware filtering that works correctly when multiple
|
||||
# agents share a channel.
|
||||
if not isinstance(message.channel, discord.DMChannel) and message.mentions:
|
||||
_self_mentioned = (
|
||||
|
||||
# If the message @mentions other users but NOT the bot, the
|
||||
# sender is talking to someone else — stay silent. Only
|
||||
# applies in server channels; in DMs the user is always
|
||||
# talking to the bot (mentions are just references).
|
||||
# Controlled by DISCORD_IGNORE_NO_MENTION (default: true).
|
||||
_ignore_no_mention = os.getenv(
|
||||
"DISCORD_IGNORE_NO_MENTION", "true"
|
||||
).lower() in ("true", "1", "yes")
|
||||
if _ignore_no_mention and message.mentions and not isinstance(message.channel, discord.DMChannel):
|
||||
_bot_mentioned = (
|
||||
self._client.user is not None
|
||||
and self._client.user in message.mentions
|
||||
)
|
||||
_other_bots_mentioned = any(
|
||||
m.bot and m != self._client.user
|
||||
for m in message.mentions
|
||||
)
|
||||
# If other bots are mentioned but we're not → not for us
|
||||
if _other_bots_mentioned and not _self_mentioned:
|
||||
return
|
||||
# If humans are mentioned but we're not → not for us
|
||||
# (preserves old DISCORD_IGNORE_NO_MENTION=true behavior)
|
||||
_ignore_no_mention = os.getenv(
|
||||
"DISCORD_IGNORE_NO_MENTION", "true"
|
||||
).lower() in ("true", "1", "yes")
|
||||
if _ignore_no_mention and not _self_mentioned and not _other_bots_mentioned:
|
||||
return
|
||||
if not _bot_mentioned:
|
||||
return # Talking to someone else, don't interrupt
|
||||
|
||||
await self._handle_message(message)
|
||||
|
||||
@@ -768,17 +737,14 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if hasattr(message, "add_reaction"):
|
||||
await self._add_reaction(message, "👀")
|
||||
|
||||
async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
|
||||
async def on_processing_complete(self, event: MessageEvent, success: bool) -> None:
|
||||
"""Swap the in-progress reaction for a final success/failure reaction."""
|
||||
if not self._reactions_enabled():
|
||||
return
|
||||
message = event.raw_message
|
||||
if hasattr(message, "add_reaction"):
|
||||
await self._remove_reaction(message, "👀")
|
||||
if outcome == ProcessingOutcome.SUCCESS:
|
||||
await self._add_reaction(message, "✅")
|
||||
elif outcome == ProcessingOutcome.FAILURE:
|
||||
await self._add_reaction(message, "❌")
|
||||
await self._add_reaction(message, "✅" if success else "❌")
|
||||
|
||||
async def send(
|
||||
self,
|
||||
@@ -787,34 +753,18 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
) -> SendResult:
|
||||
"""Send a message to a Discord channel or thread.
|
||||
|
||||
When metadata contains a thread_id, the message is sent to that
|
||||
thread instead of the parent channel identified by chat_id.
|
||||
"""
|
||||
"""Send a message to a Discord channel."""
|
||||
if not self._client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
# Determine target channel: thread_id in metadata takes precedence.
|
||||
thread_id = None
|
||||
if metadata and metadata.get("thread_id"):
|
||||
thread_id = metadata["thread_id"]
|
||||
# Get the channel
|
||||
channel = self._client.get_channel(int(chat_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(chat_id))
|
||||
|
||||
if thread_id:
|
||||
# Fetch the thread directly — threads are addressed by their own ID.
|
||||
channel = self._client.get_channel(int(thread_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(thread_id))
|
||||
if not channel:
|
||||
return SendResult(success=False, error=f"Thread {thread_id} not found")
|
||||
else:
|
||||
# Get the parent channel
|
||||
channel = self._client.get_channel(int(chat_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(chat_id))
|
||||
if not channel:
|
||||
return SendResult(success=False, error=f"Channel {chat_id} not found")
|
||||
if not channel:
|
||||
return SendResult(success=False, error=f"Channel {chat_id} not found")
|
||||
|
||||
# Format and split message if needed
|
||||
formatted = self.format_message(content)
|
||||
@@ -823,7 +773,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
message_ids = []
|
||||
reference = None
|
||||
|
||||
if reply_to and self._reply_to_mode != "off":
|
||||
if reply_to:
|
||||
try:
|
||||
ref_msg = await channel.fetch_message(int(reply_to))
|
||||
reference = ref_msg
|
||||
@@ -831,10 +781,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
logger.debug("Could not fetch reply-to message: %s", e)
|
||||
|
||||
for i, chunk in enumerate(chunks):
|
||||
if self._reply_to_mode == "all":
|
||||
chunk_reference = reference
|
||||
else: # "first" (default) or "off"
|
||||
chunk_reference = reference if i == 0 else None
|
||||
chunk_reference = reference if i == 0 else None
|
||||
try:
|
||||
msg = await channel.send(
|
||||
content=chunk,
|
||||
@@ -1277,8 +1224,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
try:
|
||||
await asyncio.to_thread(VoiceReceiver.pcm_to_wav, pcm_data, wav_path)
|
||||
|
||||
from tools.transcription_tools import transcribe_audio
|
||||
result = await asyncio.to_thread(transcribe_audio, wav_path)
|
||||
from tools.transcription_tools import transcribe_audio, get_stt_model_from_config
|
||||
stt_model = get_stt_model_from_config()
|
||||
result = await asyncio.to_thread(transcribe_audio, wav_path, model=stt_model)
|
||||
|
||||
if not result.get("success"):
|
||||
return
|
||||
@@ -1337,10 +1285,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if not self._client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
if not is_safe_url(image_url):
|
||||
logger.warning("[%s] Blocked unsafe image URL during Discord send_image", self.name)
|
||||
return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata)
|
||||
|
||||
try:
|
||||
import aiohttp
|
||||
|
||||
@@ -1352,11 +1296,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
# Download the image and send as a Discord file attachment
|
||||
# (Discord renders attachments inline, unlike plain URLs)
|
||||
from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
|
||||
_proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
|
||||
_sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
|
||||
async with aiohttp.ClientSession(**_sess_kw) as session:
|
||||
async with session.get(image_url, timeout=aiohttp.ClientTimeout(total=30), **_req_kw) as resp:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(image_url, timeout=aiohttp.ClientTimeout(total=30)) as resp:
|
||||
if resp.status != 200:
|
||||
raise Exception(f"Failed to download image: HTTP {resp.status}")
|
||||
|
||||
@@ -1633,7 +1574,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
await self._run_simple_slash(interaction, f"/model {name}".strip())
|
||||
|
||||
@tree.command(name="reasoning", description="Show or change reasoning effort")
|
||||
@discord.app_commands.describe(effort="Reasoning effort: none, minimal, low, medium, high, or xhigh.")
|
||||
@discord.app_commands.describe(effort="Reasoning effort: xhigh, high, medium, low, minimal, or none.")
|
||||
async def slash_reasoning(interaction: discord.Interaction, effort: str = ""):
|
||||
await self._run_simple_slash(interaction, f"/reasoning {effort}".strip())
|
||||
|
||||
@@ -1815,9 +1756,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if hasattr(interaction.channel, "guild") and interaction.channel.guild:
|
||||
chat_name = f"{interaction.channel.guild.name} / #{chat_name}"
|
||||
|
||||
# Get channel topic (if available).
|
||||
# For forum threads, inherit the parent forum's topic.
|
||||
chat_topic = self._get_effective_topic(interaction.channel, is_thread=is_thread)
|
||||
# Get channel topic (if available)
|
||||
chat_topic = getattr(interaction.channel, "topic", None)
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=str(interaction.channel_id),
|
||||
@@ -1891,10 +1831,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
chat_name = f"{guild_name} / {thread_name}" if guild_name else thread_name
|
||||
|
||||
# Inherit forum topic when the thread was created inside a forum channel.
|
||||
_chan = getattr(interaction, "channel", None)
|
||||
chat_topic = self._get_effective_topic(_chan, is_thread=True) if _chan else None
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=thread_id,
|
||||
chat_name=chat_name,
|
||||
@@ -1902,45 +1838,16 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
user_id=str(interaction.user.id),
|
||||
user_name=interaction.user.display_name,
|
||||
thread_id=thread_id,
|
||||
chat_topic=chat_topic,
|
||||
)
|
||||
|
||||
_parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "")
|
||||
_skills = self._resolve_channel_skills(thread_id, _parent_id or None)
|
||||
event = MessageEvent(
|
||||
text=text,
|
||||
message_type=MessageType.TEXT,
|
||||
source=source,
|
||||
raw_message=interaction,
|
||||
auto_skill=_skills,
|
||||
)
|
||||
await self.handle_message(event)
|
||||
|
||||
def _resolve_channel_skills(self, channel_id: str, parent_id: str | None = None) -> list[str] | None:
|
||||
"""Look up auto-skill bindings for a Discord channel/forum thread.
|
||||
|
||||
Config format (in platform extra):
|
||||
channel_skill_bindings:
|
||||
- id: "123456"
|
||||
skills: ["skill-a", "skill-b"]
|
||||
Also checks parent_id so forum threads inherit the forum's bindings.
|
||||
"""
|
||||
bindings = self.config.extra.get("channel_skill_bindings", [])
|
||||
if not bindings:
|
||||
return None
|
||||
ids_to_check = {channel_id}
|
||||
if parent_id:
|
||||
ids_to_check.add(parent_id)
|
||||
for entry in bindings:
|
||||
entry_id = str(entry.get("id", ""))
|
||||
if entry_id in ids_to_check:
|
||||
skills = entry.get("skills") or entry.get("skill")
|
||||
if isinstance(skills, str):
|
||||
return [skills]
|
||||
if isinstance(skills, list) and skills:
|
||||
return list(dict.fromkeys(skills)) # dedup, preserve order
|
||||
return None
|
||||
|
||||
def _thread_parent_channel(self, channel: Any) -> Any:
|
||||
"""Return the parent text channel when invoked from a thread."""
|
||||
return getattr(channel, "parent", None) or channel
|
||||
@@ -2132,66 +2039,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_model_picker(
|
||||
self,
|
||||
chat_id: str,
|
||||
providers: list,
|
||||
current_model: str,
|
||||
current_provider: str,
|
||||
session_key: str,
|
||||
on_model_selected,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send an interactive select-menu model picker.
|
||||
|
||||
Two-step drill-down: provider dropdown → model dropdown.
|
||||
Uses Discord embeds + Select menus via ``ModelPickerView``.
|
||||
"""
|
||||
if not self._client or not DISCORD_AVAILABLE:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
# Resolve target channel (use thread_id if present)
|
||||
target_id = chat_id
|
||||
if metadata and metadata.get("thread_id"):
|
||||
target_id = metadata["thread_id"]
|
||||
|
||||
channel = self._client.get_channel(int(target_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(target_id))
|
||||
|
||||
try:
|
||||
from hermes_cli.providers import get_label
|
||||
provider_label = get_label(current_provider)
|
||||
except Exception:
|
||||
provider_label = current_provider
|
||||
|
||||
embed = discord.Embed(
|
||||
title="⚙ Model Configuration",
|
||||
description=(
|
||||
f"Current model: `{current_model or 'unknown'}`\n"
|
||||
f"Provider: {provider_label}\n\n"
|
||||
f"Select a provider:"
|
||||
),
|
||||
color=discord.Color.blue(),
|
||||
)
|
||||
|
||||
view = ModelPickerView(
|
||||
providers=providers,
|
||||
current_model=current_model,
|
||||
current_provider=current_provider,
|
||||
session_key=session_key,
|
||||
on_model_selected=on_model_selected,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
)
|
||||
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
return SendResult(success=True, message_id=str(msg.id))
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("[%s] send_model_picker failed: %s", self.name, e)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
def _get_parent_channel_id(self, channel: Any) -> Optional[str]:
|
||||
"""Return the parent channel ID for a Discord thread-like channel, if present."""
|
||||
parent = getattr(channel, "parent", None)
|
||||
@@ -2216,15 +2063,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _get_effective_topic(self, channel: Any, is_thread: bool = False) -> Optional[str]:
|
||||
"""Return the channel topic, falling back to the parent forum's topic for forum threads."""
|
||||
topic = getattr(channel, "topic", None)
|
||||
if not topic and is_thread:
|
||||
parent = getattr(channel, "parent", None)
|
||||
if parent and self._is_forum_parent(parent):
|
||||
topic = getattr(parent, "topic", None)
|
||||
return topic
|
||||
|
||||
def _format_thread_chat_name(self, thread: Any) -> str:
|
||||
"""Build a readable chat name for thread-like Discord channels, including forum context when available."""
|
||||
thread_name = getattr(thread, "name", None) or str(getattr(thread, "id", "thread"))
|
||||
@@ -2290,12 +2128,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# UNLESS the channel is in the free-response list or the message is
|
||||
# in a thread where the bot has already participated.
|
||||
#
|
||||
# Config (all settable via discord.* in config.yaml or DISCORD_* env vars):
|
||||
# Config (all settable via discord.* in config.yaml):
|
||||
# discord.require_mention: Require @mention in server channels (default: true)
|
||||
# discord.free_response_channels: Channel IDs where bot responds without mention
|
||||
# discord.ignored_channels: Channel IDs where bot NEVER responds (even when mentioned)
|
||||
# discord.allowed_channels: If set, bot ONLY responds in these channels (whitelist)
|
||||
# discord.no_thread_channels: Channel IDs where bot responds directly without creating thread
|
||||
# discord.auto_thread: Auto-create thread on @mention in channels (default: true)
|
||||
|
||||
thread_id = None
|
||||
@@ -2306,27 +2141,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
parent_channel_id = self._get_parent_channel_id(message.channel)
|
||||
|
||||
if not isinstance(message.channel, discord.DMChannel):
|
||||
channel_ids = {str(message.channel.id)}
|
||||
if parent_channel_id:
|
||||
channel_ids.add(parent_channel_id)
|
||||
|
||||
# Check allowed channels - if set, only respond in these channels
|
||||
allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
|
||||
if allowed_channels_raw:
|
||||
allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()}
|
||||
if not (channel_ids & allowed_channels):
|
||||
logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
||||
# Check ignored channels - never respond even when mentioned
|
||||
ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
|
||||
ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
|
||||
if channel_ids & ignored_channels:
|
||||
logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
||||
free_channels_raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
|
||||
free_channels = {ch.strip() for ch in free_channels_raw.split(",") if ch.strip()}
|
||||
channel_ids = {str(message.channel.id)}
|
||||
if parent_channel_id:
|
||||
channel_ids.add(parent_channel_id)
|
||||
|
||||
@@ -2348,14 +2165,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# Auto-thread: when enabled, automatically create a thread for every
|
||||
# @mention in a text channel so each conversation is isolated (like Slack).
|
||||
# Messages already inside threads or DMs are unaffected.
|
||||
# no_thread_channels: channels where bot responds directly without thread.
|
||||
auto_threaded_channel = None
|
||||
if not is_thread and not isinstance(message.channel, discord.DMChannel):
|
||||
no_thread_channels_raw = os.getenv("DISCORD_NO_THREAD_CHANNELS", "")
|
||||
no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()}
|
||||
skip_thread = bool(channel_ids & no_thread_channels)
|
||||
auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
|
||||
if auto_thread and not skip_thread:
|
||||
if auto_thread:
|
||||
thread = await self._auto_create_thread(message)
|
||||
if thread:
|
||||
is_thread = True
|
||||
@@ -2402,10 +2215,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if hasattr(message.channel, "guild") and message.channel.guild:
|
||||
chat_name = f"{message.channel.guild.name} / #{chat_name}"
|
||||
|
||||
# Get channel topic (if available - TextChannels have topics, DMs/threads don't).
|
||||
# For threads whose parent is a forum channel, inherit the parent's topic
|
||||
# so forum descriptions (e.g. project instructions) appear in the session context.
|
||||
chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)
|
||||
# Get channel topic (if available - TextChannels have topics, DMs/threads don't)
|
||||
chat_topic = getattr(message.channel, "topic", None)
|
||||
|
||||
# Build source
|
||||
source = self.build_source(
|
||||
@@ -2468,7 +2279,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
ext or "unknown", content_type,
|
||||
)
|
||||
else:
|
||||
MAX_DOC_BYTES = 32 * 1024 * 1024
|
||||
MAX_DOC_BYTES = 20 * 1024 * 1024
|
||||
if att.size and att.size > MAX_DOC_BYTES:
|
||||
logger.warning(
|
||||
"[Discord] Document too large (%s bytes), skipping: %s",
|
||||
@@ -2477,14 +2288,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
else:
|
||||
try:
|
||||
import aiohttp
|
||||
from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
|
||||
_proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
|
||||
_sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
|
||||
async with aiohttp.ClientSession(**_sess_kw) as session:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(
|
||||
att.url,
|
||||
timeout=aiohttp.ClientTimeout(total=30),
|
||||
**_req_kw,
|
||||
) as resp:
|
||||
if resp.status != 200:
|
||||
raise Exception(f"HTTP {resp.status}")
|
||||
@@ -2496,9 +2303,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(doc_mime)
|
||||
logger.info("[Discord] Cached user document: %s", cached_path)
|
||||
# Inject text content for plain-text documents (capped at 100 KB)
|
||||
# Inject text content for .txt/.md files (capped at 100 KB)
|
||||
MAX_TEXT_INJECT_BYTES = 100 * 1024
|
||||
if ext in (".md", ".txt", ".log") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
try:
|
||||
text_content = raw_bytes.decode("utf-8")
|
||||
display_name = att.filename or f"document{ext}"
|
||||
@@ -2525,10 +2332,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if not event_text or not event_text.strip():
|
||||
event_text = "(The user sent a message with no text content)"
|
||||
|
||||
_chan = message.channel
|
||||
_parent_id = str(getattr(_chan, "parent_id", "") or "")
|
||||
_chan_id = str(getattr(_chan, "id", ""))
|
||||
_skills = self._resolve_channel_skills(_chan_id, _parent_id or None)
|
||||
event = MessageEvent(
|
||||
text=event_text,
|
||||
message_type=msg_type,
|
||||
@@ -2539,7 +2342,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
media_types=media_types,
|
||||
reply_to_message_id=str(message.reference.message_id) if message.reference else None,
|
||||
timestamp=message.created_at,
|
||||
auto_skill=_skills,
|
||||
)
|
||||
|
||||
# Track thread participation so the bot won't require @mention for
|
||||
@@ -2547,80 +2349,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if thread_id:
|
||||
self._track_thread(thread_id)
|
||||
|
||||
# Only batch plain text messages — commands, media, etc. dispatch
|
||||
# immediately since they won't be split by the Discord client.
|
||||
if msg_type == MessageType.TEXT and self._text_batch_delay_seconds > 0:
|
||||
self._enqueue_text_event(event)
|
||||
else:
|
||||
await self.handle_message(event)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text message aggregation (handles Discord client-side splits)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _text_batch_key(self, event: MessageEvent) -> str:
|
||||
"""Session-scoped key for text message batching."""
|
||||
from gateway.session import build_session_key
|
||||
return build_session_key(
|
||||
event.source,
|
||||
group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
|
||||
thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
|
||||
)
|
||||
|
||||
def _enqueue_text_event(self, event: MessageEvent) -> None:
|
||||
"""Buffer a text event and reset the flush timer.
|
||||
|
||||
When Discord splits a long user message at 2000 chars, the chunks
|
||||
arrive within a few hundred milliseconds. This merges them into
|
||||
a single event before dispatching.
|
||||
"""
|
||||
key = self._text_batch_key(event)
|
||||
existing = self._pending_text_batches.get(key)
|
||||
chunk_len = len(event.text or "")
|
||||
if existing is None:
|
||||
event._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
self._pending_text_batches[key] = event
|
||||
else:
|
||||
if event.text:
|
||||
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
|
||||
existing._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
if event.media_urls:
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
existing.media_types.extend(event.media_types)
|
||||
|
||||
prior_task = self._pending_text_batch_tasks.get(key)
|
||||
if prior_task and not prior_task.done():
|
||||
prior_task.cancel()
|
||||
self._pending_text_batch_tasks[key] = asyncio.create_task(
|
||||
self._flush_text_batch(key)
|
||||
)
|
||||
|
||||
async def _flush_text_batch(self, key: str) -> None:
|
||||
"""Wait for the quiet period then dispatch the aggregated text.
|
||||
|
||||
Uses a longer delay when the latest chunk is near Discord's 2000-char
|
||||
split point, since a continuation chunk is almost certain.
|
||||
"""
|
||||
current_task = asyncio.current_task()
|
||||
try:
|
||||
pending = self._pending_text_batches.get(key)
|
||||
last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
|
||||
if last_len >= self._SPLIT_THRESHOLD:
|
||||
delay = self._text_batch_split_delay_seconds
|
||||
else:
|
||||
delay = self._text_batch_delay_seconds
|
||||
await asyncio.sleep(delay)
|
||||
event = self._pending_text_batches.pop(key, None)
|
||||
if not event:
|
||||
return
|
||||
logger.info(
|
||||
"[Discord] Flushing text batch %s (%d chars)",
|
||||
key, len(event.text or ""),
|
||||
)
|
||||
await self.handle_message(event)
|
||||
finally:
|
||||
if self._pending_text_batch_tasks.get(key) is current_task:
|
||||
self._pending_text_batch_tasks.pop(key, None)
|
||||
await self.handle_message(event)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -2801,218 +2530,3 @@ if DISCORD_AVAILABLE:
|
||||
self.resolved = True
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
||||
class ModelPickerView(discord.ui.View):
|
||||
"""Interactive select-menu view for model switching.
|
||||
|
||||
Two-step drill-down: provider dropdown → model dropdown.
|
||||
Edits the original message in-place as the user navigates.
|
||||
Times out after 2 minutes.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
providers: list,
|
||||
current_model: str,
|
||||
current_provider: str,
|
||||
session_key: str,
|
||||
on_model_selected,
|
||||
allowed_user_ids: set,
|
||||
):
|
||||
super().__init__(timeout=120)
|
||||
self.providers = providers
|
||||
self.current_model = current_model
|
||||
self.current_provider = current_provider
|
||||
self.session_key = session_key
|
||||
self.on_model_selected = on_model_selected
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.resolved = False
|
||||
self._selected_provider: str = ""
|
||||
|
||||
self._build_provider_select()
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
if not self.allowed_user_ids:
|
||||
return True
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
|
||||
def _build_provider_select(self):
|
||||
"""Build the provider dropdown menu."""
|
||||
self.clear_items()
|
||||
options = []
|
||||
for p in self.providers:
|
||||
count = p.get("total_models", len(p.get("models", [])))
|
||||
label = f"{p['name']} ({count} models)"
|
||||
desc = "current" if p.get("is_current") else None
|
||||
options.append(
|
||||
discord.SelectOption(
|
||||
label=label[:100],
|
||||
value=p["slug"],
|
||||
description=desc,
|
||||
)
|
||||
)
|
||||
if not options:
|
||||
return
|
||||
|
||||
select = discord.ui.Select(
|
||||
placeholder="Choose a provider...",
|
||||
options=options[:25],
|
||||
custom_id="model_provider_select",
|
||||
)
|
||||
select.callback = self._on_provider_selected
|
||||
self.add_item(select)
|
||||
|
||||
cancel_btn = discord.ui.Button(
|
||||
label="Cancel", style=discord.ButtonStyle.red, custom_id="model_cancel"
|
||||
)
|
||||
cancel_btn.callback = self._on_cancel
|
||||
self.add_item(cancel_btn)
|
||||
|
||||
def _build_model_select(self, provider_slug: str):
|
||||
"""Build the model dropdown for a specific provider."""
|
||||
self.clear_items()
|
||||
provider = next(
|
||||
(p for p in self.providers if p["slug"] == provider_slug), None
|
||||
)
|
||||
if not provider:
|
||||
return
|
||||
|
||||
models = provider.get("models", [])
|
||||
options = []
|
||||
for model_id in models[:25]:
|
||||
short = model_id.split("/")[-1] if "/" in model_id else model_id
|
||||
options.append(
|
||||
discord.SelectOption(
|
||||
label=short[:100],
|
||||
value=model_id[:100],
|
||||
)
|
||||
)
|
||||
if not options:
|
||||
return
|
||||
|
||||
select = discord.ui.Select(
|
||||
placeholder=f"Choose a model from {provider.get('name', provider_slug)}...",
|
||||
options=options,
|
||||
custom_id="model_model_select",
|
||||
)
|
||||
select.callback = self._on_model_selected
|
||||
self.add_item(select)
|
||||
|
||||
back_btn = discord.ui.Button(
|
||||
label="◀ Back", style=discord.ButtonStyle.grey, custom_id="model_back"
|
||||
)
|
||||
back_btn.callback = self._on_back
|
||||
self.add_item(back_btn)
|
||||
|
||||
cancel_btn = discord.ui.Button(
|
||||
label="Cancel", style=discord.ButtonStyle.red, custom_id="model_cancel2"
|
||||
)
|
||||
cancel_btn.callback = self._on_cancel
|
||||
self.add_item(cancel_btn)
|
||||
|
||||
async def _on_provider_selected(self, interaction: discord.Interaction):
|
||||
if not self._check_auth(interaction):
|
||||
await interaction.response.send_message(
|
||||
"You're not authorized~", ephemeral=True
|
||||
)
|
||||
return
|
||||
|
||||
provider_slug = interaction.data["values"][0]
|
||||
self._selected_provider = provider_slug
|
||||
provider = next(
|
||||
(p for p in self.providers if p["slug"] == provider_slug), None
|
||||
)
|
||||
pname = provider.get("name", provider_slug) if provider else provider_slug
|
||||
|
||||
self._build_model_select(provider_slug)
|
||||
|
||||
total = provider.get("total_models", 0) if provider else 0
|
||||
shown = min(len(provider.get("models", [])), 25) if provider else 0
|
||||
extra = f"\n*{total - shown} more available — type `/model <name>` directly*" if total > shown else ""
|
||||
|
||||
await interaction.response.edit_message(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Model Configuration",
|
||||
description=f"Provider: **{pname}**\nSelect a model:{extra}",
|
||||
color=discord.Color.blue(),
|
||||
),
|
||||
view=self,
|
||||
)
|
||||
|
||||
async def _on_model_selected(self, interaction: discord.Interaction):
|
||||
if self.resolved:
|
||||
await interaction.response.send_message(
|
||||
"Already resolved~", ephemeral=True
|
||||
)
|
||||
return
|
||||
if not self._check_auth(interaction):
|
||||
await interaction.response.send_message(
|
||||
"You're not authorized~", ephemeral=True
|
||||
)
|
||||
return
|
||||
|
||||
self.resolved = True
|
||||
model_id = interaction.data["values"][0]
|
||||
|
||||
try:
|
||||
result_text = await self.on_model_selected(
|
||||
str(interaction.channel_id),
|
||||
model_id,
|
||||
self._selected_provider,
|
||||
)
|
||||
except Exception as exc:
|
||||
result_text = f"Error switching model: {exc}"
|
||||
|
||||
self.clear_items()
|
||||
await interaction.response.edit_message(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Model Switched",
|
||||
description=result_text,
|
||||
color=discord.Color.green(),
|
||||
),
|
||||
view=self,
|
||||
)
|
||||
|
||||
async def _on_back(self, interaction: discord.Interaction):
|
||||
if not self._check_auth(interaction):
|
||||
await interaction.response.send_message(
|
||||
"You're not authorized~", ephemeral=True
|
||||
)
|
||||
return
|
||||
|
||||
self._build_provider_select()
|
||||
|
||||
try:
|
||||
from hermes_cli.providers import get_label
|
||||
provider_label = get_label(self.current_provider)
|
||||
except Exception:
|
||||
provider_label = self.current_provider
|
||||
|
||||
await interaction.response.edit_message(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Model Configuration",
|
||||
description=(
|
||||
f"Current model: `{self.current_model or 'unknown'}`\n"
|
||||
f"Provider: {provider_label}\n\n"
|
||||
f"Select a provider:"
|
||||
),
|
||||
color=discord.Color.blue(),
|
||||
),
|
||||
view=self,
|
||||
)
|
||||
|
||||
async def _on_cancel(self, interaction: discord.Interaction):
|
||||
self.resolved = True
|
||||
self.clear_items()
|
||||
await interaction.response.edit_message(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Model Configuration",
|
||||
description="Model selection cancelled.",
|
||||
color=discord.Color.greyple(),
|
||||
),
|
||||
view=self,
|
||||
)
|
||||
|
||||
async def on_timeout(self):
|
||||
self.resolved = True
|
||||
self.clear_items()
|
||||
|
||||
@@ -195,11 +195,7 @@ def _extract_attachments(
|
||||
|
||||
ext = Path(filename).suffix.lower()
|
||||
if ext in _IMAGE_EXTS:
|
||||
try:
|
||||
cached_path = cache_image_from_bytes(payload, ext)
|
||||
except ValueError:
|
||||
logger.debug("Skipping non-image attachment %s (invalid magic bytes)", filename)
|
||||
continue
|
||||
cached_path = cache_image_from_bytes(payload, ext)
|
||||
attachments.append({
|
||||
"path": cached_path,
|
||||
"filename": filename,
|
||||
|
||||
+19
-195
@@ -20,7 +20,6 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
import hashlib
|
||||
import hmac
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
@@ -61,6 +60,7 @@ try:
|
||||
CreateMessageRequestBody,
|
||||
GetChatRequest,
|
||||
GetMessageRequest,
|
||||
GetImageRequest,
|
||||
GetMessageResourceRequest,
|
||||
P2ImMessageMessageReadV1,
|
||||
ReplyMessageRequest,
|
||||
@@ -264,7 +264,6 @@ class FeishuAdapterSettings:
|
||||
bot_name: str
|
||||
dedup_cache_size: int
|
||||
text_batch_delay_seconds: float
|
||||
text_batch_split_delay_seconds: float
|
||||
text_batch_max_messages: int
|
||||
text_batch_max_chars: int
|
||||
media_batch_delay_seconds: float
|
||||
@@ -389,6 +388,10 @@ def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int:
|
||||
return default if parsed is None else parsed
|
||||
|
||||
|
||||
def _is_loop_ready(loop: Optional[asyncio.AbstractEventLoop]) -> bool:
|
||||
return loop is not None and not bool(getattr(loop, "is_closed", lambda: False)())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Post payload builders and parsers
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -973,8 +976,7 @@ def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
|
||||
return await original_connect(*args, **kwargs)
|
||||
|
||||
def _configure_with_overrides(conf: Any) -> Any:
|
||||
if original_configure is None:
|
||||
raise RuntimeError("Feishu _configure_with_overrides called but original_configure is None")
|
||||
assert original_configure is not None
|
||||
result = original_configure(conf)
|
||||
_apply_runtime_ws_overrides()
|
||||
return result
|
||||
@@ -1016,10 +1018,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
"""Feishu/Lark bot adapter."""
|
||||
|
||||
MAX_MESSAGE_LENGTH = 8000
|
||||
# Threshold for detecting Feishu client-side message splits.
|
||||
# When a chunk is near the ~4096-char practical limit, a continuation
|
||||
# is almost certain.
|
||||
_SPLIT_THRESHOLD = 4000
|
||||
|
||||
# =========================================================================
|
||||
# Lifecycle — init / settings / connect / disconnect
|
||||
@@ -1059,9 +1057,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
self._media_batch_state = FeishuBatchState()
|
||||
self._pending_media_batches = self._media_batch_state.events
|
||||
self._pending_media_batch_tasks = self._media_batch_state.tasks
|
||||
# Exec approval button state (approval_id → {session_key, message_id, chat_id})
|
||||
self._approval_state: Dict[int, Dict[str, str]] = {}
|
||||
self._approval_counter = itertools.count(1)
|
||||
self._load_seen_message_ids()
|
||||
|
||||
@staticmethod
|
||||
@@ -1111,9 +1106,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
text_batch_delay_seconds=float(
|
||||
os.getenv("HERMES_FEISHU_TEXT_BATCH_DELAY_SECONDS", str(_DEFAULT_TEXT_BATCH_DELAY_SECONDS))
|
||||
),
|
||||
text_batch_split_delay_seconds=float(
|
||||
os.getenv("HERMES_FEISHU_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0")
|
||||
),
|
||||
text_batch_max_messages=max(
|
||||
1,
|
||||
int(os.getenv("HERMES_FEISHU_TEXT_BATCH_MAX_MESSAGES", str(_DEFAULT_TEXT_BATCH_MAX_MESSAGES))),
|
||||
@@ -1161,7 +1153,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
self._bot_name = settings.bot_name
|
||||
self._dedup_cache_size = settings.dedup_cache_size
|
||||
self._text_batch_delay_seconds = settings.text_batch_delay_seconds
|
||||
self._text_batch_split_delay_seconds = settings.text_batch_split_delay_seconds
|
||||
self._text_batch_max_messages = settings.text_batch_max_messages
|
||||
self._text_batch_max_chars = settings.text_batch_max_chars
|
||||
self._media_batch_delay_seconds = settings.media_batch_delay_seconds
|
||||
@@ -1190,8 +1181,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
lambda data: self._on_reaction_event("im.message.reaction.deleted_v1", data)
|
||||
)
|
||||
.register_p2_card_action_trigger(self._on_card_action_trigger)
|
||||
.register_p2_im_chat_member_bot_added_v1(self._on_bot_added_to_chat)
|
||||
.register_p2_im_chat_member_bot_deleted_v1(self._on_bot_removed_from_chat)
|
||||
.build()
|
||||
)
|
||||
|
||||
@@ -1410,104 +1399,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
logger.error("[Feishu] Failed to edit message %s: %s", message_id, exc, exc_info=True)
|
||||
return SendResult(success=False, error=str(exc))
|
||||
|
||||
async def send_exec_approval(
|
||||
self, chat_id: str, command: str, session_key: str,
|
||||
description: str = "dangerous command",
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send an interactive card with approval buttons.
|
||||
|
||||
The buttons carry ``hermes_action`` in their value dict so that
|
||||
``_handle_card_action_event`` can intercept them and call
|
||||
``resolve_gateway_approval()`` to unblock the waiting agent thread.
|
||||
"""
|
||||
if not self._client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
approval_id = next(self._approval_counter)
|
||||
cmd_preview = command[:3000] + "..." if len(command) > 3000 else command
|
||||
|
||||
def _btn(label: str, action_name: str, btn_type: str = "default") -> dict:
|
||||
return {
|
||||
"tag": "button",
|
||||
"text": {"tag": "plain_text", "content": label},
|
||||
"type": btn_type,
|
||||
"value": {"hermes_action": action_name, "approval_id": approval_id},
|
||||
}
|
||||
|
||||
card = {
|
||||
"config": {"wide_screen_mode": True},
|
||||
"header": {
|
||||
"title": {"content": "⚠️ Command Approval Required", "tag": "plain_text"},
|
||||
"template": "orange",
|
||||
},
|
||||
"elements": [
|
||||
{
|
||||
"tag": "markdown",
|
||||
"content": f"```\n{cmd_preview}\n```\n**Reason:** {description}",
|
||||
},
|
||||
{
|
||||
"tag": "action",
|
||||
"actions": [
|
||||
_btn("✅ Allow Once", "approve_once", "primary"),
|
||||
_btn("✅ Session", "approve_session"),
|
||||
_btn("✅ Always", "approve_always"),
|
||||
_btn("❌ Deny", "deny", "danger"),
|
||||
],
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
payload = json.dumps(card, ensure_ascii=False)
|
||||
response = await self._feishu_send_with_retry(
|
||||
chat_id=chat_id,
|
||||
msg_type="interactive",
|
||||
payload=payload,
|
||||
reply_to=None,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
result = self._finalize_send_result(response, "send_exec_approval failed")
|
||||
if result.success:
|
||||
self._approval_state[approval_id] = {
|
||||
"session_key": session_key,
|
||||
"message_id": result.message_id or "",
|
||||
"chat_id": chat_id,
|
||||
}
|
||||
return result
|
||||
except Exception as exc:
|
||||
logger.warning("[Feishu] send_exec_approval failed: %s", exc)
|
||||
return SendResult(success=False, error=str(exc))
|
||||
|
||||
async def _update_approval_card(
|
||||
self, message_id: str, label: str, user_name: str, choice: str,
|
||||
) -> None:
|
||||
"""Replace the approval card with a resolved status card."""
|
||||
if not self._client or not message_id:
|
||||
return
|
||||
icon = "❌" if choice == "deny" else "✅"
|
||||
card = {
|
||||
"config": {"wide_screen_mode": True},
|
||||
"header": {
|
||||
"title": {"content": f"{icon} {label}", "tag": "plain_text"},
|
||||
"template": "red" if choice == "deny" else "green",
|
||||
},
|
||||
"elements": [
|
||||
{
|
||||
"tag": "markdown",
|
||||
"content": f"{icon} **{label}** by {user_name}",
|
||||
},
|
||||
],
|
||||
}
|
||||
try:
|
||||
payload = json.dumps(card, ensure_ascii=False)
|
||||
body = self._build_update_message_body(msg_type="interactive", content=payload)
|
||||
request = self._build_update_message_request(message_id=message_id, request_body=body)
|
||||
await asyncio.to_thread(self._client.im.v1.message.update, request)
|
||||
except Exception as exc:
|
||||
logger.warning("[Feishu] Failed to update approval card %s: %s", message_id, exc)
|
||||
|
||||
async def send_voice(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -1582,18 +1473,13 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
return SendResult(success=False, error=f"Image file not found: {image_path}")
|
||||
|
||||
try:
|
||||
import io as _io
|
||||
with open(image_path, "rb") as f:
|
||||
image_bytes = f.read()
|
||||
# Wrap in BytesIO so lark SDK's MultipartEncoder can read .name and .tell()
|
||||
image_file = _io.BytesIO(image_bytes)
|
||||
image_file.name = os.path.basename(image_path)
|
||||
body = self._build_image_upload_body(
|
||||
image_type=_FEISHU_IMAGE_UPLOAD_TYPE,
|
||||
image=image_file,
|
||||
)
|
||||
request = self._build_image_upload_request(body)
|
||||
upload_response = await asyncio.to_thread(self._client.im.v1.image.create, request)
|
||||
with open(image_path, "rb") as image_file:
|
||||
body = self._build_image_upload_body(
|
||||
image_type=_FEISHU_IMAGE_UPLOAD_TYPE,
|
||||
image=image_file,
|
||||
)
|
||||
request = self._build_image_upload_request(body)
|
||||
upload_response = await asyncio.to_thread(self._client.im.v1.image.create, request)
|
||||
image_key = self._extract_response_field(upload_response, "image_key")
|
||||
if not image_key:
|
||||
return self._response_error_result(
|
||||
@@ -1939,52 +1825,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
action = getattr(event, "action", None)
|
||||
action_tag = str(getattr(action, "tag", "") or "button")
|
||||
action_value = getattr(action, "value", {}) or {}
|
||||
|
||||
# --- Exec approval button intercept ---
|
||||
hermes_action = action_value.get("hermes_action") if isinstance(action_value, dict) else None
|
||||
if hermes_action:
|
||||
approval_id = action_value.get("approval_id")
|
||||
state = self._approval_state.pop(approval_id, None)
|
||||
if not state:
|
||||
logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id)
|
||||
return
|
||||
|
||||
choice_map = {
|
||||
"approve_once": "once",
|
||||
"approve_session": "session",
|
||||
"approve_always": "always",
|
||||
"deny": "deny",
|
||||
}
|
||||
choice = choice_map.get(hermes_action, "deny")
|
||||
|
||||
label_map = {
|
||||
"once": "Approved once",
|
||||
"session": "Approved for session",
|
||||
"always": "Approved permanently",
|
||||
"deny": "Denied",
|
||||
}
|
||||
label = label_map.get(choice, "Resolved")
|
||||
|
||||
# Resolve sender name for the status card
|
||||
sender_id = SimpleNamespace(open_id=open_id, user_id=None, union_id=None)
|
||||
sender_profile = await self._resolve_sender_profile(sender_id)
|
||||
user_name = sender_profile.get("user_name") or open_id
|
||||
|
||||
# Resolve the approval — unblocks the agent thread
|
||||
try:
|
||||
from tools.approval import resolve_gateway_approval
|
||||
count = resolve_gateway_approval(state["session_key"], choice)
|
||||
logger.info(
|
||||
"Feishu button resolved %d approval(s) for session %s (choice=%s, user=%s)",
|
||||
count, state["session_key"], choice, user_name,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("Failed to resolve gateway approval from Feishu button: %s", exc)
|
||||
|
||||
# Update the card to show the decision
|
||||
await self._update_approval_card(state.get("message_id", ""), label, user_name, choice)
|
||||
return
|
||||
|
||||
synthetic_text = f"/card {action_tag}"
|
||||
if action_value:
|
||||
try:
|
||||
@@ -2230,7 +2070,10 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
existing.media_types.extend(event.media_types)
|
||||
if event.text:
|
||||
existing.text = self._merge_caption(existing.text, event.text)
|
||||
if not existing.text:
|
||||
existing.text = event.text
|
||||
elif event.text not in existing.text.split("\n\n"):
|
||||
existing.text = f"{existing.text}\n\n{event.text}"
|
||||
existing.timestamp = event.timestamp
|
||||
if event.message_id:
|
||||
existing.message_id = event.message_id
|
||||
@@ -2274,10 +2117,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
default_ext: str,
|
||||
preferred_name: str,
|
||||
) -> tuple[str, str]:
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(file_url):
|
||||
raise ValueError(f"Blocked unsafe URL (SSRF protection): {file_url[:80]}")
|
||||
|
||||
import httpx
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
@@ -2495,10 +2334,8 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
async def _enqueue_text_event(self, event: MessageEvent) -> None:
|
||||
"""Debounce rapid Feishu text bursts into a single MessageEvent."""
|
||||
key = self._text_batch_key(event)
|
||||
chunk_len = len(event.text or "")
|
||||
existing = self._pending_text_batches.get(key)
|
||||
if existing is None:
|
||||
event._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
self._pending_text_batches[key] = event
|
||||
self._pending_text_batch_counts[key] = 1
|
||||
self._schedule_text_batch_flush(key)
|
||||
@@ -2523,7 +2360,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
return
|
||||
|
||||
existing.text = next_text
|
||||
existing._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
existing.timestamp = event.timestamp
|
||||
if event.message_id:
|
||||
existing.message_id = event.message_id
|
||||
@@ -2550,22 +2386,10 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
task_map[key] = asyncio.create_task(flush_fn(key))
|
||||
|
||||
async def _flush_text_batch(self, key: str) -> None:
|
||||
"""Flush a pending text batch after the quiet period.
|
||||
|
||||
Uses a longer delay when the latest chunk is near Feishu's ~4096-char
|
||||
split point, since a continuation chunk is almost certain.
|
||||
"""
|
||||
"""Flush a pending text batch after the quiet period."""
|
||||
current_task = asyncio.current_task()
|
||||
try:
|
||||
# Adaptive delay: if the latest chunk is near the split threshold,
|
||||
# a continuation is almost certain — wait longer.
|
||||
pending = self._pending_text_batches.get(key)
|
||||
last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
|
||||
if last_len >= self._SPLIT_THRESHOLD:
|
||||
delay = self._text_batch_split_delay_seconds
|
||||
else:
|
||||
delay = self._text_batch_delay_seconds
|
||||
await asyncio.sleep(delay)
|
||||
await asyncio.sleep(self._text_batch_delay_seconds)
|
||||
await self._flush_text_batch_now(key)
|
||||
finally:
|
||||
if self._pending_text_batch_tasks.get(key) is current_task:
|
||||
|
||||
+847
-739
File diff suppressed because it is too large
Load Diff
@@ -407,11 +407,6 @@ class MattermostAdapter(BasePlatformAdapter):
|
||||
kind: str = "file",
|
||||
) -> SendResult:
|
||||
"""Download a URL and upload it as a file attachment."""
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(url):
|
||||
logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
|
||||
return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
|
||||
@@ -435,6 +430,7 @@ class MattermostAdapter(BasePlatformAdapter):
|
||||
ct = resp.content_type or "application/octet-stream"
|
||||
break
|
||||
except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
|
||||
last_exc = exc
|
||||
if attempt < 2:
|
||||
await asyncio.sleep(1.5 * (attempt + 1))
|
||||
continue
|
||||
|
||||
@@ -647,11 +647,7 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
|
||||
if result is not None:
|
||||
self._track_sent_timestamp(result)
|
||||
# Use the timestamp from the RPC result as a pseudo message_id.
|
||||
# Signal doesn't have real message IDs, but the stream consumer
|
||||
# needs a truthy value to follow its edit→fallback path correctly.
|
||||
_msg_id = str(result.get("timestamp", "")) if isinstance(result, dict) else None
|
||||
return SendResult(success=True, message_id=_msg_id or None)
|
||||
return SendResult(success=True)
|
||||
return SendResult(success=False, error="RPC send failed")
|
||||
|
||||
def _track_sent_timestamp(self, rpc_result) -> None:
|
||||
@@ -841,11 +837,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
async def stop_typing(self, chat_id: str) -> None:
|
||||
"""Public interface for stopping typing — called by base adapter's
|
||||
_keep_typing finally block to clean up platform-level typing tasks."""
|
||||
await self._stop_typing_indicator(chat_id)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Chat Info
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
+31
-728
@@ -14,8 +14,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, Optional, Any, Tuple
|
||||
from typing import Dict, Optional, Any
|
||||
|
||||
try:
|
||||
from slack_bolt.async_app import AsyncApp
|
||||
@@ -39,7 +38,6 @@ from gateway.platforms.base import (
|
||||
MessageType,
|
||||
SendResult,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
safe_url_for_log,
|
||||
cache_document_from_bytes,
|
||||
)
|
||||
|
||||
@@ -47,14 +45,6 @@ from gateway.platforms.base import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class _ThreadContextCache:
|
||||
"""Cache entry for fetched thread context."""
|
||||
content: str
|
||||
fetched_at: float = field(default_factory=time.monotonic)
|
||||
message_count: int = 0
|
||||
|
||||
|
||||
def check_slack_requirements() -> bool:
|
||||
"""Check if Slack dependencies are available."""
|
||||
return SLACK_AVAILABLE
|
||||
@@ -94,26 +84,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
self._seen_messages: Dict[str, float] = {}
|
||||
self._SEEN_TTL = 300 # 5 minutes
|
||||
self._SEEN_MAX = 2000 # prune threshold
|
||||
# Track pending approval message_ts → resolved flag to prevent
|
||||
# double-clicks on approval buttons.
|
||||
self._approval_resolved: Dict[str, bool] = {}
|
||||
# Track timestamps of messages sent by the bot so we can respond
|
||||
# to thread replies even without an explicit @mention.
|
||||
self._bot_message_ts: set = set()
|
||||
self._BOT_TS_MAX = 5000 # cap to avoid unbounded growth
|
||||
# Track threads where the bot has been @mentioned — once mentioned,
|
||||
# respond to ALL subsequent messages in that thread automatically.
|
||||
self._mentioned_threads: set = set()
|
||||
self._MENTIONED_THREADS_MAX = 5000
|
||||
# Assistant thread metadata keyed by (channel_id, thread_ts). Slack's
|
||||
# AI Assistant lifecycle events can arrive before/alongside message
|
||||
# events, and they carry the user/thread identity needed for stable
|
||||
# session + memory scoping.
|
||||
self._assistant_threads: Dict[Tuple[str, str], Dict[str, str]] = {}
|
||||
self._ASSISTANT_THREADS_MAX = 5000
|
||||
# Cache for _fetch_thread_context results: cache_key → _ThreadContextCache
|
||||
self._thread_context_cache: Dict[str, _ThreadContextCache] = {}
|
||||
self._THREAD_CACHE_TTL = 60.0
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Slack via Socket Mode."""
|
||||
@@ -200,29 +170,12 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
async def handle_app_mention(event, say):
|
||||
pass
|
||||
|
||||
@self._app.event("assistant_thread_started")
|
||||
async def handle_assistant_thread_started(event, say):
|
||||
await self._handle_assistant_thread_lifecycle_event(event)
|
||||
|
||||
@self._app.event("assistant_thread_context_changed")
|
||||
async def handle_assistant_thread_context_changed(event, say):
|
||||
await self._handle_assistant_thread_lifecycle_event(event)
|
||||
|
||||
# Register slash command handler
|
||||
@self._app.command("/hermes")
|
||||
async def handle_hermes_command(ack, command):
|
||||
await ack()
|
||||
await self._handle_slash_command(command)
|
||||
|
||||
# Register Block Kit action handlers for approval buttons
|
||||
for _action_id in (
|
||||
"hermes_approve_once",
|
||||
"hermes_approve_session",
|
||||
"hermes_approve_always",
|
||||
"hermes_deny",
|
||||
):
|
||||
self._app.action(_action_id)(self._handle_approval_action)
|
||||
|
||||
# Start Socket Mode handler in background
|
||||
self._handler = AsyncSocketModeHandler(self._app, app_token)
|
||||
self._socket_mode_task = asyncio.create_task(self._handler.start_async())
|
||||
@@ -294,7 +247,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
kwargs = {
|
||||
"channel": chat_id,
|
||||
"text": chunk,
|
||||
"mrkdwn": True,
|
||||
}
|
||||
if thread_ts:
|
||||
kwargs["thread_ts"] = thread_ts
|
||||
@@ -304,22 +256,9 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
|
||||
last_result = await self._get_client(chat_id).chat_postMessage(**kwargs)
|
||||
|
||||
# Track the sent message ts so we can auto-respond to thread
|
||||
# replies without requiring @mention.
|
||||
sent_ts = last_result.get("ts") if last_result else None
|
||||
if sent_ts:
|
||||
self._bot_message_ts.add(sent_ts)
|
||||
# Also register the thread root so replies-to-my-replies work
|
||||
if thread_ts:
|
||||
self._bot_message_ts.add(thread_ts)
|
||||
if len(self._bot_message_ts) > self._BOT_TS_MAX:
|
||||
excess = len(self._bot_message_ts) - self._BOT_TS_MAX // 2
|
||||
for old_ts in list(self._bot_message_ts)[:excess]:
|
||||
self._bot_message_ts.discard(old_ts)
|
||||
|
||||
return SendResult(
|
||||
success=True,
|
||||
message_id=sent_ts,
|
||||
message_id=last_result.get("ts") if last_result else None,
|
||||
raw_response=last_result,
|
||||
)
|
||||
|
||||
@@ -337,11 +276,10 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
try:
|
||||
formatted = self.format_message(content)
|
||||
await self._get_client(chat_id).chat_update(
|
||||
channel=chat_id,
|
||||
ts=message_id,
|
||||
text=formatted,
|
||||
text=content,
|
||||
)
|
||||
return SendResult(success=True, message_id=message_id)
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
@@ -469,36 +407,13 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
text = re.sub(r'(`[^`]+`)', lambda m: _ph(m.group(0)), text)
|
||||
|
||||
# 3) Convert markdown links [text](url) → <url|text>
|
||||
def _convert_markdown_link(m):
|
||||
label = m.group(1)
|
||||
url = m.group(2).strip()
|
||||
if url.startswith('<') and url.endswith('>'):
|
||||
url = url[1:-1].strip()
|
||||
return _ph(f'<{url}|{label}>')
|
||||
|
||||
text = re.sub(
|
||||
r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)',
|
||||
_convert_markdown_link,
|
||||
r'\[([^\]]+)\]\(([^)]+)\)',
|
||||
lambda m: _ph(f'<{m.group(2)}|{m.group(1)}>'),
|
||||
text,
|
||||
)
|
||||
|
||||
# 4) Protect existing Slack entities/manual links so escaping and later
|
||||
# formatting passes don't break them.
|
||||
text = re.sub(
|
||||
r'(<(?:[@#!]|(?:https?|mailto|tel):)[^>\n]+>)',
|
||||
lambda m: _ph(m.group(1)),
|
||||
text,
|
||||
)
|
||||
|
||||
# 5) Protect blockquote markers before escaping
|
||||
text = re.sub(r'^(>+\s)', lambda m: _ph(m.group(0)), text, flags=re.MULTILINE)
|
||||
|
||||
# 6) Escape Slack control characters in remaining plain text.
|
||||
# Unescape first so already-escaped input doesn't get double-escaped.
|
||||
text = text.replace('&', '&').replace('<', '<').replace('>', '>')
|
||||
text = text.replace('&', '&').replace('<', '<').replace('>', '>')
|
||||
|
||||
# 7) Convert headers (## Title) → *Title* (bold)
|
||||
# 4) Convert headers (## Title) → *Title* (bold)
|
||||
def _convert_header(m):
|
||||
inner = m.group(1).strip()
|
||||
# Strip redundant bold markers inside a header
|
||||
@@ -509,39 +424,34 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
|
||||
)
|
||||
|
||||
# 8) Convert bold+italic: ***text*** → *_text_* (Slack bold wrapping italic)
|
||||
text = re.sub(
|
||||
r'\*\*\*(.+?)\*\*\*',
|
||||
lambda m: _ph(f'*_{m.group(1)}_*'),
|
||||
text,
|
||||
)
|
||||
|
||||
# 9) Convert bold: **text** → *text* (Slack bold)
|
||||
# 5) Convert bold: **text** → *text* (Slack bold)
|
||||
text = re.sub(
|
||||
r'\*\*(.+?)\*\*',
|
||||
lambda m: _ph(f'*{m.group(1)}*'),
|
||||
text,
|
||||
)
|
||||
|
||||
# 10) Convert italic: _text_ stays as _text_ (already Slack italic)
|
||||
# Single *text* → _text_ (Slack italic)
|
||||
# 6) Convert italic: _text_ stays as _text_ (already Slack italic)
|
||||
# Single *text* → _text_ (Slack italic)
|
||||
text = re.sub(
|
||||
r'(?<!\*)\*([^*\n]+)\*(?!\*)',
|
||||
lambda m: _ph(f'_{m.group(1)}_'),
|
||||
text,
|
||||
)
|
||||
|
||||
# 11) Convert strikethrough: ~~text~~ → ~text~
|
||||
# 7) Convert strikethrough: ~~text~~ → ~text~
|
||||
text = re.sub(
|
||||
r'~~(.+?)~~',
|
||||
lambda m: _ph(f'~{m.group(1)}~'),
|
||||
text,
|
||||
)
|
||||
|
||||
# 12) Blockquotes: > prefix is already protected by step 5 above.
|
||||
# 8) Convert blockquotes: > text → > text (same syntax, just ensure
|
||||
# no extra escaping happens to the > character)
|
||||
# Slack uses the same > prefix, so this is a no-op for content.
|
||||
|
||||
# 13) Restore placeholders in reverse order
|
||||
for key in reversed(placeholders):
|
||||
# 9) Restore placeholders in reverse order
|
||||
for key in reversed(list(placeholders.keys())):
|
||||
text = text.replace(key, placeholders[key])
|
||||
|
||||
return text
|
||||
@@ -649,27 +559,11 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(image_url):
|
||||
logger.warning("[Slack] Blocked unsafe image URL (SSRF protection)")
|
||||
return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata)
|
||||
|
||||
try:
|
||||
import httpx
|
||||
|
||||
async def _ssrf_redirect_guard(response):
|
||||
"""Re-check redirect targets so public URLs cannot bounce into private IPs."""
|
||||
if response.is_redirect and response.next_request:
|
||||
redirect_url = str(response.next_request.url)
|
||||
if not is_safe_url(redirect_url):
|
||||
raise ValueError("Blocked redirect to private/internal address")
|
||||
|
||||
# Download the image first
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
follow_redirects=True,
|
||||
event_hooks={"response": [_ssrf_redirect_guard]},
|
||||
) as client:
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(image_url)
|
||||
response.raise_for_status()
|
||||
|
||||
@@ -686,7 +580,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
logger.warning(
|
||||
"[Slack] Failed to upload image from URL %s, falling back to text: %s",
|
||||
safe_url_for_log(image_url),
|
||||
image_url,
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
@@ -820,135 +714,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
|
||||
# ----- Internal handlers -----
|
||||
|
||||
def _assistant_thread_key(self, channel_id: str, thread_ts: str) -> Optional[Tuple[str, str]]:
|
||||
"""Return a stable cache key for Slack assistant thread metadata."""
|
||||
if not channel_id or not thread_ts:
|
||||
return None
|
||||
return (str(channel_id), str(thread_ts))
|
||||
|
||||
def _extract_assistant_thread_metadata(self, event: dict) -> Dict[str, str]:
|
||||
"""Extract Slack Assistant thread identity data from an event payload."""
|
||||
assistant_thread = event.get("assistant_thread") or {}
|
||||
context = assistant_thread.get("context") or event.get("context") or {}
|
||||
|
||||
channel_id = (
|
||||
assistant_thread.get("channel_id")
|
||||
or event.get("channel")
|
||||
or context.get("channel_id")
|
||||
or ""
|
||||
)
|
||||
thread_ts = (
|
||||
assistant_thread.get("thread_ts")
|
||||
or event.get("thread_ts")
|
||||
or event.get("message_ts")
|
||||
or ""
|
||||
)
|
||||
user_id = (
|
||||
assistant_thread.get("user_id")
|
||||
or event.get("user")
|
||||
or context.get("user_id")
|
||||
or ""
|
||||
)
|
||||
team_id = (
|
||||
event.get("team")
|
||||
or event.get("team_id")
|
||||
or assistant_thread.get("team_id")
|
||||
or ""
|
||||
)
|
||||
context_channel_id = context.get("channel_id") or ""
|
||||
|
||||
return {
|
||||
"channel_id": str(channel_id) if channel_id else "",
|
||||
"thread_ts": str(thread_ts) if thread_ts else "",
|
||||
"user_id": str(user_id) if user_id else "",
|
||||
"team_id": str(team_id) if team_id else "",
|
||||
"context_channel_id": str(context_channel_id) if context_channel_id else "",
|
||||
}
|
||||
|
||||
def _cache_assistant_thread_metadata(self, metadata: Dict[str, str]) -> None:
|
||||
"""Remember assistant thread identity data for later message events."""
|
||||
channel_id = metadata.get("channel_id", "")
|
||||
thread_ts = metadata.get("thread_ts", "")
|
||||
key = self._assistant_thread_key(channel_id, thread_ts)
|
||||
if not key:
|
||||
return
|
||||
|
||||
existing = self._assistant_threads.get(key, {})
|
||||
merged = dict(existing)
|
||||
merged.update({k: v for k, v in metadata.items() if v})
|
||||
self._assistant_threads[key] = merged
|
||||
|
||||
# Evict oldest entries when the cache exceeds the limit
|
||||
if len(self._assistant_threads) > self._ASSISTANT_THREADS_MAX:
|
||||
excess = len(self._assistant_threads) - self._ASSISTANT_THREADS_MAX // 2
|
||||
for old_key in list(self._assistant_threads)[:excess]:
|
||||
del self._assistant_threads[old_key]
|
||||
|
||||
team_id = merged.get("team_id", "")
|
||||
if team_id and channel_id:
|
||||
self._channel_team[channel_id] = team_id
|
||||
|
||||
def _lookup_assistant_thread_metadata(
|
||||
self,
|
||||
event: dict,
|
||||
channel_id: str = "",
|
||||
thread_ts: str = "",
|
||||
) -> Dict[str, str]:
|
||||
"""Load cached assistant-thread metadata that matches the current event."""
|
||||
metadata = self._extract_assistant_thread_metadata(event)
|
||||
if channel_id and not metadata.get("channel_id"):
|
||||
metadata["channel_id"] = channel_id
|
||||
if thread_ts and not metadata.get("thread_ts"):
|
||||
metadata["thread_ts"] = thread_ts
|
||||
|
||||
key = self._assistant_thread_key(
|
||||
metadata.get("channel_id", ""),
|
||||
metadata.get("thread_ts", ""),
|
||||
)
|
||||
cached = self._assistant_threads.get(key, {}) if key else {}
|
||||
if cached:
|
||||
merged = dict(cached)
|
||||
merged.update({k: v for k, v in metadata.items() if v})
|
||||
return merged
|
||||
return metadata
|
||||
|
||||
def _seed_assistant_thread_session(self, metadata: Dict[str, str]) -> None:
|
||||
"""Prime the session store so assistant threads get stable user scoping."""
|
||||
session_store = getattr(self, "_session_store", None)
|
||||
if not session_store:
|
||||
return
|
||||
|
||||
channel_id = metadata.get("channel_id", "")
|
||||
thread_ts = metadata.get("thread_ts", "")
|
||||
user_id = metadata.get("user_id", "")
|
||||
if not channel_id or not thread_ts or not user_id:
|
||||
return
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=channel_id,
|
||||
chat_name=channel_id,
|
||||
chat_type="dm",
|
||||
user_id=user_id,
|
||||
thread_id=thread_ts,
|
||||
chat_topic=metadata.get("context_channel_id") or None,
|
||||
)
|
||||
|
||||
try:
|
||||
session_store.get_or_create_session(source)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[Slack] Failed to seed assistant thread session for %s/%s",
|
||||
channel_id,
|
||||
thread_ts,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
async def _handle_assistant_thread_lifecycle_event(self, event: dict) -> None:
|
||||
"""Handle Slack Assistant lifecycle events that carry user/thread identity."""
|
||||
metadata = self._extract_assistant_thread_metadata(event)
|
||||
self._cache_assistant_thread_metadata(metadata)
|
||||
self._seed_assistant_thread_session(metadata)
|
||||
|
||||
async def _handle_slack_message(self, event: dict) -> None:
|
||||
"""Handle an incoming Slack message event."""
|
||||
# Dedup: Slack Socket Mode can redeliver events after reconnects (#4777)
|
||||
@@ -965,26 +730,9 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
if v > cutoff
|
||||
}
|
||||
|
||||
# Bot message filtering (SLACK_ALLOW_BOTS / config allow_bots):
|
||||
# "none" — ignore all bot messages (default, backward-compatible)
|
||||
# "mentions" — accept bot messages only when they @mention us
|
||||
# "all" — accept all bot messages (except our own)
|
||||
# Ignore bot messages (including our own)
|
||||
if event.get("bot_id") or event.get("subtype") == "bot_message":
|
||||
allow_bots = self.config.extra.get("allow_bots", "")
|
||||
if not allow_bots:
|
||||
allow_bots = os.getenv("SLACK_ALLOW_BOTS", "none")
|
||||
allow_bots = str(allow_bots).lower().strip()
|
||||
if allow_bots == "none":
|
||||
return
|
||||
elif allow_bots == "mentions":
|
||||
text_check = event.get("text", "")
|
||||
if self._bot_user_id and f"<@{self._bot_user_id}>" not in text_check:
|
||||
return
|
||||
# "all" falls through to process the message
|
||||
# Always ignore our own messages to prevent echo loops
|
||||
msg_user = event.get("user", "")
|
||||
if msg_user and self._bot_user_id and msg_user == self._bot_user_id:
|
||||
return
|
||||
return
|
||||
|
||||
# Ignore message edits and deletions
|
||||
subtype = event.get("subtype")
|
||||
@@ -992,21 +740,10 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
return
|
||||
|
||||
text = event.get("text", "")
|
||||
user_id = event.get("user", "")
|
||||
channel_id = event.get("channel", "")
|
||||
ts = event.get("ts", "")
|
||||
assistant_meta = self._lookup_assistant_thread_metadata(
|
||||
event,
|
||||
channel_id=channel_id,
|
||||
thread_ts=event.get("thread_ts", ""),
|
||||
)
|
||||
user_id = event.get("user") or assistant_meta.get("user_id", "")
|
||||
if not channel_id:
|
||||
channel_id = assistant_meta.get("channel_id", "")
|
||||
team_id = (
|
||||
event.get("team")
|
||||
or event.get("team_id")
|
||||
or assistant_meta.get("team_id", "")
|
||||
)
|
||||
team_id = event.get("team", "")
|
||||
|
||||
# Track which workspace owns this channel
|
||||
if team_id and channel_id:
|
||||
@@ -1014,9 +751,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
|
||||
# Determine if this is a DM or channel message
|
||||
channel_type = event.get("channel_type", "")
|
||||
if not channel_type and channel_id.startswith("D"):
|
||||
channel_type = "im"
|
||||
is_dm = channel_type in ("im", "mpim") # Both 1:1 and group DMs
|
||||
is_dm = channel_type == "im"
|
||||
|
||||
# Build thread_ts for session keying.
|
||||
# In channels: fall back to ts so each top-level @mention starts a
|
||||
@@ -1024,72 +759,17 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# In DMs: only use the real thread_ts — top-level DMs should share
|
||||
# one continuous session, threaded DMs get their own session.
|
||||
if is_dm:
|
||||
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts") # None for top-level DMs
|
||||
thread_ts = event.get("thread_ts") # None for top-level DMs
|
||||
else:
|
||||
thread_ts = event.get("thread_ts") or ts # ts fallback for channels
|
||||
|
||||
# In channels, respond if:
|
||||
# 0. Channel is in free_response_channels, OR require_mention is
|
||||
# disabled — always process regardless of mention.
|
||||
# 1. The bot is @mentioned in this message, OR
|
||||
# 2. The message is a reply in a thread the bot started/participated in, OR
|
||||
# 3. The message is in a thread where the bot was previously @mentioned, OR
|
||||
# 4. There's an existing session for this thread (survives restarts)
|
||||
# In channels, only respond if bot is mentioned
|
||||
bot_uid = self._team_bot_user_ids.get(team_id, self._bot_user_id)
|
||||
is_mentioned = bot_uid and f"<@{bot_uid}>" in text
|
||||
event_thread_ts = event.get("thread_ts")
|
||||
is_thread_reply = bool(event_thread_ts and event_thread_ts != ts)
|
||||
|
||||
if not is_dm and bot_uid:
|
||||
if channel_id in self._slack_free_response_channels():
|
||||
pass # Free-response channel — always process
|
||||
elif not self._slack_require_mention():
|
||||
pass # Mention requirement disabled globally for Slack
|
||||
elif not is_mentioned:
|
||||
reply_to_bot_thread = (
|
||||
is_thread_reply and event_thread_ts in self._bot_message_ts
|
||||
)
|
||||
in_mentioned_thread = (
|
||||
event_thread_ts is not None
|
||||
and event_thread_ts in self._mentioned_threads
|
||||
)
|
||||
has_session = (
|
||||
is_thread_reply
|
||||
and self._has_active_session_for_thread(
|
||||
channel_id=channel_id,
|
||||
thread_ts=event_thread_ts,
|
||||
user_id=user_id,
|
||||
)
|
||||
)
|
||||
if not reply_to_bot_thread and not in_mentioned_thread and not has_session:
|
||||
return
|
||||
|
||||
if is_mentioned:
|
||||
if f"<@{bot_uid}>" not in text:
|
||||
return
|
||||
# Strip the bot mention from the text
|
||||
text = text.replace(f"<@{bot_uid}>", "").strip()
|
||||
# Register this thread so all future messages auto-trigger the bot
|
||||
if event_thread_ts:
|
||||
self._mentioned_threads.add(event_thread_ts)
|
||||
if len(self._mentioned_threads) > self._MENTIONED_THREADS_MAX:
|
||||
to_remove = list(self._mentioned_threads)[:self._MENTIONED_THREADS_MAX // 2]
|
||||
for t in to_remove:
|
||||
self._mentioned_threads.discard(t)
|
||||
|
||||
# When entering a thread for the first time (no existing session),
|
||||
# fetch thread context so the agent understands the conversation.
|
||||
if is_thread_reply and not self._has_active_session_for_thread(
|
||||
channel_id=channel_id,
|
||||
thread_ts=event_thread_ts,
|
||||
user_id=user_id,
|
||||
):
|
||||
thread_context = await self._fetch_thread_context(
|
||||
channel_id=channel_id,
|
||||
thread_ts=event_thread_ts,
|
||||
current_ts=ts,
|
||||
team_id=team_id,
|
||||
)
|
||||
if thread_context:
|
||||
text = thread_context + text
|
||||
|
||||
# Determine message type
|
||||
msg_type = MessageType.TEXT
|
||||
@@ -1203,305 +883,14 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
reply_to_message_id=thread_ts if thread_ts != ts else None,
|
||||
)
|
||||
|
||||
# Only react when bot is directly addressed (DM or @mention).
|
||||
# In listen-all channels (require_mention=false), reacting to every
|
||||
# casual message would be noisy.
|
||||
_should_react = is_dm or is_mentioned
|
||||
|
||||
if _should_react:
|
||||
await self._add_reaction(channel_id, ts, "eyes")
|
||||
# Add 👀 reaction to acknowledge receipt
|
||||
await self._add_reaction(channel_id, ts, "eyes")
|
||||
|
||||
await self.handle_message(msg_event)
|
||||
|
||||
if _should_react:
|
||||
await self._remove_reaction(channel_id, ts, "eyes")
|
||||
await self._add_reaction(channel_id, ts, "white_check_mark")
|
||||
|
||||
# ----- Approval button support (Block Kit) -----
|
||||
|
||||
async def send_exec_approval(
|
||||
self, chat_id: str, command: str, session_key: str,
|
||||
description: str = "dangerous command",
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a Block Kit approval prompt with interactive buttons.
|
||||
|
||||
The buttons call ``resolve_gateway_approval()`` to unblock the waiting
|
||||
agent thread — same mechanism as the text ``/approve`` flow.
|
||||
"""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
cmd_preview = command[:2900] + "..." if len(command) > 2900 else command
|
||||
thread_ts = self._resolve_thread_ts(None, metadata)
|
||||
|
||||
blocks = [
|
||||
{
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": (
|
||||
f":warning: *Command Approval Required*\n"
|
||||
f"```{cmd_preview}```\n"
|
||||
f"Reason: {description}"
|
||||
),
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "actions",
|
||||
"elements": [
|
||||
{
|
||||
"type": "button",
|
||||
"text": {"type": "plain_text", "text": "Allow Once"},
|
||||
"style": "primary",
|
||||
"action_id": "hermes_approve_once",
|
||||
"value": session_key,
|
||||
},
|
||||
{
|
||||
"type": "button",
|
||||
"text": {"type": "plain_text", "text": "Allow Session"},
|
||||
"action_id": "hermes_approve_session",
|
||||
"value": session_key,
|
||||
},
|
||||
{
|
||||
"type": "button",
|
||||
"text": {"type": "plain_text", "text": "Always Allow"},
|
||||
"action_id": "hermes_approve_always",
|
||||
"value": session_key,
|
||||
},
|
||||
{
|
||||
"type": "button",
|
||||
"text": {"type": "plain_text", "text": "Deny"},
|
||||
"style": "danger",
|
||||
"action_id": "hermes_deny",
|
||||
"value": session_key,
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
kwargs: Dict[str, Any] = {
|
||||
"channel": chat_id,
|
||||
"text": f"⚠️ Command approval required: {cmd_preview[:100]}",
|
||||
"blocks": blocks,
|
||||
}
|
||||
if thread_ts:
|
||||
kwargs["thread_ts"] = thread_ts
|
||||
|
||||
result = await self._get_client(chat_id).chat_postMessage(**kwargs)
|
||||
msg_ts = result.get("ts", "")
|
||||
if msg_ts:
|
||||
self._approval_resolved[msg_ts] = False
|
||||
|
||||
return SendResult(success=True, message_id=msg_ts, raw_response=result)
|
||||
except Exception as e:
|
||||
logger.error("[Slack] send_exec_approval failed: %s", e, exc_info=True)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def _handle_approval_action(self, ack, body, action) -> None:
|
||||
"""Handle an approval button click from Block Kit."""
|
||||
await ack()
|
||||
|
||||
action_id = action.get("action_id", "")
|
||||
session_key = action.get("value", "")
|
||||
message = body.get("message", {})
|
||||
msg_ts = message.get("ts", "")
|
||||
channel_id = body.get("channel", {}).get("id", "")
|
||||
user_name = body.get("user", {}).get("name", "unknown")
|
||||
user_id = body.get("user", {}).get("id", "")
|
||||
|
||||
# Only authorized users may click approval buttons. Button clicks
|
||||
# bypass the normal message auth flow in gateway/run.py, so we must
|
||||
# check here as well.
|
||||
allowed_csv = os.getenv("SLACK_ALLOWED_USERS", "").strip()
|
||||
if allowed_csv:
|
||||
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
|
||||
if "*" not in allowed_ids and user_id not in allowed_ids:
|
||||
logger.warning(
|
||||
"[Slack] Unauthorized approval click by %s (%s) — ignoring",
|
||||
user_name, user_id,
|
||||
)
|
||||
return
|
||||
|
||||
# Map action_id to approval choice
|
||||
choice_map = {
|
||||
"hermes_approve_once": "once",
|
||||
"hermes_approve_session": "session",
|
||||
"hermes_approve_always": "always",
|
||||
"hermes_deny": "deny",
|
||||
}
|
||||
choice = choice_map.get(action_id, "deny")
|
||||
|
||||
# Prevent double-clicks — atomic pop; first caller gets False, others get True (default)
|
||||
if self._approval_resolved.pop(msg_ts, True):
|
||||
return
|
||||
|
||||
# Update the message to show the decision and remove buttons
|
||||
label_map = {
|
||||
"once": f"✅ Approved once by {user_name}",
|
||||
"session": f"✅ Approved for session by {user_name}",
|
||||
"always": f"✅ Approved permanently by {user_name}",
|
||||
"deny": f"❌ Denied by {user_name}",
|
||||
}
|
||||
decision_text = label_map.get(choice, f"Resolved by {user_name}")
|
||||
|
||||
# Get original text from the section block
|
||||
original_text = ""
|
||||
for block in message.get("blocks", []):
|
||||
if block.get("type") == "section":
|
||||
original_text = block.get("text", {}).get("text", "")
|
||||
break
|
||||
|
||||
updated_blocks = [
|
||||
{
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": original_text or "Command approval request",
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "context",
|
||||
"elements": [
|
||||
{"type": "mrkdwn", "text": decision_text},
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
try:
|
||||
await self._get_client(channel_id).chat_update(
|
||||
channel=channel_id,
|
||||
ts=msg_ts,
|
||||
text=decision_text,
|
||||
blocks=updated_blocks,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("[Slack] Failed to update approval message: %s", e)
|
||||
|
||||
# Resolve the approval — this unblocks the agent thread
|
||||
try:
|
||||
from tools.approval import resolve_gateway_approval
|
||||
count = resolve_gateway_approval(session_key, choice)
|
||||
logger.info(
|
||||
"Slack button resolved %d approval(s) for session %s (choice=%s, user=%s)",
|
||||
count, session_key, choice, user_name,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("Failed to resolve gateway approval from Slack button: %s", exc)
|
||||
|
||||
# (approval state already consumed by atomic pop above)
|
||||
|
||||
# ----- Thread context fetching -----
|
||||
|
||||
async def _fetch_thread_context(
|
||||
self, channel_id: str, thread_ts: str, current_ts: str,
|
||||
team_id: str = "", limit: int = 30,
|
||||
) -> str:
|
||||
"""Fetch recent thread messages to provide context when the bot is
|
||||
mentioned mid-thread for the first time.
|
||||
|
||||
This method is only called when there is NO active session for the
|
||||
thread (guarded at the call site by _has_active_session_for_thread).
|
||||
That guard ensures thread messages are prepended only on the very
|
||||
first turn — after that the session history already holds them, so
|
||||
there is no duplication across subsequent turns.
|
||||
|
||||
Results are cached for _THREAD_CACHE_TTL seconds per thread to avoid
|
||||
hammering conversations.replies (Tier 3, ~50 req/min).
|
||||
|
||||
Returns a formatted string with prior thread history, or empty string
|
||||
on failure or if the thread has no prior messages.
|
||||
"""
|
||||
cache_key = f"{channel_id}:{thread_ts}"
|
||||
now = time.monotonic()
|
||||
cached = self._thread_context_cache.get(cache_key)
|
||||
if cached and (now - cached.fetched_at) < self._THREAD_CACHE_TTL:
|
||||
return cached.content
|
||||
|
||||
try:
|
||||
client = self._get_client(channel_id)
|
||||
|
||||
# Retry with exponential backoff for Tier-3 rate limits (429).
|
||||
result = None
|
||||
for attempt in range(3):
|
||||
try:
|
||||
result = await client.conversations_replies(
|
||||
channel=channel_id,
|
||||
ts=thread_ts,
|
||||
limit=limit + 1, # +1 because it includes the current message
|
||||
inclusive=True,
|
||||
)
|
||||
break
|
||||
except Exception as exc:
|
||||
# Check for rate-limit error from slack_sdk
|
||||
err_str = str(exc).lower()
|
||||
is_rate_limit = (
|
||||
"ratelimited" in err_str
|
||||
or "429" in err_str
|
||||
or "rate_limited" in err_str
|
||||
)
|
||||
if is_rate_limit and attempt < 2:
|
||||
retry_after = 1.0 * (2 ** attempt) # 1s, 2s
|
||||
logger.warning(
|
||||
"[Slack] conversations.replies rate limited; retrying in %.1fs (attempt %d/3)",
|
||||
retry_after, attempt + 1,
|
||||
)
|
||||
await asyncio.sleep(retry_after)
|
||||
continue
|
||||
raise
|
||||
|
||||
if result is None:
|
||||
return ""
|
||||
|
||||
messages = result.get("messages", [])
|
||||
if not messages:
|
||||
return ""
|
||||
|
||||
bot_uid = self._team_bot_user_ids.get(team_id, self._bot_user_id)
|
||||
context_parts = []
|
||||
for msg in messages:
|
||||
msg_ts = msg.get("ts", "")
|
||||
# Exclude the current triggering message — it will be delivered
|
||||
# as the user message itself, so including it here would duplicate it.
|
||||
if msg_ts == current_ts:
|
||||
continue
|
||||
# Exclude our own bot messages to avoid circular context.
|
||||
if msg.get("bot_id") or msg.get("subtype") == "bot_message":
|
||||
continue
|
||||
|
||||
msg_text = msg.get("text", "").strip()
|
||||
if not msg_text:
|
||||
continue
|
||||
|
||||
# Strip bot mentions from context messages
|
||||
if bot_uid:
|
||||
msg_text = msg_text.replace(f"<@{bot_uid}>", "").strip()
|
||||
|
||||
msg_user = msg.get("user", "unknown")
|
||||
is_parent = msg_ts == thread_ts
|
||||
prefix = "[thread parent] " if is_parent else ""
|
||||
name = await self._resolve_user_name(msg_user, chat_id=channel_id)
|
||||
context_parts.append(f"{prefix}{name}: {msg_text}")
|
||||
|
||||
content = ""
|
||||
if context_parts:
|
||||
content = (
|
||||
"[Thread context — prior messages in this thread (not yet in conversation history):]\n"
|
||||
+ "\n".join(context_parts)
|
||||
+ "\n[End of thread context]\n\n"
|
||||
)
|
||||
|
||||
self._thread_context_cache[cache_key] = _ThreadContextCache(
|
||||
content=content,
|
||||
fetched_at=now,
|
||||
message_count=len(context_parts),
|
||||
)
|
||||
return content
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("[Slack] Failed to fetch thread context: %s", e)
|
||||
return ""
|
||||
# Replace 👀 with ✅ when done
|
||||
await self._remove_reaction(channel_id, ts, "eyes")
|
||||
await self._add_reaction(channel_id, ts, "white_check_mark")
|
||||
|
||||
async def _handle_slash_command(self, command: dict) -> None:
|
||||
"""Handle /hermes slash command."""
|
||||
@@ -1544,53 +933,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
def _has_active_session_for_thread(
|
||||
self,
|
||||
channel_id: str,
|
||||
thread_ts: str,
|
||||
user_id: str,
|
||||
) -> bool:
|
||||
"""Check if there's an active session for a thread.
|
||||
|
||||
Used to determine if thread replies without @mentions should be
|
||||
processed (they should if there's an active session).
|
||||
|
||||
Uses ``build_session_key()`` as the single source of truth for key
|
||||
construction — avoids the bug where manual key building didn't
|
||||
respect ``thread_sessions_per_user`` and ``group_sessions_per_user``
|
||||
settings correctly.
|
||||
"""
|
||||
session_store = getattr(self, "_session_store", None)
|
||||
if not session_store:
|
||||
return False
|
||||
|
||||
try:
|
||||
from gateway.session import SessionSource, build_session_key
|
||||
|
||||
source = SessionSource(
|
||||
platform=Platform.SLACK,
|
||||
chat_id=channel_id,
|
||||
chat_type="group",
|
||||
user_id=user_id,
|
||||
thread_id=thread_ts,
|
||||
)
|
||||
|
||||
# Read session isolation settings from the store's config
|
||||
store_cfg = getattr(session_store, "config", None)
|
||||
gspu = getattr(store_cfg, "group_sessions_per_user", True) if store_cfg else True
|
||||
tspu = getattr(store_cfg, "thread_sessions_per_user", False) if store_cfg else False
|
||||
|
||||
session_key = build_session_key(
|
||||
source,
|
||||
group_sessions_per_user=gspu,
|
||||
thread_sessions_per_user=tspu,
|
||||
)
|
||||
|
||||
session_store._ensure_loaded()
|
||||
return session_key in session_store._entries
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
|
||||
"""Download a Slack file using the bot token for auth, with retry."""
|
||||
import asyncio
|
||||
@@ -1608,18 +950,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Slack may return an HTML sign-in/redirect page
|
||||
# instead of actual media bytes (e.g. expired token,
|
||||
# restricted file access). Detect this early so we
|
||||
# don't cache bogus data and confuse downstream tools.
|
||||
ct = response.headers.get("content-type", "")
|
||||
if "text/html" in ct:
|
||||
raise ValueError(
|
||||
"Slack returned HTML instead of media "
|
||||
f"(content-type: {ct}); "
|
||||
"check bot token scopes and file permissions"
|
||||
)
|
||||
|
||||
if audio:
|
||||
from gateway.platforms.base import cache_audio_from_bytes
|
||||
return cache_audio_from_bytes(response.content, ext)
|
||||
@@ -1666,30 +996,3 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
continue
|
||||
raise
|
||||
raise last_exc
|
||||
|
||||
# ── Channel mention gating ─────────────────────────────────────────────
|
||||
|
||||
def _slack_require_mention(self) -> bool:
|
||||
"""Return whether channel messages require an explicit bot mention.
|
||||
|
||||
Uses explicit-false parsing (like Discord/Matrix) rather than
|
||||
truthy parsing, since the safe default is True (gating on).
|
||||
Unrecognised or empty values keep gating enabled.
|
||||
"""
|
||||
configured = self.config.extra.get("require_mention")
|
||||
if configured is not None:
|
||||
if isinstance(configured, str):
|
||||
return configured.lower() not in ("false", "0", "no", "off")
|
||||
return bool(configured)
|
||||
return os.getenv("SLACK_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")
|
||||
|
||||
def _slack_free_response_channels(self) -> set:
|
||||
"""Return channel IDs where no @mention is required."""
|
||||
raw = self.config.extra.get("free_response_channels")
|
||||
if raw is None:
|
||||
raw = os.getenv("SLACK_FREE_RESPONSE_CHANNELS", "")
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
return {part.strip() for part in raw.split(",") if part.strip()}
|
||||
return set()
|
||||
|
||||
+19
-570
@@ -60,7 +60,6 @@ from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
ProcessingOutcome,
|
||||
SendResult,
|
||||
cache_image_from_bytes,
|
||||
cache_audio_from_bytes,
|
||||
@@ -122,9 +121,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
# Telegram message limits
|
||||
MAX_MESSAGE_LENGTH = 4096
|
||||
# Threshold for detecting Telegram client-side message splits.
|
||||
# When a chunk is near this limit, a continuation is almost certain.
|
||||
_SPLIT_THRESHOLD = 4000
|
||||
MEDIA_GROUP_WAIT_SECONDS = 0.8
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
@@ -144,7 +140,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
# Buffer rapid text messages so Telegram client-side splits of long
|
||||
# messages are aggregated into a single MessageEvent.
|
||||
self._text_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_TEXT_BATCH_DELAY_SECONDS", "0.6"))
|
||||
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_TELEGRAM_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
|
||||
self._pending_text_batches: Dict[str, MessageEvent] = {}
|
||||
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
|
||||
self._token_lock_identity: Optional[str] = None
|
||||
@@ -156,10 +151,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
self._dm_topics: Dict[str, int] = {}
|
||||
# DM Topics config from extra.dm_topics
|
||||
self._dm_topics_config: List[Dict[str, Any]] = self.config.extra.get("dm_topics", [])
|
||||
# Interactive model picker state per chat
|
||||
self._model_picker_state: Dict[str, dict] = {}
|
||||
# Approval button state: message_id → session_key
|
||||
self._approval_state: Dict[int, str] = {}
|
||||
|
||||
def _fallback_ips(self) -> list[str]:
|
||||
"""Return validated fallback IPs from config (populated by _apply_env_overrides)."""
|
||||
@@ -518,45 +509,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
# Build the application
|
||||
builder = Application.builder().token(self.config.token)
|
||||
custom_base_url = self.config.extra.get("base_url")
|
||||
if custom_base_url:
|
||||
builder = builder.base_url(custom_base_url)
|
||||
builder = builder.base_file_url(
|
||||
self.config.extra.get("base_file_url", custom_base_url)
|
||||
)
|
||||
logger.info(
|
||||
"[%s] Using custom Telegram base_url: %s",
|
||||
self.name, custom_base_url,
|
||||
)
|
||||
|
||||
# PTB defaults (pool_timeout=1s) are too aggressive on flaky networks and
|
||||
# can trigger "Pool timeout: All connections in the connection pool are occupied"
|
||||
# during reconnect/bootstrap. Use safer defaults and allow env overrides.
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
try:
|
||||
return int(os.getenv(name, str(default)))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
try:
|
||||
return float(os.getenv(name, str(default)))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
request_kwargs = {
|
||||
"connection_pool_size": _env_int("HERMES_TELEGRAM_HTTP_POOL_SIZE", 512),
|
||||
"pool_timeout": _env_float("HERMES_TELEGRAM_HTTP_POOL_TIMEOUT", 8.0),
|
||||
"connect_timeout": _env_float("HERMES_TELEGRAM_HTTP_CONNECT_TIMEOUT", 10.0),
|
||||
"read_timeout": _env_float("HERMES_TELEGRAM_HTTP_READ_TIMEOUT", 20.0),
|
||||
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
|
||||
}
|
||||
|
||||
proxy_configured = any(
|
||||
(os.getenv(k) or "").strip()
|
||||
for k in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy")
|
||||
)
|
||||
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
|
||||
fallback_ips = self._fallback_ips()
|
||||
if not fallback_ips:
|
||||
fallback_ips = await discover_fallback_ips()
|
||||
@@ -565,32 +517,16 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
self.name,
|
||||
", ".join(fallback_ips),
|
||||
)
|
||||
|
||||
if fallback_ips and not proxy_configured and not disable_fallback:
|
||||
if fallback_ips:
|
||||
logger.info(
|
||||
"[%s] Telegram fallback IPs active: %s",
|
||||
self.name,
|
||||
", ".join(fallback_ips),
|
||||
)
|
||||
# Keep request/update pools separate to reduce contention during
|
||||
# polling reconnect + bot API bootstrap/delete_webhook calls.
|
||||
request = HTTPXRequest(
|
||||
**request_kwargs,
|
||||
httpx_kwargs={"transport": TelegramFallbackTransport(fallback_ips)},
|
||||
)
|
||||
get_updates_request = HTTPXRequest(
|
||||
**request_kwargs,
|
||||
httpx_kwargs={"transport": TelegramFallbackTransport(fallback_ips)},
|
||||
)
|
||||
else:
|
||||
if proxy_configured:
|
||||
logger.info("[%s] Proxy configured; skipping Telegram fallback-IP transport", self.name)
|
||||
elif disable_fallback:
|
||||
logger.info("[%s] Telegram fallback-IP transport disabled via env", self.name)
|
||||
request = HTTPXRequest(**request_kwargs)
|
||||
get_updates_request = HTTPXRequest(**request_kwargs)
|
||||
|
||||
builder = builder.request(request).get_updates_request(get_updates_request)
|
||||
transport = TelegramFallbackTransport(fallback_ips)
|
||||
request = HTTPXRequest(httpx_kwargs={"transport": transport})
|
||||
get_updates_request = HTTPXRequest(httpx_kwargs={"transport": transport})
|
||||
builder = builder.request(request).get_updates_request(get_updates_request)
|
||||
self._app = builder.build()
|
||||
self._bot = self._app.bot
|
||||
|
||||
@@ -1072,441 +1008,14 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
logger.warning("[%s] send_update_prompt failed: %s", self.name, e)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_exec_approval(
|
||||
self, chat_id: str, command: str, session_key: str,
|
||||
description: str = "dangerous command",
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send an inline-keyboard approval prompt with interactive buttons.
|
||||
|
||||
The buttons call ``resolve_gateway_approval()`` to unblock the waiting
|
||||
agent thread — same mechanism as the text ``/approve`` flow.
|
||||
"""
|
||||
if not self._bot:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
cmd_preview = command[:3800] + "..." if len(command) > 3800 else command
|
||||
text = (
|
||||
f"⚠️ *Command Approval Required*\n\n"
|
||||
f"`{cmd_preview}`\n\n"
|
||||
f"Reason: {description}"
|
||||
)
|
||||
|
||||
# Resolve thread context for thread replies
|
||||
thread_id = None
|
||||
if metadata:
|
||||
thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
|
||||
|
||||
# We'll use the message_id as part of callback_data to look up session_key
|
||||
# Send a placeholder first, then update — or use a counter.
|
||||
# Simpler: use a monotonic counter to generate short IDs.
|
||||
import itertools
|
||||
if not hasattr(self, "_approval_counter"):
|
||||
self._approval_counter = itertools.count(1)
|
||||
approval_id = next(self._approval_counter)
|
||||
|
||||
keyboard = InlineKeyboardMarkup([
|
||||
[
|
||||
InlineKeyboardButton("✅ Allow Once", callback_data=f"ea:once:{approval_id}"),
|
||||
InlineKeyboardButton("✅ Session", callback_data=f"ea:session:{approval_id}"),
|
||||
],
|
||||
[
|
||||
InlineKeyboardButton("✅ Always", callback_data=f"ea:always:{approval_id}"),
|
||||
InlineKeyboardButton("❌ Deny", callback_data=f"ea:deny:{approval_id}"),
|
||||
],
|
||||
])
|
||||
|
||||
kwargs: Dict[str, Any] = {
|
||||
"chat_id": int(chat_id),
|
||||
"text": text,
|
||||
"parse_mode": ParseMode.MARKDOWN,
|
||||
"reply_markup": keyboard,
|
||||
}
|
||||
if thread_id:
|
||||
kwargs["message_thread_id"] = int(thread_id)
|
||||
|
||||
msg = await self._bot.send_message(**kwargs)
|
||||
|
||||
# Store session_key keyed by approval_id for the callback handler
|
||||
self._approval_state[approval_id] = session_key
|
||||
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
except Exception as e:
|
||||
logger.warning("[%s] send_exec_approval failed: %s", self.name, e)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_model_picker(
|
||||
self,
|
||||
chat_id: str,
|
||||
providers: list,
|
||||
current_model: str,
|
||||
current_provider: str,
|
||||
session_key: str,
|
||||
on_model_selected,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send an interactive inline-keyboard model picker.
|
||||
|
||||
Two-step drill-down: provider selection → model selection.
|
||||
Edits the same message in-place as the user navigates.
|
||||
"""
|
||||
if not self._bot:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
from hermes_cli.providers import get_label
|
||||
except ImportError:
|
||||
def get_label(slug):
|
||||
return slug
|
||||
|
||||
try:
|
||||
# Build provider buttons — 2 per row
|
||||
buttons: list = []
|
||||
for p in providers:
|
||||
count = p.get("total_models", len(p.get("models", [])))
|
||||
label = f"{p['name']} ({count})"
|
||||
if p.get("is_current"):
|
||||
label = f"✓ {label}"
|
||||
# Compact callback data: mp:<slug> (max 64 bytes)
|
||||
buttons.append(
|
||||
InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
|
||||
)
|
||||
|
||||
rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
|
||||
rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
|
||||
keyboard = InlineKeyboardMarkup(rows)
|
||||
|
||||
provider_label = get_label(current_provider)
|
||||
text = (
|
||||
f"⚙ *Model Configuration*\n\n"
|
||||
f"Current model: `{current_model or 'unknown'}`\n"
|
||||
f"Provider: {provider_label}\n\n"
|
||||
f"Select a provider:"
|
||||
)
|
||||
|
||||
thread_id = metadata.get("thread_id") if metadata else None
|
||||
msg = await self._bot.send_message(
|
||||
chat_id=int(chat_id),
|
||||
text=text,
|
||||
parse_mode=ParseMode.MARKDOWN,
|
||||
reply_markup=keyboard,
|
||||
message_thread_id=int(thread_id) if thread_id else None,
|
||||
)
|
||||
|
||||
# Store picker state keyed by chat_id
|
||||
self._model_picker_state[str(chat_id)] = {
|
||||
"msg_id": msg.message_id,
|
||||
"providers": providers,
|
||||
"session_key": session_key,
|
||||
"on_model_selected": on_model_selected,
|
||||
"current_model": current_model,
|
||||
"current_provider": current_provider,
|
||||
}
|
||||
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
except Exception as e:
|
||||
logger.warning("[%s] send_model_picker failed: %s", self.name, e)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
_MODEL_PAGE_SIZE = 8
|
||||
|
||||
def _build_model_keyboard(self, models: list, page: int) -> tuple:
|
||||
"""Build paginated model buttons. Returns (keyboard, page_info_text)."""
|
||||
page_size = self._MODEL_PAGE_SIZE
|
||||
total = len(models)
|
||||
total_pages = max(1, (total + page_size - 1) // page_size)
|
||||
page = max(0, min(page, total_pages - 1))
|
||||
|
||||
start = page * page_size
|
||||
end = min(start + page_size, total)
|
||||
page_models = models[start:end]
|
||||
|
||||
buttons: list = []
|
||||
for i, model_id in enumerate(page_models):
|
||||
abs_idx = start + i
|
||||
short = model_id.split("/")[-1] if "/" in model_id else model_id
|
||||
if len(short) > 38:
|
||||
short = short[:35] + "..."
|
||||
buttons.append(
|
||||
InlineKeyboardButton(short, callback_data=f"mm:{abs_idx}")
|
||||
)
|
||||
|
||||
rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
|
||||
|
||||
# Pagination row (if needed)
|
||||
if total_pages > 1:
|
||||
nav: list = []
|
||||
if page > 0:
|
||||
nav.append(InlineKeyboardButton("◀ Prev", callback_data=f"mg:{page - 1}"))
|
||||
nav.append(InlineKeyboardButton(f"{page + 1}/{total_pages}", callback_data="mx:noop"))
|
||||
if page < total_pages - 1:
|
||||
nav.append(InlineKeyboardButton("Next ▶", callback_data=f"mg:{page + 1}"))
|
||||
rows.append(nav)
|
||||
|
||||
rows.append([
|
||||
InlineKeyboardButton("◀ Back", callback_data="mb"),
|
||||
InlineKeyboardButton("✗ Cancel", callback_data="mx"),
|
||||
])
|
||||
|
||||
page_info = f" ({start + 1}–{end} of {total})" if total_pages > 1 else ""
|
||||
return InlineKeyboardMarkup(rows), page_info
|
||||
|
||||
async def _handle_model_picker_callback(
|
||||
self, query, data: str, chat_id: str
|
||||
) -> None:
|
||||
"""Handle model picker inline keyboard callbacks (mp:/mm:/mb:/mx:/mg:)."""
|
||||
state = self._model_picker_state.get(chat_id)
|
||||
if not state:
|
||||
await query.answer(text="Picker expired — use /model again.")
|
||||
return
|
||||
|
||||
try:
|
||||
from hermes_cli.providers import get_label
|
||||
except ImportError:
|
||||
def get_label(slug):
|
||||
return slug
|
||||
|
||||
if data.startswith("mp:"):
|
||||
# --- Provider selected: show model buttons (page 0) ---
|
||||
provider_slug = data[3:]
|
||||
provider = next(
|
||||
(p for p in state["providers"] if p["slug"] == provider_slug),
|
||||
None,
|
||||
)
|
||||
if not provider:
|
||||
await query.answer(text="Provider not found.")
|
||||
return
|
||||
|
||||
models = provider.get("models", [])
|
||||
state["selected_provider"] = provider_slug
|
||||
state["selected_provider_name"] = provider.get("name", provider_slug)
|
||||
state["model_list"] = models
|
||||
state["model_page"] = 0
|
||||
|
||||
keyboard, page_info = self._build_model_keyboard(models, 0)
|
||||
|
||||
pname = provider.get("name", provider_slug)
|
||||
total = provider.get("total_models", len(models))
|
||||
shown = len(models)
|
||||
extra = f"\n_{total - shown} more available — type `/model <name>` directly_" if total > shown else ""
|
||||
|
||||
await query.edit_message_text(
|
||||
text=(
|
||||
f"⚙ *Model Configuration*\n\n"
|
||||
f"Provider: *{pname}*{page_info}\n"
|
||||
f"Select a model:{extra}"
|
||||
),
|
||||
parse_mode=ParseMode.MARKDOWN,
|
||||
reply_markup=keyboard,
|
||||
)
|
||||
await query.answer()
|
||||
|
||||
elif data.startswith("mg:"):
|
||||
# --- Page navigation ---
|
||||
try:
|
||||
page = int(data[3:])
|
||||
except ValueError:
|
||||
await query.answer(text="Invalid page.")
|
||||
return
|
||||
|
||||
models = state.get("model_list", [])
|
||||
state["model_page"] = page
|
||||
|
||||
keyboard, page_info = self._build_model_keyboard(models, page)
|
||||
|
||||
pname = state.get("selected_provider_name", "")
|
||||
provider_slug = state.get("selected_provider", "")
|
||||
provider = next(
|
||||
(p for p in state["providers"] if p["slug"] == provider_slug),
|
||||
None,
|
||||
)
|
||||
total = provider.get("total_models", len(models)) if provider else len(models)
|
||||
shown = len(models)
|
||||
extra = f"\n_{total - shown} more available — type `/model <name>` directly_" if total > shown else ""
|
||||
|
||||
await query.edit_message_text(
|
||||
text=(
|
||||
f"⚙ *Model Configuration*\n\n"
|
||||
f"Provider: *{pname}*{page_info}\n"
|
||||
f"Select a model:{extra}"
|
||||
),
|
||||
parse_mode=ParseMode.MARKDOWN,
|
||||
reply_markup=keyboard,
|
||||
)
|
||||
await query.answer()
|
||||
|
||||
elif data.startswith("mm:"):
|
||||
# --- Model selected: perform the switch ---
|
||||
try:
|
||||
idx = int(data[3:])
|
||||
except ValueError:
|
||||
await query.answer(text="Invalid selection.")
|
||||
return
|
||||
|
||||
model_list = state.get("model_list", [])
|
||||
if idx < 0 or idx >= len(model_list):
|
||||
await query.answer(text="Invalid model index.")
|
||||
return
|
||||
|
||||
model_id = model_list[idx]
|
||||
provider_slug = state.get("selected_provider", "")
|
||||
callback = state.get("on_model_selected")
|
||||
|
||||
if not callback:
|
||||
await query.answer(text="Picker expired.")
|
||||
return
|
||||
|
||||
try:
|
||||
result_text = await callback(chat_id, model_id, provider_slug)
|
||||
except Exception as exc:
|
||||
logger.error("Model picker switch failed: %s", exc)
|
||||
result_text = f"Error switching model: {exc}"
|
||||
|
||||
# Edit message to show confirmation, remove buttons
|
||||
try:
|
||||
await query.edit_message_text(
|
||||
text=result_text,
|
||||
parse_mode=ParseMode.MARKDOWN,
|
||||
reply_markup=None,
|
||||
)
|
||||
except Exception:
|
||||
# Markdown parse failure — retry as plain text
|
||||
try:
|
||||
await query.edit_message_text(
|
||||
text=result_text,
|
||||
parse_mode=None,
|
||||
reply_markup=None,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
await query.answer(text="Model switched!")
|
||||
|
||||
# Clean up state
|
||||
self._model_picker_state.pop(chat_id, None)
|
||||
|
||||
elif data == "mb":
|
||||
# --- Back to provider list ---
|
||||
buttons = []
|
||||
for p in state["providers"]:
|
||||
count = p.get("total_models", len(p.get("models", [])))
|
||||
label = f"{p['name']} ({count})"
|
||||
if p.get("is_current"):
|
||||
label = f"✓ {label}"
|
||||
buttons.append(
|
||||
InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
|
||||
)
|
||||
|
||||
rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
|
||||
rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
|
||||
keyboard = InlineKeyboardMarkup(rows)
|
||||
|
||||
try:
|
||||
provider_label = get_label(state["current_provider"])
|
||||
except Exception:
|
||||
provider_label = state["current_provider"]
|
||||
|
||||
await query.edit_message_text(
|
||||
text=(
|
||||
f"⚙ *Model Configuration*\n\n"
|
||||
f"Current model: `{state['current_model'] or 'unknown'}`\n"
|
||||
f"Provider: {provider_label}\n\n"
|
||||
f"Select a provider:"
|
||||
),
|
||||
parse_mode=ParseMode.MARKDOWN,
|
||||
reply_markup=keyboard,
|
||||
)
|
||||
await query.answer()
|
||||
|
||||
elif data == "mx":
|
||||
# --- Cancel ---
|
||||
self._model_picker_state.pop(chat_id, None)
|
||||
await query.edit_message_text(
|
||||
text="Model selection cancelled.",
|
||||
reply_markup=None,
|
||||
)
|
||||
await query.answer()
|
||||
|
||||
else:
|
||||
# Catch-all (e.g. page counter button "mx:noop")
|
||||
await query.answer()
|
||||
|
||||
async def _handle_callback_query(
|
||||
self, update: "Update", context: "ContextTypes.DEFAULT_TYPE"
|
||||
) -> None:
|
||||
"""Handle inline keyboard button clicks."""
|
||||
"""Handle inline keyboard button clicks (update prompts)."""
|
||||
query = update.callback_query
|
||||
if not query or not query.data:
|
||||
return
|
||||
data = query.data
|
||||
|
||||
# --- Model picker callbacks ---
|
||||
if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")):
|
||||
chat_id = str(query.message.chat_id) if query.message else None
|
||||
if chat_id:
|
||||
await self._handle_model_picker_callback(query, data, chat_id)
|
||||
return
|
||||
|
||||
# --- Exec approval callbacks (ea:choice:id) ---
|
||||
if data.startswith("ea:"):
|
||||
parts = data.split(":", 2)
|
||||
if len(parts) == 3:
|
||||
choice = parts[1] # once, session, always, deny
|
||||
try:
|
||||
approval_id = int(parts[2])
|
||||
except (ValueError, IndexError):
|
||||
await query.answer(text="Invalid approval data.")
|
||||
return
|
||||
|
||||
# Only authorized users may click approval buttons.
|
||||
caller_id = str(getattr(query.from_user, "id", ""))
|
||||
allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
|
||||
if allowed_csv:
|
||||
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
|
||||
if "*" not in allowed_ids and caller_id not in allowed_ids:
|
||||
await query.answer(text="⛔ You are not authorized to approve commands.")
|
||||
return
|
||||
|
||||
session_key = self._approval_state.pop(approval_id, None)
|
||||
if not session_key:
|
||||
await query.answer(text="This approval has already been resolved.")
|
||||
return
|
||||
|
||||
# Map choice to human-readable label
|
||||
label_map = {
|
||||
"once": "✅ Approved once",
|
||||
"session": "✅ Approved for session",
|
||||
"always": "✅ Approved permanently",
|
||||
"deny": "❌ Denied",
|
||||
}
|
||||
user_display = getattr(query.from_user, "first_name", "User")
|
||||
label = label_map.get(choice, "Resolved")
|
||||
|
||||
await query.answer(text=label)
|
||||
|
||||
# Edit message to show decision, remove buttons
|
||||
try:
|
||||
await query.edit_message_text(
|
||||
text=f"{label} by {user_display}",
|
||||
parse_mode=ParseMode.MARKDOWN,
|
||||
reply_markup=None,
|
||||
)
|
||||
except Exception:
|
||||
pass # non-fatal if edit fails
|
||||
|
||||
# Resolve the approval — unblocks the agent thread
|
||||
try:
|
||||
from tools.approval import resolve_gateway_approval
|
||||
count = resolve_gateway_approval(session_key, choice)
|
||||
logger.info(
|
||||
"Telegram button resolved %d approval(s) for session %s (choice=%s, user=%s)",
|
||||
count, session_key, choice, user_display,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("Failed to resolve gateway approval from Telegram button: %s", exc)
|
||||
return
|
||||
|
||||
# --- Update prompt callbacks ---
|
||||
if not data.startswith("update_prompt:"):
|
||||
return
|
||||
answer = data.split(":", 1)[1] # "y" or "n"
|
||||
@@ -1554,7 +1063,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
with open(audio_path, "rb") as audio_file:
|
||||
# .ogg files -> send as voice (round playable bubble)
|
||||
if audio_path.endswith((".ogg", ".opus")):
|
||||
if audio_path.endswith(".ogg") or audio_path.endswith(".opus"):
|
||||
_voice_thread = metadata.get("thread_id") if metadata else None
|
||||
msg = await self._bot.send_voice(
|
||||
chat_id=int(chat_id),
|
||||
@@ -1701,12 +1210,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
if not self._bot:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(image_url):
|
||||
logger.warning("[%s] Blocked unsafe image URL (SSRF protection)", self.name)
|
||||
return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata)
|
||||
|
||||
|
||||
try:
|
||||
# Telegram can send photos directly from URLs (up to ~5MB)
|
||||
_photo_thread = metadata.get("thread_id") if metadata else None
|
||||
@@ -2220,15 +1724,12 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
key = self._text_batch_key(event)
|
||||
existing = self._pending_text_batches.get(key)
|
||||
chunk_len = len(event.text or "")
|
||||
if existing is None:
|
||||
event._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
self._pending_text_batches[key] = event
|
||||
else:
|
||||
# Append text from the follow-up chunk
|
||||
if event.text:
|
||||
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
|
||||
existing._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
# Merge any media that might be attached
|
||||
if event.media_urls:
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
@@ -2243,22 +1744,10 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
)
|
||||
|
||||
async def _flush_text_batch(self, key: str) -> None:
|
||||
"""Wait for the quiet period then dispatch the aggregated text.
|
||||
|
||||
Uses a longer delay when the latest chunk is near Telegram's 4096-char
|
||||
split point, since a continuation chunk is almost certain.
|
||||
"""
|
||||
"""Wait for the quiet period then dispatch the aggregated text."""
|
||||
current_task = asyncio.current_task()
|
||||
try:
|
||||
# Adaptive delay: if the latest chunk is near Telegram's 4096-char
|
||||
# split point, a continuation is almost certain — wait longer.
|
||||
pending = self._pending_text_batches.get(key)
|
||||
last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
|
||||
if last_len >= self._SPLIT_THRESHOLD:
|
||||
delay = self._text_batch_split_delay_seconds
|
||||
else:
|
||||
delay = self._text_batch_delay_seconds
|
||||
await asyncio.sleep(delay)
|
||||
await asyncio.sleep(self._text_batch_delay_seconds)
|
||||
event = self._pending_text_batches.pop(key, None)
|
||||
if not event:
|
||||
return
|
||||
@@ -2311,7 +1800,10 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
existing.media_types.extend(event.media_types)
|
||||
if event.text:
|
||||
existing.text = self._merge_caption(existing.text, event.text)
|
||||
if not existing.text:
|
||||
existing.text = event.text
|
||||
elif event.text not in existing.text:
|
||||
existing.text = f"{existing.text}\n\n{event.text}".strip()
|
||||
|
||||
prior_task = self._pending_photo_batch_tasks.get(batch_key)
|
||||
if prior_task and not prior_task.done():
|
||||
@@ -2501,7 +1993,11 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
existing.media_types.extend(event.media_types)
|
||||
if event.text:
|
||||
existing.text = self._merge_caption(existing.text, event.text)
|
||||
if existing.text:
|
||||
if event.text not in existing.text.split("\n\n"):
|
||||
existing.text = f"{existing.text}\n\n{event.text}"
|
||||
else:
|
||||
existing.text = event.text
|
||||
|
||||
prior_task = self._media_group_tasks.get(media_group_id)
|
||||
if prior_task:
|
||||
@@ -2757,50 +2253,3 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
auto_skill=topic_skill,
|
||||
timestamp=message.date,
|
||||
)
|
||||
|
||||
# ── Message reactions (processing lifecycle) ──────────────────────────
|
||||
|
||||
def _reactions_enabled(self) -> bool:
|
||||
"""Check if message reactions are enabled via config/env."""
|
||||
return os.getenv("TELEGRAM_REACTIONS", "false").lower() not in ("false", "0", "no")
|
||||
|
||||
async def _set_reaction(self, chat_id: str, message_id: str, emoji: str) -> bool:
|
||||
"""Set a single emoji reaction on a Telegram message."""
|
||||
if not self._bot:
|
||||
return False
|
||||
try:
|
||||
await self._bot.set_message_reaction(
|
||||
chat_id=int(chat_id),
|
||||
message_id=int(message_id),
|
||||
reaction=emoji,
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("[%s] set_message_reaction failed (%s): %s", self.name, emoji, e)
|
||||
return False
|
||||
|
||||
async def on_processing_start(self, event: MessageEvent) -> None:
|
||||
"""Add an in-progress reaction when message processing begins."""
|
||||
if not self._reactions_enabled():
|
||||
return
|
||||
chat_id = getattr(event.source, "chat_id", None)
|
||||
message_id = getattr(event, "message_id", None)
|
||||
if chat_id and message_id:
|
||||
await self._set_reaction(chat_id, message_id, "\U0001f440")
|
||||
|
||||
async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
|
||||
"""Swap the in-progress reaction for a final success/failure reaction.
|
||||
|
||||
Unlike Discord (additive reactions), Telegram's set_message_reaction
|
||||
replaces all existing reactions in one call — no remove step needed.
|
||||
"""
|
||||
if not self._reactions_enabled():
|
||||
return
|
||||
chat_id = getattr(event.source, "chat_id", None)
|
||||
message_id = getattr(event, "message_id", None)
|
||||
if chat_id and message_id and outcome != ProcessingOutcome.CANCELLED:
|
||||
await self._set_reaction(
|
||||
chat_id,
|
||||
message_id,
|
||||
"\U0001f44d" if outcome == ProcessingOutcome.SUCCESS else "\U0001f44e",
|
||||
)
|
||||
|
||||
@@ -45,9 +45,11 @@ _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
|
||||
|
||||
|
||||
def _resolve_proxy_url() -> str | None:
|
||||
# Delegate to shared implementation (env vars + macOS system proxy detection)
|
||||
from gateway.platforms.base import resolve_proxy_url
|
||||
return resolve_proxy_url()
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"):
|
||||
value = (os.environ.get(key) or "").strip()
|
||||
if value:
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
@@ -110,8 +112,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
logger.warning("[Telegram] Fallback IP %s failed: %s", ip, exc)
|
||||
continue
|
||||
|
||||
if last_error is None:
|
||||
raise RuntimeError("All Telegram fallback IPs exhausted but no error was recorded")
|
||||
assert last_error is not None
|
||||
raise last_error
|
||||
|
||||
async def aclose(self) -> None:
|
||||
|
||||
@@ -76,17 +76,8 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
self._routes: Dict[str, dict] = dict(self._static_routes)
|
||||
self._runner = None
|
||||
|
||||
# Delivery info keyed by session chat_id.
|
||||
#
|
||||
# Read by every send() invocation for the chat_id (status messages
|
||||
# AND the final response). Cleaned up via TTL on each POST so the
|
||||
# dict stays bounded — see _prune_delivery_info(). Do NOT pop on
|
||||
# send(), or interim status messages (e.g. fallback notifications,
|
||||
# context-pressure warnings) will consume the entry before the
|
||||
# final response arrives, causing the response to silently fall
|
||||
# back to the "log" deliver type.
|
||||
# Delivery info keyed by session chat_id — consumed by send()
|
||||
self._delivery_info: Dict[str, dict] = {}
|
||||
self._delivery_info_created: Dict[str, float] = {}
|
||||
|
||||
# Reference to gateway runner for cross-platform delivery (set externally)
|
||||
self.gateway_runner = None
|
||||
@@ -169,14 +160,10 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
) -> SendResult:
|
||||
"""Deliver the agent's response to the configured destination.
|
||||
|
||||
chat_id is ``webhook:{route}:{delivery_id}``. The delivery info
|
||||
stored during webhook receipt is read with ``.get()`` (not popped)
|
||||
so that interim status messages emitted before the final response
|
||||
— fallback-model notifications, context-pressure warnings, etc. —
|
||||
do not consume the entry and silently downgrade the final response
|
||||
to the ``log`` deliver type. TTL cleanup happens on POST.
|
||||
chat_id is ``webhook:{route}:{delivery_id}`` — we pop the delivery
|
||||
info stored during webhook receipt so it doesn't leak memory.
|
||||
"""
|
||||
delivery = self._delivery_info.get(chat_id, {})
|
||||
delivery = self._delivery_info.pop(chat_id, {})
|
||||
deliver_type = delivery.get("deliver", "log")
|
||||
|
||||
if deliver_type == "log":
|
||||
@@ -186,23 +173,13 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
if deliver_type == "github_comment":
|
||||
return await self._deliver_github_comment(content, delivery)
|
||||
|
||||
# Cross-platform delivery — any platform with a gateway adapter
|
||||
# Cross-platform delivery (telegram, discord, etc.)
|
||||
if self.gateway_runner and deliver_type in (
|
||||
"telegram",
|
||||
"discord",
|
||||
"slack",
|
||||
"signal",
|
||||
"sms",
|
||||
"whatsapp",
|
||||
"matrix",
|
||||
"mattermost",
|
||||
"homeassistant",
|
||||
"email",
|
||||
"dingtalk",
|
||||
"feishu",
|
||||
"wecom",
|
||||
"weixin",
|
||||
"bluebubbles",
|
||||
):
|
||||
return await self._deliver_cross_platform(
|
||||
deliver_type, content, delivery
|
||||
@@ -213,23 +190,6 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
success=False, error=f"Unknown deliver type: {deliver_type}"
|
||||
)
|
||||
|
||||
def _prune_delivery_info(self, now: float) -> None:
|
||||
"""Drop delivery_info entries older than the idempotency TTL.
|
||||
|
||||
Mirrors the cleanup pattern used for ``_seen_deliveries``. Called
|
||||
on each POST so the dict size is bounded by ``rate_limit * TTL``
|
||||
even if many webhooks fire and never receive a final response.
|
||||
"""
|
||||
cutoff = now - self._idempotency_ttl
|
||||
stale = [
|
||||
k
|
||||
for k, t in self._delivery_info_created.items()
|
||||
if t < cutoff
|
||||
]
|
||||
for k in stale:
|
||||
self._delivery_info.pop(k, None)
|
||||
self._delivery_info_created.pop(k, None)
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
return {"name": chat_id, "type": "webhook"}
|
||||
|
||||
@@ -243,8 +203,10 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
|
||||
def _reload_dynamic_routes(self) -> None:
|
||||
"""Reload agent-created subscriptions from disk if the file changed."""
|
||||
from hermes_constants import get_hermes_home
|
||||
hermes_home = get_hermes_home()
|
||||
from pathlib import Path as _Path
|
||||
hermes_home = _Path(
|
||||
os.getenv("HERMES_HOME", str(_Path.home() / ".hermes"))
|
||||
).expanduser()
|
||||
subs_path = hermes_home / _DYNAMIC_ROUTES_FILENAME
|
||||
if not subs_path.exists():
|
||||
if self._dynamic_routes:
|
||||
@@ -272,7 +234,7 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
", ".join(self._dynamic_routes.keys()) or "(none)",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("[webhook] Failed to reload dynamic routes: %s", e)
|
||||
logger.warning("[webhook] Failed to reload dynamic routes: %s", e)
|
||||
|
||||
async def _handle_webhook(self, request: "web.Request") -> "web.Response":
|
||||
"""POST /webhooks/{route_name} — receive and process a webhook event."""
|
||||
@@ -422,9 +384,7 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
# same route get independent agent runs (not queued/interrupted).
|
||||
session_chat_id = f"webhook:{route_name}:{delivery_id}"
|
||||
|
||||
# Store delivery info for send(). Read by every send() invocation
|
||||
# for this chat_id (interim status messages and the final response),
|
||||
# so we do NOT pop on send. TTL-based cleanup keeps the dict bounded.
|
||||
# Store delivery info for send() — consumed (popped) on delivery
|
||||
deliver_config = {
|
||||
"deliver": route_config.get("deliver", "log"),
|
||||
"deliver_extra": self._render_delivery_extra(
|
||||
@@ -433,8 +393,6 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
"payload": payload,
|
||||
}
|
||||
self._delivery_info[session_chat_id] = deliver_config
|
||||
self._delivery_info_created[session_chat_id] = now
|
||||
self._prune_delivery_info(now)
|
||||
|
||||
# Build source and event
|
||||
source = self.build_source(
|
||||
|
||||
+5
-102
@@ -143,9 +143,6 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
"""WeCom AI Bot adapter backed by a persistent WebSocket connection."""
|
||||
|
||||
MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH
|
||||
# Threshold for detecting WeCom client-side message splits.
|
||||
# When a chunk is near the 4000-char limit, a continuation is almost certain.
|
||||
_SPLIT_THRESHOLD = 3900
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.WECOM)
|
||||
@@ -175,13 +172,6 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
self._seen_messages: Dict[str, float] = {}
|
||||
self._reply_req_ids: Dict[str, str] = {}
|
||||
|
||||
# Text batching: merge rapid successive messages (Telegram-style).
|
||||
# WeCom clients split long messages around 4000 chars.
|
||||
self._text_batch_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_DELAY_SECONDS", "0.6"))
|
||||
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
|
||||
self._pending_text_batches: Dict[str, MessageEvent] = {}
|
||||
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Connection lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
@@ -529,82 +519,7 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
timestamp=datetime.now(tz=timezone.utc),
|
||||
)
|
||||
|
||||
# Only batch plain text messages — commands, media, etc. dispatch
|
||||
# immediately since they won't be split by the WeCom client.
|
||||
if message_type == MessageType.TEXT and self._text_batch_delay_seconds > 0:
|
||||
self._enqueue_text_event(event)
|
||||
else:
|
||||
await self.handle_message(event)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text message aggregation (handles WeCom client-side splits)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _text_batch_key(self, event: MessageEvent) -> str:
|
||||
"""Session-scoped key for text message batching."""
|
||||
from gateway.session import build_session_key
|
||||
return build_session_key(
|
||||
event.source,
|
||||
group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
|
||||
thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
|
||||
)
|
||||
|
||||
def _enqueue_text_event(self, event: MessageEvent) -> None:
|
||||
"""Buffer a text event and reset the flush timer.
|
||||
|
||||
When WeCom splits a long user message at 4000 chars, the chunks
|
||||
arrive within a few hundred milliseconds. This merges them into
|
||||
a single event before dispatching.
|
||||
"""
|
||||
key = self._text_batch_key(event)
|
||||
existing = self._pending_text_batches.get(key)
|
||||
chunk_len = len(event.text or "")
|
||||
if existing is None:
|
||||
event._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
self._pending_text_batches[key] = event
|
||||
else:
|
||||
if event.text:
|
||||
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
|
||||
existing._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
# Merge any media that might be attached
|
||||
if event.media_urls:
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
existing.media_types.extend(event.media_types)
|
||||
|
||||
# Cancel any pending flush and restart the timer
|
||||
prior_task = self._pending_text_batch_tasks.get(key)
|
||||
if prior_task and not prior_task.done():
|
||||
prior_task.cancel()
|
||||
self._pending_text_batch_tasks[key] = asyncio.create_task(
|
||||
self._flush_text_batch(key)
|
||||
)
|
||||
|
||||
async def _flush_text_batch(self, key: str) -> None:
|
||||
"""Wait for the quiet period then dispatch the aggregated text.
|
||||
|
||||
Uses a longer delay when the latest chunk is near WeCom's 4000-char
|
||||
split point, since a continuation chunk is almost certain.
|
||||
"""
|
||||
current_task = asyncio.current_task()
|
||||
try:
|
||||
pending = self._pending_text_batches.get(key)
|
||||
last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
|
||||
if last_len >= self._SPLIT_THRESHOLD:
|
||||
delay = self._text_batch_split_delay_seconds
|
||||
else:
|
||||
delay = self._text_batch_delay_seconds
|
||||
await asyncio.sleep(delay)
|
||||
event = self._pending_text_batches.pop(key, None)
|
||||
if not event:
|
||||
return
|
||||
logger.info(
|
||||
"[WeCom] Flushing text batch %s (%d chars)",
|
||||
key, len(event.text or ""),
|
||||
)
|
||||
await self.handle_message(event)
|
||||
finally:
|
||||
if self._pending_text_batch_tasks.get(key) is current_task:
|
||||
self._pending_text_batch_tasks.pop(key, None)
|
||||
await self.handle_message(event)
|
||||
|
||||
@staticmethod
|
||||
def _extract_text(body: Dict[str, Any]) -> Tuple[str, Optional[str]]:
|
||||
@@ -696,11 +611,7 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
|
||||
if kind == "image":
|
||||
ext = self._detect_image_ext(raw)
|
||||
try:
|
||||
return cache_image_from_bytes(raw, ext), self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
except ValueError as exc:
|
||||
logger.warning("[%s] Rejected non-image bytes: %s", self.name, exc)
|
||||
return None
|
||||
return cache_image_from_bytes(raw, ext), self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
|
||||
filename = str(media.get("filename") or media.get("name") or "wecom_file")
|
||||
return cache_document_from_bytes(raw, filename), mimetypes.guess_type(filename)[0] or "application/octet-stream"
|
||||
@@ -726,11 +637,7 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
content_type = str(headers.get("content-type") or "").split(";", 1)[0].strip() or "application/octet-stream"
|
||||
if kind == "image":
|
||||
ext = self._guess_extension(url, content_type, fallback=self._detect_image_ext(raw))
|
||||
try:
|
||||
return cache_image_from_bytes(raw, ext), content_type or self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
except ValueError as exc:
|
||||
logger.warning("[%s] Rejected non-image bytes from %s: %s", self.name, url, exc)
|
||||
return None
|
||||
return cache_image_from_bytes(raw, ext), content_type or self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
|
||||
filename = self._guess_filename(url, headers.get("content-disposition"), content_type)
|
||||
return cache_document_from_bytes(raw, filename), content_type
|
||||
@@ -746,7 +653,7 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
return ".png"
|
||||
if data.startswith(b"\xff\xd8\xff"):
|
||||
return ".jpg"
|
||||
if data.startswith((b"GIF87a", b"GIF89a")):
|
||||
if data.startswith(b"GIF87a") or data.startswith(b"GIF89a"):
|
||||
return ".gif"
|
||||
if data.startswith(b"RIFF") and data[8:12] == b"WEBP":
|
||||
return ".webp"
|
||||
@@ -782,7 +689,7 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
@staticmethod
|
||||
def _derive_message_type(body: Dict[str, Any], text: str, media_types: List[str]) -> MessageType:
|
||||
"""Choose the normalized inbound message type."""
|
||||
if any(mtype.startswith(("application/", "text/")) for mtype in media_types):
|
||||
if any(mtype.startswith("application/") or mtype.startswith("text/") for mtype in media_types):
|
||||
return MessageType.DOCUMENT
|
||||
if any(mtype.startswith("image/") for mtype in media_types):
|
||||
return MessageType.TEXT if text else MessageType.PHOTO
|
||||
@@ -1003,10 +910,6 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
url: str,
|
||||
max_bytes: int,
|
||||
) -> Tuple[bytes, Dict[str, str]]:
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(url):
|
||||
raise ValueError(f"Blocked unsafe URL (SSRF protection): {url[:80]}")
|
||||
|
||||
if not HTTPX_AVAILABLE:
|
||||
raise RuntimeError("httpx is required for WeCom media download")
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -27,6 +27,7 @@ _IS_WINDOWS = platform.system() == "Windows"
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Any
|
||||
|
||||
from hermes_cli.config import get_hermes_home
|
||||
from hermes_constants import get_hermes_dir
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
"""Shared gateway restart constants and parsing helpers."""
|
||||
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
# EX_TEMPFAIL from sysexits.h — used to ask the service manager to restart
|
||||
# the gateway after a graceful drain/reload path completes.
|
||||
GATEWAY_SERVICE_RESTART_EXIT_CODE = 75
|
||||
|
||||
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT = float(
|
||||
DEFAULT_CONFIG["agent"]["restart_drain_timeout"]
|
||||
)
|
||||
|
||||
|
||||
def parse_restart_drain_timeout(raw: object) -> float:
|
||||
"""Parse a configured drain timeout, falling back to the shared default."""
|
||||
try:
|
||||
value = float(raw) if str(raw or "").strip() else DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
|
||||
except (TypeError, ValueError):
|
||||
return DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
|
||||
return max(0.0, value)
|
||||
+246
-1302
File diff suppressed because it is too large
Load Diff
+53
-2
@@ -32,6 +32,9 @@ def _now() -> datetime:
|
||||
# PII redaction helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PHONE_RE = re.compile(r"^\+?\d[\d\-\s]{6,}$")
|
||||
|
||||
|
||||
def _hash_id(value: str) -> str:
|
||||
"""Deterministic 12-char hex hash of an identifier."""
|
||||
return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
|
||||
@@ -55,6 +58,10 @@ def _hash_chat_id(value: str) -> str:
|
||||
return _hash_id(value)
|
||||
|
||||
|
||||
def _looks_like_phone(value: str) -> bool:
|
||||
"""Return True if *value* looks like a phone number (E.164 or similar)."""
|
||||
return bool(_PHONE_RE.match(value.strip()))
|
||||
|
||||
from .config import (
|
||||
Platform,
|
||||
GatewayConfig,
|
||||
@@ -137,6 +144,15 @@ class SessionSource:
|
||||
chat_id_alt=data.get("chat_id_alt"),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def local_cli(cls) -> "SessionSource":
|
||||
"""Create a source representing the local CLI."""
|
||||
return cls(
|
||||
platform=Platform.LOCAL,
|
||||
chat_id="cli",
|
||||
chat_name="CLI terminal",
|
||||
chat_type="dm",
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -177,7 +193,6 @@ _PII_SAFE_PLATFORMS = frozenset({
|
||||
Platform.WHATSAPP,
|
||||
Platform.SIGNAL,
|
||||
Platform.TELEGRAM,
|
||||
Platform.BLUEBUBBLES,
|
||||
})
|
||||
"""Platforms where user IDs can be safely redacted (no in-message mention system
|
||||
that requires raw IDs). Discord is excluded because mentions use ``<@user_id>``
|
||||
@@ -494,7 +509,8 @@ class SessionStore:
|
||||
"""
|
||||
|
||||
def __init__(self, sessions_dir: Path, config: GatewayConfig,
|
||||
has_active_processes_fn=None):
|
||||
has_active_processes_fn=None,
|
||||
on_auto_reset=None):
|
||||
self.sessions_dir = sessions_dir
|
||||
self.config = config
|
||||
self._entries: Dict[str, SessionEntry] = {}
|
||||
@@ -753,6 +769,41 @@ class SessionStore:
|
||||
except Exception as e:
|
||||
print(f"[gateway] Warning: Failed to create SQLite session: {e}")
|
||||
|
||||
# Seed new DM thread sessions with parent DM session history.
|
||||
# When a bot reply creates a Slack thread and the user responds in it,
|
||||
# the thread gets a new session (keyed by thread_ts). Without seeding,
|
||||
# the thread session starts with zero context — the user's original
|
||||
# question and the bot's answer are invisible. Fix: copy the parent
|
||||
# DM session's transcript into the new thread session so context carries
|
||||
# over while still keeping threads isolated from each other.
|
||||
if (
|
||||
source.chat_type == "dm"
|
||||
and source.thread_id
|
||||
and entry.created_at == entry.updated_at # brand-new session
|
||||
and not was_auto_reset
|
||||
):
|
||||
parent_source = SessionSource(
|
||||
platform=source.platform,
|
||||
chat_id=source.chat_id,
|
||||
chat_type="dm",
|
||||
user_id=source.user_id,
|
||||
# no thread_id — this is the parent DM session
|
||||
)
|
||||
parent_key = self._generate_session_key(parent_source)
|
||||
with self._lock:
|
||||
parent_entry = self._entries.get(parent_key)
|
||||
if parent_entry and parent_entry.session_id != entry.session_id:
|
||||
try:
|
||||
parent_history = self.load_transcript(parent_entry.session_id)
|
||||
if parent_history:
|
||||
self.rewrite_transcript(entry.session_id, parent_history)
|
||||
logger.info(
|
||||
"[Session] Seeded DM thread session %s with %d messages from parent %s",
|
||||
entry.session_id, len(parent_history), parent_entry.session_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("[Session] Failed to seed thread session: %s", e)
|
||||
|
||||
return entry
|
||||
|
||||
def update_session(
|
||||
|
||||
@@ -1,113 +0,0 @@
|
||||
"""
|
||||
Session-scoped context variables for the Hermes gateway.
|
||||
|
||||
Replaces the previous ``os.environ``-based session state
|
||||
(``HERMES_SESSION_PLATFORM``, ``HERMES_SESSION_CHAT_ID``, etc.) with
|
||||
Python's ``contextvars.ContextVar``.
|
||||
|
||||
**Why this matters**
|
||||
|
||||
The gateway processes messages concurrently via ``asyncio``. When two
|
||||
messages arrive at the same time the old code did:
|
||||
|
||||
os.environ["HERMES_SESSION_THREAD_ID"] = str(context.source.thread_id)
|
||||
|
||||
Because ``os.environ`` is *process-global*, Message A's value was
|
||||
silently overwritten by Message B before Message A's agent finished
|
||||
running. Background-task notifications and tool calls therefore routed
|
||||
to the wrong thread.
|
||||
|
||||
``contextvars.ContextVar`` values are *task-local*: each ``asyncio``
|
||||
task (and any ``run_in_executor`` thread it spawns) gets its own copy,
|
||||
so concurrent messages never interfere.
|
||||
|
||||
**Backward compatibility**
|
||||
|
||||
The public helper ``get_session_env(name, default="")`` mirrors the old
|
||||
``os.getenv("HERMES_SESSION_*", ...)`` calls. Existing tool code only
|
||||
needs to replace the import + call site:
|
||||
|
||||
# before
|
||||
import os
|
||||
platform = os.getenv("HERMES_SESSION_PLATFORM", "")
|
||||
|
||||
# after
|
||||
from gateway.session_context import get_session_env
|
||||
platform = get_session_env("HERMES_SESSION_PLATFORM", "")
|
||||
"""
|
||||
|
||||
from contextvars import ContextVar
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-task session variables
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_SESSION_PLATFORM: ContextVar[str] = ContextVar("HERMES_SESSION_PLATFORM", default="")
|
||||
_SESSION_CHAT_ID: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_ID", default="")
|
||||
_SESSION_CHAT_NAME: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_NAME", default="")
|
||||
_SESSION_THREAD_ID: ContextVar[str] = ContextVar("HERMES_SESSION_THREAD_ID", default="")
|
||||
|
||||
_VAR_MAP = {
|
||||
"HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
|
||||
"HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
|
||||
"HERMES_SESSION_CHAT_NAME": _SESSION_CHAT_NAME,
|
||||
"HERMES_SESSION_THREAD_ID": _SESSION_THREAD_ID,
|
||||
}
|
||||
|
||||
|
||||
def set_session_vars(
|
||||
platform: str = "",
|
||||
chat_id: str = "",
|
||||
chat_name: str = "",
|
||||
thread_id: str = "",
|
||||
) -> list:
|
||||
"""Set all session context variables and return reset tokens.
|
||||
|
||||
Call ``clear_session_vars(tokens)`` in a ``finally`` block to restore
|
||||
the previous values when the handler exits.
|
||||
|
||||
Returns a list of ``Token`` objects (one per variable) that can be
|
||||
passed to ``clear_session_vars``.
|
||||
"""
|
||||
tokens = [
|
||||
_SESSION_PLATFORM.set(platform),
|
||||
_SESSION_CHAT_ID.set(chat_id),
|
||||
_SESSION_CHAT_NAME.set(chat_name),
|
||||
_SESSION_THREAD_ID.set(thread_id),
|
||||
]
|
||||
return tokens
|
||||
|
||||
|
||||
def clear_session_vars(tokens: list) -> None:
|
||||
"""Restore session context variables to their pre-handler values."""
|
||||
if not tokens:
|
||||
return
|
||||
vars_in_order = [
|
||||
_SESSION_PLATFORM,
|
||||
_SESSION_CHAT_ID,
|
||||
_SESSION_CHAT_NAME,
|
||||
_SESSION_THREAD_ID,
|
||||
]
|
||||
for var, token in zip(vars_in_order, tokens):
|
||||
var.reset(token)
|
||||
|
||||
|
||||
def get_session_env(name: str, default: str = "") -> str:
|
||||
"""Read a session context variable by its legacy ``HERMES_SESSION_*`` name.
|
||||
|
||||
Drop-in replacement for ``os.getenv("HERMES_SESSION_*", default)``.
|
||||
|
||||
Resolution order:
|
||||
1. Context variable (set by the gateway for concurrency-safe access)
|
||||
2. ``os.environ`` (used by CLI, cron scheduler, and tests)
|
||||
3. *default*
|
||||
"""
|
||||
import os
|
||||
|
||||
var = _VAR_MAP.get(name)
|
||||
if var is not None:
|
||||
value = var.get()
|
||||
if value:
|
||||
return value
|
||||
# Fall back to os.environ for CLI, cron, and test compatibility
|
||||
return os.getenv(name, default)
|
||||
@@ -14,8 +14,6 @@ concurrently under distinct configurations).
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
@@ -25,7 +23,6 @@ from typing import Any, Optional
|
||||
_GATEWAY_KIND = "hermes-gateway"
|
||||
_RUNTIME_STATUS_FILE = "gateway_state.json"
|
||||
_LOCKS_DIRNAME = "gateway-locks"
|
||||
_IS_WINDOWS = sys.platform == "win32"
|
||||
|
||||
|
||||
def _get_pid_path() -> Path:
|
||||
@@ -52,33 +49,6 @@ def _utc_now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def terminate_pid(pid: int, *, force: bool = False) -> None:
|
||||
"""Terminate a PID with platform-appropriate force semantics.
|
||||
|
||||
POSIX uses SIGTERM/SIGKILL. Windows uses taskkill /T /F for true force-kill
|
||||
because os.kill(..., SIGTERM) is not equivalent to a tree-killing hard stop.
|
||||
"""
|
||||
if force and _IS_WINDOWS:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["taskkill", "/PID", str(pid), "/T", "/F"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
return
|
||||
|
||||
if result.returncode != 0:
|
||||
details = (result.stderr or result.stdout or "").strip()
|
||||
raise OSError(details or f"taskkill failed for PID {pid}")
|
||||
return
|
||||
|
||||
sig = signal.SIGTERM if not force else getattr(signal, "SIGKILL", signal.SIGTERM)
|
||||
os.kill(pid, sig)
|
||||
|
||||
|
||||
def _scope_hash(identity: str) -> str:
|
||||
return hashlib.sha256(identity.encode("utf-8")).hexdigest()[:16]
|
||||
|
||||
@@ -158,8 +128,6 @@ def _build_runtime_status_record() -> dict[str, Any]:
|
||||
payload.update({
|
||||
"gateway_state": "starting",
|
||||
"exit_reason": None,
|
||||
"restart_requested": False,
|
||||
"active_agents": 0,
|
||||
"platforms": {},
|
||||
"updated_at": _utc_now_iso(),
|
||||
})
|
||||
@@ -220,8 +188,6 @@ def write_runtime_status(
|
||||
*,
|
||||
gateway_state: Optional[str] = None,
|
||||
exit_reason: Optional[str] = None,
|
||||
restart_requested: Optional[bool] = None,
|
||||
active_agents: Optional[int] = None,
|
||||
platform: Optional[str] = None,
|
||||
platform_state: Optional[str] = None,
|
||||
error_code: Optional[str] = None,
|
||||
@@ -240,10 +206,6 @@ def write_runtime_status(
|
||||
payload["gateway_state"] = gateway_state
|
||||
if exit_reason is not None:
|
||||
payload["exit_reason"] = exit_reason
|
||||
if restart_requested is not None:
|
||||
payload["restart_requested"] = bool(restart_requested)
|
||||
if active_agents is not None:
|
||||
payload["active_agents"] = max(0, int(active_agents))
|
||||
|
||||
if platform is not None:
|
||||
platform_payload = payload["platforms"].get(platform, {})
|
||||
|
||||
+11
-212
@@ -28,10 +28,6 @@ logger = logging.getLogger("gateway.stream_consumer")
|
||||
# Sentinel to signal the stream is complete
|
||||
_DONE = object()
|
||||
|
||||
# Sentinel to signal a tool boundary — finalize current message and start a
|
||||
# new one so that subsequent text appears below tool progress messages.
|
||||
_NEW_SEGMENT = object()
|
||||
|
||||
|
||||
@dataclass
|
||||
class StreamConsumerConfig:
|
||||
@@ -74,8 +70,6 @@ class GatewayStreamConsumer:
|
||||
self._edit_supported = True # Disabled on first edit failure (Signal/Email/HA)
|
||||
self._last_edit_time = 0.0
|
||||
self._last_sent_text = "" # Track last-sent text to skip redundant edits
|
||||
self._fallback_final_send = False
|
||||
self._fallback_prefix = ""
|
||||
|
||||
@property
|
||||
def already_sent(self) -> bool:
|
||||
@@ -84,16 +78,9 @@ class GatewayStreamConsumer:
|
||||
return self._already_sent
|
||||
|
||||
def on_delta(self, text: str) -> None:
|
||||
"""Thread-safe callback — called from the agent's worker thread.
|
||||
|
||||
When *text* is ``None``, signals a tool boundary: the current message
|
||||
is finalized and subsequent text will be sent as a new message so it
|
||||
appears below any tool-progress messages the gateway sent in between.
|
||||
"""
|
||||
"""Thread-safe callback — called from the agent's worker thread."""
|
||||
if text:
|
||||
self._queue.put(text)
|
||||
elif text is None:
|
||||
self._queue.put(_NEW_SEGMENT)
|
||||
|
||||
def finish(self) -> None:
|
||||
"""Signal that the stream is complete."""
|
||||
@@ -109,16 +96,12 @@ class GatewayStreamConsumer:
|
||||
while True:
|
||||
# Drain all available items from the queue
|
||||
got_done = False
|
||||
got_segment_break = False
|
||||
while True:
|
||||
try:
|
||||
item = self._queue.get_nowait()
|
||||
if item is _DONE:
|
||||
got_done = True
|
||||
break
|
||||
if item is _NEW_SEGMENT:
|
||||
got_segment_break = True
|
||||
break
|
||||
self._accumulated += item
|
||||
except queue.Empty:
|
||||
break
|
||||
@@ -128,103 +111,40 @@ class GatewayStreamConsumer:
|
||||
elapsed = now - self._last_edit_time
|
||||
should_edit = (
|
||||
got_done
|
||||
or got_segment_break
|
||||
or (elapsed >= self.cfg.edit_interval
|
||||
and self._accumulated)
|
||||
and len(self._accumulated) > 0)
|
||||
or len(self._accumulated) >= self.cfg.buffer_threshold
|
||||
)
|
||||
|
||||
if should_edit and self._accumulated:
|
||||
# Split overflow: if accumulated text exceeds the platform
|
||||
# limit, split into properly sized chunks.
|
||||
if (
|
||||
len(self._accumulated) > _safe_limit
|
||||
and self._message_id is None
|
||||
):
|
||||
# No existing message to edit (first message or after a
|
||||
# segment break). Use truncate_message — the same
|
||||
# helper the non-streaming path uses — to split with
|
||||
# proper word/code-fence boundaries and chunk
|
||||
# indicators like "(1/2)".
|
||||
chunks = self.adapter.truncate_message(
|
||||
self._accumulated, _safe_limit
|
||||
)
|
||||
for chunk in chunks:
|
||||
await self._send_new_chunk(chunk, self._message_id)
|
||||
self._accumulated = ""
|
||||
self._last_sent_text = ""
|
||||
self._last_edit_time = time.monotonic()
|
||||
if got_done:
|
||||
return
|
||||
if got_segment_break:
|
||||
self._message_id = None
|
||||
self._fallback_final_send = False
|
||||
self._fallback_prefix = ""
|
||||
continue
|
||||
|
||||
# Existing message: edit it with the first chunk, then
|
||||
# start a new message for the overflow remainder.
|
||||
# limit, finalize the current message and start a new one.
|
||||
while (
|
||||
len(self._accumulated) > _safe_limit
|
||||
and self._message_id is not None
|
||||
and self._edit_supported
|
||||
):
|
||||
split_at = self._accumulated.rfind("\n", 0, _safe_limit)
|
||||
if split_at < _safe_limit // 2:
|
||||
split_at = _safe_limit
|
||||
chunk = self._accumulated[:split_at]
|
||||
await self._send_or_edit(chunk)
|
||||
if self._fallback_final_send:
|
||||
# Edit failed while attempting to split an oversized
|
||||
# message. Keep the full accumulated text intact so
|
||||
# the fallback final-send path can deliver the
|
||||
# remaining continuation without dropping content.
|
||||
break
|
||||
self._accumulated = self._accumulated[split_at:].lstrip("\n")
|
||||
self._message_id = None
|
||||
self._last_sent_text = ""
|
||||
|
||||
display_text = self._accumulated
|
||||
if not got_done and not got_segment_break:
|
||||
if not got_done:
|
||||
display_text += self.cfg.cursor
|
||||
|
||||
await self._send_or_edit(display_text)
|
||||
self._last_edit_time = time.monotonic()
|
||||
|
||||
if got_done:
|
||||
# Final edit without cursor. If progressive editing failed
|
||||
# mid-stream, send a single continuation/fallback message
|
||||
# here instead of letting the base gateway path send the
|
||||
# full response again.
|
||||
if self._accumulated:
|
||||
if self._fallback_final_send:
|
||||
await self._send_fallback_final(self._accumulated)
|
||||
elif self._message_id:
|
||||
await self._send_or_edit(self._accumulated)
|
||||
elif not self._already_sent:
|
||||
await self._send_or_edit(self._accumulated)
|
||||
# Final edit without cursor
|
||||
if self._accumulated and self._message_id:
|
||||
await self._send_or_edit(self._accumulated)
|
||||
return
|
||||
|
||||
# Tool boundary: reset message state so the next text chunk
|
||||
# creates a fresh message below any tool-progress messages.
|
||||
#
|
||||
# Exception: when _message_id is "__no_edit__" the platform
|
||||
# never returned a real message ID (e.g. Signal, webhook with
|
||||
# github_comment delivery). Resetting to None would re-enter
|
||||
# the "first send" path on every tool boundary and post one
|
||||
# platform message per tool call — that is what caused 155
|
||||
# comments under a single PR. Instead, keep all state so the
|
||||
# full continuation is delivered once via _send_fallback_final.
|
||||
# (When editing fails mid-stream due to flood control the id is
|
||||
# a real string like "msg_1", not "__no_edit__", so that case
|
||||
# still resets and creates a fresh segment as intended.)
|
||||
if got_segment_break and self._message_id != "__no_edit__":
|
||||
self._message_id = None
|
||||
self._accumulated = ""
|
||||
self._last_sent_text = ""
|
||||
self._fallback_final_send = False
|
||||
self._fallback_prefix = ""
|
||||
|
||||
await asyncio.sleep(0.05) # Small yield to not busy-loop
|
||||
|
||||
except asyncio.CancelledError:
|
||||
@@ -262,114 +182,6 @@ class GatewayStreamConsumer:
|
||||
# Strip trailing whitespace/newlines but preserve leading content
|
||||
return cleaned.rstrip()
|
||||
|
||||
async def _send_new_chunk(self, text: str, reply_to_id: Optional[str]) -> Optional[str]:
|
||||
"""Send a new message chunk, optionally threaded to a previous message.
|
||||
|
||||
Returns the message_id so callers can thread subsequent chunks.
|
||||
"""
|
||||
text = self._clean_for_display(text)
|
||||
if not text.strip():
|
||||
return reply_to_id
|
||||
try:
|
||||
meta = dict(self.metadata) if self.metadata else {}
|
||||
result = await self.adapter.send(
|
||||
chat_id=self.chat_id,
|
||||
content=text,
|
||||
reply_to=reply_to_id,
|
||||
metadata=meta,
|
||||
)
|
||||
if result.success and result.message_id:
|
||||
self._message_id = str(result.message_id)
|
||||
self._already_sent = True
|
||||
self._last_sent_text = text
|
||||
return str(result.message_id)
|
||||
else:
|
||||
self._edit_supported = False
|
||||
return reply_to_id
|
||||
except Exception as e:
|
||||
logger.error("Stream send chunk error: %s", e)
|
||||
return reply_to_id
|
||||
|
||||
def _visible_prefix(self) -> str:
|
||||
"""Return the visible text already shown in the streamed message."""
|
||||
prefix = self._last_sent_text or ""
|
||||
if self.cfg.cursor and prefix.endswith(self.cfg.cursor):
|
||||
prefix = prefix[:-len(self.cfg.cursor)]
|
||||
return self._clean_for_display(prefix)
|
||||
|
||||
def _continuation_text(self, final_text: str) -> str:
|
||||
"""Return only the part of final_text the user has not already seen."""
|
||||
prefix = self._fallback_prefix or self._visible_prefix()
|
||||
if prefix and final_text.startswith(prefix):
|
||||
return final_text[len(prefix):].lstrip()
|
||||
return final_text
|
||||
|
||||
@staticmethod
|
||||
def _split_text_chunks(text: str, limit: int) -> list[str]:
|
||||
"""Split text into reasonably sized chunks for fallback sends."""
|
||||
if len(text) <= limit:
|
||||
return [text]
|
||||
chunks: list[str] = []
|
||||
remaining = text
|
||||
while len(remaining) > limit:
|
||||
split_at = remaining.rfind("\n", 0, limit)
|
||||
if split_at < limit // 2:
|
||||
split_at = limit
|
||||
chunks.append(remaining[:split_at])
|
||||
remaining = remaining[split_at:].lstrip("\n")
|
||||
if remaining:
|
||||
chunks.append(remaining)
|
||||
return chunks
|
||||
|
||||
async def _send_fallback_final(self, text: str) -> None:
|
||||
"""Send the final continuation after streaming edits stop working."""
|
||||
final_text = self._clean_for_display(text)
|
||||
continuation = self._continuation_text(final_text)
|
||||
self._fallback_final_send = False
|
||||
if not continuation.strip():
|
||||
# Nothing new to send — the visible partial already matches final text.
|
||||
self._already_sent = True
|
||||
return
|
||||
|
||||
raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096)
|
||||
safe_limit = max(500, raw_limit - 100)
|
||||
chunks = self._split_text_chunks(continuation, safe_limit)
|
||||
|
||||
last_message_id: Optional[str] = None
|
||||
last_successful_chunk = ""
|
||||
sent_any_chunk = False
|
||||
for chunk in chunks:
|
||||
result = await self.adapter.send(
|
||||
chat_id=self.chat_id,
|
||||
content=chunk,
|
||||
metadata=self.metadata,
|
||||
)
|
||||
if not result.success:
|
||||
if sent_any_chunk:
|
||||
# Some continuation text already reached the user. Suppress
|
||||
# the base gateway final-send path so we don't resend the
|
||||
# full response and create another duplicate.
|
||||
self._already_sent = True
|
||||
self._message_id = last_message_id
|
||||
self._last_sent_text = last_successful_chunk
|
||||
self._fallback_prefix = ""
|
||||
return
|
||||
# No fallback chunk reached the user — allow the normal gateway
|
||||
# final-send path to try one more time.
|
||||
self._already_sent = False
|
||||
self._message_id = None
|
||||
self._last_sent_text = ""
|
||||
self._fallback_prefix = ""
|
||||
return
|
||||
sent_any_chunk = True
|
||||
last_successful_chunk = chunk
|
||||
last_message_id = result.message_id or last_message_id
|
||||
|
||||
self._message_id = last_message_id
|
||||
self._already_sent = True
|
||||
self._last_sent_text = chunks[-1]
|
||||
self._fallback_prefix = ""
|
||||
|
||||
async def _send_or_edit(self, text: str) -> None:
|
||||
"""Send or edit the streaming message."""
|
||||
# Strip MEDIA: directives so they don't appear as visible text.
|
||||
@@ -395,16 +207,14 @@ class GatewayStreamConsumer:
|
||||
self._last_sent_text = text
|
||||
else:
|
||||
# If an edit fails mid-stream (especially Telegram flood control),
|
||||
# stop progressive edits and send only the missing tail once the
|
||||
# final response is available.
|
||||
# stop progressive edits and let the normal final send path deliver
|
||||
# the complete answer instead of leaving the user with a partial.
|
||||
logger.debug("Edit failed, disabling streaming for this adapter")
|
||||
self._fallback_prefix = self._visible_prefix()
|
||||
self._fallback_final_send = True
|
||||
self._edit_supported = False
|
||||
self._already_sent = True
|
||||
self._already_sent = False
|
||||
else:
|
||||
# Editing not supported — skip intermediate updates.
|
||||
# The final response will be sent by the fallback path.
|
||||
# The final response will be sent by the normal path.
|
||||
pass
|
||||
else:
|
||||
# First message — send new
|
||||
@@ -417,17 +227,6 @@ class GatewayStreamConsumer:
|
||||
self._message_id = result.message_id
|
||||
self._already_sent = True
|
||||
self._last_sent_text = text
|
||||
elif result.success:
|
||||
# Platform accepted the message but returned no message_id
|
||||
# (e.g. Signal). Can't edit without an ID — switch to
|
||||
# fallback mode: suppress intermediate deltas, send only
|
||||
# the missing tail once the final response is ready.
|
||||
self._already_sent = True
|
||||
self._edit_supported = False
|
||||
self._fallback_prefix = self._clean_for_display(text)
|
||||
self._fallback_final_send = True
|
||||
# Sentinel prevents re-entering this branch on every delta
|
||||
self._message_id = "__no_edit__"
|
||||
else:
|
||||
# Initial send failed — disable streaming for this session
|
||||
self._edit_supported = False
|
||||
|
||||
@@ -11,5 +11,5 @@ Provides subcommands for:
|
||||
- hermes cron - Manage cron jobs
|
||||
"""
|
||||
|
||||
__version__ = "0.8.0"
|
||||
__release_date__ = "2026.4.8"
|
||||
__version__ = "0.7.0"
|
||||
__release_date__ = "2026.4.3"
|
||||
|
||||
+59
-405
@@ -37,7 +37,7 @@ from typing import Any, Dict, List, Optional
|
||||
import httpx
|
||||
import yaml
|
||||
|
||||
from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
|
||||
from hermes_cli.config import get_hermes_home, get_config_path
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -67,15 +67,12 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes
|
||||
ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry
|
||||
DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s
|
||||
DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
||||
DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
|
||||
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
|
||||
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
|
||||
DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai"
|
||||
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
|
||||
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
|
||||
QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56"
|
||||
QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token"
|
||||
QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -115,12 +112,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
auth_type="oauth_external",
|
||||
inference_base_url=DEFAULT_CODEX_BASE_URL,
|
||||
),
|
||||
"qwen-oauth": ProviderConfig(
|
||||
id="qwen-oauth",
|
||||
name="Qwen OAuth",
|
||||
auth_type="oauth_external",
|
||||
inference_base_url=DEFAULT_QWEN_BASE_URL,
|
||||
),
|
||||
"copilot": ProviderConfig(
|
||||
id="copilot",
|
||||
name="GitHub Copilot",
|
||||
@@ -198,14 +189,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=("DEEPSEEK_API_KEY",),
|
||||
base_url_env_var="DEEPSEEK_BASE_URL",
|
||||
),
|
||||
"xai": ProviderConfig(
|
||||
id="xai",
|
||||
name="xAI",
|
||||
auth_type="api_key",
|
||||
inference_base_url="https://api.x.ai/v1",
|
||||
api_key_env_vars=("XAI_API_KEY",),
|
||||
base_url_env_var="XAI_BASE_URL",
|
||||
),
|
||||
"ai-gateway": ProviderConfig(
|
||||
id="ai-gateway",
|
||||
name="AI Gateway",
|
||||
@@ -257,7 +240,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
# Kimi Code Endpoint Detection
|
||||
# =============================================================================
|
||||
|
||||
# Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work
|
||||
# Kimi Code (platform.kimi.ai) issues keys prefixed "sk-kimi-" that only work
|
||||
# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on
|
||||
# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set
|
||||
# KIMI_BASE_URL explicitly.
|
||||
@@ -421,47 +404,6 @@ def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) -> str:
|
||||
"""Return the correct Z.AI base URL by probing endpoints.
|
||||
|
||||
If the user has explicitly set GLM_BASE_URL, that always wins.
|
||||
Otherwise, probe the candidate endpoints to find one that accepts the
|
||||
key. The detected endpoint is cached in provider state (auth.json) keyed
|
||||
on a hash of the API key so subsequent starts skip the probe.
|
||||
"""
|
||||
if env_override:
|
||||
return env_override
|
||||
|
||||
# Check provider-state cache for a previously-detected endpoint.
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "zai") or {}
|
||||
cached = state.get("detected_endpoint")
|
||||
if isinstance(cached, dict) and cached.get("base_url"):
|
||||
key_hash = cached.get("key_hash", "")
|
||||
if key_hash == hashlib.sha256(api_key.encode()).hexdigest()[:16]:
|
||||
logger.debug("Z.AI: using cached endpoint %s", cached["base_url"])
|
||||
return cached["base_url"]
|
||||
|
||||
# Probe — may take up to ~8s per endpoint.
|
||||
detected = detect_zai_endpoint(api_key)
|
||||
if detected and detected.get("base_url"):
|
||||
# Persist the detection result keyed on the API key hash.
|
||||
key_hash = hashlib.sha256(api_key.encode()).hexdigest()[:16]
|
||||
state["detected_endpoint"] = {
|
||||
"base_url": detected["base_url"],
|
||||
"endpoint_id": detected.get("id", ""),
|
||||
"model": detected.get("model", ""),
|
||||
"label": detected.get("label", ""),
|
||||
"key_hash": key_hash,
|
||||
}
|
||||
_save_provider_state(auth_store, "zai", state)
|
||||
logger.info("Z.AI: auto-detected endpoint %s (%s)", detected["label"], detected["base_url"])
|
||||
return detected["base_url"]
|
||||
|
||||
logger.debug("Z.AI: probe failed, falling back to default %s", default_url)
|
||||
return default_url
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Error Types
|
||||
# =============================================================================
|
||||
@@ -712,27 +654,6 @@ def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Pa
|
||||
return _save_auth_store(auth_store)
|
||||
|
||||
|
||||
def suppress_credential_source(provider_id: str, source: str) -> None:
|
||||
"""Mark a credential source as suppressed so it won't be re-seeded."""
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
suppressed = auth_store.setdefault("suppressed_sources", {})
|
||||
provider_list = suppressed.setdefault(provider_id, [])
|
||||
if source not in provider_list:
|
||||
provider_list.append(source)
|
||||
_save_auth_store(auth_store)
|
||||
|
||||
|
||||
def is_source_suppressed(provider_id: str, source: str) -> bool:
|
||||
"""Check if a credential source has been suppressed by the user."""
|
||||
try:
|
||||
auth_store = _load_auth_store()
|
||||
suppressed = auth_store.get("suppressed_sources", {})
|
||||
return source in suppressed.get(provider_id, [])
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Return persisted auth state for a provider, or None."""
|
||||
auth_store = _load_auth_store()
|
||||
@@ -745,57 +666,6 @@ def get_active_provider() -> Optional[str]:
|
||||
return auth_store.get("active_provider")
|
||||
|
||||
|
||||
def is_provider_explicitly_configured(provider_id: str) -> bool:
|
||||
"""Return True only if the user has explicitly configured this provider.
|
||||
|
||||
Checks:
|
||||
1. active_provider in auth.json matches
|
||||
2. model.provider in config.yaml matches
|
||||
3. Provider-specific env vars are set (e.g. ANTHROPIC_API_KEY)
|
||||
|
||||
This is used to gate auto-discovery of external credentials (e.g.
|
||||
Claude Code's ~/.claude/.credentials.json) so they are never used
|
||||
without the user's explicit choice. See PR #4210 for the same
|
||||
pattern applied to the setup wizard gate.
|
||||
"""
|
||||
normalized = (provider_id or "").strip().lower()
|
||||
|
||||
# 1. Check auth.json active_provider
|
||||
try:
|
||||
auth_store = _load_auth_store()
|
||||
active = (auth_store.get("active_provider") or "").strip().lower()
|
||||
if active and active == normalized:
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 2. Check config.yaml model.provider
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
model_cfg = cfg.get("model")
|
||||
if isinstance(model_cfg, dict):
|
||||
cfg_provider = (model_cfg.get("provider") or "").strip().lower()
|
||||
if cfg_provider == normalized:
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 3. Check provider-specific env vars
|
||||
# Exclude CLAUDE_CODE_OAUTH_TOKEN — it's set by Claude Code itself,
|
||||
# not by the user explicitly configuring anthropic in Hermes.
|
||||
_IMPLICIT_ENV_VARS = {"CLAUDE_CODE_OAUTH_TOKEN"}
|
||||
pconfig = PROVIDER_REGISTRY.get(normalized)
|
||||
if pconfig and pconfig.auth_type == "api_key":
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
if env_var in _IMPLICIT_ENV_VARS:
|
||||
continue
|
||||
if has_usable_secret(os.getenv(env_var, "")):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def clear_provider_auth(provider_id: Optional[str] = None) -> bool:
|
||||
"""
|
||||
Clear auth state for a provider. Used by `hermes logout`.
|
||||
@@ -898,7 +768,7 @@ def resolve_provider(
|
||||
_PROVIDER_ALIASES = {
|
||||
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
|
||||
"google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
|
||||
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
|
||||
"kimi": "kimi-coding", "moonshot": "kimi-coding",
|
||||
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
|
||||
"claude": "anthropic", "claude-code": "anthropic",
|
||||
"github": "copilot", "github-copilot": "copilot",
|
||||
@@ -906,7 +776,6 @@ def resolve_provider(
|
||||
"github-copilot-acp": "copilot-acp", "copilot-acp-agent": "copilot-acp",
|
||||
"aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway",
|
||||
"opencode": "opencode-zen", "zen": "opencode-zen",
|
||||
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth",
|
||||
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
||||
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||
@@ -1036,176 +905,6 @@ def _codex_access_token_is_expiring(access_token: Any, skew_seconds: int) -> boo
|
||||
return float(exp) <= (time.time() + max(0, int(skew_seconds)))
|
||||
|
||||
|
||||
def _qwen_cli_auth_path() -> Path:
|
||||
return Path.home() / ".qwen" / "oauth_creds.json"
|
||||
|
||||
|
||||
def _read_qwen_cli_tokens() -> Dict[str, Any]:
|
||||
auth_path = _qwen_cli_auth_path()
|
||||
if not auth_path.exists():
|
||||
raise AuthError(
|
||||
"Qwen CLI credentials not found. Run 'qwen auth qwen-oauth' first.",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_auth_missing",
|
||||
)
|
||||
try:
|
||||
data = json.loads(auth_path.read_text(encoding="utf-8"))
|
||||
except Exception as exc:
|
||||
raise AuthError(
|
||||
f"Failed to read Qwen CLI credentials from {auth_path}: {exc}",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_auth_read_failed",
|
||||
) from exc
|
||||
if not isinstance(data, dict):
|
||||
raise AuthError(
|
||||
f"Invalid Qwen CLI credentials in {auth_path}.",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_auth_invalid",
|
||||
)
|
||||
return data
|
||||
|
||||
|
||||
def _save_qwen_cli_tokens(tokens: Dict[str, Any]) -> Path:
|
||||
auth_path = _qwen_cli_auth_path()
|
||||
auth_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp_path = auth_path.with_suffix(".tmp")
|
||||
tmp_path.write_text(json.dumps(tokens, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
||||
os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
|
||||
tmp_path.replace(auth_path)
|
||||
return auth_path
|
||||
|
||||
|
||||
def _qwen_access_token_is_expiring(expiry_date_ms: Any, skew_seconds: int = QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS) -> bool:
|
||||
try:
|
||||
expiry_ms = int(expiry_date_ms)
|
||||
except Exception:
|
||||
return True
|
||||
return (time.time() + max(0, int(skew_seconds))) * 1000 >= expiry_ms
|
||||
|
||||
|
||||
def _refresh_qwen_cli_tokens(tokens: Dict[str, Any], timeout_seconds: float = 20.0) -> Dict[str, Any]:
|
||||
refresh_token = str(tokens.get("refresh_token", "") or "").strip()
|
||||
if not refresh_token:
|
||||
raise AuthError(
|
||||
"Qwen OAuth refresh token missing. Re-run 'qwen auth qwen-oauth'.",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_refresh_token_missing",
|
||||
)
|
||||
|
||||
try:
|
||||
response = httpx.post(
|
||||
QWEN_OAUTH_TOKEN_URL,
|
||||
headers={
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"Accept": "application/json",
|
||||
},
|
||||
data={
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token,
|
||||
"client_id": QWEN_OAUTH_CLIENT_ID,
|
||||
},
|
||||
timeout=timeout_seconds,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise AuthError(
|
||||
f"Qwen OAuth refresh failed: {exc}",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_refresh_failed",
|
||||
) from exc
|
||||
|
||||
if response.status_code >= 400:
|
||||
body = response.text.strip()
|
||||
raise AuthError(
|
||||
"Qwen OAuth refresh failed. Re-run 'qwen auth qwen-oauth'."
|
||||
+ (f" Response: {body}" if body else ""),
|
||||
provider="qwen-oauth",
|
||||
code="qwen_refresh_failed",
|
||||
)
|
||||
|
||||
try:
|
||||
payload = response.json()
|
||||
except Exception as exc:
|
||||
raise AuthError(
|
||||
f"Qwen OAuth refresh returned invalid JSON: {exc}",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_refresh_invalid_json",
|
||||
) from exc
|
||||
|
||||
if not isinstance(payload, dict) or not str(payload.get("access_token", "") or "").strip():
|
||||
raise AuthError(
|
||||
"Qwen OAuth refresh response missing access_token.",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_refresh_invalid_response",
|
||||
)
|
||||
|
||||
expires_in = payload.get("expires_in")
|
||||
try:
|
||||
expires_in_seconds = int(expires_in)
|
||||
except Exception:
|
||||
expires_in_seconds = 6 * 60 * 60
|
||||
|
||||
refreshed = {
|
||||
"access_token": str(payload.get("access_token", "") or "").strip(),
|
||||
"refresh_token": str(payload.get("refresh_token", refresh_token) or refresh_token).strip(),
|
||||
"token_type": str(payload.get("token_type", tokens.get("token_type", "Bearer")) or "Bearer").strip() or "Bearer",
|
||||
"resource_url": str(payload.get("resource_url", tokens.get("resource_url", "portal.qwen.ai")) or "portal.qwen.ai").strip(),
|
||||
"expiry_date": int(time.time() * 1000) + max(1, expires_in_seconds) * 1000,
|
||||
}
|
||||
_save_qwen_cli_tokens(refreshed)
|
||||
return refreshed
|
||||
|
||||
|
||||
def resolve_qwen_runtime_credentials(
|
||||
*,
|
||||
force_refresh: bool = False,
|
||||
refresh_if_expiring: bool = True,
|
||||
refresh_skew_seconds: int = QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
) -> Dict[str, Any]:
|
||||
tokens = _read_qwen_cli_tokens()
|
||||
access_token = str(tokens.get("access_token", "") or "").strip()
|
||||
should_refresh = bool(force_refresh)
|
||||
if not should_refresh and refresh_if_expiring:
|
||||
should_refresh = _qwen_access_token_is_expiring(tokens.get("expiry_date"), refresh_skew_seconds)
|
||||
if should_refresh:
|
||||
tokens = _refresh_qwen_cli_tokens(tokens)
|
||||
access_token = str(tokens.get("access_token", "") or "").strip()
|
||||
if not access_token:
|
||||
raise AuthError(
|
||||
"Qwen OAuth access token missing. Re-run 'qwen auth qwen-oauth'.",
|
||||
provider="qwen-oauth",
|
||||
code="qwen_access_token_missing",
|
||||
)
|
||||
|
||||
base_url = os.getenv("HERMES_QWEN_BASE_URL", "").strip().rstrip("/") or DEFAULT_QWEN_BASE_URL
|
||||
return {
|
||||
"provider": "qwen-oauth",
|
||||
"base_url": base_url,
|
||||
"api_key": access_token,
|
||||
"source": "qwen-cli",
|
||||
"expires_at_ms": tokens.get("expiry_date"),
|
||||
"auth_file": str(_qwen_cli_auth_path()),
|
||||
}
|
||||
|
||||
|
||||
def get_qwen_auth_status() -> Dict[str, Any]:
|
||||
auth_path = _qwen_cli_auth_path()
|
||||
try:
|
||||
creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False)
|
||||
return {
|
||||
"logged_in": True,
|
||||
"auth_file": str(auth_path),
|
||||
"source": creds.get("source"),
|
||||
"api_key": creds.get("api_key"),
|
||||
"expires_at_ms": creds.get("expires_at_ms"),
|
||||
}
|
||||
except AuthError as exc:
|
||||
return {
|
||||
"logged_in": False,
|
||||
"auth_file": str(auth_path),
|
||||
"error": str(exc),
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SSH / remote session detection
|
||||
# =============================================================================
|
||||
@@ -1521,15 +1220,7 @@ def _resolve_verify(
|
||||
if effective_insecure:
|
||||
return False
|
||||
if effective_ca:
|
||||
ca_path = str(effective_ca)
|
||||
if not os.path.isfile(ca_path):
|
||||
import logging
|
||||
logging.getLogger("hermes.auth").warning(
|
||||
"CA bundle path does not exist: %s — falling back to default certificates",
|
||||
ca_path,
|
||||
)
|
||||
return True
|
||||
return ca_path
|
||||
return str(effective_ca)
|
||||
return True
|
||||
|
||||
|
||||
@@ -2340,8 +2031,6 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
|
||||
return get_nous_auth_status()
|
||||
if target == "openai-codex":
|
||||
return get_codex_auth_status()
|
||||
if target == "qwen-oauth":
|
||||
return get_qwen_auth_status()
|
||||
if target == "copilot-acp":
|
||||
return get_external_process_provider_status(target)
|
||||
# API-key providers
|
||||
@@ -2374,8 +2063,6 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
|
||||
|
||||
if provider_id == "kimi-coding":
|
||||
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
|
||||
elif provider_id == "zai":
|
||||
base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)
|
||||
elif env_url:
|
||||
base_url = env_url.rstrip("/")
|
||||
else:
|
||||
@@ -2429,6 +2116,33 @@ def resolve_external_process_provider_credentials(provider_id: str) -> Dict[str,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# External credential detection
|
||||
# =============================================================================
|
||||
|
||||
def detect_external_credentials() -> List[Dict[str, Any]]:
|
||||
"""Scan for credentials from other CLI tools that Hermes can reuse.
|
||||
|
||||
Returns a list of dicts, each with:
|
||||
- provider: str -- Hermes provider id (e.g. "openai-codex")
|
||||
- path: str -- filesystem path where creds were found
|
||||
- label: str -- human-friendly description for the setup UI
|
||||
"""
|
||||
found: List[Dict[str, Any]] = []
|
||||
|
||||
# Codex CLI: ~/.codex/auth.json (importable, not shared)
|
||||
cli_tokens = _import_codex_cli_tokens()
|
||||
if cli_tokens:
|
||||
codex_path = Path.home() / ".codex" / "auth.json"
|
||||
found.append({
|
||||
"provider": "openai-codex",
|
||||
"path": str(codex_path),
|
||||
"label": f"Codex CLI credentials found ({codex_path}) — run `hermes auth` to create a separate session",
|
||||
})
|
||||
|
||||
return found
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CLI Commands — login / logout
|
||||
# =============================================================================
|
||||
@@ -2457,7 +2171,14 @@ def _update_config_for_provider(
|
||||
config_path = get_config_path()
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
config = read_raw_config()
|
||||
config: Dict[str, Any] = {}
|
||||
if config_path.exists():
|
||||
try:
|
||||
loaded = yaml.safe_load(config_path.read_text()) or {}
|
||||
if isinstance(loaded, dict):
|
||||
config = loaded
|
||||
except Exception:
|
||||
config = {}
|
||||
|
||||
current_model = config.get("model")
|
||||
if isinstance(current_model, dict):
|
||||
@@ -2494,8 +2215,12 @@ def _reset_config_provider() -> Path:
|
||||
if not config_path.exists():
|
||||
return config_path
|
||||
|
||||
config = read_raw_config()
|
||||
if not config:
|
||||
try:
|
||||
config = yaml.safe_load(config_path.read_text()) or {}
|
||||
except Exception:
|
||||
return config_path
|
||||
|
||||
if not isinstance(config, dict):
|
||||
return config_path
|
||||
|
||||
model = config.get("model")
|
||||
@@ -2511,21 +2236,14 @@ def _prompt_model_selection(
|
||||
model_ids: List[str],
|
||||
current_model: str = "",
|
||||
pricing: Optional[Dict[str, Dict[str, str]]] = None,
|
||||
unavailable_models: Optional[List[str]] = None,
|
||||
portal_url: str = "",
|
||||
) -> Optional[str]:
|
||||
"""Interactive model selection. Puts current_model first with a marker. Returns chosen model ID or None.
|
||||
|
||||
If *pricing* is provided (``{model_id: {prompt, completion}}``), a compact
|
||||
price indicator is shown next to each model in aligned columns.
|
||||
|
||||
If *unavailable_models* is provided, those models are shown grayed out
|
||||
and unselectable, with an upgrade link to *portal_url*.
|
||||
"""
|
||||
from hermes_cli.models import _format_price_per_mtok
|
||||
|
||||
_unavailable = unavailable_models or []
|
||||
|
||||
# Reorder: current model first, then the rest (deduplicated)
|
||||
ordered = []
|
||||
if current_model and current_model in model_ids:
|
||||
@@ -2534,12 +2252,9 @@ def _prompt_model_selection(
|
||||
if mid not in ordered:
|
||||
ordered.append(mid)
|
||||
|
||||
# All models for column-width computation (selectable + unavailable)
|
||||
all_models = list(ordered) + list(_unavailable)
|
||||
|
||||
# Column-aligned labels when pricing is available
|
||||
has_pricing = bool(pricing and any(pricing.get(m) for m in all_models))
|
||||
name_col = max((len(m) for m in all_models), default=0) + 2 if has_pricing else 0
|
||||
has_pricing = bool(pricing and any(pricing.get(m) for m in ordered))
|
||||
name_col = max((len(m) for m in ordered), default=0) + 2 if has_pricing else 0
|
||||
|
||||
# Pre-compute formatted prices and dynamic column widths
|
||||
_price_cache: dict[str, tuple[str, str, str]] = {}
|
||||
@@ -2547,7 +2262,7 @@ def _prompt_model_selection(
|
||||
cache_col = 0 # only set if any model has cache pricing
|
||||
has_cache = False
|
||||
if has_pricing:
|
||||
for mid in all_models:
|
||||
for mid in ordered:
|
||||
p = pricing.get(mid) # type: ignore[union-attr]
|
||||
if p:
|
||||
inp = _format_price_per_mtok(p.get("prompt", ""))
|
||||
@@ -2592,35 +2307,12 @@ def _prompt_model_selection(
|
||||
header += f" {'Cache':>{cache_col}}"
|
||||
menu_title += header + " /Mtok"
|
||||
|
||||
# ANSI escape for dim text
|
||||
_DIM = "\033[2m"
|
||||
_RESET = "\033[0m"
|
||||
|
||||
# Try arrow-key menu first, fall back to number input
|
||||
try:
|
||||
from simple_term_menu import TerminalMenu
|
||||
|
||||
choices = [f" {_label(mid)}" for mid in ordered]
|
||||
choices.append(" Enter custom model name")
|
||||
choices.append(" Skip (keep current)")
|
||||
|
||||
# Print the unavailable block BEFORE the menu via regular print().
|
||||
# simple_term_menu pads title lines to terminal width (causes wrapping),
|
||||
# so we keep the title minimal and use stdout for the static block.
|
||||
# clear_screen=False means our printed output stays visible above.
|
||||
_upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
|
||||
if _unavailable:
|
||||
print(menu_title)
|
||||
print()
|
||||
for mid in _unavailable:
|
||||
print(f"{_DIM} {_label(mid)}{_RESET}")
|
||||
print()
|
||||
print(f"{_DIM} ── Upgrade at {_upgrade_url} for paid models ──{_RESET}")
|
||||
print()
|
||||
effective_title = "Available free models:"
|
||||
else:
|
||||
effective_title = menu_title
|
||||
|
||||
menu = TerminalMenu(
|
||||
choices,
|
||||
cursor_index=default_idx,
|
||||
@@ -2629,11 +2321,9 @@ def _prompt_model_selection(
|
||||
menu_highlight_style=("fg_green",),
|
||||
cycle_cursor=True,
|
||||
clear_screen=False,
|
||||
title=effective_title,
|
||||
title=menu_title,
|
||||
)
|
||||
idx = menu.show()
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
flush_stdin()
|
||||
if idx is None:
|
||||
return None
|
||||
print()
|
||||
@@ -2643,7 +2333,7 @@ def _prompt_model_selection(
|
||||
custom = input("Enter model name: ").strip()
|
||||
return custom if custom else None
|
||||
return None
|
||||
except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
|
||||
except (ImportError, NotImplementedError):
|
||||
pass
|
||||
|
||||
# Fallback: numbered list
|
||||
@@ -2654,13 +2344,6 @@ def _prompt_model_selection(
|
||||
n = len(ordered)
|
||||
print(f" {n + 1:>{num_width}}. Enter custom model name")
|
||||
print(f" {n + 2:>{num_width}}. Skip (keep current)")
|
||||
|
||||
if _unavailable:
|
||||
_upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
|
||||
print()
|
||||
print(f" {_DIM}── Unavailable models (requires paid tier — upgrade at {_upgrade_url}) ──{_RESET}")
|
||||
for mid in _unavailable:
|
||||
print(f" {'':>{num_width}} {_DIM}{_label(mid)}{_RESET}")
|
||||
print()
|
||||
|
||||
while True:
|
||||
@@ -3073,21 +2756,19 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
)
|
||||
|
||||
inference_base_url = auth_state["inference_base_url"]
|
||||
verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True)
|
||||
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
_save_provider_state(auth_store, "nous", auth_state)
|
||||
saved_to = _save_auth_store(auth_store)
|
||||
|
||||
config_path = _update_config_for_provider("nous", inference_base_url)
|
||||
print()
|
||||
print("Login successful!")
|
||||
print(f" Auth state: {saved_to}")
|
||||
print(f" Config updated: {config_path} (model.provider=nous)")
|
||||
|
||||
# Resolve model BEFORE writing provider to config.yaml so we never
|
||||
# leave the config in a half-updated state (provider=nous but model
|
||||
# still set to the previous provider's model, e.g. opus from
|
||||
# OpenRouter). The auth.json active_provider was already set above.
|
||||
selected_model = None
|
||||
try:
|
||||
runtime_key = auth_state.get("agent_key") or auth_state.get("access_token")
|
||||
if not isinstance(runtime_key, str) or not runtime_key:
|
||||
@@ -3097,34 +2778,16 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
code="invalid_token",
|
||||
)
|
||||
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
|
||||
check_nous_free_tier, partition_nous_models_by_tier,
|
||||
)
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
model_ids = _PROVIDER_MODELS.get("nous", [])
|
||||
|
||||
print()
|
||||
unavailable_models: list = []
|
||||
if model_ids:
|
||||
pricing = get_pricing_for_provider("nous")
|
||||
model_ids = filter_nous_free_models(model_ids, pricing)
|
||||
free_tier = check_nous_free_tier()
|
||||
if free_tier:
|
||||
model_ids, unavailable_models = partition_nous_models_by_tier(
|
||||
model_ids, pricing, free_tier=True,
|
||||
)
|
||||
_portal = auth_state.get("portal_base_url", "")
|
||||
if model_ids:
|
||||
print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
|
||||
selected_model = _prompt_model_selection(
|
||||
model_ids, pricing=pricing,
|
||||
unavailable_models=unavailable_models,
|
||||
portal_url=_portal,
|
||||
)
|
||||
elif unavailable_models:
|
||||
_url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
|
||||
print("No free models currently available.")
|
||||
print(f"Upgrade at {_url} to access paid models.")
|
||||
selected_model = _prompt_model_selection(model_ids)
|
||||
if selected_model:
|
||||
_save_model_choice(selected_model)
|
||||
print(f"Default model set to: {selected_model}")
|
||||
else:
|
||||
print("No curated models available for Nous Portal.")
|
||||
except Exception as exc:
|
||||
@@ -3132,15 +2795,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
print()
|
||||
print(f"Login succeeded, but could not fetch available models. Reason: {message}")
|
||||
|
||||
# Write provider + model atomically so config is never mismatched.
|
||||
config_path = _update_config_for_provider(
|
||||
"nous", inference_base_url, default_model=selected_model,
|
||||
)
|
||||
if selected_model:
|
||||
_save_model_choice(selected_model)
|
||||
print(f"Default model set to: {selected_model}")
|
||||
print(f" Config updated: {config_path} (model.provider=nous)")
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\nLogin cancelled.")
|
||||
raise SystemExit(130)
|
||||
|
||||
@@ -18,6 +18,7 @@ from agent.credential_pool import (
|
||||
STRATEGY_ROUND_ROBIN,
|
||||
STRATEGY_RANDOM,
|
||||
STRATEGY_LEAST_USED,
|
||||
SUPPORTED_POOL_STRATEGIES,
|
||||
PooledCredential,
|
||||
_exhausted_until,
|
||||
_normalize_custom_pool_name,
|
||||
@@ -32,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL
|
||||
|
||||
|
||||
# Providers that support OAuth login in addition to API keys.
|
||||
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth"}
|
||||
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex"}
|
||||
|
||||
|
||||
def _get_custom_provider_names() -> list:
|
||||
@@ -147,7 +148,7 @@ def auth_add_command(args) -> None:
|
||||
if provider.startswith(CUSTOM_POOL_PREFIX):
|
||||
requested_type = AUTH_TYPE_API_KEY
|
||||
else:
|
||||
requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth"} else AUTH_TYPE_API_KEY
|
||||
requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex"} else AUTH_TYPE_API_KEY
|
||||
|
||||
pool = load_pool(provider)
|
||||
|
||||
@@ -250,26 +251,6 @@ def auth_add_command(args) -> None:
|
||||
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
|
||||
return
|
||||
|
||||
if provider == "qwen-oauth":
|
||||
creds = auth_mod.resolve_qwen_runtime_credentials(refresh_if_expiring=False)
|
||||
label = (getattr(args, "label", None) or "").strip() or label_from_token(
|
||||
creds["api_key"],
|
||||
_oauth_default_label(provider, len(pool.entries()) + 1),
|
||||
)
|
||||
entry = PooledCredential(
|
||||
provider=provider,
|
||||
id=uuid.uuid4().hex[:6],
|
||||
label=label,
|
||||
auth_type=AUTH_TYPE_OAUTH,
|
||||
priority=0,
|
||||
source=f"{SOURCE_MANUAL}:qwen_cli",
|
||||
access_token=creds["api_key"],
|
||||
base_url=creds.get("base_url"),
|
||||
)
|
||||
pool.add_entry(entry)
|
||||
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
|
||||
return
|
||||
|
||||
raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.")
|
||||
|
||||
|
||||
@@ -347,11 +328,8 @@ def auth_remove_command(args) -> None:
|
||||
print("Cleared Hermes Anthropic OAuth credentials")
|
||||
|
||||
elif removed.source == "claude_code" and provider == "anthropic":
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
suppress_credential_source(provider, "claude_code")
|
||||
print("Suppressed claude_code credential — it will not be re-seeded.")
|
||||
print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
|
||||
print("Run `hermes auth add anthropic` to re-enable if needed.")
|
||||
print("Note: Claude Code credentials live in ~/.claude/.credentials.json")
|
||||
print(" Remove them manually if you want to deauthorize Claude Code.")
|
||||
|
||||
|
||||
def auth_reset_command(args) -> None:
|
||||
|
||||
+9
-82
@@ -90,6 +90,12 @@ HERMES_CADUCEUS = """[#CD7F32]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣀⡀⠀⣀⣀
|
||||
[#B8860B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠳⠈⣡⠞⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#B8860B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]"""
|
||||
|
||||
COMPACT_BANNER = """
|
||||
[bold #FFD700]╔══════════════════════════════════════════════════════════════╗[/]
|
||||
[bold #FFD700]║[/] [#FFBF00]⚕ NOUS HERMES[/] [dim #B8860B]- AI Agent Framework[/] [bold #FFD700]║[/]
|
||||
[bold #FFD700]║[/] [#CD7F32]Messenger of the Digital Gods[/] [dim #B8860B]Nous Research[/] [bold #FFD700]║[/]
|
||||
[bold #FFD700]╚══════════════════════════════════════════════════════════════╝[/]
|
||||
"""
|
||||
|
||||
|
||||
# =========================================================================
|
||||
@@ -184,79 +190,6 @@ def check_for_updates() -> Optional[int]:
|
||||
return behind
|
||||
|
||||
|
||||
def _resolve_repo_dir() -> Optional[Path]:
|
||||
"""Return the active Hermes git checkout, or None if this isn't a git install."""
|
||||
hermes_home = get_hermes_home()
|
||||
repo_dir = hermes_home / "hermes-agent"
|
||||
if not (repo_dir / ".git").exists():
|
||||
repo_dir = Path(__file__).parent.parent.resolve()
|
||||
return repo_dir if (repo_dir / ".git").exists() else None
|
||||
|
||||
|
||||
def _git_short_hash(repo_dir: Path, rev: str) -> Optional[str]:
|
||||
"""Resolve a git revision to an 8-character short hash."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-parse", "--short=8", rev],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
cwd=str(repo_dir),
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
value = (result.stdout or "").strip()
|
||||
return value or None
|
||||
|
||||
|
||||
def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]:
|
||||
"""Return upstream/local git hashes for the startup banner."""
|
||||
repo_dir = repo_dir or _resolve_repo_dir()
|
||||
if repo_dir is None:
|
||||
return None
|
||||
|
||||
upstream = _git_short_hash(repo_dir, "origin/main")
|
||||
local = _git_short_hash(repo_dir, "HEAD")
|
||||
if not upstream or not local:
|
||||
return None
|
||||
|
||||
ahead = 0
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-list", "--count", "origin/main..HEAD"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
cwd=str(repo_dir),
|
||||
)
|
||||
if result.returncode == 0:
|
||||
ahead = int((result.stdout or "0").strip() or "0")
|
||||
except Exception:
|
||||
ahead = 0
|
||||
|
||||
return {"upstream": upstream, "local": local, "ahead": max(ahead, 0)}
|
||||
|
||||
|
||||
def format_banner_version_label() -> str:
|
||||
"""Return the version label shown in the startup banner title."""
|
||||
base = f"Hermes Agent v{VERSION} ({RELEASE_DATE})"
|
||||
state = get_git_banner_state()
|
||||
if not state:
|
||||
return base
|
||||
|
||||
upstream = state["upstream"]
|
||||
local = state["local"]
|
||||
ahead = int(state.get("ahead") or 0)
|
||||
|
||||
if ahead <= 0 or upstream == local:
|
||||
return f"{base} · upstream {upstream}"
|
||||
|
||||
carried_word = "commit" if ahead == 1 else "commits"
|
||||
return f"{base} · upstream {upstream} · local {local} (+{ahead} carried {carried_word})"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Non-blocking update check
|
||||
# =========================================================================
|
||||
@@ -289,16 +222,10 @@ def _format_context_length(tokens: int) -> str:
|
||||
"""Format a token count for display (e.g. 128000 → '128K', 1048576 → '1M')."""
|
||||
if tokens >= 1_000_000:
|
||||
val = tokens / 1_000_000
|
||||
rounded = round(val)
|
||||
if abs(val - rounded) < 0.05:
|
||||
return f"{rounded}M"
|
||||
return f"{val:.1f}M"
|
||||
return f"{val:g}M"
|
||||
elif tokens >= 1_000:
|
||||
val = tokens / 1_000
|
||||
rounded = round(val)
|
||||
if abs(val - rounded) < 0.05:
|
||||
return f"{rounded}K"
|
||||
return f"{val:.1f}K"
|
||||
return f"{val:g}K"
|
||||
return str(tokens)
|
||||
|
||||
|
||||
@@ -522,7 +449,7 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
border_color = _skin_color("banner_border", "#CD7F32")
|
||||
outer_panel = Panel(
|
||||
layout_table,
|
||||
title=f"[bold {title_color}]{format_banner_version_label()}[/]",
|
||||
title=f"[bold {title_color}]{agent_name} v{VERSION} ({RELEASE_DATE})[/]",
|
||||
border_style=border_color,
|
||||
padding=(0, 2),
|
||||
)
|
||||
|
||||
+42
-1
@@ -25,7 +25,7 @@ def clarify_callback(cli, question, choices):
|
||||
|
||||
timeout = CLI_CONFIG.get("clarify", {}).get("timeout", 120)
|
||||
response_queue = queue.Queue()
|
||||
is_open_ended = not choices
|
||||
is_open_ended = not choices or len(choices) == 0
|
||||
|
||||
cli._clarify_state = {
|
||||
"question": question,
|
||||
@@ -63,6 +63,47 @@ def clarify_callback(cli, question, choices):
|
||||
)
|
||||
|
||||
|
||||
def sudo_password_callback(cli) -> str:
|
||||
"""Prompt for sudo password through the TUI.
|
||||
|
||||
Sets up a password input area and blocks until the user responds.
|
||||
"""
|
||||
timeout = 45
|
||||
response_queue = queue.Queue()
|
||||
|
||||
cli._sudo_state = {"response_queue": response_queue}
|
||||
cli._sudo_deadline = _time.monotonic() + timeout
|
||||
|
||||
if hasattr(cli, "_app") and cli._app:
|
||||
cli._app.invalidate()
|
||||
|
||||
while True:
|
||||
try:
|
||||
result = response_queue.get(timeout=1)
|
||||
cli._sudo_state = None
|
||||
cli._sudo_deadline = 0
|
||||
if hasattr(cli, "_app") and cli._app:
|
||||
cli._app.invalidate()
|
||||
if result:
|
||||
cprint(f"\n{_DIM} ✓ Password received (cached for session){_RST}")
|
||||
else:
|
||||
cprint(f"\n{_DIM} ⏭ Skipped{_RST}")
|
||||
return result
|
||||
except queue.Empty:
|
||||
remaining = cli._sudo_deadline - _time.monotonic()
|
||||
if remaining <= 0:
|
||||
break
|
||||
if hasattr(cli, "_app") and cli._app:
|
||||
cli._app.invalidate()
|
||||
|
||||
cli._sudo_state = None
|
||||
cli._sudo_deadline = 0
|
||||
if hasattr(cli, "_app") and cli._app:
|
||||
cli._app.invalidate()
|
||||
cprint(f"\n{_DIM} ⏱ Timeout — continuing without sudo{_RST}")
|
||||
return ""
|
||||
|
||||
|
||||
def prompt_for_secret(cli, var_name: str, prompt: str, metadata=None) -> dict:
|
||||
"""Prompt for a secret value through the TUI (e.g. API keys for skills).
|
||||
|
||||
|
||||
@@ -0,0 +1,140 @@
|
||||
"""Shared curses-based multi-select checklist for Hermes CLI.
|
||||
|
||||
Used by both ``hermes tools`` and ``hermes skills`` to present a
|
||||
toggleable list of items. Falls back to a numbered text UI when
|
||||
curses is unavailable (Windows without curses, piped stdin, etc.).
|
||||
"""
|
||||
|
||||
import sys
|
||||
from typing import List, Set
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
|
||||
|
||||
def curses_checklist(
|
||||
title: str,
|
||||
items: List[str],
|
||||
pre_selected: Set[int],
|
||||
) -> Set[int]:
|
||||
"""Multi-select checklist. Returns set of **selected** indices.
|
||||
|
||||
Args:
|
||||
title: Header text shown at the top of the checklist.
|
||||
items: Display labels for each row.
|
||||
pre_selected: Indices that start checked.
|
||||
|
||||
Returns:
|
||||
The indices the user confirmed as checked. On cancel (ESC/q),
|
||||
returns ``pre_selected`` unchanged.
|
||||
"""
|
||||
# Safety: return defaults when stdin is not a terminal.
|
||||
if not sys.stdin.isatty():
|
||||
return set(pre_selected)
|
||||
|
||||
try:
|
||||
import curses
|
||||
selected = set(pre_selected)
|
||||
result = [None]
|
||||
|
||||
def _ui(stdscr):
|
||||
curses.curs_set(0)
|
||||
if curses.has_colors():
|
||||
curses.start_color()
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, 8, -1) # dim gray
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
|
||||
while True:
|
||||
stdscr.clear()
|
||||
max_y, max_x = stdscr.getmaxyx()
|
||||
|
||||
# Header
|
||||
try:
|
||||
hattr = curses.A_BOLD | (curses.color_pair(2) if curses.has_colors() else 0)
|
||||
stdscr.addnstr(0, 0, title, max_x - 1, hattr)
|
||||
stdscr.addnstr(
|
||||
1, 0,
|
||||
" ↑↓ navigate SPACE toggle ENTER confirm ESC cancel",
|
||||
max_x - 1, curses.A_DIM,
|
||||
)
|
||||
except curses.error:
|
||||
pass
|
||||
|
||||
# Scrollable item list
|
||||
visible_rows = max_y - 3
|
||||
if cursor < scroll_offset:
|
||||
scroll_offset = cursor
|
||||
elif cursor >= scroll_offset + visible_rows:
|
||||
scroll_offset = cursor - visible_rows + 1
|
||||
|
||||
for draw_i, i in enumerate(
|
||||
range(scroll_offset, min(len(items), scroll_offset + visible_rows))
|
||||
):
|
||||
y = draw_i + 3
|
||||
if y >= max_y - 1:
|
||||
break
|
||||
check = "✓" if i in selected else " "
|
||||
arrow = "→" if i == cursor else " "
|
||||
line = f" {arrow} [{check}] {items[i]}"
|
||||
|
||||
attr = curses.A_NORMAL
|
||||
if i == cursor:
|
||||
attr = curses.A_BOLD
|
||||
if curses.has_colors():
|
||||
attr |= curses.color_pair(1)
|
||||
try:
|
||||
stdscr.addnstr(y, 0, line, max_x - 1, attr)
|
||||
except curses.error:
|
||||
pass
|
||||
|
||||
stdscr.refresh()
|
||||
key = stdscr.getch()
|
||||
|
||||
if key in (curses.KEY_UP, ord("k")):
|
||||
cursor = (cursor - 1) % len(items)
|
||||
elif key in (curses.KEY_DOWN, ord("j")):
|
||||
cursor = (cursor + 1) % len(items)
|
||||
elif key == ord(" "):
|
||||
selected.symmetric_difference_update({cursor})
|
||||
elif key in (curses.KEY_ENTER, 10, 13):
|
||||
result[0] = set(selected)
|
||||
return
|
||||
elif key in (27, ord("q")):
|
||||
result[0] = set(pre_selected)
|
||||
return
|
||||
|
||||
curses.wrapper(_ui)
|
||||
return result[0] if result[0] is not None else set(pre_selected)
|
||||
|
||||
except Exception:
|
||||
pass # fall through to numbered fallback
|
||||
|
||||
# ── Numbered text fallback ────────────────────────────────────────────
|
||||
selected = set(pre_selected)
|
||||
print(color(f"\n {title}", Colors.YELLOW))
|
||||
print(color(" Toggle by number, Enter to confirm.\n", Colors.DIM))
|
||||
|
||||
while True:
|
||||
for i, label in enumerate(items):
|
||||
check = "✓" if i in selected else " "
|
||||
print(f" {i + 1:3}. [{check}] {label}")
|
||||
print()
|
||||
|
||||
try:
|
||||
raw = input(color(" Number to toggle, 's' to save, 'q' to cancel: ", Colors.DIM)).strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
return set(pre_selected)
|
||||
|
||||
if raw.lower() == "s" or raw == "":
|
||||
return selected
|
||||
if raw.lower() == "q":
|
||||
return set(pre_selected)
|
||||
try:
|
||||
idx = int(raw) - 1
|
||||
if 0 <= idx < len(items):
|
||||
selected.symmetric_difference_update({idx})
|
||||
except ValueError:
|
||||
print(color(" Invalid input", Colors.DIM))
|
||||
@@ -10,6 +10,7 @@ Usage:
|
||||
|
||||
import importlib.util
|
||||
import logging
|
||||
import shutil
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
@@ -23,6 +24,7 @@ from hermes_cli.setup import (
|
||||
print_info,
|
||||
print_success,
|
||||
print_error,
|
||||
print_warning,
|
||||
prompt_yes_no,
|
||||
)
|
||||
|
||||
|
||||
+38
-110
@@ -1,4 +1,4 @@
|
||||
"""Clipboard image extraction for macOS, Windows, Linux, and WSL2.
|
||||
"""Clipboard image extraction for macOS, Linux, and WSL2.
|
||||
|
||||
Provides a single function `save_clipboard_image(dest)` that checks the
|
||||
system clipboard for image data, saves it to *dest* as PNG, and returns
|
||||
@@ -6,10 +6,9 @@ True on success. No external Python dependencies — uses only OS-level
|
||||
CLI tools that ship with the platform (or are commonly installed).
|
||||
|
||||
Platform support:
|
||||
macOS — osascript (always available), pngpaste (if installed)
|
||||
Windows — PowerShell via .NET System.Windows.Forms.Clipboard
|
||||
WSL2 — powershell.exe via .NET System.Windows.Forms.Clipboard
|
||||
Linux — wl-paste (Wayland), xclip (X11)
|
||||
macOS — osascript (always available), pngpaste (if installed)
|
||||
WSL2 — powershell.exe via .NET System.Windows.Forms.Clipboard
|
||||
Linux — wl-paste (Wayland), xclip (X11)
|
||||
"""
|
||||
|
||||
import base64
|
||||
@@ -19,10 +18,11 @@ import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_constants import is_wsl as _is_wsl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache WSL detection (checked once per process)
|
||||
_wsl_detected: bool | None = None
|
||||
|
||||
|
||||
def save_clipboard_image(dest: Path) -> bool:
|
||||
"""Extract an image from the system clipboard and save it as PNG.
|
||||
@@ -32,8 +32,6 @@ def save_clipboard_image(dest: Path) -> bool:
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
if sys.platform == "darwin":
|
||||
return _macos_save(dest)
|
||||
if sys.platform == "win32":
|
||||
return _windows_save(dest)
|
||||
return _linux_save(dest)
|
||||
|
||||
|
||||
@@ -44,8 +42,6 @@ def has_clipboard_image() -> bool:
|
||||
"""
|
||||
if sys.platform == "darwin":
|
||||
return _macos_has_image()
|
||||
if sys.platform == "win32":
|
||||
return _windows_has_image()
|
||||
if _is_wsl():
|
||||
return _wsl_has_image()
|
||||
if os.environ.get("WAYLAND_DISPLAY"):
|
||||
@@ -116,106 +112,21 @@ def _macos_osascript(dest: Path) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
# ── Shared PowerShell scripts (native Windows + WSL2) ─────────────────────
|
||||
|
||||
# .NET System.Windows.Forms.Clipboard — used by both native Windows (powershell)
|
||||
# and WSL2 (powershell.exe) paths.
|
||||
_PS_CHECK_IMAGE = (
|
||||
"Add-Type -AssemblyName System.Windows.Forms;"
|
||||
"[System.Windows.Forms.Clipboard]::ContainsImage()"
|
||||
)
|
||||
|
||||
_PS_EXTRACT_IMAGE = (
|
||||
"Add-Type -AssemblyName System.Windows.Forms;"
|
||||
"Add-Type -AssemblyName System.Drawing;"
|
||||
"$img = [System.Windows.Forms.Clipboard]::GetImage();"
|
||||
"if ($null -eq $img) { exit 1 }"
|
||||
"$ms = New-Object System.IO.MemoryStream;"
|
||||
"$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png);"
|
||||
"[System.Convert]::ToBase64String($ms.ToArray())"
|
||||
)
|
||||
|
||||
|
||||
# ── Native Windows ────────────────────────────────────────────────────────
|
||||
|
||||
# Native Windows uses ``powershell`` (Windows PowerShell 5.1, always present)
|
||||
# or ``pwsh`` (PowerShell 7+, optional). Discovery is cached per-process.
|
||||
|
||||
|
||||
def _find_powershell() -> str | None:
|
||||
"""Return the first available PowerShell executable, or None."""
|
||||
for name in ("powershell", "pwsh"):
|
||||
try:
|
||||
r = subprocess.run(
|
||||
[name, "-NoProfile", "-NonInteractive", "-Command", "echo ok"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if r.returncode == 0 and "ok" in r.stdout:
|
||||
return name
|
||||
except FileNotFoundError:
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
# Cache the resolved PowerShell executable (checked once per process)
|
||||
_ps_exe: str | None | bool = False # False = not yet checked
|
||||
|
||||
|
||||
def _get_ps_exe() -> str | None:
|
||||
global _ps_exe
|
||||
if _ps_exe is False:
|
||||
_ps_exe = _find_powershell()
|
||||
return _ps_exe
|
||||
|
||||
|
||||
def _windows_has_image() -> bool:
|
||||
"""Check if the Windows clipboard contains an image."""
|
||||
ps = _get_ps_exe()
|
||||
if ps is None:
|
||||
return False
|
||||
try:
|
||||
r = subprocess.run(
|
||||
[ps, "-NoProfile", "-NonInteractive", "-Command", _PS_CHECK_IMAGE],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
return r.returncode == 0 and "True" in r.stdout
|
||||
except Exception as e:
|
||||
logger.debug("Windows clipboard image check failed: %s", e)
|
||||
return False
|
||||
|
||||
|
||||
def _windows_save(dest: Path) -> bool:
|
||||
"""Extract clipboard image on native Windows via PowerShell → base64 PNG."""
|
||||
ps = _get_ps_exe()
|
||||
if ps is None:
|
||||
logger.debug("No PowerShell found — Windows clipboard image paste unavailable")
|
||||
return False
|
||||
try:
|
||||
r = subprocess.run(
|
||||
[ps, "-NoProfile", "-NonInteractive", "-Command", _PS_EXTRACT_IMAGE],
|
||||
capture_output=True, text=True, timeout=15,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return False
|
||||
|
||||
b64_data = r.stdout.strip()
|
||||
if not b64_data:
|
||||
return False
|
||||
|
||||
png_bytes = base64.b64decode(b64_data)
|
||||
dest.write_bytes(png_bytes)
|
||||
return dest.exists() and dest.stat().st_size > 0
|
||||
|
||||
except Exception as e:
|
||||
logger.debug("Windows clipboard image extraction failed: %s", e)
|
||||
dest.unlink(missing_ok=True)
|
||||
return False
|
||||
|
||||
|
||||
# ── Linux ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _is_wsl() -> bool:
|
||||
"""Detect if running inside WSL (1 or 2)."""
|
||||
global _wsl_detected
|
||||
if _wsl_detected is not None:
|
||||
return _wsl_detected
|
||||
try:
|
||||
with open("/proc/version", "r") as f:
|
||||
_wsl_detected = "microsoft" in f.read().lower()
|
||||
except Exception:
|
||||
_wsl_detected = False
|
||||
return _wsl_detected
|
||||
|
||||
|
||||
def _linux_save(dest: Path) -> bool:
|
||||
"""Try clipboard backends in priority order: WSL → Wayland → X11."""
|
||||
if _is_wsl():
|
||||
@@ -231,7 +142,24 @@ def _linux_save(dest: Path) -> bool:
|
||||
|
||||
|
||||
# ── WSL2 (powershell.exe) ────────────────────────────────────────────────
|
||||
# Reuses _PS_CHECK_IMAGE / _PS_EXTRACT_IMAGE defined above.
|
||||
|
||||
# PowerShell script: get clipboard image as base64-encoded PNG on stdout.
|
||||
# Using .NET System.Windows.Forms.Clipboard — always available on Windows.
|
||||
_PS_CHECK_IMAGE = (
|
||||
"Add-Type -AssemblyName System.Windows.Forms;"
|
||||
"[System.Windows.Forms.Clipboard]::ContainsImage()"
|
||||
)
|
||||
|
||||
_PS_EXTRACT_IMAGE = (
|
||||
"Add-Type -AssemblyName System.Windows.Forms;"
|
||||
"Add-Type -AssemblyName System.Drawing;"
|
||||
"$img = [System.Windows.Forms.Clipboard]::GetImage();"
|
||||
"if ($null -eq $img) { exit 1 }"
|
||||
"$ms = New-Object System.IO.MemoryStream;"
|
||||
"$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png);"
|
||||
"[System.Convert]::ToBase64String($ms.ToArray())"
|
||||
)
|
||||
|
||||
|
||||
def _wsl_has_image() -> bool:
|
||||
"""Check if Windows clipboard has an image (via powershell.exe)."""
|
||||
|
||||
+25
-42
@@ -16,18 +16,8 @@ from collections.abc import Callable, Mapping
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
# prompt_toolkit is an optional CLI dependency — only needed for
|
||||
# SlashCommandCompleter and SlashCommandAutoSuggest. Gateway and test
|
||||
# environments that lack it must still be able to import this module
|
||||
# for resolve_command, gateway_help_lines, and COMMAND_REGISTRY.
|
||||
try:
|
||||
from prompt_toolkit.auto_suggest import AutoSuggest, Suggestion
|
||||
from prompt_toolkit.completion import Completer, Completion
|
||||
except ImportError: # pragma: no cover
|
||||
AutoSuggest = object # type: ignore[assignment,misc]
|
||||
Completer = object # type: ignore[assignment,misc]
|
||||
Suggestion = None # type: ignore[assignment]
|
||||
Completion = None # type: ignore[assignment]
|
||||
from prompt_toolkit.auto_suggest import AutoSuggest, Suggestion
|
||||
from prompt_toolkit.completion import Completer, Completion
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -83,7 +73,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
args_hint="<question>"),
|
||||
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
|
||||
aliases=("q",), args_hint="<prompt>"),
|
||||
CommandDef("status", "Show session info", "Session"),
|
||||
CommandDef("status", "Show session info", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("profile", "Show active profile name and home directory", "Info"),
|
||||
CommandDef("sethome", "Set this chat as the home channel", "Session",
|
||||
gateway_only=True, aliases=("set-home",)),
|
||||
@@ -96,7 +87,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--global]"),
|
||||
CommandDef("provider", "Show available providers and current provider",
|
||||
"Configuration"),
|
||||
|
||||
CommandDef("prompt", "View/set custom system prompt", "Configuration",
|
||||
cli_only=True, args_hint="[text]", subcommands=("clear",)),
|
||||
CommandDef("personality", "Set a predefined personality", "Configuration",
|
||||
args_hint="[name]"),
|
||||
CommandDef("statusbar", "Toggle the context/model status bar", "Configuration",
|
||||
@@ -108,10 +100,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
"Configuration"),
|
||||
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
||||
args_hint="[level|show|hide]",
|
||||
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
||||
CommandDef("fast", "Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode (Normal/Fast)", "Configuration",
|
||||
args_hint="[normal|fast|status]",
|
||||
subcommands=("normal", "fast", "status", "on", "off")),
|
||||
subcommands=("none", "low", "minimal", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||
cli_only=True, args_hint="[name]"),
|
||||
CommandDef("voice", "Toggle voice mode", "Configuration",
|
||||
@@ -140,17 +129,13 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("commands", "Browse all commands and skills (paginated)", "Info",
|
||||
gateway_only=True, args_hint="[page]"),
|
||||
CommandDef("help", "Show available commands", "Info"),
|
||||
CommandDef("restart", "Gracefully restart the gateway after draining active runs", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("usage", "Show token usage and rate limits for the current session", "Info"),
|
||||
CommandDef("usage", "Show token usage for the current session", "Info"),
|
||||
CommandDef("insights", "Show usage insights and analytics", "Info",
|
||||
args_hint="[days]"),
|
||||
CommandDef("platforms", "Show gateway/messaging platform status", "Info",
|
||||
cli_only=True, aliases=("gateway",)),
|
||||
CommandDef("paste", "Check clipboard for an image and attach it", "Info",
|
||||
cli_only=True),
|
||||
CommandDef("image", "Attach a local image file for your next prompt", "Info",
|
||||
cli_only=True, args_hint="<path>"),
|
||||
CommandDef("update", "Update Hermes Agent to the latest version", "Info",
|
||||
gateway_only=True),
|
||||
|
||||
@@ -185,6 +170,12 @@ def resolve_command(name: str) -> CommandDef | None:
|
||||
return _COMMAND_LOOKUP.get(name.lower().lstrip("/"))
|
||||
|
||||
|
||||
def register_plugin_command(cmd: CommandDef) -> None:
|
||||
"""Append a plugin-defined command to the registry and refresh lookups."""
|
||||
COMMAND_REGISTRY.append(cmd)
|
||||
rebuild_lookups()
|
||||
|
||||
|
||||
def rebuild_lookups() -> None:
|
||||
"""Rebuild all derived lookup dicts from the current COMMAND_REGISTRY.
|
||||
|
||||
@@ -302,8 +293,16 @@ def _resolve_config_gates() -> set[str]:
|
||||
if not gated:
|
||||
return set()
|
||||
try:
|
||||
from hermes_cli.config import read_raw_config
|
||||
cfg = read_raw_config()
|
||||
import yaml
|
||||
config_path = os.path.join(
|
||||
os.getenv("HERMES_HOME", os.path.expanduser("~/.hermes")),
|
||||
"config.yaml",
|
||||
)
|
||||
if os.path.exists(config_path):
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
else:
|
||||
cfg = {}
|
||||
except Exception:
|
||||
return set()
|
||||
result: set[str] = set()
|
||||
@@ -647,18 +646,8 @@ class SlashCommandCompleter(Completer):
|
||||
def __init__(
|
||||
self,
|
||||
skill_commands_provider: Callable[[], Mapping[str, dict[str, Any]]] | None = None,
|
||||
command_filter: Callable[[str], bool] | None = None,
|
||||
) -> None:
|
||||
self._skill_commands_provider = skill_commands_provider
|
||||
self._command_filter = command_filter
|
||||
|
||||
def _command_allowed(self, slash_command: str) -> bool:
|
||||
if self._command_filter is None:
|
||||
return True
|
||||
try:
|
||||
return bool(self._command_filter(slash_command))
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
def _iter_skill_commands(self) -> Mapping[str, dict[str, Any]]:
|
||||
if self._skill_commands_provider is None:
|
||||
@@ -936,7 +925,7 @@ class SlashCommandCompleter(Completer):
|
||||
return
|
||||
|
||||
# Static subcommand completions
|
||||
if " " not in sub_text and base_cmd in SUBCOMMANDS and self._command_allowed(base_cmd):
|
||||
if " " not in sub_text and base_cmd in SUBCOMMANDS:
|
||||
for sub in SUBCOMMANDS[base_cmd]:
|
||||
if sub.startswith(sub_lower) and sub != sub_lower:
|
||||
yield Completion(
|
||||
@@ -949,8 +938,6 @@ class SlashCommandCompleter(Completer):
|
||||
word = text[1:]
|
||||
|
||||
for cmd, desc in COMMANDS.items():
|
||||
if not self._command_allowed(cmd):
|
||||
continue
|
||||
cmd_name = cmd[1:]
|
||||
if cmd_name.startswith(word):
|
||||
yield Completion(
|
||||
@@ -1009,8 +996,6 @@ class SlashCommandAutoSuggest(AutoSuggest):
|
||||
# Still typing the command name: /upd → suggest "ate"
|
||||
word = text[1:].lower()
|
||||
for cmd in COMMANDS:
|
||||
if self._completer is not None and not self._completer._command_allowed(cmd):
|
||||
continue
|
||||
cmd_name = cmd[1:] # strip leading /
|
||||
if cmd_name.startswith(word) and cmd_name != word:
|
||||
return Suggestion(cmd_name[len(word):])
|
||||
@@ -1021,8 +1006,6 @@ class SlashCommandAutoSuggest(AutoSuggest):
|
||||
sub_lower = sub_text.lower()
|
||||
|
||||
# Static subcommands
|
||||
if self._completer is not None and not self._completer._command_allowed(base_cmd):
|
||||
return None
|
||||
if base_cmd in SUBCOMMANDS and SUBCOMMANDS[base_cmd]:
|
||||
if " " not in sub_text:
|
||||
for sub in SUBCOMMANDS[base_cmd]:
|
||||
|
||||
+17
-245
@@ -39,10 +39,6 @@ _EXTRA_ENV_KEYS = frozenset({
|
||||
"DINGTALK_CLIENT_ID", "DINGTALK_CLIENT_SECRET",
|
||||
"FEISHU_APP_ID", "FEISHU_APP_SECRET", "FEISHU_ENCRYPT_KEY", "FEISHU_VERIFICATION_TOKEN",
|
||||
"WECOM_BOT_ID", "WECOM_SECRET",
|
||||
"WEIXIN_ACCOUNT_ID", "WEIXIN_TOKEN", "WEIXIN_BASE_URL", "WEIXIN_CDN_BASE_URL",
|
||||
"WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
|
||||
"WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
|
||||
"BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
|
||||
"TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
|
||||
"WHATSAPP_MODE", "WHATSAPP_ENABLED",
|
||||
"MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
|
||||
@@ -161,39 +157,15 @@ def get_project_root() -> Path:
|
||||
return Path(__file__).parent.parent.resolve()
|
||||
|
||||
def _secure_dir(path):
|
||||
"""Set directory to owner-only access (0700 by default). No-op on Windows.
|
||||
|
||||
Skipped in managed mode — the NixOS module sets group-readable
|
||||
permissions (0750) so interactive users in the hermes group can
|
||||
share state with the gateway service.
|
||||
|
||||
The mode can be overridden via the HERMES_HOME_MODE environment variable
|
||||
(e.g. HERMES_HOME_MODE=0701) for deployments where a web server (nginx,
|
||||
caddy, etc.) needs to traverse HERMES_HOME to reach a served subdirectory.
|
||||
The execute-only bit on a directory permits cd-through without exposing
|
||||
directory listings.
|
||||
"""
|
||||
if is_managed():
|
||||
return
|
||||
"""Set directory to owner-only access (0700). No-op on Windows."""
|
||||
try:
|
||||
mode_str = os.environ.get("HERMES_HOME_MODE", "").strip()
|
||||
mode = int(mode_str, 8) if mode_str else 0o700
|
||||
except ValueError:
|
||||
mode = 0o700
|
||||
try:
|
||||
os.chmod(path, mode)
|
||||
os.chmod(path, 0o700)
|
||||
except (OSError, NotImplementedError):
|
||||
pass
|
||||
|
||||
|
||||
def _secure_file(path):
|
||||
"""Set file to owner-only read/write (0600). No-op on Windows.
|
||||
|
||||
Skipped in managed mode — the NixOS activation script sets
|
||||
group-readable permissions (0640) on config files.
|
||||
"""
|
||||
if is_managed():
|
||||
return
|
||||
"""Set file to owner-only read/write (0600). No-op on Windows."""
|
||||
try:
|
||||
if os.path.exists(str(path)):
|
||||
os.chmod(path, 0o600)
|
||||
@@ -211,44 +183,14 @@ def _ensure_default_soul_md(home: Path) -> None:
|
||||
|
||||
|
||||
def ensure_hermes_home():
|
||||
"""Ensure ~/.hermes directory structure exists with secure permissions.
|
||||
|
||||
In managed mode (NixOS), dirs are created by the activation script with
|
||||
setgid + group-writable (2770). We skip mkdir and set umask(0o007) so
|
||||
any files created (e.g. SOUL.md) are group-writable (0660).
|
||||
"""
|
||||
"""Ensure ~/.hermes directory structure exists with secure permissions."""
|
||||
home = get_hermes_home()
|
||||
if is_managed():
|
||||
old_umask = os.umask(0o007)
|
||||
try:
|
||||
_ensure_hermes_home_managed(home)
|
||||
finally:
|
||||
os.umask(old_umask)
|
||||
else:
|
||||
home.mkdir(parents=True, exist_ok=True)
|
||||
_secure_dir(home)
|
||||
for subdir in ("cron", "sessions", "logs", "memories"):
|
||||
d = home / subdir
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
_secure_dir(d)
|
||||
_ensure_default_soul_md(home)
|
||||
|
||||
|
||||
def _ensure_hermes_home_managed(home: Path):
|
||||
"""Managed-mode variant: verify dirs exist (activation creates them), seed SOUL.md."""
|
||||
if not home.is_dir():
|
||||
raise RuntimeError(
|
||||
f"HERMES_HOME {home} does not exist. "
|
||||
"Run 'sudo nixos-rebuild switch' first."
|
||||
)
|
||||
home.mkdir(parents=True, exist_ok=True)
|
||||
_secure_dir(home)
|
||||
for subdir in ("cron", "sessions", "logs", "memories"):
|
||||
d = home / subdir
|
||||
if not d.is_dir():
|
||||
raise RuntimeError(
|
||||
f"{d} does not exist. "
|
||||
"Run 'sudo nixos-rebuild switch' first."
|
||||
)
|
||||
# Inside umask(0o007) scope — SOUL.md will be created as 0660
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
_secure_dir(d)
|
||||
_ensure_default_soul_md(home)
|
||||
|
||||
|
||||
@@ -269,22 +211,12 @@ DEFAULT_CONFIG = {
|
||||
# tools or receiving API responses. Only fires when the agent has
|
||||
# been completely idle for this duration. 0 = unlimited.
|
||||
"gateway_timeout": 1800,
|
||||
# Graceful drain timeout for gateway stop/restart (seconds).
|
||||
# The gateway stops accepting new work, waits for running agents
|
||||
# to finish, then interrupts any remaining runs after the timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
"restart_drain_timeout": 60,
|
||||
"service_tier": "",
|
||||
# Tool-use enforcement: injects system prompt guidance that tells the
|
||||
# model to actually call tools instead of describing intended actions.
|
||||
# Values: "auto" (default — applies to gpt/codex models), true/false
|
||||
# (force on/off for all models), or a list of model-name substrings
|
||||
# to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
|
||||
"tool_use_enforcement": "auto",
|
||||
# Staged inactivity warning: send a warning to the user at this
|
||||
# threshold before escalating to a full timeout. The warning fires
|
||||
# once per run and does not interrupt the agent. 0 = disable warning.
|
||||
"gateway_timeout_warning": 900,
|
||||
},
|
||||
|
||||
"terminal": {
|
||||
@@ -447,7 +379,6 @@ DEFAULT_CONFIG = {
|
||||
"show_cost": False, # Show $ cost in the status bar (off by default)
|
||||
"skin": "default",
|
||||
"tool_progress_command": False, # Enable /verbose command in messaging gateway
|
||||
"tool_progress_overrides": {}, # Per-platform overrides: {"signal": "off", "telegram": "all"}
|
||||
"tool_preview_length": 0, # Max chars for tool call previews (0 = no limit, show full paths/commands)
|
||||
},
|
||||
|
||||
@@ -458,7 +389,7 @@ DEFAULT_CONFIG = {
|
||||
|
||||
# Text-to-speech configuration
|
||||
"tts": {
|
||||
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "minimax" | "mistral" | "neutts" (local)
|
||||
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "neutts" (local)
|
||||
"edge": {
|
||||
"voice": "en-US-AriaNeural",
|
||||
# Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
|
||||
@@ -472,10 +403,6 @@ DEFAULT_CONFIG = {
|
||||
"voice": "alloy",
|
||||
# Voices: alloy, echo, fable, onyx, nova, shimmer
|
||||
},
|
||||
"mistral": {
|
||||
"model": "voxtral-mini-tts-2603",
|
||||
"voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8", # Paul - Neutral
|
||||
},
|
||||
"neutts": {
|
||||
"ref_audio": "", # Path to reference voice audio (empty = bundled default)
|
||||
"ref_text": "", # Path to reference voice transcript (empty = bundled default)
|
||||
@@ -486,17 +413,13 @@ DEFAULT_CONFIG = {
|
||||
|
||||
"stt": {
|
||||
"enabled": True,
|
||||
"provider": "local", # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API) | "mistral" (Voxtral Transcribe)
|
||||
"provider": "local", # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API)
|
||||
"local": {
|
||||
"model": "base", # tiny, base, small, medium, large-v3
|
||||
"language": "", # auto-detect by default; set to "en", "es", "fr", etc. to force
|
||||
},
|
||||
"openai": {
|
||||
"model": "whisper-1", # whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe
|
||||
},
|
||||
"mistral": {
|
||||
"model": "voxtral-mini-latest", # voxtral-mini-latest, voxtral-mini-2602
|
||||
},
|
||||
},
|
||||
|
||||
"voice": {
|
||||
@@ -513,16 +436,6 @@ DEFAULT_CONFIG = {
|
||||
"max_ms": 2500,
|
||||
},
|
||||
|
||||
# Context engine -- controls how the context window is managed when
|
||||
# approaching the model's token limit.
|
||||
# "compressor" = built-in lossy summarization (default).
|
||||
# Set to a plugin name to activate an alternative engine (e.g. "lcm"
|
||||
# for Lossless Context Management). The engine must be installed as
|
||||
# a plugin in plugins/context_engine/<name>/ or ~/.hermes/plugins/.
|
||||
"context": {
|
||||
"engine": "compressor",
|
||||
},
|
||||
|
||||
# Persistent memory -- bounded curated memory injected into system prompt
|
||||
"memory": {
|
||||
"memory_enabled": True,
|
||||
@@ -547,8 +460,6 @@ DEFAULT_CONFIG = {
|
||||
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||
"max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget,
|
||||
# independent of the parent's max_iterations)
|
||||
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
|
||||
# "low", "minimal", "none" (empty = inherit parent's level)
|
||||
},
|
||||
|
||||
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
||||
@@ -576,7 +487,6 @@ DEFAULT_CONFIG = {
|
||||
"discord": {
|
||||
"require_mention": True, # Require @mention to respond in server channels
|
||||
"free_response_channels": "", # Comma-separated channel IDs where bot responds without mention
|
||||
"allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist)
|
||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||
},
|
||||
@@ -636,7 +546,7 @@ DEFAULT_CONFIG = {
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 14,
|
||||
"_config_version": 12,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
@@ -813,14 +723,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"HERMES_QWEN_BASE_URL": {
|
||||
"description": "Qwen Portal base URL override (default: https://portal.qwen.ai/v1)",
|
||||
"prompt": "Qwen Portal base URL (leave empty for default)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"OPENCODE_ZEN_API_KEY": {
|
||||
"description": "OpenCode Zen API key (pay-as-you-go access to curated models)",
|
||||
"prompt": "OpenCode Zen API key",
|
||||
@@ -1020,13 +922,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"password": True,
|
||||
"category": "tool",
|
||||
},
|
||||
"MISTRAL_API_KEY": {
|
||||
"description": "Mistral API key for Voxtral TTS and transcription (STT)",
|
||||
"prompt": "Mistral API key",
|
||||
"url": "https://console.mistral.ai/",
|
||||
"password": True,
|
||||
"category": "tool",
|
||||
},
|
||||
"GITHUB_TOKEN": {
|
||||
"description": "GitHub token for Skills Hub (higher API rate limits, skill publish)",
|
||||
"prompt": "GitHub Token",
|
||||
@@ -1079,13 +974,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"DISCORD_REPLY_TO_MODE": {
|
||||
"description": "Discord reply threading mode: 'off' (no reply references), 'first' (reply on first message only, default), 'all' (reply on every chunk)",
|
||||
"prompt": "Discord reply mode (off/first/all)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"SLACK_BOT_TOKEN": {
|
||||
"description": "Slack bot token (xoxb-). Get from OAuth & Permissions after installing your app. "
|
||||
"Required scopes: chat:write, app_mentions:read, channels:history, groups:history, "
|
||||
@@ -1199,27 +1087,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"BLUEBUBBLES_SERVER_URL": {
|
||||
"description": "BlueBubbles server URL for iMessage integration (e.g. http://192.168.1.10:1234)",
|
||||
"prompt": "BlueBubbles server URL",
|
||||
"url": "https://bluebubbles.app/",
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"BLUEBUBBLES_PASSWORD": {
|
||||
"description": "BlueBubbles server password (from BlueBubbles Server → Settings → API)",
|
||||
"prompt": "BlueBubbles server password",
|
||||
"url": None,
|
||||
"password": True,
|
||||
"category": "messaging",
|
||||
},
|
||||
"BLUEBUBBLES_ALLOWED_USERS": {
|
||||
"description": "Comma-separated iMessage addresses (email or phone) allowed to use the bot",
|
||||
"prompt": "Allowed iMessage addresses (comma-separated)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"GATEWAY_ALLOW_ALL_USERS": {
|
||||
"description": "Allow all users to interact with messaging bots (true/false). Default: false.",
|
||||
"prompt": "Allow all users (true/false)",
|
||||
@@ -1237,8 +1104,8 @@ OPTIONAL_ENV_VARS = {
|
||||
"advanced": True,
|
||||
},
|
||||
"API_SERVER_KEY": {
|
||||
"description": "Bearer token for API server authentication. Required for non-loopback binding; server refuses to start without it. On loopback (127.0.0.1), all requests are allowed if empty.",
|
||||
"prompt": "API server auth key (required for network access)",
|
||||
"description": "Bearer token for API server authentication. If empty, all requests are allowed (local use only).",
|
||||
"prompt": "API server auth key (optional)",
|
||||
"url": None,
|
||||
"password": True,
|
||||
"category": "messaging",
|
||||
@@ -1253,21 +1120,13 @@ OPTIONAL_ENV_VARS = {
|
||||
"advanced": True,
|
||||
},
|
||||
"API_SERVER_HOST": {
|
||||
"description": "Host/bind address for the API server (default: 127.0.0.1). Use 0.0.0.0 for network access — server refuses to start without API_SERVER_KEY.",
|
||||
"description": "Host/bind address for the API server (default: 127.0.0.1). Use 0.0.0.0 for network access — requires API_SERVER_KEY for security.",
|
||||
"prompt": "API server host",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"API_SERVER_MODEL_NAME": {
|
||||
"description": "Model name advertised on /v1/models. Defaults to the profile name (or 'hermes-agent' for the default profile). Useful for multi-user setups with OpenWebUI.",
|
||||
"prompt": "API server model name",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"WEBHOOK_ENABLED": {
|
||||
"description": "Enable the webhook platform adapter for receiving events from GitHub, GitLab, etc.",
|
||||
"prompt": "Enable webhooks (true/false)",
|
||||
@@ -1299,7 +1158,7 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "setting",
|
||||
},
|
||||
"SUDO_PASSWORD": {
|
||||
"description": "Sudo password for terminal commands requiring root access; set to an explicit empty string to try empty without prompting",
|
||||
"description": "Sudo password for terminal commands requiring root access",
|
||||
"prompt": "Sudo password",
|
||||
"url": None,
|
||||
"password": True,
|
||||
@@ -1478,7 +1337,7 @@ _KNOWN_ROOT_KEYS = {
|
||||
"_config_version", "model", "providers", "fallback_model",
|
||||
"fallback_providers", "credential_pool_strategies", "toolsets",
|
||||
"agent", "terminal", "display", "compression", "delegation",
|
||||
"auxiliary", "custom_providers", "context", "memory", "gateway",
|
||||
"auxiliary", "custom_providers", "memory", "gateway",
|
||||
}
|
||||
|
||||
# Valid fields inside a custom_providers list entry
|
||||
@@ -1783,71 +1642,6 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
|
||||
ep = providers_dict[key]
|
||||
print(f" → {key}: {ep.get('api', '')}")
|
||||
|
||||
# ── Version 12 → 13: clear dead LLM_MODEL / OPENAI_MODEL from .env ──
|
||||
# These env vars were written by the old setup wizard but nothing reads
|
||||
# them anymore (config.yaml is the sole source of truth since March 2026).
|
||||
# Stale entries cause user confusion — see issue report.
|
||||
if current_ver < 13:
|
||||
for dead_var in ("LLM_MODEL", "OPENAI_MODEL"):
|
||||
try:
|
||||
old_val = get_env_value(dead_var)
|
||||
if old_val:
|
||||
save_env_value(dead_var, "")
|
||||
if not quiet:
|
||||
print(f" ✓ Cleared {dead_var} from .env (no longer used — config.yaml is source of truth)")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ── Version 13 → 14: migrate legacy flat stt.model to provider section ──
|
||||
# Old configs (and cli-config.yaml.example) had a flat `stt.model` key
|
||||
# that was provider-agnostic. When the provider was "local" this caused
|
||||
# OpenAI model names (e.g. "whisper-1") to be fed to faster-whisper,
|
||||
# crashing with "Invalid model size". Move the value into the correct
|
||||
# provider-specific section and remove the flat key.
|
||||
if current_ver < 14:
|
||||
# Read raw config (no defaults merged) to check what the user actually
|
||||
# wrote, then apply changes to the merged config for saving.
|
||||
raw = read_raw_config()
|
||||
raw_stt = raw.get("stt", {})
|
||||
if isinstance(raw_stt, dict) and "model" in raw_stt:
|
||||
legacy_model = raw_stt["model"]
|
||||
provider = raw_stt.get("provider", "local")
|
||||
config = load_config()
|
||||
stt = config.get("stt", {})
|
||||
# Remove the legacy flat key
|
||||
stt.pop("model", None)
|
||||
# Place it in the appropriate provider section only if the
|
||||
# user didn't already set a model there
|
||||
if provider in ("local", "local_command"):
|
||||
# Don't migrate an OpenAI model name into the local section
|
||||
_local_models = {
|
||||
"tiny.en", "tiny", "base.en", "base", "small.en", "small",
|
||||
"medium.en", "medium", "large-v1", "large-v2", "large-v3",
|
||||
"large", "distil-large-v2", "distil-medium.en",
|
||||
"distil-small.en", "distil-large-v3", "distil-large-v3.5",
|
||||
"large-v3-turbo", "turbo",
|
||||
}
|
||||
if legacy_model in _local_models:
|
||||
# Check raw config — only set if user didn't already
|
||||
# have a nested local.model
|
||||
raw_local = raw_stt.get("local", {})
|
||||
if not isinstance(raw_local, dict) or "model" not in raw_local:
|
||||
local_cfg = stt.setdefault("local", {})
|
||||
local_cfg["model"] = legacy_model
|
||||
# else: drop it — it was an OpenAI model name, local section
|
||||
# already defaults to "base" via DEFAULT_CONFIG
|
||||
else:
|
||||
# Cloud provider — put it in that provider's section only
|
||||
# if user didn't already set a nested model
|
||||
raw_provider = raw_stt.get(provider, {})
|
||||
if not isinstance(raw_provider, dict) or "model" not in raw_provider:
|
||||
provider_cfg = stt.setdefault(provider, {})
|
||||
provider_cfg["model"] = legacy_model
|
||||
config["stt"] = stt
|
||||
save_config(config)
|
||||
if not quiet:
|
||||
print(f" ✓ Migrated legacy stt.model to provider-specific config")
|
||||
|
||||
if current_ver < latest_ver and not quiet:
|
||||
print(f"Config version: {current_ver} → {latest_ver}")
|
||||
|
||||
@@ -2087,24 +1881,6 @@ def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
|
||||
|
||||
|
||||
|
||||
def read_raw_config() -> Dict[str, Any]:
|
||||
"""Read ~/.hermes/config.yaml as-is, without merging defaults or migrating.
|
||||
|
||||
Returns the raw YAML dict, or ``{}`` if the file doesn't exist or can't
|
||||
be parsed. Use this for lightweight config reads where you just need a
|
||||
single value and don't want the overhead of ``load_config()``'s deep-merge
|
||||
+ migration pipeline.
|
||||
"""
|
||||
try:
|
||||
config_path = get_config_path()
|
||||
if config_path.exists():
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
return yaml.safe_load(f) or {}
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
|
||||
def load_config() -> Dict[str, Any]:
|
||||
"""Load configuration from ~/.hermes/config.yaml."""
|
||||
import copy
|
||||
@@ -2744,7 +2520,7 @@ def set_config_value(key: str, value: str):
|
||||
'TINKER_API_KEY',
|
||||
]
|
||||
|
||||
if key.upper() in api_keys or key.upper().endswith(('_API_KEY', '_TOKEN')) or key.upper().startswith('TERMINAL_SSH'):
|
||||
if key.upper() in api_keys or key.upper().endswith('_API_KEY') or key.upper().endswith('_TOKEN') or key.upper().startswith('TERMINAL_SSH'):
|
||||
save_env_value(key.upper(), value)
|
||||
print(f"✓ Set {key} in {get_env_path()}")
|
||||
return
|
||||
@@ -2801,10 +2577,6 @@ def set_config_value(key: str, value: str):
|
||||
"terminal.timeout": "TERMINAL_TIMEOUT",
|
||||
"terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||||
"terminal.persistent_shell": "TERMINAL_PERSISTENT_SHELL",
|
||||
"terminal.container_cpu": "TERMINAL_CONTAINER_CPU",
|
||||
"terminal.container_memory": "TERMINAL_CONTAINER_MEMORY",
|
||||
"terminal.container_disk": "TERMINAL_CONTAINER_DISK",
|
||||
"terminal.container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
|
||||
}
|
||||
if key in _config_to_env_sync:
|
||||
save_env_value(_config_to_env_sync[key], str(value))
|
||||
|
||||
@@ -31,6 +31,13 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
# OAuth device code flow constants (same client ID as opencode/Copilot CLI)
|
||||
COPILOT_OAUTH_CLIENT_ID = "Ov23li8tweQw6odWQebz"
|
||||
COPILOT_DEVICE_CODE_URL = "https://github.com/login/device/code"
|
||||
COPILOT_ACCESS_TOKEN_URL = "https://github.com/login/oauth/access_token"
|
||||
|
||||
# Copilot API constants
|
||||
COPILOT_TOKEN_EXCHANGE_URL = "https://api.github.com/copilot_internal/v2/token"
|
||||
COPILOT_API_BASE_URL = "https://api.githubcopilot.com"
|
||||
|
||||
# Token type prefixes
|
||||
_CLASSIC_PAT_PREFIX = "ghp_"
|
||||
_SUPPORTED_PREFIXES = ("gho_", "github_pat_", "ghu_")
|
||||
@@ -43,6 +50,11 @@ _DEVICE_CODE_POLL_INTERVAL = 5 # seconds
|
||||
_DEVICE_CODE_POLL_SAFETY_MARGIN = 3 # seconds
|
||||
|
||||
|
||||
def is_classic_pat(token: str) -> bool:
|
||||
"""Check if a token is a classic PAT (ghp_*), which Copilot doesn't support."""
|
||||
return token.strip().startswith(_CLASSIC_PAT_PREFIX)
|
||||
|
||||
|
||||
def validate_copilot_token(token: str) -> tuple[bool, str]:
|
||||
"""Validate that a token is usable with the Copilot API.
|
||||
|
||||
@@ -273,7 +285,6 @@ def copilot_request_headers(
|
||||
headers: dict[str, str] = {
|
||||
"Editor-Version": "vscode/1.104.1",
|
||||
"User-Agent": "HermesAgent/1.0",
|
||||
"Copilot-Integration-Id": "vscode-chat",
|
||||
"Openai-Intent": "conversation-edits",
|
||||
"x-initiator": "agent" if is_agent_turn else "user",
|
||||
}
|
||||
|
||||
@@ -93,21 +93,6 @@ def cron_list(show_all: bool = False):
|
||||
script = job.get("script")
|
||||
if script:
|
||||
print(f" Script: {script}")
|
||||
|
||||
# Execution history
|
||||
last_status = job.get("last_status")
|
||||
if last_status:
|
||||
last_run = job.get("last_run_at", "?")
|
||||
if last_status == "ok":
|
||||
status_display = color("ok", Colors.GREEN)
|
||||
else:
|
||||
status_display = color(f"{last_status}: {job.get('last_error', '?')}", Colors.RED)
|
||||
print(f" Last run: {last_run} {status_display}")
|
||||
|
||||
delivery_err = job.get("last_delivery_error")
|
||||
if delivery_err:
|
||||
print(f" {color('⚠ Delivery failed:', Colors.YELLOW)} {delivery_err}")
|
||||
|
||||
print()
|
||||
|
||||
from hermes_cli.gateway import find_gateway_pids
|
||||
|
||||
@@ -10,28 +10,6 @@ from typing import Callable, List, Optional, Set
|
||||
from hermes_cli.colors import Colors, color
|
||||
|
||||
|
||||
def flush_stdin() -> None:
|
||||
"""Flush any stray bytes from the stdin input buffer.
|
||||
|
||||
Must be called after ``curses.wrapper()`` (or any terminal-mode library
|
||||
like simple_term_menu) returns, **before** the next ``input()`` /
|
||||
``getpass.getpass()`` call. ``curses.endwin()`` restores the terminal
|
||||
but does NOT drain the OS input buffer — leftover escape-sequence bytes
|
||||
(from arrow keys, terminal mode-switch responses, or rapid keypresses)
|
||||
remain buffered and silently get consumed by the next ``input()`` call,
|
||||
corrupting user data (e.g. writing ``^[^[`` into .env files).
|
||||
|
||||
On non-TTY stdin (piped, redirected) or Windows, this is a no-op.
|
||||
"""
|
||||
try:
|
||||
if not sys.stdin.isatty():
|
||||
return
|
||||
import termios
|
||||
termios.tcflush(sys.stdin, termios.TCIFLUSH)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def curses_checklist(
|
||||
title: str,
|
||||
items: List[str],
|
||||
@@ -153,140 +131,12 @@ def curses_checklist(
|
||||
return
|
||||
|
||||
curses.wrapper(_draw)
|
||||
flush_stdin()
|
||||
return result_holder[0] if result_holder[0] is not None else cancel_returns
|
||||
|
||||
except Exception:
|
||||
return _numbered_fallback(title, items, selected, cancel_returns, status_fn)
|
||||
|
||||
|
||||
def curses_radiolist(
|
||||
title: str,
|
||||
items: List[str],
|
||||
selected: int = 0,
|
||||
*,
|
||||
cancel_returns: int | None = None,
|
||||
) -> int:
|
||||
"""Curses single-select radio list. Returns the selected index.
|
||||
|
||||
Args:
|
||||
title: Header line displayed above the list.
|
||||
items: Display labels for each row.
|
||||
selected: Index that starts selected (pre-selected).
|
||||
cancel_returns: Returned on ESC/q. Defaults to the original *selected*.
|
||||
"""
|
||||
if cancel_returns is None:
|
||||
cancel_returns = selected
|
||||
|
||||
if not sys.stdin.isatty():
|
||||
return cancel_returns
|
||||
|
||||
try:
|
||||
import curses
|
||||
result_holder: list = [None]
|
||||
|
||||
def _draw(stdscr):
|
||||
curses.curs_set(0)
|
||||
if curses.has_colors():
|
||||
curses.start_color()
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
cursor = selected
|
||||
scroll_offset = 0
|
||||
|
||||
while True:
|
||||
stdscr.clear()
|
||||
max_y, max_x = stdscr.getmaxyx()
|
||||
|
||||
# Header
|
||||
try:
|
||||
hattr = curses.A_BOLD
|
||||
if curses.has_colors():
|
||||
hattr |= curses.color_pair(2)
|
||||
stdscr.addnstr(0, 0, title, max_x - 1, hattr)
|
||||
stdscr.addnstr(
|
||||
1, 0,
|
||||
" \u2191\u2193 navigate ENTER/SPACE select ESC cancel",
|
||||
max_x - 1, curses.A_DIM,
|
||||
)
|
||||
except curses.error:
|
||||
pass
|
||||
|
||||
# Scrollable item list
|
||||
visible_rows = max_y - 4
|
||||
if cursor < scroll_offset:
|
||||
scroll_offset = cursor
|
||||
elif cursor >= scroll_offset + visible_rows:
|
||||
scroll_offset = cursor - visible_rows + 1
|
||||
|
||||
for draw_i, i in enumerate(
|
||||
range(scroll_offset, min(len(items), scroll_offset + visible_rows))
|
||||
):
|
||||
y = draw_i + 3
|
||||
if y >= max_y - 1:
|
||||
break
|
||||
radio = "\u25cf" if i == selected else "\u25cb"
|
||||
arrow = "\u2192" if i == cursor else " "
|
||||
line = f" {arrow} ({radio}) {items[i]}"
|
||||
attr = curses.A_NORMAL
|
||||
if i == cursor:
|
||||
attr = curses.A_BOLD
|
||||
if curses.has_colors():
|
||||
attr |= curses.color_pair(1)
|
||||
try:
|
||||
stdscr.addnstr(y, 0, line, max_x - 1, attr)
|
||||
except curses.error:
|
||||
pass
|
||||
|
||||
stdscr.refresh()
|
||||
key = stdscr.getch()
|
||||
|
||||
if key in (curses.KEY_UP, ord("k")):
|
||||
cursor = (cursor - 1) % len(items)
|
||||
elif key in (curses.KEY_DOWN, ord("j")):
|
||||
cursor = (cursor + 1) % len(items)
|
||||
elif key in (ord(" "), curses.KEY_ENTER, 10, 13):
|
||||
result_holder[0] = cursor
|
||||
return
|
||||
elif key in (27, ord("q")):
|
||||
result_holder[0] = cancel_returns
|
||||
return
|
||||
|
||||
curses.wrapper(_draw)
|
||||
flush_stdin()
|
||||
return result_holder[0] if result_holder[0] is not None else cancel_returns
|
||||
|
||||
except Exception:
|
||||
return _radio_numbered_fallback(title, items, selected, cancel_returns)
|
||||
|
||||
|
||||
def _radio_numbered_fallback(
|
||||
title: str,
|
||||
items: List[str],
|
||||
selected: int,
|
||||
cancel_returns: int,
|
||||
) -> int:
|
||||
"""Text-based numbered fallback for radio selection."""
|
||||
print(color(f"\n {title}", Colors.YELLOW))
|
||||
print(color(" Select by number, Enter to confirm.\n", Colors.DIM))
|
||||
|
||||
for i, label in enumerate(items):
|
||||
marker = color("(\u25cf)", Colors.GREEN) if i == selected else "(\u25cb)"
|
||||
print(f" {marker} {i + 1:>2}. {label}")
|
||||
print()
|
||||
try:
|
||||
val = input(color(f" Choice [default {selected + 1}]: ", Colors.DIM)).strip()
|
||||
if not val:
|
||||
return selected
|
||||
idx = int(val) - 1
|
||||
if 0 <= idx < len(items):
|
||||
return idx
|
||||
return selected
|
||||
except (ValueError, KeyboardInterrupt, EOFError):
|
||||
return cancel_returns
|
||||
|
||||
|
||||
def _numbered_fallback(
|
||||
title: str,
|
||||
items: List[str],
|
||||
|
||||
+69
-132
@@ -54,32 +54,6 @@ _PROVIDER_ENV_HINTS = (
|
||||
)
|
||||
|
||||
|
||||
from hermes_constants import is_termux as _is_termux
|
||||
|
||||
|
||||
def _python_install_cmd() -> str:
|
||||
return "python -m pip install" if _is_termux() else "uv pip install"
|
||||
|
||||
|
||||
def _system_package_install_cmd(pkg: str) -> str:
|
||||
if _is_termux():
|
||||
return f"pkg install {pkg}"
|
||||
if sys.platform == "darwin":
|
||||
return f"brew install {pkg}"
|
||||
return f"sudo apt install {pkg}"
|
||||
|
||||
|
||||
def _termux_browser_setup_steps(node_installed: bool) -> list[str]:
|
||||
steps: list[str] = []
|
||||
step = 1
|
||||
if not node_installed:
|
||||
steps.append(f"{step}) pkg install nodejs")
|
||||
step += 1
|
||||
steps.append(f"{step}) npm install -g agent-browser")
|
||||
steps.append(f"{step + 1}) agent-browser install")
|
||||
return steps
|
||||
|
||||
|
||||
def _has_provider_env_config(content: str) -> bool:
|
||||
"""Return True when ~/.hermes/.env contains provider auth/base URL settings."""
|
||||
return any(key in content for key in _PROVIDER_ENV_HINTS)
|
||||
@@ -226,7 +200,7 @@ def run_doctor(args):
|
||||
check_ok(name)
|
||||
except ImportError:
|
||||
check_fail(name, "(missing)")
|
||||
issues.append(f"Install {name}: {_python_install_cmd()} {module}")
|
||||
issues.append(f"Install {name}: uv pip install {module}")
|
||||
|
||||
for module, name in optional_packages:
|
||||
try:
|
||||
@@ -529,7 +503,7 @@ def run_doctor(args):
|
||||
check_ok("ripgrep (rg)", "(faster file search)")
|
||||
else:
|
||||
check_warn("ripgrep (rg) not found", "(file search uses grep fallback)")
|
||||
check_info(f"Install for faster search: {_system_package_install_cmd('ripgrep')}")
|
||||
check_info("Install for faster search: sudo apt install ripgrep")
|
||||
|
||||
# Docker (optional)
|
||||
terminal_env = os.getenv("TERMINAL_ENV", "local")
|
||||
@@ -552,10 +526,7 @@ def run_doctor(args):
|
||||
if shutil.which("docker"):
|
||||
check_ok("docker", "(optional)")
|
||||
else:
|
||||
if _is_termux():
|
||||
check_info("Docker backend is not available inside Termux (expected on Android)")
|
||||
else:
|
||||
check_warn("docker not found", "(optional)")
|
||||
check_warn("docker not found", "(optional)")
|
||||
|
||||
# SSH (if using ssh backend)
|
||||
if terminal_env == "ssh":
|
||||
@@ -603,23 +574,9 @@ def run_doctor(args):
|
||||
if agent_browser_path.exists():
|
||||
check_ok("agent-browser (Node.js)", "(browser automation)")
|
||||
else:
|
||||
if _is_termux():
|
||||
check_info("agent-browser is not installed (expected in the tested Termux path)")
|
||||
check_info("Install it manually later with: npm install -g agent-browser && agent-browser install")
|
||||
check_info("Termux browser setup:")
|
||||
for step in _termux_browser_setup_steps(node_installed=True):
|
||||
check_info(step)
|
||||
else:
|
||||
check_warn("agent-browser not installed", "(run: npm install)")
|
||||
check_warn("agent-browser not installed", "(run: npm install)")
|
||||
else:
|
||||
if _is_termux():
|
||||
check_info("Node.js not found (browser tools are optional in the tested Termux path)")
|
||||
check_info("Install Node.js on Termux with: pkg install nodejs")
|
||||
check_info("Termux browser setup:")
|
||||
for step in _termux_browser_setup_steps(node_installed=False):
|
||||
check_info(step)
|
||||
else:
|
||||
check_warn("Node.js not found", "(optional, needed for browser tools)")
|
||||
check_warn("Node.js not found", "(optional, needed for browser tools)")
|
||||
|
||||
# npm audit for all Node.js packages
|
||||
if shutil.which("npm"):
|
||||
@@ -722,9 +679,9 @@ def run_doctor(args):
|
||||
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
|
||||
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
|
||||
("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
|
||||
# MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
|
||||
("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
|
||||
("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True),
|
||||
# MiniMax APIs don't support /models endpoint — https://github.com/NousResearch/hermes-agent/issues/811
|
||||
("MiniMax", ("MINIMAX_API_KEY",), None, "MINIMAX_BASE_URL", False),
|
||||
("MiniMax (China)", ("MINIMAX_CN_API_KEY",), None, "MINIMAX_CN_BASE_URL", False),
|
||||
("AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
|
||||
("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
|
||||
("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
|
||||
@@ -749,15 +706,10 @@ def run_doctor(args):
|
||||
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
|
||||
if not _base and _key.startswith("sk-kimi-"):
|
||||
_base = "https://api.kimi.com/coding/v1"
|
||||
# Anthropic-compat endpoints (/anthropic) don't support /models.
|
||||
# Rewrite to the OpenAI-compat /v1 surface for health checks.
|
||||
if _base and _base.rstrip("/").endswith("/anthropic"):
|
||||
from agent.auxiliary_client import _to_openai_base_url
|
||||
_base = _to_openai_base_url(_base)
|
||||
_url = (_base.rstrip("/") + "/models") if _base else _default_url
|
||||
_headers = {"Authorization": f"Bearer {_key}"}
|
||||
if "api.kimi.com" in _url.lower():
|
||||
_headers["User-Agent"] = "KimiCLI/1.30.0"
|
||||
_headers["User-Agent"] = "KimiCLI/1.0"
|
||||
_resp = httpx.get(
|
||||
_url,
|
||||
headers=_headers,
|
||||
@@ -787,9 +739,8 @@ def run_doctor(args):
|
||||
__import__("tinker_atropos")
|
||||
check_ok("tinker-atropos", "(RL training backend)")
|
||||
except ImportError:
|
||||
install_cmd = f"{_python_install_cmd()} -e ./tinker-atropos"
|
||||
check_warn("tinker-atropos found but not installed", f"(run: {install_cmd})")
|
||||
issues.append(f"Install tinker-atropos: {install_cmd}")
|
||||
check_warn("tinker-atropos found but not installed", "(run: uv pip install -e ./tinker-atropos)")
|
||||
issues.append("Install tinker-atropos: uv pip install -e ./tinker-atropos")
|
||||
else:
|
||||
check_warn("tinker-atropos requires Python 3.11+", f"(current: {py_version.major}.{py_version.minor})")
|
||||
else:
|
||||
@@ -861,83 +812,69 @@ def run_doctor(args):
|
||||
check_warn("No GITHUB_TOKEN", f"(60 req/hr rate limit — set in {_DHH}/.env for better rates)")
|
||||
|
||||
# =========================================================================
|
||||
# Memory Provider (only check the active provider, if any)
|
||||
# Honcho memory
|
||||
# =========================================================================
|
||||
print()
|
||||
print(color("◆ Memory Provider", Colors.CYAN, Colors.BOLD))
|
||||
print(color("◆ Honcho Memory", Colors.CYAN, Colors.BOLD))
|
||||
|
||||
_active_memory_provider = ""
|
||||
try:
|
||||
import yaml as _yaml
|
||||
_mem_cfg_path = HERMES_HOME / "config.yaml"
|
||||
if _mem_cfg_path.exists():
|
||||
with open(_mem_cfg_path) as _f:
|
||||
_raw_cfg = _yaml.safe_load(_f) or {}
|
||||
_active_memory_provider = (_raw_cfg.get("memory") or {}).get("provider", "")
|
||||
except Exception:
|
||||
pass
|
||||
from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path
|
||||
hcfg = HonchoClientConfig.from_global_config()
|
||||
_honcho_cfg_path = resolve_config_path()
|
||||
|
||||
if not _active_memory_provider:
|
||||
check_ok("Built-in memory active", "(no external provider configured — this is fine)")
|
||||
elif _active_memory_provider == "honcho":
|
||||
try:
|
||||
from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path
|
||||
hcfg = HonchoClientConfig.from_global_config()
|
||||
_honcho_cfg_path = resolve_config_path()
|
||||
if not _honcho_cfg_path.exists():
|
||||
check_warn("Honcho config not found", "run: hermes memory setup")
|
||||
elif not hcfg.enabled:
|
||||
check_info(f"Honcho disabled (set enabled: true in {_honcho_cfg_path} to activate)")
|
||||
elif not (hcfg.api_key or hcfg.base_url):
|
||||
check_fail("Honcho API key or base URL not set", "run: hermes memory setup")
|
||||
issues.append("No Honcho API key — run 'hermes memory setup'")
|
||||
else:
|
||||
from plugins.memory.honcho.client import get_honcho_client, reset_honcho_client
|
||||
reset_honcho_client()
|
||||
try:
|
||||
get_honcho_client(hcfg)
|
||||
check_ok(
|
||||
"Honcho connected",
|
||||
f"workspace={hcfg.workspace_id} mode={hcfg.recall_mode} freq={hcfg.write_frequency}",
|
||||
)
|
||||
except Exception as _e:
|
||||
check_fail("Honcho connection failed", str(_e))
|
||||
issues.append(f"Honcho unreachable: {_e}")
|
||||
except ImportError:
|
||||
check_warn("honcho-ai not installed", "pip install honcho-ai")
|
||||
except Exception as _e:
|
||||
check_warn("Honcho check failed", str(_e))
|
||||
|
||||
if not _honcho_cfg_path.exists():
|
||||
check_warn("Honcho config not found", "run: hermes memory setup")
|
||||
elif not hcfg.enabled:
|
||||
check_info(f"Honcho disabled (set enabled: true in {_honcho_cfg_path} to activate)")
|
||||
elif not (hcfg.api_key or hcfg.base_url):
|
||||
check_fail("Honcho API key or base URL not set", "run: hermes memory setup")
|
||||
issues.append("No Honcho API key — run 'hermes memory setup'")
|
||||
else:
|
||||
from plugins.memory.honcho.client import get_honcho_client, reset_honcho_client
|
||||
reset_honcho_client()
|
||||
# =========================================================================
|
||||
# Mem0 memory
|
||||
# =========================================================================
|
||||
print()
|
||||
print(color("◆ Mem0 Memory", Colors.CYAN, Colors.BOLD))
|
||||
|
||||
try:
|
||||
from plugins.memory.mem0 import _load_config as _load_mem0_config
|
||||
mem0_cfg = _load_mem0_config()
|
||||
mem0_key = mem0_cfg.get("api_key", "")
|
||||
if mem0_key:
|
||||
check_ok("Mem0 API key configured")
|
||||
check_info(f"user_id={mem0_cfg.get('user_id', '?')} agent_id={mem0_cfg.get('agent_id', '?')}")
|
||||
# Check if mem0.json exists but is missing api_key (the bug we fixed)
|
||||
mem0_json = HERMES_HOME / "mem0.json"
|
||||
if mem0_json.exists():
|
||||
try:
|
||||
get_honcho_client(hcfg)
|
||||
check_ok(
|
||||
"Honcho connected",
|
||||
f"workspace={hcfg.workspace_id} mode={hcfg.recall_mode} freq={hcfg.write_frequency}",
|
||||
)
|
||||
except Exception as _e:
|
||||
check_fail("Honcho connection failed", str(_e))
|
||||
issues.append(f"Honcho unreachable: {_e}")
|
||||
except ImportError:
|
||||
check_fail("honcho-ai not installed", "pip install honcho-ai")
|
||||
issues.append("Honcho is set as memory provider but honcho-ai is not installed")
|
||||
except Exception as _e:
|
||||
check_warn("Honcho check failed", str(_e))
|
||||
elif _active_memory_provider == "mem0":
|
||||
try:
|
||||
from plugins.memory.mem0 import _load_config as _load_mem0_config
|
||||
mem0_cfg = _load_mem0_config()
|
||||
mem0_key = mem0_cfg.get("api_key", "")
|
||||
if mem0_key:
|
||||
check_ok("Mem0 API key configured")
|
||||
check_info(f"user_id={mem0_cfg.get('user_id', '?')} agent_id={mem0_cfg.get('agent_id', '?')}")
|
||||
else:
|
||||
check_fail("Mem0 API key not set", "(set MEM0_API_KEY in .env or run hermes memory setup)")
|
||||
issues.append("Mem0 is set as memory provider but API key is missing")
|
||||
except ImportError:
|
||||
check_fail("Mem0 plugin not loadable", "pip install mem0ai")
|
||||
issues.append("Mem0 is set as memory provider but mem0ai is not installed")
|
||||
except Exception as _e:
|
||||
check_warn("Mem0 check failed", str(_e))
|
||||
else:
|
||||
# Generic check for other memory providers (openviking, hindsight, etc.)
|
||||
try:
|
||||
from plugins.memory import load_memory_provider
|
||||
_provider = load_memory_provider(_active_memory_provider)
|
||||
if _provider and _provider.is_available():
|
||||
check_ok(f"{_active_memory_provider} provider active")
|
||||
elif _provider:
|
||||
check_warn(f"{_active_memory_provider} configured but not available", "run: hermes memory status")
|
||||
else:
|
||||
check_warn(f"{_active_memory_provider} plugin not found", "run: hermes memory setup")
|
||||
except Exception as _e:
|
||||
check_warn(f"{_active_memory_provider} check failed", str(_e))
|
||||
import json as _json
|
||||
file_cfg = _json.loads(mem0_json.read_text())
|
||||
if not file_cfg.get("api_key") and mem0_key:
|
||||
check_info("api_key from .env (not in mem0.json) — this is fine")
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
check_warn("Mem0 not configured", "(set MEM0_API_KEY in .env or run hermes memory setup)")
|
||||
except ImportError:
|
||||
check_warn("Mem0 plugin not loadable", "(optional)")
|
||||
except Exception as _e:
|
||||
check_warn("Mem0 check failed", str(_e))
|
||||
|
||||
# =========================================================================
|
||||
# Profiles
|
||||
@@ -983,8 +920,8 @@ def run_doctor(args):
|
||||
pass
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as _e:
|
||||
logger.debug("Profile health check failed: %s", _e)
|
||||
|
||||
# =========================================================================
|
||||
# Summary
|
||||
|
||||
@@ -1,333 +0,0 @@
|
||||
"""
|
||||
Dump command for hermes CLI.
|
||||
|
||||
Outputs a compact, plain-text summary of the user's Hermes setup
|
||||
that can be copy-pasted into Discord/GitHub/Telegram for support context.
|
||||
No ANSI colors, no checkmarks — just data.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
|
||||
from hermes_constants import display_hermes_home
|
||||
|
||||
|
||||
def _get_git_commit(project_root: Path) -> str:
|
||||
"""Return short git commit hash, or '(unknown)'."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-parse", "--short=8", "HEAD"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
cwd=str(project_root),
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return result.stdout.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return "(unknown)"
|
||||
|
||||
|
||||
def _redact(value: str) -> str:
|
||||
"""Redact all but first 4 and last 4 chars."""
|
||||
if not value:
|
||||
return ""
|
||||
if len(value) < 12:
|
||||
return "***"
|
||||
return value[:4] + "..." + value[-4:]
|
||||
|
||||
|
||||
def _gateway_status() -> str:
|
||||
"""Return a short gateway status string."""
|
||||
if sys.platform.startswith("linux"):
|
||||
try:
|
||||
from hermes_cli.gateway import get_service_name
|
||||
svc = get_service_name()
|
||||
except Exception:
|
||||
svc = "hermes-gateway"
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["systemctl", "--user", "is-active", svc],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
return "running (systemd)" if r.stdout.strip() == "active" else "stopped"
|
||||
except Exception:
|
||||
return "unknown"
|
||||
elif sys.platform == "darwin":
|
||||
try:
|
||||
from hermes_cli.gateway import get_launchd_label
|
||||
r = subprocess.run(
|
||||
["launchctl", "list", get_launchd_label()],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
return "loaded (launchd)" if r.returncode == 0 else "not loaded"
|
||||
except Exception:
|
||||
return "unknown"
|
||||
return "N/A"
|
||||
|
||||
|
||||
def _count_skills(hermes_home: Path) -> int:
|
||||
"""Count installed skills."""
|
||||
skills_dir = hermes_home / "skills"
|
||||
if not skills_dir.is_dir():
|
||||
return 0
|
||||
count = 0
|
||||
for item in skills_dir.rglob("SKILL.md"):
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
def _count_mcp_servers(config: dict) -> int:
|
||||
"""Count configured MCP servers."""
|
||||
mcp = config.get("mcp", {})
|
||||
servers = mcp.get("servers", {})
|
||||
return len(servers)
|
||||
|
||||
|
||||
def _cron_summary(hermes_home: Path) -> str:
|
||||
"""Return cron jobs summary."""
|
||||
jobs_file = hermes_home / "cron" / "jobs.json"
|
||||
if not jobs_file.exists():
|
||||
return "0"
|
||||
try:
|
||||
with open(jobs_file, encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
jobs = data.get("jobs", [])
|
||||
active = sum(1 for j in jobs if j.get("enabled", True))
|
||||
return f"{active} active / {len(jobs)} total"
|
||||
except Exception:
|
||||
return "(error reading)"
|
||||
|
||||
|
||||
def _configured_platforms() -> list[str]:
|
||||
"""Return list of configured messaging platform names."""
|
||||
checks = {
|
||||
"telegram": "TELEGRAM_BOT_TOKEN",
|
||||
"discord": "DISCORD_BOT_TOKEN",
|
||||
"slack": "SLACK_BOT_TOKEN",
|
||||
"whatsapp": "WHATSAPP_ENABLED",
|
||||
"signal": "SIGNAL_HTTP_URL",
|
||||
"email": "EMAIL_ADDRESS",
|
||||
"sms": "TWILIO_ACCOUNT_SID",
|
||||
"matrix": "MATRIX_HOMESERVER_URL",
|
||||
"mattermost": "MATTERMOST_URL",
|
||||
"homeassistant": "HASS_TOKEN",
|
||||
"dingtalk": "DINGTALK_CLIENT_ID",
|
||||
"feishu": "FEISHU_APP_ID",
|
||||
"wecom": "WECOM_BOT_ID",
|
||||
"weixin": "WEIXIN_ACCOUNT_ID",
|
||||
}
|
||||
return [name for name, env in checks.items() if os.getenv(env)]
|
||||
|
||||
|
||||
def _memory_provider(config: dict) -> str:
|
||||
"""Return the active memory provider name."""
|
||||
mem = config.get("memory", {})
|
||||
provider = mem.get("provider", "")
|
||||
return provider if provider else "built-in"
|
||||
|
||||
|
||||
def _get_model_and_provider(config: dict) -> tuple[str, str]:
|
||||
"""Extract model and provider from config."""
|
||||
model_cfg = config.get("model", "")
|
||||
if isinstance(model_cfg, dict):
|
||||
model = model_cfg.get("default") or model_cfg.get("model") or model_cfg.get("name") or "(not set)"
|
||||
provider = model_cfg.get("provider") or "(auto)"
|
||||
elif isinstance(model_cfg, str):
|
||||
model = model_cfg or "(not set)"
|
||||
provider = "(auto)"
|
||||
else:
|
||||
model = "(not set)"
|
||||
provider = "(auto)"
|
||||
return model, provider
|
||||
|
||||
|
||||
def _config_overrides(config: dict) -> dict[str, str]:
|
||||
"""Find non-default config values worth reporting.
|
||||
|
||||
Returns a flat dict of dotpath -> value for interesting overrides.
|
||||
"""
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
overrides = {}
|
||||
|
||||
# Sections with interesting user-facing overrides
|
||||
interesting_paths = [
|
||||
("agent", "max_turns"),
|
||||
("agent", "gateway_timeout"),
|
||||
("agent", "tool_use_enforcement"),
|
||||
("terminal", "backend"),
|
||||
("terminal", "docker_image"),
|
||||
("terminal", "persistent_shell"),
|
||||
("browser", "allow_private_urls"),
|
||||
("compression", "enabled"),
|
||||
("compression", "threshold"),
|
||||
("display", "streaming"),
|
||||
("display", "skin"),
|
||||
("display", "show_reasoning"),
|
||||
("smart_model_routing", "enabled"),
|
||||
("privacy", "redact_pii"),
|
||||
("tts", "provider"),
|
||||
]
|
||||
|
||||
for section, key in interesting_paths:
|
||||
default_section = DEFAULT_CONFIG.get(section, {})
|
||||
user_section = config.get(section, {})
|
||||
if not isinstance(default_section, dict) or not isinstance(user_section, dict):
|
||||
continue
|
||||
default_val = default_section.get(key)
|
||||
user_val = user_section.get(key)
|
||||
if user_val is not None and user_val != default_val:
|
||||
overrides[f"{section}.{key}"] = str(user_val)
|
||||
|
||||
# Toolsets (if different from default)
|
||||
default_toolsets = DEFAULT_CONFIG.get("toolsets", [])
|
||||
user_toolsets = config.get("toolsets", [])
|
||||
if user_toolsets != default_toolsets:
|
||||
overrides["toolsets"] = str(user_toolsets)
|
||||
|
||||
# Fallback providers
|
||||
fallbacks = config.get("fallback_providers", [])
|
||||
if fallbacks:
|
||||
overrides["fallback_providers"] = str(fallbacks)
|
||||
|
||||
return overrides
|
||||
|
||||
|
||||
def run_dump(args):
|
||||
"""Output a compact, copy-pasteable setup summary."""
|
||||
show_keys = getattr(args, "show_keys", False)
|
||||
|
||||
# Load env from .env file so key checks work
|
||||
from dotenv import load_dotenv
|
||||
env_path = get_env_path()
|
||||
if env_path.exists():
|
||||
try:
|
||||
load_dotenv(env_path, encoding="utf-8")
|
||||
except UnicodeDecodeError:
|
||||
load_dotenv(env_path, encoding="latin-1")
|
||||
# Also try project .env as dev fallback
|
||||
load_dotenv(get_project_root() / ".env", override=False, encoding="utf-8")
|
||||
|
||||
project_root = get_project_root()
|
||||
hermes_home = get_hermes_home()
|
||||
|
||||
try:
|
||||
from hermes_cli import __version__, __release_date__
|
||||
except ImportError:
|
||||
__version__ = "(unknown)"
|
||||
__release_date__ = ""
|
||||
|
||||
commit = _get_git_commit(project_root)
|
||||
|
||||
try:
|
||||
config = load_config()
|
||||
except Exception:
|
||||
config = {}
|
||||
|
||||
model, provider = _get_model_and_provider(config)
|
||||
|
||||
# Profile
|
||||
try:
|
||||
from hermes_cli.profiles import get_active_profile_name
|
||||
profile = get_active_profile_name() or "(default)"
|
||||
except Exception:
|
||||
profile = "(default)"
|
||||
|
||||
# Terminal backend
|
||||
terminal_cfg = config.get("terminal", {})
|
||||
backend = terminal_cfg.get("backend", "local")
|
||||
|
||||
# OpenAI SDK version
|
||||
try:
|
||||
import openai
|
||||
openai_ver = openai.__version__
|
||||
except ImportError:
|
||||
openai_ver = "not installed"
|
||||
|
||||
# OS info
|
||||
os_info = f"{platform.system()} {platform.release()} {platform.machine()}"
|
||||
|
||||
lines = []
|
||||
lines.append("--- hermes dump ---")
|
||||
ver_str = f"{__version__}"
|
||||
if __release_date__:
|
||||
ver_str += f" ({__release_date__})"
|
||||
ver_str += f" [{commit}]"
|
||||
lines.append(f"version: {ver_str}")
|
||||
lines.append(f"os: {os_info}")
|
||||
lines.append(f"python: {sys.version.split()[0]}")
|
||||
lines.append(f"openai_sdk: {openai_ver}")
|
||||
lines.append(f"profile: {profile}")
|
||||
lines.append(f"hermes_home: {display_hermes_home()}")
|
||||
lines.append(f"model: {model}")
|
||||
lines.append(f"provider: {provider}")
|
||||
lines.append(f"terminal: {backend}")
|
||||
|
||||
# API keys
|
||||
lines.append("")
|
||||
lines.append("api_keys:")
|
||||
api_keys = [
|
||||
("OPENROUTER_API_KEY", "openrouter"),
|
||||
("OPENAI_API_KEY", "openai"),
|
||||
("ANTHROPIC_API_KEY", "anthropic"),
|
||||
("ANTHROPIC_TOKEN", "anthropic_token"),
|
||||
("NOUS_API_KEY", "nous"),
|
||||
("GLM_API_KEY", "glm/zai"),
|
||||
("ZAI_API_KEY", "zai"),
|
||||
("KIMI_API_KEY", "kimi"),
|
||||
("MINIMAX_API_KEY", "minimax"),
|
||||
("DEEPSEEK_API_KEY", "deepseek"),
|
||||
("DASHSCOPE_API_KEY", "dashscope"),
|
||||
("HF_TOKEN", "huggingface"),
|
||||
("AI_GATEWAY_API_KEY", "ai_gateway"),
|
||||
("OPENCODE_ZEN_API_KEY", "opencode_zen"),
|
||||
("OPENCODE_GO_API_KEY", "opencode_go"),
|
||||
("KILOCODE_API_KEY", "kilocode"),
|
||||
("FIRECRAWL_API_KEY", "firecrawl"),
|
||||
("TAVILY_API_KEY", "tavily"),
|
||||
("BROWSERBASE_API_KEY", "browserbase"),
|
||||
("FAL_KEY", "fal"),
|
||||
("ELEVENLABS_API_KEY", "elevenlabs"),
|
||||
("GITHUB_TOKEN", "github"),
|
||||
]
|
||||
|
||||
for env_var, label in api_keys:
|
||||
val = os.getenv(env_var, "")
|
||||
if show_keys and val:
|
||||
display = _redact(val)
|
||||
else:
|
||||
display = "set" if val else "not set"
|
||||
lines.append(f" {label:<20} {display}")
|
||||
|
||||
# Features summary
|
||||
lines.append("")
|
||||
lines.append("features:")
|
||||
|
||||
toolsets = config.get("toolsets", ["hermes-cli"])
|
||||
lines.append(f" toolsets: {', '.join(toolsets) if toolsets else '(default)'}")
|
||||
lines.append(f" mcp_servers: {_count_mcp_servers(config)}")
|
||||
lines.append(f" memory_provider: {_memory_provider(config)}")
|
||||
lines.append(f" gateway: {_gateway_status()}")
|
||||
|
||||
platforms = _configured_platforms()
|
||||
lines.append(f" platforms: {', '.join(platforms) if platforms else 'none'}")
|
||||
lines.append(f" cron_jobs: {_cron_summary(hermes_home)}")
|
||||
lines.append(f" skills: {_count_skills(hermes_home)}")
|
||||
|
||||
# Config overrides (non-default values)
|
||||
overrides = _config_overrides(config)
|
||||
if overrides:
|
||||
lines.append("")
|
||||
lines.append("config_overrides:")
|
||||
for key, val in overrides.items():
|
||||
lines.append(f" {key}: {val}")
|
||||
|
||||
lines.append("--- end dump ---")
|
||||
|
||||
output = "\n".join(lines)
|
||||
print(output)
|
||||
+86
-528
File diff suppressed because it is too large
Load Diff
@@ -15,6 +15,7 @@ Usage examples::
|
||||
hermes logs --since 30m -f # follow, starting 30 min ago
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
+114
-281
@@ -97,11 +97,10 @@ def _apply_profile_override() -> None:
|
||||
consume = 1
|
||||
break
|
||||
|
||||
# 2. If no flag, check active_profile in the hermes root
|
||||
# 2. If no flag, check ~/.hermes/active_profile
|
||||
if profile_name is None:
|
||||
try:
|
||||
from hermes_constants import get_default_hermes_root
|
||||
active_path = get_default_hermes_root() / "active_profile"
|
||||
active_path = Path.home() / ".hermes" / "active_profile"
|
||||
if active_path.exists():
|
||||
name = active_path.read_text().strip()
|
||||
if name and name != "default":
|
||||
@@ -647,7 +646,6 @@ def cmd_chat(args):
|
||||
"verbose": args.verbose,
|
||||
"quiet": getattr(args, "quiet", False),
|
||||
"query": args.query,
|
||||
"image": getattr(args, "image", None),
|
||||
"resume": getattr(args, "resume", None),
|
||||
"worktree": getattr(args, "worktree", False),
|
||||
"checkpoints": getattr(args, "checkpoints", False),
|
||||
@@ -859,6 +857,7 @@ def cmd_whatsapp(args):
|
||||
|
||||
def cmd_setup(args):
|
||||
"""Interactive setup wizard."""
|
||||
_require_tty("setup")
|
||||
from hermes_cli.setup import run_setup_wizard
|
||||
run_setup_wizard(args)
|
||||
|
||||
@@ -919,7 +918,6 @@ def select_provider_and_model(args=None):
|
||||
"openrouter": "OpenRouter",
|
||||
"nous": "Nous Portal",
|
||||
"openai-codex": "OpenAI Codex",
|
||||
"qwen-oauth": "Qwen OAuth",
|
||||
"copilot-acp": "GitHub Copilot ACP",
|
||||
"copilot": "GitHub Copilot",
|
||||
"anthropic": "Anthropic",
|
||||
@@ -949,7 +947,6 @@ def select_provider_and_model(args=None):
|
||||
("openrouter", "OpenRouter (100+ models, pay-per-use)"),
|
||||
("anthropic", "Anthropic (Claude models — API key or Claude Code)"),
|
||||
("openai-codex", "OpenAI Codex"),
|
||||
("qwen-oauth", "Qwen OAuth (reuses local Qwen CLI login)"),
|
||||
("copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
|
||||
("huggingface", "Hugging Face Inference Providers (20+ open models)"),
|
||||
]
|
||||
@@ -968,11 +965,10 @@ def select_provider_and_model(args=None):
|
||||
("alibaba", "Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
|
||||
]
|
||||
|
||||
def _named_custom_provider_map(cfg) -> dict[str, dict[str, str]]:
|
||||
custom_providers_cfg = cfg.get("custom_providers") or []
|
||||
custom_provider_map = {}
|
||||
if not isinstance(custom_providers_cfg, list):
|
||||
return custom_provider_map
|
||||
# Add user-defined custom providers from config.yaml
|
||||
custom_providers_cfg = config.get("custom_providers") or []
|
||||
_custom_provider_map = {} # key → {name, base_url, api_key}
|
||||
if isinstance(custom_providers_cfg, list):
|
||||
for entry in custom_providers_cfg:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
@@ -981,23 +977,16 @@ def select_provider_and_model(args=None):
|
||||
if not name or not base_url:
|
||||
continue
|
||||
key = "custom:" + name.lower().replace(" ", "-")
|
||||
custom_provider_map[key] = {
|
||||
short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
|
||||
saved_model = entry.get("model", "")
|
||||
model_hint = f" — {saved_model}" if saved_model else ""
|
||||
top_providers.append((key, f"{name} ({short_url}){model_hint}"))
|
||||
_custom_provider_map[key] = {
|
||||
"name": name,
|
||||
"base_url": base_url,
|
||||
"api_key": entry.get("api_key", ""),
|
||||
"model": entry.get("model", ""),
|
||||
"model": saved_model,
|
||||
}
|
||||
return custom_provider_map
|
||||
|
||||
# Add user-defined custom providers from config.yaml
|
||||
_custom_provider_map = _named_custom_provider_map(config) # key → {name, base_url, api_key}
|
||||
for key, provider_info in _custom_provider_map.items():
|
||||
name = provider_info["name"]
|
||||
base_url = provider_info["base_url"]
|
||||
short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
|
||||
saved_model = provider_info.get("model", "")
|
||||
model_hint = f" — {saved_model}" if saved_model else ""
|
||||
top_providers.append((key, f"{name} ({short_url}){model_hint}"))
|
||||
|
||||
top_keys = {k for k, _ in top_providers}
|
||||
extended_keys = {k for k, _ in extended_providers}
|
||||
@@ -1054,23 +1043,14 @@ def select_provider_and_model(args=None):
|
||||
_model_flow_nous(config, current_model, args=args)
|
||||
elif selected_provider == "openai-codex":
|
||||
_model_flow_openai_codex(config, current_model)
|
||||
elif selected_provider == "qwen-oauth":
|
||||
_model_flow_qwen_oauth(config, current_model)
|
||||
elif selected_provider == "copilot-acp":
|
||||
_model_flow_copilot_acp(config, current_model)
|
||||
elif selected_provider == "copilot":
|
||||
_model_flow_copilot(config, current_model)
|
||||
elif selected_provider == "custom":
|
||||
_model_flow_custom(config)
|
||||
elif selected_provider.startswith("custom:"):
|
||||
provider_info = _named_custom_provider_map(load_config()).get(selected_provider)
|
||||
if provider_info is None:
|
||||
print(
|
||||
"Warning: the selected saved custom provider is no longer available. "
|
||||
"It may have been removed from config.yaml. No change."
|
||||
)
|
||||
return
|
||||
_model_flow_named_custom(config, provider_info)
|
||||
elif selected_provider.startswith("custom:") and selected_provider in _custom_provider_map:
|
||||
_model_flow_named_custom(config, _custom_provider_map[selected_provider])
|
||||
elif selected_provider == "remove-custom":
|
||||
_remove_custom_provider(config)
|
||||
elif selected_provider == "anthropic":
|
||||
@@ -1080,42 +1060,6 @@ def select_provider_and_model(args=None):
|
||||
elif selected_provider in ("gemini", "zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface"):
|
||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||
|
||||
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
|
||||
# When the user switches to a named provider (anything except "custom"),
|
||||
# a leftover OPENAI_BASE_URL in ~/.hermes/.env can poison auxiliary
|
||||
# clients that use provider:auto. Clear it proactively. (#5161)
|
||||
if selected_provider not in ("custom", "cancel", "remove-custom") \
|
||||
and not selected_provider.startswith("custom:"):
|
||||
_clear_stale_openai_base_url()
|
||||
|
||||
|
||||
def _clear_stale_openai_base_url():
|
||||
"""Remove OPENAI_BASE_URL from ~/.hermes/.env if the active provider is not 'custom'.
|
||||
|
||||
After a provider switch, a leftover OPENAI_BASE_URL causes auxiliary
|
||||
clients (compression, vision, delegation) with provider:auto to route
|
||||
requests to the old custom endpoint instead of the newly selected
|
||||
provider. See issue #5161.
|
||||
"""
|
||||
from hermes_cli.config import get_env_value, save_env_value, load_config
|
||||
|
||||
cfg = load_config()
|
||||
model_cfg = cfg.get("model", {})
|
||||
if isinstance(model_cfg, dict):
|
||||
provider = (model_cfg.get("provider") or "").strip().lower()
|
||||
else:
|
||||
provider = ""
|
||||
|
||||
if provider == "custom" or not provider:
|
||||
return # custom provider legitimately uses OPENAI_BASE_URL
|
||||
|
||||
stale_url = get_env_value("OPENAI_BASE_URL")
|
||||
if stale_url:
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
print(f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url[:40]}...)"
|
||||
if len(stale_url) > 40
|
||||
else f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url})")
|
||||
|
||||
|
||||
def _prompt_provider_choice(choices, *, default=0):
|
||||
"""Show provider selection menu with curses arrow-key navigation.
|
||||
@@ -1179,10 +1123,10 @@ def _model_flow_openrouter(config, current_model=""):
|
||||
print()
|
||||
|
||||
from hermes_cli.models import model_ids, get_pricing_for_provider
|
||||
openrouter_models = model_ids(force_refresh=True)
|
||||
openrouter_models = model_ids()
|
||||
|
||||
# Fetch live pricing (non-blocking — returns empty dict on failure)
|
||||
pricing = get_pricing_for_provider("openrouter", force_refresh=True)
|
||||
pricing = get_pricing_for_provider("openrouter")
|
||||
|
||||
selected = _prompt_model_selection(openrouter_models, current_model=current_model, pricing=pricing)
|
||||
if selected:
|
||||
@@ -1210,7 +1154,7 @@ def _model_flow_nous(config, current_model="", args=None):
|
||||
from hermes_cli.auth import (
|
||||
get_provider_auth_state, _prompt_model_selection, _save_model_choice,
|
||||
_update_config_for_provider, resolve_nous_runtime_credentials,
|
||||
AuthError, format_auth_error,
|
||||
fetch_nous_models, AuthError, format_auth_error,
|
||||
_login_nous, PROVIDER_REGISTRY,
|
||||
)
|
||||
from hermes_cli.config import get_env_value, save_config, save_env_value
|
||||
@@ -1251,15 +1195,14 @@ def _model_flow_nous(config, current_model="", args=None):
|
||||
# Already logged in — use curated model list (same as OpenRouter defaults).
|
||||
# The live /models endpoint returns hundreds of models; the curated list
|
||||
# shows only agentic models users recognize from OpenRouter.
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
|
||||
check_nous_free_tier, partition_nous_models_by_tier,
|
||||
)
|
||||
from hermes_cli.models import _PROVIDER_MODELS, get_pricing_for_provider
|
||||
model_ids = _PROVIDER_MODELS.get("nous", [])
|
||||
if not model_ids:
|
||||
print("No curated models available for Nous Portal.")
|
||||
return
|
||||
|
||||
print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
|
||||
|
||||
# Verify credentials are still valid (catches expired sessions early)
|
||||
try:
|
||||
creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60)
|
||||
@@ -1285,44 +1228,7 @@ def _model_flow_nous(config, current_model="", args=None):
|
||||
# Fetch live pricing (non-blocking — returns empty dict on failure)
|
||||
pricing = get_pricing_for_provider("nous")
|
||||
|
||||
# Check if user is on free tier
|
||||
free_tier = check_nous_free_tier()
|
||||
|
||||
# For both tiers: apply the allowlist filter first (removes non-allowlisted
|
||||
# free models and allowlist models that aren't actually free).
|
||||
# Then for free users: partition remaining models into selectable/unavailable.
|
||||
model_ids = filter_nous_free_models(model_ids, pricing)
|
||||
unavailable_models: list[str] = []
|
||||
if free_tier:
|
||||
model_ids, unavailable_models = partition_nous_models_by_tier(model_ids, pricing, free_tier=True)
|
||||
|
||||
if not model_ids and not unavailable_models:
|
||||
print("No models available for Nous Portal after filtering.")
|
||||
return
|
||||
|
||||
# Resolve portal URL for upgrade links (may differ on staging)
|
||||
_nous_portal_url = ""
|
||||
try:
|
||||
_nous_state = get_provider_auth_state("nous")
|
||||
if _nous_state:
|
||||
_nous_portal_url = _nous_state.get("portal_base_url", "")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if free_tier and not model_ids:
|
||||
print("No free models currently available.")
|
||||
if unavailable_models:
|
||||
from hermes_cli.auth import DEFAULT_NOUS_PORTAL_URL
|
||||
_url = (_nous_portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
|
||||
print(f"Upgrade at {_url} to access paid models.")
|
||||
return
|
||||
|
||||
print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
|
||||
|
||||
selected = _prompt_model_selection(
|
||||
model_ids, current_model=current_model, pricing=pricing,
|
||||
unavailable_models=unavailable_models, portal_url=_nous_portal_url,
|
||||
)
|
||||
selected = _prompt_model_selection(model_ids, current_model=current_model, pricing=pricing)
|
||||
if selected:
|
||||
_save_model_choice(selected)
|
||||
# Reactivate Nous as the provider and update config
|
||||
@@ -1370,6 +1276,7 @@ def _model_flow_openai_codex(config, current_model=""):
|
||||
PROVIDER_REGISTRY, DEFAULT_CODEX_BASE_URL,
|
||||
)
|
||||
from hermes_cli.codex_models import get_codex_model_ids
|
||||
from hermes_cli.config import get_env_value, save_env_value
|
||||
import argparse
|
||||
|
||||
status = get_codex_auth_status()
|
||||
@@ -1415,56 +1322,6 @@ def _model_flow_openai_codex(config, current_model=""):
|
||||
|
||||
|
||||
|
||||
_DEFAULT_QWEN_PORTAL_MODELS = [
|
||||
"qwen3-coder-plus",
|
||||
"qwen3-coder",
|
||||
]
|
||||
|
||||
|
||||
def _model_flow_qwen_oauth(_config, current_model=""):
|
||||
"""Qwen OAuth provider: reuse local Qwen CLI login, then pick model."""
|
||||
from hermes_cli.auth import (
|
||||
get_qwen_auth_status,
|
||||
resolve_qwen_runtime_credentials,
|
||||
_prompt_model_selection,
|
||||
_save_model_choice,
|
||||
_update_config_for_provider,
|
||||
DEFAULT_QWEN_BASE_URL,
|
||||
)
|
||||
from hermes_cli.models import fetch_api_models
|
||||
|
||||
status = get_qwen_auth_status()
|
||||
if not status.get("logged_in"):
|
||||
print("Not logged into Qwen CLI OAuth.")
|
||||
print("Run: qwen auth qwen-oauth")
|
||||
auth_file = status.get("auth_file")
|
||||
if auth_file:
|
||||
print(f"Expected credentials file: {auth_file}")
|
||||
if status.get("error"):
|
||||
print(f"Error: {status.get('error')}")
|
||||
return
|
||||
|
||||
# Try live model discovery, fall back to curated list.
|
||||
models = None
|
||||
try:
|
||||
creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True)
|
||||
models = fetch_api_models(creds["api_key"], creds["base_url"])
|
||||
except Exception:
|
||||
pass
|
||||
if not models:
|
||||
models = list(_DEFAULT_QWEN_PORTAL_MODELS)
|
||||
|
||||
default = current_model or (models[0] if models else "qwen3-coder-plus")
|
||||
selected = _prompt_model_selection(models, current_model=default)
|
||||
if selected:
|
||||
_save_model_choice(selected)
|
||||
_update_config_for_provider("qwen-oauth", DEFAULT_QWEN_BASE_URL)
|
||||
print(f"Default model set to: {selected} (via Qwen OAuth)")
|
||||
else:
|
||||
print("No change.")
|
||||
|
||||
|
||||
|
||||
def _model_flow_custom(config):
|
||||
"""Custom endpoint: collect URL, API key, and model name.
|
||||
|
||||
@@ -1472,7 +1329,7 @@ def _model_flow_custom(config):
|
||||
so it appears in the provider menu on subsequent runs.
|
||||
"""
|
||||
from hermes_cli.auth import _save_model_choice, deactivate_provider
|
||||
from hermes_cli.config import get_env_value, load_config, save_config
|
||||
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
|
||||
|
||||
current_url = get_env_value("OPENAI_BASE_URL") or ""
|
||||
current_key = get_env_value("OPENAI_API_KEY") or ""
|
||||
@@ -1526,11 +1383,7 @@ def _model_flow_custom(config):
|
||||
f"Hermes will still save it."
|
||||
)
|
||||
if probe.get("suggested_base_url"):
|
||||
suggested = probe["suggested_base_url"]
|
||||
if suggested.endswith("/v1"):
|
||||
print(f" If this server expects /v1 in the path, try base URL: {suggested}")
|
||||
else:
|
||||
print(f" If /v1 should not be in the base URL, try: {suggested}")
|
||||
print(f" If this server expects /v1, try base URL: {probe['suggested_base_url']}")
|
||||
|
||||
# Select model — use probe results when available, fall back to manual input
|
||||
model_name = ""
|
||||
@@ -1709,10 +1562,8 @@ def _remove_custom_provider(config):
|
||||
title="Select provider to remove:",
|
||||
)
|
||||
idx = menu.show()
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
flush_stdin()
|
||||
print()
|
||||
except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
|
||||
except (ImportError, NotImplementedError):
|
||||
for i, c in enumerate(choices, 1):
|
||||
print(f" {i}. {c}")
|
||||
print()
|
||||
@@ -1736,12 +1587,11 @@ def _remove_custom_provider(config):
|
||||
def _model_flow_named_custom(config, provider_info):
|
||||
"""Handle a named custom provider from config.yaml custom_providers list.
|
||||
|
||||
Always probes the endpoint's /models API to let the user pick a model.
|
||||
If a model was previously saved, it is pre-selected in the menu.
|
||||
Falls back to the saved model if probing fails.
|
||||
If the entry has a saved model name, activates it immediately.
|
||||
Otherwise probes the endpoint's /models API to let the user pick one.
|
||||
"""
|
||||
from hermes_cli.auth import _save_model_choice, deactivate_provider
|
||||
from hermes_cli.config import load_config, save_config
|
||||
from hermes_cli.config import save_env_value, load_config, save_config
|
||||
from hermes_cli.models import fetch_api_models
|
||||
|
||||
name = provider_info["name"]
|
||||
@@ -1749,46 +1599,54 @@ def _model_flow_named_custom(config, provider_info):
|
||||
api_key = provider_info.get("api_key", "")
|
||||
saved_model = provider_info.get("model", "")
|
||||
|
||||
# If a model is saved, just activate immediately — no probing needed
|
||||
if saved_model:
|
||||
_save_model_choice(saved_model)
|
||||
|
||||
cfg = load_config()
|
||||
model = cfg.get("model")
|
||||
if not isinstance(model, dict):
|
||||
model = {"default": model} if model else {}
|
||||
cfg["model"] = model
|
||||
model["provider"] = "custom"
|
||||
model["base_url"] = base_url
|
||||
if api_key:
|
||||
model["api_key"] = api_key
|
||||
save_config(cfg)
|
||||
deactivate_provider()
|
||||
|
||||
print(f"✅ Switched to: {saved_model}")
|
||||
print(f" Provider: {name} ({base_url})")
|
||||
return
|
||||
|
||||
# No saved model — probe endpoint and let user pick
|
||||
print(f" Provider: {name}")
|
||||
print(f" URL: {base_url}")
|
||||
if saved_model:
|
||||
print(f" Current: {saved_model}")
|
||||
print()
|
||||
|
||||
print("Fetching available models...")
|
||||
print("No model saved for this provider. Fetching available models...")
|
||||
models = fetch_api_models(api_key, base_url, timeout=8.0)
|
||||
|
||||
if models:
|
||||
default_idx = 0
|
||||
if saved_model and saved_model in models:
|
||||
default_idx = models.index(saved_model)
|
||||
|
||||
print(f"Found {len(models)} model(s):\n")
|
||||
try:
|
||||
from simple_term_menu import TerminalMenu
|
||||
menu_items = [
|
||||
f" {m} (current)" if m == saved_model else f" {m}"
|
||||
for m in models
|
||||
] + [" Cancel"]
|
||||
menu_items = [f" {m}" for m in models] + [" Cancel"]
|
||||
menu = TerminalMenu(
|
||||
menu_items, cursor_index=default_idx,
|
||||
menu_items, cursor_index=0,
|
||||
menu_cursor="-> ", menu_cursor_style=("fg_green", "bold"),
|
||||
menu_highlight_style=("fg_green",),
|
||||
cycle_cursor=True, clear_screen=False,
|
||||
title=f"Select model from {name}:",
|
||||
)
|
||||
idx = menu.show()
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
flush_stdin()
|
||||
print()
|
||||
if idx is None or idx >= len(models):
|
||||
print("Cancelled.")
|
||||
return
|
||||
model_name = models[idx]
|
||||
except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
|
||||
except (ImportError, NotImplementedError):
|
||||
for i, m in enumerate(models, 1):
|
||||
suffix = " (current)" if m == saved_model else ""
|
||||
print(f" {i}. {m}{suffix}")
|
||||
print(f" {i}. {m}")
|
||||
print(f" {len(models) + 1}. Cancel")
|
||||
print()
|
||||
try:
|
||||
@@ -1804,13 +1662,6 @@ def _model_flow_named_custom(config, provider_info):
|
||||
except (ValueError, KeyboardInterrupt, EOFError):
|
||||
print("\nCancelled.")
|
||||
return
|
||||
elif saved_model:
|
||||
print("Could not fetch models from endpoint.")
|
||||
try:
|
||||
model_name = input(f"Model name [{saved_model}]: ").strip() or saved_model
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print("\nCancelled.")
|
||||
return
|
||||
else:
|
||||
print("Could not fetch models from endpoint. Enter model name manually.")
|
||||
try:
|
||||
@@ -1865,10 +1716,7 @@ def _set_reasoning_effort(config, effort: str) -> None:
|
||||
|
||||
def _prompt_reasoning_effort_selection(efforts, current_effort=""):
|
||||
"""Prompt for a reasoning effort. Returns effort, 'none', or None to keep current."""
|
||||
deduped = list(dict.fromkeys(str(effort).strip().lower() for effort in efforts if str(effort).strip()))
|
||||
canonical_order = ("minimal", "low", "medium", "high", "xhigh")
|
||||
ordered = [effort for effort in canonical_order if effort in deduped]
|
||||
ordered.extend(effort for effort in deduped if effort not in canonical_order)
|
||||
ordered = list(dict.fromkeys(str(effort).strip().lower() for effort in efforts if str(effort).strip()))
|
||||
if not ordered:
|
||||
return None
|
||||
|
||||
@@ -1906,8 +1754,6 @@ def _prompt_reasoning_effort_selection(efforts, current_effort=""):
|
||||
title="Select reasoning effort:",
|
||||
)
|
||||
idx = menu.show()
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
flush_stdin()
|
||||
if idx is None:
|
||||
return None
|
||||
print()
|
||||
@@ -1916,7 +1762,7 @@ def _prompt_reasoning_effort_selection(efforts, current_effort=""):
|
||||
if idx == len(ordered):
|
||||
return "none"
|
||||
return None
|
||||
except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
|
||||
except (ImportError, NotImplementedError):
|
||||
pass
|
||||
|
||||
print("Select reasoning effort:")
|
||||
@@ -1955,7 +1801,7 @@ def _model_flow_copilot(config, current_model=""):
|
||||
deactivate_provider,
|
||||
resolve_api_key_provider_credentials,
|
||||
)
|
||||
from hermes_cli.config import save_env_value, load_config, save_config
|
||||
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
|
||||
from hermes_cli.models import (
|
||||
fetch_api_models,
|
||||
fetch_github_model_catalog,
|
||||
@@ -2546,6 +2392,8 @@ def _model_flow_anthropic(config, current_model=""):
|
||||
)
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
|
||||
pconfig = PROVIDER_REGISTRY["anthropic"]
|
||||
|
||||
# Check ALL credential sources
|
||||
existing_key = (
|
||||
get_env_value("ANTHROPIC_TOKEN")
|
||||
@@ -2702,12 +2550,6 @@ def cmd_doctor(args):
|
||||
run_doctor(args)
|
||||
|
||||
|
||||
def cmd_dump(args):
|
||||
"""Dump setup summary for support/debugging."""
|
||||
from hermes_cli.dump import run_dump
|
||||
run_dump(args)
|
||||
|
||||
|
||||
def cmd_config(args):
|
||||
"""Configuration management."""
|
||||
from hermes_cli.config import config_command
|
||||
@@ -3077,19 +2919,33 @@ def _restore_stashed_changes(
|
||||
print("\nYour stashed changes are preserved — nothing is lost.")
|
||||
print(f" Stash ref: {stash_ref}")
|
||||
|
||||
# Always reset to clean state — leaving conflict markers in source
|
||||
# files makes hermes completely unrunnable (SyntaxError on import).
|
||||
# The user's changes are safe in the stash for manual recovery.
|
||||
subprocess.run(
|
||||
git_cmd + ["reset", "--hard", "HEAD"],
|
||||
cwd=cwd,
|
||||
capture_output=True,
|
||||
)
|
||||
print("Working tree reset to clean state.")
|
||||
print(f"Restore your changes later with: git stash apply {stash_ref}")
|
||||
# Don't sys.exit — the code update itself succeeded, only the stash
|
||||
# restore had conflicts. Let cmd_update continue with pip install,
|
||||
# skill sync, and gateway restart.
|
||||
# Ask before resetting (if interactive)
|
||||
do_reset = True
|
||||
if prompt_user:
|
||||
print("\nReset working tree to clean state so Hermes can run?")
|
||||
print(" (You can re-apply your changes later with: git stash apply)")
|
||||
print("[Y/n] ", end="", flush=True)
|
||||
response = input().strip().lower()
|
||||
if response not in ("", "y", "yes"):
|
||||
do_reset = False
|
||||
|
||||
if do_reset:
|
||||
subprocess.run(
|
||||
git_cmd + ["reset", "--hard", "HEAD"],
|
||||
cwd=cwd,
|
||||
capture_output=True,
|
||||
)
|
||||
print("Working tree reset to clean state.")
|
||||
else:
|
||||
print("Working tree left as-is (may have conflict markers).")
|
||||
print("Resolve conflicts manually, then run: git stash drop")
|
||||
|
||||
print(f"Restore your changes with: git stash apply {stash_ref}")
|
||||
# In non-interactive mode (gateway /update), don't abort — the code
|
||||
# update itself succeeded, only the stash restore had conflicts.
|
||||
# Aborting would report the entire update as failed.
|
||||
if prompt_user:
|
||||
sys.exit(1)
|
||||
return False
|
||||
|
||||
stash_selector = _resolve_stash_selector(git_cmd, cwd, stash_ref)
|
||||
@@ -3350,11 +3206,10 @@ def _invalidate_update_cache():
|
||||
``hermes update``, every profile is now current.
|
||||
"""
|
||||
homes = []
|
||||
# Default profile home (Docker-aware — uses /opt/data in Docker)
|
||||
from hermes_constants import get_default_hermes_root
|
||||
default_home = get_default_hermes_root()
|
||||
# Default profile home
|
||||
default_home = Path.home() / ".hermes"
|
||||
homes.append(default_home)
|
||||
# Named profiles under <root>/profiles/
|
||||
# Named profiles under ~/.hermes/profiles/
|
||||
profiles_root = default_home / "profiles"
|
||||
if profiles_root.is_dir():
|
||||
for entry in profiles_root.iterdir():
|
||||
@@ -3711,7 +3566,7 @@ def cmd_update(args):
|
||||
try:
|
||||
from hermes_cli.profiles import list_profiles, get_active_profile_name, seed_profile_skills
|
||||
active = get_active_profile_name()
|
||||
other_profiles = [p for p in list_profiles() if p.name != active]
|
||||
other_profiles = [p for p in list_profiles() if not p.is_default and p.name != active]
|
||||
if other_profiles:
|
||||
print()
|
||||
print("→ Syncing bundled skills to other profiles...")
|
||||
@@ -3806,8 +3661,8 @@ def cmd_update(args):
|
||||
# running gateway needs restarting to pick up the new code.
|
||||
try:
|
||||
from hermes_cli.gateway import (
|
||||
is_macos, supports_systemd_services, _ensure_user_systemd_env,
|
||||
find_gateway_pids,
|
||||
is_macos, is_linux, _ensure_user_systemd_env,
|
||||
get_systemd_linger_status, find_gateway_pids,
|
||||
_get_service_pids,
|
||||
)
|
||||
import signal as _signal
|
||||
@@ -3817,7 +3672,7 @@ def cmd_update(args):
|
||||
|
||||
# --- Systemd services (Linux) ---
|
||||
# Discover all hermes-gateway* units (default + profiles)
|
||||
if supports_systemd_services():
|
||||
if is_linux():
|
||||
try:
|
||||
_ensure_user_systemd_env()
|
||||
except Exception:
|
||||
@@ -3963,7 +3818,7 @@ def cmd_profile(args):
|
||||
"""Profile management — create, delete, list, switch, alias."""
|
||||
from hermes_cli.profiles import (
|
||||
list_profiles, create_profile, delete_profile, seed_profile_skills,
|
||||
set_active_profile, get_active_profile_name,
|
||||
get_active_profile, set_active_profile, get_active_profile_name,
|
||||
check_alias_collision, create_wrapper_script, remove_wrapper_script,
|
||||
_is_wrapper_dir_in_path, _get_wrapper_dir,
|
||||
)
|
||||
@@ -4091,10 +3946,8 @@ def cmd_profile(args):
|
||||
print(f" {name} chat Start chatting")
|
||||
print(f" {name} gateway start Start the messaging gateway")
|
||||
if clone or clone_all:
|
||||
try:
|
||||
profile_dir_display = "~/" + str(profile_dir.relative_to(Path.home()))
|
||||
except ValueError:
|
||||
profile_dir_display = str(profile_dir)
|
||||
from hermes_constants import get_hermes_home
|
||||
profile_dir_display = f"~/.hermes/profiles/{name}"
|
||||
print(f"\n Edit {profile_dir_display}/.env for different API keys")
|
||||
print(f" Edit {profile_dir_display}/SOUL.md for different personality")
|
||||
print()
|
||||
@@ -4337,10 +4190,6 @@ For more help on a command:
|
||||
"-q", "--query",
|
||||
help="Single query (non-interactive mode)"
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--image",
|
||||
help="Optional local image path to attach to a single query"
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"-m", "--model",
|
||||
help="Model to use (e.g., anthropic/claude-sonnet-4)"
|
||||
@@ -4483,7 +4332,7 @@ For more help on a command:
|
||||
gateway_subparsers = gateway_parser.add_subparsers(dest="gateway_command")
|
||||
|
||||
# gateway run (default)
|
||||
gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground (recommended for WSL, Docker, Termux)")
|
||||
gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground")
|
||||
gateway_run.add_argument("-v", "--verbose", action="count", default=0,
|
||||
help="Increase stderr log verbosity (-v=INFO, -vv=DEBUG)")
|
||||
gateway_run.add_argument("-q", "--quiet", action="store_true",
|
||||
@@ -4492,7 +4341,7 @@ For more help on a command:
|
||||
help="Replace any existing gateway instance (useful for systemd)")
|
||||
|
||||
# gateway start
|
||||
gateway_start = gateway_subparsers.add_parser("start", help="Start the installed systemd/launchd background service")
|
||||
gateway_start = gateway_subparsers.add_parser("start", help="Start gateway service")
|
||||
gateway_start.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
|
||||
|
||||
# gateway stop
|
||||
@@ -4510,7 +4359,7 @@ For more help on a command:
|
||||
gateway_status.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
|
||||
|
||||
# gateway install
|
||||
gateway_install = gateway_subparsers.add_parser("install", help="Install gateway as a systemd/launchd background service")
|
||||
gateway_install = gateway_subparsers.add_parser("install", help="Install gateway as service")
|
||||
gateway_install.add_argument("--force", action="store_true", help="Force reinstall")
|
||||
gateway_install.add_argument("--system", action="store_true", help="Install as a Linux system-level service (starts at boot)")
|
||||
gateway_install.add_argument("--run-as-user", dest="run_as_user", help="User account the Linux system service should run as")
|
||||
@@ -4520,7 +4369,7 @@ For more help on a command:
|
||||
gateway_uninstall.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
|
||||
|
||||
# gateway setup
|
||||
gateway_subparsers.add_parser("setup", help="Configure messaging platforms")
|
||||
gateway_setup = gateway_subparsers.add_parser("setup", help="Configure messaging platforms")
|
||||
|
||||
gateway_parser.set_defaults(func=cmd_gateway)
|
||||
|
||||
@@ -4531,12 +4380,12 @@ For more help on a command:
|
||||
"setup",
|
||||
help="Interactive setup wizard",
|
||||
description="Configure Hermes Agent with an interactive wizard. "
|
||||
"Run a specific section: hermes setup model|tts|terminal|gateway|tools|agent"
|
||||
"Run a specific section: hermes setup model|terminal|gateway|tools|agent"
|
||||
)
|
||||
setup_parser.add_argument(
|
||||
"section",
|
||||
nargs="?",
|
||||
choices=["model", "tts", "terminal", "gateway", "tools", "agent"],
|
||||
choices=["model", "terminal", "gateway", "tools", "agent"],
|
||||
default=None,
|
||||
help="Run a specific setup section instead of the full wizard"
|
||||
)
|
||||
@@ -4783,22 +4632,6 @@ For more help on a command:
|
||||
help="Attempt to fix issues automatically"
|
||||
)
|
||||
doctor_parser.set_defaults(func=cmd_doctor)
|
||||
|
||||
# =========================================================================
|
||||
# dump command
|
||||
# =========================================================================
|
||||
dump_parser = subparsers.add_parser(
|
||||
"dump",
|
||||
help="Dump setup summary for support/debugging",
|
||||
description="Output a compact, plain-text summary of your Hermes setup "
|
||||
"that can be copy-pasted into Discord/GitHub for support context"
|
||||
)
|
||||
dump_parser.add_argument(
|
||||
"--show-keys",
|
||||
action="store_true",
|
||||
help="Show redacted API key prefixes (first/last 4 chars) instead of just set/not set"
|
||||
)
|
||||
dump_parser.set_defaults(func=cmd_dump)
|
||||
|
||||
# =========================================================================
|
||||
# config command
|
||||
@@ -4811,10 +4644,10 @@ For more help on a command:
|
||||
config_subparsers = config_parser.add_subparsers(dest="config_command")
|
||||
|
||||
# config show (default)
|
||||
config_subparsers.add_parser("show", help="Show current configuration")
|
||||
config_show = config_subparsers.add_parser("show", help="Show current configuration")
|
||||
|
||||
# config edit
|
||||
config_subparsers.add_parser("edit", help="Open config file in editor")
|
||||
config_edit = config_subparsers.add_parser("edit", help="Open config file in editor")
|
||||
|
||||
# config set
|
||||
config_set = config_subparsers.add_parser("set", help="Set a configuration value")
|
||||
@@ -4822,16 +4655,16 @@ For more help on a command:
|
||||
config_set.add_argument("value", nargs="?", help="Value to set")
|
||||
|
||||
# config path
|
||||
config_subparsers.add_parser("path", help="Print config file path")
|
||||
config_path = config_subparsers.add_parser("path", help="Print config file path")
|
||||
|
||||
# config env-path
|
||||
config_subparsers.add_parser("env-path", help="Print .env file path")
|
||||
config_env = config_subparsers.add_parser("env-path", help="Print .env file path")
|
||||
|
||||
# config check
|
||||
config_subparsers.add_parser("check", help="Check for missing/outdated config")
|
||||
config_check = config_subparsers.add_parser("check", help="Check for missing/outdated config")
|
||||
|
||||
# config migrate
|
||||
config_subparsers.add_parser("migrate", help="Update config with new options")
|
||||
config_migrate = config_subparsers.add_parser("migrate", help="Update config with new options")
|
||||
|
||||
config_parser.set_defaults(func=cmd_config)
|
||||
|
||||
@@ -4845,7 +4678,7 @@ For more help on a command:
|
||||
)
|
||||
pairing_sub = pairing_parser.add_subparsers(dest="pairing_action")
|
||||
|
||||
pairing_sub.add_parser("list", help="Show pending + approved users")
|
||||
pairing_list_parser = pairing_sub.add_parser("list", help="Show pending + approved users")
|
||||
|
||||
pairing_approve_parser = pairing_sub.add_parser("approve", help="Approve a pairing code")
|
||||
pairing_approve_parser.add_argument("platform", help="Platform name (telegram, discord, slack, whatsapp)")
|
||||
@@ -4855,7 +4688,7 @@ For more help on a command:
|
||||
pairing_revoke_parser.add_argument("platform", help="Platform name")
|
||||
pairing_revoke_parser.add_argument("user_id", help="User ID to revoke")
|
||||
|
||||
pairing_sub.add_parser("clear-pending", help="Clear all pending codes")
|
||||
pairing_clear_parser = pairing_sub.add_parser("clear-pending", help="Clear all pending codes")
|
||||
|
||||
def cmd_pairing(args):
|
||||
from hermes_cli.pairing import pairing_command
|
||||
@@ -5031,7 +4864,7 @@ For more help on a command:
|
||||
memory_sub = memory_parser.add_subparsers(dest="memory_command")
|
||||
memory_sub.add_parser("setup", help="Interactive provider selection and configuration")
|
||||
memory_sub.add_parser("status", help="Show current memory provider config")
|
||||
memory_sub.add_parser("off", help="Disable external provider (built-in only)")
|
||||
memory_off_p = memory_sub.add_parser("off", help="Disable external provider (built-in only)")
|
||||
|
||||
def cmd_memory(args):
|
||||
sub = getattr(args, "memory_command", None)
|
||||
@@ -5195,7 +5028,7 @@ For more help on a command:
|
||||
sessions_prune.add_argument("--source", help="Only prune sessions from this source")
|
||||
sessions_prune.add_argument("--yes", "-y", action="store_true", help="Skip confirmation")
|
||||
|
||||
sessions_subparsers.add_parser("stats", help="Show session store statistics")
|
||||
sessions_stats = sessions_subparsers.add_parser("stats", help="Show session store statistics")
|
||||
|
||||
sessions_rename = sessions_subparsers.add_parser("rename", help="Set or change a session's title")
|
||||
sessions_rename.add_argument("session_id", help="Session ID to rename")
|
||||
@@ -5555,7 +5388,7 @@ For more help on a command:
|
||||
)
|
||||
profile_subparsers = profile_parser.add_subparsers(dest="profile_action")
|
||||
|
||||
profile_subparsers.add_parser("list", help="List all profiles")
|
||||
profile_list = profile_subparsers.add_parser("list", help="List all profiles")
|
||||
profile_use = profile_subparsers.add_parser("use", help="Set sticky default profile")
|
||||
profile_use.add_argument("profile_name", help="Profile name (or 'default')")
|
||||
|
||||
|
||||
@@ -12,8 +12,6 @@ import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Curses-based interactive picker (same pattern as hermes tools)
|
||||
@@ -277,7 +275,7 @@ def cmd_setup_provider(provider_name: str) -> None:
|
||||
config["memory"] = {}
|
||||
|
||||
if hasattr(provider, "post_setup"):
|
||||
hermes_home = str(get_hermes_home())
|
||||
hermes_home = str(Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))))
|
||||
provider.post_setup(hermes_home, config)
|
||||
return
|
||||
|
||||
@@ -328,7 +326,7 @@ def cmd_setup(args) -> None:
|
||||
# If the provider has a post_setup hook, delegate entirely to it.
|
||||
# The hook handles its own config, connection test, and activation.
|
||||
if hasattr(provider, "post_setup"):
|
||||
hermes_home = str(get_hermes_home())
|
||||
hermes_home = str(Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))))
|
||||
provider.post_setup(hermes_home, config)
|
||||
return
|
||||
|
||||
@@ -338,7 +336,7 @@ def cmd_setup(args) -> None:
|
||||
if not isinstance(provider_config, dict):
|
||||
provider_config = {}
|
||||
|
||||
env_path = get_hermes_home() / ".env"
|
||||
env_path = Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))) / ".env"
|
||||
env_writes = {}
|
||||
|
||||
if schema:
|
||||
@@ -402,7 +400,7 @@ def cmd_setup(args) -> None:
|
||||
save_config(config)
|
||||
|
||||
# Write non-secret config to provider's native location
|
||||
hermes_home = str(get_hermes_home())
|
||||
hermes_home = str(Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))))
|
||||
if provider_config and hasattr(provider, "save_config"):
|
||||
try:
|
||||
provider.save_config(provider_config, hermes_home)
|
||||
|
||||
@@ -76,22 +76,16 @@ _STRIP_VENDOR_ONLY_PROVIDERS: frozenset[str] = frozenset({
|
||||
"copilot-acp",
|
||||
})
|
||||
|
||||
# Providers whose native naming is authoritative -- pass through unchanged.
|
||||
_AUTHORITATIVE_NATIVE_PROVIDERS: frozenset[str] = frozenset({
|
||||
# Providers whose own naming is authoritative -- pass through unchanged.
|
||||
_PASSTHROUGH_PROVIDERS: frozenset[str] = frozenset({
|
||||
"gemini",
|
||||
"huggingface",
|
||||
"openai-codex",
|
||||
})
|
||||
|
||||
# Direct providers that accept bare native names but should repair a matching
|
||||
# provider/ prefix when users copy the aggregator form into config.yaml.
|
||||
_MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
|
||||
"zai",
|
||||
"kimi-coding",
|
||||
"minimax",
|
||||
"minimax-cn",
|
||||
"alibaba",
|
||||
"qwen-oauth",
|
||||
"huggingface",
|
||||
"openai-codex",
|
||||
"custom",
|
||||
})
|
||||
|
||||
@@ -173,40 +167,6 @@ def _dots_to_hyphens(model_name: str) -> str:
|
||||
return model_name.replace(".", "-")
|
||||
|
||||
|
||||
def _normalize_provider_alias(provider_name: str) -> str:
|
||||
"""Resolve provider aliases to Hermes' canonical ids."""
|
||||
raw = (provider_name or "").strip().lower()
|
||||
if not raw:
|
||||
return raw
|
||||
try:
|
||||
from hermes_cli.models import normalize_provider
|
||||
|
||||
return normalize_provider(raw)
|
||||
except Exception:
|
||||
return raw
|
||||
|
||||
|
||||
def _strip_matching_provider_prefix(model_name: str, target_provider: str) -> str:
|
||||
"""Strip ``provider/`` only when the prefix matches the target provider.
|
||||
|
||||
This prevents arbitrary slash-bearing model IDs from being mangled on
|
||||
native providers while still repairing manual config values like
|
||||
``zai/glm-5.1`` for the ``zai`` provider.
|
||||
"""
|
||||
if "/" not in model_name:
|
||||
return model_name
|
||||
|
||||
prefix, remainder = model_name.split("/", 1)
|
||||
if not prefix.strip() or not remainder.strip():
|
||||
return model_name
|
||||
|
||||
normalized_prefix = _normalize_provider_alias(prefix)
|
||||
normalized_target = _normalize_provider_alias(target_provider)
|
||||
if normalized_prefix and normalized_prefix == normalized_target:
|
||||
return remainder.strip()
|
||||
return model_name
|
||||
|
||||
|
||||
def detect_vendor(model_name: str) -> Optional[str]:
|
||||
"""Detect the vendor slug from a bare model name.
|
||||
|
||||
@@ -344,37 +304,24 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
|
||||
if not name:
|
||||
return name
|
||||
|
||||
provider = _normalize_provider_alias(target_provider)
|
||||
provider = (target_provider or "").strip().lower()
|
||||
|
||||
# --- Aggregators: need vendor/model format ---
|
||||
if provider in _AGGREGATOR_PROVIDERS:
|
||||
return _prepend_vendor(name)
|
||||
|
||||
# --- Anthropic / OpenCode: strip matching provider prefix, dots -> hyphens ---
|
||||
# --- Anthropic / OpenCode: strip vendor, dots -> hyphens ---
|
||||
if provider in _DOT_TO_HYPHEN_PROVIDERS:
|
||||
bare = _strip_matching_provider_prefix(name, provider)
|
||||
if "/" in bare:
|
||||
return bare
|
||||
bare = _strip_vendor_prefix(name)
|
||||
return _dots_to_hyphens(bare)
|
||||
|
||||
# --- Copilot: strip matching provider prefix, keep dots ---
|
||||
# --- Copilot: strip vendor, keep dots ---
|
||||
if provider in _STRIP_VENDOR_ONLY_PROVIDERS:
|
||||
return _strip_matching_provider_prefix(name, provider)
|
||||
return _strip_vendor_prefix(name)
|
||||
|
||||
# --- DeepSeek: map to one of two canonical names ---
|
||||
if provider == "deepseek":
|
||||
bare = _strip_matching_provider_prefix(name, provider)
|
||||
if "/" in bare:
|
||||
return bare
|
||||
return _normalize_for_deepseek(bare)
|
||||
|
||||
# --- Direct providers: repair matching provider prefixes only ---
|
||||
if provider in _MATCHING_PREFIX_STRIP_PROVIDERS:
|
||||
return _strip_matching_provider_prefix(name, provider)
|
||||
|
||||
# --- Authoritative native providers: preserve user-facing slugs as-is ---
|
||||
if provider in _AUTHORITATIVE_NATIVE_PROVIDERS:
|
||||
return name
|
||||
return _normalize_for_deepseek(name)
|
||||
|
||||
# --- Custom & all others: pass through as-is ---
|
||||
return name
|
||||
@@ -384,3 +331,31 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
|
||||
# Batch / convenience helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def model_display_name(model_id: str) -> str:
|
||||
"""Return a short, human-readable display name for a model id.
|
||||
|
||||
Strips the vendor prefix (if any) for a cleaner display in menus
|
||||
and status bars, while preserving dots for readability.
|
||||
|
||||
Examples::
|
||||
|
||||
>>> model_display_name("anthropic/claude-sonnet-4.6")
|
||||
'claude-sonnet-4.6'
|
||||
>>> model_display_name("claude-sonnet-4-6")
|
||||
'claude-sonnet-4-6'
|
||||
"""
|
||||
return _strip_vendor_prefix((model_id or "").strip())
|
||||
|
||||
|
||||
def is_aggregator_provider(provider: str) -> bool:
|
||||
"""Check if a provider is an aggregator that needs vendor/model format."""
|
||||
return (provider or "").strip().lower() in _AGGREGATOR_PROVIDERS
|
||||
|
||||
|
||||
def vendor_for_model(model_name: str) -> str:
|
||||
"""Return the vendor slug for a model, or ``""`` if unknown.
|
||||
|
||||
Convenience wrapper around :func:`detect_vendor` that never returns
|
||||
``None``.
|
||||
"""
|
||||
return detect_vendor(model_name) or ""
|
||||
|
||||
+95
-112
@@ -21,17 +21,22 @@ OpenRouter variant suffixes (``:free``, ``:extended``, ``:fast``).
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, NamedTuple, Optional
|
||||
|
||||
from hermes_cli.providers import (
|
||||
custom_provider_slug,
|
||||
ALIASES,
|
||||
LABELS,
|
||||
TRANSPORT_TO_API_MODE,
|
||||
determine_api_mode,
|
||||
get_label,
|
||||
get_provider,
|
||||
is_aggregator,
|
||||
normalize_provider,
|
||||
resolve_provider_full,
|
||||
)
|
||||
from hermes_cli.model_normalize import (
|
||||
detect_vendor,
|
||||
normalize_model_for_provider,
|
||||
)
|
||||
from agent.models_dev import (
|
||||
@@ -337,7 +342,6 @@ def resolve_alias(
|
||||
def get_authenticated_provider_slugs(
|
||||
current_provider: str = "",
|
||||
user_providers: dict = None,
|
||||
custom_providers: list | None = None,
|
||||
) -> list[str]:
|
||||
"""Return slugs of providers that have credentials.
|
||||
|
||||
@@ -348,7 +352,6 @@ def get_authenticated_provider_slugs(
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=current_provider,
|
||||
user_providers=user_providers,
|
||||
custom_providers=custom_providers,
|
||||
max_models=0,
|
||||
)
|
||||
return [p["slug"] for p in providers]
|
||||
@@ -386,7 +389,6 @@ def switch_model(
|
||||
is_global: bool = False,
|
||||
explicit_provider: str = "",
|
||||
user_providers: dict = None,
|
||||
custom_providers: list | None = None,
|
||||
) -> ModelSwitchResult:
|
||||
"""Core model-switching pipeline shared between CLI and gateway.
|
||||
|
||||
@@ -420,7 +422,6 @@ def switch_model(
|
||||
is_global: Whether to persist the switch.
|
||||
explicit_provider: From --provider flag (empty = no explicit provider).
|
||||
user_providers: The ``providers:`` dict from config.yaml (for user endpoints).
|
||||
custom_providers: The ``custom_providers:`` list from config.yaml.
|
||||
|
||||
Returns:
|
||||
ModelSwitchResult with all information the caller needs.
|
||||
@@ -441,11 +442,7 @@ def switch_model(
|
||||
# =================================================================
|
||||
if explicit_provider:
|
||||
# Resolve the provider
|
||||
pdef = resolve_provider_full(
|
||||
explicit_provider,
|
||||
user_providers,
|
||||
custom_providers,
|
||||
)
|
||||
pdef = resolve_provider_full(explicit_provider, user_providers)
|
||||
if pdef is None:
|
||||
_switch_err = (
|
||||
f"Unknown provider '{explicit_provider}'. "
|
||||
@@ -525,7 +522,6 @@ def switch_model(
|
||||
authed = get_authenticated_provider_slugs(
|
||||
current_provider=current_provider,
|
||||
user_providers=user_providers,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
fallback_result = _resolve_alias_fallback(raw_input, authed)
|
||||
if fallback_result is not None:
|
||||
@@ -547,11 +543,8 @@ def switch_model(
|
||||
)
|
||||
else:
|
||||
# --- Step c: On aggregator, convert vendor:model to vendor/model ---
|
||||
# Only convert when there's no slash — a slash means the name
|
||||
# is already in vendor/model format and the colon is a variant
|
||||
# tag (:free, :extended, :fast) that must be preserved.
|
||||
colon_pos = raw_input.find(":")
|
||||
if colon_pos > 0 and "/" not in raw_input and is_aggregator(current_provider):
|
||||
if colon_pos > 0 and is_aggregator(current_provider):
|
||||
left = raw_input[:colon_pos].strip().lower()
|
||||
right = raw_input[colon_pos + 1:].strip()
|
||||
if left and right:
|
||||
@@ -600,14 +593,6 @@ def switch_model(
|
||||
|
||||
provider_changed = target_provider != current_provider
|
||||
provider_label = get_label(target_provider)
|
||||
if target_provider.startswith("custom:"):
|
||||
custom_pdef = resolve_provider_full(
|
||||
target_provider,
|
||||
user_providers,
|
||||
custom_providers,
|
||||
)
|
||||
if custom_pdef is not None:
|
||||
provider_label = custom_pdef.name
|
||||
|
||||
# --- Resolve credentials ---
|
||||
api_key = current_api_key
|
||||
@@ -726,7 +711,6 @@ def switch_model(
|
||||
def list_authenticated_providers(
|
||||
current_provider: str = "",
|
||||
user_providers: dict = None,
|
||||
custom_providers: list | None = None,
|
||||
max_models: int = 8,
|
||||
) -> List[dict]:
|
||||
"""Detect which providers have credentials and list their curated models.
|
||||
@@ -752,7 +736,6 @@ def list_authenticated_providers(
|
||||
fetch_models_dev,
|
||||
get_provider_info as _mdev_pinfo,
|
||||
)
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
|
||||
|
||||
results: List[dict] = []
|
||||
@@ -773,16 +756,9 @@ def list_authenticated_providers(
|
||||
if not isinstance(pdata, dict):
|
||||
continue
|
||||
|
||||
# Prefer auth.py PROVIDER_REGISTRY for env var names — it's our
|
||||
# source of truth. models.dev can have wrong mappings (e.g.
|
||||
# minimax-cn → MINIMAX_API_KEY instead of MINIMAX_CN_API_KEY).
|
||||
pconfig = PROVIDER_REGISTRY.get(hermes_id)
|
||||
if pconfig and pconfig.api_key_env_vars:
|
||||
env_vars = list(pconfig.api_key_env_vars)
|
||||
else:
|
||||
env_vars = pdata.get("env", [])
|
||||
if not isinstance(env_vars, list):
|
||||
continue
|
||||
env_vars = pdata.get("env", [])
|
||||
if not isinstance(env_vars, list):
|
||||
continue
|
||||
|
||||
# Check if any env var is set
|
||||
has_creds = any(os.environ.get(ev) for ev in env_vars)
|
||||
@@ -809,69 +785,42 @@ def list_authenticated_providers(
|
||||
})
|
||||
seen_slugs.add(slug)
|
||||
|
||||
# --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
|
||||
# --- 2. Check Hermes-only providers (nous, openai-codex, copilot) ---
|
||||
from hermes_cli.providers import HERMES_OVERLAYS
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY as _auth_registry
|
||||
|
||||
# Build reverse mapping: models.dev ID → Hermes provider ID.
|
||||
# HERMES_OVERLAYS keys may be models.dev IDs (e.g. "github-copilot")
|
||||
# while _PROVIDER_MODELS and config.yaml use Hermes IDs ("copilot").
|
||||
_mdev_to_hermes = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()}
|
||||
|
||||
for pid, overlay in HERMES_OVERLAYS.items():
|
||||
if pid in seen_slugs:
|
||||
continue
|
||||
|
||||
# Resolve Hermes slug — e.g. "github-copilot" → "copilot"
|
||||
hermes_slug = _mdev_to_hermes.get(pid, pid)
|
||||
if hermes_slug in seen_slugs:
|
||||
continue
|
||||
|
||||
# Check if credentials exist
|
||||
has_creds = False
|
||||
if overlay.extra_env_vars:
|
||||
has_creds = any(os.environ.get(ev) for ev in overlay.extra_env_vars)
|
||||
# Also check api_key_env_vars from PROVIDER_REGISTRY for api_key auth_type
|
||||
if not has_creds and overlay.auth_type == "api_key":
|
||||
for _key in (pid, hermes_slug):
|
||||
pcfg = _auth_registry.get(_key)
|
||||
if pcfg and pcfg.api_key_env_vars:
|
||||
if any(os.environ.get(ev) for ev in pcfg.api_key_env_vars):
|
||||
has_creds = True
|
||||
break
|
||||
if not has_creds and overlay.auth_type in ("oauth_device_code", "oauth_external", "external_process"):
|
||||
if overlay.auth_type in ("oauth_device_code", "oauth_external", "external_process"):
|
||||
# These use auth stores, not env vars — check for auth.json entries
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
providers_store = store.get("providers", {})
|
||||
pool_store = store.get("credential_pool", {})
|
||||
if store and (
|
||||
pid in providers_store or hermes_slug in providers_store
|
||||
or pid in pool_store or hermes_slug in pool_store
|
||||
):
|
||||
from hermes_cli.auth import _read_auth_store
|
||||
store = _read_auth_store()
|
||||
if store and pid in store:
|
||||
has_creds = True
|
||||
except Exception as exc:
|
||||
logger.debug("Auth store check failed for %s: %s", pid, exc)
|
||||
except Exception:
|
||||
pass
|
||||
if not has_creds:
|
||||
continue
|
||||
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
# Use curated list
|
||||
model_ids = curated.get(pid, [])
|
||||
total = len(model_ids)
|
||||
top = model_ids[:max_models]
|
||||
|
||||
results.append({
|
||||
"slug": hermes_slug,
|
||||
"name": get_label(hermes_slug),
|
||||
"is_current": hermes_slug == current_provider or pid == current_provider,
|
||||
"slug": pid,
|
||||
"name": get_label(pid),
|
||||
"is_current": pid == current_provider,
|
||||
"is_user_defined": False,
|
||||
"models": top,
|
||||
"total_models": total,
|
||||
"source": "hermes",
|
||||
})
|
||||
seen_slugs.add(pid)
|
||||
seen_slugs.add(hermes_slug)
|
||||
|
||||
# --- 3. User-defined endpoints from config ---
|
||||
if user_providers and isinstance(user_providers, dict):
|
||||
@@ -899,46 +848,80 @@ def list_authenticated_providers(
|
||||
"api_url": api_url,
|
||||
})
|
||||
|
||||
# --- 4. Saved custom providers from config ---
|
||||
if custom_providers and isinstance(custom_providers, list):
|
||||
for entry in custom_providers:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
|
||||
display_name = (entry.get("name") or "").strip()
|
||||
api_url = (
|
||||
entry.get("base_url", "")
|
||||
or entry.get("url", "")
|
||||
or entry.get("api", "")
|
||||
or ""
|
||||
).strip()
|
||||
if not display_name or not api_url:
|
||||
continue
|
||||
|
||||
slug = custom_provider_slug(display_name)
|
||||
if slug in seen_slugs:
|
||||
continue
|
||||
|
||||
models_list = []
|
||||
default_model = (entry.get("model") or "").strip()
|
||||
if default_model:
|
||||
models_list.append(default_model)
|
||||
|
||||
results.append({
|
||||
"slug": slug,
|
||||
"name": display_name,
|
||||
"is_current": slug == current_provider,
|
||||
"is_user_defined": True,
|
||||
"models": models_list,
|
||||
"total_models": len(models_list),
|
||||
"source": "user-config",
|
||||
"api_url": api_url,
|
||||
})
|
||||
seen_slugs.add(slug)
|
||||
|
||||
# Sort: current provider first, then by model count descending
|
||||
results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fuzzy suggestions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def suggest_models(raw_input: str, limit: int = 3) -> List[str]:
|
||||
"""Return fuzzy model suggestions for a (possibly misspelled) input."""
|
||||
query = raw_input.strip()
|
||||
if not query:
|
||||
return []
|
||||
|
||||
results = search_models_dev(query, limit=limit)
|
||||
suggestions: list[str] = []
|
||||
for r in results:
|
||||
mid = r.get("model_id", "")
|
||||
if mid:
|
||||
suggestions.append(mid)
|
||||
|
||||
return suggestions[:limit]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Custom provider switch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def switch_to_custom_provider() -> CustomAutoResult:
|
||||
"""Handle bare '/model --provider custom' — resolve endpoint and auto-detect model."""
|
||||
from hermes_cli.runtime_provider import (
|
||||
resolve_runtime_provider,
|
||||
_auto_detect_local_model,
|
||||
)
|
||||
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested="custom")
|
||||
except Exception as e:
|
||||
return CustomAutoResult(
|
||||
success=False,
|
||||
error_message=f"Could not resolve custom endpoint: {e}",
|
||||
)
|
||||
|
||||
cust_base = runtime.get("base_url", "")
|
||||
cust_key = runtime.get("api_key", "")
|
||||
|
||||
if not cust_base or "openrouter.ai" in cust_base:
|
||||
return CustomAutoResult(
|
||||
success=False,
|
||||
error_message=(
|
||||
"No custom endpoint configured. "
|
||||
"Set model.base_url in config.yaml, or set OPENAI_BASE_URL "
|
||||
"in .env, or run: hermes setup -> Custom OpenAI-compatible endpoint"
|
||||
),
|
||||
)
|
||||
|
||||
detected_model = _auto_detect_local_model(cust_base)
|
||||
if not detected_model:
|
||||
return CustomAutoResult(
|
||||
success=False,
|
||||
base_url=cust_base,
|
||||
api_key=cust_key,
|
||||
error_message=(
|
||||
f"Custom endpoint at {cust_base} is reachable but no single "
|
||||
f"model was auto-detected. Specify the model explicitly: "
|
||||
f"/model <model-name> --provider custom"
|
||||
),
|
||||
)
|
||||
|
||||
return CustomAutoResult(
|
||||
success=True,
|
||||
model=detected_model,
|
||||
base_url=cust_base,
|
||||
api_key=cust_key,
|
||||
)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user