Compare commits

..

9 Commits

Author SHA1 Message Date
Sam Herring dff5481e58 Eval splits for holdout sets 2026-03-03 14:42:45 -05:00
Sam Herring fe17b5ff08 Changing return type to be ScoredDataGroup to account for multiple trajectories 2026-03-02 11:35:06 -08:00
Sam Herring 6fdb38ed29 Added task sppecific metris and evals 2026-02-27 11:20:18 -08:00
Sam Herring b7e713b101 Wandb changes 2026-02-26 10:41:24 -08:00
Sam Herring 0e694b954a Updating config 2026-02-24 19:23:05 -08:00
Sam Herring c12e46cf13 Adding config init method 2026-02-24 19:19:39 -08:00
Sam Herring b93ad43191 Updating path vars and dataset loading 2026-02-24 18:58:19 -08:00
Sam Herring f1c2f8a414 Updating to use hermes-agent backend and parse container definition out of provided .sif files 2026-02-24 16:35:18 -08:00
Sam Herring 9139eeaa60 Adding endless terminal environment after rebase: 2026-02-17 16:56:17 -08:00
516 changed files with 13233 additions and 132071 deletions
+25 -40
View File
@@ -10,7 +10,7 @@
OPENROUTER_API_KEY=
# Default model to use (OpenRouter format: provider/model)
# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus
# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus
LLM_MODEL=anthropic/claude-opus-4.6
# =============================================================================
@@ -29,26 +29,21 @@ NOUS_API_KEY=
# Get at: https://fal.ai/
FAL_KEY=
# Honcho - Cross-session AI-native user modeling (optional)
# Builds a persistent understanding of the user across sessions and tools.
# Get at: https://app.honcho.dev
# Also requires ~/.honcho/config.json with enabled=true (see README).
HONCHO_API_KEY=
# =============================================================================
# TERMINAL TOOL CONFIGURATION (mini-swe-agent backend)
# =============================================================================
# Backend type: "local", "singularity", "docker", "modal", or "ssh"
# Terminal backend is configured in ~/.hermes/config.yaml (terminal.backend).
# Use 'hermes setup' or 'hermes config set terminal.backend docker' to change.
# Supported: local, docker, singularity, modal, ssh
#
# Only override here if you need to force a backend without touching config.yaml:
# TERMINAL_ENV=local
# - local: Runs directly on your machine (fastest, no isolation)
# - ssh: Runs on remote server via SSH (great for sandboxing - agent can't touch its own code)
# - singularity: Runs in Apptainer/Singularity containers (HPC clusters, no root needed)
# - docker: Runs in Docker containers (isolated, requires Docker + docker group)
# - modal: Runs in Modal cloud sandboxes (scalable, requires Modal account)
TERMINAL_ENV=local
# Container images (for singularity/docker/modal backends)
# TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
# TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20
TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20
TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
@@ -152,9 +147,9 @@ BROWSER_INACTIVITY_TIMEOUT=120
# =============================================================================
# Required for voice message transcription (Whisper) and OpenAI TTS voices.
# Uses OpenAI's API directly (not via OpenRouter).
# Named VOICE_TOOLS_OPENAI_KEY to avoid interference with OpenRouter.
# Named HERMES_OPENAI_API_KEY to avoid interference with OpenRouter.
# Get at: https://platform.openai.com/api-keys
VOICE_TOOLS_OPENAI_KEY=
HERMES_OPENAI_API_KEY=
# =============================================================================
# SLACK INTEGRATION
@@ -169,14 +164,6 @@ VOICE_TOOLS_OPENAI_KEY=
# Slack allowed users (comma-separated Slack user IDs)
# SLACK_ALLOWED_USERS=
# WhatsApp (built-in Baileys bridge — run `hermes whatsapp` to pair)
# WHATSAPP_ENABLED=false
# WHATSAPP_ALLOWED_USERS=15551234567
# Gateway-wide: allow ALL users without an allowlist (default: false = deny)
# Only set to true if you intentionally want open access.
# GATEWAY_ALLOW_ALL_USERS=false
# =============================================================================
# RESPONSE PACING
# =============================================================================
@@ -186,6 +173,18 @@ VOICE_TOOLS_OPENAI_KEY=
# HERMES_HUMAN_DELAY_MIN_MS=800 # Min delay in ms (custom mode)
# HERMES_HUMAN_DELAY_MAX_MS=2500 # Max delay in ms (custom mode)
# =============================================================================
# LEGACY/OPTIONAL API KEYS
# =============================================================================
# Morph API Key - For legacy Hecate terminal backend (terminal-hecate tool)
# Get at: https://morph.so/
MORPH_API_KEY=
# Hecate VM Settings (only if using terminal-hecate tool)
HECATE_VM_LIFETIME_SECONDS=300
HECATE_DEFAULT_SNAPSHOT_ID=snapshot_p5294qxt
# =============================================================================
# DEBUG OPTIONS
# =============================================================================
@@ -200,10 +199,9 @@ IMAGE_TOOLS_DEBUG=false
# When conversation approaches model's context limit, middle turns are
# automatically summarized to free up space.
#
# Context compression is configured in ~/.hermes/config.yaml under compression:
# CONTEXT_COMPRESSION_ENABLED=true # Enable auto-compression (default: true)
# CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit
# Model is set via compression.summary_model in config.yaml (default: google/gemini-3-flash-preview)
# CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001 # Fast model for summaries
# =============================================================================
# RL TRAINING (Tinker + Atropos)
@@ -222,16 +220,3 @@ WANDB_API_KEY=
# RL API Server URL (default: http://localhost:8080)
# Change if running the rl-server on a different host/port
# RL_API_URL=http://localhost:8080
# =============================================================================
# SKILLS HUB (GitHub integration for skill search/install/publish)
# =============================================================================
# GitHub Personal Access Token — for higher API rate limits on skill search/install
# Get at: https://github.com/settings/tokens (Fine-grained recommended)
# GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
# GitHub App credentials (optional — for bot identity on PRs)
# GITHUB_APP_ID=
# GITHUB_APP_PRIVATE_KEY_PATH=
# GITHUB_APP_INSTALLATION_ID=
-144
View File
@@ -1,144 +0,0 @@
name: "🐛 Bug Report"
description: Report a bug — something that's broken, crashes, or behaves incorrectly.
title: "[Bug]: "
labels: ["bug"]
body:
- type: markdown
attributes:
value: |
Thanks for reporting a bug! Please fill out the sections below so we can reproduce and fix it quickly.
**Before submitting**, please:
- [ ] Search [existing issues](https://github.com/NousResearch/hermes-agent/issues) to avoid duplicates
- [ ] Update to the latest version (`hermes update`) and confirm the bug still exists
- type: textarea
id: description
attributes:
label: Bug Description
description: A clear description of what's broken. Include error messages, tracebacks, or screenshots if relevant.
placeholder: |
What happened? What did you expect to happen instead?
validations:
required: true
- type: textarea
id: reproduction
attributes:
label: Steps to Reproduce
description: Minimal steps to trigger the bug. The more specific, the faster we can fix it.
placeholder: |
1. Run `hermes chat`
2. Send the message "..."
3. Agent calls tool X
4. Error appears: ...
validations:
required: true
- type: textarea
id: expected
attributes:
label: Expected Behavior
description: What should have happened instead?
validations:
required: true
- type: textarea
id: actual
attributes:
label: Actual Behavior
description: What actually happened? Include full error output if available.
validations:
required: true
- type: dropdown
id: component
attributes:
label: Affected Component
description: Which part of Hermes is affected?
multiple: true
options:
- CLI (interactive chat)
- Gateway (Telegram/Discord/Slack/WhatsApp)
- Setup / Installation
- Tools (terminal, file ops, web, code execution, etc.)
- Skills (skill loading, skill hub, skill guard)
- Agent Core (conversation loop, context compression, memory)
- Configuration (config.yaml, .env, hermes setup)
- Other
validations:
required: true
- type: dropdown
id: platform
attributes:
label: Messaging Platform (if gateway-related)
description: Which platform adapter is affected?
multiple: true
options:
- N/A (CLI only)
- Telegram
- Discord
- Slack
- WhatsApp
- type: input
id: os
attributes:
label: Operating System
description: e.g. Ubuntu 24.04, macOS 15.2, Windows 11
placeholder: Ubuntu 24.04
validations:
required: true
- type: input
id: python-version
attributes:
label: Python Version
description: Output of `python --version`
placeholder: "3.11.9"
validations:
required: true
- type: input
id: hermes-version
attributes:
label: Hermes Version
description: Output of `hermes version`
placeholder: "2.1.0"
validations:
required: true
- type: textarea
id: logs
attributes:
label: Relevant Logs / Traceback
description: Paste any error output, traceback, or log messages. This will be auto-formatted as code.
render: shell
- type: textarea
id: root-cause
attributes:
label: Root Cause Analysis (optional)
description: |
If you've dug into the code and identified the root cause, share it here.
Include file paths, line numbers, and code snippets if possible. This massively speeds up fixes.
placeholder: |
The bug is in `gateway/run.py` line 949. `len(history)` counts session_meta entries
but `agent_messages` was built from filtered history...
- type: textarea
id: proposed-fix
attributes:
label: Proposed Fix (optional)
description: If you have a fix in mind (or a PR ready), describe it here.
placeholder: |
Replace `.get()` with `.pop()` on line 289 of `gateway/platforms/base.py`
to actually clear the pending message after retrieval.
- type: checkboxes
id: pr-ready
attributes:
label: Are you willing to submit a PR for this?
options:
- label: I'd like to fix this myself and submit a PR
-11
View File
@@ -1,11 +0,0 @@
blank_issues_enabled: true
contact_links:
- name: 💬 Nous Research Discord
url: https://discord.gg/NousResearch
about: For quick questions, showcasing projects, sharing skills, and community chat.
- name: 📖 Documentation
url: https://github.com/NousResearch/hermes-agent/blob/main/README.md
about: Check the README and docs before opening an issue.
- name: 🤝 Contributing Guide
url: https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md
about: Read this before submitting a PR.
@@ -1,73 +0,0 @@
name: "✨ Feature Request"
description: Suggest a new feature or improvement.
title: "[Feature]: "
labels: ["enhancement"]
body:
- type: markdown
attributes:
value: |
Thanks for the suggestion! Before submitting, please consider:
- **Is this a new skill?** Most capabilities should be [skills, not tools](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#should-it-be-a-skill-or-a-tool). If it's a specialized integration (crypto, NFT, niche SaaS), it belongs on the Skills Hub, not bundled.
- **Search [existing issues](https://github.com/NousResearch/hermes-agent/issues)** — someone may have already proposed this.
- type: textarea
id: problem
attributes:
label: Problem or Use Case
description: What problem does this solve? What are you trying to do that you can't today?
placeholder: |
I'm trying to use Hermes with [provider/platform/workflow] but currently
there's no way to...
validations:
required: true
- type: textarea
id: solution
attributes:
label: Proposed Solution
description: How do you think this should work? Be as specific as you can — CLI flags, config options, UI behavior.
placeholder: |
Add a `--foo` flag to `hermes chat` that enables...
Or: Add a config key `bar.baz` that controls...
validations:
required: true
- type: textarea
id: alternatives
attributes:
label: Alternatives Considered
description: What other approaches did you consider? Why is the proposed solution better?
- type: dropdown
id: type
attributes:
label: Feature Type
options:
- New tool
- New bundled skill
- CLI improvement
- Gateway / messaging improvement
- Configuration option
- Performance / reliability
- Developer experience (tests, docs, CI)
- Other
validations:
required: true
- type: dropdown
id: scope
attributes:
label: Scope
description: How big is this change?
options:
- Small (single file, < 50 lines)
- Medium (few files, < 300 lines)
- Large (new module or significant refactor)
- type: checkboxes
id: pr-ready
attributes:
label: Contribution
options:
- label: I'd like to implement this myself and submit a PR
-100
View File
@@ -1,100 +0,0 @@
name: "🔧 Setup / Installation Help"
description: Having trouble installing or configuring Hermes? Ask here.
title: "[Setup]: "
labels: ["setup"]
body:
- type: markdown
attributes:
value: |
Sorry you're having trouble! Please fill out the details below so we can help.
**Quick checks first:**
- Run `hermes doctor` and include the output below
- Try `hermes update` to get the latest version
- Check the [README troubleshooting section](https://github.com/NousResearch/hermes-agent#troubleshooting)
- For general questions, consider the [Nous Research Discord](https://discord.gg/NousResearch) for faster help
- type: textarea
id: description
attributes:
label: What's Going Wrong?
description: Describe what you're trying to do and where it fails.
placeholder: |
I ran `hermes setup` and selected Nous Portal, but when I try to
start the gateway I get...
validations:
required: true
- type: textarea
id: steps
attributes:
label: Steps Taken
description: What did you do? Include the exact commands you ran.
placeholder: |
1. Ran the install script: `curl -fsSL ... | bash`
2. Ran `hermes setup` and chose "Quick setup"
3. Selected OpenRouter, entered API key
4. Ran `hermes chat` and got error...
validations:
required: true
- type: dropdown
id: install-method
attributes:
label: Installation Method
options:
- Install script (curl | bash)
- Manual clone + pip/uv install
- PowerShell installer (Windows)
- Docker
- Other
validations:
required: true
- type: input
id: os
attributes:
label: Operating System
placeholder: Ubuntu 24.04 / macOS 15.2 / Windows 11
validations:
required: true
- type: input
id: python-version
attributes:
label: Python Version
description: Output of `python --version` (or `python3 --version`)
placeholder: "3.11.9"
- type: input
id: hermes-version
attributes:
label: Hermes Version
description: Output of `hermes version` (if install got that far)
placeholder: "2.1.0"
- type: textarea
id: doctor-output
attributes:
label: Output of `hermes doctor`
description: Run `hermes doctor` and paste the full output. This will be auto-formatted.
render: shell
- type: textarea
id: error-output
attributes:
label: Full Error Output
description: Paste the complete error message or traceback. This will be auto-formatted.
render: shell
validations:
required: true
- type: textarea
id: tried
attributes:
label: What I've Already Tried
description: List any fixes or workarounds you've already attempted.
placeholder: |
- Ran `hermes update`
- Tried reinstalling with `pip install -e ".[all]"`
- Checked that OPENROUTER_API_KEY is set in ~/.hermes/.env
-75
View File
@@ -1,75 +0,0 @@
## What does this PR do?
<!-- Describe the change clearly. What problem does it solve? Why is this approach the right one? -->
## Related Issue
<!-- Link the issue this PR addresses. If no issue exists, consider creating one first. -->
Fixes #
## Type of Change
<!-- Check the one that applies. -->
- [ ] 🐛 Bug fix (non-breaking change that fixes an issue)
- [ ] ✨ New feature (non-breaking change that adds functionality)
- [ ] 🔒 Security fix
- [ ] 📝 Documentation update
- [ ] ✅ Tests (adding or improving test coverage)
- [ ] ♻️ Refactor (no behavior change)
- [ ] 🎯 New skill (bundled or hub)
## Changes Made
<!-- List the specific changes. Include file paths for code changes. -->
-
## How to Test
<!-- Steps to verify this change works. For bugs: reproduction steps + proof that the fix works. -->
1.
2.
3.
## Checklist
<!-- Complete these before requesting review. -->
### Code
- [ ] I've read the [Contributing Guide](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md)
- [ ] My commit messages follow [Conventional Commits](https://www.conventionalcommits.org/) (`fix(scope):`, `feat(scope):`, etc.)
- [ ] I searched for [existing PRs](https://github.com/NousResearch/hermes-agent/pulls) to make sure this isn't a duplicate
- [ ] My PR contains **only** changes related to this fix/feature (no unrelated commits)
- [ ] I've run `pytest tests/ -q` and all tests pass
- [ ] I've added tests for my changes (required for bug fixes, strongly encouraged for features)
- [ ] I've tested on my platform: <!-- e.g. Ubuntu 24.04, macOS 15.2, Windows 11 -->
### Documentation & Housekeeping
<!-- Check all that apply. It's OK to check "N/A" if a category doesn't apply to your change. -->
- [ ] I've updated relevant documentation (README, `docs/`, docstrings) — or N/A
- [ ] I've updated `cli-config.yaml.example` if I added/changed config keys — or N/A
- [ ] I've updated `CONTRIBUTING.md` or `AGENTS.md` if I changed architecture or workflows — or N/A
- [ ] I've considered cross-platform impact (Windows, macOS) per the [compatibility guide](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#cross-platform-compatibility) — or N/A
- [ ] I've updated tool descriptions/schemas if I changed tool behavior — or N/A
## For New Skills
<!-- Only fill this out if you're adding a skill. Delete this section otherwise. -->
- [ ] This skill is **broadly useful** to most users (if bundled) — see [Contributing Guide](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#should-the-skill-be-bundled)
- [ ] SKILL.md follows the [standard format](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#skillmd-format) (frontmatter, trigger conditions, steps, pitfalls)
- [ ] No external dependencies that aren't already available (prefer stdlib, curl, existing Hermes tools)
- [ ] I've tested the skill end-to-end: `hermes --toolsets skills -q "Use the X skill to do Y"`
## Screenshots / Logs
<!-- If applicable, add screenshots or log output showing the fix/feature in action. -->
-60
View File
@@ -1,60 +0,0 @@
name: Deploy Site
on:
push:
branches: [main]
paths:
- 'website/**'
- 'landingpage/**'
- '.github/workflows/deploy-site.yml'
workflow_dispatch:
permissions:
pages: write
id-token: write
concurrency:
group: pages
cancel-in-progress: false
jobs:
build-and-deploy:
runs-on: ubuntu-latest
environment:
name: github-pages
url: ${{ steps.deploy.outputs.page_url }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: 20
cache: npm
cache-dependency-path: website/package-lock.json
- name: Install dependencies
run: npm ci
working-directory: website
- name: Build Docusaurus
run: npm run build
working-directory: website
- name: Stage deployment
run: |
mkdir -p _site/docs
# Landing page at root
cp -r landingpage/* _site/
# Docusaurus at /docs/
cp -r website/build/* _site/docs/
# CNAME so GitHub Pages keeps the custom domain between deploys
echo "hermes-agent.nousresearch.com" > _site/CNAME
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
with:
path: _site
- name: Deploy to GitHub Pages
id: deploy
uses: actions/deploy-pages@v4
-42
View File
@@ -1,42 +0,0 @@
name: Tests
on:
push:
branches: [main]
pull_request:
branches: [main]
# Cancel in-progress runs for the same PR/branch
concurrency:
group: tests-${{ github.ref }}
cancel-in-progress: true
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v5
- name: Set up Python 3.11
run: uv python install 3.11
- name: Install dependencies
run: |
uv venv .venv --python 3.11
source .venv/bin/activate
uv pip install -e ".[all,dev]"
- name: Run tests
run: |
source .venv/bin/activate
python -m pytest tests/ -q --ignore=tests/integration --tb=short
env:
# Ensure tests don't accidentally call real APIs
OPENROUTER_API_KEY: ""
OPENAI_API_KEY: ""
NOUS_API_KEY: ""
+2 -4
View File
@@ -1,5 +1,7 @@
/venv/
/_pycache/
hecate/
hecate-lib/
*.pyc*
__pycache__/
.venv/
@@ -44,7 +46,3 @@ testlogs
# CLI config (may contain sensitive SSH paths)
cli-config.yaml
# Skills Hub state (lives in ~/.hermes/skills/.hub/ at runtime, but just in case)
skills/.hub/
ignored/
+84 -160
View File
@@ -2,7 +2,7 @@
Instructions for AI coding assistants (GitHub Copilot, Cursor, etc.) and human developers.
Hermes Agent is an AI agent harness with tool-calling capabilities, interactive CLI, messaging integrations, and scheduled tasks.
Hermes-Agent is an AI agent harness with tool-calling capabilities, interactive CLI, messaging integrations, and scheduled tasks.
## Development Environment
@@ -15,51 +15,29 @@ source venv/bin/activate # Before running any Python commands
```
hermes-agent/
├── agent/ # Agent internals (extracted from run_agent.py)
│ ├── model_metadata.py # Model context lengths, token estimation
│ ├── context_compressor.py # Auto context compression
│ ├── prompt_caching.py # Anthropic prompt caching
│ ├── prompt_builder.py # System prompt assembly (identity, skills index, context files)
│ ├── display.py # KawaiiSpinner, tool preview formatting
│ └── trajectory.py # Trajectory saving helpers
├── hermes_cli/ # CLI implementation
├── hermes_cli/ # Unified CLI commands
│ ├── main.py # Entry point, command dispatcher
│ ├── banner.py # Welcome banner, ASCII art, skills summary
│ ├── commands.py # Slash command definitions + autocomplete
│ ├── callbacks.py # Interactive prompt callbacks (clarify, sudo, approval)
│ ├── setup.py # Interactive setup wizard
│ ├── config.py # Config management & migration
│ ├── status.py # Status display
│ ├── doctor.py # Diagnostics
│ ├── gateway.py # Gateway management
│ ├── uninstall.py # Uninstaller
── cron.py # Cron job management
│ └── skills_hub.py # Skills Hub CLI + /skills slash command
── cron.py # Cron job management
├── tools/ # Tool implementations
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
│ ├── approval.py # Dangerous command detection + per-session approval
│ ├── environments/ # Terminal execution backends
│ │ ├── base.py # BaseEnvironment ABC
│ │ ├── local.py # Local execution with interrupt support
│ │ ├── docker.py # Docker container execution
│ │ ├── ssh.py # SSH remote execution
│ │ ├── singularity.py # Singularity/Apptainer + SIF management
│ │ ├── modal.py # Modal cloud execution
│ │ └── daytona.py # Daytona cloud sandboxes
│ ├── terminal_tool.py # Terminal orchestration (sudo, lifecycle, factory)
│ ├── todo_tool.py # Planning & task management
│ ├── process_registry.py # Background process management
│ └── ... # Other tool files
│ ├── process_registry.py # Background process management (spawn, poll, wait, kill)
│ ├── transcription_tools.py # Speech-to-text (Whisper API)
├── gateway/ # Messaging platform adapters
│ ├── platforms/ # Platform-specific adapters (telegram, discord, slack, whatsapp)
── ...
│ ├── pairing.py # DM pairing code system
── hooks.py # Event hook system
│ ├── sticker_cache.py # Telegram sticker vision cache
│ ├── platforms/
│ │ └── slack.py # Slack adapter (slack-bolt)
├── cron/ # Scheduler implementation
├── environments/ # RL training environments (Atropos integration)
├── skills/ # Bundled skill sources
├── optional-skills/ # Official optional skills (not activated by default)
├── cli.py # Interactive CLI orchestrator (HermesCLI class)
├── run_agent.py # AIAgent class (core conversation loop)
├── model_tools.py # Tool orchestration (thin layer over tools/registry.py)
├── skills/ # Knowledge documents
├── cli.py # Interactive CLI (Rich UI)
├── run_agent.py # Agent runner with AIAgent class
├── model_tools.py # Tool schemas and handlers
├── toolsets.py # Tool groupings
├── toolset_distributions.py # Probability-based tool selection
└── batch_runner.py # Parallel batch processing
@@ -77,16 +55,14 @@ hermes-agent/
## File Dependency Chain
```
tools/registry.py (no deps — imported by all tool files)
tools/*.py (each calls registry.register() at import time)
model_tools.py (imports tools/registry + triggers tool discovery)
run_agent.py, cli.py, batch_runner.py, environments/
tools/*.py → tools/__init__.py → model_tools.py → toolsets.py → toolset_distributions.py
run_agent.py ──────────────────────────┘
cli.py → run_agent.py (uses AIAgent with quiet_mode=True)
batch_runner.py → run_agent.py + toolset_distributions.py
```
Each tool file co-locates its schema, handler, and registration. `model_tools.py` is a thin orchestration layer.
Always ensure consistency between tools, model_tools.py, and toolsets.py when changing any of them.
---
@@ -175,41 +151,17 @@ For models that support chain-of-thought reasoning:
The interactive CLI uses:
- **Rich** - For the welcome banner and styled panels
- **prompt_toolkit** - For fixed input area with history, `patch_stdout`, slash command autocomplete, and floating completion menus
- **KawaiiSpinner** (in run_agent.py) - Animated kawaii faces during API calls; clean `┊` activity feed for tool execution results
- **prompt_toolkit** - For fixed input area with history and `patch_stdout`
- **KawaiiSpinner** (in run_agent.py) - Animated feedback during API calls and tool execution
Key components:
- `HermesCLI` class - Main CLI controller with commands and conversation loop
- `SlashCommandCompleter` - Autocomplete dropdown for `/commands` (type `/` to see all)
- `agent/skill_commands.py` - Scans skills and builds invocation messages (shared with gateway)
- `load_cli_config()` - Loads config, sets environment variables for terminal
- `build_welcome_banner()` - Displays ASCII art logo, tools, and skills summary
CLI UX notes:
- Thinking spinner (during LLM API call) shows animated kawaii face + verb (`(⌐■_■) deliberating...`)
- When LLM returns tool calls, the spinner clears silently (no "got it!" noise)
- Tool execution results appear as a clean activity feed: `┊ {emoji} {verb} {detail} {duration}`
- "got it!" only appears when the LLM returns a final text response (`⚕ ready`)
- The prompt shows `⚕ ` when the agent is working, `` when idle
- Pasting 5+ lines auto-saves to `~/.hermes/pastes/` and collapses to a reference
- Multi-line input via Alt+Enter or Ctrl+J
- `/commands` - Process user commands like `/help`, `/clear`, `/personality`, etc.
- `/skill-name` - Invoke installed skills directly (e.g., `/axolotl`, `/gif-search`)
CLI uses `quiet_mode=True` when creating AIAgent to suppress verbose logging.
### Skill Slash Commands
Every installed skill in `~/.hermes/skills/` is automatically registered as a slash command.
The skill name (from frontmatter or folder name) becomes the command: `axolotl``/axolotl`.
Implementation (`agent/skill_commands.py`, shared between CLI and gateway):
1. `scan_skill_commands()` scans all SKILL.md files at startup
2. `build_skill_invocation_message()` loads the SKILL.md content and builds a user-turn message
3. The message includes the full skill content, a list of supporting files (not loaded), and the user's instruction
4. Supporting files can be loaded on demand via the `skill_view` tool
5. Injected as a **user message** (not system prompt) to preserve prompt caching
### Adding CLI Commands
1. Add to `COMMANDS` dict with description
@@ -236,11 +188,8 @@ The unified `hermes` command provides all functionality:
| `hermes doctor` | Diagnose issues |
| `hermes update` | Update to latest (checks for new config) |
| `hermes uninstall` | Uninstall (can keep configs for reinstall) |
| `hermes gateway` | Start gateway (messaging + cron scheduler) |
| `hermes gateway setup` | Configure messaging platforms interactively |
| `hermes gateway install` | Install gateway as system service |
| `hermes gateway` | Start messaging gateway |
| `hermes cron list` | View scheduled jobs |
| `hermes cron status` | Check if cron scheduler is running |
| `hermes version` | Show version info |
| `hermes pairing list/approve/revoke` | Manage DM pairing codes |
@@ -248,19 +197,7 @@ The unified `hermes` command provides all functionality:
## Messaging Gateway
The gateway connects Hermes to Telegram, Discord, Slack, and WhatsApp.
### Setup
The interactive setup wizard handles platform configuration:
```bash
hermes gateway setup # Arrow-key menu of all platforms, configure tokens/allowlists/home channels
```
This is the recommended way to configure messaging. It shows which platforms are already set up, walks through each one interactively, and offers to start/restart the gateway service at the end.
Platforms can also be configured manually in `~/.hermes/.env`:
The gateway connects Hermes to Telegram, Discord, and WhatsApp.
### Configuration (in `~/.hermes/.env`):
@@ -277,7 +214,9 @@ DISCORD_ALLOWED_USERS=123456789012345678 # Comma-separated user IDs
HERMES_MAX_ITERATIONS=60 # Max tool-calling iterations
MESSAGING_CWD=/home/myuser # Terminal working directory for messaging
# Tool progress is configured in config.yaml (display.tool_progress: off|new|all|verbose)
# Tool Progress (optional)
HERMES_TOOL_PROGRESS=true # Send progress messages
HERMES_TOOL_PROGRESS_MODE=new # "new" or "all"
```
### Working Directory Behavior
@@ -289,11 +228,11 @@ This is intentional: CLI users are in a terminal and expect the agent to work in
### Security (User Allowlists):
**IMPORTANT**: By default, the gateway denies all users who are not in an allowlist or paired via DM.
**IMPORTANT**: Without an allowlist, anyone who finds your bot can use it!
The gateway checks `{PLATFORM}_ALLOWED_USERS` environment variables:
- If set: Only listed user IDs can interact with the bot
- If unset: All users are denied unless `GATEWAY_ALLOW_ALL_USERS=true` is set
- If unset: All users are allowed (dangerous with terminal access!)
Users can find their IDs:
- **Telegram**: Message [@userinfobot](https://t.me/userinfobot)
@@ -328,13 +267,10 @@ Files: `gateway/hooks.py`
### Tool Progress Notifications
When `tool_progress` is enabled in `config.yaml`, the bot sends status messages as it works:
When `HERMES_TOOL_PROGRESS=true`, the bot sends status messages as it works:
- `💻 \`ls -la\`...` (terminal commands show the actual command)
- `🔍 web_search...`
- `📄 web_extract...`
- `🐍 execute_code...` (programmatic tool calling sandbox)
- `🔀 delegate_task...` (subagent delegation)
- `❓ clarify...` (user question, CLI-only)
Modes:
- `new`: Only when switching to a different tool (less spam)
@@ -423,25 +359,23 @@ The system uses `_config_version` to detect outdated configs:
API keys are loaded from `~/.hermes/.env`:
- `OPENROUTER_API_KEY` - Main LLM API access (primary provider)
- `FIRECRAWL_API_KEY` - Web search/extract tools
- `FIRECRAWL_API_URL` - Self-hosted Firecrawl endpoint (optional)
- `BROWSERBASE_API_KEY` / `BROWSERBASE_PROJECT_ID` - Browser automation
- `FAL_KEY` - Image generation (FLUX model)
- `NOUS_API_KEY` - Vision and Mixture-of-Agents tools
Terminal tool configuration (in `~/.hermes/config.yaml`):
- `terminal.backend` - Backend: local, docker, singularity, modal, daytona, or ssh
- `terminal.backend` - Backend: local, docker, singularity, modal, or ssh
- `terminal.cwd` - Working directory ("." = host CWD for local only; for remote backends set an absolute path inside the target, or omit to use the backend's default)
- `terminal.docker_image` - Image for Docker backend
- `terminal.singularity_image` - Image for Singularity backend
- `terminal.modal_image` - Image for Modal backend
- `terminal.daytona_image` - Image for Daytona backend
- `DAYTONA_API_KEY` - API key for Daytona backend (in .env)
- SSH: `TERMINAL_SSH_HOST`, `TERMINAL_SSH_USER`, `TERMINAL_SSH_KEY` in .env
Agent behavior (in `~/.hermes/.env`):
- `HERMES_MAX_ITERATIONS` - Max tool-calling iterations (default: 60)
- `MESSAGING_CWD` - Working directory for messaging platforms (default: ~)
- `display.tool_progress` in config.yaml - Tool progress: `off`, `new`, `all`, `verbose`
- `HERMES_TOOL_PROGRESS` - Enable tool progress messages (`true`/`false`)
- `HERMES_TOOL_PROGRESS_MODE` - Progress mode: `new` (tool changes) or `all`
- `OPENAI_API_KEY` - Voice transcription (Whisper STT)
- `SLACK_BOT_TOKEN` / `SLACK_APP_TOKEN` - Slack integration (Socket Mode)
- `SLACK_ALLOWED_USERS` - Comma-separated Slack user IDs
@@ -499,28 +433,53 @@ terminal(command="pytest -v tests/", background=true)
- `process(action="submit", session_id="proc_abc123", data="yes")` -- send + Enter
**Key behaviors:**
- Background processes execute through the configured terminal backend (local/Docker/Modal/Daytona/SSH/Singularity) -- never directly on the host unless `TERMINAL_ENV=local`
- Background processes execute through the configured terminal backend (local/Docker/Modal/SSH/Singularity) -- never directly on the host unless `TERMINAL_ENV=local`
- The `wait` action blocks the tool call until the process finishes, times out, or is interrupted by a new user message
- PTY mode (`pty=true` on terminal) enables interactive CLI tools (Codex, Claude Code)
- In RL training, background processes are auto-killed when the episode ends (`tool_context.cleanup()`)
- In the gateway, sessions with active background processes are exempt from idle reset
- The process registry checkpoints to `~/.hermes/processes.json` for crash recovery
Files: `tools/process_registry.py` (registry + handler), `tools/terminal_tool.py` (spawn integration)
Files: `tools/process_registry.py` (registry), `model_tools.py` (tool definition + handler), `tools/terminal_tool.py` (spawn integration)
---
## Adding New Tools
Adding a tool requires changes in **2 files** (the tool file and `toolsets.py`):
Follow this strict order to maintain consistency:
1. **Create `tools/your_tool.py`** with handler, schema, check function, and registry call:
1. Create `tools/your_tool.py` with:
- Handler function (sync or async) returning a JSON string via `json.dumps()`
- `check_*_requirements()` function to verify dependencies (e.g., API keys)
- Schema definition following OpenAI function-calling format
2. Export in `tools/__init__.py`:
- Import the handler and check function
- Add to `__all__` list
3. Register in `model_tools.py`:
- Add to `TOOLSET_REQUIREMENTS` if it needs API keys
- Create `get_*_tool_definitions()` function or add to existing
- Add routing in `handle_function_call()` dispatcher
- Update `get_all_tool_names()` with the tool name
- Update `get_toolset_for_tool()` mapping
- Update `get_available_toolsets()` and `check_toolset_requirements()`
4. Add to toolset in `toolsets.py`:
- Add to existing toolset or create new one in TOOLSETS dict
5. If the tool requires an API key:
- Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py`
- The tool will be auto-disabled if the key is missing
6. Optionally add to `toolset_distributions.py` for batch processing
### Tool Implementation Pattern
```python
# tools/example_tool.py
import json
import os
from tools.registry import registry
def check_example_requirements() -> bool:
"""Check if required API keys/dependencies are available."""
@@ -533,46 +492,24 @@ def example_tool(param: str, task_id: str = None) -> str:
return json.dumps(result, ensure_ascii=False)
except Exception as e:
return json.dumps({"error": str(e)}, ensure_ascii=False)
EXAMPLE_SCHEMA = {
"name": "example_tool",
"description": "Does something useful.",
"parameters": {
"type": "object",
"properties": {
"param": {"type": "string", "description": "The parameter"}
},
"required": ["param"]
}
}
registry.register(
name="example_tool",
toolset="example",
schema=EXAMPLE_SCHEMA,
handler=lambda args, **kw: example_tool(
param=args.get("param", ""), task_id=kw.get("task_id")),
check_fn=check_example_requirements,
requires_env=["EXAMPLE_API_KEY"],
)
```
2. **Add to `toolsets.py`**: Add `"example_tool"` to `_HERMES_CORE_TOOLS` if it should be in all platform toolsets, or create a new toolset entry.
3. **Add discovery import** in `model_tools.py`'s `_discover_tools()` list: `"tools.example_tool"`.
That's it. The registry handles schema collection, dispatch, availability checking, and error wrapping automatically. No edits to `TOOLSET_REQUIREMENTS`, `handle_function_call()`, `get_all_tool_names()`, or any other data structure.
**Optional:** Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` for the setup wizard, and to `toolset_distributions.py` for batch processing.
**Special case: tools that need agent-level state** (like `todo`, `memory`):
These are intercepted by `run_agent.py`'s tool dispatch loop *before* `handle_function_call()`. The registry still holds their schemas, but dispatch returns a stub error as a safety fallback. See `todo_tool.py` for the pattern.
All tool handlers MUST return a JSON string. The registry's `dispatch()` wraps all exceptions in `{"error": "..."}` automatically.
All tool handlers MUST return a JSON string. Never return raw dicts.
### Dynamic Tool Availability
Tools declare their requirements at registration time via `check_fn` and `requires_env`. The registry checks `check_fn()` when building tool definitions -- tools whose check fails are silently excluded.
Tools are automatically disabled when their API keys are missing:
```python
# In model_tools.py
TOOLSET_REQUIREMENTS = {
"web": {"env_vars": ["FIRECRAWL_API_KEY"]},
"browser": {"env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"]},
"creative": {"env_vars": ["FAL_KEY"]},
}
```
The `check_tool_availability()` function determines which tools to include.
### Stateful Tools
@@ -626,7 +563,7 @@ python batch_runner.py \
## Skills System
Skills are on-demand knowledge documents the agent can load. Compatible with the [agentskills.io](https://agentskills.io/specification) open standard.
Skills are on-demand knowledge documents the agent can load. Located in `skills/` directory:
```
skills/
@@ -634,16 +571,11 @@ skills/
│ ├── axolotl/ # Skill folder
│ │ ├── SKILL.md # Main instructions (required)
│ │ ├── references/ # Additional docs, API specs
│ │ ── templates/ # Output formats, configs
│ │ └── assets/ # Supplementary files (agentskills.io)
│ │ ── templates/ # Output formats, configs
│ └── vllm/
│ └── SKILL.md
── .hub/ # Skills Hub state (gitignored)
── lock.json # Installed skill provenance
│ ├── quarantine/ # Pending security review
│ ├── audit.log # Security scan history
│ ├── taps.json # Custom source repos
│ └── index-cache/ # Cached remote indexes
── example-skill/
── SKILL.md
```
**Progressive disclosure** (token-efficient):
@@ -651,27 +583,19 @@ skills/
2. `skills_list(category)` - Name + description per skill (~3k tokens)
3. `skill_view(name)` - Full content + tags + linked files
SKILL.md files use YAML frontmatter (agentskills.io format):
SKILL.md files use YAML frontmatter:
```yaml
---
name: skill-name
description: Brief description for listing
tags: [tag1, tag2]
related_skills: [other-skill]
version: 1.0.0
metadata:
hermes:
tags: [tag1, tag2]
related_skills: [other-skill]
---
# Skill Content...
```
**Skills Hub** — user-driven skill search/install from online registries and official optional skills. Sources: official optional skills (shipped with repo, labeled "official"), GitHub (openai/skills, anthropics/skills, custom taps), ClawHub, Claude marketplace, LobeHub. Not exposed as an agent tool — the model cannot search for or install skills. Users manage skills via `hermes skills browse/search/install` CLI commands or the `/skills` slash command in chat.
Key files:
- `tools/skills_tool.py` — Agent-facing skill list/view (progressive disclosure)
- `tools/skills_guard.py` — Security scanner (regex + LLM audit, trust-aware install policy)
- `tools/skills_hub.py` — Source adapters (OptionalSkillSource, GitHub, ClawHub, Claude marketplace, LobeHub), lock file, auth
- `hermes_cli/skills_hub.py` — CLI subcommands + `/skills` slash command handler
Tool files: `tools/skills_tool.py``model_tools.py``toolsets.py`
---
-507
View File
@@ -1,507 +0,0 @@
# Contributing to Hermes Agent
Thank you for contributing to Hermes Agent! This guide covers everything you need: setting up your dev environment, understanding the architecture, deciding what to build, and getting your PR merged.
---
## Contribution Priorities
We value contributions in this order:
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
6. **New tools** — rarely needed. Most capabilities should be skills. See below.
7. **Documentation** — fixes, clarifications, new examples.
---
## Should it be a Skill or a Tool?
This is the most common question for new contributors. The answer is almost always **skill**.
### Make it a Skill when:
- The capability can be expressed as instructions + shell commands + existing tools
- It wraps an external CLI or API that the agent can call via `terminal` or `web_extract`
- It doesn't need custom Python integration or API key management baked into the agent
- Examples: arXiv search, git workflows, Docker management, PDF processing, email via CLI tools
### Make it a Tool when:
- It requires end-to-end integration with API keys, auth flows, or multi-component configuration managed by the agent harness
- It needs custom processing logic that must execute precisely every time (not "best effort" from LLM interpretation)
- It handles binary data, streaming, or real-time events that can't go through the terminal
- Examples: browser automation (Browserbase session management), TTS (audio encoding + platform delivery), vision analysis (base64 image handling)
### Should the Skill be bundled?
Bundled skills (in `skills/`) ship with every Hermes install. They should be **broadly useful to most users**:
- Document handling, web research, common dev workflows, system administration
- Used regularly by a wide range of people
If your skill is official and useful but not universally needed (e.g., a paid service integration, a heavyweight dependency), put it in **`optional-skills/`** — it ships with the repo but isn't activated by default. Users can discover it via `hermes skills browse` (labeled "official") and install it with `hermes skills install` (no third-party warning, builtin trust).
If your skill is specialized, community-contributed, or niche, it's better suited for a **Skills Hub** — upload it to a skills registry and share it in the [Nous Research Discord](https://discord.gg/NousResearch). Users can install it with `hermes skills install`.
---
## Development Setup
### Prerequisites
| Requirement | Notes |
|-------------|-------|
| **Git** | With `--recurse-submodules` support |
| **Python 3.11+** | uv will install it if missing |
| **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
### Clone and install
```bash
git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
cd hermes-agent
# Create venv with Python 3.11
uv venv venv --python 3.11
export VIRTUAL_ENV="$(pwd)/venv"
# Install with all extras (messaging, cron, CLI menus, dev tools)
uv pip install -e ".[all,dev]"
uv pip install -e "./mini-swe-agent"
uv pip install -e "./tinker-atropos"
# Optional: browser tools
npm install
```
### Configure for development
```bash
mkdir -p ~/.hermes/{cron,sessions,logs,memories,skills}
cp cli-config.yaml.example ~/.hermes/config.yaml
touch ~/.hermes/.env
# Add at minimum an LLM provider key:
echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
```
### Run
```bash
# Symlink for global access
mkdir -p ~/.local/bin
ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
# Verify
hermes doctor
hermes chat -q "Hello"
```
### Run tests
```bash
pytest tests/ -v
```
---
## Project Structure
```
hermes-agent/
├── run_agent.py # AIAgent class — core conversation loop, tool dispatch, session persistence
├── cli.py # HermesCLI class — interactive TUI, prompt_toolkit integration
├── model_tools.py # Tool orchestration (thin layer over tools/registry.py)
├── toolsets.py # Tool groupings and presets (hermes-cli, hermes-telegram, etc.)
├── hermes_state.py # SQLite session database with FTS5 full-text search
├── batch_runner.py # Parallel batch processing for trajectory generation
├── agent/ # Agent internals (extracted modules)
│ ├── prompt_builder.py # System prompt assembly (identity, skills, context files, memory)
│ ├── context_compressor.py # Auto-summarization when approaching context limits
│ ├── auxiliary_client.py # Resolves auxiliary OpenAI clients (summarization, vision)
│ ├── display.py # KawaiiSpinner, tool progress formatting
│ ├── model_metadata.py # Model context lengths, token estimation
│ └── trajectory.py # Trajectory saving helpers
├── hermes_cli/ # CLI command implementations
│ ├── main.py # Entry point, argument parsing, command dispatch
│ ├── config.py # Config management, migration, env var definitions
│ ├── setup.py # Interactive setup wizard
│ ├── auth.py # Provider resolution, OAuth, Nous Portal
│ ├── models.py # OpenRouter model selection lists
│ ├── banner.py # Welcome banner, ASCII art
│ ├── commands.py # Slash command definitions + autocomplete
│ ├── callbacks.py # Interactive callbacks (clarify, sudo, approval)
│ ├── doctor.py # Diagnostics
│ └── skills_hub.py # Skills Hub CLI + /skills slash command
├── tools/ # Tool implementations (self-registering)
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
│ ├── approval.py # Dangerous command detection + per-session approval
│ ├── terminal_tool.py # Terminal orchestration (sudo, env lifecycle, backends)
│ ├── file_operations.py # read_file, write_file, search, patch, etc.
│ ├── web_tools.py # web_search, web_extract (Firecrawl + Gemini summarization)
│ ├── vision_tools.py # Image analysis via multimodal models
│ ├── delegate_tool.py # Subagent spawning and parallel task execution
│ ├── code_execution_tool.py # Sandboxed Python with RPC tool access
│ ├── session_search_tool.py # Search past conversations with FTS5 + summarization
│ ├── cronjob_tools.py # Scheduled task management
│ ├── skill_tools.py # Skill search, load, manage
│ └── environments/ # Terminal execution backends
│ ├── base.py # BaseEnvironment ABC
│ ├── local.py, docker.py, ssh.py, singularity.py, modal.py, daytona.py
├── gateway/ # Messaging gateway
│ ├── run.py # GatewayRunner — platform lifecycle, message routing, cron
│ ├── config.py # Platform configuration resolution
│ ├── session.py # Session store, context prompts, reset policies
│ └── platforms/ # Platform adapters
│ ├── telegram.py, discord_adapter.py, slack.py, whatsapp.py
├── scripts/ # Installer and bridge scripts
│ ├── install.sh # Linux/macOS installer
│ ├── install.ps1 # Windows PowerShell installer
│ └── whatsapp-bridge/ # Node.js WhatsApp bridge (Baileys)
├── skills/ # Bundled skills (copied to ~/.hermes/skills/ on install)
├── optional-skills/ # Official optional skills (discoverable via hub, not activated by default)
├── environments/ # RL training environments (Atropos integration)
├── tests/ # Test suite
├── website/ # Documentation site (hermes-agent.nousresearch.com)
├── cli-config.yaml.example # Example configuration (copied to ~/.hermes/config.yaml)
└── AGENTS.md # Development guide for AI coding assistants
```
### User configuration (stored in `~/.hermes/`)
| Path | Purpose |
|------|---------|
| `~/.hermes/config.yaml` | Settings (model, terminal, toolsets, compression, etc.) |
| `~/.hermes/.env` | API keys and secrets |
| `~/.hermes/auth.json` | OAuth credentials (Nous Portal) |
| `~/.hermes/skills/` | All active skills (bundled + hub-installed + agent-created) |
| `~/.hermes/memories/` | Persistent memory (MEMORY.md, USER.md) |
| `~/.hermes/state.db` | SQLite session database |
| `~/.hermes/sessions/` | JSON session logs |
| `~/.hermes/cron/` | Scheduled job data |
| `~/.hermes/whatsapp/session/` | WhatsApp bridge credentials |
---
## Architecture Overview
### Core Loop
```
User message → AIAgent._run_agent_loop()
├── Build system prompt (prompt_builder.py)
├── Build API kwargs (model, messages, tools, reasoning config)
├── Call LLM (OpenAI-compatible API)
├── If tool_calls in response:
│ ├── Execute each tool via registry dispatch
│ ├── Add tool results to conversation
│ └── Loop back to LLM call
├── If text response:
│ ├── Persist session to DB
│ └── Return final_response
└── Context compression if approaching token limit
```
### Key Design Patterns
- **Self-registering tools**: Each tool file calls `registry.register()` at import time. `model_tools.py` triggers discovery by importing all tool modules.
- **Toolset grouping**: Tools are grouped into toolsets (`web`, `terminal`, `file`, `browser`, etc.) that can be enabled/disabled per platform.
- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search. JSON logs go to `~/.hermes/sessions/`.
- **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs.
- **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint).
- **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests.
---
## Code Style
- **PEP 8** with practical exceptions (we don't enforce strict line length)
- **Comments**: Only when explaining non-obvious intent, trade-offs, or API quirks. Don't narrate what the code does — `# increment counter` adds nothing
- **Error handling**: Catch specific exceptions. Log with `logger.warning()`/`logger.error()` — use `exc_info=True` for unexpected errors so stack traces appear in logs
- **Cross-platform**: Never assume Unix. See [Cross-Platform Compatibility](#cross-platform-compatibility)
---
## Adding a New Tool
Before writing a tool, ask: [should this be a skill instead?](#should-it-be-a-skill-or-a-tool)
Tools self-register with the central registry. Each tool file co-locates its schema, handler, and registration:
```python
"""my_tool — Brief description of what this tool does."""
import json
from tools.registry import registry
def my_tool(param1: str, param2: int = 10, **kwargs) -> str:
"""Handler. Returns a string result (often JSON)."""
result = do_work(param1, param2)
return json.dumps(result)
MY_TOOL_SCHEMA = {
"type": "function",
"function": {
"name": "my_tool",
"description": "What this tool does and when the agent should use it.",
"parameters": {
"type": "object",
"properties": {
"param1": {"type": "string", "description": "What param1 is"},
"param2": {"type": "integer", "description": "What param2 is", "default": 10},
},
"required": ["param1"],
},
},
}
def _check_requirements() -> bool:
"""Return True if this tool's dependencies are available."""
return True
registry.register(
name="my_tool",
toolset="my_toolset",
schema=MY_TOOL_SCHEMA,
handler=lambda args, **kw: my_tool(**args, **kw),
check_fn=_check_requirements,
)
```
Then add the import to `model_tools.py` in the `_modules` list:
```python
_modules = [
# ... existing modules ...
"tools.my_tool",
]
```
If it's a new toolset, add it to `toolsets.py` and to the relevant platform presets.
---
## Adding a Skill
Bundled skills live in `skills/` organized by category. Official optional skills use the same structure in `optional-skills/`:
```
skills/
├── research/
│ └── arxiv/
│ ├── SKILL.md # Required: main instructions
│ └── scripts/ # Optional: helper scripts
│ └── search_arxiv.py
├── productivity/
│ └── ocr-and-documents/
│ ├── SKILL.md
│ ├── scripts/
│ └── references/
└── ...
```
### SKILL.md format
```markdown
---
name: my-skill
description: Brief description (shown in skill search results)
version: 1.0.0
author: Your Name
license: MIT
metadata:
hermes:
tags: [Category, Subcategory, Keywords]
related_skills: [other-skill-name]
---
# Skill Title
Brief intro.
## When to Use
Trigger conditions — when should the agent load this skill?
## Quick Reference
Table of common commands or API calls.
## Procedure
Step-by-step instructions the agent follows.
## Pitfalls
Known failure modes and how to handle them.
## Verification
How the agent confirms it worked.
```
### Skill guidelines
- **No external dependencies unless absolutely necessary.** Prefer stdlib Python, curl, and existing Hermes tools (`web_extract`, `terminal`, `read_file`).
- **Progressive disclosure.** Put the most common workflow first. Edge cases and advanced usage go at the bottom.
- **Include helper scripts** for XML/JSON parsing or complex logic — don't expect the LLM to write parsers inline every time.
- **Test it.** Run `hermes --toolsets skills -q "Use the X skill to do Y"` and verify the agent follows the instructions correctly.
---
## Cross-Platform Compatibility
Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
### Critical rules
1. **`termios` and `fcntl` are Unix-only.** Always catch both `ImportError` and `NotImplementedError`:
```python
try:
from simple_term_menu import TerminalMenu
menu = TerminalMenu(options)
idx = menu.show()
except (ImportError, NotImplementedError):
# Fallback: numbered menu for Windows
for i, opt in enumerate(options):
print(f" {i+1}. {opt}")
idx = int(input("Choice: ")) - 1
```
2. **File encoding.** Windows may save `.env` files in `cp1252`. Always handle encoding errors:
```python
try:
load_dotenv(env_path)
except UnicodeDecodeError:
load_dotenv(env_path, encoding="latin-1")
```
3. **Process management.** `os.setsid()`, `os.killpg()`, and signal handling differ on Windows. Use platform checks:
```python
import platform
if platform.system() != "Windows":
kwargs["preexec_fn"] = os.setsid
```
4. **Path separators.** Use `pathlib.Path` instead of string concatenation with `/`.
5. **Shell commands in installers.** If you change `scripts/install.sh`, check if the equivalent change is needed in `scripts/install.ps1`.
---
## Security Considerations
Hermes has terminal access. Security matters.
### Existing protections
| Layer | Implementation |
|-------|---------------|
| **Sudo password piping** | Uses `shlex.quote()` to prevent shell injection |
| **Dangerous command detection** | Regex patterns in `tools/approval.py` with user approval flow |
| **Cron prompt injection** | Scanner in `tools/cronjob_tools.py` blocks instruction-override patterns |
| **Write deny list** | Protected paths (`~/.ssh/authorized_keys`, `/etc/shadow`) resolved via `os.path.realpath()` to prevent symlink bypass |
| **Skills guard** | Security scanner for hub-installed skills (`tools/skills_guard.py`) |
| **Code execution sandbox** | `execute_code` child process runs with API keys stripped from environment |
| **Container hardening** | Docker: all capabilities dropped, no privilege escalation, PID limits, size-limited tmpfs |
### When contributing security-sensitive code
- **Always use `shlex.quote()`** when interpolating user input into shell commands
- **Resolve symlinks** with `os.path.realpath()` before path-based access control checks
- **Don't log secrets.** API keys, tokens, and passwords should never appear in log output
- **Catch broad exceptions** around tool execution so a single failure doesn't crash the agent loop
- **Test on all platforms** if your change touches file paths, process management, or shell commands
If your PR affects security, note it explicitly in the description.
---
## Pull Request Process
### Branch naming
```
fix/description # Bug fixes
feat/description # New features
docs/description # Documentation
test/description # Tests
refactor/description # Code restructuring
```
### Before submitting
1. **Run tests**: `pytest tests/ -v`
2. **Test manually**: Run `hermes` and exercise the code path you changed
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
### PR description
Include:
- **What** changed and **why**
- **How to test** it (reproduction steps for bugs, usage examples for features)
- **What platforms** you tested on
- Reference any related issues
### Commit messages
We use [Conventional Commits](https://www.conventionalcommits.org/):
```
<type>(<scope>): <description>
```
| Type | Use for |
|------|---------|
| `fix` | Bug fixes |
| `feat` | New features |
| `docs` | Documentation |
| `test` | Tests |
| `refactor` | Code restructuring (no behavior change) |
| `chore` | Build, CI, dependency updates |
Scopes: `cli`, `gateway`, `tools`, `skills`, `agent`, `install`, `whatsapp`, `security`, etc.
Examples:
```
fix(cli): prevent crash in save_config_value when model is a string
feat(gateway): add WhatsApp multi-user session isolation
fix(security): prevent shell injection in sudo password piping
test(tools): add unit tests for file_operations
```
---
## Reporting Issues
- Use [GitHub Issues](https://github.com/NousResearch/hermes-agent/issues)
- Include: OS, Python version, Hermes version (`hermes version`), full error traceback
- Include steps to reproduce
- Check existing issues before creating duplicates
- For security vulnerabilities, please report privately
---
## Community
- **Discord**: [discord.gg/NousResearch](https://discord.gg/NousResearch) — for questions, showcasing projects, and sharing skills
- **GitHub Discussions**: For design proposals and architecture discussions
- **Skills Hub**: Upload specialized skills to a registry and share them with the community
---
## License
By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).
+1120 -88
View File
File diff suppressed because it is too large Load Diff
+36 -102
View File
@@ -2,128 +2,62 @@
---
## 1. Subagent Architecture (Context Isolation) 🎯
The main agent becomes an orchestrator that delegates context-heavy tasks to subagents with isolated context. Each subagent returns a summary, keeping the orchestrator's context clean. `delegate_task(goal, context, toolsets=[])` with fresh conversation, limited toolset, task-specific system prompt.
## 3. Local Browser Control via CDP 🌐
## 2. Planning & Task Management 📋
**Status:** Not started (currently Browserbase cloud only)
**Priority:** Medium
Task decomposition tool, progress checkpoints after N tool calls, persistent plan storage that survives context compression, failure recovery with replanning.
Support local Chrome/Chromium via Chrome DevTools Protocol alongside existing Browserbase cloud backend.
## 3. Dynamic Skills Expansion 📚
**What other agents do:**
- **OpenClaw**: Full CDP-based Chrome control with snapshots, actions, uploads, profiles, file chooser, PDF save, console messages, tab management. Uses local Chrome for persistent login sessions.
- **Cline**: Headless browser with Computer Use (click, type, scroll, screenshot, console logs)
Skill acquisition from successful tasks, parameterized skill templates, skill chaining with dependency graphs.
**Our approach:**
- Add a `local` backend option to `browser_tool.py` using Playwright or raw CDP
- Config toggle: `browser.backend: local | browserbase | auto`
- `auto` mode: try local first, fall back to Browserbase
- Local advantages: free, persistent login sessions, no API key needed
- Local disadvantages: no CAPTCHA solving, no stealth mode, requires Chrome installed
- Reuse the same 10-tool interface -- just swap the backend
- Later: Chrome profile management for persistent sessions across restarts
## 4. Interactive Clarifying Questions ❓
---
Multiple-choice prompt tool with rich terminal UI. Up to 4 choices + free-text. CLI-only with graceful fallback for non-interactive modes.
## 4. Signal Integration 📡
## 5. Memory System 🧠
**Status:** Not started
**Priority:** Low
Daily memory logs, long-term curated MEMORY.md, vector/semantic search, pre-compaction memory flush, user profile, learning store for error patterns and discovered fixes. *Inspired by ClawdBot's memory system.*
## 6. Heartbeat System 💓
Periodic agent wake-up that reads HEARTBEAT.md for instructions. Runs inside the main session with full context. Triggers on interval, exec completion, cron events, or manual wake. HEARTBEAT_OK suppression when nothing needs attention. *Inspired by ClawdBot's heartbeat.*
## 7. Local Browser Control via CDP 🌐
Support both local Chrome (via CDP, free) and Browserbase (cloud, paid) as browser backends. Local gives persistent login sessions but lacks CAPTCHA solving.
## 8. Signal Integration 📡
New platform adapter using signal-cli daemon (JSON-RPC HTTP + SSE). Requires Java runtime and phone number registration.
**Reference:** OpenClaw has Signal support via signal-cli.
## 9. Session Transcript Search 🔍
---
`hermes sessions search <query>` CLI command and `session_search` agent tool. Text-based first (ripgrep over JSONL), vector search later.
## 5. Plugin/Extension System 🔌
## 10. Plugin/Extension System 🔌
**Status:** Partially implemented (event hooks exist in `gateway/hooks.py`)
**Priority:** Medium
Python plugin interface with `plugin.yaml` + `handler.py`. Discovery from `~/.hermes/plugins/`. Plugins can register tools, hooks, and CLI commands. *Inspired by ClawdBot's 36-plugin extension system.*
Full Python plugin interface that goes beyond the current hook system.
## 11. Native Companion Apps 📱
**What other agents do:**
- **OpenClaw**: Plugin SDK with tool-send capabilities, lifecycle phase hooks (before-agent-start, after-tool-call, model-override), plugin registry with install/uninstall.
- **Pi**: Extensions are TypeScript modules that can register tools, commands, keyboard shortcuts, custom UI widgets, overlays, status lines, dialogs, compaction hooks, raw terminal input listeners. Extremely comprehensive.
- **OpenCode**: MCP client support (stdio, SSE, StreamableHTTP), OAuth auth for MCP servers. Also has Copilot/Codex plugins.
- **Codex**: Full MCP integration with skill dependencies.
- **Cline**: MCP integration + lifecycle hooks with cancellation support.
macOS (Swift/SwiftUI), iOS, Android apps connecting via WebSocket. Prerequisite: WS API on gateway. MVP: web UI with Flask/FastAPI. *Inspired by ClawdBot's companion apps.*
**Our approach (phased):**
## 12. Evaluation System 📏
### Phase 1: Enhanced hooks
- Expand the existing `gateway/hooks.py` to support more events: `before-tool-call`, `after-tool-call`, `before-response`, `context-compress`, `session-end`
- Allow hooks to modify tool results (e.g., filter sensitive output)
LLM grader mode for batch_runner, action comparison against expected tool calls, string matching baselines.
### Phase 2: Plugin interface
- `~/.hermes/plugins/<name>/plugin.yaml` + `handler.py`
- Plugins can: register new tools, add CLI commands, subscribe to events, inject system prompt sections
- `hermes plugin list|install|uninstall|create` CLI commands
- Plugin discovery and validation on startup
## 13. Layered Context Architecture 📊
### Phase 3: MCP support (industry standard) ✅ DONE
- ✅ MCP client that connects to external MCP servers (stdio + HTTP/StreamableHTTP)
- ✅ Config: `mcp_servers` in config.yaml with connection details
- ✅ Each MCP server's tools auto-registered as a dynamic toolset
- Future: Resources, Prompts, Progress notifications, `hermes mcp` CLI command
Structured hierarchy: project context > skills > user profile > learnings > external knowledge > runtime introspection.
---
## 14. Tools Wishlist 🧰
## 6. MCP (Model Context Protocol) Support 🔗 ✅ DONE
**Status:** Implemented (PR #301)
**Priority:** Complete
Native MCP client support with stdio and HTTP/StreamableHTTP transports, auto-discovery, reconnection with exponential backoff, env var filtering, and credential stripping. See `docs/mcp.md` for full documentation.
**Still TODO:**
- `hermes mcp` CLI subcommand (list/test/status)
- `hermes tools` UI integration for MCP toolsets
- MCP Resources and Prompts support
- OAuth authentication for remote servers
- Progress notifications for long-running tools
---
## 8. Filesystem Checkpointing / Rollback 🔄
**Status:** Not started
**Priority:** Low-Medium
Automatic filesystem snapshots after each agent loop iteration so the user can roll back destructive changes to their project.
**What other agents do:**
- **Cline**: Workspace checkpoints at each step with Compare/Restore UI
- **OpenCode**: Git-backed workspace snapshots per step, with weekly gc
- **Codex**: Sandboxed execution with commit-per-step, rollback on failure
**Our approach:**
- After each tool call (or batch of tool calls in a single turn) that modifies files, create a lightweight checkpoint of the affected files
- Git-based when the project is a repo: auto-commit to a detached/temporary branch (`hermes/checkpoints/<session>`) after each agent turn, squash or discard on session end
- Non-git fallback: tar snapshots of changed files in `~/.hermes/checkpoints/<session_id>/`
- `hermes rollback` CLI command to restore to a previous checkpoint
- Agent-accessible via a `checkpoint` tool: `list` (show available restore points), `restore` (roll back to a named point), `diff` (show what changed since a checkpoint)
- Configurable: off by default (opt-in via `config.yaml`), since auto-committing can be surprising
- Cleanup: checkpoints expire after session ends (or configurable retention period)
- Integration with the terminal backend: works with local, SSH, and Docker backends (snapshots happen on the execution host)
---
## Implementation Priority Order
### Tier 1: Next Up
1. ~~MCP Support -- #6~~ ✅ Done (PR #301)
### Tier 2: Quality of Life
3. Local Browser Control via CDP -- #3
4. Plugin/Extension System -- #5
### Tier 3: Nice to Have
5. Session Branching / Checkpoints -- #7
6. Filesystem Checkpointing / Rollback -- #8
7. Signal Integration -- #4
- Diagram rendering (Mermaid/PlantUML to images)
- Document generation (PDFs, Word, presentations)
- Canvas / visual workspace
- Coding agent skill (Codex, Claude Code orchestration via PTY)
- Domain skill packs (DevOps, data science, security)
-6
View File
@@ -1,6 +0,0 @@
"""Agent internals -- extracted modules from run_agent.py.
These modules contain pure utility functions and self-contained classes
that were previously embedded in the 3,600-line run_agent.py. Extracting
them makes run_agent.py focused on the AIAgent orchestrator class.
"""
-407
View File
@@ -1,407 +0,0 @@
"""Shared auxiliary OpenAI client for cheap/fast side tasks.
Provides a single resolution chain so every consumer (context compression,
session search, web extraction, vision analysis, browser vision) picks up
the best available backend without duplicating fallback logic.
Resolution order for text tasks:
1. OpenRouter (OPENROUTER_API_KEY)
2. Nous Portal (~/.hermes/auth.json active provider)
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
wrapped to look like a chat.completions client)
5. None
Resolution order for vision/multimodal tasks:
1. OpenRouter
2. Nous Portal
3. None (custom endpoints can't substitute for Gemini multimodal)
"""
import json
import logging
import os
from pathlib import Path
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Tuple
from openai import OpenAI
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
# OpenRouter app attribution headers
_OR_HEADERS = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
# Nous Portal extra_body for product attribution.
# Callers should pass this as extra_body in chat.completions.create()
# when the auxiliary client is backed by Nous Portal.
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent"]}
# Set at resolve time — True if the auxiliary client points to Nous Portal
auxiliary_is_nous: bool = False
# Default auxiliary models per provider
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
_NOUS_MODEL = "gemini-3-flash"
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
# Codex fallback: uses the Responses API (the only endpoint the Codex
# OAuth token can access) with a fast model for auxiliary tasks.
_CODEX_AUX_MODEL = "gpt-5.3-codex"
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
# ── Codex Responses → chat.completions adapter ─────────────────────────────
# All auxiliary consumers call client.chat.completions.create(**kwargs) and
# read response.choices[0].message.content. This adapter translates those
# calls to the Codex Responses API so callers don't need any changes.
class _CodexCompletionsAdapter:
"""Drop-in shim that accepts chat.completions.create() kwargs and
routes them through the Codex Responses streaming API."""
def __init__(self, real_client: OpenAI, model: str):
self._client = real_client
self._model = model
def create(self, **kwargs) -> Any:
messages = kwargs.get("messages", [])
model = kwargs.get("model", self._model)
temperature = kwargs.get("temperature")
# Separate system/instructions from conversation messages
instructions = "You are a helpful assistant."
input_msgs: List[Dict[str, Any]] = []
for msg in messages:
role = msg.get("role", "user")
content = msg.get("content") or ""
if role == "system":
instructions = content
else:
input_msgs.append({"role": role, "content": content})
resp_kwargs: Dict[str, Any] = {
"model": model,
"instructions": instructions,
"input": input_msgs or [{"role": "user", "content": ""}],
"stream": True,
"store": False,
}
max_tokens = kwargs.get("max_output_tokens") or kwargs.get("max_completion_tokens") or kwargs.get("max_tokens")
if max_tokens is not None:
resp_kwargs["max_output_tokens"] = int(max_tokens)
if temperature is not None:
resp_kwargs["temperature"] = temperature
# Tools support for flush_memories and similar callers
tools = kwargs.get("tools")
if tools:
converted = []
for t in tools:
fn = t.get("function", {}) if isinstance(t, dict) else {}
name = fn.get("name")
if not name:
continue
converted.append({
"type": "function",
"name": name,
"description": fn.get("description", ""),
"parameters": fn.get("parameters", {}),
})
if converted:
resp_kwargs["tools"] = converted
# Stream and collect the response
text_parts: List[str] = []
tool_calls_raw: List[Any] = []
usage = None
try:
with self._client.responses.stream(**resp_kwargs) as stream:
for _event in stream:
pass
final = stream.get_final_response()
# Extract text and tool calls from the Responses output
for item in getattr(final, "output", []):
item_type = getattr(item, "type", None)
if item_type == "message":
for part in getattr(item, "content", []):
ptype = getattr(part, "type", None)
if ptype in ("output_text", "text"):
text_parts.append(getattr(part, "text", ""))
elif item_type == "function_call":
tool_calls_raw.append(SimpleNamespace(
id=getattr(item, "call_id", ""),
type="function",
function=SimpleNamespace(
name=getattr(item, "name", ""),
arguments=getattr(item, "arguments", "{}"),
),
))
resp_usage = getattr(final, "usage", None)
if resp_usage:
usage = SimpleNamespace(
prompt_tokens=getattr(resp_usage, "input_tokens", 0),
completion_tokens=getattr(resp_usage, "output_tokens", 0),
total_tokens=getattr(resp_usage, "total_tokens", 0),
)
except Exception as exc:
logger.debug("Codex auxiliary Responses API call failed: %s", exc)
raise
content = "".join(text_parts).strip() or None
# Build a response that looks like chat.completions
message = SimpleNamespace(
role="assistant",
content=content,
tool_calls=tool_calls_raw or None,
)
choice = SimpleNamespace(
index=0,
message=message,
finish_reason="stop" if not tool_calls_raw else "tool_calls",
)
return SimpleNamespace(
choices=[choice],
model=model,
usage=usage,
)
class _CodexChatShim:
"""Wraps the adapter to provide client.chat.completions.create()."""
def __init__(self, adapter: _CodexCompletionsAdapter):
self.completions = adapter
class CodexAuxiliaryClient:
"""OpenAI-client-compatible wrapper that routes through Codex Responses API.
Consumers can call client.chat.completions.create(**kwargs) as normal.
Also exposes .api_key and .base_url for introspection by async wrappers.
"""
def __init__(self, real_client: OpenAI, model: str):
self._real_client = real_client
adapter = _CodexCompletionsAdapter(real_client, model)
self.chat = _CodexChatShim(adapter)
self.api_key = real_client.api_key
self.base_url = real_client.base_url
def close(self):
self._real_client.close()
class _AsyncCodexCompletionsAdapter:
"""Async version of the Codex Responses adapter.
Wraps the sync adapter via asyncio.to_thread() so async consumers
(web_tools, session_search) can await it as normal.
"""
def __init__(self, sync_adapter: _CodexCompletionsAdapter):
self._sync = sync_adapter
async def create(self, **kwargs) -> Any:
import asyncio
return await asyncio.to_thread(self._sync.create, **kwargs)
class _AsyncCodexChatShim:
def __init__(self, adapter: _AsyncCodexCompletionsAdapter):
self.completions = adapter
class AsyncCodexAuxiliaryClient:
"""Async-compatible wrapper matching AsyncOpenAI.chat.completions.create()."""
def __init__(self, sync_wrapper: "CodexAuxiliaryClient"):
sync_adapter = sync_wrapper.chat.completions
async_adapter = _AsyncCodexCompletionsAdapter(sync_adapter)
self.chat = _AsyncCodexChatShim(async_adapter)
self.api_key = sync_wrapper.api_key
self.base_url = sync_wrapper.base_url
def _read_nous_auth() -> Optional[dict]:
"""Read and validate ~/.hermes/auth.json for an active Nous provider.
Returns the provider state dict if Nous is active with tokens,
otherwise None.
"""
try:
if not _AUTH_JSON_PATH.is_file():
return None
data = json.loads(_AUTH_JSON_PATH.read_text())
if data.get("active_provider") != "nous":
return None
provider = data.get("providers", {}).get("nous", {})
# Must have at least an access_token or agent_key
if not provider.get("agent_key") and not provider.get("access_token"):
return None
return provider
except Exception as exc:
logger.debug("Could not read Nous auth: %s", exc)
return None
def _nous_api_key(provider: dict) -> str:
"""Extract the best API key from a Nous provider state dict."""
return provider.get("agent_key") or provider.get("access_token", "")
def _nous_base_url() -> str:
"""Resolve the Nous inference base URL from env or default."""
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
def _read_codex_access_token() -> Optional[str]:
"""Read a valid Codex OAuth access token from Hermes auth store (~/.hermes/auth.json)."""
try:
from hermes_cli.auth import _read_codex_tokens
data = _read_codex_tokens()
tokens = data.get("tokens", {})
access_token = tokens.get("access_token")
if isinstance(access_token, str) and access_token.strip():
return access_token.strip()
return None
except Exception as exc:
logger.debug("Could not read Codex auth for auxiliary client: %s", exc)
return None
# ── Public API ──────────────────────────────────────────────────────────────
def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
"""Return (client, model_slug) for text-only auxiliary tasks.
Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth -> (None, None).
"""
# 1. OpenRouter
or_key = os.getenv("OPENROUTER_API_KEY")
if or_key:
logger.debug("Auxiliary text client: OpenRouter")
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
# 2. Nous Portal
nous = _read_nous_auth()
if nous:
global auxiliary_is_nous
auxiliary_is_nous = True
logger.debug("Auxiliary text client: Nous Portal")
return (
OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()),
_NOUS_MODEL,
)
# 3. Custom endpoint (both base URL and key must be set)
custom_base = os.getenv("OPENAI_BASE_URL")
custom_key = os.getenv("OPENAI_API_KEY")
if custom_base and custom_key:
model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
logger.debug("Auxiliary text client: custom endpoint (%s)", model)
return OpenAI(api_key=custom_key, base_url=custom_base), model
# 4. Codex OAuth -- uses the Responses API (only endpoint the token
# can access), wrapped to look like a chat.completions client.
codex_token = _read_codex_access_token()
if codex_token:
logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
# 5. Nothing available
logger.debug("Auxiliary text client: none available")
return None, None
def get_async_text_auxiliary_client():
"""Return (async_client, model_slug) for async consumers.
For standard providers returns (AsyncOpenAI, model). For Codex returns
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
Returns (None, None) when no provider is available.
"""
from openai import AsyncOpenAI
sync_client, model = get_text_auxiliary_client()
if sync_client is None:
return None, None
if isinstance(sync_client, CodexAuxiliaryClient):
return AsyncCodexAuxiliaryClient(sync_client), model
async_kwargs = {
"api_key": sync_client.api_key,
"base_url": str(sync_client.base_url),
}
if "openrouter" in str(sync_client.base_url).lower():
async_kwargs["default_headers"] = dict(_OR_HEADERS)
return AsyncOpenAI(**async_kwargs), model
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
"""Return (client, model_slug) for vision/multimodal auxiliary tasks.
Only OpenRouter and Nous Portal qualify — custom endpoints cannot
substitute for Gemini multimodal.
"""
# 1. OpenRouter
or_key = os.getenv("OPENROUTER_API_KEY")
if or_key:
logger.debug("Auxiliary vision client: OpenRouter")
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
# 2. Nous Portal
nous = _read_nous_auth()
if nous:
logger.debug("Auxiliary vision client: Nous Portal")
return (
OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()),
_NOUS_MODEL,
)
# 3. Nothing suitable
logger.debug("Auxiliary vision client: none available")
return None, None
def get_auxiliary_extra_body() -> dict:
"""Return extra_body kwargs for auxiliary API calls.
Includes Nous Portal product tags when the auxiliary client is backed
by Nous Portal. Returns empty dict otherwise.
"""
return dict(NOUS_EXTRA_BODY) if auxiliary_is_nous else {}
def auxiliary_max_tokens_param(value: int) -> dict:
"""Return the correct max tokens kwarg for the auxiliary client's provider.
OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer
models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'.
The Codex adapter translates max_tokens internally, so we use max_tokens
for it as well.
"""
custom_base = os.getenv("OPENAI_BASE_URL", "")
or_key = os.getenv("OPENROUTER_API_KEY")
# Only use max_completion_tokens for direct OpenAI custom endpoints
if (not or_key
and _read_nous_auth() is None
and "api.openai.com" in custom_base.lower()):
return {"max_completion_tokens": value}
return {"max_tokens": value}
-265
View File
@@ -1,265 +0,0 @@
"""Automatic context window compression for long conversations.
Self-contained class with its own OpenAI client for summarization.
Uses Gemini Flash (cheap/fast) to summarize middle turns while
protecting head and tail context.
"""
import logging
import os
from typing import Any, Dict, List
from agent.auxiliary_client import get_text_auxiliary_client
from agent.model_metadata import (
get_model_context_length,
estimate_messages_tokens_rough,
)
logger = logging.getLogger(__name__)
class ContextCompressor:
"""Compresses conversation context when approaching the model's context limit.
Algorithm: protect first N + last N turns, summarize everything in between.
Token tracking uses actual counts from API responses for accuracy.
"""
def __init__(
self,
model: str,
threshold_percent: float = 0.85,
protect_first_n: int = 3,
protect_last_n: int = 4,
summary_target_tokens: int = 2500,
quiet_mode: bool = False,
summary_model_override: str = None,
base_url: str = "",
):
self.model = model
self.base_url = base_url
self.threshold_percent = threshold_percent
self.protect_first_n = protect_first_n
self.protect_last_n = protect_last_n
self.summary_target_tokens = summary_target_tokens
self.quiet_mode = quiet_mode
self.context_length = get_model_context_length(model, base_url=base_url)
self.threshold_tokens = int(self.context_length * threshold_percent)
self.compression_count = 0
self._context_probed = False # True after a step-down from context error
self.last_prompt_tokens = 0
self.last_completion_tokens = 0
self.last_total_tokens = 0
self.client, default_model = get_text_auxiliary_client()
self.summary_model = summary_model_override or default_model
def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response."""
self.last_prompt_tokens = usage.get("prompt_tokens", 0)
self.last_completion_tokens = usage.get("completion_tokens", 0)
self.last_total_tokens = usage.get("total_tokens", 0)
def should_compress(self, prompt_tokens: int = None) -> bool:
"""Check if context exceeds the compression threshold."""
tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
return tokens >= self.threshold_tokens
def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
"""Quick pre-flight check using rough estimate (before API call)."""
rough_estimate = estimate_messages_tokens_rough(messages)
return rough_estimate >= self.threshold_tokens
def get_status(self) -> Dict[str, Any]:
"""Get current compression status for display/logging."""
return {
"last_prompt_tokens": self.last_prompt_tokens,
"threshold_tokens": self.threshold_tokens,
"context_length": self.context_length,
"usage_percent": (self.last_prompt_tokens / self.context_length * 100) if self.context_length else 0,
"compression_count": self.compression_count,
}
def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> str:
"""Generate a concise summary of conversation turns using a fast model."""
if not self.client:
return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed to save space. The assistant performed various actions and received responses."
parts = []
for msg in turns_to_summarize:
role = msg.get("role", "unknown")
content = msg.get("content") or ""
if len(content) > 2000:
content = content[:1000] + "\n...[truncated]...\n" + content[-500:]
tool_calls = msg.get("tool_calls", [])
if tool_calls:
tool_names = [tc.get("function", {}).get("name", "?") for tc in tool_calls if isinstance(tc, dict)]
content += f"\n[Tool calls: {', '.join(tool_names)}]"
parts.append(f"[{role.upper()}]: {content}")
content_to_summarize = "\n\n".join(parts)
prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
Write from a neutral perspective describing:
1. What actions were taken (tool calls, searches, file operations)
2. Key information or results obtained
3. Important decisions or findings
4. Relevant data, file names, or outputs
Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
---
TURNS TO SUMMARIZE:
{content_to_summarize}
---
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
try:
return self._call_summary_model(self.client, self.summary_model, prompt)
except Exception as e:
logging.warning(f"Failed to generate context summary with auxiliary model: {e}")
# Fallback: try the main model's endpoint. This handles the common
# case where the user switched providers (e.g. OpenRouter → local LLM)
# but a stale API key causes the auxiliary client to pick the old
# provider which then fails (402, auth error, etc.).
fallback_client, fallback_model = self._get_fallback_client()
if fallback_client is not None:
try:
logger.info("Retrying context summary with fallback client (%s)", fallback_model)
summary = self._call_summary_model(fallback_client, fallback_model, prompt)
# Success — swap in the working client for future compressions
self.client = fallback_client
self.summary_model = fallback_model
return summary
except Exception as fallback_err:
logging.warning(f"Fallback summary model also failed: {fallback_err}")
return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed. The assistant performed tool calls and received responses."
def _call_summary_model(self, client, model: str, prompt: str) -> str:
"""Make the actual LLM call to generate a summary. Raises on failure."""
kwargs = {
"model": model,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.3,
"timeout": 30.0,
}
# Most providers (OpenRouter, local models) use max_tokens.
# Direct OpenAI with newer models (gpt-4o, o-series, gpt-5+)
# requires max_completion_tokens instead.
try:
kwargs["max_tokens"] = self.summary_target_tokens * 2
response = client.chat.completions.create(**kwargs)
except Exception as first_err:
if "max_tokens" in str(first_err) or "unsupported_parameter" in str(first_err):
kwargs.pop("max_tokens", None)
kwargs["max_completion_tokens"] = self.summary_target_tokens * 2
response = client.chat.completions.create(**kwargs)
else:
raise
summary = response.choices[0].message.content.strip()
if not summary.startswith("[CONTEXT SUMMARY]:"):
summary = "[CONTEXT SUMMARY]: " + summary
return summary
def _get_fallback_client(self):
"""Try to build a fallback client from the main model's endpoint config.
When the primary auxiliary client fails (e.g. stale OpenRouter key), this
creates a client using the user's active custom endpoint (OPENAI_BASE_URL)
so compression can still produce a real summary instead of a static string.
Returns (client, model) or (None, None).
"""
custom_base = os.getenv("OPENAI_BASE_URL")
custom_key = os.getenv("OPENAI_API_KEY")
if not custom_base or not custom_key:
return None, None
# Don't fallback to the same provider that just failed
from hermes_constants import OPENROUTER_BASE_URL
if custom_base.rstrip("/") == OPENROUTER_BASE_URL.rstrip("/"):
return None, None
model = os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or self.model
try:
from openai import OpenAI as _OpenAI
client = _OpenAI(api_key=custom_key, base_url=custom_base)
logger.debug("Built fallback auxiliary client: %s via %s", model, custom_base)
return client, model
except Exception as exc:
logger.debug("Could not build fallback auxiliary client: %s", exc)
return None, None
def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
"""Compress conversation messages by summarizing middle turns.
Keeps first N + last N turns, summarizes everything in between.
"""
n_messages = len(messages)
if n_messages <= self.protect_first_n + self.protect_last_n + 1:
if not self.quiet_mode:
print(f"⚠️ Cannot compress: only {n_messages} messages (need > {self.protect_first_n + self.protect_last_n + 1})")
return messages
compress_start = self.protect_first_n
compress_end = n_messages - self.protect_last_n
if compress_start >= compress_end:
return messages
turns_to_summarize = messages[compress_start:compress_end]
display_tokens = current_tokens if current_tokens else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
if not self.quiet_mode:
print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
# Truncation fallback when no auxiliary model is available
if self.client is None:
print("⚠️ Context compression: no auxiliary model available. Falling back to message truncation.")
# Keep system message(s) at the front and the protected tail;
# simply drop the oldest non-system messages until under threshold.
kept = []
for msg in messages:
if msg.get("role") == "system":
kept.append(msg.copy())
else:
break
tail = messages[-self.protect_last_n:]
kept.extend(m.copy() for m in tail)
self.compression_count += 1
if not self.quiet_mode:
print(f" ✂️ Truncated: {len(messages)}{len(kept)} messages (dropped middle turns)")
return kept
if not self.quiet_mode:
print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
summary = self._generate_summary(turns_to_summarize)
compressed = []
for i in range(compress_start):
msg = messages[i].copy()
if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
compressed.append(msg)
compressed.append({"role": "user", "content": summary})
for i in range(compress_end, n_messages):
compressed.append(messages[i].copy())
self.compression_count += 1
if not self.quiet_mode:
new_estimate = estimate_messages_tokens_rough(compressed)
saved_estimate = display_tokens - new_estimate
print(f" ✅ Compressed: {n_messages}{len(compressed)} messages (~{saved_estimate:,} tokens saved)")
print(f" 💡 Compression #{self.compression_count} complete")
return compressed
-469
View File
@@ -1,469 +0,0 @@
"""CLI presentation -- spinner, kawaii faces, tool preview formatting.
Pure display functions and classes with no AIAgent dependency.
Used by AIAgent._execute_tool_calls for CLI feedback.
"""
import json
import os
import random
import sys
import threading
import time
# ANSI escape codes for coloring tool failure indicators
_RED = "\033[31m"
_RESET = "\033[0m"
# =========================================================================
# Tool preview (one-line summary of a tool call's primary argument)
# =========================================================================
def build_tool_preview(tool_name: str, args: dict, max_len: int = 40) -> str:
"""Build a short preview of a tool call's primary argument for display."""
primary_args = {
"terminal": "command", "web_search": "query", "web_extract": "urls",
"read_file": "path", "write_file": "path", "patch": "path",
"search_files": "pattern", "browser_navigate": "url",
"browser_click": "ref", "browser_type": "text",
"image_generate": "prompt", "text_to_speech": "text",
"vision_analyze": "question", "mixture_of_agents": "user_prompt",
"skill_view": "name", "skills_list": "category",
"schedule_cronjob": "name",
"execute_code": "code", "delegate_task": "goal",
"clarify": "question", "skill_manage": "name",
}
if tool_name == "process":
action = args.get("action", "")
sid = args.get("session_id", "")
data = args.get("data", "")
timeout_val = args.get("timeout")
parts = [action]
if sid:
parts.append(sid[:16])
if data:
parts.append(f'"{data[:20]}"')
if timeout_val and action == "wait":
parts.append(f"{timeout_val}s")
return " ".join(parts) if parts else None
if tool_name == "todo":
todos_arg = args.get("todos")
merge = args.get("merge", False)
if todos_arg is None:
return "reading task list"
elif merge:
return f"updating {len(todos_arg)} task(s)"
else:
return f"planning {len(todos_arg)} task(s)"
if tool_name == "session_search":
query = args.get("query", "")
return f"recall: \"{query[:25]}{'...' if len(query) > 25 else ''}\""
if tool_name == "memory":
action = args.get("action", "")
target = args.get("target", "")
if action == "add":
content = args.get("content", "")
return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\""
elif action == "replace":
return f"~{target}: \"{args.get('old_text', '')[:20]}\""
elif action == "remove":
return f"-{target}: \"{args.get('old_text', '')[:20]}\""
return action
if tool_name == "send_message":
target = args.get("target", "?")
msg = args.get("message", "")
if len(msg) > 20:
msg = msg[:17] + "..."
return f"to {target}: \"{msg}\""
if tool_name.startswith("rl_"):
rl_previews = {
"rl_list_environments": "listing envs",
"rl_select_environment": args.get("name", ""),
"rl_get_current_config": "reading config",
"rl_edit_config": f"{args.get('field', '')}={args.get('value', '')}",
"rl_start_training": "starting",
"rl_check_status": args.get("run_id", "")[:16],
"rl_stop_training": f"stopping {args.get('run_id', '')[:16]}",
"rl_get_results": args.get("run_id", "")[:16],
"rl_list_runs": "listing runs",
"rl_test_inference": f"{args.get('num_steps', 3)} steps",
}
return rl_previews.get(tool_name)
key = primary_args.get(tool_name)
if not key:
for fallback_key in ("query", "text", "command", "path", "name", "prompt", "code", "goal"):
if fallback_key in args:
key = fallback_key
break
if not key or key not in args:
return None
value = args[key]
if isinstance(value, list):
value = value[0] if value else ""
preview = str(value).strip()
if not preview:
return None
if len(preview) > max_len:
preview = preview[:max_len - 3] + "..."
return preview
# =========================================================================
# KawaiiSpinner
# =========================================================================
class KawaiiSpinner:
"""Animated spinner with kawaii faces for CLI feedback during tool execution."""
SPINNERS = {
'dots': ['', '', '', '', '', '', '', '', '', ''],
'bounce': ['', '', '', '', '', '', '', ''],
'grow': ['', '', '', '', '', '', '', '', '', '', '', '', '', ''],
'arrows': ['', '', '', '', '', '', '', ''],
'star': ['', '', '', '', '', '', '', ''],
'moon': ['🌑', '🌒', '🌓', '🌔', '🌕', '🌖', '🌗', '🌘'],
'pulse': ['', '', '', '', '', ''],
'brain': ['🧠', '💭', '💡', '', '💫', '🌟', '💡', '💭'],
'sparkle': ['', '˚', '*', '', '', '', '*', '˚'],
}
KAWAII_WAITING = [
"(。◕‿◕。)", "(◕‿◕✿)", "٩(◕‿◕。)۶", "(✿◠‿◠)", "( ˘▽˘)っ",
"♪(´ε` )", "(◕ᴗ◕✿)", "ヾ(^∇^)", "(≧◡≦)", "(★ω★)",
]
KAWAII_THINKING = [
"(。•́︿•̀。)", "(◔_◔)", "(¬‿¬)", "( •_•)>⌐■-■", "(⌐■_■)",
"(´・_・`)", "◉_◉", "(°ロ°)", "( ˘⌣˘)♡", "ヽ(>∀<☆)☆",
"٩(๑❛ᴗ❛๑)۶", "(⊙_⊙)", "(¬_¬)", "( ͡° ͜ʖ ͡°)", "ಠ_ಠ",
]
THINKING_VERBS = [
"pondering", "contemplating", "musing", "cogitating", "ruminating",
"deliberating", "mulling", "reflecting", "processing", "reasoning",
"analyzing", "computing", "synthesizing", "formulating", "brainstorming",
]
def __init__(self, message: str = "", spinner_type: str = 'dots'):
self.message = message
self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
self.running = False
self.thread = None
self.frame_idx = 0
self.start_time = None
self.last_line_len = 0
# Capture stdout NOW, before any redirect_stdout(devnull) from
# child agents can replace sys.stdout with a black hole.
self._out = sys.stdout
def _write(self, text: str, end: str = '\n', flush: bool = False):
"""Write to the stdout captured at spinner creation time."""
try:
self._out.write(text + end)
if flush:
self._out.flush()
except (ValueError, OSError):
pass
def _animate(self):
while self.running:
if os.getenv("HERMES_SPINNER_PAUSE"):
time.sleep(0.1)
continue
frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)]
elapsed = time.time() - self.start_time
line = f" {frame} {self.message} ({elapsed:.1f}s)"
pad = max(self.last_line_len - len(line), 0)
self._write(f"\r{line}{' ' * pad}", end='', flush=True)
self.last_line_len = len(line)
self.frame_idx += 1
time.sleep(0.12)
def start(self):
if self.running:
return
self.running = True
self.start_time = time.time()
self.thread = threading.Thread(target=self._animate, daemon=True)
self.thread.start()
def update_text(self, new_message: str):
self.message = new_message
def print_above(self, text: str):
"""Print a line above the spinner without disrupting animation.
Clears the current spinner line, prints the text, and lets the
next animation tick redraw the spinner on the line below.
Thread-safe: uses the captured stdout reference (self._out).
Works inside redirect_stdout(devnull) because _write bypasses
sys.stdout and writes to the stdout captured at spinner creation.
"""
if not self.running:
self._write(f" {text}", flush=True)
return
# Clear spinner line with spaces (not \033[K) to avoid garbled escape
# codes when prompt_toolkit's patch_stdout is active — same approach
# as stop(). Then print text; spinner redraws on next tick.
blanks = ' ' * max(self.last_line_len + 5, 40)
self._write(f"\r{blanks}\r {text}", flush=True)
def stop(self, final_message: str = None):
self.running = False
if self.thread:
self.thread.join(timeout=0.5)
# Clear the spinner line with spaces instead of \033[K to avoid
# garbled escape codes when prompt_toolkit's patch_stdout is active.
blanks = ' ' * max(self.last_line_len + 5, 40)
self._write(f"\r{blanks}\r", end='', flush=True)
if final_message:
self._write(f" {final_message}", flush=True)
def __enter__(self):
self.start()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.stop()
return False
# =========================================================================
# Kawaii face arrays (used by AIAgent._execute_tool_calls for spinner text)
# =========================================================================
KAWAII_SEARCH = [
"♪(´ε` )", "(。◕‿◕。)", "ヾ(^∇^)", "(◕ᴗ◕✿)", "( ˘▽˘)っ",
"٩(◕‿◕。)۶", "(✿◠‿◠)", "♪~(´ε` )", "(ノ´ヮ`)*:・゚✧", "(◎o◎)",
]
KAWAII_READ = [
"φ(゜▽゜*)♪", "( ˘▽˘)っ", "(⌐■_■)", "٩(。•́‿•̀。)۶", "(◕‿◕✿)",
"ヾ(@⌒ー⌒@)", "(✧ω✧)", "♪(๑ᴖ◡ᴖ๑)♪", "(≧◡≦)", "( ´ ▽ ` )",
]
KAWAII_TERMINAL = [
"ヽ(>∀<☆)", "(ノ°∀°)", "٩(^ᴗ^)۶", "ヾ(⌐■_■)ノ♪", "(•̀ᴗ•́)و",
"┗(0)┓", "(`・ω・´)", "( ̄▽ ̄)", "(ง •̀_•́)ง", "ヽ(´▽`)/",
]
KAWAII_BROWSER = [
"(ノ°∀°)", "(☞゚ヮ゚)☞", "( ͡° ͜ʖ ͡°)", "┌( ಠ_ಠ)┘", "(⊙_⊙)",
"ヾ(•ω•`)o", "( ̄ω ̄)", "( ˇωˇ )", "(ᵔᴥᵔ)", "(◎o◎)",
]
KAWAII_CREATE = [
"✧*。٩(ˊᗜˋ*)و✧", "(ノ◕ヮ◕)ノ*:・゚✧", "ヽ(>∀<☆)", "٩(♡ε♡)۶", "(◕‿◕)♡",
"✿◕ ‿ ◕✿", "(*≧▽≦)", "ヾ(-)", "(☆▽☆)", "°˖✧◝(⁰▿⁰)◜✧˖°",
]
KAWAII_SKILL = [
"ヾ(@⌒ー⌒@)", "(๑˃ᴗ˂)ﻭ", "٩(◕‿◕。)۶", "(✿╹◡╹)", "ヽ(・∀・)",
"(ノ´ヮ`)*:・゚✧", "♪(๑ᴖ◡ᴖ๑)♪", "(◠‿◠)", "٩(ˊᗜˋ*)و", "(^▽^)",
"ヾ(^∇^)", "(★ω★)/", "٩(。•́‿•̀。)۶", "(◕ᴗ◕✿)", "(◎o◎)",
"(✧ω✧)", "ヽ(>∀<☆)", "( ˘▽˘)っ", "(≧◡≦) ♡", "ヾ( ̄▽ ̄)",
]
KAWAII_THINK = [
"(っ°Д°;)っ", "(;′⌒`)", "(・_・ヾ", "( ´_ゝ`)", "( ̄ヘ ̄)",
"(。-`ω´-)", "( ˘︹˘ )", "(¬_¬)", "ヽ(ー_ー )", "(;一_一)",
]
KAWAII_GENERIC = [
"♪(´ε` )", "(◕‿◕✿)", "ヾ(^∇^)", "٩(◕‿◕。)۶", "(✿◠‿◠)",
"(ノ´ヮ`)*:・゚✧", "ヽ(>∀<☆)", "(☆▽☆)", "( ˘▽˘)っ", "(≧◡≦)",
]
# =========================================================================
# Cute tool message (completion line that replaces the spinner)
# =========================================================================
def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
"""Inspect a tool result string for signs of failure.
Returns ``(is_failure, suffix)`` where *suffix* is an informational tag
like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic
failures. On success, returns ``(False, "")``.
"""
if result is None:
return False, ""
if tool_name == "terminal":
try:
data = json.loads(result)
exit_code = data.get("exit_code")
if exit_code is not None and exit_code != 0:
return True, f" [exit {exit_code}]"
except (json.JSONDecodeError, TypeError, AttributeError):
pass
return False, ""
# Memory-specific: distinguish "full" from real errors
if tool_name == "memory":
try:
data = json.loads(result)
if data.get("success") is False and "exceed the limit" in data.get("error", ""):
return True, " [full]"
except (json.JSONDecodeError, TypeError, AttributeError):
pass
# Generic heuristic for non-terminal tools
lower = result[:500].lower()
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
return True, " [error]"
return False, ""
def get_cute_tool_message(
tool_name: str, args: dict, duration: float, result: str | None = None,
) -> str:
"""Generate a formatted tool completion line for CLI quiet mode.
Format: ``| {emoji} {verb:9} {detail} {duration}``
When *result* is provided the line is checked for failure indicators.
Failed tool calls get a red prefix and an informational suffix.
"""
dur = f"{duration:.1f}s"
is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
def _trunc(s, n=40):
s = str(s)
return (s[:n-3] + "...") if len(s) > n else s
def _path(p, n=35):
p = str(p)
return ("..." + p[-(n-3):]) if len(p) > n else p
def _wrap(line: str) -> str:
"""Append failure suffix when the tool failed."""
if not is_failure:
return line
return f"{line}{failure_suffix}"
if tool_name == "web_search":
return _wrap(f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}")
if tool_name == "web_extract":
urls = args.get("urls", [])
if urls:
url = urls[0] if isinstance(urls, list) else str(urls)
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}")
return _wrap(f"┊ 📄 fetch pages {dur}")
if tool_name == "web_crawl":
url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
return _wrap(f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}")
if tool_name == "terminal":
return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}")
if tool_name == "process":
action = args.get("action", "?")
sid = args.get("session_id", "")[:12]
labels = {"list": "ls processes", "poll": f"poll {sid}", "log": f"log {sid}",
"wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"}
return _wrap(f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}")
if tool_name == "read_file":
return _wrap(f"┊ 📖 read {_path(args.get('path', ''))} {dur}")
if tool_name == "write_file":
return _wrap(f"┊ ✍️ write {_path(args.get('path', ''))} {dur}")
if tool_name == "patch":
return _wrap(f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}")
if tool_name == "search_files":
pattern = _trunc(args.get("pattern", ""), 35)
target = args.get("target", "content")
verb = "find" if target == "files" else "grep"
return _wrap(f"┊ 🔎 {verb:9} {pattern} {dur}")
if tool_name == "browser_navigate":
url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
return _wrap(f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}")
if tool_name == "browser_snapshot":
mode = "full" if args.get("full") else "compact"
return _wrap(f"┊ 📸 snapshot {mode} {dur}")
if tool_name == "browser_click":
return _wrap(f"┊ 👆 click {args.get('ref', '?')} {dur}")
if tool_name == "browser_type":
return _wrap(f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}")
if tool_name == "browser_scroll":
d = args.get("direction", "down")
arrow = {"down": "", "up": "", "right": "", "left": ""}.get(d, "")
return _wrap(f"{arrow} scroll {d} {dur}")
if tool_name == "browser_back":
return _wrap(f"┊ ◀️ back {dur}")
if tool_name == "browser_press":
return _wrap(f"┊ ⌨️ press {args.get('key', '?')} {dur}")
if tool_name == "browser_close":
return _wrap(f"┊ 🚪 close browser {dur}")
if tool_name == "browser_get_images":
return _wrap(f"┊ 🖼️ images extracting {dur}")
if tool_name == "browser_vision":
return _wrap(f"┊ 👁️ vision analyzing page {dur}")
if tool_name == "todo":
todos_arg = args.get("todos")
merge = args.get("merge", False)
if todos_arg is None:
return _wrap(f"┊ 📋 plan reading tasks {dur}")
elif merge:
return _wrap(f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}")
else:
return _wrap(f"┊ 📋 plan {len(todos_arg)} task(s) {dur}")
if tool_name == "session_search":
return _wrap(f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}")
if tool_name == "memory":
action = args.get("action", "?")
target = args.get("target", "")
if action == "add":
return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
elif action == "replace":
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
elif action == "remove":
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
return _wrap(f"┊ 🧠 memory {action} {dur}")
if tool_name == "skills_list":
return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
if tool_name == "skill_view":
return _wrap(f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}")
if tool_name == "image_generate":
return _wrap(f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}")
if tool_name == "text_to_speech":
return _wrap(f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}")
if tool_name == "vision_analyze":
return _wrap(f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}")
if tool_name == "mixture_of_agents":
return _wrap(f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}")
if tool_name == "send_message":
return _wrap(f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}")
if tool_name == "schedule_cronjob":
return _wrap(f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}")
if tool_name == "list_cronjobs":
return _wrap(f"┊ ⏰ jobs listing {dur}")
if tool_name == "remove_cronjob":
return _wrap(f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}")
if tool_name.startswith("rl_"):
rl = {
"rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}",
"rl_get_current_config": "get config", "rl_edit_config": f"set {args.get('field', '?')}",
"rl_start_training": "start training", "rl_check_status": f"status {args.get('run_id', '?')[:12]}",
"rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}",
"rl_list_runs": "list runs", "rl_test_inference": "test inference",
}
return _wrap(f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}")
if tool_name == "execute_code":
code = args.get("code", "")
first_line = code.strip().split("\n")[0] if code.strip() else ""
return _wrap(f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}")
if tool_name == "delegate_task":
tasks = args.get("tasks")
if tasks and isinstance(tasks, list):
return _wrap(f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}")
return _wrap(f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}")
preview = build_tool_preview(tool_name, args) or ""
return _wrap(f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}")
-804
View File
@@ -1,804 +0,0 @@
"""
Session Insights Engine for Hermes Agent.
Analyzes historical session data from the SQLite state database to produce
comprehensive usage insights — token consumption, cost estimates, tool usage
patterns, activity trends, model/platform breakdowns, and session metrics.
Inspired by Claude Code's /insights command, adapted for Hermes Agent's
multi-platform architecture with additional cost estimation and platform
breakdown capabilities.
Usage:
from agent.insights import InsightsEngine
engine = InsightsEngine(db)
report = engine.generate(days=30)
print(engine.format_terminal(report))
"""
import json
import time
from collections import Counter, defaultdict
from datetime import datetime
from typing import Any, Dict, List, Optional
# =========================================================================
# Model pricing (USD per million tokens) — approximate as of early 2026
# =========================================================================
MODEL_PRICING = {
# OpenAI
"gpt-4o": {"input": 2.50, "output": 10.00},
"gpt-4o-mini": {"input": 0.15, "output": 0.60},
"gpt-4.1": {"input": 2.00, "output": 8.00},
"gpt-4.1-mini": {"input": 0.40, "output": 1.60},
"gpt-4.1-nano": {"input": 0.10, "output": 0.40},
"gpt-4.5-preview": {"input": 75.00, "output": 150.00},
"gpt-5": {"input": 10.00, "output": 30.00},
"gpt-5.4": {"input": 10.00, "output": 30.00},
"o3": {"input": 10.00, "output": 40.00},
"o3-mini": {"input": 1.10, "output": 4.40},
"o4-mini": {"input": 1.10, "output": 4.40},
# Anthropic
"claude-opus-4-20250514": {"input": 15.00, "output": 75.00},
"claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00},
"claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00},
"claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00},
"claude-3-opus-20240229": {"input": 15.00, "output": 75.00},
"claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
# DeepSeek
"deepseek-chat": {"input": 0.14, "output": 0.28},
"deepseek-reasoner": {"input": 0.55, "output": 2.19},
# Google
"gemini-2.5-pro": {"input": 1.25, "output": 10.00},
"gemini-2.5-flash": {"input": 0.15, "output": 0.60},
"gemini-2.0-flash": {"input": 0.10, "output": 0.40},
# Meta (via providers)
"llama-4-maverick": {"input": 0.50, "output": 0.70},
"llama-4-scout": {"input": 0.20, "output": 0.30},
}
# Fallback: unknown/custom models get zero cost (we can't assume pricing
# for self-hosted models, custom OAI endpoints, local inference, etc.)
_DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
def _has_known_pricing(model_name: str) -> bool:
"""Check if a model has known pricing (vs unknown/custom endpoint)."""
return _get_pricing(model_name) is not _DEFAULT_PRICING
def _get_pricing(model_name: str) -> Dict[str, float]:
"""Look up pricing for a model. Uses fuzzy matching on model name.
Returns _DEFAULT_PRICING (zero cost) for unknown/custom models —
we can't assume costs for self-hosted endpoints, local inference, etc.
"""
if not model_name:
return _DEFAULT_PRICING
# Strip provider prefix (e.g., "anthropic/claude-..." -> "claude-...")
bare = model_name.split("/")[-1].lower()
# Exact match first
if bare in MODEL_PRICING:
return MODEL_PRICING[bare]
# Fuzzy prefix match — prefer the LONGEST matching key to avoid
# e.g. "gpt-4o" matching before "gpt-4o-mini" for "gpt-4o-mini-2024-07-18"
best_match = None
best_len = 0
for key, price in MODEL_PRICING.items():
if bare.startswith(key) and len(key) > best_len:
best_match = price
best_len = len(key)
if best_match:
return best_match
# Keyword heuristics (checked in most-specific-first order)
if "opus" in bare:
return {"input": 15.00, "output": 75.00}
if "sonnet" in bare:
return {"input": 3.00, "output": 15.00}
if "haiku" in bare:
return {"input": 0.80, "output": 4.00}
if "gpt-4o-mini" in bare:
return {"input": 0.15, "output": 0.60}
if "gpt-4o" in bare:
return {"input": 2.50, "output": 10.00}
if "gpt-5" in bare:
return {"input": 10.00, "output": 30.00}
if "deepseek" in bare:
return {"input": 0.14, "output": 0.28}
if "gemini" in bare:
return {"input": 0.15, "output": 0.60}
return _DEFAULT_PRICING
def _estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
"""Estimate the USD cost for a given model and token counts."""
pricing = _get_pricing(model)
return (input_tokens * pricing["input"] + output_tokens * pricing["output"]) / 1_000_000
def _format_duration(seconds: float) -> str:
"""Format seconds into a human-readable duration string."""
if seconds < 60:
return f"{seconds:.0f}s"
minutes = seconds / 60
if minutes < 60:
return f"{minutes:.0f}m"
hours = minutes / 60
if hours < 24:
remaining_min = int(minutes % 60)
return f"{int(hours)}h {remaining_min}m" if remaining_min else f"{int(hours)}h"
days = hours / 24
return f"{days:.1f}d"
def _bar_chart(values: List[int], max_width: int = 20) -> List[str]:
"""Create simple horizontal bar chart strings from values."""
peak = max(values) if values else 1
if peak == 0:
return ["" for _ in values]
return ["" * max(1, int(v / peak * max_width)) if v > 0 else "" for v in values]
class InsightsEngine:
"""
Analyzes session history and produces usage insights.
Works directly with a SessionDB instance (or raw sqlite3 connection)
to query session and message data.
"""
def __init__(self, db):
"""
Initialize with a SessionDB instance.
Args:
db: A SessionDB instance (from hermes_state.py)
"""
self.db = db
self._conn = db._conn
def generate(self, days: int = 30, source: str = None) -> Dict[str, Any]:
"""
Generate a complete insights report.
Args:
days: Number of days to look back (default: 30)
source: Optional filter by source platform
Returns:
Dict with all computed insights
"""
cutoff = time.time() - (days * 86400)
# Gather raw data
sessions = self._get_sessions(cutoff, source)
tool_usage = self._get_tool_usage(cutoff, source)
message_stats = self._get_message_stats(cutoff, source)
if not sessions:
return {
"days": days,
"source_filter": source,
"empty": True,
"overview": {},
"models": [],
"platforms": [],
"tools": [],
"activity": {},
"top_sessions": [],
}
# Compute insights
overview = self._compute_overview(sessions, message_stats)
models = self._compute_model_breakdown(sessions)
platforms = self._compute_platform_breakdown(sessions)
tools = self._compute_tool_breakdown(tool_usage)
activity = self._compute_activity_patterns(sessions)
top_sessions = self._compute_top_sessions(sessions)
return {
"days": days,
"source_filter": source,
"empty": False,
"generated_at": time.time(),
"overview": overview,
"models": models,
"platforms": platforms,
"tools": tools,
"activity": activity,
"top_sessions": top_sessions,
}
# =========================================================================
# Data gathering (SQL queries)
# =========================================================================
# Columns we actually need (skip system_prompt, model_config blobs)
_SESSION_COLS = ("id, source, model, started_at, ended_at, "
"message_count, tool_call_count, input_tokens, output_tokens")
def _get_sessions(self, cutoff: float, source: str = None) -> List[Dict]:
"""Fetch sessions within the time window."""
if source:
cursor = self._conn.execute(
f"""SELECT {self._SESSION_COLS} FROM sessions
WHERE started_at >= ? AND source = ?
ORDER BY started_at DESC""",
(cutoff, source),
)
else:
cursor = self._conn.execute(
f"""SELECT {self._SESSION_COLS} FROM sessions
WHERE started_at >= ?
ORDER BY started_at DESC""",
(cutoff,),
)
return [dict(row) for row in cursor.fetchall()]
def _get_tool_usage(self, cutoff: float, source: str = None) -> List[Dict]:
"""Get tool call counts from messages.
Uses two sources:
1. tool_name column on 'tool' role messages (set by gateway)
2. tool_calls JSON on 'assistant' role messages (covers CLI where
tool_name is not populated on tool responses)
"""
tool_counts = Counter()
# Source 1: explicit tool_name on tool response messages
if source:
cursor = self._conn.execute(
"""SELECT m.tool_name, COUNT(*) as count
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ? AND s.source = ?
AND m.role = 'tool' AND m.tool_name IS NOT NULL
GROUP BY m.tool_name
ORDER BY count DESC""",
(cutoff, source),
)
else:
cursor = self._conn.execute(
"""SELECT m.tool_name, COUNT(*) as count
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ?
AND m.role = 'tool' AND m.tool_name IS NOT NULL
GROUP BY m.tool_name
ORDER BY count DESC""",
(cutoff,),
)
for row in cursor.fetchall():
tool_counts[row["tool_name"]] += row["count"]
# Source 2: extract from tool_calls JSON on assistant messages
# (covers CLI sessions where tool_name is NULL on tool responses)
if source:
cursor2 = self._conn.execute(
"""SELECT m.tool_calls
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ? AND s.source = ?
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
(cutoff, source),
)
else:
cursor2 = self._conn.execute(
"""SELECT m.tool_calls
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ?
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
(cutoff,),
)
tool_calls_counts = Counter()
for row in cursor2.fetchall():
try:
calls = row["tool_calls"]
if isinstance(calls, str):
calls = json.loads(calls)
if isinstance(calls, list):
for call in calls:
func = call.get("function", {}) if isinstance(call, dict) else {}
name = func.get("name")
if name:
tool_calls_counts[name] += 1
except (json.JSONDecodeError, TypeError, AttributeError):
continue
# Merge: prefer tool_name source, supplement with tool_calls source
# for tools not already counted
if not tool_counts and tool_calls_counts:
# No tool_name data at all — use tool_calls exclusively
tool_counts = tool_calls_counts
elif tool_counts and tool_calls_counts:
# Both sources have data — use whichever has the higher count per tool
# (they may overlap, so take the max to avoid double-counting)
all_tools = set(tool_counts) | set(tool_calls_counts)
merged = Counter()
for tool in all_tools:
merged[tool] = max(tool_counts.get(tool, 0), tool_calls_counts.get(tool, 0))
tool_counts = merged
# Convert to the expected format
return [
{"tool_name": name, "count": count}
for name, count in tool_counts.most_common()
]
def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
"""Get aggregate message statistics."""
if source:
cursor = self._conn.execute(
"""SELECT
COUNT(*) as total_messages,
SUM(CASE WHEN m.role = 'user' THEN 1 ELSE 0 END) as user_messages,
SUM(CASE WHEN m.role = 'assistant' THEN 1 ELSE 0 END) as assistant_messages,
SUM(CASE WHEN m.role = 'tool' THEN 1 ELSE 0 END) as tool_messages
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ? AND s.source = ?""",
(cutoff, source),
)
else:
cursor = self._conn.execute(
"""SELECT
COUNT(*) as total_messages,
SUM(CASE WHEN m.role = 'user' THEN 1 ELSE 0 END) as user_messages,
SUM(CASE WHEN m.role = 'assistant' THEN 1 ELSE 0 END) as assistant_messages,
SUM(CASE WHEN m.role = 'tool' THEN 1 ELSE 0 END) as tool_messages
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ?""",
(cutoff,),
)
row = cursor.fetchone()
return dict(row) if row else {
"total_messages": 0, "user_messages": 0,
"assistant_messages": 0, "tool_messages": 0,
}
# =========================================================================
# Computation
# =========================================================================
def _compute_overview(self, sessions: List[Dict], message_stats: Dict) -> Dict:
"""Compute high-level overview statistics."""
total_input = sum(s.get("input_tokens") or 0 for s in sessions)
total_output = sum(s.get("output_tokens") or 0 for s in sessions)
total_tokens = total_input + total_output
total_tool_calls = sum(s.get("tool_call_count") or 0 for s in sessions)
total_messages = sum(s.get("message_count") or 0 for s in sessions)
# Cost estimation (weighted by model)
total_cost = 0.0
models_with_pricing = set()
models_without_pricing = set()
for s in sessions:
model = s.get("model") or ""
inp = s.get("input_tokens") or 0
out = s.get("output_tokens") or 0
total_cost += _estimate_cost(model, inp, out)
display = model.split("/")[-1] if "/" in model else (model or "unknown")
if _has_known_pricing(model):
models_with_pricing.add(display)
else:
models_without_pricing.add(display)
# Session duration stats (guard against negative durations from clock drift)
durations = []
for s in sessions:
start = s.get("started_at")
end = s.get("ended_at")
if start and end and end > start:
durations.append(end - start)
total_hours = sum(durations) / 3600 if durations else 0
avg_duration = sum(durations) / len(durations) if durations else 0
# Earliest and latest session
started_timestamps = [s["started_at"] for s in sessions if s.get("started_at")]
date_range_start = min(started_timestamps) if started_timestamps else None
date_range_end = max(started_timestamps) if started_timestamps else None
return {
"total_sessions": len(sessions),
"total_messages": total_messages,
"total_tool_calls": total_tool_calls,
"total_input_tokens": total_input,
"total_output_tokens": total_output,
"total_tokens": total_tokens,
"estimated_cost": total_cost,
"total_hours": total_hours,
"avg_session_duration": avg_duration,
"avg_messages_per_session": total_messages / len(sessions) if sessions else 0,
"avg_tokens_per_session": total_tokens / len(sessions) if sessions else 0,
"user_messages": message_stats.get("user_messages") or 0,
"assistant_messages": message_stats.get("assistant_messages") or 0,
"tool_messages": message_stats.get("tool_messages") or 0,
"date_range_start": date_range_start,
"date_range_end": date_range_end,
"models_with_pricing": sorted(models_with_pricing),
"models_without_pricing": sorted(models_without_pricing),
}
def _compute_model_breakdown(self, sessions: List[Dict]) -> List[Dict]:
"""Break down usage by model."""
model_data = defaultdict(lambda: {
"sessions": 0, "input_tokens": 0, "output_tokens": 0,
"total_tokens": 0, "tool_calls": 0, "cost": 0.0,
})
for s in sessions:
model = s.get("model") or "unknown"
# Normalize: strip provider prefix for display
display_model = model.split("/")[-1] if "/" in model else model
d = model_data[display_model]
d["sessions"] += 1
inp = s.get("input_tokens") or 0
out = s.get("output_tokens") or 0
d["input_tokens"] += inp
d["output_tokens"] += out
d["total_tokens"] += inp + out
d["tool_calls"] += s.get("tool_call_count") or 0
d["cost"] += _estimate_cost(model, inp, out)
d["has_pricing"] = _has_known_pricing(model)
result = [
{"model": model, **data}
for model, data in model_data.items()
]
# Sort by tokens first, fall back to session count when tokens are 0
result.sort(key=lambda x: (x["total_tokens"], x["sessions"]), reverse=True)
return result
def _compute_platform_breakdown(self, sessions: List[Dict]) -> List[Dict]:
"""Break down usage by platform/source."""
platform_data = defaultdict(lambda: {
"sessions": 0, "messages": 0, "input_tokens": 0,
"output_tokens": 0, "total_tokens": 0, "tool_calls": 0,
})
for s in sessions:
source = s.get("source") or "unknown"
d = platform_data[source]
d["sessions"] += 1
d["messages"] += s.get("message_count") or 0
inp = s.get("input_tokens") or 0
out = s.get("output_tokens") or 0
d["input_tokens"] += inp
d["output_tokens"] += out
d["total_tokens"] += inp + out
d["tool_calls"] += s.get("tool_call_count") or 0
result = [
{"platform": platform, **data}
for platform, data in platform_data.items()
]
result.sort(key=lambda x: x["sessions"], reverse=True)
return result
def _compute_tool_breakdown(self, tool_usage: List[Dict]) -> List[Dict]:
"""Process tool usage data into a ranked list with percentages."""
total_calls = sum(t["count"] for t in tool_usage) if tool_usage else 0
result = []
for t in tool_usage:
pct = (t["count"] / total_calls * 100) if total_calls else 0
result.append({
"tool": t["tool_name"],
"count": t["count"],
"percentage": pct,
})
return result
def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
"""Analyze activity patterns by day of week and hour."""
day_counts = Counter() # 0=Monday ... 6=Sunday
hour_counts = Counter()
daily_counts = Counter() # date string -> count
for s in sessions:
ts = s.get("started_at")
if not ts:
continue
dt = datetime.fromtimestamp(ts)
day_counts[dt.weekday()] += 1
hour_counts[dt.hour] += 1
daily_counts[dt.strftime("%Y-%m-%d")] += 1
day_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
day_breakdown = [
{"day": day_names[i], "count": day_counts.get(i, 0)}
for i in range(7)
]
hour_breakdown = [
{"hour": i, "count": hour_counts.get(i, 0)}
for i in range(24)
]
# Busiest day and hour
busiest_day = max(day_breakdown, key=lambda x: x["count"]) if day_breakdown else None
busiest_hour = max(hour_breakdown, key=lambda x: x["count"]) if hour_breakdown else None
# Active days (days with at least one session)
active_days = len(daily_counts)
# Streak calculation
if daily_counts:
all_dates = sorted(daily_counts.keys())
current_streak = 1
max_streak = 1
for i in range(1, len(all_dates)):
d1 = datetime.strptime(all_dates[i - 1], "%Y-%m-%d")
d2 = datetime.strptime(all_dates[i], "%Y-%m-%d")
if (d2 - d1).days == 1:
current_streak += 1
max_streak = max(max_streak, current_streak)
else:
current_streak = 1
else:
max_streak = 0
return {
"by_day": day_breakdown,
"by_hour": hour_breakdown,
"busiest_day": busiest_day,
"busiest_hour": busiest_hour,
"active_days": active_days,
"max_streak": max_streak,
}
def _compute_top_sessions(self, sessions: List[Dict]) -> List[Dict]:
"""Find notable sessions (longest, most messages, most tokens)."""
top = []
# Longest by duration
sessions_with_duration = [
s for s in sessions
if s.get("started_at") and s.get("ended_at")
]
if sessions_with_duration:
longest = max(
sessions_with_duration,
key=lambda s: (s["ended_at"] - s["started_at"]),
)
dur = longest["ended_at"] - longest["started_at"]
top.append({
"label": "Longest session",
"session_id": longest["id"][:16],
"value": _format_duration(dur),
"date": datetime.fromtimestamp(longest["started_at"]).strftime("%b %d"),
})
# Most messages
most_msgs = max(sessions, key=lambda s: s.get("message_count") or 0)
if (most_msgs.get("message_count") or 0) > 0:
top.append({
"label": "Most messages",
"session_id": most_msgs["id"][:16],
"value": f"{most_msgs['message_count']} msgs",
"date": datetime.fromtimestamp(most_msgs["started_at"]).strftime("%b %d") if most_msgs.get("started_at") else "?",
})
# Most tokens
most_tokens = max(
sessions,
key=lambda s: (s.get("input_tokens") or 0) + (s.get("output_tokens") or 0),
)
token_total = (most_tokens.get("input_tokens") or 0) + (most_tokens.get("output_tokens") or 0)
if token_total > 0:
top.append({
"label": "Most tokens",
"session_id": most_tokens["id"][:16],
"value": f"{token_total:,} tokens",
"date": datetime.fromtimestamp(most_tokens["started_at"]).strftime("%b %d") if most_tokens.get("started_at") else "?",
})
# Most tool calls
most_tools = max(sessions, key=lambda s: s.get("tool_call_count") or 0)
if (most_tools.get("tool_call_count") or 0) > 0:
top.append({
"label": "Most tool calls",
"session_id": most_tools["id"][:16],
"value": f"{most_tools['tool_call_count']} calls",
"date": datetime.fromtimestamp(most_tools["started_at"]).strftime("%b %d") if most_tools.get("started_at") else "?",
})
return top
# =========================================================================
# Formatting
# =========================================================================
def format_terminal(self, report: Dict) -> str:
"""Format the insights report for terminal display (CLI)."""
if report.get("empty"):
days = report.get("days", 30)
src = f" (source: {report['source_filter']})" if report.get("source_filter") else ""
return f" No sessions found in the last {days} days{src}."
lines = []
o = report["overview"]
days = report["days"]
src_filter = report.get("source_filter")
# Header
lines.append("")
lines.append(" ╔══════════════════════════════════════════════════════════╗")
lines.append(" ║ 📊 Hermes Insights ║")
period_label = f"Last {days} days"
if src_filter:
period_label += f" ({src_filter})"
padding = 58 - len(period_label) - 2
left_pad = padding // 2
right_pad = padding - left_pad
lines.append(f"{' ' * left_pad} {period_label} {' ' * right_pad}")
lines.append(" ╚══════════════════════════════════════════════════════════╝")
lines.append("")
# Date range
if o.get("date_range_start") and o.get("date_range_end"):
start_str = datetime.fromtimestamp(o["date_range_start"]).strftime("%b %d, %Y")
end_str = datetime.fromtimestamp(o["date_range_end"]).strftime("%b %d, %Y")
lines.append(f" Period: {start_str}{end_str}")
lines.append("")
# Overview
lines.append(" 📋 Overview")
lines.append(" " + "" * 56)
lines.append(f" Sessions: {o['total_sessions']:<12} Messages: {o['total_messages']:,}")
lines.append(f" Tool calls: {o['total_tool_calls']:<12,} User messages: {o['user_messages']:,}")
lines.append(f" Input tokens: {o['total_input_tokens']:<12,} Output tokens: {o['total_output_tokens']:,}")
cost_str = f"${o['estimated_cost']:.2f}"
if o.get("models_without_pricing"):
cost_str += " *"
lines.append(f" Total tokens: {o['total_tokens']:<12,} Est. cost: {cost_str}")
if o["total_hours"] > 0:
lines.append(f" Active time: ~{_format_duration(o['total_hours'] * 3600):<11} Avg session: ~{_format_duration(o['avg_session_duration'])}")
lines.append(f" Avg msgs/session: {o['avg_messages_per_session']:.1f}")
lines.append("")
# Model breakdown
if report["models"]:
lines.append(" 🤖 Models Used")
lines.append(" " + "" * 56)
lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12} {'Cost':>8}")
for m in report["models"]:
model_name = m["model"][:28]
if m.get("has_pricing"):
cost_cell = f"${m['cost']:>6.2f}"
else:
cost_cell = " N/A"
lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,} {cost_cell}")
if o.get("models_without_pricing"):
lines.append(f" * Cost N/A for custom/self-hosted models")
lines.append("")
# Platform breakdown
if len(report["platforms"]) > 1 or (report["platforms"] and report["platforms"][0]["platform"] != "cli"):
lines.append(" 📱 Platforms")
lines.append(" " + "" * 56)
lines.append(f" {'Platform':<14} {'Sessions':>8} {'Messages':>10} {'Tokens':>14}")
for p in report["platforms"]:
lines.append(f" {p['platform']:<14} {p['sessions']:>8} {p['messages']:>10,} {p['total_tokens']:>14,}")
lines.append("")
# Tool usage
if report["tools"]:
lines.append(" 🔧 Top Tools")
lines.append(" " + "" * 56)
lines.append(f" {'Tool':<28} {'Calls':>8} {'%':>8}")
for t in report["tools"][:15]: # Top 15
lines.append(f" {t['tool']:<28} {t['count']:>8,} {t['percentage']:>7.1f}%")
if len(report["tools"]) > 15:
lines.append(f" ... and {len(report['tools']) - 15} more tools")
lines.append("")
# Activity patterns
act = report.get("activity", {})
if act.get("by_day"):
lines.append(" 📅 Activity Patterns")
lines.append(" " + "" * 56)
# Day of week chart
day_values = [d["count"] for d in act["by_day"]]
bars = _bar_chart(day_values, max_width=15)
for i, d in enumerate(act["by_day"]):
bar = bars[i]
lines.append(f" {d['day']} {bar:<15} {d['count']}")
lines.append("")
# Peak hours (show top 5 busiest hours)
busy_hours = sorted(act["by_hour"], key=lambda x: x["count"], reverse=True)
busy_hours = [h for h in busy_hours if h["count"] > 0][:5]
if busy_hours:
hour_strs = []
for h in busy_hours:
hr = h["hour"]
ampm = "AM" if hr < 12 else "PM"
display_hr = hr % 12 or 12
hour_strs.append(f"{display_hr}{ampm} ({h['count']})")
lines.append(f" Peak hours: {', '.join(hour_strs)}")
if act.get("active_days"):
lines.append(f" Active days: {act['active_days']}")
if act.get("max_streak") and act["max_streak"] > 1:
lines.append(f" Best streak: {act['max_streak']} consecutive days")
lines.append("")
# Notable sessions
if report.get("top_sessions"):
lines.append(" 🏆 Notable Sessions")
lines.append(" " + "" * 56)
for ts in report["top_sessions"]:
lines.append(f" {ts['label']:<20} {ts['value']:<18} ({ts['date']}, {ts['session_id']})")
lines.append("")
return "\n".join(lines)
def format_gateway(self, report: Dict) -> str:
"""Format the insights report for gateway/messaging (shorter)."""
if report.get("empty"):
days = report.get("days", 30)
return f"No sessions found in the last {days} days."
lines = []
o = report["overview"]
days = report["days"]
lines.append(f"📊 **Hermes Insights** — Last {days} days\n")
# Overview
lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
cost_note = ""
if o.get("models_without_pricing"):
cost_note = " _(excludes custom/self-hosted models)_"
lines.append(f"**Est. cost:** ${o['estimated_cost']:.2f}{cost_note}")
if o["total_hours"] > 0:
lines.append(f"**Active time:** ~{_format_duration(o['total_hours'] * 3600)} | **Avg session:** ~{_format_duration(o['avg_session_duration'])}")
lines.append("")
# Models (top 5)
if report["models"]:
lines.append("**🤖 Models:**")
for m in report["models"][:5]:
cost_str = f"${m['cost']:.2f}" if m.get("has_pricing") else "N/A"
lines.append(f" {m['model'][:25]}{m['sessions']} sessions, {m['total_tokens']:,} tokens, {cost_str}")
lines.append("")
# Platforms (if multi-platform)
if len(report["platforms"]) > 1:
lines.append("**📱 Platforms:**")
for p in report["platforms"]:
lines.append(f" {p['platform']}{p['sessions']} sessions, {p['messages']:,} msgs")
lines.append("")
# Tools (top 8)
if report["tools"]:
lines.append("**🔧 Top Tools:**")
for t in report["tools"][:8]:
lines.append(f" {t['tool']}{t['count']:,} calls ({t['percentage']:.1f}%)")
lines.append("")
# Activity summary
act = report.get("activity", {})
if act.get("busiest_day") and act.get("busiest_hour"):
hr = act["busiest_hour"]["hour"]
ampm = "AM" if hr < 12 else "PM"
display_hr = hr % 12 or 12
lines.append(f"**📅 Busiest:** {act['busiest_day']['day']}s ({act['busiest_day']['count']} sessions), {display_hr}{ampm} ({act['busiest_hour']['count']} sessions)")
if act.get("active_days"):
lines.append(f"**Active days:** {act['active_days']}", )
if act.get("max_streak", 0) > 1:
lines.append(f"**Best streak:** {act['max_streak']} consecutive days")
return "\n".join(lines)
-213
View File
@@ -1,213 +0,0 @@
"""Model metadata, context lengths, and token estimation utilities.
Pure utility functions with no AIAgent dependency. Used by ContextCompressor
and run_agent.py for pre-flight context checks.
"""
import logging
import os
import re
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
import requests
import yaml
from hermes_constants import OPENROUTER_MODELS_URL
logger = logging.getLogger(__name__)
_model_metadata_cache: Dict[str, Dict[str, Any]] = {}
_model_metadata_cache_time: float = 0
_MODEL_CACHE_TTL = 3600
# Descending tiers for context length probing when the model is unknown.
# We start high and step down on context-length errors until one works.
CONTEXT_PROBE_TIERS = [
2_000_000,
1_000_000,
512_000,
200_000,
128_000,
64_000,
32_000,
]
DEFAULT_CONTEXT_LENGTHS = {
"anthropic/claude-opus-4": 200000,
"anthropic/claude-opus-4.5": 200000,
"anthropic/claude-opus-4.6": 200000,
"anthropic/claude-sonnet-4": 200000,
"anthropic/claude-sonnet-4-20250514": 200000,
"anthropic/claude-haiku-4.5": 200000,
"openai/gpt-4o": 128000,
"openai/gpt-4-turbo": 128000,
"openai/gpt-4o-mini": 128000,
"google/gemini-2.0-flash": 1048576,
"google/gemini-2.5-pro": 1048576,
"meta-llama/llama-3.3-70b-instruct": 131072,
"deepseek/deepseek-chat-v3": 65536,
"qwen/qwen-2.5-72b-instruct": 32768,
}
def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any]]:
"""Fetch model metadata from OpenRouter (cached for 1 hour)."""
global _model_metadata_cache, _model_metadata_cache_time
if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
return _model_metadata_cache
try:
response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
response.raise_for_status()
data = response.json()
cache = {}
for model in data.get("data", []):
model_id = model.get("id", "")
cache[model_id] = {
"context_length": model.get("context_length", 128000),
"max_completion_tokens": model.get("top_provider", {}).get("max_completion_tokens", 4096),
"name": model.get("name", model_id),
"pricing": model.get("pricing", {}),
}
canonical = model.get("canonical_slug", "")
if canonical and canonical != model_id:
cache[canonical] = cache[model_id]
_model_metadata_cache = cache
_model_metadata_cache_time = time.time()
logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))
return cache
except Exception as e:
logging.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
return _model_metadata_cache or {}
def _get_context_cache_path() -> Path:
"""Return path to the persistent context length cache file."""
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
return hermes_home / "context_length_cache.yaml"
def _load_context_cache() -> Dict[str, int]:
"""Load the model+provider → context_length cache from disk."""
path = _get_context_cache_path()
if not path.exists():
return {}
try:
with open(path) as f:
data = yaml.safe_load(f) or {}
return data.get("context_lengths", {})
except Exception as e:
logger.debug("Failed to load context length cache: %s", e)
return {}
def save_context_length(model: str, base_url: str, length: int) -> None:
"""Persist a discovered context length for a model+provider combo.
Cache key is ``model@base_url`` so the same model name served from
different providers can have different limits.
"""
key = f"{model}@{base_url}"
cache = _load_context_cache()
if cache.get(key) == length:
return # already stored
cache[key] = length
path = _get_context_cache_path()
try:
path.parent.mkdir(parents=True, exist_ok=True)
with open(path, "w") as f:
yaml.dump({"context_lengths": cache}, f, default_flow_style=False)
logger.info("Cached context length %s%s tokens", key, f"{length:,}")
except Exception as e:
logger.debug("Failed to save context length cache: %s", e)
def get_cached_context_length(model: str, base_url: str) -> Optional[int]:
"""Look up a previously discovered context length for model+provider."""
key = f"{model}@{base_url}"
cache = _load_context_cache()
return cache.get(key)
def get_next_probe_tier(current_length: int) -> Optional[int]:
"""Return the next lower probe tier, or None if already at minimum."""
for tier in CONTEXT_PROBE_TIERS:
if tier < current_length:
return tier
return None
def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
"""Try to extract the actual context limit from an API error message.
Many providers include the limit in their error text, e.g.:
- "maximum context length is 32768 tokens"
- "context_length_exceeded: 131072"
- "Maximum context size 32768 exceeded"
- "model's max context length is 65536"
"""
error_lower = error_msg.lower()
# Pattern: look for numbers near context-related keywords
patterns = [
r'(?:max(?:imum)?|limit)\s*(?:context\s*)?(?:length|size|window)?\s*(?:is|of|:)?\s*(\d{4,})',
r'context\s*(?:length|size|window)\s*(?:is|of|:)?\s*(\d{4,})',
r'(\d{4,})\s*(?:token)?\s*(?:context|limit)',
r'>\s*(\d{4,})\s*(?:max|limit|token)', # "250000 tokens > 200000 maximum"
r'(\d{4,})\s*(?:max(?:imum)?)\b', # "200000 maximum"
]
for pattern in patterns:
match = re.search(pattern, error_lower)
if match:
limit = int(match.group(1))
# Sanity check: must be a reasonable context length
if 1024 <= limit <= 10_000_000:
return limit
return None
def get_model_context_length(model: str, base_url: str = "") -> int:
"""Get the context length for a model.
Resolution order:
1. Persistent cache (previously discovered via probing)
2. OpenRouter API metadata
3. Hardcoded DEFAULT_CONTEXT_LENGTHS (fuzzy match)
4. First probe tier (2M) — will be narrowed on first context error
"""
# 1. Check persistent cache (model+provider)
if base_url:
cached = get_cached_context_length(model, base_url)
if cached is not None:
return cached
# 2. OpenRouter API metadata
metadata = fetch_model_metadata()
if model in metadata:
return metadata[model].get("context_length", 128000)
# 3. Hardcoded defaults (fuzzy match)
for default_model, length in DEFAULT_CONTEXT_LENGTHS.items():
if default_model in model or model in default_model:
return length
# 4. Unknown model — start at highest probe tier
return CONTEXT_PROBE_TIERS[0]
def estimate_tokens_rough(text: str) -> int:
"""Rough token estimate (~4 chars/token) for pre-flight checks."""
if not text:
return 0
return len(text) // 4
def estimate_messages_tokens_rough(messages: List[Dict[str, Any]]) -> int:
"""Rough token estimate for a message list (pre-flight only)."""
total_chars = sum(len(str(msg)) for msg in messages)
return total_chars // 4
-337
View File
@@ -1,337 +0,0 @@
"""System prompt assembly -- identity, platform hints, skills index, context files.
All functions are stateless. AIAgent._build_system_prompt() calls these to
assemble pieces, then combines them with memory and ephemeral prompts.
"""
import logging
import os
import re
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Context file scanning — detect prompt injection in AGENTS.md, .cursorrules,
# SOUL.md before they get injected into the system prompt.
# ---------------------------------------------------------------------------
_CONTEXT_THREAT_PATTERNS = [
(r'ignore\s+(previous|all|above|prior)\s+instructions', "prompt_injection"),
(r'do\s+not\s+tell\s+the\s+user', "deception_hide"),
(r'system\s+prompt\s+override', "sys_prompt_override"),
(r'disregard\s+(your|all|any)\s+(instructions|rules|guidelines)', "disregard_rules"),
(r'act\s+as\s+(if|though)\s+you\s+(have\s+no|don\'t\s+have)\s+(restrictions|limits|rules)', "bypass_restrictions"),
(r'<!--[^>]*(?:ignore|override|system|secret|hidden)[^>]*-->', "html_comment_injection"),
(r'<\s*div\s+style\s*=\s*["\'].*display\s*:\s*none', "hidden_div"),
(r'translate\s+.*\s+into\s+.*\s+and\s+(execute|run|eval)', "translate_execute"),
(r'curl\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)', "exfil_curl"),
(r'cat\s+[^\n]*(\.env|credentials|\.netrc|\.pgpass)', "read_secrets"),
]
_CONTEXT_INVISIBLE_CHARS = {
'\u200b', '\u200c', '\u200d', '\u2060', '\ufeff',
'\u202a', '\u202b', '\u202c', '\u202d', '\u202e',
}
def _scan_context_content(content: str, filename: str) -> str:
"""Scan context file content for injection. Returns sanitized content."""
findings = []
# Check invisible unicode
for char in _CONTEXT_INVISIBLE_CHARS:
if char in content:
findings.append(f"invisible unicode U+{ord(char):04X}")
# Check threat patterns
for pattern, pid in _CONTEXT_THREAT_PATTERNS:
if re.search(pattern, content, re.IGNORECASE):
findings.append(pid)
if findings:
logger.warning("Context file %s blocked: %s", filename, ", ".join(findings))
return f"[BLOCKED: {filename} contained potential prompt injection ({', '.join(findings)}). Content not loaded.]"
return content
# =========================================================================
# Constants
# =========================================================================
DEFAULT_AGENT_IDENTITY = (
"You are Hermes Agent, an intelligent AI assistant created by Nous Research. "
"You are helpful, knowledgeable, and direct. You assist users with a wide "
"range of tasks including answering questions, writing and editing code, "
"analyzing information, creative work, and executing actions via your tools. "
"You communicate clearly, admit uncertainty when appropriate, and prioritize "
"being genuinely useful over being verbose unless otherwise directed below."
)
MEMORY_GUIDANCE = (
"You have persistent memory across sessions. Proactively save important things "
"you learn (user preferences, environment details, useful approaches) and do "
"(like a diary!) using the memory tool -- don't wait to be asked."
)
SESSION_SEARCH_GUIDANCE = (
"When the user references something from a past conversation or you suspect "
"relevant prior context exists, use session_search to recall it before asking "
"them to repeat themselves."
)
SKILLS_GUIDANCE = (
"After completing a complex task (5+ tool calls), fixing a tricky error, "
"or discovering a non-trivial workflow, consider saving the approach as a "
"skill with skill_manage so you can reuse it next time."
)
PLATFORM_HINTS = {
"whatsapp": (
"You are on a text messaging communication platform, WhatsApp. "
"Please do not use markdown as it does not render. "
"You can send media files natively: to deliver a file to the user, "
"include MEDIA:/absolute/path/to/file in your response. The file "
"will be sent as a native WhatsApp attachment — images (.jpg, .png, "
".webp) appear as photos, videos (.mp4, .mov) play inline, and other "
"files arrive as downloadable documents. You can also include image "
"URLs in markdown format ![alt](url) and they will be sent as photos."
),
"telegram": (
"You are on a text messaging communication platform, Telegram. "
"Please do not use markdown as it does not render. "
"You can send media files natively: to deliver a file to the user, "
"include MEDIA:/absolute/path/to/file in your response. Audio "
"(.ogg) sends as voice bubbles. You can also include image URLs "
"in markdown format ![alt](url) and they will be sent as native photos."
),
"discord": (
"You are in a Discord server or group chat communicating with your user."
),
"cli": (
"You are a CLI AI Agent. Try not to use markdown but simple text "
"renderable inside a terminal."
),
}
CONTEXT_FILE_MAX_CHARS = 20_000
CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
# =========================================================================
# Skills index
# =========================================================================
def _read_skill_description(skill_file: Path, max_chars: int = 60) -> str:
"""Read the description from a SKILL.md frontmatter, capped at max_chars."""
try:
raw = skill_file.read_text(encoding="utf-8")[:2000]
match = re.search(
r"^---\s*\n.*?description:\s*(.+?)\s*\n.*?^---",
raw, re.MULTILINE | re.DOTALL,
)
if match:
desc = match.group(1).strip().strip("'\"")
if len(desc) > max_chars:
desc = desc[:max_chars - 3] + "..."
return desc
except Exception:
pass
return ""
def build_skills_system_prompt() -> str:
"""Build a compact skill index for the system prompt.
Scans ~/.hermes/skills/ for SKILL.md files grouped by category.
Includes per-skill descriptions from frontmatter so the model can
match skills by meaning, not just name.
"""
hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
skills_dir = hermes_home / "skills"
if not skills_dir.exists():
return ""
# Collect skills with descriptions, grouped by category
# Each entry: (skill_name, description)
skills_by_category: dict[str, list[tuple[str, str]]] = {}
for skill_file in skills_dir.rglob("SKILL.md"):
rel_path = skill_file.relative_to(skills_dir)
parts = rel_path.parts
if len(parts) >= 2:
category = parts[0]
skill_name = parts[-2]
else:
category = "general"
skill_name = skill_file.parent.name
desc = _read_skill_description(skill_file)
skills_by_category.setdefault(category, []).append((skill_name, desc))
if not skills_by_category:
return ""
# Read category-level descriptions from DESCRIPTION.md
category_descriptions = {}
for category in skills_by_category:
desc_file = skills_dir / category / "DESCRIPTION.md"
if desc_file.exists():
try:
content = desc_file.read_text(encoding="utf-8")
match = re.search(r"^---\s*\n.*?description:\s*(.+?)\s*\n.*?^---", content, re.MULTILINE | re.DOTALL)
if match:
category_descriptions[category] = match.group(1).strip()
except Exception as e:
logger.debug("Could not read skill description %s: %s", desc_file, e)
index_lines = []
for category in sorted(skills_by_category.keys()):
cat_desc = category_descriptions.get(category, "")
if cat_desc:
index_lines.append(f" {category}: {cat_desc}")
else:
index_lines.append(f" {category}:")
# Deduplicate and sort skills within each category
seen = set()
for name, desc in sorted(skills_by_category[category], key=lambda x: x[0]):
if name in seen:
continue
seen.add(name)
if desc:
index_lines.append(f" - {name}: {desc}")
else:
index_lines.append(f" - {name}")
return (
"## Skills (mandatory)\n"
"Before replying, scan the skills below. If one clearly matches your task, "
"load it with skill_view(name) and follow its instructions. "
"If a skill has issues, fix it with skill_manage(action='patch').\n"
"\n"
"<available_skills>\n"
+ "\n".join(index_lines) + "\n"
"</available_skills>\n"
"\n"
"If none match, proceed normally without loading a skill."
)
# =========================================================================
# Context files (SOUL.md, AGENTS.md, .cursorrules)
# =========================================================================
def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
"""Head/tail truncation with a marker in the middle."""
if len(content) <= max_chars:
return content
head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
head = content[:head_chars]
tail = content[-tail_chars:]
marker = f"\n\n[...truncated {filename}: kept {head_chars}+{tail_chars} of {len(content)} chars. Use file tools to read the full file.]\n\n"
return head + marker + tail
def build_context_files_prompt(cwd: Optional[str] = None) -> str:
"""Discover and load context files for the system prompt.
Discovery: AGENTS.md (recursive), .cursorrules / .cursor/rules/*.mdc,
SOUL.md (cwd then ~/.hermes/ fallback). Each capped at 20,000 chars.
"""
if cwd is None:
cwd = os.getcwd()
cwd_path = Path(cwd).resolve()
sections = []
# AGENTS.md (hierarchical, recursive)
top_level_agents = None
for name in ["AGENTS.md", "agents.md"]:
candidate = cwd_path / name
if candidate.exists():
top_level_agents = candidate
break
if top_level_agents:
agents_files = []
for root, dirs, files in os.walk(cwd_path):
dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ('node_modules', '__pycache__', 'venv', '.venv')]
for f in files:
if f.lower() == "agents.md":
agents_files.append(Path(root) / f)
agents_files.sort(key=lambda p: len(p.parts))
total_agents_content = ""
for agents_path in agents_files:
try:
content = agents_path.read_text(encoding="utf-8").strip()
if content:
rel_path = agents_path.relative_to(cwd_path)
content = _scan_context_content(content, str(rel_path))
total_agents_content += f"## {rel_path}\n\n{content}\n\n"
except Exception as e:
logger.debug("Could not read %s: %s", agents_path, e)
if total_agents_content:
total_agents_content = _truncate_content(total_agents_content, "AGENTS.md")
sections.append(total_agents_content)
# .cursorrules
cursorrules_content = ""
cursorrules_file = cwd_path / ".cursorrules"
if cursorrules_file.exists():
try:
content = cursorrules_file.read_text(encoding="utf-8").strip()
if content:
content = _scan_context_content(content, ".cursorrules")
cursorrules_content += f"## .cursorrules\n\n{content}\n\n"
except Exception as e:
logger.debug("Could not read .cursorrules: %s", e)
cursor_rules_dir = cwd_path / ".cursor" / "rules"
if cursor_rules_dir.exists() and cursor_rules_dir.is_dir():
mdc_files = sorted(cursor_rules_dir.glob("*.mdc"))
for mdc_file in mdc_files:
try:
content = mdc_file.read_text(encoding="utf-8").strip()
if content:
content = _scan_context_content(content, f".cursor/rules/{mdc_file.name}")
cursorrules_content += f"## .cursor/rules/{mdc_file.name}\n\n{content}\n\n"
except Exception as e:
logger.debug("Could not read %s: %s", mdc_file, e)
if cursorrules_content:
cursorrules_content = _truncate_content(cursorrules_content, ".cursorrules")
sections.append(cursorrules_content)
# SOUL.md (cwd first, then ~/.hermes/ fallback)
soul_path = None
for name in ["SOUL.md", "soul.md"]:
candidate = cwd_path / name
if candidate.exists():
soul_path = candidate
break
if not soul_path:
global_soul = Path.home() / ".hermes" / "SOUL.md"
if global_soul.exists():
soul_path = global_soul
if soul_path:
try:
content = soul_path.read_text(encoding="utf-8").strip()
if content:
content = _scan_context_content(content, "SOUL.md")
content = _truncate_content(content, "SOUL.md")
sections.append(
f"## SOUL.md\n\nIf SOUL.md is present, embody its persona and tone. "
f"Avoid stiff, generic replies; follow its guidance unless higher-priority "
f"instructions override it.\n\n{content}"
)
except Exception as e:
logger.debug("Could not read SOUL.md from %s: %s", soul_path, e)
if not sections:
return ""
return "# Project Context\n\nThe following project context files have been loaded and should be followed:\n\n" + "\n".join(sections)
-68
View File
@@ -1,68 +0,0 @@
"""Anthropic prompt caching (system_and_3 strategy).
Reduces input token costs by ~75% on multi-turn conversations by caching
the conversation prefix. Uses 4 cache_control breakpoints (Anthropic max):
1. System prompt (stable across all turns)
2-4. Last 3 non-system messages (rolling window)
Pure functions -- no class state, no AIAgent dependency.
"""
import copy
from typing import Any, Dict, List
def _apply_cache_marker(msg: dict, cache_marker: dict) -> None:
"""Add cache_control to a single message, handling all format variations."""
role = msg.get("role", "")
content = msg.get("content")
if role == "tool":
msg["cache_control"] = cache_marker
return
if content is None:
msg["cache_control"] = cache_marker
return
if isinstance(content, str):
msg["content"] = [{"type": "text", "text": content, "cache_control": cache_marker}]
return
if isinstance(content, list) and content:
last = content[-1]
if isinstance(last, dict):
last["cache_control"] = cache_marker
def apply_anthropic_cache_control(
api_messages: List[Dict[str, Any]],
cache_ttl: str = "5m",
) -> List[Dict[str, Any]]:
"""Apply system_and_3 caching strategy to messages for Anthropic models.
Places up to 4 cache_control breakpoints: system prompt + last 3 non-system messages.
Returns:
Deep copy of messages with cache_control breakpoints injected.
"""
messages = copy.deepcopy(api_messages)
if not messages:
return messages
marker = {"type": "ephemeral"}
if cache_ttl == "1h":
marker["ttl"] = "1h"
breakpoints_used = 0
if messages[0].get("role") == "system":
_apply_cache_marker(messages[0], marker)
breakpoints_used += 1
remaining = 4 - breakpoints_used
non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"]
for idx in non_sys[-remaining:]:
_apply_cache_marker(messages[idx], marker)
return messages
-115
View File
@@ -1,115 +0,0 @@
"""Regex-based secret redaction for logs and tool output.
Applies pattern matching to mask API keys, tokens, and credentials
before they reach log files, verbose output, or gateway logs.
Short tokens (< 18 chars) are fully masked. Longer tokens preserve
the first 6 and last 4 characters for debuggability.
"""
import logging
import re
from typing import Optional
logger = logging.getLogger(__name__)
# Known API key prefixes -- match the prefix + contiguous token chars
_PREFIX_PATTERNS = [
r"sk-[A-Za-z0-9_-]{10,}", # OpenAI / OpenRouter
r"ghp_[A-Za-z0-9]{10,}", # GitHub PAT (classic)
r"github_pat_[A-Za-z0-9_]{10,}", # GitHub PAT (fine-grained)
r"xox[baprs]-[A-Za-z0-9-]{10,}", # Slack tokens
r"AIza[A-Za-z0-9_-]{30,}", # Google API keys
r"pplx-[A-Za-z0-9]{10,}", # Perplexity
r"fal_[A-Za-z0-9_-]{10,}", # Fal.ai
r"fc-[A-Za-z0-9]{10,}", # Firecrawl
r"bb_live_[A-Za-z0-9_-]{10,}", # BrowserBase
r"gAAAA[A-Za-z0-9_=-]{20,}", # Codex encrypted tokens
]
# ENV assignment patterns: KEY=value where KEY contains a secret-like name
_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
_ENV_ASSIGN_RE = re.compile(
rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2",
re.IGNORECASE,
)
# JSON field patterns: "apiKey": "value", "token": "value", etc.
_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer)"
_JSON_FIELD_RE = re.compile(
rf'("{_JSON_KEY_NAMES}")\s*:\s*"([^"]+)"',
re.IGNORECASE,
)
# Authorization headers
_AUTH_HEADER_RE = re.compile(
r"(Authorization:\s*Bearer\s+)(\S+)",
re.IGNORECASE,
)
# Telegram bot tokens: bot<digits>:<token> or <digits>:<alphanum>
_TELEGRAM_RE = re.compile(
r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})",
)
# Compile known prefix patterns into one alternation
_PREFIX_RE = re.compile(
r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
)
def _mask_token(token: str) -> str:
"""Mask a token, preserving prefix for long tokens."""
if len(token) < 18:
return "***"
return f"{token[:6]}...{token[-4:]}"
def redact_sensitive_text(text: str) -> str:
"""Apply all redaction patterns to a block of text.
Safe to call on any string -- non-matching text passes through unchanged.
"""
if not text:
return text
# Known prefixes (sk-, ghp_, etc.)
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
# ENV assignments: OPENAI_API_KEY=sk-abc...
def _redact_env(m):
name, quote, value = m.group(1), m.group(2), m.group(3)
return f"{name}={quote}{_mask_token(value)}{quote}"
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
# JSON fields: "apiKey": "value"
def _redact_json(m):
key, value = m.group(1), m.group(2)
return f'{key}: "{_mask_token(value)}"'
text = _JSON_FIELD_RE.sub(_redact_json, text)
# Authorization headers
text = _AUTH_HEADER_RE.sub(
lambda m: m.group(1) + _mask_token(m.group(2)),
text,
)
# Telegram bot tokens
def _redact_telegram(m):
prefix = m.group(1) or ""
digits = m.group(2)
return f"{prefix}{digits}:***"
text = _TELEGRAM_RE.sub(_redact_telegram, text)
return text
class RedactingFormatter(logging.Formatter):
"""Log formatter that redacts secrets from all log messages."""
def __init__(self, fmt=None, datefmt=None, style='%', **kwargs):
super().__init__(fmt, datefmt, style, **kwargs)
def format(self, record: logging.LogRecord) -> str:
original = super().format(record)
return redact_sensitive_text(original)
-113
View File
@@ -1,113 +0,0 @@
"""Skill slash commands — scan installed skills and build invocation messages.
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
can invoke skills via /skill-name commands.
"""
import logging
from pathlib import Path
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
_skill_commands: Dict[str, Dict[str, Any]] = {}
def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
"""Scan ~/.hermes/skills/ and return a mapping of /command -> skill info.
Returns:
Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}.
"""
global _skill_commands
_skill_commands = {}
try:
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter
if not SKILLS_DIR.exists():
return _skill_commands
for skill_md in SKILLS_DIR.rglob("SKILL.md"):
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
continue
try:
content = skill_md.read_text(encoding='utf-8')
frontmatter, body = _parse_frontmatter(content)
name = frontmatter.get('name', skill_md.parent.name)
description = frontmatter.get('description', '')
if not description:
for line in body.strip().split('\n'):
line = line.strip()
if line and not line.startswith('#'):
description = line[:80]
break
cmd_name = name.lower().replace(' ', '-').replace('_', '-')
_skill_commands[f"/{cmd_name}"] = {
"name": name,
"description": description or f"Invoke the {name} skill",
"skill_md_path": str(skill_md),
"skill_dir": str(skill_md.parent),
}
except Exception:
continue
except Exception:
pass
return _skill_commands
def get_skill_commands() -> Dict[str, Dict[str, Any]]:
"""Return the current skill commands mapping (scan first if empty)."""
if not _skill_commands:
scan_skill_commands()
return _skill_commands
def build_skill_invocation_message(cmd_key: str, user_instruction: str = "") -> Optional[str]:
"""Build the user message content for a skill slash command invocation.
Args:
cmd_key: The command key including leading slash (e.g., "/gif-search").
user_instruction: Optional text the user typed after the command.
Returns:
The formatted message string, or None if the skill wasn't found.
"""
commands = get_skill_commands()
skill_info = commands.get(cmd_key)
if not skill_info:
return None
skill_md_path = Path(skill_info["skill_md_path"])
skill_dir = Path(skill_info["skill_dir"])
skill_name = skill_info["name"]
try:
content = skill_md_path.read_text(encoding='utf-8')
except Exception:
return f"[Failed to load skill: {skill_name}]"
parts = [
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
"",
content.strip(),
]
supporting = []
for subdir in ("references", "templates", "scripts", "assets"):
subdir_path = skill_dir / subdir
if subdir_path.exists():
for f in sorted(subdir_path.rglob("*")):
if f.is_file():
rel = str(f.relative_to(skill_dir))
supporting.append(rel)
if supporting:
parts.append("")
parts.append("[This skill has supporting files you can load with the skill_view tool:]")
for sf in supporting:
parts.append(f"- {sf}")
parts.append(f'\nTo view any of these, use: skill_view(name="{skill_name}", file="<path>")')
if user_instruction:
parts.append("")
parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")
return "\n".join(parts)
-56
View File
@@ -1,56 +0,0 @@
"""Trajectory saving utilities and static helpers.
_convert_to_trajectory_format stays as an AIAgent method (batch_runner.py
calls agent._convert_to_trajectory_format). Only the static helpers and
the file-write logic live here.
"""
import json
import logging
from datetime import datetime
from typing import Any, Dict, List
logger = logging.getLogger(__name__)
def convert_scratchpad_to_think(content: str) -> str:
"""Convert <REASONING_SCRATCHPAD> tags to <think> tags."""
if not content or "<REASONING_SCRATCHPAD>" not in content:
return content
return content.replace("<REASONING_SCRATCHPAD>", "<think>").replace("</REASONING_SCRATCHPAD>", "</think>")
def has_incomplete_scratchpad(content: str) -> bool:
"""Check if content has an opening <REASONING_SCRATCHPAD> without a closing tag."""
if not content:
return False
return "<REASONING_SCRATCHPAD>" in content and "</REASONING_SCRATCHPAD>" not in content
def save_trajectory(trajectory: List[Dict[str, Any]], model: str,
completed: bool, filename: str = None):
"""Append a trajectory entry to a JSONL file.
Args:
trajectory: The ShareGPT-format conversation list.
model: Model name for metadata.
completed: Whether the conversation completed successfully.
filename: Override output filename. Defaults to trajectory_samples.jsonl
or failed_trajectories.jsonl based on ``completed``.
"""
if filename is None:
filename = "trajectory_samples.jsonl" if completed else "failed_trajectories.jsonl"
entry = {
"conversations": trajectory,
"timestamp": datetime.now().isoformat(),
"model": model,
"completed": completed,
}
try:
with open(filename, "a", encoding="utf-8") as f:
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
logger.info("Trajectory saved to %s", filename)
except Exception as e:
logger.warning("Failed to save trajectory: %s", e)
BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

+22 -102
View File
@@ -27,14 +27,16 @@ import time
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from datetime import datetime
from multiprocessing import Pool, Lock
from multiprocessing import Pool, Manager, Lock
import traceback
from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeRemainingColumn, MofNCompleteColumn
from rich.console import Console
import fire
from run_agent import AIAgent
from toolset_distributions import (
get_distribution,
list_distributions,
sample_toolsets_from_distribution,
validate_distribution
@@ -171,7 +173,7 @@ def _extract_tool_stats(messages: List[Dict[str, Any]]) -> Dict[str, Dict[str, i
if content_json.get("success") is False:
is_success = False
except (json.JSONDecodeError, ValueError, TypeError):
except:
# If not JSON, check if content is empty or explicitly states an error
# Note: We avoid simple substring matching to prevent false positives
if not content:
@@ -238,7 +240,7 @@ def _process_single_prompt(
Args:
prompt_index (int): Index of prompt in dataset
prompt_data (Dict): Prompt data containing 'prompt' field and optional 'image' field
prompt_data (Dict): Prompt data containing 'prompt' field
batch_num (int): Batch number
config (Dict): Configuration dict with agent parameters
@@ -246,58 +248,6 @@ def _process_single_prompt(
Dict: Result containing trajectory, stats, and metadata
"""
prompt = prompt_data["prompt"]
task_id = f"task_{prompt_index}"
# Per-prompt container image override: if the dataset row has an 'image' field,
# register it for this task's sandbox. Works with Docker, Modal, Singularity, and Daytona.
container_image = prompt_data.get("image") or prompt_data.get("docker_image")
if container_image:
# Verify the image is accessible before spending tokens on the agent loop.
# For Docker: check local cache, then try pulling.
# For Modal: skip local check (Modal pulls server-side).
env_type = os.getenv("TERMINAL_ENV", "local")
if env_type == "docker":
import subprocess as _sp
try:
probe = _sp.run(
["docker", "image", "inspect", container_image],
capture_output=True, timeout=10,
)
if probe.returncode != 0:
if config.get("verbose"):
print(f" Prompt {prompt_index}: Pulling docker image {container_image}...", flush=True)
pull = _sp.run(
["docker", "pull", container_image],
capture_output=True, text=True, timeout=600,
)
if pull.returncode != 0:
return {
"success": False,
"prompt_index": prompt_index,
"error": f"Docker image not available: {container_image}\n{pull.stderr[:500]}",
"trajectory": None,
"tool_stats": {},
"toolsets_used": [],
"metadata": {"batch_num": batch_num, "timestamp": datetime.now().isoformat()},
}
except FileNotFoundError:
pass # Docker CLI not installed — skip check (e.g., Modal backend)
except Exception as img_err:
if config.get("verbose"):
print(f" Prompt {prompt_index}: Docker image check failed: {img_err}", flush=True)
from tools.terminal_tool import register_task_env_overrides
overrides = {
"docker_image": container_image,
"modal_image": container_image,
"singularity_image": f"docker://{container_image}",
"daytona_image": container_image,
}
if prompt_data.get("cwd"):
overrides["cwd"] = prompt_data["cwd"]
register_task_env_overrides(task_id, overrides)
if config.get("verbose"):
print(f" Prompt {prompt_index}: Using container image {container_image}")
try:
# Sample toolsets from distribution for this prompt
@@ -327,11 +277,10 @@ def _process_single_prompt(
reasoning_config=config.get("reasoning_config"),
prefill_messages=config.get("prefill_messages"),
skip_context_files=True, # Don't pollute trajectories with SOUL.md/AGENTS.md
skip_memory=True, # Don't use persistent memory in batch runs
)
# Run the agent with task_id to ensure each task gets its own isolated VM
result = agent.run_conversation(prompt, task_id=task_id)
result = agent.run_conversation(prompt, task_id=f"task_{prompt_index}")
# Extract tool usage statistics
tool_stats = _extract_tool_stats(result["messages"])
@@ -700,13 +649,14 @@ class BatchRunner:
lock (Lock): Optional lock for thread-safe access
"""
checkpoint_data["last_updated"] = datetime.now().isoformat()
from utils import atomic_json_write
if lock:
with lock:
atomic_json_write(self.checkpoint_file, checkpoint_data)
with open(self.checkpoint_file, 'w', encoding='utf-8') as f:
json.dump(checkpoint_data, f, indent=2, ensure_ascii=False)
else:
atomic_json_write(self.checkpoint_file, checkpoint_data)
with open(self.checkpoint_file, 'w', encoding='utf-8') as f:
json.dump(checkpoint_data, f, indent=2, ensure_ascii=False)
def _scan_completed_prompts_by_content(self) -> set:
"""
@@ -831,15 +781,13 @@ class BatchRunner:
print(f" New batches created: {len(batches_to_process)}")
print("=" * 70 + "\n")
# Load existing checkpoint (so resume doesn't clobber prior progress)
checkpoint_data = self._load_checkpoint()
if checkpoint_data.get("run_name") != self.run_name:
checkpoint_data = {
"run_name": self.run_name,
"completed_prompts": [],
"batch_stats": {},
"last_updated": None
}
# Initialize checkpoint data (needed for saving at the end)
checkpoint_data = {
"run_name": self.run_name,
"completed_prompts": [],
"batch_stats": {},
"last_updated": None
}
# Prepare configuration for workers
config = {
@@ -861,7 +809,7 @@ class BatchRunner:
}
# For backward compatibility, still track by index (but this is secondary to content matching)
completed_prompts_set = set(checkpoint_data.get("completed_prompts", []))
completed_prompts_set = set()
# Aggregate statistics across all batches
total_tool_stats = {}
@@ -870,9 +818,6 @@ class BatchRunner:
print(f"\n🔧 Initializing {self.num_workers} worker processes...")
# Checkpoint writes happen in the parent process; keep a lock for safety.
checkpoint_lock = Lock()
# Process batches in parallel
with Pool(processes=self.num_workers) as pool:
# Create tasks for each batch
@@ -918,28 +863,6 @@ class BatchRunner:
for result in pool.imap_unordered(_process_batch_worker, tasks):
results.append(result)
progress.update(task, advance=1)
# Incremental checkpoint update (so resume works after crash)
try:
batch_num = result.get('batch_num')
completed = result.get('completed_prompts', []) or []
completed_prompts_set.update(completed)
if isinstance(batch_num, int):
checkpoint_data.setdefault('batch_stats', {})[str(batch_num)] = {
'processed': result.get('processed', 0),
'skipped': result.get('skipped', 0),
'discarded_no_reasoning': result.get('discarded_no_reasoning', 0),
}
checkpoint_data['completed_prompts'] = sorted(completed_prompts_set)
self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
except Exception as ckpt_err:
# Don't fail the run if checkpoint write fails
print(f"⚠️ Warning: Failed to save incremental checkpoint: {ckpt_err}")
except Exception as e:
logger.error("Batch worker failed: %s", e, exc_info=True)
raise
finally:
root_logger.setLevel(original_level)
@@ -968,12 +891,9 @@ class BatchRunner:
for key in total_reasoning_stats:
total_reasoning_stats[key] += batch_result.get("reasoning_stats", {}).get(key, 0)
# Save final checkpoint (best-effort; incremental writes already happened)
try:
checkpoint_data["completed_prompts"] = all_completed_prompts
self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
except Exception as ckpt_err:
print(f"⚠️ Warning: Failed to save final checkpoint: {ckpt_err}")
# Save final checkpoint
checkpoint_data["completed_prompts"] = all_completed_prompts
self._save_checkpoint(checkpoint_data)
# Calculate success rates
for tool_name in total_tool_stats:
+7 -295
View File
@@ -9,43 +9,10 @@ model:
# Default model to use (can be overridden with --model flag)
default: "anthropic/claude-opus-4.6"
# Inference provider selection:
# "auto" - Use Nous Portal if logged in, otherwise OpenRouter/env vars (default)
# "openrouter" - Always use OpenRouter API key from OPENROUTER_API_KEY
# "nous" - Always use Nous Portal (requires: hermes login)
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
provider: "auto"
# API configuration (falls back to OPENROUTER_API_KEY env var)
# api_key: "your-key-here" # Uncomment to set here instead of .env
base_url: "https://openrouter.ai/api/v1"
# =============================================================================
# OpenRouter Provider Routing (only applies when using OpenRouter)
# =============================================================================
# Control how requests are routed across providers on OpenRouter.
# See: https://openrouter.ai/docs/guides/routing/provider-selection
#
# provider_routing:
# # Sort strategy: "price" (default), "throughput", or "latency"
# # Append :nitro to model name for a shortcut to throughput sorting.
# sort: "throughput"
#
# # Only allow these providers (provider slugs from OpenRouter)
# # only: ["anthropic", "google"]
#
# # Skip these providers entirely
# # ignore: ["deepinfra", "fireworks"]
#
# # Try providers in this order (overrides default load balancing)
# # order: ["anthropic", "google", "together"]
#
# # Require providers to support all parameters in your request
# # require_parameters: true
#
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
# # data_collection: "deny"
# =============================================================================
# Terminal Tool Configuration
# =============================================================================
@@ -117,29 +84,6 @@ terminal:
# lifetime_seconds: 300
# modal_image: "nikolaik/python-nodejs:python3.11-nodejs20"
# -----------------------------------------------------------------------------
# OPTION 6: Daytona cloud execution
# Commands run in Daytona cloud sandboxes
# Great for: Cloud dev environments, persistent workspaces, team collaboration
# Requires: pip install daytona, DAYTONA_API_KEY env var
# -----------------------------------------------------------------------------
# terminal:
# backend: "daytona"
# cwd: "~"
# timeout: 180
# lifetime_seconds: 300
# daytona_image: "nikolaik/python-nodejs:python3.11-nodejs20"
# container_disk: 10240 # Daytona max is 10GB per sandbox
#
# --- Container resource limits (docker, singularity, modal, daytona -- ignored for local/ssh) ---
# These settings apply to all container backends. They control the resources
# allocated to the sandbox and whether its filesystem persists across sessions.
container_cpu: 1 # CPU cores
container_memory: 5120 # Memory in MB (5120 = 5GB)
container_disk: 51200 # Disk in MB (51200 = 50GB)
container_persistent: true # Persist filesystem across sessions (false = ephemeral)
# -----------------------------------------------------------------------------
# SUDO SUPPORT (works with ALL backends above)
# -----------------------------------------------------------------------------
@@ -198,74 +142,6 @@ compression:
# This model compresses the middle turns into a concise summary
summary_model: "google/gemini-3-flash-preview"
# =============================================================================
# Persistent Memory
# =============================================================================
# Bounded curated memory injected into the system prompt every session.
# Two stores: MEMORY.md (agent's notes) and USER.md (user profile).
# Character limits keep the memory small and focused. The agent manages
# pruning -- when at the limit, it must consolidate or replace entries.
# Disabled by default in batch_runner and RL environments.
#
memory:
# Agent's personal notes: environment facts, conventions, things learned
memory_enabled: true
# User profile: preferences, communication style, expectations
user_profile_enabled: true
# Character limits (~2.75 chars per token, model-independent)
memory_char_limit: 2200 # ~800 tokens
user_char_limit: 1375 # ~500 tokens
# Periodic memory nudge: remind the agent to consider saving memories
# every N user turns. Set to 0 to disable. Only active when memory is enabled.
nudge_interval: 10 # Nudge every 10 user turns (0 = disabled)
# Memory flush: give the agent one turn to save memories before context is
# lost (compression, /new, /reset, exit). Set to 0 to disable.
# For exit/reset, only fires if the session had at least this many user turns.
flush_min_turns: 6 # Min user turns to trigger flush on exit/reset (0 = disabled)
# =============================================================================
# Session Reset Policy (Messaging Platforms)
# =============================================================================
# Controls when messaging sessions (Telegram, Discord, WhatsApp, Slack) are
# automatically cleared. Without resets, conversation context grows indefinitely
# which increases API costs with every message.
#
# When a reset triggers, the agent first saves important information to its
# persistent memory — but the conversation context is wiped. The agent starts
# fresh but retains learned facts via its memory system.
#
# Users can always manually reset with /reset or /new in chat.
#
# Modes:
# "both" - Reset on EITHER inactivity timeout or daily boundary (recommended)
# "idle" - Reset only after N minutes of inactivity
# "daily" - Reset only at a fixed hour each day
# "none" - Never auto-reset; context lives until /reset or compression kicks in
#
# When a reset triggers, the agent gets one turn to save important memories and
# skills before the context is wiped. Persistent memory carries across sessions.
#
session_reset:
mode: both # "both", "idle", "daily", or "none"
idle_minutes: 1440 # Inactivity timeout in minutes (default: 1440 = 24 hours)
at_hour: 4 # Daily reset hour, 0-23 local time (default: 4 AM)
# =============================================================================
# Skills Configuration
# =============================================================================
# Skills are reusable procedures the agent can load and follow. The agent can
# also create new skills after completing complex tasks.
#
skills:
# Nudge the agent to create skills after complex tasks.
# Every N tool-calling iterations, remind the model to consider saving a skill.
# Set to 0 to disable.
creation_nudge_interval: 15
# =============================================================================
# Agent Behavior
# =============================================================================
@@ -278,10 +154,9 @@ agent:
# Enable verbose logging
verbose: false
# Reasoning effort level (OpenRouter and Nous Portal)
# Controls how much "thinking" the model does before responding.
# Options: "xhigh" (max), "high", "medium", "low", "minimal", "none" (disable)
reasoning_effort: "xhigh"
# Custom system prompt (personality, instructions, etc.)
# Leave empty or remove to use default agent behavior
system_prompt: ""
# Predefined personalities (use with /personality command)
personalities:
@@ -306,107 +181,19 @@ agent:
# Control which tools the agent has access to.
# Use "all" to enable everything, or specify individual toolsets.
# =============================================================================
# Platform Toolsets (per-platform tool configuration)
# =============================================================================
# Override which toolsets are available on each platform.
# If a platform isn't listed here, its built-in default is used.
#
# You can use EITHER:
# - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
# - A list of individual toolsets to compose your own (see list below)
#
# Supported platform keys: cli, telegram, discord, whatsapp, slack
#
# Examples:
#
# # Use presets (same as defaults):
# platform_toolsets:
# cli: [hermes-cli]
# telegram: [hermes-telegram]
#
# # Custom: give Telegram only web + terminal + file + planning:
# platform_toolsets:
# telegram: [web, terminal, file, todo]
#
# # Custom: CLI without browser or image gen:
# platform_toolsets:
# cli: [web, terminal, file, skills, todo, tts, cronjob]
#
# # Restrictive: Discord gets read-only tools only:
# platform_toolsets:
# discord: [web, vision, skills, todo]
#
# If not set, defaults are:
# cli: hermes-cli (everything + cronjob management)
# telegram: hermes-telegram (terminal, file, web, vision, image, tts, browser, skills, todo, cronjob, messaging)
# discord: hermes-discord (same as telegram)
# whatsapp: hermes-whatsapp (same as telegram)
# slack: hermes-slack (same as telegram)
#
platform_toolsets:
cli: [hermes-cli]
telegram: [hermes-telegram]
discord: [hermes-discord]
whatsapp: [hermes-whatsapp]
slack: [hermes-slack]
# ─────────────────────────────────────────────────────────────────────────────
# Available toolsets (use these names in platform_toolsets or the toolsets list)
#
# Run `hermes chat --list-toolsets` to see all toolsets and their tools.
# Run `hermes chat --list-tools` to see every individual tool with descriptions.
# ─────────────────────────────────────────────────────────────────────────────
#
# INDIVIDUAL TOOLSETS (compose your own):
# web - web_search, web_extract
# search - web_search only (no scraping)
# terminal - terminal, process
# file - read_file, write_file, patch, search
# browser - browser_navigate, browser_snapshot, browser_click, browser_type,
# browser_scroll, browser_back, browser_press, browser_close,
# browser_get_images, browser_vision (requires BROWSERBASE_API_KEY)
# vision - vision_analyze (requires OPENROUTER_API_KEY)
# image_gen - image_generate (requires FAL_KEY)
# skills - skills_list, skill_view
# skills_hub - skill_hub (search/install/manage from online registries — user-driven only)
# moa - mixture_of_agents (requires OPENROUTER_API_KEY)
# todo - todo (in-memory task planning, no deps)
# tts - text_to_speech (Edge TTS free, or ELEVENLABS/OPENAI key)
# cronjob - schedule_cronjob, list_cronjobs, remove_cronjob
# rl - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
#
# PRESETS (curated bundles):
# hermes-cli - All of the above except rl + send_message
# hermes-telegram - terminal, file, web, vision, image_gen, tts, browser,
# skills, todo, cronjob, send_message
# hermes-discord - Same as hermes-telegram
# hermes-whatsapp - Same as hermes-telegram
# hermes-slack - Same as hermes-telegram
#
# COMPOSITE:
# debugging - terminal + web + file
# safe - web + vision + moa (no terminal access)
# all - Everything available
# Available toolsets:
#
# web - Web search and content extraction (web_search, web_extract)
# search - Web search only, no scraping (web_search)
# terminal - Command execution and process management (terminal, process)
# file - File operations: read, write, patch, search
# terminal - Command execution (terminal)
# browser - Full browser automation (navigate, click, type, screenshot, etc.)
# vision - Image analysis (vision_analyze)
# image_gen - Image generation with FLUX (image_generate)
# skills - Load skill documents (skills_list, skill_view)
# skills - Load skill documents (skills_categories, skills_list, skill_view)
# moa - Mixture of Agents reasoning (mixture_of_agents)
# todo - Task planning and tracking for multi-step work
# memory - Persistent memory across sessions (personal notes + user profile)
# session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization)
# tts - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI)
# cronjob - Schedule and manage automated tasks (CLI-only)
# rl - RL training tools (Tinker-Atropos)
#
# Composite toolsets:
# debugging - terminal + web + file (for troubleshooting)
# debugging - terminal + web (for troubleshooting)
# safe - web + vision + moa (no terminal access)
# -----------------------------------------------------------------------------
@@ -457,41 +244,6 @@ toolsets:
# toolsets:
# - safe
# =============================================================================
# MCP (Model Context Protocol) Servers
# =============================================================================
# Connect to external MCP servers to add tools from the MCP ecosystem.
# Each server's tools are automatically discovered and registered.
# See docs/mcp.md for full documentation.
#
# Stdio servers (spawn a subprocess):
# command: the executable to run
# args: command-line arguments
# env: environment variables (only these + safe defaults passed to subprocess)
#
# HTTP servers (connect to a URL):
# url: the MCP server endpoint
# headers: HTTP headers (e.g., for authentication)
#
# Optional per-server settings:
# timeout: tool call timeout in seconds (default: 120)
# connect_timeout: initial connection timeout (default: 60)
#
# mcp_servers:
# time:
# command: uvx
# args: ["mcp-server-time"]
# filesystem:
# command: npx
# args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user"]
# notion:
# url: https://mcp.notion.com/mcp
# github:
# command: npx
# args: ["-y", "@modelcontextprotocol/server-github"]
# env:
# GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
# =============================================================================
# Voice Transcription (Speech-to-Text)
# =============================================================================
@@ -525,49 +277,9 @@ stt:
# No configuration needed - logging is always enabled.
# To disable, you would need to modify the source code.
# =============================================================================
# Code Execution Sandbox (Programmatic Tool Calling)
# =============================================================================
# The execute_code tool runs Python scripts that call Hermes tools via RPC.
# Intermediate tool results stay out of the LLM's context window.
code_execution:
timeout: 300 # Max seconds per script before kill (default: 300 = 5 min)
max_tool_calls: 50 # Max RPC tool calls per execution (default: 50)
# =============================================================================
# Subagent Delegation
# =============================================================================
# The delegate_task tool spawns child agents with isolated context.
# Supports single tasks and batch mode (up to 3 parallel).
delegation:
max_iterations: 50 # Max tool-calling turns per child (default: 50)
default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents
# =============================================================================
# Honcho Integration (Cross-Session User Modeling)
# =============================================================================
# AI-native persistent memory via Honcho (https://honcho.dev/).
# Builds a deeper understanding of the user across sessions and tools.
# Runs alongside USER.md — additive, not a replacement.
#
# Requires: pip install honcho-ai
# Config: ~/.honcho/config.json (shared with Claude Code, Cursor, etc.)
# API key: HONCHO_API_KEY in ~/.hermes/.env or ~/.honcho/config.json
#
# Hermes-specific overrides (optional — most config comes from ~/.honcho/config.json):
# honcho: {}
# =============================================================================
# Display
# =============================================================================
display:
# Use compact banner mode
compact: false
# Tool progress display level (CLI and gateway)
# off: Silent — no tool activity shown, just the final response
# new: Show a tool indicator only when the tool changes (skip repeats)
# all: Show every tool call with a short preview (default)
# verbose: Full args, results, and debug logs (same as /verbose)
# Toggle at runtime with /verbose in the CLI
tool_progress: all
+190 -1717
View File
File diff suppressed because it is too large Load Diff
+42
View File
@@ -0,0 +1,42 @@
#!/bin/bash
# Browser-focused data generation run
# Uses browser-use-tasks.jsonl (6504 tasks)
# Distribution: browser 97%, web 20%, vision 12%, terminal 15%
# Create logs directory if it doesn't exist
mkdir -p logs
# Generate log filename with timestamp
LOG_FILE="logs/browser_tasks_$(date +%Y%m%d_%H%M%S).log"
echo "📝 Logging output to: $LOG_FILE"
echo "🌐 Running browser-focused tasks with browser_tasks distribution"
python batch_runner.py \
--dataset_file="browser-use-tasks.jsonl" \
--batch_size=20 \
--run_name="browser_tasks" \
--distribution="browser_tasks" \
--model="moonshotai/kimi-k2.5" \
--verbose \
--base_url="https://openrouter.ai/api/v1" \
--num_workers=50 \
--max_turns=60 \
--resume \
--ephemeral_system_prompt="You are an AI assistant with browser automation capabilities. Your primary task is to navigate and interact with web pages to accomplish user goals.
IMPORTANT GUIDELINES:
1. SEARCHING: Do NOT try to search directly on Google or other search engines via the browser - they block automated searches. Instead, ALWAYS use the web_search tool first to find URLs for any pages you need to visit, then use browser tools to navigate to those URLs.
2. COOKIE/PRIVACY DIALOGS: After navigating to a page, ALWAYS check if there are cookie consent dialogs, privacy popups, or overlay modals blocking the page. These appear in snapshots as 'dialog' elements with buttons like 'Close', 'Accept', 'Accept All', 'Decline', 'I Agree', 'Got it', 'OK', or 'X'. You MUST dismiss these dialogs FIRST by clicking the appropriate button before trying to interact with other page elements. After dismissing a dialog, take a fresh browser_snapshot to get updated element references.
3. HANDLING TIMEOUTS: If an action times out, it often means the element is blocked by an overlay or the page state has changed. Take a new snapshot to see the current page state and look for any dialogs or popups that need to be dismissed. If there is no dialog box to bypass, then try a new method or report the error to the user and complete the task.
4. GENERAL: Use browser tools to click elements, fill forms, extract information, and perform web-based tasks. If terminal is available, use it for any local file operations or computations needed to support your web tasks. Be thorough in verifying your actions and handle any errors gracefully by retrying or trying alternative approaches." \
2>&1 | tee "$LOG_FILE"
echo "✅ Log saved to: $LOG_FILE"
# --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
+26
View File
@@ -0,0 +1,26 @@
#!/bin/bash
# Create logs directory if it doesn't exist
mkdir -p logs
# Generate a timestamp for the log file
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_FILE="logs/imagen_eval_gpt5_${TIMESTAMP}.log"
echo "📝 Logging output to: $LOG_FILE"
python batch_runner.py \
--dataset_file="source-data/hermes-agent-imagen-data/hermes_agent_imagen_train_sft.jsonl" \
--batch_size=20 \
--run_name="imagen_train_sft_glm4.7" \
--distribution="image_gen" \
--model="z-ai/glm-4.7" \
--base_url="https://openrouter.ai/api/v1" \
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
--num_workers=50 \
--max_turns=25 \
--ephemeral_system_prompt="When generating an image for the user view the image by using the vision_analyze tool to ensure it is what the user wanted. If it isn't feel free to retry a few times. If none are perfect, choose the best option that is the closest match, and explain its imperfections. If the image generation tool fails, try again a few times. If the vision analyze tool fails, provide the image to the user and explain it is your best effort attempt." \
2>&1 | tee "$LOG_FILE"
echo "✅ Log saved to: $LOG_FILE"
# --verbose \
+26
View File
@@ -0,0 +1,26 @@
#!/bin/bash
# Create logs directory if it doesn't exist
mkdir -p logs
# Generate log filename with timestamp
LOG_FILE="logs/glm4.7-thinking-sft1_$(date +%Y%m%d_%H%M%S).log"
echo "📝 Logging output to: $LOG_FILE"
python batch_runner.py \
--dataset_file="source-data/hermes-agent-agent-tasks-1/agent_tasks_sft_2.jsonl" \
--batch_size=20 \
--run_name="megascience_glm4.7-thinking-sft2" \
--distribution="science" \
--model="z-ai/glm-4.7" \
--base_url="https://openrouter.ai/api/v1" \
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
--num_workers=15 \
--max_turns=60 \
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use the terminal or search tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should only be confident in your own reasoning, knowledge, or calculations if you've exhaustively used all tools available to you to that can help you verify or validate your work. Always pip install any packages you need to use the python scripts you want to run. If you need to use a tool that isn't available, you can use the terminal tool to install or create it in many cases as well. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Search for at least 3 sources, but not more than 12, so you can maintain focused context." \
2>&1 | tee "$LOG_FILE"
echo "✅ Log saved to: $LOG_FILE"
# --verbose \
+27
View File
@@ -0,0 +1,27 @@
#!/bin/bash
# Create logs directory if it doesn't exist
mkdir -p logs
# Generate log filename with timestamp
LOG_FILE="logs/glm4.7-thinking-sft1-10k_$(date +%Y%m%d_%H%M%S).log"
echo "📝 Logging output to: $LOG_FILE"
python batch_runner.py \
--dataset_file="source-data/hermes-agent-megascience-data/hermes_agent_megascience_sft_train_1_10k.jsonl" \
--batch_size=20 \
--run_name="megascience_glm4.7-thinking-sft1" \
--distribution="science" \
--model="z-ai/glm-4.7" \
--base_url="https://openrouter.ai/api/v1" \
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
--num_workers=50 \
--max_turns=60 \
--resume \
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used for furthering results. Always use the terminal or search tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should only be confident in your own reasoning, knowledge, or calculations if you've exhaustively used all tools available to you to that can help you verify or validate your work. Always pip install any packages you need to use the python scripts you want to run. If you need to use a tool that isn't available, you can use the terminal tool to install or create it in many cases as well. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Search for at least 3 sources, but not more than 12, so you can maintain a focused context." \
2>&1 | tee "$LOG_FILE"
echo "✅ Log saved to: $LOG_FILE"
# --verbose \
+28
View File
@@ -0,0 +1,28 @@
#!/bin/bash
# Create logs directory if it doesn't exist
mkdir -p logs
# Generate log filename with timestamp
LOG_FILE="logs/glm4.7-terminal-tasks_$(date +%Y%m%d_%H%M%S).log"
echo "📝 Logging output to: $LOG_FILE"
python batch_runner.py \
--dataset_file="source-data/raw_tasks_prompts.jsonl" \
--batch_size=20 \
--run_name="terminal-tasks-glm4.7-thinking" \
--distribution="default" \
--model="z-ai/glm-4.7" \
--base_url="https://openrouter.ai/api/v1" \
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
--num_workers=50 \
--max_turns=60 \
--ephemeral_system_prompt="You have access to a variety of tools to help you complete coding, system administration, and general computing tasks. You can use them in sequence and build off of the results of prior tools you've used. Always use the terminal tool to execute commands, write code, install packages, and verify your work. You should test and validate everything you create. Always pip install any packages you need (use --break-system-packages if needed). If you need a tool that isn't available, you can use the terminal to install or create it. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Use web search when you need to look up documentation, APIs, or current best practices." \
2>&1 | tee "$LOG_FILE"
echo "✅ Log saved to: $LOG_FILE"
# --verbose \
# --resume \
+12
View File
@@ -0,0 +1,12 @@
python batch_runner.py \
--dataset_file="hermes-agent-megascience-data/hermes_agent_megascience_eval.jsonl" \
--batch_size=10 \
--run_name="megascience_eval_gpt5_2" \
--distribution="science" \
--model="gpt-5" \
--base_url="https://api.openai.com/v1" \
--api_key="${OPENAI_API_KEY}" \
--num_workers=5 \
--max_turns=30 \
--verbose \
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use a tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should not be confident in your own reasoning, knowledge, or calculations without using a tool to verify or validate your work."
+12
View File
@@ -0,0 +1,12 @@
python batch_runner.py \
--dataset_file="source-data/hermes-agent-agent-tasks-1/agent_tasks_eval.jsonl" \
--batch_size=50 \
--run_name="megascience_sft_minimax-m2.1-thinking-2-eval" \
--distribution="science" \
--model="minimax/minimax-m2.1" \
--base_url="https://openrouter.ai/api/v1" \
--providers_allowed="minimax" \
--num_workers=1 \
--max_turns=40 \
--verbose \
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use the terminal or search tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should only be confident in your own reasoning, knowledge, or calculations if you've exhaustively used all tools available to you to that can help you verify or validate your work. Always pip install any packages you need to use the python scripts you want to run. If you need to use a tool that isn't available, you can use the terminal tool to install or create it in many cases as well. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Search for at least 3 sources, but not more than 12."
+29
View File
@@ -0,0 +1,29 @@
#!/bin/bash
# Create logs directory if it doesn't exist
mkdir -p logs
# Generate log filename with timestamp
LOG_FILE="logs/glm4.7-terminal-tasks-newterm_$(date +%Y%m%d_%H%M%S).log"
echo "📝 Logging output to: $LOG_FILE"
python batch_runner.py \
--dataset_file="source-data/hermes-agent-agent-tasks-1/agent_tasks_eval.jsonl" \
--batch_size=1 \
--run_name="terminal-tasks-test-newterm" \
--distribution="terminal_only" \
--verbose \
--model="z-ai/glm-4.7" \
--base_url="https://openrouter.ai/api/v1" \
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
--num_workers=5 \
--max_turns=60 \
--ephemeral_system_prompt="You have access to a variety of tools to help you complete coding, system administration, and general computing tasks. You can use them in sequence and build off of the results of prior tools you've used. Always use the terminal tool to execute commands, write code, install packages, and verify your work. You should test and validate everything you create. Always pip install any packages you need (use --break-system-packages if needed). If you need a tool that isn't available, you can use the terminal to install or create it. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Use web search when you need to look up documentation, APIs, or current best practices." \
2>&1 | tee "$LOG_FILE"
echo "✅ Log saved to: $LOG_FILE"
# --verbose \
# --resume \
+33
View File
@@ -0,0 +1,33 @@
#!/bin/bash
# Terminal-only evaluation run using Modal sandboxes
# Uses 10 sample tasks from nous-terminal-tasks
# Create logs directory if it doesn't exist
mkdir -p logs
# Generate log filename with timestamp
LOG_FILE="logs/terminal_eval_$(date +%Y%m%d_%H%M%S).log"
echo "📝 Logging output to: $LOG_FILE"
echo "🔧 Using Modal sandboxes (TERMINAL_ENV=modal)"
# Set terminal to use Modal
export TERMINAL_ENV=modal
export TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
export TERMINAL_TIMEOUT=300
python batch_runner.py \
--dataset_file="nous-terminal-tasks_eval.jsonl" \
--batch_size=5 \
--run_name="terminal_eval" \
--distribution="terminal_only" \
--model="z-ai/glm-4.7" \
--base_url="https://openrouter.ai/api/v1" \
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
--num_workers=2 \
--max_turns=30 \
--ephemeral_system_prompt="You have access to a terminal tool for executing commands. Use it to complete the task. Install any packages you need with apt-get or pip (use --break-system-packages if needed). Do not use interactive tools (vim, nano, python repl). If git output is large, pipe to cat." \
2>&1 | tee "$LOG_FILE"
echo "✅ Log saved to: $LOG_FILE"
+46
View File
@@ -0,0 +1,46 @@
#!/bin/bash
# Mixed browser+terminal data generation run
# Uses mixed-browser-terminal-tasks.jsonl (200 tasks)
# Distribution: browser 92%, terminal 92%, web 35%, vision 15%, image_gen 15%
# Create logs directory if it doesn't exist
mkdir -p logs
# Generate log filename with timestamp
LOG_FILE="logs/mixed_tasks_$(date +%Y%m%d_%H%M%S).log"
echo "📝 Logging output to: $LOG_FILE"
echo "🔀 Running mixed browser+terminal tasks with mixed_tasks distribution"
# Set terminal environment
# SIF images are automatically built/cached by terminal_tool.py
export TERMINAL_ENV=singularity
export TERMINAL_SINGULARITY_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20"
export TERMINAL_TIMEOUT=300
# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp)
if [ -d "/scratch" ] && [ -w "/scratch" ]; then
CACHE_BASE="/scratch/$USER/.apptainer"
else
CACHE_BASE="/tmp/$USER/.apptainer"
fi
export APPTAINER_CACHEDIR="$CACHE_BASE"
export APPTAINER_TMPDIR="$CACHE_BASE/tmp"
mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
echo "📁 Apptainer cache: $APPTAINER_CACHEDIR"
python batch_runner.py \
--dataset_file="mixed-browser-terminal-tasks.jsonl" \
--batch_size=20 \
--run_name="mixed_tasks" \
--distribution="mixed_tasks" \
--model="moonshotai/kimi-k2.5" \
--base_url="https://openrouter.ai/api/v1" \
--num_workers=25 \
--max_turns=60 \
--ephemeral_system_prompt="You are an AI assistant capable of both browser automation and terminal operations. Use browser tools to navigate websites, interact with web pages, fill forms, and extract information. Use terminal tools to execute commands, write and run code, install packages (use --break-system-packages with pip if needed), and perform local computations. When web search is available, use it to find URLs, documentation, or current information. If vision is available, use it to analyze images or screenshots. If image generation is available, use it when the task requires creating images. Combine browser and terminal capabilities effectively - for example, you might use the browser to fetch data from a website and terminal to process or analyze it. Always verify your work and handle errors gracefully. Whenever you can do something in a terminal instead of a web browser, you should choose to do so, as it's much cheaper." \
2>&1 | tee "$LOG_FILE"
echo "✅ Log saved to: $LOG_FILE"
+50
View File
@@ -0,0 +1,50 @@
#!/bin/bash
# Terminal-focused data generation run
# Uses nous-terminal-tasks.jsonl (597 tasks)
# Distribution: terminal 97%, web 15%, browser 0%, vision 8%, image_gen 3%
# Create logs directory if it doesn't exist
mkdir -p logs
# Generate log filename with timestamp
LOG_FILE="logs/terminal_tasks_$(date +%Y%m%d_%H%M%S).log"
echo "📝 Logging output to: $LOG_FILE"
echo "💻 Running terminal-focused tasks with terminal_tasks distribution"
# Set terminal environment
# SIF images are automatically built/cached by terminal_tool.py
export TERMINAL_ENV=singularity
export TERMINAL_SINGULARITY_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20"
export TERMINAL_TIMEOUT=300
# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp)
if [ -d "/scratch" ] && [ -w "/scratch" ]; then
CACHE_BASE="/scratch/$USER/.apptainer"
else
CACHE_BASE="/tmp/$USER/.apptainer"
fi
export APPTAINER_CACHEDIR="$CACHE_BASE"
export APPTAINER_TMPDIR="$CACHE_BASE/tmp"
mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
echo "📁 Apptainer cache: $APPTAINER_CACHEDIR"
echo "🐳 Image: $TERMINAL_SINGULARITY_IMAGE (auto-converted to SIF on first use)"
python batch_runner.py \
--dataset_file="nous-terminal-tasks.jsonl" \
--batch_size=5 \
--run_name="terminal_tasks-kimi-k2.5" \
--distribution="terminal_tasks" \
--model="moonshotai/kimi-k2.5" \
--verbose \
--base_url="https://openrouter.ai/api/v1" \
--num_workers=80 \
--max_turns=60 \
--providers_ignored="Novita" \
--resume \
--ephemeral_system_prompt="You have access to a terminal tool for executing commands and completing coding, system administration, and computing tasks. Use the terminal to write code, run scripts, install packages (use --break-system-packages with pip if needed), manipulate files, and verify your work. Always test and validate code you create. Do not use interactive tools like vim, nano, or python REPL. If git output is large, pipe to cat. When web search is available, use it to look up documentation, APIs, or best practices. If browser tools are available, use them for web interactions that require page manipulation. Do not use the terminal to communicate with the user - only your final response will be shown to them." \
2>&1 | tee "$LOG_FILE"
echo "✅ Log saved to: $LOG_FILE"
+23
View File
@@ -0,0 +1,23 @@
#!/bin/bash
# Check if a prompt argument was provided
if [ $# -eq 0 ]; then
echo "Error: Please provide a prompt as an argument"
echo "Usage: $0 \"your prompt here\""
exit 1
fi
# Get the prompt from the first argument
PROMPT="$1"
# Set debug mode for web tools
export WEB_TOOLS_DEBUG=true
# Run the agent with the provided prompt
python run_agent.py \
--query "$PROMPT" \
--max_turns 30 \
--model claude-sonnet-4-5-20250929 \
--base_url https://api.anthropic.com/v1/ \
--api_key $ANTHROPIC_API_KEY \
--save_trajectories
+21
View File
@@ -0,0 +1,21 @@
#!/bin/bash
# Test skills tool with Kimi K2.5
# Usage: ./configs/test_skills_kimi.sh "your query here"
# Example: ./configs/test_skills_kimi.sh "List available skills and show me the vllm skill"
# Default query if none provided
QUERY="${1:-List all available skills. Then show me the axolotl skill and view one of its reference files.}"
echo "🎯 Testing Skills Tool with Kimi K2.5"
echo "📝 Query: $QUERY"
echo "="
python run_agent.py \
--enabled_toolsets=skills \
--model="moonshotai/kimi-k2.5" \
--base_url="https://openrouter.ai/api/v1" \
--max_turns=10 \
--verbose \
--save_sample \
--query="$QUERY"
+8 -7
View File
@@ -6,12 +6,12 @@ This module provides scheduled task execution, allowing the agent to:
- Self-schedule reminders and follow-up tasks
- Execute tasks in isolated sessions (no prior context)
Cron jobs are executed automatically by the gateway daemon:
hermes gateway install # Install as system service (recommended)
hermes gateway # Or run in foreground
The gateway ticks the scheduler every 60 seconds. A file lock prevents
duplicate execution if multiple processes overlap.
Usage:
# Run due jobs (for system cron integration)
python -c "from cron import tick; tick()"
# Or via CLI
python cli.py --cron-daemon
"""
from cron.jobs import (
@@ -22,7 +22,7 @@ from cron.jobs import (
update_job,
JOBS_FILE,
)
from cron.scheduler import tick
from cron.scheduler import tick, run_daemon
__all__ = [
"create_job",
@@ -31,5 +31,6 @@ __all__ = [
"remove_job",
"update_job",
"tick",
"run_daemon",
"JOBS_FILE",
]
+2 -14
View File
@@ -6,7 +6,6 @@ Output is saved to ~/.hermes/cron/output/{job_id}/{timestamp}.md
"""
import json
import tempfile
import os
import re
import uuid
@@ -201,19 +200,8 @@ def load_jobs() -> List[Dict[str, Any]]:
def save_jobs(jobs: List[Dict[str, Any]]):
"""Save all jobs to storage."""
ensure_dirs()
fd, tmp_path = tempfile.mkstemp(dir=str(JOBS_FILE.parent), suffix='.tmp', prefix='.jobs_')
try:
with os.fdopen(fd, 'w', encoding='utf-8') as f:
json.dump({"jobs": jobs, "updated_at": datetime.now().isoformat()}, f, indent=2)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, JOBS_FILE)
except BaseException:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
with open(JOBS_FILE, 'w', encoding='utf-8') as f:
json.dump({"jobs": jobs, "updated_at": datetime.now().isoformat()}, f, indent=2)
def create_job(
+90 -245
View File
@@ -1,221 +1,59 @@
"""
Cron job scheduler - executes due jobs.
Provides tick() which checks for due jobs and runs them. The gateway
calls this every 60 seconds from a background thread.
Uses a file-based lock (~/.hermes/cron/.tick.lock) so only one tick
runs at a time if multiple processes overlap.
This module provides:
- tick(): Run all due jobs once (for system cron integration)
- run_daemon(): Run continuously, checking every 60 seconds
"""
import asyncio
import logging
import os
import sys
import time
import traceback
# fcntl is Unix-only; on Windows use msvcrt for file locking
try:
import fcntl
except ImportError:
fcntl = None
try:
import msvcrt
except ImportError:
msvcrt = None
from datetime import datetime
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from cron.jobs import get_due_jobs, mark_job_run, save_job_output
# Resolve Hermes home directory (respects HERMES_HOME override)
_hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
# File-based lock prevents concurrent ticks from gateway + daemon + systemd timer
_LOCK_DIR = _hermes_home / "cron"
_LOCK_FILE = _LOCK_DIR / ".tick.lock"
def _resolve_origin(job: dict) -> Optional[dict]:
"""Extract origin info from a job, returning {platform, chat_id, chat_name} or None."""
origin = job.get("origin")
if not origin:
return None
platform = origin.get("platform")
chat_id = origin.get("chat_id")
if platform and chat_id:
return origin
return None
def _deliver_result(job: dict, content: str) -> None:
"""
Deliver job output to the configured target (origin chat, specific platform, etc.).
Uses the standalone platform send functions from send_message_tool so delivery
works whether or not the gateway is running.
"""
deliver = job.get("deliver", "local")
origin = _resolve_origin(job)
if deliver == "local":
return
# Resolve target platform + chat_id
if deliver == "origin":
if not origin:
logger.warning("Job '%s' deliver=origin but no origin stored, skipping delivery", job["id"])
return
platform_name = origin["platform"]
chat_id = origin["chat_id"]
elif ":" in deliver:
platform_name, chat_id = deliver.split(":", 1)
else:
# Bare platform name like "telegram" — need to resolve to origin or home channel
platform_name = deliver
if origin and origin.get("platform") == platform_name:
chat_id = origin["chat_id"]
else:
# Fall back to home channel
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
if not chat_id:
logger.warning("Job '%s' deliver=%s but no chat_id or home channel. Set via: hermes config set %s_HOME_CHANNEL <channel_id>", job["id"], deliver, platform_name.upper())
return
from tools.send_message_tool import _send_to_platform
from gateway.config import load_gateway_config, Platform
platform_map = {
"telegram": Platform.TELEGRAM,
"discord": Platform.DISCORD,
"slack": Platform.SLACK,
"whatsapp": Platform.WHATSAPP,
}
platform = platform_map.get(platform_name.lower())
if not platform:
logger.warning("Job '%s': unknown platform '%s' for delivery", job["id"], platform_name)
return
try:
config = load_gateway_config()
except Exception as e:
logger.error("Job '%s': failed to load gateway config for delivery: %s", job["id"], e)
return
pconfig = config.platforms.get(platform)
if not pconfig or not pconfig.enabled:
logger.warning("Job '%s': platform '%s' not configured/enabled", job["id"], platform_name)
return
# Run the async send in a fresh event loop (safe from any thread)
try:
result = asyncio.run(_send_to_platform(platform, pconfig, chat_id, content))
except RuntimeError:
# asyncio.run() fails if there's already a running loop in this thread;
# spin up a new thread to avoid that.
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, content))
result = future.result(timeout=30)
except Exception as e:
logger.error("Job '%s': delivery to %s:%s failed: %s", job["id"], platform_name, chat_id, e)
return
if result and result.get("error"):
logger.error("Job '%s': delivery error: %s", job["id"], result["error"])
else:
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
# Mirror the delivered content into the target's gateway session
try:
from gateway.mirror import mirror_to_session
mirror_to_session(platform_name, chat_id, content, source_label="cron")
except Exception:
pass
def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
def run_job(job: dict) -> tuple[bool, str, Optional[str]]:
"""
Execute a single cron job.
Returns:
Tuple of (success, full_output_doc, final_response, error_message)
Tuple of (success, output, error_message)
"""
from run_agent import AIAgent
job_id = job["id"]
job_name = job["name"]
prompt = job["prompt"]
origin = _resolve_origin(job)
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
logger.info("Prompt: %s", prompt[:100])
# Inject origin context so the agent's send_message tool knows the chat
if origin:
os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
if origin.get("chat_name"):
os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
print(f"[cron] Running job '{job_name}' (ID: {job_id})")
print(f"[cron] Prompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}")
try:
# Re-read .env and config.yaml fresh every run so provider/key
# changes take effect without a gateway restart.
from dotenv import load_dotenv
try:
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="utf-8")
except UnicodeDecodeError:
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
try:
import yaml
_cfg_path = str(_hermes_home / "config.yaml")
if os.path.exists(_cfg_path):
with open(_cfg_path) as _f:
_cfg = yaml.safe_load(_f) or {}
_model_cfg = _cfg.get("model", {})
if isinstance(_model_cfg, str):
model = _model_cfg
elif isinstance(_model_cfg, dict):
model = _model_cfg.get("default", model)
except Exception:
pass
from hermes_cli.runtime_provider import (
resolve_runtime_provider,
format_runtime_provider_error,
)
try:
runtime = resolve_runtime_provider(
requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
)
except Exception as exc:
message = format_runtime_provider_error(exc)
raise RuntimeError(message) from exc
# Create agent with default settings
# Jobs run in isolated sessions (no prior context)
agent = AIAgent(
model=model,
api_key=runtime.get("api_key"),
base_url=runtime.get("base_url"),
provider=runtime.get("provider"),
api_mode=runtime.get("api_mode"),
model=os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6"),
quiet_mode=True,
session_id=f"cron_{job_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
)
# Run the conversation
result = agent.run_conversation(prompt)
# Extract final response
final_response = result.get("final_response", "")
if not final_response:
final_response = "(No response generated)"
# Build output document
output = f"""# Cron Job: {job_name}
**Job ID:** {job_id}
@@ -231,13 +69,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
{final_response}
"""
logger.info("Job '%s' completed successfully", job_name)
return True, output, final_response, None
print(f"[cron] Job '{job_name}' completed successfully")
return True, output, None
except Exception as e:
error_msg = f"{type(e).__name__}: {str(e)}"
logger.error("Job '%s' failed: %s", job_name, error_msg)
print(f"[cron] Job '{job_name}' failed: {error_msg}")
# Build error output
output = f"""# Cron Job: {job_name} (FAILED)
**Job ID:** {job_id}
@@ -256,88 +95,94 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
{traceback.format_exc()}
```
"""
return False, output, "", error_msg
finally:
# Clean up injected env vars so they don't leak to other jobs
for key in ("HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME"):
os.environ.pop(key, None)
return False, output, error_msg
def tick(verbose: bool = True) -> int:
"""
Check and run all due jobs.
Uses a file lock so only one tick runs at a time, even if the gateway's
in-process ticker and a standalone daemon or manual tick overlap.
This is designed to be called by system cron every minute:
*/1 * * * * cd ~/hermes-agent && python -c "from cron import tick; tick()"
Args:
verbose: Whether to print status messages
Returns:
Number of jobs executed (0 if another tick is already running)
Number of jobs executed
"""
_LOCK_DIR.mkdir(parents=True, exist_ok=True)
# Cross-platform file locking: fcntl on Unix, msvcrt on Windows
lock_fd = None
try:
lock_fd = open(_LOCK_FILE, "w")
if fcntl:
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
elif msvcrt:
msvcrt.locking(lock_fd.fileno(), msvcrt.LK_NBLCK, 1)
except (OSError, IOError):
logger.debug("Tick skipped — another instance holds the lock")
if lock_fd is not None:
lock_fd.close()
due_jobs = get_due_jobs()
if verbose and not due_jobs:
print(f"[cron] {datetime.now().strftime('%H:%M:%S')} - No jobs due")
return 0
if verbose:
print(f"[cron] {datetime.now().strftime('%H:%M:%S')} - {len(due_jobs)} job(s) due")
executed = 0
for job in due_jobs:
try:
success, output, error = run_job(job)
# Save output to file
output_file = save_job_output(job["id"], output)
if verbose:
print(f"[cron] Output saved to: {output_file}")
# Mark job as run (handles repeat counting, next_run computation)
mark_job_run(job["id"], success, error)
executed += 1
except Exception as e:
print(f"[cron] Error processing job {job['id']}: {e}")
mark_job_run(job["id"], False, str(e))
return executed
def run_daemon(check_interval: int = 60, verbose: bool = True):
"""
Run the cron daemon continuously.
Checks for due jobs every `check_interval` seconds.
Args:
check_interval: Seconds between checks (default: 60)
verbose: Whether to print status messages
"""
print(f"[cron] Starting daemon (checking every {check_interval}s)")
print(f"[cron] Press Ctrl+C to stop")
print()
try:
due_jobs = get_due_jobs()
if verbose and not due_jobs:
logger.info("%s - No jobs due", datetime.now().strftime('%H:%M:%S'))
return 0
if verbose:
logger.info("%s - %s job(s) due", datetime.now().strftime('%H:%M:%S'), len(due_jobs))
executed = 0
for job in due_jobs:
while True:
try:
success, output, final_response, error = run_job(job)
output_file = save_job_output(job["id"], output)
if verbose:
logger.info("Output saved to: %s", output_file)
# Deliver the final response to the origin/target chat
deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}"
if deliver_content:
try:
_deliver_result(job, deliver_content)
except Exception as de:
logger.error("Delivery failed for job %s: %s", job["id"], de)
mark_job_run(job["id"], success, error)
executed += 1
tick(verbose=verbose)
except Exception as e:
logger.error("Error processing job %s: %s", job['id'], e)
mark_job_run(job["id"], False, str(e))
return executed
finally:
if fcntl:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
elif msvcrt:
try:
msvcrt.locking(lock_fd.fileno(), msvcrt.LK_UNLCK, 1)
except (OSError, IOError):
pass
lock_fd.close()
print(f"[cron] Tick error: {e}")
time.sleep(check_interval)
except KeyboardInterrupt:
print("\n[cron] Daemon stopped")
if __name__ == "__main__":
tick(verbose=True)
# Allow running directly: python cron/scheduler.py [daemon|tick]
import argparse
parser = argparse.ArgumentParser(description="Hermes Cron Scheduler")
parser.add_argument("mode", choices=["daemon", "tick"], default="tick", nargs="?",
help="Mode: 'tick' to run once, 'daemon' to run continuously")
parser.add_argument("--interval", type=int, default=60,
help="Check interval in seconds for daemon mode")
parser.add_argument("--quiet", "-q", action="store_true",
help="Suppress status messages")
args = parser.parse_args()
if args.mode == "daemon":
run_daemon(check_interval=args.interval, verbose=not args.quiet)
else:
tick(verbose=not args.quiet)
@@ -1,5 +0,0 @@
{"prompt": "Go to https://news.ycombinator.com and find the top 5 posts on the front page. For each post, get the title, URL, points, and number of comments. Return the results as a formatted summary."}
{"prompt": "Navigate to https://en.wikipedia.org/wiki/Hermes and extract the first paragraph of the article, the image caption, and the list of items in the infobox. Summarize what you find."}
{"prompt": "Go to https://github.com/trending and find the top 3 trending repositories today. For each repo, get the name, description, language, and star count. Write the results to a file called trending_repos.md."}
{"prompt": "Visit https://httpbin.org/forms/post and fill out the form with sample data (customer name: Jane Doe, size: Medium, topping: Bacon, delivery time: 12:00). Submit the form and report what the response page shows."}
{"prompt": "Navigate to https://books.toscrape.com, browse to the Travel category, find the highest-rated book, and extract its title, price, availability, and description."}
@@ -1,65 +0,0 @@
#!/bin/bash
# =============================================================================
# Example: Browser-Focused Data Generation
# =============================================================================
#
# Generates tool-calling trajectories for browser automation tasks.
# The agent navigates websites, fills forms, extracts information, etc.
#
# Distribution: browser 97%, web 20%, vision 12%, terminal 15%
#
# Prerequisites:
# - OPENROUTER_API_KEY in ~/.hermes/.env
# - BROWSERBASE_API_KEY in ~/.hermes/.env (for browser tools)
# - A dataset JSONL file with one {"prompt": "..."} per line
#
# Usage:
# cd ~/.hermes/hermes-agent
# bash datagen-config-examples/run_browser_tasks.sh
#
# Output: data/browser_tasks_example/trajectories.jsonl
# =============================================================================
mkdir -p logs
LOG_FILE="logs/browser_tasks_$(date +%Y%m%d_%H%M%S).log"
echo "📝 Logging to: $LOG_FILE"
# Point to the example dataset in this directory
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
python batch_runner.py \
--dataset_file="$SCRIPT_DIR/example_browser_tasks.jsonl" \
--batch_size=5 \
--run_name="browser_tasks_example" \
--distribution="browser_tasks" \
--model="anthropic/claude-sonnet-4" \
--base_url="https://openrouter.ai/api/v1" \
--num_workers=3 \
--max_turns=30 \
--ephemeral_system_prompt="You are an AI assistant with browser automation capabilities. Your primary task is to navigate and interact with web pages to accomplish user goals.
IMPORTANT GUIDELINES:
1. SEARCHING: Do NOT search directly on Google via the browser — they block automated searches. Use the web_search tool first to find URLs, then navigate to them with browser tools.
2. COOKIE/PRIVACY DIALOGS: After navigating to a page, check for cookie consent or privacy popups. Dismiss them by clicking Accept/Close/OK before interacting with other elements. Take a fresh browser_snapshot afterward.
3. HANDLING TIMEOUTS: If an action times out, the element may be blocked by an overlay. Take a new snapshot and look for dialogs to dismiss. If none, try an alternative approach or report the issue.
4. GENERAL: Use browser tools to click, fill forms, and extract information. Use terminal for local file operations. Verify your actions and handle errors gracefully." \
2>&1 | tee "$LOG_FILE"
echo "✅ Done. Log: $LOG_FILE"
# =============================================================================
# Common options you can add:
#
# --resume Resume from checkpoint if interrupted
# --verbose Enable detailed logging
# --max_tokens=63000 Set max response tokens
# --reasoning_disabled Disable model thinking/reasoning tokens
# --providers_allowed="anthropic,google" Restrict to specific providers
# --prefill_messages_file="configs/prefill.json" Few-shot priming
# =============================================================================
-7
View File
@@ -1,7 +0,0 @@
# Documentation
All documentation has moved to the website:
**📖 [hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)**
The documentation source files live in [`website/docs/`](../website/docs/).
+104
View File
@@ -0,0 +1,104 @@
# Agents
The agent is the core loop that orchestrates LLM calls and tool execution.
## AIAgent Class
The main agent is implemented in `run_agent.py`:
```python
class AIAgent:
def __init__(
self,
model: str = "anthropic/claude-sonnet-4",
api_key: str = None,
base_url: str = "https://openrouter.ai/api/v1",
max_turns: int = 20,
enabled_toolsets: list = None,
disabled_toolsets: list = None,
verbose_logging: bool = False,
):
# Initialize OpenAI client, load tools based on toolsets
...
def chat(self, user_message: str, task_id: str = None) -> str:
# Main entry point - runs the agent loop
...
```
## Agent Loop
The core loop in `_run_agent_loop()`:
```
1. Add user message to conversation
2. Call LLM with tools
3. If LLM returns tool calls:
- Execute each tool
- Add tool results to conversation
- Go to step 2
4. If LLM returns text response:
- Return response to user
```
```python
while turns < max_turns:
response = client.chat.completions.create(
model=model,
messages=messages,
tools=tool_schemas,
)
if response.tool_calls:
for tool_call in response.tool_calls:
result = await execute_tool(tool_call)
messages.append(tool_result_message(result))
turns += 1
else:
return response.content
```
## Conversation Management
Messages are stored as a list of dicts following OpenAI format:
```python
messages = [
{"role": "system", "content": "You are a helpful assistant..."},
{"role": "user", "content": "Search for Python tutorials"},
{"role": "assistant", "content": None, "tool_calls": [...]},
{"role": "tool", "tool_call_id": "...", "content": "..."},
{"role": "assistant", "content": "Here's what I found..."},
]
```
## Reasoning Context
For models that support reasoning (chain-of-thought), the agent:
1. Extracts `reasoning_content` from API responses
2. Stores it in `assistant_msg["reasoning"]` for trajectory export
3. Passes it back via `reasoning_content` field on subsequent turns
## Trajectory Export
Conversations can be exported for training:
```python
agent = AIAgent(save_trajectories=True)
agent.chat("Do something")
# Saves to trajectories/*.jsonl in ShareGPT format
```
## Batch Processing
For processing multiple prompts, use `batch_runner.py`:
```bash
python batch_runner.py \
--dataset_file=prompts.jsonl \
--batch_size=20 \
--num_workers=4 \
--run_name=my_run
```
See `batch_runner.py` for parallel execution with checkpointing.
+296
View File
@@ -0,0 +1,296 @@
# CLI
The Hermes Agent CLI provides an interactive terminal interface for working with the agent.
## Running the CLI
```bash
# Basic usage
./hermes
# With specific model
./hermes --model "anthropic/claude-sonnet-4"
# With specific toolsets
./hermes --toolsets "web,terminal,skills"
# Verbose mode
./hermes --verbose
```
## Architecture
The CLI is implemented in `cli.py` and uses:
- **Rich** - Welcome banner with ASCII art and styled panels
- **prompt_toolkit** - Fixed input area with command history
- **KawaiiSpinner** - Animated feedback during operations
```
┌─────────────────────────────────────────────────┐
│ HERMES-AGENT ASCII Logo │
│ ┌─────────────┐ ┌────────────────────────────┐ │
│ │ Caduceus │ │ Model: claude-opus-4.5 │ │
│ │ ASCII Art │ │ Terminal: local │ │
│ │ │ │ Working Dir: /home/user │ │
│ │ │ │ Available Tools: 19 │ │
│ │ │ │ Available Skills: 12 │ │
│ └─────────────┘ └────────────────────────────┘ │
└─────────────────────────────────────────────────┘
│ Conversation output scrolls here... │
│ │
│ User: Hello! │
│ ────────────────────────────────────────────── │
│ (◕‿◕✿) 🧠 pondering... (2.3s) │
│ ✧٩(ˊᗜˋ*)و✧ got it! (2.3s) │
│ │
│ Assistant: Hello! How can I help you today? │
├─────────────────────────────────────────────────┤
[Fixed input area at bottom] │
└─────────────────────────────────────────────────┘
```
## Commands
| Command | Description |
|---------|-------------|
| `/help` | Show available commands |
| `/tools` | List available tools grouped by toolset |
| `/toolsets` | List available toolsets with descriptions |
| `/model [name]` | Show or change the current model |
| `/prompt [text]` | View/set/clear custom system prompt |
| `/personality [name]` | Set a predefined personality |
| `/clear` | Clear screen and reset conversation |
| `/reset` | Reset conversation only (keep screen) |
| `/history` | Show conversation history |
| `/save` | Save current conversation to file |
| `/config` | Show current configuration |
| `/quit` | Exit the CLI (also: `/exit`, `/q`) |
## Configuration
The CLI is configured via `cli-config.yaml`. Copy from `cli-config.yaml.example`:
```bash
cp cli-config.yaml.example cli-config.yaml
```
### Model Configuration
```yaml
model:
default: "anthropic/claude-opus-4.5"
base_url: "https://openrouter.ai/api/v1"
```
### Terminal Configuration
The CLI supports multiple terminal backends:
```yaml
# Local execution (default)
terminal:
env_type: "local"
cwd: "." # Current directory
# SSH remote execution (sandboxed - agent can't touch its own code)
terminal:
env_type: "ssh"
cwd: "/home/myuser/project"
ssh_host: "my-server.example.com"
ssh_user: "myuser"
ssh_key: "~/.ssh/id_rsa"
# Docker container
terminal:
env_type: "docker"
docker_image: "python:3.11"
# Singularity/Apptainer (HPC)
terminal:
env_type: "singularity"
singularity_image: "docker://python:3.11"
# Modal cloud
terminal:
env_type: "modal"
modal_image: "python:3.11"
```
### Sudo Support
The CLI supports interactive sudo prompts:
```
┌──────────────────────────────────────────────────────────┐
│ 🔐 SUDO PASSWORD REQUIRED │
├──────────────────────────────────────────────────────────┤
│ Enter password below (input is hidden), or: │
│ • Press Enter to skip (command fails gracefully) │
│ • Wait 45s to auto-skip │
└──────────────────────────────────────────────────────────┘
Password (hidden):
```
**Options:**
- **Interactive**: Leave `sudo_password` unset - you'll be prompted when needed
- **Configured**: Set `sudo_password` in `cli-config.yaml` to auto-fill
- **Environment**: Set `SUDO_PASSWORD` in `.env` for all runs
Password is cached for the session once entered.
### Toolsets
Control which tools are available:
```yaml
# Enable all tools
toolsets:
- all
# Or enable specific toolsets
toolsets:
- web
- terminal
- skills
```
Available toolsets: `web`, `search`, `terminal`, `browser`, `vision`, `image_gen`, `skills`, `moa`, `debugging`, `safe`
### Personalities
Predefined personalities for the `/personality` command:
```yaml
agent:
personalities:
helpful: "You are a helpful, friendly AI assistant."
kawaii: "You are a kawaii assistant! Use cute expressions..."
pirate: "Arrr! Ye be talkin' to Captain Hermes..."
# Add your own!
```
Built-in personalities:
- `helpful`, `concise`, `technical`, `creative`, `teacher`
- `kawaii`, `catgirl`, `pirate`, `shakespeare`, `surfer`
- `noir`, `uwu`, `philosopher`, `hype`
## Animated Feedback
The CLI provides animated feedback during operations:
### Thinking Animation
During API calls, shows animated spinner with thinking verbs:
```
◜ (。•́︿•̀。) pondering... (1.2s)
◠ (⊙_⊙) contemplating... (2.4s)
✧٩(ˊᗜˋ*)و✧ got it! (3.1s)
```
### Tool Execution Animation
Each tool type has unique animations:
```
⠋ (◕‿◕✿) 🔍 web_search... (0.8s)
▅ (≧◡≦) 💻 terminal... (1.2s)
🌓 (★ω★) 🌐 browser_navigate... (2.1s)
✧ (✿◠‿◠) 🎨 image_generate... (4.5s)
```
## Multi-line Input
For multi-line input, end a line with `\` to continue:
```
Write a function that:\
1. Takes a list of numbers\
2. Returns the sum
```
## Environment Variable Priority
For terminal settings, `cli-config.yaml` takes precedence over `.env`:
1. `cli-config.yaml` (highest priority in CLI)
2. `.env` file
3. System environment variables
4. Default values
This allows you to have different terminal configs for CLI vs batch processing.
## Session Management
- **History**: Command history is saved to `~/.hermes_history`
- **Conversations**: Use `/save` to export conversations
- **Reset**: Use `/clear` for full reset, `/reset` to just clear history
- **Session Logs**: Every session automatically logs to `logs/session_{session_id}.json`
### Session Logging
Sessions are automatically logged to the `logs/` directory:
```
logs/
├── session_20260201_143052_a1b2c3.json
├── session_20260201_150217_d4e5f6.json
└── ...
```
The session ID is displayed in the welcome banner and follows the format: `YYYYMMDD_HHMMSS_UUID`.
Log files contain:
- Full conversation history in trajectory format
- Timestamps for session start and last update
- Model and message count metadata
This is useful for:
- Debugging agent behavior
- Replaying conversations
- Training data inspection
### Context Compression
Long conversations can exceed model context limits. The CLI automatically compresses context when approaching the limit:
```yaml
# In cli-config.yaml
compression:
enabled: true # Enable auto-compression
threshold: 0.85 # Compress at 85% of context limit
summary_model: "google/gemini-2.0-flash-001"
```
**How it works:**
1. Tracks actual token usage from each API response
2. When tokens reach threshold, middle turns are summarized
3. First 3 and last 4 turns are always protected
4. Conversation continues seamlessly after compression
**When compression triggers:**
```
📦 Context compression triggered (170,000 tokens ≥ 170,000 threshold)
📊 Model context limit: 200,000 tokens (85% = 170,000)
🗜️ Summarizing turns 4-15 (12 turns)
✅ Compressed: 20 → 9 messages (~45,000 tokens saved)
```
To disable compression:
```yaml
compression:
enabled: false
```
## Quiet Mode
The CLI runs in "quiet mode" (`HERMES_QUIET=1`), which:
- Suppresses verbose logging from tools
- Enables kawaii-style animated feedback
- Hides terminal environment warnings
- Keeps output clean and user-friendly
For verbose output (debugging), use:
```bash
./hermes --verbose
```
+124
View File
@@ -0,0 +1,124 @@
# LLM Client
Hermes Agent uses the OpenAI Python SDK with OpenRouter as the backend, providing access to many models through a single API.
## Configuration
```python
from openai import OpenAI
client = OpenAI(
api_key=os.getenv("OPENROUTER_API_KEY"),
base_url="https://openrouter.ai/api/v1"
)
```
## Supported Models
Any model available on [OpenRouter](https://openrouter.ai/models):
```python
# Anthropic
model = "anthropic/claude-sonnet-4"
model = "anthropic/claude-opus-4"
# OpenAI
model = "openai/gpt-4o"
model = "openai/o1"
# Google
model = "google/gemini-2.0-flash"
# Open models
model = "meta-llama/llama-3.3-70b-instruct"
model = "deepseek/deepseek-chat-v3"
model = "moonshotai/kimi-k2.5"
```
## Tool Calling
Standard OpenAI function calling format:
```python
response = client.chat.completions.create(
model=model,
messages=messages,
tools=[
{
"type": "function",
"function": {
"name": "web_search",
"description": "Search the web",
"parameters": {
"type": "object",
"properties": {
"query": {"type": "string"}
},
"required": ["query"]
}
}
}
],
)
# Check for tool calls
if response.choices[0].message.tool_calls:
for tool_call in response.choices[0].message.tool_calls:
name = tool_call.function.name
args = json.loads(tool_call.function.arguments)
# Execute tool...
```
## Reasoning Models
Some models return reasoning/thinking content:
```python
# Access reasoning if available
message = response.choices[0].message
if hasattr(message, 'reasoning_content') and message.reasoning_content:
reasoning = message.reasoning_content
# Store for trajectory export
```
## Provider Selection
OpenRouter allows selecting specific providers:
```python
response = client.chat.completions.create(
model=model,
messages=messages,
extra_body={
"provider": {
"order": ["Anthropic", "Google"], # Preferred providers
"ignore": ["Novita"], # Providers to skip
}
}
)
```
## Error Handling
Common errors and handling:
```python
try:
response = client.chat.completions.create(...)
except openai.RateLimitError:
# Back off and retry
except openai.APIError as e:
# Check e.code for specific errors
# 400 = bad request (often provider-specific)
# 502 = bad gateway (retry with different provider)
```
## Cost Tracking
OpenRouter returns usage info:
```python
usage = response.usage
print(f"Tokens: {usage.prompt_tokens} + {usage.completion_tokens}")
print(f"Cost: ${usage.cost:.6f}") # If available
```
+121
View File
@@ -0,0 +1,121 @@
# Message Format & Trajectories
Hermes Agent uses two message formats: the **API format** for LLM calls and the **trajectory format** for training data export.
## API Message Format
Standard OpenAI chat format used during execution:
```python
messages = [
# System prompt
{"role": "system", "content": "You are a helpful assistant with tools..."},
# User query
{"role": "user", "content": "Search for Python tutorials"},
# Assistant with tool call
{
"role": "assistant",
"content": None,
"tool_calls": [{
"id": "call_abc123",
"type": "function",
"function": {
"name": "web_search",
"arguments": "{\"query\": \"Python tutorials\"}"
}
}]
},
# Tool result
{
"role": "tool",
"tool_call_id": "call_abc123",
"content": "{\"results\": [...]}"
},
# Final response
{"role": "assistant", "content": "Here's what I found..."}
]
```
## Trajectory Format (ShareGPT)
Exported for training in ShareGPT format:
```json
{
"conversations": [
{"from": "system", "value": "You are a helpful assistant..."},
{"from": "human", "value": "Search for Python tutorials"},
{"from": "gpt", "value": "<tool_call>\n{\"name\": \"web_search\", \"arguments\": {\"query\": \"Python tutorials\"}}\n</tool_call>"},
{"from": "tool", "value": "<tool_response>\n{\"results\": [...]}\n</tool_response>"},
{"from": "gpt", "value": "Here's what I found..."}
],
"tools": "[{\"type\": \"function\", \"function\": {...}}]",
"source": "hermes-agent"
}
```
## Reasoning Content
For models that output reasoning/chain-of-thought:
**During execution** (API format):
```python
# Stored internally but not sent back to model in content
assistant_msg = {
"role": "assistant",
"content": "Here's what I found...",
"reasoning": "Let me think about this step by step..." # Internal only
}
```
**In trajectory export** (reasoning wrapped in tags):
```json
{
"from": "gpt",
"value": "<think>\nLet me think about this step by step...\n</think>\nHere's what I found..."
}
```
## Conversion Flow
```
API Response → Internal Storage → Trajectory Export
↓ ↓ ↓
tool_calls reasoning field <tool_call> tags
reasoning_content <think> tags
```
The conversion happens in `_convert_to_trajectory_format()` in `run_agent.py`.
## Ephemeral System Prompts
Batch processing supports ephemeral system prompts that guide behavior during execution but are NOT saved to trajectories:
```python
# During execution: full system prompt + ephemeral guidance
messages = [
{"role": "system", "content": SYSTEM_PROMPT + "\n\n" + ephemeral_prompt},
...
]
# In saved trajectory: only the base system prompt
trajectory = {
"conversations": [
{"from": "system", "value": SYSTEM_PROMPT}, # No ephemeral
...
]
}
```
## Trajectory Compression
Long trajectories can be compressed for training using `trajectory_compressor.py`:
- Protects first/last N turns
- Summarizes middle turns with LLM
- Targets specific token budget
- See `configs/trajectory_compression.yaml` for settings
+547
View File
@@ -0,0 +1,547 @@
# Messaging Platform Integrations (Gateway)
Hermes Agent can connect to messaging platforms like Telegram, Discord, and WhatsApp to serve as a conversational AI assistant.
## Quick Start
```bash
# 1. Set your bot token(s) in .env file
echo 'TELEGRAM_BOT_TOKEN="your_telegram_bot_token"' >> .env
echo 'DISCORD_BOT_TOKEN="your_discord_bot_token"' >> .env
# 2. Test the gateway (foreground)
./scripts/hermes-gateway run
# 3. Install as a system service (runs in background)
./scripts/hermes-gateway install
# 4. Manage the service
./scripts/hermes-gateway start
./scripts/hermes-gateway stop
./scripts/hermes-gateway restart
./scripts/hermes-gateway status
```
**Quick test (without service install):**
```bash
python cli.py --gateway # Runs in foreground, useful for debugging
```
## Architecture Overview
```
┌─────────────────────────────────────────────────────────────────┐
│ Hermes Gateway │
├─────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ Telegram │ │ Discord │ │ WhatsApp │ │
│ │ Adapter │ │ Adapter │ │ Adapter │ │
│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │
│ │ │ │ │
│ └─────────────────┼─────────────────┘ │
│ │ │
│ ┌────────▼────────┐ │
│ │ Session Store │ │
│ │ (per-chat) │ │
│ └────────┬────────┘ │
│ │ │
│ ┌────────▼────────┐ │
│ │ AIAgent │ │
│ │ (run_agent) │ │
│ └─────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────┘
```
## Session Management
### Session Persistence
Sessions persist across messages until they reset. The agent remembers your conversation context.
### Reset Policies
Sessions reset based on configurable policies:
| Policy | Default | Description |
|--------|---------|-------------|
| Daily | 4:00 AM | Reset at a specific hour each day |
| Idle | 120 min | Reset after N minutes of inactivity |
| Both | (combined) | Whichever triggers first |
### Manual Reset
Send `/new` or `/reset` as a message to start fresh.
### Per-Platform Overrides
Configure different reset policies per platform:
```json
{
"reset_by_platform": {
"telegram": { "mode": "idle", "idle_minutes": 240 },
"discord": { "mode": "idle", "idle_minutes": 60 }
}
}
```
## Platform Setup
### Telegram
1. **Create a bot** via [@BotFather](https://t.me/BotFather)
2. **Get your token** (looks like `123456789:ABCdefGHIjklMNOpqrsTUVwxyz`)
3. **Set environment variable:**
```bash
export TELEGRAM_BOT_TOKEN="your_token_here"
```
4. **Optional: Set home channel** for cron job delivery:
```bash
export TELEGRAM_HOME_CHANNEL="-1001234567890"
export TELEGRAM_HOME_CHANNEL_NAME="My Notes"
```
**Requirements:**
```bash
pip install python-telegram-bot>=20.0
```
### Discord
1. **Create an application** at [Discord Developer Portal](https://discord.com/developers/applications)
2. **Create a bot** under your application
3. **Get the bot token**
4. **Enable required intents:**
- Message Content Intent
- Server Members Intent (optional)
5. **Invite to your server** using OAuth2 URL generator (scopes: `bot`, `applications.commands`)
6. **Set environment variable:**
```bash
export DISCORD_BOT_TOKEN="your_token_here"
```
7. **Optional: Set home channel:**
```bash
export DISCORD_HOME_CHANNEL="123456789012345678"
export DISCORD_HOME_CHANNEL_NAME="#bot-updates"
```
**Requirements:**
```bash
pip install discord.py>=2.0
```
### WhatsApp
WhatsApp integration is more complex due to the lack of a simple bot API.
**Options:**
1. **WhatsApp Business API** (requires Meta verification)
2. **whatsapp-web.js** via Node.js bridge (for personal accounts)
**Bridge Setup:**
1. Install Node.js
2. Set up the bridge script (see `scripts/whatsapp-bridge/` for reference)
3. Configure in gateway:
```json
{
"platforms": {
"whatsapp": {
"enabled": true,
"extra": {
"bridge_script": "/path/to/bridge.js",
"bridge_port": 3000
}
}
}
}
```
## Configuration
There are **three ways** to configure the gateway (in order of precedence):
### 1. Environment Variables (`.env` file) - Recommended for Quick Setup
Add to your `~/.hermes/.env` file:
```bash
# =============================================================================
# MESSAGING PLATFORM TOKENS
# =============================================================================
# Telegram - get from @BotFather on Telegram
TELEGRAM_BOT_TOKEN=your_telegram_bot_token
TELEGRAM_ALLOWED_USERS=123456789,987654321 # Security: restrict to these user IDs
# Optional: Default channel for cron job delivery
TELEGRAM_HOME_CHANNEL=-1001234567890
TELEGRAM_HOME_CHANNEL_NAME="My Notes"
# Discord - get from Discord Developer Portal
DISCORD_BOT_TOKEN=your_discord_bot_token
DISCORD_ALLOWED_USERS=123456789012345678 # Security: restrict to these user IDs
# Optional: Default channel for cron job delivery
DISCORD_HOME_CHANNEL=123456789012345678
DISCORD_HOME_CHANNEL_NAME="#bot-updates"
# WhatsApp - requires Node.js bridge setup
WHATSAPP_ENABLED=true
# =============================================================================
# AGENT SETTINGS
# =============================================================================
# Max tool-calling iterations per conversation (default: 60)
HERMES_MAX_ITERATIONS=60
# Working directory for terminal commands (default: home ~)
MESSAGING_CWD=/home/myuser
# =============================================================================
# TOOL PROGRESS NOTIFICATIONS
# =============================================================================
# Show progress messages as agent uses tools
HERMES_TOOL_PROGRESS=true
# Mode: "new" (only when tool changes) or "all" (every tool call)
HERMES_TOOL_PROGRESS_MODE=new
# =============================================================================
# SESSION SETTINGS
# =============================================================================
# Reset sessions after N minutes of inactivity (default: 120)
SESSION_IDLE_MINUTES=120
# Daily reset hour in 24h format (default: 4 = 4am)
SESSION_RESET_HOUR=4
```
### 2. Gateway Config File (`~/.hermes/gateway.json`) - Full Control
For advanced configuration, create `~/.hermes/gateway.json`:
```json
{
"platforms": {
"telegram": {
"enabled": true,
"token": "your_telegram_token",
"home_channel": {
"platform": "telegram",
"chat_id": "-1001234567890",
"name": "My Notes"
}
},
"discord": {
"enabled": true,
"token": "your_discord_token",
"home_channel": {
"platform": "discord",
"chat_id": "123456789012345678",
"name": "#bot-updates"
}
}
},
"default_reset_policy": {
"mode": "both",
"at_hour": 4,
"idle_minutes": 120
},
"reset_by_platform": {
"discord": {
"mode": "idle",
"idle_minutes": 60
}
},
"always_log_local": true
}
```
## Platform-Specific Toolsets
Each platform has its own toolset for security:
| Platform | Toolset | Capabilities |
|----------|---------|--------------|
| CLI | `hermes-cli` | Full access (terminal, browser, etc.) |
| Telegram | `hermes-telegram` | Full tools including terminal |
| Discord | `hermes-discord` | Full tools including terminal |
| WhatsApp | `hermes-whatsapp` | Full tools including terminal |
## User Experience Features
### Typing Indicator
The gateway keeps the "typing..." indicator active throughout processing, refreshing every 4 seconds. This lets users know the bot is working even during long tool-calling sequences.
### Tool Progress Notifications
When `HERMES_TOOL_PROGRESS=true`, the bot sends status messages as it works:
```
💻 `ls -la`...
🔍 web_search...
📄 web_extract...
🎨 image_generate...
```
Terminal commands show the actual command (truncated to 50 chars). Other tools just show the tool name.
**Modes:**
- `new`: Only sends message when switching to a different tool (less spam)
- `all`: Sends message for every single tool call
### Working Directory
- **CLI (`hermes` command)**: Uses current directory where you run the command
- **Messaging**: Uses `MESSAGING_CWD` (default: home directory `~`)
This is intentional: CLI users are in a terminal and expect the agent to work in their current directory, while messaging users need a consistent starting location.
### Max Iterations
If the agent hits the max iteration limit while working, instead of a generic error, it asks the model to summarize what it found so far. This gives you a useful response even when the task couldn't be fully completed.
## Voice Messages (TTS)
The `text_to_speech` tool generates audio that the gateway delivers as native voice messages on each platform:
| Platform | Delivery | Format |
|----------|----------|--------|
| Telegram | Voice bubble (plays inline) | Opus `.ogg` — native from OpenAI/ElevenLabs, converted via ffmpeg for Edge TTS |
| Discord | Audio file attachment | MP3 |
| WhatsApp | Audio file attachment | MP3 |
| CLI | Saved to `~/voice-memos/` | MP3 |
**Providers:**
- **Edge TTS** (default) — Free, no API key, 322 voices in 74 languages
- **ElevenLabs** — Premium quality, requires `ELEVENLABS_API_KEY`
- **OpenAI TTS** — Good quality, requires `OPENAI_API_KEY`
Voice and provider are configured by the user in `~/.hermes/config.yaml` under the `tts:` key. The model only sends text; it does not choose the voice.
The tool returns a `MEDIA:<path>` tag that the gateway send pipeline intercepts and delivers as a native audio message. If `[[audio_as_voice]]` is present (Opus format available), Telegram sends it as a voice bubble instead of an audio file.
**Telegram voice bubbles & ffmpeg:**
Telegram requires Opus/OGG format for native voice bubbles (the round, inline-playable kind). **OpenAI and ElevenLabs** produce Opus natively when on Telegram — no extra setup needed. **Edge TTS** (the default free provider) outputs MP3 and needs `ffmpeg` to convert:
```bash
sudo apt install ffmpeg # Ubuntu/Debian
brew install ffmpeg # macOS
sudo dnf install ffmpeg # Fedora
```
Without ffmpeg, Edge TTS audio is sent as a regular audio file (still playable, but shows as a rectangular music player instead of a voice bubble).
## Cron Job Delivery
When scheduling cron jobs, you can specify where the output should be delivered:
```
User: "Remind me to check the server in 30 minutes"
Agent uses: schedule_cronjob(
prompt="Check server status...",
schedule="30m",
deliver="origin" # Back to this chat
)
```
### Delivery Options
| Option | Description |
|--------|-------------|
| `"origin"` | Back to where the job was created |
| `"local"` | Save to local files only |
| `"telegram"` | Telegram home channel |
| `"discord"` | Discord home channel |
| `"telegram:123456"` | Specific Telegram chat |
## Dynamic Context Injection
The agent knows where it is via injected context:
```
## Current Session Context
**Source:** Telegram (group: Dev Team, ID: -1001234567890)
**Connected Platforms:** local, telegram, discord
**Home Channels:**
- telegram: My Notes (ID: -1001234567890)
- discord: #bot-updates (ID: 123456789012345678)
**Delivery options for scheduled tasks:**
- "origin" → Back to this chat (Dev Team)
- "local" → Save to local files only
- "telegram" → Home channel (My Notes)
- "discord" → Home channel (#bot-updates)
```
## CLI Commands
| Command | Description |
|---------|-------------|
| `/platforms` | Show gateway configuration and status |
| `--gateway` | Start the gateway (CLI flag) |
## Troubleshooting
### "python-telegram-bot not installed"
```bash
pip install python-telegram-bot>=20.0
```
### "discord.py not installed"
```bash
pip install discord.py>=2.0
```
### "No platforms connected"
1. Check your environment variables are set
2. Check your tokens are valid
3. Try `/platforms` to see configuration status
### Session not persisting
1. Check `~/.hermes/sessions/` exists
2. Check session policies aren't too aggressive
3. Verify no errors in gateway logs
## Adding a New Platform
To add a new messaging platform:
### 1. Create the adapter
Create `gateway/platforms/your_platform.py`:
```python
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
from gateway.config import Platform, PlatformConfig
class YourPlatformAdapter(BasePlatformAdapter):
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.YOUR_PLATFORM)
async def connect(self) -> bool:
# Connect to the platform
...
async def disconnect(self) -> None:
# Disconnect
...
async def send(self, chat_id: str, content: str, ...) -> SendResult:
# Send a message
...
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
# Get chat information
...
```
### 2. Register the platform
Add to `gateway/config.py`:
```python
class Platform(Enum):
# ... existing ...
YOUR_PLATFORM = "your_platform"
```
### 3. Add to gateway runner
Update `gateway/run.py` `_create_adapter()`:
```python
elif platform == Platform.YOUR_PLATFORM:
from gateway.platforms.your_platform import YourPlatformAdapter
return YourPlatformAdapter(config)
```
### 4. Create a toolset (optional)
Add to `toolsets.py`:
```python
"hermes-your-platform": {
"description": "Your platform toolset",
"tools": [...],
"includes": []
}
```
### 5. Configure
Add environment variables to `.env`:
```bash
YOUR_PLATFORM_TOKEN=...
YOUR_PLATFORM_HOME_CHANNEL=...
```
## Service Management
### Linux (systemd)
```bash
# Install as user service
./scripts/hermes-gateway install
# Manage
systemctl --user start hermes-gateway
systemctl --user stop hermes-gateway
systemctl --user restart hermes-gateway
systemctl --user status hermes-gateway
# View logs
journalctl --user -u hermes-gateway -f
# Enable lingering (keeps running after logout)
sudo loginctl enable-linger $USER
```
### macOS (launchd)
```bash
# Install
./scripts/hermes-gateway install
# Manage
launchctl start ai.hermes.gateway
launchctl stop ai.hermes.gateway
# View logs
tail -f ~/.hermes/logs/gateway.log
```
### Manual (any platform)
```bash
# Run in foreground (for testing/debugging)
./scripts/hermes-gateway run
# Or via CLI (also foreground)
python cli.py --gateway
```
## Storage Locations
| Path | Purpose |
|------|---------|
| `~/.hermes/gateway.json` | Gateway configuration |
| `~/.hermes/sessions/sessions.json` | Session index |
| `~/.hermes/sessions/{id}.jsonl` | Conversation transcripts |
| `~/.hermes/cron/output/` | Cron job outputs |
| `~/.hermes/logs/gateway.log` | Gateway logs (macOS launchd) |
-344
View File
@@ -1,344 +0,0 @@
# send_file Integration Map — Hermes Agent Codebase Deep Dive
## 1. environments/tool_context.py — Base64 File Transfer Implementation
### upload_file() (lines 153-205)
- Reads local file as raw bytes, base64-encodes to ASCII string
- Creates parent dirs in sandbox via `self.terminal(f"mkdir -p {parent}")`
- **Chunk size:** 60,000 chars (~60KB per shell command)
- **Small files (<=60KB b64):** Single `printf '%s' '{b64}' | base64 -d > {remote_path}`
- **Large files:** Writes chunks to `/tmp/_hermes_upload.b64` via `printf >> append`, then `base64 -d` to target
- **Error handling:** Checks local file exists; returns `{exit_code, output}`
- **Size limits:** No explicit limit, but shell arg limit ~2MB means chunking is necessary for files >~45KB raw
- **No theoretical max** — but very large files would be slow (many terminal round trips)
### download_file() (lines 234-278)
- Runs `base64 {remote_path}` inside sandbox, captures stdout
- Strips output, base64-decodes to raw bytes
- Writes to host filesystem with parent dir creation
- **Error handling:** Checks exit code, empty output, decode errors
- Returns `{success: bool, bytes: int}` or `{success: false, error: str}`
- **Size limit:** Bounded by terminal output buffer (practical limit ~few MB via base64 terminal output)
### Promotion potential:
- These methods work via `self.terminal()` — they're environment-agnostic
- Could be directly lifted into a new tool that operates on the agent's current sandbox
- For send_file, this `download_file()` pattern is the key: it extracts files from sandbox → host
## 2. tools/environments/base.py — BaseEnvironment Interface
### Current methods:
- `execute(command, cwd, timeout, stdin_data)``{output, returncode}`
- `cleanup()` — release resources
- `stop()` — alias for cleanup
- `_prepare_command()` — sudo transformation
- `_build_run_kwargs()` — subprocess kwargs
- `_timeout_result()` — standard timeout dict
### What would need to be added for file transfer:
- **Nothing required at this level.** File transfer can be implemented via `execute()` (base64 over terminal, like ToolContext does) or via environment-specific methods.
- Optional: `upload_file(local_path, remote_path)` and `download_file(remote_path, local_path)` methods could be added to BaseEnvironment for optimized per-backend transfers, but the base64-over-terminal approach already works universally.
## 3. tools/environments/docker.py — Docker Container Details
### Container ID tracking:
- `self._container_id` stored at init from `self._inner.container_id`
- Inner is `minisweagent.environments.docker.DockerEnvironment`
- Container ID is a standard Docker container hash
### docker cp feasibility:
- **YES**, `docker cp` could be used for optimized file transfer:
- `docker cp {container_id}:{remote_path} {local_path}` (download)
- `docker cp {local_path} {container_id}:{remote_path}` (upload)
- Much faster than base64-over-terminal for large files
- Container ID is directly accessible via `env._container_id` or `env._inner.container_id`
### Volumes mounted:
- **Persistent mode:** Bind mounts at `~/.hermes/sandboxes/docker/{task_id}/workspace``/workspace` and `.../home``/root`
- **Ephemeral mode:** tmpfs at `/workspace` (10GB), `/home` (1GB), `/root` (1GB)
- **User volumes:** From `config.yaml docker_volumes` (arbitrary `-v` mounts)
- **Security tmpfs:** `/tmp` (512MB), `/var/tmp` (256MB), `/run` (64MB)
### Direct host access for persistent mode:
- If persistent, files at `/workspace/foo.txt` are just `~/.hermes/sandboxes/docker/{task_id}/workspace/foo.txt` on host — no transfer needed!
## 4. tools/environments/ssh.py — SSH Connection Management
### Connection management:
- Uses SSH ControlMaster for persistent connection
- Control socket at `/tmp/hermes-ssh/{user}@{host}:{port}.sock`
- ControlPersist=300 (5 min keepalive)
- BatchMode=yes (non-interactive)
- Stores: `self.host`, `self.user`, `self.port`, `self.key_path`
### SCP/SFTP feasibility:
- **YES**, SCP can piggyback on the ControlMaster socket:
- `scp -o ControlPath={socket} {user}@{host}:{remote} {local}` (download)
- `scp -o ControlPath={socket} {local} {user}@{host}:{remote}` (upload)
- Same SSH key and connection reuse — zero additional auth
- Would be much faster than base64-over-terminal for large files
## 5. tools/environments/modal.py — Modal Sandbox Filesystem
### Filesystem API exposure:
- **Not directly.** The inner `SwerexModalEnvironment` wraps Modal's sandbox
- The sandbox object is accessible at: `env._inner.deployment._sandbox`
- Modal's Python SDK exposes `sandbox.open()` for file I/O — but only via async API
- Currently only used for `snapshot_filesystem()` during cleanup
- **Could use:** `sandbox.open(path, "rb")` to read files or `sandbox.open(path, "wb")` to write
- **Alternative:** Base64-over-terminal already works via `execute()` — simpler, no SDK dependency
## 6. gateway/platforms/base.py — MEDIA: Tag Flow (Complete)
### extract_media() (lines 587-620):
- **Pattern:** `MEDIA:\S+` — extracts file paths after MEDIA: prefix
- **Voice flag:** `[[audio_as_voice]]` global directive sets `is_voice=True` for all media in message
- Returns `List[Tuple[str, bool]]` (path, is_voice) and cleaned content
### _process_message_background() media routing (lines 752-786):
- After extracting MEDIA tags, routes by file extension:
- `.ogg .opus .mp3 .wav .m4a``send_voice()`
- `.mp4 .mov .avi .mkv .3gp``send_video()`
- `.jpg .jpeg .png .webp .gif``send_image_file()`
- **Everything else** → `send_document()`
- This routing already supports arbitrary files!
### send_* method inventory (base class):
- `send(chat_id, content, reply_to, metadata)` — ABSTRACT, text
- `send_image(chat_id, image_url, caption, reply_to)` — URL-based images
- `send_animation(chat_id, animation_url, caption, reply_to)` — GIF animations
- `send_voice(chat_id, audio_path, caption, reply_to)` — voice messages
- `send_video(chat_id, video_path, caption, reply_to)` — video files
- `send_document(chat_id, file_path, caption, file_name, reply_to)` — generic files
- `send_image_file(chat_id, image_path, caption, reply_to)` — local image files
- `send_typing(chat_id)` — typing indicator
- `edit_message(chat_id, message_id, content)` — edit sent messages
### What's missing:
- **Telegram:** No override for `send_document` or `send_image_file` — falls back to text!
- **Discord:** No override for `send_document` — falls back to text!
- **WhatsApp:** Has `send_document` and `send_image_file` via bridge — COMPLETE.
- The base class defaults just send "📎 File: /path" as text — useless for actual file delivery.
## 7. gateway/platforms/telegram.py — Send Method Analysis
### Implemented send methods:
- `send()` — MarkdownV2 text with fallback to plain
- `send_voice()``.ogg`/`.opus` as `send_voice()`, others as `send_audio()`
- `send_image()` — URL-based via `send_photo()`
- `send_animation()` — GIF via `send_animation()`
- `send_typing()` — "typing" chat action
- `edit_message()` — edit text messages
### MISSING:
- **`send_document()` NOT overridden** — Need to add `self._bot.send_document(chat_id, document=open(file_path, 'rb'), ...)`
- **`send_image_file()` NOT overridden** — Need to add `self._bot.send_photo(chat_id, photo=open(path, 'rb'), ...)`
- **`send_video()` NOT overridden** — Need to add `self._bot.send_video(...)`
## 8. gateway/platforms/discord.py — Send Method Analysis
### Implemented send methods:
- `send()` — text messages with chunking
- `send_voice()` — discord.File attachment
- `send_image()` — downloads URL, creates discord.File attachment
- `send_typing()` — channel.typing()
- `edit_message()` — edit text messages
### MISSING:
- **`send_document()` NOT overridden** — Need to add discord.File attachment
- **`send_image_file()` NOT overridden** — Need to add discord.File from local path
- **`send_video()` NOT overridden** — Need to add discord.File attachment
## 9. gateway/run.py — User File Attachment Handling
### Current attachment flow:
1. **Telegram photos** (line 509-529): Download via `photo.get_file()``cache_image_from_bytes()` → vision auto-analysis
2. **Telegram voice** (line 532-541): Download → `cache_audio_from_bytes()` → STT transcription
3. **Telegram audio** (line 542-551): Same pattern
4. **Telegram documents** (line 553-617): Extension validation against `SUPPORTED_DOCUMENT_TYPES`, 20MB limit, content injection for text files
5. **Discord attachments** (line 717-751): Content-type detection, image/audio caching, URL fallback for other types
6. **Gateway run.py** (lines 818-883): Auto-analyzes images with vision, transcribes audio, enriches document messages with context notes
### Key insight: Files are always cached to host filesystem first, then processed. The agent sees local file paths.
## 10. tools/terminal_tool.py — Terminal Tool & Environment Interaction
### How it manages environments:
- Global dict `_active_environments: Dict[str, Any]` keyed by task_id
- Per-task creation locks prevent duplicate sandbox creation
- Auto-cleanup thread kills idle environments after `TERMINAL_LIFETIME_SECONDS`
- `_get_env_config()` reads all TERMINAL_* env vars for backend selection
- `_create_environment()` factory creates the right backend type
### Could send_file piggyback?
- **YES.** send_file needs access to the same environment to extract files from sandboxes.
- It can reuse `_active_environments[task_id]` to get the environment, then:
- Docker: Use `docker cp` via `env._container_id`
- SSH: Use `scp` via `env.control_socket`
- Local: Just read the file directly
- Modal: Use base64-over-terminal via `env.execute()`
- The file_tools.py module already does this with `ShellFileOperations` — read_file/write_file/search/patch all share the same env instance.
## 11. tools/tts_tool.py — Working Example of File Delivery
### Flow:
1. Generate audio file to `~/.hermes/audio_cache/tts_TIMESTAMP.{ogg,mp3}`
2. Return JSON with `media_tag: "MEDIA:/path/to/file"`
3. For Telegram voice: prepend `[[audio_as_voice]]` directive
4. The LLM includes the MEDIA tag in its response text
5. `BasePlatformAdapter._process_message_background()` calls `extract_media()` to find the tag
6. Routes by extension → `send_voice()` for audio files
7. Platform adapter sends the file natively
### Key pattern: Tool saves file to host → returns MEDIA: path → LLM echoes it → gateway extracts → platform delivers
## 12. tools/image_generation_tool.py — Working Example of Image Delivery
### Flow:
1. Call FAL.ai API → get image URL
2. Return JSON with `image: "https://fal.media/..."` URL
3. The LLM includes the URL in markdown: `![description](URL)`
4. `BasePlatformAdapter.extract_images()` finds `![alt](url)` patterns
5. Routes through `send_image()` (URL) or `send_animation()` (GIF)
6. Platform downloads and sends natively
### Key difference from TTS: Images are URL-based, not local files. The gateway downloads at send time.
---
# INTEGRATION MAP: Where send_file Hooks In
## Architecture Decision: MEDIA: Tag Protocol vs. New Tool
The MEDIA: tag protocol is already the established pattern for file delivery. Two options:
### Option A: Pure MEDIA: Tag (Minimal Change)
- No new tool needed
- Agent downloads file from sandbox to host using terminal (base64)
- Saves to known location (e.g., `~/.hermes/file_cache/`)
- Includes `MEDIA:/path` in response text
- Existing routing in `_process_message_background()` handles delivery
- **Problem:** Agent has to manually do base64 dance + know about MEDIA: convention
### Option B: Dedicated send_file Tool (Recommended)
- New tool that the agent calls with `(file_path, caption?)`
- Tool handles the sandbox → host extraction automatically
- Returns MEDIA: tag that gets routed through existing pipeline
- Much cleaner agent experience
## Implementation Plan for Option B
### Files to CREATE:
1. **`tools/send_file_tool.py`** — The new tool
- Accepts: `file_path` (path in sandbox), `caption` (optional)
- Detects environment backend from `_active_environments`
- Extracts file from sandbox:
- **local:** `shutil.copy()` or direct path
- **docker:** `docker cp {container_id}:{path} {local_cache}/`
- **ssh:** `scp -o ControlPath=... {user}@{host}:{path} {local_cache}/`
- **modal:** base64-over-terminal via `env.execute("base64 {path}")`
- Saves to `~/.hermes/file_cache/{uuid}_{filename}`
- Returns: `MEDIA:/cached/path` in response for gateway to pick up
- Register with `registry.register(name="send_file", toolset="file", ...)`
### Files to MODIFY:
2. **`gateway/platforms/telegram.py`** — Add missing send methods:
```python
async def send_document(self, chat_id, file_path, caption=None, file_name=None, reply_to=None):
with open(file_path, "rb") as f:
msg = await self._bot.send_document(
chat_id=int(chat_id), document=f,
caption=caption, filename=file_name or os.path.basename(file_path))
return SendResult(success=True, message_id=str(msg.message_id))
async def send_image_file(self, chat_id, image_path, caption=None, reply_to=None):
with open(image_path, "rb") as f:
msg = await self._bot.send_photo(chat_id=int(chat_id), photo=f, caption=caption)
return SendResult(success=True, message_id=str(msg.message_id))
async def send_video(self, chat_id, video_path, caption=None, reply_to=None):
with open(video_path, "rb") as f:
msg = await self._bot.send_video(chat_id=int(chat_id), video=f, caption=caption)
return SendResult(success=True, message_id=str(msg.message_id))
```
3. **`gateway/platforms/discord.py`** — Add missing send methods:
```python
async def send_document(self, chat_id, file_path, caption=None, file_name=None, reply_to=None):
channel = self._client.get_channel(int(chat_id)) or await self._client.fetch_channel(int(chat_id))
with open(file_path, "rb") as f:
file = discord.File(io.BytesIO(f.read()), filename=file_name or os.path.basename(file_path))
msg = await channel.send(content=caption, file=file)
return SendResult(success=True, message_id=str(msg.id))
async def send_image_file(self, chat_id, image_path, caption=None, reply_to=None):
# Same pattern as send_document with image filename
async def send_video(self, chat_id, video_path, caption=None, reply_to=None):
# Same pattern, discord renders video attachments inline
```
4. **`toolsets.py`** — Add `"send_file"` to `_HERMES_CORE_TOOLS` list
5. **`agent/prompt_builder.py`** — Update platform hints to mention send_file tool
### Code that can be REUSED (zero rewrite):
- `BasePlatformAdapter.extract_media()` — Already extracts MEDIA: tags
- `BasePlatformAdapter._process_message_background()` — Already routes by extension
- `ToolContext.download_file()` — Base64-over-terminal extraction pattern
- `tools/terminal_tool.py` _active_environments dict — Environment access
- `tools/registry.py` — Tool registration infrastructure
- `gateway/platforms/base.py` send_document/send_image_file/send_video signatures — Already defined
### Code that needs to be WRITTEN from scratch:
1. `tools/send_file_tool.py` (~150 lines):
- File extraction from each environment backend type
- Local file cache management
- Registry registration
2. Telegram `send_document` + `send_image_file` + `send_video` overrides (~40 lines)
3. Discord `send_document` + `send_image_file` + `send_video` overrides (~50 lines)
### Total effort: ~240 lines of new code, ~5 lines of config changes
## Key Environment-Specific Extract Strategies
| Backend | Extract Method | Speed | Complexity |
|------------|-------------------------------|----------|------------|
| local | shutil.copy / direct path | Instant | None |
| docker | `docker cp container:path .` | Fast | Low |
| docker+vol | Direct host path access | Instant | None |
| ssh | `scp -o ControlPath=...` | Fast | Low |
| modal | base64-over-terminal | Moderate | Medium |
| singularity| Direct path (overlay mount) | Fast | Low |
## Data Flow Summary
```
Agent calls send_file(file_path="/workspace/output.pdf", caption="Here's the report")
send_file_tool.py:
1. Get environment from _active_environments[task_id]
2. Detect backend type (docker/ssh/modal/local)
3. Extract file to ~/.hermes/file_cache/{uuid}_{filename}
4. Return: '{"success": true, "media_tag": "MEDIA:/home/user/.hermes/file_cache/abc123_output.pdf"}'
LLM includes MEDIA: tag in its response text
BasePlatformAdapter._process_message_background():
1. extract_media(response) → finds MEDIA:/path
2. Checks extension: .pdf → send_document()
3. Calls platform-specific send_document(chat_id, file_path, caption)
TelegramAdapter.send_document() / DiscordAdapter.send_document():
Opens file, sends via platform API as native document attachment
User receives downloadable file in chat
```
+163
View File
@@ -0,0 +1,163 @@
# Tools
Tools are functions that extend the agent's capabilities. Each tool is defined with an OpenAI-compatible JSON schema and an async handler function.
## Tool Structure
Each tool module in `tools/` exports:
1. **Schema definitions** - OpenAI function-calling format
2. **Handler functions** - Async functions that execute the tool
```python
# Example: tools/web_tools.py
# Schema definition
WEB_SEARCH_SCHEMA = {
"type": "function",
"function": {
"name": "web_search",
"description": "Search the web for information",
"parameters": {
"type": "object",
"properties": {
"query": {"type": "string", "description": "Search query"}
},
"required": ["query"]
}
}
}
# Handler function
async def web_search(query: str) -> dict:
"""Execute web search and return results."""
# Implementation...
return {"results": [...]}
```
## Tool Categories
| Category | Module | Tools |
|----------|--------|-------|
| **Web** | `web_tools.py` | `web_search`, `web_extract`, `web_crawl` |
| **Terminal** | `terminal_tool.py` | `terminal` (local/docker/singularity/modal/ssh backends) |
| **File** | `file_tools.py` | `read_file`, `write_file`, `patch`, `search` |
| **Browser** | `browser_tool.py` | `browser_navigate`, `browser_click`, `browser_type`, etc. |
| **Vision** | `vision_tools.py` | `vision_analyze` |
| **Image Gen** | `image_generation_tool.py` | `image_generate` |
| **TTS** | `tts_tool.py` | `text_to_speech` (Edge TTS free / ElevenLabs / OpenAI) |
| **Reasoning** | `mixture_of_agents_tool.py` | `mixture_of_agents` |
| **Skills** | `skills_tool.py` | `skills_list`, `skill_view` |
| **Cronjob** | `cronjob_tools.py` | `schedule_cronjob`, `list_cronjobs`, `remove_cronjob` |
| **RL Training** | `rl_training_tool.py` | `rl_list_environments`, `rl_start_training`, `rl_check_status`, etc. |
## Tool Registration
Tools are registered in `model_tools.py`:
```python
# model_tools.py
TOOL_SCHEMAS = [
*WEB_TOOL_SCHEMAS,
*TERMINAL_TOOL_SCHEMAS,
*BROWSER_TOOL_SCHEMAS,
# ...
]
TOOL_HANDLERS = {
"web_search": web_search,
"terminal": terminal_tool,
"browser_navigate": browser_navigate,
# ...
}
```
## Toolsets
Tools are grouped into **toolsets** for logical organization (see `toolsets.py`):
```python
TOOLSETS = {
"web": {
"description": "Web search and content extraction",
"tools": ["web_search", "web_extract", "web_crawl"]
},
"terminal": {
"description": "Command execution",
"tools": ["terminal"]
},
# ...
}
```
## Adding a New Tool
1. Create handler function in `tools/your_tool.py`
2. Define JSON schema following OpenAI format
3. Register in `model_tools.py` (schemas and handlers)
4. Add to appropriate toolset in `toolsets.py`
5. Update `tools/__init__.py` exports
## Stateful Tools
Some tools maintain state across calls within a session:
- **Terminal**: Keeps container/sandbox running between commands
- **Browser**: Maintains browser session for multi-step navigation
State is managed per `task_id` and cleaned up automatically.
## Terminal Backends
The terminal tool supports multiple execution backends:
| Backend | Description | Use Case |
|---------|-------------|----------|
| `local` | Direct execution on host | Development, simple tasks |
| `ssh` | Remote execution via SSH | Sandboxing (agent can't modify its own code) |
| `docker` | Docker container | Isolation, reproducibility |
| `singularity` | Singularity/Apptainer | HPC clusters, rootless containers |
| `modal` | Modal cloud | Scalable cloud compute, GPUs |
Configure via environment variables or `cli-config.yaml`:
```yaml
# SSH backend example (in cli-config.yaml)
terminal:
env_type: "ssh"
ssh_host: "my-server.example.com"
ssh_user: "myuser"
ssh_key: "~/.ssh/id_rsa"
cwd: "/home/myuser/project"
```
The SSH backend uses ControlMaster for connection persistence, making subsequent commands fast.
## Skills Tools (Progressive Disclosure)
Skills are on-demand knowledge documents. They use **progressive disclosure** to minimize tokens:
```
Level 0: skills_categories() → ["mlops", "devops"] (~50 tokens)
Level 1: skills_list(category) → [{name, description}, ...] (~3k tokens)
Level 2: skill_view(name) → Full content + metadata (varies)
Level 3: skill_view(name, path) → Specific reference file (varies)
```
Skill directory structure:
```
skills/
└── mlops/
└── axolotl/
├── SKILL.md # Main instructions (required)
├── references/ # Additional docs
└── templates/ # Output formats, configs
```
SKILL.md uses YAML frontmatter:
```yaml
---
name: axolotl
description: Fine-tuning LLMs with Axolotl
tags: [Fine-Tuning, LoRA, DPO]
---
```
+4 -4
View File
@@ -40,8 +40,8 @@ This directory contains the integration layer between **hermes-agent's** tool-ca
- `evaluate_log()` for saving eval results to JSON + samples.jsonl
**HermesAgentBaseEnv** (`hermes_base_env.py`) extends BaseEnv with hermes-agent specifics:
- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, modal, daytona, ssh, singularity)
- Resolves hermes-agent toolsets via `_resolve_tools_for_group()` (calls `get_tool_definitions()` which queries `tools/registry.py`)
- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, modal, ssh, singularity)
- Resolves hermes-agent toolsets via `_resolve_tools_for_group()` (calls `get_tool_definitions()` from `model_tools.py`)
- Implements `collect_trajectory()` which runs the full agent loop and computes rewards
- Supports two-phase operation (Phase 1: OpenAI server, Phase 2: VLLM ManagedServer)
- Applies monkey patches for async-safe tool operation at import time
@@ -60,7 +60,7 @@ Concrete environments inherit from `HermesAgentBaseEnv` and implement:
`HermesAgentLoop` is the reusable multi-turn agent engine. It runs the same pattern as hermes-agent's `run_agent.py`:
1. Send messages + tools to the API via `server.chat_completion()`
2. If the response contains `tool_calls`, execute each one via `handle_function_call()` (which delegates to `tools/registry.py`'s `dispatch()`)
2. If the response contains `tool_calls`, execute each one via `handle_function_call()` from `model_tools.py`
3. Append tool results to the conversation and go back to step 1
4. If the response has no tool_calls, the agent is done
@@ -324,7 +324,7 @@ For eval benchmarks, follow the pattern in `terminalbench2_env.py`:
| `distribution` | Probabilistic toolset distribution name | `None` |
| `max_agent_turns` | Max LLM calls per rollout | `30` |
| `agent_temperature` | Sampling temperature | `1.0` |
| `terminal_backend` | `local`, `docker`, `modal`, `daytona`, `ssh`, `singularity` | `local` |
| `terminal_backend` | `local`, `docker`, `modal`, `ssh`, `singularity` | `local` |
| `system_prompt` | System message for the agent | `None` |
| `tool_call_parser` | Parser name for Phase 2 | `hermes` |
| `eval_handling` | `STOP_TRAIN`, `LIMIT_TRAIN`, `NONE` | `STOP_TRAIN` |
+1
View File
@@ -13,6 +13,7 @@ Core layers:
Concrete environments:
- terminal_test_env/: Simple file-creation tasks for testing the stack
- hermes_swe_env/: SWE-bench style tasks with Modal sandboxes
- endless_terminals/: Terminal tasks from HuggingFace dataset with Apptainer containers
Benchmarks (eval-only):
- benchmarks/terminalbench_2/: Terminal-Bench 2.0 evaluation
+12 -44
View File
@@ -23,7 +23,7 @@ from typing import Any, Dict, List, Optional, Set
from model_tools import handle_function_call
# Thread pool for running sync tool calls that internally use asyncio.run()
# (e.g., mini-swe-agent's modal/docker/daytona backends). Running them in a separate
# (e.g., mini-swe-agent's modal/docker backends). Running them in a separate
# thread gives them a clean event loop so they don't deadlock inside Atropos's loop.
# Size must be large enough for concurrent eval tasks (e.g., 89 TB2 tasks all
# making tool calls). Too small = thread pool starvation, tasks queue for minutes.
@@ -176,19 +176,6 @@ class HermesAgentLoop:
reasoning_per_turn = []
tool_errors: List[ToolError] = []
# Per-loop TodoStore for the todo tool (ephemeral, dies with the loop)
from tools.todo_tool import TodoStore, todo_tool as _todo_tool
_todo_store = TodoStore()
# Extract user task from first user message for browser_snapshot context
_user_task = None
for msg in messages:
if msg.get("role") == "user":
content = msg.get("content", "")
if isinstance(content, str) and content.strip():
_user_task = content.strip()[:500] # Cap to avoid huge strings
break
import time as _time
for turn in range(self.max_turns):
@@ -318,37 +305,18 @@ class HermesAgentLoop:
"[%s] $ %s", self.task_id[:8], cmd_preview,
)
# Run tool calls in a thread pool so backends that use
# asyncio.run() internally (modal, docker) get a clean
# event loop instead of deadlocking inside Atropos's loop.
tool_submit_time = _time.monotonic()
# Todo tool -- handle locally (needs per-loop TodoStore)
if tool_name == "todo":
tool_result = _todo_tool(
todos=args.get("todos"),
merge=args.get("merge", False),
store=_todo_store,
)
tool_elapsed = _time.monotonic() - tool_submit_time
elif tool_name == "memory":
tool_result = json.dumps({"error": "Memory is not available in RL environments."})
tool_elapsed = _time.monotonic() - tool_submit_time
elif tool_name == "session_search":
tool_result = json.dumps({"error": "Session search is not available in RL environments."})
tool_elapsed = _time.monotonic() - tool_submit_time
else:
# Run tool calls in a thread pool so backends that
# use asyncio.run() internally (modal, docker, daytona) get
# a clean event loop instead of deadlocking.
loop = asyncio.get_event_loop()
# Capture current tool_name/args for the lambda
_tn, _ta, _tid = tool_name, args, self.task_id
tool_result = await loop.run_in_executor(
_tool_executor,
lambda: handle_function_call(
_tn, _ta, task_id=_tid,
user_task=_user_task,
),
)
tool_elapsed = _time.monotonic() - tool_submit_time
loop = asyncio.get_event_loop()
tool_result = await loop.run_in_executor(
_tool_executor,
lambda: handle_function_call(
tool_name, args, task_id=self.task_id
),
)
tool_elapsed = _time.monotonic() - tool_submit_time
# Log slow tools and thread pool stats for debugging
pool_active = _tool_executor._work_queue.qsize()
-73
View File
@@ -1,73 +0,0 @@
# OpenThoughts-TBLite Evaluation Environment
This environment evaluates terminal agents on the [OpenThoughts-TBLite](https://huggingface.co/datasets/open-thoughts/OpenThoughts-TBLite) benchmark, a difficulty-calibrated subset of [Terminal-Bench 2.0](https://www.tbench.ai/leaderboard/terminal-bench/2.0).
## Source
OpenThoughts-TBLite was created by the [OpenThoughts](https://www.openthoughts.ai/) Agent team in collaboration with [Snorkel AI](https://snorkel.ai/) and [Bespoke Labs](https://bespokelabs.ai/). The original dataset and documentation live at:
- **Dataset (source):** [open-thoughts/OpenThoughts-TBLite](https://huggingface.co/datasets/open-thoughts/OpenThoughts-TBLite)
- **GitHub:** [open-thoughts/OpenThoughts-TBLite](https://github.com/open-thoughts/OpenThoughts-TBLite)
- **Blog post:** [openthoughts.ai/blog/openthoughts-tblite](https://www.openthoughts.ai/blog/openthoughts-tblite)
## Our Dataset
We converted the source into the same schema used by our Terminal-Bench 2.0 environment (pre-built Docker Hub images, base64-encoded test tarballs, etc.) and published it as:
- **Dataset (ours):** [NousResearch/openthoughts-tblite](https://huggingface.co/datasets/NousResearch/openthoughts-tblite)
- **Docker images:** `nousresearch/tblite-<task-name>:latest` on Docker Hub (100 images)
The conversion script is at `scripts/prepare_tblite_dataset.py`.
## Why TBLite?
Terminal-Bench 2.0 is one of the strongest frontier evaluations for terminal agents, but when a model scores near the floor (e.g., Qwen 3 8B at <1%), many changes look identical in aggregate score. TBLite addresses this by calibrating task difficulty using Claude Haiku 4.5 as a reference:
| Difficulty | Pass Rate Range | Tasks |
|------------|----------------|-------|
| Easy | >= 70% | 40 |
| Medium | 40-69% | 26 |
| Hard | 10-39% | 26 |
| Extreme | < 10% | 8 |
This gives enough solvable tasks to detect small improvements quickly, while preserving enough hard tasks to avoid saturation. The correlation between TBLite and TB2 scores is **r = 0.911**.
TBLite also runs 2.6-8x faster than the full TB2, making it practical for iteration loops.
## Usage
```bash
# Run the full benchmark
python environments/benchmarks/tblite/tblite_env.py evaluate
# Filter to specific tasks
python environments/benchmarks/tblite/tblite_env.py evaluate \
--env.task_filter "broken-python,pandas-etl"
# Use a different model
python environments/benchmarks/tblite/tblite_env.py evaluate \
--server.model_name "qwen/qwen3-30b"
```
## Architecture
`TBLiteEvalEnv` is a thin subclass of `TerminalBench2EvalEnv`. All evaluation logic (agent loop, Docker sandbox management, test verification, metrics) is inherited. Only the defaults differ:
| Setting | TB2 | TBLite |
|----------------|----------------------------------|-----------------------------------------|
| Dataset | `NousResearch/terminal-bench-2` | `NousResearch/openthoughts-tblite` |
| Tasks | 89 | 100 |
| Task timeout | 1800s (30 min) | 1200s (20 min) |
| Wandb name | `terminal-bench-2` | `openthoughts-tblite` |
## Citation
```bibtex
@software{OpenThoughts-TBLite,
author = {OpenThoughts-Agent team, Snorkel AI, Bespoke Labs},
month = Feb,
title = {{OpenThoughts-TBLite: A High-Signal Benchmark for Iterating on Terminal Agents}},
howpublished = {https://www.openthoughts.ai/blog/openthoughts-tblite},
year = {2026}
}
```
@@ -1,39 +0,0 @@
# OpenThoughts-TBLite Evaluation -- Default Configuration
#
# Eval-only environment for the TBLite benchmark (100 difficulty-calibrated
# terminal tasks, a faster proxy for Terminal-Bench 2.0).
# Uses Modal terminal backend for per-task cloud-isolated sandboxes
# and OpenRouter for inference.
#
# Usage:
# python environments/benchmarks/tblite/tblite_env.py evaluate \
# --config environments/benchmarks/tblite/default.yaml
#
# # Override model:
# python environments/benchmarks/tblite/tblite_env.py evaluate \
# --config environments/benchmarks/tblite/default.yaml \
# --openai.model_name anthropic/claude-sonnet-4
env:
enabled_toolsets: ["terminal", "file"]
max_agent_turns: 60
max_token_length: 32000
agent_temperature: 0.8
terminal_backend: "modal"
terminal_timeout: 300 # 5 min per command (builds, pip install)
tool_pool_size: 128 # thread pool for 100 parallel tasks
dataset_name: "NousResearch/openthoughts-tblite"
test_timeout: 600
task_timeout: 1200 # 20 min wall-clock per task (TBLite tasks are faster)
tokenizer_name: "NousResearch/Hermes-3-Llama-3.1-8B"
use_wandb: true
wandb_name: "openthoughts-tblite"
ensure_scores_are_not_same: false
data_dir_to_save_evals: "environments/benchmarks/evals/openthoughts-tblite"
openai:
base_url: "https://openrouter.ai/api/v1"
model_name: "anthropic/claude-opus-4.6"
server_type: "openai"
health_check: false
# api_key loaded from OPENROUTER_API_KEY in .env
@@ -1,42 +0,0 @@
#!/bin/bash
# OpenThoughts-TBLite Evaluation
#
# Run from repo root:
# bash environments/benchmarks/tblite/run_eval.sh
#
# Override model:
# bash environments/benchmarks/tblite/run_eval.sh \
# --openai.model_name anthropic/claude-sonnet-4
#
# Run a subset:
# bash environments/benchmarks/tblite/run_eval.sh \
# --env.task_filter broken-python,pandas-etl
#
# All terminal settings (backend, timeout, lifetime, pool size) are
# configured via env config fields -- no env vars needed.
set -euo pipefail
mkdir -p logs evals/openthoughts-tblite
LOG_FILE="logs/tblite_$(date +%Y%m%d_%H%M%S).log"
echo "OpenThoughts-TBLite Evaluation"
echo "Log file: $LOG_FILE"
echo ""
# Unbuffered python output so logs are written in real-time
export PYTHONUNBUFFERED=1
# Show INFO-level agent loop timing (api/tool durations per turn)
# These go to the log file; tqdm + [START]/[PASS]/[FAIL] go to terminal
export LOGLEVEL=INFO
python tblite_env.py evaluate \
--config default.yaml \
"$@" \
2>&1 | tee "$LOG_FILE"
echo ""
echo "Log saved to: $LOG_FILE"
echo "Eval results: evals/openthoughts-tblite/"
@@ -1,119 +0,0 @@
"""
OpenThoughts-TBLite Evaluation Environment
A lighter, faster alternative to Terminal-Bench 2.0 for iterating on terminal
agents. Uses the same evaluation logic as TerminalBench2EvalEnv but defaults
to the NousResearch/openthoughts-tblite dataset (100 difficulty-calibrated
tasks vs TB2's 89 harder tasks).
TBLite tasks are a curated subset of TB2 with a difficulty distribution
designed to give meaningful signal even for smaller models:
- Easy (40 tasks): >= 70% pass rate with Claude Haiku 4.5
- Medium (26 tasks): 40-69% pass rate
- Hard (26 tasks): 10-39% pass rate
- Extreme (8 tasks): < 10% pass rate
Usage:
python environments/benchmarks/tblite/tblite_env.py evaluate
# Filter to specific tasks:
python environments/benchmarks/tblite/tblite_env.py evaluate \\
--env.task_filter "broken-python,pandas-etl"
"""
import os
import sys
from pathlib import Path
from typing import List, Tuple
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
if str(_repo_root) not in sys.path:
sys.path.insert(0, str(_repo_root))
from pydantic import Field
from atroposlib.envs.base import EvalHandlingEnum
from atroposlib.envs.server_handling.server_manager import APIServerConfig
from environments.benchmarks.terminalbench_2.terminalbench2_env import (
TerminalBench2EvalConfig,
TerminalBench2EvalEnv,
)
class TBLiteEvalConfig(TerminalBench2EvalConfig):
"""Configuration for the OpenThoughts-TBLite evaluation environment.
Inherits all TB2 config fields. Only the dataset default and task timeout
differ -- TBLite tasks are calibrated to be faster.
"""
dataset_name: str = Field(
default="NousResearch/openthoughts-tblite",
description="HuggingFace dataset containing TBLite tasks.",
)
task_timeout: int = Field(
default=1200,
description="Maximum wall-clock seconds per task. TBLite tasks are "
"generally faster than TB2, so 20 minutes is usually sufficient.",
)
class TBLiteEvalEnv(TerminalBench2EvalEnv):
"""OpenThoughts-TBLite evaluation environment.
Inherits all evaluation logic from TerminalBench2EvalEnv (agent loop,
test verification, Docker image resolution, metrics, wandb logging).
Only the default configuration differs.
"""
name = "openthoughts-tblite"
env_config_cls = TBLiteEvalConfig
@classmethod
def config_init(cls) -> Tuple[TBLiteEvalConfig, List[APIServerConfig]]:
env_config = TBLiteEvalConfig(
enabled_toolsets=["terminal", "file"],
disabled_toolsets=None,
distribution=None,
max_agent_turns=60,
max_token_length=16000,
agent_temperature=0.6,
system_prompt=None,
terminal_backend="modal",
terminal_timeout=300,
test_timeout=180,
# 100 tasks in parallel
tool_pool_size=128,
eval_handling=EvalHandlingEnum.STOP_TRAIN,
group_size=1,
steps_per_eval=1,
total_steps=1,
tokenizer_name="NousResearch/Hermes-3-Llama-3.1-8B",
use_wandb=True,
wandb_name="openthoughts-tblite",
ensure_scores_are_not_same=False,
)
server_configs = [
APIServerConfig(
base_url="https://openrouter.ai/api/v1",
model_name="anthropic/claude-sonnet-4",
server_type="openai",
api_key=os.getenv("OPENROUTER_API_KEY", ""),
health_check=False,
)
]
return env_config, server_configs
if __name__ == "__main__":
TBLiteEvalEnv.cli()
@@ -12,31 +12,21 @@
# Run a subset:
# bash environments/benchmarks/terminalbench_2/run_eval.sh \
# --env.task_filter fix-git,git-multibranch
#
# All terminal settings (backend, timeout, lifetime, pool size) are
# configured via env config fields -- no env vars needed.
set -euo pipefail
mkdir -p logs evals/terminal-bench-2
LOG_FILE="logs/terminalbench2_$(date +%Y%m%d_%H%M%S).log"
echo "Terminal-Bench 2.0 Evaluation"
echo "Log file: $LOG_FILE"
echo "Log: $LOG_FILE"
echo ""
# Unbuffered python output so logs are written in real-time
export PYTHONUNBUFFERED=1
export TERMINAL_ENV=modal
export TERMINAL_TIMEOUT=300
# Show INFO-level agent loop timing (api/tool durations per turn)
# These go to the log file; tqdm + [START]/[PASS]/[FAIL] go to terminal
export LOGLEVEL=INFO
python terminalbench2_env.py evaluate \
--config default.yaml \
python environments/benchmarks/terminalbench_2/terminalbench2_env.py evaluate \
--config environments/benchmarks/terminalbench_2/default.yaml \
"$@" \
2>&1 | tee "$LOG_FILE"
echo ""
echo "Log saved to: $LOG_FILE"
echo "Eval results: evals/terminal-bench-2/"
@@ -0,0 +1,5 @@
"""Endless Terminals Environment - Terminal task training from HuggingFace dataset."""
from .endless_terminals_env import EndlessTerminalsEnv, EndlessTerminalsEnvConfig
__all__ = ["EndlessTerminalsEnv", "EndlessTerminalsEnvConfig"]
@@ -0,0 +1,69 @@
# Endless Terminals Environment -- Default Configuration
#
# Trains agents on terminal tasks from the Endless Terminals HuggingFace dataset.
# Uses hermes-agent backends (modal/docker/local) with per-task Docker images.
# Tests run in the same sandbox the agent used (no separate containers needed).
#
# Dataset: https://huggingface.co/datasets/obiwan96/endless-terminals-train
#
# Prerequisites:
# 1. Download dataset: huggingface-cli download obiwan96/endless-terminals-train \
# --repo-type dataset --local-dir ~/endless-terminals-data \
# --local-dir-use-symlinks False
# 2. Set TASKS_BASE_DIR environment variable or configure tasks_base_dir below
# 3. For modal backend: Configure Modal CLI (modal token set)
# 4. For docker backend: Install Docker
#
# Usage:
# python environments/endless_terminals/endless_terminals_env.py process \
# --config environments/endless_terminals/default.yaml
env:
# Toolsets
enabled_toolsets: ["terminal", "file"]
# Agent configuration
max_agent_turns: 32
max_token_length: 4096
agent_temperature: 1.0
# Terminal backend
terminal_backend: "local" # Change to "modal" or "docker" for cloud isolation
# Dataset settings
use_dataset: true
dataset_name: "obiwan96/endless-terminals"
dataset_split: "train"
dataset_cache_dir: "~/.cache/huggingface/datasets"
tasks_base_dir: "" # Set to directory containing task_* folders (e.g., ~/endless-terminals-data)
# Test execution
test_timeout_s: 60
# Training configuration
group_size: 8
total_steps: 10000
steps_per_eval: 500
num_eval_tasks: 10
eval_split_ratio: 0.1
# Logging
use_wandb: true
wandb_name: "endless-terminals"
# System prompt
system_prompt: >
You are a skilled Linux system administrator and programmer.
You have access to a terminal and file tools to complete system administration
and programming tasks. Use the tools effectively to solve the given task,
and verify your solution works correctly before finishing.
openai:
base_url: "https://openrouter.ai/api/v1"
model_name: "anthropic/claude-sonnet-4.5"
server_type: "openai"
api_key: "" # Loaded from OPENROUTER_API_KEY env var
health_check: false
timeout: 30 # 30 second timeout per request
max_retries: 2 # Only retry twice
@@ -0,0 +1,921 @@
"""
Endless Terminals Environment for Hermes-Agent + Atropos RL.
Loads pre-generated terminal tasks from HuggingFace dataset and scores
agent performance using test execution in the agent's sandbox.
Uses hermes-agent backends (modal, docker, local) with per-task Docker images
extracted from container.def files. Tests run in the same sandbox the agent
used, following the Terminal Bench 2 pattern.
Dataset: https://huggingface.co/datasets/obiwan96/endless-terminals-train
Run:
python environments/endless_terminals/endless_terminals_env.py process \
--config environments/endless_terminals/default.yaml
"""
import asyncio
import logging
import os
import random
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from pydantic import Field
# Ensure hermes-agent root is on path
_repo_root = Path(__file__).resolve().parent.parent.parent
if str(_repo_root) not in sys.path:
sys.path.insert(0, str(_repo_root))
from atroposlib.envs.base import ScoredDataGroup, ScoredDataItem
from atroposlib.type_definitions import Item
from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig
from environments.agent_loop import AgentResult
from environments.tool_context import ToolContext
from tools.terminal_tool import (
register_task_env_overrides,
clear_task_env_overrides,
cleanup_vm,
)
logger = logging.getLogger(__name__)
# Add endless-terminals to path for imports
ENDLESS_TERMINALS_PATH = os.getenv(
"ENDLESS_TERMINALS_PATH",
str(Path.home() / "Desktop" / "Projects" / "endless-terminals")
)
sys.path.insert(0, ENDLESS_TERMINALS_PATH)
class EndlessTerminalsEnvConfig(HermesAgentEnvConfig):
"""Configuration for Endless Terminals environment."""
# Dataset settings
use_dataset: bool = Field(
default=True,
description="Load tasks from HuggingFace dataset (recommended). If False, generate procedurally."
)
dataset_name: str = Field(
default="obiwan96/endless-terminals-train",
description="HuggingFace dataset name"
)
dataset_split: str = Field(
default="train",
description="Dataset split to use"
)
dataset_cache_dir: str = Field(
default="~/.cache/huggingface/datasets",
description="HuggingFace datasets cache directory"
)
tasks_base_dir: str = Field(
default="",
description="Base directory containing task_* folders. If empty, uses paths from dataset."
)
# Test execution
test_timeout_s: int = Field(default=60, description="Test execution timeout (seconds)")
# Docker image fallback
default_docker_image: str = Field(
default="ubuntu:22.04",
description="Default Docker image if container.def parsing fails"
)
# Agent defaults
max_agent_turns: int = Field(default=32, description="Max turns for agent (increased for long traces)")
# Evaluation settings
num_eval_tasks: int = Field(
default=10,
description="Number of tasks to run during periodic evaluation"
)
eval_split_ratio: float = Field(
default=0.1,
description="Fraction of dataset to hold out for evaluation (0.0-1.0)"
)
class EndlessTerminalsEnv(HermesAgentBaseEnv):
"""
Endless Terminals environment using pre-generated HuggingFace dataset.
Loads terminal tasks from dataset, runs agent with terminal tools,
and scores by executing tests in the agent's sandbox using ToolContext.
"""
name = "endless_terminals_env"
env_config_cls = EndlessTerminalsEnvConfig
@classmethod
def config_init(cls) -> Tuple[EndlessTerminalsEnvConfig, List["APIServerConfig"]]:
"""
Default configuration for Endless Terminals environment.
This is used when no config file is provided, but note that when using
--config, the YAML is loaded differently and this may not be called.
"""
from atroposlib.envs.server_handling.server_manager import APIServerConfig
env_config = EndlessTerminalsEnvConfig(
enabled_toolsets=["terminal", "file"],
max_agent_turns=32,
terminal_backend="local",
use_dataset=True,
tasks_base_dir="",
group_size=1,
total_steps=1,
use_wandb=False,
)
server_configs = [
APIServerConfig(
base_url="https://openrouter.ai/api/v1",
model_name="anthropic/claude-sonnet-4.5",
server_type="openai",
api_key=os.getenv("OPENROUTER_API_KEY", ""),
health_check=False,
)
]
return env_config, server_configs
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._dataset = None
self._train_dataset = None
self._eval_dataset = None
self._dataset_indices = []
self._current_index = 0
# Metrics tracking for wandb - single buffer with dicts
self._metrics_buffer = []
# Debug: check server config
if hasattr(self, 'server') and hasattr(self.server, 'servers'):
for i, srv in enumerate(self.server.servers):
logger.debug(f"Server {i}: model_name={getattr(srv.config, 'model_name', 'NONE')}")
async def setup(self):
"""Load dataset from HuggingFace or local directory."""
if not self.config.use_dataset:
logger.info("Using procedural task generation (not implemented yet)")
return
# If tasks_base_dir is set, load from local directory instead of HuggingFace
if self.config.tasks_base_dir:
tasks_base = Path(os.path.expanduser(self.config.tasks_base_dir))
# Resolve to absolute path if relative
if not tasks_base.is_absolute():
tasks_base = Path.cwd() / tasks_base
tasks_base = tasks_base.resolve()
if not tasks_base.exists():
raise RuntimeError(f"tasks_base_dir not found: {tasks_base}")
logger.info(f"Loading tasks from local directory: {tasks_base}")
# Find all task_* directories
task_dirs = sorted(tasks_base.glob("task_*"))
logger.info(f"Found {len(task_dirs)} task directories")
if not task_dirs:
# Debug: show what's actually in the directory
all_items = list(tasks_base.iterdir())
logger.warning(f"Directory contains {len(all_items)} items:")
for item in all_items[:10]:
logger.warning(f" - {item.name} ({'dir' if item.is_dir() else 'file'})")
raise RuntimeError(f"No task_* directories found in {tasks_base}")
# Create fake dataset items (just the directory paths)
self._dataset = [
{
"description": f"Task from {task_dir.name}",
"extra_info": {"task_dir": str(task_dir)},
}
for task_dir in task_dirs
]
logger.info(f"Loaded {len(self._dataset)} tasks from local directory")
self._split_dataset()
return
# Otherwise, load from HuggingFace
logger.info(f"Loading dataset from HuggingFace: {self.config.dataset_name}")
try:
from datasets import load_dataset
self._dataset = await asyncio.get_event_loop().run_in_executor(
None,
lambda: load_dataset(
self.config.dataset_name,
split=self.config.dataset_split,
cache_dir=os.path.expanduser(self.config.dataset_cache_dir)
)
)
logger.info(f"Loaded {len(self._dataset)} tasks from HuggingFace")
self._split_dataset()
except Exception as e:
logger.error(f"ERROR loading dataset: {e}")
raise
def _split_dataset(self):
"""Split dataset into train and eval sets based on eval_split_ratio."""
if self._dataset is None or len(self._dataset) == 0:
raise RuntimeError("Cannot split empty dataset")
total_size = len(self._dataset)
eval_size = int(total_size * self.config.eval_split_ratio)
train_size = total_size - eval_size
all_indices = list(range(total_size))
random.shuffle(all_indices)
train_indices = all_indices[:train_size]
eval_indices = all_indices[train_size:]
if isinstance(self._dataset, list):
self._train_dataset = [self._dataset[i] for i in train_indices]
self._eval_dataset = [self._dataset[i] for i in eval_indices]
else:
self._train_dataset = self._dataset.select(train_indices)
self._eval_dataset = self._dataset.select(eval_indices)
self._dataset_indices = list(range(len(self._train_dataset)))
random.shuffle(self._dataset_indices)
self._current_index = 0
logger.info(
f"Split dataset: {len(self._train_dataset)} train, "
f"{len(self._eval_dataset)} eval "
f"(ratio={self.config.eval_split_ratio:.1%})"
)
async def get_next_item(self) -> Item:
"""Sample next task from training dataset."""
if self._train_dataset is None:
raise RuntimeError("Dataset not loaded. Call setup() first.")
# Get next task (with wraparound)
idx = self._dataset_indices[self._current_index]
task = self._train_dataset[idx]
# Advance to next task
self._current_index += 1
if self._current_index >= len(self._dataset_indices):
# Reshuffle for next epoch
random.shuffle(self._dataset_indices)
self._current_index = 0
logger.info("Reshuffled dataset (completed one epoch)")
# Extract task directory path
task_dir = task.get("extra_info", {}).get("task_dir")
if not task_dir:
task_dir = task.get("reward_spec", {}).get("ground_truth")
# Resolve task directory path
if task_dir:
task_dir_path = Path(task_dir)
# If tasks_base_dir is configured and path doesn't exist, reconstruct it
if self.config.tasks_base_dir and not task_dir_path.exists():
original_path = Path(task_dir)
task_name = original_path.name
task_dir_path = Path(os.path.expanduser(self.config.tasks_base_dir)) / task_name
else:
logger.error("No task directory path found in dataset item")
return await self.get_next_item()
# Verify directory exists
if not task_dir_path.exists():
logger.warning(f"Task dir not found: {task_dir_path}")
logger.warning("Hint: Set tasks_base_dir to directory containing task_* folders")
return await self.get_next_item() # Try next task
# Look for test file in tests/ subdirectory first, then at root
final_test = task_dir_path / "tests" / "test_final_state.py"
if not final_test.exists():
final_test = task_dir_path / "test_final_state.py"
# Verify test file exists
if not final_test.exists():
logger.warning(f"Missing test file in {task_dir_path} (checked tests/ and root)")
return await self.get_next_item()
# Parse container.def to extract Docker image
# Check environment/ subdirectory first, then root
container_def = task_dir_path / "environment" / "container.def"
if not container_def.exists():
container_def = task_dir_path / "container.def"
docker_image = self._parse_docker_image_from_def(container_def)
# Try to load description from instruction.md or task.json
description = task.get("description", "")
# First try instruction.md
instruction_md = task_dir_path / "instruction.md"
if not description and instruction_md.exists():
try:
description = instruction_md.read_text().strip()
except Exception as e:
logger.warning(f"Failed to load instruction.md for {task_dir_path.name}: {e}")
# Fallback to task.json in environment/
if not description:
task_json = task_dir_path / "environment" / "task.json"
if task_json.exists():
try:
import json
task_data = json.loads(task_json.read_text())
description = task_data.get("description", "") or task_data.get("instruction", "")
except Exception as e:
logger.warning(f"Failed to load task.json for {task_dir_path.name}: {e}")
if not description:
description = f"Complete the task in {task_dir_path.name}"
return {
"task_id": f"{task_dir_path.name}",
"task_name": task_dir_path.name,
"description": description,
"task_dir": str(task_dir_path),
"final_test": str(final_test),
"docker_image": docker_image,
"dataset_index": idx,
}
def format_prompt(self, item: Item) -> str:
"""Return the task description for the agent."""
return str(item.get("description", ""))
def _parse_docker_image_from_def(self, container_def_path: Path) -> str:
"""
Parse container.def file to extract the Docker base image.
Apptainer definition files typically look like:
Bootstrap: docker
From: ubuntu:22.04
Returns the image from the "From:" line, or falls back to default.
"""
if not container_def_path.exists():
logger.warning(f"container.def not found at {container_def_path}, using default image")
return self.config.default_docker_image
try:
content = container_def_path.read_text()
# Look for "From: <image>" line (case-insensitive)
match = re.search(r'^From:\s*(.+)$', content, re.MULTILINE | re.IGNORECASE)
if match:
image = match.group(1).strip()
logger.info(f"Extracted Docker image from container.def: {image}")
return image
except Exception as e:
logger.warning(f"Failed to parse {container_def_path}: {e}")
logger.warning(f"Could not extract image from {container_def_path}, using default")
return self.config.default_docker_image
async def collect_trajectory(
self, item: Item
) -> Tuple[Optional[ScoredDataItem], List[Item]]:
"""
Override to register per-task Docker image before running the agent.
Follows Terminal Bench 2 pattern: register_task_env_overrides() tells
the hermes-agent terminal backend to use a specific Docker image for
this task_id.
This is a copy of HermesAgentBaseEnv.collect_trajectory with Docker
image registration added after task_id generation.
"""
import uuid
from environments.agent_loop import HermesAgentLoop
task_id = str(uuid.uuid4())
task_name = item.get("task_name", "unknown")
docker_image = item.get("docker_image", self.config.default_docker_image)
logger.debug(f"collect_trajectory START for {task_name}")
# Register Docker image override for this task_id
logger.debug(f"Registering Docker image: {docker_image}")
register_task_env_overrides(task_id, {"modal_image": docker_image})
logger.info(
f"Task {task_name}: registered Docker image {docker_image} for task_id {task_id[:8]}"
)
logger.debug("Docker image registered")
try:
# Get group-level tools (resolved once in collect_trajectories)
logger.debug("Resolving tools...")
if self._current_group_tools is None:
tools, valid_names = self._resolve_tools_for_group()
else:
tools, valid_names = self._current_group_tools
logger.debug(f"Tools resolved: {len(tools)} tools")
# Build initial messages
logger.debug("Building initial messages...")
messages: List[Dict[str, Any]] = []
if self.config.system_prompt:
messages.append({"role": "system", "content": self.config.system_prompt})
messages.append({"role": "user", "content": self.format_prompt(item)})
logger.debug("Messages built, starting agent loop...")
# Run the agent loop
result: AgentResult
managed_state: Optional[Dict[str, Any]] = None
if self._use_managed_server():
# Phase 2: ManagedServer with parser
from environments.tool_call_parsers import get_parser
try:
tc_parser = get_parser(self.config.tool_call_parser)
except KeyError:
logger.warning(
"Tool call parser '%s' not found, falling back to 'hermes'",
self.config.tool_call_parser,
)
tc_parser = get_parser("hermes")
try:
async with self.server.managed_server(
tokenizer=self.tokenizer,
tool_call_parser=tc_parser,
) as managed:
agent = HermesAgentLoop(
server=managed,
tool_schemas=tools,
valid_tool_names=valid_names,
max_turns=self.config.max_agent_turns,
task_id=task_id,
temperature=self.config.agent_temperature,
max_tokens=self.config.max_token_length,
extra_body=self.config.extra_body,
)
result = await agent.run(messages)
# Get state directly from managed server while still in context
managed_state = managed.get_state()
except NotImplementedError:
# DummyManagedServer not allowed
logger.warning("ManagedServer not available. Falling back to direct server mode.")
agent = HermesAgentLoop(
server=self.server,
tool_schemas=tools,
valid_tool_names=valid_names,
max_turns=self.config.max_agent_turns,
task_id=task_id,
temperature=self.config.agent_temperature,
max_tokens=self.config.max_token_length,
extra_body=self.config.extra_body,
)
result = await agent.run(messages)
else:
# Phase 1: OpenAI server
agent = HermesAgentLoop(
server=self.server,
tool_schemas=tools,
valid_tool_names=valid_names,
max_turns=self.config.max_agent_turns,
task_id=task_id,
temperature=self.config.agent_temperature,
max_tokens=self.config.max_token_length,
extra_body=self.config.extra_body,
)
result = await agent.run(messages)
# Skip reward computation if agent produced no output
only_system_and_user = all(
msg.get("role") in ("system", "user") for msg in result.messages
)
if result.turns_used == 0 or only_system_and_user:
logger.warning(
"Agent loop produced no output (turns=%d). Skipping trajectory.",
result.turns_used,
)
# Return None to skip this trajectory (likely an API failure)
return None, []
else:
# Compute reward using ToolContext
ctx = ToolContext(task_id)
try:
reward = await self.compute_reward(item, result, ctx)
except Exception as e:
logger.error("compute_reward failed: %s", e)
reward = 0.0
finally:
ctx.cleanup()
# Track metrics for wandb logging
task_metrics = {
"test_passed": 1.0 if reward > 0.5 else 0.0,
"reward": reward,
"turns_used": result.turns_used,
"finished_naturally": result.finished_naturally,
"docker_image": docker_image,
"num_tool_errors": len(result.tool_errors),
}
# Include detailed tool errors if any occurred
if result.tool_errors:
task_metrics["tool_errors"] = [
{
"turn": err.turn,
"tool": err.tool_name,
"error": err.error[:200],
}
for err in result.tool_errors
]
self._metrics_buffer.append(task_metrics)
# ============================================================================
# Build ScoredDataGroup from ManagedServer state
# ============================================================================
# Phase 2: Extract pre-computed data from SequenceNodes
# We may have multiple trajectories in the nodes due to how interesting
# agents can be, so iterate through all nodes and return multiple sequences.
#
# Each SequenceNode contains:
# - tokens: Full unmasked token sequence [1, 2, 3, ..., N]
# - masked_tokens: Training format [-100, -100, ..., -100, actual, actual, ...]
# - logprobs: Training format [1.0, 1.0, ..., 1.0, -0.5, -0.3, ...]
# - full_text: Complete text (prompt + all completions)
#
# Phase 1: Create placeholder tokens for OpenAI-style servers
# ============================================================================
nodes = (managed_state or {}).get("nodes", []) if managed_state else []
# Create ScoredDataGroup with lists for multiple trajectories
scored_group = ScoredDataGroup()
scored_group["tokens"] = []
scored_group["masks"] = []
scored_group["scores"] = []
scored_group["messages"] = []
scored_group["inference_logprobs"] = []
if nodes:
# Phase 2: iterate through all nodes (may have multiple trajectories)
for i, node in enumerate(nodes):
scored_group["tokens"].append(node.tokens)
scored_group["masks"].append(node.masked_tokens)
scored_group["scores"].append(reward)
scored_group["messages"].append(result.messages)
if hasattr(node, "logprobs") and node.logprobs:
scored_group["inference_logprobs"].append(node.logprobs)
else:
# Placeholder logprobs if not available
scored_group["inference_logprobs"].append([1.0] * len(node.tokens))
logger.debug(f"Added trajectory {i+1}/{len(nodes)} with {len(node.tokens)} tokens")
else:
# Phase 1: create placeholder tokens for OpenAI-style servers
full_text = "\n".join(
msg.get("content", "") for msg in result.messages if msg.get("content")
)
if self.tokenizer:
tokens = self.tokenizer.encode(full_text, add_special_tokens=True)
else:
tokens = list(range(min(len(full_text) // 4, 128)))
scored_group["tokens"].append(tokens)
scored_group["masks"].append([-100] + tokens[1:])
scored_group["scores"].append(reward)
scored_group["messages"].append(result.messages)
scored_group["inference_logprobs"].append([1.0] * len(tokens))
# Return None if no trajectories collected
if len(scored_group["tokens"]) == 0:
return None, []
logger.debug(f"Returning ScoredDataGroup with {len(scored_group['tokens'])} trajectories")
return scored_group, []
finally:
# Clean up task overrides and sandbox
clear_task_env_overrides(task_id)
try:
cleanup_vm(task_id)
except Exception as e:
logger.debug(f"VM cleanup for {task_id[:8]}: {e}")
async def compute_reward(
self,
item: Item,
result: AgentResult,
ctx: ToolContext
) -> float:
"""
Run final tests in the agent's sandbox and return binary reward.
Uses ToolContext to execute pytest in the SAME sandbox the agent used,
following the Terminal Bench 2 verification pattern. No separate
Apptainer execution needed.
Returns 1.0 if tests pass, 0.0 otherwise.
"""
task_name = item.get("task_name", "unknown")
final_test_path = Path(item.get("final_test", ""))
if not final_test_path.exists():
logger.error(f"Task {task_name}: test file not found at {final_test_path}")
return 0.0
logger.info(f"Task {task_name}: running tests in sandbox...")
try:
# Run tests in a thread to avoid blocking the event loop
loop = asyncio.get_event_loop()
reward = await loop.run_in_executor(
None,
self._run_tests_in_sandbox,
final_test_path,
ctx,
task_name,
)
status = "PASS" if reward == 1.0 else "FAIL"
logger.info(f"Task {task_name}: {status} (reward={reward})")
return reward
except Exception as e:
logger.error(f"Task {task_name}: test execution failed: {e}", exc_info=True)
return 0.0
def _run_tests_in_sandbox(
self,
test_file_path: Path,
ctx: ToolContext,
task_name: str,
) -> float:
"""
Upload test file to sandbox and execute pytest.
Runs in thread pool (via run_in_executor) to avoid blocking the event loop
with synchronous ToolContext calls.
Args:
test_file_path: Local path to test_final_state.py
ctx: ToolContext scoped to the agent's sandbox
task_name: For logging
Returns:
1.0 if tests pass, 0.0 otherwise
"""
try:
# Upload test file to sandbox
test_content = test_file_path.read_text()
ctx.write_file("/workspace/test_final_state.py", test_content)
logger.debug(f"Task {task_name}: uploaded test file to /workspace/test_final_state.py")
# Run pytest in the sandbox
result = ctx.terminal(
"cd /workspace && python -m pytest -q test_final_state.py",
timeout=self.config.test_timeout_s,
)
exit_code = result.get("exit_code", -1)
output = result.get("output", "")
if exit_code == 0:
logger.debug(f"Task {task_name}: tests passed")
return 1.0
else:
# Log failure output (last 500 chars for debugging)
output_preview = output[-500:] if output else "(no output)"
logger.info(
f"Task {task_name}: tests failed (exit_code={exit_code})\n{output_preview}"
)
return 0.0
except Exception as e:
logger.error(f"Task {task_name}: error running tests: {e}")
return 0.0
async def evaluate(self):
"""
Periodic evaluation on holdout eval set.
Runs the agent on num_eval_tasks from the held-out eval set
(never seen during training). Returns metrics for wandb logging.
"""
if self._eval_dataset is None:
logger.warning("Cannot evaluate: eval dataset not loaded")
return {}
if len(self._eval_dataset) == 0:
logger.warning("Eval dataset is empty")
return {}
# Use min of num_eval_tasks and actual eval set size
num_tasks = min(self.config.num_eval_tasks, len(self._eval_dataset))
logger.info(f"Starting evaluation on {num_tasks} held-out tasks...")
eval_metrics = {
"rewards": [],
"passes": [],
"turns": [],
"natural_finishes": [],
}
# Sample from eval set (holdout)
import random
eval_indices = random.sample(range(len(self._eval_dataset)), num_tasks)
for idx in eval_indices:
task = self._eval_dataset[idx]
# Build item using same logic as get_next_item
task_dir = task.get("extra_info", {}).get("task_dir")
if not task_dir:
task_dir = task.get("reward_spec", {}).get("ground_truth")
if not task_dir:
continue
task_dir_path = Path(task_dir)
if self.config.tasks_base_dir and not task_dir_path.exists():
original_path = Path(task_dir)
task_name = original_path.name
task_dir_path = Path(os.path.expanduser(self.config.tasks_base_dir)) / task_name
if not task_dir_path.exists():
continue
# Find test file
final_test = task_dir_path / "tests" / "test_final_state.py"
if not final_test.exists():
final_test = task_dir_path / "test_final_state.py"
if not final_test.exists():
continue
# Parse Docker image
container_def = task_dir_path / "environment" / "container.def"
if not container_def.exists():
container_def = task_dir_path / "container.def"
docker_image = self._parse_docker_image_from_def(container_def)
# Load description
description = task.get("description", "")
instruction_md = task_dir_path / "instruction.md"
if not description and instruction_md.exists():
try:
description = instruction_md.read_text().strip()
except Exception:
pass
item = {
"description": description,
"final_test": str(final_test),
"docker_image": docker_image,
}
# Run agent on this task
try:
import uuid
task_id = str(uuid.uuid4())
# Register task environment
from model_tools import register_task_env_overrides
register_task_env_overrides(task_id, {"modal_image": docker_image})
# Build messages
messages = [
{"role": "system", "content": self.config.system_prompt},
{"role": "user", "content": description or "Complete the task."},
]
# Get tools
from model_tools import get_tool_definitions
tools = get_tool_definitions(self.config.enabled_toolsets)
valid_names = {t["function"]["name"] for t in tools}
# Run agent
from environments.agent_loop import HermesAgentLoop
agent = HermesAgentLoop(
server=self.server,
tool_schemas=tools,
valid_tool_names=valid_names,
max_turns=self.config.max_agent_turns,
task_id=task_id,
temperature=self.config.agent_temperature,
max_tokens=self.config.max_token_length,
extra_body=self.config.extra_body,
)
result = await agent.run(messages)
# Compute reward
from environments.tool_context import ToolContext
ctx = ToolContext(task_id)
try:
reward = await self.compute_reward(item, result, ctx)
except Exception as e:
logger.warning(f"Eval reward computation failed: {e}")
reward = 0.0
finally:
ctx.cleanup()
# Track metrics
eval_metrics["rewards"].append(reward)
eval_metrics["passes"].append(1.0 if reward > 0.5 else 0.0)
eval_metrics["turns"].append(result.turns_used)
eval_metrics["natural_finishes"].append(1.0 if result.finished_naturally else 0.0)
except Exception as e:
logger.error(f"Eval task failed: {e}")
continue
finally:
# Cleanup
from model_tools import clear_task_env_overrides, cleanup_vm
clear_task_env_overrides(task_id)
cleanup_vm(task_id)
# Aggregate metrics
if not eval_metrics["rewards"]:
logger.warning("No eval tasks completed successfully")
return {}
aggregated = {
"eval/pass_rate": sum(eval_metrics["passes"]) / len(eval_metrics["passes"]),
"eval/avg_reward": sum(eval_metrics["rewards"]) / len(eval_metrics["rewards"]),
"eval/avg_turns": sum(eval_metrics["turns"]) / len(eval_metrics["turns"]),
"eval/natural_finish_rate": sum(eval_metrics["natural_finishes"]) / len(eval_metrics["natural_finishes"]),
"eval/num_tasks": len(eval_metrics["rewards"]),
}
logger.info(f"Evaluation complete: pass_rate={aggregated['eval/pass_rate']:.2%}, avg_turns={aggregated['eval/avg_turns']:.1f}")
return aggregated
async def wandb_log(self, wandb_metrics: Optional[Dict] = None):
"""Log Endless Terminals specific metrics to wandb."""
if wandb_metrics is None:
wandb_metrics = {}
# Aggregate metrics from buffer
if self._metrics_buffer:
# Test pass rate
test_passes = [m["test_passed"] for m in self._metrics_buffer]
wandb_metrics["endless_terminals/test_pass_rate"] = sum(test_passes) / len(test_passes)
wandb_metrics["endless_terminals/num_tests_passed"] = sum(test_passes)
wandb_metrics["endless_terminals/num_tests_total"] = len(test_passes)
# Turns used statistics
turns = [m["turns_used"] for m in self._metrics_buffer]
wandb_metrics["endless_terminals/avg_turns_used"] = sum(turns) / len(turns)
wandb_metrics["endless_terminals/max_turns_used"] = max(turns)
wandb_metrics["endless_terminals/min_turns_used"] = min(turns)
# Natural finish rate (did model stop on its own vs hitting max turns)
natural_finishes = [1.0 if m["finished_naturally"] else 0.0 for m in self._metrics_buffer]
wandb_metrics["endless_terminals/natural_finish_rate"] = sum(natural_finishes) / len(natural_finishes)
# Tool error statistics
total_tool_errors = sum(m["num_tool_errors"] for m in self._metrics_buffer)
wandb_metrics["endless_terminals/total_tool_errors"] = total_tool_errors
wandb_metrics["endless_terminals/avg_tool_errors_per_task"] = total_tool_errors / len(self._metrics_buffer)
# Docker image distribution (count unique images used)
docker_images = [m["docker_image"] for m in self._metrics_buffer]
unique_images = set(docker_images)
wandb_metrics["endless_terminals/num_unique_docker_images"] = len(unique_images)
# Log most common errors if any
all_errors = []
for m in self._metrics_buffer:
if "tool_errors" in m:
all_errors.extend(m["tool_errors"])
if all_errors:
# Count error types
error_tools = {}
for err in all_errors:
tool = err["tool"]
error_tools[tool] = error_tools.get(tool, 0) + 1
# Log top 3 error-prone tools
for i, (tool, count) in enumerate(sorted(error_tools.items(), key=lambda x: x[1], reverse=True)[:3]):
wandb_metrics[f"endless_terminals/errors_by_tool/{tool}"] = count
# Clear buffer after logging
self._metrics_buffer = []
await super().wandb_log(wandb_metrics)
if __name__ == "__main__":
EndlessTerminalsEnv.cli()
+2 -2
View File
@@ -114,8 +114,8 @@ class HermesAgentEnvConfig(BaseEnvConfig):
# --- Terminal backend ---
terminal_backend: str = Field(
default="local",
description="Terminal backend: 'local', 'docker', 'modal', 'daytona', 'ssh', 'singularity'. "
"Modal or Daytona recommended for production RL (cloud isolation per rollout).",
description="Terminal backend: 'local', 'docker', 'modal', 'ssh', 'singularity'. "
"Modal recommended for production RL (cloud isolation per rollout).",
)
terminal_timeout: int = Field(
default=120,
@@ -35,8 +35,7 @@ class DeepSeekV31ToolCallParser(ToolCallParser):
# Regex captures: function_name, function_arguments
PATTERN = re.compile(
r"<tool▁call▁begin>(?P<function_name>.*?)<tool▁sep>(?P<function_arguments>.*?)<tool▁call▁end>",
re.DOTALL,
r"<tool▁call▁begin>(?P<function_name>.*?)<tool▁sep>(?P<function_arguments>.*?)<tool▁call▁end>"
)
def parse(self, text: str) -> ParseResult:
@@ -38,8 +38,7 @@ class DeepSeekV3ToolCallParser(ToolCallParser):
# Regex captures: type, function_name, function_arguments
PATTERN = re.compile(
r"<tool▁call▁begin>(?P<type>.*)<tool▁sep>(?P<function_name>.*)\n```json\n(?P<function_arguments>.*)\n```<tool▁call▁end>",
re.DOTALL,
r"<tool▁call▁begin>(?P<type>.*)<tool▁sep>(?P<function_name>.*)\n```json\n(?P<function_arguments>.*)\n```<tool▁call▁end>"
)
def parse(self, text: str) -> ParseResult:
+4 -5
View File
@@ -44,10 +44,9 @@ _tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str) -> str:
"""
Run a tool call in a thread pool executor so backends that use asyncio.run()
internally (modal, docker, daytona) get a clean event loop.
internally (modal, docker) get a clean event loop.
If we're already in an async context, executes handle_function_call() in a
disposable worker thread and blocks for the result.
If we're already in an async context, uses run_in_executor.
If not (e.g., called from sync code), runs directly.
"""
try:
@@ -95,7 +94,7 @@ class ToolContext:
backend = os.getenv("TERMINAL_ENV", "local")
logger.debug("ToolContext.terminal [%s backend] task=%s: %s", backend, self.task_id[:8], command[:100])
# Run via thread helper so modal/docker/daytona backends' asyncio.run() doesn't deadlock
# Run in thread pool so modal/docker backends' asyncio.run() doesn't deadlock
result = _run_tool_in_thread(
"terminal",
{"command": command, "timeout": timeout},
@@ -333,7 +332,7 @@ class ToolContext:
Dict with search results
"""
result = handle_function_call(
"search_files", {"pattern": query, "path": path}, task_id=self.task_id
"search", {"query": query, "path": path}, task_id=self.task_id
)
try:
return json.loads(result)
-237
View File
@@ -1,237 +0,0 @@
"""
Channel directory -- cached map of reachable channels/contacts per platform.
Built on gateway startup, refreshed periodically (every 5 min), and saved to
~/.hermes/channel_directory.json. The send_message tool reads this file for
action="list" and for resolving human-friendly channel names to numeric IDs.
"""
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
DIRECTORY_PATH = Path.home() / ".hermes" / "channel_directory.json"
# ---------------------------------------------------------------------------
# Build / refresh
# ---------------------------------------------------------------------------
def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
"""
Build a channel directory from connected platform adapters and session data.
Returns the directory dict and writes it to DIRECTORY_PATH.
"""
from gateway.config import Platform
platforms: Dict[str, List[Dict[str, str]]] = {}
for platform, adapter in adapters.items():
try:
if platform == Platform.DISCORD:
platforms["discord"] = _build_discord(adapter)
elif platform == Platform.SLACK:
platforms["slack"] = _build_slack(adapter)
except Exception as e:
logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
# Telegram & WhatsApp can't enumerate chats -- pull from session history
for plat_name in ("telegram", "whatsapp"):
if plat_name not in platforms:
platforms[plat_name] = _build_from_sessions(plat_name)
directory = {
"updated_at": datetime.now().isoformat(),
"platforms": platforms,
}
try:
DIRECTORY_PATH.parent.mkdir(parents=True, exist_ok=True)
with open(DIRECTORY_PATH, "w") as f:
json.dump(directory, f, indent=2, ensure_ascii=False)
except Exception as e:
logger.warning("Channel directory: failed to write: %s", e)
return directory
def _build_discord(adapter) -> List[Dict[str, str]]:
"""Enumerate all text channels the Discord bot can see."""
channels = []
client = getattr(adapter, "_client", None)
if not client:
return channels
try:
import discord as _discord
except ImportError:
return channels
for guild in client.guilds:
for ch in guild.text_channels:
channels.append({
"id": str(ch.id),
"name": ch.name,
"guild": guild.name,
"type": "channel",
})
# Also include DM-capable users we've interacted with is not
# feasible via guild enumeration; those come from sessions.
# Merge any DMs from session history
channels.extend(_build_from_sessions("discord"))
return channels
def _build_slack(adapter) -> List[Dict[str, str]]:
"""List Slack channels the bot has joined."""
channels = []
# Slack adapter may expose a web client
client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
if not client:
return _build_from_sessions("slack")
try:
import asyncio
from tools.send_message_tool import _send_slack # noqa: F401
# Use the Slack Web API directly if available
except Exception:
pass
# Fallback to session data
return _build_from_sessions("slack")
def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
"""Pull known channels/contacts from sessions.json origin data."""
sessions_path = Path.home() / ".hermes" / "sessions" / "sessions.json"
if not sessions_path.exists():
return []
entries = []
try:
with open(sessions_path) as f:
data = json.load(f)
seen_ids = set()
for _key, session in data.items():
origin = session.get("origin") or {}
if origin.get("platform") != platform_name:
continue
chat_id = origin.get("chat_id")
if not chat_id or chat_id in seen_ids:
continue
seen_ids.add(chat_id)
entries.append({
"id": str(chat_id),
"name": origin.get("chat_name") or origin.get("user_name") or str(chat_id),
"type": session.get("chat_type", "dm"),
})
except Exception as e:
logger.debug("Channel directory: failed to read sessions for %s: %s", platform_name, e)
return entries
# ---------------------------------------------------------------------------
# Read / resolve
# ---------------------------------------------------------------------------
def load_directory() -> Dict[str, Any]:
"""Load the cached channel directory from disk."""
if not DIRECTORY_PATH.exists():
return {"updated_at": None, "platforms": {}}
try:
with open(DIRECTORY_PATH) as f:
return json.load(f)
except Exception:
return {"updated_at": None, "platforms": {}}
def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
"""
Resolve a human-friendly channel name to a numeric ID.
Matching strategy (case-insensitive, first match wins):
- Discord: "bot-home", "#bot-home", "GuildName/bot-home"
- Telegram: display name or group name
- Slack: "engineering", "#engineering"
"""
directory = load_directory()
channels = directory.get("platforms", {}).get(platform_name, [])
if not channels:
return None
query = name.lstrip("#").lower()
# 1. Exact name match
for ch in channels:
if ch["name"].lower() == query:
return ch["id"]
# 2. Guild-qualified match for Discord ("GuildName/channel")
if "/" in query:
guild_part, ch_part = query.rsplit("/", 1)
for ch in channels:
guild = ch.get("guild", "").lower()
if guild == guild_part and ch["name"].lower() == ch_part:
return ch["id"]
# 3. Partial prefix match (only if unambiguous)
matches = [ch for ch in channels if ch["name"].lower().startswith(query)]
if len(matches) == 1:
return matches[0]["id"]
return None
def format_directory_for_display() -> str:
"""Format the channel directory as a human-readable list for the model."""
directory = load_directory()
platforms = directory.get("platforms", {})
if not any(platforms.values()):
return "No messaging platforms connected or no channels discovered yet."
lines = ["Available messaging targets:\n"]
for plat_name, channels in sorted(platforms.items()):
if not channels:
continue
# Group Discord channels by guild
if plat_name == "discord":
guilds: Dict[str, List] = {}
dms: List = []
for ch in channels:
guild = ch.get("guild")
if guild:
guilds.setdefault(guild, []).append(ch)
else:
dms.append(ch)
for guild_name, guild_channels in sorted(guilds.items()):
lines.append(f"Discord ({guild_name}):")
for ch in sorted(guild_channels, key=lambda c: c["name"]):
lines.append(f" discord:#{ch['name']}")
if dms:
lines.append("Discord (DMs):")
for ch in dms:
lines.append(f" discord:{ch['name']}")
lines.append("")
else:
lines.append(f"{plat_name.title()}:")
for ch in channels:
type_label = f" ({ch['type']})" if ch.get("type") else ""
lines.append(f" {plat_name}:{ch['name']}{type_label}")
lines.append("")
lines.append('Use these as the "target" parameter when sending.')
lines.append('Bare platform name (e.g. "telegram") sends to home channel.')
return "\n".join(lines)
+1 -66
View File
@@ -8,7 +8,6 @@ Handles loading and validating configuration for:
- Delivery preferences
"""
import logging
import os
import json
from pathlib import Path
@@ -16,8 +15,6 @@ from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any
from enum import Enum
logger = logging.getLogger(__name__)
class Platform(Enum):
"""Supported messaging platforms."""
@@ -26,7 +23,6 @@ class Platform(Enum):
DISCORD = "discord"
WHATSAPP = "whatsapp"
SLACK = "slack"
HOMEASSISTANT = "homeassistant"
@dataclass
@@ -66,9 +62,8 @@ class SessionResetPolicy:
- "daily": Reset at a specific hour each day
- "idle": Reset after N minutes of inactivity
- "both": Whichever triggers first (daily boundary OR idle timeout)
- "none": Never auto-reset (context managed only by compression)
"""
mode: str = "both" # "daily", "idle", "both", or "none"
mode: str = "both" # "daily", "idle", or "both"
at_hour: int = 4 # Hour for daily reset (0-23, local time)
idle_minutes: int = 1440 # Minutes of inactivity before reset (24 hours)
@@ -266,58 +261,9 @@ def load_gateway_config() -> GatewayConfig:
except Exception as e:
print(f"[gateway] Warning: Failed to load {gateway_config_path}: {e}")
# Bridge session_reset from config.yaml (the user-facing config file)
# into the gateway config. config.yaml takes precedence over gateway.json
# for session reset policy since that's where hermes setup writes it.
try:
import yaml
config_yaml_path = Path.home() / ".hermes" / "config.yaml"
if config_yaml_path.exists():
with open(config_yaml_path) as f:
yaml_cfg = yaml.safe_load(f) or {}
sr = yaml_cfg.get("session_reset")
if sr and isinstance(sr, dict):
config.default_reset_policy = SessionResetPolicy.from_dict(sr)
except Exception:
pass
# Override with environment variables
_apply_env_overrides(config)
# --- Validate loaded values ---
policy = config.default_reset_policy
if not (0 <= policy.at_hour <= 23):
logger.warning(
"Invalid at_hour=%s (must be 0-23). Using default 4.", policy.at_hour
)
policy.at_hour = 4
if policy.idle_minutes is None or policy.idle_minutes <= 0:
logger.warning(
"Invalid idle_minutes=%s (must be positive). Using default 1440.",
policy.idle_minutes,
)
policy.idle_minutes = 1440
# Warn about empty bot tokens — platforms that loaded an empty string
# won't connect and the cause can be confusing without a log line.
_token_env_names = {
Platform.TELEGRAM: "TELEGRAM_BOT_TOKEN",
Platform.DISCORD: "DISCORD_BOT_TOKEN",
Platform.SLACK: "SLACK_BOT_TOKEN",
}
for platform, pconfig in config.platforms.items():
if not pconfig.enabled:
continue
env_name = _token_env_names.get(platform)
if env_name and pconfig.token is not None and not pconfig.token.strip():
logger.warning(
"%s is enabled but %s is empty. "
"The adapter will likely fail to connect.",
platform.value, env_name,
)
return config
@@ -379,17 +325,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
name=os.getenv("SLACK_HOME_CHANNEL_NAME", ""),
)
# Home Assistant
hass_token = os.getenv("HASS_TOKEN")
if hass_token:
if Platform.HOMEASSISTANT not in config.platforms:
config.platforms[Platform.HOMEASSISTANT] = PlatformConfig()
config.platforms[Platform.HOMEASSISTANT].enabled = True
config.platforms[Platform.HOMEASSISTANT].token = hass_token
hass_url = os.getenv("HASS_URL")
if hass_url:
config.platforms[Platform.HOMEASSISTANT].extra["url"] = hass_url
# Session settings
idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
if idle_minutes:
+4 -26
View File
@@ -8,19 +8,14 @@ Routes messages to the appropriate destination based on:
- Local (always saved to files)
"""
import logging
import json
from pathlib import Path
from datetime import datetime
from dataclasses import dataclass
from typing import Dict, List, Optional, Any, Union
from enum import Enum
logger = logging.getLogger(__name__)
MAX_PLATFORM_OUTPUT = 4000
TRUNCATED_VISIBLE = 3800
from .config import Platform, GatewayConfig
from .config import Platform, GatewayConfig, HomeChannel
from .session import SessionSource
@@ -251,15 +246,6 @@ class DeliveryRouter:
"timestamp": timestamp
}
def _save_full_output(self, content: str, job_id: str) -> Path:
"""Save full cron output to disk and return the file path."""
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
out_dir = Path.home() / ".hermes" / "cron" / "output"
out_dir.mkdir(parents=True, exist_ok=True)
path = out_dir / f"{job_id}_{timestamp}.txt"
path.write_text(content)
return path
async def _deliver_to_platform(
self,
target: DeliveryTarget,
@@ -275,16 +261,8 @@ class DeliveryRouter:
if not target.chat_id:
raise ValueError(f"No chat ID for {target.platform.value} delivery")
# Guard: truncate oversized cron output to stay within platform limits
if len(content) > MAX_PLATFORM_OUTPUT:
job_id = (metadata or {}).get("job_id", "unknown")
saved_path = self._save_full_output(content, job_id)
logger.info("Cron output truncated (%d chars) — full output: %s", len(content), saved_path)
content = (
content[:TRUNCATED_VISIBLE]
+ f"\n\n... [truncated, full output saved to {saved_path}]"
)
# Call the adapter's send method
# Adapters should implement: async def send(chat_id: str, content: str) -> Dict
return await adapter.send(target.chat_id, content, metadata=metadata)
-123
View File
@@ -1,123 +0,0 @@
"""
Session mirroring for cross-platform message delivery.
When a message is sent to a platform (via send_message or cron delivery),
this module appends a "delivery-mirror" record to the target session's
transcript so the receiving-side agent has context about what was sent.
Standalone -- works from CLI, cron, and gateway contexts without needing
the full SessionStore machinery.
"""
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
_SESSIONS_DIR = Path.home() / ".hermes" / "sessions"
_SESSIONS_INDEX = _SESSIONS_DIR / "sessions.json"
def mirror_to_session(
platform: str,
chat_id: str,
message_text: str,
source_label: str = "cli",
) -> bool:
"""
Append a delivery-mirror message to the target session's transcript.
Finds the gateway session that matches the given platform + chat_id,
then writes a mirror entry to both the JSONL transcript and SQLite DB.
Returns True if mirrored successfully, False if no matching session or error.
All errors are caught -- this is never fatal.
"""
try:
session_id = _find_session_id(platform, str(chat_id))
if not session_id:
logger.debug("Mirror: no session found for %s:%s", platform, chat_id)
return False
mirror_msg = {
"role": "assistant",
"content": message_text,
"timestamp": datetime.now().isoformat(),
"mirror": True,
"mirror_source": source_label,
}
_append_to_jsonl(session_id, mirror_msg)
_append_to_sqlite(session_id, mirror_msg)
logger.debug("Mirror: wrote to session %s (from %s)", session_id, source_label)
return True
except Exception as e:
logger.debug("Mirror failed for %s:%s: %s", platform, chat_id, e)
return False
def _find_session_id(platform: str, chat_id: str) -> Optional[str]:
"""
Find the active session_id for a platform + chat_id pair.
Scans sessions.json entries and matches where origin.chat_id == chat_id
on the right platform. DM session keys don't embed the chat_id
(e.g. "agent:main:telegram:dm"), so we check the origin dict.
"""
if not _SESSIONS_INDEX.exists():
return None
try:
with open(_SESSIONS_INDEX) as f:
data = json.load(f)
except Exception:
return None
platform_lower = platform.lower()
best_match = None
best_updated = ""
for _key, entry in data.items():
origin = entry.get("origin") or {}
entry_platform = (origin.get("platform") or entry.get("platform", "")).lower()
if entry_platform != platform_lower:
continue
origin_chat_id = str(origin.get("chat_id", ""))
if origin_chat_id == str(chat_id):
updated = entry.get("updated_at", "")
if updated > best_updated:
best_updated = updated
best_match = entry.get("session_id")
return best_match
def _append_to_jsonl(session_id: str, message: dict) -> None:
"""Append a message to the JSONL transcript file."""
transcript_path = _SESSIONS_DIR / f"{session_id}.jsonl"
try:
with open(transcript_path, "a") as f:
f.write(json.dumps(message, ensure_ascii=False) + "\n")
except Exception as e:
logger.debug("Mirror JSONL write failed: %s", e)
def _append_to_sqlite(session_id: str, message: dict) -> None:
"""Append a message to the SQLite session database."""
try:
from hermes_state import SessionDB
db = SessionDB()
db.append_message(
session_id=session_id,
role=message.get("role", "assistant"),
content=message.get("content"),
)
except Exception as e:
logger.debug("Mirror SQLite write failed: %s", e)
+43 -314
View File
@@ -6,13 +6,10 @@ and implement the required methods.
"""
import asyncio
import logging
import os
import re
import uuid
from abc import ABC, abstractmethod
logger = logging.getLogger(__name__)
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
@@ -20,8 +17,7 @@ from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple
from enum import Enum
import sys
from pathlib import Path as _Path
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
from gateway.config import Platform, PlatformConfig
from gateway.session import SessionSource
@@ -171,84 +167,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg") -> str:
return cache_audio_from_bytes(response.content, ext)
# ---------------------------------------------------------------------------
# Document cache utilities
#
# Same pattern as image/audio cache -- documents from platforms are downloaded
# here so the agent can reference them by local file path.
# ---------------------------------------------------------------------------
DOCUMENT_CACHE_DIR = Path(os.path.expanduser("~/.hermes/document_cache"))
SUPPORTED_DOCUMENT_TYPES = {
".pdf": "application/pdf",
".md": "text/markdown",
".txt": "text/plain",
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
}
def get_document_cache_dir() -> Path:
"""Return the document cache directory, creating it if it doesn't exist."""
DOCUMENT_CACHE_DIR.mkdir(parents=True, exist_ok=True)
return DOCUMENT_CACHE_DIR
def cache_document_from_bytes(data: bytes, filename: str) -> str:
"""
Save raw document bytes to the cache and return the absolute file path.
The cached filename preserves the original human-readable name with a
unique prefix: ``doc_{uuid12}_{original_filename}``.
Args:
data: Raw document bytes.
filename: Original filename (e.g. "report.pdf").
Returns:
Absolute path to the cached document file as a string.
Raises:
ValueError: If the sanitized path escapes the cache directory.
"""
cache_dir = get_document_cache_dir()
# Sanitize: strip directory components, null bytes, and control characters
safe_name = Path(filename).name if filename else "document"
safe_name = safe_name.replace("\x00", "").strip()
if not safe_name or safe_name in (".", ".."):
safe_name = "document"
cached_name = f"doc_{uuid.uuid4().hex[:12]}_{safe_name}"
filepath = cache_dir / cached_name
# Final safety check: ensure path stays inside cache dir
if not filepath.resolve().is_relative_to(cache_dir.resolve()):
raise ValueError(f"Path traversal rejected: {filename!r}")
filepath.write_bytes(data)
return str(filepath)
def cleanup_document_cache(max_age_hours: int = 24) -> int:
"""
Delete cached documents older than *max_age_hours*.
Returns the number of files removed.
"""
import time
cache_dir = get_document_cache_dir()
cutoff = time.time() - (max_age_hours * 3600)
removed = 0
for f in cache_dir.iterdir():
if f.is_file() and f.stat().st_mtime < cutoff:
try:
f.unlink()
removed += 1
except OSError:
pass
return removed
class MessageType(Enum):
"""Types of incoming messages."""
TEXT = "text"
@@ -398,20 +316,7 @@ class BasePlatformAdapter(ABC):
SendResult with success status and message ID
"""
pass
async def edit_message(
self,
chat_id: str,
message_id: str,
content: str,
) -> SendResult:
"""
Edit a previously sent message. Optional — platforms that don't
support editing return success=False and callers fall back to
sending a new message.
"""
return SendResult(success=False, error="Not supported")
async def send_typing(self, chat_id: str) -> None:
"""
Send a typing indicator.
@@ -438,28 +343,6 @@ class BasePlatformAdapter(ABC):
text = f"{caption}\n{image_url}" if caption else image_url
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
async def send_animation(
self,
chat_id: str,
animation_url: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
) -> SendResult:
"""
Send an animated GIF natively via the platform API.
Override in subclasses to send GIFs as proper animations
(e.g., Telegram send_animation) so they auto-play inline.
Default falls back to send_image.
"""
return await self.send_image(chat_id=chat_id, image_url=animation_url, caption=caption, reply_to=reply_to)
@staticmethod
def _is_animation_url(url: str) -> bool:
"""Check if a URL points to an animated GIF (vs a static image)."""
lower = url.lower().split('?')[0] # Strip query params
return lower.endswith('.gif')
@staticmethod
def extract_images(content: str) -> Tuple[List[Tuple[str, str]], str]:
"""
@@ -495,14 +378,10 @@ class BasePlatformAdapter(ABC):
url = match.group(1)
images.append((url, ""))
# Remove only the matched image tags from content (not all markdown images)
# Remove matched image tags from content if we found images
if images:
extracted_urls = {url for url, _ in images}
def _remove_if_extracted(match):
url = match.group(2) if match.lastindex >= 2 else match.group(1)
return '' if url in extracted_urls else match.group(0)
cleaned = re.sub(md_pattern, _remove_if_extracted, cleaned)
cleaned = re.sub(html_pattern, _remove_if_extracted, cleaned)
cleaned = re.sub(md_pattern, '', cleaned)
cleaned = re.sub(html_pattern, '', cleaned)
# Clean up leftover blank lines
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
@@ -526,63 +405,7 @@ class BasePlatformAdapter(ABC):
if caption:
text = f"{caption}\n{text}"
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
async def send_video(
self,
chat_id: str,
video_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
) -> SendResult:
"""
Send a video natively via the platform API.
Override in subclasses to send videos as inline playable media.
Default falls back to sending the file path as text.
"""
text = f"🎬 Video: {video_path}"
if caption:
text = f"{caption}\n{text}"
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
async def send_document(
self,
chat_id: str,
file_path: str,
caption: Optional[str] = None,
file_name: Optional[str] = None,
reply_to: Optional[str] = None,
) -> SendResult:
"""
Send a document/file natively via the platform API.
Override in subclasses to send files as downloadable attachments.
Default falls back to sending the file path as text.
"""
text = f"📎 File: {file_path}"
if caption:
text = f"{caption}\n{text}"
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
async def send_image_file(
self,
chat_id: str,
image_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
) -> SendResult:
"""
Send a local image file natively via the platform API.
Unlike send_image() which takes a URL, this takes a local file path.
Override in subclasses for native photo attachments.
Default falls back to sending the file path as text.
"""
text = f"🖼️ Image: {image_path}"
if caption:
text = f"{caption}\n{text}"
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
@staticmethod
def extract_media(content: str) -> Tuple[List[Tuple[str, bool]], str]:
"""
@@ -693,8 +516,6 @@ class BasePlatformAdapter(ABC):
response = await self._message_handler(event)
# Send response if any
if not response:
logger.warning("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
if response:
# Extract MEDIA:<path> tags (from TTS tool) before other processing
media_files, response = self.extract_media(response)
@@ -704,7 +525,6 @@ class BasePlatformAdapter(ABC):
# Send the text portion first (if any remains after extractions)
if text_content:
logger.info("[%s] Sending response (%d chars) to %s", self.name, len(text_content), event.source.chat_id)
result = await self.send(
chat_id=event.source.chat_id,
content=text_content,
@@ -731,59 +551,29 @@ class BasePlatformAdapter(ABC):
if human_delay > 0:
await asyncio.sleep(human_delay)
try:
# Route animated GIFs through send_animation for proper playback
if self._is_animation_url(image_url):
img_result = await self.send_animation(
chat_id=event.source.chat_id,
animation_url=image_url,
caption=alt_text if alt_text else None,
)
else:
img_result = await self.send_image(
chat_id=event.source.chat_id,
image_url=image_url,
caption=alt_text if alt_text else None,
)
img_result = await self.send_image(
chat_id=event.source.chat_id,
image_url=image_url,
caption=alt_text if alt_text else None,
)
if not img_result.success:
print(f"[{self.name}] Failed to send image: {img_result.error}")
except Exception as img_err:
print(f"[{self.name}] Error sending image: {img_err}")
# Send extracted media files — route by file type
_AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
_VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.3gp'}
_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
for media_path, is_voice in media_files:
# Send extracted audio/voice files as native attachments
for audio_path, is_voice in media_files:
if human_delay > 0:
await asyncio.sleep(human_delay)
try:
ext = Path(media_path).suffix.lower()
if ext in _AUDIO_EXTS:
media_result = await self.send_voice(
chat_id=event.source.chat_id,
audio_path=media_path,
)
elif ext in _VIDEO_EXTS:
media_result = await self.send_video(
chat_id=event.source.chat_id,
video_path=media_path,
)
elif ext in _IMAGE_EXTS:
media_result = await self.send_image_file(
chat_id=event.source.chat_id,
image_path=media_path,
)
else:
media_result = await self.send_document(
chat_id=event.source.chat_id,
file_path=media_path,
)
if not media_result.success:
print(f"[{self.name}] Failed to send media ({ext}): {media_result.error}")
except Exception as media_err:
print(f"[{self.name}] Error sending media: {media_err}")
voice_result = await self.send_voice(
chat_id=event.source.chat_id,
audio_path=audio_path,
)
if not voice_result.success:
print(f"[{self.name}] Failed to send voice: {voice_result.error}")
except Exception as voice_err:
print(f"[{self.name}] Error sending voice: {voice_err}")
# Check if there's a pending message that was queued during our processing
if session_key in self._pending_messages:
@@ -831,13 +621,9 @@ class BasePlatformAdapter(ABC):
chat_type: str = "dm",
user_id: Optional[str] = None,
user_name: Optional[str] = None,
thread_id: Optional[str] = None,
chat_topic: Optional[str] = None,
thread_id: Optional[str] = None
) -> SessionSource:
"""Helper to build a SessionSource for this platform."""
# Normalize empty topic to None
if chat_topic is not None and not chat_topic.strip():
chat_topic = None
return SessionSource(
platform=self.platform,
chat_id=str(chat_id),
@@ -846,7 +632,6 @@ class BasePlatformAdapter(ABC):
user_id=str(user_id) if user_id else None,
user_name=user_name,
thread_id=str(thread_id) if thread_id else None,
chat_topic=chat_topic.strip() if chat_topic else None,
)
@abstractmethod
@@ -873,90 +658,34 @@ class BasePlatformAdapter(ABC):
def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
"""
Split a long message into chunks, preserving code block boundaries.
When a split falls inside a triple-backtick code block, the fence is
closed at the end of the current chunk and reopened (with the original
language tag) at the start of the next chunk. Multi-chunk responses
receive indicators like ``(1/3)``.
Split a long message into chunks.
Args:
content: The full message content
max_length: Maximum length per chunk (platform-specific)
Returns:
List of message chunks
"""
if len(content) <= max_length:
return [content]
INDICATOR_RESERVE = 10 # room for " (XX/XX)"
FENCE_CLOSE = "\n```"
chunks: List[str] = []
remaining = content
# When the previous chunk ended mid-code-block, this holds the
# language tag (possibly "") so we can reopen the fence.
carry_lang: Optional[str] = None
while remaining:
# If we're continuing a code block from the previous chunk,
# prepend a new opening fence with the same language tag.
prefix = f"```{carry_lang}\n" if carry_lang is not None else ""
# How much body text we can fit after accounting for the prefix,
# a potential closing fence, and the chunk indicator.
headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
if headroom < 1:
headroom = max_length // 2
# Everything remaining fits in one final chunk
if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
chunks.append(prefix + remaining)
chunks = []
while content:
if len(content) <= max_length:
chunks.append(content)
break
# Find a natural split point (prefer newlines, then spaces)
region = remaining[:headroom]
split_at = region.rfind("\n")
if split_at < headroom // 2:
split_at = region.rfind(" ")
if split_at < 1:
split_at = headroom
chunk_body = remaining[:split_at]
remaining = remaining[split_at:].lstrip()
full_chunk = prefix + chunk_body
# Walk only the chunk_body (not the prefix we prepended) to
# determine whether we end inside an open code block.
in_code = carry_lang is not None
lang = carry_lang or ""
for line in chunk_body.split("\n"):
stripped = line.strip()
if stripped.startswith("```"):
if in_code:
in_code = False
lang = ""
else:
in_code = True
tag = stripped[3:].strip()
lang = tag.split()[0] if tag else ""
if in_code:
# Close the orphaned fence so the chunk is valid on its own
full_chunk += FENCE_CLOSE
carry_lang = lang
else:
carry_lang = None
chunks.append(full_chunk)
# Append chunk indicators when the response spans multiple messages
if len(chunks) > 1:
total = len(chunks)
chunks = [
f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks)
]
# Try to split at a newline
split_idx = content.rfind("\n", 0, max_length)
if split_idx == -1:
# No newline, split at space
split_idx = content.rfind(" ", 0, max_length)
if split_idx == -1:
# No space either, hard split
split_idx = max_length
chunks.append(content[:split_idx])
content = content[split_idx:].lstrip()
return chunks
+22 -199
View File
@@ -8,12 +8,9 @@ Uses discord.py library for:
"""
import asyncio
import logging
import os
from typing import Dict, List, Optional, Any
logger = logging.getLogger(__name__)
try:
import discord
from discord import Message as DiscordMessage, Intents
@@ -27,8 +24,7 @@ except ImportError:
commands = None
import sys
from pathlib import Path as _Path
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
@@ -80,12 +76,11 @@ class DiscordAdapter(BasePlatformAdapter):
return False
try:
# Set up intents -- members intent needed for username-to-ID resolution
# Set up intents
intents = Intents.default()
intents.message_content = True
intents.dm_messages = True
intents.guild_messages = True
intents.members = True
# Create bot
self._client = commands.Bot(
@@ -93,30 +88,24 @@ class DiscordAdapter(BasePlatformAdapter):
intents=intents,
)
# Parse allowed user entries (may contain usernames or IDs)
# Parse allowed user IDs for button authorization
allowed_env = os.getenv("DISCORD_ALLOWED_USERS", "")
if allowed_env:
self._allowed_user_ids = {
uid.strip() for uid in allowed_env.split(",") if uid.strip()
}
adapter_self = self # capture for closure
# Register event handlers
@self._client.event
async def on_ready():
print(f"[{adapter_self.name}] Connected as {adapter_self._client.user}")
# Resolve any usernames in the allowed list to numeric IDs
await adapter_self._resolve_allowed_usernames()
print(f"[{self.name}] Connected as {self._client.user}")
# Sync slash commands with Discord
try:
synced = await adapter_self._client.tree.sync()
print(f"[{adapter_self.name}] Synced {len(synced)} slash command(s)")
synced = await self._client.tree.sync()
print(f"[{self.name}] Synced {len(synced)} slash command(s)")
except Exception as e:
print(f"[{adapter_self.name}] Slash command sync failed: {e}")
adapter_self._ready_event.set()
print(f"[{self.name}] Slash command sync failed: {e}")
self._ready_event.set()
@self._client.event
async def on_message(message: DiscordMessage):
@@ -188,8 +177,8 @@ class DiscordAdapter(BasePlatformAdapter):
try:
ref_msg = await channel.fetch_message(int(reply_to))
reference = ref_msg
except Exception as e:
logger.debug("Could not fetch reply-to message: %s", e)
except Exception:
pass # Ignore if we can't find the referenced message
for i, chunk in enumerate(chunks):
msg = await channel.send(
@@ -206,29 +195,7 @@ class DiscordAdapter(BasePlatformAdapter):
except Exception as e:
return SendResult(success=False, error=str(e))
async def edit_message(
self,
chat_id: str,
message_id: str,
content: str,
) -> SendResult:
"""Edit a previously sent Discord message."""
if not self._client:
return SendResult(success=False, error="Not connected")
try:
channel = self._client.get_channel(int(chat_id))
if not channel:
channel = await self._client.fetch_channel(int(chat_id))
msg = await channel.fetch_message(int(message_id))
formatted = self.format_message(content)
if len(formatted) > self.MAX_MESSAGE_LENGTH:
formatted = formatted[:self.MAX_MESSAGE_LENGTH - 3] + "..."
await msg.edit(content=formatted)
return SendResult(success=True, message_id=message_id)
except Exception as e:
return SendResult(success=False, error=str(e))
async def send_voice(
self,
chat_id: str,
@@ -370,70 +337,6 @@ class DiscordAdapter(BasePlatformAdapter):
except Exception as e:
return {"name": str(chat_id), "type": "dm", "error": str(e)}
async def _resolve_allowed_usernames(self) -> None:
"""
Resolve non-numeric entries in DISCORD_ALLOWED_USERS to Discord user IDs.
Users can specify usernames (e.g. "teknium") or display names instead of
raw numeric IDs. After resolution, the env var and internal set are updated
so authorization checks work with IDs only.
"""
if not self._allowed_user_ids or not self._client:
return
numeric_ids = set()
to_resolve = set()
for entry in self._allowed_user_ids:
if entry.isdigit():
numeric_ids.add(entry)
else:
to_resolve.add(entry.lower())
if not to_resolve:
return
print(f"[{self.name}] Resolving {len(to_resolve)} username(s): {', '.join(to_resolve)}")
resolved_count = 0
for guild in self._client.guilds:
# Fetch full member list (requires members intent)
try:
members = guild.members
if len(members) < guild.member_count:
members = [m async for m in guild.fetch_members(limit=None)]
except Exception as e:
logger.warning("Failed to fetch members for guild %s: %s", guild.name, e)
continue
for member in members:
name_lower = member.name.lower()
display_lower = member.display_name.lower()
global_lower = (member.global_name or "").lower()
matched = name_lower in to_resolve or display_lower in to_resolve or global_lower in to_resolve
if matched:
uid = str(member.id)
numeric_ids.add(uid)
resolved_count += 1
matched_name = name_lower if name_lower in to_resolve else (
display_lower if display_lower in to_resolve else global_lower
)
to_resolve.discard(matched_name)
print(f"[{self.name}] Resolved '{matched_name}' -> {uid} ({member.name}#{member.discriminator})")
if not to_resolve:
break
if to_resolve:
print(f"[{self.name}] Could not resolve usernames: {', '.join(to_resolve)}")
# Update internal set and env var so gateway auth checks use IDs
self._allowed_user_ids = numeric_ids
os.environ["DISCORD_ALLOWED_USERS"] = ",".join(sorted(numeric_ids))
if resolved_count:
print(f"[{self.name}] Updated DISCORD_ALLOWED_USERS with {resolved_count} resolved ID(s)")
def format_message(self, content: str) -> str:
"""
Format message for Discord.
@@ -460,18 +363,8 @@ class DiscordAdapter(BasePlatformAdapter):
# Send a followup to close the interaction if needed
try:
await interaction.followup.send("Processing complete~", ephemeral=True)
except Exception as e:
logger.debug("Discord followup failed: %s", e)
@tree.command(name="new", description="Start a new conversation")
async def slash_new(interaction: discord.Interaction):
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, "/reset")
await self.handle_message(event)
try:
await interaction.followup.send("New conversation started~", ephemeral=True)
except Exception as e:
logger.debug("Discord followup failed: %s", e)
except Exception:
pass
@tree.command(name="reset", description="Reset your Hermes session")
async def slash_reset(interaction: discord.Interaction):
@@ -480,50 +373,8 @@ class DiscordAdapter(BasePlatformAdapter):
await self.handle_message(event)
try:
await interaction.followup.send("Session reset~", ephemeral=True)
except Exception as e:
logger.debug("Discord followup failed: %s", e)
@tree.command(name="model", description="Show or change the model")
@discord.app_commands.describe(name="Model name (e.g. anthropic/claude-sonnet-4). Leave empty to see current.")
async def slash_model(interaction: discord.Interaction, name: str = ""):
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, f"/model {name}".strip())
await self.handle_message(event)
try:
await interaction.followup.send("Done~", ephemeral=True)
except Exception as e:
logger.debug("Discord followup failed: %s", e)
@tree.command(name="personality", description="Set a personality")
@discord.app_commands.describe(name="Personality name. Leave empty to list available.")
async def slash_personality(interaction: discord.Interaction, name: str = ""):
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, f"/personality {name}".strip())
await self.handle_message(event)
try:
await interaction.followup.send("Done~", ephemeral=True)
except Exception as e:
logger.debug("Discord followup failed: %s", e)
@tree.command(name="retry", description="Retry your last message")
async def slash_retry(interaction: discord.Interaction):
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, "/retry")
await self.handle_message(event)
try:
await interaction.followup.send("Retrying~", ephemeral=True)
except Exception as e:
logger.debug("Discord followup failed: %s", e)
@tree.command(name="undo", description="Remove the last exchange")
async def slash_undo(interaction: discord.Interaction):
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, "/undo")
await self.handle_message(event)
try:
await interaction.followup.send("Done~", ephemeral=True)
except Exception as e:
logger.debug("Discord followup failed: %s", e)
except Exception:
pass
@tree.command(name="status", description="Show Hermes session status")
async def slash_status(interaction: discord.Interaction):
@@ -532,18 +383,8 @@ class DiscordAdapter(BasePlatformAdapter):
await self.handle_message(event)
try:
await interaction.followup.send("Status sent~", ephemeral=True)
except Exception as e:
logger.debug("Discord followup failed: %s", e)
@tree.command(name="sethome", description="Set this chat as the home channel")
async def slash_sethome(interaction: discord.Interaction):
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, "/sethome")
await self.handle_message(event)
try:
await interaction.followup.send("Done~", ephemeral=True)
except Exception as e:
logger.debug("Discord followup failed: %s", e)
except Exception:
pass
@tree.command(name="stop", description="Stop the running Hermes agent")
async def slash_stop(interaction: discord.Interaction):
@@ -552,18 +393,8 @@ class DiscordAdapter(BasePlatformAdapter):
await self.handle_message(event)
try:
await interaction.followup.send("Stop requested~", ephemeral=True)
except Exception as e:
logger.debug("Discord followup failed: %s", e)
@tree.command(name="update", description="Update Hermes Agent to the latest version")
async def slash_update(interaction: discord.Interaction):
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, "/update")
await self.handle_message(event)
try:
await interaction.followup.send("Update initiated~", ephemeral=True)
except Exception as e:
logger.debug("Discord followup failed: %s", e)
except Exception:
pass
def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
"""Build a MessageEvent from a Discord slash command interaction."""
@@ -574,9 +405,6 @@ class DiscordAdapter(BasePlatformAdapter):
chat_name = interaction.channel.name
if hasattr(interaction.channel, "guild") and interaction.channel.guild:
chat_name = f"{interaction.channel.guild.name} / #{chat_name}"
# Get channel topic (if available)
chat_topic = getattr(interaction.channel, "topic", None)
source = self.build_source(
chat_id=str(interaction.channel_id),
@@ -584,7 +412,6 @@ class DiscordAdapter(BasePlatformAdapter):
chat_type=chat_type,
user_id=str(interaction.user.id),
user_name=interaction.user.display_name,
chat_topic=chat_topic,
)
msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT
@@ -697,9 +524,6 @@ class DiscordAdapter(BasePlatformAdapter):
if isinstance(message.channel, discord.Thread):
thread_id = str(message.channel.id)
# Get channel topic (if available - TextChannels have topics, DMs/threads don't)
chat_topic = getattr(message.channel, "topic", None)
# Build source
source = self.build_source(
chat_id=str(message.channel.id),
@@ -708,7 +532,6 @@ class DiscordAdapter(BasePlatformAdapter):
user_id=str(message.author.id),
user_name=message.author.display_name,
thread_id=thread_id,
chat_topic=chat_topic,
)
# Build media URLs -- download image attachments to local cache so the
@@ -821,13 +644,13 @@ if DISCORD_AVAILABLE:
await interaction.response.edit_message(embed=embed, view=self)
# Store the approval decision
# Store the approval decision for the gateway to pick up
try:
from tools.approval import approve_permanent
from tools.terminal_tool import _session_approved_patterns
if action == "allow_once":
pass # One-time approval handled by gateway
elif action == "allow_always":
approve_permanent(self.approval_id)
_session_approved_patterns.add(self.approval_id)
except ImportError:
pass
-432
View File
@@ -1,432 +0,0 @@
"""
Home Assistant platform adapter.
Connects to the HA WebSocket API for real-time event monitoring.
State-change events are converted to MessageEvent objects and forwarded
to the agent for processing. Outbound messages are delivered as HA
persistent notifications.
Requires:
- aiohttp (already in messaging extras)
- HASS_TOKEN env var (Long-Lived Access Token)
- HASS_URL env var (default: http://homeassistant.local:8123)
"""
import asyncio
import json
import logging
import os
import time
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional, Set
try:
import aiohttp
AIOHTTP_AVAILABLE = True
except ImportError:
AIOHTTP_AVAILABLE = False
aiohttp = None # type: ignore[assignment]
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
MessageType,
SendResult,
)
logger = logging.getLogger(__name__)
def check_ha_requirements() -> bool:
"""Check if Home Assistant dependencies are available and configured."""
if not AIOHTTP_AVAILABLE:
return False
if not os.getenv("HASS_TOKEN"):
return False
return True
class HomeAssistantAdapter(BasePlatformAdapter):
"""
Home Assistant WebSocket adapter.
Subscribes to ``state_changed`` events and forwards them as
MessageEvent objects. Supports domain/entity filtering and
per-entity cooldowns to avoid event floods.
"""
MAX_MESSAGE_LENGTH = 4096
# Reconnection backoff schedule (seconds)
_BACKOFF_STEPS = [5, 10, 30, 60]
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.HOMEASSISTANT)
# Connection state
self._session: Optional["aiohttp.ClientSession"] = None
self._ws: Optional["aiohttp.ClientWebSocketResponse"] = None
self._rest_session: Optional["aiohttp.ClientSession"] = None
self._listen_task: Optional[asyncio.Task] = None
self._msg_id: int = 0
# Configuration from extra
extra = config.extra or {}
token = config.token or os.getenv("HASS_TOKEN", "")
url = extra.get("url") or os.getenv("HASS_URL", "http://homeassistant.local:8123")
self._hass_url: str = url.rstrip("/")
self._hass_token: str = token
# Event filtering
self._watch_domains: Set[str] = set(extra.get("watch_domains", []))
self._watch_entities: Set[str] = set(extra.get("watch_entities", []))
self._ignore_entities: Set[str] = set(extra.get("ignore_entities", []))
self._cooldown_seconds: int = int(extra.get("cooldown_seconds", 30))
# Cooldown tracking: entity_id -> last_event_timestamp
self._last_event_time: Dict[str, float] = {}
def _next_id(self) -> int:
"""Return the next WebSocket message ID."""
self._msg_id += 1
return self._msg_id
# ------------------------------------------------------------------
# Connection lifecycle
# ------------------------------------------------------------------
async def connect(self) -> bool:
"""Connect to HA WebSocket API and subscribe to events."""
if not AIOHTTP_AVAILABLE:
logger.warning("[%s] aiohttp not installed. Run: pip install aiohttp", self.name)
return False
if not self._hass_token:
logger.warning("[%s] No HASS_TOKEN configured", self.name)
return False
try:
success = await self._ws_connect()
if not success:
return False
# Dedicated REST session for send() calls
self._rest_session = aiohttp.ClientSession()
# Start background listener
self._listen_task = asyncio.create_task(self._listen_loop())
self._running = True
logger.info("[%s] Connected to %s", self.name, self._hass_url)
return True
except Exception as e:
logger.error("[%s] Failed to connect: %s", self.name, e)
return False
async def _ws_connect(self) -> bool:
"""Establish WebSocket connection and authenticate."""
ws_url = self._hass_url.replace("http://", "ws://").replace("https://", "wss://")
ws_url = f"{ws_url}/api/websocket"
self._session = aiohttp.ClientSession()
self._ws = await self._session.ws_connect(ws_url, heartbeat=30)
# Step 1: Receive auth_required
msg = await self._ws.receive_json()
if msg.get("type") != "auth_required":
logger.error("Expected auth_required, got: %s", msg.get("type"))
await self._cleanup_ws()
return False
# Step 2: Send auth
await self._ws.send_json({
"type": "auth",
"access_token": self._hass_token,
})
# Step 3: Wait for auth_ok
msg = await self._ws.receive_json()
if msg.get("type") != "auth_ok":
logger.error("Auth failed: %s", msg)
await self._cleanup_ws()
return False
# Step 4: Subscribe to state_changed events
sub_id = self._next_id()
await self._ws.send_json({
"id": sub_id,
"type": "subscribe_events",
"event_type": "state_changed",
})
# Verify subscription acknowledgement
msg = await self._ws.receive_json()
if not msg.get("success"):
logger.error("Failed to subscribe to events: %s", msg)
await self._cleanup_ws()
return False
return True
async def _cleanup_ws(self) -> None:
"""Close WebSocket and session."""
if self._ws and not self._ws.closed:
await self._ws.close()
self._ws = None
if self._session and not self._session.closed:
await self._session.close()
self._session = None
async def disconnect(self) -> None:
"""Disconnect from Home Assistant."""
self._running = False
if self._listen_task:
self._listen_task.cancel()
try:
await self._listen_task
except asyncio.CancelledError:
pass
self._listen_task = None
await self._cleanup_ws()
if self._rest_session and not self._rest_session.closed:
await self._rest_session.close()
self._rest_session = None
logger.info("[%s] Disconnected", self.name)
# ------------------------------------------------------------------
# Event listener
# ------------------------------------------------------------------
async def _listen_loop(self) -> None:
"""Main event loop with automatic reconnection."""
backoff_idx = 0
while self._running:
try:
await self._read_events()
except asyncio.CancelledError:
return
except Exception as e:
logger.warning("[%s] WebSocket error: %s", self.name, e)
if not self._running:
return
# Reconnect with backoff
delay = self._BACKOFF_STEPS[min(backoff_idx, len(self._BACKOFF_STEPS) - 1)]
logger.info("[%s] Reconnecting in %ds...", self.name, delay)
await asyncio.sleep(delay)
backoff_idx += 1
try:
await self._cleanup_ws()
success = await self._ws_connect()
if success:
backoff_idx = 0 # Reset on successful reconnect
logger.info("[%s] Reconnected", self.name)
except Exception as e:
logger.warning("[%s] Reconnection failed: %s", self.name, e)
async def _read_events(self) -> None:
"""Read events from WebSocket until disconnected."""
if self._ws is None or self._ws.closed:
return
async for ws_msg in self._ws:
if ws_msg.type == aiohttp.WSMsgType.TEXT:
try:
data = json.loads(ws_msg.data)
if data.get("type") == "event":
await self._handle_ha_event(data.get("event", {}))
except json.JSONDecodeError:
logger.debug("Invalid JSON from HA WS: %s", ws_msg.data[:200])
elif ws_msg.type in (aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR):
break
async def _handle_ha_event(self, event: Dict[str, Any]) -> None:
"""Process a state_changed event from Home Assistant."""
event_data = event.get("data", {})
entity_id: str = event_data.get("entity_id", "")
if not entity_id:
return
# Apply ignore filter
if entity_id in self._ignore_entities:
return
# Apply domain/entity watch filters
domain = entity_id.split(".")[0] if "." in entity_id else ""
if self._watch_domains or self._watch_entities:
domain_match = domain in self._watch_domains if self._watch_domains else False
entity_match = entity_id in self._watch_entities if self._watch_entities else False
if not domain_match and not entity_match:
return
# Apply cooldown
now = time.time()
last = self._last_event_time.get(entity_id, 0)
if (now - last) < self._cooldown_seconds:
return
self._last_event_time[entity_id] = now
# Build human-readable message
old_state = event_data.get("old_state", {})
new_state = event_data.get("new_state", {})
message = self._format_state_change(entity_id, old_state, new_state)
if not message:
return
# Build MessageEvent and forward to handler
source = self.build_source(
chat_id="ha_events",
chat_name="Home Assistant Events",
chat_type="channel",
user_id="homeassistant",
user_name="Home Assistant",
)
msg_event = MessageEvent(
text=message,
message_type=MessageType.TEXT,
source=source,
message_id=f"ha_{entity_id}_{int(now)}",
timestamp=datetime.now(),
)
await self.handle_message(msg_event)
@staticmethod
def _format_state_change(
entity_id: str,
old_state: Dict[str, Any],
new_state: Dict[str, Any],
) -> Optional[str]:
"""Convert a state_changed event into a human-readable description."""
if not new_state:
return None
old_val = old_state.get("state", "unknown") if old_state else "unknown"
new_val = new_state.get("state", "unknown")
# Skip if state didn't actually change
if old_val == new_val:
return None
friendly_name = new_state.get("attributes", {}).get("friendly_name", entity_id)
domain = entity_id.split(".")[0] if "." in entity_id else ""
# Domain-specific formatting
if domain == "climate":
attrs = new_state.get("attributes", {})
temp = attrs.get("current_temperature", "?")
target = attrs.get("temperature", "?")
return (
f"[Home Assistant] {friendly_name}: HVAC mode changed from "
f"'{old_val}' to '{new_val}' (current: {temp}, target: {target})"
)
if domain == "sensor":
unit = new_state.get("attributes", {}).get("unit_of_measurement", "")
return (
f"[Home Assistant] {friendly_name}: changed from "
f"{old_val}{unit} to {new_val}{unit}"
)
if domain == "binary_sensor":
return (
f"[Home Assistant] {friendly_name}: "
f"{'triggered' if new_val == 'on' else 'cleared'} "
f"(was {'triggered' if old_val == 'on' else 'cleared'})"
)
if domain in ("light", "switch", "fan"):
return (
f"[Home Assistant] {friendly_name}: turned "
f"{'on' if new_val == 'on' else 'off'}"
)
if domain == "alarm_control_panel":
return (
f"[Home Assistant] {friendly_name}: alarm state changed from "
f"'{old_val}' to '{new_val}'"
)
# Generic fallback
return (
f"[Home Assistant] {friendly_name} ({entity_id}): "
f"changed from '{old_val}' to '{new_val}'"
)
# ------------------------------------------------------------------
# Outbound messaging
# ------------------------------------------------------------------
async def send(
self,
chat_id: str,
content: str,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
"""Send a notification via HA REST API (persistent_notification.create).
Uses the REST API instead of WebSocket to avoid a race condition
with the event listener loop that reads from the same WS connection.
"""
url = f"{self._hass_url}/api/services/persistent_notification/create"
headers = {
"Authorization": f"Bearer {self._hass_token}",
"Content-Type": "application/json",
}
payload = {
"title": "Hermes Agent",
"message": content[:self.MAX_MESSAGE_LENGTH],
}
try:
if self._rest_session:
async with self._rest_session.post(
url,
headers=headers,
json=payload,
timeout=aiohttp.ClientTimeout(total=10),
) as resp:
if resp.status < 300:
return SendResult(success=True, message_id=uuid.uuid4().hex[:12])
else:
body = await resp.text()
return SendResult(success=False, error=f"HTTP {resp.status}: {body}")
else:
async with aiohttp.ClientSession() as session:
async with session.post(
url,
headers=headers,
json=payload,
timeout=aiohttp.ClientTimeout(total=10),
) as resp:
if resp.status < 300:
return SendResult(success=True, message_id=uuid.uuid4().hex[:12])
else:
body = await resp.text()
return SendResult(success=False, error=f"HTTP {resp.status}: {body}")
except asyncio.TimeoutError:
return SendResult(success=False, error="Timeout sending notification to HA")
except Exception as e:
return SendResult(success=False, error=str(e))
async def send_typing(self, chat_id: str) -> None:
"""No typing indicator for Home Assistant."""
pass
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
"""Return basic info about the HA event channel."""
return {
"name": "Home Assistant Events",
"type": "channel",
"url": self._hass_url,
}
+8 -34
View File
@@ -24,8 +24,7 @@ except ImportError:
AsyncWebClient = Any
import sys
from pathlib import Path as _Path
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
@@ -156,25 +155,6 @@ class SlackAdapter(BasePlatformAdapter):
print(f"[Slack] Send error: {e}")
return SendResult(success=False, error=str(e))
async def edit_message(
self,
chat_id: str,
message_id: str,
content: str,
) -> SendResult:
"""Edit a previously sent Slack message."""
if not self._app:
return SendResult(success=False, error="Not connected")
try:
await self._app.client.chat_update(
channel=chat_id,
ts=message_id,
text=content,
)
return SendResult(success=True, message_id=message_id)
except Exception as e:
return SendResult(success=False, error=str(e))
async def send_typing(self, chat_id: str) -> None:
"""Slack doesn't have a direct typing indicator API for bots."""
pass
@@ -347,19 +327,13 @@ class SlackAdapter(BasePlatformAdapter):
user_id = command.get("user_id", "")
channel_id = command.get("channel_id", "")
# Map subcommands to gateway commands
subcommand_map = {
"new": "/reset", "reset": "/reset",
"status": "/status", "stop": "/stop",
"help": "/help",
"model": "/model", "personality": "/personality",
"retry": "/retry", "undo": "/undo",
}
first_word = text.split()[0] if text else ""
if first_word in subcommand_map:
# Preserve arguments after the subcommand
rest = text[len(first_word):].strip()
text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word]
# Map common slash subcommands to gateway commands
if text in ("new", "reset"):
text = "/reset"
elif text == "status":
text = "/status"
elif text == "stop":
text = "/stop"
elif text:
pass # Treat as a regular question
else:
+12 -245
View File
@@ -8,8 +8,6 @@ Uses python-telegram-bot library for:
"""
import asyncio
import os
import re
from typing import Dict, List, Optional, Any
try:
@@ -29,21 +27,10 @@ except ImportError:
Bot = Any
Message = Any
Application = Any
CommandHandler = Any
TelegramMessageHandler = Any
filters = None
ParseMode = None
ChatType = None
# Mock ContextTypes so type annotations using ContextTypes.DEFAULT_TYPE
# don't crash during class definition when the library isn't installed.
class _MockContextTypes:
DEFAULT_TYPE = Any
ContextTypes = _MockContextTypes
ContextTypes = Any
import sys
from pathlib import Path as _Path
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
@@ -53,8 +40,6 @@ from gateway.platforms.base import (
SendResult,
cache_image_from_bytes,
cache_audio_from_bytes,
cache_document_from_bytes,
SUPPORTED_DOCUMENT_TYPES,
)
@@ -63,16 +48,6 @@ def check_telegram_requirements() -> bool:
return TELEGRAM_AVAILABLE
# Matches every character that MarkdownV2 requires to be backslash-escaped
# when it appears outside a code span or fenced code block.
_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])')
def _escape_mdv2(text: str) -> str:
"""Escape Telegram MarkdownV2 special characters with a preceding backslash."""
return _MDV2_ESCAPE_RE.sub(r'\\\1', text)
class TelegramAdapter(BasePlatformAdapter):
"""
Telegram bot adapter.
@@ -126,24 +101,6 @@ class TelegramAdapter(BasePlatformAdapter):
await self._app.start()
await self._app.updater.start_polling(allowed_updates=Update.ALL_TYPES)
# Register bot commands so Telegram shows a hint menu when users type /
try:
from telegram import BotCommand
await self._bot.set_my_commands([
BotCommand("new", "Start a new conversation"),
BotCommand("reset", "Reset conversation history"),
BotCommand("model", "Show or change the model"),
BotCommand("personality", "Set a personality"),
BotCommand("retry", "Retry your last message"),
BotCommand("undo", "Remove the last exchange"),
BotCommand("status", "Show session info"),
BotCommand("stop", "Stop the running agent"),
BotCommand("sethome", "Set this chat as the home channel"),
BotCommand("help", "Show available commands"),
])
except Exception as e:
print(f"[{self.name}] Could not register command menu: {e}")
self._running = True
print(f"[{self.name}] Connected and polling for updates")
return True
@@ -192,7 +149,7 @@ class TelegramAdapter(BasePlatformAdapter):
msg = await self._bot.send_message(
chat_id=int(chat_id),
text=chunk,
parse_mode=ParseMode.MARKDOWN_V2,
parse_mode=ParseMode.MARKDOWN,
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
message_thread_id=int(thread_id) if thread_id else None,
)
@@ -218,36 +175,7 @@ class TelegramAdapter(BasePlatformAdapter):
except Exception as e:
return SendResult(success=False, error=str(e))
async def edit_message(
self,
chat_id: str,
message_id: str,
content: str,
) -> SendResult:
"""Edit a previously sent Telegram message."""
if not self._bot:
return SendResult(success=False, error="Not connected")
try:
formatted = self.format_message(content)
try:
await self._bot.edit_message_text(
chat_id=int(chat_id),
message_id=int(message_id),
text=formatted,
parse_mode=ParseMode.MARKDOWN_V2,
)
except Exception:
# Fallback: retry without markdown formatting
await self._bot.edit_message_text(
chat_id=int(chat_id),
message_id=int(message_id),
text=content,
)
return SendResult(success=True, message_id=message_id)
except Exception as e:
return SendResult(success=False, error=str(e))
async def send_voice(
self,
chat_id: str,
@@ -311,30 +239,6 @@ class TelegramAdapter(BasePlatformAdapter):
# Fallback: send as text link
return await super().send_image(chat_id, image_url, caption, reply_to)
async def send_animation(
self,
chat_id: str,
animation_url: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
) -> SendResult:
"""Send an animated GIF natively as a Telegram animation (auto-plays inline)."""
if not self._bot:
return SendResult(success=False, error="Not connected")
try:
msg = await self._bot.send_animation(
chat_id=int(chat_id),
animation=animation_url,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
print(f"[{self.name}] Failed to send animation, falling back to photo: {e}")
# Fallback: try as a regular photo
return await self.send_image(chat_id, animation_url, caption, reply_to)
async def send_typing(self, chat_id: str) -> None:
"""Send typing indicator."""
if self._bot:
@@ -375,83 +279,14 @@ class TelegramAdapter(BasePlatformAdapter):
def format_message(self, content: str) -> str:
"""
Convert standard markdown to Telegram MarkdownV2 format.
Protected regions (code blocks, inline code) are extracted first so
their contents are never modified. Standard markdown constructs
(headers, bold, italic, links) are translated to MarkdownV2 syntax,
and all remaining special characters are escaped.
Format message for Telegram.
Telegram uses a subset of markdown. We'll use the simpler
Markdown mode (not MarkdownV2) for compatibility.
"""
if not content:
return content
placeholders: dict = {}
counter = [0]
def _ph(value: str) -> str:
"""Stash *value* behind a placeholder token that survives escaping."""
key = f"\x00PH{counter[0]}\x00"
counter[0] += 1
placeholders[key] = value
return key
text = content
# 1) Protect fenced code blocks (``` ... ```)
text = re.sub(
r'(```(?:[^\n]*\n)?[\s\S]*?```)',
lambda m: _ph(m.group(0)),
text,
)
# 2) Protect inline code (`...`)
text = re.sub(r'(`[^`]+`)', lambda m: _ph(m.group(0)), text)
# 3) Convert markdown links escape the display text; inside the URL
# only ')' and '\' need escaping per the MarkdownV2 spec.
def _convert_link(m):
display = _escape_mdv2(m.group(1))
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
return _ph(f'[{display}]({url})')
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
# 4) Convert markdown headers (## Title) → bold *Title*
def _convert_header(m):
inner = m.group(1).strip()
# Strip redundant bold markers that may appear inside a header
inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner)
return _ph(f'*{_escape_mdv2(inner)}*')
text = re.sub(
r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
)
# 5) Convert bold: **text** → *text* (MarkdownV2 bold)
text = re.sub(
r'\*\*(.+?)\*\*',
lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'),
text,
)
# 6) Convert italic: *text* (single asterisk) → _text_ (MarkdownV2 italic)
# [^*\n]+ prevents matching across newlines (which would corrupt
# bullet lists using * markers and multi-line content).
text = re.sub(
r'\*([^*\n]+)\*',
lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'),
text,
)
# 7) Escape remaining special characters in plain text
text = _escape_mdv2(text)
# 8) Restore placeholders in reverse insertion order so that
# nested references (a placeholder inside another) resolve correctly.
for key in reversed(list(placeholders.keys())):
text = text.replace(key, placeholders[key])
return text
# Basic escaping for Telegram Markdown
# In Markdown mode (not V2), only certain characters need escaping
return content
async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming text messages."""
@@ -487,8 +322,6 @@ class TelegramAdapter(BasePlatformAdapter):
msg_type = MessageType.AUDIO
elif msg.voice:
msg_type = MessageType.VOICE
elif msg.document:
msg_type = MessageType.DOCUMENT
else:
msg_type = MessageType.DOCUMENT
@@ -549,73 +382,7 @@ class TelegramAdapter(BasePlatformAdapter):
print(f"[Telegram] Cached user audio: {cached_path}", flush=True)
except Exception as e:
print(f"[Telegram] Failed to cache audio: {e}", flush=True)
# Download document files to cache for agent processing
elif msg.document:
doc = msg.document
try:
# Determine file extension
ext = ""
original_filename = doc.file_name or ""
if original_filename:
_, ext = os.path.splitext(original_filename)
ext = ext.lower()
# If no extension from filename, reverse-lookup from MIME type
if not ext and doc.mime_type:
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
ext = mime_to_ext.get(doc.mime_type, "")
# Check if supported
if ext not in SUPPORTED_DOCUMENT_TYPES:
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
event.text = (
f"Unsupported document type '{ext or 'unknown'}'. "
f"Supported types: {supported_list}"
)
print(f"[Telegram] Unsupported document type: {ext or 'unknown'}", flush=True)
await self.handle_message(event)
return
# Check file size (Telegram Bot API limit: 20 MB)
MAX_DOC_BYTES = 20 * 1024 * 1024
if not doc.file_size or doc.file_size > MAX_DOC_BYTES:
event.text = (
"The document is too large or its size could not be verified. "
"Maximum: 20 MB."
)
print(f"[Telegram] Document too large: {doc.file_size} bytes", flush=True)
await self.handle_message(event)
return
# Download and cache
file_obj = await doc.get_file()
doc_bytes = await file_obj.download_as_bytearray()
raw_bytes = bytes(doc_bytes)
cached_path = cache_document_from_bytes(raw_bytes, original_filename or f"document{ext}")
mime_type = SUPPORTED_DOCUMENT_TYPES[ext]
event.media_urls = [cached_path]
event.media_types = [mime_type]
print(f"[Telegram] Cached user document: {cached_path}", flush=True)
# For text files, inject content into event.text (capped at 100 KB)
MAX_TEXT_INJECT_BYTES = 100 * 1024
if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
try:
text_content = raw_bytes.decode("utf-8")
display_name = original_filename or f"document{ext}"
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
injection = f"[Content of {display_name}]:\n{text_content}"
if event.text:
event.text = f"{injection}\n\n{event.text}"
else:
event.text = injection
except UnicodeDecodeError:
print(f"[Telegram] Could not decode text file as UTF-8, skipping content injection", flush=True)
except Exception as e:
print(f"[Telegram] Failed to cache document: {e}", flush=True)
await self.handle_message(event)
async def _handle_sticker(self, msg: Message, event: "MessageEvent") -> None:
+36 -314
View File
@@ -17,54 +17,12 @@ with different backends via a bridge pattern.
import asyncio
import json
import logging
import os
import platform
import subprocess
_IS_WINDOWS = platform.system() == "Windows"
from pathlib import Path
from typing import Dict, List, Optional, Any
logger = logging.getLogger(__name__)
def _kill_port_process(port: int) -> None:
"""Kill any process listening on the given TCP port."""
try:
if _IS_WINDOWS:
# Use netstat to find the PID bound to this port, then taskkill
result = subprocess.run(
["netstat", "-ano", "-p", "TCP"],
capture_output=True, text=True, timeout=5,
)
for line in result.stdout.splitlines():
parts = line.split()
if len(parts) >= 5 and parts[3] == "LISTENING":
local_addr = parts[1]
if local_addr.endswith(f":{port}"):
try:
subprocess.run(
["taskkill", "/PID", parts[4], "/F"],
capture_output=True, timeout=5,
)
except subprocess.SubprocessError:
pass
else:
result = subprocess.run(
["fuser", f"{port}/tcp"],
capture_output=True, timeout=5,
)
if result.returncode == 0:
subprocess.run(
["fuser", "-k", f"{port}/tcp"],
capture_output=True, timeout=5,
)
except Exception:
pass
import sys
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
@@ -119,24 +77,16 @@ class WhatsAppAdapter(BasePlatformAdapter):
# WhatsApp message limits
MAX_MESSAGE_LENGTH = 65536 # WhatsApp allows longer messages
# Default bridge location relative to the hermes-agent install
_DEFAULT_BRIDGE_DIR = Path(__file__).resolve().parents[2] / "scripts" / "whatsapp-bridge"
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.WHATSAPP)
self._bridge_process: Optional[subprocess.Popen] = None
self._bridge_port: int = config.extra.get("bridge_port", 3000)
self._bridge_script: Optional[str] = config.extra.get(
"bridge_script",
str(self._DEFAULT_BRIDGE_DIR / "bridge.js"),
)
self._bridge_script: Optional[str] = config.extra.get("bridge_script")
self._session_path: Path = Path(config.extra.get(
"session_path",
Path.home() / ".hermes" / "whatsapp" / "session"
))
self._message_queue: asyncio.Queue = asyncio.Queue()
self._bridge_log_fh = None
self._bridge_log: Optional[Path] = None
async def connect(self) -> bool:
"""
@@ -145,182 +95,72 @@ class WhatsAppAdapter(BasePlatformAdapter):
This launches the Node.js bridge process and waits for it to be ready.
"""
if not check_whatsapp_requirements():
logger.warning("[%s] Node.js not found. WhatsApp requires Node.js.", self.name)
print(f"[{self.name}] Node.js not found. WhatsApp requires Node.js.")
return False
if not self._bridge_script:
print(f"[{self.name}] No bridge script configured.")
print(f"[{self.name}] Set 'bridge_script' in whatsapp.extra config.")
print(f"[{self.name}] See docs/messaging.md for WhatsApp setup instructions.")
return False
bridge_path = Path(self._bridge_script)
if not bridge_path.exists():
logger.warning("[%s] Bridge script not found: %s", self.name, bridge_path)
print(f"[{self.name}] Bridge script not found: {bridge_path}")
return False
logger.info("[%s] Bridge found at %s", self.name, bridge_path)
# Auto-install npm dependencies if node_modules doesn't exist
bridge_dir = bridge_path.parent
if not (bridge_dir / "node_modules").exists():
print(f"[{self.name}] Installing WhatsApp bridge dependencies...")
try:
install_result = subprocess.run(
["npm", "install", "--silent"],
cwd=str(bridge_dir),
capture_output=True,
text=True,
timeout=60,
)
if install_result.returncode != 0:
print(f"[{self.name}] npm install failed: {install_result.stderr}")
return False
print(f"[{self.name}] Dependencies installed")
except Exception as e:
print(f"[{self.name}] Failed to install dependencies: {e}")
return False
try:
# Ensure session directory exists
self._session_path.mkdir(parents=True, exist_ok=True)
# Kill any orphaned bridge from a previous gateway run
_kill_port_process(self._bridge_port)
import time
time.sleep(1)
# Start the bridge process in its own process group.
# Route output to a log file so QR codes, errors, and reconnection
# messages are preserved for troubleshooting.
whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
self._bridge_log = self._session_path.parent / "bridge.log"
bridge_log_fh = open(self._bridge_log, "a")
self._bridge_log_fh = bridge_log_fh
# Start the bridge process
self._bridge_process = subprocess.Popen(
[
"node",
str(bridge_path),
"--port", str(self._bridge_port),
"--session", str(self._session_path),
"--mode", whatsapp_mode,
],
stdout=bridge_log_fh,
stderr=bridge_log_fh,
preexec_fn=None if _IS_WINDOWS else os.setsid,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
# Wait for the bridge to connect to WhatsApp.
# Phase 1: wait for the HTTP server to come up (up to 15s).
# Phase 2: wait for WhatsApp status: connected (up to 15s more).
import aiohttp
http_ready = False
data = {}
for attempt in range(15):
await asyncio.sleep(1)
if self._bridge_process.poll() is not None:
print(f"[{self.name}] Bridge process died (exit code {self._bridge_process.returncode})")
print(f"[{self.name}] Check log: {self._bridge_log}")
self._close_bridge_log()
return False
try:
async with aiohttp.ClientSession() as session:
async with session.get(
f"http://localhost:{self._bridge_port}/health",
timeout=aiohttp.ClientTimeout(total=2)
) as resp:
if resp.status == 200:
http_ready = True
data = await resp.json()
if data.get("status") == "connected":
print(f"[{self.name}] Bridge ready (status: connected)")
break
except Exception:
continue
if not http_ready:
print(f"[{self.name}] Bridge HTTP server did not start in 15s")
print(f"[{self.name}] Check log: {self._bridge_log}")
self._close_bridge_log()
return False
# Wait for bridge to be ready (look for ready signal)
# This is a simplified version - real implementation would
# wait for an HTTP health check or specific stdout message
await asyncio.sleep(5)
# Phase 2: HTTP is up but WhatsApp may still be connecting.
# Give it more time to authenticate with saved credentials.
if data.get("status") != "connected":
print(f"[{self.name}] Bridge HTTP ready, waiting for WhatsApp connection...")
for attempt in range(15):
await asyncio.sleep(1)
if self._bridge_process.poll() is not None:
print(f"[{self.name}] Bridge process died during connection")
print(f"[{self.name}] Check log: {self._bridge_log}")
self._close_bridge_log()
return False
try:
async with aiohttp.ClientSession() as session:
async with session.get(
f"http://localhost:{self._bridge_port}/health",
timeout=aiohttp.ClientTimeout(total=2)
) as resp:
if resp.status == 200:
data = await resp.json()
if data.get("status") == "connected":
print(f"[{self.name}] Bridge ready (status: connected)")
break
except Exception:
continue
else:
# Still not connected — warn but proceed (bridge may
# auto-reconnect later, e.g. after a code 515 restart).
print(f"[{self.name}] ⚠ WhatsApp not connected after 30s")
print(f"[{self.name}] Bridge log: {self._bridge_log}")
print(f"[{self.name}] If session expired, re-pair: hermes whatsapp")
if self._bridge_process.poll() is not None:
stderr = self._bridge_process.stderr.read() if self._bridge_process.stderr else ""
print(f"[{self.name}] Bridge process died: {stderr}")
return False
# Start message polling task
asyncio.create_task(self._poll_messages())
self._running = True
print(f"[{self.name}] Bridge started on port {self._bridge_port}")
print(f"[{self.name}] Scan QR code if prompted (check bridge output)")
return True
except Exception as e:
logger.error("[%s] Failed to start bridge: %s", self.name, e, exc_info=True)
self._close_bridge_log()
print(f"[{self.name}] Failed to start bridge: {e}")
return False
def _close_bridge_log(self) -> None:
"""Close the bridge log file handle if open."""
if self._bridge_log_fh:
try:
self._bridge_log_fh.close()
except Exception:
pass
self._bridge_log_fh = None
async def disconnect(self) -> None:
"""Stop the WhatsApp bridge and clean up any orphaned processes."""
"""Stop the WhatsApp bridge."""
if self._bridge_process:
try:
# Kill the entire process group so child node processes die too
import signal
try:
if _IS_WINDOWS:
self._bridge_process.terminate()
else:
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
except (ProcessLookupError, PermissionError):
self._bridge_process.terminate()
self._bridge_process.terminate()
await asyncio.sleep(1)
if self._bridge_process.poll() is None:
try:
if _IS_WINDOWS:
self._bridge_process.kill()
else:
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
except (ProcessLookupError, PermissionError):
self._bridge_process.kill()
self._bridge_process.kill()
except Exception as e:
print(f"[{self.name}] Error stopping bridge: {e}")
# Also kill any orphaned bridge processes on our port
_kill_port_process(self._bridge_port)
self._running = False
self._bridge_process = None
self._close_bridge_log()
print(f"[{self.name}] Disconnected")
async def send(
@@ -368,131 +208,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
)
except Exception as e:
return SendResult(success=False, error=str(e))
async def edit_message(
self,
chat_id: str,
message_id: str,
content: str,
) -> SendResult:
"""Edit a previously sent message via the WhatsApp bridge."""
if not self._running:
return SendResult(success=False, error="Not connected")
try:
import aiohttp
async with aiohttp.ClientSession() as session:
async with session.post(
f"http://localhost:{self._bridge_port}/edit",
json={
"chatId": chat_id,
"messageId": message_id,
"message": content,
},
timeout=aiohttp.ClientTimeout(total=15)
) as resp:
if resp.status == 200:
return SendResult(success=True, message_id=message_id)
else:
error = await resp.text()
return SendResult(success=False, error=error)
except Exception as e:
return SendResult(success=False, error=str(e))
async def _send_media_to_bridge(
self,
chat_id: str,
file_path: str,
media_type: str,
caption: Optional[str] = None,
file_name: Optional[str] = None,
) -> SendResult:
"""Send any media file via bridge /send-media endpoint."""
if not self._running:
return SendResult(success=False, error="Not connected")
try:
import aiohttp
if not os.path.exists(file_path):
return SendResult(success=False, error=f"File not found: {file_path}")
payload: Dict[str, Any] = {
"chatId": chat_id,
"filePath": file_path,
"mediaType": media_type,
}
if caption:
payload["caption"] = caption
if file_name:
payload["fileName"] = file_name
async with aiohttp.ClientSession() as session:
async with session.post(
f"http://localhost:{self._bridge_port}/send-media",
json=payload,
timeout=aiohttp.ClientTimeout(total=120),
) as resp:
if resp.status == 200:
data = await resp.json()
return SendResult(
success=True,
message_id=data.get("messageId"),
raw_response=data,
)
else:
error = await resp.text()
return SendResult(success=False, error=error)
except Exception as e:
return SendResult(success=False, error=str(e))
async def send_image(
self,
chat_id: str,
image_url: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
) -> SendResult:
"""Download image URL to cache, send natively via bridge."""
try:
local_path = await cache_image_from_url(image_url)
return await self._send_media_to_bridge(chat_id, local_path, "image", caption)
except Exception:
return await super().send_image(chat_id, image_url, caption, reply_to)
async def send_image_file(
self,
chat_id: str,
image_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
) -> SendResult:
"""Send a local image file natively via bridge."""
return await self._send_media_to_bridge(chat_id, image_path, "image", caption)
async def send_video(
self,
chat_id: str,
video_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
) -> SendResult:
"""Send a video natively via bridge — plays inline in WhatsApp."""
return await self._send_media_to_bridge(chat_id, video_path, "video", caption)
async def send_document(
self,
chat_id: str,
file_path: str,
caption: Optional[str] = None,
file_name: Optional[str] = None,
reply_to: Optional[str] = None,
) -> SendResult:
"""Send a document/file as a downloadable attachment via bridge."""
return await self._send_media_to_bridge(
chat_id, file_path, "document", caption,
file_name or os.path.basename(file_path),
)
async def send_typing(self, chat_id: str) -> None:
"""Send typing indicator via bridge."""
if not self._running:
@@ -530,8 +246,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
"type": "group" if data.get("isGroup") else "dm",
"participants": data.get("participants", []),
}
except Exception as e:
logger.debug("Could not get WhatsApp chat info for %s: %s", chat_id, e)
except Exception:
pass
return {"name": chat_id, "type": "dm"}
@@ -636,3 +352,9 @@ class WhatsAppAdapter(BasePlatformAdapter):
print(f"[{self.name}] Error building event: {e}")
return None
# Note: A reference Node.js bridge script would be provided in scripts/whatsapp-bridge/
# It would use whatsapp-web.js or Baileys to:
# 1. Handle WhatsApp Web authentication (QR code)
# 2. Listen for incoming messages
# 3. Expose HTTP endpoints for send/receive/status
+115 -1491
View File
File diff suppressed because it is too large Load Diff
+35 -185
View File
@@ -8,7 +8,6 @@ Handles:
- Dynamic system prompt injection (agent knows its context)
"""
import logging
import os
import json
import uuid
@@ -17,8 +16,6 @@ from datetime import datetime, timedelta
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any
logger = logging.getLogger(__name__)
from .config import (
Platform,
GatewayConfig,
@@ -44,7 +41,6 @@ class SessionSource:
user_id: Optional[str] = None
user_name: Optional[str] = None
thread_id: Optional[str] = None # For forum topics, Discord threads, etc.
chat_topic: Optional[str] = None # Channel topic/description (Discord, Slack)
@property
def description(self) -> str:
@@ -76,7 +72,6 @@ class SessionSource:
"user_id": self.user_id,
"user_name": self.user_name,
"thread_id": self.thread_id,
"chat_topic": self.chat_topic,
}
@classmethod
@@ -89,7 +84,6 @@ class SessionSource:
user_id=data.get("user_id"),
user_name=data.get("user_name"),
thread_id=data.get("thread_id"),
chat_topic=data.get("chat_topic"),
)
@classmethod
@@ -158,16 +152,6 @@ def build_session_context_prompt(context: SessionContext) -> str:
else:
lines.append(f"**Source:** {platform_name} ({context.source.description})")
# Channel topic (if available - provides context about the channel's purpose)
if context.source.chat_topic:
lines.append(f"**Channel Topic:** {context.source.chat_topic}")
# User identity (especially useful for WhatsApp where multiple people DM)
if context.source.user_name:
lines.append(f"**User:** {context.source.user_name}")
elif context.source.user_id:
lines.append(f"**User ID:** {context.source.user_id}")
# Connected platforms
platforms_list = ["local (files on this machine)"]
for p in context.connected_platforms:
@@ -232,10 +216,6 @@ class SessionEntry:
output_tokens: int = 0
total_tokens: int = 0
# Set when a session was created because the previous one expired;
# consumed once by the message handler to inject a notice into context
was_auto_reset: bool = False
def to_dict(self) -> Dict[str, Any]:
result = {
"session_key": self.session_key,
@@ -281,48 +261,27 @@ class SessionEntry:
)
def build_session_key(source: SessionSource) -> str:
"""Build a deterministic session key from a message source.
This is the single source of truth for session key construction.
WhatsApp DMs include chat_id (multi-user), other DMs do not (single owner).
"""
platform = source.platform.value
if source.chat_type == "dm":
if platform == "whatsapp" and source.chat_id:
return f"agent:main:{platform}:dm:{source.chat_id}"
return f"agent:main:{platform}:dm"
return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}"
class SessionStore:
"""
Manages session storage and retrieval.
Uses SQLite (via SessionDB) for session metadata and message transcripts.
Falls back to legacy JSONL files if SQLite is unavailable.
Sessions are stored in:
- sessions.json: Index mapping session keys to session IDs
- {session_id}.jsonl: Conversation transcripts
"""
def __init__(self, sessions_dir: Path, config: GatewayConfig,
has_active_processes_fn=None,
on_auto_reset=None):
has_active_processes_fn=None):
self.sessions_dir = sessions_dir
self.config = config
self._entries: Dict[str, SessionEntry] = {}
self._loaded = False
# Optional callback to check if a session has active background processes.
# When set, sessions with running processes are exempt from reset.
self._has_active_processes_fn = has_active_processes_fn
self._on_auto_reset = on_auto_reset # callback(old_entry) before auto-reset
# Initialize SQLite session database
self._db = None
try:
from hermes_state import SessionDB
self._db = SessionDB()
except Exception as e:
print(f"[gateway] Warning: SQLite session store unavailable, falling back to JSONL: {e}")
def _ensure_loaded(self) -> None:
"""Load sessions index from disk if not already loaded."""
"""Load sessions from disk if not already loaded."""
if self._loaded:
return
@@ -341,7 +300,7 @@ class SessionStore:
self._loaded = True
def _save(self) -> None:
"""Save sessions index to disk (kept for session key -> ID mapping)."""
"""Save sessions index to disk."""
self.sessions_dir.mkdir(parents=True, exist_ok=True)
sessions_file = self.sessions_dir / "sessions.json"
@@ -351,14 +310,23 @@ class SessionStore:
def _generate_session_key(self, source: SessionSource) -> str:
"""Generate a session key from a source."""
return build_session_key(source)
platform = source.platform.value
if source.chat_type == "dm":
# DMs share the main session per platform
return f"agent:main:{platform}:dm"
else:
# Groups/channels get their own keys
return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}"
def _should_reset(self, entry: SessionEntry, source: SessionSource) -> bool:
"""
Check if a session should be reset based on policy.
Returns True if the session is stale and should start fresh.
Sessions with active background processes are never reset.
"""
# Don't reset sessions that have active background processes
if self._has_active_processes_fn:
session_key = self._generate_session_key(source)
if self._has_active_processes_fn(session_key):
@@ -369,17 +337,17 @@ class SessionStore:
session_type=source.chat_type
)
if policy.mode == "none":
return False
now = datetime.now()
# Check idle timeout
if policy.mode in ("idle", "both"):
idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
if now > idle_deadline:
return True
# Check daily reset
if policy.mode in ("daily", "both"):
# Find the most recent reset boundary
today_reset = now.replace(
hour=policy.at_hour,
minute=0,
@@ -387,6 +355,7 @@ class SessionStore:
microsecond=0
)
if now.hour < policy.at_hour:
# Reset boundary was yesterday
today_reset -= timedelta(days=1)
if entry.updated_at < today_reset:
@@ -394,27 +363,6 @@ class SessionStore:
return False
def has_any_sessions(self) -> bool:
"""Check if any sessions have ever been created (across all platforms).
Uses the SQLite database as the source of truth because it preserves
historical session records (ended sessions still count). The in-memory
``_entries`` dict replaces entries on reset, so ``len(_entries)`` would
stay at 1 for single-platform users which is the bug this fixes.
The current session is already in the DB by the time this is called
(get_or_create_session runs first), so we check ``> 1``.
"""
if self._db:
try:
return self._db.session_count() > 1
except Exception:
pass # fall through to heuristic
# Fallback: check if sessions.json was loaded with existing data.
# This covers the rare case where the DB is unavailable.
self._ensure_loaded()
return len(self._entries) > 1
def get_or_create_session(
self,
source: SessionSource,
@@ -424,35 +372,22 @@ class SessionStore:
Get an existing session or create a new one.
Evaluates reset policy to determine if the existing session is stale.
Creates a session record in SQLite when a new session starts.
"""
self._ensure_loaded()
session_key = self._generate_session_key(source)
now = datetime.now()
# Check for existing session
if session_key in self._entries and not force_new:
entry = self._entries[session_key]
# Check if session should be reset
if not self._should_reset(entry, source):
# Update timestamp and return existing
entry.updated_at = now
self._save()
return entry
else:
# Session is being auto-reset — flush memories before destroying
was_auto_reset = True
if self._on_auto_reset:
try:
self._on_auto_reset(entry)
except Exception as e:
logger.debug("Auto-reset callback failed: %s", e)
if self._db:
try:
self._db.end_session(entry.session_id, "session_reset")
except Exception as e:
logger.debug("Session DB operation failed: %s", e)
else:
was_auto_reset = False
# Create new session
session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
@@ -466,23 +401,11 @@ class SessionStore:
display_name=source.chat_name,
platform=source.platform,
chat_type=source.chat_type,
was_auto_reset=was_auto_reset,
)
self._entries[session_key] = entry
self._save()
# Create session in SQLite
if self._db:
try:
self._db.create_session(
session_id=session_id,
source=source.platform.value,
user_id=source.user_id,
)
except Exception as e:
print(f"[gateway] Warning: Failed to create SQLite session: {e}")
return entry
def update_session(
@@ -501,14 +424,6 @@ class SessionStore:
entry.output_tokens += output_tokens
entry.total_tokens = entry.input_tokens + entry.output_tokens
self._save()
if self._db:
try:
self._db.update_token_counts(
entry.session_id, input_tokens, output_tokens
)
except Exception as e:
logger.debug("Session DB operation failed: %s", e)
def reset_session(self, session_key: str) -> Optional[SessionEntry]:
"""Force reset a session, creating a new session ID."""
@@ -518,14 +433,6 @@ class SessionStore:
return None
old_entry = self._entries[session_key]
# End old session in SQLite
if self._db:
try:
self._db.end_session(old_entry.session_id, "session_reset")
except Exception as e:
logger.debug("Session DB operation failed: %s", e)
now = datetime.now()
session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
@@ -543,21 +450,15 @@ class SessionStore:
self._entries[session_key] = new_entry
self._save()
# Create new session in SQLite
if self._db:
try:
self._db.create_session(
session_id=session_id,
source=old_entry.platform.value if old_entry.platform else "unknown",
user_id=old_entry.origin.user_id if old_entry.origin else None,
)
except Exception as e:
logger.debug("Session DB operation failed: %s", e)
return new_entry
def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]:
"""List all sessions, optionally filtered by activity."""
"""
List all sessions, optionally filtered by activity.
Args:
active_minutes: If provided, only return sessions updated within this many minutes
"""
self._ensure_loaded()
entries = list(self._entries.values())
@@ -566,75 +467,24 @@ class SessionStore:
cutoff = datetime.now() - timedelta(minutes=active_minutes)
entries = [e for e in entries if e.updated_at >= cutoff]
# Sort by most recently updated
entries.sort(key=lambda e: e.updated_at, reverse=True)
return entries
def get_transcript_path(self, session_id: str) -> Path:
"""Get the path to a session's legacy transcript file."""
"""Get the path to a session's transcript file."""
return self.sessions_dir / f"{session_id}.jsonl"
def append_to_transcript(self, session_id: str, message: Dict[str, Any]) -> None:
"""Append a message to a session's transcript (SQLite + legacy JSONL)."""
# Write to SQLite
if self._db:
try:
self._db.append_message(
session_id=session_id,
role=message.get("role", "unknown"),
content=message.get("content"),
tool_name=message.get("tool_name"),
tool_calls=message.get("tool_calls"),
tool_call_id=message.get("tool_call_id"),
)
except Exception as e:
logger.debug("Session DB operation failed: %s", e)
# Also write legacy JSONL (keeps existing tooling working during transition)
"""Append a message to a session's transcript."""
transcript_path = self.get_transcript_path(session_id)
with open(transcript_path, "a") as f:
f.write(json.dumps(message, ensure_ascii=False) + "\n")
def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
"""Replace the entire transcript for a session with new messages.
Used by /retry, /undo, and /compress to persist modified conversation history.
Rewrites both SQLite and legacy JSONL storage.
"""
# SQLite: clear old messages and re-insert
if self._db:
try:
self._db.clear_messages(session_id)
for msg in messages:
self._db.append_message(
session_id=session_id,
role=msg.get("role", "unknown"),
content=msg.get("content"),
tool_name=msg.get("tool_name"),
tool_calls=msg.get("tool_calls"),
tool_call_id=msg.get("tool_call_id"),
)
except Exception as e:
logger.debug("Failed to rewrite transcript in DB: %s", e)
# JSONL: overwrite the file
transcript_path = self.get_transcript_path(session_id)
with open(transcript_path, "w") as f:
for msg in messages:
f.write(json.dumps(msg, ensure_ascii=False) + "\n")
def load_transcript(self, session_id: str) -> List[Dict[str, Any]]:
"""Load all messages from a session's transcript."""
# Try SQLite first
if self._db:
try:
messages = self._db.get_messages_as_conversation(session_id)
if messages:
return messages
except Exception as e:
logger.debug("Could not load messages from DB: %s", e)
# Fall back to legacy JSONL
transcript_path = self.get_transcript_path(session_id)
if not transcript_path.exists():
-61
View File
@@ -1,61 +0,0 @@
"""
Gateway runtime status helpers.
Provides PID-file based detection of whether the gateway daemon is running,
used by send_message's check_fn to gate availability in the CLI.
The PID file lives at ``{HERMES_HOME}/gateway.pid``. HERMES_HOME defaults to
``~/.hermes`` but can be overridden via the environment variable. This means
separate HERMES_HOME directories naturally get separate PID files a property
that will be useful when we add named profiles (multiple agents running
concurrently under distinct configurations).
"""
import os
from pathlib import Path
from typing import Optional
def _get_pid_path() -> Path:
"""Return the path to the gateway PID file, respecting HERMES_HOME."""
home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
return home / "gateway.pid"
def write_pid_file() -> None:
"""Write the current process PID to the gateway PID file."""
pid_path = _get_pid_path()
pid_path.parent.mkdir(parents=True, exist_ok=True)
pid_path.write_text(str(os.getpid()))
def remove_pid_file() -> None:
"""Remove the gateway PID file if it exists."""
try:
_get_pid_path().unlink(missing_ok=True)
except Exception:
pass
def get_running_pid() -> Optional[int]:
"""Return the PID of a running gateway instance, or ``None``.
Checks the PID file and verifies the process is actually alive.
Cleans up stale PID files automatically.
"""
pid_path = _get_pid_path()
if not pid_path.exists():
return None
try:
pid = int(pid_path.read_text().strip())
os.kill(pid, 0) # signal 0 = existence check, no actual signal sent
return pid
except (ValueError, ProcessLookupError, PermissionError):
# Stale PID file — process is gone
remove_pid_file()
return None
def is_gateway_running() -> bool:
"""Check if the gateway daemon is currently running."""
return get_running_pid() is not None
+1 -1
View File
@@ -11,4 +11,4 @@ Provides subcommands for:
- hermes cron - Manage cron jobs
"""
__version__ = "v1.0.0"
__version__ = "0.1.0"
-1817
View File
File diff suppressed because it is too large Load Diff
-275
View File
@@ -1,275 +0,0 @@
"""Welcome banner, ASCII art, and skills summary for the CLI.
Pure display functions with no HermesCLI state dependency.
"""
from pathlib import Path
from typing import Dict, List, Any
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
from prompt_toolkit import print_formatted_text as _pt_print
from prompt_toolkit.formatted_text import ANSI as _PT_ANSI
# =========================================================================
# ANSI building blocks for conversation display
# =========================================================================
_GOLD = "\033[1;33m"
_BOLD = "\033[1m"
_DIM = "\033[2m"
_RST = "\033[0m"
def cprint(text: str):
"""Print ANSI-colored text through prompt_toolkit's renderer."""
_pt_print(_PT_ANSI(text))
# =========================================================================
# ASCII Art & Branding
# =========================================================================
from hermes_cli import __version__ as VERSION
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]
[#FFBF00]███████║█████╗ ██████╔╝██╔████╔██║█████╗ ███████╗█████╗███████║██║ ███╗█████╗ ██╔██╗ ██║ ██║[/]
[#FFBF00]██╔══██║██╔══╝ ██╔══██╗██║╚██╔╝██║██╔══╝ ╚════██║╚════╝██╔══██║██║ ██║██╔══╝ ██║╚██╗██║ ██║[/]
[#CD7F32]██║ ██║███████╗██║ ██║██║ ╚═╝ ██║███████╗███████║ ██║ ██║╚██████╔╝███████╗██║ ╚████║ ██║[/]
[#CD7F32]╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝ ╚═╝ ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═══╝ ╚═╝[/]"""
HERMES_CADUCEUS = """[#CD7F32]⠀⢀⣀⡀⠀⣀⣀⠀⢀⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
[#CD7F32]⠀⠀⠀⠀⠀⠀⢀⣠⣴⣾⣿⣿⣇⠸⣿⣿⠇⣸⣿⣿⣷⣦⣄⡀⠀⠀⠀⠀⠀⠀[/]
[#FFBF00]⠀⢀⣠⣴⣶⠿⠋⣩⡿⣿⡿⠻⣿⡇⢠⡄⢸⣿⠟⢿⣿⢿⣍⠙⠿⣶⣦⣄⡀⠀[/]
[#FFBF00]⠀⠀⠉⠉⠁⠶⠟⠋⠀⠉⠀⢀⣈⣁⡈⢁⣈⣁⡀⠀⠉⠀⠙⠻⠶⠈⠉⠉⠀⠀[/]
[#FFD700]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣴⣿⡿⠛⢁⡈⠛⢿⣿⣦⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
[#FFD700]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠿⣿⣦⣤⣈⠁⢠⣴⣿⠿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
[#FFBF00]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠉⠻⢿⣿⣦⡉⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
[#FFBF00]⠀⠘⢷⣦⣈⠛⠃⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
[#CD7F32]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢠⣴⠦⠈⠙⠿⣦⡄⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
[#CD7F32]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⣿⣤⡈⠁⢤⣿⠇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
[#B8860B]⠀⠉⠛⠷⠄⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
[#B8860B]⠀⢀⣀⠑⢶⣄⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
[#B8860B]⠀⣿⠁⢰⡆⠈⡿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
[#B8860B]⠀⠈⠳⠈⣡⠞⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
[#B8860B]⠀⠈⠀[/]"""
COMPACT_BANNER = """
[bold #FFD700]╔══════════════════════════════════════════════════════════════╗[/]
[bold #FFD700]║[/] [#FFBF00]⚕ NOUS HERMES[/] [dim #B8860B]- AI Agent Framework[/] [bold #FFD700]║[/]
[bold #FFD700]║[/] [#CD7F32]Messenger of the Digital Gods[/] [dim #B8860B]Nous Research[/] [bold #FFD700]║[/]
[bold #FFD700]╚══════════════════════════════════════════════════════════════╝[/]
"""
# =========================================================================
# Skills scanning
# =========================================================================
def get_available_skills() -> Dict[str, List[str]]:
"""Scan ~/.hermes/skills/ and return skills grouped by category."""
import os
hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
skills_dir = hermes_home / "skills"
skills_by_category = {}
if not skills_dir.exists():
return skills_by_category
for skill_file in skills_dir.rglob("SKILL.md"):
rel_path = skill_file.relative_to(skills_dir)
parts = rel_path.parts
if len(parts) >= 2:
category = parts[0]
skill_name = parts[-2]
else:
category = "general"
skill_name = skill_file.parent.name
skills_by_category.setdefault(category, []).append(skill_name)
return skills_by_category
# =========================================================================
# Welcome banner
# =========================================================================
def _format_context_length(tokens: int) -> str:
"""Format a token count for display (e.g. 128000 → '128K', 1048576 → '1M')."""
if tokens >= 1_000_000:
val = tokens / 1_000_000
return f"{val:g}M"
elif tokens >= 1_000:
val = tokens / 1_000
return f"{val:g}K"
return str(tokens)
def build_welcome_banner(console: Console, model: str, cwd: str,
tools: List[dict] = None,
enabled_toolsets: List[str] = None,
session_id: str = None,
get_toolset_for_tool=None,
context_length: int = None):
"""Build and print a welcome banner with caduceus on left and info on right.
Args:
console: Rich Console instance.
model: Current model name.
cwd: Current working directory.
tools: List of tool definitions.
enabled_toolsets: List of enabled toolset names.
session_id: Session identifier.
get_toolset_for_tool: Callable to map tool name -> toolset name.
context_length: Model's context window size in tokens.
"""
from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
if get_toolset_for_tool is None:
from model_tools import get_toolset_for_tool
tools = tools or []
enabled_toolsets = enabled_toolsets or []
_, unavailable_toolsets = check_tool_availability(quiet=True)
disabled_tools = set()
for item in unavailable_toolsets:
disabled_tools.update(item.get("tools", []))
layout_table = Table.grid(padding=(0, 2))
layout_table.add_column("left", justify="center")
layout_table.add_column("right", justify="left")
left_lines = ["", HERMES_CADUCEUS, ""]
model_short = model.split("/")[-1] if "/" in model else model
if len(model_short) > 28:
model_short = model_short[:25] + "..."
ctx_str = f" [dim #B8860B]·[/] [dim #B8860B]{_format_context_length(context_length)} context[/]" if context_length else ""
left_lines.append(f"[#FFBF00]{model_short}[/]{ctx_str} [dim #B8860B]·[/] [dim #B8860B]Nous Research[/]")
left_lines.append(f"[dim #B8860B]{cwd}[/]")
if session_id:
left_lines.append(f"[dim #8B8682]Session: {session_id}[/]")
left_content = "\n".join(left_lines)
right_lines = ["[bold #FFBF00]Available Tools[/]"]
toolsets_dict: Dict[str, list] = {}
for tool in tools:
tool_name = tool["function"]["name"]
toolset = get_toolset_for_tool(tool_name) or "other"
toolsets_dict.setdefault(toolset, []).append(tool_name)
for item in unavailable_toolsets:
toolset_id = item.get("id", item.get("name", "unknown"))
display_name = f"{toolset_id}_tools" if not toolset_id.endswith("_tools") else toolset_id
if display_name not in toolsets_dict:
toolsets_dict[display_name] = []
for tool_name in item.get("tools", []):
if tool_name not in toolsets_dict[display_name]:
toolsets_dict[display_name].append(tool_name)
sorted_toolsets = sorted(toolsets_dict.keys())
display_toolsets = sorted_toolsets[:8]
remaining_toolsets = len(sorted_toolsets) - 8
for toolset in display_toolsets:
tool_names = toolsets_dict[toolset]
colored_names = []
for name in sorted(tool_names):
if name in disabled_tools:
colored_names.append(f"[red]{name}[/]")
else:
colored_names.append(f"[#FFF8DC]{name}[/]")
tools_str = ", ".join(colored_names)
if len(", ".join(sorted(tool_names))) > 45:
short_names = []
length = 0
for name in sorted(tool_names):
if length + len(name) + 2 > 42:
short_names.append("...")
break
short_names.append(name)
length += len(name) + 2
colored_names = []
for name in short_names:
if name == "...":
colored_names.append("[dim]...[/]")
elif name in disabled_tools:
colored_names.append(f"[red]{name}[/]")
else:
colored_names.append(f"[#FFF8DC]{name}[/]")
tools_str = ", ".join(colored_names)
right_lines.append(f"[dim #B8860B]{toolset}:[/] {tools_str}")
if remaining_toolsets > 0:
right_lines.append(f"[dim #B8860B](and {remaining_toolsets} more toolsets...)[/]")
# MCP Servers section (only if configured)
try:
from tools.mcp_tool import get_mcp_status
mcp_status = get_mcp_status()
except Exception:
mcp_status = []
if mcp_status:
right_lines.append("")
right_lines.append("[bold #FFBF00]MCP Servers[/]")
for srv in mcp_status:
if srv["connected"]:
right_lines.append(
f"[dim #B8860B]{srv['name']}[/] [#FFF8DC]({srv['transport']})[/] "
f"[dim #B8860B]—[/] [#FFF8DC]{srv['tools']} tool(s)[/]"
)
else:
right_lines.append(
f"[red]{srv['name']}[/] [dim]({srv['transport']})[/] "
f"[red]— failed[/]"
)
right_lines.append("")
right_lines.append("[bold #FFBF00]Available Skills[/]")
skills_by_category = get_available_skills()
total_skills = sum(len(s) for s in skills_by_category.values())
if skills_by_category:
for category in sorted(skills_by_category.keys()):
skill_names = sorted(skills_by_category[category])
if len(skill_names) > 8:
display_names = skill_names[:8]
skills_str = ", ".join(display_names) + f" +{len(skill_names) - 8} more"
else:
skills_str = ", ".join(skill_names)
if len(skills_str) > 50:
skills_str = skills_str[:47] + "..."
right_lines.append(f"[dim #B8860B]{category}:[/] [#FFF8DC]{skills_str}[/]")
else:
right_lines.append("[dim #B8860B]No skills installed[/]")
right_lines.append("")
mcp_connected = sum(1 for s in mcp_status if s["connected"]) if mcp_status else 0
summary_parts = [f"{len(tools)} tools", f"{total_skills} skills"]
if mcp_connected:
summary_parts.append(f"{mcp_connected} MCP servers")
summary_parts.append("/help for commands")
right_lines.append(f"[dim #B8860B]{' · '.join(summary_parts)}[/]")
right_content = "\n".join(right_lines)
layout_table.add_row(left_content, right_content)
outer_panel = Panel(
layout_table,
title=f"[bold #FFD700]Hermes Agent {VERSION}[/]",
border_style="#CD7F32",
padding=(0, 2),
)
console.print()
console.print(HERMES_AGENT_LOGO)
console.print()
console.print(outer_panel)
-145
View File
@@ -1,145 +0,0 @@
"""Interactive prompt callbacks for terminal_tool integration.
These bridge terminal_tool's interactive prompts (clarify, sudo, approval)
into prompt_toolkit's event loop. Each function takes the HermesCLI instance
as its first argument and uses its state (queues, app reference) to coordinate
with the TUI.
"""
import queue
import time as _time
from hermes_cli.banner import cprint, _DIM, _RST
def clarify_callback(cli, question, choices):
"""Prompt for clarifying question through the TUI.
Sets up the interactive selection UI, then blocks until the user
responds. Returns the user's choice or a timeout message.
"""
from cli import CLI_CONFIG
timeout = CLI_CONFIG.get("clarify", {}).get("timeout", 120)
response_queue = queue.Queue()
is_open_ended = not choices or len(choices) == 0
cli._clarify_state = {
"question": question,
"choices": choices if not is_open_ended else [],
"selected": 0,
"response_queue": response_queue,
}
cli._clarify_deadline = _time.monotonic() + timeout
cli._clarify_freetext = is_open_ended
if hasattr(cli, '_app') and cli._app:
cli._app.invalidate()
while True:
try:
result = response_queue.get(timeout=1)
cli._clarify_deadline = 0
return result
except queue.Empty:
remaining = cli._clarify_deadline - _time.monotonic()
if remaining <= 0:
break
if hasattr(cli, '_app') and cli._app:
cli._app.invalidate()
cli._clarify_state = None
cli._clarify_freetext = False
cli._clarify_deadline = 0
if hasattr(cli, '_app') and cli._app:
cli._app.invalidate()
cprint(f"\n{_DIM}(clarify timed out after {timeout}s — agent will decide){_RST}")
return (
"The user did not provide a response within the time limit. "
"Use your best judgement to make the choice and proceed."
)
def sudo_password_callback(cli) -> str:
"""Prompt for sudo password through the TUI.
Sets up a password input area and blocks until the user responds.
"""
timeout = 45
response_queue = queue.Queue()
cli._sudo_state = {"response_queue": response_queue}
cli._sudo_deadline = _time.monotonic() + timeout
if hasattr(cli, '_app') and cli._app:
cli._app.invalidate()
while True:
try:
result = response_queue.get(timeout=1)
cli._sudo_state = None
cli._sudo_deadline = 0
if hasattr(cli, '_app') and cli._app:
cli._app.invalidate()
if result:
cprint(f"\n{_DIM} ✓ Password received (cached for session){_RST}")
else:
cprint(f"\n{_DIM} ⏭ Skipped{_RST}")
return result
except queue.Empty:
remaining = cli._sudo_deadline - _time.monotonic()
if remaining <= 0:
break
if hasattr(cli, '_app') and cli._app:
cli._app.invalidate()
cli._sudo_state = None
cli._sudo_deadline = 0
if hasattr(cli, '_app') and cli._app:
cli._app.invalidate()
cprint(f"\n{_DIM} ⏱ Timeout — continuing without sudo{_RST}")
return ""
def approval_callback(cli, command: str, description: str) -> str:
"""Prompt for dangerous command approval through the TUI.
Shows a selection UI with choices: once / session / always / deny.
"""
timeout = 60
response_queue = queue.Queue()
choices = ["once", "session", "always", "deny"]
cli._approval_state = {
"command": command,
"description": description,
"choices": choices,
"selected": 0,
"response_queue": response_queue,
}
cli._approval_deadline = _time.monotonic() + timeout
if hasattr(cli, '_app') and cli._app:
cli._app.invalidate()
while True:
try:
result = response_queue.get(timeout=1)
cli._approval_state = None
cli._approval_deadline = 0
if hasattr(cli, '_app') and cli._app:
cli._app.invalidate()
return result
except queue.Empty:
remaining = cli._approval_deadline - _time.monotonic()
if remaining <= 0:
break
if hasattr(cli, '_app') and cli._app:
cli._app.invalidate()
cli._approval_state = None
cli._approval_deadline = 0
if hasattr(cli, '_app') and cli._app:
cli._app.invalidate()
cprint(f"\n{_DIM} ⏱ Timeout — denying command{_RST}")
return "deny"
-352
View File
@@ -1,352 +0,0 @@
"""Clipboard image extraction for macOS, Linux, and WSL2.
Provides a single function `save_clipboard_image(dest)` that checks the
system clipboard for image data, saves it to *dest* as PNG, and returns
True on success. No external Python dependencies uses only OS-level
CLI tools that ship with the platform (or are commonly installed).
Platform support:
macOS osascript (always available), pngpaste (if installed)
WSL2 powershell.exe via .NET System.Windows.Forms.Clipboard
Linux wl-paste (Wayland), xclip (X11)
"""
import base64
import logging
import os
import subprocess
import sys
from pathlib import Path
logger = logging.getLogger(__name__)
# Cache WSL detection (checked once per process)
_wsl_detected: bool | None = None
def save_clipboard_image(dest: Path) -> bool:
"""Extract an image from the system clipboard and save it as PNG.
Returns True if an image was found and saved, False otherwise.
"""
dest.parent.mkdir(parents=True, exist_ok=True)
if sys.platform == "darwin":
return _macos_save(dest)
return _linux_save(dest)
def has_clipboard_image() -> bool:
"""Quick check: does the clipboard currently contain an image?
Lighter than save_clipboard_image doesn't extract or write anything.
"""
if sys.platform == "darwin":
return _macos_has_image()
if _is_wsl():
return _wsl_has_image()
if os.environ.get("WAYLAND_DISPLAY"):
return _wayland_has_image()
return _xclip_has_image()
# ── macOS ────────────────────────────────────────────────────────────────
def _macos_save(dest: Path) -> bool:
"""Try pngpaste first (fast, handles more formats), fall back to osascript."""
return _macos_pngpaste(dest) or _macos_osascript(dest)
def _macos_has_image() -> bool:
"""Check if macOS clipboard contains image data."""
try:
info = subprocess.run(
["osascript", "-e", "clipboard info"],
capture_output=True, text=True, timeout=3,
)
return "«class PNGf»" in info.stdout or "«class TIFF»" in info.stdout
except Exception:
return False
def _macos_pngpaste(dest: Path) -> bool:
"""Use pngpaste (brew install pngpaste) — fastest, cleanest."""
try:
r = subprocess.run(
["pngpaste", str(dest)],
capture_output=True, timeout=3,
)
if r.returncode == 0 and dest.exists() and dest.stat().st_size > 0:
return True
except FileNotFoundError:
pass # pngpaste not installed
except Exception as e:
logger.debug("pngpaste failed: %s", e)
return False
def _macos_osascript(dest: Path) -> bool:
"""Use osascript to extract PNG data from clipboard (always available)."""
if not _macos_has_image():
return False
# Extract as PNG
script = (
'try\n'
' set imgData to the clipboard as «class PNGf»\n'
f' set f to open for access POSIX file "{dest}" with write permission\n'
' write imgData to f\n'
' close access f\n'
'on error\n'
' return "fail"\n'
'end try\n'
)
try:
r = subprocess.run(
["osascript", "-e", script],
capture_output=True, text=True, timeout=5,
)
if r.returncode == 0 and "fail" not in r.stdout and dest.exists() and dest.stat().st_size > 0:
return True
except Exception as e:
logger.debug("osascript clipboard extract failed: %s", e)
return False
# ── Linux ────────────────────────────────────────────────────────────────
def _is_wsl() -> bool:
"""Detect if running inside WSL (1 or 2)."""
global _wsl_detected
if _wsl_detected is not None:
return _wsl_detected
try:
with open("/proc/version", "r") as f:
_wsl_detected = "microsoft" in f.read().lower()
except Exception:
_wsl_detected = False
return _wsl_detected
def _linux_save(dest: Path) -> bool:
"""Try clipboard backends in priority order: WSL → Wayland → X11."""
if _is_wsl():
if _wsl_save(dest):
return True
# Fall through — WSLg might have wl-paste or xclip working
if os.environ.get("WAYLAND_DISPLAY"):
if _wayland_save(dest):
return True
return _xclip_save(dest)
# ── WSL2 (powershell.exe) ────────────────────────────────────────────────
# PowerShell script: get clipboard image as base64-encoded PNG on stdout.
# Using .NET System.Windows.Forms.Clipboard — always available on Windows.
_PS_CHECK_IMAGE = (
"Add-Type -AssemblyName System.Windows.Forms;"
"[System.Windows.Forms.Clipboard]::ContainsImage()"
)
_PS_EXTRACT_IMAGE = (
"Add-Type -AssemblyName System.Windows.Forms;"
"Add-Type -AssemblyName System.Drawing;"
"$img = [System.Windows.Forms.Clipboard]::GetImage();"
"if ($null -eq $img) { exit 1 }"
"$ms = New-Object System.IO.MemoryStream;"
"$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png);"
"[System.Convert]::ToBase64String($ms.ToArray())"
)
def _wsl_has_image() -> bool:
"""Check if Windows clipboard has an image (via powershell.exe)."""
try:
r = subprocess.run(
["powershell.exe", "-NoProfile", "-NonInteractive", "-Command",
_PS_CHECK_IMAGE],
capture_output=True, text=True, timeout=8,
)
return r.returncode == 0 and "True" in r.stdout
except FileNotFoundError:
logger.debug("powershell.exe not found — WSL clipboard unavailable")
except Exception as e:
logger.debug("WSL clipboard check failed: %s", e)
return False
def _wsl_save(dest: Path) -> bool:
"""Extract clipboard image via powershell.exe → base64 → decode to PNG."""
try:
r = subprocess.run(
["powershell.exe", "-NoProfile", "-NonInteractive", "-Command",
_PS_EXTRACT_IMAGE],
capture_output=True, text=True, timeout=15,
)
if r.returncode != 0:
return False
b64_data = r.stdout.strip()
if not b64_data:
return False
png_bytes = base64.b64decode(b64_data)
dest.write_bytes(png_bytes)
return dest.exists() and dest.stat().st_size > 0
except FileNotFoundError:
logger.debug("powershell.exe not found — WSL clipboard unavailable")
except Exception as e:
logger.debug("WSL clipboard extraction failed: %s", e)
dest.unlink(missing_ok=True)
return False
# ── Wayland (wl-paste) ──────────────────────────────────────────────────
def _wayland_has_image() -> bool:
"""Check if Wayland clipboard has image content."""
try:
r = subprocess.run(
["wl-paste", "--list-types"],
capture_output=True, text=True, timeout=3,
)
return r.returncode == 0 and any(
t.startswith("image/") for t in r.stdout.splitlines()
)
except FileNotFoundError:
logger.debug("wl-paste not installed — Wayland clipboard unavailable")
except Exception:
pass
return False
def _wayland_save(dest: Path) -> bool:
"""Use wl-paste to extract clipboard image (Wayland sessions)."""
try:
# Check available MIME types
types_r = subprocess.run(
["wl-paste", "--list-types"],
capture_output=True, text=True, timeout=3,
)
if types_r.returncode != 0:
return False
types = types_r.stdout.splitlines()
# Prefer PNG, fall back to other image formats
mime = None
for preferred in ("image/png", "image/jpeg", "image/bmp",
"image/gif", "image/webp"):
if preferred in types:
mime = preferred
break
if not mime:
return False
# Extract the image data
with open(dest, "wb") as f:
subprocess.run(
["wl-paste", "--type", mime],
stdout=f, stderr=subprocess.DEVNULL, timeout=5, check=True,
)
if not dest.exists() or dest.stat().st_size == 0:
return False
# BMP needs conversion to PNG (common in WSLg where only BMP
# is bridged from Windows clipboard via RDP).
if mime == "image/bmp":
return _convert_to_png(dest)
return True
except FileNotFoundError:
logger.debug("wl-paste not installed — Wayland clipboard unavailable")
except Exception as e:
logger.debug("wl-paste clipboard extraction failed: %s", e)
dest.unlink(missing_ok=True)
return False
def _convert_to_png(path: Path) -> bool:
"""Convert an image file to PNG in-place (requires Pillow or ImageMagick)."""
# Try Pillow first (likely installed in the venv)
try:
from PIL import Image
img = Image.open(path)
img.save(path, "PNG")
return True
except ImportError:
pass
except Exception as e:
logger.debug("Pillow BMP→PNG conversion failed: %s", e)
# Fall back to ImageMagick convert
try:
tmp = path.with_suffix(".bmp")
path.rename(tmp)
r = subprocess.run(
["convert", str(tmp), "png:" + str(path)],
capture_output=True, timeout=5,
)
tmp.unlink(missing_ok=True)
if r.returncode == 0 and path.exists() and path.stat().st_size > 0:
return True
except FileNotFoundError:
logger.debug("ImageMagick not installed — cannot convert BMP to PNG")
except Exception as e:
logger.debug("ImageMagick BMP→PNG conversion failed: %s", e)
# Can't convert — BMP is still usable as-is for most APIs
return path.exists() and path.stat().st_size > 0
# ── X11 (xclip) ─────────────────────────────────────────────────────────
def _xclip_has_image() -> bool:
"""Check if X11 clipboard has image content."""
try:
r = subprocess.run(
["xclip", "-selection", "clipboard", "-t", "TARGETS", "-o"],
capture_output=True, text=True, timeout=3,
)
return r.returncode == 0 and "image/png" in r.stdout
except FileNotFoundError:
pass
except Exception:
pass
return False
def _xclip_save(dest: Path) -> bool:
"""Use xclip to extract clipboard image (X11 sessions)."""
# Check if clipboard has image content
try:
targets = subprocess.run(
["xclip", "-selection", "clipboard", "-t", "TARGETS", "-o"],
capture_output=True, text=True, timeout=3,
)
if "image/png" not in targets.stdout:
return False
except FileNotFoundError:
logger.debug("xclip not installed — X11 clipboard image paste unavailable")
return False
except Exception:
return False
# Extract PNG data
try:
with open(dest, "wb") as f:
subprocess.run(
["xclip", "-selection", "clipboard", "-t", "image/png", "-o"],
stdout=f, stderr=subprocess.DEVNULL, timeout=5, check=True,
)
if dest.exists() and dest.stat().st_size > 0:
return True
except Exception as e:
logger.debug("xclip image extraction failed: %s", e)
dest.unlink(missing_ok=True)
return False
-145
View File
@@ -1,145 +0,0 @@
"""Codex model discovery from API, local cache, and config."""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import List, Optional
import os
logger = logging.getLogger(__name__)
DEFAULT_CODEX_MODELS: List[str] = [
"gpt-5.3-codex",
"gpt-5.2-codex",
"gpt-5.1-codex-max",
"gpt-5.1-codex-mini",
]
def _fetch_models_from_api(access_token: str) -> List[str]:
"""Fetch available models from the Codex API. Returns visible models sorted by priority."""
try:
import httpx
resp = httpx.get(
"https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
headers={"Authorization": f"Bearer {access_token}"},
timeout=10,
)
if resp.status_code != 200:
return []
data = resp.json()
entries = data.get("models", []) if isinstance(data, dict) else []
except Exception as exc:
logger.debug("Failed to fetch Codex models from API: %s", exc)
return []
sortable = []
for item in entries:
if not isinstance(item, dict):
continue
slug = item.get("slug")
if not isinstance(slug, str) or not slug.strip():
continue
slug = slug.strip()
if item.get("supported_in_api") is False:
continue
visibility = item.get("visibility", "")
if isinstance(visibility, str) and visibility.strip().lower() == "hide":
continue
priority = item.get("priority")
rank = int(priority) if isinstance(priority, (int, float)) else 10_000
sortable.append((rank, slug))
sortable.sort(key=lambda x: (x[0], x[1]))
return [slug for _, slug in sortable]
def _read_default_model(codex_home: Path) -> Optional[str]:
config_path = codex_home / "config.toml"
if not config_path.exists():
return None
try:
import tomllib
except Exception:
return None
try:
payload = tomllib.loads(config_path.read_text(encoding="utf-8"))
except Exception:
return None
model = payload.get("model") if isinstance(payload, dict) else None
if isinstance(model, str) and model.strip():
return model.strip()
return None
def _read_cache_models(codex_home: Path) -> List[str]:
cache_path = codex_home / "models_cache.json"
if not cache_path.exists():
return []
try:
raw = json.loads(cache_path.read_text(encoding="utf-8"))
except Exception:
return []
entries = raw.get("models") if isinstance(raw, dict) else None
sortable = []
if isinstance(entries, list):
for item in entries:
if not isinstance(item, dict):
continue
slug = item.get("slug")
if not isinstance(slug, str) or not slug.strip():
continue
slug = slug.strip()
if "codex" not in slug.lower():
continue
if item.get("supported_in_api") is False:
continue
visibility = item.get("visibility")
if isinstance(visibility, str) and visibility.strip().lower() == "hidden":
continue
priority = item.get("priority")
rank = int(priority) if isinstance(priority, (int, float)) else 10_000
sortable.append((rank, slug))
sortable.sort(key=lambda item: (item[0], item[1]))
deduped: List[str] = []
for _, slug in sortable:
if slug not in deduped:
deduped.append(slug)
return deduped
def get_codex_model_ids(access_token: Optional[str] = None) -> List[str]:
"""Return available Codex model IDs, trying API first, then local sources.
Resolution order: API (live, if token provided) > config.toml default >
local cache > hardcoded defaults.
"""
codex_home_str = os.getenv("CODEX_HOME", "").strip() or str(Path.home() / ".codex")
codex_home = Path(codex_home_str).expanduser()
ordered: List[str] = []
# Try live API if we have a token
if access_token:
api_models = _fetch_models_from_api(access_token)
if api_models:
return api_models
# Fall back to local sources
default_model = _read_default_model(codex_home)
if default_model:
ordered.append(default_model)
for model_id in _read_cache_models(codex_home):
if model_id not in ordered:
ordered.append(model_id)
for model_id in DEFAULT_CODEX_MODELS:
if model_id not in ordered:
ordered.append(model_id)
return ordered
-22
View File
@@ -1,22 +0,0 @@
"""Shared ANSI color utilities for Hermes CLI modules."""
import sys
class Colors:
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
RED = "\033[31m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
BLUE = "\033[34m"
MAGENTA = "\033[35m"
CYAN = "\033[36m"
def color(text: str, *codes) -> str:
"""Apply color codes to text (only when output is a TTY)."""
if not sys.stdout.isatty():
return text
return "".join(codes) + text + Colors.RESET
-52
View File
@@ -1,52 +0,0 @@
"""Slash command definitions and autocomplete for the Hermes CLI.
Contains the COMMANDS dict and the SlashCommandCompleter class.
These are pure data/UI with no HermesCLI state dependency.
"""
from prompt_toolkit.completion import Completer, Completion
COMMANDS = {
"/help": "Show this help message",
"/tools": "List available tools",
"/toolsets": "List available toolsets",
"/model": "Show or change the current model",
"/prompt": "View/set custom system prompt",
"/personality": "Set a predefined personality",
"/clear": "Clear screen and reset conversation (fresh start)",
"/history": "Show conversation history",
"/new": "Start a new conversation (reset history)",
"/reset": "Reset conversation only (keep screen)",
"/retry": "Retry the last message (resend to agent)",
"/undo": "Remove the last user/assistant exchange",
"/save": "Save the current conversation",
"/config": "Show current configuration",
"/cron": "Manage scheduled tasks (list, add, remove)",
"/skills": "Search, install, inspect, or manage skills from online registries",
"/platforms": "Show gateway/messaging platform status",
"/verbose": "Cycle tool progress display: off → new → all → verbose",
"/compress": "Manually compress conversation context (flush memories + summarize)",
"/usage": "Show token usage for the current session",
"/insights": "Show usage insights and analytics (last 30 days)",
"/quit": "Exit the CLI (also: /exit, /q)",
}
class SlashCommandCompleter(Completer):
"""Autocomplete for /commands in the input area."""
def get_completions(self, document, complete_event):
text = document.text_before_cursor
if not text.startswith("/"):
return
word = text[1:]
for cmd, desc in COMMANDS.items():
cmd_name = cmd[1:]
if cmd_name.startswith(word):
yield Completion(
cmd_name,
start_position=-len(word),
display=cmd,
display_meta=desc,
)
+110 -256
View File
@@ -13,17 +13,29 @@ This module provides:
"""
import os
import platform
import sys
import subprocess
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
_IS_WINDOWS = platform.system() == "Windows"
import yaml
from hermes_cli.colors import Colors, color
# ANSI colors
class Colors:
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
RED = "\033[31m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
BLUE = "\033[34m"
MAGENTA = "\033[35m"
CYAN = "\033[36m"
def color(text: str, *codes) -> str:
if not sys.stdout.isatty():
return text
return "".join(codes) + text + Colors.RESET
# =============================================================================
@@ -52,7 +64,6 @@ def ensure_hermes_home():
(home / "cron").mkdir(parents=True, exist_ok=True)
(home / "sessions").mkdir(parents=True, exist_ok=True)
(home / "logs").mkdir(parents=True, exist_ok=True)
(home / "memories").mkdir(parents=True, exist_ok=True)
# =============================================================================
@@ -71,12 +82,6 @@ DEFAULT_CONFIG = {
"docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
"singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
"modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
"daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
# Container resource limits (docker, singularity, modal, daytona — ignored for local/ssh)
"container_cpu": 1,
"container_memory": 5120, # MB (default 5GB)
"container_disk": 51200, # MB (default 50GB)
"container_persistent": True, # Persist filesystem across sessions
},
"browser": {
@@ -123,86 +128,50 @@ DEFAULT_CONFIG = {
"max_ms": 2500,
},
# Persistent memory -- bounded curated memory injected into system prompt
"memory": {
"memory_enabled": True,
"user_profile_enabled": True,
"memory_char_limit": 2200, # ~800 tokens at 2.75 chars/token
"user_char_limit": 1375, # ~500 tokens at 2.75 chars/token
},
# Ephemeral prefill messages file — JSON list of {role, content} dicts
# injected at the start of every API call for few-shot priming.
# Never saved to sessions, logs, or trajectories.
"prefill_messages_file": "",
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
# This section is only needed for hermes-specific overrides; everything else
# (apiKey, workspace, peerName, sessions, enabled) comes from the global config.
"honcho": {},
# Permanently allowed dangerous command patterns (added via "always" approval)
"command_allowlist": [],
# Config schema version - bump this when adding new required fields
"_config_version": 5,
"_config_version": 2,
}
# =============================================================================
# Config Migration System
# =============================================================================
# Required environment variables with metadata for migration prompts.
# LLM provider is required but handled in the setup wizard's provider
# selection step (Nous Portal / OpenRouter / Custom endpoint), so this
# dict is intentionally empty — no single env var is universally required.
REQUIRED_ENV_VARS = {}
# Required environment variables with metadata for migration prompts
REQUIRED_ENV_VARS = {
"OPENROUTER_API_KEY": {
"description": "OpenRouter API key (required for vision, web scraping, and tools)",
"prompt": "OpenRouter API key",
"url": "https://openrouter.ai/keys",
"required": True,
"password": True,
},
}
# Optional environment variables that enhance functionality
OPTIONAL_ENV_VARS = {
# ── Provider (handled in provider selection, not shown in checklists) ──
"OPENROUTER_API_KEY": {
"description": "OpenRouter API key (for vision, web scraping helpers, and MoA)",
"prompt": "OpenRouter API key",
"url": "https://openrouter.ai/keys",
"password": True,
"tools": ["vision_analyze", "mixture_of_agents"],
"category": "provider",
"advanced": True,
},
# ── Tool API keys ──
"FIRECRAWL_API_KEY": {
"description": "Firecrawl API key for web search and scraping",
"prompt": "Firecrawl API key",
"url": "https://firecrawl.dev/",
"tools": ["web_search", "web_extract"],
"password": True,
"category": "tool",
},
"FIRECRAWL_API_URL": {
"description": "Firecrawl API URL for self-hosted instances (optional)",
"prompt": "Firecrawl API URL (leave empty for cloud)",
"url": None,
"password": False,
"category": "tool",
"advanced": True,
},
"BROWSERBASE_API_KEY": {
"description": "Browserbase API key for browser automation",
"prompt": "Browserbase API key",
"prompt": "Browserbase API key",
"url": "https://browserbase.com/",
"tools": ["browser_navigate", "browser_click"],
"tools": ["browser_navigate", "browser_click", "etc."],
"password": True,
"category": "tool",
},
"BROWSERBASE_PROJECT_ID": {
"description": "Browserbase project ID",
"prompt": "Browserbase project ID",
"url": "https://browserbase.com/",
"tools": ["browser_navigate", "browser_click"],
"tools": ["browser_navigate", "browser_click", "etc."],
"password": False,
"category": "tool",
},
"FAL_KEY": {
"description": "FAL API key for image generation",
@@ -210,7 +179,6 @@ OPTIONAL_ENV_VARS = {
"url": "https://fal.ai/",
"tools": ["image_generate"],
"password": True,
"category": "tool",
},
"TINKER_API_KEY": {
"description": "Tinker API key for RL training",
@@ -218,7 +186,6 @@ OPTIONAL_ENV_VARS = {
"url": "https://tinker-console.thinkingmachines.ai/keys",
"tools": ["rl_start_training", "rl_check_status", "rl_stop_training"],
"password": True,
"category": "tool",
},
"WANDB_API_KEY": {
"description": "Weights & Biases API key for experiment tracking",
@@ -226,145 +193,98 @@ OPTIONAL_ENV_VARS = {
"url": "https://wandb.ai/authorize",
"tools": ["rl_get_results", "rl_check_status"],
"password": True,
"category": "tool",
},
"VOICE_TOOLS_OPENAI_KEY": {
"OPENAI_BASE_URL": {
"description": "Custom OpenAI-compatible API endpoint (for VLLM/SGLang/etc.)",
"prompt": "OpenAI-compatible base URL (only if running your own endpoint)",
"url": None,
"password": False,
"advanced": True, # Hide from standard migrate flow
},
"HERMES_OPENAI_API_KEY": {
"description": "OpenAI API key for voice transcription (Whisper) and OpenAI TTS",
"prompt": "OpenAI API Key (for Whisper STT + TTS)",
"url": "https://platform.openai.com/api-keys",
"tools": ["voice_transcription", "openai_tts"],
"password": True,
"category": "tool",
},
"ELEVENLABS_API_KEY": {
"description": "ElevenLabs API key for premium text-to-speech voices",
"prompt": "ElevenLabs API key",
"url": "https://elevenlabs.io/",
"SLACK_BOT_TOKEN": {
"description": "Slack bot integration",
"prompt": "Slack Bot Token (xoxb-...)",
"url": "https://api.slack.com/apps",
"tools": ["slack"],
"password": True,
"category": "tool",
},
"GITHUB_TOKEN": {
"description": "GitHub token for Skills Hub (higher API rate limits, skill publish)",
"prompt": "GitHub Token",
"url": "https://github.com/settings/tokens",
"SLACK_APP_TOKEN": {
"description": "Slack Socket Mode connection",
"prompt": "Slack App Token (xapp-...)",
"url": "https://api.slack.com/apps",
"tools": ["slack"],
"password": True,
"category": "tool",
},
# ── Honcho ──
"HONCHO_API_KEY": {
"description": "Honcho API key for AI-native persistent memory",
"prompt": "Honcho API key",
"url": "https://app.honcho.dev",
"tools": ["query_user_context"],
"password": True,
"category": "tool",
},
# ── Messaging platforms ──
# Messaging platform tokens
"TELEGRAM_BOT_TOKEN": {
"description": "Telegram bot token from @BotFather",
"prompt": "Telegram bot token",
"url": "https://t.me/BotFather",
"password": True,
"category": "messaging",
},
"TELEGRAM_ALLOWED_USERS": {
"description": "Comma-separated Telegram user IDs allowed to use the bot (get ID from @userinfobot)",
"prompt": "Allowed Telegram user IDs (comma-separated)",
"url": "https://t.me/userinfobot",
"password": False,
"category": "messaging",
},
"DISCORD_BOT_TOKEN": {
"description": "Discord bot token from Developer Portal",
"prompt": "Discord bot token",
"url": "https://discord.com/developers/applications",
"password": True,
"category": "messaging",
},
"DISCORD_ALLOWED_USERS": {
"description": "Comma-separated Discord user IDs allowed to use the bot",
"prompt": "Allowed Discord user IDs (comma-separated)",
"url": None,
"password": False,
"category": "messaging",
},
"SLACK_BOT_TOKEN": {
"description": "Slack bot integration",
"prompt": "Slack Bot Token (xoxb-...)",
"url": "https://api.slack.com/apps",
# Text-to-speech (premium providers)
"ELEVENLABS_API_KEY": {
"description": "ElevenLabs API key for premium text-to-speech voices",
"prompt": "ElevenLabs API key",
"url": "https://elevenlabs.io/",
"password": True,
"category": "messaging",
},
"SLACK_APP_TOKEN": {
"description": "Slack Socket Mode connection",
"prompt": "Slack App Token (xapp-...)",
"url": "https://api.slack.com/apps",
"password": True,
"category": "messaging",
},
"GATEWAY_ALLOW_ALL_USERS": {
"description": "Allow all users to interact with messaging bots (true/false). Default: false.",
"prompt": "Allow all users (true/false)",
"url": None,
"password": False,
"category": "messaging",
"advanced": True,
},
# ── Agent settings ──
# Terminal configuration
"MESSAGING_CWD": {
"description": "Working directory for terminal commands via messaging",
"description": "Working directory for terminal commands via messaging (Telegram/Discord/etc). CLI always uses current directory.",
"prompt": "Messaging working directory (default: home)",
"url": None,
"password": False,
"category": "setting",
},
"SUDO_PASSWORD": {
"description": "Sudo password for terminal commands requiring root access",
"prompt": "Sudo password",
"url": None,
"password": True,
"category": "setting",
},
# Agent configuration
"HERMES_MAX_ITERATIONS": {
"description": "Maximum tool-calling iterations per conversation (default: 60)",
"prompt": "Max iterations",
"url": None,
"password": False,
"category": "setting",
},
# HERMES_TOOL_PROGRESS and HERMES_TOOL_PROGRESS_MODE are deprecated —
# now configured via display.tool_progress in config.yaml (off|new|all|verbose).
# Gateway falls back to these env vars for backward compatibility.
"HERMES_TOOL_PROGRESS": {
"description": "(deprecated) Use display.tool_progress in config.yaml instead",
"prompt": "Tool progress (deprecated — use config.yaml)",
"description": "Send tool progress messages in messaging channels (true/false)",
"prompt": "Enable tool progress messages",
"url": None,
"password": False,
"category": "setting",
},
"HERMES_TOOL_PROGRESS_MODE": {
"description": "(deprecated) Use display.tool_progress in config.yaml instead",
"prompt": "Progress mode (deprecated — use config.yaml)",
"description": "Progress mode: 'all' (every tool) or 'new' (only when tool changes)",
"prompt": "Progress mode (all/new)",
"url": None,
"password": False,
"category": "setting",
},
"HERMES_PREFILL_MESSAGES_FILE": {
"description": "Path to JSON file with ephemeral prefill messages for few-shot priming",
"prompt": "Prefill messages file path",
"url": None,
"password": False,
"category": "setting",
},
"HERMES_EPHEMERAL_SYSTEM_PROMPT": {
"description": "Ephemeral system prompt injected at API-call time (never persisted to sessions)",
"prompt": "Ephemeral system prompt",
"url": None,
"password": False,
"category": "setting",
},
}
@@ -391,46 +311,35 @@ def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
return missing
def _set_nested(config: dict, dotted_key: str, value):
"""Set a value at an arbitrarily nested dotted key path.
Creates intermediate dicts as needed, e.g. ``_set_nested(c, "a.b.c", 1)``
ensures ``c["a"]["b"]["c"] == 1``.
"""
parts = dotted_key.split(".")
current = config
for part in parts[:-1]:
if part not in current or not isinstance(current.get(part), dict):
current[part] = {}
current = current[part]
current[parts[-1]] = value
def get_missing_config_fields() -> List[Dict[str, Any]]:
"""
Check which config fields are missing or outdated (recursive).
Check which config fields are missing or outdated.
Walks the DEFAULT_CONFIG tree at arbitrary depth and reports any keys
present in defaults but absent from the user's loaded config.
Returns list of missing/outdated fields.
"""
config = load_config()
missing = []
def _check(defaults: dict, current: dict, prefix: str = ""):
for key, default_value in defaults.items():
if key.startswith('_'):
continue
full_key = key if not prefix else f"{prefix}.{key}"
if key not in current:
missing.append({
"key": full_key,
"default": default_value,
"description": f"New config option: {full_key}",
})
elif isinstance(default_value, dict) and isinstance(current.get(key), dict):
_check(default_value, current[key], full_key)
_check(DEFAULT_CONFIG, config)
# Check for new top-level keys in DEFAULT_CONFIG
for key, default_value in DEFAULT_CONFIG.items():
if key.startswith('_'):
continue # Skip internal keys
if key not in config:
missing.append({
"key": key,
"default": default_value,
"description": f"New config section: {key}",
})
elif isinstance(default_value, dict):
# Check nested keys
for subkey, subvalue in default_value.items():
if subkey not in config.get(key, {}):
missing.append({
"key": f"{key}.{subkey}",
"default": subvalue,
"description": f"New config option: {key}.{subkey}",
})
return missing
@@ -462,29 +371,6 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
# Check config version
current_ver, latest_ver = check_config_version()
# ── Version 3 → 4: migrate tool progress from .env to config.yaml ──
if current_ver < 4:
config = load_config()
display = config.get("display", {})
if not isinstance(display, dict):
display = {}
if "tool_progress" not in display:
old_enabled = get_env_value("HERMES_TOOL_PROGRESS")
old_mode = get_env_value("HERMES_TOOL_PROGRESS_MODE")
if old_enabled and old_enabled.lower() in ("false", "0", "no"):
display["tool_progress"] = "off"
results["config_added"].append("display.tool_progress=off (from HERMES_TOOL_PROGRESS=false)")
elif old_mode and old_mode.lower() in ("new", "all"):
display["tool_progress"] = old_mode.lower()
results["config_added"].append(f"display.tool_progress={old_mode.lower()} (from HERMES_TOOL_PROGRESS_MODE)")
else:
display["tool_progress"] = "all"
results["config_added"].append("display.tool_progress=all (default)")
config["display"] = display
save_config(config)
if not quiet:
print(f" ✓ Migrated tool progress to config.yaml: {display['tool_progress']}")
if current_ver < latest_ver and not quiet:
print(f"Config version: {current_ver}{latest_ver}")
@@ -564,7 +450,16 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
key = field["key"]
default = field["default"]
_set_nested(config, key, default)
# Add with default value
if "." in key:
# Nested key
parent, child = key.split(".", 1)
if parent not in config:
config[parent] = {}
config[parent][child] = default
else:
config[key] = default
results["config_added"].append(key)
if not quiet:
print(f" ✓ Added {key} = {default}")
@@ -581,31 +476,12 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
return results
def _deep_merge(base: dict, override: dict) -> dict:
"""Recursively merge *override* into *base*, preserving nested defaults.
Keys in *override* take precedence. If both values are dicts the merge
recurses, so a user who overrides only ``tts.elevenlabs.voice_id`` will
keep the default ``tts.elevenlabs.model_id`` intact.
"""
result = base.copy()
for key, value in override.items():
if (
key in result
and isinstance(result[key], dict)
and isinstance(value, dict)
):
result[key] = _deep_merge(result[key], value)
else:
result[key] = value
return result
def load_config() -> Dict[str, Any]:
"""Load configuration from ~/.hermes/config.yaml."""
import copy
config_path = get_config_path()
# Deep copy to avoid mutating DEFAULT_CONFIG
config = copy.deepcopy(DEFAULT_CONFIG)
if config_path.exists():
@@ -613,7 +489,12 @@ def load_config() -> Dict[str, Any]:
with open(config_path) as f:
user_config = yaml.safe_load(f) or {}
config = _deep_merge(config, user_config)
# Deep merge user values over defaults
for key, value in user_config.items():
if isinstance(value, dict) and key in config and isinstance(config[key], dict):
config[key].update(value)
else:
config[key] = value
except Exception as e:
print(f"Warning: Failed to load config: {e}")
@@ -635,10 +516,7 @@ def load_env() -> Dict[str, str]:
env_vars = {}
if env_path.exists():
# On Windows, open() defaults to the system locale (cp1252) which can
# fail on UTF-8 .env files. Use explicit UTF-8 only on Windows.
open_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
with open(env_path, **open_kw) as f:
with open(env_path) as f:
for line in f:
line = line.strip()
if line and not line.startswith('#') and '=' in line:
@@ -653,14 +531,10 @@ def save_env_value(key: str, value: str):
ensure_hermes_home()
env_path = get_env_path()
# On Windows, open() defaults to the system locale (cp1252) which can
# cause OSError errno 22 on UTF-8 .env files.
read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
write_kw = {"encoding": "utf-8"} if _IS_WINDOWS else {}
# Load existing
lines = []
if env_path.exists():
with open(env_path, **read_kw) as f:
with open(env_path) as f:
lines = f.readlines()
# Find and update or append
@@ -677,7 +551,7 @@ def save_env_value(key: str, value: str):
lines[-1] += "\n"
lines.append(f"{key}={value}\n")
with open(env_path, 'w', **write_kw) as f:
with open(env_path, 'w') as f:
f.writelines(lines)
@@ -712,7 +586,7 @@ def show_config():
print()
print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
print(color(" Hermes Configuration │", Colors.CYAN))
print(color("🦋 Hermes Configuration │", Colors.CYAN))
print(color("└─────────────────────────────────────────────────────────┘", Colors.CYAN))
# Paths
@@ -729,7 +603,7 @@ def show_config():
keys = [
("OPENROUTER_API_KEY", "OpenRouter"),
("ANTHROPIC_API_KEY", "Anthropic"),
("VOICE_TOOLS_OPENAI_KEY", "OpenAI (STT/TTS)"),
("HERMES_OPENAI_API_KEY", "OpenAI (STT/TTS)"),
("FIRECRAWL_API_KEY", "Firecrawl"),
("BROWSERBASE_API_KEY", "Browserbase"),
("FAL_KEY", "FAL"),
@@ -762,10 +636,6 @@ def show_config():
print(f" Modal image: {terminal.get('modal_image', 'python:3.11')}")
modal_token = get_env_value('MODAL_TOKEN_ID')
print(f" Modal token: {'configured' if modal_token else '(not set)'}")
elif terminal.get('backend') == 'daytona':
print(f" Daytona image: {terminal.get('daytona_image', 'nikolaik/python-nodejs:python3.11-nodejs20')}")
daytona_key = get_env_value('DAYTONA_API_KEY')
print(f" API key: {'configured' if daytona_key else '(not set)'}")
elif terminal.get('backend') == 'ssh':
ssh_host = get_env_value('TERMINAL_SSH_HOST')
ssh_user = get_env_value('TERMINAL_SSH_USER')
@@ -833,16 +703,14 @@ def set_config_value(key: str, value: str):
"""Set a configuration value."""
# Check if it's an API key (goes to .env)
api_keys = [
'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY',
'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID',
'OPENROUTER_API_KEY', 'ANTHROPIC_API_KEY', 'HERMES_OPENAI_API_KEY',
'FIRECRAWL_API_KEY', 'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID',
'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
'SUDO_PASSWORD', 'SLACK_BOT_TOKEN', 'SLACK_APP_TOKEN',
'GITHUB_TOKEN', 'HONCHO_API_KEY', 'NOUS_API_KEY', 'WANDB_API_KEY',
'TINKER_API_KEY',
]
if key.upper() in api_keys or key.upper().endswith('_API_KEY') or key.upper().endswith('_TOKEN') or key.upper().startswith('TERMINAL_SSH'):
if key.upper() in api_keys or key.upper().startswith('TERMINAL_SSH'):
save_env_value(key.upper(), value)
print(f"✓ Set {key} in {get_env_path()}")
return
@@ -885,20 +753,6 @@ def set_config_value(key: str, value: str):
with open(config_path, 'w') as f:
yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
# Keep .env in sync for keys that terminal_tool reads directly from env vars.
# config.yaml is authoritative, but terminal_tool only reads TERMINAL_ENV etc.
_config_to_env_sync = {
"terminal.backend": "TERMINAL_ENV",
"terminal.docker_image": "TERMINAL_DOCKER_IMAGE",
"terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
"terminal.modal_image": "TERMINAL_MODAL_IMAGE",
"terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
"terminal.cwd": "TERMINAL_CWD",
"terminal.timeout": "TERMINAL_TIMEOUT",
}
if key in _config_to_env_sync:
save_env_value(_config_to_env_sync[key], str(value))
print(f"✓ Set {key} = {value} in {config_path}")
+46 -49
View File
@@ -1,20 +1,32 @@
"""
Cron subcommand for hermes CLI.
Handles: hermes cron [list|status|tick]
Cronjobs are executed automatically by the gateway daemon (hermes gateway).
Install the gateway as a service for background execution:
hermes gateway install
Handles: hermes cron [list|daemon|tick]
"""
import json
import sys
import time
from pathlib import Path
from datetime import datetime
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
sys.path.insert(0, str(PROJECT_ROOT))
from hermes_cli.colors import Colors, color
# ANSI colors
class Colors:
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
RED = "\033[31m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
CYAN = "\033[36m"
def color(text: str, *codes) -> str:
if not sys.stdout.isatty():
return text
return "".join(codes) + text + Colors.RESET
def cron_list(show_all: bool = False):
@@ -25,7 +37,7 @@ def cron_list(show_all: bool = False):
if not jobs:
print(color("No scheduled jobs.", Colors.DIM))
print(color("Create one with the /cron add command in chat, or via Telegram.", Colors.DIM))
print(color("Create one with: hermes cron add <schedule> <prompt>", Colors.DIM))
return
print()
@@ -41,6 +53,7 @@ def cron_list(show_all: bool = False):
enabled = job.get("enabled", True)
next_run = job.get("next_run_at", "?")
# Repeat info
repeat_info = job.get("repeat", {})
repeat_times = repeat_info.get("times")
repeat_completed = repeat_info.get("completed", 0)
@@ -50,11 +63,13 @@ def cron_list(show_all: bool = False):
else:
repeat_str = ""
# Delivery targets
deliver = job.get("deliver", ["local"])
if isinstance(deliver, str):
deliver = [deliver]
deliver_str = ", ".join(deliver)
# Status indicator
if not enabled:
status = color("[disabled]", Colors.RED)
else:
@@ -67,51 +82,32 @@ def cron_list(show_all: bool = False):
print(f" Next run: {next_run}")
print(f" Deliver: {deliver_str}")
print()
def cron_daemon(interval: int = 60):
"""Run the cron daemon."""
from cron.scheduler import start_daemon
# Warn if gateway isn't running
from hermes_cli.gateway import find_gateway_pids
if not find_gateway_pids():
print(color(" ⚠ Gateway is not running — jobs won't fire automatically.", Colors.YELLOW))
print(color(" Start it with: hermes gateway install", Colors.DIM))
print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
print(color("│ 🦋 Hermes Cron Daemon │", Colors.CYAN))
print(color("├─────────────────────────────────────────────────────────┤", Colors.CYAN))
print(color(" Press Ctrl+C to stop │", Colors.CYAN))
print(color("└─────────────────────────────────────────────────────────┘", Colors.CYAN))
print()
try:
start_daemon(interval=interval)
except KeyboardInterrupt:
print()
print(color("Cron daemon stopped.", Colors.YELLOW))
def cron_tick():
"""Run due jobs once and exit."""
"""Run due jobs once (for system cron integration)."""
from cron.scheduler import tick
tick(verbose=True)
def cron_status():
"""Show cron execution status."""
from cron.jobs import list_jobs
from hermes_cli.gateway import find_gateway_pids
print()
pids = find_gateway_pids()
if pids:
print(color("✓ Gateway is running — cron jobs will fire automatically", Colors.GREEN))
print(f" PID: {', '.join(map(str, pids))}")
else:
print(color("✗ Gateway is not running — cron jobs will NOT fire", Colors.RED))
print()
print(" To enable automatic execution:")
print(" hermes gateway install # Install as system service (recommended)")
print(" hermes gateway # Or run in foreground")
print()
jobs = list_jobs(include_disabled=False)
if jobs:
next_runs = [j.get("next_run_at") for j in jobs if j.get("next_run_at")]
print(f" {len(jobs)} active job(s)")
if next_runs:
print(f" Next run: {min(next_runs)}")
else:
print(" No active jobs")
print()
print(f"[{datetime.now().isoformat()}] Running cron tick...")
tick()
def cron_command(args):
@@ -122,13 +118,14 @@ def cron_command(args):
show_all = getattr(args, 'all', False)
cron_list(show_all)
elif subcmd == "daemon":
interval = getattr(args, 'interval', 60)
cron_daemon(interval)
elif subcmd == "tick":
cron_tick()
elif subcmd == "status":
cron_status()
else:
print(f"Unknown cron command: {subcmd}")
print("Usage: hermes cron [list|status|tick]")
print("Usage: hermes cron [list|daemon|tick]")
sys.exit(1)
+48 -245
View File
@@ -19,19 +19,24 @@ HERMES_HOME = get_hermes_home()
from dotenv import load_dotenv
_env_path = get_env_path()
if _env_path.exists():
try:
load_dotenv(_env_path, encoding="utf-8")
except UnicodeDecodeError:
load_dotenv(_env_path, encoding="latin-1")
# Also try project .env as dev fallback
load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")
load_dotenv(_env_path)
# Also try project .env as fallback
load_dotenv(PROJECT_ROOT / ".env", override=False)
# Point mini-swe-agent at ~/.hermes/ so it shares our config
os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(HERMES_HOME))
os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")
# ANSI colors
class Colors:
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
RED = "\033[31m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
CYAN = "\033[36m"
from hermes_cli.colors import Colors, color
from hermes_constants import OPENROUTER_MODELS_URL
def color(text: str, *codes) -> str:
if not sys.stdout.isatty():
return text
return "".join(codes) + text + Colors.RESET
def check_ok(text: str, detail: str = ""):
print(f" {color('', Colors.GREEN)} {text}" + (f" {color(detail, Colors.DIM)}" if detail else ""))
@@ -51,8 +56,6 @@ def run_doctor(args):
should_fix = getattr(args, 'fix', False)
issues = []
manual_issues = [] # issues that can't be auto-fixed
fixed_count = 0
print()
print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
@@ -144,15 +147,8 @@ def run_doctor(args):
check_ok(".env file exists (in project directory)")
else:
check_fail("~/.hermes/.env file missing")
if should_fix:
env_path.parent.mkdir(parents=True, exist_ok=True)
env_path.touch()
check_ok("Created empty ~/.hermes/.env")
check_info("Run 'hermes setup' to configure API keys")
fixed_count += 1
else:
check_info("Run 'hermes setup' to create one")
issues.append("Run 'hermes setup' to create .env")
check_info("Run 'hermes setup' to create one")
issues.append("Run 'hermes setup' to create .env")
# Check ~/.hermes/config.yaml (primary) or project cli-config.yaml (fallback)
config_path = HERMES_HOME / 'config.yaml'
@@ -163,78 +159,19 @@ def run_doctor(args):
if fallback_config.exists():
check_ok("cli-config.yaml exists (in project directory)")
else:
example_config = PROJECT_ROOT / 'cli-config.yaml.example'
if should_fix and example_config.exists():
config_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(str(example_config), str(config_path))
check_ok("Created ~/.hermes/config.yaml from cli-config.yaml.example")
fixed_count += 1
elif should_fix:
check_warn("config.yaml not found and no example to copy from")
manual_issues.append("Create ~/.hermes/config.yaml manually")
else:
check_warn("config.yaml not found", "(using defaults)")
check_warn("config.yaml not found", "(using defaults)")
# =========================================================================
# Check: Auth providers
# =========================================================================
print()
print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD))
try:
from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status
nous_status = get_nous_auth_status()
if nous_status.get("logged_in"):
check_ok("Nous Portal auth", "(logged in)")
else:
check_warn("Nous Portal auth", "(not logged in)")
codex_status = get_codex_auth_status()
if codex_status.get("logged_in"):
check_ok("OpenAI Codex auth", "(logged in)")
else:
check_warn("OpenAI Codex auth", "(not logged in)")
if codex_status.get("error"):
check_info(codex_status["error"])
except Exception as e:
check_warn("Auth provider status", f"(could not check: {e})")
if shutil.which("codex"):
check_ok("codex CLI")
else:
check_warn("codex CLI not found", "(required for openai-codex login)")
# =========================================================================
# Check: Directory structure
# =========================================================================
print()
print(color("◆ Directory Structure", Colors.CYAN, Colors.BOLD))
hermes_home = HERMES_HOME
hermes_home = Path.home() / ".hermes"
if hermes_home.exists():
check_ok("~/.hermes directory exists")
else:
if should_fix:
hermes_home.mkdir(parents=True, exist_ok=True)
check_ok("Created ~/.hermes directory")
fixed_count += 1
else:
check_warn("~/.hermes not found", "(will be created on first use)")
# Check expected subdirectories
expected_subdirs = ["cron", "sessions", "logs", "skills", "memories"]
for subdir_name in expected_subdirs:
subdir_path = hermes_home / subdir_name
if subdir_path.exists():
check_ok(f"~/.hermes/{subdir_name}/ exists")
else:
if should_fix:
subdir_path.mkdir(parents=True, exist_ok=True)
check_ok(f"Created ~/.hermes/{subdir_name}/")
fixed_count += 1
else:
check_warn(f"~/.hermes/{subdir_name}/ not found", "(will be created on first use)")
check_warn("~/.hermes not found", "(will be created on first use)")
# Check for SOUL.md persona file
soul_path = hermes_home / "SOUL.md"
@@ -250,52 +187,14 @@ def run_doctor(args):
check_warn("~/.hermes/SOUL.md not found", "(create it to give Hermes a custom personality)")
if should_fix:
soul_path.parent.mkdir(parents=True, exist_ok=True)
soul_path.write_text(
"# Hermes Agent Persona\n\n"
"<!-- Edit this file to customize how Hermes communicates. -->\n\n"
"You are Hermes, a helpful AI assistant.\n",
encoding="utf-8",
)
check_ok("Created ~/.hermes/SOUL.md with basic template")
fixed_count += 1
soul_path.write_text("# Hermes Agent Persona\n\n<!-- Edit this file to customize how Hermes communicates. -->\n", encoding="utf-8")
check_ok("Created ~/.hermes/SOUL.md")
# Check memory directory
memories_dir = hermes_home / "memories"
if memories_dir.exists():
check_ok("~/.hermes/memories/ directory exists")
memory_file = memories_dir / "MEMORY.md"
user_file = memories_dir / "USER.md"
if memory_file.exists():
size = len(memory_file.read_text(encoding="utf-8").strip())
check_ok(f"MEMORY.md exists ({size} chars)")
else:
check_info("MEMORY.md not created yet (will be created when the agent first writes a memory)")
if user_file.exists():
size = len(user_file.read_text(encoding="utf-8").strip())
check_ok(f"USER.md exists ({size} chars)")
else:
check_info("USER.md not created yet (will be created when the agent first writes a memory)")
logs_dir = PROJECT_ROOT / "logs"
if logs_dir.exists():
check_ok("logs/ directory exists")
else:
check_warn("~/.hermes/memories/ not found", "(will be created on first use)")
if should_fix:
memories_dir.mkdir(parents=True, exist_ok=True)
check_ok("Created ~/.hermes/memories/")
fixed_count += 1
# Check SQLite session store
state_db_path = hermes_home / "state.db"
if state_db_path.exists():
try:
import sqlite3
conn = sqlite3.connect(str(state_db_path))
cursor = conn.execute("SELECT COUNT(*) FROM sessions")
count = cursor.fetchone()[0]
conn.close()
check_ok(f"~/.hermes/state.db exists ({count} sessions)")
except Exception as e:
check_warn(f"~/.hermes/state.db exists but has issues: {e}")
else:
check_info("~/.hermes/state.db not created yet (will be created on first session)")
check_warn("logs/ not found", "(will be created on first use)")
# =========================================================================
# Check: External tools
@@ -355,21 +254,6 @@ def run_doctor(args):
check_fail("TERMINAL_SSH_HOST not set", "(required for TERMINAL_ENV=ssh)")
issues.append("Set TERMINAL_SSH_HOST in .env")
# Daytona (if using daytona backend)
if terminal_env == "daytona":
daytona_key = os.getenv("DAYTONA_API_KEY")
if daytona_key:
check_ok("Daytona API key", "(configured)")
else:
check_fail("DAYTONA_API_KEY not set", "(required for TERMINAL_ENV=daytona)")
issues.append("Set DAYTONA_API_KEY environment variable")
try:
from daytona import Daytona
check_ok("daytona SDK", "(installed)")
except ImportError:
check_fail("daytona SDK not installed", "(pip install daytona)")
issues.append("Install daytona SDK: pip install daytona")
# Node.js + agent-browser (for browser automation tools)
if shutil.which("node"):
check_ok("Node.js")
@@ -382,41 +266,6 @@ def run_doctor(args):
else:
check_warn("Node.js not found", "(optional, needed for browser tools)")
# npm audit for all Node.js packages
if shutil.which("npm"):
npm_dirs = [
(PROJECT_ROOT, "Browser tools (agent-browser)"),
(PROJECT_ROOT / "scripts" / "whatsapp-bridge", "WhatsApp bridge"),
]
for npm_dir, label in npm_dirs:
if not (npm_dir / "node_modules").exists():
continue
try:
audit_result = subprocess.run(
["npm", "audit", "--json"],
cwd=str(npm_dir),
capture_output=True, text=True, timeout=30,
)
import json as _json
audit_data = _json.loads(audit_result.stdout) if audit_result.stdout.strip() else {}
vuln_count = audit_data.get("metadata", {}).get("vulnerabilities", {})
critical = vuln_count.get("critical", 0)
high = vuln_count.get("high", 0)
moderate = vuln_count.get("moderate", 0)
total = critical + high + moderate
if total == 0:
check_ok(f"{label} deps", "(no known vulnerabilities)")
elif critical > 0 or high > 0:
check_warn(
f"{label} deps",
f"({critical} critical, {high} high, {moderate} moderate — run: cd {npm_dir} && npm audit fix)"
)
issues.append(f"{label} has {total} npm vulnerability(ies)")
else:
check_ok(f"{label} deps", f"({moderate} moderate vulnerability(ies))")
except Exception:
pass
# =========================================================================
# Check: API connectivity
# =========================================================================
@@ -425,30 +274,28 @@ def run_doctor(args):
openrouter_key = os.getenv("OPENROUTER_API_KEY")
if openrouter_key:
print(" Checking OpenRouter API...", end="", flush=True)
try:
import httpx
response = httpx.get(
OPENROUTER_MODELS_URL,
"https://openrouter.ai/api/v1/models",
headers={"Authorization": f"Bearer {openrouter_key}"},
timeout=10
)
if response.status_code == 200:
print(f"\r {color('', Colors.GREEN)} OpenRouter API ")
check_ok("OpenRouter API")
elif response.status_code == 401:
print(f"\r {color('', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ")
check_fail("OpenRouter API", "(invalid API key)")
issues.append("Check OPENROUTER_API_KEY in .env")
else:
print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ")
check_fail("OpenRouter API", f"(HTTP {response.status_code})")
except Exception as e:
print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'({e})', Colors.DIM)} ")
check_fail("OpenRouter API", f"({e})")
issues.append("Check network connectivity")
else:
check_warn("OpenRouter API", "(not configured)")
anthropic_key = os.getenv("ANTHROPIC_API_KEY")
if anthropic_key:
print(" Checking Anthropic API...", end="", flush=True)
try:
import httpx
response = httpx.get(
@@ -460,14 +307,14 @@ def run_doctor(args):
timeout=10
)
if response.status_code == 200:
print(f"\r {color('', Colors.GREEN)} Anthropic API ")
check_ok("Anthropic API")
elif response.status_code == 401:
print(f"\r {color('', Colors.RED)} Anthropic API {color('(invalid API key)', Colors.DIM)} ")
check_fail("Anthropic API", "(invalid API key)")
else:
msg = "(couldn't verify)"
print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(msg, Colors.DIM)} ")
# Note: Anthropic may not have /models endpoint
check_warn("Anthropic API", "(couldn't verify)")
except Exception as e:
print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)} ")
check_warn("Anthropic API", f"({e})")
# =========================================================================
# Check: Submodules
@@ -520,78 +367,34 @@ def run_doctor(args):
check_ok(info.get("name", tid))
for item in unavailable:
env_vars = item.get("missing_vars") or item.get("env_vars") or []
if env_vars:
vars_str = ", ".join(env_vars)
if item["missing_vars"]:
vars_str = ", ".join(item["missing_vars"])
check_warn(item["name"], f"(missing {vars_str})")
else:
check_warn(item["name"], "(system dependency not met)")
# Count disabled tools with API key requirements
api_disabled = [u for u in unavailable if (u.get("missing_vars") or u.get("env_vars"))]
api_disabled = [u for u in unavailable if u["missing_vars"]]
if api_disabled:
issues.append("Run 'hermes setup' to configure missing API keys for full tool access")
except Exception as e:
check_warn("Could not check tool availability", f"({e})")
# =========================================================================
# Check: Skills Hub
# =========================================================================
print()
print(color("◆ Skills Hub", Colors.CYAN, Colors.BOLD))
hub_dir = HERMES_HOME / "skills" / ".hub"
if hub_dir.exists():
check_ok("Skills Hub directory exists")
lock_file = hub_dir / "lock.json"
if lock_file.exists():
try:
import json
lock_data = json.loads(lock_file.read_text())
count = len(lock_data.get("installed", {}))
check_ok(f"Lock file OK ({count} hub-installed skill(s))")
except Exception:
check_warn("Lock file", "(corrupted or unreadable)")
quarantine = hub_dir / "quarantine"
q_count = sum(1 for d in quarantine.iterdir() if d.is_dir()) if quarantine.exists() else 0
if q_count > 0:
check_warn(f"{q_count} skill(s) in quarantine", "(pending review)")
else:
check_warn("Skills Hub directory not initialized", "(run: hermes skills list)")
from hermes_cli.config import get_env_value
github_token = get_env_value("GITHUB_TOKEN") or get_env_value("GH_TOKEN")
if github_token:
check_ok("GitHub token configured (authenticated API access)")
else:
check_warn("No GITHUB_TOKEN", "(60 req/hr rate limit — set in ~/.hermes/.env for better rates)")
# =========================================================================
# Summary
# =========================================================================
print()
remaining_issues = issues + manual_issues
if should_fix and fixed_count > 0:
print(color("" * 60, Colors.GREEN))
print(color(f" Fixed {fixed_count} issue(s).", Colors.GREEN, Colors.BOLD), end="")
if remaining_issues:
print(color(f" {len(remaining_issues)} issue(s) require manual intervention.", Colors.YELLOW, Colors.BOLD))
else:
print()
print()
if remaining_issues:
for i, issue in enumerate(remaining_issues, 1):
print(f" {i}. {issue}")
print()
elif remaining_issues:
if issues:
print(color("" * 60, Colors.YELLOW))
print(color(f" Found {len(remaining_issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
print(color(f" Found {len(issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
print()
for i, issue in enumerate(remaining_issues, 1):
for i, issue in enumerate(issues, 1):
print(f" {i}. {issue}")
print()
if not should_fix:
print(color(" Tip: run 'hermes doctor --fix' to auto-fix what's possible.", Colors.DIM))
if should_fix:
print(color(" Attempting auto-fix is not yet implemented.", Colors.DIM))
print(color(" Please resolve issues manually.", Colors.DIM))
else:
print(color("" * 60, Colors.GREEN))
print(color(" All checks passed! 🎉", Colors.GREEN, Colors.BOLD))
+32 -423
View File
@@ -1,7 +1,7 @@
"""
Gateway subcommand for hermes CLI.
Handles: hermes gateway [run|start|stop|restart|status|install|uninstall|setup]
Handles: hermes gateway [run|start|stop|restart|status|install|uninstall]
"""
import asyncio
@@ -13,13 +13,6 @@ from pathlib import Path
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
from hermes_cli.config import get_env_value, save_env_value
from hermes_cli.setup import (
print_header, print_info, print_success, print_warning, print_error,
prompt, prompt_choice, prompt_yes_no,
)
from hermes_cli.colors import Colors, color
# =============================================================================
# Process Management (for manual gateway runs)
@@ -28,59 +21,39 @@ from hermes_cli.colors import Colors, color
def find_gateway_pids() -> list:
"""Find PIDs of running gateway processes."""
pids = []
patterns = [
"hermes_cli.main gateway",
"hermes gateway",
"gateway/run.py",
]
try:
if is_windows():
# Windows: use wmic to search command lines
result = subprocess.run(
["wmic", "process", "get", "ProcessId,CommandLine", "/FORMAT:LIST"],
capture_output=True, text=True
)
# Parse WMIC LIST output: blocks of "CommandLine=...\nProcessId=...\n"
current_cmd = ""
for line in result.stdout.split('\n'):
line = line.strip()
if line.startswith("CommandLine="):
current_cmd = line[len("CommandLine="):]
elif line.startswith("ProcessId="):
pid_str = line[len("ProcessId="):]
if any(p in current_cmd for p in patterns):
# Look for gateway processes with multiple patterns
patterns = [
"hermes_cli.main gateway",
"hermes gateway",
"gateway/run.py",
]
result = subprocess.run(
["ps", "aux"],
capture_output=True,
text=True
)
for line in result.stdout.split('\n'):
# Skip grep and current process
if 'grep' in line or str(os.getpid()) in line:
continue
for pattern in patterns:
if pattern in line:
parts = line.split()
if len(parts) > 1:
try:
pid = int(pid_str)
if pid != os.getpid() and pid not in pids:
pid = int(parts[1])
if pid not in pids:
pids.append(pid)
except ValueError:
pass
current_cmd = ""
else:
result = subprocess.run(
["ps", "aux"],
capture_output=True,
text=True
)
for line in result.stdout.split('\n'):
# Skip grep and current process
if 'grep' in line or str(os.getpid()) in line:
continue
for pattern in patterns:
if pattern in line:
parts = line.split()
if len(parts) > 1:
try:
pid = int(parts[1])
if pid not in pids:
pids.append(pid)
except ValueError:
continue
break
continue
break
except Exception:
pass
return pids
@@ -91,7 +64,7 @@ def kill_gateway_processes(force: bool = False) -> int:
for pid in pids:
try:
if force and not is_windows():
if force:
os.kill(pid, signal.SIGKILL)
else:
os.kill(pid, signal.SIGTERM)
@@ -129,10 +102,7 @@ def get_launchd_plist_path() -> Path:
return Path.home() / "Library" / "LaunchAgents" / "ai.hermes.gateway.plist"
def get_python_path() -> str:
if is_windows():
venv_python = PROJECT_ROOT / "venv" / "Scripts" / "python.exe"
else:
venv_python = PROJECT_ROOT / "venv" / "bin" / "python"
venv_python = PROJECT_ROOT / "venv" / "bin" / "python"
if venv_python.exists():
return str(venv_python)
return sys.executable
@@ -384,9 +354,8 @@ def run_gateway(verbose: bool = False):
from gateway.run import start_gateway
print("┌─────────────────────────────────────────────────────────┐")
print(" Hermes Gateway Starting... │")
print("🦋 Hermes Gateway Starting... │")
print("├─────────────────────────────────────────────────────────┤")
print("│ Messaging platforms + cron scheduler │")
print("│ Press Ctrl+C to stop │")
print("└─────────────────────────────────────────────────────────┘")
print()
@@ -398,362 +367,6 @@ def run_gateway(verbose: bool = False):
sys.exit(1)
# =============================================================================
# Gateway Setup (Interactive Messaging Platform Configuration)
# =============================================================================
# Per-platform config: each entry defines the env vars, setup instructions,
# and prompts needed to configure a messaging platform.
_PLATFORMS = [
{
"key": "telegram",
"label": "Telegram",
"emoji": "📱",
"token_var": "TELEGRAM_BOT_TOKEN",
"setup_instructions": [
"1. Open Telegram and message @BotFather",
"2. Send /newbot and follow the prompts to create your bot",
"3. Copy the bot token BotFather gives you",
"4. To find your user ID: message @userinfobot — it replies with your numeric ID",
],
"vars": [
{"name": "TELEGRAM_BOT_TOKEN", "prompt": "Bot token", "password": True,
"help": "Paste the token from @BotFather (step 3 above)."},
{"name": "TELEGRAM_ALLOWED_USERS", "prompt": "Allowed user IDs (comma-separated)", "password": False,
"is_allowlist": True,
"help": "Paste your user ID from step 4 above."},
{"name": "TELEGRAM_HOME_CHANNEL", "prompt": "Home channel ID (for cron/notification delivery, or empty to set later with /set-home)", "password": False,
"help": "For DMs, this is your user ID. You can set it later by typing /set-home in chat."},
],
},
{
"key": "discord",
"label": "Discord",
"emoji": "💬",
"token_var": "DISCORD_BOT_TOKEN",
"setup_instructions": [
"1. Go to https://discord.com/developers/applications → New Application",
"2. Go to Bot → Reset Token → copy the bot token",
"3. Enable: Bot → Privileged Gateway Intents → Message Content Intent",
"4. Invite the bot to your server:",
" OAuth2 → URL Generator → check BOTH scopes:",
" - bot",
" - applications.commands (required for slash commands!)",
" Bot Permissions: Send Messages, Read Message History, Attach Files",
" Copy the URL and open it in your browser to invite.",
"5. Get your user ID: enable Developer Mode in Discord settings,",
" then right-click your name → Copy ID",
],
"vars": [
{"name": "DISCORD_BOT_TOKEN", "prompt": "Bot token", "password": True,
"help": "Paste the token from step 2 above."},
{"name": "DISCORD_ALLOWED_USERS", "prompt": "Allowed user IDs or usernames (comma-separated)", "password": False,
"is_allowlist": True,
"help": "Paste your user ID from step 5 above."},
{"name": "DISCORD_HOME_CHANNEL", "prompt": "Home channel ID (for cron/notification delivery, or empty to set later with /set-home)", "password": False,
"help": "Right-click a channel → Copy Channel ID (requires Developer Mode)."},
],
},
{
"key": "slack",
"label": "Slack",
"emoji": "💼",
"token_var": "SLACK_BOT_TOKEN",
"setup_instructions": [
"1. Go to https://api.slack.com/apps → Create New App → From Scratch",
"2. Enable Socket Mode: App Settings → Socket Mode → Enable",
"3. Get Bot Token: OAuth & Permissions → Install to Workspace → copy xoxb-... token",
"4. Get App Token: Basic Information → App-Level Tokens → Generate",
" Name it anything, add scope: connections:write → copy xapp-... token",
"5. Add bot scopes: OAuth & Permissions → Scopes → chat:write, im:history,",
" im:read, im:write, channels:history, channels:read",
"6. Reinstall the app to your workspace after adding scopes",
"7. Find your user ID: click your profile → three dots → Copy member ID",
],
"vars": [
{"name": "SLACK_BOT_TOKEN", "prompt": "Bot Token (xoxb-...)", "password": True,
"help": "Paste the bot token from step 3 above."},
{"name": "SLACK_APP_TOKEN", "prompt": "App Token (xapp-...)", "password": True,
"help": "Paste the app-level token from step 4 above."},
{"name": "SLACK_ALLOWED_USERS", "prompt": "Allowed user IDs (comma-separated)", "password": False,
"is_allowlist": True,
"help": "Paste your member ID from step 7 above."},
],
},
{
"key": "whatsapp",
"label": "WhatsApp",
"emoji": "📲",
"token_var": "WHATSAPP_ENABLED",
},
]
def _platform_status(platform: dict) -> str:
"""Return a plain-text status string for a platform.
Returns uncolored text so it can safely be embedded in
simple_term_menu items (ANSI codes break width calculation).
"""
token_var = platform["token_var"]
val = get_env_value(token_var)
if token_var == "WHATSAPP_ENABLED":
if val and val.lower() == "true":
session_file = Path.home() / ".hermes" / "whatsapp" / "session" / "creds.json"
if session_file.exists():
return "configured + paired"
return "enabled, not paired"
return "not configured"
if val:
return "configured"
return "not configured"
def _setup_standard_platform(platform: dict):
"""Interactive setup for Telegram, Discord, or Slack."""
emoji = platform["emoji"]
label = platform["label"]
token_var = platform["token_var"]
print()
print(color(f" ─── {emoji} {label} Setup ───", Colors.CYAN))
# Show step-by-step setup instructions if this platform has them
instructions = platform.get("setup_instructions")
if instructions:
print()
for line in instructions:
print_info(f" {line}")
existing_token = get_env_value(token_var)
if existing_token:
print()
print_success(f"{label} is already configured.")
if not prompt_yes_no(f" Reconfigure {label}?", False):
return
allowed_val_set = None # Track if user set an allowlist (for home channel offer)
for var in platform["vars"]:
print()
print_info(f" {var['help']}")
existing = get_env_value(var["name"])
if existing and var["name"] != token_var:
print_info(f" Current: {existing}")
# Allowlist fields get special handling for the deny-by-default security model
if var.get("is_allowlist"):
print_info(f" The gateway DENIES all users by default for security.")
print_info(f" Enter user IDs to create an allowlist, or leave empty")
print_info(f" and you'll be asked about open access next.")
value = prompt(f" {var['prompt']}", password=False)
if value:
cleaned = value.replace(" ", "")
save_env_value(var["name"], cleaned)
print_success(f" Saved — only these users can interact with the bot.")
allowed_val_set = cleaned
else:
# No allowlist — ask about open access vs DM pairing
print()
access_choices = [
"Enable open access (anyone can message the bot)",
"Use DM pairing (unknown users request access, you approve with 'hermes pairing approve')",
"Skip for now (bot will deny all users until configured)",
]
access_idx = prompt_choice(" How should unauthorized users be handled?", access_choices, 1)
if access_idx == 0:
save_env_value("GATEWAY_ALLOW_ALL_USERS", "true")
print_warning(" Open access enabled — anyone can use your bot!")
elif access_idx == 1:
print_success(" DM pairing mode — users will receive a code to request access.")
print_info(" Approve with: hermes pairing approve {platform} {code}")
else:
print_info(" Skipped — configure later with 'hermes gateway setup'")
continue
value = prompt(f" {var['prompt']}", password=var.get("password", False))
if value:
save_env_value(var["name"], value)
print_success(f" Saved {var['name']}")
elif var["name"] == token_var:
print_warning(f" Skipped — {label} won't work without this.")
return
else:
print_info(f" Skipped (can configure later)")
# If an allowlist was set and home channel wasn't, offer to reuse
# the first user ID (common for Telegram DMs).
home_var = f"{label.upper()}_HOME_CHANNEL"
home_val = get_env_value(home_var)
if allowed_val_set and not home_val and label == "Telegram":
first_id = allowed_val_set.split(",")[0].strip()
if first_id and prompt_yes_no(f" Use your user ID ({first_id}) as the home channel?", True):
save_env_value(home_var, first_id)
print_success(f" Home channel set to {first_id}")
print()
print_success(f"{emoji} {label} configured!")
def _setup_whatsapp():
"""Delegate to the existing WhatsApp setup flow."""
from hermes_cli.main import cmd_whatsapp
import argparse
cmd_whatsapp(argparse.Namespace())
def _is_service_installed() -> bool:
"""Check if the gateway is installed as a system service."""
if is_linux():
return get_systemd_unit_path().exists()
elif is_macos():
return get_launchd_plist_path().exists()
return False
def _is_service_running() -> bool:
"""Check if the gateway service is currently running."""
if is_linux() and get_systemd_unit_path().exists():
result = subprocess.run(
["systemctl", "--user", "is-active", SERVICE_NAME],
capture_output=True, text=True
)
return result.stdout.strip() == "active"
elif is_macos() and get_launchd_plist_path().exists():
result = subprocess.run(
["launchctl", "list", "ai.hermes.gateway"],
capture_output=True, text=True
)
return result.returncode == 0
# Check for manual processes
return len(find_gateway_pids()) > 0
def gateway_setup():
"""Interactive setup for messaging platforms + gateway service."""
print()
print(color("┌─────────────────────────────────────────────────────────┐", Colors.MAGENTA))
print(color("│ ⚕ Gateway Setup │", Colors.MAGENTA))
print(color("├─────────────────────────────────────────────────────────┤", Colors.MAGENTA))
print(color("│ Configure messaging platforms and the gateway service. │", Colors.MAGENTA))
print(color("│ Press Ctrl+C at any time to exit. │", Colors.MAGENTA))
print(color("└─────────────────────────────────────────────────────────┘", Colors.MAGENTA))
# ── Gateway service status ──
print()
service_installed = _is_service_installed()
service_running = _is_service_running()
if service_installed and service_running:
print_success("Gateway service is installed and running.")
elif service_installed:
print_warning("Gateway service is installed but not running.")
if prompt_yes_no(" Start it now?", True):
try:
if is_linux():
systemd_start()
elif is_macos():
launchd_start()
except subprocess.CalledProcessError as e:
print_error(f" Failed to start: {e}")
else:
print_info("Gateway service is not installed yet.")
print_info("You'll be offered to install it after configuring platforms.")
# ── Platform configuration loop ──
while True:
print()
print_header("Messaging Platforms")
menu_items = []
for plat in _PLATFORMS:
status = _platform_status(plat)
menu_items.append(f"{plat['label']} ({status})")
menu_items.append("Done")
choice = prompt_choice("Select a platform to configure:", menu_items, len(menu_items) - 1)
if choice == len(_PLATFORMS):
break
platform = _PLATFORMS[choice]
if platform["key"] == "whatsapp":
_setup_whatsapp()
else:
_setup_standard_platform(platform)
# ── Post-setup: offer to install/restart gateway ──
any_configured = any(
bool(get_env_value(p["token_var"]))
for p in _PLATFORMS
if p["key"] != "whatsapp"
) or (get_env_value("WHATSAPP_ENABLED") or "").lower() == "true"
if any_configured:
print()
print(color("" * 58, Colors.DIM))
service_installed = _is_service_installed()
service_running = _is_service_running()
if service_running:
if prompt_yes_no(" Restart the gateway to pick up changes?", True):
try:
if is_linux():
systemd_restart()
elif is_macos():
launchd_restart()
else:
kill_gateway_processes()
print_info("Start manually: hermes gateway")
except subprocess.CalledProcessError as e:
print_error(f" Restart failed: {e}")
elif service_installed:
if prompt_yes_no(" Start the gateway service?", True):
try:
if is_linux():
systemd_start()
elif is_macos():
launchd_start()
except subprocess.CalledProcessError as e:
print_error(f" Start failed: {e}")
else:
print()
if is_linux() or is_macos():
platform_name = "systemd" if is_linux() else "launchd"
if prompt_yes_no(f" Install the gateway as a {platform_name} service? (runs in background, starts on boot)", True):
try:
force = False
if is_linux():
systemd_install(force)
else:
launchd_install(force)
print()
if prompt_yes_no(" Start the service now?", True):
try:
if is_linux():
systemd_start()
else:
launchd_start()
except subprocess.CalledProcessError as e:
print_error(f" Start failed: {e}")
except subprocess.CalledProcessError as e:
print_error(f" Install failed: {e}")
print_info(" You can try manually: hermes gateway install")
else:
print_info(" You can install later: hermes gateway install")
print_info(" Or run in foreground: hermes gateway")
else:
print_info(" Service install not supported on this platform.")
print_info(" Run in foreground: hermes gateway")
else:
print()
print_info("No platforms configured. Run 'hermes gateway setup' when ready.")
print()
# =============================================================================
# Main Command Handler
# =============================================================================
@@ -767,11 +380,7 @@ def gateway_command(args):
verbose = getattr(args, 'verbose', False)
run_gateway(verbose)
return
if subcmd == "setup":
gateway_setup()
return
# Service management commands
if subcmd == "install":
force = getattr(args, 'force', False)
+29 -1231
View File
File diff suppressed because it is too large Load Diff
-36
View File
@@ -1,36 +0,0 @@
"""
Canonical list of OpenRouter models offered in CLI and setup wizards.
Add, remove, or reorder entries here both `hermes setup` and
`hermes` provider-selection will pick up the change automatically.
"""
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
("anthropic/claude-opus-4.6", "recommended"),
("anthropic/claude-sonnet-4.5", ""),
("openai/gpt-5.4-pro", ""),
("openai/gpt-5.4", ""),
("openai/gpt-5.3-codex", ""),
("google/gemini-3-pro-preview", ""),
("google/gemini-3-flash-preview", ""),
("qwen/qwen3.5-plus-02-15", ""),
("qwen/qwen3.5-35b-a3b", ""),
("stepfun/step-3.5-flash", ""),
("z-ai/glm-5", ""),
("moonshotai/kimi-k2.5", ""),
("minimax/minimax-m2.5", ""),
]
def model_ids() -> list[str]:
"""Return just the model-id strings (convenience helper)."""
return [mid for mid, _ in OPENROUTER_MODELS]
def menu_labels() -> list[str]:
"""Return display labels like 'anthropic/claude-opus-4.6 (recommended)'."""
labels = []
for mid, desc in OPENROUTER_MODELS:
labels.append(f"{mid} ({desc})" if desc else mid)
return labels
+3
View File
@@ -8,6 +8,9 @@ Usage:
hermes pairing clear-pending # Clear all expired/pending codes
"""
import sys
def pairing_command(args):
"""Handle hermes pairing subcommands."""
from gateway.pairing import PairingStore
-160
View File
@@ -1,160 +0,0 @@
"""Shared runtime provider resolution for CLI, gateway, cron, and helpers."""
from __future__ import annotations
import os
from typing import Any, Dict, Optional
from hermes_cli.auth import (
AuthError,
format_auth_error,
resolve_provider,
resolve_nous_runtime_credentials,
resolve_codex_runtime_credentials,
)
from hermes_cli.config import load_config
from hermes_constants import OPENROUTER_BASE_URL
def _get_model_config() -> Dict[str, Any]:
config = load_config()
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
return dict(model_cfg)
if isinstance(model_cfg, str) and model_cfg.strip():
return {"default": model_cfg.strip()}
return {}
def resolve_requested_provider(requested: Optional[str] = None) -> str:
"""Resolve provider request from explicit arg, env, then config."""
if requested and requested.strip():
return requested.strip().lower()
env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
if env_provider:
return env_provider
model_cfg = _get_model_config()
cfg_provider = model_cfg.get("provider")
if isinstance(cfg_provider, str) and cfg_provider.strip():
return cfg_provider.strip().lower()
return "auto"
def _resolve_openrouter_runtime(
*,
requested_provider: str,
explicit_api_key: Optional[str] = None,
explicit_base_url: Optional[str] = None,
) -> Dict[str, Any]:
model_cfg = _get_model_config()
cfg_base_url = model_cfg.get("base_url") if isinstance(model_cfg.get("base_url"), str) else ""
cfg_provider = model_cfg.get("provider") if isinstance(model_cfg.get("provider"), str) else ""
requested_norm = (requested_provider or "").strip().lower()
cfg_provider = cfg_provider.strip().lower()
env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
use_config_base_url = False
if requested_norm == "auto":
if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url:
if not cfg_provider or cfg_provider == "auto":
use_config_base_url = True
base_url = (
(explicit_base_url or "").strip()
or env_openai_base_url
or (cfg_base_url.strip() if use_config_base_url else "")
or env_openrouter_base_url
or OPENROUTER_BASE_URL
).rstrip("/")
# Choose API key based on whether the resolved base_url targets OpenRouter.
# When hitting OpenRouter, prefer OPENROUTER_API_KEY (issue #289).
# When hitting a custom endpoint, prefer OPENAI_API_KEY so the OpenRouter
# key doesn't leak to an unrelated provider (issue #560).
_is_openrouter_url = "openrouter.ai" in base_url
if _is_openrouter_url:
api_key = (
explicit_api_key
or os.getenv("OPENROUTER_API_KEY")
or os.getenv("OPENAI_API_KEY")
or ""
)
else:
api_key = (
explicit_api_key
or os.getenv("OPENAI_API_KEY")
or os.getenv("OPENROUTER_API_KEY")
or ""
)
source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config"
return {
"provider": "openrouter",
"api_mode": "chat_completions",
"base_url": base_url,
"api_key": api_key,
"source": source,
}
def resolve_runtime_provider(
*,
requested: Optional[str] = None,
explicit_api_key: Optional[str] = None,
explicit_base_url: Optional[str] = None,
) -> Dict[str, Any]:
"""Resolve runtime provider credentials for agent execution."""
requested_provider = resolve_requested_provider(requested)
provider = resolve_provider(
requested_provider,
explicit_api_key=explicit_api_key,
explicit_base_url=explicit_base_url,
)
if provider == "nous":
creds = resolve_nous_runtime_credentials(
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
)
return {
"provider": "nous",
"api_mode": "chat_completions",
"base_url": creds.get("base_url", "").rstrip("/"),
"api_key": creds.get("api_key", ""),
"source": creds.get("source", "portal"),
"expires_at": creds.get("expires_at"),
"requested_provider": requested_provider,
}
if provider == "openai-codex":
creds = resolve_codex_runtime_credentials()
return {
"provider": "openai-codex",
"api_mode": "codex_responses",
"base_url": creds.get("base_url", "").rstrip("/"),
"api_key": creds.get("api_key", ""),
"source": creds.get("source", "hermes-auth-store"),
"last_refresh": creds.get("last_refresh"),
"requested_provider": requested_provider,
}
runtime = _resolve_openrouter_runtime(
requested_provider=requested_provider,
explicit_api_key=explicit_api_key,
explicit_base_url=explicit_base_url,
)
runtime["requested_provider"] = requested_provider
return runtime
def format_runtime_provider_error(error: Exception) -> str:
if isinstance(error, AuthError):
return format_auth_error(error)
return str(error)

Some files were not shown because too many files have changed in this diff Show More