Compare commits

..

1 Commits

Author SHA1 Message Date
kshitijk4poor 25d9fc8094 fix(flush_memories): always deduct headroom + resolve flush aux model + trim defence
Three fixes for flush_memories / compression context window overflow:

1. ALWAYS deduct headroom before comparing aux_context vs threshold.
   #15631 only deducted inside 'if aux_context < threshold' — which
   never fires in the common same-model case (threshold = context × 0.50
   means aux_context > threshold always). Now headroom is computed
   unconditionally and effective_limit = aux_context - headroom is
   compared against threshold.

2. Also resolve flush_memories auxiliary model in the feasibility check.
   If the user configures separate auxiliary.flush_memories provider,
   the flush model's smaller context was unchecked.

3. Defence-in-depth trimming in flush_memories() for CLI /new and
   gateway resets that bypass preflight compression entirely.
2026-04-25 19:53:54 +05:30
100 changed files with 2745 additions and 15883 deletions
+3 -3
View File
@@ -1680,9 +1680,9 @@ def build_anthropic_kwargs(
# ── Strip sampling params on 4.7+ ─────────────────────────────────
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
# Callers (auxiliary_client, etc.) may set these for older models;
# drop them here as a safety net so upstream 4.6 → 4.7 migrations
# don't require coordinated edits everywhere.
# Callers (auxiliary_client, flush_memories, etc.) may set these for
# older models; drop them here as a safety net so upstream 4.6 → 4.7
# migrations don't require coordinated edits everywhere.
if _forbids_sampling_params(model):
for _sampling_key in ("temperature", "top_p", "top_k"):
kwargs.pop(_sampling_key, None)
+4 -4
View File
@@ -390,7 +390,7 @@ class _CodexCompletionsAdapter:
# Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
# support max_output_tokens or temperature — omit to avoid 400 errors.
# Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
# Tools support for flush_memories and similar callers
tools = kwargs.get("tools")
if tools:
converted = []
@@ -2803,8 +2803,8 @@ def _build_call_kwargs(
temperature = fixed_temperature
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
# drop here so auxiliary callers that hardcode temperature (e.g. 0 on
# structured-JSON extraction) don't 400 the moment
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
# the aux model is flipped to 4.7.
if temperature is not None:
from agent.anthropic_adapter import _forbids_sampling_params
@@ -2892,7 +2892,7 @@ def call_llm(
Args:
task: Auxiliary task name ("compression", "vision", "web_extract",
"session_search", "skills_hub", "mcp", "title_generation").
"session_search", "skills_hub", "mcp", "flush_memories").
Reads provider:model from config/env. Ignored if provider is set.
provider: Explicit provider override.
model: Explicit model override.
+10 -23
View File
@@ -44,31 +44,22 @@ _TOOL_CALL_LEAK_PATTERN = re.compile(
# Multimodal content helpers
# ---------------------------------------------------------------------------
def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
"""Convert chat-style multimodal content to Responses API input parts.
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
The ``role`` parameter controls the text content type:
- ``"user"`` (default) → ``"input_text"``
- ``"assistant"`` → ``"output_text"``
The Responses API rejects ``input_text`` inside assistant messages and
``output_text`` inside user messages, so callers MUST pass the correct
role for the message being converted.
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
Returns an empty list when ``content`` is not a list or contains no
recognized parts — callers fall back to the string path.
"""
text_type = "output_text" if role == "assistant" else "input_text"
if not isinstance(content, list):
return []
converted: List[Dict[str, Any]] = []
for part in content:
if isinstance(part, str):
if part:
converted.append({"type": text_type, "text": part})
converted.append({"type": "input_text", "text": part})
continue
if not isinstance(part, dict):
continue
@@ -76,7 +67,7 @@ def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> Lis
if ptype in {"text", "input_text", "output_text"}:
text = part.get("text")
if isinstance(text, str) and text:
converted.append({"type": text_type, "text": text})
converted.append({"type": "input_text", "text": text})
continue
if ptype in {"image_url", "input_image"}:
image_ref = part.get("image_url")
@@ -242,10 +233,9 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
if role in {"user", "assistant"}:
content = msg.get("content", "")
if isinstance(content, list):
content_parts = _chat_content_to_responses_parts(content, role=role)
text_type = "output_text" if role == "assistant" else "input_text"
content_parts = _chat_content_to_responses_parts(content)
content_text = "".join(
p.get("text", "") for p in content_parts if p.get("type") == text_type
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
)
else:
content_parts = []
@@ -439,16 +429,13 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
content = ""
if isinstance(content, list):
# Multimodal content from ``_chat_messages_to_responses_input``
# is already in Responses format (``input_text`` / ``output_text``
# / ``input_image``). Validate each part and pass through.
# Use the correct text type for the role — ``output_text`` for
# assistant messages, ``input_text`` for user messages.
text_type = "output_text" if role == "assistant" else "input_text"
# is already in Responses format (``input_text`` / ``input_image``).
# Validate each part and pass through.
validated: List[Dict[str, Any]] = []
for part_idx, part in enumerate(content):
if isinstance(part, str):
if part:
validated.append({"type": text_type, "text": part})
validated.append({"type": "input_text", "text": part})
continue
if not isinstance(part, dict):
raise ValueError(
@@ -459,7 +446,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
text = part.get("text", "")
if not isinstance(text, str):
text = str(text or "")
validated.append({"type": text_type, "text": text})
validated.append({"type": "input_text", "text": text})
elif ptype in {"input_image", "image_url"}:
image_ref = part.get("image_url", "")
detail = part.get("detail")
-58
View File
@@ -1,58 +0,0 @@
# Hermes Apps
Platform apps live here. The first app is a cross-platform GUI shell around the
existing Hermes dashboard; it should not fork chat, config, logs, or session UI.
## Shape
```text
apps/
gui/ # cross-platform app shell: dev Chrome shell now, Tauri native next
shared/ # runtime bundle notes/scripts used by Windows + macOS packaging
```
## Desktop Dev
The backend-only GUI mode is:
```bash
hermes dashboard --gui
```
The fast GUI shell is:
```powershell
cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
npm run dev
```
The native Tauri shell is:
```powershell
cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
npm run dev:tauri
```
`--gui` implies the embedded TUI; do not pass `--tui` separately for GUI mode.
## MVP Boundary
Included:
- bundled Python runtime
- bundled Node/TUI runtime
- CLI install to PATH
- profile picker and first-run setup
- dashboard health/reconnect state
- tray controls
- desktop notifications
- Windows installer
Deferred:
- code signing
- native self-updater
- store distribution
For MVP updates, the desktop UI should run the existing `hermes update` flow and
surface progress/finish notifications.
-102
View File
@@ -1,102 +0,0 @@
# Hermes GUI
Cross-platform GUI shell for the Hermes dashboard.
## Fast Dev Shell
This gets a GUI window on Windows/WSL today by launching Chrome in app mode:
```bash
cd apps/gui
npm run dev
```
It starts `hermes dashboard --gui --no-open --port 9120`, waits for
`/api/health`, then opens a standalone app window at `http://127.0.0.1:9120`.
## Native Shell
The native Tauri shell is still scaffolded:
```bash
cd apps/gui
npm run dev:tauri
```
From Windows PowerShell on a `\\wsl$` path, use PowerShell `npm`, not
`npm.cmd`:
```powershell
Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force
cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
npm run dev:tauri
```
`npm.cmd` goes through `cmd.exe`, and `cmd.exe` cannot use UNC paths as the
current directory.
If `npm run` still falls through `cmd.exe`, bypass npm entirely:
```powershell
\\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1
```
The launcher builds into `%LOCALAPPDATA%\Hermes\cargo-target\gui` instead of
`\\wsl$` because Windows Cargo incremental locks do not work reliably on UNC
WSL filesystems.
In dev, either start Hermes yourself:
```bash
hermes dashboard --gui --no-open --port 9120
```
or let the native shell start it. The tray menu owns:
- Open Hermes
- Open in Browser
- Restart Hermes Runtime
- Quit Hermes
The native shell reuses a healthy GUI runtime when one is already running.
Otherwise it picks the first free port from `9120..9139`, passes that port into
the WSL/backend process, and navigates the Tauri window there. Set
`HERMES_GUI_PORT` to force a starting port.
## Fresh Install Emulation
Use an isolated Hermes home without touching your real `~/.hermes`:
```powershell
powershell.exe -ExecutionPolicy Bypass -File \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1 -Fresh
```
Reset that disposable home and run again:
```powershell
powershell.exe -ExecutionPolicy Bypass -File \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1 -Fresh -ResetFresh
```
Fresh mode stores state in `%LOCALAPPDATA%\Hermes\fresh-install-home` and starts
from port `9140` so it does not collide with your normal GUI dev session.
Set `HERMES_GUI_MIN_SPLASH_MS` only when debugging the startup screen; default
startup is instant once the backend is healthy.
## Boundary
GUI owns:
- app shell/window
- startup state
- sidecar process lifecycle
- future tray/notifications/installers
Hermes owns:
- dashboard UI
- auth/session token
- profiles/config/env
- TUI/PTT chat bridge
- tools/skills/gateway
- update flow
-57
View File
@@ -1,57 +0,0 @@
# Launches the Tauri CLI for the GUI app from Windows PowerShell.
#   -Command     sub-command handed to the Tauri CLI (defaults to "dev")
#   -Fresh       run against a disposable HERMES_HOME under %LOCALAPPDATA%
#   -ResetFresh  with -Fresh, delete that disposable home before starting
param(
[string]$Command = "dev",
[switch]$Fresh,
[switch]$ResetFresh
)
$ErrorActionPreference = "Stop"
Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force
# Directory containing this script; the Tauri CLI is run from here.
$AppRoot = Split-Path -Parent $MyInvocation.MyCommand.Path
# NOTE(review): $Script is assigned but never referenced again in this script.
$Script = Join-Path $AppRoot "scripts\tauri.mjs"
# Fail early with an install hint when a required Windows tool is missing.
if (-not (Get-Command node -ErrorAction SilentlyContinue)) {
throw "Windows Node.js was not found. Install it with: winget install OpenJS.NodeJS.LTS"
}
if (-not (Get-Command rustc -ErrorAction SilentlyContinue)) {
throw "Windows Rust was not found. Install it with: winget install Rustlang.Rustup"
}
# Either a standalone `tauri` command or the `cargo-tauri` subcommand is acceptable.
$Tauri = Get-Command tauri -ErrorAction SilentlyContinue
$CargoTauri = Get-Command cargo-tauri -ErrorAction SilentlyContinue
if (-not $Tauri -and -not $CargoTauri) {
throw "Tauri CLI not found. Install it with: npm install -g @tauri-apps/cli (run from a normal Windows path, not \\wsl$)"
}
# Disable incremental builds and place Cargo output under %LOCALAPPDATA%
# rather than next to the sources.
$env:CARGO_INCREMENTAL = "0"
$env:CARGO_TARGET_DIR = Join-Path $env:LOCALAPPDATA "Hermes\cargo-target\gui"
New-Item -ItemType Directory -Force -Path $env:CARGO_TARGET_DIR | Out-Null
if ($Fresh) {
# Fresh mode: isolated Hermes home plus a separate starting port (9140).
$FreshHome = Join-Path $env:LOCALAPPDATA "Hermes\fresh-install-home"
if ($ResetFresh -and (Test-Path $FreshHome)) {
Remove-Item -Recurse -Force $FreshHome
}
New-Item -ItemType Directory -Force -Path $FreshHome | Out-Null
$env:HERMES_HOME = $FreshHome
$env:HERMES_GUI_PORT = "9140"
$env:HERMES_GUI_FRESH = "1"
Write-Host "Fresh GUI mode"
Write-Host " HERMES_HOME=$FreshHome"
Write-Host " HERMES_GUI_PORT=$env:HERMES_GUI_PORT"
}
# Run the CLI from the app root and always restore the caller's location.
Push-Location $AppRoot
try {
if ($Tauri) {
& tauri $Command
}
else {
& cargo tauri $Command
}
}
finally {
Pop-Location
}
-13
View File
@@ -1,13 +0,0 @@
{
"name": "@hermes/gui",
"version": "0.0.0",
"private": true,
"type": "module",
"scripts": {
"dev": "node scripts/dev-shell.mjs",
"dev:tauri": "node scripts/tauri.mjs dev",
"build": "node scripts/tauri.mjs build",
"dashboard": "node scripts/start-dashboard.mjs",
"tauri": "node scripts/tauri.mjs"
}
}
-156
View File
@@ -1,156 +0,0 @@
import { spawn, spawnSync } from "node:child_process";
import { createServer } from "node:net";
import { dirname, resolve } from "node:path";
import { setTimeout as delay } from "node:timers/promises";
import { fileURLToPath } from "node:url";
// Directory of this script and the repository root three levels above it.
const here = dirname(fileURLToPath(import.meta.url));
const repoRoot = resolve(here, "../../..");
// Interpreter used to launch the dashboard; override with HERMES_PYTHON.
const python = process.env.HERMES_PYTHON || "python";
// `port` and `url` are reassigned by choosePort() when it picks another port.
let port = process.env.HERMES_GUI_PORT || "9120";
let url = `http://127.0.0.1:${port}`;
// Child process handle set by startDashboard(); null until then.
let dashboard = null;
// Kill the dashboard child started by this script, unless there is none or
// it has already been sent a kill signal.
function stop() {
  if (!dashboard || dashboard.killed) {
    return;
  }
  dashboard.kill();
}
// On SIGINT / SIGTERM stop the dashboard child and exit with the matching
// status (130 / 143). The "exit" hook covers every other way out.
for (const [signalName, exitStatus] of [
  ["SIGINT", 130],
  ["SIGTERM", 143],
]) {
  process.on(signalName, () => {
    stop();
    process.exit(exitStatus);
  });
}
process.on("exit", stop);
// Poll isHealthy() up to 120 times, pausing 500 ms after each failed probe.
// Resolves to true as soon as a probe succeeds, otherwise false.
async function waitForHealth() {
  const maxAttempts = 120;
  const pauseMs = 500;
  let attempt = 0;
  while (attempt < maxAttempts) {
    const healthy = await isHealthy();
    if (healthy) {
      return true;
    }
    await delay(pauseMs);
    attempt += 1;
  }
  return false;
}
// Probe `${url}/api/health` with a 1 s timeout. Healthy means an OK HTTP
// response whose JSON body has status === "ok"; any thrown error (network
// failure, timeout, unparsable body) counts as unhealthy.
async function isHealthy() {
  const endpoint = `${url}/api/health`;
  try {
    const response = await fetch(endpoint, {
      signal: AbortSignal.timeout(1000),
    });
    const { status } = await response.json();
    return response.ok && status === "ok";
  } catch {
    return false;
  }
}
// Resolve to true when a TCP server can listen on 127.0.0.1:<candidate>
// (the probe server is closed again first), or false if listening errors.
function canBind(candidate) {
  return new Promise((settle) => {
    const probe = createServer();
    const onListening = () => {
      probe.close(() => settle(true));
    };
    probe.once("error", () => settle(false));
    probe.listen(Number(candidate), "127.0.0.1", onListening);
  });
}
// Unless HERMES_GUI_PORT pins the port, try 20 consecutive ports starting at
// the current `port` and switch `port` / `url` to the first bindable one.
// When none can be bound, `port` and `url` are left unchanged.
async function choosePort() {
  if (process.env.HERMES_GUI_PORT) {
    return;
  }
  const firstPort = Number(port);
  for (let offset = 0; offset < 20; offset += 1) {
    const candidate = firstPort + offset;
    const free = await canBind(candidate);
    if (!free) {
      continue;
    }
    port = String(candidate);
    url = `http://127.0.0.1:${port}`;
    return;
  }
}
// Spawn `python -m hermes_cli.main dashboard --gui --no-open` bound to
// 127.0.0.1:<port> from the repo root, with HERMES_GUI=1 added to the
// environment. This script exits with the child's exit code when it exits.
function startDashboard() {
  const cliArgs = [
    "-m",
    "hermes_cli.main",
    "dashboard",
    "--gui",
    "--no-open",
    "--host",
    "127.0.0.1",
    "--port",
    port,
  ];
  const childEnv = { ...process.env, HERMES_GUI: "1" };
  dashboard = spawn(python, cliArgs, {
    cwd: repoRoot,
    env: childEnv,
    stdio: "inherit",
  });
  dashboard.on("exit", (code) => {
    process.exit(code ?? 0);
  });
}
// Run a command synchronously with its output discarded and report whether
// it exited with status 0. A shell is used only on win32.
function run(command, args) {
  const useShell = process.platform === "win32";
  const { status } = spawnSync(command, args, {
    shell: useShell,
    stdio: "ignore",
  });
  return status === 0;
}
// Open the GUI URL, preferring a browser "app mode" window. Launchers are
// tried in order for the current platform and the first one that exits 0
// wins; returns whether any launcher succeeded.
//   win32 / WSL: chrome, then msedge, then the default handler via `cmd.exe /C start`
//   darwin:      Google Chrome via `open -na`, then plain `open`
//   otherwise:   google-chrome, chromium, then xdg-open
function openGuiWindow() {
  const appFlag = `--app=${url}`;
  const viaCmdStart = (...rest) => ["cmd.exe", ["/C", "start", "", ...rest]];

  let launchers;
  if (process.platform === "win32" || process.env.WSL_DISTRO_NAME) {
    launchers = [
      viaCmdStart("chrome", appFlag),
      viaCmdStart("msedge", appFlag),
      viaCmdStart(url),
    ];
  } else if (process.platform === "darwin") {
    launchers = [
      ["open", ["-na", "Google Chrome", "--args", appFlag]],
      ["open", [url]],
    ];
  } else {
    launchers = [
      ["google-chrome", [appFlag]],
      ["chromium", [appFlag]],
      ["xdg-open", [url]],
    ];
  }
  return launchers.some(([command, commandArgs]) => run(command, commandArgs));
}
// Entry flow: reuse a dashboard that is already healthy; otherwise pick a
// port, start one, and open the window once it reports healthy.
const alreadyRunning = await isHealthy();
if (alreadyRunning) {
  console.log(`Hermes GUI already running -> ${url}`);
  openGuiWindow();
  process.exit(0);
}

await choosePort();
startDashboard();

const becameHealthy = await waitForHealth();
if (!becameHealthy) {
  console.error(`Hermes GUI did not become healthy at ${url}`);
} else {
  console.log(`Hermes GUI -> ${url}`);
  openGuiWindow();
}
-95
View File
@@ -1,95 +0,0 @@
import { spawn } from "node:child_process";
import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";
// Directory of this script and the repository root three levels above it.
const here = dirname(fileURLToPath(import.meta.url));
const repoRoot = resolve(here, "../../..");
// Interpreter for the non-WSL launch path; override with HERMES_PYTHON.
const python = process.env.HERMES_PYTHON || "python";
// Dashboard port (HERMES_GUI_PORT or 9120) and the base URL derived from it.
const port = process.env.HERMES_GUI_PORT || "9120";
const url = `http://127.0.0.1:${port}`;
// Ask `${url}/api/health` (1 s timeout) whether a dashboard is up. True only
// for an OK response whose JSON body has status === "ok"; any thrown error
// is reported as not healthy.
async function isHealthy() {
  const healthEndpoint = `${url}/api/health`;
  try {
    const reply = await fetch(healthEndpoint, {
      signal: AbortSignal.timeout(1000),
    });
    const { status } = await reply.json();
    return reply.ok && status === "ok";
  } catch {
    return false;
  }
}
// When running on win32 with the repo on a `\\wsl$\<distro>\...` or
// `\\wsl.localhost\<distro>\...` path, return { distro, path } where `path`
// is the Linux-side path. Returns null in every other case.
function wslRepoRoot() {
  const segments = repoRoot.replaceAll("\\", "/").split("/");
  if (process.platform !== "win32") {
    return null;
  }
  const host = segments[2]?.toLowerCase();
  if (host !== "wsl$" && host !== "wsl.localhost") {
    return null;
  }
  const distro = segments[3];
  const path = `/${segments.slice(4).join("/")}`;
  if (!distro || path === "/") {
    return null;
  }
  return { distro, path };
}
// Start `python -m hermes_cli.main dashboard --gui --no-open` on
// 127.0.0.1:<port> and return the child process.
// If the repo is on a WSL share (see wslRepoRoot), the command runs inside
// that distro through `wsl.exe ... env HERMES_GUI=1 <python>`, where <python>
// is HERMES_WSL_PYTHON or "python". Otherwise it is spawned directly from
// the repo root with HERMES_GUI=1 added to the environment.
function spawnDashboard() {
  const dashboardArgs = [
    "-m",
    "hermes_cli.main",
    "dashboard",
    "--gui",
    "--no-open",
    "--host",
    "127.0.0.1",
    "--port",
    port,
  ];

  const wsl = wslRepoRoot();
  if (!wsl) {
    return spawn(python, dashboardArgs, {
      cwd: repoRoot,
      env: {
        ...process.env,
        HERMES_GUI: "1",
      },
      stdio: "inherit",
    });
  }

  const wslPython = process.env.HERMES_WSL_PYTHON || "python";
  const wslArgs = [
    "-d",
    wsl.distro,
    "--cd",
    wsl.path,
    "env",
    "HERMES_GUI=1",
    wslPython,
    ...dashboardArgs,
  ];
  return spawn("wsl.exe", wslArgs, { stdio: "inherit" });
}
// Entry flow: do nothing if a dashboard already answers; otherwise spawn one
// and mirror how it ended (re-raise its signal, else reuse its exit code).
const alreadyRunning = await isHealthy();
if (alreadyRunning) {
  console.log(`Hermes GUI already running -> ${url}`);
  process.exit(0);
}

const child = spawnDashboard();
child.on("exit", (code, signal) => {
  if (signal) {
    process.kill(process.pid, signal);
  }
  process.exit(code ?? 0);
});
-90
View File
@@ -1,90 +0,0 @@
import { spawnSync } from "node:child_process";
import { existsSync } from "node:fs";
import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";
// Directory of this script; the app root is its parent directory.
const here = dirname(fileURLToPath(import.meta.url));
const appRoot = resolve(here, "..");
// Path where a project-local Tauri CLI would be installed (`.cmd` shim on win32).
const bin = process.platform === "win32" ? "tauri.cmd" : "tauri";
const localTauri = resolve(appRoot, "node_modules", ".bin", bin);
// Arguments to forward to the Tauri CLI (everything after the script path).
const args = process.argv.slice(2);
// True when running on Linux with a non-empty WSL_DISTRO_NAME in the environment.
function isWsl() {
  if (process.platform !== "linux") {
    return false;
  }
  return Boolean(process.env.WSL_DISTRO_NAME);
}
// Wrap a string in single quotes for PowerShell, doubling every embedded
// single quote.
function quotePs(value) {
  const escaped = value.split("'").join("''");
  return "'" + escaped + "'";
}
// Hand the current invocation over to Windows PowerShell from inside WSL.
//
// The app root is translated to a Windows path with `wslpath -w`, then
// PowerShell checks for Windows Node.js and Rust, changes into that directory
// and re-runs this launcher with Windows `node`, forwarding the caller's
// arguments ("dev" when none were given).
//
// Returns false when the hand-off could not be started (no Windows path, or
// PowerShell could not be spawned) so the caller can print manual
// instructions. Otherwise this function does not return: the process exits
// with PowerShell's exit status.
function dispatchToWindows() {
  const pathResult = spawnSync("wslpath", ["-w", appRoot], {
    encoding: "utf8",
  });
  // `stdout` is null when wslpath itself could not be spawned; treat that
  // the same as empty output instead of throwing on `.trim()`.
  const windowsPath = (pathResult.stdout ?? "").trim();
  if (!windowsPath) return false;

  // Forward the requested Tauri sub-command instead of always running the
  // dev shell, so `build` from WSL really builds.
  const forwarded = args.length > 0 ? args : ["dev"];
  const launcher = `${windowsPath}\\scripts\\tauri.mjs`;
  const command = [
    "$ErrorActionPreference = 'Stop'",
    "Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force",
    "if (-not (Get-Command node -ErrorAction SilentlyContinue)) {",
    '  Write-Error "Windows Node.js was not found. Install Windows Node.js first: winget install OpenJS.NodeJS.LTS"',
    "}",
    "if (-not (Get-Command rustc -ErrorAction SilentlyContinue)) {",
    '  Write-Error "Windows Rust was not found. Install Rust first: winget install Rustlang.Rustup"',
    "}",
    `Set-Location -LiteralPath ${quotePs(windowsPath)}`,
    // Calling node directly avoids `npm run`, which may route through cmd.exe.
    `& node ${quotePs(launcher)} ${forwarded.map(quotePs).join(" ")}`,
  ].join("; ");

  const result = spawnSync(
    "powershell.exe",
    ["-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", command],
    { stdio: "inherit" },
  );
  // PowerShell could not be launched at all: let the caller explain what to do.
  if (result.error) return false;
  process.exit(result.status ?? 1);
}
// Run `command commandArgs...` from the app root with inherited stdio.
// On win32 the command goes through PowerShell (arguments single-quoted via
// quotePs); elsewhere it is spawned directly.
//
// Returns false when the spawn fails with ENOENT. Otherwise, with
// `exit: true` (the default) the process exits with the command's status
// (1 if it has none); with `exit: false` it returns whether the status was 0.
function run(command, commandArgs, { exit = true } = {}) {
  let result;
  if (process.platform === "win32") {
    const quotedArgs = commandArgs.map(quotePs).join(" ");
    const psCommand = [
      "$ErrorActionPreference = 'Stop'",
      "Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force",
      `Set-Location -LiteralPath ${quotePs(appRoot)}`,
      `& ${quotePs(command)} ${quotedArgs}`,
    ].join("; ");
    result = spawnSync(
      "powershell.exe",
      ["-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", psCommand],
      { stdio: "inherit" },
    );
  } else {
    result = spawnSync(command, commandArgs, {
      cwd: appRoot,
      env: process.env,
      stdio: "inherit",
    });
  }

  const missing = Boolean(result.error) && result.error.code === "ENOENT";
  if (missing) {
    return false;
  }
  if (exit) {
    process.exit(result.status ?? 1);
  }
  return result.status === 0;
}
// Inside WSL (unless HERMES_GUI_TAURI_WSL=1 opts out), hand off to the
// Windows side. dispatchToWindows() exits the process itself on a completed
// hand-off, so reaching the lines after it means the hand-off did not happen.
if (isWsl() && process.env.HERMES_GUI_TAURI_WSL !== "1") {
console.log("Launching native Windows Tauri from WSL...");
dispatchToWindows();
console.error(
"Could not hand off to Windows PowerShell. Run this from Windows PowerShell instead:",
);
console.error(" cd \\\\wsl$\\Ubuntu\\home\\bb\\hermes-agent\\apps\\gui");
console.error(" npm run dev:tauri");
process.exit(1);
}
// Resolution order for the Tauri CLI: project-local binary, `tauri` on PATH,
// `cargo tauri`, and finally `npx @tauri-apps/cli@latest`.
// run() with the default `exit: true` ends the process unless the command
// was not found (ENOENT), in which case the next candidate is tried.
if (existsSync(localTauri)) run(localTauri, args);
if (run("tauri", args, { exit: false })) process.exit(0);
if (run("cargo", ["tauri", ...args], { exit: false })) process.exit(0);
run("npx", ["--yes", "@tauri-apps/cli@latest", ...args]);
-1
View File
@@ -1 +0,0 @@
/target/
-5579
View File
File diff suppressed because it is too large Load Diff
-17
View File
@@ -1,17 +0,0 @@
[package]
name = "hermes-gui"
version = "0.0.0"
description = "Hermes GUI shell"
edition = "2021"
[lib]
name = "hermes_gui_lib"
crate-type = ["staticlib", "cdylib", "rlib"]
[build-dependencies]
tauri-build = { version = "2", features = [] }
[dependencies]
tauri = { version = "2", features = ["tray-icon"] }
tauri-plugin-notification = "2"
tauri-plugin-opener = "2"
-3
View File
@@ -1,3 +0,0 @@
// Entry point that only delegates to `tauri_build::build()`.
// NOTE(review): presumably this is the crate's Cargo build script — confirm.
fn main() {
tauri_build::build();
}
@@ -1,7 +0,0 @@
{
"$schema": "../gen/schemas/desktop-schema.json",
"identifier": "default",
"description": "Default Hermes GUI permissions",
"windows": ["main"],
"permissions": ["core:default", "notification:default", "opener:default"]
}
File diff suppressed because one or more lines are too long
@@ -1 +0,0 @@
{"default":{"identifier":"default","description":"Default Hermes GUI permissions","local":true,"windows":["main"],"permissions":["core:default","notification:default","opener:default"]}}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
Binary file not shown.

Before

Width:  |  Height:  |  Size: 135 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

-4
View File
@@ -1,4 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
<rect width="100" height="100" rx="18" fill="#071313"/>
<text x="50" y="70" text-anchor="middle" font-size="68" fill="#f0e6d2"></text>
</svg>

Before

Width:  |  Height:  |  Size: 212 B

-1
View File
@@ -1 +0,0 @@
-433
View File
@@ -1,433 +0,0 @@
use std::{
io::{Read, Write},
net::{TcpListener, TcpStream},
process::{Child, Command, Stdio},
sync::Mutex,
time::{Duration, Instant},
};
use tauri::{
image::Image,
menu::{Menu, MenuItem, PredefinedMenuItem},
tray::{MouseButton, MouseButtonState, TrayIconBuilder, TrayIconEvent},
App, AppHandle, Manager, WebviewWindow,
};
// Loopback address used for health probes, port binding and the dashboard's `--host`.
const GUI_HOST: &str = "127.0.0.1";
// Starting port when HERMES_GUI_PORT is unset or not a valid u16.
const DEFAULT_GUI_PORT: u16 = 9120;
// Minimum splash time in ms when HERMES_GUI_MIN_SPLASH_MS is unset or unparsable.
const MIN_SPLASH_MS: u64 = 0;
// Percent-encoded inline HTML page reading "Starting Hermes…", shown until the
// dashboard is healthy.
const SPLASH_URL: &str = "data:text/html,%3C!doctype%20html%3E%3Cmeta%20charset%3Dutf-8%3E%3Cstyle%3Ebody%7Bmargin%3A0%3Bheight%3A100vh%3Bdisplay%3Agrid%3Bplace-items%3Acenter%3Bbackground%3A%23071313%3Bcolor%3A%23f0e6d2%3Bfont%3A14px%20monospace%3Bletter-spacing%3A.08em%3Btext-transform%3Auppercase%7D%3C%2Fstyle%3E%3Cbody%3EStarting%20Hermes%E2%80%A6%3C%2Fbody%3E";
// Shared state registered with the Tauri app through `.manage(...)`.
struct GuiState {
// Dashboard process spawned by this shell, if any; taken and killed by
// `stop_owned_dashboard`.
child: Mutex<Option<Child>>,
// Port the dashboard is expected on; updated by `ensure_dashboard`.
port: Mutex<u16>,
}
/// Build the dashboard base URL (`http://<GUI_HOST>:<port>`).
fn gui_url(port: u16) -> String {
    let mut url = String::from("http://");
    url.push_str(GUI_HOST);
    url.push(':');
    url.push_str(&port.to_string());
    url
}
/// Probe `GET /api/health` on `GUI_HOST:port` over a raw TCP connection.
///
/// Connect and read each use a 1 s timeout. Returns `true` only when the
/// response text contains `200 OK`, `"status":"ok"` and `"mode":"gui"`.
/// A failed connect or write yields `false`; a read error is ignored and
/// whatever was received so far is inspected.
fn check_health(port: u16) -> bool {
    let timeout = Duration::from_secs(1);
    let address: std::net::SocketAddr = format!("{GUI_HOST}:{port}").parse().unwrap();
    let mut stream = match TcpStream::connect_timeout(&address, timeout) {
        Ok(stream) => stream,
        Err(_) => return false,
    };
    let _ = stream.set_read_timeout(Some(timeout));

    let request =
        format!("GET /api/health HTTP/1.1\r\nHost: {GUI_HOST}:{port}\r\nConnection: close\r\n\r\n");
    if stream.write_all(request.as_bytes()).is_err() {
        return false;
    }

    let mut reply = String::new();
    let _ = stream.read_to_string(&mut reply);

    let required = ["200 OK", "\"status\":\"ok\"", "\"mode\":\"gui\""];
    required.iter().all(|needle| reply.contains(*needle))
}
/// Report whether a TCP listener can currently be bound to `GUI_HOST:port`.
/// The probe listener is dropped again immediately.
fn can_bind(port: u16) -> bool {
    match TcpListener::bind((GUI_HOST, port)) {
        Ok(_) => true,
        Err(_) => false,
    }
}
fn base_port() -> u16 {
std::env::var("HERMES_GUI_PORT")
.ok()
.and_then(|raw| raw.parse().ok())
.unwrap_or(DEFAULT_GUI_PORT)
}
/// Pick the port to use: the first of up to 20 ports starting at
/// `base_port()` that either already serves a healthy GUI dashboard or can
/// be bound. Falls back to the starting port when none qualifies.
fn select_port() -> u16 {
    let first = base_port();
    let end = first.saturating_add(20);
    (first..end)
        .find(|&candidate| check_health(candidate) || can_bind(candidate))
        .unwrap_or(first)
}
/// Repository root: three levels above the crate's manifest directory,
/// canonicalized. Falls back to `"."` when canonicalization fails.
fn repo_root() -> std::path::PathBuf {
    use std::path::PathBuf;

    let candidate = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../..");
    match candidate.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => PathBuf::from("."),
    }
}
/// Runtime directory taken from `HERMES_GUI_RUNTIME_DIR`, or `None` when
/// the variable is not set.
fn runtime_dir() -> Option<std::path::PathBuf> {
    let raw = std::env::var_os("HERMES_GUI_RUNTIME_DIR")?;
    Some(std::path::PathBuf::from(raw))
}
/// Path of the Python interpreter inside the runtime's `venv`:
/// `venv/Scripts/python.exe` on Windows, `venv/bin/python` elsewhere.
fn runtime_python(runtime: &std::path::Path) -> std::path::PathBuf {
    let (bin_dir, executable) = if cfg!(target_os = "windows") {
        ("Scripts", "python.exe")
    } else {
        ("bin", "python")
    };
    runtime.join("venv").join(bin_dir).join(executable)
}
/// Split a Windows UNC path into a WSL share into `(distro, linux_path)`.
///
/// Accepts `\\wsl$\<distro>\...` and `\\wsl.localhost\<distro>\...`, with
/// either slash direction, and also the verbatim form
/// `\\?\UNC\<host>\<distro>\...` that `std::fs::canonicalize` returns on
/// Windows. The host name is matched case-insensitively.
///
/// Returns `None` when the path is not a UNC path to one of those hosts, or
/// when the distro name or the path inside the distro is empty.
fn wsl_path(root: &std::path::Path) -> Option<(String, String)> {
    let raw = root.to_string_lossy().replace('\\', "/");
    // Drop the UNC marker. The verbatim prefix must be tried first because it
    // also begins with "//".
    let unc = match raw.strip_prefix("//?/UNC/") {
        Some(rest) => rest,
        None => raw.strip_prefix("//")?,
    };

    let mut segments = unc.split('/');
    let host = segments.next()?.to_ascii_lowercase();
    if host != "wsl$" && host != "wsl.localhost" {
        return None;
    }
    let distro = segments.next()?.to_string();
    let tail: Vec<&str> = segments.collect();
    let path = format!("/{}", tail.join("/"));
    // A bare share root has no usable working directory inside the distro.
    if distro.is_empty() || path == "/" {
        return None;
    }
    Some((distro, path))
}
// Spawn `python -m hermes_cli.main dashboard --gui --no-open --host GUI_HOST --port <port>`
// with all stdio detached, and return the child process handle.
//
// One of three launch strategies is used, checked in this order:
//   1. HERMES_GUI_RUNTIME_DIR is set: use that runtime's venv Python and point
//      HERMES_WEB_DIST / HERMES_TUI_DIR at its `web_dist` / `ui-tui` folders.
//   2. The repo root is a WSL UNC path: run inside the distro via
//      `wsl.exe -d <distro> --cd <path> env VAR=... python ...`.
//   3. Otherwise: run `python` directly from the repo root.
// Every strategy sets HERMES_GUI=1 and HERMES_GUI_PORT and passes HERMES_HOME /
// HERMES_GUI_FRESH through when they are set.
fn start_dashboard(port: u16) -> std::io::Result<Child> {
// Strategy 1: configured runtime directory.
if let Some(runtime) = runtime_dir() {
let python = runtime_python(&runtime);
let web_dist = runtime.join("web_dist");
let tui_dir = runtime.join("ui-tui");
let port = port.to_string();
return Command::new(python)
.args([
"-m",
"hermes_cli.main",
"dashboard",
"--gui",
"--no-open",
"--host",
GUI_HOST,
"--port",
&port,
])
.env("HERMES_GUI", "1")
.env("HERMES_GUI_PORT", &port)
.env("HERMES_WEB_DIST", web_dist)
.env("HERMES_TUI_DIR", tui_dir)
.envs(
std::env::vars()
.filter(|(key, _)| matches!(key.as_str(), "HERMES_HOME" | "HERMES_GUI_FRESH")),
)
.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::null())
.spawn();
}
let root = repo_root();
let port = port.to_string();
// Strategy 2: repo lives on a WSL share. Variables are passed as `env`
// arguments inside the distro rather than through the wsl.exe process.
if let Some((distro, path)) = wsl_path(&root) {
let port_env = format!("HERMES_GUI_PORT={port}");
let mut env_args = vec!["HERMES_GUI=1".to_string(), port_env];
if let Ok(home) = std::env::var("HERMES_HOME") {
env_args.push(format!("HERMES_HOME={home}"));
}
if let Ok(fresh) = std::env::var("HERMES_GUI_FRESH") {
env_args.push(format!("HERMES_GUI_FRESH={fresh}"));
}
let mut args = vec![
"-d".to_string(),
distro,
"--cd".to_string(),
path,
"env".to_string(),
];
args.extend(env_args);
args.extend([
"python".to_string(),
"-m".to_string(),
"hermes_cli.main".to_string(),
"dashboard".to_string(),
"--gui".to_string(),
"--no-open".to_string(),
"--host".to_string(),
GUI_HOST.to_string(),
"--port".to_string(),
port.clone(),
]);
return Command::new("wsl.exe")
.args(args)
.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::null())
.spawn();
}
// Strategy 3: plain `python` from the repo root.
Command::new("python")
.args([
"-m",
"hermes_cli.main",
"dashboard",
"--gui",
"--no-open",
"--host",
GUI_HOST,
"--port",
&port,
])
.current_dir(root)
.env("HERMES_GUI", "1")
.env("HERMES_GUI_PORT", &port)
.envs(
std::env::vars()
.filter(|(key, _)| matches!(key.as_str(), "HERMES_HOME" | "HERMES_GUI_FRESH")),
)
.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::null())
.spawn()
}
/// Kill and reap the dashboard process this shell started, if any. The
/// stored handle is cleared first; kill/wait errors are ignored.
///
/// Panics if the child mutex is poisoned.
fn stop_owned_dashboard(state: &GuiState) {
    let owned = state.child.lock().expect("gui child lock poisoned").take();
    if let Some(mut child) = owned {
        let _ = child.kill();
        let _ = child.wait();
    }
}
/// Read the port currently recorded in the shared state.
///
/// Panics if the port mutex is poisoned.
fn current_port(state: &GuiState) -> u16 {
    let guard = state.port.lock().expect("gui port lock poisoned");
    *guard
}
/// Make sure a dashboard is running or has been started.
///
/// Returns immediately when the recorded port is already healthy. Otherwise
/// a port is chosen with `select_port`, stored in the state, and, unless
/// that port already serves a healthy dashboard, a new dashboard process is
/// spawned and kept in the state. This does not wait for the new process to
/// become healthy.
///
/// Returns an error message with manual start instructions when spawning fails.
fn ensure_dashboard(state: &GuiState) -> Result<(), String> {
    if check_health(current_port(state)) {
        return Ok(());
    }

    let port = select_port();
    {
        let mut slot = state.port.lock().expect("gui port lock poisoned");
        *slot = port;
    }
    if check_health(port) {
        return Ok(());
    }

    match start_dashboard(port) {
        Ok(child) => {
            *state.child.lock().expect("gui child lock poisoned") = Some(child);
            Ok(())
        }
        Err(err) => Err(format!(
            "Could not auto-start Hermes dashboard ({err}). Start it manually with: hermes dashboard --gui --no-open --port {port}"
        )),
    }
}
/// On a background thread, poll the dashboard every 500 ms for up to 60 s.
/// Once it is healthy, navigate `window` to it, then show and focus it.
///
/// `HERMES_GUI_MIN_SPLASH_MS` (default `MIN_SPLASH_MS`) sets a minimum time
/// since the call before navigating. If the dashboard never becomes healthy
/// within the deadline, the thread ends without touching the window.
fn navigate_when_ready(window: WebviewWindow, port: u16) {
    std::thread::spawn(move || {
        let deadline = Duration::from_secs(60);
        let poll_interval = Duration::from_millis(500);
        let started = Instant::now();

        loop {
            if started.elapsed() >= deadline {
                return;
            }
            if !check_health(port) {
                std::thread::sleep(poll_interval);
                continue;
            }

            let min_splash = match std::env::var("HERMES_GUI_MIN_SPLASH_MS") {
                Ok(raw) => raw.parse::<u64>().unwrap_or(MIN_SPLASH_MS),
                Err(_) => MIN_SPLASH_MS,
            };
            let floor = Duration::from_millis(min_splash);
            let elapsed = started.elapsed();
            if elapsed < floor {
                std::thread::sleep(floor - elapsed);
            }

            if let Ok(target) = tauri::Url::parse(&gui_url(port)) {
                let _ = window.navigate(target);
                let _ = window.show();
                let _ = window.set_focus();
            }
            return;
        }
    });
}
/// Show and focus the window labelled `"main"`, if it exists. Errors from
/// either call are ignored.
fn show_main_window(app: &AppHandle) {
    let Some(window) = app.get_webview_window("main") else {
        return;
    };
    let _ = window.show();
    let _ = window.set_focus();
}
// Open the dashboard URL for `port` with the platform's URL opener.
// Spawn errors are ignored.
fn open_browser(port: u16) {
let url = gui_url(port);
// Windows: `cmd /C start "" <url>`, with stdio detached.
#[cfg(target_os = "windows")]
let _ = Command::new("cmd")
.args(["/C", "start", "", &url])
.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::null())
.spawn();
// macOS: `open <url>`.
#[cfg(target_os = "macos")]
let _ = Command::new("open").arg(&url).spawn();
// Other Unix targets: `xdg-open <url>`.
#[cfg(all(unix, not(target_os = "macos")))]
let _ = Command::new("xdg-open").arg(&url).spawn();
}
/// Build the 32x32 RGBA tray icon in memory: a light mark (a vertical
/// stroke, a horizontal stroke and two short end strokes) on a dark
/// background, with every pixel fully opaque.
fn tray_icon() -> Image<'static> {
    const SIDE: usize = 32;
    const FOREGROUND: [u8; 4] = [0xF0, 0xE6, 0xD2, 0xFF];
    const BACKGROUND: [u8; 4] = [0x07, 0x13, 0x13, 0xFF];

    let is_mark = |x: usize, y: usize| -> bool {
        let vertical = (14..=17).contains(&x) && (5..=26).contains(&y);
        let horizontal = (8..=23).contains(&x) && (13..=16).contains(&y);
        let ends = (10..=21).contains(&x) && (y == 5 || y == 26);
        vertical || horizontal || ends
    };

    let mut rgba = Vec::with_capacity(SIDE * SIDE * 4);
    for y in 0..SIDE {
        for x in 0..SIDE {
            let pixel = if is_mark(x, y) { &FOREGROUND } else { &BACKGROUND };
            rgba.extend_from_slice(pixel);
        }
    }
    Image::new_owned(rgba, SIDE as u32, SIDE as u32)
}
/// Restart the dashboard: stop the process this shell owns, ensure a
/// dashboard is running or started again, then, if the `"main"` window
/// exists, show the splash page and navigate back once the dashboard is
/// healthy.
///
/// Returns the error from `ensure_dashboard` when the dashboard cannot be started.
fn restart_runtime(app: &AppHandle) -> Result<(), String> {
    let state = app.state::<GuiState>();
    stop_owned_dashboard(&state);
    ensure_dashboard(&state)?;

    let Some(window) = app.get_webview_window("main") else {
        return Ok(());
    };
    if let Ok(splash) = tauri::Url::parse(SPLASH_URL) {
        let _ = window.navigate(splash);
    }
    let port = current_port(&state);
    navigate_when_ready(window, port);
    Ok(())
}
// Build the tray icon and its menu, and wire up their event handlers.
// Menu order: Open Hermes, Open in Browser, Restart Hermes Runtime,
// separator, a disabled "Local runtime" entry, separator, Quit Hermes.
// Any error from creating menu items or the tray icon is returned to the caller.
fn setup_tray(app: &App) -> tauri::Result<()> {
let open_item = MenuItem::with_id(app, "open", "Open Hermes", true, None::<&str>)?;
let browser_item = MenuItem::with_id(app, "browser", "Open in Browser", true, None::<&str>)?;
let restart_item =
MenuItem::with_id(app, "restart", "Restart Hermes Runtime", true, None::<&str>)?;
// Created disabled (enabled = false), so it only acts as a label.
let status_item = MenuItem::with_id(app, "status", "Local runtime", false, None::<&str>)?;
let separator = PredefinedMenuItem::separator(app)?;
let separator2 = PredefinedMenuItem::separator(app)?;
let quit_item = MenuItem::with_id(app, "quit", "Quit Hermes", true, None::<&str>)?;
let menu = Menu::with_items(
app,
&[
&open_item,
&browser_item,
&restart_item,
&separator,
&status_item,
&separator2,
&quit_item,
],
)?;
let icon = tray_icon();
let _tray = TrayIconBuilder::new()
.icon(icon)
.menu(&menu)
.tooltip("Hermes")
// Dispatch on the menu item ids assigned above.
.on_menu_event(|app, event| match event.id.as_ref() {
"open" => show_main_window(app),
"browser" => {
let state = app.state::<GuiState>();
open_browser(current_port(&state));
}
"restart" => {
if let Err(err) = restart_runtime(app) {
eprintln!("Failed to restart Hermes runtime: {err}");
}
}
"quit" => {
// Stop the dashboard this shell started before exiting.
let state = app.state::<GuiState>();
stop_owned_dashboard(&state);
app.exit(0);
}
_ => {}
})
// Left-button release on the tray icon brings the main window back.
.on_tray_icon_event(|tray, event| {
if let TrayIconEvent::Click {
button: MouseButton::Left,
button_state: MouseButtonState::Up,
..
} = event
{
show_main_window(&tray.app_handle());
}
})
.build(app)?;
Ok(())
}
/// Tauri command: report whether the dashboard on the recorded port
/// currently passes the health check.
#[tauri::command]
fn runtime_running(app: AppHandle) -> bool {
    let state = app.state::<GuiState>();
    let port = current_port(&state);
    check_health(port)
}
// Tauri command wrapper around `restart_runtime`; returns its result unchanged.
#[tauri::command]
fn restart_runtime_command(app: AppHandle) -> Result<(), String> {
restart_runtime(&app)
}
// Library entry point: configure and run the Tauri application.
// Panics with "failed to run Hermes GUI" if the Tauri runtime returns an error.
pub fn run() {
tauri::Builder::default()
.plugin(tauri_plugin_notification::init())
.plugin(tauri_plugin_opener::init())
// Shared state: no owned child yet, port initialised from `base_port()`.
.manage(GuiState {
child: Mutex::new(None),
port: Mutex::new(base_port()),
})
.invoke_handler(tauri::generate_handler![
runtime_running,
restart_runtime_command
])
.setup(|app| {
setup_tray(app)?;
if let Some(window) = app.get_webview_window("main") {
// Show the splash page first, then swap to the dashboard once it is healthy.
if let Ok(url) = tauri::Url::parse(SPLASH_URL) {
let _ = window.navigate(url);
}
let state = app.state::<GuiState>();
// A failed auto-start is only logged; navigation is still attempted in
// case a dashboard comes up on that port later.
if let Err(err) = ensure_dashboard(&state) {
eprintln!("{err}");
}
let port = current_port(&state);
navigate_when_ready(window, port);
}
Ok(())
})
// A close request hides the window instead of closing it; the tray menu's
// "Quit Hermes" item calls `app.exit(0)`.
.on_window_event(|window, event| {
if let tauri::WindowEvent::CloseRequested { api, .. } = event {
api.prevent_close();
let _ = window.hide();
}
})
.run(tauri::generate_context!())
.expect("failed to run Hermes GUI");
}
-5
View File
@@ -1,5 +0,0 @@
// When `debug_assertions` is off, apply `windows_subsystem = "windows"` to the
// crate; builds with debug assertions keep the default subsystem.
#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]
// Binary entry point: all application logic lives in `hermes_gui_lib::run`.
fn main() {
hermes_gui_lib::run();
}
-38
View File
@@ -1,38 +0,0 @@
{
"$schema": "https://schema.tauri.app/config/2",
"productName": "Hermes",
"version": "0.0.0",
"identifier": "ai.nous.hermes.gui",
"build": {
"beforeDevCommand": "",
"beforeBuildCommand": "",
"devUrl": "http://127.0.0.1:9120",
"frontendDist": "../dist"
},
"app": {
"withGlobalTauri": true,
"windows": [
{
"label": "main",
"title": "Hermes",
"width": 1400,
"height": 900,
"minWidth": 900,
"minHeight": 600,
"resizable": true,
"center": true
}
],
"security": {
"csp": "default-src 'self' http://127.0.0.1:* http://localhost:*; connect-src 'self' http://127.0.0.1:* http://localhost:* ws://127.0.0.1:* ws://localhost:*; img-src 'self' data: blob: http://127.0.0.1:* http://localhost:*; style-src 'self' 'unsafe-inline' http://127.0.0.1:* http://localhost:*; script-src 'self' 'unsafe-inline' 'unsafe-eval' http://127.0.0.1:* http://localhost:*"
}
},
"bundle": {
"active": true,
"icon": ["icons/32x32.png", "icons/icon.ico", "icons/icon.svg"],
"targets": ["nsis", "dmg", "app"],
"resources": {
"sidecars": "sidecars/"
}
}
}
-5
View File
@@ -1,5 +0,0 @@
// Browser-side GUI bridge entry.
//
// The dashboard remains in `web/`; this file is reserved for future shell-only
// glue if we need pre-navigation scripts or native event wiring.
export {};
-44
View File
@@ -1,44 +0,0 @@
# Bundle the Hermes GUI runtime (dashboard build, TUI build and a Python
# virtual environment) into a sidecar directory for the Tauri shell.
#
# Parameters:
#   -Out     Destination directory. It is deleted and recreated on every run.
#   -Python  Python executable used to create the bundled virtual environment.
param(
    [string]$Out = "$PSScriptRoot\..\gui\src-tauri\sidecars\hermes-runtime",
    [string]$Python = "python"
)

# Abort on the first failing cmdlet (Copy-Item, Remove-Item, ...) instead of
# carrying on and reporting a half-built bundle as ready. This mirrors the
# `set -euo pipefail` used by the bash version of this script.
$ErrorActionPreference = "Stop"

# Native executables (npm, python) report failure through their exit code
# rather than a PowerShell error, so each call is checked explicitly.
function Assert-NativeSuccess {
    param([string]$Step)
    if ($LASTEXITCODE -ne 0) {
        throw "$Step failed with exit code $LASTEXITCODE"
    }
}

# Repository root: two directory levels above this script.
$Root = Resolve-Path "$PSScriptRoot\..\.."

Write-Host "Bundling Hermes GUI runtime"
Write-Host "repo: $Root"
Write-Host "out: $Out"

# Always start from an empty output directory so stale artifacts never leak
# into the bundle.
if (Test-Path $Out) {
    Remove-Item -Recurse -Force $Out
}
New-Item -ItemType Directory -Force -Path $Out | Out-Null

Write-Host "-> Building dashboard"
npm --prefix "$Root\web" ci
Assert-NativeSuccess "npm ci (web)"
npm --prefix "$Root\web" run build
Assert-NativeSuccess "npm run build (web)"
Copy-Item -Recurse "$Root\web\dist" "$Out\web_dist"

Write-Host "-> Building TUI"
npm --prefix "$Root\ui-tui" ci
Assert-NativeSuccess "npm ci (ui-tui)"
npm --prefix "$Root\ui-tui" run build
Assert-NativeSuccess "npm run build (ui-tui)"
New-Item -ItemType Directory -Force -Path "$Out\ui-tui" | Out-Null
Copy-Item -Recurse "$Root\ui-tui\dist" "$Out\ui-tui\dist"
Copy-Item "$Root\ui-tui\package.json" "$Out\ui-tui\package.json"
Copy-Item "$Root\ui-tui\package-lock.json" "$Out\ui-tui\package-lock.json"
Copy-Item -Recurse "$Root\ui-tui\node_modules" "$Out\ui-tui\node_modules"

Write-Host "-> Creating Python runtime"
& $Python -m venv "$Out\venv"
Assert-NativeSuccess "python -m venv"
& "$Out\venv\Scripts\python.exe" -m pip install --upgrade pip
Assert-NativeSuccess "pip install --upgrade pip"
& "$Out\venv\Scripts\python.exe" -m pip install -e "$Root[web,pty]"
Assert-NativeSuccess "pip install -e (hermes)"

# Drop a short README into the bundle explaining how the shell locates it.
@"
# Hermes GUI Runtime
Generated by apps/shared/bundle-runtime.ps1.
Set HERMES_GUI_RUNTIME_DIR to this directory before launching the Tauri shell.
"@ | Set-Content "$Out\README.md"

Write-Host "Runtime bundle ready: $Out"
-41
View File
@@ -1,41 +0,0 @@
#!/usr/bin/env bash
# Bundle the Hermes GUI runtime (dashboard build, TUI build and a Python
# virtual environment) into a sidecar directory.
#
# Usage: bundle-runtime.sh [OUT_DIR]
#   OUT_DIR  Destination directory; defaults to
#            $ROOT/apps/gui/src-tauri/sidecars/hermes-runtime.
#   PYTHON   Environment variable naming the interpreter used to create the
#            virtual environment; defaults to "python".
#
# Exit on any failing command, unset variable, or failing pipeline stage.
set -euo pipefail
# Repository root: two directory levels above this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
OUT="${1:-"$ROOT/apps/gui/src-tauri/sidecars/hermes-runtime"}"
PYTHON="${PYTHON:-python}"
echo "Bundling Hermes GUI runtime"
echo "repo: $ROOT"
echo "out: $OUT"
# Start from an empty output directory on every run.
rm -rf "$OUT"
mkdir -p "$OUT"
echo "→ Building dashboard"
npm --prefix "$ROOT/web" ci
npm --prefix "$ROOT/web" run build
cp -a "$ROOT/web/dist" "$OUT/web_dist"
echo "→ Building TUI"
npm --prefix "$ROOT/ui-tui" ci
npm --prefix "$ROOT/ui-tui" run build
mkdir -p "$OUT/ui-tui"
# Copy the built TUI together with its manifest, lockfile and node_modules.
cp -a "$ROOT/ui-tui/dist" "$OUT/ui-tui/dist"
cp -a "$ROOT/ui-tui/package.json" "$ROOT/ui-tui/package-lock.json" "$OUT/ui-tui/"
cp -a "$ROOT/ui-tui/node_modules" "$OUT/ui-tui/node_modules"
echo "→ Creating Python runtime"
"$PYTHON" -m venv "$OUT/venv"
"$OUT/venv/bin/python" -m pip install --upgrade pip
# Editable install of the repository root with the "web" and "pty" extras.
"$OUT/venv/bin/python" -m pip install -e "$ROOT[web,pty]"
# Write a short README into the bundle (heredoc body is emitted verbatim).
cat > "$OUT/README.md" <<EOF
# Hermes GUI Runtime
Generated by apps/shared/bundle-runtime.sh.
Set HERMES_GUI_RUNTIME_DIR to this directory before launching the Tauri shell.
EOF
echo "✓ Runtime bundle ready: $OUT"
-33
View File
@@ -1,33 +0,0 @@
# GUI Runtime Contract
The GUI shell starts Hermes with a small, explicit environment.
## Environment
```text
HERMES_GUI=1
HERMES_WEB_DIST=<bundled web dist>
HERMES_TUI_DIR=<bundled ui-tui dir>
```
The native shell uses `127.0.0.1:9120` as its initial GUI port during dev.
Bundled builds should keep the port private to the local machine and expose it
through `/api/health` and `/api/runtime`.
The shell should also pass the selected profile through the normal Hermes CLI
profile mechanism once the profile picker is wired.
## Ports
Use `127.0.0.1` only. Start with the GUI default port, then fall back to a
free port if occupied. Show the chosen port in the tray menu.
## User Data
The installer owns app files. Hermes owns user state under `HERMES_HOME`.
Uninstallers must not delete user state unless the user explicitly asks.
## Update Model
The MVP does not use Tauri's native updater. Instead, the GUI runs
`hermes update`, tails the action log, notifies the user on completion, and
then offers to restart the runtime.
+10
View File
@@ -4668,6 +4668,10 @@ class HermesCLI:
def new_session(self, silent=False):
"""Start a fresh session with a new session ID and cleared agent state."""
if self.agent and self.conversation_history:
try:
self.agent.flush_memories(self.conversation_history)
except (Exception, KeyboardInterrupt):
pass
# Trigger memory extraction on the old session before session_id rotates.
self.agent.commit_memory_session(self.conversation_history)
self._notify_session_boundary("on_session_finalize")
@@ -10784,6 +10788,12 @@ class HermesCLI:
self.agent.interrupt()
except Exception:
pass
# Flush memories before exit (only for substantial conversations)
if self.agent and self.conversation_history:
try:
self.agent.flush_memories(self.conversation_history)
except (Exception, KeyboardInterrupt):
pass
# Shut down voice recorder (release persistent audio stream)
if hasattr(self, '_voice_recorder') and self._voice_recorder:
try:
+208 -65
View File
@@ -524,7 +524,7 @@ def _load_gateway_config() -> dict:
def _resolve_gateway_model(config: dict | None = None) -> str:
"""Read model from config.yaml — single source of truth.
Without this, temporary AIAgent instances (e.g. /compress) fall
Without this, temporary AIAgent instances (memory flush, /compress) fall
back to the hardcoded default which fails when the active provider is
openai-codex.
"""
@@ -915,6 +915,129 @@ class GatewayRunner:
e,
)
# -----------------------------------------------------------------
def _flush_memories_for_session(
    self,
    old_session_id: str,
    session_key: Optional[str] = None,
):
    """Give a throwaway agent one turn to persist memories/skills for a session.

    This is a blocking worker. It is meant to be dispatched through
    ``run_in_executor`` from async code so the event loop is never blocked.

    Args:
        old_session_id: ID of the session whose transcript is reviewed.
        session_key: Optional session key forwarded to
            ``_resolve_session_agent_runtime`` when resolving the model and
            runtime settings.

    Returns:
        None. Any exception raised after the cron check is caught and logged
        at debug level.
    """
    # Cron sessions run headless and carry no meaningful user conversation
    # to extract memories from.
    if old_session_id and old_session_id.startswith("cron_"):
        logger.debug("Skipping memory flush for cron session: %s", old_session_id)
        return

    try:
        transcript = self.session_store.load_transcript(old_session_id)
        # Fewer than four transcript entries: nothing worth reviewing.
        if not transcript or len(transcript) < 4:
            return

        from run_agent import AIAgent

        model, runtime_kwargs = self._resolve_session_agent_runtime(
            session_key=session_key,
        )
        # Without credentials the temporary agent cannot run.
        if not runtime_kwargs.get("api_key"):
            return

        flush_agent = AIAgent(
            **runtime_kwargs,
            model=model,
            max_iterations=8,
            quiet_mode=True,
            skip_memory=True,  # Flush agent — no memory provider
            enabled_toolsets=["memory", "skills"],
            session_id=old_session_id,
        )
        try:
            # quiet_mode only suppresses init messages; tool call output
            # still reaches the terminal through _safe_print → _print_fn,
            # so swap in a no-op printer to silence the agent completely.
            flush_agent._print_fn = lambda *a, **kw: None

            # Replay only user/assistant turns that actually carry content.
            replay = []
            for message in transcript:
                role = message.get("role")
                body = message.get("content")
                if role in ("user", "assistant") and body:
                    replay.append({"role": role, "content": body})

            # Snapshot the live memory files so the flush agent can see what
            # is already saved and avoid overwriting newer entries.
            live_memory = ""
            try:
                from tools.memory_tool import get_memory_dir

                memory_dir = get_memory_dir()
                for filename, label in (
                    ("MEMORY.md", "MEMORY (your personal notes)"),
                    ("USER.md", "USER PROFILE (who the user is)"),
                ):
                    path = memory_dir / filename
                    if not path.exists():
                        continue
                    text = path.read_text(encoding="utf-8").strip()
                    if text:
                        live_memory += f"\n\n## Current {label}:\n{text}"
            except Exception:
                pass  # Non-fatal — flush still works, just without the guard

            # Assemble the instruction turn: header, optional live-memory
            # guard, then the closing "do not reply" directive.
            prompt_parts = [
                "[System: This session is about to be automatically reset due to "
                "inactivity or a scheduled daily reset. The conversation context "
                "will be cleared after this turn.\n\n"
                "Review the conversation above and:\n"
                "1. Save any important facts, preferences, or decisions to memory "
                "(user profile or your notes) that would be useful in future sessions.\n"
                "2. If you discovered a reusable workflow or solved a non-trivial "
                "problem, consider saving it as a skill.\n"
                "3. If nothing is worth saving, that's fine — just skip.\n\n"
            ]
            if live_memory:
                prompt_parts.append(
                    "IMPORTANT — here is the current live state of memory. Other "
                    "sessions, cron jobs, or the user may have updated it since this "
                    "conversation ended. Do NOT overwrite or remove entries unless "
                    "the conversation above reveals something that genuinely "
                    "supersedes them. Only add new information that is not already "
                    "captured below."
                )
                prompt_parts.append(live_memory + "\n\n")
            prompt_parts.append(
                "Do NOT respond to the user. Just use the memory and skill_manage "
                "tools if needed, then stop.]"
            )
            flush_prompt = "".join(prompt_parts)

            flush_agent.run_conversation(
                user_message=flush_prompt,
                conversation_history=replay,
            )
        finally:
            # Always release the temporary agent's resources, even on failure.
            self._cleanup_agent_resources(flush_agent)

        logger.info("Pre-reset memory flush completed for session %s", old_session_id)
    except Exception as e:
        logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
async def _async_flush_memories(
    self,
    old_session_id: str,
    session_key: Optional[str] = None,
):
    """Await the synchronous memory flush on the loop's default executor.

    Offloading ``_flush_memories_for_session`` to the executor keeps the
    event loop free while the flush runs.
    """
    worker = self._flush_memories_for_session
    running_loop = asyncio.get_running_loop()
    # ``None`` selects the event loop's default executor.
    await running_loop.run_in_executor(None, worker, old_session_id, session_key)
@property
def should_exit_cleanly(self) -> bool:
return self._exit_cleanly
@@ -980,7 +1103,7 @@ class GatewayRunner:
if override_runtime.get("api_key"):
logger.debug(
"Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
resolved_session_key or "", model, override_model,
(resolved_session_key or "")[:30], model, override_model,
override_runtime.get("provider"),
)
return override_model, override_runtime
@@ -988,12 +1111,12 @@ class GatewayRunner:
# resolution and apply model/provider from the override on top.
logger.debug(
"Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
resolved_session_key or "", model, override_model,
(resolved_session_key or "")[:30], model, override_model,
)
else:
logger.debug(
"No session model override: session=%s config_model=%s override_keys=%s",
resolved_session_key or "", model,
(resolved_session_key or "")[:30], model,
list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
)
@@ -1564,7 +1687,7 @@ class GatewayRunner:
continue
try:
agent.interrupt(reason)
logger.debug("Interrupted running agent for session %s during shutdown", session_key)
logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
except Exception as e:
logger.debug("Failed interrupting agent during shutdown: %s", e)
@@ -1736,7 +1859,7 @@ class GatewayRunner:
logger.warning(
"Auto-suspended stuck session %s (active across %d "
"consecutive restarts — likely a stuck loop)",
session_key, counts[session_key],
session_key[:30], counts[session_key],
)
except Exception:
pass
@@ -2149,7 +2272,7 @@ class GatewayRunner:
except Exception as e:
logger.error("Recovered watcher setup error: %s", e)
# Start background session expiry watcher to finalize expired sessions
# Start background session expiry watcher for proactive memory flushing
asyncio.create_task(self._session_expiry_watcher())
# Start background reconnection watcher for platforms that failed at startup
@@ -2166,24 +2289,25 @@ class GatewayRunner:
return True
async def _session_expiry_watcher(self, interval: int = 300):
"""Background task that finalizes expired sessions.
"""Background task that proactively flushes memories for expired sessions.
Runs every `interval` seconds (default 5 min). For each session that
has expired according to its reset policy, flushes memories in a thread
pool and marks the session so it won't be flushed again.
Runs every ``interval`` seconds (default 5 min). For each session
whose reset policy has expired, invokes ``on_session_finalize``
hooks, cleans up the cached AIAgent's tool resources, evicts the
cache entry so it can be garbage-collected, and marks the session
so it won't be finalized again.
This means memories are already saved by the time the user sends their
next message, so there's no blocking delay.
"""
await asyncio.sleep(60) # initial delay — let the gateway fully start
_finalize_failures: dict[str, int] = {} # session_id -> consecutive failure count
_MAX_FINALIZE_RETRIES = 3
_flush_failures: dict[str, int] = {} # session_id -> consecutive failure count
_MAX_FLUSH_RETRIES = 3
while self._running:
try:
self.session_store._ensure_loaded()
# Collect expired sessions first, then log a single summary.
_expired_entries = []
for key, entry in list(self.session_store._entries.items()):
if entry.expiry_finalized:
if entry.memory_flushed:
continue
if not self.session_store._is_session_expired(entry):
continue
@@ -2201,12 +2325,13 @@ class GatewayRunner:
f"{p}:{c}" for p, c in sorted(_platforms.items())
)
logger.info(
"Session expiry: %d sessions to finalize (%s)",
"Session expiry: %d sessions to flush (%s)",
len(_expired_entries), _plat_summary,
)
for key, entry in _expired_entries:
try:
await self._async_flush_memories(entry.session_id, key)
try:
from hermes_cli.plugins import invoke_hook as _invoke_hook
_parts = key.split(":")
@@ -2238,48 +2363,48 @@ class GatewayRunner:
# be garbage-collected. Otherwise the cache grows
# unbounded across the gateway's lifetime.
self._evict_cached_agent(key)
# Mark as finalized and persist to disk so the flag
# Mark as flushed and persist to disk so the flag
# survives gateway restarts.
with self.session_store._lock:
entry.expiry_finalized = True
entry.memory_flushed = True
self.session_store._save()
logger.debug(
"Session expiry finalized for %s",
"Memory flush completed for session %s",
entry.session_id,
)
_finalize_failures.pop(entry.session_id, None)
_flush_failures.pop(entry.session_id, None)
except Exception as e:
failures = _finalize_failures.get(entry.session_id, 0) + 1
_finalize_failures[entry.session_id] = failures
if failures >= _MAX_FINALIZE_RETRIES:
failures = _flush_failures.get(entry.session_id, 0) + 1
_flush_failures[entry.session_id] = failures
if failures >= _MAX_FLUSH_RETRIES:
logger.warning(
"Session finalize gave up after %d attempts for %s: %s. "
"Marking as finalized to prevent infinite retry loop.",
"Memory flush gave up after %d attempts for %s: %s. "
"Marking as flushed to prevent infinite retry loop.",
failures, entry.session_id, e,
)
with self.session_store._lock:
entry.expiry_finalized = True
entry.memory_flushed = True
self.session_store._save()
_finalize_failures.pop(entry.session_id, None)
_flush_failures.pop(entry.session_id, None)
else:
logger.debug(
"Session finalize failed (%d/%d) for %s: %s",
failures, _MAX_FINALIZE_RETRIES, entry.session_id, e,
"Memory flush failed (%d/%d) for %s: %s",
failures, _MAX_FLUSH_RETRIES, entry.session_id, e,
)
if _expired_entries:
_done = sum(
1 for _, e in _expired_entries if e.expiry_finalized
_flushed = sum(
1 for _, e in _expired_entries if e.memory_flushed
)
_failed = len(_expired_entries) - _done
_failed = len(_expired_entries) - _flushed
if _failed:
logger.info(
"Session expiry done: %d finalized, %d pending retry",
_done, _failed,
"Session expiry done: %d flushed, %d pending retry",
_flushed, _failed,
)
else:
logger.info(
"Session expiry done: %d finalized", _done,
"Session expiry done: %d flushed", _flushed,
)
# Sweep agents that have been idle beyond the TTL regardless
@@ -2556,7 +2681,7 @@ class GatewayRunner:
except Exception as _e:
logger.debug(
"mark_resume_pending failed for %s: %s",
_sk, _e,
_sk[:20], _e,
)
self._interrupt_running_agents(
_INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN
@@ -3222,7 +3347,7 @@ class GatewayRunner:
logger.warning(
"Evicting stale _running_agents entry for %s "
"(age: %.0fs, idle: %.0fs, timeout: %.0fs)%s",
_quick_key, _stale_age, _stale_idle,
_quick_key[:30], _stale_age, _stale_idle,
_raw_stale_timeout, _stale_detail,
)
self._invalidate_session_run_generation(
@@ -3258,7 +3383,7 @@ class GatewayRunner:
interrupt_reason=_INTERRUPT_REASON_STOP,
invalidation_reason="stop_command",
)
logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key)
logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
return "⚡ Stopped. You can continue this session."
# /reset and /new must bypass the running-agent guard so they
@@ -3324,7 +3449,7 @@ class GatewayRunner:
try:
accepted = running_agent.steer(steer_text)
except Exception as exc:
logger.warning("Steer failed for session %s: %s", _quick_key, exc)
logger.warning("Steer failed for session %s: %s", _quick_key[:20], exc)
return f"⚠️ Steer failed: {exc}"
if accepted:
preview = steer_text[:60] + ("..." if len(steer_text) > 60 else "")
@@ -3407,7 +3532,7 @@ class GatewayRunner:
)
if event.message_type == MessageType.PHOTO:
logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key)
logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
adapter = self.adapters.get(source.platform)
if adapter:
merge_pending_message_event(adapter._pending_messages, _quick_key, event)
@@ -3427,7 +3552,7 @@ class GatewayRunner:
logger.debug(
"Telegram follow-up arrived %.2fs after run start for %s — queueing without interrupt",
time.time() - _started_at,
_quick_key,
_quick_key[:20],
)
adapter = self.adapters.get(source.platform)
if adapter:
@@ -3445,7 +3570,7 @@ class GatewayRunner:
if event.get_command() == "stop":
# Force-clean the sentinel so the session is unlocked.
self._release_running_agent_state(_quick_key)
logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key)
logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20])
return "⚡ Force-stopped. The agent was still starting — session unlocked."
# Queue the message so it will be picked up after the
# agent starts.
@@ -3467,10 +3592,10 @@ class GatewayRunner:
else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
)
if self._busy_input_mode == "queue":
logger.debug("PRIORITY queue follow-up for session %s", _quick_key)
logger.debug("PRIORITY queue follow-up for session %s", _quick_key[:20])
self._queue_or_replace_pending_event(_quick_key, event)
return None
logger.debug("PRIORITY interrupt for session %s", _quick_key)
logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
running_agent.interrupt(event.text)
if _quick_key in self._pending_messages:
self._pending_messages[_quick_key] += "\n" + event.text
@@ -4468,7 +4593,7 @@ class GatewayRunner:
if not self._is_session_run_current(_quick_key, run_generation):
logger.info(
"Discarding stale agent result for %s — generation %d is no longer current",
_quick_key or "?",
_quick_key[:20] if _quick_key else "?",
run_generation,
)
_stale_adapter = self.adapters.get(source.platform)
@@ -4519,7 +4644,7 @@ class GatewayRunner:
except Exception as _e:
logger.debug(
"clear_resume_pending failed for %s: %s",
session_key, _e,
session_key[:20], _e,
)
# Surface error details when the agent failed silently (final_response=None)
@@ -4896,11 +5021,19 @@ class GatewayRunner:
# Get existing session key
session_key = self._session_key_for_source(source)
self._invalidate_session_run_generation(session_key, reason="session_reset")
# Snapshot the old entry so on_session_finalize can report the
# expiring session id before reset_session() rotates it.
old_entry = self.session_store._entries.get(session_key)
# Flush memories in the background (fire-and-forget) so the user
# gets the "Session reset!" response immediately.
try:
old_entry = self.session_store._entries.get(session_key)
if old_entry:
_flush_task = asyncio.create_task(
self._async_flush_memories(old_entry.session_id, session_key)
)
self._background_tasks.add(_flush_task)
_flush_task.add_done_callback(self._background_tasks.discard)
except Exception as e:
logger.debug("Gateway memory flush on reset failed: %s", e)
# Close tool resources on the old agent (terminal sandboxes, browser
# daemons, background processes) before evicting from cache.
# Guard with getattr because test fixtures may skip __init__.
@@ -5158,7 +5291,7 @@ class GatewayRunner:
interrupt_reason=_INTERRUPT_REASON_STOP,
invalidation_reason="stop_command_pending",
)
logger.info("STOP (pending) for session %s — sentinel cleared", session_key)
logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
if agent:
# Force-clean the session lock so a truly hung agent doesn't
@@ -7119,6 +7252,16 @@ class GatewayRunner:
if current_entry.session_id == target_id:
return f"📌 Already on session **{name}**."
# Flush memories for current session before switching
try:
_flush_task = asyncio.create_task(
self._async_flush_memories(current_entry.session_id, session_key)
)
self._background_tasks.add(_flush_task)
_flush_task.add_done_callback(self._background_tasks.discard)
except Exception as e:
logger.debug("Memory flush on resume failed: %s", e)
# Clear any running agent for this session key
self._release_running_agent_state(session_key)
@@ -8655,7 +8798,7 @@ class GatewayRunner:
if reason:
logger.info(
"Invalidated run generation for %s%d (%s)",
session_key,
session_key[:20],
generation,
reason,
)
@@ -9062,7 +9205,7 @@ class GatewayRunner:
if not _run_still_current():
logger.info(
"Discarding stale proxy stream for %s — generation %d is no longer current",
session_key or "?",
session_key[:20] if session_key else "?",
run_generation or 0,
)
return {
@@ -9126,7 +9269,7 @@ class GatewayRunner:
if not _run_still_current():
logger.info(
"Discarding stale proxy result for %s — generation %d is no longer current",
session_key or "?",
session_key[:20] if session_key else "?",
run_generation or 0,
)
return {
@@ -9568,7 +9711,7 @@ class GatewayRunner:
)
logger.debug(
"run_agent resolved: model=%s provider=%s session=%s",
model, runtime_kwargs.get("provider"), session_key or "",
model, runtime_kwargs.get("provider"), (session_key or "")[:30],
)
except Exception as exc:
return {
@@ -10179,7 +10322,7 @@ class GatewayRunner:
):
logger.info(
"Skipping stale agent promotion for %s — generation %s is no longer current",
session_key or "",
(session_key or "")[:20],
run_generation,
)
return
@@ -10326,7 +10469,7 @@ class GatewayRunner:
logger.info(
"Backup interrupt detected for session %s "
"(monitor task state: %s)",
session_key,
session_key[:20],
"done" if interrupt_monitor.done() else "running",
)
_backup_agent.interrupt(_bp_text)
@@ -10386,7 +10529,7 @@ class GatewayRunner:
logger.info(
"Backup interrupt detected for session %s "
"(monitor task state: %s)",
session_key,
session_key[:20],
"done" if interrupt_monitor.done() else "running",
)
_backup_agent.interrupt(_bp_text)
@@ -10488,7 +10631,7 @@ class GatewayRunner:
if _is_control_interrupt_message(interrupt_message):
logger.info(
"Ignoring control interrupt message for session %s: %s",
session_key or "?",
session_key[:20] if session_key else "?",
interrupt_message,
)
else:
@@ -10532,7 +10675,7 @@ class GatewayRunner:
if self._draining and (pending_event or pending):
logger.info(
"Discarding pending follow-up for session %s during gateway %s",
session_key or "?",
session_key[:20] if session_key else "?",
self._status_action_label(),
)
pending_event = None
@@ -10589,7 +10732,7 @@ class GatewayRunner:
try:
logger.info(
"Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
session_key or "?",
session_key[:20] if session_key else "?",
)
await adapter.send(
source.chat_id,
@@ -10601,7 +10744,7 @@ class GatewayRunner:
elif first_response:
logger.info(
"Queued follow-up for session %s: skipping resend because final streamed delivery was confirmed.",
session_key or "?",
session_key[:20] if session_key else "?",
)
# Release deferred bg-review notifications now that the
# first response has been delivered. Pop from the
@@ -10736,7 +10879,7 @@ class GatewayRunner:
if not _is_empty_sentinel and (_streamed or _previewed):
logger.info(
"Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s).",
session_key or "?",
session_key[:20] if session_key else "?",
_streamed,
_previewed,
)
+7 -7
View File
@@ -439,11 +439,11 @@ class SessionEntry:
auto_reset_reason: Optional[str] = None # "idle" or "daily"
reset_had_activity: bool = False # whether the expired session had any messages
# Set by the background expiry watcher after it finalizes an expired
# session (invoking on_session_finalize hooks and evicting the cached
# agent). Persisted to sessions.json so the flag survives gateway
# restarts — prevents redundant finalization runs.
expiry_finalized: bool = False
# Set by the background expiry watcher after it successfully flushes
# memories for this session. Persisted to sessions.json so the flag
# survives gateway restarts (the old in-memory _pre_flushed_sessions
# set was lost on restart, causing redundant re-flushes).
memory_flushed: bool = False
# When True the next call to get_or_create_session() will auto-reset
# this session (create a new session_id) so the user starts fresh.
@@ -479,7 +479,7 @@ class SessionEntry:
"last_prompt_tokens": self.last_prompt_tokens,
"estimated_cost_usd": self.estimated_cost_usd,
"cost_status": self.cost_status,
"expiry_finalized": self.expiry_finalized,
"memory_flushed": self.memory_flushed,
"suspended": self.suspended,
"resume_pending": self.resume_pending,
"resume_reason": self.resume_reason,
@@ -531,7 +531,7 @@ class SessionEntry:
last_prompt_tokens=data.get("last_prompt_tokens", 0),
estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
cost_status=data.get("cost_status", "unknown"),
expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
memory_flushed=data.get("memory_flushed", False),
suspended=data.get("suspended", False),
resume_pending=data.get("resume_pending", False),
resume_reason=data.get("resume_reason"),
+1 -2
View File
@@ -103,8 +103,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
# Configuration
CommandDef("config", "Show current configuration", "Configuration",
cli_only=True),
CommandDef("model", "Switch model for this session", "Configuration",
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
cli_only=True),
+8
View File
@@ -612,6 +612,14 @@ DEFAULT_CONFIG = {
"timeout": 30,
"extra_body": {},
},
"flush_memories": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
"timeout": 30,
"extra_body": {},
},
"title_generation": {
"provider": "auto",
"model": "",
+65 -294
View File
@@ -51,7 +51,6 @@ import sys
from pathlib import Path
from typing import Optional
def _add_accept_hooks_flag(parser) -> None:
"""Attach the ``--accept-hooks`` flag. Shared across every agent
subparser so the flag works regardless of CLI position."""
@@ -175,7 +174,6 @@ load_hermes_dotenv(project_env=PROJECT_ROOT / ".env")
try:
if "HERMES_REDACT_SECRETS" not in os.environ:
import yaml as _yaml_early
_cfg_path = get_hermes_home() / "config.yaml"
if _cfg_path.exists():
with open(_cfg_path, encoding="utf-8") as _f:
@@ -841,8 +839,6 @@ def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:
def _tui_build_needed(tui_dir: Path) -> bool:
if _hermes_ink_bundle_stale(tui_dir):
return True
entry = tui_dir / "dist" / "entry.js"
if not entry.exists():
return True
@@ -1030,12 +1026,7 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
return [node, str(root / "dist" / "entry.js")], root
def _launch_tui(
resume_session_id: Optional[str] = None,
tui_dev: bool = False,
model: Optional[str] = None,
provider: Optional[str] = None,
):
def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
"""Replace current process with the TUI."""
tui_dir = PROJECT_ROOT / "ui-tui"
@@ -1045,12 +1036,6 @@ def _launch_tui(
)
env.setdefault("HERMES_PYTHON", sys.executable)
env.setdefault("HERMES_CWD", os.getcwd())
if model:
env["HERMES_MODEL"] = model
env["HERMES_INFERENCE_MODEL"] = model
if provider:
env["HERMES_TUI_PROVIDER"] = provider
env["HERMES_INFERENCE_PROVIDER"] = provider
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
# ~1.54GB depending on version and can fatal-OOM on long sessions with
# large transcripts / reasoning blobs. Token-level merge: respect any
@@ -1189,8 +1174,6 @@ def cmd_chat(args):
_launch_tui(
getattr(args, "resume", None),
tui_dev=getattr(args, "tui_dev", False),
model=getattr(args, "model", None),
provider=getattr(args, "provider", None),
)
# Import and run the CLI
@@ -1342,9 +1325,7 @@ def cmd_whatsapp(args):
return
if not (bridge_dir / "node_modules").exists():
print(
"\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)..."
)
print("\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)...")
npm = shutil.which("npm")
if not npm:
print(" ✗ npm not found on PATH — install Node.js first")
@@ -1720,14 +1701,15 @@ def _clear_stale_openai_base_url():
# (task_key, display_name, short_description)
_AUX_TASKS: list[tuple[str, str, str]] = [
("vision", "Vision", "image/screenshot analysis"),
("compression", "Compression", "context summarization"),
("web_extract", "Web extract", "web page summarization"),
("session_search", "Session search", "past-conversation recall"),
("approval", "Approval", "smart command approval"),
("mcp", "MCP", "MCP tool reasoning"),
("vision", "Vision", "image/screenshot analysis"),
("compression", "Compression", "context summarization"),
("web_extract", "Web extract", "web page summarization"),
("session_search", "Session search", "past-conversation recall"),
("approval", "Approval", "smart command approval"),
("mcp", "MCP", "MCP tool reasoning"),
("flush_memories", "Flush memories", "memory consolidation"),
("title_generation", "Title generation", "session titles"),
("skills_hub", "Skills hub", "skills search/install"),
("skills_hub", "Skills hub", "skills search/install"),
]
@@ -1826,7 +1808,7 @@ def _aux_config_menu() -> None:
print(" Auxiliary models — side-task routing")
print()
print(" Side tasks (vision, compression, web extraction, etc.) default")
print(' to your main chat model. "auto" means "use my main model"')
print(" to your main chat model. \"auto\" means \"use my main model\"")
print(" Hermes only falls back to a lightweight backend (OpenRouter,")
print(" Nous Portal) if the main model is unavailable. Override a")
print(" task below if you want it pinned to a specific provider/model.")
@@ -1837,20 +1819,15 @@ def _aux_config_menu() -> None:
desc_col = max(len(desc) for _, _, desc in _AUX_TASKS) + 4
entries: list[tuple[str, str]] = []
for task_key, name, desc in _AUX_TASKS:
task_cfg = (
aux.get(task_key, {}) if isinstance(aux.get(task_key), dict) else {}
)
task_cfg = aux.get(task_key, {}) if isinstance(aux.get(task_key), dict) else {}
current = _format_aux_current(task_cfg)
label = (
f"{name.ljust(name_col)}{('(' + desc + ')').ljust(desc_col)}{current}"
)
label = f"{name.ljust(name_col)}{('(' + desc + ')').ljust(desc_col)}{current}"
entries.append((task_key, label))
entries.append(("__reset__", "Reset all to auto"))
entries.append(("__back__", "Back"))
entries.append(("__back__", "Back"))
idx = _prompt_provider_choice(
[label for _, label in entries],
default=0,
[label for _, label in entries], default=0,
)
if idx is None:
return
@@ -1898,9 +1875,7 @@ def _aux_select_for_task(task: str) -> None:
entries: list[tuple[str, str, list[str]]] = [] # (slug, label, models)
# "auto" always first
auto_marker = (
" ← current" if current_provider == "auto" and not current_base_url else ""
)
auto_marker = " ← current" if current_provider == "auto" and not current_base_url else ""
entries.append(("__auto__", f"auto (recommended){auto_marker}", []))
for p in providers:
@@ -1909,9 +1884,7 @@ def _aux_select_for_task(task: str) -> None:
total = p.get("total_models", 0)
models = p.get("models") or []
model_hint = f"{total} models" if total else ""
marker = (
" ← current" if slug == current_provider and not current_base_url else ""
)
marker = " ← current" if slug == current_provider and not current_base_url else ""
entries.append((slug, f"{name}{model_hint}{marker}", list(models)))
# Custom endpoint (raw base_url)
@@ -1979,17 +1952,14 @@ def _aux_flow_provider_model(
selected = val or ""
else:
selected = _prompt_model_selection(
model_list,
current_model=current_model,
pricing=pricing,
model_list, current_model=current_model, pricing=pricing,
)
if selected is None:
print("No change.")
return
_save_aux_choice(
task, provider=provider_slug, model=selected or "", base_url="", api_key=""
)
_save_aux_choice(task, provider=provider_slug, model=selected or "",
base_url="", api_key="")
if selected:
print(f"{display_name}: {provider_slug} · {selected}")
else:
@@ -2009,9 +1979,7 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
print(" Provide an OpenAI-compatible base URL (e.g. http://localhost:11434/v1)")
print()
try:
url_prompt = (
f"Base URL [{current_base_url}]: " if current_base_url else "Base URL: "
)
url_prompt = f"Base URL [{current_base_url}]: " if current_base_url else "Base URL: "
url = input(url_prompt).strip()
except (KeyboardInterrupt, EOFError):
print()
@@ -2021,30 +1989,20 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
print("No URL provided. No change.")
return
try:
model_prompt = (
f"Model slug (optional) [{current_model}]: "
if current_model
else "Model slug (optional): "
)
model_prompt = f"Model slug (optional) [{current_model}]: " if current_model else "Model slug (optional): "
model = input(model_prompt).strip()
except (KeyboardInterrupt, EOFError):
print()
return
model = model or current_model
try:
api_key = getpass.getpass(
"API key (optional, blank = use OPENAI_API_KEY): "
).strip()
api_key = getpass.getpass("API key (optional, blank = use OPENAI_API_KEY): ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
_save_aux_choice(
task,
provider="custom",
model=model,
base_url=url,
api_key=api_key,
task, provider="custom", model=model, base_url=url, api_key=api_key,
)
short_url = url.replace("https://", "").replace("http://", "").rstrip("/")
print(f"{display_name}: custom ({short_url})" + (f" · {model}" if model else ""))
@@ -2160,9 +2118,7 @@ def _model_flow_ai_gateway(config, current_model=""):
api_key = get_env_value("AI_GATEWAY_API_KEY")
if not api_key:
print("No Vercel AI Gateway API key configured.")
print(
"Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway"
)
print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway")
print("Add a payment method to get $5 in free credits.")
print()
try:
@@ -2962,9 +2918,7 @@ def _model_flow_named_custom(config, provider_info):
print("Fetching available models...")
models = fetch_api_models(
api_key,
base_url,
timeout=8.0,
api_key, base_url, timeout=8.0,
api_mode=api_mode or None,
)
@@ -3635,12 +3589,7 @@ def _model_flow_stepfun(config, current_model=""):
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import (
get_env_value,
save_env_value,
load_config,
save_config,
)
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
from hermes_cli.models import fetch_api_models
provider_id = "stepfun"
@@ -3659,7 +3608,6 @@ def _model_flow_stepfun(config, current_model=""):
if key_env:
try:
import getpass
new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
except (KeyboardInterrupt, EOFError):
print()
@@ -3685,10 +3633,7 @@ def _model_flow_stepfun(config, current_model=""):
current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)
region_choices = [
(
"international",
f"International ({_stepfun_base_url_for_region('international')})",
),
("international", f"International ({_stepfun_base_url_for_region('international')})"),
("china", f"China ({_stepfun_base_url_for_region('china')})"),
]
ordered_regions = []
@@ -4531,7 +4476,6 @@ def cmd_webhook(args):
def cmd_hooks(args):
"""Shell-hook inspection and management."""
from hermes_cli.hooks import hooks_command
hooks_command(args)
@@ -6103,9 +6047,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
import signal as _signal
def _wait_for_service_active(
scope_cmd_: list,
svc_name_: str,
timeout: float = 10.0,
scope_cmd_: list, svc_name_: str, timeout: float = 10.0,
) -> bool:
"""Poll ``systemctl is-active`` until the unit reports active.
@@ -6119,9 +6061,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
try:
_verify = subprocess.run(
scope_cmd_ + ["is-active", svc_name_],
capture_output=True,
text=True,
timeout=5,
capture_output=True, text=True, timeout=5,
)
if _verify.stdout.strip() == "active":
return True
@@ -6131,57 +6071,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
return False
_time.sleep(0.5)
def _service_restart_sec(
    scope_cmd_: list,
    svc_name_: str,
    default: float = 0.0,
) -> float:
    """Read the unit's ``RestartUSec`` (RestartSec) in seconds.

    After a graceful exit-75, systemd waits ``RestartSec`` before
    respawning the unit.  Callers that poll for ``is-active`` must use a
    timeout >= ``RestartSec`` + transition slack, or they'll give up
    *during* the cooldown window and wrongly conclude the unit didn't
    relaunch.

    Args:
        scope_cmd_: argv prefix that ``show <unit> --property=RestartUSec
            --value`` is appended to.
        svc_name_: name of the unit to query.
        default: value returned whenever the delay cannot be determined.

    Returns:
        The restart delay in seconds, or ``default`` when the command
        cannot be run or times out, prints nothing, prints ``infinity``,
        or prints any component that cannot be parsed.
    """
    try:
        _show = subprocess.run(
            scope_cmd_
            + [
                "show",
                svc_name_,
                "--property=RestartUSec",
                "--value",
            ],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        # Binary missing / not executable / hung: treat as "unknown".
        return default

    raw = (_show.stdout or "").strip()
    # systemd emits values like "30s", "100ms", "1min 30s", "1h 30min",
    # or "infinity".  Parse conservatively; on any miss return default.
    if not raw or raw == "infinity":
        return default

    # Order matters: "ms" and "us" must be tried before the bare "s".
    _units = (
        ("min", 60.0),
        ("ms", 0.001),
        ("us", 0.000001),
        ("w", 604800.0),
        ("d", 86400.0),
        ("h", 3600.0),
        ("s", 1.0),
    )
    total = 0.0
    for part in raw.split():
        for _suf, _mult in _units:
            if part.endswith(_suf):
                break
        else:
            # Unknown unit (e.g. "1y"): a partial sum would silently
            # under-estimate the cooldown, so give up instead.
            return default
        try:
            total += float(part[: -len(_suf)]) * _mult
        except ValueError:
            # Suffix matched by accident (e.g. "1month" ends with "h").
            return default
    return total
# Drain budget for graceful SIGUSR1 restarts. The gateway drains
# for up to ``agent.restart_drain_timeout`` (default 60s) before
# exiting with code 75; we wait slightly longer so the drain
@@ -6197,17 +6086,12 @@ def _cmd_update_impl(args, gateway_mode: bool):
_cfg_drain = None
try:
from hermes_cli.config import load_config
_cfg_agent = load_config().get("agent") or {}
_cfg_agent = (load_config().get("agent") or {})
_cfg_drain = _cfg_agent.get("restart_drain_timeout")
except Exception:
pass
try:
_drain_budget = (
float(_cfg_drain)
if _cfg_drain is not None
else float(_DEFAULT_DRAIN)
)
_drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN)
except (TypeError, ValueError):
_drain_budget = float(_DEFAULT_DRAIN)
# Add a 15s margin so the drain loop + final exit finish before
@@ -6272,23 +6156,14 @@ def _cmd_update_impl(args, gateway_mode: bool):
_main_pid = 0
try:
_show = subprocess.run(
scope_cmd
+ [
"show",
svc_name,
"--property=MainPID",
"--value",
scope_cmd + [
"show", svc_name,
"--property=MainPID", "--value",
],
capture_output=True,
text=True,
timeout=5,
capture_output=True, text=True, timeout=5,
)
_main_pid = int((_show.stdout or "").strip() or 0)
except (
ValueError,
subprocess.TimeoutExpired,
FileNotFoundError,
):
except (ValueError, subprocess.TimeoutExpired, FileNotFoundError):
_main_pid = 0
_graceful_ok = False
@@ -6297,32 +6172,18 @@ def _cmd_update_impl(args, gateway_mode: bool):
f"{svc_name}: draining (up to {int(_drain_budget)}s)..."
)
_graceful_ok = _graceful_restart_via_sigusr1(
_main_pid,
drain_timeout=_drain_budget,
_main_pid, drain_timeout=_drain_budget,
)
if _graceful_ok:
# Gateway exited 75; systemd should relaunch
# via Restart=on-failure. The unit's
# RestartSec (default 30s on ours) gates the
# respawn — poll past that + slack so we
# don't give up mid-cooldown and falsely
# print "drained but didn't relaunch". For
# units without RestartSec set we fall back
# to the original 10s budget.
_restart_sec = _service_restart_sec(
scope_cmd,
svc_name,
default=0.0,
)
_post_drain_timeout = max(
10.0,
_restart_sec + 10.0,
)
# via Restart=on-failure. Poll is-active for
# up to ~10s because the unit's Stopped ->
# Started transition can take a few seconds
# after the old PID exits, and a one-shot
# check races that window.
if _wait_for_service_active(
scope_cmd,
svc_name,
timeout=_post_drain_timeout,
scope_cmd, svc_name, timeout=10.0,
):
restarted_services.append(svc_name)
continue
@@ -6350,9 +6211,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
# restart. systemctl restart returns 0 even
# if the new process crashes immediately.
if _wait_for_service_active(
scope_cmd,
svc_name,
timeout=10.0,
scope_cmd, svc_name, timeout=10.0,
):
restarted_services.append(svc_name)
else:
@@ -6369,9 +6228,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
timeout=15,
)
if _wait_for_service_active(
scope_cmd,
svc_name,
timeout=10.0,
scope_cmd, svc_name, timeout=10.0,
):
restarted_services.append(svc_name)
print(f"{svc_name} recovered on retry")
@@ -6890,17 +6747,13 @@ def cmd_dashboard(args):
from hermes_cli.web_server import start_server
gui_mode = getattr(args, "gui", False)
embedded_chat = (
gui_mode or args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1"
)
embedded_chat = args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1"
start_server(
host=args.host,
port=args.port,
open_browser=not args.no_open,
allow_public=getattr(args, "insecure", False),
embedded_chat=embedded_chat,
gui_mode=gui_mode,
)
@@ -6983,40 +6836,6 @@ For more help on a command:
parser.add_argument(
"--version", "-V", action="store_true", help="Show version and exit"
)
parser.add_argument(
"-z",
"--oneshot",
metavar="PROMPT",
default=None,
help=(
"One-shot mode: send a single prompt and print ONLY the final "
"response text to stdout. No banner, no spinner, no tool "
"previews, no session_id line. Tools, memory, rules, and "
"AGENTS.md in the CWD are loaded as normal; approvals are "
"auto-bypassed. Intended for scripts / pipes."
),
)
# --model / --provider are accepted at the top level so they can pair
# with -z without needing the `chat` subcommand. If neither -z nor a
# subcommand consumes them, they fall through harmlessly as None.
# Mirrors `hermes chat --model ... --provider ...` semantics.
parser.add_argument(
"-m",
"--model",
default=None,
help=(
"Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
),
)
parser.add_argument(
"--provider",
default=None,
help=(
"Provider override for this invocation (e.g. openrouter, anthropic). "
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
),
)
parser.add_argument(
"--resume",
"-r",
@@ -7594,39 +7413,17 @@ For more help on a command:
"reset", help="Clear exhaustion status for all credentials for a provider"
)
auth_reset.add_argument("provider", help="Provider id")
auth_status = auth_subparsers.add_parser(
"status", help="Show auth status for a provider"
)
auth_status = auth_subparsers.add_parser("status", help="Show auth status for a provider")
auth_status.add_argument("provider", help="Provider id")
auth_logout = auth_subparsers.add_parser(
"logout", help="Log out a provider and clear stored auth state"
)
auth_logout = auth_subparsers.add_parser("logout", help="Log out a provider and clear stored auth state")
auth_logout.add_argument("provider", help="Provider id")
auth_spotify = auth_subparsers.add_parser(
"spotify", help="Authenticate Hermes with Spotify via PKCE"
)
auth_spotify.add_argument(
"spotify_action",
nargs="?",
choices=["login", "status", "logout"],
default="login",
)
auth_spotify.add_argument(
"--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)"
)
auth_spotify.add_argument(
"--redirect-uri",
help="Allow-listed localhost redirect URI for your Spotify app",
)
auth_spotify = auth_subparsers.add_parser("spotify", help="Authenticate Hermes with Spotify via PKCE")
auth_spotify.add_argument("spotify_action", nargs="?", choices=["login", "status", "logout"], default="login")
auth_spotify.add_argument("--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)")
auth_spotify.add_argument("--redirect-uri", help="Allow-listed localhost redirect URI for your Spotify app")
auth_spotify.add_argument("--scope", help="Override requested Spotify scopes")
auth_spotify.add_argument(
"--no-browser",
action="store_true",
help="Do not attempt to open the browser automatically",
)
auth_spotify.add_argument(
"--timeout", type=float, help="Callback/token exchange timeout in seconds"
)
auth_spotify.add_argument("--no-browser", action="store_true", help="Do not attempt to open the browser automatically")
auth_spotify.add_argument("--timeout", type=float, help="Callback/token exchange timeout in seconds")
auth_parser.set_defaults(func=cmd_auth)
# =========================================================================
@@ -7836,8 +7633,7 @@ For more help on a command:
hooks_subparsers = hooks_parser.add_subparsers(dest="hooks_action")
hooks_subparsers.add_parser(
"list",
aliases=["ls"],
"list", aliases=["ls"],
help="List configured hooks with matcher, timeout, and consent status",
)
@@ -7850,18 +7646,14 @@ For more help on a command:
help="Hook event name (e.g. pre_tool_call, pre_llm_call, subagent_stop)",
)
_hk_test.add_argument(
"--for-tool",
dest="for_tool",
default=None,
"--for-tool", dest="for_tool", default=None,
help=(
"Only fire hooks whose matcher matches this tool name "
"(used for pre_tool_call / post_tool_call)"
),
)
_hk_test.add_argument(
"--payload-file",
dest="payload_file",
default=None,
"--payload-file", dest="payload_file", default=None,
help=(
"Path to a JSON file whose contents are merged into the "
"synthetic payload before execution"
@@ -7869,8 +7661,7 @@ For more help on a command:
)
_hk_revoke = hooks_subparsers.add_parser(
"revoke",
aliases=["remove", "rm"],
"revoke", aliases=["remove", "rm"],
help="Remove a command's allowlist entries (takes effect on next restart)",
)
_hk_revoke.add_argument(
@@ -9156,11 +8947,6 @@ Examples:
"Alternatively set HERMES_DASHBOARD_TUI=1."
),
)
dashboard_parser.add_argument(
"--gui",
action="store_true",
help="Run dashboard in GUI-shell mode; implies --tui",
)
dashboard_parser.set_defaults(func=cmd_dashboard)
# =========================================================================
@@ -9303,28 +9089,26 @@ Examples:
# the nested subcommand (dest varies by parser).
_AGENT_COMMANDS = {None, "chat", "acp", "rl"}
_AGENT_SUBCOMMANDS = {
"cron": ("cron_command", {"run", "tick"}),
"cron": ("cron_command", {"run", "tick"}),
"gateway": ("gateway_command", {"run"}),
"mcp": ("mcp_action", {"serve"}),
"mcp": ("mcp_action", {"serve"}),
}
_sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
if args.command in _AGENT_COMMANDS or (
_sub_attr and getattr(args, _sub_attr, None) in _sub_set
if (
args.command in _AGENT_COMMANDS
or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set)
):
_accept_hooks = bool(getattr(args, "accept_hooks", False))
try:
from hermes_cli.plugins import discover_plugins
discover_plugins()
except Exception:
logger.debug(
"plugin discovery failed at CLI startup",
exc_info=True,
"plugin discovery failed at CLI startup", exc_info=True,
)
try:
from hermes_cli.config import load_config
from agent.shell_hooks import register_from_config
register_from_config(load_config(), accept_hooks=_accept_hooks)
except Exception:
logger.debug(
@@ -9332,19 +9116,6 @@ Examples:
exc_info=True,
)
# Handle top-level --oneshot / -z: single-shot mode, stdout = final
# response only, nothing else. Bypasses cli.py entirely.
if getattr(args, "oneshot", None):
from hermes_cli.oneshot import run_oneshot
sys.exit(
run_oneshot(
args.oneshot,
model=getattr(args, "model", None),
provider=getattr(args, "provider", None),
)
)
# Handle top-level --resume / --continue as shortcut to chat
if (args.resume or args.continue_last) and args.command is None:
args.command = "chat"
+73 -124
View File
@@ -1379,124 +1379,6 @@ def curated_models_for_provider(
return [(m, "") for m in models]
def _provider_keys(provider: str) -> set[str]:
    """Return the non-empty identifiers *provider* may be looked up under.

    The set holds the stripped, lower-cased raw value and whatever
    ``normalize_provider`` returns for it; falsy entries are dropped.
    """
    candidates = {
        (provider or "").strip().lower(),
        normalize_provider(provider),
    }
    return {candidate for candidate in candidates if candidate}
def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:
    """Tell whether *name_lower* is listed for any of *providers*.

    Catalog entries are lower-cased before comparison; *name_lower* is
    compared as given, so callers pass it already lower-cased.
    """
    for provider in providers:
        for model in _PROVIDER_MODELS.get(provider, []):
            if model.lower() == name_lower:
                return True
    return False
# Aggregators list other providers' models, so static detection in this
# module never auto-switches TO one of them.
_AGGREGATOR_PROVIDERS = frozenset(
    ("nous", "openrouter", "ai-gateway", "copilot", "kilocode")
)
def _resolve_static_model_alias(
    name_lower: str,
    current_keys: set[str],
) -> Optional[tuple[str, str]]:
    """Resolve short aliases (e.g. sonnet/opus) using static catalogs only.

    Returns ``(provider, model)`` for the first catalog entry whose
    lower-cased name starts with the alias prefix, or ``None`` when the
    alias table is unavailable, the alias is unknown, or nothing matches.
    Providers in *current_keys* are tried first, then every other
    non-aggregator provider.
    """
    try:
        from hermes_cli.model_switch import MODEL_ALIASES
    except Exception:
        return None

    identity = MODEL_ALIASES.get(name_lower)
    if identity is None:
        return None

    vendor, family = identity.vendor, identity.family

    def _first_catalog_hit(provider: str) -> Optional[str]:
        catalog = _PROVIDER_MODELS.get(provider, [])
        if not catalog:
            return None
        # Aggregator catalogs are matched on "vendor/family", direct
        # providers on the bare family name.
        if provider in _AGGREGATOR_PROVIDERS:
            wanted = f"{vendor}/{family}".lower()
        else:
            wanted = family.lower()
        return next(
            (entry for entry in catalog if entry.lower().startswith(wanted)),
            None,
        )

    # 1) The provider(s) the user is already on.
    for provider in current_keys:
        hit = _first_catalog_hit(provider)
        if hit:
            return provider, hit

    # 2) Any other direct (non-aggregator) provider.
    for provider in _PROVIDER_MODELS:
        if provider in current_keys or provider in _AGGREGATOR_PROVIDERS:
            continue
        hit = _first_catalog_hit(provider)
        if hit:
            return provider, hit

    # 3) Aggregators the user is currently on.
    # NOTE(review): every provider in current_keys was already tried in
    # step 1, so this pass looks unreachable -- confirm the intent.
    for provider in _AGGREGATOR_PROVIDERS:
        if provider not in current_keys:
            continue
        hit = _first_catalog_hit(provider)
        if hit:
            return provider, hit

    return None
def detect_static_provider_for_model(
    model_name: str,
    current_provider: str,
) -> Optional[tuple[str, str]]:
    """Pick a provider for *model_name* by consulting static catalogs only.

    Returns ``(provider_id, model_name)``; the model name may be remapped
    when a static alias or a bare provider name resolves to a catalog
    default.  Returns ``None`` when the name is blank, already belongs to
    the current provider's catalog, or no confident match is found.
    """
    requested = (model_name or "").strip()
    if not requested:
        return None

    lowered = requested.lower()
    active_keys = _provider_keys(current_provider)

    aliased = _resolve_static_model_alias(lowered, active_keys)
    if aliased:
        return aliased

    # --- Step 0: bare provider name typed as model ---
    # `/model nous` or `/model anthropic` is treated as a provider switch
    # that selects the first model in that provider's catalog.  "custom"
    # has no model catalog and "openrouter" needs an explicit model name,
    # so both are excluded.
    candidate = _PROVIDER_ALIASES.get(lowered, lowered)
    if candidate not in {"custom", "openrouter"}:
        catalog = _PROVIDER_MODELS.get(candidate, [])
        if candidate in _PROVIDER_LABELS and catalog:
            if candidate not in active_keys:
                return (candidate, catalog[0])

    # Already served by the current provider: nothing to switch.
    if _model_in_provider_catalog(lowered, active_keys):
        return None

    # --- Step 1: exact match in another provider's static catalog ---
    # Aggregators list other providers' models, so never switch TO them.
    for pid, models in _PROVIDER_MODELS.items():
        if pid in active_keys or pid in _AGGREGATOR_PROVIDERS:
            continue
        for listed in models:
            if listed.lower() == lowered:
                return (pid, requested)

    return None
def detect_provider_for_model(
model_name: str,
current_provider: str,
@@ -1509,19 +1391,86 @@ def detect_provider_for_model(
Priority:
0. Bare provider name switch to that provider's default model
1. Direct provider static catalog match
2. OpenRouter catalog match
1. Direct provider with credentials (highest)
2. Direct provider without credentials remap to OpenRouter slug
3. OpenRouter catalog match
"""
name = (model_name or "").strip()
if not name:
return None
static_match = detect_static_provider_for_model(name, current_provider)
if static_match:
return static_match
if _model_in_provider_catalog(name.lower(), _provider_keys(current_provider)):
name_lower = name.lower()
# --- Step 0: bare provider name typed as model ---
# If someone types `/model nous` or `/model anthropic`, treat it as a
# provider switch and pick the first model from that provider's catalog.
# Skip "custom" and "openrouter" — custom has no model catalog, and
# openrouter requires an explicit model name to be useful.
resolved_provider = _PROVIDER_ALIASES.get(name_lower, name_lower)
if resolved_provider not in {"custom", "openrouter"}:
default_models = _PROVIDER_MODELS.get(resolved_provider, [])
if (
resolved_provider in _PROVIDER_LABELS
and default_models
and resolved_provider != normalize_provider(current_provider)
):
return (resolved_provider, default_models[0])
# Aggregators list other providers' models — never auto-switch TO them
_AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
# If the model belongs to the current provider's catalog, don't suggest switching
current_models = _PROVIDER_MODELS.get(current_provider, [])
if any(name_lower == m.lower() for m in current_models):
return None
# --- Step 1: check static provider catalogs for a direct match ---
direct_match: Optional[str] = None
for pid, models in _PROVIDER_MODELS.items():
if pid == current_provider or pid in _AGGREGATORS:
continue
if any(name_lower == m.lower() for m in models):
direct_match = pid
break
if direct_match:
# Check if we have credentials for this provider — env vars,
# credential pool, or auth store entries.
has_creds = False
try:
from hermes_cli.auth import PROVIDER_REGISTRY
pconfig = PROVIDER_REGISTRY.get(direct_match)
if pconfig:
for env_var in pconfig.api_key_env_vars:
if os.getenv(env_var, "").strip():
has_creds = True
break
except Exception:
pass
# Also check credential pool and auth store — covers OAuth,
# Claude Code tokens, and other non-env-var credentials (#10300).
if not has_creds:
try:
from agent.credential_pool import load_pool
pool = load_pool(direct_match)
if pool.has_credentials():
has_creds = True
except Exception:
pass
if not has_creds:
try:
from hermes_cli.auth import _load_auth_store
store = _load_auth_store()
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
has_creds = True
except Exception:
pass
# Always return the direct provider match. If credentials are
# missing, the client init will give a clear error rather than
# silently routing through the wrong provider (#10300).
return (direct_match, name)
# --- Step 2: check OpenRouter catalog ---
# First try exact match (handles provider/model format)
or_slug = _find_openrouter_slug(name)
-202
View File
@@ -1,202 +0,0 @@
"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
Bypasses cli.py entirely. No banner, no spinner, no session_id line,
no stderr chatter. Just the agent's final text to stdout.
Toolsets = whatever the user has configured for "cli" in `hermes tools`.
Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
Model / provider selection mirrors `hermes chat`:
- Both optional. If omitted, use the user's configured default.
- If both given, pair them exactly as given.
- If only --model given, auto-detect the provider that serves it.
- If only --provider given, error out (ambiguous: caller must pick a model).
Env var fallbacks (used when the corresponding arg is not passed):
- HERMES_INFERENCE_MODEL
- HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider)
"""
from __future__ import annotations
import logging
import os
import sys
from contextlib import redirect_stderr, redirect_stdout
from typing import Optional
def run_oneshot(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> int:
    """Execute a single prompt and print only the final content block.

    Args:
        prompt: The user message to send.
        model: Optional model override.  Falls back to HERMES_INFERENCE_MODEL
            env var, then config.yaml's model.default / model.model.
        provider: Optional provider override.  Falls back to
            HERMES_INFERENCE_PROVIDER env var, then config.yaml's
            model.provider, then "auto".

    Returns:
        The exit code: 2 when a provider is given without any model
        (argument or HERMES_INFERENCE_MODEL), otherwise 0.  Caller should
        sys.exit() with the return.

    Side effects:
        Disables stdlib logging up to CRITICAL and sets HERMES_YOLO_MODE
        and HERMES_ACCEPT_HOOKS in ``os.environ``; neither is restored.
    """
    # Silence every stdlib logger for the duration.  AIAgent, tools, and
    # provider adapters all log to stderr through the root logger.
    logging.disable(logging.CRITICAL)

    # --provider without --model is ambiguous: carrying the configured
    # model across to a different provider is usually wrong, and silently
    # picking the provider's catalog default hides the mismatch.  Validate
    # BEFORE the stderr redirect so the message reaches the terminal.
    # Both values are stripped so a blank provider counts as "not given",
    # matching how _run_agent interprets it.
    env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    provider_given = bool((provider or "").strip())
    model_given = bool((model or "").strip() or env_model_early)
    if provider_given and not model_given:
        sys.stderr.write(
            "hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
            "Pass both explicitly, or neither to use your configured defaults.\n"
        )
        return 2

    # Auto-approve any shell / tool approvals.  Non-interactive by
    # definition -- a prompt would hang forever.
    os.environ["HERMES_YOLO_MODE"] = "1"
    os.environ["HERMES_ACCEPT_HOOKS"] = "1"

    # Send stdout AND stderr to devnull for the entire call tree; the
    # final response is written to the real stdout afterwards.  The
    # context managers restore the streams and close the handle even if
    # the agent raises.
    real_stdout = sys.stdout
    with open(os.devnull, "w") as devnull:
        with redirect_stdout(devnull), redirect_stderr(devnull):
            response = _run_agent(prompt, model=model, provider=provider)

    if response:
        real_stdout.write(response)
        if not response.endswith("\n"):
            real_stdout.write("\n")
        real_stdout.flush()
    return 0
def _run_agent(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> str:
    """Run one conversation turn through a freshly built AIAgent.

    The agent is configured from the user's config, the "cli" toolsets,
    and the resolved runtime provider.  Returns the final response
    string ("" when the agent returns nothing).
    """
    # Local imports: keep top-level CLI startup cheap for other commands.
    from hermes_cli.config import load_config
    from hermes_cli.models import detect_provider_for_model
    from hermes_cli.runtime_provider import resolve_runtime_provider
    from hermes_cli.tools_config import _get_platform_tools
    from run_agent import AIAgent

    cfg = load_config()
    model_cfg = cfg.get("model") or {}

    # Effective model: explicit arg -> env var -> config.
    requested_model = (model or "").strip()
    env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    if isinstance(model_cfg, str):
        configured_model = model_cfg
    else:
        configured_model = model_cfg.get("default") or model_cfg.get("model") or ""
    effective_model = requested_model or env_model or configured_model

    # Effective provider: explicit arg -> auto-detected from an explicitly
    # requested model -> env / config (inside resolve_runtime_provider).
    #
    # Auto-detection only runs when the model came from the argument or
    # the env var.  A model taken from config is the "use my defaults"
    # path, where the configured provider is already the right one.
    effective_provider = (provider or "").strip() or None
    explicit_model = requested_model or env_model
    if effective_provider is None and explicit_model:
        if isinstance(model_cfg, dict):
            cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        else:
            cfg_provider = ""
        env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
        current_provider = cfg_provider or env_provider or "auto"
        detected = detect_provider_for_model(explicit_model, current_provider)
        if detected:
            effective_provider, effective_model = detected

    runtime = resolve_runtime_provider(
        requested=effective_provider,
        target_model=effective_model or None,
    )

    # Toolsets enabled for "cli"; sorted() gives a stable list for
    # AIAgent's signature.
    toolsets_list = sorted(_get_platform_tools(cfg, "cli"))

    agent = AIAgent(
        api_key=runtime.get("api_key"),
        base_url=runtime.get("base_url"),
        provider=runtime.get("provider"),
        api_mode=runtime.get("api_mode"),
        model=effective_model,
        enabled_toolsets=toolsets_list,
        quiet_mode=True,
        platform="cli",
        credential_pool=runtime.get("credential_pool"),
        # The clarify callback is the only interactive callback wired in:
        # oneshot mode has no user at a terminal, so it returns a
        # synthetic "pick a default" instruction instead of stalling.
        # Approvals are handled through the HERMES_YOLO_MODE /
        # HERMES_ACCEPT_HOOKS environment variables set by the caller.
        clarify_callback=_oneshot_clarify_callback,
    )

    # Belt-and-braces: make sure AIAgent doesn't invoke any streaming
    # display callbacks that would bypass the stdout capture.
    agent.suppress_status_output = True
    agent.stream_delta_callback = None
    agent.tool_gen_callback = None

    return agent.chat(prompt) or ""
def _oneshot_clarify_callback(question: str, choices=None) -> str:
    """Answer a clarify request on behalf of the absent user.

    Oneshot mode has nobody to ask, so the agent is told to decide for
    itself: pick from *choices* when some are offered, otherwise make a
    reasonable assumption.  *question* is accepted but not used.
    """
    if not choices:
        return (
            "[oneshot mode: no user available. Make the most reasonable "
            "assumption you can and continue.]"
        )
    return (
        f"[oneshot mode: no user available. Pick the best option from "
        f"{choices} using your own judgment and continue.]"
    )
+146 -547
View File
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -43,7 +43,7 @@ _TIMEOUT = 30.0
# ---------------------------------------------------------------------------
# Process-level atexit safety net — ensures pending sessions are committed
# even if shutdown_memory_provider is never called (e.g. gateway crash,
# SIGKILL, or exception in the session expiry watcher preventing shutdown).
# SIGKILL, or exception in _async_flush_memories preventing shutdown).
# ---------------------------------------------------------------------------
_last_active_provider: Optional["OpenVikingMemoryProvider"] = None
+359 -21
View File
@@ -1578,6 +1578,7 @@ class AIAgent:
self._memory_enabled = False
self._user_profile_enabled = False
self._memory_nudge_interval = 10
self._memory_flush_min_turns = 6
self._turns_since_memory = 0
self._iters_since_skill = 0
if not skip_memory:
@@ -1586,6 +1587,7 @@ class AIAgent:
self._memory_enabled = mem_config.get("memory_enabled", False)
self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
if self._memory_enabled or self._user_profile_enabled:
from tools.memory_tool import MemoryStore
self._memory_store = MemoryStore(
@@ -2400,6 +2402,34 @@ class AIAgent:
provider=getattr(self, "provider", ""),
)
# Also resolve the flush_memories auxiliary model — it may differ
# from the compression model when the user configures separate
# auxiliary.flush_memories.provider/model, or when the fallback
# chain lands on a different provider. flush_memories runs with
# the FULL pre-compression conversation, so its model's context
# must also be respected.
try:
flush_client, flush_model = get_text_auxiliary_client(
"flush_memories",
main_runtime=self._current_main_runtime(),
)
if flush_client and flush_model:
_flush_ctx = get_model_context_length(
flush_model,
base_url=str(getattr(flush_client, "base_url", "") or ""),
api_key=str(getattr(flush_client, "api_key", "") or ""),
provider=getattr(self, "provider", ""),
)
if _flush_ctx and _flush_ctx < aux_context:
logger.info(
"flush_memories model %s context (%d) < compression "
"model %s context (%d) — using the smaller value",
flush_model, _flush_ctx, aux_model, aux_context,
)
aux_context = _flush_ctx
except Exception:
pass # Non-fatal — fall through with compression model's context
# Hard floor: the auxiliary compression model must have at least
# MINIMUM_CONTEXT_LENGTH (64K) tokens of context. The main model
# is already required to meet this floor (checked earlier in
@@ -2419,18 +2449,25 @@ class AIAgent:
)
threshold = self.context_compressor.threshold_tokens
if aux_context < threshold:
# Auto-correct: lower the live session threshold so
# compression actually works this session. The hard floor
# above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
# so the new threshold is always >= 64K.
#
# The compression summariser sends a single user-role
# prompt (no system prompt, no tools) to the aux model, so
# new_threshold == aux_context is safe: the request is
# the raw messages plus a small summarisation instruction.
# Headroom: the threshold budgets RAW MESSAGES only, but the
# actual request auxiliary callers (compression summariser and
# flush_memories) send also includes the system prompt and every
# tool schema. We must ensure threshold + headroom <= aux_context
# or the first compression/flush request will overflow.
#
# This applies even when aux_context > threshold (the common
# same-model case after a155b4a1) — e.g. 128K context, 85%
# threshold = 108K, 20K overhead → 108K + 20K = 128K exactly
# at the limit, and any token-estimate variance causes a 400.
from agent.model_metadata import estimate_request_tokens_rough
tool_overhead = estimate_request_tokens_rough([], tools=self.tools)
headroom = tool_overhead + 12_000
effective_limit = max(aux_context - headroom, MINIMUM_CONTEXT_LENGTH)
if effective_limit < threshold:
old_threshold = threshold
new_threshold = aux_context
new_threshold = effective_limit
self.context_compressor.threshold_tokens = new_threshold
# Keep threshold_percent in sync so future main-model
# context_length changes (update_model) re-derive from a
@@ -5141,8 +5178,6 @@ class AIAgent:
# response.incomplete instead of response.completed).
self._codex_streamed_text_parts: list = []
for attempt in range(max_stream_retries + 1):
if self._interrupt_requested:
raise InterruptedError("Agent interrupted before Codex stream retry")
collected_output_items: list = []
try:
with active_client.responses.stream(**api_kwargs) as stream:
@@ -6312,14 +6347,6 @@ class AIAgent:
try:
for _stream_attempt in range(_max_stream_retries + 1):
# Check for interrupt before each retry attempt. Without
# this, /stop closes the HTTP connection (outer poll loop),
# but the retry loop opens a FRESH connection — negating the
# interrupt entirely. On slow providers (ollama-cloud) each
# retry can block for the full stream-read timeout (120s+),
# causing multi-minute delays between /stop and response.
if self._interrupt_requested:
raise InterruptedError("Agent interrupted before stream retry")
try:
if self.api_mode == "anthropic_messages":
self._try_refresh_anthropic_client_credentials()
@@ -7924,6 +7951,315 @@ class AIAgent:
"""
return self.api_mode != "codex_responses"
def flush_memories(self, messages: list = None, min_turns: int = None):
    """Give the model one turn to persist memories before context is lost.

    Called before compression, session reset, or CLI exit. Appends a
    temporary flush prompt to ``messages``, makes one API call that offers
    only the ``memory`` tool, executes any memory tool calls, then removes
    the flush prompt (and anything appended after it) from ``messages``.

    The request is first sent through the auxiliary ``flush_memories``
    task. If that raises, the call is retried on the main client using the
    path that matches ``self.api_mode``. Errors are logged and reported via
    ``_emit_auxiliary_failure``; they are never raised to the caller.

    Args:
        messages: The current conversation messages. If None, uses
            self._session_messages (last run_conversation state). The
            list is mutated temporarily and restored before returning.
        min_turns: Minimum user turns required to trigger the flush.
            None = use config value (flush_min_turns); a config value
            of 0 then disables the flush entirely.
            0 = always flush (used for compression).

    Returns:
        None.
    """
    if self._memory_flush_min_turns == 0 and min_turns is None:
        return
    if "memory" not in self.valid_tool_names or not self._memory_store:
        return
    effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
    if self._user_turn_count < effective_min:
        return

    if messages is None:
        messages = getattr(self, '_session_messages', None)
    if not messages or len(messages) < 3:
        return

    flush_content = (
        "[System: The session is being compressed. "
        "Save anything worth remembering — prioritize user preferences, "
        "corrections, and recurring patterns over task-specific details.]"
    )
    # The sentinel lets the cleanup in ``finally`` find the flush message
    # by value rather than by object identity.
    _sentinel = f"__flush_{id(self)}_{time.monotonic()}"
    flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
    messages.append(flush_msg)

    try:
        # Build API messages for the flush call
        _needs_sanitize = self._should_sanitize_tool_calls()
        api_messages = []
        for msg in messages:
            api_msg = msg.copy()
            self._copy_reasoning_content_for_api(msg, api_msg)
            # Strip internal bookkeeping keys before sending.
            api_msg.pop("reasoning", None)
            api_msg.pop("finish_reason", None)
            api_msg.pop("_flush_sentinel", None)
            api_msg.pop("_thinking_prefill", None)
            if _needs_sanitize:
                self._sanitize_tool_calls_for_strict_api(api_msg)
            api_messages.append(api_msg)

        if self._cached_system_prompt:
            api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages

        # Make one API call with only the memory tool available
        memory_tool_def = None
        for t in (self.tools or []):
            if t.get("function", {}).get("name") == "memory":
                memory_tool_def = t
                break

        if not memory_tool_def:
            # Nothing to call. The flush message is removed by the
            # ``finally`` block; popping it here as well used to leave the
            # cleanup loop without a sentinel to stop at, which emptied
            # the caller's entire message list.
            return

        # ── Defence-in-depth: trim messages to fit auxiliary context ──
        #
        # _check_compression_model_feasibility already lowers the
        # compression threshold so conversations *triggered by preflight
        # compression* should fit. But flush_memories is also called
        # from CLI /new and gateway session resets — paths that bypass
        # the preflight check entirely. Trim here as a safety net.
        try:
            from agent.auxiliary_client import get_text_auxiliary_client
            from agent.model_metadata import (
                get_model_context_length,
                estimate_messages_tokens_rough,
            )
            _fc, _fm = get_text_auxiliary_client(
                "flush_memories",
                main_runtime=self._current_main_runtime(),
            )
            _fctx = 0
            if _fc and _fm:
                _fctx = get_model_context_length(
                    _fm,
                    base_url=str(getattr(_fc, "base_url", "") or ""),
                    api_key=str(getattr(_fc, "api_key", "") or ""),
                    provider=getattr(self, "provider", ""),
                )
            if not _fctx:
                # Fall back to the compressor's recorded context length.
                _fctx = getattr(
                    getattr(self, "context_compressor", None),
                    "context_length", 0,
                )
            if _fctx:
                _budget = _fctx - 5120 - 500  # output + tool schema
                if _budget > 0:
                    _est = estimate_messages_tokens_rough(api_messages)
                    if _est > _budget:
                        _sys = []
                        _conv = api_messages
                        if api_messages and api_messages[0].get("role") == "system":
                            _sys = [api_messages[0]]
                            _conv = api_messages[1:]
                        _rem = _budget - estimate_messages_tokens_rough(_sys)
                        # Keep the newest messages that fit, walking
                        # backwards from the end of the conversation.
                        _kept: list = []
                        _acc = 0
                        for _m in reversed(_conv):
                            _mt = estimate_messages_tokens_rough([_m])
                            if _acc + _mt > _rem:
                                break
                            _kept.append(_m)
                            _acc += _mt
                        _kept.reverse()
                        # Always send at least the last three messages
                        # when that many exist, even if over budget.
                        if len(_kept) < 3 and len(_conv) >= 3:
                            _kept = _conv[-3:]
                        api_messages = _sys + _kept
                        logger.info(
                            "flush_memories: trimmed %d -> %d msgs to fit "
                            "%d-token aux context",
                            len(_sys) + len(_conv), len(api_messages), _fctx,
                        )
        except Exception as _te:
            logger.debug("flush_memories: context trim failed: %s", _te)

        # Use auxiliary client for the flush call when available --
        # it's cheaper and avoids Codex Responses API incompatibility.
        from agent.auxiliary_client import (
            call_llm as _call_llm,
            _fixed_temperature_for_model,
            OMIT_TEMPERATURE,
        )
        _aux_available = True
        # Kimi models manage temperature server-side — omit it entirely.
        # Other models with a fixed contract get that value; everyone else
        # gets the historical 0.3 default.
        _fixed_temp = _fixed_temperature_for_model(self.model, self.base_url)
        _omit_temperature = _fixed_temp is OMIT_TEMPERATURE
        if _omit_temperature:
            _flush_temperature = None
        elif _fixed_temp is not None:
            _flush_temperature = _fixed_temp
        else:
            _flush_temperature = 0.3
        aux_error = None
        try:
            response = _call_llm(
                task="flush_memories",
                messages=api_messages,
                tools=[memory_tool_def],
                temperature=_flush_temperature,
                max_tokens=5120,
                # timeout resolved from auxiliary.flush_memories.timeout config
            )
        except Exception as e:
            aux_error = e
            _aux_available = False
            response = None

        if not _aux_available and self.api_mode == "codex_responses":
            # No auxiliary client -- use the Codex Responses path directly.
            # The Responses API does not accept `temperature` on any
            # supported backend (chatgpt.com/backend-api/codex rejects it
            # outright; api.openai.com + gpt-5/o-series reasoning models
            # and Copilot Responses reject it on reasoning models). The
            # transport intentionally never sets it — strip any leftover
            # here so the flush fallback matches the main-loop behavior.
            codex_kwargs = self._build_api_kwargs(api_messages)
            _ct_flush = self._get_transport()
            if _ct_flush is not None:
                codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
            elif not codex_kwargs.get("tools"):
                codex_kwargs["tools"] = [memory_tool_def]
            codex_kwargs.pop("temperature", None)
            if "max_output_tokens" in codex_kwargs:
                codex_kwargs["max_output_tokens"] = 5120
            response = self._run_codex_stream(codex_kwargs)
        elif not _aux_available and self.api_mode == "anthropic_messages":
            # Native Anthropic — use the transport for kwargs
            _tflush = self._get_transport()
            ant_kwargs = _tflush.build_kwargs(
                model=self.model, messages=api_messages,
                tools=[memory_tool_def], max_tokens=5120,
                reasoning_config=None,
                preserve_dots=self._anthropic_preserve_dots(),
            )
            response = self._anthropic_messages_create(ant_kwargs)
        elif not _aux_available:
            api_kwargs = {
                "model": self.model,
                "messages": api_messages,
                "tools": [memory_tool_def],
                **self._max_tokens_param(5120),
            }
            if _flush_temperature is not None:
                api_kwargs["temperature"] = _flush_temperature
            from agent.auxiliary_client import _get_task_timeout
            response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(
                **api_kwargs, timeout=_get_task_timeout("flush_memories")
            )

        if aux_error is not None:
            logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
            self._emit_auxiliary_failure("memory flush", aux_error)

        def _openai_tool_calls(resp):
            # Tool calls from an OpenAI-shaped response (.choices[0].message).
            if resp is not None and hasattr(resp, "choices") and resp.choices:
                msg = getattr(resp.choices[0], "message", None)
                calls = getattr(msg, "tool_calls", None)
                if calls:
                    return calls
            return []

        def _codex_output_tool_calls(resp):
            # Tool calls from ``function_call`` items in a Responses-style
            # ``.output`` list, reshaped to look like OpenAI tool calls.
            calls = []
            for item in getattr(resp, "output", []) or []:
                if getattr(item, "type", None) == "function_call":
                    calls.append(SimpleNamespace(
                        id=getattr(item, "call_id", None),
                        type="function",
                        function=SimpleNamespace(
                            name=getattr(item, "name", ""),
                            arguments=getattr(item, "arguments", "{}"),
                        ),
                    ))
            return calls

        # Extract tool calls from the response, handling all API formats
        tool_calls = []
        if self.api_mode == "codex_responses" and not _aux_available:
            _ct_flush = self._get_transport()
            _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
            if _cnr_flush and _cnr_flush.tool_calls:
                tool_calls = [
                    SimpleNamespace(
                        id=tc.id, type="function",
                        function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                    ) for tc in _cnr_flush.tool_calls
                ]
            else:
                tool_calls = _codex_output_tool_calls(response)
        elif self.api_mode == "anthropic_messages" and not _aux_available:
            _tfn = self._get_transport()
            _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
            if _flush_result and _flush_result.tool_calls:
                tool_calls = [
                    SimpleNamespace(
                        id=tc.id, type="function",
                        function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                    ) for tc in _flush_result.tool_calls
                ]
        elif self.api_mode in ("chat_completions", "bedrock_converse"):
            # chat_completions / bedrock — normalize through transport
            _tfn = self._get_transport()
            _flush_result = _tfn.normalize_response(response) if _tfn is not None else None
            if _flush_result and _flush_result.tool_calls:
                tool_calls = _flush_result.tool_calls
            else:
                tool_calls = _openai_tool_calls(response)
        elif _aux_available and hasattr(response, "choices") and response.choices:
            # Auxiliary client returned OpenAI-shaped response while main
            # api_mode is codex/anthropic — extract tool_calls from .choices
            tool_calls = _openai_tool_calls(response)

        for tc in tool_calls:
            if tc.function.name == "memory":
                try:
                    args = json.loads(tc.function.arguments)
                    flush_target = args.get("target", "memory")
                    from tools.memory_tool import memory_tool as _memory_tool
                    _memory_tool(
                        action=args.get("action"),
                        target=flush_target,
                        content=args.get("content"),
                        old_text=args.get("old_text"),
                        store=self._memory_store,
                    )
                    if self._memory_manager and args.get("action") in ("add", "replace"):
                        try:
                            self._memory_manager.on_memory_write(
                                args.get("action", ""),
                                flush_target,
                                args.get("content", ""),
                                metadata=self._build_memory_write_metadata(
                                    write_origin="memory_flush",
                                    execution_context="flush_memories",
                                ),
                            )
                        except Exception as _hook_err:
                            # Best-effort notification: the write itself
                            # already succeeded, so only leave a trace.
                            logger.debug(
                                "Memory manager on_memory_write hook failed: %s",
                                _hook_err,
                            )
                    if not self.quiet_mode:
                        print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
                except Exception as e:
                    logger.warning("Memory flush tool call failed: %s", e)
                    self._emit_auxiliary_failure("memory flush tool", e)
    except Exception as e:
        logger.warning("Memory flush API call failed: %s", e)
        self._emit_auxiliary_failure("memory flush", e)
    finally:
        # Strip flush artifacts: remove everything from the flush message
        # onward. Search for the sentinel from the end and truncate only
        # when it is actually present, so a missing sentinel can never
        # cause unrelated conversation messages to be discarded.
        for _idx in range(len(messages) - 1, -1, -1):
            _entry = messages[_idx]
            if isinstance(_entry, dict) and _entry.get("_flush_sentinel") == _sentinel:
                del messages[_idx:]
                break
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
"""Compress conversation context and split the session in SQLite.
@@ -7942,6 +8278,8 @@ class AIAgent:
f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
focus_topic,
)
# Pre-compression memory flush: let the model save memories before they're lost
self.flush_memories(messages, min_turns=0)
# Notify external memory provider before compression discards context
if self._memory_manager:
@@ -386,7 +386,7 @@ class TestProvidersDictApiModeAnthropicMessages:
},
},
"auxiliary": {
"compression": {
"flush_memories": {
"provider": "myrelay",
"model": "claude-sonnet-4.6",
},
@@ -399,11 +399,11 @@ class TestProvidersDictApiModeAnthropicMessages:
AnthropicAuxiliaryClient,
AsyncAnthropicAuxiliaryClient,
)
async_client, async_model = get_async_text_auxiliary_client("compression")
async_client, async_model = get_async_text_auxiliary_client("flush_memories")
assert isinstance(async_client, AsyncAnthropicAuxiliaryClient)
assert async_model == "claude-sonnet-4.6"
sync_client, sync_model = get_text_auxiliary_client("compression")
sync_client, sync_model = get_text_auxiliary_client("flush_memories")
assert isinstance(sync_client, AnthropicAuxiliaryClient)
assert sync_model == "claude-sonnet-4.6"
@@ -1,7 +1,7 @@
"""Regression tests for the universal "unsupported temperature" retry in
``agent.auxiliary_client``.
Auxiliary callers (context compression, session search,
Auxiliary callers (``flush_memories``, context compression, session search,
web extract summarisation, etc.) hardcode ``temperature=0.3`` for historical
reasons. Several provider/model combinations reject ``temperature`` with a
400:
@@ -100,7 +100,7 @@ class TestCallLlmUnsupportedTemperatureRetry:
side_effect=lambda resp, _task: resp),
):
result = call_llm(
task="compression",
task="flush_memories",
messages=[{"role": "user", "content": "remember this"}],
temperature=0.3,
max_tokens=500,
@@ -136,7 +136,7 @@ class TestCallLlmUnsupportedTemperatureRetry:
):
with pytest.raises(RuntimeError, match="Invalid value"):
call_llm(
task="compression",
task="flush_memories",
messages=[{"role": "user", "content": "x"}],
temperature=0.3,
max_tokens=500,
@@ -166,7 +166,7 @@ class TestCallLlmUnsupportedTemperatureRetry:
):
with pytest.raises(RuntimeError):
call_llm(
task="compression",
task="flush_memories",
messages=[{"role": "user", "content": "x"}],
temperature=None, # explicit: no temperature sent
max_tokens=500,
+2
View File
@@ -33,6 +33,7 @@ class _FakeAgent:
self._todo_store.write(
[{"id": "t1", "content": "unfinished task", "status": "in_progress"}]
)
self.flush_memories = MagicMock()
self.commit_memory_session = MagicMock()
self._invalidate_system_prompt = MagicMock()
@@ -156,6 +157,7 @@ def test_new_command_creates_real_fresh_session_and_resets_agent_state(tmp_path)
assert cli.agent._todo_store.read() == []
assert cli.session_start > old_session_start
assert cli.agent.session_start == cli.session_start
cli.agent.flush_memories.assert_called_once_with([{"role": "user", "content": "hello"}])
cli.agent._invalidate_system_prompt.assert_called_once()
+249
View File
@@ -0,0 +1,249 @@
"""Tests for proactive memory flush on session expiry.
Verifies that:
1. _is_session_expired() works from a SessionEntry alone (no source needed)
2. The sync callback is no longer called in get_or_create_session
3. memory_flushed flag persists across save/load cycles (prevents restart re-flush)
4. The background watcher can detect expired sessions
"""
import pytest
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock
from gateway.config import Platform, GatewayConfig, SessionResetPolicy
from gateway.session import SessionSource, SessionStore, SessionEntry
@pytest.fixture()
def idle_store(tmp_path):
    """Provide a SessionStore whose default policy resets after 60 idle minutes.

    ``SessionStore._ensure_loaded`` is patched while the store is
    constructed; afterwards the store is marked as loaded with no database
    attached.
    """
    idle_policy = SessionResetPolicy(mode="idle", idle_minutes=60)
    gateway_config = GatewayConfig(default_reset_policy=idle_policy)
    with patch("gateway.session.SessionStore._ensure_loaded"):
        store = SessionStore(sessions_dir=tmp_path, config=gateway_config)
    store._db = None
    store._loaded = True
    return store
@pytest.fixture()
def no_reset_store(tmp_path):
    """Provide a SessionStore whose default reset policy is ``mode="none"``.

    ``SessionStore._ensure_loaded`` is patched while the store is
    constructed; afterwards the store is marked as loaded with no database
    attached.
    """
    never_policy = SessionResetPolicy(mode="none")
    gateway_config = GatewayConfig(default_reset_policy=never_policy)
    with patch("gateway.session.SessionStore._ensure_loaded"):
        store = SessionStore(sessions_dir=tmp_path, config=gateway_config)
    store._db = None
    store._loaded = True
    return store
class TestIsSessionExpired:
    """Expiry must be decidable from a SessionEntry by itself."""

    @staticmethod
    def _telegram_dm_entry(session_id, created_ago, updated_ago):
        """Build a Telegram DM entry timestamped the given deltas in the past."""
        return SessionEntry(
            session_key="agent:main:telegram:dm",
            session_id=session_id,
            created_at=datetime.now() - created_ago,
            updated_at=datetime.now() - updated_ago,
            platform=Platform.TELEGRAM,
            chat_type="dm",
        )

    def test_idle_session_expired(self, idle_store):
        # Last update 120 minutes ago exceeds the 60-minute idle window.
        stale = self._telegram_dm_entry(
            "sid_1", timedelta(hours=3), timedelta(minutes=120)
        )
        assert idle_store._is_session_expired(stale) is True

    def test_active_session_not_expired(self, idle_store):
        # Last update 10 minutes ago is inside the idle window.
        fresh = self._telegram_dm_entry(
            "sid_2", timedelta(hours=1), timedelta(minutes=10)
        )
        assert idle_store._is_session_expired(fresh) is False

    def test_none_mode_never_expires(self, no_reset_store):
        month_old = self._telegram_dm_entry(
            "sid_3", timedelta(days=30), timedelta(days=30)
        )
        assert no_reset_store._is_session_expired(month_old) is False

    def test_active_processes_prevent_expiry(self, idle_store):
        """An entry with active background processes is reported as not expired."""
        idle_store._has_active_processes_fn = lambda key: True
        busy = self._telegram_dm_entry(
            "sid_4", timedelta(hours=5), timedelta(hours=5)
        )
        assert idle_store._is_session_expired(busy) is False

    def test_daily_mode_expired(self, tmp_path):
        """Under a daily policy (reset hour 4), a two-day-old entry is expired."""
        daily_policy = SessionResetPolicy(mode="daily", at_hour=4)
        daily_config = GatewayConfig(default_reset_policy=daily_policy)
        with patch("gateway.session.SessionStore._ensure_loaded"):
            store = SessionStore(sessions_dir=tmp_path, config=daily_config)
        store._db = None
        store._loaded = True

        two_days_old = self._telegram_dm_entry(
            "sid_5", timedelta(days=2), timedelta(days=2)
        )
        assert store._is_session_expired(two_days_old) is True
class TestGetOrCreateSessionNoCallback:
    """get_or_create_session must auto-reset without a sync flush callback."""

    def test_auto_reset_creates_new_session_after_flush(self, idle_store):
        """An idle, already-flushed session is replaced by a fresh session_id."""
        dm_source = SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="123",
            chat_type="dm",
        )
        original = idle_store.get_or_create_session(dm_source)
        previous_id = original.session_id

        # Pretend the watcher already flushed it, then age it past the
        # idle window and persist.
        original.memory_flushed = True
        original.updated_at = datetime.now() - timedelta(minutes=120)
        idle_store._save()

        replacement = idle_store.get_or_create_session(dm_source)
        assert replacement.session_id != previous_id
        assert replacement.was_auto_reset is True
        # The replacement starts out unflushed.
        assert replacement.memory_flushed is False

    def test_no_sync_callback_invoked(self, idle_store):
        """The store exposes no _on_auto_reset hook, and auto-reset still happens."""
        dm_source = SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="123",
            chat_type="dm",
        )
        original = idle_store.get_or_create_session(dm_source)
        original.updated_at = datetime.now() - timedelta(minutes=120)
        idle_store._save()

        # The legacy synchronous hook attribute must be absent.
        assert not hasattr(idle_store, '_on_auto_reset')

        # No sync LLM call is expected to run here.
        replacement = idle_store.get_or_create_session(dm_source)
        assert replacement.was_auto_reset is True
class TestMemoryFlushedFlag:
    """SessionEntry.memory_flushed: default, persistence, and serialisation."""

    @staticmethod
    def _store_and_reload(store, entry):
        """Save ``entry`` through ``store``, reload from disk, return the reloaded entry."""
        store._entries[entry.session_key] = entry
        store._save()
        # Simulate a restart: wipe in-memory state and load again.
        store._entries.clear()
        store._loaded = False
        store._ensure_loaded()
        return store._entries[entry.session_key]

    def test_defaults_to_false(self):
        right_now = datetime.now()
        fresh = SessionEntry(
            session_key="agent:main:telegram:dm:123",
            session_id="sid_new",
            created_at=right_now,
            updated_at=datetime.now(),
            platform=Platform.TELEGRAM,
            chat_type="dm",
        )
        assert fresh.memory_flushed is False

    def test_persists_through_save_load(self, idle_store):
        """memory_flushed=True is still True after a save/load cycle."""
        flushed = SessionEntry(
            session_key="agent:main:discord:thread:789",
            session_id="sid_flushed",
            created_at=datetime.now() - timedelta(hours=5),
            updated_at=datetime.now() - timedelta(hours=5),
            platform=Platform.DISCORD,
            chat_type="thread",
            memory_flushed=True,
        )
        reloaded = self._store_and_reload(idle_store, flushed)
        assert reloaded.memory_flushed is True

    def test_unflushed_entry_survives_restart_as_unflushed(self, idle_store):
        """An entry saved without memory_flushed reloads with it False."""
        unflushed = SessionEntry(
            session_key="agent:main:telegram:dm:456",
            session_id="sid_not_flushed",
            created_at=datetime.now() - timedelta(hours=2),
            updated_at=datetime.now() - timedelta(hours=2),
            platform=Platform.TELEGRAM,
            chat_type="dm",
        )
        reloaded = self._store_and_reload(idle_store, unflushed)
        assert reloaded.memory_flushed is False

    def test_roundtrip_to_dict_from_dict(self):
        """to_dict followed by from_dict keeps memory_flushed=True."""
        source_entry = SessionEntry(
            session_key="agent:main:telegram:dm:999",
            session_id="sid_rt",
            created_at=datetime.now(),
            updated_at=datetime.now(),
            platform=Platform.TELEGRAM,
            chat_type="dm",
            memory_flushed=True,
        )
        as_dict = source_entry.to_dict()
        assert as_dict["memory_flushed"] is True
        rebuilt = SessionEntry.from_dict(as_dict)
        assert rebuilt.memory_flushed is True

    def test_legacy_entry_without_field_defaults_false(self):
        """A serialised entry lacking the memory_flushed key loads as False."""
        legacy_payload = {
            "session_key": "agent:main:telegram:dm:legacy",
            "session_id": "sid_legacy",
            "created_at": datetime.now().isoformat(),
            "updated_at": datetime.now().isoformat(),
            "platform": "telegram",
            "chat_type": "dm",
            # memory_flushed deliberately omitted
        }
        loaded = SessionEntry.from_dict(legacy_payload)
        assert loaded.memory_flushed is False
@@ -0,0 +1,240 @@
"""Tests for memory flush stale-overwrite prevention (#2670).
Verifies that:
1. Cron sessions are skipped (no flush for headless cron runs)
2. Current memory state is injected into the flush prompt so the
flush agent can see what's already saved and avoid overwrites
3. The flush still works normally when memory files don't exist
"""
import sys
import types
import pytest
from pathlib import Path
from unittest.mock import MagicMock, patch, call
@pytest.fixture(autouse=True)
def _mock_dotenv(monkeypatch):
    """Install a stub ``dotenv`` module in ``sys.modules`` for every test.

    gateway.run imports dotenv at module level; the stub lets the tests
    run without the package installed.
    """
    def _noop_load_dotenv(*args, **kwargs):
        return None

    stub_module = types.ModuleType("dotenv")
    stub_module.load_dotenv = _noop_load_dotenv
    monkeypatch.setitem(sys.modules, "dotenv", stub_module)
def _make_runner():
    """Build a GatewayRunner without running ``__init__``.

    The instance is allocated with ``object.__new__`` and given empty
    dicts for its bookkeeping attributes plus MagicMock stand-ins for
    ``hooks`` and ``session_store``.
    """
    from gateway.run import GatewayRunner

    bare_runner = object.__new__(GatewayRunner)
    empty_dict_attrs = (
        "_honcho_managers",
        "_honcho_configs",
        "_running_agents",
        "_pending_messages",
        "_pending_approvals",
        "adapters",
    )
    for attr_name in empty_dict_attrs:
        # A fresh dict per attribute so none of them share state.
        setattr(bare_runner, attr_name, {})
    bare_runner.hooks = MagicMock()
    bare_runner.session_store = MagicMock()
    return bare_runner
# Four-message user/assistant transcript used as the return value of the
# mocked session_store.load_transcript in the flush tests in this module.
_TRANSCRIPT_4_MSGS = [
    {"role": "user", "content": "hello"},
    {"role": "assistant", "content": "hi there"},
    {"role": "user", "content": "remember my name is Alice"},
    {"role": "assistant", "content": "Got it, Alice!"},
]
class TestCronSessionBypass:
    """Session ids starting with ``cron_`` must not trigger a memory flush."""

    def test_cron_session_skipped(self):
        gateway = _make_runner()
        gateway._flush_memories_for_session("cron_job123_20260323_120000")
        # The transcript is never even loaded for a cron session.
        gateway.session_store.load_transcript.assert_not_called()

    def test_cron_session_with_prefix_skipped(self):
        """A differently-shaped cron session id is skipped as well."""
        gateway = _make_runner()
        gateway._flush_memories_for_session("cron_daily_20260323")
        gateway.session_store.load_transcript.assert_not_called()

    def test_non_cron_session_proceeds(self):
        """A non-cron session id gets as far as loading its transcript."""
        gateway = _make_runner()
        gateway.session_store.load_transcript.return_value = []
        gateway._flush_memories_for_session("session_abc123")
        gateway.session_store.load_transcript.assert_called_once_with("session_abc123")
def _make_flush_context(monkeypatch, memory_dir=None):
    """Return ``(runner, tmp_agent, memory_dir)`` with ``run_agent`` stubbed.

    A fake ``run_agent`` module whose ``AIAgent`` always yields ``tmp_agent``
    is placed in ``sys.modules``, and the runner's mocked session store is
    set to return the four-message transcript. ``memory_dir`` is passed
    through to the caller unchanged.
    """
    stub_agent = MagicMock()
    stub_module = types.ModuleType("run_agent")
    stub_module.AIAgent = MagicMock(return_value=stub_agent)
    monkeypatch.setitem(sys.modules, "run_agent", stub_module)

    gateway = _make_runner()
    gateway.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
    return gateway, stub_agent, memory_dir
class TestMemoryInjection:
    """The flush prompt must reflect the memory files currently on disk."""

    @staticmethod
    def _flush_and_capture_prompt(runner, tmp_agent, session_id, memory_dir):
        """Run the flush with gateway helpers patched; return the prompt sent.

        ``tools.memory_tool`` is replaced by a mock whose ``get_memory_dir``
        returns ``memory_dir``. Asserts the temporary agent ran exactly one
        conversation and returns its ``user_message`` keyword argument.
        """
        fake_memory_tool = MagicMock(get_memory_dir=lambda: memory_dir)
        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
            patch.dict("sys.modules", {"tools.memory_tool": fake_memory_tool}),
        ):
            runner._flush_memories_for_session(session_id)

        tmp_agent.run_conversation.assert_called_once()
        return tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")

    def test_memory_content_injected_into_flush_prompt(self, tmp_path, monkeypatch):
        """Existing memory file contents show up in the flush prompt."""
        memory_dir = tmp_path / "memories"
        memory_dir.mkdir()
        (memory_dir / "MEMORY.md").write_text("Agent knows Python\n§\nUser prefers dark mode")
        (memory_dir / "USER.md").write_text("Name: Alice\n§\nTimezone: PST")

        runner, tmp_agent, _ = _make_flush_context(monkeypatch, memory_dir)
        prompt = self._flush_and_capture_prompt(
            runner, tmp_agent, "session_123", memory_dir
        )

        expected_fragments = (
            "Agent knows Python",
            "User prefers dark mode",
            "Name: Alice",
            "Timezone: PST",
            "Do NOT overwrite or remove entries",
            "current live state of memory",
        )
        for fragment in expected_fragments:
            assert fragment in prompt

    def test_flush_works_without_memory_files(self, tmp_path, monkeypatch):
        """With an empty memory directory the flush runs without the guard text."""
        empty_dir = tmp_path / "no_memories"
        empty_dir.mkdir()

        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
        prompt = self._flush_and_capture_prompt(
            runner, tmp_agent, "session_456", empty_dir
        )

        assert "Do NOT overwrite or remove entries" not in prompt
        assert "Review the conversation above" in prompt

    def test_empty_memory_files_no_injection(self, tmp_path, monkeypatch):
        """Empty or whitespace-only memory files do not add the guard section."""
        memory_dir = tmp_path / "memories"
        memory_dir.mkdir()
        (memory_dir / "MEMORY.md").write_text("")
        (memory_dir / "USER.md").write_text(" \n ")  # whitespace only

        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
        prompt = self._flush_and_capture_prompt(
            runner, tmp_agent, "session_789", memory_dir
        )

        assert "current live state of memory" not in prompt
class TestFlushAgentSilenced:
    """The flush agent must not produce any terminal output."""

    def test_print_fn_set_to_noop(self, tmp_path, monkeypatch):
        """_print_fn on the flush agent must be a no-op so tool output never leaks."""
        runner = _make_runner()
        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS

        captured_agent = {}

        def _fake_ai_agent(*args, **kwargs):
            # Hand back a MagicMock and remember it so the test can inspect it later.
            agent = MagicMock()
            captured_agent["instance"] = agent
            return agent

        # Register a fake ``run_agent`` module whose AIAgent is the factory above.
        fake_run_agent = types.ModuleType("run_agent")
        fake_run_agent.AIAgent = _fake_ai_agent
        monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: tmp_path)}),
        ):
            runner._flush_memories_for_session("session_silent")

        agent = captured_agent["instance"]
        assert agent._print_fn is not None, "_print_fn should be overridden to suppress output"
        # A MagicMock fabricates a child mock for any attribute that was never
        # assigned, so the ``is not None`` check alone can never fail.  Require
        # that the attribute was explicitly replaced with a real callable.
        assert not isinstance(agent._print_fn, MagicMock), (
            "_print_fn should be overridden to suppress output"
        )
        # Confirm it is callable and produces no output (no exception)
        agent._print_fn("should be silenced")

    def test_kawaii_spinner_respects_print_fn(self):
        """KawaiiSpinner must route all output through print_fn when supplied."""
        import contextlib
        import io

        from agent.display import KawaiiSpinner

        written = []
        spinner = KawaiiSpinner("test", print_fn=lambda *a, **kw: written.append(a))
        spinner._write("hello")
        assert written == [("hello",)], "spinner should route through print_fn"

        # A no-op print_fn must produce no output to stdout.  redirect_stdout
        # restores sys.stdout on exit, including when the body raises.
        buf = io.StringIO()
        with contextlib.redirect_stdout(buf):
            silent_spinner = KawaiiSpinner("silent", print_fn=lambda *a, **kw: None)
            silent_spinner._write("should not appear")
            silent_spinner.stop("done")
        assert buf.getvalue() == "", "no-op print_fn spinner must not write to stdout"

    def test_flush_agent_closes_resources_after_run(self, monkeypatch):
        """Memory flush should close temporary agent resources after the turn."""
        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
        tmp_agent.shutdown_memory_provider = MagicMock()
        tmp_agent.close = MagicMock()

        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: Path("/nonexistent"))}),
        ):
            runner._flush_memories_for_session("session_cleanup")

        tmp_agent.shutdown_memory_provider.assert_called_once()
        tmp_agent.close.assert_called_once()
class TestFlushPromptStructure:
    """Checks that the flush prompt still carries its core instruction phrases."""

    def test_core_instructions_present(self, monkeypatch):
        """Every original guidance phrase must appear in the prompt sent to the flush agent."""
        runner, flush_agent, _ = _make_flush_context(monkeypatch)
        memory_tool_stub = MagicMock(get_memory_dir=lambda: Path("/nonexistent"))

        with patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}):
            with patch("gateway.run._resolve_gateway_model", return_value="test-model"):
                with patch.dict("sys.modules", {"tools.memory_tool": memory_tool_stub}):
                    runner._flush_memories_for_session("session_struct")

        prompt = flush_agent.run_conversation.call_args.kwargs.get("user_message", "")
        required_phrases = (
            "automatically reset",
            "Save any important facts",
            "consider saving it as a skill",
            "Do NOT respond to the user",
        )
        for phrase in required_phrases:
            assert phrase in prompt
+29 -1
View File
@@ -4,7 +4,7 @@ Tests the _handle_resume_command handler (switch to a previously-named session)
across gateway messenger platforms.
"""
from unittest.mock import MagicMock
from unittest.mock import MagicMock, AsyncMock
import pytest
@@ -53,6 +53,9 @@ def _make_runner(session_db=None, current_session_id="current_session_001",
mock_store.switch_session.return_value = mock_session_entry
runner.session_store = mock_store
# Stub out memory flushing
runner._async_flush_memories = AsyncMock()
return runner
@@ -230,3 +233,28 @@ class TestHandleResumeCommand:
assert real_key not in runner._running_agents
db.close()
@pytest.mark.asyncio
async def test_resume_flushes_memories(self, tmp_path):
    """Handling /resume triggers exactly one memory flush for the current session."""
    from hermes_state import SessionDB

    store = SessionDB(db_path=tmp_path / "state.db")
    # A titled session to resume into, plus the session that is currently active.
    store.create_session("old_session", "telegram")
    store.set_session_title("old_session", "Old Work")
    store.create_session("current_session_001", "telegram")

    resume_event = _make_event(text="/resume Old Work")
    runner = _make_runner(
        session_db=store,
        current_session_id="current_session_001",
        event=resume_event,
    )

    await runner._handle_resume_command(resume_event)

    expected_flush_args = ("current_session_001", "agent:main:telegram:dm:67890")
    runner._async_flush_memories.assert_called_once_with(*expected_flush_args)
    store.close()
+10 -10
View File
@@ -177,8 +177,8 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
its reset policy (idle timeout, scheduled reset), it must fire
``on_session_finalize`` so plugin providers get the same final-pass
extraction opportunity they'd get from /new or CLI shutdown. Before
the fix, the expiry path evicted the agent but silently skipped the
hook.
the fix, the expiry path flushed memories and evicted the agent but
silently skipped the hook.
"""
from datetime import datetime, timedelta
@@ -200,7 +200,7 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
platform=Platform.TELEGRAM,
chat_type="dm",
)
expired_entry.expiry_finalized = False
expired_entry.memory_flushed = False
runner.session_store = MagicMock()
runner.session_store._ensure_loaded = MagicMock()
@@ -211,24 +211,24 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
runner.session_store._lock.__exit__ = MagicMock(return_value=None)
runner.session_store._save = MagicMock()
runner._async_flush_memories = AsyncMock()
runner._evict_cached_agent = MagicMock()
runner._cleanup_agent_resources = MagicMock()
runner._sweep_idle_cached_agents = MagicMock(return_value=0)
# The watcher starts with `await asyncio.sleep(60)` and loops while
# `self._running`. Patch sleep so the 60s initial delay is instant, and
# make the expiry hook invocation flip `_running` false so the loop
# exits cleanly after one pass.
# `self._running`. Patch sleep so the 60s initial delay is instant, then
# flip `_running` false inside the flush call so the loop exits cleanly
# after one pass.
_orig_sleep = __import__("asyncio").sleep
async def _fast_sleep(_):
await _orig_sleep(0)
def _hook_and_stop(*a, **kw):
runner._running = False
return None
async def _flush_and_stop(session_id, key):
runner._running = False # terminate the loop after this iteration
mock_invoke_hook.side_effect = _hook_and_stop
runner._async_flush_memories = AsyncMock(side_effect=_flush_and_stop)
with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
await runner._session_expiry_watcher(interval=0)
@@ -1,7 +1,7 @@
"""Regression tests for approval-state cleanup on session boundaries."""
from datetime import datetime
from unittest.mock import MagicMock
from unittest.mock import AsyncMock, MagicMock
import pytest
@@ -72,6 +72,7 @@ def _make_resume_runner():
runner = object.__new__(GatewayRunner)
runner.adapters = {}
runner._background_tasks = set()
runner._async_flush_memories = AsyncMock()
runner._running_agents = {}
runner._running_agents_ts = {}
runner._busy_ack_ts = {}
-11
View File
@@ -256,17 +256,6 @@ class TestDetectProviderForModel:
"""Models belonging to the current provider should not trigger a switch."""
assert detect_provider_for_model("gpt-5.3-codex", "openai-codex") is None
def test_short_alias_resolves_to_static_model(self):
    """A short alias such as ``sonnet`` must resolve while the OpenRouter fetch is forbidden."""
    # Any call to the catalog fetcher raises, so a pass proves it was never used.
    forbid_network = patch(
        "hermes_cli.models.fetch_openrouter_models",
        side_effect=AssertionError("network lookup should not run"),
    )
    with forbid_network:
        detected = detect_provider_for_model("sonnet", "auto")

    assert detected is not None
    provider_name = detected[0]
    model_name = detected[1]
    assert provider_name == "anthropic"
    assert model_name.startswith("claude-sonnet")
def test_openrouter_slug_match(self):
"""Models in the OpenRouter catalog should be found."""
with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
-28
View File
@@ -19,18 +19,6 @@ def _touch_ink(root: Path) -> None:
ink.write_text("{}")
def _touch_tui_entry(root: Path) -> None:
    """Write a placeholder ``dist/entry.js`` under *root*, creating ``dist`` if needed."""
    dist_dir = root.joinpath("dist")
    dist_dir.mkdir(parents=True, exist_ok=True)
    (dist_dir / "entry.js").write_text("console.log('tui')")
def _touch_ink_bundle(root: Path) -> None:
    """Write a placeholder ``packages/hermes-ink/dist/ink-bundle.js`` under *root*."""
    bundle_dir = root.joinpath("packages", "hermes-ink", "dist")
    bundle_dir.mkdir(parents=True, exist_ok=True)
    (bundle_dir / "ink-bundle.js").write_text("export {}")
def test_need_install_when_ink_missing(tmp_path: Path, main_mod) -> None:
    """With only a ``package-lock.json`` written, ``_tui_need_npm_install`` must return True."""
    lockfile = tmp_path / "package-lock.json"
    lockfile.write_text("{}")
    needs_install = main_mod._tui_need_npm_install(tmp_path)
    assert needs_install is True
@@ -63,19 +51,3 @@ def test_need_install_when_marker_missing(tmp_path: Path, main_mod) -> None:
def test_no_install_without_lockfile_when_ink_present(tmp_path: Path, main_mod) -> None:
    """After ``_touch_ink`` and with no lockfile written, no npm install is requested."""
    _touch_ink(tmp_path)
    needs_install = main_mod._tui_need_npm_install(tmp_path)
    assert needs_install is False
def test_build_needed_when_local_ink_bundle_missing(tmp_path: Path, main_mod) -> None:
    """Entry file and ink present but no ink bundle: no install needed, build needed."""
    for prepare in (_touch_tui_entry, _touch_ink):
        prepare(tmp_path)

    install_needed = main_mod._tui_need_npm_install(tmp_path)
    assert install_needed is False
    build_needed = main_mod._tui_build_needed(tmp_path)
    assert build_needed is True
def test_build_not_needed_when_entry_and_ink_bundle_present(tmp_path: Path, main_mod) -> None:
    """Entry file, ink and ink bundle all present: ``_tui_build_needed`` must return False."""
    for prepare in (_touch_tui_entry, _touch_ink, _touch_ink_bundle):
        prepare(tmp_path)

    build_needed = main_mod._tui_build_needed(tmp_path)
    assert build_needed is False
+3 -61
View File
@@ -1,5 +1,4 @@
from argparse import Namespace
from pathlib import Path
import sys
import types
@@ -9,11 +8,8 @@ import pytest
def _args(**overrides):
    """Return a Namespace of default chat/TUI flags with *overrides* layered on top."""
    defaults = dict(
        continue_last=None,
        model=None,
        provider=None,
        resume=None,
        tui=True,
        tui_dev=False,
    )
    # Later keys win, so overrides replace defaults and may add new attributes.
    return Namespace(**{**defaults, **overrides})
@@ -35,7 +31,7 @@ def test_cmd_chat_tui_continue_uses_latest_tui_session(monkeypatch, main_mod):
calls.append(source)
return "20260408_235959_a1b2c3" if source == "tui" else None
def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
def fake_launch(resume_session_id=None, tui_dev=False):
captured["resume"] = resume_session_id
raise SystemExit(0)
@@ -62,7 +58,7 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
return "20260408_235959_d4e5f6"
return None
def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
def fake_launch(resume_session_id=None, tui_dev=False):
captured["resume"] = resume_session_id
raise SystemExit(0)
@@ -80,7 +76,7 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod):
captured = {}
def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
def fake_launch(resume_session_id=None, tui_dev=False):
captured["resume"] = resume_session_id
raise SystemExit(0)
@@ -93,60 +89,6 @@ def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod)
assert captured["resume"] == "20260409_000000_aa11bb"
def test_cmd_chat_tui_passes_model_and_provider(monkeypatch, main_mod):
    """``cmd_chat`` must hand the model and provider from its args to ``_launch_tui``."""
    seen = {}

    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
        # Record what cmd_chat passed, then stop before any real launch happens.
        seen["model"] = model
        seen["provider"] = provider
        seen["resume"] = resume_session_id
        seen["tui_dev"] = tui_dev
        raise SystemExit(0)

    monkeypatch.setattr(main_mod, "_launch_tui", fake_launch)

    chat_args = _args(model="anthropic/claude-sonnet-4.6", provider="anthropic")
    with pytest.raises(SystemExit):
        main_mod.cmd_chat(chat_args)

    expected = {
        "model": "anthropic/claude-sonnet-4.6",
        "provider": "anthropic",
        "resume": None,
        "tui_dev": False,
    }
    assert seen == expected
def test_launch_tui_exports_model_and_provider(monkeypatch, main_mod):
    """``_launch_tui`` must export model and provider in the env passed to ``subprocess.call``."""
    recorded = {}

    def fake_make_argv(tui_dir, tui_dev):
        # Fixed argv/cwd pair so no real TUI build is inspected.
        return ["node", "dist/entry.js"], Path(".")

    def fake_call(argv, cwd=None, env=None):
        recorded["argv"] = argv
        recorded["cwd"] = cwd
        recorded["env"] = env
        return 1

    monkeypatch.setattr(main_mod, "_make_tui_argv", fake_make_argv)
    monkeypatch.setattr(main_mod.subprocess, "call", fake_call)

    with pytest.raises(SystemExit):
        main_mod._launch_tui(model="nous/hermes-test", provider="nous")

    child_env = recorded["env"]
    expected_env = {
        "HERMES_MODEL": "nous/hermes-test",
        "HERMES_INFERENCE_MODEL": "nous/hermes-test",
        "HERMES_TUI_PROVIDER": "nous",
        "HERMES_INFERENCE_PROVIDER": "nous",
    }
    for key, value in expected_env.items():
        assert child_env[key] == value
def test_print_tui_exit_summary_includes_resume_and_token_totals(monkeypatch, capsys):
import hermes_cli.main as main_mod
File diff suppressed because it is too large Load Diff
@@ -31,6 +31,7 @@ def _make_agent_with_engine(engine):
agent._vprint = lambda *a, **kw: None
agent._last_flushed_db_idx = 0
# Stub the few AIAgent methods _compress_context uses.
agent.flush_memories = lambda *a, **kw: None
agent._invalidate_system_prompt = lambda *a, **kw: None
agent._build_system_prompt = lambda *a, **kw: "new-system-prompt"
agent.commit_memory_session = lambda *a, **kw: None
+122 -33
View File
@@ -41,6 +41,8 @@ def _make_agent(
agent.tool_progress_callback = None
agent._compression_warning = None
agent._aux_compression_context_length_config = None
# Tools feed into the headroom calculation in _check_compression_model_feasibility.
# Tests that want to assert specific threshold values can override this.
agent.tools = []
compressor = MagicMock(spec=ContextCompressor)
@@ -83,8 +85,9 @@ def test_auto_corrects_threshold_when_aux_context_below_threshold(mock_get_clien
assert "threshold:" in messages[0]
# Warning stored for gateway replay
assert agent._compression_warning is not None
# Threshold on the live compressor was actually lowered to aux_context.
assert agent.context_compressor.threshold_tokens == 80_000
# Threshold on the live compressor was actually lowered, accounting for
# the request-overhead headroom (empty tools list → ~12K headroom only).
assert agent.context_compressor.threshold_tokens == 68_000
@patch("agent.model_metadata.get_model_context_length", return_value=32_768)
@@ -148,15 +151,14 @@ def test_feasibility_check_passes_live_main_runtime():
agent._emit_status = lambda msg: None
agent._check_compression_model_feasibility()
mock_get_client.assert_called_once_with(
"compression",
main_runtime={
"model": "gpt-5.4",
"provider": "openai-codex",
# Called for both compression + flush_memories; verify compression call present
assert any(
c == (("compression",), {"main_runtime": {
"model": "gpt-5.4", "provider": "openai-codex",
"base_url": "https://chatgpt.com/backend-api/codex",
"api_key": "codex-token",
"api_mode": "codex_responses",
},
"api_key": "codex-token", "api_mode": "codex_responses",
}})
for c in mock_get_client.call_args_list
)
@@ -176,12 +178,12 @@ def test_feasibility_check_passes_config_context_length(mock_get_client, mock_ct
agent._emit_status = lambda msg: None
agent._check_compression_model_feasibility()
mock_ctx_len.assert_called_once_with(
"custom/big-model",
base_url="http://custom-endpoint:8080/v1",
api_key="sk-custom",
config_context_length=1_000_000,
provider="openrouter",
# First call is the compression model
assert mock_ctx_len.call_args_list[0] == (
("custom/big-model",),
{"base_url": "http://custom-endpoint:8080/v1",
"api_key": "sk-custom", "config_context_length": 1_000_000,
"provider": "openrouter"},
)
@@ -199,12 +201,11 @@ def test_feasibility_check_ignores_invalid_context_length(mock_get_client, mock_
agent._emit_status = lambda msg: None
agent._check_compression_model_feasibility()
mock_ctx_len.assert_called_once_with(
"custom/model",
base_url="http://custom:8080/v1",
api_key="sk-test",
config_context_length=None,
provider="openrouter",
assert mock_ctx_len.call_args_list[0] == (
("custom/model",),
{"base_url": "http://custom:8080/v1",
"api_key": "sk-test", "config_context_length": None,
"provider": "openrouter"},
)
@@ -252,13 +253,10 @@ def test_init_feasibility_check_uses_aux_context_override_from_config():
)
assert agent._aux_compression_context_length_config == 1_000_000
mock_ctx_len.assert_called_once_with(
"custom/big-model",
base_url="http://custom-endpoint:8080/v1",
api_key="sk-custom",
config_context_length=1_000_000,
provider="",
)
c0 = mock_ctx_len.call_args_list[0]
assert c0.args == ("custom/big-model",)
assert c0.kwargs["base_url"] == "http://custom-endpoint:8080/v1"
assert c0.kwargs["config_context_length"] == 1_000_000
@patch("agent.auxiliary_client.get_text_auxiliary_client")
@@ -308,8 +306,10 @@ def test_exception_does_not_crash(mock_get_client):
@patch("agent.model_metadata.get_model_context_length", return_value=100_000)
@patch("agent.auxiliary_client.get_text_auxiliary_client")
def test_exact_threshold_boundary_no_warning(mock_get_client, mock_ctx_len):
"""No warning when aux context exactly equals the threshold."""
def test_exact_threshold_boundary_triggers_headroom_correction(mock_get_client, mock_ctx_len):
"""When aux context exactly equals the threshold, headroom deduction
still fires — flush_memories adds system prompt + tool schema on top
of the conversation messages, so threshold must be lowered."""
agent = _make_agent(main_context=200_000, threshold_percent=0.50)
mock_client = MagicMock()
mock_client.base_url = "https://openrouter.ai/api/v1"
@@ -321,7 +321,10 @@ def test_exact_threshold_boundary_no_warning(mock_get_client, mock_ctx_len):
agent._check_compression_model_feasibility()
assert len(messages) == 0
# 100K - headroom < 100K → auto-corrects
assert len(messages) == 1
assert "Auto-lowered" in messages[0]
assert agent.context_compressor.threshold_tokens < 100_000
@patch("agent.model_metadata.get_model_context_length", return_value=99_999)
@@ -343,7 +346,93 @@ def test_just_below_threshold_auto_corrects(mock_get_client, mock_ctx_len):
assert len(messages) == 1
assert "small-model" in messages[0]
assert "Auto-lowered" in messages[0]
assert agent.context_compressor.threshold_tokens == 99_999
assert agent.context_compressor.threshold_tokens == 87_999
# ── Headroom for system prompt + tool schemas ────────────────────────
@patch("agent.model_metadata.get_model_context_length", return_value=128_000)
@patch("agent.auxiliary_client.get_text_auxiliary_client")
def test_auto_lowered_threshold_reserves_headroom_for_tools_and_system(mock_get_client, mock_ctx_len):
    """The auto-lowered threshold must sit strictly below the aux context.

    Auxiliary callers (compression summariser, flush_memories) prepend the
    system prompt and tool schemas to the message list, so a threshold equal
    to the aux context would leave no room for that overhead.  Regression
    guard for the flush_memories-on-busy-toolset overflow path.
    """
    from agent.model_metadata import MINIMUM_CONTEXT_LENGTH

    def _tool_schema(index):
        # One moderately sized function schema; 50 of these form the tool load.
        return {
            "type": "function",
            "function": {
                "name": f"tool_{index}",
                "description": "x" * 200,
                "parameters": {
                    "type": "object",
                    "properties": {"arg": {"type": "string", "description": "y" * 120}},
                },
            },
        }

    # Main context 200K at 70% gives a 140K threshold; the patched aux context is 128K.
    agent = _make_agent(main_context=200_000, threshold_percent=0.70)
    agent.tools = [_tool_schema(index) for index in range(50)]

    aux_client = MagicMock()
    aux_client.base_url = "https://openrouter.ai/api/v1"
    aux_client.api_key = "sk-aux"
    mock_get_client.return_value = (aux_client, "model-with-128k")

    agent._emit_status = lambda msg: None
    agent._check_compression_model_feasibility()

    new_threshold = agent.context_compressor.threshold_tokens
    # Headroom must have been reserved below the aux context.
    assert new_threshold < 128_000, (
        f"threshold {new_threshold} did not reserve headroom below aux=128,000 "
        f"— system prompt + tools would overflow the aux model"
    )
    # The lowered value must still respect the minimum context floor.
    assert new_threshold >= MINIMUM_CONTEXT_LENGTH
@patch("agent.model_metadata.get_model_context_length", return_value=80_000)
@patch("agent.auxiliary_client.get_text_auxiliary_client")
def test_headroom_floors_at_minimum_context(mock_get_client, mock_ctx_len):
    """If headroom subtraction would push below the minimum floor, clamp to the floor
    rather than refusing the session — the aux is still workable for a
    smaller message window.
    """
    from agent.model_metadata import MINIMUM_CONTEXT_LENGTH

    def _fat_tool(index):
        # Large description so 30 tools produce a big schema payload.
        return {
            "type": "function",
            "function": {
                "name": f"tool_{index}",
                "description": "z" * 2_000,  # fat descriptions
                "parameters": {},
            },
        }

    # The patched aux context is 80K; the heavy tool list is meant to push the
    # headroom-adjusted value below the floor so the clamp decides the result.
    agent = _make_agent(main_context=200_000, threshold_percent=0.50)
    agent.tools = [_fat_tool(index) for index in range(30)]

    aux_client = MagicMock()
    aux_client.base_url = "https://openrouter.ai/api/v1"
    aux_client.api_key = "sk-aux"
    mock_get_client.return_value = (aux_client, "small-aux-model")

    agent._emit_status = lambda msg: None
    agent._check_compression_model_feasibility()

    assert agent.context_compressor.threshold_tokens == MINIMUM_CONTEXT_LENGTH
# ── Two-phase: __init__ + run_conversation replay ───────────────────
@@ -0,0 +1,398 @@
"""Tests for flush_memories() working correctly across all provider modes.
Catches the bug where Codex mode called chat.completions.create on a
Responses-only client, which would fail silently or with a 404.
"""
import json
import os
import sys
import types
from types import SimpleNamespace
from unittest.mock import patch, MagicMock, call
import pytest
# Register lightweight stand-ins for "fire", "firecrawl" and "fal_client" in
# sys.modules unless a module of that name is already loaded.
# NOTE(review): presumably this lets the `import run_agent` below succeed when
# these packages are not installed — confirm against run_agent's imports.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
import run_agent
class _FakeOpenAI:
    """Stand-in client object: stores its constructor kwargs and exposes api_key / base_url."""

    def __init__(self, **kwargs):
        self.kwargs = kwargs
        # Fall back to fixed placeholder values when the caller omits them.
        self.api_key = kwargs["api_key"] if "api_key" in kwargs else "test"
        self.base_url = kwargs["base_url"] if "base_url" in kwargs else "http://test"

    def close(self):
        """Do nothing; present only so callers can invoke ``close()``."""
        return None
def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"):
    """Construct an AIAgent over patched run_agent internals for flush_memories tests.

    ``get_tool_definitions`` is replaced with a single ``memory`` tool schema,
    ``check_toolset_requirements`` with an empty result, and ``OpenAI`` with
    ``_FakeOpenAI``.  The returned agent has a mock memory store and turn
    counters set to ``_memory_flush_min_turns = 1`` / ``_user_turn_count = 5``.
    """

    def _fake_tool_definitions(**kw):
        # Build a fresh schema list on every call, as the patched function is
        # expected to return a new object each time.
        string_field = {"type": "string"}
        return [
            {
                "type": "function",
                "function": {
                    "name": "memory",
                    "description": "Manage memories.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "action": dict(string_field),
                            "target": dict(string_field),
                            "content": dict(string_field),
                        },
                    },
                },
            },
        ]

    monkeypatch.setattr(run_agent, "get_tool_definitions", _fake_tool_definitions)
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)

    agent_kwargs = dict(
        api_key="test-key",
        base_url="https://test.example.com/v1",
        provider=provider,
        api_mode=api_mode,
        max_iterations=4,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
    agent = run_agent.AIAgent(**agent_kwargs)

    # Give it a valid memory store
    agent._memory_store = MagicMock()
    agent._memory_flush_min_turns = 1
    agent._user_turn_count = 5
    return agent
def _chat_response_with_memory_call():
    """Build a fake chat-completions response whose only choice is one ``memory`` tool call."""
    memory_arguments = json.dumps({
        "action": "add",
        "target": "notes",
        "content": "User prefers dark mode.",
    })
    tool_call = SimpleNamespace(
        id="call_mem_0",
        type="function",
        function=SimpleNamespace(name="memory", arguments=memory_arguments),
    )
    message = SimpleNamespace(content=None, tool_calls=[tool_call])
    choice = SimpleNamespace(finish_reason="tool_calls", message=message)
    usage = SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120)
    return SimpleNamespace(choices=[choice], usage=usage)
class TestFlushMemoriesRespectsConfigTimeout:
    """flush_memories() must not hardcode a timeout; the value comes from
    the ``flush_memories`` task timeout lookup instead."""

    def test_auxiliary_path_omits_explicit_timeout(self, monkeypatch):
        """The call_llm invocation made by flush_memories must carry no ``timeout`` kwarg."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        conversation = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi"},
            {"role": "user", "content": "Note this"},
        ]
        canned_response = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.call_llm", return_value=canned_response) as aux_call, \
             patch("tools.memory_tool.memory_tool", return_value="Saved."):
            agent.flush_memories(conversation)

        aux_call.assert_called_once()
        sent_kwargs = aux_call.call_args.kwargs
        # timeout must NOT be explicitly passed (so _get_task_timeout resolves it)
        assert "timeout" not in sent_kwargs, (
            "flush_memories should not pass explicit timeout to _call_llm; "
            "let _get_task_timeout('flush_memories') resolve from config"
        )

    def test_fallback_path_uses_config_timeout(self, monkeypatch):
        """When call_llm fails and the direct client is used, its timeout must be
        the value returned by ``_get_task_timeout('flush_memories')``."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        agent.client = MagicMock()
        create_mock = agent.client.chat.completions.create
        create_mock.return_value = _chat_response_with_memory_call()

        expected_timeout = 180.0
        conversation = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi"},
            {"role": "user", "content": "Save this"},
        ]

        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")), \
             patch("agent.auxiliary_client._get_task_timeout", return_value=expected_timeout) as timeout_lookup, \
             patch("tools.memory_tool.memory_tool", return_value="Saved."):
            agent.flush_memories(conversation)

        timeout_lookup.assert_called_once_with("flush_memories")
        create_mock.assert_called_once()
        sent_kwargs = create_mock.call_args.kwargs
        assert sent_kwargs.get("timeout") == expected_timeout, (
            f"Expected timeout={expected_timeout} from config, got {sent_kwargs.get('timeout')}"
        )
class TestFlushMemoriesUsesAuxiliaryClient:
    """When an auxiliary client is available, flush_memories should use it
    instead of self.client -- especially critical in Codex mode."""

    def test_flush_uses_auxiliary_when_available(self, monkeypatch):
        """In Codex mode the flush goes through call_llm with task ``flush_memories``."""
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")

        mock_response = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_call:
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Remember this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        mock_call.assert_called_once()
        call_kwargs = mock_call.call_args
        assert call_kwargs.kwargs.get("task") == "flush_memories"

    def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
        """Non-Codex mode with no auxiliary falls back to self.client."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        agent.client = MagicMock()
        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Save this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        agent.client.chat.completions.create.assert_called_once()

    def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch):
        """Provider/API failures from auxiliary flush must be visible.

        Exhausted keys and rate limits are not always RuntimeError. They used
        to fall into the broad outer handler and disappear into debug logs.
        """
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        agent.client = MagicMock()
        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
        events = []
        agent.status_callback = lambda kind, text=None: events.append((kind, text))

        with patch("agent.auxiliary_client.call_llm", side_effect=Exception("opencode-go key exhausted")), \
             patch("tools.memory_tool.memory_tool", return_value="Saved."):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Save this"},
            ]
            agent.flush_memories(messages)

        agent.client.chat.completions.create.assert_called_once()
        # The callback's ``text`` defaults to None; skip text-less events so a
        # missing warning shows up as an assertion failure, not a TypeError.
        assert any(
            kind == "warn" and text is not None and "Auxiliary memory flush failed" in text
            for kind, text in events
        )

    def test_flush_executes_memory_tool_calls(self, monkeypatch):
        """Verify that memory tool calls from the flush response actually get executed."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")

        mock_response = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Note this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)

        mock_memory.assert_called_once()
        call_kwargs = mock_memory.call_args
        assert call_kwargs.kwargs["action"] == "add"
        assert call_kwargs.kwargs["target"] == "notes"
        assert "dark mode" in call_kwargs.kwargs["content"]

    def test_flush_bridges_memory_write_metadata(self, monkeypatch):
        """Flush memory writes notify external providers with flush provenance."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        agent._memory_manager = MagicMock()
        agent.session_id = "sess-flush"
        agent.platform = "cli"

        mock_response = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Note this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        agent._memory_manager.on_memory_write.assert_called_once()
        call_kwargs = agent._memory_manager.on_memory_write.call_args
        assert call_kwargs.args[:3] == ("add", "notes", "User prefers dark mode.")
        metadata = call_kwargs.kwargs["metadata"]
        assert metadata["write_origin"] == "memory_flush"
        assert metadata["execution_context"] == "flush_memories"
        assert metadata["session_id"] == "sess-flush"

    def test_flush_strips_artifacts_from_messages(self, monkeypatch):
        """After flush, the flush prompt and any response should be removed from messages."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")

        mock_response = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Remember X"},
            ]
            original_len = len(messages)
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        # Messages should not grow from the flush
        assert len(messages) <= original_len
        # No flush sentinel should remain
        for msg in messages:
            assert "_flush_sentinel" not in msg
class TestFlushMemoriesCodexFallback:
"""When no auxiliary client exists and we're in Codex mode, flush should
use the Codex Responses API path instead of chat.completions."""
def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch):
    """With call_llm failing in Codex mode, the flush runs through ``_run_codex_stream``
    and the returned ``memory`` function call is executed."""
    agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")

    # Responses-style payload holding a single ``memory`` function call.
    memory_call = SimpleNamespace(
        type="function_call",
        call_id="call_1",
        name="memory",
        arguments=json.dumps({
            "action": "add",
            "target": "notes",
            "content": "Codex flush test",
        }),
    )
    codex_response = SimpleNamespace(
        output=[memory_call],
        usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
        status="completed",
        model="gpt-5-codex",
    )
    built_kwargs = {
        "model": "gpt-5-codex",
        "instructions": "test",
        "input": [],
        "tools": [],
        "max_output_tokens": 4096,
    }
    conversation = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi"},
        {"role": "user", "content": "Save this"},
    ]

    with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")), \
         patch.object(agent, "_run_codex_stream", return_value=codex_response) as stream_mock, \
         patch.object(agent, "_build_api_kwargs") as build_mock, \
         patch("tools.memory_tool.memory_tool", return_value="Saved.") as memory_mock:
        build_mock.return_value = built_kwargs
        agent.flush_memories(conversation)

    stream_mock.assert_called_once()
    memory_mock.assert_called_once()
    assert memory_mock.call_args.kwargs["content"] == "Codex flush test"
@pytest.mark.parametrize(
"provider,base_url",
[
# chatgpt.com/backend-api/codex — rejects temperature unconditionally
("openai-codex", "https://chatgpt.com/backend-api/codex"),
# Native OpenAI Responses — rejects temperature on gpt-5/o-series reasoning models
("openai", "https://api.openai.com/v1"),
# Copilot Responses — rejects temperature on reasoning models
("copilot", "https://api.githubcopilot.com"),
],
)
def test_codex_fallback_never_sends_temperature(self, monkeypatch, provider, base_url):
"""Regression for the ``⚠ Auxiliary memory flush failed: HTTP 400:
Unsupported parameter: temperature`` error.
The codex_responses fallback must strip temperature before calling
_run_codex_stream the Responses API does not accept it on any
supported backend, matching the transport's behavior."""
agent = _make_agent(monkeypatch, api_mode="codex_responses", provider=provider)
agent.base_url = base_url
codex_response = SimpleNamespace(
output=[
SimpleNamespace(
type="function_call",
call_id="call_1",
name="memory",
arguments=json.dumps({
"action": "add",
"target": "notes",
"content": "no-temp test",
}),
),
],
usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
status="completed",
model="gpt-5.5",
)
with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")), \
patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
patch.object(agent, "_build_api_kwargs") as mock_build, \
patch("tools.memory_tool.memory_tool", return_value="Saved."):
# Simulate a transport that (correctly) never includes temperature,
# but also verify we strip any stray temperature the fallback used
# to inject before the fix.
mock_build.return_value = {
"model": "gpt-5.5",
"instructions": "test",
"input": [],
"tools": [],
"max_output_tokens": 4096,
# Intentionally poison the dict to prove we pop it:
"temperature": 0.3,
}
messages = [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi"},
{"role": "user", "content": "Save this"},
]
agent.flush_memories(messages)
mock_stream.assert_called_once()
sent_kwargs = mock_stream.call_args.args[0]
assert "temperature" not in sent_kwargs, (
f"codex_responses fallback must strip temperature before calling "
f"_run_codex_stream, got: {sent_kwargs.get('temperature')!r}"
)
@@ -0,0 +1,219 @@
"""Tests for flush_memories context-overflow prevention.
1. _check_compression_model_feasibility now also resolves the
flush_memories auxiliary model and uses min(compression, flush) as the
effective aux context.
2. Headroom is always deducted before comparing aux_context vs threshold
(not only when aux_context < threshold).
3. flush_memories() trims oversized conversations before the LLM call as
defence-in-depth for paths that bypass preflight compression.
"""
import sys
import types
from types import SimpleNamespace
from unittest.mock import patch, MagicMock
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
import run_agent
# ── Helpers ──────────────────────────────────────────────────────────────
class _FakeOpenAI:
def __init__(self, **kw):
self.api_key = kw.get("api_key", "test")
self.base_url = kw.get("base_url", "http://test")
def close(self):
pass
def _make_agent(monkeypatch, **kw):
    """Build an ``AIAgent`` whose tooling is stubbed and whose flush is enabled.

    ``run_agent`` is patched so that the only tool definition is a ``memory``
    function, toolset requirements are empty, and ``OpenAI`` is replaced by
    ``_FakeOpenAI``. Optional ``provider`` / ``api_mode`` keywords override
    the ``"openrouter"`` / ``"chat_completions"`` defaults.

    The returned agent has a mock memory store, a minimum flush turn count
    of 1 and a user turn count of 5.
    """

    def _memory_only_tools(**k):
        # A fresh schema is built on every call, as the patched function
        # may be invoked more than once.
        return [
            {"type": "function", "function": {
                "name": "memory", "description": "m",
                "parameters": {"type": "object", "properties": {
                    "action": {"type": "string"},
                    "target": {"type": "string"},
                    "content": {"type": "string"},
                }},
            }},
        ]

    monkeypatch.setattr(run_agent, "get_tool_definitions", _memory_only_tools)
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)

    provider = kw.get("provider", "openrouter")
    api_mode = kw.get("api_mode", "chat_completions")
    agent = run_agent.AIAgent(
        api_key="test-key",
        base_url="https://test.example.com/v1",
        provider=provider,
        api_mode=api_mode,
        max_iterations=4,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )

    agent._memory_store = MagicMock()
    agent._memory_flush_min_turns = 1
    agent._user_turn_count = 5
    return agent
def _make_msgs(n, chars=400):
return [{"role": "user" if i % 2 == 0 else "assistant",
"content": f"M{i}: " + "x" * max(0, chars - 6)}
for i in range(n)]
def _noop_response():
return SimpleNamespace(
choices=[SimpleNamespace(
finish_reason="stop",
message=SimpleNamespace(content="Nothing.", tool_calls=None),
)],
usage=SimpleNamespace(prompt_tokens=50, completion_tokens=10, total_tokens=60),
)
# ── Feasibility: flush model + always-deduct headroom ────────────────────
class TestFeasibilityFixes:
    """``_check_compression_model_feasibility``: flush model and headroom.

    The auxiliary-client resolver and the context-length lookup are patched
    in every test, so only the feasibility arithmetic is exercised.
    """

    def test_smaller_flush_model_lowers_effective_context(self, monkeypatch):
        """flush_memories model with smaller context drives the threshold."""
        agent = _make_agent(monkeypatch)
        agent.context_compressor.context_length = 200_000
        agent.context_compressor.threshold_tokens = 100_000
        fc = SimpleNamespace(base_url="http://test", api_key="k")

        def _aux(task, **kw):
            # Compression resolves to the big model, anything else (the
            # flush task) to the small one.
            if task == "compression":
                return fc, "big-model"
            return fc, "small-flush-model"

        def _ctx(model, **kw):
            return 200_000 if model == "big-model" else 80_000

        with patch("agent.auxiliary_client.get_text_auxiliary_client", side_effect=_aux), \
                patch("agent.model_metadata.get_model_context_length", side_effect=_ctx):
            agent._check_compression_model_feasibility()

        assert agent.context_compressor.threshold_tokens < 100_000

    def test_same_model_overhead_still_triggers_correction(self, monkeypatch):
        """The primary bug: aux == main model, aux_context > threshold, but
        threshold + overhead > aux_context. Headroom must fire even when
        aux_context >= threshold."""
        agent = _make_agent(monkeypatch)
        agent.context_compressor.context_length = 128_000
        agent.context_compressor.threshold_tokens = 120_000
        fc = SimpleNamespace(base_url="http://test", api_key="k")

        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fc, "same-model")), \
                patch("agent.model_metadata.get_model_context_length",
                      return_value=128_000):
            agent._check_compression_model_feasibility()

        # 128K - headroom (~12.1K) ≈ 115.9K < 120K → threshold lowered
        assert agent.context_compressor.threshold_tokens < 120_000

    def test_flush_resolution_failure_is_non_fatal(self, monkeypatch):
        """If flush model resolution raises, check proceeds with compression model."""
        agent = _make_agent(monkeypatch)
        agent.context_compressor.context_length = 200_000
        agent.context_compressor.threshold_tokens = 100_000
        fc = SimpleNamespace(base_url="http://test", api_key="k")
        requested_tasks = []

        def _aux(task, **kw):
            requested_tasks.append(task)
            if task == "flush_memories":
                raise RuntimeError("boom")
            return fc, "model"

        with patch("agent.auxiliary_client.get_text_auxiliary_client", side_effect=_aux), \
                patch("agent.model_metadata.get_model_context_length", return_value=200_000):
            # Must not raise even though flush resolution blows up.
            agent._check_compression_model_feasibility()

        assert len(requested_tasks) == 2  # both tasks attempted
        # A bare call count would also pass if the flush task were never
        # requested; make sure the raising branch was actually hit.
        assert "flush_memories" in requested_tasks
# ── flush_memories trimming ──────────────────────────────────────────────
class TestFlushMemoriesTrimming:
    """``flush_memories`` trims oversized conversations before the LLM call.

    ``call_llm`` is patched in every test; the messages it receives are read
    back from the mock's keyword arguments.
    """

    def test_oversized_conversation_trimmed(self, monkeypatch):
        """200 long messages against an 8K-context model must be cut down."""
        agent = _make_agent(monkeypatch)
        agent._cached_system_prompt = "System."
        messages = _make_msgs(200, chars=500)
        fc = SimpleNamespace(base_url="http://test", api_key="k")

        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fc, "small")), \
                patch("agent.model_metadata.get_model_context_length",
                      return_value=8_000), \
                patch("agent.auxiliary_client.call_llm",
                      return_value=_noop_response()) as mock:
            agent.flush_memories(messages)

        assert mock.called, "flush_memories never called call_llm"
        sent = mock.call_args.kwargs.get("messages", [])
        # Without this guard an absent ``messages`` keyword yields [] and the
        # size check below passes without proving any trimming happened.
        assert sent, "call_llm received no messages keyword"
        assert len(sent) < 100

    def test_small_conversation_untouched(self, monkeypatch):
        """A three-message chat fits easily and is sent in full."""
        agent = _make_agent(monkeypatch)
        agent._cached_system_prompt = "System."
        messages = [
            {"role": "user", "content": "Hi"},
            {"role": "assistant", "content": "Hey"},
            {"role": "user", "content": "Save"},
        ]
        fc = SimpleNamespace(base_url="http://test", api_key="k")

        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fc, "big")), \
                patch("agent.model_metadata.get_model_context_length",
                      return_value=200_000), \
                patch("agent.auxiliary_client.call_llm",
                      return_value=_noop_response()) as mock:
            agent.flush_memories(messages)

        sent = mock.call_args.kwargs.get("messages", [])
        assert len(sent) == 5  # sys + 3 conv + flush

    def test_trim_failure_does_not_block_flush(self, monkeypatch):
        """A failing aux-client lookup must not stop the flush call itself."""
        agent = _make_agent(monkeypatch)
        messages = _make_msgs(10, chars=100)

        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   side_effect=RuntimeError("no provider")), \
                patch("agent.auxiliary_client.call_llm",
                      return_value=_noop_response()) as mock:
            agent.flush_memories(messages)

        assert mock.called

    def test_sentinel_cleaned_after_trim(self, monkeypatch):
        """The caller's list keeps its length and carries no flush sentinel."""
        agent = _make_agent(monkeypatch)
        messages = [
            {"role": "user", "content": "Hi"},
            {"role": "assistant", "content": "Hey"},
            {"role": "user", "content": "Save"},
        ]
        n = len(messages)
        fc = SimpleNamespace(base_url="http://test", api_key="k")

        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fc, "m")), \
                patch("agent.model_metadata.get_model_context_length",
                      return_value=128_000), \
                patch("agent.auxiliary_client.call_llm",
                      return_value=_noop_response()):
            agent.flush_memories(messages)

        assert len(messages) == n
        assert not any(m.get("_flush_sentinel") for m in messages)
+1 -106
View File
@@ -12,7 +12,7 @@ from types import SimpleNamespace
from unittest.mock import patch, MagicMock
import pytest
from agent.codex_responses_adapter import _chat_content_to_responses_parts, _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
from agent.codex_responses_adapter import _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
@@ -520,111 +520,6 @@ class TestChatMessagesToResponsesInput:
reasoning_items = [i for i in items if i.get("type") == "reasoning"]
assert len(reasoning_items) == 0
def test_user_multimodal_content_uses_input_text(self, monkeypatch):
    """A user message whose content is a list converts to ``input_text`` parts."""
    agent = _make_agent(
        monkeypatch,
        "openai-codex",
        api_mode="codex_responses",
        base_url="https://chatgpt.com/backend-api/codex",
    )
    user_message = {"role": "user", "content": [{"type": "text", "text": "find files"}]}

    items = _chat_messages_to_responses_input([user_message])

    assert len(items) == 1
    (item,) = items
    assert item["role"] == "user"
    parts = item["content"]
    assert isinstance(parts, list)
    first_part = parts[0]
    assert first_part["type"] == "input_text"
    assert first_part["text"] == "find files"
def test_assistant_multimodal_content_uses_output_text(self, monkeypatch):
    """An assistant message whose content is a list converts to ``output_text``.

    This is the fix for #15687 — the Responses API rejects input_text
    inside assistant messages.
    """
    agent = _make_agent(
        monkeypatch,
        "openai-codex",
        api_mode="codex_responses",
        base_url="https://chatgpt.com/backend-api/codex",
    )
    assistant_message = {
        "role": "assistant",
        "content": [{"type": "text", "text": "I found the files."}],
    }

    items = _chat_messages_to_responses_input([assistant_message])

    assert len(items) == 1
    (item,) = items
    assert item["role"] == "assistant"
    parts = item["content"]
    assert isinstance(parts, list)
    first_part = parts[0]
    assert first_part["type"] == "output_text"
    assert first_part["text"] == "I found the files."
def test_preflight_preserves_assistant_output_text(self, monkeypatch):
    """``_preflight_codex_input_items`` keeps each role's text part type."""
    agent = _make_agent(
        monkeypatch,
        "openai-codex",
        api_mode="codex_responses",
        base_url="https://chatgpt.com/backend-api/codex",
    )
    user_item = {"role": "user", "content": [{"type": "input_text", "text": "hi"}]}
    assistant_item = {"role": "assistant", "content": [{"type": "output_text", "text": "hello"}]}

    normalized = _preflight_codex_input_items([user_item, assistant_item])

    normalized_user, normalized_assistant = normalized[0], normalized[1]
    assert normalized_user["content"][0]["type"] == "input_text"
    assert normalized_assistant["content"][0]["type"] == "output_text"
def test_full_round_trip_with_list_content(self, monkeypatch):
    """Conversion followed by preflight keeps the right part type per role."""
    agent = _make_agent(
        monkeypatch,
        "openai-codex",
        api_mode="codex_responses",
        base_url="https://chatgpt.com/backend-api/codex",
    )
    turns = [("user", "hello"), ("assistant", "hi there"), ("user", "continue")]
    messages = [
        {"role": role, "content": [{"type": "text", "text": text}]}
        for role, text in turns
    ]

    normalized = _preflight_codex_input_items(
        _chat_messages_to_responses_input(messages)
    )

    # User items (positions 0 and 2) use input_text
    for position in (0, 2):
        assert normalized[position]["content"][0]["type"] == "input_text"
    # The assistant item (position 1) uses output_text
    assert normalized[1]["content"][0]["type"] == "output_text"
class TestChatContentToResponsesParts:
    """Checks on the ``role`` parameter of ``_chat_content_to_responses_parts`` (#15687)."""

    def test_default_role_emits_input_text(self):
        """With no role given, text parts come out as ``input_text``."""
        parts = _chat_content_to_responses_parts([{"type": "text", "text": "hello"}])
        assert parts[0]["type"] == "input_text"

    def test_explicit_user_role_emits_input_text(self):
        """``role="user"`` yields ``input_text``."""
        source = [{"type": "text", "text": "hello"}]
        parts = _chat_content_to_responses_parts(source, role="user")
        assert parts[0]["type"] == "input_text"

    def test_assistant_role_emits_output_text(self):
        """``role="assistant"`` yields ``output_text``."""
        source = [{"type": "text", "text": "hello"}]
        parts = _chat_content_to_responses_parts(source, role="assistant")
        assert parts[0]["type"] == "output_text"

    def test_assistant_role_with_string_parts(self):
        """Bare string parts in assistant content also become ``output_text``."""
        parts = _chat_content_to_responses_parts(["hello"], role="assistant")
        first = parts[0]
        assert first["type"] == "output_text"
        assert first["text"] == "hello"

    def test_assistant_role_with_mixed_input_output_text_types(self):
        """Parts already tagged input_text/output_text are normalized to the role's type."""
        source = [
            {"type": kind, "text": text}
            for kind, text in (("input_text", "a"), ("output_text", "b"), ("text", "c"))
        ]
        parts = _chat_content_to_responses_parts(source, role="assistant")
        # Every text part becomes output_text whatever its original tag was.
        assert all(part["type"] == "output_text" for part in parts)
        assert [part["text"] for part in parts] == ["a", "b", "c"]
# ── Response normalization tests ─────────────────────────────────────────────
+42
View File
@@ -3078,6 +3078,48 @@ class TestRetryExhaustion:
assert "bad messages" in result["error"]
# ---------------------------------------------------------------------------
# Flush sentinel leak
# ---------------------------------------------------------------------------
class TestFlushSentinelNotLeaked:
    """_flush_sentinel must be stripped before sending messages to the API."""

    def test_flush_sentinel_stripped_from_api_messages(self, agent_with_memory_tool):
        """Verify _flush_sentinel is not sent to the API provider.

        The auxiliary client is made to fail so that ``flush_memories`` goes
        through ``agent.client.chat.completions.create``, whose captured
        ``messages`` keyword is then inspected.
        """
        agent = agent_with_memory_tool
        agent._memory_store = MagicMock()
        agent._memory_flush_min_turns = 1
        agent._user_turn_count = 10
        agent._cached_system_prompt = "system"
        messages = [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi"},
            {"role": "user", "content": "remember this"},
        ]

        # Mock the API to return a simple response (no tool calls)
        mock_msg = SimpleNamespace(content="OK", tool_calls=None)
        mock_choice = SimpleNamespace(message=mock_msg)
        mock_response = SimpleNamespace(choices=[mock_choice])
        agent.client.chat.completions.create.return_value = mock_response

        # Bypass auxiliary client so flush uses agent.client directly
        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")):
            agent.flush_memories(messages, min_turns=0)

        # Check what was actually sent to the API
        call_args = agent.client.chat.completions.create.call_args
        assert call_args is not None, "flush_memories never called the API"

        # ``call_args[1]`` is the same kwargs mapping as ``call_args.kwargs``,
        # so a second lookup there adds nothing. Fail with a clear message
        # instead of a TypeError when no messages were passed.
        api_messages = call_args.kwargs.get("messages")
        assert api_messages, "flush_memories sent no messages keyword to the API"

        for msg in api_messages:
            assert "_flush_sentinel" not in msg, (
                f"_flush_sentinel leaked to API in message: {msg}"
            )
# ---------------------------------------------------------------------------
# Conversation history mutation
# ---------------------------------------------------------------------------
@@ -1,162 +0,0 @@
"""Tests that /stop interrupts streaming retry loops immediately.
When the agent is interrupted during a streaming API call, the outer poll
loop closes the HTTP connection. The inner `_call()` thread sees a
connection error and enters its retry loop. Before this fix, the retry
loop would open a FRESH connection without checking `_interrupt_requested`,
making /stop take multiple retry cycles × read-timeout to actually stop
(510+ seconds observed on slow ollama-cloud providers).
The fix adds an `_interrupt_requested` check at the top of the retry loop
so the agent exits immediately instead of retrying.
"""
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
def _make_agent(**kwargs):
    """Return a quiet ``AIAgent`` forced into ``chat_completions`` mode.

    Keyword arguments override the built-in constructor defaults; the API
    mode is set on the instance after construction.
    """
    from run_agent import AIAgent

    settings = {
        "api_key": "test-key",
        "base_url": "https://example.com/v1",
        "model": "test/model",
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
        **kwargs,
    }
    agent = AIAgent(**settings)
    agent.api_mode = "chat_completions"
    return agent
class TestStreamInterruptBeforeRetry:
    """The streaming retry loop must honour ``_interrupt_requested``.

    Each test patches the per-request client factory so that
    ``chat.completions.create`` is fully scripted.
    """

    @staticmethod
    def _chunk(content, finish_reason):
        """Build one fake streaming chunk with a single choice."""
        delta = SimpleNamespace(
            content=content,
            tool_calls=None,
            reasoning_content=None,
            reasoning=None,
        )
        choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
        return SimpleNamespace(choices=[choice], model="test/model", usage=None)

    @pytest.mark.filterwarnings(
        "ignore::pytest.PytestUnhandledThreadExceptionWarning"
    )
    @patch("run_agent.AIAgent._create_request_openai_client")
    @patch("run_agent.AIAgent._close_request_openai_client")
    def test_interrupt_prevents_stream_retry(self, mock_close, mock_create):
        """When _interrupt_requested is set during a transient stream error,
        the retry loop must NOT retry — it should raise InterruptedError
        immediately instead of opening a fresh connection."""
        import httpx

        attempts = 0

        def fail_once_then_interrupt(*args, **kwargs):
            nonlocal attempts
            attempts += 1
            if attempts != 1:
                # Should never reach here — the interrupt check should fire first
                raise httpx.ConnectError("unexpected retry — interrupt not checked!")
            # First attempt: simulate normal failure, then set interrupt
            # (as if /stop arrived while the retry loop processes the error).
            # ``agent`` is bound further down, before this function ever runs.
            agent._interrupt_requested = True
            raise httpx.ConnectError("connection reset by /stop")

        client = MagicMock()
        client.chat.completions.create.side_effect = fail_once_then_interrupt
        mock_create.return_value = client

        agent = _make_agent()
        agent._interrupt_requested = False

        with pytest.raises(InterruptedError, match="interrupted"):
            agent._interruptible_streaming_api_call({})

        # Only 1 attempt should have been made — the interrupt should prevent retry
        assert attempts == 1, (
            f"Expected 1 attempt but got {attempts}. "
            "The retry loop retried despite _interrupt_requested being set."
        )

    @pytest.mark.filterwarnings(
        "ignore::pytest.PytestUnhandledThreadExceptionWarning"
    )
    @patch("run_agent.AIAgent._create_request_openai_client")
    @patch("run_agent.AIAgent._close_request_openai_client")
    def test_interrupt_before_first_attempt(self, mock_close, mock_create):
        """If _interrupt_requested is already set when the streaming call
        starts, it should exit immediately without making any API call."""
        client = MagicMock()
        mock_create.return_value = client

        agent = _make_agent()
        agent._interrupt_requested = True  # Pre-set before call

        with pytest.raises(InterruptedError, match="interrupted"):
            agent._interruptible_streaming_api_call({})

        # No API call should have been made at all
        assert client.chat.completions.create.call_count == 0

    @patch("run_agent.AIAgent._create_request_openai_client")
    @patch("run_agent.AIAgent._close_request_openai_client")
    def test_normal_retry_still_works_without_interrupt(self, mock_close, mock_create):
        """Without an interrupt, transient errors should still retry normally."""
        import httpx

        attempts = 0

        def fail_twice_then_succeed(*args, **kwargs):
            nonlocal attempts
            attempts += 1
            if attempts <= 2:
                raise httpx.ConnectError("transient failure")
            # Third attempt succeeds: one content chunk, then the stop chunk.
            chunks = [
                self._chunk("ok", None),
                self._chunk(None, "stop"),
            ]
            stream = MagicMock()
            stream.__iter__ = MagicMock(return_value=iter(chunks))
            stream.response = MagicMock()
            stream.response.headers = {}
            return stream

        client = MagicMock()
        client.chat.completions.create.side_effect = fail_twice_then_succeed
        mock_create.return_value = client

        agent = _make_agent()
        agent._interrupt_requested = False

        # Should succeed on the third attempt
        result = agent._interruptible_streaming_api_call({})
        assert result is not None
        assert attempts == 3
-153
View File
@@ -83,100 +83,6 @@ def test_status_callback_accepts_single_message_argument():
)
def test_resolve_model_uses_inference_model_env(monkeypatch):
    """With HERMES_MODEL unset, the stripped HERMES_INFERENCE_MODEL is returned."""
    monkeypatch.delenv("HERMES_MODEL", raising=False)
    monkeypatch.setenv("HERMES_INFERENCE_MODEL", " anthropic/claude-sonnet-4.6\n")

    resolved = server._resolve_model()

    assert resolved == "anthropic/claude-sonnet-4.6"
def test_resolve_model_strips_config_model(monkeypatch):
    """With no model env vars, the config default is returned without padding."""
    for name in ("HERMES_MODEL", "HERMES_INFERENCE_MODEL"):
        monkeypatch.delenv(name, raising=False)

    def fake_cfg():
        return {"model": {"default": " nous/hermes-test "}}

    monkeypatch.setattr(server, "_load_cfg", fake_cfg)

    resolved = server._resolve_model()

    assert resolved == "nous/hermes-test"
def test_startup_runtime_uses_tui_provider_env(monkeypatch):
    """HERMES_TUI_PROVIDER is returned as the provider next to HERMES_MODEL."""
    monkeypatch.setenv("HERMES_MODEL", "nous/hermes-test")
    monkeypatch.setenv("HERMES_TUI_PROVIDER", "nous")
    monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)

    runtime = server._resolve_startup_runtime()

    assert runtime == ("nous/hermes-test", "nous")
def test_startup_runtime_does_not_treat_inference_provider_as_explicit(monkeypatch):
    """HERMES_INFERENCE_PROVIDER alone does not yield a provider at startup."""
    monkeypatch.setenv("HERMES_MODEL", "nous/hermes-test")
    monkeypatch.delenv("HERMES_TUI_PROVIDER", raising=False)
    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous")

    def detect_nothing(model, provider):
        return None

    monkeypatch.setattr(
        "hermes_cli.models.detect_static_provider_for_model",
        detect_nothing,
    )

    runtime = server._resolve_startup_runtime()

    assert runtime == ("nous/hermes-test", None)
def test_startup_runtime_detects_provider_for_model_env(monkeypatch):
    """A short HERMES_MODEL alias is expanded through the static detector."""
    monkeypatch.setenv("HERMES_MODEL", "sonnet")
    for name in ("HERMES_TUI_PROVIDER", "HERMES_INFERENCE_PROVIDER"):
        monkeypatch.delenv(name, raising=False)
    monkeypatch.setattr(server, "_load_cfg", lambda: {"model": {"provider": "auto"}})

    def fake_detect(model, current_provider):
        # The detector must receive the raw alias and the configured provider.
        assert model == "sonnet"
        assert current_provider == "auto"
        return "anthropic", "anthropic/claude-sonnet-4.6"

    monkeypatch.setattr(
        "hermes_cli.models.detect_static_provider_for_model", fake_detect
    )

    expected = ("anthropic/claude-sonnet-4.6", "anthropic")
    assert server._resolve_startup_runtime() == expected
def test_startup_runtime_resolves_short_alias_without_network(monkeypatch):
    """Resolving the ``sonnet`` alias must not fetch the OpenRouter model list."""
    monkeypatch.setenv("HERMES_MODEL", "sonnet")
    for name in ("HERMES_TUI_PROVIDER", "HERMES_INFERENCE_PROVIDER"):
        monkeypatch.delenv(name, raising=False)
    monkeypatch.setattr(server, "_load_cfg", lambda: {"model": {"provider": "auto"}})

    def forbidden_fetch(*_args, **_kwargs):
        raise AssertionError("network lookup should not run")

    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models", forbidden_fetch)

    model, provider = server._resolve_startup_runtime()

    assert provider == "anthropic"
    assert model.startswith("claude-sonnet")
def test_startup_runtime_does_not_call_network_detector(monkeypatch):
    """Startup resolution must never invoke ``detect_provider_for_model``."""
    monkeypatch.setenv("HERMES_MODEL", "sonnet")
    for name in ("HERMES_TUI_PROVIDER", "HERMES_INFERENCE_PROVIDER"):
        monkeypatch.delenv(name, raising=False)
    monkeypatch.setattr(server, "_load_cfg", lambda: {"model": {"provider": "auto"}})

    def forbidden_detector(*_args, **_kwargs):
        raise AssertionError("network detector called")

    monkeypatch.setattr("hermes_cli.models.detect_provider_for_model", forbidden_detector)

    model, provider = server._resolve_startup_runtime()

    assert model
    assert provider in {None, "anthropic"}
def _session(agent=None, **extra):
return {
"agent": agent if agent is not None else types.SimpleNamespace(),
@@ -339,14 +245,6 @@ def test_setup_status_reports_provider_config(monkeypatch):
assert resp["result"]["provider_configured"] is False
def test_complete_slash_includes_provider_alias():
    """Completing ``/pro`` offers an item whose text is ``provider``."""
    request = {"id": "1", "method": "complete.slash", "params": {"text": "/pro"}}

    resp = server.handle_request(request)

    offered = [item["text"] for item in resp["result"]["items"]]
    assert any(text == "provider" for text in offered)
def test_config_set_reasoning_updates_live_session_and_agent(tmp_path, monkeypatch):
monkeypatch.setattr(server, "_hermes_home", tmp_path)
agent = types.SimpleNamespace(reasoning_config=None)
@@ -517,57 +415,6 @@ def test_config_set_model_syncs_inference_provider_env(monkeypatch):
assert os.environ["HERMES_INFERENCE_PROVIDER"] == "anthropic"
def test_config_set_model_syncs_tui_provider_env(monkeypatch):
    """A model switch updates HERMES_TUI_PROVIDER and the model env vars.

    The handler writes several variables straight into ``os.environ``. The
    request and its assertions therefore run inside ``patch.dict(os.environ)``,
    which snapshots the environment and restores it on exit, so none of those
    writes reach later tests.
    """
    from unittest.mock import patch

    class Agent:
        model = "gpt-5.3-codex"
        provider = "openai-codex"
        base_url = ""
        api_key = ""

        def switch_model(self, **kwargs):
            self.model = kwargs["new_model"]
            self.provider = kwargs["new_provider"]

    agent = Agent()
    server._sessions["sid"] = _session(agent=agent)
    monkeypatch.setenv("HERMES_TUI_PROVIDER", "openai-codex")
    monkeypatch.setattr(server, "_restart_slash_worker", lambda session: None)
    monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None)

    def fake_switch_model(**kwargs):
        return types.SimpleNamespace(
            success=True,
            new_model="anthropic/claude-sonnet-4.6",
            target_provider="anthropic",
            api_key="key",
            base_url="https://api.anthropic.com",
            api_mode="anthropic_messages",
            warning_message="",
        )

    monkeypatch.setattr("hermes_cli.model_switch.switch_model", fake_switch_model)

    try:
        # Snapshot taken after HERMES_TUI_PROVIDER is set, so the handler
        # still sees the explicit TUI provider it is supposed to update.
        with patch.dict(os.environ):
            resp = server.handle_request(
                {
                    "id": "1",
                    "method": "config.set",
                    "params": {
                        "session_id": "sid",
                        "key": "model",
                        "value": "anthropic/claude-sonnet-4.6 --provider anthropic",
                    },
                }
            )

            assert resp["result"]["value"] == "anthropic/claude-sonnet-4.6"
            assert os.environ["HERMES_TUI_PROVIDER"] == "anthropic"
            assert os.environ["HERMES_MODEL"] == "anthropic/claude-sonnet-4.6"
            assert os.environ["HERMES_INFERENCE_MODEL"] == "anthropic/claude-sonnet-4.6"
    finally:
        server._sessions.clear()
def test_config_set_personality_rejects_unknown_name(monkeypatch):
monkeypatch.setattr(
server,
+5 -50
View File
@@ -560,55 +560,17 @@ def resolve_skin() -> dict:
def _resolve_model() -> str:
env = (
os.environ.get("HERMES_MODEL", "")
or os.environ.get("HERMES_INFERENCE_MODEL", "")
).strip()
env = os.environ.get("HERMES_MODEL", "")
if env:
return env
m = _load_cfg().get("model", "")
if isinstance(m, dict):
return str(m.get("default", "") or "").strip()
return m.get("default", "")
if isinstance(m, str) and m:
return m.strip()
return m
return "anthropic/claude-sonnet-4"
def _resolve_startup_runtime() -> tuple[str, str | None]:
    """Return the ``(model, provider)`` pair to start the agent with.

    Resolution order:

    1. A non-empty ``HERMES_TUI_PROVIDER`` is returned as the provider with
       the model from ``_resolve_model()``.
    2. If neither ``HERMES_MODEL`` nor ``HERMES_INFERENCE_MODEL`` is set, the
       provider is ``None``.
    3. Otherwise ``detect_static_provider_for_model`` is asked about the
       explicit model, with the configured provider, then
       ``HERMES_INFERENCE_PROVIDER``, then ``"auto"`` as the current provider.
       A truthy result is returned as ``(detected_model, provider)``.

    Any exception raised during step 3 is swallowed and the function falls
    back to ``(model, None)``.
    """
    model = _resolve_model()

    tui_provider = os.environ.get("HERMES_TUI_PROVIDER", "").strip()
    if tui_provider:
        return model, tui_provider

    env_model = os.environ.get("HERMES_MODEL", "")
    if not env_model:
        env_model = os.environ.get("HERMES_INFERENCE_MODEL", "")
    env_model = env_model.strip()
    if not env_model:
        return model, None

    try:
        from hermes_cli.models import detect_static_provider_for_model

        model_cfg = _load_cfg().get("model") or {}

        current = ""
        if isinstance(model_cfg, dict):
            current = str(model_cfg.get("provider") or "").strip().lower()
        if not current:
            current = os.environ.get("HERMES_INFERENCE_PROVIDER", "").strip().lower()
        if not current:
            current = "auto"

        match = detect_static_provider_for_model(env_model, current)
        if match:
            found_provider, found_model = match
            return found_model, found_provider
    except Exception:
        # Best effort: detection problems must not prevent startup.
        pass

    return model, None
def _write_config_key(key_path: str, value):
cfg = _load_cfg()
current = cfg
@@ -774,15 +736,12 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
_emit("session.info", sid, _session_info(agent))
os.environ["HERMES_MODEL"] = result.new_model
os.environ["HERMES_INFERENCE_MODEL"] = result.new_model
# Keep the process-level provider env var in sync with the user's explicit
# choice so any ambient re-resolution (credential pool refresh, compressor
# rebuild, aux clients) resolves to the new provider instead of the
# original one persisted in config or env.
if result.target_provider:
os.environ["HERMES_INFERENCE_PROVIDER"] = result.target_provider
if os.environ.get("HERMES_TUI_PROVIDER"):
os.environ["HERMES_TUI_PROVIDER"] = result.target_provider
if persist_global:
_persist_model_switch(result)
return {"value": result.new_model, "warning": result.warning_message or ""}
@@ -1318,13 +1277,9 @@ def _make_agent(sid: str, key: str, session_id: str | None = None):
cfg = _load_cfg()
system_prompt = ((cfg.get("agent") or {}).get("system_prompt", "") or "").strip()
model, requested_provider = _resolve_startup_runtime()
runtime = resolve_runtime_provider(
requested=requested_provider,
target_model=model or None,
)
runtime = resolve_runtime_provider(requested=None)
return AIAgent(
model=model,
model=_resolve_model(),
provider=runtime.get("provider"),
base_url=runtime.get("base_url"),
api_key=runtime.get("api_key"),
@@ -53,11 +53,7 @@ export function AlternateScreen(t0: Props) {
}
writeRaw(
ENTER_ALT_SCREEN +
ERASE_SCROLLBACK +
ERASE_SCREEN +
CURSOR_HOME +
(mouseTracking ? ENABLE_MOUSE_TRACKING : DISABLE_MOUSE_TRACKING)
ENTER_ALT_SCREEN + ERASE_SCROLLBACK + ERASE_SCREEN + CURSOR_HOME + (mouseTracking ? ENABLE_MOUSE_TRACKING : DISABLE_MOUSE_TRACKING)
)
ink?.setAltScreenActive(true, mouseTracking)
+1 -14
View File
@@ -323,39 +323,27 @@ const measureTextNode = function (
widthMode: LayoutMeasureMode
): { width: number; height: number } {
const elem = node.nodeName !== '#text' ? (node as DOMElement) : node.parentNode
if (elem && elem.nodeName === 'ink-text') {
let cache = elem._textMeasureCache
if (!cache) {
cache = { gen: 0, entries: new Map() }
elem._textMeasureCache = cache
}
const key = `${width}|${widthMode}`
const hit = cache.entries.get(key)
if (hit && hit._gen === cache.gen) {
return hit.result
}
const result = computeTextMeasure(node, width, widthMode)
// Enforce cap with FIFO eviction to avoid unbounded growth during
// pathological frames where yoga probes many widths.
if (cache.entries.size >= MEASURE_CACHE_CAP) {
const firstKey = cache.entries.keys().next().value
if (firstKey !== undefined) {
cache.entries.delete(firstKey)
}
cache.entries.delete(firstKey)
}
cache.entries.set(key, { _gen: cache.gen, result })
return result
}
return computeTextMeasure(node, width, widthMode)
}
@@ -487,7 +475,6 @@ export const clearYogaNodeReferences = (node: DOMElement | TextNode): void => {
for (const child of node.childNodes) {
clearYogaNodeReferences(child)
}
node._textMeasureCache = undefined
}
+2 -41
View File
@@ -1,6 +1,6 @@
import { ansiCodesToString, diffAnsiCodes, type AnsiCode } from '@alcalzone/ansi-tokenize'
import { type AnsiCode, ansiCodesToString, diffAnsiCodes } from '@alcalzone/ansi-tokenize'
import { unionRect, type Point, type Rectangle, type Size } from './layout/geometry.js'
import { type Point, type Rectangle, type Size, unionRect } from './layout/geometry.js'
import { BEL, ESC, SEP } from './termio/ansi.js'
import * as warn from './warn.js'
@@ -436,13 +436,6 @@ export type Screen = Size & {
*/
noSelect: Uint8Array
/**
* Per-cell written bitmap. A written plain space and never-written padding
* share the same packed cell value, so selection needs this side channel to
* preserve code indentation without selecting blank UI margins.
*/
written: Uint8Array
/**
* Per-ROW soft-wrap continuation marker. softWrap[r]=N>0 means row r
* is a word-wrap continuation of row r-1 (the `\n` before it was
@@ -482,14 +475,6 @@ export function isEmptyCellAt(screen: Screen, x: number, y: number): boolean {
return isEmptyCellByIndex(screen, y * screen.width + x)
}
/**
 * Report whether the cell at (x, y) is flagged in the screen's `written`
 * bitmap. Coordinates outside the screen are reported as not written.
 */
export function isWrittenCellAt(screen: Screen, x: number, y: number): boolean {
  const columnInRange = x >= 0 && x < screen.width
  const rowInRange = y >= 0 && y < screen.height

  // The bitmap is row-major with one byte per cell; 1 marks a written cell.
  return columnInRange && rowInRange ? screen.written[y * screen.width + x] === 1 : false
}
/**
* Check if a Cell (view object) represents an empty cell.
*/
@@ -548,7 +533,6 @@ export function createScreen(
emptyStyleId: styles.none,
damage: undefined,
noSelect: new Uint8Array(size),
written: new Uint8Array(size),
softWrap: new Int32Array(height)
}
}
@@ -582,7 +566,6 @@ export function resetScreen(screen: Screen, width: number, height: number): void
screen.cells = new Int32Array(buf)
screen.cells64 = new BigInt64Array(buf)
screen.noSelect = new Uint8Array(size)
screen.written = new Uint8Array(size)
}
if (screen.softWrap.length < height) {
@@ -592,7 +575,6 @@ export function resetScreen(screen: Screen, width: number, height: number): void
// Reset all cells — single fill call, no loop
screen.cells64.fill(EMPTY_CELL_VALUE, 0, size)
screen.noSelect.fill(0, 0, size)
screen.written.fill(0, 0, size)
screen.softWrap.fill(0, 0, height)
// Update dimensions
@@ -788,7 +770,6 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
if ((cells[spacerCI + 1]! & WIDTH_MASK) === CellWidth.SpacerTail) {
cells[spacerCI] = EMPTY_CHAR_INDEX
cells[spacerCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
screen.written[y * screen.width + spacerX] = 0
}
}
}
@@ -806,7 +787,6 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
if ((cells[wideCI + 1]! & WIDTH_MASK) === CellWidth.Wide) {
cells[wideCI] = EMPTY_CHAR_INDEX
cells[wideCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
screen.written[y * screen.width + x - 1] = 0
clearedWideX = x - 1
}
}
@@ -815,7 +795,6 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
// Pack cell data into cells array
cells[ci] = internCharString(screen, cell.char)
cells[ci + 1] = packWord1(cell.styleId, internHyperlink(screen, cell.hyperlink), cell.width)
screen.written[y * screen.width + x] = 1
// Track damage - expand bounds in place instead of allocating new objects
// Include the main cell position and any cleared orphan cells
@@ -862,13 +841,11 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
if (spacerX + 1 < screen.width && (cells[orphanCI + 1]! & WIDTH_MASK) === CellWidth.SpacerTail) {
cells[orphanCI] = EMPTY_CHAR_INDEX
cells[orphanCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
screen.written[y * screen.width + spacerX + 1] = 0
}
}
cells[spacerCI] = SPACER_CHAR_INDEX
cells[spacerCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.SpacerTail)
screen.written[y * screen.width + spacerX] = 1
// Expand damage to include SpacerTail so diff() scans it
const d = screen.damage
@@ -952,8 +929,6 @@ export function blitRegion(
const dstCells = dst.cells
const srcNoSel = src.noSelect
const dstNoSel = dst.noSelect
const srcWritten = src.written
const dstWritten = dst.written
// softWrap is per-row — copy the row range regardless of stride/width.
// Partial-width blits still carry the row's wrap provenance since the
@@ -972,7 +947,6 @@ export function blitRegion(
const nsStart = regionY * src.width
const nsLen = (maxY - regionY) * src.width
dstNoSel.set(srcNoSel.subarray(nsStart, nsStart + nsLen), nsStart)
dstWritten.set(srcWritten.subarray(nsStart, nsStart + nsLen), nsStart)
} else {
// Per-row copy for partial-width or mismatched-stride regions
let srcRowCI = regionY * srcStride + (regionX << 1)
@@ -983,7 +957,6 @@ export function blitRegion(
for (let y = regionY; y < maxY; y++) {
dstCells.set(srcCells.subarray(srcRowCI, srcRowCI + rowBytes), dstRowCI)
dstNoSel.set(srcNoSel.subarray(srcRowNS, srcRowNS + rowLen), dstRowNS)
dstWritten.set(srcWritten.subarray(srcRowNS, srcRowNS + rowLen), dstRowNS)
srcRowCI += srcStride
dstRowCI += dstStride
srcRowNS += src.width
@@ -1016,7 +989,6 @@ export function blitRegion(
if ((srcCells[srcLastCI + 1]! & WIDTH_MASK) === CellWidth.Wide) {
dstCells[dstSpacerCI] = SPACER_CHAR_INDEX
dstCells[dstSpacerCI + 1] = packWord1(dst.emptyStyleId, 0, CellWidth.SpacerTail)
dstWritten[y * dst.width + maxX] = 1
wroteSpacerOutsideRegion = true
}
@@ -1058,7 +1030,6 @@ export function clearRegion(
const cells = screen.cells
const cells64 = screen.cells64
const written = screen.written
const screenWidth = screen.width
const rowBase = startY * screenWidth
let damageMinX = startX
@@ -1069,7 +1040,6 @@ export function clearRegion(
if (startX === 0 && maxX === screenWidth) {
// Full-width: single fill, no boundary checks needed
cells64.fill(EMPTY_CELL_VALUE, rowBase, rowBase + (maxY - startY) * screenWidth)
written.fill(0, rowBase, rowBase + (maxY - startY) * screenWidth)
} else {
// Partial-width: single loop handles boundary cleanup and fill per row.
const stride = screenWidth << 1 // 2 Int32s per cell
@@ -1092,7 +1062,6 @@ export function clearRegion(
if ((cells[prevW1]! & WIDTH_MASK) === CellWidth.Wide) {
cells[prevW1 - 1] = EMPTY_CHAR_INDEX
cells[prevW1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
written[y * screenWidth + startX - 1] = 0
damageMinX = startX - 1
}
}
@@ -1109,14 +1078,12 @@ export function clearRegion(
if ((cells[nextW1]! & WIDTH_MASK) === CellWidth.SpacerTail) {
cells[nextW1 - 1] = EMPTY_CHAR_INDEX
cells[nextW1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
written[y * screenWidth + maxX] = 0
damageMaxX = maxX + 1
}
}
}
cells64.fill(EMPTY_CELL_VALUE, fillStart, fillStart + rowLen)
written.fill(0, fillStart, fillStart + rowLen)
leftEdge += stride
rightEdge += stride
fillStart += screenWidth
@@ -1153,14 +1120,12 @@ export function shiftRows(screen: Screen, top: number, bottom: number, n: number
const w = screen.width
const cells64 = screen.cells64
const noSel = screen.noSelect
const written = screen.written
const sw = screen.softWrap
const absN = Math.abs(n)
if (absN > bottom - top) {
cells64.fill(EMPTY_CELL_VALUE, top * w, (bottom + 1) * w)
noSel.fill(0, top * w, (bottom + 1) * w)
written.fill(0, top * w, (bottom + 1) * w)
sw.fill(0, top, bottom + 1)
return
@@ -1170,21 +1135,17 @@ export function shiftRows(screen: Screen, top: number, bottom: number, n: number
// SU: row top+n..bottom → top..bottom-n; clear bottom-n+1..bottom
cells64.copyWithin(top * w, (top + n) * w, (bottom + 1) * w)
noSel.copyWithin(top * w, (top + n) * w, (bottom + 1) * w)
written.copyWithin(top * w, (top + n) * w, (bottom + 1) * w)
sw.copyWithin(top, top + n, bottom + 1)
cells64.fill(EMPTY_CELL_VALUE, (bottom - n + 1) * w, (bottom + 1) * w)
noSel.fill(0, (bottom - n + 1) * w, (bottom + 1) * w)
written.fill(0, (bottom - n + 1) * w, (bottom + 1) * w)
sw.fill(0, bottom - n + 1, bottom + 1)
} else {
// SD: row top..bottom+n → top-n..bottom; clear top..top-n-1
cells64.copyWithin((top - n) * w, top * w, (bottom + n + 1) * w)
noSel.copyWithin((top - n) * w, top * w, (bottom + n + 1) * w)
written.copyWithin((top - n) * w, top * w, (bottom + n + 1) * w)
sw.copyWithin(top - n, top, bottom + n + 1)
cells64.fill(EMPTY_CELL_VALUE, top * w, (top - n) * w)
noSel.fill(0, top * w, (top - n) * w)
written.fill(0, top * w, (top - n) * w)
sw.fill(0, top, top - n)
}
}
@@ -1,82 +0,0 @@
import { describe, expect, it } from 'vitest'
import { cellAt, CellWidth, CharPool, createScreen, HyperlinkPool, setCellAt, StylePool } from './screen.js'
import {
applySelectionOverlay,
createSelectionState,
getSelectedText,
startSelection,
updateSelection
} from './selection.js'
// Fixture: a 10x3 screen with "hi" written at columns 2-3 of the middle row.
const screenWithText = () => {
  const styles = new StylePool()
  const screen = createScreen(10, 3, styles, new CharPool(), new HyperlinkPool())
  const narrowCell = (char: string) => ({
    char,
    hyperlink: undefined,
    styleId: screen.emptyStyleId,
    width: CellWidth.Narrow
  })

  setCellAt(screen, 2, 1, narrowCell('h'))
  setCellAt(screen, 3, 1, narrowCell('i'))

  return { screen, styles }
}
// Covers how selection copy/paint treats never-written padding versus
// explicitly written space cells.
describe('selection whitespace handling', () => {
  // Row 0 of the fixture has no written cells, so nothing should be copied.
  it('does not copy whitespace-only selections', () => {
    const { screen } = screenWithText()
    const selection = createSelectionState()

    startSelection(selection, 0, 0)
    updateSelection(selection, 9, 0)

    expect(getSelectedText(selection, screen)).toBe('')
  })

  // Dragging across the full row only yields the written "hi" cells.
  it('trims outer drag padding while preserving selected content', () => {
    const { screen } = screenWithText()
    const selection = createSelectionState()

    startSelection(selection, 0, 1)
    updateSelection(selection, 9, 1)

    expect(getSelectedText(selection, screen)).toBe('hi')
  })

  it('preserves selected indentation when spaces are rendered content', () => {
    const styles = new StylePool()
    const screen = createScreen(10, 1, styles, new CharPool(), new HyperlinkPool())
    const selection = createSelectionState()

    setCellAt(screen, 0, 0, { char: ' ', hyperlink: undefined, styleId: screen.emptyStyleId, width: CellWidth.Narrow })
    setCellAt(screen, 1, 0, { char: ' ', hyperlink: undefined, styleId: screen.emptyStyleId, width: CellWidth.Narrow })
    setCellAt(screen, 2, 0, { char: 'x', hyperlink: undefined, styleId: screen.emptyStyleId, width: CellWidth.Narrow })
    startSelection(selection, 0, 0)
    updateSelection(selection, 9, 0)

    // Two space cells were written before 'x', so both must survive the copy.
    expect(getSelectedText(selection, screen)).toBe('  x')
  })

  // A drag that ends past the last column must behave like one ending on it.
  it('clamps copied selection bounds to screen width', () => {
    const { screen } = screenWithText()
    const selection = createSelectionState()

    startSelection(selection, 0, 1)
    updateSelection(selection, 99, 1)

    expect(getSelectedText(selection, screen)).toBe('hi')
  })

  // Only the written cells (columns 2-3 of row 1) may receive the selection
  // style; padding cells and empty rows keep the empty style.
  it('does not paint selection background on leading/trailing empty cells or empty rows', () => {
    const { screen, styles } = screenWithText()
    const selection = createSelectionState()

    startSelection(selection, 0, 0)
    updateSelection(selection, 9, 2)
    applySelectionOverlay(screen, selection, styles)

    expect(cellAt(screen, 0, 0)?.styleId).toBe(screen.emptyStyleId)
    expect(cellAt(screen, 0, 1)?.styleId).toBe(screen.emptyStyleId)
    expect(cellAt(screen, 2, 1)?.styleId).not.toBe(screen.emptyStyleId)
    expect(cellAt(screen, 4, 1)?.styleId).toBe(screen.emptyStyleId)
    expect(cellAt(screen, 0, 2)?.styleId).toBe(screen.emptyStyleId)
  })
})
@@ -12,7 +12,7 @@
import { clamp } from './layout/geometry.js'
import type { Screen, StylePool } from './screen.js'
import { cellAt, cellAtIndex, CellWidth, isWrittenCellAt, setCellStyleId } from './screen.js'
import { cellAt, cellAtIndex, CellWidth, setCellStyleId } from './screen.js'
type Point = { col: number; row: number }
@@ -842,43 +842,6 @@ export function isCellSelected(s: SelectionState, col: number, row: number): boo
return true
}
/**
 * Decide whether the cell at (row, col) counts as selectable content:
 * it must not be flagged noSelect, must be marked written, must resolve
 * to a cell, and must not be a SpacerTail or SpacerHead cell.
 */
function selectableCell(screen: Screen, row: number, col: number): boolean {
  const cell = cellAt(screen, col, row)

  if (screen.noSelect[row * screen.width + col] === 1) {
    return false
  }

  if (!isWrittenCellAt(screen, col, row)) {
    return false
  }

  if (!cell) {
    return false
  }

  const isSpacer = cell.width === CellWidth.SpacerTail || cell.width === CellWidth.SpacerHead

  return !isSpacer
}
/**
 * Shrink the inclusive column range [start, end] on `row` to the first and
 * last selectable cells. Returns null when the range holds no selectable cell.
 */
function selectionContentBounds(
  screen: Screen,
  row: number,
  start: number,
  end: number
): { first: number; last: number } | null {
  // Scan forward for the leftmost selectable column.
  let first = start

  for (; first <= end; first++) {
    if (selectableCell(screen, row, first)) {
      break
    }
  }

  if (first > end) {
    return null
  }

  // Scan backward for the rightmost selectable column; `first` itself is
  // selectable, so this stops no later than there.
  let last = end

  for (; last >= first; last--) {
    if (selectableCell(screen, row, last)) {
      break
    }
  }

  return { first, last }
}
/** Extract text from one screen row. When the next row is a soft-wrap
* continuation (screen.softWrap[row+1]>0), clamp to that content-end
* column and skip the trailing trim so the word-separator space survives
@@ -927,21 +890,6 @@ function joinRows(lines: string[], text: string, sw: boolean | undefined): void
}
}
/**
 * Drop leading and trailing rows that are empty or whitespace-only.
 * Blank rows between content rows are kept. Always returns a new array.
 */
function trimEmptyEdgeRows(lines: string[]): string[] {
  const isBlank = (line: string) => !line.trim()
  const firstContent = lines.findIndex(line => !isBlank(line))

  // No row has content: the result is empty.
  if (firstContent === -1) {
    return []
  }

  let afterLastContent = lines.length

  for (; afterLastContent > firstContent; afterLastContent--) {
    if (!isBlank(lines[afterLastContent - 1]!)) {
      break
    }
  }

  return lines.slice(firstContent, afterLastContent)
}
/**
* Extract text from the screen buffer within the selection range.
* Rows are joined with newlines unless the screen's softWrap bitmap
@@ -969,18 +917,16 @@ export function getSelectedText(s: SelectionState, screen: Screen): string {
}
for (let row = start.row; row <= end.row; row++) {
const rowStart = Math.max(0, row === start.row ? start.col : 0)
const rowEnd = Math.min(row === end.row ? end.col : screen.width - 1, screen.width - 1)
const bounds = selectionContentBounds(screen, row, rowStart, rowEnd)
joinRows(lines, bounds ? extractRowText(screen, row, bounds.first, bounds.last) : '', sw[row]! > 0)
const rowStart = row === start.row ? start.col : 0
const rowEnd = row === end.row ? end.col : screen.width - 1
joinRows(lines, extractRowText(screen, row, rowStart, rowEnd), sw[row]! > 0)
}
for (let i = 0; i < s.scrolledOffBelow.length; i++) {
joinRows(lines, s.scrolledOffBelow[i]!, s.scrolledOffBelowSW[i])
}
return trimEmptyEdgeRows(lines).join('\n')
return lines.join('\n')
}
/**
@@ -1105,14 +1051,9 @@ export function applySelectionOverlay(screen: Screen, selection: SelectionState,
for (let row = start.row; row <= end.row && row < screen.height; row++) {
const colStart = row === start.row ? start.col : 0
const colEnd = row === end.row ? Math.min(end.col, width - 1) : width - 1
const bounds = selectionContentBounds(screen, row, colStart, colEnd)
const rowOff = row * width
if (!bounds) {
continue
}
for (let col = bounds.first; col <= bounds.last; col++) {
for (let col = colStart; col <= colEnd; col++) {
const idx = rowOff + col
// Skip noSelect cells — gutters stay visually unchanged so it's
@@ -9,21 +9,18 @@ describe('shouldEmitClipboardSequence', () => {
})
it('keeps OSC enabled for remote or plain local terminals', () => {
expect(
shouldEmitClipboardSequence({ SSH_CONNECTION: '1', TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)
).toBe(true)
expect(shouldEmitClipboardSequence({ SSH_CONNECTION: '1', TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)).toBe(
true
)
expect(shouldEmitClipboardSequence({ TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(true)
})
it('honors explicit env override', () => {
expect(
shouldEmitClipboardSequence({
HERMES_TUI_CLIPBOARD_OSC52: '1',
TMUX: '/tmp/tmux-1/default,1,0'
} as NodeJS.ProcessEnv)
).toBe(true)
expect(
shouldEmitClipboardSequence({ HERMES_TUI_COPY_OSC52: '0', TERM: 'xterm-256color' } as NodeJS.ProcessEnv)
).toBe(false)
expect(shouldEmitClipboardSequence({ HERMES_TUI_CLIPBOARD_OSC52: '1', TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)).toBe(
true
)
expect(shouldEmitClipboardSequence({ HERMES_TUI_COPY_OSC52: '0', TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(
false
)
})
})
@@ -226,10 +226,7 @@ describe('createGatewayEventHandler', () => {
const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
const assistantText = 'Done. Clean swap:\n\n```diff\n-old\n+new\n```'
onEvent({
payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' },
type: 'tool.complete'
} as any)
onEvent({ payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)
expect(appended).toHaveLength(1)
@@ -17,14 +17,6 @@ describe('createSlashHandler', () => {
expect(getOverlayState().picker).toBe(true)
})
it('treats /provider as a local /model alias', () => {
const ctx = buildCtx()
expect(createSlashHandler(ctx)('/provider')).toBe(true)
expect(getOverlayState().modelPicker).toBe(true)
expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
})
it('opens the skills hub locally for bare /skills', () => {
const ctx = buildCtx()
@@ -126,7 +118,9 @@ describe('createSlashHandler', () => {
const ctx = buildCtx()
createSlashHandler(ctx)('/details tools blink')
expect(getUiState().sections.tools).toBeUndefined()
expect(ctx.transcript.sys).toHaveBeenCalledWith('usage: /details <section> [hidden|collapsed|expanded|reset]')
expect(ctx.transcript.sys).toHaveBeenCalledWith(
'usage: /details <section> [hidden|collapsed|expanded|reset]'
)
})
it('shows tool enable usage when names are missing', () => {
+1 -1
View File
@@ -1,6 +1,6 @@
import { describe, expect, it } from 'vitest'
import { isSectionName, parseDetailsMode, resolveSections, SECTION_NAMES, sectionMode } from '../domain/details.js'
import { isSectionName, parseDetailsMode, resolveSections, sectionMode, SECTION_NAMES } from '../domain/details.js'
describe('parseDetailsMode', () => {
it('accepts the canonical modes case-insensitively', () => {
-22
View File
@@ -31,28 +31,6 @@ describe('platform action modifier', () => {
})
})
// Copy-chord detection, exercised through the module returned by
// importPlatform('linux').
describe('isCopyShortcut', () => {
  const loadLinuxCopyShortcut = async () => {
    const platform = await importPlatform('linux')

    return platform.isCopyShortcut
  }

  it('keeps Ctrl+C as the local non-macOS copy chord', async () => {
    const isCopyShortcut = await loadLinuxCopyShortcut()
    const ctrlOnly = { ctrl: true, meta: false, super: false }

    expect(isCopyShortcut(ctrlOnly, 'c', {})).toBe(true)
  })

  it('accepts client Cmd+C over SSH even when running on Linux', async () => {
    const isCopyShortcut = await loadLinuxCopyShortcut()
    const env = { SSH_CONNECTION: '1 2 3 4' } as NodeJS.ProcessEnv
    const superOnly = { ctrl: false, meta: false, super: true }
    const metaOnly = { ctrl: false, meta: true, super: false }

    expect(isCopyShortcut(superOnly, 'c', env)).toBe(true)
    expect(isCopyShortcut(metaOnly, 'c', env)).toBe(true)
  })

  it('does not treat local Linux Alt+C as copy', async () => {
    const isCopyShortcut = await loadLinuxCopyShortcut()
    const metaOnly = { ctrl: false, meta: true, super: false }

    expect(isCopyShortcut(metaOnly, 'c', {})).toBe(false)
  })
})
describe('isVoiceToggleKey', () => {
it('matches raw Ctrl+B on macOS (doc-default across platforms)', async () => {
const { isVoiceToggleKey } = await importPlatform('darwin')
+1 -1
View File
@@ -1,8 +1,8 @@
import { useStore } from '@nanostores/react'
import { GatewayProvider } from './app/gatewayContext.js'
import { $uiState } from './app/uiStore.js'
import { useMainApp } from './app/useMainApp.js'
import { $uiState } from './app/uiStore.js'
import { AppLayout } from './components/appLayout.js'
import type { GatewayClient } from './gatewayClient.js'
+6 -7
View File
@@ -1,7 +1,7 @@
import { NO_CONFIRM_DESTRUCTIVE } from '../../../config/env.js'
import { dailyFortune, randomFortune } from '../../../content/fortunes.js'
import { HOTKEYS } from '../../../content/hotkeys.js'
import { isSectionName, nextDetailsMode, parseDetailsMode, SECTION_NAMES } from '../../../domain/details.js'
import { SECTION_NAMES, isSectionName, nextDetailsMode, parseDetailsMode } from '../../../domain/details.js'
import type {
ConfigGetValueResponse,
ConfigSetResponse,
@@ -40,10 +40,8 @@ const flagFromArg = (arg: string, current: boolean): boolean | null => {
const RESET_WORDS = new Set(['reset', 'clear', 'default'])
const CYCLE_WORDS = new Set(['cycle', 'toggle'])
const DETAILS_USAGE =
'usage: /details [hidden|collapsed|expanded|cycle] or /details <section> [hidden|collapsed|expanded|reset]'
const DETAILS_SECTION_USAGE = 'usage: /details <section> [hidden|collapsed|expanded|reset]'
export const coreCommands: SlashCommand[] = [
@@ -99,7 +97,9 @@ export const coreCommands: SlashCommand[] = [
}
patchUiState({ mouseTracking: next })
ctx.gateway.rpc<ConfigSetResponse>('config.set', { key: 'mouse', value: next ? 'on' : 'off' }).catch(() => {})
ctx.gateway
.rpc<ConfigSetResponse>('config.set', { key: 'mouse', value: next ? 'on' : 'off' })
.catch(() => {})
queueMicrotask(() => ctx.transcript.sys(`mouse tracking ${next ? 'on' : 'off'}`))
}
@@ -178,9 +178,7 @@ export const coreCommands: SlashCommand[] = [
gateway
.rpc<ConfigGetValueResponse>('config.get', { key: 'details_mode' })
.then(r => {
if (ctx.stale()) {
return
}
if (ctx.stale()) return
const mode = parseDetailsMode(r?.value) ?? ui.detailsMode
patchUiState({ detailsMode: mode })
@@ -269,6 +267,7 @@ export const coreCommands: SlashCommand[] = [
}
writeOsc52Clipboard(target.text)
sys(`copied ${target.text.length} chars`)
}
},
-1
View File
@@ -58,7 +58,6 @@ export const sessionCommands: SlashCommand[] = [
{
help: 'change or show model',
aliases: ['provider'],
name: 'model',
run: (arg, ctx) => {
if (ctx.session.guardBusySessionSwitch('change models')) {
+12
View File
@@ -5,6 +5,18 @@ import { runExternalSetup } from '../../setupHandoff.js'
import type { SlashCommand } from '../types.js'
export const setupCommands: SlashCommand[] = [
{
help: 'configure LLM provider + model (launches `hermes model`)',
name: 'provider',
run: (_arg, ctx) =>
void runExternalSetup({
args: ['model'],
ctx,
done: 'provider updated — starting session…',
launcher: launchHermesCommand,
suspend: withInkSuspended
})
},
{
help: 'run full setup wizard (launches `hermes setup`)',
name: 'setup',
-1
View File
@@ -300,7 +300,6 @@ class TurnController {
const hasDiffSegment = segments.some(msg => msg.kind === 'diff')
const detailsBelongBeforeDiff = hasDiffSegment && (tools.length > 0 || Boolean(savedReasoning))
const finalMessages = detailsBelongBeforeDiff
? insertBeforeFirstDiff(segments, {
kind: 'trail',
+1 -1
View File
@@ -1,8 +1,8 @@
import { atom } from 'nanostores'
import { MOUSE_TRACKING } from '../config/env.js'
import { ZERO } from '../domain/usage.js'
import { DEFAULT_THEME } from '../theme.js'
import { MOUSE_TRACKING } from '../config/env.js'
import type { UiState } from './interfaces.js'
+16 -10
View File
@@ -8,7 +8,7 @@ import type {
SudoRespondResponse,
VoiceRecordResponse
} from '../gatewayTypes.js'
import { isAction, isCopyShortcut, isMac, isVoiceToggleKey } from '../lib/platform.js'
import { isAction, isMac, isVoiceToggleKey } from '../lib/platform.js'
import { getInputSelection } from './inputSelectionStore.js'
import type { InputHandlerContext, InputHandlerResult } from './interfaces.js'
@@ -30,7 +30,11 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
const copySelection = () => {
// ink's copySelection() already calls setClipboard() which handles
// pbcopy (macOS), wl-copy/xclip (Linux), tmux, and OSC 52 fallback.
terminal.selection.copySelection()
const text = terminal.selection.copySelection()
if (text) {
actions.sys(`copied ${text.length} chars`)
}
}
const clearSelection = () => {
@@ -155,14 +159,16 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
voice.setProcessing(false)
}
gateway.rpc<VoiceRecordResponse>('voice.record', { action }).catch((e: Error) => {
// Revert optimistic UI on failure.
if (starting) {
voice.setRecording(false)
}
gateway
.rpc<VoiceRecordResponse>('voice.record', { action })
.catch((e: Error) => {
// Revert optimistic UI on failure.
if (starting) {
voice.setRecording(false)
}
actions.sys(`voice error: ${e.message}`)
})
actions.sys(`voice error: ${e.message}`)
})
}
useInput((ch, key) => {
@@ -311,7 +317,7 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
}
}
if (isCopyShortcut(key, ch)) {
if (isAction(key, ch, 'c')) {
if (terminal.hasSelection) {
return copySelection()
}
+8 -8
View File
@@ -640,14 +640,14 @@ export function useMainApp(gw: GatewayClient) {
const showProgressArea = anyPanelVisible
? Boolean(
ui.busy ||
turn.outcome ||
turn.streamPendingTools.length ||
turn.streamSegments.length ||
turn.subagents.length ||
turn.tools.length ||
turn.turnTrail.length ||
hasReasoning ||
turn.activity.length
turn.outcome ||
turn.streamPendingTools.length ||
turn.streamSegments.length ||
turn.subagents.length ||
turn.tools.length ||
turn.turnTrail.length ||
hasReasoning ||
turn.activity.length
)
: turn.activity.some(item => item.tone !== 'info')
+5 -1
View File
@@ -218,7 +218,11 @@ export function StatusRule({
{voiceLabel ? (
<Text
color={
voiceLabel.startsWith('●') ? t.color.error : voiceLabel.startsWith('◉') ? t.color.warn : t.color.dim
voiceLabel.startsWith('●')
? t.color.error
: voiceLabel.startsWith('◉')
? t.color.warn
: t.color.dim
}
>
{' │ '}
+4 -5
View File
@@ -9,7 +9,6 @@ import { $uiState } from '../app/uiStore.js'
import { FloatBox } from './appChrome.js'
import { MaskedPrompt } from './maskedPrompt.js'
import { ModelPicker } from './modelPicker.js'
import { OverlayHint } from './overlayControls.js'
import { ApprovalPrompt, ClarifyPrompt, ConfirmPrompt } from './prompts.js'
import { SessionPicker } from './sessionPicker.js'
import { SkillsHub } from './skillsHub.js'
@@ -163,11 +162,11 @@ export function FloatingOverlays({
))}
<Box marginTop={1}>
<OverlayHint t={ui.theme}>
<Text color={ui.theme.color.dim}>
{overlay.pager.offset + pagerPageSize < overlay.pager.lines.length
? `↑↓/jk line · Enter/Space/PgDn page · b/PgUp back · g/G top/bottom · Esc/q close (${Math.min(overlay.pager.offset + pagerPageSize, overlay.pager.lines.length)}/${overlay.pager.lines.length})`
: `end · ↑↓/jk · b/PgUp back · g top · Esc/q close (${overlay.pager.lines.length} lines)`}
</OverlayHint>
? `↑↓/jk line · Enter/Space/PgDn page · b/PgUp back · g/G top/bottom · q close (${Math.min(overlay.pager.offset + pagerPageSize, overlay.pager.lines.length)}/${overlay.pager.lines.length})`
: `end · ↑↓/jk · b/PgUp back · g top · q close (${overlay.pager.lines.length} lines)`}
</Text>
</Box>
</Box>
</FloatBox>
+3 -2
View File
@@ -1,8 +1,8 @@
import { Ansi, Box, NoSelect, Text } from '@hermes/ink'
import { memo } from 'react'
import { LONG_MSG } from '../config/limits.js'
import { sectionMode } from '../domain/details.js'
import { LONG_MSG } from '../config/limits.js'
import { userDisplay } from '../domain/messages.js'
import { ROLE } from '../domain/roles.js'
import { compactPreview, hasAnsi, isPasteBackedText, stripAnsi } from '../lib/text.js'
@@ -72,7 +72,8 @@ export const MessageLine = memo(function MessageLine({
const { body, glyph, prefix } = ROLE[msg.role](t)
const showDetails =
(toolsMode !== 'hidden' && Boolean(msg.tools?.length)) || (thinkingMode !== 'hidden' && Boolean(thinking))
(toolsMode !== 'hidden' && Boolean(msg.tools?.length)) ||
(thinkingMode !== 'hidden' && Boolean(thinking))
const content = (() => {
if (msg.kind === 'slash') {
+38 -30
View File
@@ -7,12 +7,18 @@ import type { ModelOptionProvider, ModelOptionsResponse } from '../gatewayTypes.
import { asRpcResult, rpcErrorMessage } from '../lib/rpc.js'
import type { Theme } from '../theme.js'
import { OverlayHint, useOverlayKeys, windowItems, windowOffset } from './overlayControls.js'
const VISIBLE = 12
const MIN_WIDTH = 40
const MAX_WIDTH = 90

// Start index of the VISIBLE-row window: keeps `sel` near the middle,
// clamped so the window never starts before 0 or runs past the end.
const pageOffset = (count: number, sel: number) => {
  const centeredStart = sel - Math.floor(VISIBLE / 2)
  const lastStart = count - VISIBLE

  return Math.max(0, Math.min(centeredStart, lastStart))
}

// Slice of `items` shown for selection `sel`, plus the slice's start index.
const visibleItems = (items: string[], sel: number) => {
  const off = pageOffset(items.length, sel)
  const end = off + VISIBLE

  return { items: items.slice(off, end), off }
}
export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPickerProps) {
const [providers, setProviders] = useState<ModelOptionProvider[]>([])
const [currentModel, setCurrentModel] = useState('')
@@ -65,20 +71,20 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
const models = provider?.models ?? []
const names = useMemo(() => providerDisplayNames(providers), [providers])
const back = () => {
if (stage === 'model') {
setStage('provider')
setModelIdx(0)
useInput((ch, key) => {
if (key.escape) {
if (stage === 'model') {
setStage('provider')
setModelIdx(0)
return
}
onCancel()
return
}
onCancel()
}
useOverlayKeys({ onBack: back, onClose: onCancel })
useInput((ch, key) => {
const count = stage === 'provider' ? providers.length : models.length
const sel = stage === 'provider' ? providerIdx : modelIdx
const setSel = stage === 'provider' ? setProviderIdx : setModelIdx
@@ -127,16 +133,16 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
const n = ch === '0' ? 10 : parseInt(ch, 10)
if (!Number.isNaN(n) && n >= 1 && n <= Math.min(10, count)) {
const offset = windowOffset(count, sel, VISIBLE)
const off = pageOffset(count, sel)
if (stage === 'provider') {
const next = offset + n - 1
const next = off + n - 1
if (providers[next]) {
setProviderIdx(next)
}
} else if (provider && models[offset + n - 1]) {
onSelect(`${models[offset + n - 1]} --provider ${provider.slug}${persistGlobal ? ' --global' : ''}`)
} else if (provider && models[off + n - 1]) {
onSelect(`${models[off + n - 1]} --provider ${provider.slug}${persistGlobal ? ' --global' : ''}`)
}
}
})
@@ -149,7 +155,7 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
return (
<Box flexDirection="column">
<Text color={t.color.label}>error: {err}</Text>
<OverlayHint t={t}>Esc/q cancel</OverlayHint>
<Text color={t.color.dim}>Esc to cancel</Text>
</Box>
)
}
@@ -158,7 +164,7 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
return (
<Box flexDirection="column">
<Text color={t.color.dim}>no authenticated providers</Text>
<OverlayHint t={t}>Esc/q cancel</OverlayHint>
<Text color={t.color.dim}>Esc to cancel</Text>
</Box>
)
}
@@ -168,7 +174,7 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
(p, i) => `${p.is_current ? '*' : ' '} ${names[i]} · ${p.total_models ?? p.models?.length ?? 0} models`
)
const { items, offset } = windowItems(rows, providerIdx, VISIBLE)
const { items, off } = visibleItems(rows, providerIdx)
return (
<Box flexDirection="column" width={width}>
@@ -183,12 +189,12 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
{provider?.warning ? `warning: ${provider.warning}` : ' '}
</Text>
<Text color={t.color.dim} wrap="truncate-end">
{offset > 0 ? `${offset} more` : ' '}
{off > 0 ? `${off} more` : ' '}
</Text>
{Array.from({ length: VISIBLE }, (_, i) => {
const row = items[i]
const idx = offset + i
const idx = off + i
return row ? (
<Text
@@ -209,18 +215,20 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
})}
<Text color={t.color.dim} wrap="truncate-end">
{offset + VISIBLE < rows.length ? `${rows.length - offset - VISIBLE} more` : ' '}
{off + VISIBLE < rows.length ? `${rows.length - off - VISIBLE} more` : ' '}
</Text>
<Text color={t.color.dim} wrap="truncate-end">
persist: {persistGlobal ? 'global' : 'session'} · g toggle
</Text>
<OverlayHint t={t}>/ select · Enter choose · 1-9,0 quick · Esc/q cancel</OverlayHint>
<Text color={t.color.dim} wrap="truncate-end">
/ select · Enter choose · 1-9,0 quick · Esc cancel
</Text>
</Box>
)
}
const { items, offset } = windowItems(models, modelIdx, VISIBLE)
const { items, off } = visibleItems(models, modelIdx)
return (
<Box flexDirection="column" width={width}>
@@ -235,12 +243,12 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
{provider?.warning ? `warning: ${provider.warning}` : ' '}
</Text>
<Text color={t.color.dim} wrap="truncate-end">
{offset > 0 ? `${offset} more` : ' '}
{off > 0 ? `${off} more` : ' '}
</Text>
{Array.from({ length: VISIBLE }, (_, i) => {
const row = items[i]
const idx = offset + i
const idx = off + i
if (!row) {
return !models.length && i === 0 ? (
@@ -269,15 +277,15 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
})}
<Text color={t.color.dim} wrap="truncate-end">
{offset + VISIBLE < models.length ? `${models.length - offset - VISIBLE} more` : ' '}
{off + VISIBLE < models.length ? `${models.length - off - VISIBLE} more` : ' '}
</Text>
<Text color={t.color.dim} wrap="truncate-end">
persist: {persistGlobal ? 'global' : 'session'} · g toggle
</Text>
<OverlayHint t={t}>
{models.length ? '↑/↓ select · Enter switch · 1-9,0 quick · Esc back · q close' : 'Enter/Esc back · q close'}
</OverlayHint>
<Text color={t.color.dim} wrap="truncate-end">
{models.length ? '↑/↓ select · Enter switch · 1-9,0 quick · Esc back' : 'Enter/Esc back'}
</Text>
</Box>
)
}
-50
View File
@@ -1,50 +0,0 @@
import { Text, useInput } from '@hermes/ink'
import type { Theme } from '../theme.js'
/**
 * Shared key handling for overlays: `q` invokes onClose, and Escape invokes
 * onBack when one is supplied, otherwise onClose. No key is handled while
 * `disabled` is true.
 */
export function useOverlayKeys({ disabled = false, onBack, onClose }: OverlayKeysOptions) {
  useInput((ch, key) => {
    if (!disabled) {
      if (ch === 'q') {
        return onClose()
      }

      if (key.escape) {
        // Escape falls back to closing when there is no "back" step.
        return (onBack || onClose)()
      }
    }
  })
}
/** Single dim-coloured hint line for overlays, truncated at the end if too long. */
export function OverlayHint(props: OverlayHintProps) {
  const dim = props.t.color.dim

  return (
    <Text color={dim} wrap="truncate-end">
      {props.children}
    </Text>
  )
}
/**
 * Index of the first row to show so that `selected` sits roughly in the middle
 * of a `visible`-row window over `count` rows.
 *
 * The result is clamped so the window never starts before row 0 and, when
 * there are more rows than fit, never runs past the last row.
 */
export const windowOffset = (count: number, selected: number, visible: number) => {
  const centred = selected - Math.floor(visible / 2)
  const lastStart = count - visible

  return Math.max(0, Math.min(centred, lastStart))
}
/**
 * Slices `items` down to the `visible`-row window around `selected`.
 *
 * Returns the visible rows together with `offset`, the index in `items` of the
 * first visible row, so callers can map a row position back to an item index.
 */
export function windowItems<T>(items: T[], selected: number, visible: number) {
  const start = windowOffset(items.length, selected, visible)
  const end = start + visible

  return { items: items.slice(start, end), offset: start }
}
/** Props accepted by `OverlayHint`. */
interface OverlayHintProps {
// Hint text to display; typed as a plain string rather than arbitrary nodes.
children: string
// Theme object; `OverlayHint` reads `t.color.dim` for the text colour.
t: Theme
}
/** Options accepted by `useOverlayKeys`. */
interface OverlayKeysOptions {
// When true the hook ignores every key press (defaults to false in the hook).
disabled?: boolean
// Called on Esc when provided; without it Esc falls through to `onClose`.
onBack?: () => void
// Called on `q`, and on Esc when no `onBack` is supplied.
onClose: () => void
}
+12 -12
View File
@@ -6,8 +6,6 @@ import type { SessionListItem, SessionListResponse } from '../gatewayTypes.js'
import { asRpcResult, rpcErrorMessage } from '../lib/rpc.js'
import type { Theme } from '../theme.js'
import { OverlayHint, useOverlayKeys, windowOffset } from './overlayControls.js'
const VISIBLE = 15
const MIN_WIDTH = 60
const MAX_WIDTH = 120
@@ -35,8 +33,6 @@ export function SessionPicker({ gw, onCancel, onSelect, t }: SessionPickerProps)
const { stdout } = useStdout()
const width = Math.max(MIN_WIDTH, Math.min(MAX_WIDTH, (stdout?.columns ?? 80) - 6))
useOverlayKeys({ onClose: onCancel })
useEffect(() => {
gw.request<SessionListResponse>('session.list', { limit: 20 })
.then(raw => {
@@ -60,6 +56,10 @@ export function SessionPicker({ gw, onCancel, onSelect, t }: SessionPickerProps)
}, [gw])
useInput((ch, key) => {
if (key.escape) {
return onCancel()
}
if (key.upArrow && sel > 0) {
setSel(s => s - 1)
}
@@ -87,7 +87,7 @@ export function SessionPicker({ gw, onCancel, onSelect, t }: SessionPickerProps)
return (
<Box flexDirection="column">
<Text color={t.color.label}>error: {err}</Text>
<OverlayHint t={t}>Esc/q cancel</OverlayHint>
<Text color={t.color.dim}>Esc to cancel</Text>
</Box>
)
}
@@ -96,12 +96,12 @@ export function SessionPicker({ gw, onCancel, onSelect, t }: SessionPickerProps)
return (
<Box flexDirection="column">
<Text color={t.color.dim}>no previous sessions</Text>
<OverlayHint t={t}>Esc/q cancel</OverlayHint>
<Text color={t.color.dim}>Esc to cancel</Text>
</Box>
)
}
const offset = windowOffset(items.length, sel, VISIBLE)
const off = Math.max(0, Math.min(sel - Math.floor(VISIBLE / 2), items.length - VISIBLE))
return (
<Box flexDirection="column" width={width}>
@@ -109,10 +109,10 @@ export function SessionPicker({ gw, onCancel, onSelect, t }: SessionPickerProps)
Resume Session
</Text>
{offset > 0 && <Text color={t.color.dim}> {offset} more</Text>}
{off > 0 && <Text color={t.color.dim}> {off} more</Text>}
{items.slice(offset, offset + VISIBLE).map((s, vi) => {
const i = offset + vi
{items.slice(off, off + VISIBLE).map((s, vi) => {
const i = off + vi
const selected = sel === i
return (
@@ -140,8 +140,8 @@ export function SessionPicker({ gw, onCancel, onSelect, t }: SessionPickerProps)
)
})}
{offset + VISIBLE < items.length && <Text color={t.color.dim}> {items.length - offset - VISIBLE} more</Text>}
<OverlayHint t={t}>/ select · Enter resume · 1-9 quick · Esc/q cancel</OverlayHint>
{off + VISIBLE < items.length && <Text color={t.color.dim}> {items.length - off - VISIBLE} more</Text>}
<Text color={t.color.dim}>/ select · Enter resume · 1-9 quick · Esc cancel</Text>
</Box>
)
}
+46 -41
View File
@@ -5,12 +5,18 @@ import type { GatewayClient } from '../gatewayClient.js'
import { rpcErrorMessage } from '../lib/rpc.js'
import type { Theme } from '../theme.js'
import { OverlayHint, useOverlayKeys, windowItems, windowOffset } from './overlayControls.js'
const VISIBLE = 12
const MIN_WIDTH = 40
const MAX_WIDTH = 90
// First visible row index for a list of `count` rows with `sel` selected:
// centre the selection in a VISIBLE-row page, then clamp so the page neither
// starts before row 0 nor (for long lists) runs past the last row.
const pageOffset = (count: number, sel: number) => {
  const centred = sel - Math.floor(VISIBLE / 2)
  const lastStart = count - VISIBLE

  return Math.max(0, Math.min(centred, lastStart))
}
// Returns the page of rows currently on screen plus `off`, the index in
// `items` of the first row on that page.
const visibleItems = (items: string[], sel: number) => {
  const start = pageOffset(items.length, sel)
  const page = items.slice(start, start + VISIBLE)

  return { items: page, off: start }
}
export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
const [skillsByCat, setSkillsByCat] = useState<Record<string, string[]>>({})
const [selectedCat, setSelectedCat] = useState('')
@@ -42,27 +48,6 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
const skills = selectedCat ? (skillsByCat[selectedCat] ?? []) : []
const skillName = skills[skillIdx] ?? ''
const back = () => {
if (stage === 'actions') {
setStage('skill')
setInfo(null)
setErr('')
return
}
if (stage === 'skill') {
setStage('category')
setSkillIdx(0)
return
}
onClose()
}
useOverlayKeys({ disabled: installing, onBack: back, onClose })
const inspect = (name: string) => {
setInfo(null)
setErr('')
@@ -87,6 +72,27 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
return
}
if (key.escape) {
if (stage === 'actions') {
setStage('skill')
setInfo(null)
setErr('')
return
}
if (stage === 'skill') {
setStage('category')
setSkillIdx(0)
return
}
onClose()
return
}
if (stage === 'actions') {
if (key.return) {
setStage('skill')
@@ -153,7 +159,8 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
const n = ch === '0' ? 10 : parseInt(ch, 10)
if (!Number.isNaN(n) && n >= 1 && n <= Math.min(10, count)) {
const next = windowOffset(count, sel, VISIBLE) + n - 1
const off = pageOffset(count, sel)
const next = off + n - 1
if (stage === 'category') {
const cat = cats[next]
@@ -186,7 +193,7 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
return (
<Box flexDirection="column" width={width}>
<Text color={t.color.label}>error: {err}</Text>
<OverlayHint t={t}>Esc/q cancel</OverlayHint>
<Text color={t.color.dim}>Esc to cancel</Text>
</Box>
)
}
@@ -195,14 +202,14 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
return (
<Box flexDirection="column" width={width}>
<Text color={t.color.dim}>no skills available</Text>
<OverlayHint t={t}>Esc/q cancel</OverlayHint>
<Text color={t.color.dim}>Esc to cancel</Text>
</Box>
)
}
if (stage === 'category') {
const rows = cats.map(c => `${c} · ${skillsByCat[c]?.length ?? 0} skills`)
const { items, offset } = windowItems(rows, catIdx, VISIBLE)
const { items, off } = visibleItems(rows, catIdx)
return (
<Box flexDirection="column" width={width}>
@@ -211,10 +218,10 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
</Text>
<Text color={t.color.dim}>select a category</Text>
{offset > 0 && <Text color={t.color.dim}> {offset} more</Text>}
{off > 0 && <Text color={t.color.dim}> {off} more</Text>}
{items.map((row, i) => {
const idx = offset + i
const idx = off + i
return (
<Text
@@ -230,14 +237,14 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
)
})}
{offset + VISIBLE < rows.length && <Text color={t.color.dim}> {rows.length - offset - VISIBLE} more</Text>}
<OverlayHint t={t}>/ select · Enter open · 1-9,0 quick · Esc/q cancel</OverlayHint>
{off + VISIBLE < rows.length && <Text color={t.color.dim}> {rows.length - off - VISIBLE} more</Text>}
<Text color={t.color.dim}>/ select · Enter open · 1-9,0 quick · Esc cancel</Text>
</Box>
)
}
if (stage === 'skill') {
const { items, offset } = windowItems(skills, skillIdx, VISIBLE)
const { items, off } = visibleItems(skills, skillIdx)
return (
<Box flexDirection="column" width={width}>
@@ -247,10 +254,10 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
<Text color={t.color.dim}>{skills.length} skill(s)</Text>
{!skills.length ? <Text color={t.color.dim}>no skills in this category</Text> : null}
{offset > 0 && <Text color={t.color.dim}> {offset} more</Text>}
{off > 0 && <Text color={t.color.dim}> {off} more</Text>}
{items.map((row, i) => {
const idx = offset + i
const idx = off + i
return (
<Text
@@ -266,12 +273,10 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
)
})}
{offset + VISIBLE < skills.length && (
<Text color={t.color.dim}> {skills.length - offset - VISIBLE} more</Text>
)}
<OverlayHint t={t}>
{skills.length ? '↑/↓ select · Enter open · 1-9,0 quick · Esc back · q close' : 'Esc back · q close'}
</OverlayHint>
{off + VISIBLE < skills.length && <Text color={t.color.dim}> {skills.length - off - VISIBLE} more</Text>}
<Text color={t.color.dim}>
{skills.length ? '↑/↓ select · Enter open · 1-9,0 quick · Esc back' : 'Esc back'}
</Text>
</Box>
)
}
@@ -289,7 +294,7 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
{err ? <Text color={t.color.label}>error: {err}</Text> : null}
{installing ? <Text color={t.color.amber}>installing</Text> : null}
<OverlayHint t={t}>i reinspect · x reinstall · Enter/Esc back · q close</OverlayHint>
<Text color={t.color.dim}>i reinspect · x reinstall · Enter/Esc back</Text>
</Box>
)
}
+4 -13
View File
@@ -1,5 +1,5 @@
import { Box, NoSelect, Text } from '@hermes/ink'
import { memo, type ReactNode, useEffect, useMemo, useState } from 'react'
import { memo, useEffect, useMemo, useState, type ReactNode } from 'react'
import spinners, { type BrailleSpinnerName } from 'unicode-animations'
import { THINKING_COT_MAX } from '../config/limits.js'
@@ -919,22 +919,13 @@ export const ToolTrail = memo(function ToolTrail({
// hidden sections stay hidden so the override is honoured.
const expandAll = () => {
if (visible.thinking !== 'hidden') {
setOpenThinking(true)
}
if (visible.tools !== 'hidden') {
setOpenTools(true)
}
if (visible.thinking !== 'hidden') setOpenThinking(true)
if (visible.tools !== 'hidden') setOpenTools(true)
if (visible.subagents !== 'hidden') {
setOpenSubagents(true)
setDeepSubagents(true)
}
if (visible.activity !== 'hidden') {
setOpenMeta(true)
}
if (visible.activity !== 'hidden') setOpenMeta(true)
}
const metaTone: 'dim' | 'error' | 'warn' = activity.some(i => i.tone === 'error')
+7 -14
View File
@@ -1,22 +1,15 @@
import { isMac, isRemoteShell } from '../lib/platform.js'
import { isMac } from '../lib/platform.js'
const action = isMac ? 'Cmd' : 'Ctrl'
const paste = isMac ? 'Cmd' : 'Alt'
const copyHotkeys: [string, string][] = isMac
? [
['Cmd+C', 'copy selection'],
['Ctrl+C', 'interrupt / clear draft / exit']
]
: isRemoteShell()
? [
['Cmd+C', 'copy selection when forwarded by the terminal'],
['Ctrl+C', 'copy selection / interrupt / clear draft / exit']
]
: [['Ctrl+C', 'copy selection / interrupt / clear draft / exit']]
export const HOTKEYS: [string, string][] = [
...copyHotkeys,
...(isMac
? ([
['Cmd+C', 'copy selection'],
['Ctrl+C', 'interrupt / clear draft / exit']
] as [string, string][])
: ([['Ctrl+C', 'copy selection / interrupt / clear draft / exit']] as [string, string][])),
[action + '+D', 'exit'],
[action + '+G', 'open $EDITOR for prompt'],
[action + '+L', 'new session (clear)'],
+6 -14
View File
@@ -5,8 +5,8 @@
* as `key.meta`. Some macOS terminals also translate Cmd+Left/Right/Backspace
* into readline-style Ctrl+A/Ctrl+E/Ctrl+U before the app sees them.
* On other platforms the action modifier is Ctrl.
* Ctrl+C stays the interrupt key on macOS. On non-mac terminals it can also
* copy an active TUI selection, matching common terminal selection behavior.
* Ctrl+C is ALWAYS the interrupt key regardless of platform — it must never be
* remapped to copy.
*/
export const isMac = process.platform === 'darwin'
@@ -34,16 +34,6 @@ export const isMacActionFallback = (
export const isAction = (key: { ctrl: boolean; meta: boolean; super?: boolean }, ch: string, target: string): boolean =>
isActionMod(key) && ch.toLowerCase() === target
/**
 * True when any of the SSH session variables (`SSH_CONNECTION`, `SSH_CLIENT`,
 * `SSH_TTY`) is set to a non-empty value in `env`.
 *
 * `env` defaults to `process.env`; pass an explicit object to test other
 * environments.
 */
export const isRemoteShell = (env: NodeJS.ProcessEnv = process.env): boolean =>
  [env.SSH_CONNECTION, env.SSH_CLIENT, env.SSH_TTY].some(Boolean)
/**
 * Whether a key press should be treated as "copy".
 *
 * Matches the platform action modifier + `c` (via `isAction`). In a remote
 * shell (see `isRemoteShell`) it additionally matches meta/super + `c`.
 */
export const isCopyShortcut = (
  key: { ctrl: boolean; meta: boolean; super?: boolean },
  ch: string,
  env: NodeJS.ProcessEnv = process.env
): boolean => {
  if (isAction(key, ch, 'c')) {
    return true
  }

  if (!isRemoteShell(env)) {
    return false
  }

  const metaOrSuper = key.meta || key.super === true

  return metaOrSuper && ch.toLowerCase() === 'c'
}
/**
* Voice recording toggle key (Ctrl+B).
*
@@ -53,5 +43,7 @@ export const isCopyShortcut = (
* accept Cmd+B (the platform action modifier) so existing macOS muscle memory
* keeps working.
*/
export const isVoiceToggleKey = (key: { ctrl: boolean; meta: boolean; super?: boolean }, ch: string): boolean =>
(key.ctrl || isActionMod(key)) && ch.toLowerCase() === 'b'
export const isVoiceToggleKey = (
  key: { ctrl: boolean; meta: boolean; super?: boolean },
  ch: string
): boolean => {
  // Either a literal Ctrl or the platform action modifier counts.
  const modifierHeld = key.ctrl || isActionMod(key)

  return modifierHeld && ch.toLowerCase() === 'b'
}
+42 -101
View File
@@ -1,34 +1,19 @@
import { Backdrop } from "@/components/Backdrop";
import { DesktopBridge } from "@/components/DesktopBridge";
import { LanguageSwitcher } from "@/components/LanguageSwitcher";
import { RuntimeOverlay } from "@/components/RuntimeOverlay";
import { SidebarFooter } from "@/components/SidebarFooter";
import { SidebarStatusStrip } from "@/components/SidebarStatusStrip";
import { ThemeSwitcher } from "@/components/ThemeSwitcher";
import { PageHeaderProvider } from "@/contexts/PageHeaderProvider";
import type { SystemAction } from "@/contexts/system-actions-context";
import { useSystemActions } from "@/contexts/useSystemActions";
import { useI18n } from "@/i18n";
import { api, type SetupStateResponse } from "@/lib/api";
import {
isDashboardEmbeddedChatEnabled,
isDashboardGuiEnabled,
} from "@/lib/dashboard-flags";
import { cn } from "@/lib/utils";
import AnalyticsPage from "@/pages/AnalyticsPage";
import ChatPage from "@/pages/ChatPage";
import ConfigPage from "@/pages/ConfigPage";
import CronPage from "@/pages/CronPage";
import DocsPage from "@/pages/DocsPage";
import EnvPage from "@/pages/EnvPage";
import LogsPage from "@/pages/LogsPage";
import SessionsPage from "@/pages/SessionsPage";
import SetupPage from "@/pages/SetupPage";
import SkillsPage from "@/pages/SkillsPage";
import type { PluginManifest } from "@/plugins";
import { PluginPage, PluginSlot, usePlugins } from "@/plugins";
import { useTheme } from "@/themes";
import { SelectionSwitcher, Typography } from "@nous-research/ui";
useCallback,
useEffect,
useMemo,
useState,
type ComponentType,
type ReactNode,
} from "react";
import {
Routes,
Route,
NavLink,
Navigate,
useLocation,
useNavigate,
} from "react-router-dom";
import {
Activity,
BarChart3,
@@ -57,22 +42,30 @@ import {
X,
Zap,
} from "lucide-react";
import {
useCallback,
useEffect,
useMemo,
useState,
type ComponentType,
type ReactNode,
} from "react";
import {
NavLink,
Navigate,
Route,
Routes,
useLocation,
useNavigate,
} from "react-router-dom";
import { SelectionSwitcher, Typography } from "@nous-research/ui";
import { cn } from "@/lib/utils";
import { Backdrop } from "@/components/Backdrop";
import { SidebarFooter } from "@/components/SidebarFooter";
import { SidebarStatusStrip } from "@/components/SidebarStatusStrip";
import { PageHeaderProvider } from "@/contexts/PageHeaderProvider";
import { useSystemActions } from "@/contexts/useSystemActions";
import type { SystemAction } from "@/contexts/system-actions-context";
import ConfigPage from "@/pages/ConfigPage";
import DocsPage from "@/pages/DocsPage";
import EnvPage from "@/pages/EnvPage";
import SessionsPage from "@/pages/SessionsPage";
import LogsPage from "@/pages/LogsPage";
import AnalyticsPage from "@/pages/AnalyticsPage";
import CronPage from "@/pages/CronPage";
import SkillsPage from "@/pages/SkillsPage";
import ChatPage from "@/pages/ChatPage";
import { LanguageSwitcher } from "@/components/LanguageSwitcher";
import { ThemeSwitcher } from "@/components/ThemeSwitcher";
import { useI18n } from "@/i18n";
import { PluginPage, PluginSlot, usePlugins } from "@/plugins";
import type { PluginManifest } from "@/plugins";
import { useTheme } from "@/themes";
import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags";
function RootRedirect() {
return <Navigate to="/sessions" replace />;
@@ -151,10 +144,7 @@ function resolveIcon(name: string): ComponentType<{ className?: string }> {
return ICON_MAP[name] ?? Puzzle;
}
function buildNavItems(
builtIn: NavItem[],
manifests: PluginManifest[],
): NavItem[] {
function buildNavItems(builtIn: NavItem[], manifests: PluginManifest[]): NavItem[] {
const items = [...builtIn];
for (const manifest of manifests) {
@@ -250,25 +240,21 @@ function buildRoutes(
export default function App() {
const { t } = useI18n();
const { pathname } = useLocation();
const navigate = useNavigate();
const { manifests } = usePlugins();
const { theme } = useTheme();
const [mobileOpen, setMobileOpen] = useState(false);
const [setupState, setSetupState] = useState<SetupStateResponse | null>(null);
const closeMobile = useCallback(() => setMobileOpen(false), []);
const isDocsRoute = pathname === "/docs" || pathname === "/docs/";
const normalizedPath = pathname.replace(/\/$/, "") || "/";
const isChatRoute = normalizedPath === "/chat";
const guiMode = isDashboardGuiEnabled();
const embeddedChat = isDashboardEmbeddedChatEnabled();
const builtinRoutes = useMemo(
() => ({
...BUILTIN_ROUTES_CORE,
...(guiMode ? { "/setup": SetupPage } : {}),
...(embeddedChat ? { "/chat": ChatPage } : {}),
}),
[embeddedChat, guiMode],
[embeddedChat],
);
const builtinNav = useMemo(
@@ -298,48 +284,6 @@ export default function App() {
const layoutVariant = theme.layoutVariant ?? "standard";
useEffect(() => {
if (!guiMode) return;
let cancelled = false;
const refresh = async () => {
try {
const state = await api.getSetupState();
if (!cancelled) {
setSetupState(state);
}
} catch {
if (!cancelled) {
setSetupState(null);
}
}
};
const onRefresh = () => {
void refresh();
};
void refresh();
window.addEventListener("hermes:setup-refresh", onRefresh);
const id = window.setInterval(refresh, 2500);
return () => {
cancelled = true;
window.clearInterval(id);
window.removeEventListener("hermes:setup-refresh", onRefresh);
};
}, [guiMode]);
useEffect(() => {
if (!guiMode || !setupState) return;
if (setupState.needs_setup && normalizedPath !== "/setup") {
navigate("/setup", { replace: true });
return;
}
if (!setupState.needs_setup && normalizedPath === "/setup") {
navigate("/sessions", { replace: true });
}
}, [guiMode, navigate, normalizedPath, setupState]);
useEffect(() => {
if (!mobileOpen) return;
const onKey = (e: KeyboardEvent) => {
@@ -563,8 +507,7 @@ export default function App() {
<div
className={cn(
"w-full min-w-0",
(isDocsRoute || isChatRoute) &&
"min-h-0 flex flex-1 flex-col",
(isDocsRoute || isChatRoute) && "min-h-0 flex flex-1 flex-col",
)}
>
<Routes>
@@ -584,8 +527,6 @@ export default function App() {
</div>
<PluginSlot name="overlay" />
<DesktopBridge />
<RuntimeOverlay />
</div>
);
}
-47
View File
@@ -1,47 +0,0 @@
import { useEffect } from "react";
import { isDashboardGuiEnabled } from "@/lib/dashboard-flags";
// Ambient typing for the subset of a `window.__TAURI__` global that
// DesktopBridge uses. Both `__TAURI__` and its `notification` member are
// optional, so callers must null-check before use.
declare global {
interface Window {
__TAURI__?: {
notification?: {
// Resolves to whether notification permission is already granted.
isPermissionGranted: () => Promise<boolean>;
// Resolves to the resulting permission state.
requestPermission: () => Promise<"default" | "denied" | "granted">;
// Sends a notification; `title` is required, `body` is optional.
sendNotification: (notification: {
body?: string;
title: string;
}) => void;
};
};
}
}
/**
 * Renders nothing; forwards `hermes:desktop-notify` window events to the
 * `window.__TAURI__.notification` API when the dashboard runs in GUI mode.
 *
 * The event's `detail` is expected to carry `{ title, body? }`; events without
 * a title are ignored. Permission is requested on demand if it has not been
 * granted yet.
 */
export function DesktopBridge() {
  useEffect(() => {
    if (!isDashboardGuiEnabled()) {
      return;
    }

    async function sendDesktopNotification(title: string, body?: string) {
      const notifications = window.__TAURI__?.notification;
      if (!notifications) {
        return;
      }

      // Only prompt for permission when it is not already granted.
      const allowed =
        (await notifications.isPermissionGranted()) ||
        (await notifications.requestPermission()) === "granted";

      if (allowed) {
        notifications.sendNotification({ body, title });
      }
    }

    const handleNotify = (event: Event) => {
      const { detail } = event as CustomEvent<{
        body?: string;
        title?: string;
      }>;

      if (!detail?.title) {
        return;
      }

      void sendDesktopNotification(detail.title, detail.body);
    };

    window.addEventListener("hermes:desktop-notify", handleNotify);

    return () => {
      window.removeEventListener("hermes:desktop-notify", handleNotify);
    };
  }, []);

  return null;
}
-117
View File
@@ -1,117 +0,0 @@
import { RotateCw } from "lucide-react";
import { useEffect, useMemo, useState } from "react";
import { Button } from "@/components/ui/button";
import { api } from "@/lib/api";
import { isDashboardGuiEnabled } from "@/lib/dashboard-flags";
import { cn } from "@/lib/utils";
type RuntimeState = "checking" | "healthy" | "reconnecting";
const POLL_MS = 2_500;
/**
 * Full-screen "reconnecting" overlay for GUI mode.
 *
 * Polls `api.getRuntime()` every `POLL_MS` milliseconds. While the runtime has
 * never answered the overlay shows a "checking" message; once a previously
 * healthy runtime stops answering it shows how long ago the last successful
 * poll was. Transitions between healthy and unreachable also emit
 * `hermes:desktop-notify` window events. Nothing is rendered outside GUI mode
 * or while the runtime is healthy.
 */
export function RuntimeOverlay() {
  const [state, setState] = useState<RuntimeState>("checking");
  const [isGui, setIsGui] = useState(() => isDashboardGuiEnabled());
  const [lastOkAt, setLastOkAt] = useState<number | null>(null);
  // Timestamp of the most recent failed poll. Updating it on every failure
  // re-renders the overlay so the "Last healthy Ns ago" text keeps advancing
  // instead of freezing at the value computed on the first failure.
  const [lastFailAt, setLastFailAt] = useState<number | null>(null);
  const [notifiedDown, setNotifiedDown] = useState(false);

  useEffect(() => {
    let cancelled = false;

    const poll = async () => {
      try {
        const runtime = await api.getRuntime();
        if (cancelled) return;
        setIsGui(runtime.gui);
        setLastOkAt(Date.now());
        if (notifiedDown) {
          window.dispatchEvent(
            new CustomEvent("hermes:desktop-notify", {
              detail: {
                body: "The dashboard runtime is healthy again.",
                title: "Hermes Reconnected",
              },
            }),
          );
          setNotifiedDown(false);
        }
        setState("healthy");
      } catch {
        if (cancelled) return;
        setLastFailAt(Date.now());
        // `notifiedDown` is a dependency of this effect, so the value captured
        // here is current. Dispatching from the effect body (rather than from
        // inside a state updater) keeps the updater pure and guarantees the
        // notification fires once per outage.
        if (!notifiedDown) {
          if (isGui) {
            window.dispatchEvent(
              new CustomEvent("hermes:desktop-notify", {
                detail: {
                  body: "Trying to reconnect to the local Hermes runtime.",
                  title: "Hermes Runtime Disconnected",
                },
              }),
            );
          }
          setNotifiedDown(true);
        }
        // Stay in "checking" until the runtime has answered at least once.
        setState((prev) => (prev === "checking" ? "checking" : "reconnecting"));
      }
    };

    void poll();
    const id = setInterval(poll, POLL_MS);
    return () => {
      cancelled = true;
      clearInterval(id);
    };
  }, [isGui, notifiedDown]);

  const detail = useMemo(() => {
    if (state === "checking") return "Checking local Hermes runtime...";
    if (!lastOkAt) return "Trying to reconnect to the local Hermes runtime.";
    // Measure the outage as of the latest failed poll; fall back to "now" only
    // if no failure has been recorded yet.
    const elapsedMs = (lastFailAt ?? Date.now()) - lastOkAt;
    return `Runtime connection dropped. Last healthy ${Math.max(
      1,
      Math.round(elapsedMs / 1000),
    )}s ago.`;
  }, [lastFailAt, lastOkAt, state]);

  if (!isGui || state === "healthy") return null;

  return (
    <div
      className={cn(
        "fixed inset-0 z-80 flex items-center justify-center",
        "bg-black/70 backdrop-blur-sm",
      )}
      role="status"
      aria-live="polite"
    >
      <div
        className={cn(
          "w-[min(92vw,28rem)] border border-current/20 bg-background-base/95",
          "px-6 py-5 text-midground shadow-2xl",
        )}
      >
        <div className="flex items-start gap-3">
          <RotateCw className="mt-0.5 h-4 w-4 shrink-0 animate-spin" />
          <div className="min-w-0">
            <p className="font-mondwest text-sm tracking-[0.16em]">
              Hermes GUI Runtime
            </p>
            <p className="mt-2 text-xs normal-case leading-5 text-muted-foreground">
              {detail}
            </p>
          </div>
        </div>
        <Button
          type="button"
          variant="outline"
          size="sm"
          className="mt-5 h-8 text-xs"
          onClick={() => window.location.reload()}
        >
          Reload Window
        </Button>
      </div>
    </div>
  );
}
+4 -6
View File
@@ -1,7 +1,7 @@
import { useSidebarStatus } from "@/hooks/useSidebarStatus";
import { useI18n } from "@/i18n";
import { cn } from "@/lib/utils";
import { Typography } from "@nous-research/ui";
import { useSidebarStatus } from "@/hooks/useSidebarStatus";
import { cn } from "@/lib/utils";
import { useI18n } from "@/i18n";
export function SidebarFooter() {
const status = useSidebarStatus();
@@ -19,9 +19,7 @@ export function SidebarFooter() {
mondwest
className="font-mono-ui text-[0.7rem] tabular-nums tracking-[0.1em] text-muted-foreground/70"
>
{status?.version != null
? `v${status.version}${status.gui ? " · GUI" : ""}`
: "—"}
{status?.version != null ? `v${status.version}` : "—"}
</Typography>
<a
+15 -109
View File
@@ -18,10 +18,7 @@ function setSessionHeader(headers: Headers, token: string): void {
}
}
export async function fetchJSON<T>(
url: string,
init?: RequestInit,
): Promise<T> {
export async function fetchJSON<T>(url: string, init?: RequestInit): Promise<T> {
// Inject the session token into all /api/ requests.
const headers = new Headers(init?.headers);
const token = window.__HERMES_SESSION_TOKEN__;
@@ -43,50 +40,32 @@ async function getSessionToken(): Promise<string> {
_sessionToken = injected;
return _sessionToken;
}
throw new Error(
"Session token not available — page must be served by the Hermes dashboard server",
);
throw new Error("Session token not available — page must be served by the Hermes dashboard server");
}
export const api = {
getHealth: () => fetchJSON<HealthResponse>("/api/health"),
getRuntime: () => fetchJSON<RuntimeResponse>("/api/runtime"),
getSetupState: () => fetchJSON<SetupStateResponse>("/api/setup/state"),
getStatus: () => fetchJSON<StatusResponse>("/api/status"),
getSessions: (limit = 20, offset = 0) =>
fetchJSON<PaginatedSessions>(
`/api/sessions?limit=${limit}&offset=${offset}`,
),
fetchJSON<PaginatedSessions>(`/api/sessions?limit=${limit}&offset=${offset}`),
getSessionMessages: (id: string) =>
fetchJSON<SessionMessagesResponse>(
`/api/sessions/${encodeURIComponent(id)}/messages`,
),
fetchJSON<SessionMessagesResponse>(`/api/sessions/${encodeURIComponent(id)}/messages`),
deleteSession: (id: string) =>
fetchJSON<{ ok: boolean }>(`/api/sessions/${encodeURIComponent(id)}`, {
method: "DELETE",
}),
getLogs: (params: {
file?: string;
lines?: number;
level?: string;
component?: string;
}) => {
getLogs: (params: { file?: string; lines?: number; level?: string; component?: string }) => {
const qs = new URLSearchParams();
if (params.file) qs.set("file", params.file);
if (params.lines) qs.set("lines", String(params.lines));
if (params.level && params.level !== "ALL") qs.set("level", params.level);
if (params.component && params.component !== "all")
qs.set("component", params.component);
if (params.component && params.component !== "all") qs.set("component", params.component);
return fetchJSON<LogsResponse>(`/api/logs?${qs.toString()}`);
},
getAnalytics: (days: number) =>
fetchJSON<AnalyticsResponse>(`/api/analytics/usage?days=${days}`),
getConfig: () => fetchJSON<Record<string, unknown>>("/api/config"),
getDefaults: () => fetchJSON<Record<string, unknown>>("/api/config/defaults"),
getSchema: () =>
fetchJSON<{ fields: Record<string, unknown>; category_order: string[] }>(
"/api/config/schema",
),
getSchema: () => fetchJSON<{ fields: Record<string, unknown>; category_order: string[] }>("/api/config/schema"),
getModelInfo: () => fetchJSON<ModelInfoResponse>("/api/model/info"),
saveConfig: (config: Record<string, unknown>) =>
fetchJSON<{ ok: boolean }>("/api/config", {
@@ -128,29 +107,18 @@ export const api = {
// Cron jobs
getCronJobs: () => fetchJSON<CronJob[]>("/api/cron/jobs"),
createCronJob: (job: {
prompt: string;
schedule: string;
name?: string;
deliver?: string;
}) =>
createCronJob: (job: { prompt: string; schedule: string; name?: string; deliver?: string }) =>
fetchJSON<CronJob>("/api/cron/jobs", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(job),
}),
pauseCronJob: (id: string) =>
fetchJSON<{ ok: boolean }>(`/api/cron/jobs/${id}/pause`, {
method: "POST",
}),
fetchJSON<{ ok: boolean }>(`/api/cron/jobs/${id}/pause`, { method: "POST" }),
resumeCronJob: (id: string) =>
fetchJSON<{ ok: boolean }>(`/api/cron/jobs/${id}/resume`, {
method: "POST",
}),
fetchJSON<{ ok: boolean }>(`/api/cron/jobs/${id}/resume`, { method: "POST" }),
triggerCronJob: (id: string) =>
fetchJSON<{ ok: boolean }>(`/api/cron/jobs/${id}/trigger`, {
method: "POST",
}),
fetchJSON<{ ok: boolean }>(`/api/cron/jobs/${id}/trigger`, { method: "POST" }),
deleteCronJob: (id: string) =>
fetchJSON<{ ok: boolean }>(`/api/cron/jobs/${id}`, { method: "DELETE" }),
@@ -166,9 +134,7 @@ export const api = {
// Session search (FTS5)
searchSessions: (q: string) =>
fetchJSON<SessionSearchResponse>(
`/api/sessions/search?q=${encodeURIComponent(q)}`,
),
fetchJSON<SessionSearchResponse>(`/api/sessions/search?q=${encodeURIComponent(q)}`),
// OAuth provider management
getOAuthProviders: () =>
@@ -197,11 +163,7 @@ export const api = {
},
);
},
submitOAuthCode: async (
providerId: string,
sessionId: string,
code: string,
) => {
submitOAuthCode: async (providerId: string, sessionId: string, code: string) => {
const token = await getSessionToken();
return fetchJSON<OAuthSubmitResponse>(
`/api/providers/oauth/${encodeURIComponent(providerId)}/submit`,
@@ -247,7 +209,8 @@ export const api = {
fetchJSON<{ ok: boolean; count: number }>("/api/dashboard/plugins/rescan"),
// Dashboard themes
getThemes: () => fetchJSON<DashboardThemesResponse>("/api/dashboard/themes"),
getThemes: () =>
fetchJSON<DashboardThemesResponse>("/api/dashboard/themes"),
setTheme: (name: string) =>
fetchJSON<{ ok: boolean; theme: string }>("/api/dashboard/theme", {
method: "PUT",
@@ -281,7 +244,6 @@ export interface StatusResponse {
active_sessions: number;
config_path: string;
config_version: number;
embedded_chat: boolean;
env_path: string;
gateway_exit_reason: string | null;
gateway_health_url: string | null;
@@ -290,68 +252,12 @@ export interface StatusResponse {
gateway_running: boolean;
gateway_state: string | null;
gateway_updated_at: string | null;
gui: boolean;
hermes_home: string;
latest_config_version: number;
release_date: string;
version: string;
}
/** Response shape of `api.getHealth()` (`GET /api/health`). */
export interface HealthResponse {
embedded_chat: boolean;
// Either "browser" or "gui".
mode: "browser" | "gui";
profile: string;
// The only value this type admits is the literal "ok".
status: "ok";
version: string;
}
/** Response shape of `api.getRuntime()` (`GET /api/runtime`). */
export interface RuntimeResponse {
dashboard: {
embedded_chat: boolean;
};
// Gateway process details; `pid` and `state` may be null.
gateway: {
pid: number | null;
platforms: Record<string, PlatformStatus>;
running: boolean;
state: string | null;
};
// Read by RuntimeOverlay to decide whether the overlay may be shown.
gui: boolean;
hermes_home: string;
profile: string;
status: string;
}
/** Response shape of `api.getSetupState()` (`GET /api/setup/state`). */
export interface SetupStateResponse {
// One flag per setup area — presumably true once that area is configured;
// confirm against the server implementation.
checklist: {
model: boolean;
provider: boolean;
terminal: boolean;
};
gui: boolean;
hermes_home: string;
is_fresh_mode: boolean;
model: {
configured: boolean;
// SetupPage uses this to seed its model field.
value: string;
};
// App redirects to /setup while this is true and away from /setup once false.
needs_setup: boolean;
profile: string;
provider: {
active_provider: string | null;
configured_env_keys: string[];
// SetupPage lists these first when building its provider key options.
recommended_keys: Array<{
description: string;
is_set: boolean;
name: string;
url: string | null;
}>;
};
terminal: {
// SetupPage falls back to "local" when this is empty.
backend: string;
configured: boolean;
};
}
export interface SessionInfo {
id: string;
source: string | null;
-7
View File
@@ -2,8 +2,6 @@ declare global {
interface Window {
/** Set true by the server only for `hermes dashboard --tui` (or HERMES_DASHBOARD_TUI=1). */
__HERMES_DASHBOARD_EMBEDDED_CHAT__?: boolean;
/** Set true by the server for `hermes dashboard --gui`. */
__HERMES_DASHBOARD_GUI__?: boolean;
/** @deprecated Older injected name; treated as on when true. */
__HERMES_DASHBOARD_TUI__?: boolean;
}
@@ -15,8 +13,3 @@ export function isDashboardEmbeddedChatEnabled(): boolean {
if (window.__HERMES_DASHBOARD_EMBEDDED_CHAT__ === true) return true;
return window.__HERMES_DASHBOARD_TUI__ === true;
}
/**
 * True only when running in a browser context whose server injected
 * `window.__HERMES_DASHBOARD_GUI__ === true`. Always false when `window` is
 * not defined; any value other than the boolean `true` counts as disabled.
 */
export function isDashboardGuiEnabled(): boolean {
  const hasWindow = typeof window !== "undefined";
  return hasWindow && window.__HERMES_DASHBOARD_GUI__ === true;
}
-3
View File
@@ -18,9 +18,6 @@ export function resolvePageTitle(
pluginTabs: { path: string; label: string }[],
): string {
const normalized = pathname.replace(/\/$/, "") || "/";
if (normalized === "/setup") {
return "Setup";
}
if (normalized === "/") {
return t.app.nav.sessions;
}
-434
View File
@@ -1,434 +0,0 @@
import { OAuthProvidersCard } from "@/components/OAuthProvidersCard";
import { Toast } from "@/components/Toast";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useToast } from "@/hooks/useToast";
import { api, type EnvVarInfo, type SetupStateResponse } from "@/lib/api";
import { PluginSlot } from "@/plugins";
import {
ArrowRight,
CheckCircle2,
Circle,
KeyRound,
Loader2,
Settings2,
Sparkles,
} from "lucide-react";
import { useCallback, useEffect, useMemo, useState } from "react";
import { useNavigate } from "react-router-dom";
// Model identifiers in "<vendor>/<model>" form.
// NOTE(review): presumably offered as quick picks on the setup page — the
// usage is outside this view; confirm.
const MODEL_PRESETS = [
"anthropic/claude-sonnet-4.6",
"openai/gpt-4.1",
"google/gemini-2.5-pro",
"deepseek/deepseek-reasoner",
];
// Provider env-var names appended after the server's recommended keys when
// SetupPage picks which key to preselect.
const FALLBACK_PROVIDER_KEYS = [
"OPENROUTER_API_KEY",
"ANTHROPIC_API_KEY",
"OPENAI_API_KEY",
"NOUS_API_KEY",
];
/**
 * Extracts the configured model name from a config object.
 *
 * Accepts both shapes of `config.model`: a plain string, or a non-array object
 * whose `default` property is a string. Anything else (including a null
 * config) yields the empty string.
 */
function readModelValue(config: Record<string, unknown> | null): string {
  const model = config?.model;

  if (typeof model === "string") {
    return model;
  }

  const isPlainObject =
    model !== null && typeof model === "object" && !Array.isArray(model);
  if (!isPlainObject) {
    return "";
  }

  const fallback = (model as Record<string, unknown>).default;
  return typeof fallback === "string" ? fallback : "";
}
/**
 * First-run setup page.
 *
 * Fetches setup state, env vars and config in parallel, then renders three
 * steps: (1) connect a provider via OAuth or a pasted API key, (2) choose a
 * model and terminal backend, (3) continue into the app. The "Enter Hermes"
 * button is enabled only when the provider and model checklist items are
 * both satisfied; the terminal item counts toward the progress figure only.
 */
export default function SetupPage() {
  const navigate = useNavigate();
  const { toast, showToast } = useToast();
  const [setupState, setSetupState] = useState<SetupStateResponse | null>(null);
  const [envVars, setEnvVars] = useState<Record<string, EnvVarInfo> | null>(
    null,
  );
  const [config, setConfig] = useState<Record<string, unknown> | null>(null);
  const [loading, setLoading] = useState(true);
  const [savingKey, setSavingKey] = useState(false);
  const [savingConfig, setSavingConfig] = useState(false);
  const [providerKey, setProviderKey] = useState("");
  const [providerValue, setProviderValue] = useState("");
  const [modelValue, setModelValue] = useState("");
  const [terminalBackend, setTerminalBackend] = useState("local");

  // (Re)load everything the page needs. Also called after each successful
  // save so the checklist reflects the server's view.
  const load = useCallback(async () => {
    setLoading(true);
    try {
      const [state, vars, cfg] = await Promise.all([
        api.getSetupState(),
        api.getEnvVars(),
        api.getConfig(),
      ]);
      setSetupState(state);
      setEnvVars(vars);
      setConfig(cfg);
      // Prefer the model reported by setup state; fall back to the raw config.
      setModelValue(state.model.value || readModelValue(cfg));
      setTerminalBackend(state.terminal.backend || "local");

      // Preselect a provider key: recommended keys first, then the fallback
      // list, restricted to names present in the env-var map. The first one
      // that is not yet set wins; otherwise the first available one.
      const preferredKeys = [
        ...state.provider.recommended_keys.map((k) => k.name),
        ...FALLBACK_PROVIDER_KEYS,
      ];
      const availableKeys = preferredKeys.filter((key) => vars[key] != null);
      const firstUnset = availableKeys.find((key) => !vars[key]?.is_set);
      const nextKey = firstUnset || availableKeys[0] || "";
      // Keep the user's current selection across reloads while it still exists.
      setProviderKey((prev) => (prev && vars[prev] ? prev : nextKey));
    } catch (error) {
      showToast(`Failed to load setup state: ${error}`, "error");
    } finally {
      setLoading(false);
    }
  }, [showToast]);

  // Load on mount, and again whenever `load` changes identity.
  useEffect(() => {
    void load();
  }, [load]);

  // Options for the provider-key dropdown: recommended keys first, then any
  // other env var categorised as "provider" whose name looks like a credential.
  const providerOptions = useMemo(() => {
    if (!envVars || !setupState) return [];
    const keySet = new Set<string>();
    const options: Array<{
      key: string;
      description: string;
      isSet: boolean;
      url: string | null;
    }> = [];
    for (const item of setupState.provider.recommended_keys) {
      if (!envVars[item.name]) continue;
      keySet.add(item.name);
      options.push({
        key: item.name,
        description: item.description,
        isSet: envVars[item.name]?.is_set ?? item.is_set,
        url: item.url,
      });
    }
    for (const [key, info] of Object.entries(envVars)) {
      if (keySet.has(key)) continue;
      if (info.category !== "provider") continue;
      if (!(key.endsWith("_API_KEY") || key.endsWith("_TOKEN"))) continue;
      options.push({
        key,
        description: info.description,
        isSet: info.is_set,
        url: info.url,
      });
    }
    return options;
  }, [envVars, setupState]);

  const selectedProviderMeta = providerOptions.find(
    (o) => o.key === providerKey,
  );
  const checklist = setupState?.checklist;
  const ready = !!checklist?.provider && !!checklist?.model;
  const completeCount =
    Number(!!checklist?.provider) +
    Number(!!checklist?.model) +
    Number(!!checklist?.terminal);

  // Persist the pasted API key under the selected env-var name.
  const saveProviderKey = async () => {
    if (!providerKey || !providerValue.trim()) return;
    setSavingKey(true);
    try {
      await api.setEnvVar(providerKey, providerValue.trim());
      // Do not keep the secret in component state after it has been saved.
      setProviderValue("");
      showToast(`Saved ${providerKey}`, "success");
      // NOTE(review): listeners for this event are not visible in this file;
      // presumably other parts of the dashboard refresh their setup state.
      window.dispatchEvent(new Event("hermes:setup-refresh"));
      await load();
    } catch (error) {
      showToast(`Failed to save ${providerKey}: ${error}`, "error");
    } finally {
      setSavingKey(false);
    }
  };

  // Persist the chosen model and terminal backend into the config.
  const saveModelAndDefaults = async () => {
    if (!config) return;
    const trimmedModel = modelValue.trim();
    if (!trimmedModel) {
      showToast("Model is required.", "error");
      return;
    }
    const nextConfig = structuredClone(config);

    // `model` may be a bare string or an object carrying the model name under
    // `default` (readModelValue accepts both shapes). Keep the object form and
    // replace only `default`, so sibling fields on the object are not
    // discarded by the save; this mirrors how `terminal` is merged below.
    const rawModel = nextConfig.model;
    nextConfig.model =
      rawModel && typeof rawModel === "object" && !Array.isArray(rawModel)
        ? { ...(rawModel as Record<string, unknown>), default: trimmedModel }
        : trimmedModel;

    // Merge into the existing terminal section rather than replacing it.
    const rawTerminal = nextConfig.terminal;
    const terminal =
      rawTerminal &&
      typeof rawTerminal === "object" &&
      !Array.isArray(rawTerminal)
        ? { ...(rawTerminal as Record<string, unknown>) }
        : {};
    terminal.backend = terminalBackend || "local";
    nextConfig.terminal = terminal;

    setSavingConfig(true);
    try {
      await api.saveConfig(nextConfig);
      setConfig(nextConfig);
      showToast("Saved model and runtime defaults.", "success");
      window.dispatchEvent(new Event("hermes:setup-refresh"));
      await load();
    } catch (error) {
      showToast(`Failed to save setup config: ${error}`, "error");
    } finally {
      setSavingConfig(false);
    }
  };

  if (loading) {
    return (
      <div className="flex items-center justify-center py-24">
        <Loader2 className="h-6 w-6 animate-spin text-muted-foreground" />
      </div>
    );
  }

  // Loading finished but the fetch failed (the error toast was already shown).
  if (!setupState) {
    return (
      <div className="border border-destructive/30 bg-destructive/6 p-4 text-sm text-destructive">
        Setup state unavailable. Reload the dashboard.
      </div>
    );
  }

  return (
    <div className="flex flex-col gap-4">
      <PluginSlot name="setup:top" />
      <Toast toast={toast} />
      <Card>
        <CardHeader>
          <div className="flex items-center gap-2">
            <Sparkles className="h-5 w-5 text-muted-foreground" />
            <CardTitle>Hermes GUI Setup</CardTitle>
          </div>
        </CardHeader>
        <CardContent className="grid gap-3 text-sm">
          <div className="flex items-center gap-2">
            <Badge variant={ready ? "success" : "outline"}>
              {ready ? "Ready" : "Setup Required"}
            </Badge>
            <span className="text-muted-foreground">
              {completeCount}/3 checks complete
            </span>
          </div>
          {setupState.is_fresh_mode && (
            <p className="text-xs text-success">
              Fresh mode active. This GUI run is isolated from your default
              install.
            </p>
          )}
          <p className="text-xs text-muted-foreground">
            Profile: <code>{setupState.profile}</code> · Home:{" "}
            <code>{setupState.hermes_home}</code>
          </p>
          <div className="grid gap-1 text-xs">
            <ChecklistItem
              done={setupState.checklist.provider}
              label="Provider credential connected"
            />
            <ChecklistItem
              done={setupState.checklist.model}
              label="Model selected"
            />
            <ChecklistItem
              done={setupState.checklist.terminal}
              label="Terminal backend configured"
            />
          </div>
        </CardContent>
      </Card>
      <Card>
        <CardHeader>
          <div className="flex items-center gap-2">
            <KeyRound className="h-5 w-5 text-muted-foreground" />
            <CardTitle>1) Connect a provider</CardTitle>
          </div>
        </CardHeader>
        <CardContent className="grid gap-4">
          <OAuthProvidersCard
            onError={(msg) => showToast(msg, "error")}
            onSuccess={(msg) => showToast(msg, "success")}
          />
          <div className="grid gap-2 border border-border p-3">
            <Label className="text-xs uppercase tracking-wide text-muted-foreground">
              API Key (manual)
            </Label>
            <div className="grid gap-2 sm:grid-cols-[minmax(0,1fr)_minmax(0,2fr)_auto]">
              <select
                value={providerKey}
                onChange={(e) => setProviderKey(e.target.value)}
                className="h-9 border border-border bg-background px-2 text-xs"
              >
                {providerOptions.map((option) => (
                  <option key={option.key} value={option.key}>
                    {option.key}
                    {option.isSet ? " (set)" : ""}
                  </option>
                ))}
              </select>
              <Input
                type="password"
                value={providerValue}
                onChange={(e) => setProviderValue(e.target.value)}
                placeholder="Paste API key"
                className="h-9 font-mono-ui text-xs"
              />
              <Button
                type="button"
                size="sm"
                className="h-9"
                onClick={saveProviderKey}
                disabled={savingKey || !providerKey || !providerValue.trim()}
              >
                {savingKey ? "Saving..." : "Save key"}
              </Button>
            </div>
            {selectedProviderMeta?.description && (
              <p className="text-xs text-muted-foreground">
                {selectedProviderMeta.description}
              </p>
            )}
            {selectedProviderMeta?.url && (
              <a
                href={selectedProviderMeta.url}
                target="_blank"
                rel="noreferrer"
                className="text-xs text-primary hover:underline"
              >
                Get key
              </a>
            )}
          </div>
        </CardContent>
      </Card>
      <Card>
        <CardHeader>
          <div className="flex items-center gap-2">
            <Settings2 className="h-5 w-5 text-muted-foreground" />
            <CardTitle>2) Choose model + runtime defaults</CardTitle>
          </div>
        </CardHeader>
        <CardContent className="grid gap-3">
          <div className="grid gap-2">
            <Label className="text-xs uppercase tracking-wide text-muted-foreground">
              Model
            </Label>
            <Input
              value={modelValue}
              onChange={(e) => setModelValue(e.target.value)}
              placeholder="anthropic/claude-sonnet-4.6"
              className="font-mono-ui text-xs"
            />
            <div className="flex flex-wrap gap-1">
              {MODEL_PRESETS.map((preset) => (
                <button
                  key={preset}
                  type="button"
                  onClick={() => setModelValue(preset)}
                  className="border border-border px-2 py-1 text-[11px] hover:bg-secondary/40"
                >
                  {preset}
                </button>
              ))}
            </div>
          </div>
          <div className="grid gap-2">
            <Label className="text-xs uppercase tracking-wide text-muted-foreground">
              Terminal backend
            </Label>
            <select
              value={terminalBackend}
              onChange={(e) => setTerminalBackend(e.target.value)}
              className="h-9 border border-border bg-background px-2 text-xs"
            >
              <option value="local">local</option>
              <option value="docker">docker</option>
              <option value="ssh">ssh</option>
              <option value="modal">modal</option>
              <option value="daytona">daytona</option>
              <option value="singularity">singularity</option>
            </select>
          </div>
          <Button
            type="button"
            onClick={saveModelAndDefaults}
            disabled={savingConfig || !modelValue.trim()}
            className="w-fit"
          >
            {savingConfig ? "Saving..." : "Save setup defaults"}
          </Button>
        </CardContent>
      </Card>
      <Card>
        <CardHeader>
          <CardTitle>3) Continue</CardTitle>
        </CardHeader>
        <CardContent className="flex flex-wrap items-center gap-2">
          <Button
            type="button"
            onClick={() => navigate("/sessions", { replace: true })}
            disabled={!ready}
            className="gap-1.5"
          >
            Enter Hermes
            <ArrowRight className="h-3.5 w-3.5" />
          </Button>
          <Button
            type="button"
            variant="outline"
            onClick={() => navigate("/env")}
          >
            Advanced keys
          </Button>
          <Button
            type="button"
            variant="outline"
            onClick={() => navigate("/config")}
          >
            Advanced config
          </Button>
        </CardContent>
      </Card>
      <PluginSlot name="setup:bottom" />
    </div>
  );
}
/**
 * One row of the setup checklist: a filled check icon when `done` is true,
 * a dimmed hollow circle otherwise, followed by the label text.
 */
function ChecklistItem(props: { done: boolean; label: string }) {
  const { done, label } = props;
  // Pick the status glyph up front so the returned markup stays flat.
  const statusIcon = done ? (
    <CheckCircle2 className="h-3.5 w-3.5 text-success" />
  ) : (
    <Circle className="h-3.5 w-3.5 text-muted-foreground/60" />
  );
  return (
    <div className="flex items-center gap-2">
      {statusIcon}
      <span className="text-muted-foreground">{label}</span>
    </div>
  );
}
+8
View File
@@ -721,6 +721,14 @@ auxiliary:
base_url: ""
api_key: ""
timeout: 30
# Memory flush — summarizes conversation for persistent memory
flush_memories:
provider: "auto"
model: ""
base_url: ""
api_key: ""
timeout: 30
```
:::tip
@@ -168,6 +168,7 @@ Hermes uses separate lightweight models for side tasks. Each task has its own pr
| Session Search | Past session summarization | `auxiliary.session_search` |
| Skills Hub | Skill search and discovery | `auxiliary.skills_hub` |
| MCP | MCP helper operations | `auxiliary.mcp` |
| Memory Flush | Memory consolidation | `auxiliary.flush_memories` |
| Approval | Smart command-approval classification | `auxiliary.approval` |
| Title Generation | Session title summaries | `auxiliary.title_generation` |
@@ -225,6 +226,10 @@ auxiliary:
mcp:
provider: "auto"
model: ""
flush_memories:
provider: "auto"
model: ""
```
Every task above follows the same **provider / model / base_url** pattern. Context compression is configured under `auxiliary.compression`:
@@ -360,6 +365,7 @@ See [Scheduled Tasks (Cron)](/docs/user-guide/features/cron) for full configurat
| Session search | Auto-detection chain | `auxiliary.session_search` |
| Skills hub | Auto-detection chain | `auxiliary.skills_hub` |
| MCP helpers | Auto-detection chain | `auxiliary.mcp` |
| Memory flush | Auto-detection chain | `auxiliary.flush_memories` |
| Approval classification | Auto-detection chain | `auxiliary.approval` |
| Title generation | Auto-detection chain | `auxiliary.title_generation` |
| Delegation | Provider override only (no automatic fallback) | `delegation.provider` / `delegation.model` |