Compare commits
10 Commits
perf/fts-o
...
opencode-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c0b3b73bf4 | ||
|
|
fb51253620 | ||
|
|
ef009a987a | ||
|
|
130396c658 | ||
|
|
11d93096b3 | ||
|
|
d464d08a5f | ||
|
|
5a95fb2e14 | ||
|
|
ea5a6c216b | ||
|
|
4df62d239e | ||
|
|
490b3e76b1 |
7
.github/workflows/deploy-site.yml
vendored
7
.github/workflows/deploy-site.yml
vendored
@@ -22,7 +22,12 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
deploy-vercel:
|
||||
if: github.event_name == 'release'
|
||||
# Triggered automatically on release publish (production cuts) and
|
||||
# manually via `gh workflow run deploy-site.yml` when an out-of-band
|
||||
# main commit needs to ship live before the next release tag — e.g.
|
||||
# a skills-index PR that doesn't touch website/** paths and so
|
||||
# doesn't auto-deploy via the deploy-docs path.
|
||||
if: github.event_name == 'release' || github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Trigger Vercel Deploy
|
||||
|
||||
24
.github/workflows/docker-publish.yml
vendored
24
.github/workflows/docker-publish.yml
vendored
@@ -196,10 +196,26 @@ jobs:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Build once, load into the local daemon for smoke testing. Cached
|
||||
# to gha with a per-arch scope; the push step below reuses every
|
||||
# layer from this build.
|
||||
- name: Build image (arm64, smoke test)
|
||||
# Build once, load into the local daemon for smoke testing. PR arm64
|
||||
# builds deliberately avoid the gha cache: cold-cache arm64 builds can
|
||||
# outlive GitHub's short-lived Azure cache SAS token, then fail while
|
||||
# reading or writing cache blobs before the smoke test can run.
|
||||
- name: Build image (arm64, smoke test, uncached PR)
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.IMAGE_NAME }}:test
|
||||
build-args: |
|
||||
HERMES_GIT_SHA=${{ github.sha }}
|
||||
|
||||
# Main/release builds still use the per-arch gha cache so the digest
|
||||
# push below can reuse layers from this smoke-test build.
|
||||
- name: Build image (arm64, smoke test, cached publish)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
|
||||
@@ -980,6 +980,48 @@ def _extract_responses_reasoning_text(item: Any) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
def _format_responses_error(error_obj: Any, response_status: str) -> str:
    """Render a Responses ``response.error`` payload as one readable line.

    Terminal ``response.failed`` events carry their failure details under
    ``response.error``, shaped like
    ``{"code": "rate_limit_exceeded", "message": "Slow down", "param": ...}``.
    Surfacing only ``message`` hides the failure mode (rate limit vs
    context-length vs internal_error vs model-overloaded), so ``code`` is
    prefixed whenever both fields are usable.

    Resolution order:

    1. ``"<code>: <message>"`` when both are non-empty.
    2. Whichever one of the two is non-empty.
    3. ``str(error_obj)`` for any other truthy payload.
    4. A stable default naming ``response_status`` when nothing was sent.

    ``error_obj`` may be a dict or an object exposing ``code`` / ``message``
    attributes. Adapted from anomalyco/opencode#28757.
    """

    def _as_text(value: Any) -> str:
        # Strings are always stripped; anything else is stringified only when
        # truthy, so a missing field never leaks out as the text "None".
        if isinstance(value, str) or value:
            return str(value).strip()
        return ""

    # Accept both dict payloads and attribute-style SDK objects.
    if isinstance(error_obj, dict):
        raw_code, raw_message = error_obj.get("code"), error_obj.get("message")
    elif error_obj is not None:
        raw_code = getattr(error_obj, "code", None)
        raw_message = getattr(error_obj, "message", None)
    else:
        raw_code = raw_message = None

    usable = [text for text in (_as_text(raw_code), _as_text(raw_message)) if text]
    if usable:
        # One entry -> returned verbatim; two entries -> "code: message".
        return ": ".join(usable)
    if error_obj:
        # Last resort: stringify whatever the provider sent so it is at least
        # visible in logs/UI rather than silently swallowed.
        return str(error_obj)
    return f"Responses API returned status '{response_status}'"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Full response normalization
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1023,10 +1065,7 @@ def _normalize_codex_response(
|
||||
|
||||
if response_status in {"failed", "cancelled"}:
|
||||
error_obj = getattr(response, "error", None)
|
||||
if isinstance(error_obj, dict):
|
||||
error_msg = error_obj.get("message") or str(error_obj)
|
||||
else:
|
||||
error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
|
||||
error_msg = _format_responses_error(error_obj, response_status)
|
||||
raise RuntimeError(error_msg)
|
||||
|
||||
content_parts: List[str] = []
|
||||
|
||||
@@ -4561,6 +4561,7 @@ def run_conversation(
|
||||
original_user_message=original_user_message,
|
||||
final_response=final_response,
|
||||
interrupted=interrupted,
|
||||
messages=messages,
|
||||
)
|
||||
|
||||
# Background memory/skill review — runs AFTER the response is delivered
|
||||
|
||||
@@ -368,11 +368,42 @@ class MemoryManager:
|
||||
|
||||
# -- Sync ----------------------------------------------------------------
|
||||
|
||||
def sync_all(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
|
||||
@staticmethod
def _provider_sync_accepts_messages(provider: MemoryProvider) -> bool:
    """Report whether ``provider.sync_turn`` can be called with ``messages=``.

    The answer is True when the signature names a ``messages`` parameter or
    takes ``**kwargs``. It is also True when ``inspect.signature`` cannot
    introspect the callable (``TypeError`` / ``ValueError``).
    """
    try:
        accepted = inspect.signature(provider.sync_turn).parameters
    except (TypeError, ValueError):
        return True
    takes_var_keyword = any(
        param.kind == inspect.Parameter.VAR_KEYWORD for param in accepted.values()
    )
    return takes_var_keyword or "messages" in accepted
|
||||
|
||||
def sync_all(
|
||||
self,
|
||||
user_content: str,
|
||||
assistant_content: str,
|
||||
*,
|
||||
session_id: str = "",
|
||||
messages: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> None:
|
||||
"""Sync a completed turn to all providers."""
|
||||
for provider in self._providers:
|
||||
try:
|
||||
provider.sync_turn(user_content, assistant_content, session_id=session_id)
|
||||
if messages is not None and self._provider_sync_accepts_messages(provider):
|
||||
provider.sync_turn(
|
||||
user_content,
|
||||
assistant_content,
|
||||
session_id=session_id,
|
||||
messages=messages,
|
||||
)
|
||||
else:
|
||||
provider.sync_turn(
|
||||
user_content,
|
||||
assistant_content,
|
||||
session_id=session_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Memory provider '%s' sync_turn failed: %s",
|
||||
|
||||
@@ -112,11 +112,22 @@ class MemoryProvider(ABC):
|
||||
that do background prefetching should override this.
|
||||
"""
|
||||
|
||||
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
|
||||
def sync_turn(
    self,
    user_content: str,
    assistant_content: str,
    *,
    session_id: str = "",
    messages: Optional[List[Dict[str, Any]]] = None,
) -> None:
    """Store one finished turn in the provider's backend.

    Invoked after every turn. Implementations should return quickly: when
    the backend has latency, enqueue the write for background processing
    instead of blocking the caller.

    ``messages`` carries the OpenAI-style message list for the conversation
    as of the completed turn, assistant tool calls and tool results
    included. Providers with no use for the raw turn context may ignore it.
    """
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -31,6 +31,7 @@ import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from agent.codex_responses_adapter import _format_responses_error
|
||||
from agent.redact import redact_sensitive_text
|
||||
from agent.transports.codex_app_server import (
|
||||
CodexAppServerClient,
|
||||
@@ -581,7 +582,7 @@ class CodexAppServerSession:
|
||||
(note.get("params") or {}).get("turn") or {}
|
||||
).get("error")
|
||||
if err_obj:
|
||||
err_msg = err_obj.get("message") or str(err_obj)
|
||||
err_msg = _format_responses_error(err_obj, str(turn_status))
|
||||
# If the turn failed for an auth/refresh reason,
|
||||
# rewrite the error into a re-auth hint AND mark
|
||||
# the session for retirement.
|
||||
|
||||
@@ -30,13 +30,21 @@ cd /opt/data
|
||||
dash_host="${HERMES_DASHBOARD_HOST:-0.0.0.0}"
|
||||
dash_port="${HERMES_DASHBOARD_PORT:-9119}"
|
||||
|
||||
# Binding to anything other than localhost requires --insecure — the
|
||||
# dashboard refuses otherwise because it exposes API keys. Inside a
|
||||
# container this is the expected deployment.
|
||||
# `--insecure` is opt-in via HERMES_DASHBOARD_INSECURE. The dashboard's
|
||||
# OAuth auth gate engages automatically on non-loopback binds when a
|
||||
# DashboardAuthProvider is registered (e.g. the bundled dashboard_auth/nous
|
||||
# provider, which auto-registers when HERMES_DASHBOARD_OAUTH_CLIENT_ID is
|
||||
# set). If no provider is registered, start_server fails closed with a
|
||||
# specific operator-facing error.
|
||||
#
|
||||
# This used to derive --insecure from the bind host ("anything non-loopback
|
||||
# implies insecure"), but that predates the OAuth gate and silently
|
||||
# disabled it on every container-deployed dashboard. The gate is now the
|
||||
# authority; operators on trusted LANs / behind a reverse proxy without
|
||||
# the OAuth contract opt in explicitly.
|
||||
insecure=""
|
||||
case "$dash_host" in
|
||||
127.0.0.1|localhost) ;;
|
||||
*) insecure="--insecure" ;;
|
||||
case "${HERMES_DASHBOARD_INSECURE:-}" in
|
||||
1|true|TRUE|True|yes|YES|Yes) insecure="--insecure" ;;
|
||||
esac
|
||||
|
||||
# shellcheck disable=SC2086 # word-splitting of $insecure is intentional
|
||||
|
||||
@@ -13391,11 +13391,6 @@ Examples:
|
||||
"--yes", "-y", action="store_true", help="Skip confirmation"
|
||||
)
|
||||
|
||||
sessions_subparsers.add_parser(
|
||||
"optimize",
|
||||
help="Reclaim disk space: merge FTS5 segments + VACUUM (no data change)",
|
||||
)
|
||||
|
||||
sessions_subparsers.add_parser("stats", help="Show session store statistics")
|
||||
|
||||
sessions_rename = sessions_subparsers.add_parser(
|
||||
@@ -13568,39 +13563,6 @@ Examples:
|
||||
relaunch(["--resume", selected_id])
|
||||
return # won't reach here after execvp
|
||||
|
||||
elif action == "optimize":
|
||||
db_path = db.db_path
|
||||
before_mb = (
|
||||
os.path.getsize(db_path) / (1024 * 1024)
|
||||
if db_path.exists()
|
||||
else 0.0
|
||||
)
|
||||
print("Optimizing session store (FTS merge + VACUUM)…")
|
||||
try:
|
||||
# vacuum() merges FTS5 segments (optimize_fts) then VACUUMs.
|
||||
# Probe the index count first for the summary line.
|
||||
n = sum(
|
||||
1
|
||||
for t in db._FTS_TABLES
|
||||
if db._fts_table_exists(t)
|
||||
)
|
||||
db.vacuum()
|
||||
except Exception as e:
|
||||
print(f"Error: optimization failed: {e}")
|
||||
db.close()
|
||||
return
|
||||
after_mb = (
|
||||
os.path.getsize(db_path) / (1024 * 1024)
|
||||
if db_path.exists()
|
||||
else 0.0
|
||||
)
|
||||
saved = before_mb - after_mb
|
||||
print(f"Optimized {n} FTS index(es).")
|
||||
print(
|
||||
f"Database size: {before_mb:.1f} MB -> {after_mb:.1f} MB "
|
||||
f"(reclaimed {saved:.1f} MB)"
|
||||
)
|
||||
|
||||
elif action == "stats":
|
||||
total = db.session_count()
|
||||
msgs = db.message_count()
|
||||
|
||||
@@ -3116,58 +3116,6 @@ class SessionDB:
|
||||
|
||||
# ── Space reclamation ──
|
||||
|
||||
# FTS5 virtual tables whose b-tree segments we merge on optimize. The
|
||||
# trigram table is created lazily / may be disabled, so we probe before
|
||||
# touching it (see optimize_fts).
|
||||
_FTS_TABLES = ("messages_fts", "messages_fts_trigram")
|
||||
|
||||
def _fts_table_exists(self, name: str) -> bool:
    """Return True when ``name`` can be queried on this connection.

    Issues a zero-row ``SELECT`` against the table; an
    ``sqlite3.OperationalError`` from that probe is reported as False.
    """
    probe = f"SELECT 1 FROM {name} LIMIT 0"
    try:
        self._conn.execute(probe)
    except sqlite3.OperationalError:
        return False
    return True
|
||||
|
||||
def optimize_fts(self) -> int:
    """Collapse each FTS5 index's fragmented b-tree segments into one.

    FTS5 indexes accumulate incremental segments, one per ``INSERT`` batch
    driven by the message triggers. Across tens of thousands of messages
    that bloats the ``*_data`` shadow tables and slows ``MATCH`` queries,
    which have to scan every segment. The special ``'optimize'`` command
    rewrites an index as a single merged segment.

    This is maintenance only: search results and ``snippet()`` output are
    unchanged; only on-disk layout and query speed differ. It complements
    VACUUM: ``optimize`` compacts the index internally, and VACUUM then
    hands the freed pages back to the OS.

    Tables listed in ``_FTS_TABLES`` that do not exist (for example the
    trigram index when disabled via ``HERMES_DISABLE_FTS_TRIGRAM`` or not
    yet created) are skipped, so the call is safe unconditionally.

    Returns the number of FTS indexes that were optimized.
    """
    merged = 0
    with self._lock:
        for table in self._FTS_TABLES:
            if self._fts_table_exists(table):
                # FTS5 special commands are written as an INSERT whose
                # column name equals the table name.
                command = f"INSERT INTO {table}({table}) VALUES('optimize')"
                try:
                    self._conn.execute(command)
                except sqlite3.OperationalError as err:
                    logger.warning("FTS optimize failed for %s: %s", table, err)
                else:
                    merged += 1
    return merged
|
||||
|
||||
def vacuum(self) -> None:
|
||||
"""Run VACUUM to reclaim disk space after large deletes.
|
||||
|
||||
@@ -3181,17 +3129,7 @@ class SessionDB:
|
||||
exclusive lock, so callers must ensure no other writers are
|
||||
active. Safe to call at startup before the gateway/CLI starts
|
||||
serving traffic.
|
||||
|
||||
FTS5 segments are merged first via :meth:`optimize_fts` so the
|
||||
subsequent VACUUM reclaims the pages freed by the merge. This is a
|
||||
layout-only optimization — search results are unchanged.
|
||||
"""
|
||||
# Merge FTS5 segments before VACUUM so the freed pages are returned
|
||||
# to the OS in the same pass. optimize_fts() manages its own lock.
|
||||
try:
|
||||
self.optimize_fts()
|
||||
except Exception as exc:
|
||||
logger.warning("FTS optimize before VACUUM failed: %s", exc)
|
||||
# VACUUM cannot be executed inside a transaction.
|
||||
with self._lock:
|
||||
# Best-effort WAL checkpoint first, then VACUUM.
|
||||
|
||||
@@ -75,8 +75,17 @@ Config file: `~/.hermes/hindsight/config.json`
|
||||
| `recall_prompt_preamble` | — | Custom preamble for recalled memories in context |
|
||||
| `recall_tags` | — | Tags to filter when searching memories |
|
||||
| `recall_tags_match` | `any` | Tag matching mode: `any` / `all` / `any_strict` / `all_strict` |
|
||||
| `recall_types` | `observation` | Fact types surfaced by recall (both auto-recall and the `hindsight_recall` tool). Comma-separated string or JSON list. **Default narrowed to `observation` only** (see "Behavior change" below). Set to `observation,world,experience` to also include raw facts. |
|
||||
| `auto_recall` | `true` | Automatically recall memories before each turn |
|
||||
|
||||
> **Behavior change — `recall_types` defaults to `observation` only.**
|
||||
>
|
||||
> Previously recall returned all three fact types. It now returns only observations.
|
||||
>
|
||||
> Per [Hindsight's docs](https://hindsight.vectorize.io/developer/observations), observations are the **consolidated** knowledge layer Hindsight builds on top of raw facts: deduplicated beliefs grounded in evidence, refined as new facts arrive, with proof counts and freshness signals. Raw `world` / `experience` facts are the individual supporting evidence that feeds them. For per-turn context injection, observations are denser per token and avoid feeding the model multiple raw facts that one observation already summarizes.
|
||||
>
|
||||
> Restore the broad recall with `"recall_types": "observation,world,experience"` (string or JSON list) in `~/.hermes/hindsight/config.json`. This applies to **both** auto-recall and the `hindsight_recall` tool — both read the same `recall_types` setting (the tool schema has no per-call `types` argument), so narrowing the default narrows both paths.
|
||||
|
||||
### Retain
|
||||
|
||||
| Key | Default | Description |
|
||||
|
||||
@@ -579,7 +579,15 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
# Recall controls
|
||||
self._auto_recall = True
|
||||
self._recall_max_tokens = 4096
|
||||
self._recall_types: list[str] | None = None
|
||||
# Default to observation-only recall. Observations are Hindsight's
|
||||
# consolidated knowledge layer — deduplicated, evidence-grounded
|
||||
# beliefs built from many raw facts, with proof counts and
|
||||
# freshness signals (see hindsight.vectorize.io/developer/observations).
|
||||
# Including raw world/experience facts re-ships the supporting
|
||||
# evidence that observations already summarize, burning the
|
||||
# `recall_max_tokens` budget. Users can restore the broader
|
||||
# recall via the `recall_types` config key.
|
||||
self._recall_types: list[str] = ["observation"]
|
||||
self._recall_prompt_preamble = ""
|
||||
self._recall_max_input_chars = 800
|
||||
|
||||
@@ -856,6 +864,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
{"key": "retain_assistant_prefix", "description": "Label used before assistant turns in retained transcripts", "default": "Assistant"},
|
||||
{"key": "recall_tags", "description": "Tags to filter when searching memories (comma-separated)", "default": ""},
|
||||
{"key": "recall_tags_match", "description": "Tag matching mode for recall", "default": "any", "choices": ["any", "all", "any_strict", "all_strict"]},
|
||||
{"key": "recall_types", "description": "Fact types to surface on recall — applies to both auto-recall and the hindsight_recall tool (comma-separated or list). Defaults to observation-only — observations are Hindsight's consolidated, deduplicated, evidence-grounded knowledge layer; raw world/experience facts are the supporting evidence observations already summarize. Set to e.g. 'observation,world,experience' to also include raw facts.", "default": "observation"},
|
||||
{"key": "auto_recall", "description": "Automatically recall memories before each turn", "default": True},
|
||||
{"key": "auto_retain", "description": "Automatically retain conversation turns", "default": True},
|
||||
{"key": "retain_every_n_turns", "description": "Retain every N turns (1 = every turn)", "default": 1},
|
||||
@@ -1187,7 +1196,17 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
# Recall controls
|
||||
self._auto_recall = self._config.get("auto_recall", True)
|
||||
self._recall_max_tokens = int(self._config.get("recall_max_tokens", 4096))
|
||||
self._recall_types = self._config.get("recall_types") or None
|
||||
# Default narrows recall to observation-only; pass an explicit
|
||||
# `recall_types` list in config.json to broaden (e.g. include
|
||||
# "world" / "experience") or to disable the filter entirely.
|
||||
configured_types = self._config.get("recall_types")
|
||||
if configured_types is None:
|
||||
self._recall_types = ["observation"]
|
||||
elif isinstance(configured_types, str):
|
||||
# Allow comma-separated strings for parity with recall_tags.
|
||||
self._recall_types = [t.strip() for t in configured_types.split(",") if t.strip()]
|
||||
else:
|
||||
self._recall_types = list(configured_types) or ["observation"]
|
||||
self._recall_prompt_preamble = self._config.get("recall_prompt_preamble", "")
|
||||
self._recall_max_input_chars = int(self._config.get("recall_max_input_chars", 800))
|
||||
self._retain_async = self._config.get("retain_async", True)
|
||||
|
||||
@@ -2302,6 +2302,7 @@ class AIAgent:
|
||||
original_user_message: Any,
|
||||
final_response: Any,
|
||||
interrupted: bool,
|
||||
messages: list | None = None,
|
||||
) -> None:
|
||||
"""Mirror a completed turn into external memory providers.
|
||||
|
||||
@@ -2334,9 +2335,13 @@ class AIAgent:
|
||||
if not (self._memory_manager and final_response and original_user_message):
|
||||
return
|
||||
try:
|
||||
sync_kwargs = {"session_id": self.session_id or ""}
|
||||
if messages is not None:
|
||||
sync_kwargs["messages"] = messages
|
||||
self._memory_manager.sync_all(
|
||||
original_user_message, final_response,
|
||||
session_id=self.session_id or "",
|
||||
original_user_message,
|
||||
final_response,
|
||||
**sync_kwargs,
|
||||
)
|
||||
self._memory_manager.queue_prefetch_all(
|
||||
original_user_message,
|
||||
|
||||
@@ -101,6 +101,8 @@ AUTHOR_MAP = {
|
||||
"kronexoi13@gmail.com": "kronexoi",
|
||||
"hua.zhong@kingsmith.com": "vgocoder",
|
||||
"hermes@marian.local": "Schrotti77",
|
||||
"david@memorilabs.ai": "devwdave",
|
||||
"dave@devwdave.com": "devwdave",
|
||||
"1920071390@campus.ouj.ac.jp": "zapabob",
|
||||
"gaia@gaia.local": "jfuenmayor",
|
||||
"jiahuigu@users.noreply.github.com": "Jiahui-Gu",
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
from agent.codex_responses_adapter import _normalize_codex_response
|
||||
import pytest
|
||||
|
||||
from agent.codex_responses_adapter import (
|
||||
_format_responses_error,
|
||||
_normalize_codex_response,
|
||||
)
|
||||
|
||||
|
||||
def test_normalize_codex_response_drops_transient_rs_tmp_reasoning_items():
|
||||
@@ -61,3 +66,111 @@ def test_normalize_codex_response_treats_summary_only_reasoning_as_incomplete():
|
||||
assert assistant_message.content == ""
|
||||
assert assistant_message.reasoning == "still thinking"
|
||||
assert assistant_message.codex_reasoning_items is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _format_responses_error — adapted from anomalyco/opencode#28757.
|
||||
# Provider failures should surface BOTH the code (rate_limit_exceeded /
|
||||
# context_length_exceeded / internal_error / server_error) and the message,
|
||||
# so consumers can tell rate limits apart from context-length failures and
|
||||
# both apart from generic stream drops.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_format_responses_error_combines_code_and_message():
    """Code and message are joined as ``code: message`` when both exist."""
    payload = {"code": "rate_limit_exceeded", "message": "Slow down"}
    rendered = _format_responses_error(payload, "failed")
    assert rendered == "rate_limit_exceeded: Slow down"
|
||||
|
||||
|
||||
def test_format_responses_error_message_only():
    """A payload without a code yields the message unchanged."""
    payload = {"message": "Upstream model unavailable"}
    rendered = _format_responses_error(payload, "failed")
    assert rendered == "Upstream model unavailable"
|
||||
|
||||
|
||||
def test_format_responses_error_code_only_when_message_empty():
    """An empty message body leaves the bare code as the result."""
    # Some providers/proxies send a code with an empty message. The old
    # fallback was ``str(error_obj)``, a dict dump that leaked
    # ``{'code': 'internal_error', 'message': ''}`` into chat output. The
    # bare code is the meaningful field, so that is what must come back.
    payload = {"code": "internal_error", "message": ""}
    rendered = _format_responses_error(payload, "failed")
    assert rendered == "internal_error"
|
||||
|
||||
|
||||
def test_format_responses_error_code_only_when_message_missing():
    """A payload with no ``message`` key yields the bare code."""
    payload = {"code": "server_error"}
    rendered = _format_responses_error(payload, "failed")
    assert rendered == "server_error"
|
||||
|
||||
|
||||
def test_format_responses_error_attribute_style_payload():
    """Attribute-style payloads are formatted the same way as dicts."""
    # SDK objects expose ``code``/``message`` as attributes rather than dict
    # keys; the helper has to accept both shapes because the Responses SDK
    # returns SimpleNamespace-style objects on ``response.failed``.
    payload = SimpleNamespace(code="context_length_exceeded", message="too long")
    rendered = _format_responses_error(payload, "failed")
    assert rendered == "context_length_exceeded: too long"
|
||||
|
||||
|
||||
def test_format_responses_error_falls_back_to_status_when_empty():
    """With no payload at all, the result names the response status."""
    failed_text = _format_responses_error(None, "failed")
    assert failed_text == "Responses API returned status 'failed'"

    cancelled_text = _format_responses_error(None, "cancelled")
    assert cancelled_text == "Responses API returned status 'cancelled'"
|
||||
|
||||
|
||||
def test_format_responses_error_stringifies_opaque_payload():
    """A payload with no usable code/message is passed through ``str()``."""
    # Last resort: the provider sent something that is not a dict and has no
    # code/message attributes. It should be surfaced rather than silently
    # swallowed, so it is at least visible in logs.
    opaque = "opaque sentinel"
    assert _format_responses_error(opaque, "failed") == "opaque sentinel"
|
||||
|
||||
|
||||
def test_format_responses_error_ignores_non_string_code_message():
    """Non-string fields produce a best-effort string instead of a crash."""
    # Defensive: a malformed gateway could put numbers/objects in these
    # fields. The numeric code is stringified; the None message is dropped.
    payload = {"code": 500, "message": None}
    rendered = _format_responses_error(payload, "failed")
    assert rendered == "500"
|
||||
|
||||
|
||||
def test_normalize_codex_response_failed_includes_code_in_error():
    """Regression: a ``failed`` response must raise with the error code.

    The raised message used to be a bare 'Slow down' string that was
    indistinguishable from a generic stream truncation.
    """
    # ``output`` is non-empty so the "no output items" guard is not tripped
    # before the failed-status branch is reached. Real failed responses
    # often DO carry a partial message item alongside the error.
    partial_item = SimpleNamespace(
        type="message",
        role="assistant",
        status="incomplete",
        content=[SimpleNamespace(type="output_text", text="partial")],
    )
    failed_response = SimpleNamespace(
        status="failed",
        output=[partial_item],
        error={"code": "rate_limit_exceeded", "message": "Slow down"},
    )
    with pytest.raises(RuntimeError, match=r"^rate_limit_exceeded: Slow down$"):
        _normalize_codex_response(failed_response)
|
||||
|
||||
|
||||
def test_normalize_codex_response_failed_with_message_only():
    """Backwards-compat: a failed response carrying only ``message`` (no
    code) must still raise with that message verbatim."""
    partial_item = SimpleNamespace(
        type="message",
        role="assistant",
        status="incomplete",
        content=[SimpleNamespace(type="output_text", text="partial")],
    )
    failed_response = SimpleNamespace(
        status="failed",
        output=[partial_item],
        error={"message": "model error"},
    )
    with pytest.raises(RuntimeError, match=r"^model error$"):
        _normalize_codex_response(failed_response)
|
||||
|
||||
@@ -84,6 +84,13 @@ class MetadataMemoryProvider(FakeMemoryProvider):
|
||||
self.memory_writes.append((action, target, content, metadata or {}))
|
||||
|
||||
|
||||
class MessagesMemoryProvider(FakeMemoryProvider):
    """Fake provider whose ``sync_turn`` accepts the ``messages`` keyword."""

    def sync_turn(self, user_content, assistant_content, *, session_id="", messages=None):
        # Record the full call, messages included, so tests can assert on it.
        recorded = (user_content, assistant_content, session_id, messages)
        self.synced_turns.append(recorded)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MemoryProvider ABC tests
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -236,6 +243,28 @@ class TestMemoryManager:
|
||||
assert p1.synced_turns == [("user msg", "assistant msg")]
|
||||
assert p2.synced_turns == [("user msg", "assistant msg")]
|
||||
|
||||
def test_sync_all_passes_messages_to_opted_in_provider(self):
    """A provider whose ``sync_turn`` takes ``messages`` receives the list."""
    turn_messages = [
        {"role": "assistant", "tool_calls": [{"id": "call-1"}]},
        {"role": "tool", "tool_call_id": "call-1", "content": "ok"},
    ]
    provider = MessagesMemoryProvider("external")
    manager = MemoryManager()
    manager.add_provider(provider)

    manager.sync_all(
        "user msg", "assistant msg", session_id="sess-1", messages=turn_messages
    )

    expected = [("user msg", "assistant msg", "sess-1", turn_messages)]
    assert provider.synced_turns == expected
|
||||
|
||||
def test_sync_all_omits_messages_for_legacy_provider(self):
    """A provider without a ``messages`` parameter is called without it."""
    provider = FakeMemoryProvider("external")
    manager = MemoryManager()
    manager.add_provider(provider)

    manager.sync_all("user msg", "assistant msg", messages=[{"role": "tool"}])

    expected = [("user msg", "assistant msg")]
    assert provider.synced_turns == expected
|
||||
|
||||
def test_sync_failure_doesnt_block_others(self):
|
||||
"""If one provider's sync fails, others still run."""
|
||||
mgr = MemoryManager()
|
||||
|
||||
@@ -12,6 +12,7 @@ the realistic runtime context. See the conftest module docstring.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
@@ -201,3 +202,106 @@ def test_dashboard_restarts_after_crash(
|
||||
raise AssertionError(
|
||||
f"Dashboard not restarted after kill (first_pid={first_pid})"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OAuth auth-gate behaviour — regression guard for the dashboard-insecure
|
||||
# auto-injection bug. Pre-fix, the s6 run script appended `--insecure`
|
||||
# whenever `HERMES_DASHBOARD_HOST` was non-loopback, silently disabling
|
||||
# the OAuth gate on every container-deployed dashboard. The matching
|
||||
# static-text guard lives in tests/test_docker_home_override_scripts.py;
|
||||
# this is the behavioural end-to-end check.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _fetch_api_status(
    container: str, *, deadline_s: float = 60.0, port: int = 9119
) -> dict:
    """Poll ``/api/status`` from inside the container via the venv python.

    The dashboard binds to ``HERMES_DASHBOARD_HOST`` (typically ``0.0.0.0``)
    so loopback inside the container works. The image doesn't ship ``curl``
    but Python's stdlib ``urllib`` is good enough.

    Args:
        container: Name of the running container to exec into.
        deadline_s: Total time budget, in seconds, for polling.
        port: Dashboard port to probe on ``127.0.0.1``. Defaults to 9119;
            pass another value for containers started with a different
            dashboard port.

    Returns:
        The decoded JSON document from the first successful probe.

    Raises:
        AssertionError: If no probe returned valid JSON before the deadline.
            The message includes the last failure that was seen.
    """
    probe = (
        "/opt/hermes/.venv/bin/python -c "
        "'import json,urllib.request as u;"
        f'print(u.urlopen("http://127.0.0.1:{port}/api/status",timeout=5)'
        ".read().decode())'"
    )
    end = time.monotonic() + deadline_s
    last_err = ""
    while time.monotonic() < end:
        r = docker_exec_sh(container, probe, timeout=10)
        if r.returncode == 0 and r.stdout.strip():
            try:
                return json.loads(r.stdout)
            except ValueError as exc:
                # json.JSONDecodeError subclasses ValueError, so one clause
                # covers both. Keep polling in case the output was partial.
                last_err = f"json parse: {exc!r} / stdout={r.stdout!r}"
        else:
            last_err = f"rc={r.returncode} stderr={r.stderr!r}"
        time.sleep(0.5)
    raise AssertionError(
        f"/api/status never returned valid JSON within {deadline_s}s; "
        f"last error: {last_err}"
    )
|
||||
|
||||
|
||||
def test_dashboard_oauth_gate_engages_on_non_loopback_bind(
    built_image: str, container_name: str,
) -> None:
    """A ``0.0.0.0`` bind with an OAuth client id must keep the gate on.

    Starts the image with ``HERMES_DASHBOARD=1``,
    ``HERMES_DASHBOARD_HOST=0.0.0.0`` and
    ``HERMES_DASHBOARD_OAUTH_CLIENT_ID`` set, and with
    ``HERMES_DASHBOARD_INSECURE`` left unset. It then checks that
    ``/api/status`` reports ``auth_required`` as ``True`` and lists the
    ``nous`` provider.

    Guards against the regression where the s6 run script added
    ``--insecure`` for every non-loopback bind, which disabled the gate.
    """
    env_flags = [
        "-e", "HERMES_DASHBOARD=1",
        "-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
        "-e", "HERMES_DASHBOARD_OAUTH_CLIENT_ID=agent:test-instance",
    ]
    run_cmd = [
        "docker", "run", "-d", "--name", container_name,
        *env_flags,
        built_image, "sleep", "120",
    ]
    subprocess.run(run_cmd, check=True, capture_output=True, timeout=30)

    payload = _fetch_api_status(container_name)

    gate_state = payload.get("auth_required")
    assert gate_state is True, (
        "OAuth gate must be engaged on 0.0.0.0 bind when a provider is "
        "registered and HERMES_DASHBOARD_INSECURE is unset. Got: "
        f"{payload!r}"
    )

    registered = payload.get("auth_providers", [])
    assert "nous" in registered, (
        "Bundled dashboard_auth/nous provider should register when "
        f"HERMES_DASHBOARD_OAUTH_CLIENT_ID is set. Got: {payload!r}"
    )
|
||||
|
||||
|
||||
def test_dashboard_insecure_env_var_opts_out_of_gate(
    built_image: str, container_name: str,
) -> None:
    """``HERMES_DASHBOARD_INSECURE=1`` must switch the auth gate off.

    Starts the image bound to ``0.0.0.0`` with the explicit insecure
    opt-in and checks that ``/api/status`` reports ``auth_required`` as
    ``False``.
    """
    env_flags = [
        "-e", "HERMES_DASHBOARD=1",
        "-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
        "-e", "HERMES_DASHBOARD_INSECURE=1",
    ]
    run_cmd = [
        "docker", "run", "-d", "--name", container_name,
        *env_flags,
        built_image, "sleep", "120",
    ]
    subprocess.run(run_cmd, check=True, capture_output=True, timeout=30)

    payload = _fetch_api_status(container_name)

    gate_state = payload.get("auth_required")
    assert gate_state is False, (
        "HERMES_DASHBOARD_INSECURE=1 must disable the auth gate (explicit "
        f"opt-in for trusted-LAN deployments). Got: {payload!r}"
    )
|
||||
|
||||
@@ -197,10 +197,32 @@ class TestConfig:
|
||||
assert provider._recall_max_input_chars == 800
|
||||
assert provider._tags is None
|
||||
assert provider._recall_tags is None
|
||||
# Default recall narrowed to observation-only; world/experience are
|
||||
# aggregate facts that often crowd out concrete-event signal during
|
||||
# auto-recall. Users opt back in via the recall_types config key.
|
||||
assert provider._recall_types == ["observation"]
|
||||
assert provider._bank_mission == ""
|
||||
assert provider._bank_retain_mission is None
|
||||
assert provider._retain_context == "conversation between Hermes Agent and the User"
|
||||
|
||||
def test_recall_types_default_is_observation_only(self, provider):
    """With default config, recall types are limited to observation."""
    expected_types = ["observation"]
    assert provider._recall_types == expected_types
|
||||
|
||||
def test_recall_types_explicit_list_overrides_default(self, provider_with_config):
    """An explicit ``recall_types`` list is stored as given."""
    configured = provider_with_config(
        recall_types=["world", "experience", "observation"],
    )
    assert configured._recall_types == ["world", "experience", "observation"]
|
||||
|
||||
def test_recall_types_csv_string_accepted(self, provider_with_config):
    """A comma-separated ``recall_types`` string yields a list of names."""
    configured = provider_with_config(recall_types="observation, world")
    expected_types = ["observation", "world"]
    assert configured._recall_types == expected_types
|
||||
|
||||
def test_recall_types_empty_list_falls_back_to_default(self, provider_with_config):
    """An empty ``recall_types`` list results in the observation-only default."""
    configured = provider_with_config(recall_types=[])
    expected_types = ["observation"]
    assert configured._recall_types == expected_types
|
||||
|
||||
def test_custom_config_values(self, provider_with_config):
|
||||
p = provider_with_config(
|
||||
retain_tags=["tag1", "tag2"],
|
||||
|
||||
@@ -91,6 +91,45 @@ class TestSyncExternalMemoryForTurn:
|
||||
session_id="test_session_001",
|
||||
)
|
||||
|
||||
def test_completed_turn_syncs_messages_when_present(self):
    """A completed turn passes its ``messages`` list through to ``sync_all``.

    The conversation holds one assistant tool call and the matching tool
    result. The memory manager must be called exactly once with the user
    message, the final response, the session id and that same list.
    """
    agent = _bare_agent()

    tool_call = {
        "id": "call-1",
        "type": "function",
        "function": {
            "name": "terminal",
            "arguments": "{\"command\":\"pytest\"}",
        },
    }
    assistant_msg = {
        "role": "assistant",
        "content": None,
        "tool_calls": [tool_call],
    }
    tool_msg = {
        "role": "tool",
        "name": "terminal",
        "tool_call_id": "call-1",
        "content": "final Hermes-processed output",
    }
    messages = [assistant_msg, tool_msg]

    agent._sync_external_memory_for_turn(
        original_user_message="run tests",
        final_response="tests passed",
        interrupted=False,
        messages=messages,
    )

    sync_all = agent._memory_manager.sync_all
    sync_all.assert_called_once_with(
        "run tests",
        "tests passed",
        session_id="test_session_001",
        messages=messages,
    )
|
||||
|
||||
# --- Edge cases (pre-existing behaviour preserved) ------------------
|
||||
|
||||
def test_no_final_response_skips(self):
|
||||
|
||||
@@ -13,3 +13,36 @@ def test_dashboard_run_resets_home_before_dropping_privileges() -> None:
|
||||
assert "#!/command/with-contenv sh" in text
|
||||
assert "export HOME=/opt/data" in text
|
||||
assert "exec s6-setuidgid hermes hermes dashboard" in text
|
||||
|
||||
|
||||
def test_dashboard_run_does_not_derive_insecure_from_bind_host() -> None:
    """Static-text guard on the s6 dashboard run script.

    The script must not contain the legacy host-derived ``--insecure``
    logic (the ``127.0.0.1|localhost`` pattern or the
    ``case "$dash_host" in`` statement). It must mention the explicit
    ``HERMES_DASHBOARD_INSECURE`` opt-in together with the truthy
    spellings used by the other s6 boolean env vars.
    """
    script = DASHBOARD_RUN.read_text(encoding="utf-8")

    # The old bind-host-driven switch must be gone.
    legacy_host_pattern = '127.0.0.1|localhost'
    assert legacy_host_pattern not in script, (
        "Run script still derives --insecure from the bind host. The gate "
        "is the authority now — opt in via HERMES_DASHBOARD_INSECURE instead."
    )
    legacy_case_stmt = 'case "$dash_host" in'
    assert legacy_case_stmt not in script, (
        "Legacy host-derived --insecure case-statement is back."
    )

    # The explicit opt-in variable must be referenced.
    assert "HERMES_DASHBOARD_INSECURE" in script, (
        "Explicit HERMES_DASHBOARD_INSECURE opt-in is missing."
    )

    # Same truthy spellings as HERMES_DASHBOARD / HERMES_DASHBOARD_TUI.
    # NOTE(review): this is a plain substring check over the whole script,
    # so short tokens such as "1" can match unrelated text — confirm
    # whether a stricter match against the opt-in clause is wanted.
    accepted_spellings = ("1", "true", "TRUE", "True", "yes", "YES", "Yes")
    for token in accepted_spellings:
        assert token in script, (
            f"HERMES_DASHBOARD_INSECURE should accept truthy value {token!r}"
        )
|
||||
|
||||
@@ -2679,64 +2679,6 @@ class TestVacuum:
|
||||
db.vacuum()
|
||||
|
||||
|
||||
class TestOptimizeFts:
    """Tests for ``db.optimize_fts()`` and the index count it returns."""

    def test_optimize_returns_index_count(self, db):
        """On a fresh DB with one message, optimize reports two indexes."""
        db.create_session(session_id="s1", source="cli")
        db.append_message(session_id="s1", role="user", content="hello world")
        optimized = db.optimize_fts()
        assert optimized == 2

    def test_optimize_preserves_search_and_snippet(self, db):
        """Search hits and snippets are identical before and after optimize."""
        db.create_session(session_id="s1", source="cli")
        for idx in range(50):
            db.append_message(
                session_id="s1",
                role="user",
                content=f"needle alpha bravo charlie message {idx}",
            )
        hits_before = db.search_messages("needle")
        optimized = db.optimize_fts()
        assert optimized == 2
        hits_after = db.search_messages("needle")
        assert len(hits_after) == len(hits_before)
        assert len(hits_after) > 0
        # Every hit must still carry a non-empty snippet after optimize.
        assert all(hit.get("snippet") for hit in hits_after)
        # Same ids and same snippets, in the same order.
        assert [hit["id"] for hit in hits_after] == [hit["id"] for hit in hits_before]
        assert [hit["snippet"] for hit in hits_after] == [hit["snippet"] for hit in hits_before]

    def test_optimize_skips_missing_trigram_table(self, db):
        """With the trigram table dropped, optimize reports one index and
        does not raise."""
        db.create_session(session_id="s1", source="cli")
        db.append_message(session_id="s1", role="user", content="hello")
        # Remove the trigram triggers and table to simulate an absent index.
        trigger_names = (
            "messages_fts_trigram_insert",
            "messages_fts_trigram_delete",
            "messages_fts_trigram_update",
        )
        with db._lock:
            for name in trigger_names:
                db._conn.execute(f"DROP TRIGGER IF EXISTS {name}")
            db._conn.execute("DROP TABLE IF EXISTS messages_fts_trigram")
        assert db._fts_table_exists("messages_fts_trigram") is False
        assert db._fts_table_exists("messages_fts") is True
        # Only one FTS table is left, so exactly one index is optimized.
        remaining = db.optimize_fts()
        assert remaining == 1

    def test_optimize_idempotent(self, db):
        """Two consecutive optimize calls both succeed and search still works."""
        db.create_session(session_id="s1", source="cli")
        db.append_message(session_id="s1", role="user", content="repeat me")
        first_pass = db.optimize_fts()
        assert first_pass == 2
        second_pass = db.optimize_fts()
        assert second_pass == 2
        # The message is still findable after repeated optimization.
        matches = db.search_messages("repeat")
        assert len(matches) == 1
|
||||
|
||||
|
||||
class TestAutoMaintenance:
|
||||
def _make_old_ended(self, db, sid: str, days_old: int = 100):
|
||||
"""Create a session that is ended and was started `days_old` days ago."""
|
||||
|
||||
@@ -154,10 +154,10 @@ hooks:
|
||||
**`sync_turn()` MUST be non-blocking.** If your backend has latency (API calls, LLM processing), run the work in a daemon thread:
|
||||
|
||||
```python
|
||||
def sync_turn(self, user_content, assistant_content):
|
||||
def sync_turn(self, user_content, assistant_content, *, session_id="", messages=None):
|
||||
def _sync():
|
||||
try:
|
||||
self._api.ingest(user_content, assistant_content)
|
||||
self._api.ingest(user_content, assistant_content, session_id=session_id, messages=messages)
|
||||
except Exception as e:
|
||||
logger.warning("Sync failed: %s", e)
|
||||
|
||||
@@ -167,6 +167,16 @@ def sync_turn(self, user_content, assistant_content):
|
||||
self._sync_thread.start()
|
||||
```
|
||||
|
||||
`messages` is optional OpenAI-style conversation context as of the completed
|
||||
turn. When present, it includes user/assistant messages, assistant tool calls,
|
||||
and tool result messages. Providers that do not need raw turn context can omit
|
||||
the `messages` parameter; Hermes will continue calling them with the legacy
|
||||
signature.
|
||||
|
||||
Cloud providers should document what parts of `messages` are sent off-device.
|
||||
Tool calls and tool results may contain file paths, command output, or other
|
||||
workspace data.
|
||||
|
||||
## Profile Isolation
|
||||
|
||||
All storage paths **must** use the `hermes_home` kwarg from `initialize()`, not hardcoded `~/.hermes`:
|
||||
|
||||
@@ -227,6 +227,7 @@ Options:
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--all` | On `start` / `restart` / `stop`: act on **every profile's** gateway, not just the active `HERMES_HOME`. Useful if you run multiple profiles side-by-side and want to restart them all after `hermes update`. |
|
||||
| `--no-supervise` | On `run`: inside the s6-overlay Docker image, opt out of auto-supervision and use pre-s6 foreground semantics — gateway runs as the container's main process with no auto-restart. No-op outside the s6 image. Equivalent to setting `HERMES_GATEWAY_NO_SUPERVISE=1`. |
|
||||
|
||||
:::tip WSL users
|
||||
Use `hermes gateway run` instead of `hermes gateway start` — WSL's systemd support is unreliable. Wrap it in tmux for persistence: `tmux new -s hermes 'hermes gateway run'`. See [WSL FAQ](/reference/faq#wsl-gateway-keeps-disconnecting-or-hermes-gateway-start-fails) for details.
|
||||
|
||||
@@ -518,6 +518,7 @@ Advanced per-platform knobs for throttling the outbound message batcher. Most us
|
||||
| `HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT` | Per-platform connect timeout during gateway startup (seconds). |
|
||||
| `HERMES_GATEWAY_BUSY_INPUT_MODE` | Default gateway busy-input behavior: `queue`, `steer`, or `interrupt`. Can be overridden per chat with `/busy`. |
|
||||
| `HERMES_GATEWAY_BUSY_ACK_ENABLED` | Whether the gateway sends an acknowledgment message (⚡/⏳/⏩) when a user sends input while the agent is busy (default: `true`). Set to `false` to suppress these messages entirely — the input is still queued/steered/interrupts as normal, only the chat reply is silenced. Bridged from `display.busy_ack_enabled` in `config.yaml`. |
|
||||
| `HERMES_GATEWAY_NO_SUPERVISE` | Inside the s6-overlay Docker image, opt out of auto-supervision when running `hermes gateway run` and use pre-s6 foreground semantics (no auto-restart, gateway is the container's main process). Truthy values: `1`, `true`, `yes`. Equivalent to the `--no-supervise` CLI flag. No-op outside the s6 image. |
|
||||
| `HERMES_FILE_MUTATION_VERIFIER` | Enable the per-turn file-mutation verifier footer (default: `true`). When enabled, Hermes appends an advisory listing any `write_file` / `patch` calls that failed during the turn and were not superseded by a successful write. Set to `0`, `false`, `no`, or `off` to suppress. Mirrors `display.file_mutation_verifier` in `config.yaml`; the env var wins when set. |
|
||||
| `HERMES_CRON_TIMEOUT` | Inactivity timeout for cron job agent runs in seconds (default: `600`). The agent can run indefinitely while actively calling tools or receiving stream tokens — this only triggers when idle. Set to `0` for unlimited. |
|
||||
| `HERMES_CRON_SCRIPT_TIMEOUT` | Timeout for pre-run scripts attached to cron jobs in seconds (default: `120`). Override for scripts that need longer execution (e.g., randomized delays for anti-bot timing). Also configurable via `cron.script_timeout_seconds` in `config.yaml`. |
|
||||
|
||||
@@ -101,8 +101,29 @@ The entrypoint starts `hermes dashboard` in the background (running as the non-r
|
||||
| `HERMES_DASHBOARD_HOST` | Bind address for the dashboard HTTP server | `127.0.0.1` |
|
||||
| `HERMES_DASHBOARD_PORT` | Port for the dashboard HTTP server | `9119` |
|
||||
| `HERMES_DASHBOARD_TUI` | Set to `1` to expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket) | *(unset)* |
|
||||
| `HERMES_DASHBOARD_INSECURE` | Set to `1` (or `true` / `yes`) to bind without the OAuth auth gate. Only use on trusted networks behind a reverse proxy without the OAuth contract — the dashboard exposes API keys and session data | *(unset — gate enforced when a `DashboardAuthProvider` is registered)* |
|
||||
|
||||
By default, the dashboard stays on loopback to avoid exposing the unauthenticated web surface over the network. To publish it intentionally, set `HERMES_DASHBOARD_HOST=0.0.0.0` and configure your own trusted network boundary/reverse proxy. In that case you must explicitly add `--insecure` behavior by passing host/flags in your command path (the entrypoint no longer auto-enables insecure mode).
|
||||
By default, the dashboard stays on loopback (`127.0.0.1`) to avoid exposing
|
||||
the web surface over the network. To publish it intentionally, set
|
||||
`HERMES_DASHBOARD_HOST=0.0.0.0`. The dashboard's OAuth auth gate engages
|
||||
automatically whenever:
|
||||
|
||||
1. The bind host is non-loopback, **and**
|
||||
2. A `DashboardAuthProvider` plugin is registered.
|
||||
|
||||
The bundled `dashboard_auth/nous` provider activates whenever
|
||||
`HERMES_DASHBOARD_OAUTH_CLIENT_ID` is set (see
|
||||
[Web Dashboard → Authentication](features/web-dashboard.md)). With the
|
||||
gate engaged, browser callers are redirected to the configured portal's
|
||||
OAuth flow before they can reach any protected route.
|
||||
|
||||
If no provider is registered and the bind is non-loopback, the dashboard
|
||||
**fails closed at startup** with a specific error pointing at the
|
||||
missing env var. To opt out of the gate explicitly — for a trusted-LAN
|
||||
deployment behind your own reverse proxy without the OAuth contract —
|
||||
set `HERMES_DASHBOARD_INSECURE=1`. This re-enables the legacy "no auth,
|
||||
loud warning" mode and is the only path that disables the gate; the bind
|
||||
host does not implicitly determine `--insecure` anymore.
|
||||
|
||||
:::note
|
||||
The dashboard runs as a supervised s6 service inside the container. If
|
||||
|
||||
@@ -520,6 +520,27 @@ echo 'SUPERMEMORY_API_KEY=***' >> ~/.hermes/.env
|
||||
|
||||
**Support:** [Discord](https://supermemory.link/discord) · [support@supermemory.com](mailto:support@supermemory.com)
|
||||
|
||||
### Memori
|
||||
|
||||
Structured long-term memory using Memori Cloud, with background completed-turn capture, tool-aware turn context, and explicit recall tools for facts, summaries, quota, signup, and feedback.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
| **Best for** | Agent-controlled recall with structured project and session attribution |
|
||||
| **Requires** | `pip install hermes-memori` + `hermes-memori install` + [Memori API key](https://app.memorilabs.ai/signup) |
|
||||
| **Data storage** | Memori Cloud |
|
||||
| **Cost** | Memori pricing |
|
||||
|
||||
**Tools:** `memori_recall` (search long-term memory), `memori_recall_summary` (summarized context), `memori_quota` (usage/quota), `memori_signup` (request signup email), `memori_feedback` (send integration feedback)
|
||||
|
||||
**Setup:**
|
||||
```bash
|
||||
pip install hermes-memori
|
||||
hermes-memori install
|
||||
hermes config set memory.provider memori
|
||||
hermes memory setup
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Provider Comparison
|
||||
@@ -534,6 +555,7 @@ echo 'SUPERMEMORY_API_KEY=***' >> ~/.hermes/.env
|
||||
| **RetainDB** | Cloud | $20/mo | 5 | `requests` | Delta compression |
|
||||
| **ByteRover** | Local/Cloud | Free/Paid | 3 | `brv` CLI | Pre-compression extraction |
|
||||
| **Supermemory** | Cloud | Paid | 4 | `supermemory` | Context fencing + session graph ingest + multi-container |
|
||||
| **Memori** | Cloud | Free/Paid | 5 | `hermes-memori` | Tool-aware memory + structured recall |
|
||||
|
||||
## Profile Isolation
|
||||
|
||||
|
||||
@@ -224,6 +224,7 @@ hermes gateway <subcommand>
|
||||
| 选项 | 说明 |
|
||||
|--------|-------------|
|
||||
| `--all` | 在 `start` / `restart` / `stop` 时:对**每个 profile** 的 gateway 执行操作,而不仅限于活跃的 `HERMES_HOME`。当你并行运行多个 profile 并希望在 `hermes update` 后全部重启时很有用。 |
|
||||
| `--no-supervise` | 在 `run` 时:在 s6-overlay Docker 镜像内部,跳过 s6 自动监管,退回到 pre-s6 前台语义——gateway 作为容器主进程运行,无自动重启。在 s6 镜像之外为空操作。等同于设置 `HERMES_GATEWAY_NO_SUPERVISE=1`。 |
|
||||
|
||||
:::tip WSL 用户
|
||||
使用 `hermes gateway run` 而非 `hermes gateway start`——WSL 的 systemd 支持不稳定。用 tmux 包裹以保持持久运行:`tmux new -s hermes 'hermes gateway run'`。详见 [WSL FAQ](/reference/faq#wsl-gateway-keeps-disconnecting-or-hermes-gateway-start-fails)。
|
||||
|
||||
@@ -518,6 +518,7 @@ Graph 事件(Teams 会议、日历、聊天等)的入站变更通知监听
|
||||
| `HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT` | gateway 启动期间每个平台的连接超时(秒)。 |
|
||||
| `HERMES_GATEWAY_BUSY_INPUT_MODE` | 默认 gateway 繁忙输入行为:`queue`、`steer` 或 `interrupt`。可通过 `/busy` 按聊天覆盖。 |
|
||||
| `HERMES_GATEWAY_BUSY_ACK_ENABLED` | 当用户在 agent 繁忙时发送输入,gateway 是否发送确认消息(⚡/⏳/⏩)(默认:`true`)。设为 `false` 可完全抑制这些消息——输入仍会正常排队/引导/中断,只是聊天回复被静默。从 `config.yaml` 中的 `display.busy_ack_enabled` 桥接。 |
|
||||
| `HERMES_GATEWAY_NO_SUPERVISE` | 在 s6-overlay Docker 镜像内部运行 `hermes gateway run` 时跳过 s6 自动监管,退回到 pre-s6 前台语义(无自动重启,gateway 作为容器主进程)。真值:`1`、`true`、`yes`。等同于 `--no-supervise` CLI 标志。在 s6 镜像之外为空操作。 |
|
||||
| `HERMES_FILE_MUTATION_VERIFIER` | 启用每轮文件变更验证器页脚(默认:`true`)。启用后,Hermes 附加一个建议列表,列出本轮中失败且未被成功写入覆盖的 `write_file`/`patch` 调用。设为 `0`、`false`、`no` 或 `off` 可抑制。镜像 `config.yaml` 中的 `display.file_mutation_verifier`;设置时环境变量优先。 |
|
||||
| `HERMES_CRON_TIMEOUT` | cron 任务 agent 运行的不活动超时(秒,默认:`600`)。agent 在主动调用工具或接收流 token 时可无限运行——仅在空闲时触发。设为 `0` 表示无限制。 |
|
||||
| `HERMES_CRON_SCRIPT_TIMEOUT` | cron 任务附加的预运行脚本超时(秒,默认:`120`)。对需要更长执行时间的脚本(例如随机延迟的反机器人计时)可增大此值。也可通过 `config.yaml` 中的 `cron.script_timeout_seconds` 配置。 |
|
||||
|
||||
@@ -80,8 +80,28 @@ docker run -d \
|
||||
| `HERMES_DASHBOARD_HOST` | dashboard HTTP 服务器的绑定地址 | `127.0.0.1` |
|
||||
| `HERMES_DASHBOARD_PORT` | dashboard HTTP 服务器的端口 | `9119` |
|
||||
| `HERMES_DASHBOARD_TUI` | 设为 `1` 以启用浏览器内 Chat 标签页(通过 PTY/WebSocket 嵌入 `hermes --tui`) | *(未设置)* |
|
||||
| `HERMES_DASHBOARD_INSECURE` | 设为 `1`(或 `true` / `yes`)以在不启用 OAuth 鉴权门控的情况下绑定。仅在可信网络(且通过没有 OAuth 契约的反向代理时)使用——dashboard 会暴露 API 密钥与会话数据 | *(未设置——当注册了 `DashboardAuthProvider` 时启用门控)* |
|
||||
|
||||
默认情况下,dashboard 保持在回环地址,以避免将未经身份验证的 Web 界面暴露到网络。若要有意发布,请设置 `HERMES_DASHBOARD_HOST=0.0.0.0` 并配置你自己的可信网络边界/反向代理。在这种情况下,你必须通过命令路径中的 host/flags 显式添加 `--insecure` 行为(入口点不再自动启用不安全模式)。
|
||||
默认情况下,dashboard 保持在回环地址(`127.0.0.1`),以避免将
|
||||
Web 界面暴露到网络。若要有意发布,请设置
|
||||
`HERMES_DASHBOARD_HOST=0.0.0.0`。当以下两项同时满足时,
|
||||
dashboard 的 OAuth 鉴权门控会自动启用:
|
||||
|
||||
1. 绑定地址为非回环地址,**且**
|
||||
2. 注册了一个 `DashboardAuthProvider` 插件。
|
||||
|
||||
捆绑的 `dashboard_auth/nous` 提供者会在设置
|
||||
`HERMES_DASHBOARD_OAUTH_CLIENT_ID` 时自动激活(参见
|
||||
[Web Dashboard → 鉴权](features/web-dashboard.md))。门控启用后,
|
||||
浏览器调用方会先被重定向到所配置门户的 OAuth 流,然后才能
|
||||
访问任何受保护路由。
|
||||
|
||||
如果未注册提供者且绑定为非回环地址,dashboard **会在启动时
|
||||
失败关闭**,并给出指向缺失环境变量的具体错误信息。要显式
|
||||
退出门控——用于不使用 OAuth 契约、通过你自己的反向代理部署
|
||||
在可信局域网中的场景——请设置 `HERMES_DASHBOARD_INSECURE=1`。
|
||||
这会恢复旧的“无鉴权,但发出告警”模式,也是唯一可以禁用门控的
|
||||
路径;绑定地址不再隐式决定 `--insecure`。
|
||||
|
||||
:::note
|
||||
dashboard 在容器内作为受监管的 s6 服务运行。如果
|
||||
|
||||
Reference in New Issue
Block a user