Compare commits

..

1 Commits

Author SHA1 Message Date
kshitijk4poor 42aeb4ecac fix(dashboard): include cache tokens in totals, track real API call count
The analytics dashboard had three accuracy issues:

1. TOTAL TOKENS excluded cache_read and cache_write tokens — only counted
   the non-cached input portion. With 90%+ cache hit rates typical in
   Hermes, this dramatically undercounted actual token usage (e.g. showing
   9.1M when the real total was 169M+).

2. The 'API Calls' card displayed session count (COUNT(*) from sessions
   table), not actual LLM API requests. A single session makes 10-90 API
   calls through the tool loop, so this was ~30x lower than reality.

3. cache_write_tokens was stored in the DB but never exposed through the
   analytics API endpoint or frontend.

Changes:
- Add api_call_count column to sessions table (schema v7 migration)
- Persist api_call_count=1 per LLM API call in run_agent.py
- Analytics SQL queries now include cache_write_tokens and api_call_count
  in daily, by_model, and totals aggregations
- Frontend TOTAL TOKENS card now shows input + cache_read + cache_write +
  output (the full prompt total + output)
- API CALLS card now uses real api_call_count from DB
- New Cache Hit Rate card shows cache efficiency percentage
- Bar chart, tooltips, daily table, model table all use prompt totals
  (input + cache_read + cache_write) instead of just input
- Labels changed from 'Input' to 'Prompt' to reflect the full prompt total
- TypeScript interfaces and i18n strings updated (en + zh)
2026-04-15 12:31:05 +05:30
42 changed files with 193 additions and 998 deletions
+1 -3
View File
@@ -12,8 +12,6 @@ from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
_skill_commands: Dict[str, Dict[str, Any]] = {}
@@ -110,7 +108,7 @@ def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None
if not resolved:
return
lines = ["", f"[Skill config (from {display_hermes_home()}/config.yaml):"]
lines = ["", "[Skill config (from ~/.hermes/config.yaml):"]
for key, value in resolved.items():
display_val = str(value) if value else "(not set)"
lines.append(f" {key} = {display_val}")
+10 -13
View File
@@ -4588,19 +4588,16 @@ class HermesCLI:
self._close_model_picker()
return
provider_data = providers[selected]
# Use the curated model list from list_authenticated_providers()
# (same lists as `hermes model` and gateway pickers).
# Only fall back to the live provider catalog when the curated
# list is empty (e.g. user-defined endpoints with no curated list).
model_list = provider_data.get("models", [])
model_list = []
try:
from hermes_cli.models import provider_model_ids
live = provider_model_ids(provider_data["slug"])
if live:
model_list = live
except Exception:
pass
if not model_list:
try:
from hermes_cli.models import provider_model_ids
live = provider_model_ids(provider_data["slug"])
if live:
model_list = live
except Exception:
pass
model_list = provider_data.get("models", [])
state["stage"] = "model"
state["provider_data"] = provider_data
state["model_list"] = model_list
@@ -5956,7 +5953,7 @@ class HermesCLI:
parts = cmd.strip().split(None, 1)
sub = parts[1].lower().strip() if len(parts) > 1 else "status"
_DEFAULT_CDP = "http://127.0.0.1:9222"
_DEFAULT_CDP = "http://localhost:9222"
current = os.environ.get("BROWSER_CDP_URL", "").strip()
if sub.startswith("connect"):
+1 -3
View File
@@ -288,13 +288,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
if wrap_response:
task_name = job.get("name", job["id"])
job_id = job.get("id", "")
delivery_content = (
f"Cronjob Response: {task_name}\n"
f"(job_id: {job_id})\n"
f"-------------\n\n"
f"{content}\n\n"
f"To stop or manage this job, send me a new message (e.g. \"stop reminder {task_name}\")."
f"Note: The agent cannot see this message, and therefore cannot respond to it."
)
else:
delivery_content = content
-15
View File
@@ -1624,21 +1624,6 @@ class BasePlatformAdapter(ABC):
# streaming already delivered the text (already_sent=True) or
# when the message was queued behind an active agent. Log at
# DEBUG to avoid noisy warnings for expected behavior.
#
# Suppress stale response when the session was interrupted by a
# new message that hasn't been consumed yet. The pending message
# is processed by the pending-message handler below (#8221/#2483).
if (
response
and interrupt_event.is_set()
and session_key in self._pending_messages
):
logger.info(
"[%s] Suppressing stale response for interrupted session %s",
self.name,
session_key,
)
response = None
if not response:
logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
if response:
-62
View File
@@ -1379,68 +1379,6 @@ class DiscordAdapter(BasePlatformAdapter):
)
return await super().send_image(chat_id, image_url, caption, reply_to)
async def send_animation(
self,
chat_id: str,
animation_url: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
"""Send an animated GIF natively as a Discord file attachment.

Downloads ``animation_url`` and uploads the bytes to the channel
identified by ``chat_id`` as a file named ``animation.gif``, using
``caption`` as the message content when one is given.

Returns a failed ``SendResult`` when the client is not connected or the
channel lookup yields nothing. An unsafe URL, an ``ImportError``, or any
other exception raised while downloading or sending delegates to the
base class's ``send_animation`` instead.

NOTE(review): ``reply_to`` and ``metadata`` are only forwarded on the
fallback paths; the native attachment upload does not use them.
"""
if not self._client:
return SendResult(success=False, error="Not connected")
# Never download from a URL that fails the safety check; delegate to the
# base implementation instead.
if not is_safe_url(animation_url):
logger.warning("[%s] Blocked unsafe animation URL during Discord send_animation", self.name)
return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
try:
import aiohttp
# Look the channel up via get_channel first, then via fetch_channel when
# that returns nothing.
channel = self._client.get_channel(int(chat_id))
if not channel:
channel = await self._client.fetch_channel(int(chat_id))
if not channel:
return SendResult(success=False, error=f"Channel {chat_id} not found")
# Download the GIF and send as a Discord file attachment
# (Discord renders .gif attachments as auto-playing animations inline)
from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
_proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
_sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
async with aiohttp.ClientSession(**_sess_kw) as session:
async with session.get(animation_url, timeout=aiohttp.ClientTimeout(total=30), **_req_kw) as resp:
if resp.status != 200:
# Raised so the generic ``except Exception`` handler below performs
# the fallback.
raise Exception(f"Failed to download animation: HTTP {resp.status}")
animation_data = await resp.read()
import io
file = discord.File(io.BytesIO(animation_data), filename="animation.gif")
msg = await channel.send(
content=caption if caption else None,
file=file,
)
return SendResult(success=True, message_id=str(msg.id))
# NOTE(review): this handler also catches an ImportError from the
# ``gateway.platforms.base`` import above, although the log message only
# mentions aiohttp.
except ImportError:
logger.warning(
"[%s] aiohttp not installed, falling back to URL. Run: pip install aiohttp",
self.name,
exc_info=True,
)
return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
except Exception as e: # pragma: no cover - defensive logging
logger.error(
"[%s] Failed to send animation attachment, falling back to URL: %s",
self.name,
e,
exc_info=True,
)
return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
async def send_video(
self,
chat_id: str,
-8
View File
@@ -729,14 +729,6 @@ class MatrixAdapter(BasePlatformAdapter):
except Exception:
pass
async def stop_typing(self, chat_id: str) -> None:
    """Stop the Matrix typing indicator.

    Calls ``set_typing`` on the client with ``timeout=0`` for the room
    ``chat_id``. Does nothing when no client is set, and ignores any
    exception raised by the call (best effort).
    """
    client = self._client
    if not client:
        return
    try:
        await client.set_typing(RoomID(chat_id), timeout=0)
    except Exception:
        # Best effort: a failure here is deliberately ignored.
        pass
async def edit_message(
self, chat_id: str, message_id: str, content: str
) -> SendResult:
+10 -2
View File
@@ -9231,11 +9231,15 @@ class GatewayRunner:
pass
except Exception as e:
logger.debug("Stream consumer wait before queued message failed: %s", e)
_response_previewed = bool(result.get("response_previewed"))
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
or (
_response_previewed
and getattr(_sc, "already_sent", False)
)
)
)
first_response = result.get("final_response", "")
@@ -9319,9 +9323,13 @@ class GatewayRunner:
# them even if streaming had sent earlier partial output.
_sc = stream_consumer_holder[0]
if _sc and isinstance(response, dict) and not response.get("failed"):
_response_previewed = bool(response.get("response_previewed"))
if (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
or (
_response_previewed
and getattr(_sc, "already_sent", False)
)
):
response["already_sent"] = True
-1
View File
@@ -167,7 +167,6 @@ def _resolve_runtime_from_pool_entry(
api_mode = "chat_completions"
elif provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Honour model.base_url from config.yaml when the configured provider
+13 -5
View File
@@ -1977,7 +1977,8 @@ async def update_config_raw(body: RawConfigUpdate):
@app.get("/api/analytics/usage")
async def get_usage_analytics(days: int = 30):
from hermes_state import SessionDB
db = SessionDB()
from hermes_constants import get_hermes_home
db = SessionDB(db_path=get_hermes_home() / "state.db")
try:
cutoff = time.time() - (days * 86400)
cur = db._conn.execute("""
@@ -1985,10 +1986,12 @@ async def get_usage_analytics(days: int = 30):
SUM(input_tokens) as input_tokens,
SUM(output_tokens) as output_tokens,
SUM(cache_read_tokens) as cache_read_tokens,
SUM(cache_write_tokens) as cache_write_tokens,
SUM(reasoning_tokens) as reasoning_tokens,
COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
COALESCE(SUM(actual_cost_usd), 0) as actual_cost,
COUNT(*) as sessions
COUNT(*) as sessions,
SUM(COALESCE(api_call_count, 0)) as api_calls
FROM sessions WHERE started_at > ?
GROUP BY day ORDER BY day
""", (cutoff,))
@@ -1998,10 +2001,13 @@ async def get_usage_analytics(days: int = 30):
SELECT model,
SUM(input_tokens) as input_tokens,
SUM(output_tokens) as output_tokens,
SUM(cache_read_tokens) as cache_read_tokens,
SUM(cache_write_tokens) as cache_write_tokens,
COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
COUNT(*) as sessions
COUNT(*) as sessions,
SUM(COALESCE(api_call_count, 0)) as api_calls
FROM sessions WHERE started_at > ? AND model IS NOT NULL
GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
GROUP BY model ORDER BY SUM(input_tokens) + SUM(cache_read_tokens) + SUM(cache_write_tokens) + SUM(output_tokens) DESC
""", (cutoff,))
by_model = [dict(r) for r in cur2.fetchall()]
@@ -2009,10 +2015,12 @@ async def get_usage_analytics(days: int = 30):
SELECT SUM(input_tokens) as total_input,
SUM(output_tokens) as total_output,
SUM(cache_read_tokens) as total_cache_read,
SUM(cache_write_tokens) as total_cache_write,
SUM(reasoning_tokens) as total_reasoning,
COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost,
COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost,
COUNT(*) as total_sessions
COUNT(*) as total_sessions,
SUM(COALESCE(api_call_count, 0)) as total_api_calls
FROM sessions WHERE started_at > ?
""", (cutoff,))
totals = dict(cur3.fetchone())
+19 -3
View File
@@ -31,7 +31,7 @@ T = TypeVar("T")
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
SCHEMA_VERSION = 6
SCHEMA_VERSION = 7
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS schema_version (
@@ -65,6 +65,7 @@ CREATE TABLE IF NOT EXISTS sessions (
cost_source TEXT,
pricing_version TEXT,
title TEXT,
api_call_count INTEGER DEFAULT 0,
FOREIGN KEY (parent_session_id) REFERENCES sessions(id)
);
@@ -329,6 +330,17 @@ class SessionDB:
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 6")
if current_version < 7:
# v7: add api_call_count column to sessions — tracks the number
# of individual LLM API calls made within a session (as opposed
# to the session count itself).
try:
cursor.execute(
'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0'
)
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 7")
# Unique title index — always ensure it exists (safe to run after migrations
# since the title column is guaranteed to exist at this point)
@@ -426,6 +438,7 @@ class SessionDB:
billing_provider: Optional[str] = None,
billing_base_url: Optional[str] = None,
billing_mode: Optional[str] = None,
api_call_count: int = 0,
absolute: bool = False,
) -> None:
"""Update token counters and backfill model if not already set.
@@ -455,7 +468,8 @@ class SessionDB:
billing_provider = COALESCE(billing_provider, ?),
billing_base_url = COALESCE(billing_base_url, ?),
billing_mode = COALESCE(billing_mode, ?),
model = COALESCE(model, ?)
model = COALESCE(model, ?),
api_call_count = ?
WHERE id = ?"""
else:
sql = """UPDATE sessions SET
@@ -475,7 +489,8 @@ class SessionDB:
billing_provider = COALESCE(billing_provider, ?),
billing_base_url = COALESCE(billing_base_url, ?),
billing_mode = COALESCE(billing_mode, ?),
model = COALESCE(model, ?)
model = COALESCE(model, ?),
api_call_count = COALESCE(api_call_count, 0) + ?
WHERE id = ?"""
params = (
input_tokens,
@@ -493,6 +508,7 @@ class SessionDB:
billing_base_url,
billing_mode,
model,
api_call_count,
session_id,
)
def _do(conn):
+4 -10
View File
@@ -3589,12 +3589,7 @@ class AIAgent:
item_id = ri.get("id")
if item_id and item_id in seen_item_ids:
continue
# Strip the "id" field — with store=False the
# Responses API cannot look up items by ID and
# returns 404. The encrypted_content blob is
# self-contained for reasoning chain continuity.
replay_item = {k: v for k, v in ri.items() if k != "id"}
items.append(replay_item)
items.append(ri)
if item_id:
seen_item_ids.add(item_id)
has_codex_reasoning = True
@@ -3735,10 +3730,8 @@ class AIAgent:
continue
seen_ids.add(item_id)
reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
# Do NOT include the "id" in the outgoing item — with
# store=False (our default) the API tries to resolve the
# id server-side and returns 404. The id is still used
# above for local deduplication via seen_ids.
if isinstance(item_id, str) and item_id:
reasoning_item["id"] = item_id
summary = item.get("summary")
if isinstance(summary, list):
reasoning_item["summary"] = summary
@@ -8924,6 +8917,7 @@ class AIAgent:
billing_mode="subscription_included"
if cost_result.status == "included" else None,
model=self.model,
api_call_count=1,
)
except Exception:
pass # never block the agent loop
@@ -98,7 +98,7 @@ def find_nearby(lat: float, lon: float, types: list[str], radius: int = 1500, li
# Get coordinates (nodes have lat/lon directly, ways/relations use center)
plat = el.get("lat") or (el.get("center", {}) or {}).get("lat")
plon = el.get("lon") or (el.get("center", {}) or {}).get("lon")
if plat is None or plon is None:
if not plat or not plon:
continue
dist = haversine(lat, lon, plat, plon)
@@ -25,13 +25,6 @@ def refresh_token(token_data: dict) -> dict:
import urllib.parse
import urllib.request
required_keys = ["client_id", "client_secret", "refresh_token", "token_uri"]
missing = [k for k in required_keys if k not in token_data]
if missing:
print(f"ERROR: google_token.json is missing required fields: {', '.join(missing)}", file=sys.stderr)
print("Please re-authenticate by running the Google Workspace setup script.", file=sys.stderr)
sys.exit(1)
params = urllib.parse.urlencode({
"client_id": token_data["client_id"],
"client_secret": token_data["client_secret"],
@@ -232,7 +232,7 @@ class TestResolveVisionProviderClientModelNormalization:
assert provider == "zai"
assert client is not None
assert model == "glm-5v-turbo" # zai has dedicated vision model in _PROVIDER_VISION_MODELS
assert model == "glm-5.1"
class TestVisionPathApiMode:
+1 -2
View File
@@ -233,10 +233,9 @@ class TestDeliverResultWrapping:
send_mock.assert_called_once()
sent_content = send_mock.call_args.kwargs.get("content") or send_mock.call_args[0][-1]
assert "Cronjob Response: daily-report" in sent_content
assert "(job_id: test-job)" in sent_content
assert "-------------" in sent_content
assert "Here is today's summary." in sent_content
assert "To stop or manage this job" in sent_content
assert "The agent cannot see this message" in sent_content
def test_delivery_uses_job_id_when_no_name(self):
"""When a job has no name, the wrapper should fall back to job id."""
-66
View File
@@ -1,66 +0,0 @@
"""Shared fixtures for gateway tests.
The ``_ensure_telegram_mock`` helper guarantees that a minimal mock of
the ``telegram`` package is registered in :data:`sys.modules` **before**
any test file triggers ``from gateway.platforms.telegram import ...``.
Without this, ``pytest-xdist`` workers that happen to collect
``test_telegram_caption_merge.py`` (bare top-level import, no per-file
mock) first will cache ``ChatType = None`` from the production
ImportError fallback, causing 30+ downstream test failures wherever
``ChatType.GROUP`` / ``ChatType.SUPERGROUP`` is accessed.
Individual test files may still call their own ``_ensure_telegram_mock`` —
it short-circuits when the mock is already present.
"""
import sys
from unittest.mock import MagicMock
def _ensure_telegram_mock() -> None:
    """Install a comprehensive telegram mock in sys.modules.

    Skips installation when ``sys.modules["telegram"]`` exists and has a
    ``__file__`` attribute, which is taken to mean the real library is
    already imported.

    Uses ``sys.modules[name] = mod`` (overwrite) instead of
    ``setdefault`` so it wins even if a partial/broken import
    already cached a module with ``ChatType = None``.

    NOTE(review): ``telegram.ext``, ``telegram.constants`` and
    ``telegram.request`` are all registered as the top-level mock itself,
    not as ``mod.ext`` / ``mod.constants``. ``from telegram.constants import
    ChatType`` therefore resolves to ``mod.ChatType`` rather than the
    ``mod.constants.ChatType`` configured below; confirm this is intended.
    """
    registered = sys.modules.get("telegram")
    if registered is not None and hasattr(registered, "__file__"):
        return  # Real library is installed — nothing to mock

    mod = MagicMock()
    mod.ext.ContextTypes.DEFAULT_TYPE = type(None)

    parse_mode = mod.constants.ParseMode
    for attr, value in (
        ("MARKDOWN", "Markdown"),
        ("MARKDOWN_V2", "MarkdownV2"),
        ("HTML", "HTML"),
    ):
        setattr(parse_mode, attr, value)

    chat_type = mod.constants.ChatType
    for attr, value in (
        ("PRIVATE", "private"),
        ("GROUP", "group"),
        ("SUPERGROUP", "supergroup"),
        ("CHANNEL", "channel"),
    ):
        setattr(chat_type, attr, value)

    # Real exception classes so ``except (NetworkError, ...)`` clauses
    # in production code don't blow up with TypeError.
    error_specs = (
        ("NetworkError", OSError, {}),
        ("TimedOut", OSError, {}),
        ("BadRequest", Exception, {}),
        ("Forbidden", Exception, {}),
        ("InvalidToken", Exception, {}),
        ("RetryAfter", Exception, {"retry_after": 1}),
        ("Conflict", Exception, {}),
    )
    for cls_name, base, namespace in error_specs:
        setattr(mod.error, cls_name, type(cls_name, (base,), dict(namespace)))

    # Update.ALL_TYPES used in start_polling()
    mod.Update.ALL_TYPES = []

    shared_names = (
        "telegram",
        "telegram.ext",
        "telegram.constants",
        "telegram.request",
    )
    sys.modules.update(dict.fromkeys(shared_names, mod))
    sys.modules["telegram.error"] = mod.error
# Run at collection time — before any test file's module-level imports.
_ensure_telegram_mock()
@@ -1,291 +0,0 @@
"""Tests for duplicate reply suppression across the gateway stack.
Covers three fix paths:
1. base.py: stale response suppressed when interrupt_event is set and a
pending message exists (#8221 / #2483)
2. run.py return path: already_sent propagated from stream consumer's
already_sent flag without requiring response_previewed (#8375)
3. run.py queued-message path: first response correctly detected as
already-streamed when already_sent is True without response_previewed
"""
import asyncio
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
MessageType,
ProcessingOutcome,
SendResult,
)
from gateway.session import SessionSource, build_session_key
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
class StubAdapter(BasePlatformAdapter):
    """Minimal concrete adapter for testing.

    Records every ``send`` call in ``self.sent`` as a dict with
    ``chat_id`` and ``content`` keys instead of talking to a platform.
    """

    def __init__(self):
        config = PlatformConfig(enabled=True, token="fake")
        super().__init__(config, Platform.DISCORD)
        self.sent = []

    async def connect(self):
        """Report a successful connection without doing any I/O."""
        return True

    async def disconnect(self):
        """No-op."""
        return None

    async def send(self, chat_id, content, reply_to=None, metadata=None):
        """Record the outgoing message and report success."""
        record = {"chat_id": chat_id, "content": content}
        self.sent.append(record)
        return SendResult(success=True, message_id="msg1")

    async def send_typing(self, chat_id, metadata=None):
        """No-op."""
        return None

    async def get_chat_info(self, chat_id):
        """Return a minimal chat-info dict containing only the id."""
        return {"id": chat_id}
def _make_event(text="hello", chat_id="c1", user_id="u1"):
    """Build a Discord DM ``MessageEvent`` with message id ``"m1"``."""
    source = SessionSource(
        platform=Platform.DISCORD,
        chat_id=chat_id,
        chat_type="dm",
        user_id=user_id,
    )
    return MessageEvent(text=text, source=source, message_id="m1")
# ===================================================================
# Test 1: base.py — stale response suppressed on interrupt (#8221)
# ===================================================================
class TestBaseInterruptSuppression:
    """Checks when ``_process_message_background`` sends or drops a response."""

    @pytest.mark.asyncio
    async def test_stale_response_suppressed_when_interrupted(self):
        """When interrupt_event is set AND a pending message exists,
        base.py should suppress the stale response instead of sending it."""
        adapter = StubAdapter()
        stale_response = "This is the stale answer to the first question."
        pending_response = "This is the answer to the second question."

        # First handler call yields the stale answer; every later call
        # yields the pending answer.
        first_reply = iter((stale_response,))

        async def fake_handler(event):
            return next(first_reply, pending_response)

        adapter.set_message_handler(fake_handler)

        first_event = _make_event(text="first question")
        session_key = build_session_key(first_event.source)

        # Simulate: message A is being processed, message B arrives.
        # The interrupt event is set and B is in pending_messages.
        interrupted = asyncio.Event()
        interrupted.set()
        adapter._active_sessions[session_key] = interrupted
        second_event = _make_event(text="second question")
        adapter._pending_messages[session_key] = second_event

        await adapter._process_message_background(first_event, session_key)

        # The stale response should NOT have been sent.
        stale_sends = [s for s in adapter.sent if s["content"] == stale_response]
        assert len(stale_sends) == 0, (
            f"Stale response was sent {len(stale_sends)} time(s) — should be suppressed"
        )

        # The pending message's response SHOULD have been sent.
        pending_sends = [s for s in adapter.sent if s["content"] == pending_response]
        assert len(pending_sends) == 1, "Pending message response should be sent"

    @pytest.mark.asyncio
    async def test_response_not_suppressed_without_interrupt(self):
        """Normal case: no interrupt, response should be sent."""
        adapter = StubAdapter()

        async def fake_handler(event):
            return "Normal response"

        adapter.set_message_handler(fake_handler)
        event = _make_event()
        session_key = build_session_key(event.source)

        await adapter._process_message_background(event, session_key)

        sent_contents = (s["content"] == "Normal response" for s in adapter.sent)
        assert any(sent_contents)

    @pytest.mark.asyncio
    async def test_response_not_suppressed_with_interrupt_but_no_pending(self):
        """Interrupt event set but no pending message (race already resolved) —
        response should still be sent."""
        adapter = StubAdapter()

        async def fake_handler(event):
            return "Valid response"

        adapter.set_message_handler(fake_handler)
        event = _make_event()
        session_key = build_session_key(event.source)

        # Set interrupt but no pending message
        interrupted = asyncio.Event()
        interrupted.set()
        adapter._active_sessions[session_key] = interrupted

        await adapter._process_message_background(event, session_key)

        sent_contents = (s["content"] == "Valid response" for s in adapter.sent)
        assert any(sent_contents)
# ===================================================================
# Test 2: run.py — already_sent without response_previewed (#8375)
# ===================================================================
class TestAlreadySentWithoutResponsePreviewed:
    """The already_sent flag on the response dict should be set when the
    stream consumer's already_sent is True, even if response_previewed is
    False. This prevents duplicate sends when streaming was interrupted
    by flood control."""

    def _make_mock_stream_consumer(self, already_sent=False, final_response_sent=False):
        """Build a stand-in stream consumer carrying only the two flags."""
        return SimpleNamespace(
            already_sent=already_sent,
            final_response_sent=final_response_sent,
        )

    @staticmethod
    def _apply_return_path(sc, response):
        """Reproduce the logic from run.py return path (post-fix)."""
        eligible = sc and isinstance(response, dict) and not response.get("failed")
        if eligible and (
            getattr(sc, "final_response_sent", False)
            or getattr(sc, "already_sent", False)
        ):
            response["already_sent"] = True

    def test_already_sent_set_without_response_previewed(self):
        """Stream consumer already_sent=True should propagate to response
        dict even when response_previewed is False."""
        consumer = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
        response = {"final_response": "text", "response_previewed": False}

        self._apply_return_path(consumer, response)

        assert response.get("already_sent") is True

    def test_already_sent_not_set_when_nothing_sent(self):
        """When stream consumer hasn't sent anything, already_sent should
        not be set on the response."""
        consumer = self._make_mock_stream_consumer(already_sent=False, final_response_sent=False)
        response = {"final_response": "text", "response_previewed": False}

        self._apply_return_path(consumer, response)

        assert "already_sent" not in response

    def test_already_sent_set_on_final_response_sent(self):
        """final_response_sent=True should still work as before."""
        consumer = self._make_mock_stream_consumer(already_sent=False, final_response_sent=True)
        response = {"final_response": "text"}

        self._apply_return_path(consumer, response)

        assert response.get("already_sent") is True

    def test_already_sent_not_set_on_failed_response(self):
        """Failed responses should never be suppressed — user needs to see
        the error message even if streaming sent earlier partial output."""
        consumer = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
        response = {"final_response": "Error: something broke", "failed": True}

        self._apply_return_path(consumer, response)

        assert "already_sent" not in response
# ===================================================================
# Test 3: run.py queued-message path — _already_streamed detection
# ===================================================================
class TestQueuedMessageAlreadyStreamed:
    """The queued-message path should detect that the first response was
    already streamed (already_sent=True) even without response_previewed."""

    def _make_mock_sc(self, already_sent=False, final_response_sent=False):
        """Build a stand-in stream consumer carrying only the two flags."""
        flags = {
            "already_sent": already_sent,
            "final_response_sent": final_response_sent,
        }
        return SimpleNamespace(**flags)

    @staticmethod
    def _was_already_streamed(_sc):
        """Reproduce the queued-message logic from run.py (post-fix)."""
        if not _sc:
            return False
        return bool(
            getattr(_sc, "final_response_sent", False)
            or getattr(_sc, "already_sent", False)
        )

    def test_queued_path_detects_already_streamed(self):
        """already_sent=True on stream consumer means first response was
        streamed — skip re-sending before processing queued message."""
        consumer = self._make_mock_sc(already_sent=True)

        assert self._was_already_streamed(consumer) is True

    def test_queued_path_sends_when_not_streamed(self):
        """Nothing was streamed — first response should be sent before
        processing the queued message."""
        consumer = self._make_mock_sc(already_sent=False)

        assert self._was_already_streamed(consumer) is False

    def test_queued_path_with_no_stream_consumer(self):
        """No stream consumer at all (streaming disabled) — not streamed."""
        assert self._was_already_streamed(None) is False
+1 -23
View File
@@ -335,29 +335,6 @@ def _make_adapter():
return adapter
# ---------------------------------------------------------------------------
# Typing indicator
# ---------------------------------------------------------------------------
class TestMatrixTypingIndicator:
    """Covers the adapter's ``stop_typing`` call into the Matrix client."""

    def setup_method(self):
        """Give each test an adapter whose client has an awaitable ``set_typing``."""
        adapter = _make_adapter()
        adapter._client = MagicMock()
        adapter._client.set_typing = AsyncMock()
        self.adapter = adapter

    @pytest.mark.asyncio
    async def test_stop_typing_clears_matrix_typing_state(self):
        """stop_typing() should send typing=false instead of waiting for timeout expiry."""
        from gateway.platforms.matrix import RoomID

        room = "!room:example.org"
        await self.adapter.stop_typing(room)

        set_typing = self.adapter._client.set_typing
        set_typing.assert_awaited_once_with(RoomID("!room:example.org"), timeout=0)
# ---------------------------------------------------------------------------
# mxc:// URL conversion
# ---------------------------------------------------------------------------
@@ -1854,3 +1831,4 @@ class TestMatrixPresence:
assert result is False
-4
View File
@@ -613,7 +613,6 @@ class TestDetectVenvDir:
# Not inside a virtualenv
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
dot_venv = tmp_path / ".venv"
@@ -625,7 +624,6 @@ class TestDetectVenvDir:
def test_falls_back_to_venv_directory(self, tmp_path, monkeypatch):
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
venv = tmp_path / "venv"
@@ -637,7 +635,6 @@ class TestDetectVenvDir:
def test_prefers_dot_venv_over_venv(self, tmp_path, monkeypatch):
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
(tmp_path / ".venv").mkdir()
@@ -649,7 +646,6 @@ class TestDetectVenvDir:
def test_returns_none_when_no_virtualenv(self, tmp_path, monkeypatch):
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
result = gateway_cli._detect_venv_dir()
+2
View File
@@ -694,6 +694,8 @@ class TestNewEndpoints:
assert "totals" in data
assert isinstance(data["daily"], list)
assert "total_sessions" in data["totals"]
assert "total_cache_write" in data["totals"]
assert "total_api_calls" in data["totals"]
def test_session_token_endpoint_removed(self):
"""GET /api/auth/session-token no longer exists."""
@@ -9,8 +9,6 @@ def _build_agent(model_cfg, custom_providers=None, model="anthropic/claude-opus-
if custom_providers is not None:
cfg["custom_providers"] = custom_providers
base_url = model_cfg.get("base_url", "")
with (
patch("hermes_cli.config.load_config", return_value=cfg),
patch("agent.model_metadata.get_model_context_length", return_value=128_000),
@@ -23,7 +21,6 @@ def _build_agent(model_cfg, custom_providers=None, model="anthropic/claude-opus-
agent = AIAgent(
model=model,
api_key="test-key-1234567890",
base_url=base_url,
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
+1 -4
View File
@@ -805,10 +805,7 @@ class TestCodexReasoningPreflight:
reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
assert len(reasoning_items) == 1
assert reasoning_items[0]["encrypted_content"] == "abc123encrypted"
# Note: "id" is intentionally excluded from normalized output —
# with store=False the API returns 404 on server-side id resolution.
# The id is only used for local deduplication via seen_ids.
assert "id" not in reasoning_items[0]
assert reasoning_items[0]["id"] == "r_001"
assert reasoning_items[0]["summary"] == [{"type": "summary_text", "text": "Thinking about it"}]
def test_reasoning_item_without_id(self, monkeypatch):
@@ -1249,17 +1249,13 @@ def test_chat_messages_to_responses_input_deduplicates_reasoning_ids(monkeypatch
]
items = agent._chat_messages_to_responses_input(messages)
reasoning_items = [it for it in items if it.get("type") == "reasoning"]
# Dedup: rs_aaa appears in both turns but should only be emitted once.
# 3 unique items total: enc_1 (from rs_aaa), enc_2 (rs_bbb), enc_3 (rs_ccc).
assert len(reasoning_items) == 3
encrypted = [it["encrypted_content"] for it in reasoning_items]
assert encrypted.count("enc_1") == 1
assert "enc_2" in encrypted
assert "enc_3" in encrypted
# IDs must be stripped — with store=False the API 404s on id lookups.
for it in reasoning_items:
assert "id" not in it
reasoning_ids = [it["id"] for it in items if it.get("type") == "reasoning"]
# rs_aaa should appear only once (first occurrence kept)
assert reasoning_ids.count("rs_aaa") == 1
# rs_bbb and rs_ccc should each appear once
assert reasoning_ids.count("rs_bbb") == 1
assert reasoning_ids.count("rs_ccc") == 1
assert len(reasoning_ids) == 3
def test_preflight_codex_input_deduplicates_reasoning_ids(monkeypatch):
@@ -1276,11 +1272,7 @@ def test_preflight_codex_input_deduplicates_reasoning_ids(monkeypatch):
normalized = agent._preflight_codex_input_items(raw_input)
reasoning_items = [it for it in normalized if it.get("type") == "reasoning"]
# rs_xyz duplicate should be collapsed to one item; rs_zzz kept.
reasoning_ids = [it["id"] for it in reasoning_items]
assert reasoning_ids.count("rs_xyz") == 1
assert reasoning_ids.count("rs_zzz") == 1
assert len(reasoning_items) == 2
encrypted = [it["encrypted_content"] for it in reasoning_items]
assert encrypted.count("enc_a") == 1
assert "enc_b" in encrypted
# IDs must be stripped — with store=False the API 404s on id lookups.
for it in reasoning_items:
assert "id" not in it
+1 -12
View File
@@ -46,18 +46,9 @@ def api_module(monkeypatch, tmp_path):
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
spec.loader.exec_module(module)
# Ensure the gws CLI code path is taken even when the binary isn't
# installed (CI). Without this, calendar_list() falls through to the
# Python SDK path which imports ``googleapiclient`` — not in deps.
module._gws_binary = lambda: "/usr/bin/gws"
# Bypass authentication check — no real token file in CI.
module._ensure_authenticated = lambda: None
return module
_gws_installed = importlib.util.find_spec("shutil") and __import__("shutil").which("gws")
def _write_token(path: Path, *, token="ya29.test", expiry=None, **extra):
data = {
"token": token,
@@ -133,14 +124,13 @@ def test_bridge_main_injects_token_env(bridge_module, tmp_path):
assert captured["cmd"] == ["gws", "gmail", "+triage"]
@pytest.mark.skipif(not _gws_installed, reason="gws CLI not installed")
def test_api_calendar_list_uses_agenda_by_default(api_module):
"""calendar list without dates uses +agenda helper."""
captured = {}
def capture_run(cmd, **kwargs):
captured["cmd"] = cmd
return MagicMock(returncode=0, stdout="{}", stderr="")
return MagicMock(returncode=0)
args = api_module.argparse.Namespace(
start="", end="", max=25, calendar="primary", func=api_module.calendar_list,
@@ -156,7 +146,6 @@ def test_api_calendar_list_uses_agenda_by_default(api_module):
assert "--days" in gws_args
@pytest.mark.skipif(not _gws_installed, reason="gws CLI not installed")
def test_api_calendar_list_respects_date_range(api_module):
"""calendar list with --start/--end uses raw events list API."""
captured = {}
+29 -2
View File
@@ -62,6 +62,27 @@ class TestSessionLifecycle:
assert session["input_tokens"] == 300
assert session["output_tokens"] == 150
def test_update_token_counts_tracks_api_call_count(self, db):
    """Each update_token_counts call adds its api_call_count to the session's running total."""
    db.create_session(session_id="s1", source="cli")

    # Three identical updates, each reporting exactly one API call.
    for _ in range(3):
        db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1)

    stored = db.get_session("s1")
    assert stored["api_call_count"] == 3
def test_update_token_counts_api_call_count_absolute(self, db):
    """With absolute=True the supplied api_call_count is stored as-is rather than added."""
    db.create_session(session_id="s1", source="cli")

    # Seed the session with one API call through the default code path.
    db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1)

    # Overwrite the counters in absolute mode; 5 must win over the seeded 1.
    absolute_counts = {"input_tokens": 300, "output_tokens": 150, "api_call_count": 5}
    db.update_token_counts("s1", absolute=True, **absolute_counts)

    stored = db.get_session("s1")
    assert stored["api_call_count"] == 5
    assert stored["input_tokens"] == 300
def test_update_token_counts_backfills_model_when_null(self, db):
db.create_session(session_id="s1", source="telegram")
db.update_token_counts("s1", input_tokens=10, output_tokens=5, model="openai/gpt-5.4")
@@ -935,7 +956,7 @@ class TestSchemaInit:
def test_schema_version(self, db):
cursor = db._conn.execute("SELECT version FROM schema_version")
version = cursor.fetchone()[0]
assert version == 6
assert version == 7
def test_title_column_exists(self, db):
"""Verify the title column was created in the sessions table."""
@@ -996,13 +1017,19 @@ class TestSchemaInit:
# Verify migration
cursor = migrated_db._conn.execute("SELECT version FROM schema_version")
assert cursor.fetchone()[0] == 6
assert cursor.fetchone()[0] == 7
# Verify title column exists and is NULL for existing sessions
session = migrated_db.get_session("existing")
assert session is not None
assert session["title"] is None
# Verify api_call_count column was added with default 0
cursor = migrated_db._conn.execute(
"SELECT api_call_count FROM sessions WHERE id = 'existing'"
)
assert cursor.fetchone()[0] == 0
# Verify we can set title on migrated session
assert migrated_db.set_session_title("existing", "Migrated Title") is True
session = migrated_db.get_session("existing")
@@ -123,7 +123,7 @@ class TestSendMatrix:
session.put.assert_called_once()
call_kwargs = session.put.call_args
url = call_kwargs[0][0]
assert url.startswith("https://matrix.example.com/_matrix/client/v3/rooms/%21room%3Aexample.com/send/m.room.message/")
assert url.startswith("https://matrix.example.com/_matrix/client/v3/rooms/!room:example.com/send/m.room.message/")
assert call_kwargs[1]["headers"]["Authorization"] == "Bearer syt_tok"
payload = call_kwargs[1]["json"]
assert payload["msgtype"] == "m.text"
-254
View File
@@ -752,38 +752,6 @@ class TestParseTargetRefDiscord:
assert is_explicit is True
class TestParseTargetRefMatrix:
    """Matrix-specific behaviour of _parse_target_ref: room IDs, user MXIDs and aliases."""

    def test_matrix_room_id_is_explicit(self):
        """A '!'-prefixed room ID is returned unchanged and flagged as explicit."""
        room_id = "!HLOQwxYGgFPMPJUSNR:matrix.org"
        chat_id, thread_id, is_explicit = _parse_target_ref("matrix", room_id)
        assert chat_id == room_id
        assert thread_id is None
        assert is_explicit is True

    def test_matrix_user_mxid_is_explicit(self):
        """An '@'-prefixed user MXID is returned unchanged and flagged as explicit."""
        mxid = "@hermes:matrix.org"
        chat_id, thread_id, is_explicit = _parse_target_ref("matrix", mxid)
        assert chat_id == mxid
        assert thread_id is None
        assert is_explicit is True

    def test_matrix_alias_is_not_explicit(self):
        """A '#'-prefixed room alias is not explicit; it still needs resolution."""
        chat_id, _thread_id, is_explicit = _parse_target_ref("matrix", "#general:matrix.org")
        assert chat_id is None
        assert is_explicit is False

    def test_matrix_prefix_only_matches_matrix_platform(self):
        """The '!' and '@' prefixes are not treated as explicit on other platforms."""
        non_matrix_targets = (
            ("telegram", "!something"),
            ("discord", "@someone"),
        )
        for platform_name, target_ref in non_matrix_targets:
            _, _, is_explicit = _parse_target_ref(platform_name, target_ref)
            assert is_explicit is False
class TestSendDiscordThreadId:
"""_send_discord uses thread_id when provided."""
@@ -886,225 +854,3 @@ class TestSendToPlatformDiscordThread:
send_mock.assert_awaited_once()
_, call_kwargs = send_mock.await_args
assert call_kwargs["thread_id"] is None
# ---------------------------------------------------------------------------
# Discord media attachment support
# ---------------------------------------------------------------------------
class TestSendDiscordMedia:
    """_send_discord uploads media files via multipart/form-data."""

    @staticmethod
    def _build_mock(response_status, response_data=None, response_text="error body"):
        """Build a properly-structured aiohttp mock chain.

        Returns ``(mock_session, mock_resp)``. The session is an async context
        manager whose ``post`` returns ``mock_resp``; the response is itself an
        async context manager exposing ``status``, ``json()`` and ``text()``.
        """
        mock_resp = MagicMock()
        mock_resp.status = response_status
        mock_resp.json = AsyncMock(return_value=response_data or {"id": "msg123"})
        mock_resp.text = AsyncMock(return_value=response_text)
        mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
        mock_resp.__aexit__ = AsyncMock(return_value=None)

        mock_session = MagicMock()
        mock_session.__aenter__ = AsyncMock(return_value=mock_session)
        mock_session.__aexit__ = AsyncMock(return_value=None)
        mock_session.post = MagicMock(return_value=mock_resp)
        return mock_session, mock_resp

    def test_text_and_media_sends_both(self, tmp_path):
        """Text message is sent first, then each media file as multipart."""
        img = tmp_path / "photo.png"
        img.write_bytes(b"\x89PNG fake image data")

        mock_session, _ = self._build_mock(200, {"id": "msg999"})
        with patch("aiohttp.ClientSession", return_value=mock_session):
            result = asyncio.run(
                _send_discord("tok", "111", "hello", media_files=[(str(img), False)])
            )

        assert result["success"] is True
        assert result["message_id"] == "msg999"
        # Two POSTs: one text JSON, one multipart upload
        assert mock_session.post.call_count == 2

    def test_media_only_skips_text_post(self, tmp_path):
        """When message is empty and media is present, text POST is skipped."""
        img = tmp_path / "photo.png"
        img.write_bytes(b"\x89PNG fake image data")

        mock_session, _ = self._build_mock(200, {"id": "media_only"})
        with patch("aiohttp.ClientSession", return_value=mock_session):
            result = asyncio.run(
                _send_discord("tok", "222", " ", media_files=[(str(img), False)])
            )

        assert result["success"] is True
        # Only one POST: the media upload (text was whitespace-only)
        assert mock_session.post.call_count == 1

    def test_missing_media_file_collected_as_warning(self):
        """Non-existent media paths produce warnings but don't fail."""
        mock_session, _ = self._build_mock(200, {"id": "txt_ok"})
        with patch("aiohttp.ClientSession", return_value=mock_session):
            result = asyncio.run(
                _send_discord("tok", "333", "hello", media_files=[("/nonexistent/file.png", False)])
            )

        assert result["success"] is True
        assert "warnings" in result
        assert any("not found" in w for w in result["warnings"])
        # Only the text POST was made, media was skipped
        assert mock_session.post.call_count == 1

    def test_media_upload_failure_collected_as_warning(self, tmp_path):
        """Failed media upload becomes a warning, text still succeeds."""
        img = tmp_path / "photo.png"
        img.write_bytes(b"\x89PNG fake image data")

        # Reuse the shared builder for both responses instead of hand-rolling
        # them: the first POST (text) succeeds, the second (media) returns 413.
        mock_session, text_resp = self._build_mock(200, {"id": "txt_ok"})
        _, media_resp = self._build_mock(413, response_text="Request Entity Too Large")
        mock_session.post = MagicMock(side_effect=[text_resp, media_resp])

        with patch("aiohttp.ClientSession", return_value=mock_session):
            result = asyncio.run(
                _send_discord("tok", "444", "hello", media_files=[(str(img), False)])
            )

        assert result["success"] is True
        assert result["message_id"] == "txt_ok"
        assert "warnings" in result
        assert any("413" in w for w in result["warnings"])

    def test_no_text_no_media_returns_error(self):
        """Empty text with no media still issues the text POST and reports success.

        NOTE: the test name is historical. No error dict is returned here: the
        "skip text if media present" condition is not met when media_files is
        empty, so the text POST fires and the mocked 200 response wins.
        """
        mock_session, _ = self._build_mock(200)
        with patch("aiohttp.ClientSession", return_value=mock_session):
            result = asyncio.run(
                _send_discord("tok", "555", "", media_files=[])
            )

        assert result["success"] is True

    def test_multiple_media_files_uploaded_separately(self, tmp_path):
        """Each media file gets its own multipart POST."""
        img1 = tmp_path / "a.png"
        img1.write_bytes(b"img1")
        img2 = tmp_path / "b.jpg"
        img2.write_bytes(b"img2")

        mock_session, _ = self._build_mock(200, {"id": "last"})
        with patch("aiohttp.ClientSession", return_value=mock_session):
            result = asyncio.run(
                _send_discord("tok", "666", "hi", media_files=[
                    (str(img1), False), (str(img2), False)
                ])
            )

        assert result["success"] is True
        # 1 text POST + 2 media POSTs = 3
        assert mock_session.post.call_count == 3
class TestSendToPlatformDiscordMedia:
    """_send_to_platform routes Discord media correctly."""

    def test_media_files_passed_on_last_chunk_only(self):
        """Only the final chunk of a chunked Discord message carries media_files."""
        recorded_calls = []

        async def fake_send_discord(token, chat_id, message, thread_id=None, media_files=None):
            # Record what each chunk was sent with so the test can inspect it.
            recorded_calls.append({"message": message, "media_files": media_files or []})
            return {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": "1"}

        pconfig = SimpleNamespace(enabled=True, token="tok", extra={})
        # A message long enough to get chunked (Discord limit is 2000)
        long_msg = " ".join(("A" * 1900, "B" * 1900))

        with patch("tools.send_message_tool._send_discord", side_effect=fake_send_discord):
            coro = _send_to_platform(
                Platform.DISCORD,
                pconfig,
                "999",
                long_msg,
                media_files=[("/fake/img.png", False)],
            )
            result = asyncio.run(coro)

        assert result["success"] is True
        assert len(recorded_calls) == 2  # Message was chunked
        first_chunk, last_chunk = recorded_calls
        assert first_chunk["media_files"] == []  # First chunk: no media
        assert last_chunk["media_files"] == [("/fake/img.png", False)]  # Last chunk: media attached

    def test_single_chunk_gets_media(self):
        """A short message is sent as one chunk and receives media_files directly."""
        discord_sender = AsyncMock(return_value={"success": True, "message_id": "1"})
        pconfig = SimpleNamespace(enabled=True, token="tok", extra={})

        with patch("tools.send_message_tool._send_discord", discord_sender):
            coro = _send_to_platform(
                Platform.DISCORD,
                pconfig,
                "888",
                "short message",
                media_files=[("/fake/img.png", False)],
            )
            result = asyncio.run(coro)

        assert result["success"] is True
        discord_sender.assert_awaited_once()
        sent_kwargs = discord_sender.await_args.kwargs
        assert sent_kwargs["media_files"] == [("/fake/img.png", False)]
class TestSendMatrixUrlEncoding:
    """_send_matrix URL-encodes Matrix room IDs in the API path."""

    def test_room_id_is_percent_encoded_in_url(self):
        """Matrix room IDs with ! and : are percent-encoded in the PUT URL."""
        # Response mock: async context manager reporting a successful send.
        mock_resp = MagicMock()
        mock_resp.status = 200
        mock_resp.json = AsyncMock(return_value={"event_id": "$evt123"})
        mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
        mock_resp.__aexit__ = AsyncMock(return_value=None)

        # Session mock: async context manager whose put() yields the response.
        mock_session = MagicMock()
        mock_session.put = MagicMock(return_value=mock_resp)
        mock_session.__aenter__ = AsyncMock(return_value=mock_session)
        mock_session.__aexit__ = AsyncMock(return_value=None)

        with patch("aiohttp.ClientSession", return_value=mock_session):
            from tools.send_message_tool import _send_matrix

            # asyncio.run gives the coroutine a fresh event loop and closes it
            # afterwards, matching the other send tests. The previous
            # asyncio.get_event_loop().run_until_complete(...) form is
            # deprecated when no loop is running and depends on leftover
            # loop state from earlier tests.
            result = asyncio.run(
                _send_matrix(
                    "test_token",
                    {"homeserver": "https://matrix.example.org"},
                    "!HLOQwxYGgFPMPJUSNR:matrix.org",
                    "hello",
                )
            )

        assert result["success"] is True
        # Verify the URL was called with percent-encoded room ID
        put_url = mock_session.put.call_args[0][0]
        assert "%21HLOQwxYGgFPMPJUSNR%3Amatrix.org" in put_url
        assert "!HLOQwxYGgFPMPJUSNR:matrix.org" not in put_url
+1 -5
View File
@@ -13,8 +13,6 @@ import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
# Import from cron module (will be available when properly installed)
@@ -393,8 +391,6 @@ Use action='create' to schedule a new job from a prompt or one or more skills.
Use action='list' to inspect jobs.
Use action='update', 'pause', 'resume', 'remove', or 'run' to manage an existing job.
To stop a job the user no longer wants: first action='list' to find the job_id, then action='remove' with that job_id. Never guess job IDs always list first.
Jobs run in a fresh session with no current-chat context, so prompts must be self-contained.
If skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
On update, passing skills=[] clears attached skills.
@@ -457,7 +453,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
},
"script": {
"type": "string",
"description": f"Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under {display_hermes_home()}/scripts/. On update, pass empty string to clear."
"description": "Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under ~/.hermes/scripts/. On update, pass empty string to clear."
},
},
"required": ["action"]
+16 -81
View File
@@ -68,7 +68,7 @@ SEND_MESSAGE_SCHEMA = {
},
"target": {
"type": "string",
"description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', 'platform:chat_id', or 'platform:chat_id:thread_id' for Telegram topics and Discord threads. Examples: 'telegram', 'telegram:-1001234567890:17585', 'discord:999888777:555444333', 'discord:#bot-home', 'slack:#engineering', 'signal:+155****4567', 'matrix:!roomid:server.org', 'matrix:@user:server.org'"
"description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', 'platform:chat_id', or 'platform:chat_id:thread_id' for Telegram topics and Discord threads. Examples: 'telegram', 'telegram:-1001234567890:17585', 'discord:999888777:555444333', 'discord:#bot-home', 'slack:#engineering', 'signal:+155****4567'"
},
"message": {
"type": "string",
@@ -248,9 +248,6 @@ def _parse_target_ref(platform_name: str, target_ref: str):
return match.group(1), None, True
if target_ref.lstrip("-").isdigit():
return target_ref, None, True
# Matrix room IDs (start with !) and user IDs (start with @) are explicit
if platform_name == "matrix" and (target_ref.startswith("!") or target_ref.startswith("@")):
return target_ref, None, True
return None, None, False
@@ -387,28 +384,11 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
if platform == Platform.WEIXIN:
return await _send_weixin(pconfig, chat_id, message, media_files=media_files)
# --- Discord: special handling for media attachments ---
if platform == Platform.DISCORD:
last_result = None
for i, chunk in enumerate(chunks):
is_last = (i == len(chunks) - 1)
result = await _send_discord(
pconfig.token,
chat_id,
chunk,
media_files=media_files if is_last else [],
thread_id=thread_id,
)
if isinstance(result, dict) and result.get("error"):
return result
last_result = result
return last_result
# --- Non-Telegram/Discord platforms ---
# --- Non-Telegram platforms ---
if media_files and not message.strip():
return {
"error": (
f"send_message MEDIA delivery is currently only supported for telegram, discord, and weixin; "
f"send_message MEDIA delivery is currently only supported for telegram; "
f"target {platform.value} had only media attachments"
)
}
@@ -416,12 +396,14 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
if media_files:
warning = (
f"MEDIA attachments were omitted for {platform.value}; "
"native send_message media delivery is currently only supported for telegram, discord, and weixin"
"native send_message media delivery is currently only supported for telegram"
)
last_result = None
for chunk in chunks:
if platform == Platform.SLACK:
if platform == Platform.DISCORD:
result = await _send_discord(pconfig.token, chat_id, chunk, thread_id=thread_id)
elif platform == Platform.SLACK:
result = await _send_slack(pconfig.token, chat_id, chunk)
elif platform == Platform.WHATSAPP:
result = await _send_whatsapp(pconfig.extra, chat_id, chunk)
@@ -586,16 +568,13 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No
return _error(f"Telegram send failed: {e}")
async def _send_discord(token, chat_id, message, thread_id=None, media_files=None):
async def _send_discord(token, chat_id, message, thread_id=None):
"""Send a single message via Discord REST API (no websocket client needed).
Chunking is handled by _send_to_platform() before this is called.
When thread_id is provided, the message is sent directly to that thread
via the /channels/{thread_id}/messages endpoint.
Media files are uploaded one-by-one via multipart/form-data after the
text message is sent (same pattern as Telegram).
"""
try:
import aiohttp
@@ -610,56 +589,14 @@ async def _send_discord(token, chat_id, message, thread_id=None, media_files=Non
url = f"https://discord.com/api/v10/channels/{thread_id}/messages"
else:
url = f"https://discord.com/api/v10/channels/{chat_id}/messages"
auth_headers = {"Authorization": f"Bot {token}"}
media_files = media_files or []
last_data = None
warnings = []
headers = {"Authorization": f"Bot {token}", "Content-Type": "application/json"}
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30), **_sess_kw) as session:
# Send text message (skip if empty and media is present)
if message.strip() or not media_files:
headers = {**auth_headers, "Content-Type": "application/json"}
async with session.post(url, headers=headers, json={"content": message}, **_req_kw) as resp:
if resp.status not in (200, 201):
body = await resp.text()
return _error(f"Discord API error ({resp.status}): {body}")
last_data = await resp.json()
# Send each media file as a separate multipart upload
for media_path, _is_voice in media_files:
if not os.path.exists(media_path):
warning = f"Media file not found, skipping: {media_path}"
logger.warning(warning)
warnings.append(warning)
continue
try:
form = aiohttp.FormData()
filename = os.path.basename(media_path)
with open(media_path, "rb") as f:
form.add_field("files[0]", f, filename=filename)
async with session.post(url, headers=auth_headers, data=form, **_req_kw) as resp:
if resp.status not in (200, 201):
body = await resp.text()
warning = _sanitize_error_text(f"Failed to send media {media_path}: Discord API error ({resp.status}): {body}")
logger.error(warning)
warnings.append(warning)
continue
last_data = await resp.json()
except Exception as e:
warning = _sanitize_error_text(f"Failed to send media {media_path}: {e}")
logger.error(warning)
warnings.append(warning)
if last_data is None:
error = "No deliverable text or media remained after processing"
if warnings:
return {"error": error, "warnings": warnings}
return {"error": error}
result = {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": last_data.get("id")}
if warnings:
result["warnings"] = warnings
return result
async with session.post(url, headers=headers, json={"content": message}, **_req_kw) as resp:
if resp.status not in (200, 201):
body = await resp.text()
return _error(f"Discord API error ({resp.status}): {body}")
data = await resp.json()
return {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": data.get("id")}
except Exception as e:
return _error(f"Discord send failed: {e}")
@@ -879,9 +816,7 @@ async def _send_matrix(token, extra, chat_id, message):
if not homeserver or not token:
return {"error": "Matrix not configured (MATRIX_HOMESERVER, MATRIX_ACCESS_TOKEN required)"}
txn_id = f"hermes_{int(time.time() * 1000)}_{os.urandom(4).hex()}"
from urllib.parse import quote
encoded_room = quote(chat_id, safe="")
url = f"{homeserver}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
url = f"{homeserver}/_matrix/client/v3/rooms/{chat_id}/send/m.room.message/{txn_id}"
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
# Build message payload with optional HTML formatted_body.
+2 -2
View File
@@ -39,7 +39,7 @@ import re
import shutil
import tempfile
from pathlib import Path
from hermes_constants import get_hermes_home, display_hermes_home
from hermes_constants import get_hermes_home
from typing import Dict, Any, Optional, Tuple
logger = logging.getLogger(__name__)
@@ -655,7 +655,7 @@ SKILL_MANAGE_SCHEMA = {
"description": (
"Manage skills (create, update, delete). Skills are your procedural "
"memory — reusable approaches for recurring task types. "
f"New skills go to {display_hermes_home()}/skills/; existing skills can be modified wherever they live.\n\n"
"New skills go to ~/.hermes/skills/; existing skills can be modified wherever they live.\n\n"
"Actions: create (full SKILL.md + optional category), "
"patch (old_string/new_string — preferred for fixes), "
"edit (full SKILL.md rewrite — major overhauls only), "
+3 -3
View File
@@ -69,7 +69,7 @@ Usage:
import json
import logging
from hermes_constants import get_hermes_home, display_hermes_home
from hermes_constants import get_hermes_home
import os
import re
from enum import Enum
@@ -408,7 +408,7 @@ def _gateway_setup_hint() -> str:
return GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE
except Exception:
return f"Secure secret entry is not available. Load this skill in the local CLI to be prompted, or add the key to {display_hermes_home()}/.env manually."
return "Secure secret entry is not available. Load this skill in the local CLI to be prompted, or add the key to ~/.hermes/.env manually."
def _build_setup_note(
@@ -666,7 +666,7 @@ def skills_list(category: str = None, task_id: str = None) -> str:
"success": True,
"skills": [],
"categories": [],
"message": f"No skills found. Skills directory created at {display_hermes_home()}/skills/",
"message": "No skills found. Skills directory created at ~/.hermes/skills/",
},
ensure_ascii=False,
)
+1 -3
View File
@@ -40,8 +40,6 @@ from pathlib import Path
from typing import Callable, Dict, Any, Optional
from urllib.parse import urljoin
from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key
@@ -1052,7 +1050,7 @@ TTS_SCHEMA = {
},
"output_path": {
"type": "string",
"description": f"Optional custom file path to save the audio. Defaults to {display_hermes_home()}/audio_cache/<timestamp>.mp3"
"description": "Optional custom file path to save the audio. Defaults to ~/.hermes/audio_cache/<timestamp>.mp3"
}
},
"required": ["text"]
+3 -1
View File
@@ -112,11 +112,14 @@ export const en: Translations = {
totalTokens: "Total Tokens",
totalSessions: "Total Sessions",
apiCalls: "API Calls",
cacheHitRate: "Cache Hit Rate",
dailyTokenUsage: "Daily Token Usage",
dailyBreakdown: "Daily Breakdown",
perModelBreakdown: "Per-Model Breakdown",
prompt: "Prompt",
input: "Input",
output: "Output",
cached: "cached",
total: "Total",
noUsageData: "No usage data for this period",
startSession: "Start a session to see analytics here",
@@ -125,7 +128,6 @@ export const en: Translations = {
tokens: "Tokens",
perDayAvg: "/day avg",
acrossModels: "across {count} models",
inOut: "{input} in / {output} out",
},
logs: {
+3 -1
View File
@@ -117,11 +117,14 @@ export interface Translations {
totalTokens: string;
totalSessions: string;
apiCalls: string;
cacheHitRate: string;
dailyTokenUsage: string;
dailyBreakdown: string;
perModelBreakdown: string;
prompt: string;
input: string;
output: string;
cached: string;
total: string;
noUsageData: string;
startSession: string;
@@ -130,7 +133,6 @@ export interface Translations {
tokens: string;
perDayAvg: string;
acrossModels: string;
inOut: string;
};
// ── Logs page ──
+3 -1
View File
@@ -112,11 +112,14 @@ export const zh: Translations = {
totalTokens: "总 Token 数",
totalSessions: "总会话数",
apiCalls: "API 调用",
cacheHitRate: "缓存命中率",
dailyTokenUsage: "每日 Token 用量",
dailyBreakdown: "每日明细",
perModelBreakdown: "模型用量明细",
prompt: "提示",
input: "输入",
output: "输出",
cached: "已缓存",
total: "总计",
noUsageData: "该时间段暂无使用数据",
startSession: "开始会话后将在此显示分析数据",
@@ -125,7 +128,6 @@ export const zh: Translations = {
tokens: "Token",
perDayAvg: "/天 平均",
acrossModels: "共 {count} 个模型",
inOut: "输入 {input} / 输出 {output}",
},
logs: {
+7
View File
@@ -269,18 +269,23 @@ export interface AnalyticsDailyEntry {
input_tokens: number;
output_tokens: number;
cache_read_tokens: number;
cache_write_tokens: number;
reasoning_tokens: number;
estimated_cost: number;
actual_cost: number;
sessions: number;
api_calls: number;
}
/**
 * One per-model row of the analytics breakdown.
 * NOTE(review): presumably mirrors the backend's per-model aggregation;
 * confirm field semantics against the analytics endpoint.
 */
export interface AnalyticsModelEntry {
// Model identifier the row is grouped by.
model: string;
// Input tokens; cache read/write tokens are reported separately below.
input_tokens: number;
output_tokens: number;
// Cache token counts, kept separate from input_tokens.
cache_read_tokens: number;
cache_write_tokens: number;
// Estimated cost for this model (units/currency not visible here; TODO confirm).
estimated_cost: number;
// Number of sessions attributed to this model.
sessions: number;
// Number of API calls attributed to this model (distinct from sessions).
api_calls: number;
}
export interface AnalyticsResponse {
@@ -290,10 +295,12 @@ export interface AnalyticsResponse {
total_input: number;
total_output: number;
total_cache_read: number;
total_cache_write: number;
total_reasoning: number;
total_estimated_cost: number;
total_actual_cost: number;
total_sessions: number;
total_api_calls: number;
};
}
+41 -14
View File
@@ -4,6 +4,7 @@ import {
Cpu,
Hash,
TrendingUp,
Zap,
} from "lucide-react";
import { api } from "@/lib/api";
import type { AnalyticsResponse, AnalyticsDailyEntry, AnalyticsModelEntry } from "@/lib/api";
@@ -19,6 +20,11 @@ const PERIODS = [
const CHART_HEIGHT_PX = 160;
/**
 * Total prompt-side tokens for an analytics row:
 * input + cache reads + cache writes. A missing cache field counts as 0.
 */
function getPromptTokens(d: { input_tokens: number; cache_read_tokens?: number; cache_write_tokens?: number }): number {
  const cacheRead = d.cache_read_tokens ?? 0;
  const cacheWrite = d.cache_write_tokens ?? 0;
  return d.input_tokens + cacheRead + cacheWrite;
}
function formatTokens(n: number): string {
if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;
if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
@@ -63,7 +69,7 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
const { t } = useI18n();
if (daily.length === 0) return null;
const maxTokens = Math.max(...daily.map((d) => d.input_tokens + d.output_tokens), 1);
const maxTokens = Math.max(...daily.map((d) => getPromptTokens(d) + d.output_tokens), 1);
return (
<Card>
@@ -75,7 +81,7 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
<div className="flex items-center gap-4 text-xs text-muted-foreground">
<div className="flex items-center gap-1.5">
<div className="h-2.5 w-2.5 bg-[#ffe6cb]" />
{t.analytics.input}
{t.analytics.prompt}
</div>
<div className="flex items-center gap-1.5">
<div className="h-2.5 w-2.5 bg-emerald-500" />
@@ -86,8 +92,9 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
<CardContent>
<div className="flex items-end gap-[2px]" style={{ height: CHART_HEIGHT_PX }}>
{daily.map((d) => {
const total = d.input_tokens + d.output_tokens;
const inputH = Math.round((d.input_tokens / maxTokens) * CHART_HEIGHT_PX);
const promptTokens = getPromptTokens(d);
const total = promptTokens + d.output_tokens;
const inputH = Math.round((promptTokens / maxTokens) * CHART_HEIGHT_PX);
const outputH = Math.round((d.output_tokens / maxTokens) * CHART_HEIGHT_PX);
return (
<div
@@ -99,7 +106,7 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
<div className="absolute bottom-full left-1/2 -translate-x-1/2 mb-2 hidden group-hover:block z-10 pointer-events-none">
<div className="bg-card border border-border px-2.5 py-1.5 text-[10px] text-foreground shadow-lg whitespace-nowrap">
<div className="font-medium">{formatDate(d.day)}</div>
<div>{t.analytics.input}: {formatTokens(d.input_tokens)}</div>
<div>{t.analytics.prompt}: {formatTokens(promptTokens)}</div>
<div>{t.analytics.output}: {formatTokens(d.output_tokens)}</div>
<div>{t.analytics.total}: {formatTokens(total)}</div>
</div>
@@ -152,18 +159,19 @@ function DailyTable({ daily }: { daily: AnalyticsDailyEntry[] }) {
<tr className="border-b border-border text-muted-foreground text-xs">
<th className="text-left py-2 pr-4 font-medium">{t.analytics.date}</th>
<th className="text-right py-2 px-4 font-medium">{t.sessions.title}</th>
<th className="text-right py-2 px-4 font-medium">{t.analytics.input}</th>
<th className="text-right py-2 px-4 font-medium">{t.analytics.prompt}</th>
<th className="text-right py-2 pl-4 font-medium">{t.analytics.output}</th>
</tr>
</thead>
<tbody>
{sorted.map((d) => {
const promptTokens = getPromptTokens(d);
return (
<tr key={d.day} className="border-b border-border/50 hover:bg-secondary/20 transition-colors">
<td className="py-2 pr-4 font-medium">{formatDate(d.day)}</td>
<td className="text-right py-2 px-4 text-muted-foreground">{d.sessions}</td>
<td className="text-right py-2 px-4">
<span className="text-[#ffe6cb]">{formatTokens(d.input_tokens)}</span>
<span className="text-[#ffe6cb]">{formatTokens(promptTokens)}</span>
</td>
<td className="text-right py-2 pl-4">
<span className="text-emerald-400">{formatTokens(d.output_tokens)}</span>
@@ -184,7 +192,7 @@ function ModelTable({ models }: { models: AnalyticsModelEntry[] }) {
if (models.length === 0) return null;
const sorted = [...models].sort(
(a, b) => b.input_tokens + b.output_tokens - (a.input_tokens + a.output_tokens),
(a, b) => (getPromptTokens(b) + b.output_tokens) - (getPromptTokens(a) + a.output_tokens),
);
return (
@@ -213,7 +221,7 @@ function ModelTable({ models }: { models: AnalyticsModelEntry[] }) {
</td>
<td className="text-right py-2 px-4 text-muted-foreground">{m.sessions}</td>
<td className="text-right py-2 pl-4">
<span className="text-[#ffe6cb]">{formatTokens(m.input_tokens)}</span>
<span className="text-[#ffe6cb]">{formatTokens(getPromptTokens(m))}</span>
{" / "}
<span className="text-emerald-400">{formatTokens(m.output_tokens)}</span>
</td>
@@ -283,12 +291,17 @@ export default function AnalyticsPage() {
{data && (
<>
{/* Summary cards */}
<div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-3">
<div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
<SummaryCard
icon={Hash}
label={t.analytics.totalTokens}
value={formatTokens(data.totals.total_input + data.totals.total_output)}
sub={t.analytics.inOut.replace("{input}", formatTokens(data.totals.total_input)).replace("{output}", formatTokens(data.totals.total_output))}
value={formatTokens(
(data.totals.total_input ?? 0) +
(data.totals.total_cache_read ?? 0) +
(data.totals.total_cache_write ?? 0) +
(data.totals.total_output ?? 0)
)}
sub={`${formatTokens((data.totals.total_input ?? 0) + (data.totals.total_cache_read ?? 0) + (data.totals.total_cache_write ?? 0))} ${t.analytics.prompt} / ${formatTokens(data.totals.total_output ?? 0)} ${t.analytics.output.toLowerCase()}`}
/>
<SummaryCard
icon={BarChart3}
@@ -297,11 +310,25 @@ export default function AnalyticsPage() {
sub={`~${(data.totals.total_sessions / days).toFixed(1)}${t.analytics.perDayAvg}`}
/>
<SummaryCard
icon={TrendingUp}
icon={Zap}
label={t.analytics.apiCalls}
value={String(data.daily.reduce((sum, d) => sum + d.sessions, 0))}
value={String(data.totals.total_api_calls ?? data.daily.reduce((sum, d) => sum + d.sessions, 0))}
sub={t.analytics.acrossModels.replace("{count}", String(data.by_model.length))}
/>
{(() => {
const promptSent = (data.totals.total_input ?? 0) + (data.totals.total_cache_read ?? 0);
const rate = promptSent > 0
? `${((data.totals.total_cache_read ?? 0) / promptSent * 100).toFixed(0)}%`
: "—";
return (
<SummaryCard
icon={TrendingUp}
label={t.analytics.cacheHitRate}
value={rate}
sub={`${formatTokens(data.totals.total_cache_read ?? 0)} ${t.analytics.cached}`}
/>
);
})()}
</div>
{/* Bar chart */}
+1 -20
View File
@@ -49,17 +49,6 @@ The OpenAI Codex provider authenticates via device code (open a URL, enter a cod
Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use a separate "auxiliary" model — by default Gemini Flash via OpenRouter. An `OPENROUTER_API_KEY` enables these tools automatically. You can also configure which model and provider these tools use — see [Auxiliary Models](/docs/user-guide/configuration#auxiliary-models).
:::
### Two Commands for Model Management
Hermes has **two** model commands that serve different purposes:
| Command | Where to run | What it does |
|---------|-------------|--------------|
| **`hermes model`** | Your terminal (outside any session) | Full setup wizard — add providers, run OAuth, enter API keys, configure endpoints |
| **`/model`** | Inside a Hermes chat session | Quick switch between **already-configured** providers and models |
If you're trying to switch to a provider you haven't set up yet (e.g. you only have OpenRouter configured and want to use Anthropic), you need `hermes model`, not `/model`. Exit your session first (`Ctrl+C` or `/quit`), run `hermes model`, complete the provider setup, then start a new session.
### Anthropic (Native)
Use Claude models directly through the Anthropic API — no OpenRouter proxy needed. Supports three auth methods:
@@ -263,15 +252,7 @@ Both approaches persist to `config.yaml`, which is the source of truth for model
### Switching Models with `/model`
:::warning hermes model vs /model
**`hermes model`** (run from your terminal, outside any chat session) is the **full provider setup wizard**. Use it to add new providers, run OAuth flows, enter API keys, and configure custom endpoints.
**`/model`** (typed inside an active Hermes chat session) can only **switch between providers and models you've already set up**. It cannot add new providers, run OAuth, or prompt for API keys. If you've only configured one provider (e.g. OpenRouter), `/model` will only show models for that provider.
**To add a new provider:** Exit your session (`Ctrl+C` or `/quit`), run `hermes model`, set up the new provider, then start a new session.
:::
Once you have at least one custom endpoint configured, you can switch models mid-session:
Once a custom endpoint is configured, you can switch models mid-session:
```
/model custom:qwen-2.5 # Switch to a model on your custom endpoint
+4 -23
View File
@@ -109,31 +109,22 @@ hermes chat --worktree -q "Review this repo and open a PR"
## `hermes model`
Interactive provider + model selector. **This is the command for adding new providers, setting up API keys, and running OAuth flows.** Run it from your terminal — not from inside an active Hermes chat session.
Interactive provider + model selector.
```bash
hermes model
```
Use this when you want to:
- **add a new provider** (OpenRouter, Anthropic, Copilot, DeepSeek, custom, etc.)
- log into OAuth-backed providers (Anthropic, Copilot, Codex, Nous Portal)
- enter or update API keys
- switch default providers
- log into OAuth-backed providers during model selection
- pick from provider-specific model lists
- configure a custom/self-hosted endpoint
- save the new default into config
:::warning hermes model vs /model — know the difference
**`hermes model`** (run from your terminal, outside any Hermes session) is the **full provider setup wizard**. It can add new providers, run OAuth flows, prompt for API keys, and configure endpoints.
**`/model`** (typed inside an active Hermes chat session) can only **switch between providers and models you've already set up**. It cannot add new providers, run OAuth, or prompt for API keys.
**If you need to add a new provider:** Exit your Hermes session first (`Ctrl+C` or `/quit`), then run `hermes model` from your terminal prompt.
:::
### `/model` slash command (mid-session)
Switch between already-configured models without leaving a session:
Switch models without leaving a session:
```
/model # Show current model and available options
@@ -145,16 +136,6 @@ Switch between already-configured models without leaving a session:
/model openrouter:anthropic/claude-sonnet-4 # Switch back to cloud
```
By default, `/model` changes apply **to the current session only**. Add `--global` to persist the change to `config.yaml`:
```
/model claude-sonnet-4 --global # Switch and save as new default
```
:::info What if I only see OpenRouter models?
If you've only configured OpenRouter, `/model` will only show OpenRouter models. To add another provider (Anthropic, DeepSeek, Copilot, etc.), exit your session and run `hermes model` from the terminal.
:::
Provider and base URL changes are persisted to `config.yaml` automatically. When switching away from a custom endpoint, the stale base URL is cleared to prevent it from leaking into other providers.
## `hermes gateway`
-26
View File
@@ -187,32 +187,6 @@ curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scri
### Provider & Model Issues
#### `/model` only shows one provider / can't switch providers
**Cause:** `/model` (inside a chat session) can only switch between providers you've **already configured**. If you've only set up OpenRouter, that's all `/model` will show.
**Solution:** Exit your session and use `hermes model` from your terminal to add new providers:
```bash
# Exit the Hermes chat session first (Ctrl+C or /quit)
# Run the full provider setup wizard
hermes model
# This lets you: add providers, run OAuth, enter API keys, configure endpoints
```
After adding a new provider via `hermes model`, start a new chat session — `/model` will now show all your configured providers.
:::tip Quick reference
| Want to... | Use |
|-----------|-----|
| Add a new provider | `hermes model` (from terminal) |
| Enter/change API keys | `hermes model` (from terminal) |
| Switch model mid-session | `/model <name>` (inside session) |
| Switch to different configured provider | `/model provider:model` (inside session) |
:::
#### API key not working
**Cause:** Key is missing, expired, incorrectly set, or for the wrong provider.
+2 -2
View File
@@ -46,7 +46,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
| Command | Description |
|---------|-------------|
| `/config` | Show current configuration |
| `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint). Use `--global` to persist the change to config.yaml. **Note:** `/model` can only switch between already-configured providers. To add a new provider, exit the session and run `hermes model` from your terminal. |
| `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint). Use `--global` to persist the change to config.yaml. |
| `/provider` | Show available providers and current provider |
| `/personality` | Set a predefined personality |
| `/verbose` | Cycle tool progress display: off → new → all → verbose. Can be [enabled for messaging](#notes) via config. |
@@ -124,7 +124,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
| `/reset` | Reset conversation history. |
| `/status` | Show session info. |
| `/stop` | Kill all running background processes and interrupt the running agent. |
| `/model [provider:model]` | Show or change the model. Supports provider switches (`/model zai:glm-5`), custom endpoints (`/model custom:model`), named custom providers (`/model custom:local:qwen`), and auto-detect (`/model custom`). Use `--global` to persist the change to config.yaml. **Note:** `/model` can only switch between already-configured providers. To add a new provider or set up API keys, use `hermes model` from your terminal (outside the chat session). |
| `/model [provider:model]` | Show or change the model. Supports provider switches (`/model zai:glm-5`), custom endpoints (`/model custom:model`), named custom providers (`/model custom:local:qwen`), and auto-detect (`/model custom`). Use `--global` to persist the change to config.yaml. |
| `/provider` | Show provider availability and auth status. |
| `/personality [name]` | Set a personality overlay for the session. |
| `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. |
-2
View File
@@ -119,7 +119,6 @@ const sidebars: SidebarsConfig = {
'user-guide/messaging/wecom-callback',
'user-guide/messaging/weixin',
'user-guide/messaging/bluebubbles',
'user-guide/messaging/qqbot',
'user-guide/messaging/open-webui',
'user-guide/messaging/webhooks',
],
@@ -154,7 +153,6 @@ const sidebars: SidebarsConfig = {
'guides/use-voice-mode-with-hermes',
'guides/build-a-hermes-plugin',
'guides/automate-with-cron',
'guides/automation-templates',
'guides/cron-troubleshooting',
'guides/work-with-skills',
'guides/delegation-patterns',