Compare commits

..

1 Commits

Author SHA1 Message Date
Teknium
f2b1b3f1a3 feat(config): make tool output truncation limits configurable
Port from anomalyco/opencode#23770: expose a new `tool_output` config
section so users can tune the hardcoded truncation caps that apply to
terminal output and read_file pagination.

Three knobs under `tool_output`:
- max_bytes (default 50_000) — terminal stdout/stderr cap
- max_lines (default 2000) — read_file pagination cap
- max_line_length (default 2000) — per-line cap in line-numbered view

All three keep their existing hardcoded values as defaults, so behaviour
is unchanged when the section is absent. Power users on big-context
models can raise them; small-context local models can lower them.

Implementation:
- New `tools/tool_output_limits.py` reads the section with defensive
  fallback (missing/invalid values → defaults, never raises).
- `tools/terminal_tool.py` MAX_OUTPUT_CHARS now comes from
  get_max_bytes().
- `tools/file_operations.py` normalize_read_pagination() and
  _add_line_numbers() now pull the limits at call time.
- `hermes_cli/config.py` DEFAULT_CONFIG gains the `tool_output` section
  so `hermes setup` writes defaults into fresh configs.
- Docs page `user-guide/configuration.md` gains a "Tool Output
  Truncation Limits" section with large-context and small-context
  example configs.

Tests (18 new in tests/tools/test_tool_output_limits.py):
- Default resolution with missing / malformed / non-dict config.
- Full and partial user overrides.
- Coercion of bad values (None, negative, wrong type, str int).
- Shortcut accessors delegate correctly.
- DEFAULT_CONFIG exposes the section with the right defaults.
- Integration: normalize_read_pagination clamps to the configured
  max_lines.
2026-04-23 17:05:10 -07:00
17 changed files with 462 additions and 585 deletions

View File

@@ -2821,7 +2821,6 @@ def _prompt_model_selection(
pricing: Optional[Dict[str, Dict[str, str]]] = None,
unavailable_models: Optional[List[str]] = None,
portal_url: str = "",
allow_custom = True
) -> Optional[str]:
"""Interactive model selection. Puts current_model first with a marker. Returns chosen model ID or None.
@@ -2910,16 +2909,8 @@ def _prompt_model_selection(
from simple_term_menu import TerminalMenu
choices = [f" {_label(mid)}" for mid in ordered]
custom_idx = None
if allow_custom:
custom_idx = len(choices)
choices.append(" Enter custom model name")
skip_idx = None
if current_model:
skip_idx = len(choices)
choices.append(" Skip (keep current)")
choices.append(" Enter custom model name")
choices.append(" Skip (keep current)")
# Print the unavailable block BEFORE the menu via regular print().
# simple_term_menu pads title lines to terminal width (causes wrapping),
@@ -2956,29 +2947,21 @@ def _prompt_model_selection(
print()
if idx < len(ordered):
return ordered[idx]
if idx == custom_idx:
elif idx == len(ordered):
custom = input("Enter model name: ").strip()
return custom if custom else None
if idx == skip_idx:
return None
return None
except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
pass
# Fallback: numbered list
print(menu_title)
n = len(ordered)
extra = []
if allow_custom:
extra.append("Enter custom model name")
if current_model:
extra.append("Skip (keep current)")
total = n + len(extra)
num_width = len(str(total))
num_width = len(str(len(ordered) + 2))
for i, mid in enumerate(ordered, 1):
print(f" {i:>{num_width}}. {_label(mid)}")
for j, label in enumerate(extra, n + 1):
print(f" {j:>{num_width}}. {label}")
n = len(ordered)
print(f" {n + 1:>{num_width}}. Enter custom model name")
print(f" {n + 2:>{num_width}}. Skip (keep current)")
if _unavailable:
_upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
@@ -2990,19 +2973,18 @@ def _prompt_model_selection(
while True:
try:
choice = input(f"Choice [1-{total}]: ").strip()
choice = input(f"Choice [1-{n + 2}] (default: skip): ").strip()
if not choice:
return None
val = int(choice)
if 1 <= val <= n:
return ordered[val - 1]
extra_idx = val - n - 1
if 0 <= extra_idx < len(extra):
if extra[extra_idx] == "Enter custom model name":
custom = input("Enter model name: ").strip()
return custom if custom else None
return None # skip
print(f"Please enter 1-{total}")
idx = int(choice)
if 1 <= idx <= n:
return ordered[idx - 1]
elif idx == n + 1:
custom = input("Enter model name: ").strip()
return custom if custom else None
elif idx == n + 2:
return None
print(f"Please enter 1-{n + 2}")
except ValueError:
print("Please enter a number")
except (KeyboardInterrupt, EOFError):
@@ -3278,6 +3260,7 @@ def _nous_device_code_login(
open_browser = False
print(f"Starting Hermes login via {pconfig.name}...")
print(f"Portal: {portal_base_url}")
if insecure:
print("TLS verification: disabled (--insecure)")
elif ca_bundle:
@@ -3297,18 +3280,19 @@ def _nous_device_code_login(
interval = int(device_data["interval"])
print()
print("To continue:")
print(f" 1. Open: {verification_url}")
print(f" 2. If prompted, enter code: {user_code}")
if open_browser:
opened = webbrowser.open(verification_url)
if opened:
print("If you don't see a browser window open, navigate to this URL:")
print(" (Opened browser for verification)")
else:
print("Navigate to this URL to continue:")
print(verification_url)
print(f"If you're prompted for a code, use {user_code}")
print()
print(" Could not open browser automatically — use the URL above.")
effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS))
print(f"Waiting for approval (checking every {effective_interval}s)...")
print(f"Waiting for approval (polling every {effective_interval}s)...")
token_data = _poll_for_token(
client=client,
@@ -3373,7 +3357,7 @@ def _nous_device_code_login(
raise
def login_nous(args, pconfig: ProviderConfig) -> None:
def _login_nous(args, pconfig: ProviderConfig) -> None:
"""Nous Portal device authorization flow."""
timeout_seconds = getattr(args, "timeout", None) or 15.0
insecure = bool(getattr(args, "insecure", False))
@@ -3435,10 +3419,7 @@ def login_nous(args, pconfig: ProviderConfig) -> None:
)
model_ids = _PROVIDER_MODELS.get("nous", [])
_portal = auth_state.get("portal_base_url", "")
print()
unavailable_models: list = []
if model_ids:
pricing = get_pricing_for_provider("nous")
@@ -3447,17 +3428,14 @@ def login_nous(args, pconfig: ProviderConfig) -> None:
model_ids, unavailable_models = partition_nous_models_by_tier(
model_ids, pricing, free_tier=True,
)
if not free_tier:
print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
if len(model_ids) > 1:
selected_model = _prompt_model_selection(
model_ids, pricing=pricing,
unavailable_models=unavailable_models,
portal_url=_portal,
allow_custom=not free_tier
)
else:
selected_model = model_ids[0]
_portal = auth_state.get("portal_base_url", "")
if model_ids:
print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
selected_model = _prompt_model_selection(
model_ids, pricing=pricing,
unavailable_models=unavailable_models,
portal_url=_portal,
)
elif unavailable_models:
_url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
print("No free models currently available.")

View File

@@ -486,7 +486,27 @@ DEFAULT_CONFIG = {
# exceed this are rejected with guidance to use offset+limit.
# 100K chars ≈ 25–35K tokens across typical tokenisers.
"file_read_max_chars": 100_000,
# Tool-output truncation thresholds. When terminal output or a
# single read_file page exceeds these limits, Hermes truncates the
# payload sent to the model (keeping head + tail for terminal,
# enforcing pagination for read_file). Tuning these trades context
# footprint against how much raw output the model can see in one
# shot. Ported from anomalyco/opencode PR #23770.
#
# - max_bytes: terminal_tool output cap, in chars
# (default 50_000 ≈ 12-15K tokens).
# - max_lines: read_file pagination cap — the maximum `limit`
# a single read_file call can request before
# being clamped (default 2000).
# - max_line_length: per-line cap applied when read_file emits a
# line-numbered view (default 2000 chars).
"tool_output": {
"max_bytes": 50_000,
"max_lines": 2000,
"max_line_length": 2000,
},
"compression": {
"enabled": True,
"threshold": 0.50, # compress when context usage exceeds this ratio

View File

@@ -1085,6 +1085,9 @@ def cmd_chat(args):
print(
"It looks like Hermes isn't configured yet -- no API keys or providers found."
)
print()
print(" Run: hermes setup")
print()
from hermes_cli.setup import (
is_interactive_stdin,
@@ -1097,8 +1100,16 @@ def cmd_chat(args):
)
sys.exit(1)
cmd_setup(args)
return
try:
reply = input("Run setup now? [Y/n] ").strip().lower()
except (EOFError, KeyboardInterrupt):
reply = "n"
if reply in ("", "y", "yes"):
cmd_setup(args)
return
print()
print("You can run 'hermes setup' at any time to configure.")
sys.exit(1)
# Start update check in background (runs while other init happens)
try:
@@ -2124,7 +2135,7 @@ def _model_flow_nous(config, current_model="", args=None):
resolve_nous_runtime_credentials,
AuthError,
format_auth_error,
login_nous,
_login_nous,
PROVIDER_REGISTRY,
)
from hermes_cli.config import (
@@ -2137,6 +2148,8 @@ def _model_flow_nous(config, current_model="", args=None):
state = get_provider_auth_state("nous")
if not state or not state.get("access_token"):
print("Not logged into Nous Portal. Starting login...")
print()
try:
mock_args = argparse.Namespace(
portal_url=getattr(args, "portal_url", None),
@@ -2148,7 +2161,7 @@ def _model_flow_nous(config, current_model="", args=None):
ca_bundle=getattr(args, "ca_bundle", None),
insecure=bool(getattr(args, "insecure", False)),
)
login_nous(mock_args, PROVIDER_REGISTRY["nous"])
_login_nous(mock_args, PROVIDER_REGISTRY["nous"])
# Offer Tool Gateway enablement for paid subscribers
try:
_refreshed = load_config() or {}
@@ -2199,7 +2212,7 @@ def _model_flow_nous(config, current_model="", args=None):
ca_bundle=None,
insecure=False,
)
login_nous(mock_args, PROVIDER_REGISTRY["nous"])
_login_nous(mock_args, PROVIDER_REGISTRY["nous"])
except Exception as login_exc:
print(f"Re-login failed: {login_exc}")
return

View File

@@ -18,10 +18,9 @@ import shutil
import sys
import copy
from pathlib import Path
from typing import Literal, Optional, Dict, Any
from typing import Optional, Dict, Any
from hermes_cli.nous_subscription import get_nous_subscription_features
from hermes_cli.main import _model_flow_nous
from tools.tool_backend_helpers import managed_nous_tools_enabled
from utils import base_url_hostname
from hermes_constants import get_optional_skills_dir
@@ -656,7 +655,7 @@ def _prompt_container_resources(config: dict):
def setup_model_provider(config: dict, *, quick: bool | Literal["nous_portal"] = False):
def setup_model_provider(config: dict, *, quick: bool = False):
"""Configure the inference provider and default model.
Delegates to ``cmd_model()`` (the same flow used by ``hermes model``)
@@ -678,11 +677,7 @@ def setup_model_provider(config: dict, *, quick: bool | Literal["nous_portal"] =
# credential prompting, model selection, and config persistence.
from hermes_cli.main import select_provider_and_model
try:
if quick == "nous_portal":
config = load_config()
_model_flow_nous(config)
else:
select_provider_and_model()
select_provider_and_model()
except (SystemExit, KeyboardInterrupt):
print()
print_info("Provider setup skipped.")
@@ -3035,15 +3030,11 @@ def run_setup_wizard(args):
config = load_config()
setup_mode = prompt_choice("How would you like to set up Hermes?", [
"Nous Account setup — model & messaging (recommended)",
"Quick setup — provider, model & messaging",
"Quick setup — provider, model & messaging (recommended)",
"Full setup — configure everything",
], 0)
if setup_mode == 0:
_run_first_time_quick_setup(config, hermes_home, is_existing, nous_quick=True)
return
if setup_mode == 1:
_run_first_time_quick_setup(config, hermes_home, is_existing)
return
@@ -3104,7 +3095,7 @@ def _resolve_hermes_chat_argv() -> Optional[list[str]]:
return None
def _offer_launch_chat(auto_launch = False):
def _offer_launch_chat():
"""Prompt the user to jump straight into chat after setup."""
print()
if not prompt_yes_no("Launch hermes chat now?", True):
@@ -3118,7 +3109,7 @@ def _offer_launch_chat(auto_launch = False):
os.execvp(chat_argv[0], chat_argv)
def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool, nous_quick=False):
def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
"""Streamlined first-time setup: provider + model only.
Applies sensible defaults for TTS (Edge), terminal (local), agent
@@ -3126,7 +3117,7 @@ def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool, no
``hermes setup <section>``.
"""
# Step 1: Model & Provider (essential — skips rotation/vision/TTS)
setup_model_provider(config, quick="nous_portal" if nous_quick else True )
setup_model_provider(config, quick=True)
# Step 2: Apply defaults for everything else
_apply_default_agent_settings(config)
@@ -3159,9 +3150,7 @@ def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool, no
_print_setup_summary(config, hermes_home)
# if the user hasn't set up the gateway, assume they want to launch chat.
force_launch_chat = gateway_choice == 0
_offer_launch_chat(force_launch_chat)
_offer_launch_chat()
def _run_quick_setup(config: dict, hermes_home):

View File

@@ -571,7 +571,7 @@ def test_cmd_model_forwards_nous_login_tls_options(monkeypatch):
captured["ca_bundle"] = login_args.ca_bundle
captured["insecure"] = login_args.insecure
monkeypatch.setattr("hermes_cli.auth.login_nous", _fake_login)
monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_login)
hermes_main.cmd_model(
SimpleNamespace(

View File

@@ -1,28 +1,22 @@
"""Regression tests for the TUI gateway's `complete.path` handler.
Reported during the TUI v2 blitz retest:
- typing `@folder:` (and `@folder` with no colon yet) surfaced files
alongside directories — the gateway-side completion lives in
`tui_gateway/server.py` and was never touched by the earlier fix to
`hermes_cli/commands.py`.
- typing `@appChrome` required the full `@ui-tui/src/components/app…`
path to find the file — users expect Cmd-P-style fuzzy basename
matching across the repo, not a strict directory prefix filter.
Reported during the TUI v2 blitz retest: typing `@folder:` (and `@folder`
with no colon yet) still surfaced files alongside directories in the
TUI composer, because the gateway-side completion lives in
`tui_gateway/server.py` and was never touched by the earlier fix to
`hermes_cli/commands.py`.
Covers:
- `@folder:` only yields directories
- `@file:` only yields regular files
- Bare `@folder` / `@file` (no colon) lists cwd directly
- Explicit prefix is preserved in the completion text
- `@<name>` with no slash fuzzy-matches basenames anywhere in the tree
"""
from __future__ import annotations
from pathlib import Path
import pytest
from tui_gateway import server
@@ -39,15 +33,6 @@ def _items(word: str):
return [(it["text"], it["display"], it.get("meta", "")) for it in resp["result"]["items"]]
@pytest.fixture(autouse=True)
def _reset_fuzzy_cache(monkeypatch):
# Each test walks a fresh tmp dir; clear the cached listing so prior
# roots can't leak through the TTL window.
server._fuzzy_cache.clear()
yield
server._fuzzy_cache.clear()
def test_at_folder_colon_only_dirs(tmp_path, monkeypatch):
monkeypatch.chdir(tmp_path)
_fixture(tmp_path)
@@ -104,176 +89,3 @@ def test_bare_at_still_shows_static_refs(tmp_path, monkeypatch):
for expected in ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:"):
assert expected in texts, f"missing static ref {expected!r} in {texts!r}"
# ── Fuzzy basename matching ──────────────────────────────────────────────
# Users shouldn't have to know the full path — typing `@appChrome` should
# find `ui-tui/src/components/appChrome.tsx`.
def _nested_fixture(tmp_path: Path):
(tmp_path / "readme.md").write_text("x")
(tmp_path / ".env").write_text("x")
(tmp_path / "ui-tui/src/components").mkdir(parents=True)
(tmp_path / "ui-tui/src/components/appChrome.tsx").write_text("x")
(tmp_path / "ui-tui/src/components/appLayout.tsx").write_text("x")
(tmp_path / "ui-tui/src/components/thinking.tsx").write_text("x")
(tmp_path / "ui-tui/src/hooks").mkdir(parents=True)
(tmp_path / "ui-tui/src/hooks/useCompletion.ts").write_text("x")
(tmp_path / "tui_gateway").mkdir()
(tmp_path / "tui_gateway/server.py").write_text("x")
def test_fuzzy_at_finds_file_without_directory_prefix(tmp_path, monkeypatch):
"""`@appChrome` — with no slash — should surface the nested file."""
monkeypatch.chdir(tmp_path)
_nested_fixture(tmp_path)
entries = _items("@appChrome")
texts = [t for t, _, _ in entries]
assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
# Display is the basename, meta is the containing directory, so the
# picker can show `appChrome.tsx ui-tui/src/components` on one row.
row = next(r for r in entries if r[0] == "@file:ui-tui/src/components/appChrome.tsx")
assert row[1] == "appChrome.tsx"
assert row[2] == "ui-tui/src/components"
def test_fuzzy_ranks_exact_before_prefix_before_subseq(tmp_path, monkeypatch):
"""Better matches sort before weaker matches regardless of path depth."""
monkeypatch.chdir(tmp_path)
_nested_fixture(tmp_path)
(tmp_path / "server.py").write_text("x") # exact basename match at root
texts = [t for t, _, _ in _items("@server")]
# Exact `server.py` beats `tui_gateway/server.py` (prefix match) — both
# rank 1 on basename but exact basename wins on the sort key; shorter
# rel path breaks ties.
assert texts[0] == "@file:server.py", texts
assert "@file:tui_gateway/server.py" in texts
def test_fuzzy_camelcase_word_boundary(tmp_path, monkeypatch):
"""Mid-basename camelCase pieces match without substring scanning."""
monkeypatch.chdir(tmp_path)
_nested_fixture(tmp_path)
texts = [t for t, _, _ in _items("@Chrome")]
# `Chrome` starts a camelCase word inside `appChrome.tsx`.
assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
def test_fuzzy_subsequence_catches_sparse_queries(tmp_path, monkeypatch):
"""`@uCo` → `useCompletion.ts` via subsequence, last-resort tier."""
monkeypatch.chdir(tmp_path)
_nested_fixture(tmp_path)
texts = [t for t, _, _ in _items("@uCo")]
assert "@file:ui-tui/src/hooks/useCompletion.ts" in texts, texts
def test_fuzzy_at_file_prefix_preserved(tmp_path, monkeypatch):
"""Explicit `@file:` prefix still wins the completion tag."""
monkeypatch.chdir(tmp_path)
_nested_fixture(tmp_path)
texts = [t for t, _, _ in _items("@file:appChrome")]
assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
def test_fuzzy_skipped_when_path_has_slash(tmp_path, monkeypatch):
"""Any `/` in the query = user is navigating; keep directory listing."""
monkeypatch.chdir(tmp_path)
_nested_fixture(tmp_path)
texts = [t for t, _, _ in _items("@ui-tui/src/components/app")]
# Directory-listing mode prefixes with `@file:` / `@folder:` per entry.
# It should only surface direct children of the named dir — not the
# nested `useCompletion.ts`.
assert any("appChrome.tsx" in t for t in texts), texts
assert not any("useCompletion.ts" in t for t in texts), texts
def test_fuzzy_skipped_when_folder_tag(tmp_path, monkeypatch):
"""`@folder:<name>` still lists directories — fuzzy scanner only walks
files (git-tracked + untracked), so defer to the dir-listing path."""
monkeypatch.chdir(tmp_path)
_nested_fixture(tmp_path)
texts = [t for t, _, _ in _items("@folder:ui")]
# Root has `ui-tui/` as a directory; the listing branch should surface it.
assert any(t.startswith("@folder:ui-tui") for t in texts), texts
def test_fuzzy_hides_dotfiles_unless_asked(tmp_path, monkeypatch):
"""`.env` doesn't leak into `@env` but does show for `@.env`."""
monkeypatch.chdir(tmp_path)
_nested_fixture(tmp_path)
assert not any(".env" in t for t, _, _ in _items("@env"))
assert any(t.endswith(".env") for t, _, _ in _items("@.env"))
def test_fuzzy_caps_results(tmp_path, monkeypatch):
"""The 30-item cap survives a big tree."""
monkeypatch.chdir(tmp_path)
for i in range(60):
(tmp_path / f"mod_{i:03d}.py").write_text("x")
items = _items("@mod")
assert len(items) == 30
def test_fuzzy_paths_relative_to_cwd_inside_subdir(tmp_path, monkeypatch):
"""When the gateway runs from a subdirectory of a git repo, fuzzy
completion paths must resolve under that cwd — not under the repo root.
Without this, `@appChrome` from inside `apps/web/` would suggest
`@file:apps/web/src/foo.tsx` but the agent (resolving from cwd) would
look for `apps/web/apps/web/src/foo.tsx` and fail. We translate every
`git ls-files` result back to a `relpath(root)` and drop anything
outside `root` so the completion contract stays "paths are cwd-relative".
"""
import subprocess
subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True)
subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmp_path, check=True)
subprocess.run(["git", "config", "user.name", "test"], cwd=tmp_path, check=True)
(tmp_path / "apps" / "web" / "src").mkdir(parents=True)
(tmp_path / "apps" / "web" / "src" / "appChrome.tsx").write_text("x")
(tmp_path / "apps" / "api" / "src").mkdir(parents=True)
(tmp_path / "apps" / "api" / "src" / "server.ts").write_text("x")
(tmp_path / "README.md").write_text("x")
subprocess.run(["git", "add", "."], cwd=tmp_path, check=True)
subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=tmp_path, check=True)
# Run from `apps/web/` — completions should be relative to here, and
# files outside this subtree (apps/api, README.md at root) shouldn't
# appear at all.
monkeypatch.chdir(tmp_path / "apps" / "web")
texts = [t for t, _, _ in _items("@appChrome")]
assert "@file:src/appChrome.tsx" in texts, texts
assert not any("apps/web/" in t for t in texts), texts
server._fuzzy_cache.clear()
other_texts = [t for t, _, _ in _items("@server")]
assert not any("server.ts" in t for t in other_texts), other_texts
server._fuzzy_cache.clear()
readme_texts = [t for t, _, _ in _items("@README")]
assert not any("README.md" in t for t in readme_texts), readme_texts

View File

@@ -0,0 +1,152 @@
"""Tests for tools.tool_output_limits.
Covers:
1. Default values when no config is provided.
2. Config override picks up user-supplied max_bytes / max_lines /
max_line_length.
3. Malformed values (None, negative, wrong type) fall back to defaults
rather than raising.
4. Integration: the helpers return what the terminal_tool and
file_operations call paths will actually consume.
Port-tracking: anomalyco/opencode PR #23770
(feat(truncate): allow configuring tool output truncation limits).
"""
from __future__ import annotations
from unittest.mock import patch
import pytest
from tools import tool_output_limits as tol
class TestDefaults:
    """Baseline behaviour when no usable ``tool_output`` config exists."""

    def test_defaults_match_previous_hardcoded_values(self):
        # The DEFAULT_* constants must equal the values that used to be
        # hardcoded in terminal_tool / file_operations.
        expected = {
            "DEFAULT_MAX_BYTES": 50_000,
            "DEFAULT_MAX_LINES": 2000,
            "DEFAULT_MAX_LINE_LENGTH": 2000,
        }
        for attr, value in expected.items():
            assert getattr(tol, attr) == value

    def test_get_limits_returns_defaults_when_config_missing(self):
        with patch("hermes_cli.config.load_config", return_value={}):
            resolved = tol.get_tool_output_limits()

        assert resolved == dict(
            max_bytes=tol.DEFAULT_MAX_BYTES,
            max_lines=tol.DEFAULT_MAX_LINES,
            max_line_length=tol.DEFAULT_MAX_LINE_LENGTH,
        )

    def test_get_limits_returns_defaults_when_config_not_a_dict(self):
        # load_config should always return a dict but be defensive anyway.
        with patch("hermes_cli.config.load_config", return_value="not a dict"):
            resolved = tol.get_tool_output_limits()

        assert resolved["max_bytes"] == tol.DEFAULT_MAX_BYTES

    def test_get_limits_returns_defaults_when_load_config_raises(self):
        # An exception instance as side_effect makes the mock raise on call.
        failure = RuntimeError("boom")
        with patch("hermes_cli.config.load_config", side_effect=failure):
            resolved = tol.get_tool_output_limits()

        assert resolved["max_lines"] == tol.DEFAULT_MAX_LINES
class TestOverrides:
    """User-supplied ``tool_output`` values win over the built-in defaults."""

    def test_user_config_overrides_all_three(self):
        section = {
            "max_bytes": 100_000,
            "max_lines": 5000,
            "max_line_length": 4096,
        }
        with patch(
            "hermes_cli.config.load_config",
            return_value={"tool_output": section},
        ):
            resolved = tol.get_tool_output_limits()

        assert resolved == {
            "max_bytes": 100_000,
            "max_lines": 5000,
            "max_line_length": 4096,
        }

    def test_partial_override_preserves_other_defaults(self):
        # Only max_bytes is set; the other two keys must keep their defaults.
        partial = {"tool_output": {"max_bytes": 200_000}}
        with patch("hermes_cli.config.load_config", return_value=partial):
            resolved = tol.get_tool_output_limits()

        assert resolved["max_bytes"] == 200_000
        assert resolved["max_lines"] == tol.DEFAULT_MAX_LINES
        assert resolved["max_line_length"] == tol.DEFAULT_MAX_LINE_LENGTH

    def test_section_not_a_dict_falls_back(self):
        broken = {"tool_output": "nonsense"}
        with patch("hermes_cli.config.load_config", return_value=broken):
            resolved = tol.get_tool_output_limits()

        assert resolved["max_bytes"] == tol.DEFAULT_MAX_BYTES
class TestCoercion:
    """Value coercion: unusable entries fall back, numeric strings parse."""

    @pytest.mark.parametrize("bad", [None, "not a number", -1, 0, [], {}])
    def test_invalid_values_fall_back_to_defaults(self, bad):
        # The same bad value is placed under every key of the section.
        section = dict.fromkeys(("max_bytes", "max_lines", "max_line_length"), bad)
        with patch(
            "hermes_cli.config.load_config",
            return_value={"tool_output": section},
        ):
            resolved = tol.get_tool_output_limits()

        assert resolved["max_bytes"] == tol.DEFAULT_MAX_BYTES
        assert resolved["max_lines"] == tol.DEFAULT_MAX_LINES
        assert resolved["max_line_length"] == tol.DEFAULT_MAX_LINE_LENGTH

    def test_string_integer_is_coerced(self):
        stringly = {"tool_output": {"max_bytes": "75000"}}
        with patch("hermes_cli.config.load_config", return_value=stringly):
            resolved = tol.get_tool_output_limits()

        assert resolved["max_bytes"] == 75_000
class TestShortcuts:
    """The single-value accessors must agree with the full limits dict."""

    def test_individual_accessors_delegate_to_get_tool_output_limits(self):
        section = {"max_bytes": 111, "max_lines": 222, "max_line_length": 333}
        with patch(
            "hermes_cli.config.load_config",
            return_value={"tool_output": section},
        ):
            # Call all three accessors while the patched config is active.
            observed = (
                tol.get_max_bytes(),
                tol.get_max_lines(),
                tol.get_max_line_length(),
            )

        assert observed[0] == 111
        assert observed[1] == 222
        assert observed[2] == 333
class TestDefaultConfigHasSection:
    """``hermes_cli.config.DEFAULT_CONFIG`` must carry a ``tool_output``
    section whose values equal the ``DEFAULT_*`` constants of the helper
    module, so the two cannot drift apart."""

    def test_default_config_contains_tool_output_section(self):
        from hermes_cli.config import DEFAULT_CONFIG

        assert "tool_output" in DEFAULT_CONFIG
        tool_output = DEFAULT_CONFIG["tool_output"]
        assert isinstance(tool_output, dict)

        expected_by_key = (
            ("max_bytes", tol.DEFAULT_MAX_BYTES),
            ("max_lines", tol.DEFAULT_MAX_LINES),
            ("max_line_length", tol.DEFAULT_MAX_LINE_LENGTH),
        )
        for key, expected in expected_by_key:
            assert tool_output[key] == expected
class TestIntegrationReadPagination:
    """End-to-end check that ``normalize_read_pagination`` honours the
    configured (or default) ``max_lines`` ceiling."""

    def test_pagination_limit_clamped_by_config_value(self):
        from tools.file_operations import normalize_read_pagination

        configured = {"tool_output": {"max_lines": 50}}
        with patch("hermes_cli.config.load_config", return_value=configured):
            result = normalize_read_pagination(offset=1, limit=1000)

        normalized_offset, normalized_limit = result
        # A request for 1000 lines must be cut down to the configured cap of 50.
        assert normalized_limit == 50
        assert normalized_offset == 1

    def test_pagination_default_when_config_missing(self):
        from tools.file_operations import normalize_read_pagination

        with patch("hermes_cli.config.load_config", return_value={}):
            result = normalize_read_pagination(offset=10, limit=100000)

        normalized_offset, normalized_limit = result
        # With no config section the default cap (2000) applies.
        assert normalized_limit == tol.DEFAULT_MAX_LINES
        assert normalized_offset == 10

View File

@@ -292,10 +292,15 @@ def normalize_read_pagination(offset: Any = DEFAULT_READ_OFFSET,
Tool schemas declare minimum/maximum values, but not every caller or
provider enforces schemas before dispatch. Clamp here so invalid values
cannot leak into sed ranges like ``0,-1p``.
The upper bound on ``limit`` comes from ``tool_output.max_lines`` in
config.yaml (falling back to ``DEFAULT_MAX_LINES`` in
``tools.tool_output_limits`` when the key is unset or invalid).
"""
from tools.tool_output_limits import get_max_lines
max_lines = get_max_lines()
normalized_offset = max(1, _coerce_int(offset, DEFAULT_READ_OFFSET))
normalized_limit = _coerce_int(limit, DEFAULT_READ_LIMIT)
normalized_limit = max(1, min(normalized_limit, MAX_LINES))
normalized_limit = max(1, min(normalized_limit, max_lines))
return normalized_offset, normalized_limit
@@ -414,12 +419,14 @@ class ShellFileOperations(FileOperations):
def _add_line_numbers(self, content: str, start_line: int = 1) -> str:
"""Add line numbers to content in LINE_NUM|CONTENT format."""
from tools.tool_output_limits import get_max_line_length
max_line_length = get_max_line_length()
lines = content.split('\n')
numbered = []
for i, line in enumerate(lines, start=start_line):
# Truncate long lines
if len(line) > MAX_LINE_LENGTH:
line = line[:MAX_LINE_LENGTH] + "... [truncated]"
if len(line) > max_line_length:
line = line[:max_line_length] + "... [truncated]"
numbered.append(f"{i:6d}|{line}")
return '\n'.join(numbered)

View File

@@ -1805,7 +1805,8 @@ def terminal_tool(
pass
# Truncate output if too long, keeping both head and tail
MAX_OUTPUT_CHARS = 50000
from tools.tool_output_limits import get_max_bytes
MAX_OUTPUT_CHARS = get_max_bytes()
if len(output) > MAX_OUTPUT_CHARS:
head_chars = int(MAX_OUTPUT_CHARS * 0.4) # 40% head (error messages often appear early)
tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output)

View File

@@ -0,0 +1,92 @@
"""Configurable tool-output truncation limits.
Ported from anomalyco/opencode PR #23770 (``feat(truncate): allow
configuring tool output truncation limits``).
OpenCode hardcoded ``MAX_LINES = 2000`` and ``MAX_BYTES = 50 * 1024``
as tool-output truncation thresholds. Hermes-agent had the same
hardcoded constants in two places:
* ``tools/terminal_tool.py`` — ``MAX_OUTPUT_CHARS = 50000`` (terminal
stdout/stderr cap)
* ``tools/file_operations.py`` — ``MAX_LINES = 2000`` /
``MAX_LINE_LENGTH = 2000`` (read_file pagination cap + per-line cap)
This module centralises those values behind a single config section
(``tool_output`` in ``config.yaml``) so power users can tune them
without patching the source. The existing hardcoded numbers remain as
defaults, so behaviour is unchanged when the config key is absent.
Example ``config.yaml``::
tool_output:
max_bytes: 100000 # terminal output cap (chars)
max_lines: 5000 # read_file pagination + truncation cap
max_line_length: 2000 # per-line length cap before '... [truncated]'
The limits reader is defensive: any error (missing config file, invalid
value type, etc.) falls back to the built-in defaults so tools never
fail because of a malformed config.
"""
from __future__ import annotations
from typing import Any, Dict
# Hardcoded defaults — these match the pre-existing values, so adding
# this module is behaviour-preserving for users who don't set
# ``tool_output`` in config.yaml. They are also the fallback returned by
# ``get_tool_output_limits()`` whenever a configured value is missing or
# cannot be coerced to a positive integer.
#
# NOTE: despite its name, the ``max_bytes`` limit is applied as a character
# count — terminal_tool compares it against ``len(output)``.
DEFAULT_MAX_BYTES = 50_000 # terminal_tool.MAX_OUTPUT_CHARS (terminal output cap)
DEFAULT_MAX_LINES = 2000 # file_operations.MAX_LINES (read_file ``limit`` ceiling)
DEFAULT_MAX_LINE_LENGTH = 2000 # file_operations.MAX_LINE_LENGTH (per-line cap)
def _coerce_positive_int(value: Any, default: int) -> int:
    """Return ``value`` as a positive int, or ``default`` on any issue.

    Args:
        value: Raw config value (any type) to interpret as a limit.
        default: Value returned when ``value`` is unusable.

    Returns:
        ``int(value)`` when that conversion succeeds and is greater than
        zero; otherwise ``default``. Never raises for the value types a
        config file can produce.
    """
    # bool is a subclass of int, so ``True`` would otherwise become a limit
    # of 1; treat booleans as invalid instead.
    if isinstance(value, bool):
        return default
    try:
        coerced = int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / list / dict; ValueError: non-numeric str or NaN;
        # OverflowError: int(float("inf")).
        return default
    return coerced if coerced > 0 else default
def get_tool_output_limits() -> Dict[str, int]:
    """Resolve the tool-output limits from the ``tool_output`` config section.

    The returned dict always has exactly the keys ``max_bytes``,
    ``max_lines`` and ``max_line_length``. Any entry that is missing or
    fails positive-int coercion is replaced by its ``DEFAULT_*`` constant.
    Failures while importing or loading the config are swallowed on
    purpose so that a broken config cannot break a tool call.
    """
    section: Dict[str, Any] = {}
    try:
        # Imported lazily, inside the try, so import errors also fall back.
        from hermes_cli.config import load_config

        loaded = load_config() or {}
        candidate = loaded.get("tool_output") if isinstance(loaded, dict) else None
        if isinstance(candidate, dict):
            section = candidate
    except Exception:
        section = {}

    fallbacks = (
        ("max_bytes", DEFAULT_MAX_BYTES),
        ("max_lines", DEFAULT_MAX_LINES),
        ("max_line_length", DEFAULT_MAX_LINE_LENGTH),
    )
    return {
        key: _coerce_positive_int(section.get(key), fallback)
        for key, fallback in fallbacks
    }
def get_max_bytes() -> int:
    """Return just the resolved ``max_bytes`` limit (terminal output cap)."""
    limits = get_tool_output_limits()
    return limits["max_bytes"]
def get_max_lines() -> int:
    """Return just the resolved ``max_lines`` limit (read_file line cap)."""
    limits = get_tool_output_limits()
    return limits["max_lines"]
def get_max_line_length() -> int:
    """Return only the ``max_line_length`` limit (convenience for file-ops callers)."""
    limits = get_tool_output_limits()
    return limits["max_line_length"]

View File

@@ -3256,162 +3256,6 @@ def _(rid, params: dict) -> dict:
# ── Methods: complete ─────────────────────────────────────────────────
# Seconds a cached listing stays valid; _list_repo_files compares it against
# time.monotonic() deltas.
_FUZZY_CACHE_TTL_S = 5.0
# Upper bound on the number of paths _list_repo_files collects per listing.
_FUZZY_CACHE_MAX_FILES = 20000
# Directory names pruned by the os.walk fallback in _list_repo_files (that
# fallback additionally prunes every directory whose name starts with ".").
_FUZZY_FALLBACK_EXCLUDES = frozenset(
{
".git",
".hg",
".svn",
".next",
".cache",
".venv",
"venv",
"node_modules",
"__pycache__",
"dist",
"build",
"target",
".mypy_cache",
".pytest_cache",
".ruff_cache",
}
)
# Held by _list_repo_files around every read and write of _fuzzy_cache.
_fuzzy_cache_lock = threading.Lock()
# Maps a root directory to (time.monotonic() timestamp, listed paths).
_fuzzy_cache: dict[str, tuple[float, list[str]]] = {}
def _list_repo_files(root: str) -> list[str]:
    """Return file paths relative to ``root``, using ``/`` as separator.

    Uses ``git ls-files`` from the repo top (resolved via
    ``rev-parse --show-toplevel``) so the listing covers tracked + untracked
    (non-ignored) files, then converts each path back to be relative to
    ``root``. Files outside ``root`` (parent directories of cwd, sibling
    subtrees) are excluded so the picker stays scoped to what's reachable
    from ``root``. Falls back to a bounded ``os.walk(root)`` when git yields
    nothing (not a repo, git missing, timeout). The result is capped at
    ``_FUZZY_CACHE_MAX_FILES`` entries and cached per ``root`` for
    ``_FUZZY_CACHE_TTL_S`` seconds so rapid keystrokes don't respawn git.

    Args:
        root: Directory the returned paths are relative to.

    Returns:
        The list of relative paths. The same list object is stored in the
        cache, so callers should treat it as read-only.
    """
    now = time.monotonic()
    with _fuzzy_cache_lock:
        cached = _fuzzy_cache.get(root)
        if cached and now - cached[0] < _FUZZY_CACHE_TTL_S:
            return cached[1]

    files: list[str] = []
    try:
        # git is expected to report the symlink-resolved toplevel, so compare
        # against the resolved root as well. Otherwise a root reached through
        # a symlink makes every file look like "../…" and the listing
        # silently degrades to the os.walk fallback.
        real_root = os.path.realpath(root)
        top_result = subprocess.run(
            ["git", "-C", root, "rev-parse", "--show-toplevel"],
            capture_output=True,
            timeout=2.0,
            check=False,
        )
        if top_result.returncode == 0:
            top = top_result.stdout.decode("utf-8", "replace").strip()
            list_result = subprocess.run(
                ["git", "-C", top, "ls-files", "-z", "--cached", "--others", "--exclude-standard"],
                capture_output=True,
                timeout=2.0,
                check=False,
            )
            if list_result.returncode == 0:
                for p in list_result.stdout.decode("utf-8", "replace").split("\0"):
                    if not p:
                        continue
                    rel = os.path.relpath(os.path.join(top, p), real_root).replace(os.sep, "/")
                    # Skip parents/siblings of cwd — keep the picker scoped
                    # to root-and-below, matching Cmd-P workspace semantics.
                    if rel.startswith("../"):
                        continue
                    files.append(rel)
                    if len(files) >= _FUZZY_CACHE_MAX_FILES:
                        break
    except (OSError, ValueError, subprocess.TimeoutExpired):
        # OSError: git not installed / not executable. TimeoutExpired: git
        # took longer than 2s. ValueError: os.path.relpath cannot relate the
        # two paths (e.g. different drives on Windows). Best-effort: fall
        # through to the walk below.
        pass

    if not files:
        # Fallback walk: skip vendor/build dirs + dot-dirs so the walk stays
        # tractable. Dotfiles themselves survive — the ranker decides based
        # on whether the query starts with `.`.
        try:
            for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
                # In-place slice assignment prunes the directories os.walk
                # will descend into next.
                dirnames[:] = [
                    d
                    for d in dirnames
                    if d not in _FUZZY_FALLBACK_EXCLUDES and not d.startswith(".")
                ]
                rel_dir = os.path.relpath(dirpath, root)
                for f in filenames:
                    rel = f if rel_dir == "." else f"{rel_dir}/{f}"
                    files.append(rel.replace(os.sep, "/"))
                    if len(files) >= _FUZZY_CACHE_MAX_FILES:
                        break
                if len(files) >= _FUZZY_CACHE_MAX_FILES:
                    break
        except OSError:
            pass

    with _fuzzy_cache_lock:
        _fuzzy_cache[root] = (now, files)
    return files
def _fuzzy_basename_rank(name: str, query: str) -> tuple[int, int] | None:
"""Rank ``name`` against ``query``; lower is better. Returns None to reject.
Tiers (kind):
0 — exact basename
1 — basename prefix (e.g. `app` → `appChrome.tsx`)
2 — word-boundary / camelCase hit (e.g. `chrome` → `appChrome.tsx`)
3 — substring anywhere in basename
4 — subsequence match (every query char appears in order)
Secondary key is `len(name)` so shorter names win ties.
"""
if not query:
return (3, len(name))
nl = name.lower()
ql = query.lower()
if nl == ql:
return (0, len(name))
if nl.startswith(ql):
return (1, len(name))
# Word-boundary split: `foo-bar_baz.qux` → ["foo","bar","baz","qux"].
# camelCase split: `appChrome` → ["app","Chrome"]. Cheap approximation;
# falls through to substring/subsequence if it misses.
parts: list[str] = []
buf = ""
for ch in name:
if ch in "-_." or (ch.isupper() and buf and not buf[-1].isupper()):
if buf:
parts.append(buf)
buf = ch if ch not in "-_." else ""
else:
buf += ch
if buf:
parts.append(buf)
for p in parts:
if p.lower().startswith(ql):
return (2, len(name))
if ql in nl:
return (3, len(name))
i = 0
for ch in nl:
if ch == ql[i]:
i += 1
if i == len(ql):
return (4, len(name))
return None
@method("complete.path")
def _(rid, params: dict) -> dict:
@@ -3447,42 +3291,6 @@ def _(rid, params: dict) -> dict:
prefix_tag = ""
path_part = query if is_context else query
# Fuzzy basename search across the repo when the user types a bare
# name with no path separator — `@appChrome` surfaces every file
# whose basename matches, regardless of directory depth. Matches what
# editors like Cursor / VS Code do for Cmd-P. Path-ish queries (with
# `/`, `./`, `~/`, `/abs`) fall through to the directory-listing
# path so explicit navigation intent is preserved.
if (
is_context
and path_part
and "/" not in path_part
and prefix_tag != "folder"
):
root = os.getcwd()
ranked: list[tuple[tuple[int, int], str, str]] = []
for rel in _list_repo_files(root):
basename = os.path.basename(rel)
if basename.startswith(".") and not path_part.startswith("."):
continue
rank = _fuzzy_basename_rank(basename, path_part)
if rank is None:
continue
ranked.append((rank, rel, basename))
ranked.sort(key=lambda r: (r[0], len(r[1]), r[1]))
tag = prefix_tag or "file"
for _, rel, basename in ranked[:30]:
items.append(
{
"text": f"@{tag}:{rel}",
"display": basename,
"meta": os.path.dirname(rel),
}
)
return _ok(rid, {"items": items})
expanded = _normalize_completion_path(path_part) if path_part else "."
if expanded == "." or not expanded:
search_dir, match = ".", ""

View File

@@ -152,79 +152,91 @@ describe('createGatewayEventHandler', () => {
expect(appended[0]?.thinkingTokens).toBe(estimateTokensRough(fromServer))
})
it('anchors inline_diff as its own segment where the edit happened', () => {
it('attaches inline_diff to the assistant completion body', () => {
const appended: Msg[] = []
const onEvent = createGatewayEventHandler(buildCtx(appended))
const diff = '\u001b[31m--- a/foo.ts\u001b[0m\n\u001b[32m+++ b/foo.ts\u001b[0m\n@@\n-old\n+new'
const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
const block = `\`\`\`diff\n${cleaned}\n\`\`\``
// Narration → tool → tool-complete → more narration → message-complete.
// The diff MUST land between the two narration segments, not tacked
// onto the final one.
onEvent({ payload: { text: 'Editing the file' }, type: 'message.delta' } as any)
onEvent({ payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' }, type: 'tool.start' } as any)
onEvent({ payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
onEvent({
payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' },
type: 'tool.start'
} as any)
onEvent({
payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' },
type: 'tool.complete'
} as any)
// Diff is already committed to segmentMessages as its own segment.
// Diff is buffered for message.complete and sanitized (ANSI stripped).
expect(appended).toHaveLength(0)
expect(turnController.segmentMessages).toEqual([
{ role: 'assistant', text: 'Editing the file' },
{ kind: 'diff', role: 'assistant', text: block }
])
expect(turnController.pendingInlineDiffs).toEqual([cleaned])
onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any)
onEvent({
payload: { text: 'patch applied' },
type: 'message.complete'
} as any)
// Three transcript messages: pre-tool narration → diff (kind='diff',
// so MessageLine gives it blank-line breathing room) → post-tool
// narration. The final message does NOT contain a diff.
expect(appended).toHaveLength(3)
expect(appended[0]?.text).toBe('Editing the file')
expect(appended[1]).toMatchObject({ kind: 'diff', text: block })
expect(appended[2]?.text).toBe('patch applied')
expect(appended[2]?.text).not.toContain('```diff')
// Diff is rendered in the same assistant message body as the completion.
expect(appended).toHaveLength(1)
expect(appended[0]).toMatchObject({ role: 'assistant' })
expect(appended[0]?.text).toContain('patch applied')
expect(appended[0]?.text).toContain('```diff')
expect(appended[0]?.text).toContain(cleaned)
})
it('drops the diff segment when the final assistant text narrates the same diff', () => {
it('does not append inline_diff twice when assistant text already contains it', () => {
const appended: Msg[] = []
const onEvent = createGatewayEventHandler(buildCtx(appended))
const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
const assistantText = `Done. Here's the inline diff:\n\n\`\`\`diff\n${cleaned}\n\`\`\``
onEvent({ payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)
onEvent({
payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' },
type: 'tool.complete'
} as any)
onEvent({
payload: { text: assistantText },
type: 'message.complete'
} as any)
// Only the final message — diff-only segment dropped so we don't
// render two stacked copies of the same patch.
expect(appended).toHaveLength(1)
expect(appended[0]?.text).toBe(assistantText)
expect((appended[0]?.text.match(/```diff/g) ?? []).length).toBe(1)
})
it('strips the CLI "┊ review diff" header from inline diff segments', () => {
it('strips the CLI "┊ review diff" header from queued inline diffs', () => {
const appended: Msg[] = []
const onEvent = createGatewayEventHandler(buildCtx(appended))
const raw = ' \u001b[33m┊ review diff\u001b[0m\n--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
onEvent({ payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)
onEvent({
payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' },
type: 'tool.complete'
} as any)
onEvent({
payload: { text: 'done' },
type: 'message.complete'
} as any)
// diff segment first (kind='diff'), final narration second
expect(appended).toHaveLength(2)
expect(appended[0]?.kind).toBe('diff')
expect(appended).toHaveLength(1)
expect(appended[0]?.text).not.toContain('┊ review diff')
expect(appended[0]?.text).toContain('--- a/foo.ts')
expect(appended[1]?.text).toBe('done')
})
it('drops the diff segment when assistant writes its own ```diff fence', () => {
it('suppresses inline_diff when assistant already wrote a diff fence', () => {
const appended: Msg[] = []
const onEvent = createGatewayEventHandler(buildCtx(appended))
const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
const assistantText = 'Done. Clean swap:\n\n```diff\n-old\n+new\n```'
onEvent({ payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)
onEvent({
payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' },
type: 'tool.complete'
} as any)
onEvent({
payload: { text: assistantText },
type: 'message.complete'
} as any)
expect(appended).toHaveLength(1)
expect(appended[0]?.text).toBe(assistantText)
@@ -240,18 +252,15 @@ describe('createGatewayEventHandler', () => {
payload: { inline_diff: diff, name: 'review_diff', summary: diff, tool_id: 'tool-1' },
type: 'tool.complete'
} as any)
onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)
onEvent({
payload: { text: 'done' },
type: 'message.complete'
} as any)
// Two segments: the diff block (kind='diff', no tool row) and the final
// narration (tool row belongs here since pendingSegmentTools carries
// across the flushStreamingSegment call).
expect(appended).toHaveLength(2)
expect(appended[0]?.kind).toBe('diff')
expect(appended).toHaveLength(1)
expect(appended[0]?.tools?.[0]).toContain('Review Diff')
expect(appended[0]?.tools?.[0]).not.toContain('--- a/foo.ts')
expect(appended[0]?.text).toContain('```diff')
expect(appended[0]?.tools ?? []).toEqual([])
expect(appended[1]?.text).toBe('done')
expect(appended[1]?.tools?.[0]).toContain('Review Diff')
expect(appended[1]?.tools?.[0]).not.toContain('--- a/foo.ts')
})
it('shows setup panel for missing provider startup error', () => {

View File

@@ -385,12 +385,10 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
return
}
// Anchor the diff to where the edit happened in the turn — between
// the narration that preceded the tool call and whatever the agent
// streams afterwards. The previous end-merge put the diff at the
// bottom of the final message even when the edit fired mid-turn,
// which read as "the agent wrote this after saying that".
turnController.pushInlineDiffSegment(inlineDiffText)
// Keep inline diffs attached to the assistant completion body so
// they render in the same message flow, not as a standalone system
// artifact that can look out-of-place around tool rows.
turnController.queueInlineDiff(inlineDiffText)
return
}

View File

@@ -19,20 +19,6 @@ const INTERRUPT_COOLDOWN_MS = 1500
const ACTIVITY_LIMIT = 8
const TRAIL_LIMIT = 8
// Returns the raw patch text carried by a diff-only segment: a message with
// `kind: 'diff'` whose whole text is a single ```diff fenced block, as
// produced by pushInlineDiffSegment. Every other message yields null, so
// the message.complete dedupe that relies on this never touches real
// assistant narration.
const diffSegmentBody = (msg: Msg): null | string =>
  msg.kind === 'diff' ? (msg.text.match(/^```diff\n([\s\S]*?)\n```$/)?.[1] ?? null) : null
export interface InterruptDeps {
appendMessage: (msg: Msg) => void
gw: { request: <T = unknown>(method: string, params?: Record<string, unknown>) => Promise<T> }
@@ -54,6 +40,7 @@ class TurnController {
bufRef = ''
interrupted = false
lastStatusNote = ''
pendingInlineDiffs: string[] = []
persistedToolLabels = new Set<string>()
persistSpawnTree?: (subagents: SubagentProgress[], sessionId: null | string) => Promise<void>
protocolWarned = false
@@ -92,6 +79,7 @@ class TurnController {
this.activeTools = []
this.streamTimer = clear(this.streamTimer)
this.bufRef = ''
this.pendingInlineDiffs = []
this.pendingSegmentTools = []
this.segmentMessages = []
@@ -198,35 +186,18 @@ class TurnController {
}, REASONING_PULSE_MS)
}
pushInlineDiffSegment(diffText: string) {
queueInlineDiff(diffText: string) {
// Strip CLI chrome the gateway emits before the unified diff (e.g. a
// leading "┊ review diff" header written by `_emit_inline_diff` for the
// terminal printer). That header only makes sense as stdout dressing,
// not inside a markdown ```diff block.
const stripped = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()
const text = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()
if (!stripped) {
if (!text || this.pendingInlineDiffs.includes(text)) {
return
}
// Flush any in-progress streaming text as its own segment first, so the
// diff lands BETWEEN the assistant narration that preceded the edit and
// whatever the agent streams afterwards — not glued onto the final
// message. This is the whole point of segment-anchored diffs: the diff
// renders where the edit actually happened.
this.flushStreamingSegment()
const block = `\`\`\`diff\n${stripped}\n\`\`\``
// Skip consecutive duplicates (same tool firing tool.complete twice, or
// two edits producing the same patch). Keeping this cheap — deeper
// dedupe against the final assistant text happens at message.complete.
if (this.segmentMessages.at(-1)?.text === block) {
return
}
this.segmentMessages = [...this.segmentMessages, { kind: 'diff', role: 'assistant', text: block }]
patchTurnState({ streamSegments: this.segmentMessages })
this.pendingInlineDiffs = [...this.pendingInlineDiffs, text]
}
pushActivity(text: string, tone: ActivityItem['tone'] = 'info', replaceLabel?: string) {
@@ -263,6 +234,7 @@ class TurnController {
this.idle()
this.clearReasoning()
this.clearStatusTimer()
this.pendingInlineDiffs = []
this.pendingSegmentTools = []
this.segmentMessages = []
this.turnTools = []
@@ -273,31 +245,31 @@ class TurnController {
const rawText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart()
const split = splitReasoning(rawText)
const finalText = split.text
// Skip appending if the assistant already narrated the diff inside a
// markdown fence of its own — otherwise we render two stacked diff
// blocks for the same edit.
const assistantAlreadyHasDiff = /```(?:diff|patch)\b/i.test(finalText)
const remainingInlineDiffs = assistantAlreadyHasDiff
? []
: this.pendingInlineDiffs.filter(diff => !finalText.includes(diff))
const inlineDiffBlock = remainingInlineDiffs.length
? `\`\`\`diff\n${remainingInlineDiffs.join('\n\n')}\n\`\`\``
: ''
const mergedText = [finalText, inlineDiffBlock].filter(Boolean).join('\n\n')
const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n')
const savedReasoningTokens = savedReasoning ? estimateTokensRough(savedReasoning) : 0
const savedToolTokens = this.toolTokenAcc
const tools = this.pendingSegmentTools
const finalMessages = [...this.segmentMessages]
// Drop diff-only segments the agent is about to narrate in the final
// reply. Without this, a closing "here's the diff …" message would
// render two stacked copies of the same patch. Only touches segments
// with `kind: 'diff'` emitted by pushInlineDiffSegment — real
// assistant narration stays put.
const finalHasOwnDiffFence = /```(?:diff|patch)\b/i.test(finalText)
const segments = this.segmentMessages.filter(msg => {
const body = diffSegmentBody(msg)
return body === null || (!finalHasOwnDiffFence && !finalText.includes(body))
})
const finalMessages = [...segments]
if (finalText) {
if (mergedText) {
finalMessages.push({
role: 'assistant',
text: finalText,
text: mergedText,
thinking: savedReasoning || undefined,
thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
toolTokens: savedToolTokens || undefined,
@@ -328,7 +300,7 @@ class TurnController {
this.bufRef = ''
patchTurnState({ activity: [], outcome: '' })
return { finalMessages, finalText, wasInterrupted }
return { finalMessages, finalText: mergedText, wasInterrupted }
}
recordMessageDelta({ rendered, text }: { rendered?: string; text?: string }) {
@@ -434,6 +406,7 @@ class TurnController {
this.bufRef = ''
this.interrupted = false
this.lastStatusNote = ''
this.pendingInlineDiffs = []
this.pendingSegmentTools = []
this.protocolWarned = false
this.segmentMessages = []
@@ -479,6 +452,7 @@ class TurnController {
this.endReasoningPhase()
this.clearReasoning()
this.activeTools = []
this.pendingInlineDiffs = []
this.turnTools = []
this.toolTokenAcc = 0
this.persistedToolLabels.clear()

View File

@@ -81,16 +81,11 @@ export const MessageLine = memo(function MessageLine({
return <Text {...(body ? { color: body } : {})}>{msg.text}</Text>
})()
// Diff segments (emitted by pushInlineDiffSegment between narration
// segments) need a blank line on both sides so the patch doesn't butt up
// against the prose around it.
const isDiffSegment = msg.kind === 'diff'
return (
<Box
flexDirection="column"
marginBottom={msg.role === 'user' || isDiffSegment ? 1 : 0}
marginTop={msg.role === 'user' || msg.kind === 'slash' || isDiffSegment ? 1 : 0}
marginBottom={msg.role === 'user' ? 1 : 0}
marginTop={msg.role === 'user' || msg.kind === 'slash' ? 1 : 0}
>
{showDetails && (
<Box flexDirection="column" marginBottom={1}>

View File

@@ -102,7 +102,7 @@ export interface ClarifyReq {
export interface Msg {
info?: SessionInfo
kind?: 'diff' | 'intro' | 'panel' | 'slash' | 'trail'
kind?: 'intro' | 'panel' | 'slash' | 'trail'
panelData?: PanelData
role: Role
text: string

View File

@@ -431,6 +431,35 @@ file_read_max_chars: 30000
The agent also deduplicates file reads automatically — if the same file region is read twice and the file hasn't changed, a lightweight stub is returned instead of re-sending the content. This resets on context compression so the agent can re-read files after their content is summarized away.
## Tool Output Truncation Limits
Three related caps control how much raw output a tool can return before Hermes truncates it:
```yaml
tool_output:
max_bytes: 50000 # terminal output cap (chars)
max_lines: 2000 # read_file pagination cap
max_line_length: 2000 # per-line cap in read_file's line-numbered view
```
- **`max_bytes`** — Despite the name, this cap is counted in characters, not bytes. When a `terminal` command produces more than this many characters of combined stdout/stderr, Hermes keeps the first 40% and last 60% and inserts a `[OUTPUT TRUNCATED]` notice between them. Default `50000` (≈12-15K tokens across typical tokenisers).
- **`max_lines`** — Upper bound on the `limit` parameter of a single `read_file` call. Requests above this are clamped so a single read can't flood the context window. Default `2000`.
- **`max_line_length`** — Per-line cap applied when `read_file` emits the line-numbered view. Lines longer than this are truncated to this many chars followed by `... [truncated]`. Default `2000`.
Raise the limits on models with large context windows that can afford more raw output per call. Lower them for small-context models to keep tool results compact:
```yaml
# Large context model (200K+)
tool_output:
max_bytes: 150000
max_lines: 5000
# Small local model (16K context)
tool_output:
max_bytes: 20000
max_lines: 500
```
## Git Worktree Isolation
Enable isolated git worktrees for running multiple agents in parallel on the same repo: