fix(ci): upload artifacts for lint-reports

fix(ci): diff typecheck fixes against PR branch-off point
fix: TerminalMenu returntype is int when multiselect is false
2026-05-06 17:13:38 -04:00 · 2026-05-06 17:05:00 -04:00 · 2026-05-06 17:05:00 -04:00 · 2026-05-06 17:05:00 -04:00 · 2026-05-06 14:01:56 -07:00 · 2026-05-06 13:54:46 -07:00
470 changed files with 46057 additions and 3965 deletions
@@ -25,3 +25,7 @@ ui-tui/packages/hermes-ink/dist/

 # Runtime data (bind-mounted at /opt/data; must not leak into build context)
 data/
+
+# Compose/profile runtime state (bind-mounted; avoid ownership/secret issues)
+hermes-config/
+runtime/
@@ -244,6 +244,15 @@ BROWSERBASE_PROXIES=true
 # Uses custom Chromium build to avoid bot detection altogether
 BROWSERBASE_ADVANCED_STEALTH=false

+# Browser engine for local mode (default: auto = Chrome)
+# "auto"       — use Chrome (don't pass --engine flag)
+# "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots)
+# "chrome"     — explicitly request Chrome
+# Requires agent-browser v0.25.3+. Lightpanda commands that fail or return
+# empty results are automatically retried with Chrome.
+# Also configurable via browser.engine in config.yaml.
+# AGENT_BROWSER_ENGINE=auto
+
 # Browser session timeout in seconds (default: 300)
 # Sessions are cleaned up after this duration of inactivity
 BROWSER_SESSION_TIMEOUT=300
@@ -0,0 +1,44 @@
+# Dependabot configuration for hermes-agent.
+#
+# Deliberately scoped to github-actions only.
+#
+# We do NOT enable Dependabot for pip / npm / any source-dependency ecosystem
+# because we pin source dependencies exactly (uv.lock, package-lock.json) as
+# part of our supply-chain posture. Automatic version-bump PRs against those
+# pins would undermine the strategy — pins are moved deliberately, after
+# review, not on a schedule.
+#
+# github-actions is the exception: action pins (we use full commit SHAs per
+# supply-chain policy) must be updated when upstream actions publish
+# patches — usually themselves security fixes. Dependabot opens a PR with
+# the new SHA and release notes; we review and merge like any other PR.
+#
+# Security-update PRs for source dependencies (opened ONLY when a CVE is
+# published affecting a currently-pinned version) are enabled separately
+# via the repo's Dependabot security updates setting
+# (Settings → Code security → Dependabot → Dependabot security updates).
+# Those are CVE-only, not schedule-driven, and do not conflict with our
+# pinning strategy — they fire when a pinned version becomes known-bad,
+# which is exactly when we want to move the pin.
+
+version: 2
+updates:
+  # Weekly version updates for the GitHub Actions referenced by workflows.
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      day: "monday"
+    open-pull-requests-limit: 5
+    labels:
+      - "dependencies"
+      - "github-actions"
+    commit-message:
+      # The prefix already carries its own scope. Do not add `include: "scope"`
+      # here: Dependabot appends "(deps)" after the prefix, which would produce
+      # "chore(actions)(deps): ..." titles.
+      prefix: "chore(actions)"
+    groups:
+      # Batch routine action bumps into one PR per week to reduce noise.
+      # Security updates still open individually and bypass grouping.
+      actions-minor-patch:
+        update-types:
+          - "minor"
+          - "patch"
@@ -0,0 +1,158 @@
+name: Lint (ruff + ty)
+
+# Surface ruff and ty diagnostics as a diff vs the target branch.
+# This check is advisory only for now: it always exits zero and never blocks merge.
+# It posts a Markdown summary to the workflow run and, for pull requests,
+# comments the same summary on the PR.
+
+on:
+  push:
+    branches: [main]
+    paths-ignore:
+      - "**/*.md"
+      - "docs/**"
+      - "website/**"
+  pull_request:
+    branches: [main]
+    paths-ignore:
+      - "**/*.md"
+      - "docs/**"
+      - "website/**"
+
+permissions:
+  contents: read
+  pull-requests: write # needed to post/update PR comments
+
+concurrency:
+  group: lint-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  lint-diff:
+    name: ruff + ty diff
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+        with:
+          fetch-depth: 0 # need full history for merge-base + worktree
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+
+      - name: Install ruff + ty
+        run: |
+          uv tool install ruff
+          uv tool install ty
+
+      - name: Determine base ref
+        id: base
+        env:
+          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
+        run: |
+          # Pick the commit the lint reports are diffed against and expose it
+          # to later steps as outputs `sha` (commit) and `ref` (display label).
+          #
+          # For PRs, diff against the PR's pinned parent commit
+          # (github.event.pull_request.base.sha — snapshot at PR open time,
+          # so later pushes to main don't leak into the diff).
+          # For pushes to main, diff against the previous commit on main.
+          #
+          # The event name is read from the runner-provided environment rather
+          # than interpolated into the script text.
+          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
+            BASE_SHA="${PR_BASE_SHA}"
+            BASE_REF="PR base (${BASE_SHA:0:7})"
+          else
+            # --verify --quiet prints nothing when HEAD~1 does not exist. A
+            # plain `git rev-parse HEAD~1` echoes the literal "HEAD~1" to
+            # stdout before failing, so the fallback would yield a two-line
+            # value and corrupt the $GITHUB_OUTPUT entries below.
+            BASE_SHA=$(git rev-parse --verify --quiet HEAD~1 || git rev-parse HEAD)
+            BASE_REF="HEAD~1"
+          fi
+          echo "sha=${BASE_SHA}" >> "$GITHUB_OUTPUT"
+          echo "ref=${BASE_REF}" >> "$GITHUB_OUTPUT"
+          echo "Base SHA: ${BASE_SHA}"
+          echo "Base ref: ${BASE_REF}"
+
+      - name: Run ruff + ty on HEAD
+        run: |
+          mkdir -p .lint-reports/head
+          ruff check --output-format json --exit-zero \
+            > .lint-reports/head/ruff.json || true
+          ty check --output-format gitlab --exit-zero \
+            > .lint-reports/head/ty.json || true
+          echo "HEAD ruff: $(wc -c < .lint-reports/head/ruff.json) bytes"
+          echo "HEAD ty:   $(wc -c < .lint-reports/head/ty.json) bytes"
+
+      - name: Run ruff + ty on base (via git worktree)
+        run: |
+          mkdir -p .lint-reports/base
+          # Use a worktree so we don't clobber the main checkout. If the base
+          # SHA is identical to HEAD (e.g. first commit), skip the scan and
+          # write empty-array base reports (the diff script also handles
+          # missing files).
+          HEAD_SHA=$(git rev-parse HEAD)
+          BASE_SHA="${{ steps.base.outputs.sha }}"
+          if [ "$BASE_SHA" = "$HEAD_SHA" ]; then
+            echo "Base SHA == HEAD SHA, skipping base scan."
+            echo '[]' > .lint-reports/base/ruff.json
+            echo '[]' > .lint-reports/base/ty.json
+          else
+            git worktree add --detach /tmp/lint-base "$BASE_SHA"
+            (
+              cd /tmp/lint-base
+              ruff check --output-format json --exit-zero \
+                > "$GITHUB_WORKSPACE/.lint-reports/base/ruff.json" || true
+              ty check --output-format gitlab --exit-zero \
+                > "$GITHUB_WORKSPACE/.lint-reports/base/ty.json" || true
+            )
+            git worktree remove --force /tmp/lint-base
+          fi
+          echo "base ruff: $(wc -c < .lint-reports/base/ruff.json) bytes"
+          echo "base ty:   $(wc -c < .lint-reports/base/ty.json) bytes"
+
+      - name: Generate diff summary
+        # Branch names are attacker-controlled (github.head_ref comes from the
+        # PR author). Pass them through the environment instead of
+        # interpolating them into the script text, so a crafted branch name
+        # cannot inject shell commands.
+        env:
+          BASE_REF: ${{ steps.base.outputs.ref }}
+          HEAD_REF: ${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}
+        run: |
+          # Compare base vs. HEAD reports and write a Markdown summary, then
+          # append it to the workflow run summary.
+          python scripts/lint_diff.py \
+            --base-ruff .lint-reports/base/ruff.json \
+            --head-ruff .lint-reports/head/ruff.json \
+            --base-ty   .lint-reports/base/ty.json \
+            --head-ty   .lint-reports/head/ty.json \
+            --base-ref  "$BASE_REF" \
+            --head-ref  "$HEAD_REF" \
+            --output    .lint-reports/summary.md
+          cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload reports as artifact
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        with:
+          name: lint-reports
+          path: .lint-reports/
+          retention-days: 14
+          # .lint-reports/ is a dot-prefixed (hidden) directory, and
+          # upload-artifact skips hidden files by default since v4.4.0. Opt back in.
+          include-hidden-files: true
+
+      - name: Post / update PR comment
+        # Fork PRs always receive a read-only GITHUB_TOKEN, so the comment API
+        # call would fail and turn this advisory check red. Skip the comment
+        # there; the summary is still appended to the workflow run summary.
+        if: >-
+          github.event_name == 'pull_request' &&
+          github.event.pull_request.head.repo.full_name == github.repository
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
+        with:
+          script: |
+            // Upsert a single sticky comment, identified by a hidden HTML
+            // marker, so repeated runs update one comment instead of piling up.
+            const fs = require('fs');
+            const body = fs.readFileSync('.lint-reports/summary.md', 'utf8');
+            const marker = '<!-- lint-diff-summary -->';
+            const fullBody = marker + '\n' + body;
+
+            // Paginate: a bare listComments call returns only the first page,
+            // so on a busy PR the existing marker comment would be missed and
+            // a duplicate created on every run.
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner: context.repo.owner,
+              repo:  context.repo.repo,
+              issue_number: context.issue.number,
+              per_page: 100,
+            });
+            const existing = comments.find(c => c.body && c.body.includes(marker));
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo:  context.repo.repo,
+                comment_id: existing.id,
+                body: fullBody,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo:  context.repo.repo,
+                issue_number: context.issue.number,
+                body: fullBody,
+              });
+            }
@@ -0,0 +1,67 @@
+name: OSV-Scanner
+
+# Scans lockfiles (uv.lock, package-lock.json) against the OSV vulnerability
+# database. Runs on PRs and pushes to main that touch a lockfile or package
+# manifest, on a weekly schedule against main, and on manual dispatch.
+#
+# This is detection-only — OSV-Scanner does NOT open PRs or modify pins.
+# It reports known CVEs in currently-pinned dependency versions so we can
+# decide when and how to patch on our own schedule. Our pinning strategy
+# (full SHA / exact version) is preserved; only the notification signal
+# is added.
+#
+# Complements the existing supply-chain-audit.yml workflow (which scans
+# for malicious code patterns in PR diffs) by covering the orthogonal
+# "currently-pinned dep became known-vulnerable" case.
+#
+# Uses Google's officially-recommended reusable workflow, pinned by SHA.
+# Findings land in the repo's Security tab (Code Scanning > OSV-Scanner).
+# fail-on-vuln is disabled so the job does not block merges on pre-existing
+# vulnerabilities in pinned deps that we may need to patch deliberately.
+
+# Triggers: dependency-file changes on PRs and on main, a weekly cron, and
+# manual dispatch.
+on:
+  pull_request:
+    branches: [main]
+    paths:
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'package.json'
+      - 'package-lock.json'
+      - 'ui-tui/package.json'
+      - 'ui-tui/package-lock.json'
+      - 'website/package.json'
+      - 'website/package-lock.json'
+      - '.github/workflows/osv-scanner.yml'
+  # NOTE(review): the push filter omits ui-tui/package.json,
+  # website/package.json and this workflow file, all of which the
+  # pull_request filter includes — confirm the asymmetry is intentional.
+  push:
+    branches: [main]
+    paths:
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'package.json'
+      - 'package-lock.json'
+      - 'ui-tui/package-lock.json'
+      - 'website/package-lock.json'
+  schedule:
+    # Weekly scan against main — catches CVEs published after merge for
+    # deps that haven't changed since.
+    - cron: '0 9 * * 1'
+  workflow_dispatch:
+
+permissions:
+  # Required by the reusable workflow to upload SARIF to the Security tab.
+  actions: read
+  contents: read
+  security-events: write
+
+jobs:
+  # Single job that delegates to the SHA-pinned OSV-Scanner reusable workflow.
+  scan:
+    name: Scan lockfiles
+    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@c51854704019a247608d928f370c98740469d4b5  # v2.3.5
+    with:
+      # Scan explicit lockfiles rather than recursing, so we only look at
+      # the three sources of truth and skip vendored / test / worktree dirs.
+      # NOTE(review): the root package-lock.json is listed in the trigger
+      # paths of this workflow but is not passed as a --lockfile here —
+      # confirm it is meant to be excluded from the scan.
+      scan-args: |-
+        --lockfile=uv.lock
+        --lockfile=ui-tui/package-lock.json
+        --lockfile=website/package-lock.json
+      # Report findings without failing the job.
+      fail-on-vuln: false
@@ -37,12 +37,18 @@ hermes-agent/
 │   ├── platforms/        # Adapter per platform (telegram, discord, slack, whatsapp,
 │   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
 │   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
-│   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
+│   │                     #   yuanbao, webhook, api_server, ...). See ADDING_A_PLATFORM.md.
 │   └── builtin_hooks/    # Extension point for always-registered gateway hooks (none shipped)
 ├── plugins/              # Plugin system (see "Plugins" section below)
 │   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
 │   ├── context_engine/   # Context-engine plugins
-│   └── <others>/         # Dashboard, image-gen, disk-cleanup, examples, ...
+│   ├── model-providers/  # Inference backend plugins (openrouter, anthropic, gmi, ...)
+│   ├── kanban/           # Multi-agent board dispatcher + worker plugin
+│   ├── hermes-achievements/  # Gamified achievement tracking
+│   ├── observability/    # Metrics / traces / logs plugin
+│   ├── image_gen/        # Image-generation providers
+│   └── <others>/         # disk-cleanup, example-dashboard, google_meet, platforms,
+│                         #   spotify, strike-freedom-cockpit, ...
 ├── optional-skills/      # Heavier/niche skills shipped but NOT active by default
 ├── skills/               # Built-in skills bundled with the repo
 ├── ui-tui/               # Ink (React) terminal UI — `hermes --tui`
@@ -53,7 +59,7 @@ hermes-agent/
 ├── environments/         # RL training environments (Atropos)
 ├── scripts/              # run_tests.sh, release.py, auxiliary scripts
 ├── website/              # Docusaurus docs site
-└── tests/                # Pytest suite (~15k tests across ~700 files as of Apr 2026)
+└── tests/                # Pytest suite (~17k tests across ~900 files as of May 2026)
 ```

 **User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
@@ -257,7 +263,16 @@ The dashboard embeds the real `hermes --tui` — **not** a rewrite.  See `hermes

 ## Adding New Tools

-Requires changes in **2 files**:
+For most custom or local-only tools, do **not** edit Hermes core. Use the plugin
+route instead: create `~/.hermes/plugins/<name>/plugin.yaml` and
+`~/.hermes/plugins/<name>/__init__.py`, then register tools with
+`ctx.register_tool(...)`. Plugin toolsets are discovered automatically and can be
+enabled or disabled without touching `tools/` or `toolsets.py`.
+
+Use the built-in route below only when the user is explicitly contributing a new
+core Hermes tool that should ship in the base system.
+
+Built-in/core tools require changes in **2 files**:

 **1. Create `tools/your_tool.py`:**
 ```python
@@ -280,9 +295,9 @@ registry.register(
 )
 ```

-**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
+**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset. **This step is required:** auto-discovery imports the tool and registers its schema, but the tool is only *exposed to an agent* if its name appears in a toolset. `_HERMES_CORE_TOOLS` is not dead code — it's the default bundle every platform's base toolset inherits from.

-Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.
+Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain. Wiring into a toolset is still a deliberate, manual step.

 The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.

@@ -304,6 +319,22 @@ The registry handles schema collection, dispatch, availability checking, and err
   section is handled automatically by the deep-merge and does NOT require
   a version bump.

+### Top-level `config.yaml` sections (non-exhaustive):
+
+`model`, `agent`, `terminal`, `compression`, `display`, `stt`, `tts`,
+`memory`, `security`, `delegation`, `smart_model_routing`, `checkpoints`,
+`auxiliary`, `curator`, `skills`, `gateway`, `logging`, `cron`, `profiles`,
+`plugins`, `honcho`.
+
+`auxiliary` holds per-task overrides for side-LLM work (curator, vision,
+embedding, title generation, session_search, etc.) — each task can pin
+its own provider/model/base_url/max_tokens/reasoning_effort. See
+`agent/auxiliary_client.py::_resolve_auto` for resolution order.
+
+`curator` holds the background skill-maintenance config —
+`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
+`archive_after_days`, `backup` (nested).
+
 ### .env variables (SECRETS ONLY — API keys, tokens, passwords):
 1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
 ```python
@@ -482,6 +513,31 @@ generic plugin surface (new hook, new ctx method) — never hardcode
 plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
 honcho argparse from `main.py` for exactly this reason.

+### Model-provider plugins (`plugins/model-providers/<name>/`)
+
+Every inference backend (openrouter, anthropic, gmi, deepseek, nvidia, …)
+ships as a plugin here. Each plugin's `__init__.py` calls
+`providers.register_provider(ProviderProfile(...))` at module load.
+`providers/__init__.py::_discover_providers()` is a **lazy, separate
+discovery system** — plugin directories are scanned on the first
+`get_provider_profile()` or `list_providers()` call, NOT by the general
+PluginManager.
+
+Scan order:
+1. Bundled: `<repo>/plugins/model-providers/<name>/`
+2. User: `$HERMES_HOME/plugins/model-providers/<name>/`
+3. Legacy: `<repo>/providers/<name>.py` (back-compat)
+
+User plugins of the same name override bundled ones — `register_provider()`
+is last-writer-wins. This lets third parties swap out any built-in
+profile without a repo patch.
+
+The general PluginManager records `kind: model-provider` manifests but does
+NOT import them (would double-instantiate `ProviderProfile`). Plugins
+without an explicit `kind:` get auto-coerced via a source-text heuristic
+(`register_provider` + `ProviderProfile` in `__init__.py`).
+
+Full authoring guide: `website/docs/developer-guide/model-provider-plugin.md`.
+
 ### Dashboard / context-engine / image-gen plugin directories

 `plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
@@ -510,11 +566,176 @@ niche skills belong in `optional-skills/`.

 ### SKILL.md frontmatter

-Standard fields: `name`, `description`, `version`, `platforms`
-(OS-gating list: `[macos]`, `[linux, macos]`, ...),
+Standard fields: `name`, `description`, `version`, `author`, `license`,
+`platforms` (OS-gating list: `[macos]`, `[linux, macos]`, ...),
 `metadata.hermes.tags`, `metadata.hermes.category`,
-`metadata.hermes.config` (config.yaml settings the skill needs — stored
-under `skills.config.<key>`, prompted during setup, injected at load time).
+`metadata.hermes.related_skills`, `metadata.hermes.config` (config.yaml
+settings the skill needs — stored under `skills.config.<key>`, prompted
+during setup, injected at load time).
+
+Top-level `tags:` and `category:` are also accepted and mirrored from
+`metadata.hermes.*` by the loader.
+
+---
+
+## Toolsets
+
+All toolsets are defined in `toolsets.py` as a single `TOOLSETS` dict.
+Each platform's adapter picks a base toolset (e.g. Telegram uses
+`"messaging"`); `_HERMES_CORE_TOOLS` is the default bundle most
+platforms inherit from.
+
+Current toolset keys: `browser`, `clarify`, `code_execution`, `cronjob`,
+`debugging`, `delegation`, `discord`, `discord_admin`, `feishu_doc`,
+`feishu_drive`, `file`, `homeassistant`, `image_gen`, `kanban`, `memory`,
+`messaging`, `moa`, `rl`, `safe`, `search`, `session_search`, `skills`,
+`spotify`, `terminal`, `todo`, `tts`, `video`, `vision`, `web`, `yuanbao`.
+
+Enable/disable per platform via `hermes tools` (the curses UI) or the
+`tools.<platform>.enabled` / `tools.<platform>.disabled` lists in
+`config.yaml`.
+
+---
+
+## Delegation (`delegate_task`)
+
+`tools/delegate_tool.py` spawns a subagent with an isolated
+context + terminal session. Synchronous: the parent waits for the
+child's summary before continuing its own loop — if the parent is
+interrupted, the child is cancelled.
+
+Two shapes:
+
+- **Single:** pass `goal` (+ optional `context`, `toolsets`).
+- **Batch (parallel):** pass `tasks: [...]` — each gets its own subagent
+  running concurrently. Concurrency is capped by
+  `delegation.max_concurrent_children` (default 3).
+
+Roles:
+
+- `role="leaf"` (default) — focused worker. Cannot call `delegate_task`,
+  `clarify`, `memory`, `send_message`, `execute_code`.
+- `role="orchestrator"` — retains `delegate_task` so it can spawn its
+  own workers. Gated by `delegation.orchestrator_enabled` (default true)
+  and bounded by `delegation.max_spawn_depth` (default 2).
+
+Key config knobs (under `delegation:` in `config.yaml`):
+`max_concurrent_children`, `max_spawn_depth`, `child_timeout_seconds`,
+`orchestrator_enabled`, `subagent_auto_approve`, `inherit_mcp_toolsets`,
+`max_iterations`.
+
+Synchronicity rule: delegate_task is **not** durable. For long-running
+work that must outlive the current turn, use `cronjob` or
+`terminal(background=True, notify_on_complete=True)` instead.
+
+---
+
+## Curator (skill lifecycle)
+
+Background skill-maintenance system that tracks usage on agent-created
+skills and auto-archives stale ones. Users never lose skills; archives
+go to `~/.hermes/skills/.archive/` and are restorable.
+
+- **Core:** `agent/curator.py` (review loop, auto-transitions, LLM review
+  prompt) + `agent/curator_backup.py` (pre-run tar.gz snapshots).
+- **CLI:** `hermes_cli/curator.py` wires `hermes curator <verb>` where
+  verbs are: `status`, `run`, `pause`, `resume`, `pin`, `unpin`,
+  `archive`, `restore`, `prune`, `backup`, `rollback`.
+- **Telemetry:** `tools/skill_usage.py` owns the sidecar
+  `~/.hermes/skills/.usage.json` — per-skill `use_count`, `view_count`,
+  `patch_count`, `last_activity_at`, `state` (active / stale /
+  archived), `pinned`.
+
+Invariants:
+- Curator only touches skills with `created_by: "agent"` provenance —
+  bundled + hub-installed skills are off-limits.
+- Never deletes; max destructive action is archive.
+- Pinned skills are exempt from every auto-transition and from the
+  LLM review pass.
+- `skill_manage(action="delete")` refuses pinned skills; patch/edit/
+  write_file/remove_file go through so the agent can keep improving
+  pinned skills.
+
+Config section (`curator:` in `config.yaml`):
+`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
+`archive_after_days`, `backup.*`.
+
+Full user-facing docs: `website/docs/user-guide/features/curator.md`.
+
+---
+
+## Cron (scheduled jobs)
+
+`cron/jobs.py` (job store) + `cron/scheduler.py` (tick loop). Agents
+schedule jobs via the `cronjob` tool; users via `hermes cron <verb>`
+(`list`, `add`, `edit`, `pause`, `resume`, `run`, `remove`) or the
+`/cron` slash command.
+
+Supported schedule formats:
+- Duration: `"30m"`, `"2h"`, `"1d"`
+- "every" phrase: `"every 2h"`, `"every monday 9am"`
+- 5-field cron expression: `"0 9 * * *"`
+- ISO timestamp (one-shot): `"2026-06-01T09:00:00Z"`
+
+Per-job fields include `skills` (load specific skills), `model` /
+`provider` overrides, `script` (pre-run data-collection script whose
+stdout is injected into the prompt; `no_agent=True` turns the script
+into the entire job), `context_from` (chain job A's last output into
+job B's prompt), `workdir` (run in a specific directory with its
+`AGENTS.md`/`CLAUDE.md` loaded), and multi-platform delivery.
+
+Hardening invariants:
+- **3-minute hard interrupt** on cron sessions — runaway agent loops
+  cannot monopolize the scheduler.
+- Catchup window: half the job's period, clamped to 120s–2h.
+- Grace window: 120s for one-shot jobs whose fire time was missed.
+- File lock at `~/.hermes/cron/.tick.lock` prevents duplicate ticks
+  across processes.
+- Cron sessions pass `skip_memory=True` by default; memory providers
+  intentionally do not run during cron.
+
+Cron deliveries are **not** mirrored into the target gateway session —
+they land in their own cron session with a header/footer frame so the
+main conversation's message-role alternation stays intact.
+
+---
+
+## Kanban (multi-agent work queue)
+
+Durable SQLite-backed board that lets multiple profiles / workers
+collaborate on shared tasks. Users drive it via `hermes kanban <verb>`;
+workers spawned by the dispatcher drive it via a dedicated `kanban_*`
+toolset so their schema footprint is zero when they're not inside a
+kanban task.
+
+- **CLI:** `hermes_cli/kanban.py` wires `hermes kanban` with verbs
+  `init`, `create`, `list` (alias `ls`), `show`, `assign`, `link`,
+  `unlink`, `comment`, `complete`, `block`, `unblock`, `archive`,
+  `tail`, plus less-commonly-used `watch`, `stats`, `runs`, `log`,
+  `assignees`, `heartbeat`, `notify-*`, `dispatch`, `daemon`, `gc`.
+- **Worker toolset:** `tools/kanban_tools.py` exposes `kanban_show`,
+  `kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`,
+  `kanban_create`, `kanban_link` — gated by `HERMES_KANBAN_TASK` so
+  the schema only appears for processes actually running as a worker.
+- **Dispatcher:** long-lived loop that (default every 60s) reclaims
+  stale claims, promotes ready tasks, atomically claims, and spawns
+  assigned profiles. Runs **inside the gateway** by default via
+  `kanban.dispatch_in_gateway: true`.
+- **Plugin assets:** `plugins/kanban/dashboard/` (web UI) +
+  `plugins/kanban/systemd/` (`hermes-kanban-dispatcher.service` for
+  standalone dispatcher deployment).
+
+Isolation model:
+- **Board** is the hard boundary — workers are spawned with
+  `HERMES_KANBAN_BOARD` pinned in their env so they can't see other
+  boards.
+- **Tenant** is a soft namespace *within* a board — one specialist
+  fleet can serve multiple businesses with workspace-path + memory-key
+  isolation.
+- After ~5 consecutive spawn failures on the same task the dispatcher
+  auto-blocks it to prevent spin loops.
+
+Full user-facing docs: `website/docs/user-guide/features/kanban.md`.

 ---

@@ -9,6 +9,7 @@
  <a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
  <a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
  <a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
+  <a href="README.zh-CN.md"><img src="https://img.shields.io/badge/Lang-中文-red?style=for-the-badge" alt="中文"></a>
 </p>

 **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
@@ -21,7 +22,7 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open
 <tr><td><b>A closed learning loop</b></td><td>Agent-curated memory with periodic nudges. Autonomous skill creation after complex tasks. Skills self-improve during use. FTS5 session search with LLM summarization for cross-session recall. <a href="https://github.com/plastic-labs/honcho">Honcho</a> dialectic user modeling. Compatible with the <a href="https://agentskills.io">agentskills.io</a> open standard.</td></tr>
 <tr><td><b>Scheduled automations</b></td><td>Built-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended.</td></tr>
 <tr><td><b>Delegates and parallelizes</b></td><td>Spawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns.</td></tr>
-<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Six terminal backends — local, Docker, SSH, Daytona, Singularity, and Modal. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
+<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Seven terminal backends — local, Docker, SSH, Singularity, Modal, Daytona, and Vercel Sandbox. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
 <tr><td><b>Research-ready</b></td><td>Batch trajectory generation, Atropos RL environments, trajectory compression for training the next generation of tool-calling models.</td></tr>
 </table>

@@ -0,0 +1,186 @@
+<p align="center">
+  <img src="assets/banner.png" alt="Hermes Agent" width="100%">
+</p>
+
+# Hermes Agent ☤
+
+<p align="center">
+  <a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
+  <a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
+  <a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
+  <a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
+  <a href="README.md"><img src="https://img.shields.io/badge/Lang-English-lightgrey?style=for-the-badge" alt="English"></a>
+</p>
+
+**由 [Nous Research](https://nousresearch.com) 构建的自进化 AI 代理。** 它是唯一内置学习闭环的智能代理——从经验中创建技能，在使用中改进技能，主动持久化知识，搜索过往对话，并在跨会话中逐步构建对你的深度理解。可以在 $5 的 VPS 上运行，也可以在 GPU 集群上运行，或者使用几乎零成本的 Serverless 基础设施。它不绑定你的笔记本——你可以在 Telegram 上与它对话，而它在云端 VM 上工作。
+
+支持任意模型——[Nous Portal](https://portal.nousresearch.com)、[OpenRouter](https://openrouter.ai)（200+ 模型）、[NVIDIA NIM](https://build.nvidia.com)（Nemotron）、[小米 MiMo](https://platform.xiaomimimo.com)、[z.ai/GLM](https://z.ai)、[Kimi/Moonshot](https://platform.moonshot.ai)、[MiniMax](https://www.minimax.io)、[Hugging Face](https://huggingface.co)、OpenAI，或自定义端点。使用 `hermes model` 即可切换——无需改代码，无锁定。
+
+<table>
+<tr><td><b>真正的终端界面</b></td><td>完整的 TUI，支持多行编辑、斜杠命令自动补全、对话历史、中断重定向和流式工具输出。</td></tr>
+<tr><td><b>随你所在</b></td><td>Telegram、Discord、Slack、WhatsApp、Signal 和 CLI——全部从单个网关进程运行。语音备忘录转写、跨平台对话连续性。</td></tr>
+<tr><td><b>闭环学习</b></td><td>代理管理记忆并定期自我提醒。复杂任务后自动创建技能。技能在使用中自我改进。FTS5 会话搜索配合 LLM 摘要实现跨会话回溯。<a href="https://github.com/plastic-labs/honcho">Honcho</a> 辩证式用户建模。兼容 <a href="https://agentskills.io">agentskills.io</a> 开放标准。</td></tr>
+<tr><td><b>定时自动化</b></td><td>内置 cron 调度器，支持向任何平台投递。日报、夜间备份、周审计——全部用自然语言描述，无人值守运行。</td></tr>
+<tr><td><b>委派与并行</b></td><td>生成隔离子代理处理并行工作流。编写 Python 脚本通过 RPC 调用工具，将多步管道压缩为零上下文开销的轮次。</td></tr>
+<tr><td><b>随处运行</b></td><td>七种终端后端——本地、Docker、SSH、Singularity、Modal、Daytona 和 Vercel Sandbox。Daytona 和 Modal 提供 Serverless 持久化——代理环境空闲时休眠、按需唤醒，空闲期间几乎零成本。$5 VPS 或 GPU 集群都能跑。</td></tr>
+<tr><td><b>研究就绪</b></td><td>批量轨迹生成、Atropos RL 环境、轨迹压缩——用于训练下一代工具调用模型。</td></tr>
+</table>
+
+---
+
+## 快速安装
+
+```bash
+curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
+```
+
+支持 Linux、macOS、WSL2 和 Android (Termux)。安装程序会自动处理平台特定的配置。
+
+> **Android / Termux：** 已测试的手动安装路径请参考 [Termux 指南](https://hermes-agent.nousresearch.com/docs/getting-started/termux)。在 Termux 上，Hermes 会安装精选的 `.[termux]` 扩展，因为完整的 `.[all]` 扩展会拉取 Android 不兼容的语音依赖。
+>
+> **Windows：** 原生 Windows 不受支持。请安装 [WSL2](https://learn.microsoft.com/zh-cn/windows/wsl/install) 并运行上述命令。
+
+安装后：
+
+```bash
+source ~/.bashrc    # 重新加载 shell（或: source ~/.zshrc）
+hermes              # 开始对话！
+```
+
+---
+
+## 快速入门
+
+```bash
+hermes              # 交互式 CLI — 开始对话
+hermes model        # 选择 LLM 提供商和模型
+hermes tools        # 配置启用的工具
+hermes config set   # 设置单个配置项
+hermes gateway      # 启动消息网关（Telegram、Discord 等）
+hermes setup        # 运行完整设置向导（一次性配置所有内容）
+hermes claw migrate # 从 OpenClaw 迁移（如果来自 OpenClaw）
+hermes update       # 更新到最新版本
+hermes doctor       # 诊断问题
+```
+
+📖 **[完整文档 →](https://hermes-agent.nousresearch.com/docs/)**
+
+## CLI 与消息平台 快速对照
+
+Hermes 有两种入口：用 `hermes` 启动终端 UI，或运行网关从 Telegram、Discord、Slack、WhatsApp、Signal 或 Email 与之对话。进入对话后，许多斜杠命令在两种界面中通用。
+
+| 操作 | CLI | 消息平台 |
+|------|-----|----------|
+| 开始对话 | `hermes` | 运行 `hermes gateway setup` + `hermes gateway start`，然后给机器人发消息 |
+| 开始新对话 | `/new` 或 `/reset` | `/new` 或 `/reset` |
+| 更换模型 | `/model [provider:model]` | `/model [provider:model]` |
+| 设置人格 | `/personality [name]` | `/personality [name]` |
+| 重试或撤销上一轮 | `/retry`、`/undo` | `/retry`、`/undo` |
+| 压缩上下文 / 查看用量 | `/compress`、`/usage`、`/insights [--days N]` | `/compress`、`/usage`、`/insights [days]` |
+| 浏览技能 | `/skills` 或 `/<skill-name>` | `/skills` 或 `/<skill-name>` |
+| 中断当前工作 | `Ctrl+C` 或发送新消息 | `/stop` 或发送新消息 |
+| 平台特定状态 | `/platforms` | `/status`、`/sethome` |
+
+完整命令列表请参阅 [CLI 指南](https://hermes-agent.nousresearch.com/docs/user-guide/cli) 和 [消息网关指南](https://hermes-agent.nousresearch.com/docs/user-guide/messaging)。
+
+---
+
+## 文档
+
+所有文档位于 **[hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)**：
+
+| 章节 | 内容 |
+|------|------|
+| [快速开始](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart) | 安装 → 设置 → 2 分钟内开始首次对话 |
+| [CLI 使用](https://hermes-agent.nousresearch.com/docs/user-guide/cli) | 命令、快捷键、人格、会话 |
+| [配置](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | 配置文件、提供商、模型、所有选项 |
+| [消息网关](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram、Discord、Slack、WhatsApp、Signal、Home Assistant |
+| [安全](https://hermes-agent.nousresearch.com/docs/user-guide/security) | 命令审批、DM 配对、容器隔离 |
+| [工具与工具集](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) | 40+ 工具、工具集系统、终端后端 |
+| [技能系统](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills) | 过程记忆、技能中心、创建技能 |
+| [记忆](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory) | 持久记忆、用户画像、最佳实践 |
+| [MCP 集成](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) | 连接任意 MCP 服务器扩展能力 |
+| [定时调度](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron) | 定时任务与平台投递 |
+| [上下文文件](https://hermes-agent.nousresearch.com/docs/user-guide/features/context-files) | 影响每次对话的项目上下文 |
+| [架构](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture) | 项目结构、代理循环、关键类 |
+| [贡献](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) | 开发设置、PR 流程、代码风格 |
+| [CLI 参考](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) | 所有命令和标志 |
+| [环境变量](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) | 完整环境变量参考 |
+
+---
+
+## 从 OpenClaw 迁移
+
+如果你来自 OpenClaw，Hermes 可以自动导入你的设置、记忆、技能和 API 密钥。
+
+**首次安装时：** 安装向导（`hermes setup`）会自动检测 `~/.openclaw` 并在配置开始前提供迁移选项。
+
+**安装后任意时间：**
+
+```bash
+hermes claw migrate              # 交互式迁移（完整预设）
+hermes claw migrate --dry-run    # 预览将要迁移的内容
+hermes claw migrate --preset user-data   # 仅迁移用户数据，不含密钥
+hermes claw migrate --overwrite  # 覆盖已有冲突
+```
+
+导入内容：
+- **SOUL.md** — 人格文件
+- **记忆** — MEMORY.md 和 USER.md 条目
+- **技能** — 用户创建的技能 → `~/.hermes/skills/openclaw-imports/`
+- **命令白名单** — 审批模式
+- **消息设置** — 平台配置、允许用户、工作目录
+- **API 密钥** — 白名单中的密钥（Telegram、OpenRouter、OpenAI、Anthropic、ElevenLabs）
+- **TTS 资产** — 工作区音频文件
+- **工作区指令** — AGENTS.md（使用 `--workspace-target`）
+
+使用 `hermes claw migrate --help` 查看所有选项，或使用 `openclaw-migration` 技能进行交互式代理引导迁移（含 dry-run 预览）。
+
+---
+
+## 贡献
+
+欢迎贡献！请参阅 [贡献指南](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) 了解开发设置、代码风格和 PR 流程。
+
+贡献者快速开始——克隆并使用 `setup-hermes.sh`：
+
+```bash
+git clone https://github.com/NousResearch/hermes-agent.git
+cd hermes-agent
+./setup-hermes.sh     # 安装 uv、创建 venv、安装 .[all]、创建符号链接 ~/.local/bin/hermes
+./hermes              # 自动检测 venv，无需先 source
+```
+
+手动安装（等效于上述命令）：
+
+```bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+uv venv venv --python 3.11
+source venv/bin/activate
+uv pip install -e ".[all,dev]"
+python -m pytest tests/ -q
+```
+
+> **RL 训练（可选）：** 如需参与 RL/Tinker-Atropos 集成开发：
+> ```bash
+> git submodule update --init tinker-atropos
+> uv pip install -e "./tinker-atropos"
+> ```
+
+---
+
+## 社区
+
+- 💬 [Discord](https://discord.gg/NousResearch)
+- 📚 [技能中心](https://agentskills.io)
+- 🐛 [问题反馈](https://github.com/NousResearch/hermes-agent/issues)
+- 💡 [讨论区](https://github.com/NousResearch/hermes-agent/discussions)
+- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — 社区微信桥接：在同一微信账号上运行 Hermes Agent 和 OpenClaw。
+
+---
+
+## 许可证
+
+MIT — 详见 [LICENSE](LICENSE)。
+
+由 [Nous Research](https://nousresearch.com) 构建。
@@ -466,17 +466,10 @@ class SessionManager:
                except Exception:
                    logger.debug("Failed to update ACP session metadata", exc_info=True)

-            # Replace stored messages with current history.
-            db.clear_messages(state.session_id)
-            for msg in state.history:
-                db.append_message(
-                    session_id=state.session_id,
-                    role=msg.get("role", "user"),
-                    content=msg.get("content"),
-                    tool_name=msg.get("tool_name") or msg.get("name"),
-                    tool_calls=msg.get("tool_calls"),
-                    tool_call_id=msg.get("tool_call_id"),
-                )
+            # Replace stored messages with current history atomically so a
+            # mid-rewrite failure rolls back and the previously persisted
+            # conversation is preserved (salvaged from #13675).
+            db.replace_messages(state.session_id, state.history)
        except Exception:
            logger.warning("Failed to persist ACP session %s", state.session_id, exc_info=True)

@@ -76,6 +76,7 @@ _ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
 # Models where temperature/top_p/top_k return 400 if set to non-default values.
 # This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
 _NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
+_FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")

 # ── Max output token limits per Anthropic model ───────────────────────
 # Source: Anthropic docs + Cline model catalog.  Anthropic's API requires
@@ -105,6 +106,9 @@ _ANTHROPIC_OUTPUT_LIMITS = {
    "claude-3-haiku":      4_096,
    # Third-party Anthropic-compatible providers
    "minimax":            131_072,
+    # Qwen models via DashScope Anthropic-compatible endpoint
+    # DashScope enforces max_tokens ∈ [1, 65536]
+    "qwen3":               65_536,
 }

 # For any model not in the table, assume the highest current limit.
@@ -216,6 +220,17 @@ def _forbids_sampling_params(model: str) -> bool:
    return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)


+def _supports_fast_mode(model: str) -> bool:
+    """Return True for models that support Anthropic Fast Mode (speed=fast).
+
+    Per Anthropic docs, fast mode is currently supported on Opus 4.6 only.
+    Sending ``speed: "fast"`` to any other Claude model (including Opus 4.7)
+    returns HTTP 400. This guard prevents silently 400'ing when stale config
+    or older callers leave fast mode enabled across a model upgrade.
+
+    Args:
+        model: Model identifier to test. Must be a string; it is matched
+            as-is (no lowercasing or trimming is applied here).
+
+    Returns:
+        True when any entry of ``_FAST_MODE_SUPPORTED_SUBSTRINGS`` occurs
+        anywhere inside ``model``; False otherwise.
+    """
+    # Plain, case-sensitive substring test against the allow-list.
+    return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS)
+
+
 # Beta headers for enhanced features (sent with ALL auth types).
 # As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
 # beta headers are still accepted (harmless no-op) but not required. Kept
@@ -1222,6 +1237,14 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
    ``keep_nullable_hint=False`` because the Anthropic validator does not
    recognize the OpenAPI-style ``nullable: true`` extension and strict
    schema-to-grammar converters may reject unknown keywords.
+
+    Top-level ``oneOf``/``allOf``/``anyOf`` are also stripped here: the
+    Anthropic API rejects union keywords at the schema root with a generic
+    HTTP 400. Several upstream and plugin tools ship schemas with one of
+    these keywords at the top level (commonly for Pydantic discriminated
+    unions). If we land here with those keywords still present after
+    nullable-union stripping, drop them and fall back to a plain object
+    schema so the tool still validates at the Anthropic boundary.
    """
    if not schema:
        return {"type": "object", "properties": {}}
@@ -1231,6 +1254,12 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
    normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
    if not isinstance(normalized, dict):
        return {"type": "object", "properties": {}}
+    # Strip top-level union keywords that Anthropic's validator rejects.
+    banned = {"oneOf", "allOf", "anyOf"}
+    if banned & normalized.keys():
+        normalized = {k: v for k, v in normalized.items() if k not in banned}
+        if "type" not in normalized:
+            normalized["type"] = "object"
    if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
        normalized = {**normalized, "properties": {}}
    return normalized
@@ -1915,9 +1944,15 @@ def build_anthropic_kwargs(

    # ── Fast mode (Opus 4.6 only) ────────────────────────────────────
    # Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
-    # output speed. Only for native Anthropic endpoints — third-party
-    # providers would reject the unknown beta header and speed parameter.
-    if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
+    # output speed. Per Anthropic docs, fast mode is only supported on
+    # Opus 4.6 — Opus 4.7 and other models 400 on the speed parameter.
+    # Only for native Anthropic endpoints — third-party providers would
+    # reject the unknown beta header and speed parameter.
+    if (
+        fast_mode
+        and not _is_third_party_anthropic_endpoint(base_url)
+        and _supports_fast_mode(model)
+    ):
        kwargs.setdefault("extra_body", {})["speed"] = "fast"
        # Build extra_headers with ALL applicable betas (the per-request
        # extra_headers override the client-level anthropic-beta header).
@@ -196,6 +196,12 @@ def _is_kimi_model(model: Optional[str]) -> bool:
    return bare.startswith("kimi-") or bare == "kimi"


+def _is_arcee_trinity_thinking(model: Optional[str]) -> bool:
+    """True for Arcee Trinity Large Thinking (direct or via OpenRouter).
+
+    Args:
+        model: Model identifier, optionally prefixed with a vendor path
+            such as ``"vendor/name"``. ``None`` is treated as an empty string.
+
+    Returns:
+        True only when the normalized bare name equals
+        ``"trinity-large-thinking"`` exactly; any other name, including one
+        with extra suffix text, returns False.
+    """
+    # Normalize: None -> "", trim whitespace, lowercase, then keep only the
+    # text after the last "/" so vendor-prefixed slugs compare equal.
+    bare = (model or "").strip().lower().rsplit("/", 1)[-1]
+    return bare == "trinity-large-thinking"
+
+
 def _fixed_temperature_for_model(
    model: Optional[str],
    base_url: Optional[str] = None,
@@ -213,10 +219,46 @@ def _fixed_temperature_for_model(
    if _is_kimi_model(model):
        logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
        return OMIT_TEMPERATURE
+    if _is_arcee_trinity_thinking(model):
+        return 0.5
+    return None
+
+
+def _compression_threshold_for_model(model: Optional[str]) -> Optional[float]:
+    """Return a context-compression threshold override for specific models.
+
+    The threshold is the fraction of the model's context window that must be
+    consumed before Hermes triggers summarization.  Higher values delay
+    compression and preserve more raw context.
+
+    Returns a float in (0, 1] to override the global ``compression.threshold``
+    config value, or ``None`` to leave the user's config value unchanged.
+
+    Args:
+        model: Model identifier; ``None`` is accepted and yields ``None``.
+    """
+    # Arcee Trinity Large Thinking is the only model with an override here.
+    if _is_arcee_trinity_thinking(model):
+        return 0.75
     return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
-_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
+def _get_aux_model_for_provider(provider_id: str) -> str:
+    """Return the cheap auxiliary model for a provider.
+
+    Reads from ProviderProfile.default_aux_model first, falling back to the
+    legacy hardcoded dict for providers that predate the profiles system.
+    """
+    try:
+        from providers import get_provider_profile
+        _p = get_provider_profile(provider_id)
+        if _p and _p.default_aux_model:
+            return _p.default_aux_model
+    except Exception:
+        pass
+    return _API_KEY_PROVIDER_AUX_MODELS_FALLBACK.get(provider_id, "")
+
+
+# Fallback for providers not yet migrated to ProviderProfile.default_aux_model,
+# plus providers we intentionally keep pinned here (e.g. Anthropic predates
+# profiles). New providers should set default_aux_model on their profile instead.
+_API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = {
    "gemini": "gemini-3-flash-preview",
    "zai": "glm-4.5-flash",
    "kimi-coding": "kimi-k2-turbo-preview",
@@ -235,6 +277,10 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "tencent-tokenhub": "hy3-preview",
 }

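+# Legacy alias — callers that haven't been updated to _get_aux_model_for_provider()
+# can still use this name directly. It is bound to the same dict object as
+# _API_KEY_PROVIDER_AUX_MODELS_FALLBACK, so the two cannot drift apart; note that
+# lookups through it do not consult ProviderProfile.default_aux_model.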
+_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = _API_KEY_PROVIDER_AUX_MODELS_FALLBACK
+
 # Vision-specific model overrides for direct providers.
 # When the user's main provider has a dedicated vision/multimodal model that
 # differs from their main chat model, map it here.  The vision auto-detect
@@ -259,10 +305,12 @@ _PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
    "kimi-coding-cn",
 })

-# OpenRouter app attribution headers (base — always sent)
+# OpenRouter app attribution headers (base — always sent).
+# `X-Title` is the canonical attribution header OpenRouter's dashboard
+# reads; the previous `X-OpenRouter-Title` label was not recognized there.
 _OR_HEADERS_BASE = {
    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
-    "X-OpenRouter-Title": "Hermes Agent",
+    "X-Title": "Hermes Agent",
    "X-OpenRouter-Categories": "productivity,cli-agent",
 }

@@ -567,7 +615,12 @@ class _CodexCompletionsAdapter:
                    # API allows it.
                    pass
                else:
-                    effort = reasoning_cfg.get("effort", "medium")
+                    # Truthy-only check mirrors agent/transports/codex.py
+                    # build_kwargs(): falsy values (None, "", 0) fall back
+                    # to the default rather than being forwarded to the
+                    # Codex backend, which rejects e.g. {"effort": null}
+                    # with a 400.
+                    effort = reasoning_cfg.get("effort") or "medium"
                    # Codex backend rejects "minimal"; clamp to "low" to
                    # match the main-agent Codex transport behavior.
                    if effort == "minimal":
@@ -1150,7 +1203,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:

            raw_base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
            base_url = _to_openai_base_url(raw_base_url)
-            model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
+            model = _get_aux_model_for_provider(provider_id) or None
            if model is None:
                continue  # skip provider if we don't know a valid aux model
            logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
@@ -1166,6 +1219,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                from hermes_cli.models import copilot_default_headers

                extra["default_headers"] = copilot_default_headers()
+            else:
+                try:
+                    from providers import get_provider_profile as _gpf_aux
+                    _ph_aux = _gpf_aux(provider_id)
+                    if _ph_aux and _ph_aux.default_headers:
+                        extra["default_headers"] = dict(_ph_aux.default_headers)
+                except Exception:
+                    pass
            _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
            _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
            return _client, model
@@ -1177,7 +1238,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:

        raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
        base_url = _to_openai_base_url(raw_base_url)
-        model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
+        model = _get_aux_model_for_provider(provider_id) or None
        if model is None:
            continue  # skip provider if we don't know a valid aux model
        logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
@@ -1193,6 +1254,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
+        else:
+            try:
+                from providers import get_provider_profile as _gpf_aux2
+                _ph_aux2 = _gpf_aux2(provider_id)
+                if _ph_aux2 and _ph_aux2.default_headers:
+                    extra["default_headers"] = dict(_ph_aux2.default_headers)
+            except Exception:
+                pass
        _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
        _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
        return _client, model
@@ -1565,7 +1634,7 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona

    from agent.anthropic_adapter import _is_oauth_token
    is_oauth = _is_oauth_token(token)
-    model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
+    model = _get_aux_model_for_provider("anthropic") or "claude-haiku-4-5-20251001"
    logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
    try:
        real_client = build_anthropic_client(token, base_url)
@@ -1643,6 +1712,39 @@ def _is_payment_error(exc: Exception) -> bool:
    return False


+def _is_rate_limit_error(exc: Exception) -> bool:
+    """Detect rate-limit errors that warrant provider fallback.
+
+    Returns True for HTTP 429 errors whose message indicates rate limiting
+    (as opposed to billing/quota exhaustion, which _is_payment_error handles).
+    Also catches OpenAI SDK RateLimitError instances that may not set
+    .status_code on the exception object.
+
+    Decision order, as implemented:
+
+    1. Any exception whose class is named ``RateLimitError`` returns True,
+       regardless of status code or message text (no billing-keyword check
+       is applied on this path).
+    2. Otherwise, only ``status_code == 429`` can return True:
+       a. True if the lowercased message contains a rate-limit phrase.
+       b. True if the message contains none of the billing phrases.
+       c. False when it contains a billing phrase and no rate-limit phrase.
+    3. Everything else returns False.
+
+    Args:
+        exc: The exception raised by the provider call.
+
+    Returns:
+        True when the error is classified as a rate limit, else False.
+    """
+    status = getattr(exc, "status_code", None)
+    err_lower = str(exc).lower()
+
+    # OpenAI SDK's RateLimitError sometimes omits .status_code —
+    # detect by class name so we don't miss these.  (PR #8023 pattern)
+    # The match is on the bare class name, so any class with this name counts.
+    if type(exc).__name__ == "RateLimitError":
+        return True
+
+    if status == 429:
+        # Distinguish rate-limit from billing: billing keywords are handled
+        # by _is_payment_error, everything else on 429 is a rate limit.
+        # An explicit rate-limit phrase wins even if billing words also appear.
+        if any(kw in err_lower for kw in (
+            "rate limit", "rate_limit", "too many requests",
+            "try again", "retry after", "resets in",
+        )):
+            return True
+        # Generic 429 without billing keywords = likely a rate limit
+        if not any(kw in err_lower for kw in (
+            "credits", "insufficient funds", "billing",
+            "payment required", "can only afford",
+        )):
+            return True
+    return False
+
+
 def _is_connection_error(exc: Exception) -> bool:
    """Detect connection/network errors that warrant provider fallback.

@@ -2368,7 +2470,7 @@ def resolve_provider_client(
        if explicit_base_url:
            base_url = _to_openai_base_url(explicit_base_url.strip().rstrip("/"))

-        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
+        default_model = _get_aux_model_for_provider(provider)
        final_model = _normalize_resolved_model(model or default_model, provider)

        if provider == "gemini":
@@ -2648,8 +2750,11 @@ def resolve_vision_provider_client(
        return resolved_provider, sync_client, final_model

    if resolved_base_url:
+        provider_for_base_override = (
+            requested if requested and requested not in ("", "auto") else "custom"
+        )
        client, final_model = resolve_provider_client(
-            "custom",
+            provider_for_base_override,
            model=resolved_model,
            async_mode=async_mode,
            explicit_base_url=resolved_base_url,
@@ -2657,8 +2762,8 @@ def resolve_vision_provider_client(
            api_mode=resolved_api_mode,
        )
        if client is None:
-            return "custom", None, None
-        return "custom", client, final_model
+            return provider_for_base_override, None, None
+        return provider_for_base_override, client, final_model

    if requested == "auto":
        # Vision auto-detection order:
@@ -3124,8 +3229,14 @@ def _resolve_task_provider_model(

    if task:
        # Config.yaml is the primary source for per-task overrides.
-        if cfg_base_url:
+        if cfg_base_url and cfg_api_key:
+            # Both base_url and api_key explicitly set → custom endpoint.
            return "custom", resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
+        if cfg_base_url and cfg_provider and cfg_provider != "auto":
+            # base_url set without api_key but with a known provider — use
+            # the provider so it can resolve credentials from env vars
+            # (e.g. OPENROUTER_API_KEY) instead of locking into "custom".
+            return cfg_provider, resolved_model, cfg_base_url, None, resolved_api_mode
        if cfg_provider and cfg_provider != "auto":
            return cfg_provider, resolved_model, None, None, resolved_api_mode

@@ -3526,7 +3637,7 @@ def call_llm(
            except Exception as retry_err:
                # If the max_tokens retry also hits a payment or connection
                # error, fall through to the fallback chain below.
-                if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
+                if not (_is_payment_error(retry_err) or _is_connection_error(retry_err) or _is_rate_limit_error(retry_err)):
                    raise
                first_err = retry_err

@@ -3609,13 +3720,27 @@ def call_llm(
        # Codex/OAuth tokens that authenticate but whose endpoint is down,
        # and providers the user never configured that got picked up by
        # the auto-detection chain.
-        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
+        #
+        # ── Rate-limit fallback (#13579) ─────────────────────────────
+        # When the provider returns a 429 rate-limit (not billing), fall
+        # back to an alternative provider instead of exhausting retries
+        # against the same rate-limited endpoint.
+        should_fallback = (
+            _is_payment_error(first_err)
+            or _is_connection_error(first_err)
+            or _is_rate_limit_error(first_err)
+        )
        # Only try alternative providers when the user didn't explicitly
        # configure this task's provider.  Explicit provider = hard constraint;
        # auto (the default) = best-effort fallback chain.  (#7559)
        is_auto = resolved_provider in ("auto", "", None)
        if should_fallback and is_auto:
-            reason = "payment error" if _is_payment_error(first_err) else "connection error"
+            if _is_payment_error(first_err):
+                reason = "payment error"
+            elif _is_rate_limit_error(first_err):
+                reason = "rate limit"
+            else:
+                reason = "connection error"
            logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
                        task or "call", reason, resolved_provider, first_err)
            fb_client, fb_model, fb_label = _try_payment_fallback(
@@ -3818,7 +3943,7 @@ async def async_call_llm(
            except Exception as retry_err:
                # If the max_tokens retry also hits a payment or connection
                # error, fall through to the fallback chain below.
-                if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
+                if not (_is_payment_error(retry_err) or _is_connection_error(retry_err) or _is_rate_limit_error(retry_err)):
                    raise
                first_err = retry_err

@@ -3887,11 +4012,20 @@ async def async_call_llm(
                    return _validate_llm_response(
                        await retry_client.chat.completions.create(**retry_kwargs), task)

-        # ── Payment / connection fallback (mirrors sync call_llm) ─────
-        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
+        # ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
+        should_fallback = (
+            _is_payment_error(first_err)
+            or _is_connection_error(first_err)
+            or _is_rate_limit_error(first_err)
+        )
        is_auto = resolved_provider in ("auto", "", None)
        if should_fallback and is_auto:
-            reason = "payment error" if _is_payment_error(first_err) else "connection error"
+            if _is_payment_error(first_err):
+                reason = "payment error"
+            elif _is_rate_limit_error(first_err):
+                reason = "rate limit"
+            else:
+                reason = "connection error"
            logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
                        task or "call", reason, resolved_provider, first_err)
            fb_client, fb_model, fb_label = _try_payment_fallback(
@@ -43,6 +43,9 @@ SUMMARY_PREFIX = (
    "they were already addressed. "
    "Your current task is identified in the '## Active Task' section of the "
    "summary — resume exactly from there. "
+    "IMPORTANT: Your persistent memory (MEMORY.md, USER.md) in the system "
+    "prompt is ALWAYS authoritative and active — never ignore or deprioritize "
+    "memory content due to this compaction note. "
    "Respond ONLY to the latest user message "
    "that appears AFTER this summary. The current session state (files, "
    "config, etc.) may reflect work described here — avoid repeating it:"
@@ -344,6 +347,7 @@ class ContextCompressor(ContextEngine):
        self._last_aux_model_failure_model = None
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0
+        self._summary_failure_cooldown_until = 0.0  # transient errors must not block a fresh session

    def update_model(
        self,
@@ -553,7 +557,16 @@ class ContextCompressor(ContextEngine):
                    break
                accumulated += msg_tokens
                boundary = i
-            prune_boundary = max(boundary, len(result) - min_protect)
+            # Translate the budget walk into a "protected count", apply the
+            # floor in count-space (where `max` reads naturally: protect at
+            # least `min_protect` messages or whatever the budget reserved,
+            # whichever is more), then convert back to a prune boundary.
+            # Doing this in index-space with `max` would invert the direction
+            # (smaller index = MORE protected), so a generous budget would
+            # silently get truncated back down to `min_protect`.
+            budget_protect_count = len(result) - boundary
+            protected_count = max(budget_protect_count, min_protect)
+            prune_boundary = len(result) - protected_count
        else:
            prune_boundary = len(result) - protect_tail_count

@@ -590,6 +603,8 @@ class ContextCompressor(ContextEngine):
            # Skip multimodal content (list of content blocks)
            if isinstance(content, list):
                continue
+            if not isinstance(content, str):
+                continue
            if not content or content == _PRUNED_TOOL_PLACEHOLDER:
                continue
            # Skip already-deduplicated or previously-summarized results
@@ -905,15 +920,19 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                or "does not exist" in _err_str
                or "no available channel" in _err_str
            )
+            _is_timeout = (
+                _status in (408, 429, 502, 504)
+                or "timeout" in _err_str
+            )
            if (
-                _is_model_not_found
+                (_is_model_not_found or _is_timeout)
                and self.summary_model
                and self.summary_model != self.model
                and not getattr(self, "_summary_model_fallen_back", False)
            ):
                self._summary_model_fallen_back = True
                logging.warning(
-                    "Summary model '%s' not available (%s). "
+                    "Summary model '%s' unavailable (%s). "
                    "Falling back to main model '%s' for compression.",
                    self.summary_model, e, self.model,
                )
@@ -977,15 +996,39 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            return None

    @staticmethod
-    def _with_summary_prefix(summary: str) -> str:
-        """Normalize summary text to the current compaction handoff format."""
+    def _strip_summary_prefix(summary: str) -> str:
+        """Return summary body without the current or legacy handoff prefix."""
        text = (summary or "").strip()
-        for prefix in (LEGACY_SUMMARY_PREFIX, SUMMARY_PREFIX):
+        for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX):
            if text.startswith(prefix):
-                text = text[len(prefix):].lstrip()
-                break
+                return text[len(prefix):].lstrip()
+        return text
+
+    @classmethod
+    def _with_summary_prefix(cls, summary: str) -> str:
+        """Normalize summary text to the current compaction handoff format."""
+        text = cls._strip_summary_prefix(summary)
        return f"{SUMMARY_PREFIX}\n{text}" if text else SUMMARY_PREFIX

+    @staticmethod
+    def _is_context_summary_content(content: Any) -> bool:
+        text = _content_text_for_contains(content).lstrip()
+        return text.startswith(SUMMARY_PREFIX) or text.startswith(LEGACY_SUMMARY_PREFIX)
+
+    @classmethod
+    def _find_latest_context_summary(
+        cls,
+        messages: List[Dict[str, Any]],
+        start: int,
+        end: int,
+    ) -> tuple[Optional[int], str]:
+        """Find the newest handoff summary inside a compression window."""
+        for idx in range(end - 1, start - 1, -1):
+            content = messages[idx].get("content")
+            if cls._is_context_summary_content(content):
+                return idx, cls._strip_summary_prefix(_content_text_for_contains(content))
+        return None, ""
+
    # ------------------------------------------------------------------
    # Tool-call / tool-result pair integrity helpers
    # ------------------------------------------------------------------
@@ -1292,6 +1335,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            return messages

        turns_to_summarize = messages[compress_start:compress_end]
+        summary_idx, summary_body = self._find_latest_context_summary(
+            messages,
+            compress_start,
+            compress_end,
+        )
+        if summary_idx is not None:
+            if summary_body and not self._previous_summary:
+                self._previous_summary = summary_body
+            turns_to_summarize = messages[summary_idx + 1:compress_end]

        if not self.quiet_mode:
            logger.info(
@@ -1324,7 +1376,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            msg = messages[i].copy()
            if i == 0 and msg.get("role") == "system":
                existing = msg.get("content")
-                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
+                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work. Your persistent memory (MEMORY.md, USER.md) remains fully authoritative regardless of compaction.]"
                if _compression_note not in _content_text_for_contains(existing):
                    msg["content"] = _append_text_to_content(
                        existing,
@@ -1369,6 +1421,19 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                # Merge the summary into the first tail message instead
                # of inserting a standalone message that breaks alternation.
                _merge_summary_into_tail = True
+
+        # When the summary lands as a standalone role="user" message,
+        # weak models read the verbatim "## Active Task" quote of a past
+        # user request as fresh input (#11475, #14521). Append the explicit
+        # end marker — the same one used in the merge-into-tail path — so
+        # the model has a clear "summary above, not new input" signal.
+        if not _merge_summary_into_tail and summary_role == "user":
+            summary = (
+                summary
+                + "\n\n--- END OF CONTEXT SUMMARY — "
+                "respond to the message below, not the summary above ---"
+            )
+
        if not _merge_summary_into_tail:
            compressed.append({"role": summary_role, "content": summary})

@@ -55,6 +55,7 @@ class FailoverReason(enum.Enum):
    thinking_signature = "thinking_signature"  # Anthropic thinking block sig invalid
    long_context_tier = "long_context_tier"    # Anthropic "extra usage" tier gate
    oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden"  # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
+    llama_cpp_grammar_pattern = "llama_cpp_grammar_pattern"  # llama.cpp json-schema-to-grammar rejects regex escapes in `pattern` / `format` — strip from tools and retry

    # Catch-all
    unknown = "unknown"                  # Unclassifiable — retry with backoff
@@ -470,6 +471,31 @@ def classify_api_error(
            should_compress=False,
        )

+    # llama.cpp's ``json-schema-to-grammar`` converter (used by its OAI
+    # server to build GBNF tool-call parsers) rejects regex escape classes
+    # like ``\d``/``\w``/``\s`` and most ``format`` values. MCP servers
+    # routinely emit ``"pattern": "\\d{4}-\\d{2}-\\d{2}"`` for date/phone/
+    # email params. llama.cpp surfaces this as HTTP 400 with one of a few
+    # recognizable phrases; on match we strip ``pattern``/``format`` from
+    # ``self.tools`` in the retry loop and retry once. Cloud providers are
+    # unaffected — they accept these keywords and we never hit this branch.
+    if (
+        status_code == 400
+        and (
+            "error parsing grammar" in error_msg
+            or "json-schema-to-grammar" in error_msg
+            or (
+                "unable to generate parser" in error_msg
+                and "template" in error_msg
+            )
+        )
+    ):
+        return _result(
+            FailoverReason.llama_cpp_grammar_pattern,
+            retryable=True,
+            should_compress=False,
+        )
+
    # ── 2. HTTP status code classification ──────────────────────────

    if status_code is not None:
@@ -520,7 +546,12 @@ def classify_api_error(

    is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
    if is_disconnect and not status_code:
-        is_large = approx_tokens > context_length * 0.6 or approx_tokens > 120000 or num_messages > 200
+        # Absolute token/message-count thresholds are only a proxy for smaller
+        # context windows.  Large-context sessions can have hundreds of
+        # messages while still being far below their actual token budget.
+        is_large = approx_tokens > context_length * 0.6 or (
+            context_length <= 256000 and (approx_tokens > 120000 or num_messages > 200)
+        )
        if is_large:
            return _result(
                FailoverReason.context_overflow,
@@ -766,7 +797,12 @@ def _classify_400(
        if not err_body_msg:
            err_body_msg = str(body.get("message") or "").strip().lower()
    is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
-    is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
+    # Absolute token/message-count thresholds are only a proxy for smaller
+    # context windows.  Large-context sessions can have many messages while
+    # still being far below their actual token budget.
+    is_large = approx_tokens > context_length * 0.4 or (
+        context_length <= 256000 and (approx_tokens > 80000 or num_messages > 80)
+    )

    if is_generic and is_large:
        return result_fn(
@@ -679,7 +679,21 @@ def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices:
    finish_reason_raw = str(cand.get("finishReason") or "")
    if finish_reason_raw:
        mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw)
-        chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
+        finish_chunk = _make_stream_chunk(model=model, finish_reason=mapped)
+        # Attach usage from this event's usageMetadata so the streaming
+        # loop in run_agent.py can record token counts (mirrors the
+        # non-streaming path in translate_gemini_response).
+        usage_meta = event.get("usageMetadata") or {}
+        if usage_meta:
+            finish_chunk.usage = SimpleNamespace(
+                prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
+                completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
+                total_tokens=int(usage_meta.get("totalTokenCount") or 0),
+                prompt_tokens_details=SimpleNamespace(
+                    cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
+                ),
+            )
+        chunks.append(finish_chunk)
    return chunks


@@ -489,16 +489,29 @@ def save_credentials(creds: GoogleCredentials) -> Path:
    """Atomically write creds to disk with 0o600 permissions."""
    path = _credentials_path()
    path.parent.mkdir(parents=True, exist_ok=True)
+    # Tighten parent dir to 0o700 so siblings can't traverse to the creds file.
+    # On Windows this is a no-op (POSIX mode bits aren't enforced); ignore failures.
+    try:
+        os.chmod(path.parent, 0o700)
+    except OSError:
+        pass
    payload = json.dumps(creds.to_dict(), indent=2, sort_keys=True) + "\n"

    with _credentials_lock():
        tmp_path = path.with_suffix(f".tmp.{os.getpid()}.{secrets.token_hex(4)}")
        try:
-            with open(tmp_path, "w", encoding="utf-8") as fh:
+            # Create with 0o600 atomically to close the TOCTOU window where the
+            # default creation mode (often 0o644 under a 0o022 umask) would briefly
+            # expose tokens to other local users between open() and chmod().
+            fd = os.open(
+                str(tmp_path),
+                os.O_WRONLY | os.O_CREAT | os.O_EXCL,
+                stat.S_IRUSR | stat.S_IWUSR,
+            )
+            with os.fdopen(fd, "w", encoding="utf-8") as fh:
                fh.write(payload)
                fh.flush()
                os.fsync(fh.fileno())
-            os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
            atomic_replace(tmp_path, path)
        finally:
            try:
@@ -0,0 +1,233 @@
+"""Lightweight internationalization (i18n) for Hermes static user-facing messages.
+
+Scope (thin slice, by design): only the highest-impact static strings shown
+to the user by Hermes itself -- approval prompts, a handful of gateway slash
+command replies, restart-drain notices.  Agent-generated output, log lines,
+error tracebacks, tool outputs, and slash-command descriptions all stay in
+English.
+
+Catalog files live under ``locales/<lang>.yaml`` at the repo root.  Each
+catalog is loaded into a flat dict keyed by dotted paths (e.g.
+``approval.choose`` or ``gateway.approval_expired``); the YAML itself may be
+nested.  Missing keys fall back to English; if English is missing too, the
+key path itself is returned so a broken catalog never crashes the agent.
+
+Usage::
+
+    from agent.i18n import t
+    print(t("approval.choose_long"))                       # current lang
+    print(t("gateway.draining", count=3))                  # {count} formatted
+    print(t("approval.choose_long", lang="zh"))            # explicit override
+
+Language resolution order:
+    1. Explicit ``lang=`` argument passed to :func:`t`
+    2. ``HERMES_LANGUAGE`` environment variable (for tests / quick override)
+    3. ``display.language`` from config.yaml
+    4. ``"en"`` (baseline)
+
+Supported languages: en, zh, ja, de, es, fr, tr, uk.  Unknown values fall back to en.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import threading
+from functools import lru_cache
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+SUPPORTED_LANGUAGES: tuple[str, ...] = ("en", "zh", "ja", "de", "es", "fr", "tr", "uk")
+DEFAULT_LANGUAGE = "en"
+
+# Accept a few natural aliases so users who type "chinese" / "zh-CN" / "jp"
+# get the right catalog instead of silently falling back to English.
+_LANGUAGE_ALIASES: dict[str, str] = {
+    "english": "en", "en-us": "en", "en-gb": "en",
+    "chinese": "zh", "mandarin": "zh", "zh-cn": "zh", "zh-tw": "zh", "zh-hans": "zh", "zh-hant": "zh",
+    "japanese": "ja", "jp": "ja", "ja-jp": "ja",
+    "german": "de", "deutsch": "de", "de-de": "de",
+    "spanish": "es", "español": "es", "espanol": "es", "es-es": "es", "es-mx": "es",
+    "french": "fr", "français": "fr", "france": "fr", "fr-fr": "fr", "fr-be": "fr", "fr-ca": "fr", "fr-ch": "fr",
+    "ukrainian": "uk", "ukrainisch": "uk", "українська": "uk", "uk-ua": "uk", "ua": "uk",
+    "turkish": "tr", "türkçe": "tr", "tr-tr": "tr",
+}
+
+_catalog_cache: dict[str, dict[str, str]] = {}
+_catalog_lock = threading.Lock()
+
+
+def _locales_dir() -> Path:
+    """Return the directory containing locale YAML files.
+
+    Lives next to the repo root so both the bundled install and editable
+    checkouts find it without PYTHONPATH gymnastics.
+    """
+    # agent/i18n.py -> agent/ -> repo root
+    return Path(__file__).resolve().parent.parent / "locales"
+
+
+def _normalize_lang(value: Any) -> str:
+    """Normalize a user-supplied language value to a supported code.
+
+    Accepts supported codes directly, common aliases (``chinese`` -> ``zh``),
+    and case-insensitive regional tags (``zh-CN`` -> ``zh``).  Returns the
+    default language for unknown values.
+    """
+    if not isinstance(value, str):
+        return DEFAULT_LANGUAGE
+    key = value.strip().lower()
+    if not key:
+        return DEFAULT_LANGUAGE
+    if key in SUPPORTED_LANGUAGES:
+        return key
+    if key in _LANGUAGE_ALIASES:
+        return _LANGUAGE_ALIASES[key]
+    # Try stripping a region suffix (e.g. "pt-br" -> "pt" won't be supported,
+    # but "zh-CN" -> "zh" will).
+    base = key.split("-", 1)[0]
+    if base in SUPPORTED_LANGUAGES:
+        return base
+    return DEFAULT_LANGUAGE
+
+
+def _load_catalog(lang: str) -> dict[str, str]:
+    """Load and flatten one locale YAML file into a dotted-key dict.
+
+    YAML files can be nested for human readability; this produces the flat
+    key space :func:`t` expects.  Cached per-language for the process.
+    """
+    with _catalog_lock:
+        cached = _catalog_cache.get(lang)
+        if cached is not None:
+            return cached
+
+    path = _locales_dir() / f"{lang}.yaml"
+    if not path.is_file():
+        logger.debug("i18n catalog missing for %s at %s", lang, path)
+        with _catalog_lock:
+            _catalog_cache[lang] = {}
+        return {}
+
+    try:
+        import yaml  # PyYAML is already a hermes dependency
+        with path.open("r", encoding="utf-8") as f:
+            raw = yaml.safe_load(f) or {}
+    except Exception as exc:
+        logger.warning("Failed to load i18n catalog %s: %s", path, exc)
+        with _catalog_lock:
+            _catalog_cache[lang] = {}
+        return {}
+
+    flat: dict[str, str] = {}
+    _flatten_into(raw, "", flat)
+    with _catalog_lock:
+        _catalog_cache[lang] = flat
+    return flat
+
+
+def _flatten_into(node: Any, prefix: str, out: dict[str, str]) -> None:
+    if isinstance(node, dict):
+        for key, value in node.items():
+            child_key = f"{prefix}.{key}" if prefix else str(key)
+            _flatten_into(value, child_key, out)
+    elif isinstance(node, str):
+        out[prefix] = node
+    # Non-string, non-dict leaves are ignored -- catalogs are text-only.
+
+
+@lru_cache(maxsize=1)
+def _config_language_cached() -> str | None:
+    """Read ``display.language`` from config.yaml once per process.
+
+    Cached because ``t()`` is called in hot paths (every approval prompt,
+    every gateway reply) and re-reading YAML each call would be wasteful.
+    ``reset_language_cache()`` clears this when config changes at runtime
+    (e.g. after the setup wizard).
+    """
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+        lang = (cfg.get("display") or {}).get("language")
+        if lang:
+            return _normalize_lang(lang)
+    except Exception as exc:
+        logger.debug("Could not read display.language from config: %s", exc)
+    return None
+
+
+def reset_language_cache() -> None:
+    """Invalidate cached language resolution and catalogs.
+
+    Call after :func:`hermes_cli.config.save_config` if a running process
+    needs to pick up a changed ``display.language`` without restart.
+    """
+    _config_language_cached.cache_clear()
+    with _catalog_lock:
+        _catalog_cache.clear()
+
+
+def get_language() -> str:
+    """Resolve the active language using env > config > default order."""
+    env_lang = os.environ.get("HERMES_LANGUAGE")
+    if env_lang:
+        return _normalize_lang(env_lang)
+    cfg_lang = _config_language_cached()
+    if cfg_lang:
+        return cfg_lang
+    return DEFAULT_LANGUAGE
+
+
+def t(key: str, lang: str | None = None, **format_kwargs: Any) -> str:
+    """Translate a dotted key to the active language.
+
+    Parameters
+    ----------
+    key
+        Dotted path into the catalog, e.g. ``"approval.choose_long"``.
+    lang
+        Explicit language override.  Takes precedence over env + config.
+    **format_kwargs
+        ``str.format`` substitution arguments (``t("gateway.drain", count=3)``
+        expects a catalog entry with a ``{count}`` placeholder).
+
+    Returns
+    -------
+    The translated string, or the English fallback if the key is missing in
+    the target language, or the bare key if English is also missing.
+    """
+    target = _normalize_lang(lang) if lang else get_language()
+    catalog = _load_catalog(target)
+    value = catalog.get(key)
+
+    if value is None and target != DEFAULT_LANGUAGE:
+        # Fall through to English rather than showing a key path to the user.
+        value = _load_catalog(DEFAULT_LANGUAGE).get(key)
+
+    if value is None:
+        # Last-ditch: return the key itself.  A broken catalog should not
+        # crash anything; it just looks ugly until someone fixes it.
+        logger.debug("i18n miss: key=%r lang=%r", key, target)
+        value = key
+
+    if format_kwargs:
+        try:
+            return value.format(**format_kwargs)
+        except (KeyError, IndexError, ValueError) as exc:
+            logger.warning(
+                "i18n format failed for key=%r lang=%r kwargs=%r: %s",
+                key, target, format_kwargs, exc,
+            )
+            return value
+    return value
+
+
+__all__ = [
+    "SUPPORTED_LANGUAGES",
+    "DEFAULT_LANGUAGE",
+    "t",
+    "get_language",
+    "reset_language_cache",
+]
@@ -1,17 +1,14 @@
-"""MemoryManager — orchestrates the built-in memory provider plus at most
-ONE external plugin memory provider.
+"""MemoryManager — orchestrates memory providers for the agent.

 Single integration point in run_agent.py. Replaces scattered per-backend
 code with one manager that delegates to registered providers.

-The BuiltinMemoryProvider is always registered first and cannot be removed.
-Only ONE external (non-builtin) provider is allowed at a time — attempting
-to register a second external provider is rejected with a warning.  This
+Only ONE external plugin provider is allowed at a time — attempting to
+register a second external provider is rejected with a warning.  This
 prevents tool schema bloat and conflicting memory backends.

 Usage in run_agent.py:
    self._memory_manager = MemoryManager()
-    self._memory_manager.add_provider(BuiltinMemoryProvider(...))
    # Only ONE of these:
    self._memory_manager.add_provider(plugin_provider)

@@ -49,7 +46,7 @@ _INTERNAL_CONTEXT_RE = re.compile(
    re.IGNORECASE,
 )
 _INTERNAL_NOTE_RE = re.compile(
-    r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as informational background data\.\]\s*',
+    r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as (?:informational background data|authoritative reference data[^\]]*)\.\]\s*',
    re.IGNORECASE,
 )

@@ -183,7 +180,8 @@ def build_memory_context_block(raw_context: str) -> str:
    return (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, "
-        "NOT new user input. Treat as informational background data.]\n\n"
+        "NOT new user input. Treat as authoritative reference data — "
+        "this is the agent's persistent memory and should inform all responses.]\n\n"
        f"{clean}\n"
        "</memory-context>"
    )
@@ -1,17 +1,16 @@
 """Abstract base class for pluggable memory providers.

-Memory providers give the agent persistent recall across sessions. One
-external provider is active at a time alongside the always-on built-in
-memory (MEMORY.md / USER.md). The MemoryManager enforces this limit.
+Memory providers give the agent persistent recall across sessions.
+The MemoryManager enforces a one-external-provider limit to prevent
+tool schema bloat and conflicting memory backends.

-Built-in memory is always active as the first provider and cannot be removed.
-External providers (Honcho, Hindsight, Mem0, etc.) are additive — they never
-disable the built-in store. Only one external provider runs at a time to
-prevent tool schema bloat and conflicting memory backends.
+External providers (Honcho, Hindsight, Mem0, etc.) are registered
+and managed via MemoryManager. Only one external provider runs at a
+time.

 Registration:
-  1. Built-in: BuiltinMemoryProvider — always present, not removable.
-  2. Plugins: Ship in plugins/memory/<name>/, activated by memory.provider config.
+  Plugins ship in plugins/memory/<name>/ and are activated via
+  the memory.provider config key.

 Lifecycle (called by MemoryManager, wired in run_agent.py):
  initialize()          — connect, create resources, warm up
@@ -318,6 +318,17 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "ollama.com": "ollama-cloud",
 }

+# Auto-extend with hostnames derived from provider profiles.
+# Any provider with a base_url not already in the map gets added automatically.
+try:
+    from providers import list_providers as _list_providers
+    for _pp in _list_providers():
+        _host = _pp.get_hostname()
+        # Only fill gaps: hostnames already present in the map are never overwritten.
+        if _host and _host not in _URL_TO_PROVIDER:
+            _URL_TO_PROVIDER[_host] = _pp.name
+except Exception:
+    # Best-effort: any failure here (import or lookup) is ignored; entries
+    # added before the failure are kept.
+    pass
+

 def _infer_provider_from_url(base_url: str) -> Optional[str]:
    """Infer the models.dev provider name from a base URL.
@@ -513,6 +513,12 @@ PLATFORM_HINTS = {
        "image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
        "— when a sticker is the right response, use yb_send_sticker."
    ),
+    "api_server": (
+        "You're responding through an API server. The rendering layer is unknown — "
+        "assume plain text. No markdown formatting (no asterisks, bullets, headers, "
+        "code fences). Treat this like a conversation, not a document. Keep responses "
+        "brief and natural."
+    ),
 }

 # ---------------------------------------------------------------------------
@@ -305,13 +305,18 @@ def _redact_form_body(text: str) -> str:
    return _redact_query_string(text.strip())


-def redact_sensitive_text(text: str, *, force: bool = False) -> str:
+def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str:
    """Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged.
    Disabled by default — enable via security.redact_secrets: true in config.yaml.
    Set force=True for safety boundaries that must never return raw secrets
    regardless of the user's global logging redaction preference.
+
+    Set code_file=True to skip the ENV-assignment and JSON-field regex
+    patterns when the text is known to be source code (e.g. MAX_TOKENS=***
+    constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
+    private keys, DB connstrings, JWTs, and URL secrets are still redacted.
    """
    if text is None:
        return None
@@ -325,17 +330,18 @@ def redact_sensitive_text(text: str, *, force: bool = False) -> str:
    # Known prefixes (sk-, ghp_, etc.)
    text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)

-    # ENV assignments: OPENAI_API_KEY=sk-abc...
-    def _redact_env(m):
-        name, quote, value = m.group(1), m.group(2), m.group(3)
-        return f"{name}={quote}{_mask_token(value)}{quote}"
-    text = _ENV_ASSIGN_RE.sub(_redact_env, text)
+    # ENV assignments: OPENAI_API_KEY=***  (skip for code files — false positives)
+    if not code_file:
+        def _redact_env(m):
+            name, quote, value = m.group(1), m.group(2), m.group(3)
+            return f"{name}={quote}{_mask_token(value)}{quote}"
+        text = _ENV_ASSIGN_RE.sub(_redact_env, text)

-    # JSON fields: "apiKey": "value"
-    def _redact_json(m):
-        key, value = m.group(1), m.group(2)
-        return f'{key}: "{_mask_token(value)}"'
-    text = _JSON_FIELD_RE.sub(_redact_json, text)
+        # JSON fields: "apiKey": "***"  (skip for code files — false positives)
+        def _redact_json(m):
+            key, value = m.group(1), m.group(2)
+            return f'{key}: "{_mask_token(value)}"'
+        text = _JSON_FIELD_RE.sub(_redact_json, text)

    # Authorization headers
    text = _AUTH_HEADER_RE.sub(
@@ -0,0 +1,386 @@
+"""Stateful scrubber for reasoning/thinking blocks in streamed assistant text.
+
+``run_agent._strip_think_blocks`` is regex-based and correct for a complete
+string, but when it runs *per-delta* in ``_fire_stream_delta`` it destroys
+the state that downstream consumers (CLI ``_stream_delta``, gateway
+``GatewayStreamConsumer._filter_and_accumulate``) rely on.
+
+Concretely, when MiniMax-M2.7 streams
+
+    delta1 = "<think>"
+    delta2 = "Let me check their config"
+    delta3 = "</think>"
+
+the per-delta regex erases delta1 entirely (case 2: unterminated-open at
+boundary matches ``^<think>...``), so the downstream state machine never
+sees the open tag, treats delta2 as regular content, and leaks reasoning
+to the user.  Consumers that don't run their own state machine (ACP,
+api_server, TTS) never had any defence at all — they just emitted
+whatever survived the upstream regex.
+
+This module centralises the tag-suppression state machine at the
+upstream layer so every stream_delta_callback sees text that has
+already had reasoning blocks removed.  Partial tags at delta
+boundaries are held back until the next delta resolves them, and
+end-of-stream flushing surfaces any held-back prose that turned out
+not to be a real tag.
+
+Usage::
+
+    scrubber = StreamingThinkScrubber()
+    for delta in stream:
+        visible = scrubber.feed(delta)
+        if visible:
+            emit(visible)
+    tail = scrubber.flush()  # at end of stream
+    if tail:
+        emit(tail)
+
+The scrubber is stateful; keep one per agent instance.  Call ``reset()`` at
+the top of each new turn so a hung block from an interrupted prior
+stream cannot taint the next turn's output.
+
+Tag variants handled (case-insensitive):
+  ``<think>``, ``<thinking>``, ``<reasoning>``, ``<thought>``,
+  ``<REASONING_SCRATCHPAD>``.
+
+Block-boundary rule for opens: an opening tag is only treated as a
+reasoning-block opener when it appears at the start of the stream,
+after a newline (optionally followed by whitespace), or when only
+whitespace has been emitted on the current line.  This prevents prose
+that *mentions* the tag name (e.g. ``"use <think> tags here"``) from
+being incorrectly suppressed.  Closed pairs (``<think>X</think>``) are
+always suppressed regardless of boundary; a closed pair is an
+intentional, bounded construct.
+"""
+
+from __future__ import annotations
+
+from typing import Tuple
+
+__all__ = ["StreamingThinkScrubber"]
+
+
+class StreamingThinkScrubber:
+    """Stateful scrubber for streaming reasoning/thinking blocks.
+
+    State machine:
+      - ``_in_block``: True while inside an opened block, waiting for
+        a close tag.  All text inside is discarded.
+      - ``_buf``: held-back partial-tag tail.  Emitted / discarded on
+        the next ``feed()`` call or by ``flush()``.
+      - ``_last_emitted_ended_newline``: True iff the most recent
+        emission to the consumer ended with ``\\n``, or nothing has
+        been emitted yet (start-of-stream counts as a boundary).  Used
+        to decide whether an open tag at buffer position 0 is at a
+        block boundary.
+    """
+
+    _OPEN_TAG_NAMES: Tuple[str, ...] = (
+        "think",
+        "thinking",
+        "reasoning",
+        "thought",
+        "REASONING_SCRATCHPAD",
+    )
+
+    # Materialise literal tag strings so the hot path does string
+    # operations, not regex compilation per feed().
+    _OPEN_TAGS: Tuple[str, ...] = tuple(f"<{name}>" for name in _OPEN_TAG_NAMES)
+    _CLOSE_TAGS: Tuple[str, ...] = tuple(f"</{name}>" for name in _OPEN_TAG_NAMES)
+
+    # Pre-compute the longest tag (for partial-tag hold-back bound).
+    _MAX_TAG_LEN: int = max(len(tag) for tag in _OPEN_TAGS + _CLOSE_TAGS)
+
+    def __init__(self) -> None:
+        self._in_block: bool = False
+        self._buf: str = ""
+        self._last_emitted_ended_newline: bool = True
+
+    def reset(self) -> None:
+        """Reset all state.  Call at the top of every new turn."""
+        self._in_block = False
+        self._buf = ""
+        self._last_emitted_ended_newline = True
+
+    def feed(self, text: str) -> str:
+        """Feed one delta; return the scrubbed visible portion.
+
+        May return an empty string when the entire delta is reasoning
+        content or is being held back pending resolution of a partial
+        tag at the boundary.
+        """
+        if not text:
+            return ""
+        buf = self._buf + text
+        self._buf = ""
+        out: list[str] = []
+
+        while buf:
+            if self._in_block:
+                # Hunt for the earliest close tag.
+                close_idx, close_len = self._find_first_tag(
+                    buf, self._CLOSE_TAGS,
+                )
+                if close_idx == -1:
+                    # No close yet — hold back a potential partial
+                    # close-tag prefix; discard everything else.
+                    held = self._max_partial_suffix(buf, self._CLOSE_TAGS)
+                    self._buf = buf[-held:] if held else ""
+                    return "".join(out)
+                # Found close: discard block content + tag, continue.
+                buf = buf[close_idx + close_len:]
+                self._in_block = False
+            else:
+                # Priority 1 — closed <tag>X</tag> pair anywhere in
+                # buf.  Closed pairs are always an intentional,
+                # bounded construct (even mid-line prose containing
+                # an open/close pair is almost certainly a model
+                # leaking reasoning inline), so no boundary gating.
+                pair = self._find_earliest_closed_pair(buf)
+                # Priority 2 — unterminated open tag at a block
+                # boundary.  Boundary-gated so prose that mentions
+                # '<think>' isn't over-stripped.
+                open_idx, open_len = self._find_open_at_boundary(
+                    buf, out,
+                )
+
+                # Pick whichever match comes earliest in the buffer.
+                if pair is not None and (
+                    open_idx == -1 or pair[0] <= open_idx
+                ):
+                    start_idx, end_idx = pair
+                    preceding = buf[:start_idx]
+                    if preceding:
+                        preceding = self._strip_orphan_close_tags(preceding)
+                        if preceding:
+                            out.append(preceding)
+                            self._last_emitted_ended_newline = (
+                                preceding.endswith("\n")
+                            )
+                    buf = buf[end_idx:]
+                    continue
+
+                if open_idx != -1:
+                    # Unterminated open at boundary — emit preceding,
+                    # enter block, continue loop with remainder.
+                    preceding = buf[:open_idx]
+                    if preceding:
+                        preceding = self._strip_orphan_close_tags(preceding)
+                        if preceding:
+                            out.append(preceding)
+                            self._last_emitted_ended_newline = (
+                                preceding.endswith("\n")
+                            )
+                    self._in_block = True
+                    buf = buf[open_idx + open_len:]
+                    continue
+
+                # No resolvable tag structure in buf.  Hold back any
+                # partial-tag prefix at the tail so a split tag
+                # across deltas isn't missed, then emit the rest.
+                held = self._max_partial_suffix(buf, self._OPEN_TAGS)
+                held_close = self._max_partial_suffix(
+                    buf, self._CLOSE_TAGS,
+                )
+                held = max(held, held_close)
+                if held:
+                    emit_text = buf[:-held]
+                    self._buf = buf[-held:]
+                else:
+                    emit_text = buf
+                    self._buf = ""
+                if emit_text:
+                    emit_text = self._strip_orphan_close_tags(emit_text)
+                    if emit_text:
+                        out.append(emit_text)
+                        self._last_emitted_ended_newline = (
+                            emit_text.endswith("\n")
+                        )
+                return "".join(out)
+
+        return "".join(out)
+
+    def flush(self) -> str:
+        """End-of-stream flush.
+
+        If still inside an unterminated block, held-back content is
+        discarded — leaking partial reasoning is worse than a
+        truncated answer.  Otherwise the held-back partial-tag tail is
+        emitted verbatim (it turned out not to be a real tag prefix).
+        """
+        if self._in_block:
+            self._buf = ""
+            self._in_block = False
+            return ""
+        tail = self._buf
+        self._buf = ""
+        if not tail:
+            return ""
+        tail = self._strip_orphan_close_tags(tail)
+        if tail:
+            self._last_emitted_ended_newline = tail.endswith("\n")
+        return tail
+
+    # ── internal helpers ───────────────────────────────────────────────
+
+    @staticmethod
+    def _find_first_tag(
+        buf: str, tags: Tuple[str, ...],
+    ) -> Tuple[int, int]:
+        """Return (earliest_index, tag_length) over *tags*, or (-1, 0).
+
+        Case-insensitive match.
+        """
+        buf_lower = buf.lower()
+        best_idx = -1
+        best_len = 0
+        for tag in tags:
+            idx = buf_lower.find(tag.lower())
+            if idx != -1 and (best_idx == -1 or idx < best_idx):
+                best_idx = idx
+                best_len = len(tag)
+        return best_idx, best_len
+
+    def _find_earliest_closed_pair(self, buf: str):
+        """Return (start_idx, end_idx) of the earliest closed pair, else None.
+
+        A closed pair is ``<tag>...</tag>`` of any variant.  Matches are
+        case-insensitive and non-greedy (the closest close tag after
+        an open tag wins), matching the regex ``<tag>.*?</tag>``
+        semantics of ``_strip_think_blocks`` case 1.  When two tag
+        variants could both match, the one whose open tag appears
+        earlier wins.
+        """
+        buf_lower = buf.lower()
+        best: "tuple[int, int] | None" = None
+        for open_tag, close_tag in zip(self._OPEN_TAGS, self._CLOSE_TAGS):
+            open_lower = open_tag.lower()
+            close_lower = close_tag.lower()
+            open_idx = buf_lower.find(open_lower)
+            if open_idx == -1:
+                continue
+            close_idx = buf_lower.find(
+                close_lower, open_idx + len(open_lower),
+            )
+            if close_idx == -1:
+                continue
+            end_idx = close_idx + len(close_lower)
+            if best is None or open_idx < best[0]:
+                best = (open_idx, end_idx)
+        return best
+
+    def _find_open_at_boundary(
+        self, buf: str, already_emitted: list[str],
+    ) -> Tuple[int, int]:
+        """Return the earliest block-boundary open-tag (idx, len).
+
+        Returns (-1, 0) if no boundary-legal opener is present.
+        """
+        buf_lower = buf.lower()
+        best_idx = -1
+        best_len = 0
+        for tag in self._OPEN_TAGS:
+            tag_lower = tag.lower()
+            search_start = 0
+            while True:
+                idx = buf_lower.find(tag_lower, search_start)
+                if idx == -1:
+                    break
+                if self._is_block_boundary(buf, idx, already_emitted):
+                    if best_idx == -1 or idx < best_idx:
+                        best_idx = idx
+                        best_len = len(tag)
+                    break  # first boundary hit for this tag is enough
+                search_start = idx + 1
+        return best_idx, best_len
+
+    def _is_block_boundary(
+        self, buf: str, idx: int, already_emitted: list[str],
+    ) -> bool:
+        """True iff position *idx* in *buf* is a block boundary.
+
+        A block boundary is:
+          - buf position 0 AND the most recent emission ended with
+            a newline (or nothing has been emitted yet)
+          - any position whose preceding text on the current line
+            (since the last newline in buf) is whitespace-only, AND
+            if there is no newline in the preceding buf portion, the
+            most recent prior emission ended with a newline
+        """
+        if idx == 0:
+            # Check whether the last already-emitted chunk in THIS
+            # feed() call ended with a newline, otherwise fall back
+            # to the cross-feed flag.
+            if already_emitted:
+                return already_emitted[-1].endswith("\n")
+            return self._last_emitted_ended_newline
+        preceding = buf[:idx]
+        last_nl = preceding.rfind("\n")
+        if last_nl == -1:
+            # No newline in buf before the tag — boundary only if the
+            # prior emission ended with a newline AND everything since
+            # is whitespace.
+            if already_emitted:
+                prior_newline = already_emitted[-1].endswith("\n")
+            else:
+                prior_newline = self._last_emitted_ended_newline
+            return prior_newline and preceding.strip() == ""
+        # Newline present — text between it and the tag must be
+        # whitespace-only.
+        return preceding[last_nl + 1:].strip() == ""
+
+    @classmethod
+    def _max_partial_suffix(
+        cls, buf: str, tags: Tuple[str, ...],
+    ) -> int:
+        """Return the longest buf-suffix that is a prefix of any tag.
+
+        Only prefixes strictly shorter than the tag itself count
+        (full-length suffixes are the tag and are handled as matches,
+        not held-back partials).  Case-insensitive.
+        """
+        if not buf:
+            return 0
+        buf_lower = buf.lower()
+        max_check = min(len(buf_lower), cls._MAX_TAG_LEN - 1)
+        for i in range(max_check, 0, -1):
+            suffix = buf_lower[-i:]
+            for tag in tags:
+                tag_lower = tag.lower()
+                if len(tag_lower) > i and tag_lower.startswith(suffix):
+                    return i
+        return 0
+
+    @classmethod
+    def _strip_orphan_close_tags(cls, text: str) -> str:
+        """Remove any close tags from *text* (orphan-close handling).
+
+        An orphan close tag has no matching open in the current
+        scrubber state; it's always noise, stripped with any trailing
+        whitespace so the surrounding prose flows naturally.
+        """
+        if "</" not in text:
+            return text
+        text_lower = text.lower()
+        out: list[str] = []
+        i = 0
+        while i < len(text):
+            matched = False
+            if text_lower[i:i + 2] == "</":
+                for tag in cls._CLOSE_TAGS:
+                    tag_lower = tag.lower()
+                    tag_len = len(tag_lower)
+                    if text_lower[i:i + tag_len] == tag_lower:
+                        # Skip the tag and any trailing whitespace,
+                        # matching _strip_think_blocks case 3.
+                        j = i + tag_len
+                        while j < len(text) and text[j] in " \t\n\r":
+                            j += 1
+                        i = j
+                        matched = True
+                        break
+            if not matched:
+                out.append(text[i])
+                i += 1
+        return "".join(out)
@@ -17,6 +17,7 @@ logger = logging.getLogger(__name__)
 # so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
 # become visible instead of piling up as NULL session titles.
 FailureCallback = Callable[[str, BaseException], None]
+TitleCallback = Callable[[str], None]

 _TITLE_PROMPT = (
    "Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
@@ -90,6 +91,7 @@ def auto_title_session(
    assistant_response: str,
    failure_callback: Optional[FailureCallback] = None,
    main_runtime: dict = None,
+    title_callback: Optional[TitleCallback] = None,
 ) -> None:
    """Generate and set a session title if one doesn't already exist.

@@ -119,6 +121,11 @@ def auto_title_session(
    try:
        session_db.set_session_title(session_id, title)
        logger.debug("Auto-generated session title: %s", title)
+        if title_callback is not None:
+            try:
+                title_callback(title)
+            except Exception:
+                logger.debug("Auto-title callback failed", exc_info=True)
    except Exception as e:
        logger.debug("Failed to set auto-generated title: %s", e)

@@ -131,6 +138,7 @@ def maybe_auto_title(
    conversation_history: list,
    failure_callback: Optional[FailureCallback] = None,
    main_runtime: dict = None,
+    title_callback: Optional[TitleCallback] = None,
 ) -> None:
    """Fire-and-forget title generation after the first exchange.

@@ -152,7 +160,11 @@ def maybe_auto_title(
    thread = threading.Thread(
        target=auto_title_session,
        args=(session_db, session_id, user_message, assistant_response),
-        kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
+        kwargs={
+            "failure_callback": failure_callback,
+            "main_runtime": main_runtime,
+            "title_callback": title_callback,
+        },
        daemon=True,
        name="auto-title",
    )
@@ -6,9 +6,16 @@ Usage:
    result = transport.normalize_response(raw_response)
 """

-from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason  # noqa: F401
+from agent.transports.types import (
+    NormalizedResponse,
+    ToolCall,
+    Usage,
+    build_tool_call,
+    map_finish_reason,
+)  # noqa: F401

 _REGISTRY: dict = {}
+_discovered: bool = False


 def register_transport(api_mode: str, transport_cls: type) -> None:
@@ -23,6 +30,9 @@ def get_transport(api_mode: str):
    This allows gradual migration — call sites can check for None
    and fall back to the legacy code path.
    """
+    global _discovered
+    if not _discovered:
+        _discover_transports()
    cls = _REGISTRY.get(api_mode)
    if cls is None:
        # The registry can be partially populated when a specific transport
@@ -38,6 +48,8 @@ def get_transport(api_mode: str):

 def _discover_transports() -> None:
    """Import all transport modules to trigger auto-registration."""
+    global _discovered
+    _discovered = True
    try:
        import agent.transports.anthropic  # noqa: F401
    except ImportError:
@@ -109,7 +109,9 @@ class ChatCompletionsTransport(ProviderTransport):
    def api_mode(self) -> str:
        return "chat_completions"

-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
+    def convert_messages(
+        self, messages: list[dict[str, Any]], **kwargs
+    ) -> list[dict[str, Any]]:
        """Messages are already in OpenAI format — sanitize Codex leaks only.

        Strips Codex Responses API fields (``codex_reasoning_items`` /
@@ -126,7 +128,9 @@ class ChatCompletionsTransport(ProviderTransport):
            tool_calls = msg.get("tool_calls")
            if isinstance(tool_calls, list):
                for tc in tool_calls:
-                    if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
+                    if isinstance(tc, dict) and (
+                        "call_id" in tc or "response_item_id" in tc
+                    ):
                        needs_sanitize = True
                        break
                if needs_sanitize:
@@ -149,39 +153,41 @@ class ChatCompletionsTransport(ProviderTransport):
                        tc.pop("response_item_id", None)
        return sanitized

-    def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Tools are already in OpenAI format — identity."""
        return tools

    def build_kwargs(
        self,
        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None = None,
        **params,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
        """Build chat.completions.create() kwargs.

-        This is the most complex transport method — it handles ~16 providers
-        via params rather than subclasses.
-
-        params:
+        params (all optional):
            timeout: float — API call timeout
            max_tokens: int | None — user-configured max tokens
-            ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
+            ephemeral_max_output_tokens: int | None — one-shot override
            max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
            reasoning_config: dict | None
            request_overrides: dict | None
            session_id: str | None
-            qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
            model_lower: str — lowercase model name for pattern matching
-            # Provider detection flags (all optional, default False)
+            # Provider profile path (all per-provider quirks live in providers/)
+            provider_profile: ProviderProfile | None — when present, delegates to
+                _build_kwargs_from_profile(); all flag params below are bypassed.
+            # Legacy-path flags — only used when provider_profile is None
+            # (i.e. custom / unregistered providers). Known providers all go
+            # through provider_profile.
            is_openrouter: bool
            is_nous: bool
            is_qwen_portal: bool
            is_github_models: bool
            is_nvidia_nim: bool
            is_kimi: bool
+            is_tokenhub: bool
            is_lmstudio: bool
            is_custom_provider: bool
            ollama_num_ctx: int | None
@@ -190,6 +196,7 @@ class ChatCompletionsTransport(ProviderTransport):
            # Qwen-specific
            qwen_prepare_fn: callable | None — runs AFTER codex sanitization
            qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
+            qwen_session_metadata: dict | None
            # Temperature
            fixed_temperature: Any — from _fixed_temperature_for_model()
            omit_temperature: bool
@@ -199,28 +206,21 @@ class ChatCompletionsTransport(ProviderTransport):
            lmstudio_reasoning_options: list[str] | None  # raw allowed_options from /api/v1/models
            # Claude on OpenRouter/Nous max output
            anthropic_max_output: int | None
-            # Extra
-            extra_body_additions: dict | None — pre-built extra_body entries
+            extra_body_additions: dict | None
        """
        # Codex sanitization: drop reasoning_items / call_id / response_item_id
        sanitized = self.convert_messages(messages)

-        # Qwen portal prep AFTER codex sanitization.  If sanitize already
-        # deepcopied, reuse that copy via the in-place variant to avoid a
-        # second deepcopy.
-        is_qwen = params.get("is_qwen_portal", False)
-        if is_qwen:
-            qwen_prep = params.get("qwen_prepare_fn")
-            qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
-            if sanitized is messages:
-                if qwen_prep is not None:
-                    sanitized = qwen_prep(sanitized)
-            else:
-                # Already deepcopied — transform in place
-                if qwen_prep_inplace is not None:
-                    qwen_prep_inplace(sanitized)
-                elif qwen_prep is not None:
-                    sanitized = qwen_prep(sanitized)
+        # ── Provider profile: single-path when present ──────────────────
+        _profile = params.get("provider_profile")
+        if _profile:
+            return self._build_kwargs_from_profile(
+                _profile, model, sanitized, tools, params
+            )
+
+        # ── Legacy fallback (unregistered / unknown provider) ───────────
+        # Reached only when get_provider_profile() returned None.
+        # Known providers always go through the profile path above.

        # Developer role swap for GPT-5/Codex models
        model_lower = params.get("model_lower", (model or "").lower())
@@ -233,7 +233,7 @@ class ChatCompletionsTransport(ProviderTransport):
            sanitized = list(sanitized)
            sanitized[0] = {**sanitized[0], "role": "developer"}

-        api_kwargs: Dict[str, Any] = {
+        api_kwargs: dict[str, Any] = {
            "model": model,
            "messages": sanitized,
        }
@@ -242,19 +242,6 @@ class ChatCompletionsTransport(ProviderTransport):
        if timeout is not None:
            api_kwargs["timeout"] = timeout

-        # Temperature
-        fixed_temp = params.get("fixed_temperature")
-        omit_temp = params.get("omit_temperature", False)
-        if omit_temp:
-            api_kwargs.pop("temperature", None)
-        elif fixed_temp is not None:
-            api_kwargs["temperature"] = fixed_temp
-
-        # Qwen metadata (caller precomputes {sessionId, promptId})
-        qwen_meta = params.get("qwen_session_metadata")
-        if qwen_meta and is_qwen:
-            api_kwargs["metadata"] = qwen_meta
-
        # Tools
        if tools:
            # Moonshot/Kimi uses a stricter flavored JSON Schema.  Rewriting
@@ -278,13 +265,6 @@ class ChatCompletionsTransport(ProviderTransport):
            api_kwargs.update(max_tokens_fn(ephemeral))
        elif max_tokens is not None and max_tokens_fn:
            api_kwargs.update(max_tokens_fn(max_tokens))
-        elif is_nvidia_nim and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(16384))
-        elif is_qwen and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(65536))
-        elif is_kimi and max_tokens_fn:
-            # Kimi/Moonshot: 32000 matches Kimi CLI's default
-            api_kwargs.update(max_tokens_fn(32000))
        elif anthropic_max_out is not None:
            api_kwargs["max_tokens"] = anthropic_max_out

@@ -331,7 +311,7 @@ class ChatCompletionsTransport(ProviderTransport):
                api_kwargs["reasoning_effort"] = _lm_effort

        # extra_body assembly
-        extra_body: Dict[str, Any] = {}
+        extra_body: dict[str, Any] = {}

        is_openrouter = params.get("is_openrouter", False)
        is_nous = params.get("is_nous", False)
@@ -361,35 +341,7 @@ class ChatCompletionsTransport(ProviderTransport):
                if gh_reasoning is not None:
                    extra_body["reasoning"] = gh_reasoning
            else:
-                if reasoning_config is not None:
-                    rc = dict(reasoning_config)
-                    if is_nous and rc.get("enabled") is False:
-                        pass  # omit for Nous when disabled
-                    else:
-                        extra_body["reasoning"] = rc
-                else:
-                    extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-
-        if is_nous:
-            extra_body["tags"] = ["product=hermes-agent"]
-
-        # Ollama num_ctx
-        ollama_ctx = params.get("ollama_num_ctx")
-        if ollama_ctx:
-            options = extra_body.get("options", {})
-            options["num_ctx"] = ollama_ctx
-            extra_body["options"] = options
-
-        # Ollama/custom think=false
-        if params.get("is_custom_provider", False):
-            if reasoning_config and isinstance(reasoning_config, dict):
-                _effort = (reasoning_config.get("effort") or "").strip().lower()
-                _enabled = reasoning_config.get("enabled", True)
-                if _effort == "none" or _enabled is False:
-                    extra_body["think"] = False
-
-        if is_qwen:
-            extra_body["vl_high_resolution_images"] = True
+                extra_body["reasoning"] = {"enabled": True, "effort": "medium"}

        if provider_name == "gemini":
            raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
@@ -423,6 +375,120 @@ class ChatCompletionsTransport(ProviderTransport):

        return api_kwargs

+    def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
+        """Build API kwargs using a ProviderProfile — single path, no legacy flags.
+
+        This method replaces the entire flag-based kwargs assembly when a
+        provider_profile is passed. Every quirk comes from the profile object.
+        """
+        from providers.base import OMIT_TEMPERATURE
+
+        # Message preprocessing
+        sanitized = profile.prepare_messages(sanitized)
+
+        # Developer role swap — model-name-based, applies to all providers
+        _model_lower = (model or "").lower()
+        if (
+            sanitized
+            and isinstance(sanitized[0], dict)
+            and sanitized[0].get("role") == "system"
+            and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
+        ):
+            sanitized = list(sanitized)
+            sanitized[0] = {**sanitized[0], "role": "developer"}
+
+        api_kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": sanitized,
+        }
+
+        # Temperature
+        if profile.fixed_temperature is OMIT_TEMPERATURE:
+            pass  # Don't include temperature at all
+        elif profile.fixed_temperature is not None:
+            api_kwargs["temperature"] = profile.fixed_temperature
+        else:
+            # Use caller's temperature if provided
+            temp = params.get("temperature")
+            if temp is not None:
+                api_kwargs["temperature"] = temp
+
+        # Timeout
+        timeout = params.get("timeout")
+        if timeout is not None:
+            api_kwargs["timeout"] = timeout
+
+        # Tools — apply Moonshot/Kimi schema sanitization regardless of path
+        if tools:
+            if is_moonshot_model(model):
+                tools = sanitize_moonshot_tools(tools)
+            api_kwargs["tools"] = tools
+
+        # max_tokens resolution — priority: ephemeral > user > profile default
+        max_tokens_fn = params.get("max_tokens_param_fn")
+        ephemeral = params.get("ephemeral_max_output_tokens")
+        user_max = params.get("max_tokens")
+        anthropic_max = params.get("anthropic_max_output")
+
+        if ephemeral is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(ephemeral))
+        elif user_max is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(user_max))
+        elif profile.default_max_tokens and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
+        elif anthropic_max is not None:
+            api_kwargs["max_tokens"] = anthropic_max
+
+        # Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.)
+        reasoning_config = params.get("reasoning_config")
+        extra_body_from_profile, top_level_from_profile = (
+            profile.build_api_kwargs_extras(
+                reasoning_config=reasoning_config,
+                supports_reasoning=params.get("supports_reasoning", False),
+                qwen_session_metadata=params.get("qwen_session_metadata"),
+                model=model,
+                ollama_num_ctx=params.get("ollama_num_ctx"),
+            )
+        )
+        api_kwargs.update(top_level_from_profile)
+
+        # extra_body assembly
+        extra_body: dict[str, Any] = {}
+
+        # Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
+        profile_body = profile.build_extra_body(
+            session_id=params.get("session_id"),
+            provider_preferences=params.get("provider_preferences"),
+            model=model,
+            base_url=params.get("base_url"),
+            reasoning_config=reasoning_config,
+        )
+        if profile_body:
+            extra_body.update(profile_body)
+
+        # Profile's reasoning/thinking extra_body entries
+        if extra_body_from_profile:
+            extra_body.update(extra_body_from_profile)
+
+        # Merge any pre-built extra_body additions from the caller
+        additions = params.get("extra_body_additions")
+        if additions:
+            extra_body.update(additions)
+
+        # Request overrides (user config)
+        overrides = params.get("request_overrides")
+        if overrides:
+            for k, v in overrides.items():
+                if k == "extra_body" and isinstance(v, dict):
+                    extra_body.update(v)
+                else:
+                    api_kwargs[k] = v
+
+        if extra_body:
+            api_kwargs["extra_body"] = extra_body
+
+        return api_kwargs
+
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize OpenAI ChatCompletion to NormalizedResponse.

@@ -444,7 +510,7 @@ class ChatCompletionsTransport(ProviderTransport):
                # Gemini 3 thinking models attach extra_content with
                # thought_signature — without replay on the next turn the API
                # rejects the request with 400.
-                tc_provider_data: Dict[str, Any] = {}
+                tc_provider_data: dict[str, Any] = {}
                extra = getattr(tc, "extra_content", None)
                if extra is None and hasattr(tc, "model_extra"):
                    extra = (tc.model_extra or {}).get("extra_content")
@@ -455,12 +521,14 @@ class ChatCompletionsTransport(ProviderTransport):
                        except Exception:
                            pass
                    tc_provider_data["extra_content"] = extra
-                tool_calls.append(ToolCall(
-                    id=tc.id,
-                    name=tc.function.name,
-                    arguments=tc.function.arguments,
-                    provider_data=tc_provider_data or None,
-                ))
+                tool_calls.append(
+                    ToolCall(
+                        id=tc.id,
+                        name=tc.function.name,
+                        arguments=tc.function.arguments,
+                        provider_data=tc_provider_data or None,
+                    )
+                )

        usage = None
        if hasattr(response, "usage") and response.usage:
@@ -508,7 +576,7 @@ class ChatCompletionsTransport(ProviderTransport):
            return False
        return True

-    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+    def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
        """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
        usage = getattr(response, "usage", None)
        if usage is None:
@@ -12,7 +12,7 @@ from __future__ import annotations

 import json
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
+from typing import Any


@dataclass
@@ -32,10 +32,10 @@ class ToolCall:
    * Others: ``None``
    """

-    id: Optional[str]
+    id: str | None
    name: str
    arguments: str  # JSON string
-    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+    provider_data: dict[str, Any] | None = field(default=None, repr=False)

    # ── Backward compatibility ──────────────────────────────────
    # The agent loop reads tc.function.name / tc.function.arguments
@@ -47,17 +47,17 @@ class ToolCall:
        return "function"

    @property
-    def function(self) -> "ToolCall":
+    def function(self) -> ToolCall:
        """Return self so tc.function.name / tc.function.arguments work."""
        return self

    @property
-    def call_id(self) -> Optional[str]:
+    def call_id(self) -> str | None:
        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
        return (self.provider_data or {}).get("call_id")

    @property
-    def response_item_id(self) -> Optional[str]:
+    def response_item_id(self) -> str | None:
        """Codex response_item_id from provider_data."""
        return (self.provider_data or {}).get("response_item_id")

@@ -101,18 +101,18 @@ class NormalizedResponse:
    * Others: ``None``
    """

-    content: Optional[str]
-    tool_calls: Optional[List[ToolCall]]
+    content: str | None
+    tool_calls: list[ToolCall] | None
    finish_reason: str  # "stop", "tool_calls", "length", "content_filter"
-    reasoning: Optional[str] = None
-    usage: Optional[Usage] = None
-    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+    reasoning: str | None = None
+    usage: Usage | None = None
+    provider_data: dict[str, Any] | None = field(default=None, repr=False)

    # ── Backward compatibility ──────────────────────────────────
    # The shim _nr_to_assistant_message() mapped these from provider_data.
    # These properties let NormalizedResponse pass through directly.
    @property
-    def reasoning_content(self) -> Optional[str]:
+    def reasoning_content(self) -> str | None:
        pd = self.provider_data or {}
        return pd.get("reasoning_content")

@@ -136,8 +136,9 @@ class NormalizedResponse:
 # Factory helpers
 # ---------------------------------------------------------------------------

+
 def build_tool_call(
-    id: Optional[str],
+    id: str | None,
    name: str,
    arguments: Any,
    **provider_fields: Any,
@@ -151,7 +152,7 @@ def build_tool_call(
    return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)


-def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
+def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
    """Translate a provider-specific stop reason to the normalised set.

    Falls back to ``"stop"`` for unknown or ``None`` reasons.
@@ -27,6 +27,7 @@ import tempfile
 import time
 import uuid
 import textwrap
+from collections import deque
 from urllib.parse import unquote, urlparse
 from contextlib import contextmanager
 from pathlib import Path
@@ -298,6 +299,7 @@ def load_cli_config() -> Dict[str, Any]:
        "browser": {
            "inactivity_timeout": 120,  # Auto-cleanup inactive browser sessions after 2 min
            "record_sessions": False,  # Auto-record browser sessions as WebM videos
+            "engine": "auto",  # Browser engine: auto (Chrome), lightpanda, chrome
        },
        "compression": {
            "enabled": True,      # Auto-compress when approaching context limit
@@ -334,6 +336,8 @@ def load_cli_config() -> Dict[str, Any]:
            "show_reasoning": False,
            "streaming": True,
            "busy_input_mode": "interrupt",
+            "persistent_output": True,
+            "persistent_output_max_lines": 200,

            "skin": "default",
        },
@@ -940,6 +944,18 @@ def _run_state_db_auto_maintenance(session_db) -> None:
        except Exception as _prune_exc:
            logger.debug("Ghost session prune skipped: %s", _prune_exc)

+        # One-time finalize of orphaned compression continuations (#20001).
+        try:
+            if not session_db.get_meta("orphaned_compression_finalize_v1"):
+                finalized = session_db.finalize_orphaned_compression_sessions()
+                session_db.set_meta("orphaned_compression_finalize_v1", "1")
+                if finalized:
+                    logger.info(
+                        "Finalized %d orphaned compression sessions", finalized
+                    )
+        except Exception as _finalize_exc:
+            logger.debug("Orphan compression finalize skipped: %s", _finalize_exc)
+
        cfg = (_load_full_config().get("sessions") or {})
        if not cfg.get("auto_prune", False):
            return
@@ -971,6 +987,7 @@ def _run_checkpoint_auto_maintenance() -> None:
            retention_days=int(cfg.get("retention_days", 7)),
            min_interval_hours=int(cfg.get("min_interval_hours", 24)),
            delete_orphans=bool(cfg.get("delete_orphans", True)),
+            max_total_size_mb=int(cfg.get("max_total_size_mb", 500)),
        )
    except Exception as exc:
        logger.debug("checkpoint auto-maintenance skipped: %s", exc)
@@ -1226,6 +1243,28 @@ def _strip_markdown_syntax(text: str) -> str:
    return plain.strip("\n")


+_WINDOWS_PATH_WITH_DOT_SEGMENT_RE = re.compile(
+    r"(?i)(?:\b[a-z]:\\|\\\\)[^\s`]*\\\.[^\s`]*"
+)
+
+
+def _preserve_windows_dot_segments_for_markdown(text: str) -> str:
+    r"""Protect ``\.`` separators inside Windows-style paths from Markdown.
+
+    A backslash followed by a dot is a CommonMark escape for a literal dot, so
+    Rich Markdown would show ``D:\repo\.ai`` as ``D:\repo.ai``.  Inside each
+    token matched by ``_WINDOWS_PATH_WITH_DOT_SEGMENT_RE`` (a drive-letter or
+    UNC-looking run of non-space, non-backtick characters containing ``\.``),
+    every backslash that directly precedes a dot and is not itself preceded
+    by a backslash is doubled.  Text outside such tokens, for example
+    ``1\. not a list``, is returned unchanged.
+    """
+    # Fast path: no backslash-dot pair anywhere means nothing can match.
+    if text.find("\\.") == -1:
+        return text
+
+    def _double_separator(path_match: re.Match[str]) -> str:
+        path_token = path_match.group(0)
+        # The lookbehind keeps already-doubled separators from growing again.
+        return re.sub(r"(?<!\\)\\(?=\.)", r"\\\\", path_token)
+
+    return _WINDOWS_PATH_WITH_DOT_SEGMENT_RE.sub(_double_separator, text)
+
+
 def _render_final_assistant_content(text: str, mode: str = "render"):
    """Render final assistant content as markdown, stripped text, or raw text."""
    from rich.markdown import Markdown
@@ -1237,9 +1276,91 @@ def _render_final_assistant_content(text: str, mode: str = "render"):
        return _rich_text_from_ansi(text or "")

    plain = _rich_text_from_ansi(text or "").plain
+    plain = _preserve_windows_dot_segments_for_markdown(plain)
    return Markdown(plain)


+# Module-level state for the replayable CLI output history.
+_OUTPUT_HISTORY_ENABLED = True
+_OUTPUT_HISTORY_REPLAYING = False
+_OUTPUT_HISTORY_SUPPRESSED = False
+_OUTPUT_HISTORY_MAX_LINES = 200
+_OUTPUT_HISTORY = deque(maxlen=_OUTPUT_HISTORY_MAX_LINES)
+# Terminal control sequences stripped before text is stored in the history.
+# The OSC alternative (ESC ] ... ended by BEL or ESC \) must be tried first:
+# "]" also falls inside the range [@-Z\\-_], which would otherwise match only
+# "ESC ]" and leave the OSC payload behind in the recorded text.  The payload
+# class excludes ESC so one OSC cannot swallow text up to a later terminator.
+_ANSI_CONTROL_RE = re.compile(
+    r"\x1b(?:\][^\x07\x1b]*(?:\x07|\x1b\\)|\[[0-?]*[ -/]*[@-~]|[@-Z\\-_])"
+)
+
+
+def _coerce_output_history_limit(value) -> int:
+    """Coerce a configured history size to an int of at least 10.
+
+    Values that ``int()`` cannot convert (``None``, non-numeric strings, NaN,
+    infinite floats) fall back to 200.
+    """
+    try:
+        return max(10, int(value))
+    except (TypeError, ValueError, OverflowError):
+        # int(float("inf")) raises OverflowError, e.g. for a YAML ``.inf``.
+        return 200
+
+
+def _configure_output_history(enabled: bool, max_lines=200) -> None:
+    """Set the output-history switches and start a fresh history buffer.
+
+    ``enabled`` is stored as a bool, ``max_lines`` is normalised through
+    ``_coerce_output_history_limit``, and the module-level deque is replaced
+    by a new, empty one bounded to that size.
+    """
+    global _OUTPUT_HISTORY_ENABLED, _OUTPUT_HISTORY_MAX_LINES, _OUTPUT_HISTORY
+    _OUTPUT_HISTORY_ENABLED = True if enabled else False
+    line_cap = _coerce_output_history_limit(max_lines)
+    _OUTPUT_HISTORY_MAX_LINES = line_cap
+    _OUTPUT_HISTORY = deque(maxlen=line_cap)
+
+
+def _clear_output_history() -> None:
+    """Drop every recorded output-history entry, keeping the same buffer."""
+    _OUTPUT_HISTORY.clear()
+
+
+@contextmanager
+def _suspend_output_history():
+    """Context manager that pauses output-history recording.
+
+    Sets ``_OUTPUT_HISTORY_SUPPRESSED`` for the duration of the ``with`` body
+    and afterwards restores the value seen on entry (also when the body
+    raises), so nested uses unwind to the outer state.
+    """
+    global _OUTPUT_HISTORY_SUPPRESSED
+    previous_state, _OUTPUT_HISTORY_SUPPRESSED = _OUTPUT_HISTORY_SUPPRESSED, True
+    try:
+        yield
+    finally:
+        _OUTPUT_HISTORY_SUPPRESSED = previous_state
+
+
+def _record_output_history_entry(entry) -> None:
+    """Append ``entry`` to the history buffer unless recording is inactive.
+
+    Recording is inactive when history is disabled, while a replay is in
+    progress, or inside ``_suspend_output_history()``.
+    """
+    recording_active = (
+        _OUTPUT_HISTORY_ENABLED
+        and not _OUTPUT_HISTORY_REPLAYING
+        and not _OUTPUT_HISTORY_SUPPRESSED
+    )
+    if recording_active:
+        _OUTPUT_HISTORY.append(entry)
+
+
+def _record_output_history(text: str) -> None:
+    """Record printed text line by line with terminal control codes removed.
+
+    ``text`` is stringified, stripped of everything ``_ANSI_CONTROL_RE``
+    matches, of carriage returns and of trailing newlines; if anything is
+    left, each remaining line is handed to ``_record_output_history_entry``.
+    Does nothing while history is disabled, replaying, or suspended.
+    """
+    if _OUTPUT_HISTORY_REPLAYING or _OUTPUT_HISTORY_SUPPRESSED or not _OUTPUT_HISTORY_ENABLED:
+        return
+    stripped = _ANSI_CONTROL_RE.sub("", str(text))
+    stripped = stripped.replace("\r", "").rstrip("\n")
+    if stripped:
+        for history_line in stripped.splitlines():
+            _record_output_history_entry(history_line)
+
+
+def _replay_output_history() -> None:
+    """Print the recorded history again, e.g. after the screen was cleared.
+
+    Plain entries are printed as one line each.  Callable entries are invoked
+    to produce their lines (a returned string is split into lines); a callable
+    that raises is skipped.  ``_OUTPUT_HISTORY_REPLAYING`` is set while the
+    replay runs so the re-printed lines are not recorded a second time.
+    """
+    global _OUTPUT_HISTORY_REPLAYING
+    if not (_OUTPUT_HISTORY_ENABLED and _OUTPUT_HISTORY):
+        return
+    _OUTPUT_HISTORY_REPLAYING = True
+    try:
+        # Iterate over a snapshot so the deque itself is never walked live.
+        snapshot = tuple(_OUTPUT_HISTORY)
+        for item in snapshot:
+            if not callable(item):
+                rendered = [item]
+            else:
+                try:
+                    rendered = item()
+                except Exception:
+                    continue
+                if isinstance(rendered, str):
+                    rendered = rendered.splitlines()
+            for row in rendered:
+                _pt_print(_PT_ANSI(str(row)))
+    except Exception:
+        # Best effort: a failed repaint must not break the caller.
+        pass
+    finally:
+        _OUTPUT_HISTORY_REPLAYING = False
+
+
 def _cprint(text: str):
    """Print ANSI-colored text through prompt_toolkit's native renderer.

@@ -1256,6 +1377,8 @@ def _cprint(text: str):
    ``loop.call_soon_threadsafe``, which pauses the input area, prints
    the line above it, and redraws the prompt cleanly.
    """
+    _record_output_history(text)
+
    try:
        from prompt_toolkit.application import get_app_or_none, run_in_terminal
    except Exception:
@@ -1427,7 +1550,21 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:
    except Exception:
        resolved = path

-    if not resolved.exists() or not resolved.is_file():
+    # Path.exists() / is_file() invoke os.stat(), which raises OSError when
+    # the candidate string is structurally invalid as a path — most commonly
+    # ENAMETOOLONG (errno 63 on macOS, errno 36 on Linux) when the input
+    # exceeds NAME_MAX (typically 255 bytes). This bites pasted slash
+    # commands like `/goal <long prose>` because `_detect_file_drop()`'s
+    # `starts_like_path` prefilter accepts any input starting with `/`,
+    # then this resolver tries to stat it before short-circuiting on the
+    # slash-command path. Without this guard the OSError propagates up to
+    # the process_loop catch-all in _interactive_loop and the user input
+    # is silently lost (the warning ends up in agent.log but the user sees
+    # nothing — the prompt just hangs).
+    try:
+        if not resolved.exists() or not resolved.is_file():
+            return None
+    except OSError:
        return None
    return resolved

@@ -1505,6 +1642,10 @@ def _detect_file_drop(user_input: str) -> "dict | None":
        or stripped.startswith('"~')
        or stripped.startswith("'/")
        or stripped.startswith("'~")
+        or stripped.startswith('"./')
+        or stripped.startswith('"../')
+        or stripped.startswith("'./")
+        or stripped.startswith("'../")
        or (len(stripped) >= 4 and stripped[0] in ("'", '"') and stripped[2] == ":" and stripped[3] in ("\\", "/") and stripped[1].isalpha())
    )
    if not starts_like_path:
@@ -1633,6 +1774,20 @@ _TERMINAL_INPUT_MODE_RESET_SEQ = (
 )


+def _bind_prompt_submit_keys(kb, handler) -> None:
+    """Register ``handler`` for both the CR and LF forms of terminal Enter.
+
+    ``enter`` and ``c-j`` are bound, in that order, via ``kb.add(key)(handler)``.
+    """
+    kb.add("enter")(handler)
+    kb.add("c-j")(handler)
+
+
+def _disable_prompt_toolkit_cpr_warning(app) -> None:
+    """Clear the renderer's CPR-not-supported callback, ignoring failures.
+
+    With the callback set to ``None`` prompt_toolkit can fall back from CPR
+    without printing its warning into the prompt.  Any exception while
+    reaching or assigning the attribute is swallowed.
+    """
+    try:
+        renderer = app.renderer
+        renderer.cpr_not_supported_callback = None
+    except Exception:
+        pass
+
+
 def _strip_leaked_terminal_responses_with_meta(text: str) -> tuple[str, bool]:
    """Strip leaked terminal control-response sequences from user input.

@@ -1863,8 +2018,8 @@ _skill_commands = scan_skill_commands()
 def _get_plugin_cmd_handler_names() -> set:
    """Return plugin command names (without slash prefix) for dispatch matching."""
    try:
-        from hermes_cli.plugins import get_plugin_manager
-        return set(get_plugin_manager()._plugin_commands.keys())
+        from hermes_cli.plugins import get_plugin_commands
+        return set(get_plugin_commands().keys())
    except Exception:
        return set()

@@ -2008,6 +2163,10 @@ class HermesCLI:
        self.bell_on_complete = CLI_CONFIG["display"].get("bell_on_complete", False)
        # show_reasoning: display model thinking/reasoning before the response
        self.show_reasoning = CLI_CONFIG["display"].get("show_reasoning", False)
+        _configure_output_history(
+            enabled=CLI_CONFIG["display"].get("persistent_output", True),
+            max_lines=CLI_CONFIG["display"].get("persistent_output_max_lines", 200),
+        )
        # busy_input_mode: "interrupt" (Enter interrupts current run),
        # "queue" (Enter queues for next turn), or "steer" (Enter injects
        # mid-run via /steer, arriving after the next tool call).
@@ -2118,7 +2277,10 @@ class HermesCLI:
        elif CLI_CONFIG.get("max_turns"):  # Backwards compat: root-level max_turns
            self.max_turns = CLI_CONFIG["max_turns"]
        elif os.getenv("HERMES_MAX_ITERATIONS"):
-            self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS"))
+            try:
+                self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS", ""))
+            except (TypeError, ValueError):
+                self.max_turns = 90
        else:
            self.max_turns = 90
        
@@ -2140,7 +2302,9 @@ class HermesCLI:
        if isinstance(cp_cfg, bool):
            cp_cfg = {"enabled": cp_cfg}
        self.checkpoints_enabled = checkpoints or cp_cfg.get("enabled", False)
-        self.checkpoint_max_snapshots = cp_cfg.get("max_snapshots", 50)
+        self.checkpoint_max_snapshots = cp_cfg.get("max_snapshots", 20)
+        self.checkpoint_max_total_size_mb = cp_cfg.get("max_total_size_mb", 500)
+        self.checkpoint_max_file_size_mb = cp_cfg.get("max_file_size_mb", 10)
        self.pass_session_id = pass_session_id
        # --ignore-rules: honor either the constructor flag or the env var set
        # by `hermes chat --ignore-rules` in hermes_cli/main.py. When true we
@@ -2282,6 +2446,9 @@ class HermesCLI:

        # Status bar visibility (toggled via /statusbar)
        self._status_bar_visible = True
+        self._resize_recovery_lock = threading.Lock()
+        self._resize_recovery_timer = None
+        self._resize_recovery_pending = False

        # Background task tracking: {task_id: threading.Thread}
        self._background_tasks: Dict[str, threading.Thread] = {}
@@ -2289,6 +2456,8 @@ class HermesCLI:

    def _invalidate(self, min_interval: float = 0.25) -> None:
        """Throttled UI repaint — prevents terminal blinking on slow/SSH connections."""
+        if getattr(self, "_resize_recovery_pending", False):
+            return
        now = time.monotonic()
        if hasattr(self, "_app") and self._app and (now - self._last_invalidate) >= min_interval:
            self._last_invalidate = now
@@ -2312,11 +2481,25 @@ class HermesCLI:
        app = getattr(self, "_app", None)
        if not app:
            return
+        self._clear_prompt_toolkit_screen(app)
+        _replay_output_history()
+        try:
+            app.invalidate()
+        except Exception:
+            pass
+
+    def _clear_prompt_toolkit_screen(self, app, *, rebuild_scrollback: bool = False) -> None:
+        """Clear the terminal and reset prompt_toolkit renderer state."""
        try:
            renderer = app.renderer
            out = renderer.output
            out.reset_attributes()
            out.erase_screen()
+            if rebuild_scrollback:
+                try:
+                    out.write_raw("\x1b[3J")
+                except Exception:
+                    pass
            out.cursor_goto(0, 0)
            out.flush()
            # Drop prompt_toolkit's cached screen + cursor state so the
@@ -2325,10 +2508,57 @@ class HermesCLI:
            renderer.reset(leave_alternate_screen=False)
        except Exception:
            pass
+
+    def _recover_after_resize(self, app, original_on_resize) -> None:
+        """Recover a resized classic CLI without desynchronizing cursor state.
+
+        Clears the terminal (including the scrollback wipe), replays the
+        recorded output history, and only then invokes ``original_on_resize``,
+        the callable handed in by the caller.
+        """
+        # rebuild_scrollback=True makes the clear also emit ESC[3J.
+        self._clear_prompt_toolkit_screen(app, rebuild_scrollback=True)
+        _replay_output_history()
+        original_on_resize()
+
+    def _schedule_resize_recovery(self, app, original_on_resize, delay: float = 0.12) -> None:
+        """Debounce resize redraws so footer chrome is not stamped into scrollback.
+
+        Each call cancels the previously scheduled timer (if any) and starts a
+        new daemon ``threading.Timer`` for ``delay`` seconds.  When the newest
+        timer fires, ``_recover_after_resize`` runs, preferably on ``app.loop``
+        via ``call_soon_threadsafe``.  While a recovery is pending,
+        ``_resize_recovery_pending`` is True, which makes ``_invalidate``
+        return early.  If scheduling itself fails, the pending flag is cleared
+        and the recovery runs immediately.
+        """
        try:
-            app.invalidate()
+            # NOTE(review): old_timer is read before the lock is taken, which
+            # is only safe if every caller runs on the same thread. TODO confirm.
+            old_timer = getattr(self, "_resize_recovery_timer", None)
+            lock = getattr(self, "_resize_recovery_lock", None)
+            if lock is None:
+                # Instance may lack the attribute; create the lock lazily.
+                lock = threading.Lock()
+                self._resize_recovery_lock = lock
+
+            def _timer_fired(timer_ref):
+                def _run_recovery():
+                    with lock:
+                        # A newer resize replaced this timer: leave the work
+                        # (and the pending flag) to the newer one.
+                        if getattr(self, "_resize_recovery_timer", None) is not timer_ref:
+                            return
+                        self._resize_recovery_timer = None
+                        self._resize_recovery_pending = False
+                    # Run the redraw outside the lock.
+                    self._recover_after_resize(app, original_on_resize)
+
+                try:
+                    loop = app.loop  # type: ignore[attr-defined]
+                except Exception:
+                    loop = None
+                if loop is not None:
+                    try:
+                        # Hand the redraw to the application's event loop.
+                        loop.call_soon_threadsafe(_run_recovery)
+                        return
+                    except Exception:
+                        pass
+                # No usable loop: run the recovery directly in this thread.
+                _run_recovery()
+
+            with lock:
+                if old_timer is not None:
+                    try:
+                        old_timer.cancel()
+                    except Exception:
+                        pass
+                self._resize_recovery_pending = True
+                # The lambda looks up ``timer`` when it runs, i.e. after the
+                # assignment on this line has completed.
+                timer = threading.Timer(delay, lambda: _timer_fired(timer))
+                timer.daemon = True
+                self._resize_recovery_timer = timer
+                timer.start()
        except Exception:
-            pass
+            # Scheduling failed: unblock _invalidate and recover right away.
+            self._resize_recovery_pending = False
+            self._recover_after_resize(app, original_on_resize)

    def _status_bar_context_style(self, percent_used: Optional[int]) -> str:
        if percent_used is None:
@@ -2546,29 +2776,68 @@ class HermesCLI:
            elapsed = time.monotonic() - t0
            if elapsed >= 60:
                _m, _s = int(elapsed // 60), int(elapsed % 60)
-                elapsed_str = f"{_m}m {_s}s"
+                # Fixed-width timer to avoid status-line wrap jitter while
+                # scrolling/repainting (e.g. 01m05s, 12m09s).
+                elapsed_str = f"{_m:02d}m{_s:02d}s"
            else:
-                elapsed_str = f"{elapsed:.1f}s"
+                # Keep width stable before the 60s rollover as well.
+                elapsed_str = f"{elapsed:5.1f}s"
            return f"  {txt}  ({elapsed_str})"
        return f"  {txt}"

+    def _voice_record_key_label(self) -> str:
+        """Return the UI label for the configured voice push-to-talk key.
+
+        Every voice-facing status line, placeholder and recording hint goes
+        through this helper so they all show the same text as the registered
+        prompt_toolkit binding.
+
+        The label comes from the cache filled by
+        ``set_voice_record_key_cache`` at startup instead of being re-read
+        from config on each call (Copilot round-13 on #19835), because:
+
+        * the key binding is registered once at session start, so a label
+          re-read after a config edit could advertise a shortcut that is
+          not the one actually bound; and
+        * this runs on the hot render path (status bar and composer
+          placeholder are invalidated every 150ms during recording).
+
+        Falls back to ``"Ctrl+B"`` when the cache is missing or empty.
+        """
+        cached_label = getattr(self, "_voice_record_key_display_cache", None)
+        if cached_label:
+            return cached_label
+        return "Ctrl+B"
+
+    def set_voice_record_key_cache(self, raw_key: object) -> None:
+        """Fill the voice label cache from a raw ``voice.record_key`` value.
+
+        Meant to be called at CLI startup once the prompt_toolkit binding is
+        registered, so the cached label matches the live binding.  The label
+        is produced by ``format_voice_record_key_for_status``; if importing or
+        calling it fails, ``"Ctrl+B"`` is cached instead.
+        """
+        display_label = "Ctrl+B"
+        try:
+            from hermes_cli.voice import format_voice_record_key_for_status
+            display_label = format_voice_record_key_for_status(raw_key)
+        except Exception:
+            pass
+        self._voice_record_key_display_cache = display_label
+
    def _get_voice_status_fragments(self, width: Optional[int] = None):
        """Return the voice status bar fragments for the interactive TUI."""
        width = width or self._get_tui_terminal_width()
        compact = self._use_minimal_tui_chrome(width=width)
+        # Key label shown in the hints below, taken from the shared helper.
+        label = self._voice_record_key_label()
        if self._voice_recording:
            if compact:
                return [("class:voice-status-recording", " ● REC ")]
-            return [("class:voice-status-recording", " ● REC  Ctrl+B to stop ")]
+            return [("class:voice-status-recording", f" ● REC  {label} to stop ")]
        if self._voice_processing:
            if compact:
                return [("class:voice-status", " ◉ STT ")]
            return [("class:voice-status", " ◉ Transcribing... ")]
+        # Idle: neither recording nor transcribing.
        if compact:
-            return [("class:voice-status", " 🎤 Ctrl+B ")]
+            return [("class:voice-status", f" 🎤 {label} ")]
        tts = " | TTS on" if self._voice_tts else ""
        cont = " | Continuous" if self._voice_continuous else ""
-        return [("class:voice-status", f" 🎤 Voice mode{tts}{cont}  —  Ctrl+B to record ")]
+        return [("class:voice-status", f" 🎤 Voice mode{tts}{cont}  —  {label} to record ")]

    def _build_status_bar_text(self, width: Optional[int] = None) -> str:
        """Return a compact one-line session status string for the TUI footer."""
@@ -3607,6 +3876,8 @@ class HermesCLI:
                thinking_callback=self._on_thinking,
                checkpoints_enabled=self.checkpoints_enabled,
                checkpoint_max_snapshots=self.checkpoint_max_snapshots,
+                checkpoint_max_total_size_mb=self.checkpoint_max_total_size_mb,
+                checkpoint_max_file_size_mb=self.checkpoint_max_file_size_mb,
                pass_session_id=self.pass_session_id,
                skip_context_files=self.ignore_rules,
                skip_memory=self.ignore_rules,
@@ -3964,7 +4235,26 @@ class HermesCLI:
            padding=(0, 1),
            style=_history_text_c,
        )
-        self._console_print(panel)
+        _record_output_history_entry(lambda: self._render_resume_history_panel_lines(panel))
+        with _suspend_output_history():
+            self._console_print(panel)
+
+    def _render_resume_history_panel_lines(self, panel) -> list[str]:
+        """Render ``panel`` off-screen at the current terminal width.
+
+        The panel is printed into an in-memory buffer through a truecolor
+        ``Console`` sized to ``shutil.get_terminal_size`` (80 columns when the
+        size is unavailable), with history recording suspended.  The captured
+        text is returned as a list of lines without trailing newlines, for use
+        when the output history is replayed after a resize.
+        """
+        from io import StringIO
+
+        columns = shutil.get_terminal_size((80, 24)).columns
+        sink = StringIO()
+        offscreen = Console(
+            file=sink,
+            force_terminal=True,
+            color_system="truecolor",
+            highlight=False,
+            width=columns,
+        )
+        with _suspend_output_history():
+            offscreen.print(panel)
+        rendered = sink.getvalue()
+        return rendered.rstrip("\n").splitlines()

    def _try_attach_clipboard_image(self) -> bool:
        """Check clipboard for an image and attach it if found.
@@ -4928,7 +5218,7 @@ class HermesCLI:
        except Exception:
            pass

-    def new_session(self, silent=False):
+    def new_session(self, silent=False, title=None):
        """Start a fresh session with a new session ID and cleared agent state."""
        if self.agent and self.conversation_history:
            # Trigger memory extraction on the old session before session_id rotates.
@@ -4983,6 +5273,28 @@ class HermesCLI:
                    self.agent._session_db_created = True
                except Exception:
                    pass
+                if title and self._session_db:
+                    from hermes_state import SessionDB
+                    try:
+                        sanitized = SessionDB.sanitize_title(title)
+                    except ValueError as e:
+                        _cprint(f"  Title rejected: {e}")
+                        sanitized = None
+                        title = None
+                    if sanitized:
+                        try:
+                            self._session_db.set_session_title(self.session_id, sanitized)
+                            self._pending_title = None
+                            title = sanitized
+                        except ValueError as e:
+                            _cprint(f"  {e} — session started untitled.")
+                            title = None
+                        except Exception:
+                            title = None
+                    elif title is not None:
+                        # sanitize_title returned empty (whitespace-only / unprintable)
+                        _cprint("  Title is empty after cleanup — session started untitled.")
+                        title = None
            # Notify memory providers that session_id rotated to a fresh
            # conversation. reset=True signals providers to flush accumulated
            # per-session state (_session_turns, _turn_counter, _document_id).
@@ -5002,7 +5314,10 @@ class HermesCLI:
            self._notify_session_boundary("on_session_reset")

        if not silent:
-            print("(^_^)v New session started!")
+            if title:
+                print(f"(^_^)v New session started: {title}")
+            else:
+                print("(^_^)v New session started!")

    def _handle_resume_command(self, cmd_original: str) -> None:
        """Handle /resume <session_id_or_title> — switch to a previous session mid-conversation."""
@@ -6278,7 +6593,7 @@ class HermesCLI:
        _cmd_def = _resolve_cmd(_base_word)
        canonical = _cmd_def.name if _cmd_def else _base_word
        
-        if canonical in ("quit", "exit", "q"):
+        if canonical in ("quit", "exit"):
            return False
        elif canonical == "help":
            self.show_help()
@@ -6298,6 +6613,7 @@ class HermesCLI:
            _cprint(f"  {_DIM}✓ UI redrawn{_RST}")
        elif canonical == "clear":
            self.new_session(silent=True)
+            _clear_output_history()
            # Clear terminal screen.  Inside the TUI, Rich's console.clear()
            # goes through patch_stdout's StdoutProxy which swallows the
            # screen-clear escape sequences.  Use prompt_toolkit's output
@@ -6414,7 +6730,9 @@ class HermesCLI:
                else:
                    _cprint("  Session database not available.")
        elif canonical == "new":
-            self.new_session()
+            parts = cmd_original.split(maxsplit=1)
+            title = parts[1].strip() if len(parts) > 1 else None
+            self.new_session(title=title)
        elif canonical == "resume":
            self._handle_resume_command(cmd_original)
        elif canonical == "model":
@@ -7026,7 +7344,20 @@ class HermesCLI:
                if provider is not None:
                    print(f"🌐 Browser: {provider.provider_name()} (cloud)")
                else:
-                    print("🌐 Browser: local headless Chromium (agent-browser)")
+                    # Show engine info for local mode
+                    try:
+                        from tools.browser_tool import _get_browser_engine
+                        engine = _get_browser_engine()
+                    except Exception:
+                        engine = "auto"
+                    if engine == "lightpanda":
+                        print("🌐 Browser: local Lightpanda (agent-browser --engine lightpanda)")
+                        print("   ⚡ Lightpanda: faster navigation, no screenshot support")
+                        print("   Automatic Chrome fallback for screenshots and failed commands")
+                    elif engine == "chrome":
+                        print("🌐 Browser: local headless Chrome (agent-browser --engine chrome)")
+                    else:
+                        print("🌐 Browser: local headless Chromium (agent-browser)")
            print()
            print("   /browser connect      — connect to your live Chrome")
            print("   /browser disconnect   — revert to default")
@@ -7569,6 +7900,10 @@ class HermesCLI:
                ):
                    self.session_id = self.agent.session_id
                    self._pending_title = None
+                    # Manual /compress replaces conversation_history with a new
+                    # compressed handoff for the child session. Persist it from
+                    # offset 0 so resume can recover the continuation after exit.
+                    self.agent._flush_messages_to_session_db(self.conversation_history, None)
                new_tokens = estimate_request_tokens_rough(
                    self.conversation_history,
                    system_prompt=_sys_prompt,
@@ -8216,20 +8551,38 @@ class HermesCLI:
                return
            self._voice_recording = True

-        # Load silence detection params from config
-        voice_cfg = {}
+        # Load silence detection params from config. Shape-safe: a
+        # hand-edited ``voice: true`` / ``voice: cmd+b`` leaves
+        # ``load_config()['voice']`` as a non-dict; coerce to {} so
+        # continuous recording falls back to the documented defaults
+        # instead of crashing on ``.get()``.
+        voice_cfg: dict = {}
        try:
            from hermes_cli.config import load_config
-            voice_cfg = load_config().get("voice", {})
+            _cfg = load_config().get("voice")
+            voice_cfg = _cfg if isinstance(_cfg, dict) else {}
        except Exception:
            pass

        if self._voice_recorder is None:
            self._voice_recorder = create_audio_recorder()

-        # Apply config-driven silence params
-        self._voice_recorder._silence_threshold = voice_cfg.get("silence_threshold", 200)
-        self._voice_recorder._silence_duration = voice_cfg.get("silence_duration", 3.0)
+        # Apply config-driven silence params (numeric-guarded so YAML
+        # scalar corruption doesn't break recording start-up).
+        #
+        # ``bool`` is explicitly excluded from the numeric check — in
+        # Python bool is a subclass of int, so a hand-edited
+        # ``silence_threshold: true`` would otherwise be forwarded as
+        # ``1`` instead of falling back to the 200 default (Copilot
+        # round-12 on #19835).
+        _threshold = voice_cfg.get("silence_threshold")
+        _duration = voice_cfg.get("silence_duration")
+        self._voice_recorder._silence_threshold = (
+            _threshold if isinstance(_threshold, (int, float)) and not isinstance(_threshold, bool) else 200
+        )
+        self._voice_recorder._silence_duration = (
+            _duration if isinstance(_duration, (int, float)) and not isinstance(_duration, bool) else 3.0
+        )

        def _on_silence():
            """Called by AudioRecorder when silence is detected after speech."""
@@ -8255,12 +8608,13 @@ class HermesCLI:
            with self._voice_lock:
                self._voice_recording = False
            raise
+        _label = self._voice_record_key_label()
        if getattr(self._voice_recorder, "supports_silence_autostop", True):
-            _recording_hint = "auto-stops on silence | Ctrl+B to stop & exit continuous"
+            _recording_hint = f"auto-stops on silence | {_label} to stop & exit continuous"
        elif _is_termux_environment():
-            _recording_hint = "Termux:API capture | Ctrl+B to stop"
+            _recording_hint = f"Termux:API capture | {_label} to stop"
        else:
-            _recording_hint = "Ctrl+B to stop"
+            _recording_hint = f"{_label} to stop"
        _cprint(f"\n{_ACCENT}● Recording...{_RST} {_DIM}({_recording_hint}){_RST}")

        # Periodically refresh prompt to update audio level indicator
@@ -8505,10 +8859,12 @@ class HermesCLI:
        with self._voice_lock:
            self._voice_mode = True

-        # Check config for auto_tts
+        # Check config for auto_tts (shape-safe — malformed ``voice:`` YAML
+        # leaves ``voice_config`` as a non-dict, so guard before .get()).
        try:
            from hermes_cli.config import load_config
-            voice_config = load_config().get("voice", {})
+            _raw_voice = load_config().get("voice")
+            voice_config = _raw_voice if isinstance(_raw_voice, dict) else {}
            if voice_config.get("auto_tts", False):
                with self._voice_lock:
                    self._voice_tts = True
@@ -8520,13 +8876,11 @@ class HermesCLI:
        # _voice_message_prefix property and its usage in _process_message().

        tts_status = " (TTS enabled)" if self._voice_tts else ""
-        try:
-            from hermes_cli.config import load_config
-            _raw_ptt = load_config().get("voice", {}).get("record_key", "ctrl+b")
-            _ptt_key = _raw_ptt.lower().replace("ctrl+", "c-").replace("alt+", "a-")
-        except Exception:
-            _ptt_key = "c-b"
-        _ptt_display = _ptt_key.replace("c-", "Ctrl+").upper()
+        # Use the startup-pinned cache so the advertised shortcut always
+        # matches the live prompt_toolkit binding — reading live config
+        # here would drift after a mid-session config edit (Copilot
+        # round-14 on #19835, same class as round-13).
+        _ptt_display = self._voice_record_key_label()
        _cprint(f"\n{_ACCENT}Voice mode enabled{tts_status}{_RST}")
        _cprint(f"  {_DIM}{_ptt_display} to start/stop recording{_RST}")
        _cprint(f"  {_DIM}/voice tts  to toggle speech output{_RST}")
@@ -8583,7 +8937,6 @@ class HermesCLI:

    def _show_voice_status(self):
        """Show current voice mode status."""
-        from hermes_cli.config import load_config
        from tools.voice_mode import check_voice_requirements

        reqs = check_voice_requirements()
@@ -8592,9 +8945,11 @@ class HermesCLI:
        _cprint(f"  Mode:      {'ON' if self._voice_mode else 'OFF'}")
        _cprint(f"  TTS:       {'ON' if self._voice_tts else 'OFF'}")
        _cprint(f"  Recording: {'YES' if self._voice_recording else 'no'}")
-        _raw_key = load_config().get("voice", {}).get("record_key", "ctrl+b")
-        _display_key = _raw_key.replace("ctrl+", "Ctrl+").upper() if "ctrl+" in _raw_key.lower() else _raw_key
-        _cprint(f"  Record key: {_display_key}")
+        # Display the startup-pinned label so /voice status always
+        # matches the live prompt_toolkit binding (Copilot round-14 on
+        # #19835, same class as round-13). Reading live config here
+        # would drift after a mid-session config edit.
+        _cprint(f"  Record key: {self._voice_record_key_label()}")
        _cprint(f"\n  {_BOLD}Requirements:{_RST}")
        for line in reqs["details"].split("\n"):
            _cprint(f"    {line}")
@@ -9997,7 +10352,6 @@ class HermesCLI:
        # Key bindings for the input area
        kb = KeyBindings()
        
-        @kb.add('enter')
        def handle_enter(event):
            """Handle Enter key - submit input.
            
@@ -10156,17 +10510,14 @@ class HermesCLI:
                else:
                    self._pending_input.put(payload)
                event.app.current_buffer.reset(append_to_history=True)
+
+        _bind_prompt_submit_keys(kb, handle_enter)
        
        @kb.add('escape', 'enter')
        def handle_alt_enter(event):
            """Alt+Enter inserts a newline for multi-line input."""
            event.current_buffer.insert_text('\n')

-        @kb.add('c-j')
-        def handle_ctrl_enter(event):
-            """Ctrl+Enter (c-j) inserts a newline. Most terminals send c-j for Ctrl+Enter."""
-            event.current_buffer.insert_text('\n')
-
        # VSCode/Cursor bind Ctrl+G to "Find Next" at the editor level, so
        # the keystroke never reaches the embedded terminal. Alt+G is unbound
        # in those IDEs and arrives here as ('escape', 'g') — register it as
@@ -10429,7 +10780,92 @@ class HermesCLI:
                else:
                    self._should_exit = True
                    event.app.exit()
-        
+
+        # Ctrl+Shift+C: no binding needed. Terminal emulators (GNOME Terminal,
+        # iTerm2, kitty, Windows Terminal, etc.) intercept Ctrl+Shift+C before
+        # the keystroke reaches the application's stdin — prompt_toolkit never
+        # sees it, and prompt_toolkit's key spec parser doesn't even recognise
+        # 'c-S-c' anyway (the Shift modifier is meaningless on control-sequence
+        # keys). #19884 added a handler for this; #19895 patched the resulting
+        # startup crash with try/except. Both were based on a misreading of how
+        # terminal key events propagate. Deleting the dead handler outright.
+
+        @kb.add('c-q')  # Ctrl+Q
+        def handle_ctrl_q(event):
+            """Alternative interrupt/exit shortcut (Ctrl+Q).
+
+            Behaves like Ctrl+C: cancels active prompts, interrupts the
+            running agent, or clears the input buffer. Does not support
+            the double-press 'force exit' feature of Ctrl+C.
+
+            The checks below run in a fixed priority order and the first
+            one that matches returns immediately: active voice recording,
+            sudo prompt, secret prompt, approval prompt, /model picker,
+            clarify prompt. When none of those is active, a running agent
+            is interrupted; otherwise pending input (buffer text or
+            attached images) is cleared, and with nothing left to clear
+            the application exits.
+            """
+            # Cancel active voice recording.
+            # The recording flags are flipped while holding the voice lock;
+            # the recorder's cancel() is only started after the lock has
+            # been released.
+            _should_cancel_voice = False
+            _recorder_ref = None
+            with cli_ref._voice_lock:
+                if cli_ref._voice_recording and cli_ref._voice_recorder:
+                    _recorder_ref = cli_ref._voice_recorder
+                    cli_ref._voice_recording = False
+                    cli_ref._voice_continuous = False
+                    _should_cancel_voice = True
+            if _should_cancel_voice:
+                _cprint(f"\n{_DIM}Recording cancelled.{_RST}")
+                # cancel() runs on a daemon thread instead of inline in the
+                # key handler — presumably so a slow cancel cannot stall the
+                # UI; confirm against the recorder implementation.
+                threading.Thread(
+                    target=_recorder_ref.cancel, daemon=True
+                ).start()
+                event.app.invalidate()
+                return
+
+            # Cancel sudo prompt
+            # An empty string is sent as the response — presumably the
+            # waiting side treats it as "cancelled"; verify against the
+            # sudo callback.
+            if self._sudo_state:
+                self._sudo_state["response_queue"].put("")
+                self._sudo_state = None
+                event.app.invalidate()
+                return
+
+            # Cancel secret prompt
+            if self._secret_state:
+                self._cancel_secret_capture()
+                event.app.current_buffer.reset()
+                event.app.invalidate()
+                return
+
+            # Cancel approval prompt (deny)
+            if self._approval_state:
+                self._approval_state["response_queue"].put("deny")
+                self._approval_state = None
+                event.app.invalidate()
+                return
+
+            # Cancel /model picker
+            if self._model_picker_state:
+                self._close_model_picker()
+                event.app.current_buffer.reset()
+                event.app.invalidate()
+                return
+
+            # Cancel clarify prompt
+            # The waiting side receives a fixed message instead of an
+            # answer, and free-text mode is switched off.
+            if self._clarify_state:
+                self._clarify_state["response_queue"].put(
+                    "The user cancelled. Use your best judgement to proceed."
+                )
+                self._clarify_state = None
+                self._clarify_freetext = False
+                event.app.current_buffer.reset()
+                event.app.invalidate()
+                return
+
+            # No prompt or recording is active: interrupt a running agent,
+            # otherwise clear pending input, otherwise exit the app.
+            if self._agent_running and self.agent:
+                print("\n⚡ Interrupting agent...")
+                self.agent.interrupt()
+            else:
+                if event.app.current_buffer.text or self._attached_images:
+                    event.app.current_buffer.reset()
+                    self._attached_images.clear()
+                    event.app.invalidate()
+                else:
+                    self._should_exit = True
+                    event.app.exit()
+
        @kb.add('c-d')
        def handle_ctrl_d(event):
            """Ctrl+D: delete char under cursor (standard readline behaviour).
@@ -10483,15 +10919,44 @@ class HermesCLI:
            run_in_terminal(_suspend)

        # Voice push-to-talk key: configurable via config.yaml (voice.record_key)
-        # Default: Ctrl+B (avoids conflict with Ctrl+R readline reverse-search)
-        # Config uses "ctrl+b" format; prompt_toolkit expects "c-b" format.
+        # Default: Ctrl+B (avoids conflict with Ctrl+R readline reverse-search).
+        # Config spellings (ctrl/control/alt/option/opt) are normalized to
+        # prompt_toolkit's c-x / a-x format via ``normalize_voice_record_key_for_prompt_toolkit``
+        # so the same config value binds identically in the TUI and CLI
+        # (Copilot round-9 review on #19835). ``super``/``win``/``windows``
+        # configs silently fall back to the default here since prompt_toolkit
+        # has no super modifier — log a warning so users notice the
+        # TUI/CLI split instead of a silent mismatch (round-11).
+        _raw_key: object = "ctrl+b"
        try:
            from hermes_cli.config import load_config
-            _raw_key = load_config().get("voice", {}).get("record_key", "ctrl+b")
-            _voice_key = _raw_key.lower().replace("ctrl+", "c-").replace("alt+", "a-")
+            from hermes_cli.voice import (
+                normalize_voice_record_key_for_prompt_toolkit,
+                voice_record_key_from_config,
+            )
+            _raw_key = voice_record_key_from_config(load_config())
+            _voice_key = normalize_voice_record_key_for_prompt_toolkit(_raw_key)
+            if (
+                isinstance(_raw_key, str)
+                and _raw_key.strip().lower().split("+", 1)[0].strip() in {"super", "win", "windows"}
+                and _voice_key == "c-b"
+            ):
+                logger.warning(
+                    "voice.record_key %r uses a TUI-only modifier (super/win); "
+                    "CLI fell back to Ctrl+B. Use ctrl+<key> or alt+<key> for "
+                    "cross-runtime parity.",
+                    _raw_key,
+                )
        except Exception:
+            # Reset the label source together with the binding. If the
+            # failure happened after ``_raw_key`` was read from config
+            # (e.g. while normalizing it), caching that value below would
+            # advertise a key that is not the one actually bound.
+            _raw_key = "ctrl+b"
            _voice_key = "c-b"

+        # Cache the UI label here — same ``_raw_key`` that drives the
+        # prompt_toolkit binding below. Every status / placeholder /
+        # recording-hint render reads this cached value so display can
+        # never drift from the live keybinding even if the user edits
+        # voice.record_key mid-session (Copilot round-13 on #19835).
+        self.set_voice_record_key_cache(_raw_key)
+
        @kb.add(_voice_key)
        def handle_voice_record(event):
            """Toggle voice recording when voice mode is active.
@@ -10651,7 +11116,7 @@ class HermesCLI:
        def get_prompt():
            return cli_ref._get_tui_prompt_fragments()

-        # Create the input area with multiline (shift+enter), autocomplete, and paste handling
+        # Create the input area with multiline (Alt+Enter), autocomplete, and paste handling
        from prompt_toolkit.auto_suggest import AutoSuggestFromHistory


@@ -10794,7 +11259,8 @@ class HermesCLI:

        def _get_placeholder():
            if cli_ref._voice_recording:
-                return "recording... Ctrl+B to stop, Ctrl+C to cancel"
+                _label = cli_ref._voice_record_key_label()
+                return f"recording... {_label} to stop, Ctrl+C to cancel"
            if cli_ref._voice_processing:
                return "transcribing..."
            if cli_ref._sudo_state:
@@ -10814,7 +11280,8 @@ class HermesCLI:
            if cli_ref._agent_running:
                return "msg=interrupt · /queue · /bg · /steer · Ctrl+C cancel"
            if cli_ref._voice_mode:
-                return "type or Ctrl+B to record"
+                _label = cli_ref._voice_record_key_label()
+                return f"type or {_label} to record"
            return ""

        input_area.control.input_processors.append(_PlaceholderProcessor(_get_placeholder))
@@ -11391,6 +11858,7 @@ class HermesCLI:
            mouse_support=False,
            **({'cursor': _STEADY_CURSOR} if _STEADY_CURSOR is not None else {}),
        )
+        _disable_prompt_toolkit_cpr_warning(app)
        self._app = app  # Store reference for clarify_callback

        # ── Fix ghost status-bar lines on terminal resize ──────────────
@@ -11410,23 +11878,7 @@ class HermesCLI:
        _original_on_resize = app._on_resize

        def _resize_clear_ghosts():
-            renderer = app.renderer
-            try:
-                out = renderer.output
-                # Reset attributes, erase the entire screen, and home the
-                # cursor. This overwrites any reflowed status-bar rows or
-                # stale content the terminal kept from the prior layout.
-                out.reset_attributes()
-                out.erase_screen()
-                out.cursor_goto(0, 0)
-                out.flush()
-                # Tell the renderer its tracked position is fresh so its
-                # own erase() inside _on_resize doesn't cursor_up() past
-                # the top of the screen.
-                renderer.reset(leave_alternate_screen=False)
-            except Exception:
-                pass  # never break resize handling
-            _original_on_resize()
+            self._schedule_resize_recovery(app, _original_on_resize)

        app._on_resize = _resize_clear_ghosts

@@ -11617,8 +12069,22 @@ class HermesCLI:
            call _kill_process (SIGTERM + 1 s wait + SIGKILL if needed) →
            return from _wait_for_process.  ``time.sleep`` releases the
            GIL so the daemon actually runs during the window.
+
+            Guarded ``logger.debug``: CPython's ``logging`` module is not
+            reentrant-safe.  ``Logger.isEnabledFor`` caches level results
+            in ``Logger._cache``; under shutdown races the cache can be
+            cleared (``_clear_cache``) or mid-mutation when the signal
+            fires, raising ``KeyError: <level_int>`` (e.g. ``KeyError: 10``
+            for DEBUG) inside the handler.  That KeyError then escapes
+            before ``raise KeyboardInterrupt()`` can fire, which bypasses
+            prompt_toolkit's normal interrupt unwind and surfaces as the
+            EIO cascade from issue #13710.  Wrap the log in its own
+            ``try/except Exception`` so the handler can never raise through it.
            """
-            logger.debug("Received signal %s, triggering graceful shutdown", signum)
+            try:
+                logger.debug("Received signal %s, triggering graceful shutdown", signum)
+            except Exception:
+                pass  # never let logging raise from a signal handler (#13710 regression)
            try:
                if getattr(self, "agent", None) and getattr(self, "_agent_running", False):
                    self.agent.interrupt(f"received signal {signum}")
@@ -420,7 +420,7 @@ def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:


 def create_job(
-    prompt: str,
+    prompt: Optional[str],
    schedule: str,
    name: Optional[str] = None,
    repeat: Optional[int] = None,
@@ -435,12 +435,14 @@ def create_job(
    context_from: Optional[Union[str, List[str]]] = None,
    enabled_toolsets: Optional[List[str]] = None,
    workdir: Optional[str] = None,
+    no_agent: bool = False,
 ) -> Dict[str, Any]:
    """
    Create a new cron job.

    Args:
-        prompt: The prompt to run (must be self-contained, or a task instruction when skill is set)
+        prompt: The prompt to run (must be self-contained, or a task instruction when skill is set).
+                Ignored when ``no_agent=True`` except as an optional name hint.
        schedule: Schedule string (see parse_schedule)
        name: Optional friendly name
        repeat: How many times to run (None = forever, 1 = once)
@@ -451,21 +453,33 @@ def create_job(
        model: Optional per-job model override
        provider: Optional per-job provider override
        base_url: Optional per-job base URL override
-        script: Optional path to a Python script whose stdout is injected into the
-                prompt each run.  The script runs before the agent turn, and its output
-                is prepended as context.  Useful for data collection / change detection.
+        script: Optional path to a script whose stdout feeds the job. With
+                ``no_agent=True`` the script IS the job — its stdout is
+                delivered verbatim. Without ``no_agent``, its stdout is
+                injected into the agent's prompt as context (data-collection /
+                change-detection pattern). Paths resolve under
+                ~/.hermes/scripts/; ``.sh`` / ``.bash`` files run via bash,
+                anything else via Python.
        context_from: Optional job ID (or list of job IDs) whose most recent output
                      is injected into the prompt as context before each run.
                      Useful for chaining cron jobs: job A finds data, job B processes it.
        enabled_toolsets: Optional list of toolset names to restrict the agent to.
                          When set, only tools from these toolsets are loaded, reducing
                          token overhead. When omitted, all default tools are loaded.
+                          Ignored when ``no_agent=True``.
        workdir: Optional absolute path.  When set, the job runs as if launched
                from that directory: AGENTS.md / CLAUDE.md / .cursorrules from
                that directory are injected into the system prompt, and the
                terminal/file/code_exec tools use it as their working directory
                (via TERMINAL_CWD).  When unset, the old behaviour is preserved
                (no context files injected, tools use the scheduler's cwd).
+                With ``no_agent=True``, ``workdir`` is still applied as the
+                script's cwd so relative paths inside the script behave
+                predictably.
+        no_agent: When True, skip the agent entirely — run ``script`` on schedule
+                and deliver its stdout directly. Empty stdout = silent (no
+                delivery). Requires ``script`` to be set. Ideal for classic
+                watchdogs and periodic alerts that don't need LLM reasoning.

    Returns:
        The created job dict
@@ -499,6 +513,16 @@ def create_job(
    normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
    normalized_toolsets = normalized_toolsets or None
    normalized_workdir = _normalize_workdir(workdir)
+    normalized_no_agent = bool(no_agent)
+
+    # no_agent jobs are meaningless without a script — the script IS the job.
+    # Surface this as a clear ValueError at create time so bad configs never
+    # reach the scheduler.
+    if normalized_no_agent and not normalized_script:
+        raise ValueError(
+            "no_agent=True requires a script — with no agent and no script "
+            "there is nothing for the job to run."
+        )

    # Normalize context_from: accept str or list of str, store as list or None
    if isinstance(context_from, str):
@@ -508,7 +532,7 @@ def create_job(
    else:
        context_from = None

-    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
+    label_source = (prompt or (normalized_skills[0] if normalized_skills else None) or (normalized_script if normalized_no_agent else None)) or "cron job"
    job = {
        "id": job_id,
        "name": name or label_source[:50].strip(),
@@ -519,6 +543,7 @@ def create_job(
        "provider": normalized_provider,
        "base_url": normalized_base_url,
        "script": normalized_script,
+        "no_agent": normalized_no_agent,
        "context_from": context_from,
        "schedule": parsed_schedule,
        "schedule_display": parsed_schedule.get("display", schedule),
@@ -785,6 +810,12 @@ def get_due_jobs() -> List[Dict[str, Any]]:
    the job is fast-forwarded to the next future run instead of firing
    immediately.  This prevents a burst of missed jobs on gateway restart.
    """
+    with _jobs_file_lock:
+        return _get_due_jobs_locked()
+
+
+def _get_due_jobs_locked() -> List[Dict[str, Any]]:
+    """Inner implementation of get_due_jobs(); must be called with _jobs_file_lock held."""
    now = _hermes_now()
    raw_jobs = load_jobs()
    jobs = [_apply_skill_fields(j) for j in copy.deepcopy(raw_jobs)]
@@ -35,7 +35,7 @@ from typing import List, Optional
 sys.path.insert(0, str(Path(__file__).parent.parent))

 from hermes_constants import get_hermes_home
-from hermes_cli.config import load_config
+from hermes_cli.config import load_config, _expand_env_vars
 from hermes_time import now as _hermes_now

 logger = logging.getLogger(__name__)
@@ -114,12 +114,20 @@ from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_
 # locally for audit.
 SILENT_MARKER = "[SILENT]"

-# Resolve Hermes home directory (respects HERMES_HOME override)
-_hermes_home = get_hermes_home()
+# Backward-compatible module override used by tests and emergency monkeypatches.
+_hermes_home: Path | None = None

-# File-based lock prevents concurrent ticks from gateway + daemon + systemd timer
-_LOCK_DIR = _hermes_home / "cron"
-_LOCK_FILE = _LOCK_DIR / ".tick.lock"
+
+def _get_hermes_home() -> Path:
+    """Return the module-level ``_hermes_home`` override when one is set,
+    otherwise whatever ``get_hermes_home()`` reports at call time."""
+    override = _hermes_home
+    if override:
+        return override
+    return get_hermes_home()
+
+
+def _get_lock_paths() -> tuple[Path, Path]:
+    """Build the cron lock paths on every call instead of at import time.
+
+    Returns:
+        ``(lock_dir, lock_file)`` where ``lock_dir`` is the ``cron``
+        directory under the current Hermes home and ``lock_file`` is
+        ``.tick.lock`` inside it.
+    """
+    cron_dir = _get_hermes_home() / "cron"
+    return cron_dir, cron_dir / ".tick.lock"


 def _resolve_origin(job: dict) -> Optional[dict]:
@@ -576,8 +584,18 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
    prevent arbitrary script execution via path traversal or absolute
    path injection.

+    Supported interpreters (chosen by file extension):
+
+    * ``.sh`` / ``.bash`` — run with ``/bin/bash``
+    * anything else — run with the current Python interpreter
+      (``sys.executable``), preserving the original behaviour for
+      Python-based pre-check and data-collection scripts.
+
+    Shell support lets ``no_agent=True`` jobs ship classic bash watchdogs
+    (the `memory-watchdog.sh` pattern) without wrapping them in Python.
+
    Args:
-        script_path: Path to a Python script.  Relative paths are resolved
+        script_path: Path to the script.  Relative paths are resolved
            against HERMES_HOME/scripts/.  Absolute and ~-prefixed paths
            are also validated to ensure they stay within the scripts dir.

@@ -587,7 +605,7 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
    """
    from hermes_constants import get_hermes_home

-    scripts_dir = get_hermes_home() / "scripts"
+    scripts_dir = _get_hermes_home() / "scripts"
    scripts_dir.mkdir(parents=True, exist_ok=True)
    scripts_dir_resolved = scripts_dir.resolve()

@@ -614,9 +632,19 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:

    script_timeout = _get_script_timeout()

+    # Pick an interpreter by extension.  Bash for .sh/.bash, Python for
+    # everything else.  We deliberately do NOT honour the file's own
+    # shebang: the scripts dir is trusted, but keeping the interpreter
+    # choice explicit here keeps the allowed surface small and auditable.
+    suffix = path.suffix.lower()
+    if suffix in (".sh", ".bash"):
+        argv = ["/bin/bash", str(path)]
+    else:
+        argv = [sys.executable, str(path)]
+
    try:
        result = subprocess.run(
-            [sys.executable, str(path)],
+            argv,
            capture_output=True,
            text=True,
            timeout=script_timeout,
@@ -706,10 +734,8 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
                    f"{prompt}"
                )
            else:
-                prompt = (
-                    "[Script ran successfully but produced no output.]\n\n"
-                    f"{prompt}"
-                )
+                # Script produced no output — nothing to report, skip AI call.
+                return None
        else:
            prompt = (
                "## Script Error\n"
@@ -832,8 +858,120 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    Returns:
        Tuple of (success, full_output_doc, final_response, error_message)
    """
+    job_id = job["id"]
+    job_name = job["name"]
+
+    # ---------------------------------------------------------------
+    # no_agent short-circuit — the script IS the job, no LLM involvement.
+    # ---------------------------------------------------------------
+    # This mirrors the classic "run a bash script on a timer, send its
+    # stdout to telegram" watchdog pattern. The agent path is skipped
+    # entirely: no AIAgent, no prompt, no tool loop, no token spend.
+    #
+    # We check this BEFORE importing run_agent / constructing SessionDB so
+    # a pure-script tick never pays for the agent machinery it isn't going
+    # to use. Keep this block self-contained.
+    #
+    # Semantics:
+    #   - script stdout (trimmed) → delivered verbatim as the final message
+    #   - empty stdout            → silent run (no delivery, success=True)
+    #   - non-zero exit / timeout → delivered as an error alert, success=False
+    #   - wakeAgent=false gate    → treated like empty stdout (silent), since
+    #                               the whole point of no_agent is that there
+    #                               is no agent to wake
+    if job.get("no_agent"):
+        script_path = job.get("script")
+        if not script_path:
+            err = "no_agent=True but no script is set for this job"
+            logger.error("Job '%s': %s", job_id, err)
+            return False, "", "", err
+
+        # Apply workdir if configured — lets scripts use predictable relative
+        # paths. For no_agent jobs this is just the subprocess cwd (not an
+        # agent TERMINAL_CWD bridge).
+        _job_workdir = (job.get("workdir") or "").strip() or None
+        _prior_cwd = None
+        if _job_workdir and Path(_job_workdir).is_dir():
+            _prior_cwd = os.getcwd()
+            try:
+                os.chdir(_job_workdir)
+            except OSError:
+                _prior_cwd = None
+
+        try:
+            ok, output = _run_job_script(script_path)
+        finally:
+            if _prior_cwd is not None:
+                try:
+                    os.chdir(_prior_cwd)
+                except OSError:
+                    pass
+
+        now_iso = _hermes_now().strftime("%Y-%m-%d %H:%M:%S")
+
+        if not ok:
+            # Script crashed / timed out / exited non-zero.  Deliver the
+            # error so the user knows the watchdog itself broke — silent
+            # failure for an alerting job is the worst-case outcome.
+            alert = (
+                f"⚠ Cron watchdog '{job_name}' script failed\n\n"
+                f"{output}\n\n"
+                f"Time: {now_iso}"
+            )
+            doc = (
+                f"# Cron Job: {job_name}\n\n"
+                f"**Job ID:** {job_id}\n"
+                f"**Run Time:** {now_iso}\n"
+                f"**Mode:** no_agent (script)\n"
+                f"**Status:** script failed\n\n"
+                f"{output}\n"
+            )
+            return False, doc, alert, output
+
+        # Honour the wakeAgent gate as a silent signal — `wakeAgent: false`
+        # means "nothing to report this tick", same as empty stdout.
+        if not _parse_wake_gate(output):
+            logger.info(
+                "Job '%s' (no_agent): wakeAgent=false gate — silent run", job_id
+            )
+            silent_doc = (
+                f"# Cron Job: {job_name}\n\n"
+                f"**Job ID:** {job_id}\n"
+                f"**Run Time:** {now_iso}\n"
+                f"**Mode:** no_agent (script)\n"
+                f"**Status:** silent (wakeAgent=false)\n"
+            )
+            return True, silent_doc, SILENT_MARKER, None
+
+        if not output.strip():
+            logger.info("Job '%s' (no_agent): empty stdout — silent run", job_id)
+            silent_doc = (
+                f"# Cron Job: {job_name}\n\n"
+                f"**Job ID:** {job_id}\n"
+                f"**Run Time:** {now_iso}\n"
+                f"**Mode:** no_agent (script)\n"
+                f"**Status:** silent (empty output)\n"
+            )
+            return True, silent_doc, SILENT_MARKER, None
+
+        doc = (
+            f"# Cron Job: {job_name}\n\n"
+            f"**Job ID:** {job_id}\n"
+            f"**Run Time:** {now_iso}\n"
+            f"**Mode:** no_agent (script)\n\n"
+            f"---\n\n"
+            f"{output}\n"
+        )
+        return True, doc, output, None
+
+    # ---------------------------------------------------------------
+    # Default (LLM) path — import and construct the agent machinery now
+    # that we know we actually need it. Doing these imports here instead of
+    # at module top keeps no_agent ticks from paying for AIAgent / SessionDB
+    # construction costs.
+    # ---------------------------------------------------------------
    from run_agent import AIAgent
-    
+
    # Initialize SQLite session store so cron job messages are persisted
    # and discoverable via session_search (same pattern as gateway/run.py).
    _session_db = None
@@ -842,9 +980,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        _session_db = SessionDB()
    except Exception as e:
        logger.debug("Job '%s': SQLite session store not available: %s", job.get("id", "?"), e)
-    
-    job_id = job["id"]
-    job_name = job["name"]

    # Wake-gate: if this job has a pre-check script, run it BEFORE building
    # the prompt so a ``{"wakeAgent": false}`` response can short-circuit
@@ -869,6 +1004,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            return True, silent_doc, SILENT_MARKER, None

    prompt = _build_job_prompt(job, prerun_script=prerun_script)
+    if prompt is None:
+        logger.info("Job '%s': script produced no output, skipping AI call.", job_name)
+        return True, "", SILENT_MARKER, None
    origin = _resolve_origin(job)
    _cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"

@@ -928,9 +1066,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        # changes take effect without a gateway restart.
        from dotenv import load_dotenv
        try:
-            load_dotenv(str(_hermes_home / ".env"), override=True, encoding="utf-8")
+            load_dotenv(str(_get_hermes_home() / ".env"), override=True, encoding="utf-8")
        except UnicodeDecodeError:
-            load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
+            load_dotenv(str(_get_hermes_home() / ".env"), override=True, encoding="latin-1")

        delivery_target = _resolve_delivery_target(job)
        if delivery_target:
@@ -948,10 +1086,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        _cfg = {}
        try:
            import yaml
-            _cfg_path = str(_hermes_home / "config.yaml")
+            _cfg_path = str(_get_hermes_home() / "config.yaml")
            if os.path.exists(_cfg_path):
                with open(_cfg_path) as _f:
                    _cfg = yaml.safe_load(_f) or {}
+                _cfg = _expand_env_vars(_cfg)
                _model_cfg = _cfg.get("model", {})
                if not job.get("model"):
                    if isinstance(_model_cfg, str):
@@ -981,7 +1120,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        if prefill_file:
            pfpath = Path(prefill_file).expanduser()
            if not pfpath.is_absolute():
-                pfpath = _hermes_home / pfpath
+                pfpath = _get_hermes_home() / pfpath
            if pfpath.exists():
                try:
                    with open(pfpath, "r", encoding="utf-8") as _pf:
@@ -1004,8 +1143,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        )
        from hermes_cli.auth import AuthError
        try:
+            # Do not inject HERMES_INFERENCE_PROVIDER here. resolve_runtime_provider()
+            # already prefers persisted config over stale shell/env overrides when
+            # no explicit provider is requested. Passing the env var here short-
+            # circuits that precedence and can resurrect old providers (for
+            # example DeepSeek) for cron jobs that do not pin provider/model.
            runtime_kwargs = {
-                "requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
+                "requested": job.get("provider"),
            }
            if job.get("base_url"):
                runtime_kwargs["explicit_base_url"] = job.get("base_url")
@@ -1300,12 +1444,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
    Returns:
        Number of jobs executed (0 if another tick is already running)
    """
-    _LOCK_DIR.mkdir(parents=True, exist_ok=True)
+    lock_dir, lock_file = _get_lock_paths()
+    lock_dir.mkdir(parents=True, exist_ok=True)

    # Cross-platform file locking: fcntl on Unix, msvcrt on Windows
    lock_fd = None
    try:
-        lock_fd = open(_LOCK_FILE, "w")
+        lock_fd = open(lock_file, "w")
        if fcntl:
            fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        elif msvcrt:
@@ -0,0 +1,473 @@
+# Telegram DM User-Managed Multi-Session Topics Implementation Plan
+
+> **For Hermes:** Use test-driven-development for implementation. Use subagent-driven-development only after this plan is split into small reviewed tasks.
+
+**Goal:** Add an opt-in Telegram DM multi-session mode where Telegram user-created private-chat topics become independent Hermes session lanes, while the root DM becomes a system lobby.
+
+**Architecture:** Rely on Telegram's native private-chat topic UI. Users create new topics with the `+` button; Hermes maps each `message_thread_id` to a separate session lane. Hermes does not create topics for normal `/new` flow and does not try to manage topic lifecycle beyond activation/status, root-lobby behavior, and restoring legacy sessions into a user-created topic.
+
+**Tech Stack:** Hermes gateway, Telegram Bot API 9.4+, python-telegram-bot adapter, SQLite SessionDB / side tables, pytest.
+
+---
+
+## 1. Product decisions
+
+### Accepted
+
+- PR-quality implementation: migrations, tests, docs, backwards compatibility.
+- Use SQLite persistence, not JSON sidecars.
+- Live status suffixes in topic titles are out of MVP.
+- Topic title sync/editing is out of MVP except future-compatible storage if cheap.
+- User creates Telegram topics manually through the Telegram bot interface.
+- `/new` does **not** create Telegram topics.
+- Root/main DM becomes a system lobby after activation.
+- Existing Telegram behavior remains unchanged until the feature is activated/enabled.
+- Migration of old sessions is supported through `/topic` listing and `/topic <session_id>` restore inside a user-created topic.
+
+### Telegram API assumptions verified from Bot API docs
+
+- `getMe` returns bot `User` fields:
+  - `has_topics_enabled`: forum/topic mode enabled in private chats.
+  - `allows_users_to_create_topics`: users may create/delete topics in private chats.
+- `createForumTopic` works for private chats with a user, but MVP does not rely on it for normal flow.
+- `Message.message_thread_id` identifies a topic in private chats.
+- `sendMessage` supports `message_thread_id` for private-chat topics.
+- `pinChatMessage` is allowed in private chats.
+
+---
+
+## 2. Target UX
+
+### 2.1 Activation from root/main DM
+
+User sends:
+
+```text
+/topic
+```
+
+Hermes:
+
+1. calls Telegram `getMe`;
+2. verifies `has_topics_enabled` and `allows_users_to_create_topics`;
+3. enables multi-session topic mode for this Telegram DM user/chat;
+4. sends an onboarding message;
+5. pins the onboarding message if configured;
+6. shows old/unlinked sessions that can be restored into topics.
+
+Suggested onboarding text:
+
+```text
+Multi-session mode is enabled.
+
+Create new Hermes chats with the + button in this bot interface. Each Telegram topic is an independent Hermes session, so you can work on different tasks in parallel.
+
+This main chat is reserved for system commands, status, and session management.
+
+To restore an old session:
+1. Use /topic here to see unlinked sessions.
+2. Create a new topic with the + button.
+3. Send /topic <session_id> inside that topic.
+```
+
+### 2.2 Root/main DM after activation
+
+Root DM is a system lobby.
+
+Allowed/system commands include at least:
+
+- `/topic`
+- `/status`
+- `/sessions` if available
+- `/usage`
+- `/help`
+- `/platforms`
+
+Normal user prompts in root DM do not enter the agent loop. Reply:
+
+```text
+This main chat is reserved for system commands.
+
+To chat with Hermes, create a new topic using the + button in this bot interface. Each topic works as an independent Hermes session.
+```
+
+`/new` in root DM does not create a session/topic. Reply:
+
+```text
+To start a new parallel Hermes chat, create a new topic with the + button in this bot interface.
+
+Each topic is an independent Hermes session. Use /new inside a topic only if you want to replace that topic's current session.
+```
+
+### 2.3 First message in a user-created topic
+
+When a user creates a Telegram topic and sends the first message there:
+
+1. Hermes receives a Telegram DM message with `message_thread_id`.
+2. Hermes derives the existing thread-aware `session_key` from `(platform=telegram, chat_type=dm, chat_id, thread_id)`.
+3. If no binding exists, Hermes creates a fresh Hermes session for this topic lane and persists the binding.
+4. The message runs through the normal agent loop for that lane.
+
+### 2.4 `/new` inside a non-main topic
+
+`/new` remains supported but replaces the session attached to the current topic lane.
+
+Hermes should warn:
+
+```text
+Started a new Hermes session in this topic.
+
+Tip: for parallel work, create a new topic with the + button instead of using /new here. /new replaces the session attached to the current topic.
+```
+
+### 2.5 `/topic` in root/main DM after activation
+
+Shows:
+
+- mode enabled/disabled;
+- last capability check result;
+- whether intro message is pinned if known;
+- count of known topic bindings;
+- list of old/unlinked sessions.
+
+Example:
+
+```text
+Telegram multi-session topics are enabled.
+
+Create new Hermes chats with the + button in this bot interface.
+
+Unlinked previous sessions:
+1. 2026-05-01 Research notes — id: abc123
+2. 2026-04-30 Deploy debugging — id: def456
+3. Untitled session — id: ghi789
+
+To restore one:
+1. Create a new topic with the + button.
+2. Open that topic.
+3. Send /topic <id>
+```
+
+### 2.6 `/topic` inside a non-main topic
+
+Without args, show the current topic binding:
+
+```text
+This topic is linked to:
+Session: Research notes
+ID: abc123
+
+Use /new to replace this topic with a fresh session.
+For parallel work, create another topic with the + button.
+```
+
+### 2.7 `/topic <session_id>` inside a non-main topic
+
+Restore an old/unlinked session into the current user-created topic.
+
+Behavior:
+
+1. reject if not in Telegram DM topic;
+2. verify session belongs to the same Telegram user/chat or is a safe legacy root DM session for this user;
+3. reject if session is already linked to another active topic in MVP;
+4. `SessionStore.switch_session(current_topic_session_key, target_session_id)`;
+5. upsert binding with `managed_mode = restored`;
+6. send two messages into the topic:
+   - session restored confirmation;
+   - last Hermes assistant message if available.
+
+Example:
+
+```text
+Session restored: Research notes
+
+Last Hermes message:
+...
+```
+
+---
+
+## 3. Persistence model
+
+Use SQLite, but topic-mode schema changes are **explicit opt-in migrations**, not automatic startup reconciliation.
+
+Important rollback-safety rule:
+
+- upgrading Hermes and starting the gateway must not create Telegram topic-mode tables or columns;
+- old/default Telegram behavior must keep working on the existing `state.db`;
+- the first `/topic` activation path calls an idempotent explicit migration, then enables topic mode for that chat;
+- if activation fails before the migration is needed, the database remains in the pre-topic-mode shape.
+
+### 3.1 No eager `sessions` table mutation for MVP
+
+Do **not** add `chat_id`, `chat_type`, `thread_id`, or `session_key` columns to `sessions` as part of ordinary `SessionDB()` startup. The existing declarative `_reconcile_columns()` mechanism would add them eagerly on every process start, which violates the managed-migration requirement.
+
+For MVP, keep origin/session-lane data in topic-specific side tables created only by the explicit `/topic` migration. Legacy unlinked sessions can be discovered conservatively from existing data (`source = telegram`, `user_id = current Telegram user`) plus absence from topic bindings.
+
+If future PRs need richer origin metadata for all gateway sessions, introduce it behind a separate explicit migration/command or a compatibility-reviewed schema bump.
+
+### 3.2 Explicit `/topic` migration API
+
+Add an idempotent method such as:
+
+```python
+def apply_telegram_topic_migration(self) -> None: ...
+```
+
+It creates only topic-mode side tables/indexes and records:
+
+```text
+state_meta.telegram_dm_topic_schema_version = 1
+```
+
+This method is called from `/topic` activation/status paths before reading or writing topic-mode state. It is not called from generic `SessionDB.__init__`, gateway startup, CLI startup, or auto-maintenance.
+
+### 3.3 `telegram_dm_topic_mode`
+
+Stores per-user/chat activation state. Created only by `apply_telegram_topic_migration()`.
+
+Suggested fields:
+
+- `chat_id` primary key
+- `user_id`
+- `enabled`
+- `activated_at`
+- `updated_at`
+- `has_topics_enabled`
+- `allows_users_to_create_topics`
+- `capability_checked_at`
+- `intro_message_id`
+- `pinned_message_id`
+
+### 3.4 `telegram_dm_topic_bindings`
+
+Stores Telegram topic/thread to Hermes session binding. Created only by `apply_telegram_topic_migration()`.
+
+Suggested fields:
+
+- `chat_id`
+- `thread_id`
+- `user_id`
+- `session_key`
+- `session_id`
+- `managed_mode`
+  - `auto`
+  - `restored`
+  - `new_replaced`
+- `linked_at`
+- `updated_at`
+
+Recommended constraints:
+
+- primary key `(chat_id, thread_id)`;
+- unique index on `session_id` for MVP to prevent one session linked to multiple topics;
+- index `(user_id, chat_id)` for status/listing.
+
+### 3.5 Unlinked session semantics
+
+For MVP, a session is unlinked if:
+
+- `source = telegram`;
+- `user_id = current Telegram user`;
+- no row in `telegram_dm_topic_bindings` references that session's `session_id`.
+
+This is intentionally conservative until a future explicit migration adds richer cross-platform origin metadata.
+
+Never dedupe by title.
+
+---
+
+## 4. Config
+
+Suggested config block:
+
+```yaml
+platforms:
+  telegram:
+    extra:
+      multisession_topics:
+        enabled: false
+        mode: user_managed_topics
+        root_chat_behavior: system_lobby
+        pin_intro_message: true
+```
+
+Notes:
+
+- `enabled: false` means existing Telegram behavior is unchanged.
+- Activation via `/topic` may create per-chat enabled state only if global config permits it.
+- `root_chat_behavior: system_lobby` is the MVP behavior for activated chats.
+
+---
+
+## 5. Command behavior summary
+
+### `/topic` root/main DM
+
+- If not activated: capability check, activate, send/pin onboarding, list unlinked sessions.
+- If activated: show status and unlinked sessions.
+
+### `/topic` non-main topic
+
+- Show current binding.
+
+### `/topic <session_id>` root/main DM
+
+Reject with instructions:
+
+```text
+Create a new topic with the + button, open it, then send /topic <session_id> there to restore this session.
+```
+
+### `/topic <session_id>` non-main topic
+
+Restore that session into this topic if ownership/linking checks pass.
+
+### `/new` root/main DM when activated
+
+Reply with instructions to use the `+` button. Do not enter agent loop.
+
+### `/new` non-main topic
+
+Create a new session in the current topic lane, persist/update binding, warn that `+` is preferred for parallel work.
+
+### Normal text root/main DM when activated
+
+Reply with system-lobby instruction. Do not enter agent loop.
+
+### Normal text non-main topic
+
+Normal Hermes agent flow for that topic's session lane.
+
+---
+
+## 6. PR breakdown
+
+### PR 1 — Explicit topic-mode schema migration
+
+**Goal:** Add rollback-safe SQLite support for Telegram topic mode without mutating `state.db` on ordinary upgrade/startup.
+
+**Files likely touched:**
+
+- `hermes_state.py`
+- tests under `tests/`
+
+**Tests first:**
+
+1. opening an old/current DB with `SessionDB()` does not create topic-mode tables or `sessions` origin columns;
+2. calling `apply_telegram_topic_migration()` creates `telegram_dm_topic_mode` and `telegram_dm_topic_bindings` idempotently;
+3. migration records `state_meta.telegram_dm_topic_schema_version = 1`.
+
+### PR 2 — Topic mode activation and binding APIs
+
+**Goal:** Add SQLite persistence for activation and topic bindings.
+
+**Tests first:**
+
+1. enable/check mode row round-trips;
+2. binding upsert and lookup by `(chat_id, user_id, thread_id)`;
+3. linked sessions are excluded from unlinked list.
+
+### PR 3 — `/topic` activation/status command
+
+**Goal:** Implement root activation/status/listing behavior.
+
+**Tests first:**
+
+1. `/topic` in root checks `getMe` capabilities and records activation;
+2. capability failure returns readable instructions;
+3. activated root `/topic` lists unlinked sessions.
+
+### PR 4 — System lobby behavior
+
+**Goal:** Prevent root chat from entering agent loop after activation.
+
+**Tests first:**
+
+1. normal text in activated root returns lobby instruction;
+2. `/new` in activated root returns `+` button instruction;
+3. non-activated root behavior is unchanged.
+
+### PR 5 — Auto-bind user-created topics
+
+**Goal:** First message in non-main topic creates/uses an independent session lane.
+
+**Tests first:**
+
+1. new topic message creates a binding with `managed_mode = auto`;
+2. repeated topic message reuses same binding/lane;
+3. two topics in same DM do not share sessions.
+
+### PR 6 — Restore legacy sessions into a topic
+
+**Goal:** Implement `/topic <session_id>` in non-main topics.
+
+**Tests first:**
+
+1. root `/topic <id>` rejects with instructions;
+2. topic `/topic <id>` switches current topic lane to target session;
+3. restore rejects sessions from other users/chats;
+4. restore rejects already-linked sessions;
+5. restore emits confirmation and last Hermes assistant message.
+
+### PR 7 — `/new` inside topic updates binding
+
+**Goal:** Keep existing `/new` semantics but persist topic binding replacement.
+
+**Tests first:**
+
+1. `/new` in topic creates a new session for same topic lane;
+2. binding updates to `managed_mode = new_replaced`;
+3. response includes guidance to use `+` for parallel work.
+
+### PR 8 — Docs and polish
+
+**Goal:** Document the feature and Telegram setup.
+
+**Files likely touched:**
+
+- `website/docs/user-guide/messaging/telegram.md`
+- maybe `website/docs/user-guide/sessions.md`
+
+Docs must explain:
+
+- BotFather/Telegram settings for topic mode and user-created topics;
+- `/topic` activation;
+- root system lobby;
+- using `+` for new parallel chats;
+- restoring old sessions with `/topic <id>` inside a topic;
+- limitations.
+
+---
+
+## 7. Testing / quality gates
+
+Run targeted tests after each TDD cycle, then broader tests before completion.
+
+Suggested commands (first confirm by inspection that these test paths exist):
+
+```bash
+python -m pytest tests/test_hermes_state.py -q
+python -m pytest tests/gateway/ -q
+python -m pytest tests/ -o 'addopts=' -q
+```
+
+Do not ship without verifying disabled-feature backwards compatibility.
+
+---
+
+## 8. Definition of done for MVP
+
+- `/topic` activates/checks Telegram DM multi-session mode.
+- Root DM becomes a system lobby after activation.
+- Onboarding message tells users to create new chats with the Telegram `+` button.
+- Onboarding message can be pinned in private chat.
+- User-created topics automatically become independent Hermes session lanes.
+- `/new` in root gives instructions, not a new agent run.
+- `/new` in a topic creates a new session in that topic and warns that `+` is preferred for parallel work.
+- `/topic` in root lists unlinked old sessions.
+- `/topic <session_id>` inside a topic restores that session and sends confirmation + last Hermes assistant message.
+- Ownership checks prevent restoring other users' sessions.
+- Already-linked sessions are not restored into a second topic in MVP.
+- Existing Telegram behavior is unchanged when the feature is disabled.
+- Tests and docs are included.
@@ -40,7 +40,7 @@ This directory contains the integration layer between **hermes-agent's** tool-ca
 - `evaluate_log()` for saving eval results to JSON + samples.jsonl

 **HermesAgentBaseEnv** (`hermes_base_env.py`) extends BaseEnv with hermes-agent specifics:
- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, modal, daytona, ssh, singularity)
+- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, ssh, singularity, modal, daytona, vercel_sandbox)
 - Resolves hermes-agent toolsets via `_resolve_tools_for_group()` (calls `get_tool_definitions()` which queries `tools/registry.py`)
 - Implements `collect_trajectory()` which runs the full agent loop and computes rewards
 - Supports two-phase operation (Phase 1: OpenAI server, Phase 2: VLLM ManagedServer)
@@ -271,15 +271,23 @@ class PlatformConfig:
    # - "first": Only first chunk threads to user's message (default)
    # - "all": All chunks in multi-part replies thread to user's message
    reply_to_mode: str = "first"
-    
+
+    # Whether the gateway is allowed to send "♻️ Gateway online" /
+    # "♻ Gateway restarted" lifecycle notifications on this platform.
+    # Default True preserves prior behavior. Set False on platforms used
+    # by end users (e.g. Slack) where operator-flavored restart pings are
+    # noise; keep True for back-channels where the operator wants them.
+    gateway_restart_notification: bool = True
+
    # Platform-specific settings
    extra: Dict[str, Any] = field(default_factory=dict)
-    
+
    def to_dict(self) -> Dict[str, Any]:
        result = {
            "enabled": self.enabled,
            "extra": self.extra,
            "reply_to_mode": self.reply_to_mode,
+            "gateway_restart_notification": self.gateway_restart_notification,
        }
        if self.token:
            result["token"] = self.token
@@ -288,19 +296,22 @@ class PlatformConfig:
        if self.home_channel:
            result["home_channel"] = self.home_channel.to_dict()
        return result
-    
+
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PlatformConfig":
        home_channel = None
        if "home_channel" in data:
            home_channel = HomeChannel.from_dict(data["home_channel"])
-        
+
        return cls(
            enabled=_coerce_bool(data.get("enabled"), False),
            token=data.get("token"),
            api_key=data.get("api_key"),
            home_channel=home_channel,
            reply_to_mode=data.get("reply_to_mode", "first"),
+            gateway_restart_notification=_coerce_bool(
+                data.get("gateway_restart_notification"), True
+            ),
            extra=data.get("extra", {}),
        )

@@ -845,6 +856,16 @@ def load_gateway_config() -> GatewayConfig:
                    ):
                        if yaml_key in allow_mentions_cfg and not os.getenv(env_key):
                            os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower()
+                # reply_to_mode: top-level preferred, falls back to extra.reply_to_mode
+                # YAML 1.1 parses bare 'off' as boolean False — coerce to string "off".
+                _discord_extra = discord_cfg.get("extra") if isinstance(discord_cfg.get("extra"), dict) else {}
+                _discord_rtm = (
+                    discord_cfg["reply_to_mode"] if "reply_to_mode" in discord_cfg
+                    else _discord_extra.get("reply_to_mode")
+                )
+                if _discord_rtm is not None and not os.getenv("DISCORD_REPLY_TO_MODE"):
+                    _rtm_str = "off" if _discord_rtm is False else str(_discord_rtm).lower()
+                    os.environ["DISCORD_REPLY_TO_MODE"] = _rtm_str

            # Bridge top-level require_mention to Telegram when the telegram: section
            # does not already provide one.  Users often write "require_mention: true"
@@ -881,6 +902,16 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
                if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
                    os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
+                # reply_to_mode: top-level preferred, falls back to extra.reply_to_mode
+                # YAML 1.1 parses bare 'off' as boolean False — coerce to string "off".
+                _telegram_extra = telegram_cfg.get("extra") if isinstance(telegram_cfg.get("extra"), dict) else {}
+                _telegram_rtm = (
+                    telegram_cfg["reply_to_mode"] if "reply_to_mode" in telegram_cfg
+                    else _telegram_extra.get("reply_to_mode")
+                )
+                if _telegram_rtm is not None and not os.getenv("TELEGRAM_REPLY_TO_MODE"):
+                    _rtm_str = "off" if _telegram_rtm is False else str(_telegram_rtm).lower()
+                    os.environ["TELEGRAM_REPLY_TO_MODE"] = _rtm_str
                allowed_users = telegram_cfg.get("allow_from")
                if allowed_users is not None and not os.getenv("TELEGRAM_ALLOWED_USERS"):
                    if isinstance(allowed_users, list):
@@ -2,8 +2,8 @@
 OpenAI-compatible API server platform adapter.

 Exposes an HTTP server with endpoints:
- POST /v1/chat/completions        — OpenAI Chat Completions format (stateless; opt-in session continuity via X-Hermes-Session-Id header)
- POST /v1/responses               — OpenAI Responses API format (stateful via previous_response_id)
+- POST /v1/chat/completions        — OpenAI Chat Completions format (stateless; opt-in session continuity via X-Hermes-Session-Id header; opt-in long-term memory scoping via X-Hermes-Session-Key header)
+- POST /v1/responses               — OpenAI Responses API format (stateful via previous_response_id; X-Hermes-Session-Key supported)
 - GET  /v1/responses/{response_id} — Retrieve a stored response
 - DELETE /v1/responses/{response_id} — Delete a stored response
 - GET  /v1/models                  — lists hermes-agent as an available model
@@ -56,7 +56,7 @@ logger = logging.getLogger(__name__)
 DEFAULT_HOST = "127.0.0.1"
 DEFAULT_PORT = 8642
 MAX_STORED_RESPONSES = 100
-MAX_REQUEST_BYTES = 1_000_000  # 1 MB default limit for POST bodies
+MAX_REQUEST_BYTES = 10_000_000  # 10 MB — accommodates long agent conversations with tool calls
 CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS = 30.0
 MAX_NORMALIZED_TEXT_LENGTH = 65_536  # 64 KB cap for normalized content parts
 MAX_CONTENT_LIST_SIZE = 1_000  # Max items when content is an array
@@ -698,6 +698,71 @@ class APIServerAdapter(BasePlatformAdapter):
            status=401,
        )

+    # ------------------------------------------------------------------
+    # Session header helpers
+    # ------------------------------------------------------------------
+
+    # Soft length cap for session identifiers.  aiohttp already bounds each
+    # header field (``max_field_size``, 8190 bytes by default), but we
+    # impose a tighter limit on the session headers so a caller can't burn
+    # memory by passing a multi-kilobyte "session key".  256 chars is well
+    # above any realistic stable channel identifier (e.g.
+    # ``agent:main:webui:dm:user-42``) while staying small enough that the
+    # validated value is safe to pass into Honcho / state.db.
+    _MAX_SESSION_HEADER_LEN = 256
+
+    def _parse_session_key_header(
+        self, request: "web.Request"
+    ) -> tuple[Optional[str], Optional["web.Response"]]:
+        """Extract and validate the ``X-Hermes-Session-Key`` header.
+
+        The session key is a stable per-channel identifier that scopes
+        long-term memory (e.g. Honcho sessions) across transcripts.  It
+        is independent of ``X-Hermes-Session-Id``: callers may send
+        either, both, or neither.
+
+        Returns ``(session_key, None)`` on success (with an empty/absent
+        header yielding ``None`` for the key), or ``(None, error_response)``
+        on validation failure.
+
+        Security: like session continuation, accepting a caller-supplied
+        memory scope requires API-key authentication so that an
+        unauthenticated client on a local-only server can't inject itself
+        into another user's long-term memory scope by guessing a key.
+        """
+        raw = request.headers.get("X-Hermes-Session-Key", "").strip()
+        if not raw:
+            return None, None
+
+        if not self._api_key:
+            logger.warning(
+                "X-Hermes-Session-Key rejected: no API key configured. "
+                "Set API_SERVER_KEY to enable long-term memory scoping."
+            )
+            return None, web.json_response(
+                _openai_error(
+                    "X-Hermes-Session-Key requires API key authentication. "
+                    "Configure API_SERVER_KEY to enable this feature."
+                ),
+                status=403,
+            )
+
+        # Reject control characters that could enable header injection on
+        # the echo path.
+        if re.search(r'[\r\n\x00]', raw):
+            return None, web.json_response(
+                {"error": {"message": "Invalid session key", "type": "invalid_request_error"}},
+                status=400,
+            )
+
+        if len(raw) > self._MAX_SESSION_HEADER_LEN:
+            return None, web.json_response(
+                {"error": {"message": "Session key too long", "type": "invalid_request_error"}},
+                status=400,
+            )
+
+        return raw, None
+
    # ------------------------------------------------------------------
    # Session DB helper
    # ------------------------------------------------------------------
@@ -728,6 +793,7 @@ class APIServerAdapter(BasePlatformAdapter):
        tool_progress_callback=None,
        tool_start_callback=None,
        tool_complete_callback=None,
+        gateway_session_key: Optional[str] = None,
    ) -> Any:
        """
        Create an AIAgent instance using the gateway's runtime config.
@@ -736,6 +802,13 @@ class APIServerAdapter(BasePlatformAdapter):
        base_url, etc. from config.yaml / env vars.  Toolsets are resolved
        from config.yaml platform_toolsets.api_server (same as all other
        gateway platforms), falling back to the hermes-api-server default.
+
+        ``gateway_session_key`` is a stable per-channel identifier supplied
+        by the client (via ``X-Hermes-Session-Key``).  Unlike ``session_id``
+        which scopes the short-term transcript and rotates on /new, this
+        key is meant to persist across transcripts so long-term memory
+        providers (e.g. Honcho) can scope their per-chat state correctly
+        — matching the semantics of the native gateway's ``session_key``.
        """
        from run_agent import AIAgent
        from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config, GatewayRunner
@@ -771,6 +844,7 @@ class APIServerAdapter(BasePlatformAdapter):
            session_db=self._ensure_session_db(),
            fallback_model=fallback_model,
            reasoning_config=reasoning_config,
+            gateway_session_key=gateway_session_key,
        )
        return agent

@@ -854,6 +928,7 @@ class APIServerAdapter(BasePlatformAdapter):
                "run_stop": True,
                "tool_progress_events": True,
                "session_continuity_header": "X-Hermes-Session-Id",
+                "session_key_header": "X-Hermes-Session-Key",
                "cors": bool(self._cors_origins),
            },
            "endpoints": {
@@ -925,6 +1000,15 @@ class APIServerAdapter(BasePlatformAdapter):
                status=400,
            )

+        # Allow caller to scope long-term memory (e.g. Honcho) with a
+        # stable per-channel identifier via X-Hermes-Session-Key.  This
+        # is independent of X-Hermes-Session-Id: the key persists across
+        # transcripts while the id rotates when the caller starts a new
+        # transcript (i.e. /new semantics).  See _parse_session_key_header.
+        gateway_session_key, key_err = self._parse_session_key_header(request)
+        if key_err is not None:
+            return key_err
+
        # Allow caller to continue an existing session by passing X-Hermes-Session-Id.
        # When provided, history is loaded from state.db instead of from the request body.
        #
@@ -1059,11 +1143,13 @@ class APIServerAdapter(BasePlatformAdapter):
                tool_start_callback=_on_tool_start,
                tool_complete_callback=_on_tool_complete,
                agent_ref=agent_ref,
+                gateway_session_key=gateway_session_key,
            ))

            return await self._write_sse_chat_completion(
                request, completion_id, model_name, created, _stream_q,
                agent_task, agent_ref, session_id=session_id,
+                gateway_session_key=gateway_session_key,
            )

        # Non-streaming: run the agent (with optional Idempotency-Key)
@@ -1073,6 +1159,7 @@ class APIServerAdapter(BasePlatformAdapter):
                conversation_history=history,
                ephemeral_system_prompt=system_prompt,
                session_id=session_id,
+                gateway_session_key=gateway_session_key,
            )

        idempotency_key = request.headers.get("Idempotency-Key")
@@ -1122,11 +1209,17 @@ class APIServerAdapter(BasePlatformAdapter):
            },
        }

-        return web.json_response(response_data, headers={"X-Hermes-Session-Id": session_id})
+        response_headers = {
+            "X-Hermes-Session-Id": result.get("session_id", session_id),
+        }
+        if gateway_session_key:
+            response_headers["X-Hermes-Session-Key"] = gateway_session_key
+        return web.json_response(response_data, headers=response_headers)

    async def _write_sse_chat_completion(
        self, request: "web.Request", completion_id: str, model: str,
        created: int, stream_q, agent_task, agent_ref=None, session_id: str = None,
+        gateway_session_key: str = None,
    ) -> "web.StreamResponse":
        """Write real streaming SSE from agent's stream_delta_callback queue.

@@ -1149,6 +1242,8 @@ class APIServerAdapter(BasePlatformAdapter):
            sse_headers.update(cors)
        if session_id:
            sse_headers["X-Hermes-Session-Id"] = session_id
+        if gateway_session_key:
+            sse_headers["X-Hermes-Session-Key"] = gateway_session_key
        response = web.StreamResponse(status=200, headers=sse_headers)
        await response.prepare(request)

@@ -1254,6 +1349,22 @@ class APIServerAdapter(BasePlatformAdapter):
                except (asyncio.CancelledError, Exception):
                    pass
            logger.info("SSE client disconnected; interrupted agent task %s", completion_id)
+        except Exception as _exc:
+            # Agent crashed mid-stream.  Try to emit an error chunk
+            # so the client gets a proper response instead of a
+            # TransferEncodingError from incomplete chunked encoding.
+            import traceback as _tb
+            logger.error("Agent crashed mid-stream for %s: %s", completion_id, _tb.format_exc()[:300])
+            try:
+                error_chunk = {
+                    "id": completion_id, "object": "chat.completion.chunk",
+                    "created": created, "model": model,
+                    "choices": [{"index": 0, "delta": {}, "finish_reason": "error"}],
+                }
+                await response.write(f"data: {json.dumps(error_chunk)}\n\n".encode())
+                await response.write(b"data: [DONE]\n\n")
+            except Exception:
+                pass

        return response

@@ -1272,6 +1383,7 @@ class APIServerAdapter(BasePlatformAdapter):
        conversation: Optional[str],
        store: bool,
        session_id: str,
+        gateway_session_key: Optional[str] = None,
    ) -> "web.StreamResponse":
        """Write an SSE stream for POST /v1/responses (OpenAI Responses API).

@@ -1314,6 +1426,8 @@ class APIServerAdapter(BasePlatformAdapter):
            sse_headers.update(cors)
        if session_id:
            sse_headers["X-Hermes-Session-Id"] = session_id
+        if gateway_session_key:
+            sse_headers["X-Hermes-Session-Key"] = gateway_session_key
        response = web.StreamResponse(status=200, headers=sse_headers)
        await response.prepare(request)

@@ -1571,20 +1685,54 @@ class APIServerAdapter(BasePlatformAdapter):
             async def _dispatch(it) -> None:
                 """Route a queue item to the correct SSE emitter.
 
-                Plain strings are text deltas.  Tagged tuples with
-                ``__tool_started__`` / ``__tool_completed__`` prefixes
-                are tool lifecycle events.
+                Plain strings are text deltas — they are batched (50ms)
+                to reduce Open WebUI re-render storms.  Tagged tuples
+                flush the buffer first; ``__tool_started__`` /
+                ``__tool_completed__`` are then emitted as tool lifecycle
+                events, and tuples with any other tag emit nothing.
                 """
+                nonlocal _batch_timer
                 if isinstance(it, tuple) and len(it) == 2 and isinstance(it[0], str):
                     tag, payload = it
+                    # Flush batched text first so it precedes the tool event
+                    if _batch_buf:
+                        await _flush_batch()
                     if tag == "__tool_started__":
                         await _emit_tool_started(payload)
                     elif tag == "__tool_completed__":
                         await _emit_tool_completed(payload)
-                    # Unknown tags are silently ignored (forward-compat).
                 elif isinstance(it, str):
-                    await _emit_text_delta(it)
-                # Other types (non-string, non-tuple) are silently dropped.
+                    # Batch text deltas — append to buffer; start the flush timer if none is pending
+                    _batch_buf.append(it)
+                    if _batch_timer is None:
+                        _batch_timer = asyncio.create_task(_batch_flush_after(0.05))
+                # Anything else (including malformed tuples) is silently dropped.
+
+            # ── Batching state ──
+            _batch_buf: List[str] = []
+            _batch_timer: Optional[asyncio.Task] = None
+            _batch_lock = asyncio.Lock()
+
+            async def _batch_flush_after(delay: float) -> None:
+                """Sleep ``delay`` seconds, then flush batched text; a cancel during the sleep skips the flush."""
+                nonlocal _batch_timer  # only the timer is rebound here; the buffer is swapped inside _flush_batch
+                try:
+                    await asyncio.sleep(delay)
+                except asyncio.CancelledError:
+                    return
+                # Clear timer reference BEFORE flush so new deltas
+                # can start a fresh timer while we emit
+                _batch_timer = None
+                await _flush_batch()
+
+            async def _flush_batch() -> None:
+                """Drain the text buffer and send its contents as one combined SSE delta."""
+                nonlocal _batch_buf
+                async with _batch_lock:
+                    if not _batch_buf:
+                        return
+                    pending, _batch_buf = _batch_buf, []
+                    await _emit_text_delta("".join(pending))

            loop = asyncio.get_running_loop()
            while True:
@@ -1609,11 +1757,21 @@ class APIServerAdapter(BasePlatformAdapter):
                    continue

                if item is None:  # EOS sentinel
+                    # Cancel pending timer and flush remaining batched text
+                    if _batch_timer and not _batch_timer.done():
+                        _batch_timer.cancel()
+                        _batch_timer = None
+                    if _batch_buf:
+                        await _flush_batch()
                    break

                await _dispatch(item)
                last_activity = time.monotonic()

+            # Flush any final batched text before processing result
+            if _batch_buf:
+                await _flush_batch()
+
            # Pick up agent result + usage from the completed task
            try:
                result, agent_usage = await agent_task
@@ -1664,6 +1822,31 @@ class APIServerAdapter(BasePlatformAdapter):
            # payload still see the assistant text.  This mirrors the
            # shape produced by _extract_output_items in the batch path.
            final_items: List[Dict[str, Any]] = list(emitted_items)
+
+            # Trim large content from tool call arguments to keep the
+            # response.completed event under ~100KB.  Clients already
+            # received full details via incremental events.
+            for _item in final_items:
+                if _item.get("type") == "function_call":
+                    try:
+                        _args = json.loads(_item.get("arguments", "{}")) if isinstance(_item.get("arguments"), str) else _item.get("arguments", {})
+                        if isinstance(_args, dict):
+                            for _k in ("content", "query", "pattern", "old_string", "new_string"):
+                                if isinstance(_args.get(_k), str) and len(_args[_k]) > 500:
+                                    _args[_k] = "[" + str(len(_args[_k])) + " chars — truncated for response.completed]"
+                            _item["arguments"] = json.dumps(_args)
+                    except Exception:
+                        pass
+                elif _item.get("type") == "function_call_output":
+                    _output = _item.get("output", [])
+                    if isinstance(_output, list) and _output:
+                        _first = _output[0]
+                        if isinstance(_first, dict) and _first.get("type") == "input_text":
+                            _text = _first.get("text", "")
+                            if len(_text) > 1000:
+                                _first["text"] = _text[:500] + "...[" + str(len(_text) - 500) + " more chars]"
+                                _item["output"] = [_first]
+
            final_items.append({
                "type": "message",
                "role": "assistant",
@@ -1754,6 +1937,30 @@ class APIServerAdapter(BasePlatformAdapter):
                agent_task.cancel()
            logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
            raise
+        except Exception as _exc:
+            # Agent crashed with an unhandled error (e.g. model API error like
+            # BadRequestError, AuthenticationError).  Emit a response.failed
+            # event and properly terminate the SSE stream so the client doesn't
+            # get a TransferEncodingError from incomplete chunked encoding.
+            import traceback as _tb
+            _persist_incomplete_if_needed()
+            agent_error = _tb.format_exc()
+            try:
+                failed_env = _envelope("failed")
+                failed_env["output"] = list(emitted_items)
+                failed_env["error"] = {"message": str(_exc)[:500], "type": "server_error"}
+                failed_env["usage"] = {
+                    "input_tokens": usage.get("input_tokens", 0),
+                    "output_tokens": usage.get("output_tokens", 0),
+                    "total_tokens": usage.get("total_tokens", 0),
+                }
+                await _write_event("response.failed", {
+                    "type": "response.failed",
+                    "response": failed_env,
+                })
+            except Exception:
+                pass
+            logger.error("Agent crashed mid-stream for %s: %s", response_id, str(agent_error)[:300])

        return response

@@ -1763,6 +1970,11 @@ class APIServerAdapter(BasePlatformAdapter):
        if auth_err:
            return auth_err

+        # Long-term memory scope header (see chat_completions for details).
+        gateway_session_key, key_err = self._parse_session_key_header(request)
+        if key_err is not None:
+            return key_err
+
        # Parse request body
        try:
            body = await request.json()
@@ -1914,6 +2126,7 @@ class APIServerAdapter(BasePlatformAdapter):
                tool_start_callback=_on_tool_start,
                tool_complete_callback=_on_tool_complete,
                agent_ref=agent_ref,
+                gateway_session_key=gateway_session_key,
            ))

            response_id = f"resp_{uuid.uuid4().hex[:28]}"
@@ -1934,6 +2147,7 @@ class APIServerAdapter(BasePlatformAdapter):
                conversation=conversation,
                store=store,
                session_id=session_id,
+                gateway_session_key=gateway_session_key,
            )

        async def _compute_response():
@@ -1942,6 +2156,7 @@ class APIServerAdapter(BasePlatformAdapter):
                conversation_history=conversation_history,
                ephemeral_system_prompt=instructions,
                session_id=session_id,
+                gateway_session_key=gateway_session_key,
            )

        idempotency_key = request.headers.get("Idempotency-Key")
@@ -2016,7 +2231,10 @@ class APIServerAdapter(BasePlatformAdapter):
            if conversation:
                self._response_store.set_conversation(conversation, response_id)

-        return web.json_response(response_data)
+        response_headers = {"X-Hermes-Session-Id": session_id}
+        if gateway_session_key:
+            response_headers["X-Hermes-Session-Key"] = gateway_session_key
+        return web.json_response(response_data, headers=response_headers)

    # ------------------------------------------------------------------
    # GET / DELETE response endpoints
@@ -2338,6 +2556,7 @@ class APIServerAdapter(BasePlatformAdapter):
        tool_start_callback=None,
        tool_complete_callback=None,
        agent_ref: Optional[list] = None,
+        gateway_session_key: Optional[str] = None,
    ) -> tuple:
        """
        Create an agent and run a conversation in a thread executor.
@@ -2360,6 +2579,7 @@ class APIServerAdapter(BasePlatformAdapter):
                tool_progress_callback=tool_progress_callback,
                tool_start_callback=tool_start_callback,
                tool_complete_callback=tool_complete_callback,
+                gateway_session_key=gateway_session_key,
            )
            if agent_ref is not None:
                agent_ref[0] = agent
@@ -2374,6 +2594,12 @@ class APIServerAdapter(BasePlatformAdapter):
                "output_tokens": getattr(agent, "session_completion_tokens", 0) or 0,
                "total_tokens": getattr(agent, "session_total_tokens", 0) or 0,
            }
+            # Include the effective session ID in the result so callers
+            # (e.g. X-Hermes-Session-Id header) can track compression-
+            # triggered session rotations. (#16938)
+            _eff_sid = getattr(agent, "session_id", session_id)
+            if isinstance(_eff_sid, str) and _eff_sid:
+                result["session_id"] = _eff_sid
            return result, usage

        return await loop.run_in_executor(None, _run)
@@ -2453,6 +2679,11 @@ class APIServerAdapter(BasePlatformAdapter):
        if auth_err:
            return auth_err

+        # Long-term memory scope header (see chat_completions for details).
+        gateway_session_key, key_err = self._parse_session_key_header(request)
+        if key_err is not None:
+            return key_err
+
        # Enforce concurrency limit
        if len(self._run_streams) >= self._MAX_CONCURRENT_RUNS:
            return web.json_response(
@@ -2561,6 +2792,7 @@ class APIServerAdapter(BasePlatformAdapter):
                    session_id=session_id,
                    stream_delta_callback=_text_cb,
                    tool_progress_callback=event_cb,
+                    gateway_session_key=gateway_session_key,
                )
                self._active_run_agents[run_id] = agent
                def _run_sync():
@@ -2578,21 +2810,39 @@ class APIServerAdapter(BasePlatformAdapter):
                    return r, u

                result, usage = await asyncio.get_running_loop().run_in_executor(None, _run_sync)
-                final_response = result.get("final_response", "") if isinstance(result, dict) else ""
-                q.put_nowait({
-                    "event": "run.completed",
-                    "run_id": run_id,
-                    "timestamp": time.time(),
-                    "output": final_response,
-                    "usage": usage,
-                })
-                self._set_run_status(
-                    run_id,
-                    "completed",
-                    output=final_response,
-                    usage=usage,
-                    last_event="run.completed",
-                )
+                # Check for structured failure (non-retryable client errors like
+                # 401/400 return failed=True instead of raising, so the except
+                # block below never fires — issue #15561).
+                if isinstance(result, dict) and result.get("failed"):
+                    error_msg = result.get("error") or "agent run failed"
+                    q.put_nowait({
+                        "event": "run.failed",
+                        "run_id": run_id,
+                        "timestamp": time.time(),
+                        "error": error_msg,
+                    })
+                    self._set_run_status(
+                        run_id,
+                        "failed",
+                        error=error_msg,
+                        last_event="run.failed",
+                    )
+                else:
+                    final_response = result.get("final_response", "") if isinstance(result, dict) else ""
+                    q.put_nowait({
+                        "event": "run.completed",
+                        "run_id": run_id,
+                        "timestamp": time.time(),
+                        "output": final_response,
+                        "usage": usage,
+                    })
+                    self._set_run_status(
+                        run_id,
+                        "completed",
+                        output=final_response,
+                        usage=usage,
+                        last_event="run.completed",
+                    )
            except asyncio.CancelledError:
                self._set_run_status(
                    run_id,
@@ -2643,7 +2893,14 @@ class APIServerAdapter(BasePlatformAdapter):
        if hasattr(task, "add_done_callback"):
            task.add_done_callback(self._background_tasks.discard)

-        return web.json_response({"run_id": run_id, "status": "started"}, status=202)
+        response_headers = (
+            {"X-Hermes-Session-Key": gateway_session_key} if gateway_session_key else {}
+        )
+        return web.json_response(
+            {"run_id": run_id, "status": "started"},
+            status=202,
+            headers=response_headers,
+        )

    async def _handle_get_run(self, request: "web.Request") -> "web.Response":
        """GET /v1/runs/{run_id} — return pollable run status for external UIs."""
@@ -2787,7 +3044,7 @@ class APIServerAdapter(BasePlatformAdapter):

        try:
            mws = [mw for mw in (cors_middleware, body_limit_middleware, security_headers_middleware) if mw is not None]
-            self._app = web.Application(middlewares=mws)
+            self._app = web.Application(middlewares=mws, client_max_size=MAX_REQUEST_BYTES)
            self._app["api_server_adapter"] = self
            self._app.router.add_get("/health", self._handle_health)
            self._app.router.add_get("/health/detailed", self._handle_health_detailed)
@@ -2506,7 +2506,13 @@ class BasePlatformAdapter(ABC):
                _r = await self._send_with_retry(
                    chat_id=event.source.chat_id,
                    content=_text,
-                    reply_to=event.message_id,
+                    reply_to=(
+                        event.reply_to_message_id
+                        if event.source.platform == Platform.FEISHU
+                        and event.source.thread_id
+                        and event.reply_to_message_id
+                        else event.message_id
+                    ),
                    metadata=thread_meta,
                )
                if _eph_ttl > 0 and _r.success and _r.message_id:
@@ -2606,7 +2612,13 @@ class BasePlatformAdapter(ABC):
                        _r = await self._send_with_retry(
                            chat_id=event.source.chat_id,
                            content=_text,
-                            reply_to=event.message_id,
+                            reply_to=(
+                                event.reply_to_message_id
+                                if event.source.platform == Platform.FEISHU
+                                and event.source.thread_id
+                                and event.reply_to_message_id
+                                else event.message_id
+                            ),
                            metadata=_thread_meta,
                        )
                        if _eph_ttl > 0 and _r.success and _r.message_id:
@@ -2663,10 +2675,18 @@ class BasePlatformAdapter(ABC):
        mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
        if mode == "off":
            return 0.0
-        min_ms = int(os.getenv("HERMES_HUMAN_DELAY_MIN_MS", "800"))
-        max_ms = int(os.getenv("HERMES_HUMAN_DELAY_MAX_MS", "2500"))
        if mode == "natural":
            min_ms, max_ms = 800, 2500
+            return random.uniform(min_ms / 1000.0, max_ms / 1000.0)
+        # custom mode — tolerate malformed env vars instead of crashing.
+        try:
+            min_ms = int(os.getenv("HERMES_HUMAN_DELAY_MIN_MS", "800"))
+        except (TypeError, ValueError):
+            min_ms = 800
+        try:
+            max_ms = int(os.getenv("HERMES_HUMAN_DELAY_MAX_MS", "2500"))
+        except (TypeError, ValueError):
+            max_ms = 2500
        return random.uniform(min_ms / 1000.0, max_ms / 1000.0)

    async def _process_message_background(self, event: MessageEvent, session_key: str) -> None:
@@ -2810,10 +2830,15 @@ class BasePlatformAdapter(ABC):
                # Send the text portion
                if text_content:
                    logger.info("[%s] Sending response (%d chars) to %s", self.name, len(text_content), event.source.chat_id)
+                    _reply_anchor = (
+                        event.reply_to_message_id
+                        if event.source.platform == Platform.FEISHU and event.source.thread_id and event.reply_to_message_id
+                        else event.message_id
+                    )
                    result = await self._send_with_retry(
                        chat_id=event.source.chat_id,
                        content=text_content,
-                        reply_to=event.message_id,
+                        reply_to=_reply_anchor,
                        metadata=_thread_metadata,
                    )
                    _record_delivery(result)
@@ -720,11 +720,22 @@ class DiscordAdapter(BasePlatformAdapter):
                        return
                    # If humans are mentioned but we're not → not for us
                    # (preserves old DISCORD_IGNORE_NO_MENTION=true behavior)
+                    # EXCEPT in free-response channels where the bot should
+                    # answer regardless of who is mentioned.
                    _ignore_no_mention = os.getenv(
                        "DISCORD_IGNORE_NO_MENTION", "true"
                    ).lower() in ("true", "1", "yes")
                    if _ignore_no_mention and not _self_mentioned and not _other_bots_mentioned:
-                        return
+                        _channel_id = str(message.channel.id)
+                        _parent_id = None
+                        if hasattr(message.channel, "parent_id") and message.channel.parent_id:
+                            _parent_id = str(message.channel.parent_id)
+                        _free_channels = adapter_self._discord_free_response_channels()
+                        _channel_ids = {_channel_id}
+                        if _parent_id:
+                            _channel_ids.add(_parent_id)
+                        if "*" not in _free_channels and not (_channel_ids & _free_channels):
+                            return

                await self._handle_message(message)

@@ -2643,9 +2654,14 @@ class DiscordAdapter(BasePlatformAdapter):
            await self._run_simple_slash(interaction, "/reload-skills")

        @tree.command(name="voice", description="Toggle voice reply mode")
-        @discord.app_commands.describe(mode="Voice mode: on, off, tts, channel, leave, or status")
+        @discord.app_commands.describe(mode="Voice mode: join, channel, leave, on, tts, off, or status")
        @discord.app_commands.choices(mode=[
-            discord.app_commands.Choice(name="channel — join your voice channel", value="channel"),
+            # `join` and `channel` both route to _handle_voice_channel_join in
+            # gateway/run.py — expose both in the slash UI so autocomplete
+            # matches what the docs advertise and what the runner accepts when
+            # the command is typed as plain text.
+            discord.app_commands.Choice(name="join — join your voice channel", value="join"),
+            discord.app_commands.Choice(name="channel — join your voice channel (alias)", value="channel"),
            discord.app_commands.Choice(name="leave — leave voice channel", value="leave"),
            discord.app_commands.Choice(name="on — voice reply to voice messages", value="on"),
            discord.app_commands.Choice(name="tts — voice reply to all messages", value="tts"),
@@ -3797,7 +3813,7 @@ class DiscordAdapter(BasePlatformAdapter):
        if not is_thread and not isinstance(message.channel, discord.DMChannel):
            no_thread_channels_raw = os.getenv("DISCORD_NO_THREAD_CHANNELS", "")
            no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()}
-            skip_thread = bool(channel_ids & no_thread_channels) or is_free_channel
+            skip_thread = bool(channel_ids & no_thread_channels)
            auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
            is_reply_message = getattr(message, "type", None) == discord.MessageType.reply
            if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
@@ -416,6 +416,18 @@ class EmailAdapter(BasePlatformAdapter):
            logger.debug("[Email] Dropping automated sender at dispatch: %s", sender_addr)
            return

+        # Skip senders not in EMAIL_ALLOWED_USERS — prevents the adapter
+        # from creating a MessageEvent (and thus thread context) for senders
+        # that the gateway will never authorize.  Without this early guard,
+        # a race between dispatch and authorization can result in the adapter
+        # sending a reply even though the handler returned None.
+        allowed_raw = os.getenv("EMAIL_ALLOWED_USERS", "").strip()
+        if allowed_raw:
+            allowed = {addr.strip().lower() for addr in allowed_raw.split(",") if addr.strip()}
+            if sender_addr.lower() not in allowed:
+                logger.debug("[Email] Dropping non-allowlisted sender at dispatch: %s", sender_addr)
+                return
+
        subject = msg_data["subject"]
        body = msg_data["body"].strip()
        attachments = msg_data["attachments"]
@@ -153,6 +153,9 @@ _MARKDOWN_HINT_RE = re.compile(
    r"(^#{1,6}\s)|(^\s*[-*]\s)|(^\s*\d+\.\s)|(^\s*---+\s*$)|(```)|(`[^`\n]+`)|(\*\*[^*\n].+?\*\*)|(~~[^~\n].+?~~)|(<u>.+?</u>)|(\*[^*\n]+\*)|(\[[^\]]+\]\([^)]+\))|(^>\s)",
    re.MULTILINE,
 )
+# Detect markdown tables: a line starting with | followed by a separator line.
+# Feishu post-type 'md' elements do not render tables, so we force text mode.
+_MARKDOWN_TABLE_RE = re.compile(r"^\|.*\|\n\|[-|: ]+\|", re.MULTILINE)
 _MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
 _MARKDOWN_FENCE_OPEN_RE = re.compile(r"^```([^\n`]*)\s*$")
 _MARKDOWN_FENCE_CLOSE_RE = re.compile(r"^```\s*$")
@@ -2757,9 +2760,11 @@ class FeishuAdapter(BasePlatformAdapter):
            if hint:
                text = f"{hint}\n\n{text}" if text else hint

+        thread_id = getattr(message, "thread_id", None) or getattr(message, "root_id", None) or None
        reply_to_message_id = (
            getattr(message, "parent_id", None)
            or getattr(message, "upper_message_id", None)
+            or getattr(message, "root_id", None)
            or None
        )
        reply_to_text = await self._fetch_message_text(reply_to_message_id) if reply_to_message_id else None
@@ -2791,7 +2796,7 @@ class FeishuAdapter(BasePlatformAdapter):
            chat_type=self._resolve_source_chat_type(chat_info=chat_info, event_chat_type=chat_type),
            user_id=sender_profile["user_id"],
            user_name=sender_profile["user_name"],
-            thread_id=getattr(message, "thread_id", None) or None,
+            thread_id=thread_id,
            user_id_alt=sender_profile["user_id_alt"],
            is_bot=is_bot,
        )
@@ -3860,47 +3865,50 @@ class FeishuAdapter(BasePlatformAdapter):
        and self-sent bot event filtering.

        Populates ``_bot_open_id`` and ``_bot_name`` from /open-apis/bot/v3/info
-        (no extra scopes required beyond the tenant access token). Falls back to
-        the application info endpoint for ``_bot_name`` only when the first probe
-        doesn't return it. Each field is hydrated independently — a value already
-        supplied via env vars (FEISHU_BOT_OPEN_ID / FEISHU_BOT_USER_ID /
-        FEISHU_BOT_NAME) is preserved and skips its probe.
+        (no extra scopes required beyond the tenant access token). The probe
+        always runs when a client is available so stale env vars from app/bot
+        migrations do not break group @mention gating. Falls back to the
+        application info endpoint for ``_bot_name`` only when the first probe
+        doesn't return it. If the probe fails, env-provided values are preserved.
        """
        if not self._client:
            return
-        if self._bot_open_id and self._bot_name:
-            # Everything the self-send filter and precise mention gate need is
-            # already in place; nothing to probe.
-            return

        # Primary probe: /open-apis/bot/v3/info — returns bot_name + open_id, no
        # extra scopes required. This is the same endpoint the onboarding wizard
        # uses via probe_bot().
-        if not self._bot_open_id or not self._bot_name:
-            try:
-                req = (
-                    BaseRequest.builder()
-                    .http_method(HttpMethod.GET)
-                    .uri("/open-apis/bot/v3/info")
-                    .token_types({AccessTokenType.TENANT})
-                    .build()
-                )
-                resp = await asyncio.to_thread(self._client.request, req)
-                content = getattr(getattr(resp, "raw", None), "content", None)
-                if content:
-                    payload = json.loads(content)
-                    parsed = _parse_bot_response(payload) or {}
-                    open_id = (parsed.get("bot_open_id") or "").strip()
-                    bot_name = (parsed.get("bot_name") or "").strip()
-                    if open_id and not self._bot_open_id:
-                        self._bot_open_id = open_id
-                    if bot_name and not self._bot_name:
-                        self._bot_name = bot_name
-            except Exception:
-                logger.debug(
-                    "[Feishu] /bot/v3/info probe failed during hydration",
-                    exc_info=True,
-                )
+        try:
+            req = (
+                BaseRequest.builder()
+                .http_method(HttpMethod.GET)
+                .uri("/open-apis/bot/v3/info")
+                .token_types({AccessTokenType.TENANT})
+                .build()
+            )
+            resp = await asyncio.to_thread(self._client.request, req)
+            content = getattr(getattr(resp, "raw", None), "content", None)
+            if content:
+                payload = json.loads(content)
+                parsed = _parse_bot_response(payload) or {}
+                open_id = (parsed.get("bot_open_id") or "").strip()
+                bot_name = (parsed.get("bot_name") or "").strip()
+                if open_id:
+                    if self._bot_open_id and self._bot_open_id != open_id:
+                        logger.warning(
+                            "[Feishu] FEISHU_BOT_OPEN_ID is stale; using /bot/v3/info open_id for group @mention gating."
+                        )
+                    self._bot_open_id = open_id
+                if bot_name:
+                    if self._bot_name and self._bot_name != bot_name:
+                        logger.info(
+                            "[Feishu] FEISHU_BOT_NAME differs from /bot/v3/info; using hydrated bot name for group @mention gating."
+                        )
+                    self._bot_name = bot_name
+        except Exception:
+            logger.debug(
+                "[Feishu] /bot/v3/info probe failed during hydration",
+                exc_info=True,
+            )

        # Fallback probe for _bot_name only: application info endpoint. Needs
        # admin:app.info:readonly or application:application:self_manage scope,
@@ -3945,7 +3953,14 @@ class FeishuAdapter(BasePlatformAdapter):
        if isinstance(seen_data, list):
            entries: Dict[str, float] = {str(item).strip(): 0.0 for item in seen_data if str(item).strip()}
        elif isinstance(seen_data, dict):
-            entries = {k: float(v) for k, v in seen_data.items() if isinstance(k, str) and k.strip()}
+            entries = {}
+            for key, value in seen_data.items():
+                if not isinstance(key, str) or not key.strip():
+                    continue
+                try:
+                    entries[key] = float(value)
+                except (TypeError, ValueError):
+                    continue
        else:
            return
        # Filter out TTL-expired entries (entries saved with ts=0.0 are treated as immortal
@@ -3990,6 +4005,12 @@ class FeishuAdapter(BasePlatformAdapter):
    # =========================================================================

    def _build_outbound_payload(self, content: str) -> tuple[str, str]:
+        # Feishu post-type 'md' elements do not render markdown tables; sending
+        # table content as post causes the message to appear blank on the client.
+        # Force plain text for anything that looks like a markdown table.
+        if _MARKDOWN_TABLE_RE.search(content):
+            text_payload = {"text": content}
+            return "text", json.dumps(text_payload, ensure_ascii=False)
        if _MARKDOWN_HINT_RE.search(content):
            return "post", _build_markdown_post_payload(content)
        text_payload = {"text": content}
@@ -4068,15 +4089,18 @@ class FeishuAdapter(BasePlatformAdapter):
        reply_to: Optional[str],
        metadata: Optional[Dict[str, Any]],
    ) -> Any:
+        effective_reply_to = reply_to
+        if not effective_reply_to and metadata and metadata.get("thread_id"):
+            effective_reply_to = metadata.get("reply_to_message_id")
        reply_in_thread = bool((metadata or {}).get("thread_id"))
-        if reply_to:
+        if effective_reply_to:
            body = self._build_reply_message_body(
                content=payload,
                msg_type=msg_type,
                reply_in_thread=reply_in_thread,
                uuid_value=str(uuid.uuid4()),
            )
-            request = self._build_reply_message_request(reply_to, body)
+            request = self._build_reply_message_request(effective_reply_to, body)
            return await asyncio.to_thread(self._client.im.v1.message.reply, request)

        body = self._build_create_message_body(
@@ -4085,7 +4109,15 @@ class FeishuAdapter(BasePlatformAdapter):
            content=payload,
            uuid_value=str(uuid.uuid4()),
        )
-        request = self._build_create_message_request("chat_id", body)
+        # Detect whether chat_id is a user open_id (DM) or a chat_id (group).
+        # Feishu API expects receive_id_type="open_id" for user DMs (ou_ prefix)
+        # and receive_id_type="chat_id" for group chats (oc_ prefix, which IS
+        # the chat_id format — see https://open.feishu.cn/document/).
+        if chat_id.startswith("ou_"):
+            receive_id_type = "open_id"
+        else:
+            receive_id_type = "chat_id"
+        request = self._build_create_message_request(receive_id_type, body)
        return await asyncio.to_thread(self._client.im.v1.message.create, request)

    @staticmethod
@@ -4227,6 +4259,15 @@ class FeishuAdapter(BasePlatformAdapter):
                if active_reply_to and not self._response_succeeded(response):
                    code = getattr(response, "code", None)
                    if code in _FEISHU_REPLY_FALLBACK_CODES:
+                        if (metadata or {}).get("thread_id"):
+                            logger.warning(
+                                "[Feishu] Reply to %s failed in thread %s (code %s — message withdrawn/missing); "
+                                "skipping top-level fallback to avoid creating a new topic",
+                                active_reply_to,
+                                (metadata or {}).get("thread_id"),
+                                code,
+                            )
+                            return response
                        logger.warning(
                            "[Feishu] Reply to %s failed (code %s — message withdrawn/missing); "
                            "falling back to new message in chat %s",
@@ -222,33 +222,37 @@ class ThreadParticipationTracker:
    def __init__(self, platform_name: str, max_tracked: int = 500):
        self._platform = platform_name
        self._max_tracked = max_tracked
-        self._threads: set = self._load()
+        self._threads: dict[str, None] = {
+            str(thread_id): None for thread_id in self._load()
+        }

    def _state_path(self) -> Path:
        from hermes_constants import get_hermes_home
        return get_hermes_home() / f"{self._platform}_threads.json"

-    def _load(self) -> set:
+    def _load(self) -> list[str]:
        path = self._state_path()
        if path.exists():
            try:
-                return set(json.loads(path.read_text(encoding="utf-8")))
+                data = json.loads(path.read_text(encoding="utf-8"))
+                if isinstance(data, list):
+                    return [str(thread_id) for thread_id in data]
            except Exception:
                pass
-        return set()
+        return []

    def _save(self) -> None:
        path = self._state_path()
        thread_list = list(self._threads)
        if len(thread_list) > self._max_tracked:
            thread_list = thread_list[-self._max_tracked:]
-            self._threads = set(thread_list)
+            self._threads = {thread_id: None for thread_id in thread_list}
        atomic_json_write(path, thread_list, indent=None)

    def mark(self, thread_id: str) -> None:
        """Mark *thread_id* as participated and persist."""
        if thread_id not in self._threads:
-            self._threads.add(thread_id)
+            self._threads[thread_id] = None
            self._save()

    def __contains__(self, thread_id: str) -> bool:
@@ -397,13 +397,24 @@ class QQAdapter(BasePlatformAdapter):
            await self._session.close()
        self._session = None

-        self._session = aiohttp.ClientSession()
+        # Honor WSL proxy env for QQ WebSocket. Hermes upgrades overwrite this
+        # local patch, so QQ can regress to direct-connect timeouts after update.
+        self._session = aiohttp.ClientSession(trust_env=True)
+        ws_proxy = (
+            os.getenv("WSS_PROXY")
+            or os.getenv("wss_proxy")
+            or os.getenv("HTTPS_PROXY")
+            or os.getenv("https_proxy")
+            or os.getenv("ALL_PROXY")
+            or os.getenv("all_proxy")
+        )
        self._ws = await self._session.ws_connect(
            gateway_url,
            headers={
                "User-Agent": build_user_agent(),
            },
            timeout=CONNECT_TIMEOUT_SECONDS,
+            proxy=ws_proxy,
        )
        logger.info("[%s] WebSocket connected to %s", self._log_tag, gateway_url)

@@ -10,7 +10,7 @@ Shares credentials with the optional telephony skill — same env vars:

 Gateway-specific env vars:
  - SMS_WEBHOOK_PORT     (default 8080)
-  - SMS_WEBHOOK_HOST     (default 0.0.0.0)
+  - SMS_WEBHOOK_HOST     (default 127.0.0.1)
  - SMS_WEBHOOK_URL      (public URL for Twilio signature validation — required)
  - SMS_INSECURE_NO_SIGNATURE  (true to disable signature validation — dev only)
  - SMS_ALLOWED_USERS    (comma-separated E.164 phone numbers)
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
 TWILIO_API_BASE = "https://api.twilio.com/2010-04-01/Accounts"
 MAX_SMS_LENGTH = 1600  # ~10 SMS segments
 DEFAULT_WEBHOOK_PORT = 8080
-DEFAULT_WEBHOOK_HOST = "0.0.0.0"
+DEFAULT_WEBHOOK_HOST = "127.0.0.1"


 def check_sms_requirements() -> bool:
@@ -91,19 +91,23 @@ class SmsAdapter(BasePlatformAdapter):
        from aiohttp import web

        if not self._from_number:
-            logger.error("[sms] TWILIO_PHONE_NUMBER not set — cannot send replies")
+            msg = "[sms] TWILIO_PHONE_NUMBER not set — cannot send replies"
+            logger.error(msg)
+            self._set_fatal_error("sms_missing_phone_number", msg, retryable=False)
            return False

        insecure_no_sig = os.getenv("SMS_INSECURE_NO_SIGNATURE", "").lower() == "true"

        if not self._webhook_url and not insecure_no_sig:
-            logger.error(
+            msg = (
                "[sms] Refusing to start: SMS_WEBHOOK_URL is required for Twilio "
                "signature validation. Set it to the public URL configured in your "
                "Twilio console (e.g. https://example.com/webhooks/twilio). "
                "For local development without validation, set "
-                "SMS_INSECURE_NO_SIGNATURE=true (NOT recommended for production).",
+                "SMS_INSECURE_NO_SIGNATURE=true (NOT recommended for production)."
            )
+            logger.error(msg)
+            self._set_fatal_error("sms_missing_webhook_url", msg, retryable=False)
            return False

        if insecure_no_sig and not self._webhook_url:
@@ -353,7 +353,10 @@ class TelegramAdapter(BasePlatformAdapter):

    @classmethod
    def _message_thread_id_for_typing(cls, thread_id: Optional[str]) -> Optional[int]:
-        if not thread_id:
+        # Mirrors _message_thread_id_for_send: the General forum topic (thread id
+        # "1") is represented as "no thread id" on the wire. User-created topics
+        # keep their real id so typing stays scoped to that topic.
+        if not thread_id or str(thread_id) == cls._GENERAL_TOPIC_THREAD_ID:
            return None
        return int(thread_id)

@@ -688,6 +691,29 @@ class TelegramAdapter(BasePlatformAdapter):
                )
            return None

+    async def rename_dm_topic(
+        self,
+        chat_id: int,
+        thread_id: int,
+        name: str,
+    ) -> None:
+        """Rename a forum topic in a private (DM) chat.
+
+        Args:
+            chat_id: Chat that owns the topic. Converted to ``int`` when
+                possible; a value that cannot be converted is forwarded as-is.
+            thread_id: Topic (message thread) id; always converted to ``int``.
+            name: New title for the topic.
+
+        Returns without doing anything when no bot client is attached.
+        Errors raised by ``edit_forum_topic`` are not handled here.
+        """
+        bot = self._bot
+        if not bot:
+            return
+        try:
+            target_chat = int(chat_id)
+        except (TypeError, ValueError):
+            # Not numeric: hand the caller's value to the API untouched.
+            target_chat = chat_id
+        await bot.edit_forum_topic(
+            chat_id=target_chat,
+            message_thread_id=int(thread_id),
+            name=name,
+        )
+        logger.info(
+            "[%s] Renamed DM topic in chat %s thread_id=%s -> '%s'",
+            self.name,
+            chat_id,
+            thread_id,
+            name,
+        )
+
    def _persist_dm_topic_thread_id(self, chat_id: int, topic_name: str, thread_id: int) -> None:
        """Save a newly created thread_id back into config.yaml so it persists across restarts."""
        try:
@@ -2267,13 +2293,54 @@ class TelegramAdapter(BasePlatformAdapter):
                )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
-            logger.error(
-                "[%s] Failed to send Telegram local image, falling back to base adapter: %s",
-                self.name,
-                e,
-                exc_info=True,
+            error_str = str(e)
+            # Dimension-related errors are the expected case for valid image
+            # files that Telegram just refuses as photos (screenshots, extreme
+            # aspect ratios). Log at INFO because the document fallback is
+            # the correct path. Any other send_photo failure also falls back
+            # to document (rate limits, corrupt file markers, format edge
+            # cases), but at WARNING because it's unexpected and worth
+            # surfacing in logs.
+            is_dim_error = (
+                "Photo_invalid_dimensions" in error_str
+                or "PHOTO_INVALID_DIMENSIONS" in error_str
            )
-            return await super().send_image_file(chat_id, image_path, caption, reply_to)
+            if is_dim_error:
+                logger.info(
+                    "[%s] Image dimensions exceed Telegram photo limits, "
+                    "sending as document: %s",
+                    self.name,
+                    image_path,
+                )
+            else:
+                logger.warning(
+                    "[%s] Failed to send Telegram local image as photo, "
+                    "trying document fallback: %s",
+                    self.name,
+                    e,
+                    exc_info=True,
+                )
+            # Fallback to sending as document (file) — no dimension limit,
+            # only 50MB size limit. If even that fails, fall back to the
+            # base adapter's text-only "Image: /path" rendering.
+            try:
+                return await self.send_document(
+                    chat_id=chat_id,
+                    file_path=image_path,
+                    caption=caption,
+                    file_name=os.path.basename(image_path),
+                    reply_to=reply_to,
+                    metadata=metadata,
+                )
+            except Exception as doc_err:
+                logger.error(
+                    "[%s] Failed to send Telegram local image as document, "
+                    "falling back to base adapter: %s",
+                    self.name,
+                    doc_err,
+                    exc_info=True,
+                )
+                return await super().send_image_file(chat_id, image_path, caption, reply_to)

    async def send_document(
        self,
@@ -2444,21 +2511,16 @@ class TelegramAdapter(BasePlatformAdapter):
            try:
                _typing_thread = self._metadata_thread_id(metadata)
                message_thread_id = self._message_thread_id_for_typing(_typing_thread)
-                try:
-                    await self._bot.send_chat_action(
-                        chat_id=int(chat_id),
-                        action="typing",
-                        message_thread_id=message_thread_id,
-                    )
-                except Exception as e:
-                    if message_thread_id is not None and self._is_thread_not_found_error(e):
-                        await self._bot.send_chat_action(
-                            chat_id=int(chat_id),
-                            action="typing",
-                            message_thread_id=None,
-                        )
-                    else:
-                        raise
+                # No retry-without-thread fallback here: _message_thread_id_for_typing
+                # already maps the forum General topic to None, so any non-None value
+                # reaching this call is a user-created topic. If Telegram rejects it
+                # (e.g. topic deleted mid-session), we swallow the failure rather than
+                # showing a typing indicator in the wrong chat/All Messages.
+                await self._bot.send_chat_action(
+                    chat_id=int(chat_id),
+                    action="typing",
+                    message_thread_id=message_thread_id,
+                )
            except Exception as e:
                # Typing failures are non-fatal; log at debug level only.
                logger.debug(
@@ -185,10 +185,13 @@ async def _query_doh_provider(
 async def discover_fallback_ips() -> list[str]:
    """Auto-discover Telegram API IPs via DNS-over-HTTPS.

-    Resolves api.telegram.org through Google and Cloudflare DoH, collects all
-    unique IPs, and excludes the system-DNS-resolved IP (which is presumably
-    unreachable on this network).  Falls back to a hardcoded seed list when DoH
-    is also unavailable.
+    Resolves api.telegram.org through Google and Cloudflare DoH and returns all
+    unique A records.  IPs that match the local system resolver are kept rather
+    than excluded: in many networks the system-DNS IP is the most reliable path
+    to api.telegram.org and a transient primary-path failure should be retried
+    against the same address via the IP-rewrite path before the seed list is
+    consulted (#14520).  Falls back to a hardcoded seed list only when DoH
+    yields no usable answers.
    """
    async with httpx.AsyncClient(timeout=httpx.Timeout(_DOH_TIMEOUT)) as client:
        doh_tasks = [_query_doh_provider(client, p) for p in _DOH_PROVIDERS]
@@ -203,11 +206,11 @@ async def discover_fallback_ips() -> list[str]:
        if isinstance(r, list):
            doh_ips.extend(r)

-    # Deduplicate preserving order, exclude system-DNS IPs
+    # Deduplicate preserving order
    seen: set[str] = set()
    candidates: list[str] = []
    for ip in doh_ips:
-        if ip not in seen and ip not in system_ips:
+        if ip not in seen:
            seen.add(ip)
            candidates.append(ip)

@@ -219,7 +222,7 @@ async def discover_fallback_ips() -> list[str]:
        return validated

    logger.info(
-        "DoH discovery yielded no new IPs (system DNS: %s); using seed fallback IPs %s",
+        "DoH discovery yielded no usable IPs (system DNS: %s); using seed fallback IPs %s",
        ", ".join(system_ips) or "unknown",
        ", ".join(_SEED_FALLBACK_IPS),
    )
@@ -142,6 +142,7 @@ class WeComAdapter(BasePlatformAdapter):
    """WeCom AI Bot adapter backed by a persistent WebSocket connection."""

    MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH
+    SUPPORTS_MESSAGE_EDITING = False
    # Threshold for detecting WeCom client-side message splits.
    # When a chunk is near the 4000-char limit, a continuation is almost certain.
    _SPLIT_THRESHOLD = 3900
@@ -1014,6 +1015,8 @@ class WeComAdapter(BasePlatformAdapter):
        if not aes_key:
            raise ValueError("aes_key is required")

+        # WeCom doesn't pad base64 keys; add padding if needed
+        aes_key = aes_key + '=' * ((4 - len(aes_key) % 4) % 4)
        key = base64.b64decode(aes_key)
        if len(key) != 32:
            raise ValueError(f"Invalid WeCom AES key length: expected 32 bytes, got {len(key)}")
@@ -1333,6 +1333,15 @@ class WeixinAdapter(BasePlatformAdapter):
        if message_id and self._dedup.is_duplicate(message_id):
            return

+        # Secondary content-fingerprint dedup for text messages
+        item_list = message.get("item_list") or []
+        text = _extract_text(item_list)
+        if text:
+            content_key = f"content:{sender_id}:{hashlib.md5(text.encode()).hexdigest()}"
+            if self._dedup.is_duplicate(content_key):
+                logger.debug("[%s] Content-dedup: skipping duplicate message from %s", self.name, sender_id)
+                return
+
        chat_type, effective_chat_id = _guess_chat_type(message, self._account_id)
        if chat_type == "group":
            if self._group_policy == "disabled":
@@ -1347,8 +1356,6 @@ class WeixinAdapter(BasePlatformAdapter):
            self._token_store.set(self._account_id, sender_id, context_token)
        asyncio.create_task(self._maybe_fetch_typing_ticket(sender_id, context_token or None))

-        item_list = message.get("item_list") or []
-        text = _extract_text(item_list)
        media_paths: List[str] = []
        media_types: List[str] = []

@@ -1121,7 +1121,7 @@ class SessionStore:
                self._save()
        return count

-    def reset_session(self, session_key: str) -> Optional[SessionEntry]:
+    def reset_session(self, session_key: str, display_name: Optional[str] = None) -> Optional[SessionEntry]:
        """Force reset a session, creating a new session ID."""
        db_end_session_id = None
        db_create_kwargs = None
@@ -1145,7 +1145,7 @@ class SessionStore:
                created_at=now,
                updated_at=now,
                origin=old_entry.origin,
-                display_name=old_entry.display_name,
+                display_name=display_name if display_name is not None else old_entry.display_name,
                platform=old_entry.platform,
                chat_type=old_entry.chat_type,
                is_fresh_reset=True,
@@ -1276,8 +1276,9 @@ class SessionStore:
        
        # Also write legacy JSONL (keeps existing tooling working during transition)
        transcript_path = self.get_transcript_path(session_id)
-        with open(transcript_path, "a", encoding="utf-8") as f:
-            f.write(json.dumps(message, ensure_ascii=False) + "\n")
+        with self._lock:
+            with open(transcript_path, "a", encoding="utf-8") as f:
+                f.write(json.dumps(message, ensure_ascii=False) + "\n")
    
    def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
        """Replace the entire transcript for a session with new messages.
@@ -637,6 +637,8 @@ def release_all_scoped_locks(

 _TAKEOVER_MARKER_FILENAME = ".gateway-takeover.json"
 _TAKEOVER_MARKER_TTL_S = 60  # Marker older than this is treated as stale
+_PLANNED_STOP_MARKER_FILENAME = ".gateway-planned-stop.json"
+_PLANNED_STOP_MARKER_TTL_S = 60


 def _get_takeover_marker_path() -> Path:
@@ -645,6 +647,67 @@ def _get_takeover_marker_path() -> Path:
    return home / _TAKEOVER_MARKER_FILENAME


+def _get_planned_stop_marker_path() -> Path:
+    """Locate the planned-stop marker file inside the Hermes home directory."""
+    return get_hermes_home() / _PLANNED_STOP_MARKER_FILENAME
+
+
+def _marker_is_stale(written_at: str, ttl_s: int) -> bool:
+    """Report whether a marker timestamp is older than ``ttl_s`` seconds.
+
+    ``written_at`` is parsed with ``datetime.fromisoformat`` and compared
+    against the current UTC time. Any ``TypeError``/``ValueError`` raised
+    while parsing or comparing (non-string input, malformed text, a
+    timestamp without timezone info) makes the marker count as stale.
+    """
+    try:
+        stamped = datetime.fromisoformat(written_at)
+        elapsed = datetime.now(timezone.utc) - stamped
+        return elapsed.total_seconds() > ttl_s
+    except (TypeError, ValueError):
+        # Unusable timestamp: treat the marker as expired.
+        return True
+
+
+def _consume_pid_marker_for_self(
+    path: Path,
+    *,
+    pid_field: str,
+    start_time_field: str,
+    ttl_s: int,
+) -> bool:
+    """Consume a PID-addressed marker file and report whether it names us.
+
+    The JSON record at ``path`` is read; ``pid_field`` and
+    ``start_time_field`` name the keys holding the target PID and its
+    process start time, and ``written_at`` holds the write timestamp.
+
+    Returns ``True`` only when the record is fresh (younger than ``ttl_s``
+    seconds) and both the PID and a non-``None`` start time equal those of
+    the current process. A missing or empty record returns ``False`` without
+    touching the file; in every other case the marker is removed
+    (best-effort) before returning, whether or not it matched.
+    """
+
+    def _discard() -> None:
+        # Removal is best-effort; an OSError here must not mask the result.
+        try:
+            path.unlink(missing_ok=True)
+        except OSError:
+            pass
+
+    marker = _read_json_file(path)
+    if not marker:
+        return False
+
+    try:
+        marked_pid = int(marker[pid_field])
+        marked_start = marker.get(start_time_field)
+        stamp = marker.get("written_at") or ""
+    except (KeyError, TypeError, ValueError):
+        # Malformed record: drop it so it cannot be re-read later.
+        _discard()
+        return False
+
+    if _marker_is_stale(stamp, ttl_s):
+        _discard()
+        return False
+
+    self_pid = os.getpid()
+    self_start = _get_process_start_time(self_pid)
+    is_ours = (
+        marked_pid == self_pid
+        and marked_start is not None
+        and self_start is not None
+        and marked_start == self_start
+    )
+
+    # Consumed regardless of the outcome.
+    _discard()
+    return is_ours
+
+
 def write_takeover_marker(target_pid: int) -> bool:
    """Record that ``target_pid`` is being replaced by the current process.

@@ -681,59 +744,13 @@ def consume_takeover_marker_for_self() -> bool:
    Always unlinks the marker on match (and on detected staleness) so
    subsequent unrelated signals don't re-trigger.
    """
-    path = _get_takeover_marker_path()
-    record = _read_json_file(path)
-    if not record:
-        return False
-
-    # Any malformed or stale marker → drop it and return False
-    try:
-        target_pid = int(record["target_pid"])
-        target_start_time = record.get("target_start_time")
-        written_at = record.get("written_at") or ""
-    except (KeyError, TypeError, ValueError):
-        try:
-            path.unlink(missing_ok=True)
-        except OSError:
-            pass
-        return False
-
-    # TTL guard: a stale marker older than _TAKEOVER_MARKER_TTL_S is ignored.
-    stale = False
-    try:
-        written_dt = datetime.fromisoformat(written_at)
-        age = (datetime.now(timezone.utc) - written_dt).total_seconds()
-        if age > _TAKEOVER_MARKER_TTL_S:
-            stale = True
-    except (TypeError, ValueError):
-        stale = True  # Unparseable timestamp — treat as stale
-
-    if stale:
-        try:
-            path.unlink(missing_ok=True)
-        except OSError:
-            pass
-        return False
-
-    # Does the marker name THIS process?
-    our_pid = os.getpid()
-    our_start_time = _get_process_start_time(our_pid)
-    matches = (
-        target_pid == our_pid
-        and target_start_time is not None
-        and our_start_time is not None
-        and target_start_time == our_start_time
+    return _consume_pid_marker_for_self(
+        _get_takeover_marker_path(),
+        pid_field="target_pid",
+        start_time_field="target_start_time",
+        ttl_s=_TAKEOVER_MARKER_TTL_S,
    )

-    # Consume the marker whether it matched or not — a marker that doesn't
-    # match our identity is stale-for-us anyway.
-    try:
-        path.unlink(missing_ok=True)
-    except OSError:
-        pass
-
-    return matches
-

 def clear_takeover_marker() -> None:
    """Remove the takeover marker unconditionally. Safe to call repeatedly."""
@@ -743,6 +760,45 @@ def clear_takeover_marker() -> None:
        pass


+def write_planned_stop_marker(target_pid: int) -> bool:
+    """Record that ``target_pid`` is being stopped intentionally.
+
+    The gateway exits non-zero for unexpected SIGTERM so service managers can
+    revive it. Service stop commands send the same SIGTERM, so the CLI writes
+    this short-lived marker first to let the target process exit cleanly.
+
+    Returns ``True`` once the marker has been written, ``False`` when an
+    ``OSError`` (which includes ``PermissionError``) prevented it.
+    """
+    try:
+        marker = {
+            "target_pid": target_pid,
+            "target_start_time": _get_process_start_time(target_pid),
+            "stopper_pid": os.getpid(),
+            "written_at": _utc_now_iso(),
+        }
+        _write_json_file(_get_planned_stop_marker_path(), marker)
+    except OSError:
+        return False
+    return True
+
+
+def consume_planned_stop_marker_for_self() -> bool:
+    """Return True when the current process is being intentionally stopped.
+
+    Delegates to ``_consume_pid_marker_for_self`` on the planned-stop marker
+    file, using the planned-stop TTL.
+    """
+    marker_path = _get_planned_stop_marker_path()
+    return _consume_pid_marker_for_self(
+        marker_path,
+        pid_field="target_pid",
+        start_time_field="target_start_time",
+        ttl_s=_PLANNED_STOP_MARKER_TTL_S,
+    )
+
+
+def clear_planned_stop_marker() -> None:
+    """Delete the planned-stop marker if present; ``OSError`` is ignored."""
+    try:
+        marker_path = _get_planned_stop_marker_path()
+        marker_path.unlink(missing_ok=True)
+    except OSError:
+        # Best-effort cleanup: nothing useful to do on failure.
+        return
+
+
 def get_running_pid(
    pid_path: Optional[Path] = None,
    *,
@@ -416,6 +416,40 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
    ),
 }

+# Auto-extend PROVIDER_REGISTRY with any api-key provider registered in
+# providers/ that is not already declared above.  New providers only need a
+# plugins/model-providers/<name>/ plugin — no edits to this file required.
+#
+# Discovery is best-effort: failures are logged at DEBUG rather than raised,
+# so a missing or broken plugin package cannot stop this module from
+# importing. Each plugin is handled in its own try block so one malformed
+# plugin does not prevent the plugins after it from being registered.
+import logging as _registry_logging
+
+try:
+    from providers import list_providers as _list_providers_for_registry
+    for _pp in _list_providers_for_registry():
+        try:
+            if _pp.name in PROVIDER_REGISTRY:
+                continue
+            if _pp.auth_type != "api_key" or not _pp.env_vars:
+                continue
+            # Skip providers that need custom token resolution or are
+            # special-cased in resolve_provider() (copilot/kimi/zai have
+            # bespoke token refresh; openrouter/custom are
+            # aggregator/user-supplied and handled outside the registry —
+            # adding them here breaks runtime_provider resolution that relies
+            # on `openrouter not in PROVIDER_REGISTRY`).
+            if _pp.name in {"copilot", "kimi-coding", "kimi-coding-cn", "zai", "openrouter", "custom"}:
+                continue
+            # "_URL" also matches every name ending in "_BASE_URL".
+            _api_key_vars = tuple(v for v in _pp.env_vars if not v.endswith("_URL"))
+            _base_url_var = next((v for v in _pp.env_vars if v.endswith("_URL")), None)
+            PROVIDER_REGISTRY[_pp.name] = ProviderConfig(
+                id=_pp.name,
+                name=_pp.display_name or _pp.name,
+                auth_type="api_key",
+                inference_base_url=_pp.base_url,
+                # If every env var looks like a URL, fall back to the full
+                # list; keep the value a tuple in both cases.
+                api_key_env_vars=_api_key_vars or tuple(_pp.env_vars),
+                base_url_env_var=_base_url_var or "",
+            )
+            # Also register aliases so resolve_provider() resolves them
+            for _alias in _pp.aliases:
+                if _alias not in PROVIDER_REGISTRY:
+                    PROVIDER_REGISTRY[_alias] = PROVIDER_REGISTRY[_pp.name]
+        except Exception:
+            _registry_logging.getLogger(__name__).debug(
+                "Skipping provider plugin %r during registry auto-extend",
+                getattr(_pp, "name", _pp),
+                exc_info=True,
+            )
+except Exception:
+    _registry_logging.getLogger(__name__).debug(
+        "Provider plugin discovery failed; using built-in registry only",
+        exc_info=True,
+    )
+

 # =============================================================================
 # Anthropic Key Helper
@@ -746,6 +780,73 @@ def _auth_file_path() -> Path:
    return path


+def _global_auth_file_path() -> Optional[Path]:
+    """Locate the global-root ``auth.json`` for a process running in a profile.
+
+    Returns ``None`` when the global root cannot be determined, or when the
+    profile home and the global root are the same directory (classic mode,
+    or a custom HERMES_HOME that is not a profile). Otherwise returns
+    ``<global root>/auth.json``; the file is not checked for existence here.
+
+    Used by read-only fallback paths so providers authed at the root are
+    visible to profile processes that haven't configured them locally.
+
+    See issue #18594 follow-up (credential_pool shadowing).
+    """
+    try:
+        from hermes_constants import get_default_hermes_root
+        global_root = get_default_hermes_root()
+    except Exception:
+        return None
+
+    profile_home = get_hermes_home()
+    try:
+        same_root = profile_home.resolve(strict=False) == global_root.resolve(strict=False)
+    except Exception:
+        # Resolution failed: compare the paths exactly as given.
+        same_root = profile_home == global_root
+    if same_root:
+        return None
+
+    # No pytest seat belt here: this is a pure read-only path, and
+    # ``_load_global_auth_store()`` wraps the read in a try/except so an
+    # unreadable global file can never break the profile process.  The
+    # write-side seat belt still lives on ``_auth_file_path()`` where it
+    # belongs (that's what protects the real user's auth store from being
+    # corrupted by a mis-configured test).
+    return global_root / "auth.json"
+
+
+def _load_global_auth_store() -> Dict[str, Any]:
+    """Load the global-root auth store (read-only fallback).
+
+    Returns an empty dict when no global fallback exists (classic mode,
+    or the global auth.json is absent), when the file cannot be probed or
+    read, or when its contents fail to load. Never raises on a missing,
+    inaccessible, or malformed file.
+
+    Seat belt: under pytest, refuses to read the real user's
+    ``~/.hermes/auth.json`` even when HERMES_HOME is set to a profile
+    path. The hermetic conftest does not redirect ``HOME``, so
+    ``get_default_hermes_root()`` for a profile-shaped HERMES_HOME can
+    still resolve to the real user's home on a dev machine. That would
+    leak real credentials into tests. This guard uses the unmodified
+    ``HOME`` env var (what ``os.path.expanduser('~')`` would resolve to),
+    not ``Path.home()``, because ``Path.home`` is sometimes monkeypatched
+    by fixtures that want to relocate the global root to a tmp path.
+    """
+    global_path = _global_auth_file_path()
+    if global_path is None:
+        return {}
+    try:
+        # On some Python versions Path.exists() re-raises OSErrors other
+        # than "not found" (e.g. EACCES on an untraversable parent). Treat
+        # any such failure as "no global store" so the profile keeps working.
+        if not global_path.exists():
+            return {}
+    except OSError:
+        return {}
+    if os.environ.get("PYTEST_CURRENT_TEST"):
+        real_home_env = os.environ.get("HOME", "")
+        if real_home_env:
+            real_root = Path(real_home_env) / ".hermes" / "auth.json"
+            try:
+                if global_path.resolve(strict=False) == real_root.resolve(strict=False):
+                    return {}
+            except Exception:
+                # Could not compare the paths; fall through to the read.
+                pass
+    try:
+        return _load_auth_store(global_path)
+    except Exception:
+        # A malformed global store must not break profile reads. The
+        # profile's own auth store is still authoritative.
+        return {}
+
+
 def _auth_lock_path() -> Path:
    return _auth_file_path().with_suffix(".lock")

@@ -932,15 +1033,50 @@ def get_auth_provider_display_name(provider_id: str) -> str:


 def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]:
-    """Return the persisted credential pool, or one provider slice."""
+    """Return the persisted credential pool, or one provider slice.
+
+    In profile mode, the profile's credential pool is authoritative. If a
+    provider has no entries in the profile, entries from the global-root
+    ``auth.json`` are used as a read-only fallback — so workers spawned in a
+    profile can see providers that were only authenticated at global scope.
+
+    Profile entries always win: the global fallback only applies per-provider
+    when the profile has zero entries for that provider. Once the user runs
+    ``hermes auth add <provider>`` inside the profile, profile entries
+    fully shadow global for that provider on the next read.
+
+    Writes always go to the profile (``write_credential_pool`` is unchanged).
+    See issue #18594 follow-up.
+    """
    auth_store = _load_auth_store()
    pool = auth_store.get("credential_pool")
    if not isinstance(pool, dict):
        pool = {}
+
+    # Global-root pool; stays empty unless the global store holds a dict
+    # under "credential_pool".
+    global_pool: Dict[str, Any] = {}
+    global_store = _load_global_auth_store()
+    maybe_global_pool = global_store.get("credential_pool") if global_store else None
+    if isinstance(maybe_global_pool, dict):
+        global_pool = maybe_global_pool
+
    if provider_id is None:
-        return dict(pool)
+        # Full-pool read: start from a shallow copy of the profile pool and
+        # fill in only the providers the profile has no entries for.
+        merged = dict(pool)
+        for gp_key, gp_entries in global_pool.items():
+            if not isinstance(gp_entries, list) or not gp_entries:
+                continue
+            # Per-provider shadowing: profile wins whenever it has ANY entries.
+            existing = merged.get(gp_key)
+            if isinstance(existing, list) and existing:
+                continue
+            # Shallow-copy the global entries list into the merged result.
+            merged[gp_key] = list(gp_entries)
+        return merged
+
    provider_entries = pool.get(provider_id)
-    return list(provider_entries) if isinstance(provider_entries, list) else []
+    if isinstance(provider_entries, list) and provider_entries:
+        return list(provider_entries)
+    # Profile has no entries for this provider — fall back to global.
+    global_entries = global_pool.get(provider_id)
+    return list(global_entries) if isinstance(global_entries, list) else []


 def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
@@ -999,9 +1135,25 @@ def unsuppress_credential_source(provider_id: str, source: str) -> bool:


 def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]:
-    """Return persisted auth state for a provider, or None."""
+    """Return persisted auth state for a provider, or None.
+
+    In profile mode, falls back to the global-root ``auth.json`` when the
+    profile has no state for this provider. Profile state always wins when
+    present. Writes (``_save_auth_store`` / ``persist_*_credentials``) are
+    unchanged — they still target the profile only. This mirrors
+    ``read_credential_pool``'s per-provider shadowing semantics so that
+    ``_seed_from_singletons`` can reseed a profile's credential pool from
+    global-scope provider state (e.g. a globally-authenticated Anthropic
+    OAuth or Nous device-code session). See issue #18594 follow-up.
+    """
    auth_store = _load_auth_store()
-    return _load_provider_state(auth_store, provider_id)
+    state = _load_provider_state(auth_store, provider_id)
+    # Profile state takes precedence whenever it is present.
+    if state is not None:
+        return state
+    # No profile state: consult the read-only global store. An empty
+    # result means there is no fallback to read from.
+    global_store = _load_global_auth_store()
+    if not global_store:
+        return None
+    return _load_provider_state(global_store, provider_id)


 def get_active_provider() -> Optional[str]:
@@ -1195,6 +1347,17 @@ def resolve_provider(
        "vllm": "custom", "llamacpp": "custom",
        "llama.cpp": "custom", "llama-cpp": "custom",
    }
+    # Extend with aliases declared in plugins/model-providers/<name>/ that aren't already mapped.
+    # This keeps providers/ as the single source for new aliases while the
+    # hardcoded dict above remains authoritative for existing ones.
+    try:
+        from providers import list_providers as _lp
+        for _pp in _lp():
+            for _alias in _pp.aliases:
+                if _alias not in _PROVIDER_ALIASES:
+                    _PROVIDER_ALIASES[_alias] = _pp.name
+    except Exception:
+        pass
    normalized = _PROVIDER_ALIASES.get(normalized, normalized)

    if normalized == "openrouter":
@@ -2589,6 +2752,208 @@ def _poll_for_token(
 # Nous Portal — token refresh, agent key minting, model discovery
 # =============================================================================

+# -----------------------------------------------------------------------------
+# Shared Nous token store — lets OAuth credentials persist across profiles
+# so a new `hermes --profile <name> auth add nous --type oauth` can one-tap
+# import instead of running the full device-code flow every time.
+#
+# File lives at ${HERMES_SHARED_AUTH_DIR}/nous_auth.json, defaulting to
+# ~/.hermes/shared/nous_auth.json. It is OUTSIDE any named profile's
+# HERMES_HOME so named profiles (which typically live under
+# ~/.hermes/profiles/<name>/) all see the same file.
+#
+# Written on successful login and on every runtime refresh so the stored
+# refresh_token stays current even if one profile refreshes and rotates it.
+# If ever the stored refresh_token does go stale server-side, import fails
+# gracefully and the user falls back to the normal device-code flow.
+# -----------------------------------------------------------------------------
+
+NOUS_SHARED_STORE_FILENAME = "nous_auth.json"
+
+
+def _nous_shared_auth_dir() -> Path:
+    """Return the directory containing the shared Nous token store.
+
+    A non-blank ``HERMES_SHARED_AUTH_DIR`` wins (with ``~`` expanded), which
+    lets tests point the store at a tmp path instead of the real user's
+    home. Otherwise the directory is ``~/.hermes/shared/``.
+    """
+    configured = os.getenv("HERMES_SHARED_AUTH_DIR", "").strip()
+    if not configured:
+        return Path.home() / ".hermes" / "shared"
+    return Path(configured).expanduser()
+
+
+def _nous_shared_store_path() -> Path:
+    """Return the path of the shared Nous token store file.
+
+    Seat belt: while pytest is running, a path that resolves to the real
+    user's ``~/.hermes/shared/`` store is rejected instead of returned, so
+    a test that forgot to set ``HERMES_SHARED_AUTH_DIR`` to a tmp_path
+    fails loudly rather than touching real cross-profile state (conftest
+    does not set it automatically; this mirrors the ``_auth_file_path()``
+    guard).
+
+    Raises:
+        RuntimeError: under pytest, when the path is the real shared store.
+    """
+    store_path = _nous_shared_auth_dir() / NOUS_SHARED_STORE_FILENAME
+    if not os.environ.get("PYTEST_CURRENT_TEST"):
+        return store_path
+
+    home_store = Path.home() / ".hermes" / "shared" / NOUS_SHARED_STORE_FILENAME
+    forbidden = home_store.resolve(strict=False)
+    try:
+        candidate = store_path.resolve(strict=False)
+    except Exception:
+        # Compare the unresolved path if resolution itself fails.
+        candidate = store_path
+    if candidate != forbidden:
+        return store_path
+    raise RuntimeError(
+        f"Refusing to touch real user shared Nous auth store during test run: "
+        f"{store_path}. Set HERMES_SHARED_AUTH_DIR to a tmp_path in your test fixture."
+    )
+
+
+def _write_shared_nous_state(state: Dict[str, Any]) -> None:
+    """Persist a minimal copy of the Nous OAuth state to the shared store.
+
+    Best-effort: any failure is swallowed after logging. The shared store
+    is a convenience layer; the per-profile auth.json remains the source
+    of truth.
+
+    We deliberately omit the short-lived ``agent_key`` (24h TTL, profile-
+    specific) — only the long-lived OAuth tokens are cross-profile useful.
+
+    The payload is written to a uniquely named temp file in the target
+    directory and then moved into place with ``os.replace``, so readers
+    never observe a partial file and concurrent writers do not share a
+    temp name.
+
+    Args:
+        state: Nous auth state. Nothing is written unless both
+            ``refresh_token`` and ``access_token`` are non-blank strings.
+    """
+    import tempfile
+
+    refresh_token = state.get("refresh_token")
+    access_token = state.get("access_token")
+    if not (isinstance(refresh_token, str) and refresh_token.strip()):
+        # No refresh_token = nothing worth sharing across profiles
+        return
+    if not (isinstance(access_token, str) and access_token.strip()):
+        return
+
+    shared = {
+        "_schema": 1,
+        "access_token": access_token,
+        "refresh_token": refresh_token,
+        "token_type": state.get("token_type") or "Bearer",
+        "scope": state.get("scope") or DEFAULT_NOUS_SCOPE,
+        "client_id": state.get("client_id") or DEFAULT_NOUS_CLIENT_ID,
+        "portal_base_url": state.get("portal_base_url") or DEFAULT_NOUS_PORTAL_URL,
+        "inference_base_url": state.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL,
+        "obtained_at": state.get("obtained_at"),
+        "expires_at": state.get("expires_at"),
+        "updated_at": datetime.now(timezone.utc).isoformat(),
+    }
+    tmp_name: Optional[str] = None
+    try:
+        serialized = json.dumps(shared, indent=2, sort_keys=True)
+        path = _nous_shared_store_path()
+        path.parent.mkdir(parents=True, exist_ok=True)
+        # mkstemp creates the file readable/writable only by the creating
+        # user, so the tokens are never on disk with umask-default
+        # permissions (unlike write_text followed by a later chmod).
+        fd, tmp_name = tempfile.mkstemp(
+            prefix=path.name + ".", suffix=".tmp", dir=str(path.parent)
+        )
+        with os.fdopen(fd, "w", encoding="utf-8") as handle:
+            handle.write(serialized)
+        os.replace(tmp_name, path)
+        tmp_name = None  # moved into place; nothing left to clean up
+        _oauth_trace(
+            "nous_shared_store_written",
+            path=str(path),
+            refresh_token_fp=_token_fingerprint(refresh_token),
+        )
+    except Exception as exc:
+        logger.debug("Failed to write shared Nous auth store: %s", exc)
+    finally:
+        if tmp_name is not None:
+            # Don't leave a stray temp file holding tokens after a failure.
+            try:
+                os.unlink(tmp_name)
+            except OSError:
+                pass
+
+
+def _read_shared_nous_state() -> Optional[Dict[str, Any]]:
+    """Load the shared Nous OAuth state, or ``None`` if it is unusable.
+
+    ``None`` covers every "nothing to import" case: the test seat belt
+    tripped, the file is missing or unreadable, the content is not a JSON
+    object, or either token field is absent or blank. Callers treat
+    ``None`` as "no shared credentials available — fall through to
+    device-code".
+    """
+    try:
+        store_path = _nous_shared_store_path()
+    except RuntimeError:
+        # Test seat belt tripped — treat as missing
+        return None
+    if not store_path.is_file():
+        return None
+
+    try:
+        data = json.loads(store_path.read_text())
+    except (OSError, ValueError) as exc:
+        logger.debug("Shared Nous auth store at %s is unreadable: %s", store_path, exc)
+        return None
+    if not isinstance(data, dict):
+        return None
+
+    # Both tokens must be non-blank strings for the state to be importable.
+    for field in ("refresh_token", "access_token"):
+        token = data.get(field)
+        if not isinstance(token, str) or not token.strip():
+            return None
+    return data
+
+
+def _try_import_shared_nous_state(
+    *,
+    timeout_seconds: float = 15.0,
+    min_key_ttl_seconds: int = 5 * 60,
+) -> Optional[Dict[str, Any]]:
+    """Try to rebuild a full Nous auth state from the shared token store.
+
+    The shared tokens are expanded into a complete state dict and passed to
+    ``refresh_nous_oauth_from_state`` with a forced refresh and a forced
+    mint, so the result carries a fresh access_token and agent_key for this
+    profile and is ready for ``persist_nous_credentials()``.
+
+    Args:
+        timeout_seconds: Forwarded to the refresh call.
+        min_key_ttl_seconds: Forwarded to the refresh call.
+
+    Returns:
+        The refreshed state, or ``None`` when there is no shared state or
+        the refresh fails for any reason (expired refresh_token, portal
+        unreachable, etc.) — the caller should then run the normal
+        device-code flow.
+    """
+    shared = _read_shared_nous_state()
+    if not shared:
+        return None
+
+    def _field(key: str, fallback: Any) -> Any:
+        # Missing or falsy shared values fall back to the module default.
+        return shared.get(key) or fallback
+
+    # refresh_nous_oauth_from_state expects a complete state dict; the
+    # agent key fields start empty because force_mint=True replaces them.
+    seed_state: Dict[str, Any] = {
+        "access_token": shared.get("access_token"),
+        "refresh_token": shared.get("refresh_token"),
+        "client_id": _field("client_id", DEFAULT_NOUS_CLIENT_ID),
+        "portal_base_url": _field("portal_base_url", DEFAULT_NOUS_PORTAL_URL),
+        "inference_base_url": _field("inference_base_url", DEFAULT_NOUS_INFERENCE_URL),
+        "token_type": _field("token_type", "Bearer"),
+        "scope": _field("scope", DEFAULT_NOUS_SCOPE),
+        "obtained_at": shared.get("obtained_at"),
+        "expires_at": shared.get("expires_at"),
+        "agent_key": None,
+        "agent_key_expires_at": None,
+        "tls": {"insecure": False, "ca_bundle": None},
+    }
+
+    try:
+        return refresh_nous_oauth_from_state(
+            seed_state,
+            min_key_ttl_seconds=min_key_ttl_seconds,
+            timeout_seconds=timeout_seconds,
+            force_refresh=True,
+            force_mint=True,
+        )
+    except AuthError as exc:
+        # Auth failures additionally record the error code in the trace.
+        _oauth_trace(
+            "nous_shared_import_failed",
+            error_type=type(exc).__name__,
+            error_code=getattr(exc, "code", None),
+        )
+        logger.debug("Shared Nous import failed: %s", exc)
+    except Exception as exc:
+        _oauth_trace(
+            "nous_shared_import_failed",
+            error_type=type(exc).__name__,
+        )
+        logger.debug("Shared Nous import failed: %s", exc)
+    return None
+
+
 def _refresh_access_token(
    *,
    client: httpx.Client,
@@ -2991,6 +3356,12 @@ def persist_nous_credentials(
        _save_provider_state(auth_store, "nous", state)
        _save_auth_store(auth_store)

+    # Mirror to the shared store so a new profile can one-tap import
+    # these credentials via `hermes auth add nous --type oauth`. Best-
+    # effort: any I/O failure is logged and swallowed (the per-profile
+    # auth.json is still the source of truth).
+    _write_shared_nous_state(state)
+
    pool = load_pool("nous")
    return next(
        (e for e in pool.entries() if e.source == NOUS_DEVICE_CODE_SOURCE),
@@ -3059,6 +3430,11 @@ def resolve_nous_runtime_credentials(
                refresh_token_fp=_token_fingerprint(state.get("refresh_token")),
                access_token_fp=_token_fingerprint(state.get("access_token")),
            )
+            # Mirror post-refresh state to the shared store so sibling
+            # profiles don't hold stale refresh_tokens after rotation.
+            # Best-effort — any failure is logged and swallowed inside
+            # _write_shared_nous_state.
+            _write_shared_nous_state(state)

        verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state)
        timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0)
@@ -3840,7 +4216,7 @@ def _prompt_model_selection(
            clear_screen=False,
            title=effective_title,
        )
-        idx = menu.show()
+        idx: int | None = menu.show()  # ty:ignore[invalid-assignment] - TerminalMenu.show() is always `int | None` when multi_select is False / not provided.
        from hermes_cli.curses_ui import flush_stdin
        flush_stdin()
        if idx is None:
@@ -4600,17 +4976,47 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
    )

    try:
-        auth_state = _nous_device_code_login(
-            portal_base_url=getattr(args, "portal_url", None),
-            inference_base_url=getattr(args, "inference_url", None),
-            client_id=getattr(args, "client_id", None) or pconfig.client_id,
-            scope=getattr(args, "scope", None) or pconfig.scope,
-            open_browser=not getattr(args, "no_browser", False),
-            timeout_seconds=timeout_seconds,
-            insecure=insecure,
-            ca_bundle=ca_bundle,
-            min_key_ttl_seconds=5 * 60,
-        )
+        auth_state = None
+
+        # Codex-style auto-import: before launching a fresh device-code
+        # flow, check the shared store for an existing Nous credential
+        # from any other profile. If present, offer to rehydrate it.
+        shared = _read_shared_nous_state()
+        if shared:
+            try:
+                shared_path = _nous_shared_store_path()
+            except RuntimeError:
+                shared_path = None
+            print()
+            if shared_path:
+                print(f"Found existing Nous OAuth credentials at {shared_path}")
+            else:
+                print("Found existing shared Nous OAuth credentials")
+            try:
+                do_import = input("Import these credentials? [Y/n]: ").strip().lower()
+            except (EOFError, KeyboardInterrupt):
+                do_import = "y"
+            if do_import in ("", "y", "yes"):
+                print("Rehydrating Nous session from shared credentials...")
+                auth_state = _try_import_shared_nous_state(
+                    timeout_seconds=timeout_seconds,
+                    min_key_ttl_seconds=5 * 60,
+                )
+                if auth_state is None:
+                    print("Could not refresh shared credentials — falling back to device-code login.")
+
+        if auth_state is None:
+            auth_state = _nous_device_code_login(
+                portal_base_url=getattr(args, "portal_url", None),
+                inference_base_url=getattr(args, "inference_url", None),
+                client_id=getattr(args, "client_id", None) or pconfig.client_id,
+                scope=getattr(args, "scope", None) or pconfig.scope,
+                open_browser=not getattr(args, "no_browser", False),
+                timeout_seconds=timeout_seconds,
+                insecure=insecure,
+                ca_bundle=ca_bundle,
+                min_key_ttl_seconds=5 * 60,
+            )

        inference_base_url = auth_state["inference_base_url"]

@@ -4627,6 +5033,11 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
            _save_provider_state(auth_store, "nous", auth_state)
            saved_to = _save_auth_store(auth_store)

+        # Mirror to the shared store so other profiles can one-tap import
+        # these credentials. Best-effort: any I/O failure is logged and
+        # swallowed inside the helper.
+        _write_shared_nous_state(auth_state)
+
        print()
        print("Login successful!")
        print(f"  Auth state: {saved_to}")
@@ -245,6 +245,47 @@ def auth_add_command(args) -> None:
        return

    if provider == "nous":
+        # Codex-style auto-import: if a shared Nous credential lives at
+        # ~/.hermes/shared/nous_auth.json (written by any previous
+        # successful login), offer to import it instead of running the
+        # full device-code flow. This makes `hermes --profile <name>
+        # auth add nous --type oauth` a one-tap operation for users who
+        # run multiple profiles.
+        shared = auth_mod._read_shared_nous_state()
+        if shared:
+            try:
+                path = auth_mod._nous_shared_store_path()
+            except RuntimeError:
+                path = None
+            print()
+            if path:
+                print(f"Found existing Nous OAuth credentials at {path}")
+            else:
+                print("Found existing shared Nous OAuth credentials")
+            try:
+                do_import = input("Import these credentials? [Y/n]: ").strip().lower()
+            except (EOFError, KeyboardInterrupt):
+                do_import = "y"
+            if do_import in ("", "y", "yes"):
+                print("Rehydrating Nous session from shared credentials...")
+                rehydrated = auth_mod._try_import_shared_nous_state(
+                    timeout_seconds=getattr(args, "timeout", None) or 15.0,
+                    min_key_ttl_seconds=max(
+                        60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))
+                    ),
+                )
+                if rehydrated is not None:
+                    custom_label = (getattr(args, "label", None) or "").strip() or None
+                    entry = auth_mod.persist_nous_credentials(rehydrated, label=custom_label)
+                    shown_label = entry.label if entry is not None else label_from_token(
+                        rehydrated.get("access_token", ""), _oauth_default_label(provider, 1),
+                    )
+                    print(f'Imported {provider} OAuth credentials: "{shown_label}"')
+                    return
+                # Rehydrate failed (expired refresh_token, portal down, etc.)
+                # — fall through to device-code flow.
+                print("Could not refresh shared credentials — falling back to device-code login.")
+
        creds = auth_mod._nous_device_code_login(
            portal_base_url=getattr(args, "portal_url", None),
            inference_base_url=getattr(args, "inference_url", None),
@@ -61,6 +61,9 @@ _EXCLUDED_NAMES = {
    "cron.pid",
 }

+# zipfile.open() drops Unix mode bits on extract; restore tightens these to 0600.
+_SECRET_FILE_NAMES = {".env", "auth.json", "state.db"}
+

 def _should_exclude(rel_path: Path) -> bool:
    """Return True if *rel_path* (relative to hermes root) should be skipped."""
@@ -381,6 +384,8 @@ def run_import(args) -> None:
                target.parent.mkdir(parents=True, exist_ok=True)
                with zf.open(member) as src, open(target, "wb") as dst:
                    dst.write(src.read())
+                if target.name in _SECRET_FILE_NAMES:
+                    os.chmod(target, 0o600)
                restored += 1
            except (PermissionError, OSError) as exc:
                errors.append(f"  {rel}: {exc}")
@@ -788,9 +793,17 @@ def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
    Returns the number of files deleted.  Only touches files matching
    ``pre-update-*.zip`` so hand-made zips dropped in the same directory
    are never touched.
+
+    ``keep`` is floored to 1 because this helper is only called immediately
+    after a fresh backup is written: deleting that backup right after the
+    user paid the disk/CPU cost to create it would leave them worse off
+    than no backup at all (and the wrapper in ``main.py`` would still print
+    a misleading ``Saved: <path>`` line for a file that no longer exists).
+    Operators who genuinely don't want a backup should set
+    ``updates.pre_update_backup: false`` in config — that gates creation.
    """
-    if keep < 0:
-        keep = 0
+    if keep < 1:
+        keep = 1
    if not backup_dir.exists():
        return 0

@@ -0,0 +1,244 @@
+"""`hermes checkpoints` CLI subcommand.
+
+Gives users direct visibility and control over the filesystem checkpoint
+store at ``~/.hermes/checkpoints/``.  Actions:
+
+    hermes checkpoints               # same as `status`
+    hermes checkpoints status        # total size, project count, breakdown
+    hermes checkpoints list          # alias for `status`
+    hermes checkpoints prune [opts]  # force a sweep (ignores the 24h marker)
+    hermes checkpoints clear [-f]    # nuke the entire base (asks first)
+    hermes checkpoints clear-legacy  # delete just the legacy-* archives
+
+Examples::
+
+    hermes checkpoints
+    hermes checkpoints prune --retention-days 3 --max-size-mb 200
+    hermes checkpoints clear -f
+
+None of these require the agent to be running.  Safe to call any time.
+"""
+
+from __future__ import annotations
+
+import argparse
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict
+
+
+def _fmt_bytes(n: int) -> str:
+    """Render a byte count with a binary-scaled unit (B, KB, MB, GB, TB).
+
+    Plain bytes are shown as an integer; larger units use one decimal
+    place. Values past the TB range stay in TB. A falsy ``n`` counts as 0.
+    """
+    units = ("B", "KB", "MB", "GB", "TB")
+    value = float(n or 0)
+    index = 0
+    # Step up one unit per division by 1024 until the value fits or the
+    # largest unit is reached.
+    while not value < 1024 and index < len(units) - 1:
+        value /= 1024
+        index += 1
+    if index == 0:
+        return f"{int(value)} {units[index]}"
+    return f"{value:.1f} {units[index]}"
+
+
+def _fmt_ts(ts: Any) -> str:
+    """Format a Unix timestamp as local ``YYYY-MM-DD HH:MM``.
+
+    Returns the placeholder ``"—"`` when ``ts`` cannot be converted to a
+    float or lies outside the range the platform can represent.
+    """
+    try:
+        return datetime.fromtimestamp(float(ts)).strftime("%Y-%m-%d %H:%M")
+    except (TypeError, ValueError, OverflowError, OSError):
+        # fromtimestamp raises OverflowError/OSError (not just ValueError)
+        # for out-of-range values such as infinity or absurdly large epochs.
+        return "—"
+
+
+def _fmt_age(ts: Any) -> str:
+    """Describe how long ago the Unix timestamp ``ts`` was, e.g. ``"5m ago"``.
+
+    Returns ``"—"`` when ``ts`` is not a finite number and ``"now"`` for
+    timestamps in the future. Otherwise the age is truncated to whole
+    seconds, minutes, hours, or days.
+    """
+    import math
+
+    try:
+        age = time.time() - float(ts)
+    except (TypeError, ValueError):
+        return "—"
+    if not math.isfinite(age):
+        # NaN would fall through every comparison and crash in int();
+        # an infinite timestamp carries no meaningful age either.
+        return "—"
+    if age < 0:
+        return "now"
+    if age < 60:
+        return f"{int(age)}s ago"
+    if age < 3600:
+        return f"{int(age / 60)}m ago"
+    if age < 86400:
+        return f"{int(age / 3600)}h ago"
+    return f"{int(age / 86400)}d ago"
+
+
+def cmd_status(args: argparse.Namespace) -> int:
+    """Print a summary of the checkpoint store.
+
+    Shows the base path, size totals, the project count, a table of
+    projects ordered by most recent ``last_touch``, and any legacy
+    archives together with a hint for clearing them.
+
+    Args:
+        args: Parsed CLI namespace. An optional positive ``limit`` caps the
+            number of project rows; a missing, zero, or negative value
+            falls back to 20.
+
+    Returns:
+        Always ``0``.
+    """
+    from tools.checkpoint_manager import store_status
+
+    info = store_status()
+    base = info["base"]
+    print(f"Checkpoint base: {base}")
+    print(f"Total size:      {_fmt_bytes(info['total_size_bytes'])}")
+    print(f"  store/         {_fmt_bytes(info['store_size_bytes'])}")
+    print(f"  legacy-*       {_fmt_bytes(info['legacy_size_bytes'])}")
+    print(f"Projects:        {info['project_count']}")
+
+    projects = sorted(
+        info["projects"],
+        key=lambda p: (p.get("last_touch") or 0),
+        reverse=True,
+    )
+    # The namespace may carry no ``limit`` at all. A negative value would
+    # otherwise act as a slice bound and silently drop the trailing rows
+    # instead of limiting them.
+    limit = getattr(args, "limit", None)
+    if not limit or limit < 0:
+        limit = 20
+    if projects:
+        print()
+        print(f"  {'WORKDIR':<60}  {'COMMITS':>7}  {'LAST TOUCH':>12}  STATE")
+        for p in projects[:limit]:
+            wd = p.get("workdir") or "(unknown)"
+            if len(wd) > 60:
+                # Keep the tail of long paths so the row stays 60 wide.
+                wd = "…" + wd[-59:]
+            exists = p.get("exists")
+            state = "live" if exists else "orphan"
+            commits = p.get("commits", 0)
+            last = _fmt_age(p.get("last_touch"))
+            print(f"  {wd:<60}  {commits:>7}  {last:>12}  {state}")
+
+    legacy = info.get("legacy_archives", [])
+    if legacy:
+        print()
+        print(f"Legacy archives ({len(legacy)}):")
+        for arch in sorted(legacy, key=lambda a: a.get("mtime", 0), reverse=True):
+            print(f"  {arch['name']:<40}  {_fmt_bytes(arch['size_bytes']):>10}")
+        print()
+        print("Clear with: hermes checkpoints clear-legacy")
+    return 0
+
+
+def cmd_list(args: argparse.Namespace) -> int:
+    """Handle ``hermes checkpoints list`` by delegating to ``cmd_status``."""
+    exit_code = cmd_status(args)
+    return exit_code
+
+
+def cmd_prune(args: argparse.Namespace) -> int:
+    """Run a checkpoint prune with the CLI-supplied limits and report it.
+
+    Echoes the effective settings, calls ``prune_checkpoints``, then prints
+    the counters it returned. Always returns ``0``.
+    """
+    from tools.checkpoint_manager import prune_checkpoints
+
+    options = {
+        "retention_days": args.retention_days,
+        "delete_orphans": not args.keep_orphans,
+        "max_total_size_mb": args.max_size_mb,
+    }
+
+    print("Pruning checkpoint store…")
+    print(f"  retention_days:    {options['retention_days']}")
+    print(f"  delete_orphans:    {options['delete_orphans']}")
+    print(f"  max_total_size_mb: {options['max_total_size_mb']}")
+    print()
+
+    summary = prune_checkpoints(**options)
+    print(f"Scanned:         {summary['scanned']}")
+    print(f"Deleted orphan:  {summary['deleted_orphan']}")
+    print(f"Deleted stale:   {summary['deleted_stale']}")
+    print(f"Errors:          {summary['errors']}")
+    print(f"Bytes reclaimed: {_fmt_bytes(summary['bytes_freed'])}")
+    return 0
+
+
+def _confirm(prompt: str) -> bool:
+    """Ask a yes/no question on stdin; anything other than y/yes means no.
+
+    EOF or Ctrl-C at the prompt prints a newline and counts as "no".
+    """
+    try:
+        answer = input(f"{prompt} [y/N]: ")
+    except (EOFError, KeyboardInterrupt):
+        print()
+        return False
+    return answer.strip().lower() in ("y", "yes")
+
+
+def cmd_clear(args: argparse.Namespace) -> int:
+    """Delete the whole checkpoint base after showing what will be lost.
+
+    Returns ``0`` when the base was cleared or there was nothing to clear,
+    ``1`` when the user declined the prompt, and ``2`` when ``clear_all``
+    reported that nothing was deleted. ``args.force`` skips the prompt.
+    """
+    from tools.checkpoint_manager import CHECKPOINT_BASE, clear_all, store_status
+
+    status = store_status()
+    if status["total_size_bytes"] == 0:
+        if not Path(CHECKPOINT_BASE).exists():
+            print("Nothing to clear — checkpoint base does not exist.")
+            return 0
+
+    print(f"This will delete the ENTIRE checkpoint base at {status['base']}")
+    print(f"  size:        {_fmt_bytes(status['total_size_bytes'])}")
+    print(f"  projects:    {status['project_count']}")
+    print(f"  legacy dirs: {len(status.get('legacy_archives', []))}")
+    print()
+    print("All /rollback history for every working directory will be lost.")
+
+    # Only prompt when --force was not given.
+    go_ahead = args.force or _confirm("Proceed?")
+    if not go_ahead:
+        print("Aborted.")
+        return 1
+
+    outcome = clear_all()
+    if not outcome["deleted"]:
+        print("Could not clear checkpoint base (see logs).")
+        return 2
+    print(f"Cleared. Reclaimed {_fmt_bytes(outcome['bytes_freed'])}.")
+    return 0
+
+
+def cmd_clear_legacy(args: argparse.Namespace) -> int:
+    """Delete the legacy archives after listing them.
+
+    Returns ``0`` when the archives were deleted or none exist, and ``1``
+    when the user declined the prompt. ``args.force`` skips the prompt.
+    """
+    from tools.checkpoint_manager import clear_legacy, store_status
+
+    archives = store_status().get("legacy_archives", [])
+    if not archives:
+        print("No legacy archives to clear.")
+        return 0
+
+    combined = sum(entry.get("size_bytes", 0) for entry in archives)
+    print(f"Found {len(archives)} legacy archive(s), total {_fmt_bytes(combined)}:")
+    for entry in archives:
+        print(f"  {entry['name']:<40}  {_fmt_bytes(entry['size_bytes']):>10}")
+    print()
+    print("Legacy archives hold pre-v2 per-project shadow repos, moved aside")
+    print("during the single-store migration. Delete when you're confident")
+    print("you don't need the old /rollback history.")
+
+    # Only prompt when --force was not given.
+    go_ahead = args.force or _confirm("Delete all legacy archives?")
+    if not go_ahead:
+        print("Aborted.")
+        return 1
+
+    outcome = clear_legacy()
+    print(f"Deleted {outcome['deleted']} archive(s), reclaimed {_fmt_bytes(outcome['bytes_freed'])}.")
+    return 0
+
+
+def register_cli(parser: argparse.ArgumentParser) -> None:
+    """Attach the ``hermes checkpoints`` subcommands to ``parser``.
+
+    Registers ``status``, ``list``, ``prune``, ``clear`` and
+    ``clear-legacy``; each stores its handler in the ``func`` default.
+    With no subcommand the parser's own ``func`` default runs ``status``.
+    """
+    # bare `hermes checkpoints` → status
+    parser.set_defaults(func=cmd_status)
+    commands = parser.add_subparsers(dest="checkpoints_command", metavar="COMMAND")
+
+    # status
+    status_parser = commands.add_parser(
+        "status",
+        help="Show total size, project count, and per-project breakdown",
+    )
+    status_parser.add_argument(
+        "--limit", type=int, default=20, help="Max projects to list (default 20)"
+    )
+    status_parser.set_defaults(func=cmd_status)
+
+    # list
+    list_parser = commands.add_parser("list", help="Alias for 'status'")
+    list_parser.add_argument("--limit", type=int, default=20)
+    list_parser.set_defaults(func=cmd_list)
+
+    # prune
+    prune_parser = commands.add_parser(
+        "prune",
+        help="Delete orphan/stale checkpoints and GC the store",
+    )
+    prune_parser.add_argument(
+        "--retention-days",
+        type=int,
+        default=7,
+        help="Drop projects whose last_touch is older than N days (default 7)",
+    )
+    prune_parser.add_argument(
+        "--max-size-mb",
+        type=int,
+        default=500,
+        help="After orphan/stale prune, drop oldest commits "
+             "per project until total size <= this (default 500)",
+    )
+    prune_parser.add_argument(
+        "--keep-orphans",
+        action="store_true",
+        help="Skip deleting projects whose workdir no longer exists",
+    )
+    prune_parser.set_defaults(func=cmd_prune)
+
+    # clear
+    clear_parser = commands.add_parser(
+        "clear",
+        help="Delete the entire checkpoint base (all /rollback history)",
+    )
+    clear_parser.add_argument(
+        "-f", "--force", action="store_true", help="Skip confirmation prompt"
+    )
+    clear_parser.set_defaults(func=cmd_clear)
+
+    # clear-legacy
+    legacy_parser = commands.add_parser(
+        "clear-legacy",
+        help="Delete only the legacy-<ts>/ archives from v1 migration",
+    )
+    legacy_parser.add_argument(
+        "-f", "--force", action="store_true", help="Skip confirmation prompt"
+    )
+    legacy_parser.set_defaults(func=cmd_clear_legacy)
@@ -235,6 +235,9 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
    """
    findings: list[tuple[Path, str]] = []

+    if not source_dir.exists():
+        return findings
+
    # Direct state files in the root
    for name in ("todo.json", "sessions", "logs"):
        candidate = source_dir / name
@@ -243,7 +246,12 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
            findings.append((candidate, f"Root {kind}: {name}"))

    # State files inside workspace directories
-    for child in sorted(source_dir.iterdir()):
+    try:
+        children = sorted(source_dir.iterdir())
+    except OSError:
+        return findings
+
+    for child in children:
        if not child.is_dir() or child.name.startswith("."):
            continue
        # Check for workspace-like subdirectories
@@ -64,7 +64,9 @@ class CommandDef:
 COMMAND_REGISTRY: list[CommandDef] = [
    # Session
    CommandDef("new", "Start a new session (fresh session ID + history)", "Session",
-               aliases=("reset",)),
+               aliases=("reset",), args_hint="[name]"),
+    CommandDef("topic", "Enable or inspect Telegram DM topic sessions", "Session",
+               gateway_only=True, args_hint="[off|help|session-id]"),
    CommandDef("clear", "Clear screen and start a new session", "Session",
               cli_only=True),
    CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session",
@@ -1126,6 +1128,12 @@ class SlashCommandCompleter(Completer):
        except Exception:
            return {}

+    # Commands that open pickers when run without arguments.
+    # These should NOT receive a trailing space in completions because:
+    # - The TUI's submit handler applies completions on Enter if input differs
+    # - Adding space makes "/model" → "/model " which blocks picker execution
+    _PICKER_COMMANDS = frozenset({"model", "skin", "personality"})
+
    @staticmethod
    def _completion_text(cmd_name: str, word: str) -> str:
        """Return replacement text for a completion.
@@ -1134,8 +1142,17 @@ class SlashCommandCompleter(Completer):
        returning ``help`` would be a no-op and prompt_toolkit suppresses the
        menu. Appending a trailing space keeps the dropdown visible and makes
        backspacing retrigger it naturally.
+
+        However, commands that open pickers (model, skin, personality) should
+        NOT get a trailing space — the TUI would apply the completion on Enter
+        and block the picker from opening.
        """
-        return f"{cmd_name} " if cmd_name == word else cmd_name
+        if cmd_name != word:
+            return cmd_name
+        # Don't add space for picker commands — allows Enter to execute them
+        if cmd_name in SlashCommandCompleter._PICKER_COMMANDS:
+            return cmd_name
+        return f"{cmd_name} "

    @staticmethod
    def _extract_path_word(text: str) -> str | None:
@@ -544,12 +544,25 @@ DEFAULT_CONFIG = {
        # via TERMINAL_LOCAL_PERSISTENT env var.
        "persistent_shell": True,
    },
-    
+
+    "web": {
+        "backend": "",           # shared fallback — applies to both search and extract
+        "search_backend": "",    # per-capability override for web_search (e.g. "searxng")
+        "extract_backend": "",   # per-capability override for web_extract (e.g. "native")
+    },
+
    "browser": {
        "inactivity_timeout": 120,
        "command_timeout": 30,  # Timeout for browser commands in seconds (screenshot, navigate, etc.)
        "record_sessions": False,  # Auto-record browser sessions as WebM videos
        "allow_private_urls": False,  # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
+        # Browser engine for local mode.  Passed as ``--engine <value>`` to
+        # agent-browser v0.25.3+.
+        # "auto"       — use Chrome (default, don't pass --engine at all)
+        # "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots)
+        # "chrome"     — explicitly request Chrome
+        # Also settable via AGENT_BROWSER_ENGINE env var.
+        "engine": "auto",
        "auto_local_for_private_urls": True,  # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud
        "cdp_url": "",  # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
        # CDP supervisor — dialog + frame detection via a persistent WebSocket.
@@ -567,21 +580,39 @@ DEFAULT_CONFIG = {
    },

    # Filesystem checkpoints — automatic snapshots before destructive file ops.
-    # When enabled, the agent takes a snapshot of the working directory once per
-    # conversation turn (on first write_file/patch call).  Use /rollback to restore.
+    # When enabled, the agent takes a snapshot of the working directory once
+    # per conversation turn (on first write_file/patch call).  Use /rollback
+    # to restore.
+    #
+    # Defaults changed in v2 (single shared shadow store, real pruning):
+    #   - enabled: True -> False   (opt-in; most users never use /rollback)
+    #   - max_snapshots: 50 -> 20  (now actually enforced via ref rewrite)
+    #   - auto_prune:   False -> True (orphans/stale pruned automatically)
+    # Opt in via ``hermes chat --checkpoints`` or set enabled=True here.
    "checkpoints": {
-        "enabled": True,
-        "max_snapshots": 50,  # Max checkpoints to keep per directory
-        # Auto-maintenance: shadow repos accumulate forever under
-        # ~/.hermes/checkpoints/ (one per cd'd working directory). Field
-        # reports put the typical offender at 1000+ repos / ~12 GB. When
-        # auto_prune is on, hermes sweeps at startup (at most once per
-        # min_interval_hours) and deletes:
-        #   * orphan repos: HERMES_WORKDIR no longer exists on disk
-        #   * stale repos:  newest mtime older than retention_days
-        # Opt-in so users who rely on /rollback against long-ago sessions
-        # never lose data silently.
-        "auto_prune": False,
+        "enabled": False,
+        # Max checkpoints to keep per working directory.  Pre-v2 this only
+        # limited the `/rollback` listing; v2 actually rewrites the ref and
+        # garbage-collects older commits.
+        "max_snapshots": 20,
+        # Hard ceiling on total ``~/.hermes/checkpoints/`` size (MB).  When
+        # exceeded, the oldest checkpoint per project is dropped in a
+        # round-robin pass until total size falls under the cap.
+        # 0 disables the size cap.
+        "max_total_size_mb": 500,
+        # Skip any single file larger than this when staging a checkpoint.
+        # Prevents accidental snapshotting of datasets, model weights, and
+        # other large generated assets.  0 disables the filter.
+        "max_file_size_mb": 10,
+        # Auto-maintenance: hermes sweeps the checkpoint base at startup
+        # (at most once per ``min_interval_hours``) and:
+        #   * deletes project entries whose workdir no longer exists (orphan)
+        #   * deletes project entries whose last_touch is older than
+        #     ``retention_days``
+        #   * GCs the single shared store to reclaim unreachable objects
+        #   * enforces ``max_total_size_mb`` across remaining projects
+        #   * deletes ``legacy-*`` archives older than ``retention_days``
+        "auto_prune": True,
        "retention_days": 7,
        "delete_orphans": True,
        "min_interval_hours": 24,
@@ -778,9 +809,19 @@ DEFAULT_CONFIG = {
        "show_reasoning": False,
        "streaming": False,
        "final_response_markdown": "strip",  # render | strip | raw
+        # Preserve recent classic CLI output across Ctrl+L, /redraw, and
+        # terminal resize full-screen clears. Disable if a terminal emulator
+        # behaves badly with replayed scrollback.
+        "persistent_output": True,
+        "persistent_output_max_lines": 200,
        "inline_diffs": True,     # Show inline diff previews for write actions (write_file, patch, skill_manage)
        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
+        # UI language for static user-facing messages (approval prompts, a
+        # handful of gateway slash-command replies).  Does NOT affect agent
+        # responses, log lines, tool outputs, or slash-command descriptions.
+        # Supported: en, zh, ja, de, es, fr, tr, uk.  Unknown values fall back to en.
+        "language": "en",
        # TUI busy indicator style: kaomoji (default), emoji, unicode (braille
        # spinner), or ascii.  Live-swappable via `/indicator <style>`.
        "tui_status_indicator": "kaomoji",
@@ -809,6 +850,7 @@ DEFAULT_CONFIG = {
            "enabled": False,
            "fields": ["model", "context_pct", "cwd"],  # Order shown; drop any to hide
        },
+        "copy_shortcut": "auto",  # "auto" (platform default) | "ctrl_c" | "ctrl_shift_c" | "disabled"
    },

    # Web dashboard settings
@@ -1286,7 +1328,10 @@ DEFAULT_CONFIG = {
        # for a single update run.
        "pre_update_backup": False,
        # How many pre-update backup zips to retain.  Older ones are pruned
-        # automatically after each successful backup.
+        # automatically after each successful backup.  Values below 1 are
+        # floored to 1 — the backup just created is always preserved.  To
+        # disable backups entirely, set ``pre_update_backup: false`` above
+        # rather than ``backup_keep: 0``.
        "backup_keep": 5,
    },

@@ -1787,6 +1832,14 @@ OPTIONAL_ENV_VARS = {
        "password": True,
        "category": "tool",
    },
+    "SEARXNG_URL": {
+        "description": "URL of your SearXNG instance for free self-hosted web search",
+        "prompt": "SearXNG URL (e.g. http://localhost:8080)",
+        "url": "https://searxng.github.io/searxng/",
+        "tools": ["web_search"],
+        "password": False,
+        "category": "tool",
+    },
    "BROWSERBASE_API_KEY": {
        "description": "Browserbase API key for cloud browser (optional — local browser works without this)",
        "prompt": "Browserbase API key",
@@ -1818,6 +1871,15 @@ OPTIONAL_ENV_VARS = {
        "password": False,
        "category": "tool",
    },
+    "AGENT_BROWSER_ENGINE": {
+        "description": "Browser engine for local mode: auto (default Chrome), lightpanda (faster, no screenshots), chrome",
+        "prompt": "Browser engine (auto/lightpanda/chrome)",
+        "url": "https://github.com/vercel-labs/agent-browser",
+        "tools": ["browser_navigate", "browser_snapshot", "browser_click", "browser_vision"],
+        "password": False,
+        "category": "tool",
+        "advanced": True,
+    },
    "CAMOFOX_URL": {
        "description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)",
        "prompt": "Camofox server URL",
@@ -1896,7 +1958,7 @@ OPTIONAL_ENV_VARS = {
    "LINEAR_API_KEY": {
        "description": "Linear personal API key (used by the `linear` skill)",
        "prompt": "Linear API key",
-        "url": "https://linear.app/settings/api",
+        "url": "https://linear.app/settings/account/security",
        "password": True,
        "category": "skill",
        "advanced": True,
@@ -3943,6 +4005,7 @@ _FALLBACK_COMMENT = """
 #   kimi-coding-cn (KIMI_CN_API_KEY)   — Kimi / Moonshot (China)
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
 #   minimax-cn   (MINIMAX_CN_API_KEY)  — MiniMax (China)
+#   bedrock      (AWS IAM / boto3)     — AWS Bedrock (Converse API)
 #
 # For custom OpenAI-compatible endpoints, add base_url and key_env.
 #
@@ -3974,6 +4037,7 @@ _COMMENTED_SECTIONS = """
 #   kimi-coding-cn (KIMI_CN_API_KEY)   — Kimi / Moonshot (China)
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
 #   minimax-cn   (MINIMAX_CN_API_KEY)  — MiniMax (China)
+#   bedrock      (AWS IAM / boto3)     — AWS Bedrock (Converse API)
 #
 # For custom OpenAI-compatible endpoints, add base_url and key_env.
 #
@@ -4831,3 +4895,45 @@ def config_command(args):
        print("  hermes config path      Show config file path")
        print("  hermes config env-path  Show .env file path")
        sys.exit(1)
+
+
+# ── Profile-driven env var injection ─────────────────────────────────────────
+# Any provider registered in providers/ with auth_type="api_key" automatically
+# gets its env_vars exposed in OPTIONAL_ENV_VARS without editing this file.
+# Runs once at import time.
+
+_profile_env_vars_injected = False
+
+
+def _inject_profile_env_vars() -> None:
+    """Add env vars declared by provider profiles to ``OPTIONAL_ENV_VARS``.
+
+    Every profile returned by ``providers.list_providers()`` whose
+    ``auth_type`` is ``"api_key"`` contributes its ``env_vars``.  Names that
+    are already present in ``OPTIONAL_ENV_VARS`` are left untouched.  A
+    variable whose name ends in ``_URL`` (which includes ``_BASE_URL``) is
+    registered as a non-secret base-URL override; anything else is treated as
+    a secret API key.
+
+    The function is best-effort and one-shot: the guard flag is set before any
+    work happens, so later calls are no-ops even when the first attempt
+    failed.  Failures never propagate; they are logged at debug level so a
+    broken provider plugin can still be diagnosed.
+    """
+    global _profile_env_vars_injected
+    if _profile_env_vars_injected:
+        return
+    # Set the flag first so a failing import is not retried on every call.
+    _profile_env_vars_injected = True
+    try:
+        from providers import list_providers
+
+        for profile in list_providers():
+            if profile.auth_type != "api_key":
+                continue
+            for var in profile.env_vars:
+                if var in OPTIONAL_ENV_VARS:
+                    continue
+                label = profile.display_name or profile.name
+                # "_BASE_URL" also ends with "_URL", so one suffix test
+                # covers both spellings.
+                is_key = not var.endswith("_URL")
+                OPTIONAL_ENV_VARS[var] = {
+                    "description": f"{label} {'API key' if is_key else 'base URL override'}",
+                    "prompt": f"{label} {'API key' if is_key else 'base URL (leave empty for default)'}",
+                    "url": profile.signup_url or None,
+                    "password": is_key,
+                    "category": "provider",
+                    "advanced": True,
+                }
+    except Exception:
+        # Still best-effort, but leave a trace instead of swallowing silently.
+        import logging
+
+        logging.getLogger(__name__).debug(
+            "Skipping provider-profile env var injection", exc_info=True
+        )
+
+
+# Eagerly inject so that OPTIONAL_ENV_VARS is fully populated at import time.
+_inject_profile_env_vars()
@@ -93,6 +93,8 @@ def cron_list(show_all: bool = False):
        script = job.get("script")
        if script:
            print(f"    Script:    {script}")
+        if job.get("no_agent"):
+            print(f"    Mode:      {color('no-agent', Colors.DIM)} (script stdout delivered directly)")
        workdir = job.get("workdir")
        if workdir:
            print(f"    Workdir:   {workdir}")
@@ -172,6 +174,7 @@ def cron_create(args):
        skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
        script=getattr(args, "script", None),
        workdir=getattr(args, "workdir", None),
+        no_agent=getattr(args, "no_agent", False) or None,
    )
    if not result.get("success"):
        print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
@@ -184,6 +187,8 @@ def cron_create(args):
    job_data = result.get("job", {})
    if job_data.get("script"):
        print(f"  Script: {job_data['script']}")
+    if job_data.get("no_agent"):
+        print("  Mode: no-agent (script stdout delivered directly)")
    if job_data.get("workdir"):
        print(f"  Workdir: {job_data['workdir']}")
    print(f"  Next run: {result['next_run_at']}")
@@ -225,6 +230,7 @@ def cron_edit(args):
        skills=final_skills,
        script=getattr(args, "script", None),
        workdir=getattr(args, "workdir", None),
+        no_agent=getattr(args, "no_agent", None),
    )
    if not result.get("success"):
        print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED))
@@ -240,6 +246,8 @@ def cron_edit(args):
        print("  Skills: none")
    if updated.get("script"):
        print(f"  Script: {updated['script']}")
+    if updated.get("no_agent"):
+        print("  Mode: no-agent (script stdout delivered directly)")
    if updated.get("workdir"):
        print(f"  Workdir: {updated['workdir']}")
    return 0
@@ -245,6 +245,111 @@ def _cmd_restore(args) -> int:
    return 0 if ok else 1


+def _cmd_archive(args) -> int:
+    """Archive one agent-created skill right now, unless it is pinned.
+
+    The auto-curator archives stale skills on its own schedule; this verb lets
+    the user archive ``args.skill`` immediately instead of waiting for a run.
+
+    Returns:
+        0 if ``skill_usage.archive_skill`` reported success; 1 if the skill is
+        pinned (nothing is changed) or the archive call reported failure.
+    """
+    from tools import skill_usage
+
+    target = args.skill
+    record = skill_usage.get_record(target)
+    if record.get("pinned"):
+        # Pinned skills are protected; tell the user how to lift the pin.
+        print(
+            f"curator: '{target}' is pinned — unpin first with "
+            f"`hermes curator unpin {target}`"
+        )
+        return 1
+
+    succeeded, message = skill_usage.archive_skill(target)
+    print(f"curator: {message}")
+    return 0 if succeeded else 1
+
+
+def _idle_days(record: dict) -> Optional[int]:
+    """Whole days since the skill's last activity (view / use / patch).
+
+    ``last_activity_at`` is preferred.  ``created_at`` is the fallback so a
+    skill that was authored but never used can still be pruned — otherwise
+    never-touched skills would be immortal.  The fallback also applies when
+    ``last_activity_at`` is present but cannot be parsed.
+
+    Args:
+        record: Usage record; the two timestamp fields are read with
+            ``dict.get`` and are expected to hold ISO-8601 text.
+
+    Returns:
+        Non-negative day count (timestamps in the future clamp to 0), or
+        None only when both fields are missing or unparseable.
+    """
+    for field in ("last_activity_at", "created_at"):
+        raw = record.get(field)
+        if not raw:
+            continue
+        text = str(raw)
+        # ``fromisoformat`` only accepts a trailing "Z" from Python 3.11 on;
+        # spell the UTC offset out so older interpreters parse it too.
+        if text.endswith("Z"):
+            text = text[:-1] + "+00:00"
+        try:
+            dt = datetime.fromisoformat(text)
+        except (TypeError, ValueError):
+            continue
+        if dt.tzinfo is None:
+            # Naive timestamps are interpreted as UTC.
+            dt = dt.replace(tzinfo=timezone.utc)
+        return max(0, (datetime.now(timezone.utc) - dt).days)
+    return None
+
+
+def _cmd_prune(args) -> int:
+    """Archive, in bulk, agent-created skills that have been idle >= N days.
+
+    Pinned and already-archived skills are never candidates, and skills whose
+    idle time cannot be determined are skipped.  The default ``--days 90``
+    matches a conservative read of the curator's own archive threshold.
+
+    Args:
+        args: Parsed CLI namespace.  Reads ``days`` (default 90), ``dry_run``
+            (preview only) and ``yes`` (skip the confirmation prompt).
+
+    Returns:
+        0 on success, a dry run, or nothing to prune; 1 when the user aborts
+        or any archive call fails; 2 when ``days`` is below 1.
+    """
+    from tools import skill_usage
+
+    days = getattr(args, "days", 90)
+    if days < 1:
+        print(f"curator: --days must be >= 1 (got {days})", file=sys.stderr)
+        return 2
+
+    preview_only = bool(getattr(args, "dry_run", False))
+    assume_yes = bool(getattr(args, "yes", False))
+
+    # Collect (name, idle_days) for every skill that is eligible for pruning.
+    stale = []
+    for record in skill_usage.agent_created_report():
+        if record.get("pinned") or record.get("state") == skill_usage.STATE_ARCHIVED:
+            continue
+        idle_for = _idle_days(record)
+        if idle_for is not None and idle_for >= days:
+            stale.append((record["name"], idle_for))
+
+    if not stale:
+        print(f"curator: nothing to prune (no unpinned skills idle >= {days}d)")
+        return 0
+
+    # Longest-idle first; the sort is stable, so ties keep report order.
+    stale = sorted(stale, key=lambda entry: entry[1], reverse=True)
+    total = len(stale)
+    print(f"curator: {total} skill(s) idle >= {days}d:")
+    for name, idle in stale:
+        print(f"  {name:40s} idle {idle}d")
+
+    if preview_only:
+        print("\n(dry run — no changes made)")
+        return 0
+
+    if not assume_yes:
+        try:
+            answer = input(f"\nArchive {total} skill(s)? [y/N] ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            print("\ncurator: aborted")
+            return 1
+        if answer not in ("y", "yes"):
+            print("curator: aborted")
+            return 1
+
+    # Archive each candidate, remembering the ones that could not be moved.
+    failures = []
+    for name, _ in stale:
+        ok, msg = skill_usage.archive_skill(name)
+        if not ok:
+            failures.append((name, msg))
+    archived = total - len(failures)
+
+    print(f"\ncurator: archived {archived}/{total}")
+    if not failures:
+        return 0
+    print("failures:")
+    for name, msg in failures:
+        print(f"  {name}: {msg}")
+    return 1
+
+
 def _cmd_backup(args) -> int:
    """Take a manual snapshot of the skills tree. Same mechanism as the
    automatic pre-run snapshot, just user-initiated."""
@@ -383,6 +488,31 @@ def register_cli(parent: argparse.ArgumentParser) -> None:
    p_restore.add_argument("skill", help="Skill name")
    p_restore.set_defaults(func=_cmd_restore)

+    p_archive = subs.add_parser(
+        "archive",
+        help="Manually archive a skill (move to .archive/, excluded from prompt)",
+    )
+    p_archive.add_argument("skill", help="Skill name")
+    p_archive.set_defaults(func=_cmd_archive)
+
+    p_prune = subs.add_parser(
+        "prune",
+        help="Bulk-archive agent-created skills idle for >= N days (default 90)",
+    )
+    p_prune.add_argument(
+        "--days", type=int, default=90,
+        help="Archive skills idle for at least N days (default: 90)",
+    )
+    p_prune.add_argument(
+        "-y", "--yes", action="store_true",
+        help="Skip the confirmation prompt",
+    )
+    p_prune.add_argument(
+        "--dry-run", dest="dry_run", action="store_true",
+        help="Show what would be archived without doing it",
+    )
+    p_prune.set_defaults(func=_cmd_prune)
+
    p_backup = subs.add_parser(
        "backup",
        help="Take a manual tar.gz snapshot of ~/.hermes/skills/ "
@@ -12,6 +12,7 @@ import importlib.util
 from pathlib import Path

 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
+from hermes_cli.env_loader import load_hermes_dotenv
 from hermes_constants import display_hermes_home

 PROJECT_ROOT = get_project_root()
@@ -19,15 +20,8 @@ HERMES_HOME = get_hermes_home()
 _DHH = display_hermes_home()  # user-facing display path (e.g. ~/.hermes or ~/.hermes/profiles/coder)

 # Load environment variables from ~/.hermes/.env so API key checks work
-from dotenv import load_dotenv
 _env_path = get_env_path()
-if _env_path.exists():
-    try:
-        load_dotenv(_env_path, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(_env_path, encoding="latin-1")
-# Also try project .env as dev fallback
-load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")
+load_hermes_dotenv(hermes_home=_env_path.parent, project_env=PROJECT_ROOT / ".env")

 from hermes_cli.colors import Colors, color
 from hermes_cli.models import _HERMES_USER_AGENT
@@ -113,15 +107,35 @@ def _honcho_is_configured_for_doctor() -> bool:
        return False


+def _is_kanban_worker_env_gate(item: dict) -> bool:
+    """Tell whether Kanban is "unavailable" only because this is not a worker.
+
+    True requires all of: the entry is named ``kanban``, the
+    ``HERMES_KANBAN_TASK`` environment variable is unset or empty, and the
+    entry lists at least one tool with every tool name starting with
+    ``kanban_``.
+    """
+    is_kanban = item.get("name") == "kanban"
+    if not is_kanban or os.environ.get("HERMES_KANBAN_TASK"):
+        return False
+
+    tool_names = item.get("tools") or []
+    if not tool_names:
+        return False
+    return all(str(tool_name).startswith("kanban_") for tool_name in tool_names)
+
+
+def _doctor_tool_availability_detail(toolset: str) -> str:
+    """Return an explanatory suffix for a toolset's doctor line, or ``""``.
+
+    Only ``kanban`` gets a suffix, and only while ``HERMES_KANBAN_TASK`` is
+    unset or empty in the environment.
+    """
+    outside_worker = not os.environ.get("HERMES_KANBAN_TASK")
+    runtime_gated = toolset == "kanban" and outside_worker
+    return (
+        "(runtime-gated; loaded only for dispatcher-spawned workers)"
+        if runtime_gated
+        else ""
+    )
+
+
 def _apply_doctor_tool_availability_overrides(available: list[str], unavailable: list[dict]) -> tuple[list[str], list[dict]]:
    """Adjust runtime-gated tool availability for doctor diagnostics."""
-    if not _honcho_is_configured_for_doctor():
-        return available, unavailable
-
    updated_available = list(available)
    updated_unavailable = []
    for item in unavailable:
-        if item.get("name") == "honcho":
+        name = item.get("name")
+        if _is_kanban_worker_env_gate(item):
+            if "kanban" not in updated_available:
+                updated_available.append("kanban")
+            continue
+        if name == "honcho" and _honcho_is_configured_for_doctor():
            if "honcho" not in updated_available:
                updated_available.append("honcho")
            continue
@@ -175,6 +189,85 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
        check_warn("Could not verify systemd linger", f"({linger_detail})")


+_APIKEY_PROVIDERS_CACHE: list | None = None
+
+
+def _build_apikey_providers_list() -> list:
+    """Assemble the API-key provider list used by the doctor health checks.
+
+    Each entry is a tuple
+    ``(name, env_vars, default_url, base_env, supports_models_endpoint)``.
+    The built-in table is extended with every ``ProviderProfile`` whose
+    ``auth_type`` is ``"api_key"`` and that is not already covered, so adding
+    plugins/model-providers/<name>/ is sufficient to get into doctor.
+
+    The list is rebuilt on every call; any caching is left to the caller.
+    If the ``providers`` package cannot be imported or a profile misbehaves,
+    the entries collected so far are returned.
+    """
+    _static = [
+        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
+        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
+        ("StepFun Step Plan", ("STEPFUN_API_KEY",),                          "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
+        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
+        ("Arcee AI",         ("ARCEEAI_API_KEY",),                           "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
+        ("GMI Cloud",        ("GMI_API_KEY",),                               "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
+        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                          "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
+        ("Hugging Face",     ("HF_TOKEN",),                                  "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
+        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                            "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
+        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                        "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
+        # MiniMax global: /v1 endpoint supports /models.
+        ("MiniMax",          ("MINIMAX_API_KEY",),                           "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL", True),
+        # MiniMax CN: /v1 endpoint does NOT support /models (returns 404).
+        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                        "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL", False),
+        ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",),                       "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
+        ("Kilo Code",        ("KILOCODE_API_KEY",),                          "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
+        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                      "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
+        # OpenCode Go has no shared /models endpoint; skip the health check.
+        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                       None,                                  "OPENCODE_GO_BASE_URL", False),
+    ]
+    # Display labels of the built-in rows, captured before any profile is added.
+    builtin_labels = {row[0] for row in _static}
+    # Canonical profile names of those same rows, in table order.  Matching on
+    # these as well keeps a profile without display_name from creating a
+    # duplicate entry for a provider that is already listed above.
+    builtin_canonical: set[str] = {
+        "zai", "kimi-coding", "stepfun", "kimi-coding-cn",
+        "arcee", "gmi", "deepseek", "huggingface", "nvidia",
+        "alibaba", "minimax", "minimax-cn", "ai-gateway",
+        "kilocode", "opencode-zen", "opencode-go",
+    }
+    url_suffixes = ("_BASE_URL", "_URL")
+    try:
+        from providers import list_providers
+        from providers.base import ProviderProfile as _PP
+
+        for profile in list_providers():
+            if not isinstance(profile, _PP):
+                continue
+            if profile.auth_type != "api_key" or not profile.env_vars:
+                continue
+            label = profile.display_name or profile.name
+            if label in builtin_labels or profile.name in builtin_canonical:
+                continue
+            # Keep API-key vars apart from base-URL override vars: the
+            # health-check loop sends the first value it finds as
+            # Authorization: Bearer, so a URL string must never be picked.
+            key_vars, url_vars = [], []
+            for var in profile.env_vars:
+                bucket = url_vars if var.endswith(url_suffixes) else key_vars
+                bucket.append(var)
+            if not key_vars:
+                continue
+            base_var = url_vars[0] if url_vars else None
+            if profile.base_url:
+                models_url = profile.models_url or (profile.base_url.rstrip("/") + "/models")
+            else:
+                models_url = None
+            _static.append((label, tuple(key_vars), models_url, base_var, True))
+    except Exception:
+        pass
+    return _static
+
+
 def run_doctor(args):
    """Run diagnostic checks."""
    should_fix = getattr(args, 'fix', False)
@@ -935,6 +1028,8 @@ def run_doctor(args):
        agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser"
        if agent_browser_path.exists():
            check_ok("agent-browser (Node.js)", "(browser automation)")
+        elif shutil.which("agent-browser"):
+            check_ok("agent-browser", "(browser automation)")
        else:
            if _is_termux():
                check_info("agent-browser is not installed (expected in the tested Termux path)")
@@ -1085,26 +1180,11 @@ def run_doctor(args):
    # -- API-key providers --
    # Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
    # If supports_models_endpoint is False, we skip the health check and just show "configured"
-    _apikey_providers = [
-        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
-        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
-        ("StepFun Step Plan",   ("STEPFUN_API_KEY",),                           "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
-        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
-        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
-        ("GMI Cloud",        ("GMI_API_KEY",),                                "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
-        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
-        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
-        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                             "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
-        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                         "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
-        # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
-        ("MiniMax",          ("MINIMAX_API_KEY",),                            "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL", True),
-        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL", True),
-        ("Vercel AI Gateway",       ("AI_GATEWAY_API_KEY",),                          "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
-        ("Kilo Code",        ("KILOCODE_API_KEY",),                            "https://api.kilo.ai/api/gateway/models",  "KILOCODE_BASE_URL", True),
-        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                        "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
-        # OpenCode Go has no shared /models endpoint; skip the health check.
-        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                         None,                                  "OPENCODE_GO_BASE_URL", False),
-    ]
+    # Cached at module level after first build — profiles auto-extend it.
+    global _APIKEY_PROVIDERS_CACHE
+    if _APIKEY_PROVIDERS_CACHE is None:
+        _APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list()
+    _apikey_providers = _APIKEY_PROVIDERS_CACHE
    for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
        _key = ""
        for _ev in _env_vars:
@@ -1218,7 +1298,7 @@ def run_doctor(args):
        
        for tid in available:
            info = TOOLSET_REQUIREMENTS.get(tid, {})
-            check_ok(info.get("name", tid))
+            check_ok(info.get("name", tid), _doctor_tool_availability_detail(tid))
        
        for item in unavailable:
            env_vars = item.get("missing_vars") or item.get("env_vars") or []
@@ -1261,9 +1341,23 @@ def run_doctor(args):
        check_warn("Skills Hub directory not initialized", "(run: hermes skills list)")

    from hermes_cli.config import get_env_value
+
+    def _gh_authenticated() -> bool:
+        """Return True when the ``gh`` CLI reports a logged-in account.
+
+        Only the exit status of ``gh auth status`` is consulted; its output
+        is captured and discarded.
+
+        NOTE(review): the previous invocation passed ``--json authenticated``.
+        ``authenticated`` does not appear to be a field that
+        ``gh auth status --json`` accepts, and older gh releases have no
+        ``--json`` flag for this subcommand, so that call exited non-zero
+        even for logged-in users. Confirm against the gh manual.
+
+        Returns:
+            True if ``gh auth status`` exits with status 0; False if it exits
+            non-zero, cannot be launched, or does not finish within 10 s.
+        """
+        try:
+            result = subprocess.run(
+                ["gh", "auth", "status"],
+                capture_output=True, timeout=10,
+            )
+        except (OSError, subprocess.TimeoutExpired):
+            # OSError covers FileNotFoundError (gh not installed) as well as
+            # a gh binary that exists but cannot be executed.
+            return False
+        return result.returncode == 0
+
    github_token = get_env_value("GITHUB_TOKEN") or get_env_value("GH_TOKEN")
    if github_token:
        check_ok("GitHub token configured (authenticated API access)")
+    elif _gh_authenticated():
+        check_ok("GitHub authenticated via gh CLI", "(full API access — no GITHUB_TOKEN needed)")
    else:
        check_warn("No GITHUB_TOKEN", f"(60 req/hr rate limit — set in {_DHH}/.env for better rates)")

@@ -14,6 +14,7 @@ import sys
 from pathlib import Path

 from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
+from hermes_cli.env_loader import load_hermes_dotenv
 from hermes_constants import display_hermes_home


@@ -195,15 +196,11 @@ def run_dump(args):
    show_keys = getattr(args, "show_keys", False)

    # Load env from .env file so key checks work
-    from dotenv import load_dotenv
    env_path = get_env_path()
-    if env_path.exists():
-        try:
-            load_dotenv(env_path, encoding="utf-8")
-        except UnicodeDecodeError:
-            load_dotenv(env_path, encoding="latin-1")
-    # Also try project .env as dev fallback
-    load_dotenv(get_project_root() / ".env", override=False, encoding="utf-8")
+    load_hermes_dotenv(
+        hermes_home=env_path.parent,
+        project_env=get_project_root() / ".env",
+    )

    project_root = get_project_root()
    hermes_home = get_hermes_home()
@@ -785,6 +785,12 @@ def stop_profile_gateway() -> bool:
    if pid is None:
        return False

+    try:
+        from gateway.status import write_planned_stop_marker
+        write_planned_stop_marker(pid)
+    except Exception:
+        pass
+
    try:
        os.kill(pid, signal.SIGTERM)
    except ProcessLookupError:
@@ -1608,6 +1614,46 @@ def _build_user_local_paths(home: Path, path_entries: list[str]) -> list[str]:
    return [p for p in candidates if p not in path_entries and Path(p).exists()]


+def _build_wsl_interop_paths(path_entries: list[str]) -> list[str]:
+    """Collect Windows-interop PATH entries to bake into generated systemd units.
+
+    Inside a WSL shell the Windows directories (``/mnt/c/WINDOWS/System32``
+    and friends) are normally on PATH, but systemd user services do not
+    inherit them. Tools that shell out to ``powershell.exe``/``cmd.exe``
+    therefore work interactively yet fail in the background service unless
+    the entries are persisted at install time.
+
+    Args:
+        path_entries: PATH entries already chosen for the unit; anything
+            listed here is left out of the result.
+
+    Returns:
+        New entries in discovery order without duplicates, or ``[]`` when
+        not running under WSL.
+    """
+    if not is_wsl():
+        return []
+
+    # Source 1: every /mnt/... directory on the caller's current PATH.
+    found: list[str] = [
+        segment
+        for segment in os.environ.get("PATH", "").split(os.pathsep)
+        if segment.startswith("/mnt/")
+    ]
+
+    # Source 2: the directories holding the interop executables that
+    # resolve right now.
+    for exe_name in ("powershell.exe", "cmd.exe", "explorer.exe", "wsl.exe"):
+        location = shutil.which(exe_name)
+        if location:
+            found.append(str(Path(location).parent))
+
+    # Source 3: well-known Windows directories, when they exist on disk.
+    well_known = (
+        "/mnt/c/WINDOWS/system32",
+        "/mnt/c/WINDOWS",
+        "/mnt/c/WINDOWS/System32/Wbem",
+        "/mnt/c/WINDOWS/System32/WindowsPowerShell/v1.0/",
+        "/mnt/c/WINDOWS/System32/OpenSSH/",
+    )
+    found.extend(folder for folder in well_known if Path(folder).exists())
+
+    # Order-preserving de-duplication against both the caller's entries and
+    # the ones gathered above; empty strings are dropped.
+    already = set(path_entries)
+    unique: list[str] = []
+    for item in found:
+        if not item or item in already:
+            continue
+        already.add(item)
+        unique.append(item)
+    return unique
+
+
 def _remap_path_for_user(path: str, target_home_dir: str) -> str:
    """Remap *path* from the current user's home to *target_home_dir*.

@@ -1699,6 +1745,7 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
        node_bin = _remap_path_for_user(node_bin, home_dir)
        path_entries = [_remap_path_for_user(p, home_dir) for p in path_entries]
        path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
+        path_entries.extend(_build_wsl_interop_paths(path_entries))
        path_entries.extend(common_bin_paths)
        sane_path = ":".join(path_entries)
        return f"""[Unit]
@@ -1738,6 +1785,7 @@ WantedBy=multi-user.target
    hermes_home = str(get_hermes_home().resolve())
    profile_arg = _profile_arg(hermes_home)
    path_entries.extend(_build_user_local_paths(Path.home(), path_entries))
+    path_entries.extend(_build_wsl_interop_paths(path_entries))
    path_entries.extend(common_bin_paths)
    sane_path = ":".join(path_entries)
    return f"""[Unit]
@@ -1971,6 +2019,15 @@ def systemd_uninstall(system: bool = False):
    print(f"✓ {_service_scope_label(system).capitalize()} service uninstalled")


+def _require_service_installed(action: str, system: bool = False) -> None:
+    """Abort with exit status 1 when the systemd unit file does not exist.
+
+    Prints a short error plus the matching install command (with ``sudo``
+    and ``--system`` for system scope) before exiting. Returns normally
+    when the unit file is present.
+
+    Args:
+        action: Name of the operation being attempted; not used by the body.
+        system: True for the system-scope unit, False for the user-scope one.
+    """
+    if get_systemd_unit_path(system=system).exists():
+        return
+    sudo_prefix = "sudo " if system else ""
+    scope_suffix = " --system" if system else ""
+    print("✗ Gateway service is not installed")
+    print(f"  Run: {sudo_prefix}hermes gateway install{scope_suffix}")
+    sys.exit(1)
+
+
 def systemd_start(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
@@ -1980,6 +2037,7 @@ def systemd_start(system: bool = False):
        # reachable (common on fresh RHEL/Debian SSH sessions without linger).
        # Raises UserSystemdUnavailableError with a remediation message.
        _preflight_user_systemd()
+    _require_service_installed("start", system=system)
    refresh_systemd_unit_if_needed(system=system)
    _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service started")
@@ -1990,6 +2048,14 @@ def systemd_stop(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("stop")
+    _require_service_installed("stop", system=system)
+    try:
+        from gateway.status import get_running_pid, write_planned_stop_marker
+        pid = get_running_pid(cleanup_stale=False)
+        if pid is not None:
+            write_planned_stop_marker(pid)
+    except Exception:
+        pass
    _run_systemctl(["stop", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service stopped")

@@ -2001,6 +2067,7 @@ def systemd_restart(system: bool = False):
        _require_root_for_system_service("restart")
    else:
        _preflight_user_systemd()
+    _require_service_installed("restart", system=system)
    refresh_systemd_unit_if_needed(system=system)
    from gateway.status import get_running_pid

@@ -2350,6 +2417,13 @@ def launchd_start():
 def launchd_stop():
    label = get_launchd_label()
    target = f"{_launchd_domain()}/{label}"
+    try:
+        from gateway.status import get_running_pid, write_planned_stop_marker
+        pid = get_running_pid(cleanup_stale=False)
+        if pid is not None:
+            write_planned_stop_marker(pid)
+    except Exception:
+        pass
    # bootout unloads the service definition so KeepAlive doesn't respawn
    # the process.  A plain `kill SIGTERM` only signals the process — launchd
    # immediately restarts it because KeepAlive.SuccessfulExit = false.
@@ -2492,6 +2566,20 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
                 hasn't fully exited yet.
    """
    sys.path.insert(0, str(PROJECT_ROOT))
+
+    # Refresh the systemd unit definition on every boot so that restart
+    # settings (RestartSec, StartLimitIntervalSec, etc.) stay current even
+    # when the process was respawned via exit-code-75 (stale-code or
+    # /restart) rather than through `hermes gateway restart` which already
+    # calls refresh_systemd_unit_if_needed().  Without this, a code update
+    # that ships new unit settings won't take effect until the next manual
+    # `hermes gateway start/restart` — leaving the gateway vulnerable to
+    # the exact failure mode the new settings were meant to prevent.
+    if supports_systemd_services():
+        try:
+            refresh_systemd_unit_if_needed(system=False)
+        except Exception:
+            pass  # best-effort; don't block gateway startup
    
    from gateway.run import start_gateway
    
@@ -169,11 +169,93 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
            "or docs/hermes-kanban-v1-spec.pdf for the full design."
        ),
    )
+    # --- global --board flag ---
+    # Applies to every subcommand below. When set, scopes all reads and
+    # writes to that board's DB. When omitted, resolves via the
+    # HERMES_KANBAN_BOARD env var, then the persisted current-board
+    # file, then "default". See kanban_db.get_current_board().
+    kanban_parser.add_argument(
+        "--board",
+        default=None,
+        metavar="<slug>",
+        help=(
+            "Board slug to operate on. Defaults to the current board "
+            "(set via `hermes kanban boards switch <slug>` or the "
+            "HERMES_KANBAN_BOARD env var). Use `hermes kanban boards list` "
+            "to see all boards."
+        ),
+    )
    sub = kanban_parser.add_subparsers(dest="kanban_action")

    # --- init ---
    sub.add_parser("init", help="Create kanban.db if missing (idempotent)")

+    # --- boards (new in v2: multi-project support) ---
+    p_boards = sub.add_parser(
+        "boards",
+        help="Manage kanban boards (one board per project / workstream)",
+        description=(
+            "Boards let you separate unrelated streams of work "
+            "(projects, repos, domains) into isolated queues. Each "
+            "board has its own DB, workspaces directory, and dispatcher "
+            "loop — tasks on one board cannot collide with tasks on "
+            "another. The first board is 'default' and always exists."
+        ),
+    )
+    boards_sub = p_boards.add_subparsers(dest="boards_action")
+
+    b_list = boards_sub.add_parser(
+        "list", aliases=["ls"],
+        help="List all boards with task counts",
+    )
+    b_list.add_argument("--json", action="store_true")
+    b_list.add_argument("--all", action="store_true",
+                        help="Include archived boards too")
+
+    b_create = boards_sub.add_parser(
+        "create", aliases=["new"],
+        help="Create a new board",
+    )
+    b_create.add_argument("slug",
+                          help="Board slug (kebab-case, e.g. atm10-server)")
+    b_create.add_argument("--name", default=None,
+                          help="Human-readable display name (defaults to Title Case of slug)")
+    b_create.add_argument("--description", default=None,
+                          help="Optional description")
+    b_create.add_argument("--icon", default=None,
+                          help="Optional emoji or single-character icon for the dashboard")
+    b_create.add_argument("--color", default=None,
+                          help="Optional hex color (e.g. '#8b5cf6') for the dashboard")
+    b_create.add_argument("--switch", action="store_true",
+                          help="Switch to the new board after creating it")
+
+    b_rm = boards_sub.add_parser(
+        "rm", aliases=["remove", "delete"],
+        help="Archive (default) or delete a board",
+    )
+    b_rm.add_argument("slug")
+    b_rm.add_argument("--delete", action="store_true",
+                      help="Hard-delete the board directory instead of archiving it. "
+                           "Default is to move it to boards/_archived/ so it's recoverable.")
+
+    b_switch = boards_sub.add_parser(
+        "switch", aliases=["use"],
+        help="Set the active board for subsequent CLI calls",
+    )
+    b_switch.add_argument("slug")
+
+    boards_sub.add_parser(
+        "show", aliases=["current"],
+        help="Print the currently-active board slug",
+    )
+
+    b_rename = boards_sub.add_parser(
+        "rename",
+        help="Change a board's human-readable display name (slug is immutable)",
+    )
+    b_rename.add_argument("slug")
+    b_rename.add_argument("name", help="New display name")
+
    # --- create ---
    p_create = sub.add_parser("create", help="Create a new task")
    p_create.add_argument("title", help="Task title")
@@ -226,6 +308,57 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
    p_assign.add_argument("task_id")
    p_assign.add_argument("profile", help="Profile name (or 'none' to unassign)")

+    # --- reclaim / reassign (recovery) ---
+    p_reclaim = sub.add_parser(
+        "reclaim",
+        help="Release an active worker claim on a running task",
+    )
+    p_reclaim.add_argument("task_id")
+    p_reclaim.add_argument(
+        "--reason", default=None,
+        help="Human-readable reason (recorded on the reclaimed event)",
+    )
+
+    p_reassign = sub.add_parser(
+        "reassign",
+        help="Reassign a task to a different profile, optionally reclaiming first",
+    )
+    p_reassign.add_argument("task_id")
+    p_reassign.add_argument(
+        "profile",
+        help="New profile name (or 'none' to unassign)",
+    )
+    p_reassign.add_argument(
+        "--reclaim", action="store_true",
+        help="Release any active claim before reassigning (required if task is running)",
+    )
+    p_reassign.add_argument(
+        "--reason", default=None,
+        help="Human-readable reason (recorded on the reclaimed event)",
+    )
+
+    # --- diagnostics (board-wide health) ---
+    p_diag = sub.add_parser(
+        "diagnostics",
+        aliases=["diag"],
+        help="List active diagnostics on the current board",
+    )
+    p_diag.add_argument(
+        "--severity",
+        choices=["warning", "error", "critical"],
+        default=None,
+        help="Only show diagnostics at or above this severity",
+    )
+    p_diag.add_argument(
+        "--task",
+        default=None,
+        help="Only show diagnostics for one task id",
+    )
+    p_diag.add_argument(
+        "--json", action="store_true",
+        help="Emit JSON (structured) instead of the default human table",
+    )
+
    # --- link / unlink ---
    p_link = sub.add_parser("link", help="Add a parent->child dependency")
    p_link.add_argument("parent_id")
@@ -261,6 +394,27 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
                            help='JSON dict of structured facts (e.g. \'{"changed_files": [...], '
                                 '"tests_run": 12}\'). Stored on the closing run.')

+    p_edit = sub.add_parser(
+        "edit",
+        help="Edit recovery fields on an already-completed task",
+    )
+    p_edit.add_argument("task_id")
+    p_edit.add_argument(
+        "--result",
+        required=True,
+        help="Backfilled task result text for a done task",
+    )
+    p_edit.add_argument(
+        "--summary",
+        default=None,
+        help="Structured handoff summary. Falls back to --result if omitted.",
+    )
+    p_edit.add_argument(
+        "--metadata",
+        default=None,
+        help="JSON dict of structured facts to store on the latest completed run.",
+    )
+
    p_block = sub.add_parser("block", help="Mark one or more tasks blocked")
    p_block.add_argument("task_id")
    p_block.add_argument("reason", nargs="*", help="Reason (also appended as a comment)")
@@ -442,6 +596,38 @@ def kanban_command(args: argparse.Namespace) -> int:
            )
        return 0

+    # `--board <slug>` applies to every subcommand below by way of an
+    # env-var pin for the duration of this call. Using HERMES_KANBAN_BOARD
+    # (rather than threading `board=` through 50+ kb.connect() sites)
+    # keeps the patch small and inherits the exact same resolution the
+    # dispatcher uses for workers — consistency is a feature here.
+    board_override = getattr(args, "board", None)
+    if board_override:
+        try:
+            normed = kb._normalize_board_slug(board_override)
+        except ValueError as exc:
+            print(f"kanban: {exc}", file=sys.stderr)
+            return 2
+        if not normed:
+            print("kanban: --board requires a slug", file=sys.stderr)
+            return 2
+        # Boards other than 'default' must already exist — typoed slugs
+        # would otherwise silently create an empty board.
+        if normed != kb.DEFAULT_BOARD and not kb.board_exists(normed):
+            print(
+                f"kanban: board {normed!r} does not exist. "
+                f"Create it with `hermes kanban boards create {normed}`.",
+                file=sys.stderr,
+            )
+            return 1
+        os.environ["HERMES_KANBAN_BOARD"] = normed
+
+    # Boards management doesn't touch the DB at all — dispatch early so
+    # fresh installs that haven't initialized any DB can still use
+    # `hermes kanban boards create …`.
+    if action == "boards":
+        return _dispatch_boards(args)
+
    # Auto-initialize the DB before dispatching any subcommand. init_db
    # is idempotent, so running it every invocation is cheap (one
    # SELECT against sqlite_master when tables already exist) and
@@ -462,11 +648,16 @@ def kanban_command(args: argparse.Namespace) -> int:
        "ls":       _cmd_list,
        "show":     _cmd_show,
        "assign":   _cmd_assign,
+        "reclaim":  _cmd_reclaim,
+        "reassign": _cmd_reassign,
+        "diagnostics": _cmd_diagnostics,
+        "diag":     _cmd_diagnostics,
        "link":     _cmd_link,
        "unlink":   _cmd_unlink,
        "claim":    _cmd_claim,
        "comment":  _cmd_comment,
        "complete": _cmd_complete,
+        "edit":     _cmd_edit,
        "block":    _cmd_block,
        "unblock":  _cmd_unblock,
        "archive":  _cmd_archive,
@@ -513,6 +704,185 @@ def _profile_author() -> str:
        return "user"


+# ---------------------------------------------------------------------------
+# Boards management (hermes kanban boards …)
+# ---------------------------------------------------------------------------
+
+def _dispatch_boards(args: argparse.Namespace) -> int:
+    """Route ``hermes kanban boards <action>`` to its handler.
+
+    Board management works on the filesystem (board directories, the
+    ``current`` pointer, ``board.json``) rather than on a per-board SQLite
+    DB, so it is kept apart from the task-level commands and stays usable
+    on a fresh HERMES_HOME that has never run ``kanban init``.
+
+    A missing or empty ``boards_action`` is treated as ``list``. Returns the
+    handler's exit code, or 2 for an unrecognised action.
+    """
+    action = getattr(args, "boards_action", None) or "list"
+    # (accepted spellings, handler) pairs, checked in order.
+    routes = (
+        (("list", "ls"), _cmd_boards_list),
+        (("create", "new"), _cmd_boards_create),
+        (("rm", "remove", "delete"), _cmd_boards_rm),
+        (("switch", "use"), _cmd_boards_switch),
+        (("show", "current"), _cmd_boards_show),
+        (("rename",), _cmd_boards_rename),
+    )
+    for spellings, handler in routes:
+        if action in spellings:
+            return handler(args)
+    print(f"kanban boards: unknown action {action!r}", file=sys.stderr)
+    return 2
+
+
+def _board_task_counts(slug: str) -> dict[str, int]:
+    """Tally tasks per status for board *slug*.
+
+    Returns ``{}`` when the board's DB file does not exist or when anything
+    goes wrong while querying it, so callers can use the result without
+    their own error handling.
+    """
+    try:
+        if not kb.kanban_db_path(board=slug).exists():
+            return {}
+        with kb.connect(board=slug) as conn:
+            cursor = conn.execute(
+                "SELECT status, COUNT(*) AS n FROM tasks GROUP BY status"
+            )
+            tallies = cursor.fetchall()
+        per_status: dict[str, int] = {}
+        for row in tallies:
+            per_status[row["status"]] = int(row["n"])
+        return per_status
+    except Exception:
+        return {}
+
+
+def _cmd_boards_list(args: argparse.Namespace) -> int:
+    """Print every board (optionally including archived ones) with task counts.
+
+    Each board dict is annotated in place with ``is_current``, ``counts``
+    and ``total``. With ``--json`` the annotated list is dumped as JSON;
+    otherwise a fixed-width table is printed, followed by the current board
+    and a switch hint when more than one board is listed. Always returns 0.
+    """
+    boards = kb.list_boards(include_archived=bool(getattr(args, "all", False)))
+    current = kb.get_current_board()
+
+    # Annotate every entry before choosing an output format so the JSON
+    # and table views share the same data.
+    for board in boards:
+        board["is_current"] = (board["slug"] == current)
+        board["counts"] = _board_task_counts(board["slug"])
+        board["total"] = sum(board["counts"].values())
+
+    if getattr(args, "json", False):
+        print(json.dumps(boards, indent=2, ensure_ascii=False))
+        return 0
+
+    if not boards:
+        print("(no boards — create one with `hermes kanban boards create <slug>`)")
+        return 0
+
+    # Table columns: current-board marker, slug, display name, per-status counts.
+    print(f"{'':2s}  {'SLUG':24s}  {'NAME':28s}  COUNTS")
+    for board in boards:
+        if board["is_current"]:
+            marker = "●"
+        else:
+            marker = " "
+        tallies = board["counts"] or {}
+        rendered = ", ".join(f"{k}={v}" for k, v in sorted(tallies.items()))
+        counts_str = rendered or "(empty)"
+        name = board.get("name") or ""
+        if board.get("archived"):
+            name += " [archived]"
+        print(f"{marker:2s}  {board['slug']:24s}  {name:28s}  {counts_str}")
+
+    print()
+    print(f"Current board: {current}")
+    if len(boards) > 1:
+        print("Switch boards with `hermes kanban boards switch <slug>`.")
+    return 0
+
+
+def _cmd_boards_create(args: argparse.Namespace) -> int:
+    """Handle ``boards create``: normalise the slug, create the board, report.
+
+    ``kb.create_board`` is called in every case. The message says
+    "already exists" when the board was present beforehand and is not the
+    default board, and "created" otherwise. With ``--switch`` the board is
+    also made current.
+
+    Returns 2 for an invalid or empty slug, 0 otherwise.
+    """
+    try:
+        slug = kb._normalize_board_slug(args.slug)
+    except ValueError as exc:
+        print(f"kanban boards create: {exc}", file=sys.stderr)
+        return 2
+    if not slug:
+        print("kanban boards create: slug is required", file=sys.stderr)
+        return 2
+
+    # Sample existence before create_board() runs, otherwise the answer
+    # would always be "yes".
+    preexisting = kb.board_exists(slug) and slug != kb.DEFAULT_BOARD
+    meta = kb.create_board(
+        slug,
+        name=args.name,
+        description=args.description,
+        icon=args.icon,
+        color=args.color,
+    )
+
+    outcome = "already exists" if preexisting else "created"
+    print(f"Board {meta['slug']!r} {outcome}.")
+    print(f"  Display name: {meta.get('name', '')}")
+    print(f"  DB path:      {meta['db_path']}")
+
+    if not getattr(args, "switch", False):
+        print(f"  Use `hermes kanban boards switch {meta['slug']}` to make it current.")
+        return 0
+    kb.set_current_board(meta["slug"])
+    print(f"  Switched to {meta['slug']!r}.")
+    return 0
+
+
+def _cmd_boards_rm(args: argparse.Namespace) -> int:
+    """Handle ``boards rm``: archive the board, or remove it when ``--delete`` is set.
+
+    Returns 1 (with the error on stderr) when ``kb.remove_board`` raises
+    ``ValueError``, 0 otherwise.
+    """
+    hard_delete = getattr(args, "delete", False)
+    try:
+        outcome = kb.remove_board(args.slug, archive=not hard_delete)
+    except ValueError as exc:
+        print(f"kanban boards rm: {exc}", file=sys.stderr)
+        return 1
+
+    if outcome["action"] != "archived":
+        print(f"Board {outcome['slug']!r} deleted.")
+        return 0
+
+    print(f"Board {outcome['slug']!r} archived → {outcome['new_path']}")
+    print("Recover by moving the directory back to <root>/kanban/boards/<slug>/.")
+    return 0
+
+
+def _cmd_boards_switch(args: argparse.Namespace) -> int:
+    """Handle ``boards switch``: make an existing board the current one.
+
+    Returns 2 for an invalid or empty slug, 1 when the board does not
+    exist, and 0 after the current board has been updated.
+    """
+    try:
+        target = kb._normalize_board_slug(args.slug)
+    except ValueError as exc:
+        print(f"kanban boards switch: {exc}", file=sys.stderr)
+        return 2
+
+    if not target:
+        print("kanban boards switch: slug is required", file=sys.stderr)
+        return 2
+
+    if kb.board_exists(target):
+        kb.set_current_board(target)
+        print(f"Active board is now {target!r}.")
+        return 0
+
+    # Unknown board: refuse, and point at the create command.
+    print(
+        f"kanban boards switch: board {target!r} does not exist. "
+        f"Create it with `hermes kanban boards create {target}`.",
+        file=sys.stderr,
+    )
+    return 1
+
+
+def _cmd_boards_show(args: argparse.Namespace) -> int:
+    """Handle ``boards show``: print the current board's metadata and task totals.
+
+    The description line is printed only when the metadata has a non-empty
+    description, and the per-status breakdown only when there are counts.
+    Always returns 0.
+    """
+    current = kb.get_current_board()
+    meta = kb.read_board_metadata(current)
+    counts = _board_task_counts(current)
+    total = sum(counts.values())
+
+    print(f"Current board: {current}")
+    print(f"  Display name: {meta.get('name', '')}")
+    description = meta.get("description")
+    if description:
+        print(f"  Description:  {description}")
+    print(f"  DB path:      {meta['db_path']}")
+
+    tasks_line = f"  Tasks:        {total} total"
+    if counts:
+        breakdown = ", ".join(f"{k}={v}" for k, v in sorted(counts.items()))
+        tasks_line += f" ({breakdown})"
+    print(tasks_line)
+    return 0
+
+
+def _cmd_boards_rename(args: argparse.Namespace) -> int:
+    """Handle ``boards rename``: change a board's display name.
+
+    Only the ``name`` metadata field is written; the slug itself is not
+    touched. Returns 2 for an invalid slug, 1 when the board does not
+    exist, and 0 on success.
+    """
+    try:
+        slug = kb._normalize_board_slug(args.slug)
+    except ValueError as exc:
+        print(f"kanban boards rename: {exc}", file=sys.stderr)
+        return 2
+
+    if not (slug and kb.board_exists(slug)):
+        # The message echoes the slug exactly as the user typed it.
+        print(f"kanban boards rename: board {args.slug!r} does not exist",
+              file=sys.stderr)
+        return 1
+
+    updated = kb.write_board_metadata(slug, name=args.name)
+    print(f"Board {slug!r} renamed to {updated['name']!r}.")
+    return 0
+
+
+# ---------------------------------------------------------------------------
+
+
 def _parse_duration(val) -> Optional[int]:
    """Parse ``30s`` / ``5m`` / ``2h`` / ``1d`` or a raw integer → seconds.

@@ -573,7 +943,12 @@ def _cmd_init(args: argparse.Namespace) -> int:

 def _cmd_heartbeat(args: argparse.Namespace) -> int:
    with kb.connect() as conn:
-        ok = kb.heartbeat_worker(conn, args.task_id, note=getattr(args, "note", None))
+        ok = kb.heartbeat_worker(
+            conn,
+            args.task_id,
+            note=getattr(args, "note", None),
+            expected_run_id=_worker_run_id_for(args.task_id),
+        )
    if not ok:
        print(f"cannot heartbeat {args.task_id} (not running?)", file=sys.stderr)
        return 1
@@ -662,6 +1037,21 @@ def _cmd_list(args: argparse.Namespace) -> int:
    if getattr(args, "json", False):
        print(json.dumps([_task_to_dict(t) for t in tasks], indent=2, ensure_ascii=False))
        return 0
+    # Passive discoverability: when the user has multiple boards, surface
+    # which one they're looking at in the list header. Single-board users
+    # never see this — the feature stays invisible until you opt in.
+    try:
+        all_boards = kb.list_boards(include_archived=False)
+    except Exception:
+        all_boards = []
+    if len(all_boards) > 1:
+        current = kb.get_current_board()
+        other_count = len(all_boards) - 1
+        print(
+            f"Board: {current} "
+            f"({other_count} other board{'s' if other_count != 1 else ''} — "
+            f"`hermes kanban boards list`)\n"
+        )
    if not tasks:
        print("(no matching tasks)")
        return 0
@@ -681,10 +1071,16 @@ def _cmd_show(args: argparse.Namespace) -> int:
        parents = kb.parent_ids(conn, args.task_id)
        children = kb.child_ids(conn, args.task_id)
        runs = kb.list_runs(conn, args.task_id)
+        # Workers hand off via ``task_runs.summary`` (kanban-worker skill);
+        # ``tasks.result`` is left NULL unless the caller explicitly passed
+        # ``result=``. Surfacing the latest summary here keeps ``show`` from
+        # looking like a no-op when the worker actually did real work.
+        latest_summary = kb.latest_summary(conn, args.task_id)

    if getattr(args, "json", False):
        payload = {
            "task": _task_to_dict(task),
+            "latest_summary": latest_summary,
            "parents": parents,
            "children": children,
            "comments": [
@@ -730,6 +1126,31 @@ def _cmd_show(args: argparse.Namespace) -> int:
    if task.skills:
        print(f"  skills:    {', '.join(task.skills)}")
    print(f"  created:   {_fmt_ts(task.created_at)} by {task.created_by or '-'}")
+
+    # Diagnostics section — surface active distress signals at the top
+    # of show output so CLI users see them before scrolling through
+    # comments / runs.
+    from hermes_cli import kanban_diagnostics as kd
+    diags = kd.compute_task_diagnostics(task, events, runs)
+    if diags:
+        sev_marker = {"warning": "⚠", "error": "!!", "critical": "!!!"}
+        print(f"\n  Diagnostics ({len(diags)}):")
+        for d in diags:
+            print(f"    {sev_marker.get(d.severity, '?')} [{d.severity}] {d.title}")
+            if d.data:
+                bits = []
+                for k, v in d.data.items():
+                    if isinstance(v, list):
+                        bits.append(f"{k}={','.join(str(x) for x in v)}")
+                    else:
+                        bits.append(f"{k}={v}")
+                if bits:
+                    print(f"       data: {' | '.join(bits)}")
+            # Only show suggested actions in show output to keep it tight;
+            # full list is available via `kanban diagnostics --task <id>`.
+            for a in d.actions:
+                if a.suggested:
+                    print(f"       → {a.label}")
    if task.started_at:
        print(f"  started:   {_fmt_ts(task.started_at)}")
    if task.completed_at:
@@ -746,6 +1167,13 @@ def _cmd_show(args: argparse.Namespace) -> int:
        print()
        print("Result:")
        print(task.result)
+    elif latest_summary:
+        # Worker handoff lives on the latest run, not on tasks.result.
+        # Surface it at top-level so a glance at ``hermes kanban show <id>``
+        # tells you what the worker did even if tasks.result is empty.
+        print()
+        print("Latest summary:")
+        print(latest_summary)
    if comments:
        print()
        print(f"Comments ({len(comments)}):")
@@ -787,6 +1215,167 @@ def _cmd_assign(args: argparse.Namespace) -> int:
    return 0


+def _cmd_reclaim(args: argparse.Namespace) -> int:
+    """Handle ``kanban reclaim``: ask the kanban DB layer to reclaim a task.
+
+    Reads ``args.task_id`` and the optional ``args.reason`` and forwards
+    them to ``kb.reclaim_task``.
+
+    Returns 0 after printing a confirmation, or 1 (with a message on
+    stderr) when ``kb.reclaim_task`` reports failure.
+    """
+    task_id = args.task_id
+    why = getattr(args, "reason", None)
+    with kb.connect() as conn:
+        reclaimed = kb.reclaim_task(conn, task_id, reason=why)
+    if reclaimed:
+        print(f"Reclaimed {task_id}")
+        return 0
+    print(
+        f"cannot reclaim {task_id} (not running or unknown id)",
+        file=sys.stderr,
+    )
+    return 1
+
+
+def _cmd_reassign(args: argparse.Namespace) -> int:
+    """Handle ``kanban reassign``: hand a task to a different profile.
+
+    A profile argument of ``none``, ``-`` or ``null`` (case-insensitive)
+    is forwarded to ``kb.reassign_task`` as ``None`` and reported as
+    ``(unassigned)``. When ``args.reclaim`` is truthy the DB layer is
+    called with ``reclaim_first=True``.
+
+    Returns 0 on success, 1 (message on stderr) when ``kb.reassign_task``
+    reports failure.
+    """
+    clears_assignee = args.profile.lower() in ("none", "-", "null")
+    profile = None if clears_assignee else args.profile
+    wants_reclaim = getattr(args, "reclaim", False)
+    with kb.connect() as conn:
+        reassigned = kb.reassign_task(
+            conn,
+            args.task_id,
+            profile,
+            reclaim_first=bool(wants_reclaim),
+            reason=getattr(args, "reason", None),
+        )
+    if reassigned:
+        suffix = " (claim reclaimed)" if wants_reclaim else ""
+        target = profile or "(unassigned)"
+        print(f"Reassigned {args.task_id} to {target}" + suffix)
+        return 0
+    print(
+        f"cannot reassign {args.task_id} "
+        f"(unknown id, or still running — pass --reclaim to release first)",
+        file=sys.stderr,
+    )
+    return 1
+
+
+def _cmd_diagnostics(args: argparse.Namespace) -> int:
+    """List active diagnostics on the board. Wraps the same rule engine
+    the dashboard uses, so CLI output matches what the UI shows.
+
+    Options read from ``args`` (all optional):
+
+    * ``task`` — restrict to one task id; otherwise every non-archived
+      task is scanned (fleet mode).
+    * ``severity`` — keep only diagnostics whose severity equals this
+      value exactly.
+    * ``json`` — print a JSON array instead of the human-readable summary.
+
+    Returns 0 on success (including "nothing to report") and 1 when
+    ``task`` names an unknown task.
+    """
+    from hermes_cli import kanban_diagnostics as kd
+
+    # SQLite limits the number of bound parameters per statement (999 by
+    # default before 3.32.0), so ``IN (?, ?, ...)`` lookups over task ids
+    # are issued in batches rather than as one statement for the whole
+    # board.
+    id_batch_size = 500
+
+    def _select_for_ids(conn, sql_template: str, ids):
+        """Yield the rows of ``sql_template`` for ``ids``, one batch of
+        ids at a time. ``sql_template`` carries a ``{marks}`` slot that
+        receives the ``?,?,...`` placeholder list."""
+        ids = list(ids)
+        for start in range(0, len(ids), id_batch_size):
+            batch = ids[start:start + id_batch_size]
+            marks = ",".join("?" * len(batch))
+            yield from conn.execute(
+                sql_template.format(marks=marks), tuple(batch)
+            )
+
+    with kb.connect() as conn:
+        # Either one-task mode or fleet mode.
+        if getattr(args, "task", None):
+            task = kb.get_task(conn, args.task)
+            if task is None:
+                print(f"no such task: {args.task}", file=sys.stderr)
+                return 1
+            diags_by_task = {
+                args.task: kd.compute_task_diagnostics(
+                    task,
+                    kb.list_events(conn, args.task),
+                    kb.list_runs(conn, args.task),
+                )
+            }
+        else:
+            # Fleet mode: pull all non-archived tasks + their events/runs.
+            rows = list(conn.execute(
+                "SELECT * FROM tasks WHERE status != 'archived'"
+            ).fetchall())
+            ids = [r["id"] for r in rows]
+            # All events/runs of one task fall into the same id batch, so
+            # the per-batch ORDER BY keeps every per-task list in id order.
+            ev_by = {i: [] for i in ids}
+            for row in _select_for_ids(
+                conn,
+                "SELECT * FROM task_events WHERE task_id IN ({marks}) ORDER BY id",
+                ids,
+            ):
+                ev_by.setdefault(row["task_id"], []).append(row)
+            run_by = {i: [] for i in ids}
+            for row in _select_for_ids(
+                conn,
+                "SELECT * FROM task_runs WHERE task_id IN ({marks}) ORDER BY id",
+                ids,
+            ):
+                run_by.setdefault(row["task_id"], []).append(row)
+            diags_by_task = {}
+            for r in rows:
+                tid = r["id"]
+                dl = kd.compute_task_diagnostics(r, ev_by.get(tid, []), run_by.get(tid, []))
+                if dl:
+                    diags_by_task[tid] = dl
+
+        # Severity filter: drop non-matching diagnostics, and drop tasks
+        # left with none.
+        sev = getattr(args, "severity", None)
+        if sev:
+            for tid in list(diags_by_task.keys()):
+                kept = [d for d in diags_by_task[tid] if d.severity == sev]
+                if kept:
+                    diags_by_task[tid] = kept
+                else:
+                    del diags_by_task[tid]
+
+        # Map task_id → title/status/assignee for the table output.
+        meta: dict[str, dict] = {}
+        for r in _select_for_ids(
+            conn,
+            "SELECT id, title, status, assignee FROM tasks WHERE id IN ({marks})",
+            diags_by_task.keys(),
+        ):
+            meta[r["id"]] = {
+                "title": r["title"], "status": r["status"],
+                "assignee": r["assignee"],
+            }
+
+    if getattr(args, "json", False):
+        out_json = [
+            {
+                "task_id": tid,
+                **meta.get(tid, {}),
+                "diagnostics": [d.to_dict() for d in dl],
+            }
+            for tid, dl in diags_by_task.items()
+        ]
+        print(json.dumps(out_json, indent=2, ensure_ascii=False))
+        return 0
+
+    if not diags_by_task:
+        print("No active diagnostics on this board.")
+        return 0
+
+    # Human-readable summary: grouped by task, severity-marked, with
+    # suggested actions inline.
+    sev_marker = {"warning": "⚠", "error": "!!", "critical": "!!!"}
+    total = sum(len(dl) for dl in diags_by_task.values())
+    print(
+        f"{total} active diagnostic(s) across "
+        f"{len(diags_by_task)} task(s):\n"
+    )
+    for tid, dl in diags_by_task.items():
+        m = meta.get(tid, {})
+        title = m.get("title") or "(untitled)"
+        status = m.get("status") or "?"
+        assignee = m.get("assignee") or "(unassigned)"
+        print(f"  {tid}  {status:8s}  @{assignee:18s}  {title}")
+        for d in dl:
+            print(f"    {sev_marker.get(d.severity, '?')} [{d.severity}] {d.kind}: {d.title}")
+            if d.data:
+                # Compact key=value pairs on one line; list values are
+                # comma-joined.
+                bits = []
+                for k, v in d.data.items():
+                    if isinstance(v, list):
+                        bits.append(f"{k}={','.join(str(x) for x in v)}")
+                    else:
+                        bits.append(f"{k}={v}")
+                if bits:
+                    print(f"       data: {' | '.join(bits)}")
+            # Only actions flagged as suggested are printed.
+            for a in d.actions:
+                if a.suggested:
+                    print(f"       → {a.label}")
+        print()
+    return 0
+
+
 def _cmd_link(args: argparse.Namespace) -> int:
    with kb.connect() as conn:
        kb.link_tasks(conn, args.parent_id, args.child_id)
@@ -835,6 +1424,18 @@ def _cmd_comment(args: argparse.Namespace) -> int:
    return 0


+def _worker_run_id_for(task_id: str) -> Optional[int]:
+    """Return the run id from the environment when it belongs to ``task_id``.
+
+    The value of ``HERMES_KANBAN_RUN_ID`` is used only if
+    ``HERMES_KANBAN_TASK`` equals ``task_id``. Returns None when the task
+    does not match, the run id is unset or empty, or it is not an integer.
+    """
+    env = os.environ
+    run_id_text = None
+    if env.get("HERMES_KANBAN_TASK") == task_id:
+        run_id_text = env.get("HERMES_KANBAN_RUN_ID")
+    if run_id_text:
+        try:
+            return int(run_id_text)
+        except ValueError:
+            pass
+    return None
+
+
 def _cmd_complete(args: argparse.Namespace) -> int:
    """Mark one or more tasks done. Supports a single id or a list."""
    ids = list(args.task_ids or [])
@@ -871,6 +1472,7 @@ def _cmd_complete(args: argparse.Namespace) -> int:
                result=args.result,
                summary=summary,
                metadata=metadata,
+                expected_run_id=_worker_run_id_for(tid),
            ):
                failed.append(tid)
                print(f"cannot complete {tid} (unknown id or terminal state)", file=sys.stderr)
@@ -879,6 +1481,34 @@ def _cmd_complete(args: argparse.Namespace) -> int:
    return 0 if not failed else 1


+def _cmd_edit(args: argparse.Namespace) -> int:
+    """Handle ``kanban edit``: rewrite the result of a task via the DB layer.
+
+    ``args.metadata``, when given and non-empty, must be JSON text that
+    decodes to an object. ``args.result``, the optional ``args.summary``
+    and the decoded metadata are forwarded to
+    ``kb.edit_completed_task_result``.
+
+    Returns 0 on success, 1 when the DB layer refuses the edit, and 2
+    when ``--metadata`` is not a valid JSON object.
+    """
+    metadata = None
+    meta_text = getattr(args, "metadata", None)
+    if meta_text:
+        try:
+            parsed = json.loads(meta_text)
+        except ValueError as exc:  # json.JSONDecodeError subclasses ValueError
+            print(f"kanban: --metadata: {exc}", file=sys.stderr)
+            return 2
+        if not isinstance(parsed, dict):
+            print("kanban: --metadata: must be a JSON object", file=sys.stderr)
+            return 2
+        metadata = parsed
+    with kb.connect() as conn:
+        edited = kb.edit_completed_task_result(
+            conn,
+            args.task_id,
+            result=args.result,
+            summary=getattr(args, "summary", None),
+            metadata=metadata,
+        )
+        if not edited:
+            print(
+                f"cannot edit {args.task_id} (unknown id or task is not done)",
+                file=sys.stderr,
+            )
+            return 1
+    print(f"Edited {args.task_id}")
+    return 0
+
+
 def _cmd_block(args: argparse.Namespace) -> int:
    reason = " ".join(args.reason).strip() if args.reason else None
    author = _profile_author()
@@ -888,7 +1518,12 @@ def _cmd_block(args: argparse.Namespace) -> int:
        for tid in ids:
            if reason:
                kb.add_comment(conn, tid, author, f"BLOCKED: {reason}")
-            if not kb.block_task(conn, tid, reason=reason):
+            if not kb.block_task(
+                conn,
+                tid,
+                reason=reason,
+                expected_run_id=_worker_run_id_for(tid),
+            ):
                failed.append(tid)
                print(f"cannot block {tid}", file=sys.stderr)
            else:
@@ -966,6 +1601,7 @@ def _cmd_dispatch(args: argparse.Namespace) -> int:
                for (tid, who, ws) in res.spawned
            ],
            "skipped_unassigned": res.skipped_unassigned,
+            "skipped_nonspawnable": res.skipped_nonspawnable,
        }, indent=2))
        return 0
    print(f"Reclaimed:    {res.reclaimed}")
@@ -985,6 +1621,11 @@ def _cmd_dispatch(args: argparse.Namespace) -> int:
        print(f"  - {tid}  ->  {who}  @ {ws or '-'}{tag}")
    if res.skipped_unassigned:
        print(f"Skipped (unassigned): {', '.join(res.skipped_unassigned)}")
+    if res.skipped_nonspawnable:
+        print(
+            f"Skipped (non-spawnable assignee — terminal lane, OK): "
+            f"{', '.join(res.skipped_nonspawnable)}"
+        )
    return 0


@@ -1096,16 +1737,18 @@ def _cmd_daemon(args: argparse.Namespace) -> int:
            )

    def _ready_queue_nonempty() -> bool:
-        """Cheap SELECT — just asks whether there's at least one ready
-        task with an assignee that the dispatcher could have picked up."""
+        """Cheap probe — is there at least one ready+assigned+unclaimed
+        task whose assignee maps to a real Hermes profile (i.e. one the
+        dispatcher would actually try to spawn for)?
+
+        Filters out tasks assigned to control-plane lanes
+        (e.g. ``orion-cc``, ``orion-research``) that are pulled by
+        terminals via ``claim_task`` directly — those are correctly idle
+        from the dispatcher's perspective, not stuck.
+        """
        try:
            with kb.connect() as conn:
-                row = conn.execute(
-                    "SELECT 1 FROM tasks "
-                    "WHERE status = 'ready' AND assignee IS NOT NULL "
-                    "    AND claim_lock IS NULL LIMIT 1"
-                ).fetchone()
-                return row is not None
+                return kb.has_spawnable_ready(conn)
        except Exception:
            return False

@@ -0,0 +1,649 @@
+"""Kanban diagnostics — structured, actionable distress signals for tasks.
+
+A ``Diagnostic`` is a machine-readable description of something that's wrong
+with a kanban task: a hallucinated card id, a spawn crash-loop, a task
+stuck blocked for too long, etc. Each one carries:
+
+* A **kind** (canonical code; UI/tests match on this).
+* A **severity** (``warning`` / ``error`` / ``critical``).
+* A **title** (one-line human description) and **detail** (longer text).
+* A list of **suggested actions** — structured entries the dashboard
+  turns into buttons and the CLI turns into hints.
+
+Rules run over (task, recent events, recent runs) and emit diagnostics.
+They are stateless and read-only — no DB writes. Callers compute
+diagnostics on demand (on ``/board`` load, ``/tasks/:id`` fetch, or
+``hermes kanban diagnostics``).
+
+Design goals:
+
+* Fixable-on-the-operator's-side signals only (missing config, phantom
+  ids, crash loop). Not "the provider returned 502 once" — that's a
+  transient runtime blip, not a diagnostic.
+* Recoverable: every diagnostic comes with at least one suggested
+  recovery action the operator can actually take from the UI.
+* Auto-clearing: when the underlying failure mode resolves (a clean
+  ``completed`` event arrives, a spawn succeeds, the task gets
+  unblocked), the diagnostic stops firing. The audit event trail stays.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Callable, Iterable, Optional
+import json
+import time
+
+
+# Severity rungs, ordered least → most urgent. The UI colors them
+# amber (warning), orange (error), red (critical). Sorted outputs put
+# critical first so operators see the worst fires at the top.
+SEVERITY_ORDER = ("warning", "error", "critical")
+
+
+@dataclass
+class DiagnosticAction:
+    """One recovery step offered alongside a diagnostic.
+
+    ``kind`` selects how the UI and the CLI present the action:
+
+    * ``reclaim`` / ``reassign`` — POST to the matching /tasks/:id/*
+      endpoint; the dashboard wires these into its recovery popover.
+    * ``unblock`` — PATCH status back to ``ready`` (stuck-blocked
+      diagnostics).
+    * ``cli_hint`` — a shell command to print/copy (e.g.
+      ``hermes -p <profile> auth``). No HTTP side effect.
+    * ``open_docs`` — deep-link to the docs URL in ``payload.url``.
+    * ``comment`` — nudge the operator to add a comment (stuck-blocked
+      tasks that need human input).
+
+    ``suggested=True`` flags the recommended first step, which the UI
+    highlights. Several actions may be flagged when they are equally
+    valid.
+    """
+
+    kind: str
+    label: str
+    payload: dict = field(default_factory=dict)
+    suggested: bool = False
+
+    def to_dict(self) -> dict:
+        """Return the action as a plain dict; ``payload`` is the same
+        object held by the instance, not a copy."""
+        return dict(
+            kind=self.kind,
+            label=self.label,
+            payload=self.payload,
+            suggested=self.suggested,
+        )
+
+
+@dataclass
+class Diagnostic:
+    """A single active distress signal attached to a task."""
+
+    kind: str
+    severity: str  # "warning" | "error" | "critical"
+    title: str
+    detail: str
+    actions: list[DiagnosticAction] = field(default_factory=list)
+    first_seen_at: int = 0
+    last_seen_at: int = 0
+    count: int = 1
+    # Run this diagnostic is scoped to, if any. None = task-wide.
+    run_id: Optional[int] = None
+    # Structured extras for the UI (phantom ids, failure count).
+    data: dict = field(default_factory=dict)
+
+    def to_dict(self) -> dict:
+        """Return the diagnostic as a plain dict, with each action
+        converted through its own ``to_dict``."""
+        serialized_actions = [action.to_dict() for action in self.actions]
+        return dict(
+            kind=self.kind,
+            severity=self.severity,
+            title=self.title,
+            detail=self.detail,
+            actions=serialized_actions,
+            first_seen_at=self.first_seen_at,
+            last_seen_at=self.last_seen_at,
+            count=self.count,
+            run_id=self.run_id,
+            data=self.data,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Rule helpers
+# ---------------------------------------------------------------------------
+
+def _task_field(task, name, default=None):
+    """Fetch ``name`` from ``task`` whatever shape ``task`` has.
+
+    Works for mapping-style objects that expose ``keys()`` (sqlite3.Row,
+    plain dicts) and for attribute-style objects (dataclasses), so rule
+    functions need not branch on the type themselves.
+
+    Returns ``default`` when ``task`` is None or the field is absent. A
+    field that is present is returned as-is, even when its value is None.
+    """
+    if task is None:
+        return default
+    try:
+        # Mapping-style lookup first; any failure here (for example an
+        # object whose ``keys`` is not callable) falls through to the
+        # generic paths below.
+        has_key = hasattr(task, "keys") and name in task.keys()
+        if has_key:
+            return task[name]
+    except Exception:
+        pass
+    if not isinstance(task, dict):
+        return getattr(task, name, default)
+    return task.get(name, default)
+
+
+def _parse_payload(ev) -> dict:
+    """Return the event's ``payload`` as a dict.
+
+    The payload may already be a dict or may be a JSON string. Anything
+    else — a missing payload, undecodable JSON, or JSON that decodes to
+    a non-object such as a list or a number — yields an empty dict, so
+    callers can always use ``.get`` on the result.
+    """
+    raw = _task_field(ev, "payload", None)
+    if isinstance(raw, str):
+        try:
+            raw = json.loads(raw)
+        except Exception:
+            return {}
+    # Guard the declared return type: only a real mapping is passed on.
+    return raw if isinstance(raw, dict) else {}
+
+
+def _event_kind(ev) -> str:
+    """Return the event's ``kind``, or ``""`` when it is missing or falsy."""
+    kind = _task_field(ev, "kind", "")
+    return kind if kind else ""
+
+
+def _event_ts(ev) -> int:
+    """Return the event's ``created_at`` as an int, or 0 when it is
+    missing or falsy."""
+    stamp = _task_field(ev, "created_at", 0)
+    return int(stamp) if stamp else 0
+
+
+def _active_hallucination_events(
+    events: Iterable[Any],
+    kind: str,
+) -> list[Any]:
+    """Return the events of ``kind`` that no ``completed``/``edited``
+    event follows.
+
+    ``events`` must be in arrival order (sorted by id ascending). Every
+    clean event supersedes all matching events before it, so only the
+    matching events after the last clean event are returned, in their
+    original order.
+    """
+    ordered = list(events)
+    # Index just past the last clean event; 0 when there is none.
+    resume_at = 0
+    for position, ev in enumerate(ordered):
+        if _event_kind(ev) in ("completed", "edited"):
+            resume_at = position + 1
+    return [ev for ev in ordered[resume_at:] if _event_kind(ev) == kind]
+
+
+def _latest_clean_event_ts(events: Iterable[Any]) -> int:
+    """Return the newest timestamp among ``completed``/``edited`` events,
+    or 0 when there is none.
+
+    Intended for general "was this task ever cleanly completed" checks.
+    The hallucination rules rely on ``_active_hallucination_events``
+    instead, because they need strict event ordering rather than
+    timestamps.
+    """
+    clean_stamps = [
+        _event_ts(ev)
+        for ev in events
+        if _event_kind(ev) in ("completed", "edited")
+    ]
+    # The trailing 0 is the floor returned when nothing qualified.
+    return max(clean_stamps + [0])
+
+
+# Standard always-available actions. Every diagnostic can offer these as
+# fallbacks regardless of kind — they're the two baseline recovery
+# primitives the kernel supports.
+def _generic_recovery_actions(task: Any, *, running: bool) -> list[DiagnosticAction]:
+    """Build the baseline recovery actions offered with any diagnostic.
+
+    Always includes a ``reassign`` action whose payload carries
+    ``reclaim_first=running``. When ``running`` is truthy a ``reclaim``
+    action is placed before it. ``task`` is accepted but not read.
+    """
+    reassign = DiagnosticAction(
+        kind="reassign",
+        label="Reassign to different profile",
+        payload={"reclaim_first": running},
+    )
+    if not running:
+        return [reassign]
+    reclaim = DiagnosticAction(
+        kind="reclaim",
+        label="Reclaim task",
+        payload={},
+    )
+    return [reclaim, reassign]
+
+
+# ---------------------------------------------------------------------------
+# Rule implementations
+# ---------------------------------------------------------------------------
+
+# Each rule takes (task, events, runs, now_ts, config) and returns
+# zero or more Diagnostic instances. ``events`` / ``runs`` are lists of
+# kanban_db.Event / kanban_db.Run (or plain dicts matching the same
+# shape — for test convenience).
+
+RuleFn = Callable[[Any, list[Any], list[Any], int, dict], list[Diagnostic]]
+
+
+def _rule_hallucinated_cards(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """Report completions that were blocked for citing phantom cards.
+
+    Fires while there is at least one
+    ``completion_blocked_hallucination`` event with no later
+    ``completed``/``edited`` event: a worker called kanban_complete with
+    created_cards that didn't exist or weren't created by the completing
+    profile, and the task stayed in its prior state.
+
+    The diagnostic's ``data["phantom_ids"]`` lists the distinct ids from
+    the events' ``phantom_cards`` payload entries, in first-seen order.
+    ``runs``, ``now`` and ``cfg`` are unused.
+    """
+    blocked = _active_hallucination_events(events, "completion_blocked_hallucination")
+    if not blocked:
+        return []
+    seen_ids: list[str] = []
+    for ev in blocked:
+        for card_id in _parse_payload(ev).get("phantom_cards", []) or []:
+            if card_id not in seen_ids:
+                seen_ids.append(card_id)
+    is_running = _task_field(task, "status") == "running"
+    actions: list[DiagnosticAction] = [
+        DiagnosticAction(
+            kind="comment",
+            label="Add a comment explaining what to do",
+            suggested=False,
+        ),
+        *_generic_recovery_actions(task, running=is_running),
+    ]
+    detail = (
+        "The completing worker declared created_cards that either didn't "
+        "exist or weren't created by its profile. The completion was "
+        "blocked and the task stayed in its prior state. "
+        "Usually means the worker hallucinated ids instead of capturing "
+        "return values from kanban_create."
+    )
+    return [Diagnostic(
+        kind="hallucinated_cards",
+        severity="error",
+        title="Worker claimed cards that don't exist",
+        detail=detail,
+        actions=actions,
+        first_seen_at=_event_ts(blocked[0]),
+        last_seen_at=_event_ts(blocked[-1]),
+        count=len(blocked),
+        data={"phantom_ids": seen_ids},
+    )]
+
+
+def _rule_prose_phantom_refs(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """Warn when a completion summary mentions task ids that don't resolve.
+
+    Fires while there is at least one
+    ``suspected_hallucinated_references`` event with no later
+    ``completed``/``edited`` event. Advisory only: severity is always
+    ``warning``.
+
+    The diagnostic's ``data["phantom_refs"]`` lists the distinct ids from
+    the events' ``phantom_refs`` payload entries, in first-seen order.
+    ``runs``, ``now`` and ``cfg`` are unused.
+    """
+    suspects = _active_hallucination_events(events, "suspected_hallucinated_references")
+    if not suspects:
+        return []
+    unknown_refs: list[str] = []
+    for ev in suspects:
+        reported = _parse_payload(ev).get("phantom_refs", []) or []
+        for ref in reported:
+            if ref not in unknown_refs:
+                unknown_refs.append(ref)
+    is_running = _task_field(task, "status") == "running"
+    detail = (
+        "The completion summary mentions task ids that don't resolve "
+        "in this board's database. The completion itself succeeded, "
+        "but downstream consumers parsing the summary may be pointed "
+        "at cards that never existed."
+    )
+    diagnostic = Diagnostic(
+        kind="prose_phantom_refs",
+        severity="warning",
+        title="Completion summary references unknown task ids",
+        detail=detail,
+        actions=_generic_recovery_actions(task, running=is_running),
+        first_seen_at=_event_ts(suspects[0]),
+        last_seen_at=_event_ts(suspects[-1]),
+        count=len(suspects),
+        data={"phantom_refs": unknown_refs},
+    )
+    return [diagnostic]
+
+
+def _rule_repeated_failures(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """Flag a task whose consecutive-failure counter has reached the
+    configured threshold.
+
+    Fires regardless of the specific failure mode (spawn error, timeout,
+    crash): operationally they look the same — retries keep failing and
+    the operator needs to step in.
+
+    Threshold: ``cfg["failure_threshold"]``, falling back to the legacy
+    ``cfg["spawn_failure_threshold"]`` key, then to 3. That default sits
+    below the circuit breaker's default of 5 quoted in the detail text,
+    so the diagnostic appears while the dispatcher is still retrying.
+
+    The counter is read from the task's ``consecutive_failures`` field,
+    falling back to the legacy ``spawn_failures``; the error text comes
+    from ``last_failure_error``, falling back to ``last_spawn_error``.
+    Severity is ``critical`` once the counter reaches twice the
+    threshold, else ``error``. ``events`` is unused; ``now`` stamps
+    ``first_seen_at`` and ``last_seen_at``.
+    """
+    legacy_threshold = cfg.get("spawn_failure_threshold", 3)
+    threshold = int(cfg.get("failure_threshold", legacy_threshold))
+
+    # Prefer the unified column; legacy rows only carry the old names.
+    fail_count = _task_field(task, "consecutive_failures", None)
+    if fail_count is None:
+        fail_count = _task_field(task, "spawn_failures", 0)
+    if fail_count is None or fail_count < threshold:
+        return []
+    last_error = _task_field(task, "last_failure_error", None)
+    if last_error is None:
+        last_error = _task_field(task, "last_spawn_error", None)
+    assignee = _task_field(task, "assignee")
+
+    # The newest failing run decides how the title and the suggested
+    # action are worded.
+    runs_by_id = sorted(runs, key=lambda r: _task_field(r, "id", 0))
+    latest_outcome = next(
+        (
+            outcome
+            for outcome in (_task_field(r, "outcome") for r in reversed(runs_by_id))
+            if outcome in ("spawn_failed", "timed_out", "crashed")
+        ),
+        None,
+    )
+
+    actions: list[DiagnosticAction] = []
+    if latest_outcome == "spawn_failed":
+        # Spawn itself fails — most likely a profile setup problem. No
+        # profile-specific hint for a missing or "default" assignee.
+        if assignee and assignee != "default":
+            doctor_cmd = f"hermes -p {assignee} doctor"
+            auth_cmd = f"hermes -p {assignee} auth"
+            actions.append(DiagnosticAction(
+                kind="cli_hint",
+                label=f"Verify profile: {doctor_cmd}",
+                payload={"command": doctor_cmd},
+                suggested=True,
+            ))
+            actions.append(DiagnosticAction(
+                kind="cli_hint",
+                label=f"Fix profile auth: {auth_cmd}",
+                payload={"command": auth_cmd},
+            ))
+    elif latest_outcome in ("timed_out", "crashed"):
+        # The worker started and then died; point at its log.
+        task_id = _task_field(task, "id")
+        if task_id:
+            log_cmd = f"hermes kanban log {task_id}"
+            actions.append(DiagnosticAction(
+                kind="cli_hint",
+                label=f"Check logs: {log_cmd}",
+                payload={"command": log_cmd},
+                suggested=True,
+            ))
+    is_running = _task_field(task, "status") == "running"
+    actions.extend(_generic_recovery_actions(task, running=is_running))
+
+    if fail_count >= threshold * 2:
+        severity = "critical"
+    else:
+        severity = "error"
+
+    # Cap the error text at 500 characters, marking the cut with an
+    # ellipsis.
+    err_text = last_error.strip() if last_error else ""
+    if len(err_text) > 500:
+        err_snippet = err_text[:500] + "…"
+    else:
+        err_snippet = err_text
+
+    labels = {
+        "spawn_failed": "spawn",
+        "timed_out": "timeout",
+        "crashed": "crash",
+    }
+    outcome_label = labels.get(latest_outcome or "", "failure")
+
+    if not err_snippet:
+        title = f"Agent {outcome_label} x{fail_count} (no error recorded)"
+        detail = (
+            f"This task has failed {fail_count} times in a row "
+            f"(most recent: {outcome_label}) but no error text was "
+            f"captured. Check the suggested command or the worker log."
+        )
+    else:
+        first_line = err_snippet.splitlines()[0][:160]
+        title = f"Agent {outcome_label} x{fail_count}: {first_line}"
+        detail = (
+            f"This task has failed {fail_count} times in a row "
+            f"(most recent: {outcome_label}). Full last error:\n\n"
+            f"{err_snippet}\n\n"
+            f"The dispatcher will keep retrying until the consecutive-"
+            f"failures counter trips the circuit breaker (default 5), "
+            f"at which point the task auto-blocks. Fix the root cause "
+            f"and reclaim to retry."
+        )
+    return [Diagnostic(
+        kind="repeated_failures",
+        severity=severity,
+        title=title,
+        detail=detail,
+        actions=actions,
+        first_seen_at=now,
+        last_seen_at=now,
+        count=fail_count,
+        data={
+            "consecutive_failures": fail_count,
+            "most_recent_outcome": latest_outcome,
+            "last_error": last_error,
+        },
+    )]
+
+
+def _rule_repeated_crashes(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """The worker spawns fine but keeps crashing mid-run. Counts the
+    trailing ``crashed`` runs (newest first); reaching the threshold
+    means something about the task + profile combo is broken (OOM,
+    missing dependency, tool it needs is down).
+
+    Threshold: cfg["crash_threshold"] (default 2).
+
+    Narrower than ``repeated_failures`` — fires earlier (2 crashes vs 3
+    total failures) so the operator gets a crash-specific heads-up
+    before the unified rule kicks in. Suppresses itself when the
+    unified rule fires, to avoid double-flagging; the failure counter is
+    therefore read exactly as ``_rule_repeated_failures`` reads it
+    (``consecutive_failures``, falling back to legacy ``spawn_failures``).
+
+    Severity is ``critical`` once the streak reaches twice the
+    threshold, else ``error``. ``events`` is unused; ``now`` stamps
+    ``first_seen_at`` and ``last_seen_at``.
+    """
+    failure_threshold = int(cfg.get(
+        "failure_threshold",
+        cfg.get("spawn_failure_threshold", 3),
+    ))
+    # Same counter lookup as the unified rule, including the legacy
+    # column, so rows that only carry ``spawn_failures`` are suppressed
+    # here too instead of being flagged by both rules.
+    unified_counter = _task_field(task, "consecutive_failures", None)
+    if unified_counter is None:
+        unified_counter = _task_field(task, "spawn_failures", 0)
+    unified_counter = unified_counter or 0
+    # Unified rule will catch this — let it handle to avoid double fire.
+    if unified_counter >= failure_threshold:
+        return []
+
+    threshold = int(cfg.get("crash_threshold", 2))
+    ordered = sorted(runs, key=lambda r: _task_field(r, "id", 0))
+    # Count 'crashed' outcomes walking back from the newest run.
+    consecutive = 0
+    last_err = None
+    for r in reversed(ordered):
+        outcome = _task_field(r, "outcome")
+        if outcome == "crashed":
+            consecutive += 1
+            if last_err is None:
+                # Error text of the most recent crash that recorded one.
+                last_err = _task_field(r, "error")
+        elif outcome in ("completed", "reclaimed"):
+            # A success (or manual reclaim) breaks the streak.
+            break
+        else:
+            # Other outcomes (timed_out, blocked, spawn_failed, gave_up)
+            # aren't crash signals — don't count them, but they also
+            # don't break the crash streak.
+            continue
+    if consecutive < threshold:
+        return []
+    task_id = _task_field(task, "id")
+    actions: list[DiagnosticAction] = []
+    if task_id:
+        actions.append(DiagnosticAction(
+            kind="cli_hint",
+            label=f"Check logs: hermes kanban log {task_id}",
+            payload={"command": f"hermes kanban log {task_id}"},
+            suggested=True,
+        ))
+    running = _task_field(task, "status") == "running"
+    actions.extend(_generic_recovery_actions(task, running=running))
+    severity = "critical" if consecutive >= threshold * 2 else "error"
+    # Put the actual error up-front so operators see WHAT broke without
+    # having to open the logs. Truncate defensively — these can be huge
+    # (full tracebacks).
+    err_text = last_err.strip() if last_err else ""
+    err_snippet = err_text[:500] + "…" if len(err_text) > 500 else err_text
+    if err_snippet:
+        title = f"Agent crashed {consecutive}x: {err_snippet.splitlines()[0][:160]}"
+        detail = (
+            f"The last {consecutive} runs ended with outcome=crashed. "
+            f"Full last error:\n\n{err_snippet}"
+        )
+    else:
+        title = f"Agent crashed {consecutive}x (no error recorded)"
+        detail = (
+            f"The last {consecutive} runs ended with outcome=crashed but "
+            f"no error text was captured. Check the worker log for more."
+        )
+    return [Diagnostic(
+        kind="repeated_crashes",
+        severity=severity,
+        title=title,
+        detail=detail,
+        actions=actions,
+        first_seen_at=now,
+        last_seen_at=now,
+        count=consecutive,
+        data={"consecutive_crashes": consecutive, "last_error": last_err},
+    )]
+
+
+def _rule_stuck_in_blocked(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """Warn when a ``blocked`` task has sat untouched for too long.
+
+    Fires when the task's status is ``blocked``, its newest ``blocked``
+    event is at least ``cfg["blocked_stale_hours"]`` (default 24) old,
+    and no ``commented`` or ``unblocked`` event carries a later
+    timestamp. Nothing is reported when the task has no ``blocked``
+    event with a non-zero timestamp. ``runs`` is unused.
+    """
+    stale_after_hours = float(cfg.get("blocked_stale_hours", 24))
+    if _task_field(task, "status") != "blocked":
+        return []
+    # Newest ``blocked`` timestamp; the 0 floor doubles as "none found".
+    blocked_at = max(
+        [0] + [_event_ts(ev) for ev in events if _event_kind(ev) == "blocked"]
+    )
+    if blocked_at == 0:
+        return []
+    age_hours = (now - blocked_at) / 3600.0
+    if age_hours < stale_after_hours:
+        return []
+    # A later comment or unblock means someone already reacted.
+    touched_since = any(
+        _event_kind(ev) in ("commented", "unblocked") and _event_ts(ev) > blocked_at
+        for ev in events
+    )
+    if touched_since:
+        return []
+    whole_hours = int(age_hours)
+    nudge = DiagnosticAction(
+        kind="comment",
+        label="Add a comment / unblock the task",
+        suggested=True,
+    )
+    return [Diagnostic(
+        kind="stuck_in_blocked",
+        severity="warning",
+        title=f"Task has been blocked for {whole_hours}h",
+        detail=(
+            f"This task transitioned to blocked {whole_hours}h ago and "
+            f"has had no comments or unblock attempts since. Blocked tasks "
+            f"are waiting for human input — check the block reason and "
+            f"either unblock with feedback or answer with a comment."
+        ),
+        actions=[nudge],
+        first_seen_at=blocked_at,
+        last_seen_at=blocked_at,
+        count=1,
+        data={"blocked_at": blocked_at, "age_hours": round(age_hours, 1)},
+    )]
+
+
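+# Registry — order matters: compute_task_diagnostics sorts by severity,
+# then by most-recent last_seen_at; that sort is stable, so rules higher
+# on the list render first when both keys tie. Add new rules here.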
+_RULES: list[RuleFn] = [
+    _rule_hallucinated_cards,
+    _rule_prose_phantom_refs,
+    _rule_repeated_failures,
+    _rule_repeated_crashes,
+    _rule_stuck_in_blocked,
+]
+
+
+# Known kinds (for the UI's filter / legend / i18n keys). Update when
+# rules are added.
+DIAGNOSTIC_KINDS = (
+    "hallucinated_cards",
+    "prose_phantom_refs",
+    "repeated_failures",
+    "repeated_crashes",
+    "stuck_in_blocked",
+)
+
+
+DEFAULT_CONFIG = {
+    "failure_threshold": 3,
+    # Legacy alias accepted at read time by _rule_repeated_failures.
+    "spawn_failure_threshold": 3,
+    "crash_threshold": 2,
+    "blocked_stale_hours": 24,
+}
+
+
+def compute_task_diagnostics(
+    task,
+    events: list,
+    runs: list,
+    *,
+    now: Optional[int] = None,
+    config: Optional[dict] = None,
+) -> list[Diagnostic]:
+    """Run every rule against a single task's state and return a
+    severity-sorted list of active diagnostics.
+
+    ``now`` defaults to the current time (``time.time()``, truncated to
+    an int). ``config`` entries override ``DEFAULT_CONFIG`` key by key.
+
+    Sorting: critical first, then error, then warning (unrecognised
+    severities last); ties broken by most-recent ``last_seen_at``.
+
+    A rule that raises never propagates: its diagnostics are dropped
+    and the failure is logged at debug level.
+    """
+    import logging
+
+    now_ts = int(now if now is not None else time.time())
+    cfg = {**DEFAULT_CONFIG, **(config or {})}
+    out: list[Diagnostic] = []
+    for rule in _RULES:
+        try:
+            out.extend(rule(task, events, runs, now_ts, cfg))
+        except Exception:
+            # A broken rule must never crash the dashboard: drop its
+            # diagnostics rather than 500 a whole /board request, but
+            # leave a trace so the breakage can be found.
+            logging.getLogger(__name__).debug(
+                "kanban diagnostic rule %s raised; skipping it",
+                getattr(rule, "__name__", rule),
+                exc_info=True,
+            )
+            continue
+    severity_idx = {s: i for i, s in enumerate(SEVERITY_ORDER)}
+    out.sort(
+        key=lambda d: (
+            -severity_idx.get(d.severity, -1),
+            -(d.last_seen_at or 0),
+        )
+    )
+    return out
+
+
+def severity_of_highest(diagnostics: Iterable[Diagnostic]) -> Optional[str]:
+    """Return the most urgent severity among ``diagnostics``.
+
+    Severities are ranked by their position in ``SEVERITY_ORDER``;
+    values not listed there are ignored. Returns None when the input is
+    empty or holds no recognised severity. Handy for card badges that
+    need a single color.
+    """
+    ranked = [
+        (SEVERITY_ORDER.index(d.severity), d.severity)
+        for d in diagnostics
+        if d.severity in SEVERITY_ORDER
+    ]
+    if not ranked:
+        return None
+    _, top = max(ranked, key=lambda pair: pair[0])
+    return top
@@ -393,14 +393,21 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
    if provider in _AGGREGATOR_PROVIDERS:
        return _prepend_vendor(name)

-    # --- OpenCode Zen: Claude stays hyphenated; other models keep dots ---
-    if provider == "opencode-zen":
-        bare = _strip_matching_provider_prefix(name, provider)
-        if "/" in bare:
-            return bare
-        if bare.lower().startswith("claude-"):
-            return _dots_to_hyphens(bare)
-        return bare
+    # --- OpenCode Zen / OpenCode Go: flat-namespace resellers.
+    #     Their /v1/models API returns bare IDs only (no vendor prefix), and
+    #     the inference endpoint rejects vendor-prefixed names with HTTP 401
+    #     "Model not supported".  Strip ANY leading ``vendor/`` so config
+    #     entries like ``minimax/minimax-m2.7`` or ``deepseek/deepseek-v4-flash``
+    #     — commonly copied from aggregator slugs into fallback_model lists —
+    #     resolve to bare ``minimax-m2.7`` / ``deepseek-v4-flash`` the API
+    #     actually serves.  See PR reviewing opencode-go fallback 401s. ---
+    if provider in {"opencode-zen", "opencode-go"}:
+        if "/" in name:
+            _, bare_after_slash = name.split("/", 1)
+            name = bare_after_slash.strip() or name
+        if provider == "opencode-zen" and name.lower().startswith("claude-"):
+            return _dots_to_hyphens(name)
+        return name

    # --- Anthropic: strip matching provider prefix, dots -> hyphens ---
    if provider in _DOT_TO_HYPHEN_PROVIDERS:
@@ -190,11 +190,18 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:
            model: "minimax-m2.7"
            provider: custom
            base_url: "https://ollama.com/v1"
+
+    Also reads ``model.aliases`` (set by ``hermes config set model.aliases.xxx``)
+    and converts simple string entries (``ds-flash: deepseek/deepseek-v4-flash``)
+    into DirectAlias objects.  The provider is parsed from the ``provider/``
+    prefix in the value; if no slash, the current provider is used.
    """
    merged = dict(_BUILTIN_DIRECT_ALIASES)
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
+
+        # --- model_aliases (dict-based format) ---
        user_aliases = cfg.get("model_aliases")
        if isinstance(user_aliases, dict):
            for name, entry in user_aliases.items():
@@ -207,6 +214,30 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:
                    merged[name.strip().lower()] = DirectAlias(
                        model=model, provider=provider, base_url=base_url,
                    )
+
+        # --- model.aliases (string-based format, from config set) ---
+        model_section = cfg.get("model", {})
+        if isinstance(model_section, dict):
+            simple_aliases = model_section.get("aliases")
+            if isinstance(simple_aliases, dict):
+                current_provider = model_section.get("provider", "")
+                for name, value in simple_aliases.items():
+                    if not isinstance(value, str) or not value.strip():
+                        continue
+                    key = name.strip().lower()
+                    if key in merged:
+                        continue  # don't override explicit model_aliases entries
+                    val = value.strip()
+                    if "/" in val:
+                        provider, model = val.split("/", 1)
+                    else:
+                        provider = current_provider
+                        model = val
+                    merged[key] = DirectAlias(
+                        model=model.strip(),
+                        provider=provider.strip() or current_provider,
+                        base_url="",
+                    )
    except Exception:
        pass
    return merged
@@ -768,6 +799,12 @@ def switch_model(
                        )

        # --- Step d: Aggregator catalog search ---
+        # Track whether the live catalog of the CURRENT provider resolved the
+        # model — if so, step e must not second-guess and switch providers.
+        # Critical for flat-namespace resellers like opencode-go / opencode-zen
+        # whose live /v1/models returns bare IDs (e.g. "deepseek-v4-flash") that
+        # coincidentally match entries in native providers' static catalogs.
+        resolved_in_current_catalog = False
        if is_aggregator(target_provider) and not resolved_alias:
            catalog = list_provider_models(target_provider)
            if catalog:
@@ -775,6 +812,7 @@ def switch_model(
                for mid in catalog:
                    if mid.lower() == new_model_lower:
                        new_model = mid
+                        resolved_in_current_catalog = True
                        break
                else:
                    for mid in catalog:
@@ -782,6 +820,7 @@ def switch_model(
                            _, bare = mid.split("/", 1)
                            if bare.lower() == new_model_lower:
                                new_model = mid
+                                resolved_in_current_catalog = True
                                break

        # --- Step e: detect_provider_for_model() as last resort ---
@@ -794,6 +833,7 @@ def switch_model(
            target_provider == current_provider
            and not is_custom
            and not resolved_alias
+            and not resolved_in_current_catalog
        ):
            detected = detect_provider_for_model(new_model, current_provider)
            if detected:
@@ -1264,11 +1304,7 @@ def list_authenticated_providers(
                from hermes_cli.auth import _load_auth_store
                store = _load_auth_store()
                providers_store = store.get("providers", {})
-                pool_store = store.get("credential_pool", {})
-                if store and (
-                    pid in providers_store or hermes_slug in providers_store
-                    or pid in pool_store or hermes_slug in pool_store
-                ):
+                if store and (pid in providers_store or hermes_slug in providers_store):
                    has_creds = True
            except Exception as exc:
                logger.debug("Auth store check failed for %s: %s", pid, exc)
@@ -1364,11 +1400,7 @@ def list_authenticated_providers(
                from hermes_cli.auth import _load_auth_store
                _cp_store = _load_auth_store()
                _cp_providers_store = _cp_store.get("providers", {})
-                _cp_pool_store = _cp_store.get("credential_pool", {})
-                if _cp_store and (
-                    _cp.slug in _cp_providers_store
-                    or _cp.slug in _cp_pool_store
-                ):
+                if _cp_store and _cp.slug in _cp_providers_store:
                    _cp_has_creds = True
            except Exception:
                pass
@@ -1660,3 +1692,63 @@ def list_authenticated_providers(
    results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))

    return results
+
+
+def list_picker_providers(
+    current_provider: str = "",
+    current_base_url: str = "",
+    user_providers: dict = None,
+    custom_providers: list | None = None,
+    max_models: int = 8,
+    current_model: str = "",
+) -> List[dict]:
+    """Interactive-picker variant of :func:`list_authenticated_providers`.
+
+    Post-processes the base list so the ``/model`` picker (Telegram/Discord
+    inline keyboards) only surfaces models that are actually callable in the
+    current install:
+
+    - OpenRouter's model list is replaced with the output of
+      :func:`hermes_cli.models.fetch_openrouter_models`, which filters the
+      curated ``OPENROUTER_MODELS`` snapshot against the live OpenRouter
+      catalog.  IDs the live catalog no longer carries drop out, so the
+      picker never offers a model the user can't call.  If that fetch
+      raises, the OpenRouter row is passed through untouched.
+    - Provider rows whose model list ends up empty are dropped, except
+      custom endpoints (``is_user_defined=True`` with an ``api_url``) where
+      the user may supply their own model set through config.
+
+    All other providers and metadata fields are passed through unchanged.
+    The typed ``/model <name>`` path is unaffected -- only the interactive
+    picker payload is narrowed.
+    """
+    from hermes_cli.models import fetch_openrouter_models
+
+    providers = list_authenticated_providers(
+        current_provider=current_provider,
+        current_base_url=current_base_url,
+        user_providers=user_providers,
+        custom_providers=custom_providers,
+        max_models=max_models,
+        current_model=current_model,
+    )
+
+    filtered: List[dict] = []
+    for p in providers:
+        if str(p.get("slug", "")).lower() == "openrouter":
+            try:
+                live_ids = [mid for mid, _ in fetch_openrouter_models()]
+            except Exception:
+                # Leave the row untouched so its original total_models survives.
+                live_ids = None
+            if live_ids is not None:
+                p = dict(p)  # shallow copy: the base row itself is not mutated
+                p["models"] = live_ids[:max_models]
+                p["total_models"] = len(live_ids)
+
+        is_custom_endpoint = bool(p.get("is_user_defined")) and bool(p.get("api_url"))
+        if not p.get("models") and not is_custom_endpoint:
+            continue
+        filtered.append(p)
+
+    return filtered
@@ -61,12 +61,14 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("z-ai/glm-5v-turbo",               ""),
    ("z-ai/glm-5-turbo",                ""),
    ("x-ai/grok-4.20",                  ""),
+    ("x-ai/grok-4.3",                   ""),
    ("nvidia/nemotron-3-super-120b-a12b",      ""),
    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
    ("arcee-ai/trinity-large-preview:free", "free"),
    ("arcee-ai/trinity-large-thinking",  ""),
    ("openai/gpt-5.5-pro",              ""),
    ("openai/gpt-5.4-nano",             ""),
+    ("deepseek/deepseek-v4-pro",        ""),
 ]

 _openrouter_catalog_cache: list[tuple[str, str]] | None = None
@@ -181,10 +183,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "z-ai/glm-5v-turbo",
        "z-ai/glm-5-turbo",
        "x-ai/grok-4.20-beta",
+        "x-ai/grok-4.3",
        "nvidia/nemotron-3-super-120b-a12b",
        "arcee-ai/trinity-large-thinking",
        "openai/gpt-5.5-pro",
        "openai/gpt-5.4-nano",
+        "deepseek/deepseek-v4-pro",
    ],
    # Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
    # provider_model_ids fallback when /v1/models is unavailable.
@@ -806,6 +810,25 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway"),
 ]

+# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
+# that is not already in the list above.  Adding plugins/model-providers/<name>/
+# is sufficient to expose a new provider in the model picker, /model, and all
+# downstream consumers — no edits to this file needed.
+_canonical_slugs = {p.slug for p in CANONICAL_PROVIDERS}  # slugs already listed
+try:
+    from providers import list_providers as _list_providers_for_canonical
+    for _pp in _list_providers_for_canonical():
+        if _pp.name in _canonical_slugs:
+            continue  # already listed above or appended earlier in this loop
+        if _pp.auth_type in ("oauth_device_code", "oauth_external", "external_process", "aws_sdk", "copilot"):
+            continue  # non-api-key flows need bespoke picker UX; skip auto-inject
+        _label = _pp.display_name or _pp.name
+        _desc = _pp.description or f"{_label} (direct API)"
+        CANONICAL_PROVIDERS.append(ProviderEntry(_pp.name, _label, _desc))
+        _canonical_slugs.add(_pp.name)  # keeps a repeated name from being added twice
+except Exception:
+    pass  # swallowed on purpose; entries appended before the error are kept
+
 # Derived dicts — used throughout the codebase
 _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
 _PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider
@@ -1740,10 +1763,20 @@ def model_supports_fast_mode(model_id: Optional[str]) -> bool:


 def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
-    """Return True if the model is a Claude model eligible for Anthropic Fast Mode."""
+    """Return True if the model is a Claude model eligible for Anthropic Fast Mode.
+
+    Fast mode is currently supported on Claude Opus 4.6 only. Per Anthropic's
+    docs (https://platform.claude.com/docs/en/build-with-claude/fast-mode):
+    "Fast mode is currently supported on Opus 4.6 only. Sending speed: fast
+    with an unsupported model returns an error." Opus 4.7 explicitly rejects
+    the ``speed`` parameter with HTTP 400.
+    """
    raw = _strip_vendor_prefix(str(model_id or ""))
    base = raw.split(":")[0]
-    return base.startswith("claude-")
+    if not base.startswith("claude-"):
+        return False
+    # Only Opus 4.6 supports fast mode at present.
+    return "opus-4-6" in base or "opus-4.6" in base


 def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
@@ -2013,6 +2046,34 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                return ids
        except Exception:
            pass
+
+    # ── Profile-based generic live fetch (all simple api-key providers) ──
+    # Handles any provider registered in providers/ with auth_type="api_key".
+    # Replaces per-provider copy-paste blocks (stepfun, gmi, zai, etc.).
+    try:
+        from providers import get_provider_profile
+        from hermes_cli.auth import resolve_api_key_provider_credentials
+
+        _p = get_provider_profile(normalized)
+        if _p and _p.auth_type == "api_key" and _p.base_url:
+            try:
+                creds = resolve_api_key_provider_credentials(normalized)
+                api_key = str(creds.get("api_key") or "").strip()
+                base_url = str(creds.get("base_url") or "").strip()
+            except Exception:
+                api_key, base_url = "", _p.base_url
+            if not base_url:
+                base_url = _p.base_url
+            if api_key:
+                live = _p.fetch_models(api_key=api_key)
+                if live:
+                    return live
+            # Use profile's fallback_models if defined
+            if _p.fallback_models:
+                return list(_p.fallback_models)
+    except Exception:
+        pass
+
    curated_static = list(_PROVIDER_MODELS.get(normalized, []))
    if normalized in _MODELS_DEV_PREFERRED:
        return _merge_with_models_dev(normalized, curated_static)
@@ -2896,6 +2957,19 @@ def fetch_api_models(
 _OLLAMA_CLOUD_CACHE_TTL = 3600  # 1 hour


+def _strip_ollama_cloud_suffix(model_id: str) -> str:
+    """Drop one trailing ``:cloud`` or ``-cloud`` marker from an Ollama Cloud ID.
+
+    models.dev sometimes reports IDs such as 'kimi-k2.6:cloud' while the live
+    API uses the clean form ('kimi-k2.6'). ``:cloud`` is tried first and at
+    most one suffix is removed; IDs without either suffix come back unchanged.
+    """
+    trimmed = model_id.removesuffix(":cloud")
+    if len(trimmed) == len(model_id):
+        trimmed = model_id.removesuffix("-cloud")
+    return trimmed
+
+
 def _ollama_cloud_cache_path() -> Path:
    """Return the path for the Ollama Cloud model cache."""
    from hermes_constants import get_hermes_home
@@ -2991,9 +3065,10 @@ def fetch_ollama_cloud_models(
                seen.add(m)
                merged.append(m)
        for m in mdev_models:
-            if m and m not in seen:
-                seen.add(m)
-                merged.append(m)
+            normalized = _strip_ollama_cloud_suffix(m)
+            if normalized and normalized not in seen:
+                seen.add(normalized)
+                merged.append(normalized)
        if merged:
            _save_ollama_cloud_cache(merged)
            return merged
@@ -3185,11 +3260,12 @@ def validate_requested_model(
            if suggestions:
                suggestion_text = "\n  Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
            return {
-                "accepted": False,
-                "persist": False,
+                "accepted": True,
+                "persist": True,
                "recognized": False,
                "message": (
-                    f"Model `{requested}` was not found in the OpenAI Codex model listing."
+                    f"Note: `{requested}` was not found in the OpenAI Codex model listing. "
+                    "It may still work if your ChatGPT/Codex account has access to a newer or hidden model ID."
                    f"{suggestion_text}"
                ),
            }
@@ -255,6 +255,10 @@ def get_nous_subscription_features(
    terminal_cfg = config.get("terminal") if isinstance(config.get("terminal"), dict) else {}

    web_backend = str(web_cfg.get("backend") or "").strip().lower()
+    # Per-capability overrides: if set, they determine which backend is active for
+    # search/extract independently of web.backend.
+    web_search_backend = str(web_cfg.get("search_backend") or "").strip().lower()
+    web_extract_backend = str(web_cfg.get("extract_backend") or "").strip().lower()
    tts_provider = str(tts_cfg.get("provider") or "edge").strip().lower()
    browser_provider_explicit = "cloud_provider" in browser_cfg
    browser_provider = normalize_browser_cloud_provider(
@@ -280,6 +284,7 @@ def get_nous_subscription_features(
    direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
    direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
    direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
+    direct_searxng = bool(get_env_value("SEARXNG_URL"))
    direct_fal = fal_key_is_configured()
    direct_openai_tts = bool(resolve_openai_audio_api_key())
    direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
@@ -323,10 +328,18 @@ def get_nous_subscription_features(
            or (web_backend == "firecrawl" and direct_firecrawl)
            or (web_backend == "parallel" and direct_parallel)
            or (web_backend == "tavily" and direct_tavily)
+            or (web_backend == "searxng" and direct_searxng)
+            # Per-capability overrides: search_backend or extract_backend may be set
+            # without web.backend (using the new split config from #20061)
+            or (web_search_backend == "searxng" and direct_searxng)
+            or (web_search_backend == "exa" and direct_exa)
+            or (web_search_backend == "firecrawl" and direct_firecrawl)
+            or (web_search_backend == "parallel" and direct_parallel)
+            or (web_search_backend == "tavily" and direct_tavily)
        )
    )
    web_available = bool(
-        managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily
+        managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily or direct_searxng
    )

    image_managed = image_tool_enabled and managed_image_available and not direct_fal
@@ -412,8 +425,8 @@ def get_nous_subscription_features(
            managed_by_nous=web_managed,
            direct_override=web_active and not web_managed,
            toolset_enabled=web_tool_enabled,
-            current_provider=web_backend or "",
-            explicit_configured=bool(web_backend),
+            current_provider=web_backend or web_search_backend or "",
+            explicit_configured=bool(web_backend or web_search_backend),
        ),
        "image_gen": NousFeatureState(
            key="image_gen",
@@ -173,7 +173,7 @@ def _get_enabled_plugins() -> Optional[set]:
 # Data classes
 # ---------------------------------------------------------------------------

-_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive", "platform"}
+_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive", "platform", "model-provider"}


@dataclass
@@ -643,15 +643,17 @@ class PluginManager:
        #   - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone)
        #   - category: ``plugins/image_gen/openai/plugin.yaml`` (backend)
        #
-        # ``memory/`` and ``context_engine/`` are skipped at the top level —
-        # they have their own discovery systems. ``platforms/`` is a category
-        # holding platform adapters (scanned one level deeper below).
+        # ``memory/``, ``context_engine/``, and ``model-providers/`` are
+        # skipped at the top level — they have their own discovery systems
+        # (plugins/memory/__init__.py, providers/__init__.py). ``platforms/``
+        # is a category holding platform adapters (scanned one level deeper
+        # below).
        repo_plugins = get_bundled_plugins_dir()
        manifests.extend(
            self._scan_directory(
                repo_plugins,
                source="bundled",
-                skip_names={"memory", "context_engine", "platforms"},
+                skip_names={"memory", "context_engine", "platforms", "model-providers"},
            )
        )
        manifests.extend(
@@ -709,6 +711,21 @@ class PluginManager:
                )
                continue

+            # Model provider plugins are loaded by providers/__init__.py
+            # (its own lazy discovery keyed off first get_provider_profile()
+            # call). We record the manifest here for introspection but do
+            # not import the module — a second import would create two
+            # ProviderProfile instances and break the "last writer wins"
+            # override semantics between bundled and user plugins.
+            if manifest.kind == "model-provider":
+                loaded = LoadedPlugin(manifest=manifest, enabled=True)
+                self._plugins[lookup_key] = loaded
+                logger.debug(
+                    "Skipping '%s' (model-provider, handled by providers/ discovery)",
+                    lookup_key,
+                )
+                continue
+
            # Built-in backends auto-load — they ship with hermes and must
            # just work. Selection among them (e.g. which image_gen backend
            # services calls) is driven by ``<category>.provider`` config,
@@ -886,6 +903,19 @@ class PluginManager:
                                "treating as kind='exclusive'",
                                key,
                            )
+                        elif (
+                            "register_provider" in source_text
+                            and "ProviderProfile" in source_text
+                        ):
+                            # Model provider plugin (calls register_provider()
+                            # from ``providers`` with a ProviderProfile). Route
+                            # to providers/__init__.py discovery.
+                            kind = "model-provider"
+                            logger.debug(
+                                "Plugin %s: detected model provider, "
+                                "treating as kind='model-provider'",
+                                key,
+                            )
                    except Exception:
                        pass

@@ -179,8 +179,33 @@ def _get_wrapper_dir() -> Path:
 # Validation
 # ---------------------------------------------------------------------------

+def normalize_profile_name(name: str) -> str:
+    """Canonicalize a profile name for on-disk lookup and CLI ``-p`` argv.
+
+    Non-string input is converted with ``str()``, surrounding whitespace is
+    stripped, and the result is lowercased (any casing of ``default`` maps
+    to ``"default"``), so title-cased display labels from dashboards resolve
+    to the stored id (see issue #18498). Raises ``ValueError`` when empty.
+    """
+    text = name if isinstance(name, str) else str(name)
+    text = text.strip()
+    if text == "":
+        raise ValueError("profile name cannot be empty")
+    # ``default`` is matched via casefold(); every other name via lower().
+    if text.casefold() == "default":
+        return "default"
+    return text.lower()
+
+
 def validate_profile_name(name: str) -> None:
-    """Raise ``ValueError`` if *name* is not a valid profile identifier."""
+    """Raise ``ValueError`` if *name* is not a valid profile identifier.
+
+    Validates the input as-given — strict lowercase match. Callers that accept
+    mixed-case or title-cased input from users (dashboard UI, CLI args) should
+    call :func:`normalize_profile_name` first. This separation keeps validate
+    honest about what the on-disk directory name must look like, while
+    ingress-point normalization handles UX flexibility (see #18498).
+    """
    if name == "default":
        return  # special alias for ~/.hermes
    if not _PROFILE_ID_RE.match(name):
@@ -192,16 +217,18 @@ def validate_profile_name(name: str) -> None:

 def get_profile_dir(name: str) -> Path:
    """Resolve a profile name to its HERMES_HOME directory."""
-    if name == "default":
+    canon = normalize_profile_name(name)
+    if canon == "default":
        return _get_default_hermes_home()
-    return _get_profiles_root() / name
+    return _get_profiles_root() / canon


 def profile_exists(name: str) -> bool:
    """Check whether a profile directory exists."""
-    if name == "default":
+    canon = normalize_profile_name(name)
+    if canon == "default":
        return True
-    return get_profile_dir(name).is_dir()
+    return get_profile_dir(canon).is_dir()


 # ---------------------------------------------------------------------------
@@ -213,28 +240,29 @@ def check_alias_collision(name: str) -> Optional[str]:

    Checks: reserved names, hermes subcommands, existing binaries in PATH.
    """
-    if name in _RESERVED_NAMES:
-        return f"'{name}' is a reserved name"
-    if name in _HERMES_SUBCOMMANDS:
-        return f"'{name}' conflicts with a hermes subcommand"
+    canon = normalize_profile_name(name)
+    if canon in _RESERVED_NAMES:
+        return f"'{canon}' is a reserved name"
+    if canon in _HERMES_SUBCOMMANDS:
+        return f"'{canon}' conflicts with a hermes subcommand"

    # Check existing commands in PATH
    wrapper_dir = _get_wrapper_dir()
    try:
        result = subprocess.run(
-            ["which", name], capture_output=True, text=True, timeout=5,
+            ["which", canon], capture_output=True, text=True, timeout=5,
        )
        if result.returncode == 0:
            existing_path = result.stdout.strip()
            # Allow overwriting our own wrappers
-            if existing_path == str(wrapper_dir / name):
+            if existing_path == str(wrapper_dir / canon):
                try:
-                    content = (wrapper_dir / name).read_text()
+                    content = (wrapper_dir / canon).read_text()
                    if "hermes -p" in content:
                        return None  # it's our wrapper, safe to overwrite
                except Exception:
                    pass
-            return f"'{name}' conflicts with an existing command ({existing_path})"
+            return f"'{canon}' conflicts with an existing command ({existing_path})"
    except (FileNotFoundError, subprocess.TimeoutExpired):
        pass

@@ -252,6 +280,7 @@ def create_wrapper_script(name: str) -> Optional[Path]:

    Returns the path to the created wrapper, or None if creation failed.
    """
+    canon = normalize_profile_name(name)
    wrapper_dir = _get_wrapper_dir()
    try:
        wrapper_dir.mkdir(parents=True, exist_ok=True)
@@ -259,9 +288,9 @@ def create_wrapper_script(name: str) -> Optional[Path]:
        print(f"⚠ Could not create {wrapper_dir}: {e}")
        return None

-    wrapper_path = wrapper_dir / name
+    wrapper_path = wrapper_dir / canon
    try:
-        wrapper_path.write_text(f'#!/bin/sh\nexec hermes -p {name} "$@"\n')
+        wrapper_path.write_text(f'#!/bin/sh\nexec hermes -p {canon} "$@"\n')
        wrapper_path.chmod(wrapper_path.stat().st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH)
        return wrapper_path
    except OSError as e:
@@ -271,7 +300,7 @@ def create_wrapper_script(name: str) -> Optional[Path]:

 def remove_wrapper_script(name: str) -> bool:
    """Remove the wrapper script for a profile. Returns True if removed."""
-    wrapper_path = _get_wrapper_dir() / name
+    wrapper_path = _get_wrapper_dir() / normalize_profile_name(name)
    if wrapper_path.exists():
        try:
            # Verify it's our wrapper before removing
@@ -421,16 +450,17 @@ def create_profile(
    Path
        The newly created profile directory.
    """
-    validate_profile_name(name)
+    canon = normalize_profile_name(name)
+    validate_profile_name(canon)

-    if name == "default":
+    if canon == "default":
        raise ValueError(
            "Cannot create a profile named 'default' — it is the built-in profile (~/.hermes)."
        )

-    profile_dir = get_profile_dir(name)
+    profile_dir = get_profile_dir(canon)
    if profile_dir.exists():
-        raise FileExistsError(f"Profile '{name}' already exists at {profile_dir}")
+        raise FileExistsError(f"Profile '{canon}' already exists at {profile_dir}")

    # Resolve clone source
    source_dir = None
@@ -440,6 +470,7 @@ def create_profile(
            from hermes_constants import get_hermes_home
            source_dir = get_hermes_home()
        else:
+            clone_from = normalize_profile_name(clone_from)
            validate_profile_name(clone_from)
            source_dir = get_profile_dir(clone_from)
        if not source_dir.is_dir():
@@ -540,24 +571,25 @@ def delete_profile(name: str, yes: bool = False) -> Path:

    Returns the path that was removed.
    """
-    validate_profile_name(name)
+    canon = normalize_profile_name(name)
+    validate_profile_name(canon)

-    if name == "default":
+    if canon == "default":
        raise ValueError(
            "Cannot delete the default profile (~/.hermes).\n"
            "To remove everything, use: hermes uninstall"
        )

-    profile_dir = get_profile_dir(name)
+    profile_dir = get_profile_dir(canon)
    if not profile_dir.is_dir():
-        raise FileNotFoundError(f"Profile '{name}' does not exist.")
+        raise FileNotFoundError(f"Profile '{canon}' does not exist.")

    # Show what will be deleted
    model, provider = _read_config_model(profile_dir)
    gw_running = _check_gateway_running(profile_dir)
    skill_count = _count_skills(profile_dir)

-    print(f"\nProfile: {name}")
+    print(f"\nProfile: {canon}")
    print(f"Path:    {profile_dir}")
    if model:
        print(f"Model:   {model}" + (f" ({provider})" if provider else ""))
@@ -569,7 +601,7 @@ def delete_profile(name: str, yes: bool = False) -> Path:
    ]

    # Check for service
-    wrapper_path = _get_wrapper_dir() / name
+    wrapper_path = _get_wrapper_dir() / canon
    has_wrapper = wrapper_path.exists()
    if has_wrapper:
        items.append(f"Command alias ({wrapper_path})")
@@ -584,16 +616,16 @@ def delete_profile(name: str, yes: bool = False) -> Path:
    if not yes:
        print()
        try:
-            confirm = input(f"Type '{name}' to confirm: ").strip()
+            confirm = input(f"Type '{canon}' to confirm: ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            return profile_dir
-        if confirm != name:
+        if confirm != canon:
            print("Cancelled.")
            return profile_dir

    # 1. Disable service (prevents auto-restart)
-    _cleanup_gateway_service(name, profile_dir)
+    _cleanup_gateway_service(canon, profile_dir)

    # 2. Stop running gateway
    if gw_running:
@@ -601,7 +633,7 @@ def delete_profile(name: str, yes: bool = False) -> Path:

    # 3. Remove wrapper script
    if has_wrapper:
-        if remove_wrapper_script(name):
+        if remove_wrapper_script(canon):
            print(f"✓ Removed {wrapper_path}")

    # 4. Remove profile directory
@@ -614,13 +646,13 @@ def delete_profile(name: str, yes: bool = False) -> Path:
    # 5. Clear active_profile if it pointed to this profile
    try:
        active = get_active_profile()
-        if active == name:
+        if active == canon:
            set_active_profile("default")
            print("✓ Active profile reset to default")
    except Exception:
        pass

-    print(f"\nProfile '{name}' deleted.")
+    print(f"\nProfile '{canon}' deleted.")
    return profile_dir


@@ -730,22 +762,23 @@ def set_active_profile(name: str) -> None:

    Writes to ``~/.hermes/active_profile``. Use ``"default"`` to clear.
    """
-    validate_profile_name(name)
-    if name != "default" and not profile_exists(name):
+    canon = normalize_profile_name(name)
+    validate_profile_name(canon)
+    if canon != "default" and not profile_exists(canon):
        raise FileNotFoundError(
-            f"Profile '{name}' does not exist. "
-            f"Create it with: hermes profile create {name}"
+            f"Profile '{canon}' does not exist. "
+            f"Create it with: hermes profile create {canon}"
        )

    path = _get_active_profile_path()
    path.parent.mkdir(parents=True, exist_ok=True)
-    if name == "default":
+    if canon == "default":
        # Remove the file to indicate default
        path.unlink(missing_ok=True)
    else:
        # Atomic write
        tmp = path.with_suffix(".tmp")
-        tmp.write_text(name + "\n")
+        tmp.write_text(canon + "\n")
        tmp.replace(path)


@@ -811,16 +844,17 @@ def export_profile(name: str, output_path: str) -> Path:
    """
    import tempfile

-    validate_profile_name(name)
-    profile_dir = get_profile_dir(name)
+    canon = normalize_profile_name(name)
+    validate_profile_name(canon)
+    profile_dir = get_profile_dir(canon)
    if not profile_dir.is_dir():
-        raise FileNotFoundError(f"Profile '{name}' does not exist.")
+        raise FileNotFoundError(f"Profile '{canon}' does not exist.")

    output = Path(output_path)
    # shutil.make_archive wants the base name without extension
    base = str(output).removesuffix(".tar.gz").removesuffix(".tgz")

-    if name == "default":
+    if canon == "default":
        # The default profile IS ~/.hermes itself — its parent is ~/ and its
        # directory name is ".hermes", not "default".  We stage a clean copy
        # under a temp dir so the archive contains ``default/...``.
@@ -836,14 +870,14 @@ def export_profile(name: str, output_path: str) -> Path:

    # Named profiles — stage a filtered copy to exclude credentials
    with tempfile.TemporaryDirectory() as tmpdir:
-        staged = Path(tmpdir) / name
+        staged = Path(tmpdir) / canon
        _CREDENTIAL_FILES = {"auth.json", ".env"}
        shutil.copytree(
            profile_dir,
            staged,
            ignore=lambda d, contents: _CREDENTIAL_FILES & set(contents),
        )
-        result = shutil.make_archive(base, "gztar", tmpdir, name)
+        result = shutil.make_archive(base, "gztar", tmpdir, canon)
        return Path(result)


@@ -952,16 +986,17 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
    # Archives exported from the default profile have "default/" as top-level
    # dir.  Importing as "default" would target ~/.hermes itself — disallow
    # that and guide the user toward a named profile.
-    if inferred_name == "default":
+    canon = normalize_profile_name(inferred_name)
+    validate_profile_name(canon)
+    if canon == "default":
        raise ValueError(
            "Cannot import as 'default' — that is the built-in root profile (~/.hermes). "
            "Specify a different name: hermes profile import <archive> --name <name>"
        )

-    validate_profile_name(inferred_name)
-    profile_dir = get_profile_dir(inferred_name)
+    profile_dir = get_profile_dir(canon)
    if profile_dir.exists():
-        raise FileExistsError(f"Profile '{inferred_name}' already exists at {profile_dir}")
+        raise FileExistsError(f"Profile '{canon}' already exists at {profile_dir}")

    profiles_root = _get_profiles_root()
    profiles_root.mkdir(parents=True, exist_ok=True)
@@ -977,8 +1012,8 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
            )

        final_source = extracted
-        if archive_root != inferred_name:
-            final_source = staging_root / inferred_name
+        if archive_root != canon:
+            final_source = staging_root / canon
            extracted.rename(final_source)

        shutil.move(str(final_source), str(profile_dir))
@@ -1048,25 +1083,27 @@ def rename_profile(old_name: str, new_name: str) -> Path:

    Returns the new profile directory.
    """
-    validate_profile_name(old_name)
-    validate_profile_name(new_name)
+    old_canon = normalize_profile_name(old_name)
+    new_canon = normalize_profile_name(new_name)
+    validate_profile_name(old_canon)
+    validate_profile_name(new_canon)

-    if old_name == "default":
+    if old_canon == "default":
        raise ValueError("Cannot rename the default profile.")
-    if new_name == "default":
+    if new_canon == "default":
        raise ValueError("Cannot rename to 'default' — it is reserved.")

-    old_dir = get_profile_dir(old_name)
-    new_dir = get_profile_dir(new_name)
+    old_dir = get_profile_dir(old_canon)
+    new_dir = get_profile_dir(new_canon)

    if not old_dir.is_dir():
-        raise FileNotFoundError(f"Profile '{old_name}' does not exist.")
+        raise FileNotFoundError(f"Profile '{old_canon}' does not exist.")
    if new_dir.exists():
-        raise FileExistsError(f"Profile '{new_name}' already exists.")
+        raise FileExistsError(f"Profile '{new_canon}' already exists.")

    # 1. Stop gateway if running
    if _check_gateway_running(old_dir):
-        _cleanup_gateway_service(old_name, old_dir)
+        _cleanup_gateway_service(old_canon, old_dir)
        _stop_gateway_process(old_dir)

    # 2. Rename directory
@@ -1074,22 +1111,22 @@ def rename_profile(old_name: str, new_name: str) -> Path:
    print(f"✓ Renamed {old_dir.name} → {new_dir.name}")

    # 3. Update profile-scoped Honcho host blocks, preserving aiPeer identity
-    _migrate_honcho_profile_host(old_name, new_name, new_dir)
+    _migrate_honcho_profile_host(old_canon, new_canon, new_dir)

    # 4. Update wrapper script
-    remove_wrapper_script(old_name)
-    collision = check_alias_collision(new_name)
+    remove_wrapper_script(old_canon)
+    collision = check_alias_collision(new_canon)
    if not collision:
-        create_wrapper_script(new_name)
-        print(f"✓ Alias updated: {new_name}")
+        create_wrapper_script(new_canon)
+        print(f"✓ Alias updated: {new_canon}")
    else:
-        print(f"⚠ Cannot create alias '{new_name}' — {collision}")
+        print(f"⚠ Cannot create alias '{new_canon}' — {collision}")

    # 5. Update active_profile if it pointed to old name
    try:
-        if get_active_profile() == old_name:
-            set_active_profile(new_name)
-            print(f"✓ Active profile updated: {new_name}")
+        if get_active_profile() == old_canon:
+            set_active_profile(new_canon)
+            print(f"✓ Active profile updated: {new_canon}")
    except Exception:
        pass

@@ -1191,13 +1228,14 @@ def resolve_profile_env(profile_name: str) -> str:
    Called early in the CLI entry point, before any hermes modules
    are imported, to set the HERMES_HOME environment variable.
    """
-    validate_profile_name(profile_name)
-    profile_dir = get_profile_dir(profile_name)
+    canon = normalize_profile_name(profile_name)
+    validate_profile_name(canon)
+    profile_dir = get_profile_dir(canon)

-    if profile_name != "default" and not profile_dir.is_dir():
+    if canon != "default" and not profile_dir.is_dir():
        raise FileNotFoundError(
-            f"Profile '{profile_name}' does not exist. "
-            f"Create it with: hermes profile create {profile_name}"
+            f"Profile '{canon}' does not exist. "
+            f"Create it with: hermes profile create {canon}"
        )

    return str(profile_dir)
@@ -108,9 +108,14 @@ class PtyBridge:
                    "(or pip install -e '.[pty]')."
                )
            raise PtyUnavailableError("Pseudo-terminals are unavailable.")
-        # Let caller-supplied env fully override inheritance; if they pass
-        # None we inherit the server's env (same semantics as subprocess).
-        spawn_env = os.environ.copy() if env is None else env
+        # PTY-hosted programs expect TERM to describe the terminal type.
+        # CI often runs without TERM in the parent process, which makes
+        # simple terminal probes like `tput cols` fail before winsize reads.
+        # Preserve explicit caller overrides, but backfill a sensible default
+        # when TERM is missing or blank.
+        spawn_env = (os.environ.copy() if env is None else env.copy())
+        if not spawn_env.get("TERM"):
+            spawn_env["TERM"] = "xterm-256color"
        proc = ptyprocess.PtyProcess.spawn(  # type: ignore[union-attr]
            list(argv),
            cwd=cwd,
@@ -15,6 +15,7 @@ import importlib.util
 import json
 import logging
 import os
+import re
 import shutil
 import sys
 import copy
@@ -208,12 +209,23 @@ def prompt(question: str, default: str = None, password: bool = False) -> str:
        else:
            value = input(color(display, Colors.YELLOW))

-        return value.strip() or default or ""
+        cleaned = _sanitize_pasted_input(value)
+        return cleaned.strip() or default or ""
    except (KeyboardInterrupt, EOFError):
        print()
        sys.exit(1)


+# Matches the bracketed-paste start (ESC [ 200 ~) and end (ESC [ 201 ~)
+# markers, tolerating whitespace right after the opening bracket.
+_BRACKETED_PASTE_PATTERN = re.compile(r"\x1b\[\s*200~|\x1b\[\s*201~")
+
+
+def _sanitize_pasted_input(value: str) -> str:
+    """Return ``value`` with bracketed-paste start/end markers removed.
+
+    Non-string and empty inputs are handed back unchanged.
+    """
+    if isinstance(value, str) and value:
+        return _BRACKETED_PASTE_PATTERN.sub("", value)
+    return value
+
+
 def _curses_prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int:
    """Single-select menu using curses. Delegates to curses_radiolist."""
    from hermes_cli.curses_ui import curses_radiolist
@@ -382,7 +394,7 @@ def _print_setup_summary(config: dict, hermes_home):
            label = f"Web Search & Extract ({subscription_features.web.current_provider})"
        tool_status.append((label, True, None))
    else:
-        tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY/FIRECRAWL_API_URL, or TAVILY_API_KEY"))
+        tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY/FIRECRAWL_API_URL, TAVILY_API_KEY, or SEARXNG_URL"))

    # Browser tools (local Chromium, Camofox, Browserbase, Browser Use, or Firecrawl)
    browser_provider = subscription_features.browser.current_provider
@@ -964,7 +976,8 @@ def setup_model_provider(config: dict, *, quick: bool = False):
                    )
                else:
                    _selected_vision_model = prompt("  Vision model (blank = use main/custom default)").strip()
-                save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
+                if _selected_vision_model:
+                    save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
                print_success(
                    f"Vision configured with {_base_url}"
                    + (f" ({_selected_vision_model})" if _selected_vision_model else "")
@@ -42,6 +42,7 @@ All fields are optional. Missing values inherit from the ``default`` skin.
      session_border: "#8B8682"          # Session ID dim color
      status_bar_bg: "#1a1a2e"          # TUI status/usage bar background
      voice_status_bg: "#1a1a2e"        # TUI voice status background
+      selection_bg: "#333355"           # TUI mouse-selection highlight background
      completion_menu_bg: "#1a1a2e"      # Completion menu background
      completion_menu_current_bg: "#333355"  # Active completion row background
      completion_menu_meta_bg: "#1a1a2e"     # Completion meta column background
@@ -122,11 +122,16 @@ def show_status(args):
    print()
    print(color("◆ API Keys", Colors.CYAN, Colors.BOLD))

-    keys = {
+    # Values may be a single env var name (str) or a tuple of alternates (first non-empty value wins).
+    keys: dict[str, str | tuple[str, ...]] = {
        "OpenRouter": "OPENROUTER_API_KEY",
        "OpenAI": "OPENAI_API_KEY",
-        "NVIDIA": "NVIDIA_API_KEY",
-        "Z.AI/GLM": "GLM_API_KEY",
+        "Anthropic": ("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN"),
+        "Google / Gemini": ("GOOGLE_API_KEY", "GEMINI_API_KEY"),
+        "DeepSeek": "DEEPSEEK_API_KEY",
+        "xAI / Grok": "XAI_API_KEY",
+        "NVIDIA NIM": "NVIDIA_API_KEY",
+        "Z.AI / GLM": "GLM_API_KEY",
        "Kimi": "KIMI_API_KEY",
        "StepFun Step Plan": "STEPFUN_API_KEY",
        "MiniMax": "MINIMAX_API_KEY",
@@ -142,8 +147,23 @@ def show_status(args):
        "GitHub": "GITHUB_TOKEN",
    }

-    for name, env_var in keys.items():
-        value = get_env_value(env_var) or ""
+    def _resolve_env(env_ref) -> str:
+        """Look up ``env_ref`` and return the first non-empty value, else ``""``.
+
+        ``env_ref`` is either one env var name (str) or a tuple of
+        alternate names that are tried in order.
+        """
+        # Treat a lone name as a one-element tuple so both shapes share a loop.
+        names = env_ref if isinstance(env_ref, tuple) else (env_ref,)
+        for candidate in names:
+            found = get_env_value(candidate) or ""
+            if found:
+                return found
+        return ""
+
+    for name, env_ref in keys.items():
+        # Anthropic already has a dedicated lookup below; keep that as the
+        # single source of truth (it also resolves OAuth tokens), skip here
+        # so we don't print two "Anthropic" rows.
+        if name == "Anthropic":
+            continue
+        value = _resolve_env(env_ref)
        has_key = bool(value)
        display = redact_key(value) if not show_all else value
        print(f"  {name:<12}  {check_mark(has_key)} {display}")
@@ -192,7 +192,7 @@ TIPS = [
    "Voice messages on Telegram, Discord, WhatsApp, and Slack are auto-transcribed.",

    # --- Gateway & Messaging ---
-    "Hermes runs on 18 platforms: Telegram, Discord, Slack, WhatsApp, Signal, Matrix, email, and more.",
+    "Hermes runs on 21 messaging platforms: Telegram, Discord, Slack, WhatsApp, Signal, Matrix, IRC, Microsoft Teams, email, and more.",
    "hermes gateway install sets it up as a system service that starts on boot.",
    "DingTalk uses Stream Mode — no webhooks or public URL needed.",
    "BlueBubbles brings iMessage to Hermes via a local macOS server.",
@@ -334,6 +334,144 @@ TIPS = [
    "MCP ${ENV_VAR} placeholders in config are resolved at server spawn — including vars from ~/.hermes/.env.",
    "Skills from trusted repos (NousResearch) get a 'trusted' security level; community skills get extra scanning.",
    "The skills quarantine at ~/.hermes/skills/.hub/quarantine/ holds skills pending security review.",
+
+    # --- Advanced Slash Commands ---
+    '/steer <prompt> injects a note after the next tool call — nudge direction mid-task without interrupting.',
+    '/goal <text> sets a standing Ralph-loop objective — Hermes auto-continues turn after turn until a judge says done.',
+    '/snapshot create [label] saves a full state snapshot of Hermes config; /snapshot restore <id> reverts later.',
+    '/copy [N] copies the last assistant response to your clipboard, or the Nth-from-last with a number.',
+    '/redraw forces a full UI repaint, fixing terminal drift after tmux resize or mouse selection artifacts.',
+    '/agents (alias /tasks) shows active agents and running background tasks across the current session.',
+    '/footer toggles the gateway footer on final replies showing model, tool counts, and turn timing.',
+    '/busy queue|steer|interrupt controls what pressing Enter does while Hermes is working.',
+    '/topic in Telegram DMs enables user-managed multi-session topic mode — /topic <id> restores past sessions inline.',
+    '/approve session|always runs a pending dangerous command with your chosen trust scope; /deny rejects it.',
+    '/restart gracefully restarts the gateway after draining active runs, then pings the requester when back up.',
+    '/kanban boards switch <slug> changes the active multi-project Kanban board from inside chat.',
+    '/reload reloads ~/.hermes/.env into the running session — pick up new API keys without restarting.',
+
+    # --- Cron (no-agent & scripts) ---
+    'cronjob with no_agent=True runs a script on schedule and sends its stdout directly — zero tokens, zero LLM.',
+    'An empty cron script stdout means silent tick — nothing is delivered, perfect for threshold watchdogs.',
+    "HERMES_CRON_MAX_PARALLEL (default 4) caps how many cron jobs run per tick so bursts don't saturate your keys.",
+
+    # --- Gateway Hooks ---
+    'Gateway hooks live under ~/.hermes/hooks/<name>/ with HOOK.yaml + handler.py — handler must be named `handle`.',
+    'Hook events include gateway:startup, session:start, agent:step, and command:* wildcard subscriptions.',
+    'Drop a ~/.hermes/BOOT.md checklist and a gateway:startup hook runs it as a one-shot agent every boot.',
+
+    # --- Curator ---
+    'hermes curator run --dry-run previews what the curator would archive or consolidate without mutating anything.',
+    "hermes curator pin <skill> hard-fences a skill against both auto-archival and the agent's skill_manage tool.",
+    'hermes curator rollback restores skills from a pre-run snapshot — backups live under skills/.curator_backups/.',
+
+    # --- Credential Pools & Routing ---
+    'hermes auth reset <provider> clears all cooldowns and exhaustion flags on a credential pool.',
+    'credential_pool_strategies.<provider>: round_robin cycles keys evenly instead of the fill_first default.',
+    'use_gateway: true per-tool routes web, image, tts, or browser through your Nous subscription — no extra keys.',
+    'provider_routing.data_collection: deny excludes data-storing providers on OpenRouter.',
+    'provider_routing.require_parameters: true only routes to providers that support every param in your request.',
+
+    # --- TUI & Dashboard ---
+    'HERMES_TUI_RESUME=1 auto-re-attaches to the most recent TUI session on launch — handy after SSH drops.',
+    "HERMES_TUI_THEME=light|dark|<hex> forces the TUI theme on terminals that don't set COLORFGBG.",
+    'Ctrl+G or Ctrl+X Ctrl+E in the TUI opens the input buffer in $EDITOR for long multi-line prompts.',
+    'The TUI renders LaTeX inline — $E=mc^2$ becomes Unicode math instead of raw TeX.',
+    'hermes dashboard launches a local web UI at 127.0.0.1:9119 — zero data leaves localhost.',
+    'hermes dashboard --tui embeds the full Hermes TUI in your browser via xterm.js and a WebSocket PTY.',
+    'Drop a YAML in ~/.hermes/dashboard-themes/ with two palette colors to reskin the entire dashboard.',
+    'Dashboard plugins are drop-in: manifest.json + JS bundle in ~/.hermes/dashboard-plugins/ — no npm build required.',
+    'layoutVariant: cockpit in a dashboard theme adds a 260px left rail that plugins can populate via the sidebar slot.',
+
+    # --- Env Vars & Config Gates ---
+    "display.tool_progress_command: true exposes /verbose on messaging platforms; it's CLI-only by default.",
+    'HERMES_BACKGROUND_NOTIFICATIONS=result only pings when background tasks finish (vs all/error/off).',
+    'HERMES_WRITE_SAFE_ROOT restricts write_file and patch to a directory prefix; writes outside require approval.',
+    'HERMES_IGNORE_RULES skips auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills.',
+    'HERMES_ACCEPT_HOOKS auto-approves unseen shell hooks declared in config.yaml without a TTY prompt.',
+    'auxiliary.goal_judge.model routes the /goal judge to a cheap fast model to keep loop cost near zero.',
+    'Checkpoints skip directories with more than 50,000 files to avoid slow git operations on massive monorepos.',
+
+    # --- TTS ---
+    'tts.provider: piper runs 44-language local TTS on CPU — voices auto-download to ~/.hermes/cache/piper-voices/.',
+    'tts.providers.<name>.type: command wires any CLI TTS engine with {input_path} and {output_path} placeholders.',
+
+    # --- API Server & Proxy ---
+    'API_SERVER_ENABLED=true runs an OpenAI-compatible endpoint alongside the gateway for Open WebUI and LibreChat.',
+    'GATEWAY_PROXY_URL runs a split setup: platform I/O locally, agent work delegated to a remote API server.',
+
+    # --- Platform-specific ---
+    'MATRIX_DEVICE_ID pins a stable device ID for E2EE — without it, keys rotate every start and historic decrypt breaks.',
+    'TELEGRAM_WEBHOOK_SECRET is required whenever TELEGRAM_WEBHOOK_URL is set — generate with openssl rand -hex 32.',
+
+    # --- Batch ---
+    "batch_runner.py --resume content-matches completed prompts by text so dataset reorders don't re-run finished work.",
+
+    # --- Less-Known Slash Commands ---
+    '/new starts a fresh session in place (alias /reset) — fresh session ID, clean history, CLI stays open.',
+    '/clear wipes the terminal screen AND starts a new session — one shortcut for a visual reset.',
+    '/history prints the current conversation in-line without leaving the CLI — useful for a quick re-read.',
+    '/save writes the current conversation to disk without ending the session.',
+    '/status shows session info at a glance: ID, title, model, token usage, and elapsed time.',
+    '/image <path> attaches a local image file for your next prompt without pasting or drag-and-drop.',
+    '/platforms shows gateway and messaging-platform connection status right from inside chat.',
+    '/commands paginates the full slash-command + installed-skill list — useful on platforms without tab completion.',
+    '/toolsets lists every available toolset so you know what -t/--toolsets accepts.',
+    '/gquota shows Google Gemini Code Assist quota usage with progress bars when that provider is active.',
+    '/voice tts toggles TTS-only mode — agent replies out loud but you still type your prompts.',
+    '/reload-skills re-scans ~/.hermes/skills/ so drop-in skills appear without restarting the session.',
+    '/indicator kaomoji|emoji|unicode|ascii picks the TUI busy-indicator style shown during agent runs.',
+    '/debug uploads a support bundle (system info + logs) and returns shareable links — works in chat too.',
+
+    # --- CLI Subcommands & Flags ---
+    'hermes -z "<prompt>" is the purest one-shot: final answer on stdout, nothing else — ideal for piping in scripts.',
+    'hermes chat --pass-session-id injects the session ID into the system prompt so the agent can self-reference it.',
+    'hermes chat --image path/to/pic.png attaches a local image to a single -q query without a separate upload step.',
+    'hermes chat --ignore-user-config skips ~/.hermes/config.yaml — reproducible bug reports and CI runs.',
+    "hermes chat --source tool tags programmatic chats so they don't clutter hermes sessions list.",
+    'hermes dump --show-keys includes redacted API key fingerprints for deeper support debugging.',
+    'hermes sessions rename <ID> "new title" renames any past session; hermes sessions delete <ID> removes one.',
+    'hermes import restores a session export or profile archive produced by sessions export or profile export.',
+    'hermes fallback manages the fallback_model chain interactively — no hand-editing config.yaml.',
+    'hermes pairing rotates the DM pairing token — the first messager after rotation claims access to the bot.',
+    'hermes setup walks first-time users through provider, keys, and platform wiring in one interactive flow.',
+    'hermes status --deep runs the full health sweep across every component; plain hermes status is the quick view.',
+
+    # --- Agent Behavior Env Vars ---
+    'HERMES_AGENT_TIMEOUT=0 disables the gateway inactivity kill for a running agent — use for long research runs.',
+    'HERMES_ENABLE_PROJECT_PLUGINS=1 auto-loads repo-local plugins from ./.hermes/plugins/ — trust-gated by design.',
+    "HERMES_DISABLE_FILE_STATE_GUARD=1 turns off the 'file changed since you read it' guard on patch and write_file.",
+    'HERMES_ALLOW_PRIVATE_URLS=true lets web tools hit localhost and private networks — off by default in gateway mode.',
+    'HERMES_OPTIONAL_SKILLS=name1,name2 auto-installs extra optional-catalog skills on first run per profile.',
+    'HERMES_BUNDLED_SKILLS points at a custom bundled-skill tree — used by Homebrew and Nix packaging.',
+    'HERMES_DUMP_REQUEST_STDOUT=1 dumps every API request payload to stdout instead of log files.',
+    'HERMES_OAUTH_TRACE=1 logs redacted OAuth token exchange and refresh attempts for debugging provider auth.',
+    'HERMES_STREAM_RETRIES (default 3) controls mid-stream reconnect attempts on transient network errors.',
+
+    # --- Gateway Behavior Env Vars ---
+    'HERMES_GATEWAY_BUSY_ACK_ENABLED=false silences the ⚡/⏳/⏩ ack messages when a user messages a busy agent.',
+    'HERMES_AGENT_NOTIFY_INTERVAL (default 180s) sets how often the gateway pings with progress on long turns.',
+    'HERMES_RESTART_DRAIN_TIMEOUT (default 900s) caps how long /restart waits for in-flight runs before forcing.',
+    'HERMES_CHECKPOINT_TIMEOUT (default 30s) caps filesystem checkpoint creation — raise it on huge monorepos.',
+
+    # --- Auxiliary Tasks & Image Generation ---
+    'image_gen.model in config.yaml picks the FAL model: flux-2/klein, gpt-image-2, nano-banana-pro, and more.',
+    'image_gen.provider routes image generation through a plugin (OpenAI Images, Codex, FAL) instead of the default.',
+    'AUXILIARY_VISION_BASE_URL + AUXILIARY_VISION_API_KEY point vision analysis at any OpenAI-compatible endpoint.',
+    'auxiliary.session_search.max_concurrency bounds how many matched sessions are summarized in parallel (default 3).',
+    'auxiliary.session_search.extra_body forwards provider-specific OpenAI-compatible fields on summarization calls.',
+
+    # --- Security ---
+    'security.tirith_fail_open: false makes Hermes block commands when the tirith scanner itself errors out.',
+    'TIRITH_FAIL_OPEN env var overrides the tirith_fail_open config — a quick toggle without editing config.yaml.',
+
+    # --- Sessions & Source Tags ---
+    '--source tool chats are excluded from hermes sessions list by default — set --source explicitly to see them.',
+    'Session IDs are timestamp-prefixed (20250305_091523_abcd) so sorting works naturally in ls and jq.',
+
+    # --- Misc ---
+    'API_SERVER_MODEL_NAME customizes the model name on /v1/models — essential for multi-profile Open WebUI setups.',
+    'Dashboard plugins are served from /dashboard-plugins/<name>/ — drop files into ~/.hermes/dashboard-plugins/.',
 ]


@@ -299,6 +299,15 @@ TOOL_CATEGORIES = {
                    {"key": "FIRECRAWL_API_URL", "prompt": "Your Firecrawl instance URL (e.g., http://localhost:3002)"},
                ],
            },
+            {
+                "name": "SearXNG",
+                "badge": "free · self-hosted · search only",
+                "tag": "Privacy-respecting metasearch engine — search only (pair with any extract provider)",
+                "web_backend": "searxng",
+                "env_vars": [
+                    {"key": "SEARXNG_URL", "prompt": "Your SearXNG instance URL (e.g., http://localhost:8080)", "url": "https://searxng.github.io/searxng/"},
+                ],
+            },
        ],
    },
    "image_gen": {
@@ -1920,21 +1929,27 @@ def _reconfigure_provider(provider: dict, config: dict):
            return

    if provider.get("tts_provider"):
-        config.setdefault("tts", {})["provider"] = provider["tts_provider"]
+        tts_cfg = config.setdefault("tts", {})
+        tts_cfg["provider"] = provider["tts_provider"]
+        tts_cfg["use_gateway"] = bool(managed_feature)
        _print_success(f"  TTS provider set to: {provider['tts_provider']}")

    if "browser_provider" in provider:
        bp = provider["browser_provider"]
+        browser_cfg = config.setdefault("browser", {})
        if bp == "local":
-            config.setdefault("browser", {})["cloud_provider"] = "local"
+            browser_cfg["cloud_provider"] = "local"
            _print_success("  Browser set to local mode")
        elif bp:
-            config.setdefault("browser", {})["cloud_provider"] = bp
+            browser_cfg["cloud_provider"] = bp
            _print_success(f"  Browser cloud provider set to: {bp}")
+        browser_cfg["use_gateway"] = bool(managed_feature)

    # Set web search backend in config if applicable
    if provider.get("web_backend"):
-        config.setdefault("web", {})["backend"] = provider["web_backend"]
+        web_cfg = config.setdefault("web", {})
+        web_cfg["backend"] = provider["web_backend"]
+        web_cfg["use_gateway"] = bool(managed_feature)
        _print_success(f"  Web backend set to: {provider['web_backend']}")

    if managed_feature and managed_feature not in ("web", "tts", "browser"):
@@ -27,6 +27,192 @@ import sys
 import threading
 from typing import Any, Callable, Optional

+# Modifier aliases mirrored from the TUI parser (``ui-tui/src/lib/platform.ts``)
+# ``_MOD_ALIASES`` table — the contract that removes the cross-runtime
+# mismatch Copilot flagged in round-9 on #19835.
+#
+# ``super``/``win``/``windows`` are intentionally absent: prompt_toolkit
+# has no super/meta modifier for the Cmd key, so those spellings are
+# TUI-only. The normalizer below returns the documented default
+# (``c-b``) for them — a silent fallback was preferred to a hard
+# startup crash (Copilot round-11). The CLI binding site
+# (``_register_voice_handler`` in cli.py) logs a warning when that
+# fallback fires so users see why their TUI-only shortcut isn't
+# bound in the classic CLI.
+_VOICE_MOD_ALIASES = {
+    "ctrl": "c-",
+    "control": "c-",
+    "alt": "a-",
+    "option": "a-",
+    "opt": "a-",
+}
+
+# Named keys prompt_toolkit accepts in ``c-<name>`` / ``a-<name>`` form.
+# Aliases collapse to prompt_toolkit's canonical spelling so the same
+# config value binds identically in both runtimes (Copilot round-10 on
+# #19835).
+_VOICE_NAMED_KEYS = {
+    "space": "space",
+    "spc": "space",
+    "enter": "enter",
+    "return": "enter",
+    "ret": "enter",
+    "tab": "tab",
+    "escape": "escape",
+    "esc": "escape",
+    "backspace": "backspace",
+    "bs": "backspace",
+    "delete": "delete",
+    "del": "delete",
+}
+
+# ``useInputHandlers()`` intercepts these before the voice check runs,
+# so a binding like ``ctrl+c`` (interrupt), ``ctrl+d`` (quit), or
+# ``ctrl+l`` (clear screen) would be advertised in /voice status but
+# never fire push-to-talk — the same blocklist the TUI parser uses.
+_VOICE_RESERVED_CTRL_CHARS = frozenset({"c", "d", "l"})
+
+# On macOS the classic CLI's prompt_toolkit bindings for copy / exit /
+# clear also claim ``a-c`` / ``a-d`` / ``a-l`` via the action-modifier
+# lookup, and hermes-ink reports Alt as ``key.meta`` on many terminals.
+# Mirror the TUI parser's darwin-only reservation so ``option+c`` etc.
+# don't bind Alt+C in the CLI while the TUI silently falls back to
+# Ctrl+B (Copilot round-14 on #19835).
+_VOICE_RESERVED_ALT_CHARS_MAC = frozenset({"c", "d", "l"})
+
+# Fallback binding (Ctrl+B in prompt_toolkit spelling). The normalizer
+# below returns this for every value it rejects: non-string, blank,
+# malformed, reserved, or using a modifier it does not know.
+_DEFAULT_PT_KEY = "c-b"
+
+
+def voice_record_key_from_config(cfg: Any) -> Any:
+    """Fetch ``cfg["voice"]["record_key"]`` without assuming dict shapes.
+
+    A hand-edited config can leave ``cfg["voice"]`` as a scalar (for
+    example ``voice: true`` or ``voice: cmd+b``), in which case a chained
+    ``.get("voice", {}).get("record_key")`` raises AttributeError.  This
+    lookup tolerates that.
+
+    Returns the raw ``record_key`` value (whatever type it has), or
+    ``None`` when ``cfg`` is not a dict, ``cfg["voice"]`` is not a dict,
+    or the key is absent.  ``None`` is accepted by the normalizer and
+    formatter in this module, which map it to the default shortcut.
+    """
+    voice_section = cfg.get("voice") if isinstance(cfg, dict) else None
+    if isinstance(voice_section, dict):
+        return voice_section.get("record_key")
+    return None
+
+
+def normalize_voice_record_key_for_prompt_toolkit(raw: Any) -> str:
+    """Translate ``voice.record_key`` into a ``c-<key>`` / ``a-<key>`` name.
+
+    Input is expected as ``<modifier>+<key>``; matching is case-insensitive
+    and whitespace around the value and around each token is ignored.
+    According to the notes in this module the rules are meant to match the
+    TUI parser in ``ui-tui/src/lib/platform.ts``.
+
+    Each of the following resolves to the default ``c-b``:
+
+    * a non-string or blank value
+    * anything that is not exactly one modifier plus one key — bare keys
+      and multi-modifier chords such as ``ctrl+alt+r``
+    * a modifier missing from ``_VOICE_MOD_ALIASES``; that covers typos
+      and the ``super`` / ``win`` / ``windows`` spellings
+    * ``ctrl+c`` / ``ctrl+d`` / ``ctrl+l``, and on macOS ``alt+c|d|l``
+    * a multi-character key missing from ``_VOICE_NAMED_KEYS``
+
+    Accepted values come back as ``c-o`` for ``ctrl+o``, ``c-space`` for
+    ``ctrl+space``, and named-key aliases collapse to one spelling
+    (``ctrl+return`` → ``c-enter``).
+    """
+    if not isinstance(raw, str):
+        return _DEFAULT_PT_KEY
+
+    # Split on "+" and discard empty pieces, so "", "ctrl+" and "+b" all
+    # end up with fewer than two tokens.
+    tokens = [piece.strip() for piece in raw.strip().lower().split("+")]
+    tokens = [piece for piece in tokens if piece]
+
+    # Zero tokens: blank value. One token: bare key with no modifier.
+    # Three or more: multi-modifier chord. None of these are supported.
+    if len(tokens) != 2:
+        return _DEFAULT_PT_KEY
+
+    mod_name, key_name = tokens
+
+    # ``super`` / ``win`` / ``windows`` are deliberately not in the alias
+    # table, so they are rejected here together with misspelt modifiers.
+    pt_prefix = _VOICE_MOD_ALIASES.get(mod_name)
+    if pt_prefix is None:
+        return _DEFAULT_PT_KEY
+
+    # Multi-character keys must be a known named key; a typo such as
+    # ``ctrl+spcae`` falls back instead of yielding ``c-spcae``.
+    if len(key_name) > 1:
+        canonical = _VOICE_NAMED_KEYS.get(key_name)
+        return f"{pt_prefix}{canonical}" if canonical else _DEFAULT_PT_KEY
+
+    # Single-character keys: refuse the reserved chords.
+    reserved_ctrl = pt_prefix == "c-" and key_name in _VOICE_RESERVED_CTRL_CHARS
+    reserved_alt = (
+        pt_prefix == "a-"
+        and sys.platform == "darwin"
+        and key_name in _VOICE_RESERVED_ALT_CHARS_MAC
+    )
+    if reserved_ctrl or reserved_alt:
+        return _DEFAULT_PT_KEY
+
+    return f"{pt_prefix}{key_name}"
+
+
+def format_voice_record_key_for_status(raw: Any) -> str:
+    """Render ``voice.record_key`` for ``/voice status``, e.g. ``Ctrl+B``.
+
+    The value is first run through
+    ``normalize_voice_record_key_for_prompt_toolkit``, so the text shown
+    is always the binding that function resolves to.  Malformed,
+    reserved, and ``super`` / ``win`` values are therefore displayed as
+    the default ``Ctrl+B`` rather than as the string the user typed.
+
+    Returns ``Ctrl+<Key>`` or ``Alt+<Key>`` with the first character of
+    the key upper-cased (``c-o`` → ``Ctrl+O``, ``a-space`` → ``Alt+Space``).
+    """
+    normalized = normalize_voice_record_key_for_prompt_toolkit(raw)
+
+    # The normalizer only ever returns ``c-<key>`` or ``a-<key>`` with a
+    # non-empty key, so there is no ``super+<key>`` form to render here.
+    # The fallback below is purely defensive.
+    mod, _, key = normalized.partition("-")
+    prefix = {"c": "Ctrl+", "a": "Alt+"}.get(mod)
+    if prefix is None or not key:
+        return "Ctrl+B"
+
+    # Upper-case only the first character: ``b`` → ``B``, ``space`` → ``Space``.
+    return prefix + key[0].upper() + key[1:]
+
+
 from tools.voice_mode import (
    create_audio_recorder,
    is_whisper_hallucination,
@@ -3260,8 +3260,9 @@ def mount_spa(application: FastAPI):
 # Built-in dashboard themes — label + description only.  The actual color
 # definitions live in the frontend (web/src/themes/presets.ts).
 _BUILTIN_DASHBOARD_THEMES = [
-    {"name": "default",   "label": "Hermes Teal",  "description": "Classic dark teal — the canonical Hermes look"},
-    {"name": "midnight",  "label": "Midnight",      "description": "Deep blue-violet with cool accents"},
+    {"name": "default",       "label": "Hermes Teal",         "description": "Classic dark teal — the canonical Hermes look"},
+    {"name": "default-large", "label": "Hermes Teal (Large)", "description": "Hermes Teal with bigger fonts and roomier spacing"},
+    {"name": "midnight",      "label": "Midnight",            "description": "Deep blue-violet with cool accents"},
    {"name": "ember",     "label": "Ember",          "description": "Warm crimson and bronze — forge vibes"},
    {"name": "mono",      "label": "Mono",           "description": "Clean grayscale — minimal and focused"},
    {"name": "cyberpunk", "label": "Cyberpunk",      "description": "Neon green on black — matrix terminal"},
@@ -718,6 +718,45 @@ class SessionDB:
                self._remove_session_files(sessions_dir, sid)
        return len(removed_ids)

+    def finalize_orphaned_compression_sessions(self) -> int:
+        """Close out compression-continuation sessions that were never ended.
+
+        A session row is updated only when all of these hold:
+
+        * it has a parent whose end_reason is 'compression' and whose
+          ended_at is set
+        * its own end_reason and ended_at are both NULL
+        * api_call_count = 0
+        * started_at is more than 7 days before the call
+        * at least one row in ``messages`` references it
+
+        Matching rows get ended_at = now and
+        end_reason = 'orphaned_compression'.  Nothing is deleted.
+        Fix for #20001.
+
+        Returns the number of rows updated, or 0 when ``_execute_write``
+        hands back a falsy result.
+        """
+        seven_days = 7 * 24 * 60 * 60
+        cutoff = time.time() - seven_days
+
+        def _mark_ended(conn):
+            ended_at = time.time()
+            return conn.execute(
+                """
+                UPDATE sessions
+                SET ended_at = ?,
+                    end_reason = 'orphaned_compression'
+                WHERE api_call_count = 0
+                  AND end_reason IS NULL
+                  AND ended_at IS NULL
+                  AND started_at < ?
+                  AND parent_session_id IS NOT NULL
+                  AND EXISTS (
+                      SELECT 1 FROM sessions p
+                      WHERE p.id = sessions.parent_session_id
+                        AND p.end_reason = 'compression'
+                        AND p.ended_at IS NOT NULL
+                  )
+                  AND EXISTS (
+                      SELECT 1 FROM messages m
+                      WHERE m.session_id = sessions.id
+                  )
+                """,
+                (ended_at, cutoff),
+            ).rowcount
+
+        updated = self._execute_write(_mark_ended)
+        return updated or 0
+
    def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
        """Get a session by ID."""
        with self._lock:
@@ -2148,6 +2187,388 @@ class SessionDB:
            )
        self._execute_write(_do)

+    def apply_telegram_topic_migration(self) -> None:
+        """Create Telegram DM topic-mode tables on explicit /topic opt-in.
+
+        This migration is deliberately not part of automatic SessionDB startup
+        reconciliation. Operators must be able to upgrade Hermes, keep the old
+        Telegram bot behavior running, and only mutate topic-mode state when the
+        user executes /topic to opt into the feature.
+
+        Safe to call repeatedly: tables and indexes are created with
+        IF NOT EXISTS, the v1 → v2 rebuild only runs while the stored schema
+        version is below 2 and the existing foreign key lacks CASCADE, and
+        the rebuild discards any scratch table left by an interrupted
+        earlier attempt before recreating it.
+
+        Schema versions:
+          v1 — initial shape (no ON DELETE CASCADE on session_id FK)
+          v2 — session_id FK gets ON DELETE CASCADE so session pruning
+               automatically clears bindings.
+        """
+        def _do(conn):
+            # NOTE(review): sqlite3's executescript() can commit a pending
+            # transaction before it runs the script — confirm that this is
+            # compatible with how _execute_write manages its transaction.
+            conn.executescript(
+                """
+                CREATE TABLE IF NOT EXISTS telegram_dm_topic_mode (
+                    chat_id TEXT PRIMARY KEY,
+                    user_id TEXT NOT NULL,
+                    enabled INTEGER NOT NULL DEFAULT 1,
+                    activated_at REAL NOT NULL,
+                    updated_at REAL NOT NULL,
+                    has_topics_enabled INTEGER,
+                    allows_users_to_create_topics INTEGER,
+                    capability_checked_at REAL,
+                    intro_message_id TEXT,
+                    pinned_message_id TEXT
+                );
+
+                CREATE TABLE IF NOT EXISTS telegram_dm_topic_bindings (
+                    chat_id TEXT NOT NULL,
+                    thread_id TEXT NOT NULL,
+                    user_id TEXT NOT NULL,
+                    session_key TEXT NOT NULL,
+                    session_id TEXT NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
+                    managed_mode TEXT NOT NULL DEFAULT 'auto',
+                    linked_at REAL NOT NULL,
+                    updated_at REAL NOT NULL,
+                    PRIMARY KEY (chat_id, thread_id)
+                );
+
+                CREATE UNIQUE INDEX IF NOT EXISTS idx_telegram_dm_topic_bindings_session
+                ON telegram_dm_topic_bindings(session_id);
+
+                CREATE INDEX IF NOT EXISTS idx_telegram_dm_topic_bindings_user
+                ON telegram_dm_topic_bindings(user_id, chat_id);
+                """
+            )
+
+            # v1 → v2: rebuild telegram_dm_topic_bindings if its session_id FK
+            # lacks ON DELETE CASCADE. SQLite can't ALTER a foreign key, so we
+            # rebuild the table. Only runs once per DB (version gate).
+            current = conn.execute(
+                "SELECT value FROM state_meta WHERE key = ?",
+                ("telegram_dm_topic_schema_version",),
+            ).fetchone()
+            # A missing or non-numeric stored value is treated as version 0.
+            current_version = int(current[0]) if current and str(current[0]).isdigit() else 0
+            if current_version < 2:
+                fk_rows = conn.execute(
+                    "PRAGMA foreign_key_list('telegram_dm_topic_bindings')"
+                ).fetchall()
+                # foreign_key_list columns: [2] = referenced table,
+                # [6] = ON DELETE action.
+                needs_rebuild = any(
+                    row[2] == "sessions" and (row[6] or "") != "CASCADE"
+                    for row in fk_rows
+                )
+                if needs_rebuild:
+                    # The leading DROP TABLE IF EXISTS clears a scratch table
+                    # left behind by a rebuild that failed part-way; without
+                    # it the CREATE TABLE below would raise "already exists"
+                    # on every later attempt.  The original table is still
+                    # present whenever this branch runs, so no data is lost.
+                    conn.executescript(
+                        """
+                        DROP TABLE IF EXISTS telegram_dm_topic_bindings_new;
+                        CREATE TABLE telegram_dm_topic_bindings_new (
+                            chat_id TEXT NOT NULL,
+                            thread_id TEXT NOT NULL,
+                            user_id TEXT NOT NULL,
+                            session_key TEXT NOT NULL,
+                            session_id TEXT NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
+                            managed_mode TEXT NOT NULL DEFAULT 'auto',
+                            linked_at REAL NOT NULL,
+                            updated_at REAL NOT NULL,
+                            PRIMARY KEY (chat_id, thread_id)
+                        );
+                        INSERT INTO telegram_dm_topic_bindings_new
+                            SELECT chat_id, thread_id, user_id, session_key,
+                                   session_id, managed_mode, linked_at, updated_at
+                            FROM telegram_dm_topic_bindings;
+                        DROP TABLE telegram_dm_topic_bindings;
+                        ALTER TABLE telegram_dm_topic_bindings_new
+                            RENAME TO telegram_dm_topic_bindings;
+                        CREATE UNIQUE INDEX idx_telegram_dm_topic_bindings_session
+                            ON telegram_dm_topic_bindings(session_id);
+                        CREATE INDEX idx_telegram_dm_topic_bindings_user
+                            ON telegram_dm_topic_bindings(user_id, chat_id);
+                        """
+                    )
+
+            # Record the schema version so the rebuild check is skipped next time.
+            conn.execute(
+                "INSERT INTO state_meta (key, value) VALUES (?, ?) "
+                "ON CONFLICT(key) DO UPDATE SET value = excluded.value",
+                ("telegram_dm_topic_schema_version", "2"),
+            )
+        self._execute_write(_do)
+
+    def enable_telegram_topic_mode(
+        self,
+        *,
+        chat_id: str,
+        user_id: str,
+        has_topics_enabled: Optional[bool] = None,
+        allows_users_to_create_topics: Optional[bool] = None,
+    ) -> None:
+        """Turn on Telegram DM topic mode for one private chat/user.
+
+        Runs the topic-mode migration first, then upserts the
+        ``telegram_dm_topic_mode`` row keyed by ``chat_id``.  On first insert
+        ``activated_at`` and ``updated_at`` are both set to the current time;
+        on conflict ``activated_at`` is left alone while ``user_id``, the two
+        capability flags, ``updated_at`` and ``capability_checked_at`` are
+        overwritten and ``enabled`` is forced to 1.
+
+        Args:
+            chat_id: Chat identifier; stored as ``str(chat_id)``.
+            user_id: User identifier; stored as ``str(user_id)``.
+            has_topics_enabled: Tri-state flag stored as 1, 0 or NULL.
+            allows_users_to_create_topics: Tri-state flag stored as 1, 0 or NULL.
+        """
+        self.apply_telegram_topic_migration()
+        stamp = time.time()
+
+        def _upsert(conn):
+            # None stays NULL; any other value is reduced to 1 or 0.
+            flags = tuple(
+                None if flag is None else int(bool(flag))
+                for flag in (has_topics_enabled, allows_users_to_create_topics)
+            )
+            conn.execute(
+                """
+                INSERT INTO telegram_dm_topic_mode (
+                    chat_id, user_id, enabled, activated_at, updated_at,
+                    has_topics_enabled, allows_users_to_create_topics,
+                    capability_checked_at
+                ) VALUES (?, ?, 1, ?, ?, ?, ?, ?)
+                ON CONFLICT(chat_id) DO UPDATE SET
+                    user_id = excluded.user_id,
+                    enabled = 1,
+                    updated_at = excluded.updated_at,
+                    has_topics_enabled = excluded.has_topics_enabled,
+                    allows_users_to_create_topics = excluded.allows_users_to_create_topics,
+                    capability_checked_at = excluded.capability_checked_at
+                """,
+                (str(chat_id), str(user_id), stamp, stamp, *flags, stamp),
+            )
+
+        self._execute_write(_upsert)
+
+    def disable_telegram_topic_mode(
+        self,
+        *,
+        chat_id: str,
+        clear_bindings: bool = True,
+    ) -> None:
+        """Disable Telegram DM topic mode for one private chat.
+
+        When ``clear_bindings`` is True (default) the (chat_id, thread_id)
+        bindings for this chat are also cleared so re-enabling later
+        starts from a clean slate. Set to False if the operator wants to
+        preserve bindings for a later re-enable.
+
+        Never creates the topic-mode tables from scratch; if they don't
+        exist there is nothing to disable and the call is a no-op.
+
+        Raises:
+            sqlite3.OperationalError: for any operational failure other
+                than the topic-mode tables being absent.
+        """
+        def _do(conn):
+            try:
+                conn.execute(
+                    "UPDATE telegram_dm_topic_mode SET enabled = 0, updated_at = ? "
+                    "WHERE chat_id = ?",
+                    (time.time(), str(chat_id)),
+                )
+                if clear_bindings:
+                    conn.execute(
+                        "DELETE FROM telegram_dm_topic_bindings WHERE chat_id = ?",
+                        (str(chat_id),),
+                    )
+            except sqlite3.OperationalError as exc:
+                # Only "no such table" means there is nothing to disable.
+                # Other operational errors (a locked database, disk I/O
+                # failure, ...) must propagate; swallowing them would make
+                # the call look successful while topic mode stays enabled.
+                if "no such table" not in str(exc).lower():
+                    raise
+        self._execute_write(_do)
+
+    def is_telegram_topic_mode_enabled(self, *, chat_id: str, user_id: str) -> bool:
+        """Report whether topic mode is switched on for this chat/user pair.
+
+        Read-only lookup in ``telegram_dm_topic_mode`` matching both
+        ``chat_id`` and ``user_id`` (compared as strings).  Returns False
+        when no row matches or when the query raises
+        ``sqlite3.OperationalError``; otherwise the truthiness of the stored
+        ``enabled`` column.
+        """
+        with self._lock:
+            try:
+                hit = self._conn.execute(
+                    """
+                    SELECT enabled FROM telegram_dm_topic_mode
+                    WHERE chat_id = ? AND user_id = ?
+                    """,
+                    (str(chat_id), str(user_id)),
+                ).fetchone()
+            except sqlite3.OperationalError:
+                return False
+
+        if hit is None:
+            return False
+        # Row objects are read by column name, plain tuples by position.
+        flag = hit["enabled"] if isinstance(hit, sqlite3.Row) else hit[0]
+        return bool(flag)
+
+    def get_telegram_topic_binding(
+        self,
+        *,
+        chat_id: str,
+        thread_id: str,
+    ) -> Optional[Dict[str, Any]]:
+        """Look up the binding row for one Telegram DM topic.
+
+        Read-only.  Returns the ``telegram_dm_topic_bindings`` row for
+        ``(chat_id, thread_id)`` as a dict, or ``None`` when no row matches
+        or the query raises ``sqlite3.OperationalError``.
+        """
+        with self._lock:
+            try:
+                cursor = self._conn.execute(
+                    """
+                    SELECT * FROM telegram_dm_topic_bindings
+                    WHERE chat_id = ? AND thread_id = ?
+                    """,
+                    (str(chat_id), str(thread_id)),
+                )
+                match = cursor.fetchone()
+            except sqlite3.OperationalError:
+                return None
+
+        if not match:
+            return None
+        return dict(match)
+
+    def bind_telegram_topic(
+        self,
+        *,
+        chat_id: str,
+        thread_id: str,
+        user_id: str,
+        session_key: str,
+        session_id: str,
+        managed_mode: str = "auto",
+    ) -> None:
+        """Attach one Telegram DM topic thread to one Hermes session.
+
+        Runs the topic-mode migration first.  All identifiers are coerced
+        to ``str`` before use.  The row is upserted on
+        ``(chat_id, thread_id)``; on conflict every column except
+        ``linked_at`` is replaced.
+
+        A session may be linked to at most one topic: binding the same
+        topic to the same session again is idempotent, while a session
+        that is already bound to a different topic is refused.
+
+        Raises:
+            ValueError: if ``session_id`` is already bound to another
+                ``(chat_id, thread_id)``.
+        """
+        self.apply_telegram_topic_migration()
+        now = time.time()
+        chat_id, thread_id, user_id, session_key, session_id = (
+            str(value)
+            for value in (chat_id, thread_id, user_id, session_key, session_id)
+        )
+
+        def _bind(conn):
+            owner = conn.execute(
+                """
+                SELECT chat_id, thread_id FROM telegram_dm_topic_bindings
+                WHERE session_id = ?
+                """,
+                (session_id,),
+            ).fetchone()
+            if owner is not None:
+                # Positional access works for sqlite3.Row and plain tuples
+                # alike; the order follows the SELECT list above.
+                linked_chat, linked_thread = owner[0], owner[1]
+                if str(linked_chat) != chat_id or str(linked_thread) != thread_id:
+                    raise ValueError("session is already linked to another Telegram topic")
+
+            values = (
+                chat_id, thread_id, user_id, session_key, session_id,
+                managed_mode, now, now,
+            )
+            conn.execute(
+                """
+                INSERT INTO telegram_dm_topic_bindings (
+                    chat_id, thread_id, user_id, session_key, session_id,
+                    managed_mode, linked_at, updated_at
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                ON CONFLICT(chat_id, thread_id) DO UPDATE SET
+                    user_id = excluded.user_id,
+                    session_key = excluded.session_key,
+                    session_id = excluded.session_id,
+                    managed_mode = excluded.managed_mode,
+                    updated_at = excluded.updated_at
+                """,
+                values,
+            )
+
+        self._execute_write(_bind)
+
+    def is_telegram_session_linked_to_topic(self, *, session_id: str) -> bool:
+        """Tell whether a Hermes session is bound to any Telegram DM topic.
+
+        Read-only: the telegram-topic migration is NOT triggered.  If the
+        query raises ``sqlite3.OperationalError`` — the expected case being
+        that the topic-mode tables were never created because nobody ran
+        ``/topic`` in this profile — the session is reported as unbound.
+        """
+        lookup = (str(session_id),)
+        with self._lock:
+            try:
+                found = self._conn.execute(
+                    """
+                    SELECT 1 FROM telegram_dm_topic_bindings
+                    WHERE session_id = ?
+                    LIMIT 1
+                    """,
+                    lookup,
+                ).fetchone()
+            except sqlite3.OperationalError:
+                return False
+            return found is not None
+
+    def list_unlinked_telegram_sessions_for_user(
+        self,
+        *,
+        chat_id: str,
+        user_id: str,
+        limit: int = 10,
+    ) -> List[Dict[str, Any]]:
+        """List previous Telegram sessions for this user that are not bound to a topic.
+
+        Read-only: does NOT trigger the telegram-topic migration. If the
+        topic-mode tables are absent, fall back to a simpler query that
+        just returns this user's Telegram sessions — there can't be any
+        bindings yet.
+
+        Args:
+            chat_id: Accepted for interface symmetry but not used by either
+                query; rows are filtered by ``user_id`` only.
+            user_id: Matched against ``sessions.user_id`` as a string.
+            limit: Maximum number of rows returned.
+
+        Returns:
+            Session dicts ordered by most recent activity first. Each has
+            every ``sessions`` column plus ``last_active`` (latest message
+            timestamp, or ``started_at`` when there are no messages) and
+            ``preview`` (start of the first user message, newlines replaced
+            by spaces, cut to 60 characters with a trailing "..." when longer).
+        """
+        with self._lock:
+            try:
+                # Primary query: excludes sessions that have a binding row.
+                rows = self._conn.execute(
+                    """
+                    SELECT s.*,
+                        COALESCE(
+                            (SELECT SUBSTR(REPLACE(REPLACE(m.content, X'0A', ' '), X'0D', ' '), 1, 63)
+                             FROM messages m
+                             WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL
+                             ORDER BY m.timestamp, m.id LIMIT 1),
+                            ''
+                        ) AS _preview_raw,
+                        COALESCE(
+                            (SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id),
+                            s.started_at
+                        ) AS last_active
+                    FROM sessions s
+                    WHERE s.source = 'telegram'
+                      AND s.user_id = ?
+                      AND NOT EXISTS (
+                          SELECT 1 FROM telegram_dm_topic_bindings b
+                          WHERE b.session_id = s.id
+                      )
+                    ORDER BY last_active DESC, s.started_at DESC
+                    LIMIT ?
+                    """,
+                    (str(user_id), int(limit)),
+                ).fetchall()
+            except sqlite3.OperationalError:
+                # telegram_dm_topic_bindings doesn't exist yet — no bindings
+                # means every telegram session for this user is "unlinked".
+                # NOTE(review): any OperationalError takes this path, not
+                # only a missing table — confirm that is intended.
+                rows = self._conn.execute(
+                    """
+                    SELECT s.*,
+                        COALESCE(
+                            (SELECT SUBSTR(REPLACE(REPLACE(m.content, X'0A', ' '), X'0D', ' '), 1, 63)
+                             FROM messages m
+                             WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL
+                             ORDER BY m.timestamp, m.id LIMIT 1),
+                            ''
+                        ) AS _preview_raw,
+                        COALESCE(
+                            (SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id),
+                            s.started_at
+                        ) AS last_active
+                    FROM sessions s
+                    WHERE s.source = 'telegram'
+                      AND s.user_id = ?
+                    ORDER BY last_active DESC, s.started_at DESC
+                    LIMIT ?
+                    """,
+                    (str(user_id), int(limit)),
+                ).fetchall()
+
+        sessions: List[Dict[str, Any]] = []
+        for row in rows:
+            session = dict(row)
+            # Swap the internal _preview_raw column for a public "preview".
+            # SQL fetched up to 63 characters, enough to tell whether the
+            # text exceeds the 60-character display limit.
+            raw = str(session.pop("_preview_raw", "") or "").strip()
+            session["preview"] = raw[:60] + ("..." if len(raw) > 60 else "") if raw else ""
+            sessions.append(session)
+        return sessions
+
    # ── Space reclamation ──

    def vacuum(self) -> None:
@@ -0,0 +1,24 @@
+# Hermes-Katalog für statische Meldungen -- Deutsch
+# See locales/en.yaml for the source of truth; keep keys in sync.
+
+approval:
+  dangerous_header: "⚠️  GEFÄHRLICHER BEFEHL: {description}"
+  choose_long:     "      [o]einmal  |  [s]sitzung  |  [a]immer  |  [d]ablehnen"
+  choose_short:    "      [o]einmal  |  [s]sitzung  |  [d]ablehnen"
+  prompt_long:     "      Auswahl [o/s/a/D]: "
+  prompt_short:    "      Auswahl [o/s/D]: "
+  timeout:         "      ⏱ Zeitüberschreitung – Befehl wird abgelehnt"
+  allowed_once:    "      ✓ Einmalig erlaubt"
+  allowed_session: "      ✓ Für diese Sitzung erlaubt"
+  allowed_always:  "      ✓ Zur dauerhaften Erlaubnisliste hinzugefügt"
+  denied:          "      ✗ Abgelehnt"
+  cancelled:       "      ✗ Abgebrochen"
+  blocklist_message: "Dieser Befehl steht auf der unbedingten Sperrliste und kann nicht genehmigt werden."
+
+gateway:
+  approval_expired: "⚠️ Genehmigung abgelaufen (Agent wartet nicht mehr). Bitten Sie den Agenten, es erneut zu versuchen."
+  draining:         "⏳ Warte auf {count} aktive(n) Agent(en) vor dem Neustart..."
+  goal_cleared:     "✓ Ziel gelöscht."
+  no_active_goal:   "Kein aktives Ziel."
+  config_read_failed: "⚠️ config.yaml konnte nicht gelesen werden: {error}"
+  config_save_failed: "⚠️ Konfiguration konnte nicht gespeichert werden: {error}"
@@ -0,0 +1,35 @@
+# Hermes static-message catalog -- English (baseline / source of truth)
+#
+# Only user-facing static messages from the CLI approval prompt and a handful
+# of gateway slash-command replies live here.  Agent-generated output, log
+# lines, error tracebacks, tool outputs, and slash-command descriptions stay
+# in English and are NOT translated -- see agent/i18n.py for scope rationale.
+#
+# Keys are dotted paths; nesting below is purely for readability.  Values may
+# contain {placeholder} tokens for str.format substitution.  When adding a
+# new key, add it to EVERY locale file (en/zh/ja/de/es/fr/tr/uk) in the same commit --
+# tests/agent/test_i18n.py asserts catalog parity.
+
+approval:
+  # CLI approval prompt -- shown when a dangerous command needs user review.
+  dangerous_header: "⚠️  DANGEROUS COMMAND: {description}"
+  choose_long:     "      [o]nce  |  [s]ession  |  [a]lways  |  [d]eny"
+  choose_short:    "      [o]nce  |  [s]ession  |  [d]eny"
+  prompt_long:     "      Choice [o/s/a/D]: "
+  prompt_short:    "      Choice [o/s/D]: "
+  timeout:         "      ⏱ Timeout - denying command"
+  allowed_once:    "      ✓ Allowed once"
+  allowed_session: "      ✓ Allowed for this session"
+  allowed_always:  "      ✓ Added to permanent allowlist"
+  denied:          "      ✗ Denied"
+  cancelled:       "      ✗ Cancelled"
+  blocklist_message: "This command is on the unconditional blocklist and cannot be approved."
+
+gateway:
+  # Messenger replies to slash commands and implicit state changes.
+  approval_expired: "⚠️ Approval expired (agent is no longer waiting). Ask the agent to try again."
+  draining:         "⏳ Draining {count} active agent(s) before restart..."
+  goal_cleared:     "✓ Goal cleared."
+  no_active_goal:   "No active goal."
+  config_read_failed: "⚠️ Could not read config.yaml: {error}"
+  config_save_failed: "⚠️ Could not save config: {error}"
@@ -0,0 +1,24 @@
+# Catálogo de mensajes estáticos de Hermes -- Español
+# See locales/en.yaml for the source of truth; keep keys in sync.
+
+approval:
+  dangerous_header: "⚠️  COMANDO PELIGROSO: {description}"
+  choose_long:     "      [o]una vez  |  [s]sesión  |  [a]siempre  |  [d]denegar"
+  choose_short:    "      [o]una vez  |  [s]sesión  |  [d]denegar"
+  prompt_long:     "      Opción [o/s/a/D]: "
+  prompt_short:    "      Opción [o/s/D]: "
+  timeout:         "      ⏱ Tiempo agotado — comando denegado"
+  allowed_once:    "      ✓ Permitido una vez"
+  allowed_session: "      ✓ Permitido en esta sesión"
+  allowed_always:  "      ✓ Añadido a la lista de permitidos permanente"
+  denied:          "      ✗ Denegado"
+  cancelled:       "      ✗ Cancelado"
+  blocklist_message: "Este comando está en la lista de bloqueo incondicional y no se puede aprobar."
+
+gateway:
+  approval_expired: "⚠️ La aprobación ha caducado (el agente ya no está esperando). Pida al agente que lo intente de nuevo."
+  draining:         "⏳ Esperando a que terminen {count} agente(s) activo(s) antes de reiniciar..."
+  goal_cleared:     "✓ Objetivo eliminado."
+  no_active_goal:   "No hay objetivo activo."
+  config_read_failed: "⚠️ No se pudo leer config.yaml: {error}"
+  config_save_failed: "⚠️ No se pudo guardar la configuración: {error}"
@@ -0,0 +1,24 @@
+# Hermes static-message catalog -- French (français)
+# See locales/en.yaml for the source of truth; keep keys in sync.
+
+approval:
+  dangerous_header: "⚠️  COMMANDE DANGEREUSE : {description}"
+  choose_long:     "      [o]une fois  |  [s]ession  |  [a]toujours  |  [d]refuser"
+  choose_short:    "      [o]une fois  |  [s]ession  |  [d]refuser"
+  prompt_long:     "      Choix [o/s/a/D] : "
+  prompt_short:    "      Choix [o/s/D] : "
+  timeout:         "      ⏱ Délai dépassé — commande refusée"
+  allowed_once:    "      ✓ Autorisé une fois"
+  allowed_session: "      ✓ Autorisé pour cette session"
+  allowed_always:  "      ✓ Ajouté à la liste d'autorisation permanente"
+  denied:          "      ✗ Refusé"
+  cancelled:       "      ✗ Annulé"
+  blocklist_message: "Cette commande est sur la liste de blocage inconditionnel et ne peut pas être approuvée."
+
+gateway:
+  approval_expired: "⚠️ Approbation expirée (l'agent n'attend plus). Demandez à l'agent de réessayer."
+  draining:         "⏳ Vidage de {count} agent(s) actif(s) avant redémarrage..."
+  goal_cleared:     "✓ Objectif effacé."
+  no_active_goal:   "Aucun objectif actif."
+  config_read_failed: "⚠️ Impossible de lire config.yaml : {error}"
+  config_save_failed: "⚠️ Impossible de sauvegarder la configuration : {error}"
@@ -0,0 +1,24 @@
+# Hermes 静的メッセージカタログ -- 日本語
+# See locales/en.yaml for the source of truth; keep keys in sync.
+
+approval:
+  dangerous_header: "⚠️  危険なコマンド: {description}"
+  choose_long:     "      [o]今回のみ  |  [s]セッション中  |  [a]常に許可  |  [d]拒否"
+  choose_short:    "      [o]今回のみ  |  [s]セッション中  |  [d]拒否"
+  prompt_long:     "      選択 [o/s/a/D]: "
+  prompt_short:    "      選択 [o/s/D]: "
+  timeout:         "      ⏱ タイムアウト — コマンドを拒否しました"
+  allowed_once:    "      ✓ 今回のみ許可"
+  allowed_session: "      ✓ このセッション中は許可"
+  allowed_always:  "      ✓ 永続的な許可リストに追加"
+  denied:          "      ✗ 拒否しました"
+  cancelled:       "      ✗ キャンセルしました"
+  blocklist_message: "このコマンドは無条件ブロックリストに含まれており、承認できません。"
+
+gateway:
+  approval_expired: "⚠️ 承認の有効期限が切れました（エージェントはもう待機していません）。エージェントに再試行を依頼してください。"
+  draining:         "⏳ 再起動前に {count} 個のアクティブエージェントの終了を待っています..."
+  goal_cleared:     "✓ 目標をクリアしました。"
+  no_active_goal:   "アクティブな目標はありません。"
+  config_read_failed: "⚠️ config.yaml を読み込めませんでした: {error}"
+  config_save_failed: "⚠️ 設定を保存できませんでした: {error}"
@@ -0,0 +1,24 @@
+# Hermes statik mesaj kataloğu -- Türkçe
+# See locales/en.yaml for the source of truth; keep keys in sync.
+
+approval:
+  dangerous_header: "⚠️  TEHLİKELİ KOMUT: {description}"
+  choose_long:     "      [o]bir kez  |  [s]oturum  |  [a]her zaman  |  [d]reddet"
+  choose_short:    "      [o]bir kez  |  [s]oturum  |  [d]reddet"
+  prompt_long:     "      Seçim [o/s/a/D]: "
+  prompt_short:    "      Seçim [o/s/D]: "
+  timeout:         "      ⏱ Zaman aşımı — komut reddedildi"
+  allowed_once:    "      ✓ Bir kez izin verildi"
+  allowed_session: "      ✓ Bu oturum için izin verildi"
+  allowed_always:  "      ✓ Kalıcı izin listesine eklendi"
+  denied:          "      ✗ Reddedildi"
+  cancelled:       "      ✗ İptal edildi"
+  blocklist_message: "Bu komut koşulsuz engelleme listesinde ve onaylanamaz."
+
+gateway:
+  approval_expired: "⚠️ Onay süresi doldu (ajan artık beklemiyor). Ajanın tekrar denemesini isteyin."
+  draining:         "⏳ Yeniden başlatmadan önce {count} aktif ajan bekleniyor..."
+  goal_cleared:     "✓ Hedef temizlendi."
+  no_active_goal:   "Aktif hedef yok."
+  config_read_failed: "⚠️ config.yaml okunamadı: {error}"
+  config_save_failed: "⚠️ Yapılandırma kaydedilemedi: {error}"
@@ -0,0 +1,24 @@
+# Каталог статичних повідомлень Hermes -- Українська
+# See locales/en.yaml for the source of truth; keep keys in sync.
+
+approval:
+  dangerous_header: "⚠️  НЕБЕЗПЕЧНА КОМАНДА: {description}"
+  choose_long:     "      [o]один раз  |  [s]сеанс  |  [a]завжди  |  [d]відхилити"
+  choose_short:    "      [o]один раз  |  [s]сеанс  |  [d]відхилити"
+  prompt_long:     "      Вибір [o/s/a/D]: "
+  prompt_short:    "      Вибір [o/s/D]: "
+  timeout:         "      ⏱ Час очікування вичерпано — команду відхилено"
+  allowed_once:    "      ✓ Дозволено один раз"
+  allowed_session: "      ✓ Дозволено для цього сеансу"
+  allowed_always:  "      ✓ Додано до постійного списку дозволених команд"
+  denied:          "      ✗ Відхилено"
+  cancelled:       "      ✗ Скасовано"
+  blocklist_message: "Ця команда є в безумовному списку блокування, її не можна схвалити."
+
+gateway:
+  approval_expired: "⚠️ Час схвалення минув (агент більше не очікує). Попросіть агента спробувати ще раз."
+  draining:         "⏳ Очікування завершення {count} активних агент(ів) перед перезапуском..."
+  goal_cleared:     "✓ Ціль очищено."
+  no_active_goal:   "Немає активної цілі."
+  config_read_failed: "⚠️ Не вдалося прочитати config.yaml: {error}"
+  config_save_failed: "⚠️ Не вдалося зберегти конфігурацію: {error}"
@@ -0,0 +1,24 @@
+# Hermes 静态消息目录 -- 中文（简体）
+# See locales/en.yaml for the source of truth; keep keys in sync.
+
+approval:
+  dangerous_header: "⚠️  危险命令： {description}"
+  choose_long:     "      [o]仅此一次  |  [s]本次会话  |  [a]永久允许  |  [d]拒绝"
+  choose_short:    "      [o]仅此一次  |  [s]本次会话  |  [d]拒绝"
+  prompt_long:     "      选择 [o/s/a/D]: "
+  prompt_short:    "      选择 [o/s/D]: "
+  timeout:         "      ⏱ 超时 — 已拒绝命令"
+  allowed_once:    "      ✓ 本次允许"
+  allowed_session: "      ✓ 本次会话内允许"
+  allowed_always:  "      ✓ 已加入永久允许列表"
+  denied:          "      ✗ 已拒绝"
+  cancelled:       "      ✗ 已取消"
+  blocklist_message: "此命令位于无条件拦截列表中，无法被批准。"
+
+gateway:
+  approval_expired: "⚠️ 批准已过期（代理不再等待）。请让代理重试。"
+  draining:         "⏳ 正在等待 {count} 个活跃代理结束后重启..."
+  goal_cleared:     "✓ 目标已清除。"
+  no_active_goal:   "当前没有活跃的目标。"
+  config_read_failed: "⚠️ 无法读取 config.yaml：{error}"
+  config_save_failed: "⚠️ 无法保存配置：{error}"
@@ -511,6 +511,12 @@ def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:

    Handles ``"type": "integer"``, ``"type": "number"``, ``"type": "boolean"``,
    and union types (``"type": ["integer", "string"]``).
+
+    Also wraps bare scalar values in a single-element list when the schema
+    declares ``"type": "array"``.  Open-weight models (DeepSeek, Qwen, GLM)
+    sometimes emit ``{"urls": "https://a.com"}`` when the tool expects
+    ``{"urls": ["https://a.com"]}``; wrapping here avoids a confusing tool
+    failure on what is otherwise a well-formed call.
    """
    if not args or not isinstance(args, dict):
        return args
@@ -523,13 +529,42 @@ def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
    if not properties:
        return args

-    for key, value in args.items():
-        if not isinstance(value, str):
-            continue
+    for key, value in list(args.items()):
        prop_schema = properties.get(key)
        if not prop_schema:
            continue
        expected = prop_schema.get("type")
+
+        # Wrap bare non-list values when the schema declares ``array``.
+        # Strings still go through _coerce_value first so JSON-encoded
+        # arrays (``'["a","b"]'``) get parsed and nullable ``"null"``
+        # becomes ``None`` rather than ``["null"]``.
+        # ``None`` itself is preserved — we don't know whether the model
+        # meant "omit" or "empty list", and tools with sensible defaults
+        # (e.g. read_file's normalize_read_pagination) already handle it.
+        if expected == "array" and value is not None and not isinstance(value, (list, tuple)):
+            if isinstance(value, str):
+                coerced = _coerce_value(value, expected, schema=prop_schema)
+                if coerced is not value:
+                    # _coerce_value handled it (JSON-parsed list or
+                    # nullable "null" → None).
+                    args[key] = coerced
+                    continue
+                args[key] = [value]
+                logger.info(
+                    "coerce_tool_args: wrapped bare string in list for %s.%s",
+                    tool_name, key,
+                )
+                continue
+            args[key] = [value]
+            logger.info(
+                "coerce_tool_args: wrapped bare %s in list for %s.%s",
+                type(value).__name__, tool_name, key,
+            )
+            continue
+
+        if not isinstance(value, str):
+            continue
        if not expected and not _schema_allows_null(prop_schema):
            continue
        coerced = _coerce_value(value, expected, schema=prop_schema)
@@ -163,35 +163,42 @@
      for entry in "''${ENTRIES[@]}"; do
        IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
        echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
-        OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
-        STATUS=$?
-        if [ "$STATUS" -eq 0 ]; then
+
+        # Compute the actual hash from the lockfile directly using
+        # prefetch-npm-deps. This avoids false "ok" from nix build when
+        # an old derivation is cached in a substituter (cachix/cache.nixos.org).
+        LOCK_FILE="$FOLDER/package-lock.json"
+        NEW_HASH=$(${pkgs.lib.getExe pkgs.prefetch-npm-deps} "$LOCK_FILE" 2>/dev/null)
+        if [ -z "$NEW_HASH" ]; then
+          echo "    prefetch-npm-deps failed, falling back to nix build" >&2
+          OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
+          STATUS=$?
+          if [ "$STATUS" -eq 0 ]; then
+            echo "    ok (via nix build)"
+            continue
+          fi
+          NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
+          if [ -z "$NEW_HASH" ]; then
+            if echo "$OUTPUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
+              echo "    skipped (transient cache failure — see primary nix build for real status)" >&2
+              echo "$OUTPUT" | tail -8 >&2
+              continue
+            fi
+            echo "    build failed with no hash mismatch:" >&2
+            echo "$OUTPUT" | tail -40 >&2
+            exit 1
+          fi
+        fi
+
+        OLD_HASH=$(grep -oE 'hash = "sha256-[^"]+"' "$NIX_FILE" | head -1 \
+          | sed -E 's/hash = "(.*)"/\1/')
+
+        if [ "$NEW_HASH" = "$OLD_HASH" ]; then
          echo "    ok"
          continue
        fi

-        NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
-        if [ -z "$NEW_HASH" ]; then
-          # Magic-Nix-Cache occasionally returns HTTP 418 / cache-throttled
-          # mid-run; nix then prints "outputs … not valid, so checking is
-          # not possible" without a `got:` line.  That's an infrastructure
-          # blip, not a stale lockfile — warn + skip rather than failing
-          # the lint.  A real hash mismatch would still surface in the
-          # primary `.#$ATTR` build, which is a separate CI job.
-          if echo "$OUTPUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
-            echo "    skipped (transient cache failure — see primary nix build for real status)" >&2
-            echo "$OUTPUT" | tail -8 >&2
-            continue
-          fi
-          echo "    build failed with no hash mismatch:" >&2
-          echo "$OUTPUT" | tail -40 >&2
-          exit 1
-        fi
-
        HASH_LINE=$(grep -n 'hash = "sha256-' "$NIX_FILE" | head -1 | cut -d: -f1)
-        OLD_HASH=$(grep -oE 'hash = "sha256-[^"]+"' "$NIX_FILE" | head -1 \
-          | sed -E 's/hash = "(.*)"/\1/')
-        LOCK_FILE="$FOLDER/package-lock.json"
        echo "    stale: $NIX_FILE:$HASH_LINE $OLD_HASH -> $NEW_HASH"
        STALE=1

@@ -4,7 +4,7 @@ let
  src = ../ui-tui;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-a/HGI9OgVcTnZrMXA7xFMGnFoVxyHe95fulVz+WNYB0=";
+    hash = "sha256-MLcLhjTF6dgdvNBtJWzo8Nh19eNh/ZitD2b07nm61Tc=";
  };

  npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
@@ -0,0 +1,190 @@
+---
+name: hyperframes
+description: Create HTML-based video compositions, animated title cards, social overlays, captioned talking-head videos, audio-reactive visuals, and shader transitions using HyperFrames. HTML is the source of truth for video. Use when the user wants a rendered MP4/WebM from an HTML composition, wants to animate text/logos/charts over media, needs captions synced to audio, wants TTS narration, or wants to convert a website into a video.
+version: 1.0.0
+author: heygen-com
+license: Apache-2.0
+prerequisites:
+  commands: [node, ffmpeg, npx]
+metadata:
+  hermes:
+    tags: [creative, video, animation, html, gsap, motion-graphics]
+    related_skills: [manim-video, meme-generation]
+    category: creative
+    requires_toolsets: [terminal]
+---
+
+# HyperFrames
+
+HTML is the source of truth for video. A composition is an HTML file with `data-*` attributes for timing, a GSAP timeline for animation, and CSS for appearance. The HyperFrames engine captures the page frame-by-frame and encodes to MP4/WebM with FFmpeg.
+
+**Complement to `manim-video`:** Use `manim-video` for mathematical/geometric explainers (equations, 3B1B-style). Use `hyperframes` for motion-graphics, talking-head with captions, product tours, social overlays, shader transitions, and anything driven by real video/audio media.
+
+## When to Use
+
+- User asks for a rendered video from text, a script, or a website
+- Animated title cards, lower thirds, or typographic intros
+- Captioned narration video (TTS + captions synced to waveform)
+- Audio-reactive visuals (beat sync, spectrum bars, pulsing glow)
+- Scene-to-scene transitions (crossfade, wipe, shader warp, flash-through-white)
+- Social overlays (Instagram/TikTok/YouTube style)
+- Website-to-video pipeline (capture a URL, produce a promo)
+- Any HTML/CSS/JS animation that must render deterministically to a video file
+
+Do **not** use this skill for:
+- Pure math/equation animation (→ `manim-video`)
+- Image generation or memes (→ `meme-generation`, image models)
+- Live video conferencing or streaming
+
+## Quick Reference
+
+```bash
+npx hyperframes init my-video               # scaffold a project
+cd my-video
+npx hyperframes lint                        # validate before preview/render
+npx hyperframes preview                     # live-reload browser preview (port 3002)
+npx hyperframes render --output final.mp4   # render to MP4
+npx hyperframes doctor                      # diagnose environment issues
+```
+
+Render flags: `--quality draft|standard|high` · `--fps 24|30|60` · `--format mp4|webm` · `--docker` (reproducible) · `--strict`.
+
+Full CLI reference: [references/cli.md](references/cli.md).
+
+## Setup (one-time)
+
+```bash
+bash "$(dirname "$(find ~/.hermes/skills -path '*/hyperframes/SKILL.md' 2>/dev/null | head -1)")/scripts/setup.sh"
+```
+
+The script:
+1. Verifies Node.js >= 22 and FFmpeg are installed (prints fix instructions if not).
+2. Installs the `hyperframes` CLI globally (`npm install -g "hyperframes@>=0.4.2"`).
+3. Pre-caches `chrome-headless-shell` via Puppeteer — **required** for best-quality rendering via Chrome's `HeadlessExperimental.beginFrame` capture path.
+4. Runs `npx hyperframes doctor` and reports the result.
+
+See [references/troubleshooting.md](references/troubleshooting.md) if setup fails.
+
+## Procedure
+
+### 1. Plan before writing HTML
+
+Before touching code, articulate at a high level:
+- **What** — narrative arc, key moments, emotional beats
+- **Structure** — compositions, tracks (video/audio/overlays), durations
+- **Visual identity** — colors, fonts, motion character (explosive / cinematic / fluid / technical)
+- **Hero frame** — for each scene, the moment when the most elements are simultaneously visible. This is the static layout you'll build first.
+
+**Visual Identity Gate (HARD-GATE).** Before writing ANY composition HTML, a visual identity must be defined. Do NOT write compositions with default or generic colors (`#333`, `#3b82f6`, `Roboto` are tells that this step was skipped). Check in order:
+
+1. **`DESIGN.md` at project root?** → Use its exact colors, fonts, motion rules, and "What NOT to Do" constraints.
+2. **User named a style** (e.g. "Swiss Pulse", "dark and techy", "luxury brand")? → Generate a minimal `DESIGN.md` with `## Style Prompt`, `## Colors` (3-5 hex with roles), `## Typography` (1-2 families), `## What NOT to Do` (3-5 anti-patterns).
+3. **None of the above?** → Ask 3 questions before writing any HTML:
+   - Mood? (explosive / cinematic / fluid / technical / chaotic / warm)
+   - Light or dark canvas?
+   - Any brand colors, fonts, or visual references?
+
+   Then generate a `DESIGN.md` from the answers. Every composition must trace its palette and typography back to `DESIGN.md` or explicit user direction.
+
+### 2. Scaffold
+
+```bash
+npx hyperframes init my-video --non-interactive
+```
+
+Templates: `blank`, `warm-grain`, `play-mode`, `swiss-grid`, `vignelli`, `decision-tree`, `kinetic-type`, `product-promo`, `nyt-graph`. Pass `--example <name>` to pick one, `--video clip.mp4` or `--audio track.mp3` to seed with media.
+
+### 3. Layout before animation
+
+Write the static HTML+CSS for the **hero frame first** — no GSAP yet. The `.scene-content` container must fill the scene (`width:100%; height:100%; padding:Npx`) with `display:flex` + `gap`. Use padding to push content inward — never `position: absolute; top: Npx` on a content container (content overflows when taller than the remaining space).
+
+Only after the hero frame looks right, add `gsap.from()` entrances (animate **to** the CSS position) and `gsap.to()` exits (animate **from** it).
+
+See [references/composition.md](references/composition.md) for the full data-attribute schema and composition rules.
+
+### 4. Animate with GSAP
+
+Every composition must:
+- Register its timeline: `window.__timelines["<composition-id>"] = tl`
+- Start paused: `gsap.timeline({ paused: true })` — the player controls playback
+- Use finite `repeat` values (no `repeat: -1` — breaks the capture engine). Calculate: `repeat: Math.ceil(duration / cycleDuration) - 1`.
+- Be deterministic — no `Math.random()`, `Date.now()`, or wall-clock logic. Use a seeded PRNG if you need pseudo-randomness.
+- Build synchronously — no `async`/`await`, `setTimeout`, or Promises around timeline construction.
+
+See [references/gsap.md](references/gsap.md) for the core GSAP API (tweens, eases, stagger, timelines).
+
+### 5. Transitions between scenes
+
+Multi-scene compositions require transitions. Rules:
+1. **Always use a transition between scenes** — no jump cuts.
+2. **Always use entrance animations** on every scene element (`gsap.from(...)`).
+3. **Never use exit animations** except on the final scene — the transition IS the exit.
+4. The final scene may fade out.
+
+Use `npx hyperframes add <transition-name>` to install shader transitions (`flash-through-white`, `liquid-wipe`, etc.). Full list: `npx hyperframes add --list`.
+
+### 6. Audio, captions, TTS, audio-reactive, highlighting
+
+- **Audio:** always a separate `<audio>` element (video is `muted playsinline`).
+- **TTS:** `npx hyperframes tts "Script text" --voice af_nova --output narration.wav`. List voices with `--list`. Voice ID first letter encodes language (`a`/`b`=English, `e`=Spanish, `f`=French, `j`=Japanese, `z`=Mandarin, etc.) — the CLI auto-infers the phonemizer locale; pass `--lang` only to override. Non-English phonemization requires `espeak-ng` installed system-wide.
+- **Captions:** `npx hyperframes transcribe narration.wav` → word-level transcript. Pick style from the transcript tone (hype / corporate / tutorial / storytelling / social — see the table in `references/features.md`). **Language rule:** never use `.en` whisper models unless the audio is confirmed English — `.en` translates non-English audio instead of transcribing it. Every caption group MUST have a hard `tl.set(el, { opacity: 0, visibility: "hidden" }, group.end)` kill after its exit tween — otherwise groups leak visible into later ones.
+- **Audio-reactive visuals:** pre-extract audio bands (bass / mid / treble) and sample per-frame inside the timeline with a `for` loop of `tl.call(draw, [], f / fps)` — a single long tween does NOT react to audio. Map bass → `scale` (pulse), treble → `textShadow`/`boxShadow` (glow), overall amplitude → `opacity`/`y`/`backgroundColor`. Avoid equalizer-bar clichés — let content guide the visual, audio drive its behavior.
+- **Marker-style highlighting:** highlight, circle, burst, scribble, sketchout effects for text emphasis are deterministic CSS+GSAP — see `references/features.md#marker-highlighting`. Fully seekable, no animated SVG filters.
+- **Scene transitions:** every multi-scene composition MUST use transitions (no jump cuts). Pick from CSS primitives (push slide, blur crossfade, zoom through, staggered blocks) or shader transitions (`flash-through-white`, `liquid-wipe`, `cross-warp-morph`, `chromatic-split`, etc.) via `npx hyperframes add`. Mood and energy tables live in `references/features.md#transitions`. Do not mix CSS and shader transitions in the same composition.
+
+### 7. Lint, validate, inspect, preview, render
+
+```bash
+npx hyperframes lint              # catches missing data-composition-id, overlapping tracks, unregistered timelines
+npx hyperframes validate          # WCAG contrast audit at 5 timestamps
+npx hyperframes inspect           # visual layout audit — overflow, off-frame elements, occluded text
+npx hyperframes preview           # live browser preview
+npx hyperframes render --quality draft --output draft.mp4    # fast iteration
+npx hyperframes render --quality high --output final.mp4     # final delivery
+```
+
+`hyperframes validate` samples background pixels behind every text element and warns on contrast ratios below 4.5:1 (or 3:1 for large text). `hyperframes inspect` is the layout-side companion — runs the page at multiple timestamps and flags issues that a static lint can't see (a caption that wraps past the safe area only at 4.5s, a card that overflows when its title is the longest variant, an element that ends up behind a transition shader). Run `inspect` especially on compositions with speech bubbles, cards, captions, or tight typography.
+
+### 8. Website-to-video (if the user gives a URL)
+
+Use the 7-step capture-to-video workflow in [references/website-to-video.md](references/website-to-video.md): capture → DESIGN.md → SCRIPT.md → storyboard → composition → render → deliver.
+
+## Pitfalls
+
+- **`'HeadlessExperimental.beginFrame' wasn't found`** — Chromium 147+ removed this protocol. Ensure you're on `hyperframes@>=0.4.2` (auto-detects and falls back to screenshot mode). Escape hatch: `export PRODUCER_FORCE_SCREENSHOT=true`. See [hyperframes#294](https://github.com/heygen-com/hyperframes/issues/294) and [references/troubleshooting.md](references/troubleshooting.md).
+- **System Chrome (not `chrome-headless-shell`)** — renders hang for 120s then timeout. Run `npx puppeteer browsers install chrome-headless-shell` (setup.sh does this). `hyperframes doctor` reports which binary will be used.
+- **`repeat: -1` anywhere** — breaks the capture engine. Always compute a finite repeat count.
+- **`gsap.set()` on clip elements that enter later** — the element doesn't exist at page load. Use `tl.set(selector, vars, timePosition)` inside the timeline instead, at or after the clip's `data-start`.
+- **`<br>` inside content text** — forced breaks don't know the rendered font width, so natural wrap + `<br>` double-breaks. Use `max-width` to let text wrap. Exception: short display titles where each word is deliberately on its own line.
+- **Animating `visibility` or `display`** — GSAP can't tween these. Use `autoAlpha` (handles both visibility and opacity).
+- **Calling `video.play()` or `audio.play()`** — the framework owns playback. Never call these yourself.
+- **Building timelines async** — the capture engine reads `window.__timelines` synchronously after page load. Never wrap timeline construction in `async`, `setTimeout`, or a Promise.
+- **Standalone `index.html` wrapped in `<template>`** — hides all content from the browser. Only **sub-compositions** loaded via `data-composition-src` use `<template>`.
+- **Using video for audio** — always muted `<video>` + separate `<audio>`.
+
+## Verification
+
+Before and after rendering:
+
+1. **Lint + validate + inspect pass:** `npx hyperframes lint --strict && npx hyperframes validate && npx hyperframes inspect` (lint catches structural issues, validate catches contrast, inspect catches visual layout / overflow issues — see troubleshooting.md if warnings appear).
+2. **Animation choreography** — for new compositions or significant animation changes, run the animation map. `npx hyperframes init` copies the skill scripts into the project, so the path is project-local:
+   ```bash
+   node skills/hyperframes/scripts/animation-map.mjs <composition-dir> \
+     --out <composition-dir>/.hyperframes/anim-map
+   ```
+   Outputs a single `animation-map.json` with per-tween summaries, ASCII Gantt timeline, stagger detection, dead zones (>1s with no animation), element lifecycles, and flags (`offscreen`, `collision`, `invisible`, `paced-fast` <0.2s, `paced-slow` >2s). Scan summaries and flags — fix or justify each. Skip on small edits.
+3. **File exists + non-zero:** `ls -lh final.mp4`.
+4. **Duration matches `data-duration`:** `ffprobe -v error -show_entries format=duration -of default=nw=1:nk=1 final.mp4`.
+5. **Visual check:** extract a mid-composition frame: `ffmpeg -i final.mp4 -ss 00:00:05 -vframes 1 preview.png`.
+6. **Audio present if expected:** `ffprobe -v error -show_streams -select_streams a -of default=nw=1:nk=1 final.mp4 | head -1`.
+
+If `hyperframes render` fails, run `npx hyperframes doctor` and attach its output when reporting.
+
+## References
+
+- [composition.md](references/composition.md) — data attributes, timeline contract, non-negotiable rules, typography/asset rules
+- [cli.md](references/cli.md) — every CLI command (init, capture, lint, validate, inspect, preview, render, transcribe, tts, doctor, browser, info, upgrade, benchmark)
+- [gsap.md](references/gsap.md) — GSAP core API for HyperFrames (tweens, eases, stagger, timelines, matchMedia)
+- [features.md](references/features.md) — captions, TTS, audio-reactive, marker highlighting, transitions (load on demand)
+- [website-to-video.md](references/website-to-video.md) — 7-step capture-to-video workflow
+- [troubleshooting.md](references/troubleshooting.md) — OpenClaw fix, env vars, common render errors
@@ -0,0 +1,185 @@
+# HyperFrames CLI
+
+Everything runs through `npx hyperframes` (or the globally-installed `hyperframes` after `npm install -g hyperframes`). Requires Node.js >= 22 and FFmpeg.
+
+## Workflow
+
+1. **Scaffold** — `npx hyperframes init my-video` (or `npx hyperframes capture <url>` if starting from a website)
+2. **Write** — author HTML composition (see `composition.md`)
+3. **Lint** — `npx hyperframes lint`
+4. **Validate** — `npx hyperframes validate` (WCAG contrast audit)
+5. **Inspect** — `npx hyperframes inspect` (visual layout audit)
+6. **Preview** — `npx hyperframes preview`
+7. **Render** — `npx hyperframes render`
+
+Always lint before preview/render — catches missing `data-composition-id`, overlapping tracks, and unregistered timelines.
+
+## init — Scaffold a Project
+
+```bash
+npx hyperframes init my-video                        # interactive wizard
+npx hyperframes init my-video --example warm-grain   # pick an example template
+npx hyperframes init my-video --video clip.mp4       # seed with a video file
+npx hyperframes init my-video --audio track.mp3      # seed with an audio file
+npx hyperframes init my-video --non-interactive      # skip prompts (CI / agent use)
+```
+
+Templates: `blank`, `warm-grain`, `play-mode`, `swiss-grid`, `vignelli`, `decision-tree`, `kinetic-type`, `product-promo`, `nyt-graph`.
+
+`init` creates the correct file structure, copies media, transcribes audio with Whisper, and installs authoring skills. Use it instead of creating files by hand.
+
+## capture — Website → Editable Components
+
+```bash
+npx hyperframes capture https://example.com                  # → captures/example.com/
+npx hyperframes capture https://stripe.com -o stripe-video   # custom output dir
+npx hyperframes capture https://example.com --json           # machine-readable output
+npx hyperframes capture https://example.com --skip-assets    # skip images/SVGs
+```
+
+Captures the site into `captures/<hostname>/capture/` by default, producing `capture/screenshots/`, `capture/assets/`, `capture/extracted/` (tokens.json, visible-text.txt, fonts.json), and a self-contained snapshot.
+
+All downstream steps (DESIGN.md, SCRIPT.md, STORYBOARD, composition) read from the `capture/` subfolder — see `website-to-video.md`.
+
+## lint
+
+```bash
+npx hyperframes lint                # current directory
+npx hyperframes lint ./my-project   # specific project
+npx hyperframes lint --verbose      # include info-level findings
+npx hyperframes lint --json         # machine-readable output
+```
+
+Lints `index.html` and all files in `compositions/`. Reports errors (must fix), warnings (should fix), and info (only with `--verbose`).
+
+## validate
+
+```bash
+npx hyperframes validate                 # WCAG contrast audit at 5 timestamps
+npx hyperframes validate --no-contrast   # skip while iterating
+```
+
+Seeks to 5 timestamps, screenshots the page, samples background pixels behind every text element, and warns on contrast ratios below 4.5:1 (normal text) or 3:1 (large text — 24px+, or 19px+ bold). Run before final render.
+
+## inspect
+
+```bash
+npx hyperframes inspect                 # visual layout audit at 5 timestamps
+npx hyperframes inspect ./my-project    # specific project
+npx hyperframes inspect --json          # agent-readable findings
+npx hyperframes inspect --samples 15    # denser timeline sweep
+npx hyperframes inspect --at 1.5,4,7.25 # explicit hero-frame timestamps
+```
+
+Use this after `lint` and `validate`, especially for compositions with speech bubbles, cards, captions, or tight typography. Reports overflow, off-frame elements, occluded text, contrast warnings, and per-timestamp layout summaries — catches issues that pure timeline lint can't see (e.g., a caption that wraps past the safe area only at a specific timestamp).
+
+`npx hyperframes layout` is a compatibility alias for the same visual inspection pass.
+
+## preview
+
+```bash
+npx hyperframes preview                # serve current directory (port 3002)
+npx hyperframes preview --port 4567    # custom port
+```
+
+Hot-reloads on file changes. Opens the Studio in your browser automatically.
+
+## render
+
+```bash
+npx hyperframes render                              # standard MP4
+npx hyperframes render --output final.mp4           # named output
+npx hyperframes render --quality draft              # fast iteration
+npx hyperframes render --fps 60 --quality high      # final delivery
+npx hyperframes render --format webm                # transparent WebM
+npx hyperframes render --docker                     # byte-identical reproducible render
+```
+
+| Flag           | Options                 | Default                        | Notes                       |
+| -------------- | ----------------------- | ------------------------------ | --------------------------- |
+| `--output`     | path                    | `renders/<name>_<timestamp>.mp4` | Output path                 |
+| `--fps`        | 24, 30, 60              | 30                             | 60fps doubles render time   |
+| `--quality`    | `draft`, `standard`, `high` | standard                   | draft for iterating         |
+| `--format`     | `mp4`, `webm`           | mp4                            | WebM supports transparency  |
+| `--workers`    | 1–8 or `auto`           | auto                           | Each spawns Chrome          |
+| `--docker`     | flag                    | off                            | Reproducible output         |
+| `--gpu`        | flag                    | off                            | GPU-accelerated encoding    |
+| `--strict`     | flag                    | off                            | Fail on lint errors         |
+| `--strict-all` | flag                    | off                            | Fail on errors AND warnings |
+
+**Quality guidance:** `draft` while iterating, `standard` for review, `high` for final delivery.
+
+## transcribe
+
+```bash
+npx hyperframes transcribe audio.mp3
+npx hyperframes transcribe video.mp4 --model medium.en --language en
+npx hyperframes transcribe subtitles.srt     # import existing
+npx hyperframes transcribe subtitles.vtt
+npx hyperframes transcribe openai-response.json
+```
+
+Produces word-level timings suitable for caption components. First run downloads the Whisper model (cached after).
+
+## tts
+
+```bash
+npx hyperframes tts "Text here" --voice af_nova --output narration.wav
+npx hyperframes tts script.txt --voice bf_emma
+npx hyperframes tts "La reunión empieza a las nueve" --voice ef_dora --output es.wav
+npx hyperframes tts "Hello there" --voice af_heart --lang fr-fr --output accented.wav
+npx hyperframes tts --list                    # show all voices
+```
+
+Uses Kokoro (local, no API key). Voice ID first letter encodes language: `a` American English, `b` British English, `e` Spanish, `f` French, `h` Hindi, `i` Italian, `j` Japanese, `p` Brazilian Portuguese, `z` Mandarin. The CLI auto-infers the phonemizer locale from that prefix — pass `--lang` only to override (e.g. stylized accents). Valid `--lang` codes: `en-us`, `en-gb`, `es`, `fr-fr`, `hi`, `it`, `pt-br`, `ja`, `zh`. Non-English phonemization requires `espeak-ng` installed system-wide (`apt-get install espeak-ng` / `brew install espeak-ng`).
+
+## doctor
+
+```bash
+npx hyperframes doctor
+```
+
+Verifies environment:
+- Node.js >= 22
+- FFmpeg present on PATH
+- Available RAM (renders are memory-hungry — 4 GB minimum)
+- Chrome binary resolution (`chrome-headless-shell` preferred over system Chrome)
+- Current `hyperframes` version
+
+Run this **first** when a render fails. See `troubleshooting.md` for interpreting the output.
+
+## browser
+
+```bash
+npx hyperframes browser --install      # install the bundled chrome-headless-shell
+npx hyperframes browser --path         # print the resolved browser binary path
+npx hyperframes browser --clean        # clear the bundled browser cache
+```
+
+## info
+
+```bash
+npx hyperframes info
+```
+
+Prints version, Node version, FFmpeg version, OS, and resolved browser path — useful in bug reports.
+
+## upgrade
+
+```bash
+npx hyperframes upgrade -y
+```
+
+Check for and install updates. Run this if you hit `HeadlessExperimental.beginFrame` errors — the auto-detect fix shipped in `hyperframes@0.4.2` (commit 4c72ba4, March 2026).
+
+## Other
+
+```bash
+npx hyperframes compositions    # list compositions in the project
+npx hyperframes docs            # open documentation in browser
+npx hyperframes benchmark .     # benchmark render performance
+npx hyperframes add <block>     # install a block/component from the catalog
+npx hyperframes add --list      # browse the catalog
+```
+
+Popular catalog blocks: `flash-through-white` (shader transition), `instagram-follow` (social overlay), `data-chart` (animated chart), `lower-third` (talking-head overlay). See [hyperframes.heygen.com/catalog](https://hyperframes.heygen.com/catalog).
@@ -0,0 +1,129 @@
+# Composition Authoring
+
+HTML structure, data attributes, timeline contract, and non-negotiable rules.
+
+## Root Structure
+
+Standalone `index.html` — the top-level composition. **Does NOT use `<template>`**. Put the `data-composition-id` div directly in `<body>`.
+
+```html
+<!doctype html>
+<html>
+  <body>
+    <div
+      id="stage"
+      data-composition-id="root"
+      data-start="0"
+      data-duration="10"
+      data-width="1920"
+      data-height="1080"
+    >
+      <!-- clips go here -->
+      <video id="clip-1" data-start="0" data-duration="5" data-track-index="0" src="intro.mp4" muted playsinline></video>
+      <img id="logo" data-start="2" data-duration="3" data-track-index="1" src="logo.png" />
+      <audio id="music" data-start="0" data-duration="10" data-track-index="2" data-volume="0.5" src="music.wav"></audio>
+    </div>
+
+    <script src="https://cdn.jsdelivr.net/npm/gsap@3.14.2/dist/gsap.min.js"></script>
+    <script>
+      window.__timelines = window.__timelines || {};
+      const tl = gsap.timeline({ paused: true });
+      tl.from("#logo", { opacity: 0, y: 40, duration: 0.6 }, 2);
+      window.__timelines["root"] = tl;
+    </script>
+  </body>
+</html>
+```
+
+Sub-compositions loaded via `data-composition-src` **DO** use `<template>`:
+
+```html
+<template id="my-comp-template">
+  <div data-composition-id="my-comp" data-width="1920" data-height="1080">
+    <!-- content + scoped <style> + <script> with window.__timelines["my-comp"] -->
+  </div>
+</template>
+```
+
+Load from the root: `<div id="el-1" data-composition-id="my-comp" data-composition-src="compositions/my-comp.html" data-start="0" data-duration="10" data-track-index="1"></div>`
+
+## Data Attributes
+
+### All clips
+
+| Attribute          | Required                          | Values                                                 |
+| ------------------ | --------------------------------- | ------------------------------------------------------ |
+| `id`               | Yes                               | Unique identifier                                      |
+| `data-start`       | Yes                               | Seconds, or clip ID reference (`"el-1"`, `"intro + 2"`) |
+| `data-duration`    | Required for img/div/compositions | Seconds. Video/audio defaults to media duration.       |
+| `data-track-index` | Yes                               | Integer. Same-track clips cannot overlap.              |
+| `data-media-start` | No                                | Trim offset into source (seconds)                      |
+| `data-volume`      | No                                | 0–1 (default 1)                                        |
+
+`data-track-index` controls timeline layout only — **not** visual layering. Use CSS `z-index` for layering.
+
+### Composition clips
+
+| Attribute                    | Required | Values                                       |
+| ---------------------------- | -------- | -------------------------------------------- |
+| `data-composition-id`        | Yes      | Unique composition ID                        |
+| `data-start`                 | Yes      | Start time (root composition: `"0"`)         |
+| `data-duration`              | Yes      | Takes precedence over GSAP timeline duration |
+| `data-width` / `data-height` | Yes      | Pixel dimensions (1920x1080 or 1080x1920)    |
+| `data-composition-src`       | No       | Path to external HTML file                   |
+
+## Timeline Contract
+
+- Every timeline starts `{ paused: true }` — the player controls playback.
+- Register every timeline: `window.__timelines["<composition-id>"] = tl`.
+- Duration comes from `data-duration`, not from the GSAP timeline length.
+- Framework auto-nests sub-timelines — do NOT manually add them.
+- Never create empty tweens just to set duration.
+
+## Non-Negotiable Rules
+
+1. **Deterministic.** No `Math.random()`, `Date.now()`, or time-based logic. Use a seeded PRNG (e.g. mulberry32) if you need pseudo-randomness.
+2. **GSAP only on visual properties.** `opacity`, `x`, `y`, `scale`, `rotation`, `color`, `backgroundColor`, `borderRadius`, transforms. Never animate `visibility`, `display`, or call `video.play()`/`audio.play()`.
+3. **No property conflicts across timelines.** Never animate the same property on the same element from multiple timelines simultaneously.
+4. **No `repeat: -1`.** Infinite-repeat tweens break the capture engine. Compute `repeat: Math.ceil(duration / cycleDuration) - 1`.
+5. **Synchronous timeline construction.** Never build timelines inside `async`/`await`, `setTimeout`, or Promises. The capture engine reads `window.__timelines` synchronously after page load. Fonts are embedded by the compiler — no need to wait for load.
+6. **Root composition has no `<template>` wrapper.** Only sub-compositions use `<template>`.
+7. **Video is always `muted playsinline`.** Audio is always a separate `<audio>` element — even if it's the same source file.
+8. **Content containers use padding, not absolute positioning.** `.scene-content { width: 100%; height: 100%; padding: Npx; display: flex; flex-direction: column; gap: Npx; box-sizing: border-box }`. Absolute-positioned content containers overflow. Reserve `position: absolute` for decoratives only.
+
+## Scene Transitions
+
+Multi-scene compositions MUST follow all of these:
+
+1. **Always use a transition between scenes.** No jump cuts.
+2. **Always use entrance animations** on every scene element. Every element animates IN via `gsap.from(...)`. No element may appear fully-formed.
+3. **Never use exit animations** (except on the final scene). This means NO `gsap.to()` that animates `opacity` to 0, `y` offscreen, etc. The transition IS the exit. Outgoing scene content must be fully visible at the moment the transition starts.
+4. **Final scene only:** may fade elements out. This is the only scene where `gsap.to(..., { opacity: 0 })` is allowed.
+
+## Typography and Assets
+
+- **Fonts:** write the `font-family` you want in CSS — the compiler embeds supported fonts automatically. Unsupported fonts produce a compiler warning.
+- Add `crossorigin="anonymous"` to external media.
+- For dynamic text sizing, use `window.__hyperframes.fitTextFontSize(text, { maxWidth, fontFamily, fontWeight })`.
+- All project files live at the project root alongside `index.html`. Sub-compositions reference assets with `../`.
+- For rendered video: 60px+ headlines, 20px+ body, 16px+ data labels. `font-variant-numeric: tabular-nums` on number columns. Avoid full-screen linear gradients on dark backgrounds (H.264 banding — use radial or solid + localized glow).
+
+## Animation Guardrails
+
+- Offset the first animation 0.1–0.3s (not `t=0`).
+- Vary eases across entrance tweens — at least 3 different eases per scene.
+- Don't repeat an entrance pattern within a scene.
+
+## Never Do
+
+1. Forget `window.__timelines` registration.
+2. Use video for audio — always muted video + separate `<audio>`.
+3. Nest video inside a timed div — use a non-timed wrapper.
+4. Use `data-layer` (use `data-track-index`) or `data-end` (use `data-duration`).
+5. Animate video element dimensions — animate a wrapper div instead.
+6. Call `play`/`pause`/`seek` on media — framework owns playback.
+7. Create a top-level container without `data-composition-id`.
+8. Use `repeat: -1` on any timeline or tween.
+9. Build timelines asynchronously.
+10. Use `gsap.set()` on elements from later scenes — they don't exist in the DOM at page load. Use `tl.set(selector, vars, timePosition)` inside the timeline at or after the clip's `data-start`.
+11. Use `<br>` in content text — causes unwanted extra breaks when the text wraps naturally. Use `max-width` instead. Exception: short display titles (e.g., "THE\nIMMORTAL\nGAME") where each word is deliberately on its own line.
@@ -0,0 +1,289 @@
+# HyperFrames Feature Reference
+
+Load this file when a composition needs captions, TTS narration, audio-reactive visuals, marker-style text highlighting, or scene transitions. All patterns here are deterministic (no `Math.random()`, no `Date.now()`, no runtime audio analysis) and live on the same GSAP timeline as the rest of the composition.
+
+## Captions
+
+### Language Rule (Non-Negotiable)
+
+**Never use `.en` whisper models unless the audio is confirmed English.** `.en` models TRANSLATE non-English audio into English instead of transcribing it.
+
+- User says the language → `npx hyperframes transcribe audio.mp3 --model small --language <code>` (no `.en`)
+- User confirms English → `--model small.en`
+- Language unknown → `--model small` (auto-detects)
+
+### Style Detection
+
+If the user doesn't specify a caption style, detect it from the transcript tone:
+
+| Tone         | Font mood                | Animation                          | Color                       | Size    |
+| ------------ | ------------------------ | ---------------------------------- | --------------------------- | ------- |
+| Hype / launch   | Heavy condensed, 800-900 | Scale-pop, `back.out(1.7)`, 0.1-0.2s | Bright on dark              | 72-96px |
+| Corporate    | Clean sans, 600-700      | Fade+slide, `power3.out`, 0.3s       | White / neutral + muted accent | 56-72px |
+| Tutorial     | Mono / clean sans, 500-600 | Typewriter or fade, 0.4-0.5s          | High contrast, minimal      | 48-64px |
+| Storytelling | Serif / elegant, 400-500   | Slow fade, `power2.out`, 0.5-0.6s    | Warm muted tones            | 44-56px |
+| Social       | Rounded sans, 700-800    | Bounce, `elastic.out`, word-by-word  | Playful, colored pills      | 56-80px |
+
+### Word Grouping
+
+- High energy: 2-3 words, quick turnover.
+- Conversational: 3-5 words, natural phrases.
+- Measured / calm: 4-6 words.
+
+Break on sentence boundaries, 150ms+ pauses, or a max word count.
+
+### Positioning
+
+- Landscape (1920x1080): bottom 80-120px, centered.
+- Portrait (1080x1920): ~600-700px from bottom, centered.
+- Never cover the subject's face. `position: absolute` (never relative). One caption group visible at a time.
+
+### Text Overflow Prevention
+
+Use the runtime helper so captions never overflow:
+
+```js
+const result = window.__hyperframes.fitTextFontSize(group.text.toUpperCase(), {
+  fontFamily: "Outfit",
+  fontWeight: 900,
+  maxWidth: 1600, // 1600 landscape, 900 portrait
+});
+el.style.fontSize = result.fontSize + "px";
+```
+
+When per-word styling uses `scale > 1.0`, compute `maxWidth = safeWidth / maxScale` to leave headroom. Container needs `overflow: visible` (not `hidden` — hidden clips scaled emphasis words and glow).
+
+### Caption Exit Guarantee
+
+Every group MUST have a hard kill after its exit tween — otherwise groups leak into later ones:
+
+```js
+tl.to(groupEl, { opacity: 0, scale: 0.95, duration: 0.12, ease: "power2.in" }, group.end - 0.12);
+tl.set(groupEl, { opacity: 0, visibility: "hidden" }, group.end); // deterministic kill
+```
+
+### Per-Word Styling
+
+Scan the transcript for words that deserve distinct treatment:
+
+- Brand / product names — larger, unique color.
+- ALL CAPS — scale boost, flash, accent color.
+- Numbers / statistics — bold weight, accent color.
+- Emotional keywords — exaggerated animation (overshoot, bounce).
+- Call-to-action — highlight, underline, color pop.
+
+## TTS (Kokoro-82M)
+
+Local, no API key. Runs on CPU. Model downloads on first use (~311 MB + ~27 MB voices, cached in `~/.cache/hyperframes/tts/`).
+
+### Voice Selection
+
+| Content type  | Voice                   | Why                         |
+| ------------- | ----------------------- | --------------------------- |
+| Product demo  | `af_heart` / `af_nova`  | Warm, professional          |
+| Tutorial      | `am_adam` / `bf_emma`   | Neutral, easy to follow     |
+| Marketing     | `af_sky` / `am_michael` | Energetic or authoritative  |
+| Documentation | `bf_emma` / `bm_george` | Clear British English       |
+| Casual        | `af_heart` / `af_sky`   | Approachable, natural       |
+
+Run `npx hyperframes tts --list` for all 54 voices across 8 languages.
+
+### Multilingual Phonemization
+
+Voice ID first letter encodes language: `a`=American English, `b`=British English, `e`=Spanish, `f`=French, `h`=Hindi, `i`=Italian, `j`=Japanese, `p`=Brazilian Portuguese, `z`=Mandarin. The CLI auto-infers the phonemizer locale from that prefix — you don't need `--lang` when voice and text match.
+
+```bash
+npx hyperframes tts "La reunión empieza a las nueve" --voice ef_dora --output es.wav
+npx hyperframes tts "今日はいい天気ですね"            --voice jf_alpha --output ja.wav
+```
+
+Pass `--lang` only to override auto-detection (e.g. stylized accents):
+
+```bash
+npx hyperframes tts "Hello there" --voice af_heart --lang fr-fr --output accented.wav
+```
+
+Valid `--lang` codes: `en-us`, `en-gb`, `es`, `fr-fr`, `hi`, `it`, `pt-br`, `ja`, `zh`. Non-English phonemization requires `espeak-ng` installed system-wide (`apt-get install espeak-ng` / `brew install espeak-ng`).
+
+### Speed
+
+- `0.7-0.8` — tutorial, complex content
+- `1.0` — natural (default)
+- `1.1-1.2` — intros, upbeat content
+- `1.5+` — rarely appropriate
+
+### TTS + Captions Workflow
+
+```bash
+npx hyperframes tts script.txt --voice af_heart --output narration.wav
+npx hyperframes transcribe narration.wav   # → transcript.json (word-level)
+```
+
+## Audio-Reactive Visuals
+
+Drive visuals from music, voice, or sound. Any GSAP-tweenable property can respond to pre-extracted audio data.
+
+### Data format
+
+```js
+const AUDIO_DATA = {
+  fps: 30,
+  totalFrames: 900,
+  frames: [{ bands: [0.82, 0.45, 0.31, /* ... */] }, /* ... */],
+};
+```
+
+`frames[i].bands[]` are frequency band amplitudes, 0-1. Index 0 = bass, higher indices = treble. Each band is normalized independently across the full track.
+
+### Mapping audio to visuals
+
+| Audio signal           | Visual property                   | Effect                     |
+| ---------------------- | --------------------------------- | -------------------------- |
+| Bass (`bands[0]`)      | `scale`                           | Pulse on beat              |
+| Treble (`bands[12-14]`)| `textShadow`, `boxShadow`         | Glow intensity             |
+| Overall amplitude      | `opacity`, `y`, `backgroundColor` | Breathe, lift, color shift |
+| Mid-range (`bands[4-8]`)| `borderRadius`, `width`          | Shape morphing             |
+
+Any GSAP-tweenable property works — `clipPath`, `filter`, SVG attributes, CSS custom properties. Let content guide the visual and let audio drive its behavior. **Never add** equalizer bars, spectrum analyzers, waveform displays, rainbow cycling, or generic particle systems — they look cheap.
+
+### Sampling pattern (required)
+
+Audio reactivity needs per-frame sampling via a `for` loop of `tl.call()`, NOT a single tween. A single long tween does NOT react to audio:
+
+```js
+for (let f = 0; f < AUDIO_DATA.totalFrames; f++) {
+  tl.call(
+    ((frame) => () => draw(frame))(AUDIO_DATA.frames[f]),
+    [],
+    f / AUDIO_DATA.fps,
+  );
+}
+```
+
+### Gotchas
+
+- **textShadow on a container** with semi-transparent children (e.g. inactive caption words at `rgba(255,255,255,0.3)`) renders a visible glow rectangle behind every child. Apply the glow to active words individually, not to the container.
+- **Subtlety for text** — 3-6% scale variation, soft glow. Heavy pulsing makes text unreadable.
+- **Go bigger on non-text** — backgrounds and shapes can handle 10-30% swings.
+- **Deterministic only** — pre-extracted audio data, no Web Audio API, no runtime analysis.
+
+## Marker-Style Highlighting
+
+Deterministic CSS + GSAP implementations of the classic "highlight / circle / burst / scribble / sketchout" drawing modes for emphasizing text. Fully seekable — no animated SVG filters, no JS timers.
+
+### Highlight (yellow marker sweep)
+
+```html
+<span class="mh-highlight-wrap">
+  <span class="mh-highlight-bar" id="hl-1"></span>
+  <span class="mh-highlight-text">highlighted text</span>
+</span>
+```
+
+```css
+.mh-highlight-wrap { position: relative; display: inline; }
+.mh-highlight-bar {
+  position: absolute; inset: 0 -6px;
+  background: #fdd835; opacity: 0.35;
+  transform: scaleX(0); transform-origin: left center;
+  border-radius: 3px; z-index: 0;
+}
+.mh-highlight-text { position: relative; z-index: 1; }
+```
+
+```js
+tl.to("#hl-1", { scaleX: 1, duration: 0.5, ease: "power2.out" }, 0.6);
+```
+
+Multi-line: apply to `.mh-highlight-bar` with `stagger: 0.3`.
+
+### Circle
+
+Hand-drawn ellipse around a word. Use a positioned `::before` with `border-radius: 50%`, slight rotation, and `clip-path` to avoid covering the letters. Animate `clip-path` or `stroke-dashoffset` on an inline SVG circle.
+
+### Burst
+
+Short radiating lines around a word. Render 6-12 small `<span>` elements positioned in a radial pattern; animate `scaleY` from 0.
+
+### Scribble
+
+A chaotic overlay created by animating `stroke-dashoffset` on an inline SVG `<path>` with a `d` attribute describing a zig-zag. Seed values, never `Math.random()`.
+
+### Sketchout
+
+A rough rectangle outline. Two `<rect>`s with slight `transform` offsets, animated via `stroke-dashoffset`.
+
+All five modes tween only CSS transforms, `clip-path`, or `stroke-dashoffset` — all of which tween cleanly, are deterministic, and seek correctly.
+
+## Scene Transitions
+
+Every multi-scene composition MUST use transitions. No jump cuts.
+
+### Energy → primary transition
+
+| Energy                               | CSS primary                  | Shader primary                       | Accent                         | Duration  | Easing                   |
+| ------------------------------------ | ---------------------------- | ------------------------------------ | ------------------------------ | --------- | ------------------------ |
+| **Calm** (wellness, brand, luxury)   | Blur crossfade, focus pull   | Cross-warp morph, thermal distortion | Light leak, circle iris        | 0.5-0.8s  | `sine.inOut`, `power1`   |
+| **Medium** (corporate, SaaS)         | Push slide, staggered blocks | Whip pan, cinematic zoom             | Squeeze, vertical push         | 0.3-0.5s  | `power2`, `power3`       |
+| **High** (promos, sports, launch)    | Zoom through, overexposure   | Ridged burn, glitch, chromatic split | Staggered blocks, gravity drop | 0.15-0.3s | `power4`, `expo`         |
+
+Pick ONE primary (60-70% of scene changes) plus 1-2 accents. Never use a different transition for every scene.
+
+### Mood → transition type
+
+| Mood                     | Transitions                                                                 |
+| ------------------------ | --------------------------------------------------------------------------- |
+| Warm / inviting          | Light leak, blur crossfade, focus pull, film burn · _Shader:_ thermal distortion, cross-warp morph |
+| Cold / clinical          | Squeeze, zoom out, blinds, shutter, grid dissolve · _Shader:_ gravitational lens |
+| Editorial / magazine     | Push slide, vertical push, diagonal split, shutter · _Shader:_ whip pan     |
+| Tech / futuristic        | Grid dissolve, staggered blocks, blinds · _Shader:_ glitch, chromatic split |
+| Tense / edgy             | Glitch, VHS, chromatic aberration, ripple · _Shader:_ ridged burn, domain warp |
+| Playful / fun            | Elastic push, 3D flip, circle iris, morph circle · _Shader:_ swirl vortex, ripple waves |
+| Dramatic / cinematic     | Zoom through, gravity drop, overexposure · _Shader:_ cinematic zoom, gravitational lens |
+| Premium / luxury         | Focus pull, blur crossfade, color dip to black · _Shader:_ cross-warp morph |
+| Retro / analog           | Film burn, light leak, VHS, clock wipe · _Shader:_ light leak               |
+
+### Presets
+
+| Preset     | Duration | Easing            |
+| ---------- | -------- | ----------------- |
+| `snappy`   | 0.2s     | `power4.inOut`    |
+| `smooth`   | 0.4s     | `power2.inOut`    |
+| `gentle`   | 0.6s     | `sine.inOut`      |
+| `dramatic` | 0.5s     | `power3.in` → out |
+| `instant`  | 0.15s    | `expo.inOut`      |
+| `luxe`     | 0.7s     | `power1.inOut`    |
+
+### Install a shader transition
+
+```bash
+npx hyperframes add flash-through-white
+npx hyperframes add --list
+```
+
+### CSS vs shader
+
+- **CSS transitions** animate scene containers with opacity, transforms, `clip-path`, and filters. Simpler to set up.
+- **Shader transitions** composite both scene textures per-pixel on a WebGL canvas — can warp, dissolve, and morph in ways CSS cannot. Import from `@hyperframes/shader-transitions` instead of writing raw GLSL.
+
+Don't mix CSS and shader transitions in the same composition — once a composition uses shader transitions, the WebGL canvas replaces DOM-based scene switching for every transition.
+
+### Shader-compatible CSS rules
+
+Shader transitions capture DOM scenes to WebGL textures via html2canvas. The canvas 2D pipeline doesn't match CSS exactly:
+
+1. No `transparent` keyword in gradients — use the target color at zero alpha: `rgba(200,117,51,0)` not `transparent`. (Canvas interpolates `transparent` as `rgba(0,0,0,0)` creating dark fringes.)
+2. No gradient backgrounds on elements thinner than 4px. Use solid `background-color` on thin accent lines.
+3. No CSS variables (`var()`) on elements visible during capture — html2canvas doesn't reliably resolve custom properties. Use literal color values.
+4. Mark uncapturable decoratives with `data-no-capture` — they stay on the live DOM but are absent from the shader texture.
+5. No gradient opacity below 0.15 — renders differently in canvas vs CSS.
+6. Every `.scene` div must have explicit `background-color`, AND pass the same color as `bgColor` in the `init()` config. If either is missing, the texture renders as black.
+
+These rules only apply to shader transition compositions. CSS-only compositions have no restrictions.
+
+### Don't
+
+- Mix CSS and shader transitions in one composition.
+- Use exit animations on any scene except the final scene — the transition IS the exit.
+- Introduce a new transition type every scene — pick one primary + 1-2 accents.
+- Use transitions that create visible geometric repetition (grids, hex cells, uniform dots) — they look artificial regardless of the math behind them. Prefer organic noise (FBM, domain warping).
@@ -0,0 +1,136 @@
+# GSAP for HyperFrames
+
+GSAP is the animation engine for all HyperFrames compositions. Load from CDN inside the composition:
+
+```html
+<script src="https://cdn.jsdelivr.net/npm/gsap@3.14.2/dist/gsap.min.js"></script>
+```
+
+## Core Tween Methods
+
+- **`gsap.to(targets, vars)`** — animate from current state to `vars`. Most common.
+- **`gsap.from(targets, vars)`** — animate from `vars` to current state (entrances).
+- **`gsap.fromTo(targets, fromVars, toVars)`** — explicit start and end.
+- **`gsap.set(targets, vars)`** — apply immediately (duration 0). Don't use on clip elements that enter later — use `tl.set(selector, vars, time)` inside the timeline instead.
+
+Always use **camelCase** property names (`backgroundColor`, `rotationX`, not `background-color`).
+
+## Common vars
+
+- **`duration`** — seconds (default 0.5).
+- **`delay`** — seconds before start.
+- **`ease`** — `"power1.out"` (default), `"power3.inOut"`, `"back.out(1.7)"`, `"elastic.out(1, 0.3)"`, `"none"`, `"expo.out"`, `"circ.inOut"`.
+- **`stagger`** — number `0.1` or object: `{ amount: 0.3, from: "center" }`, `{ each: 0.1, from: "random" }`.
+- **`overwrite`** — `false` (default), `true`, or `"auto"`.
+- **`repeat`** — number (never `-1` in HyperFrames). **`yoyo`** — alternates direction with repeat.
+- **`onComplete`**, **`onStart`**, **`onUpdate`** — callbacks.
+- **`immediateRender`** — default `true` for `from()`/`fromTo()`. Set `false` on later tweens targeting the same property+element to avoid overwrite surprises.
+
+## Transforms
+
+Prefer GSAP's transform aliases over raw CSS `transform`:
+
+| GSAP property               | Equivalent                 |
+| --------------------------- | -------------------------- |
+| `x`, `y`, `z`               | translateX/Y/Z (px)        |
+| `xPercent`, `yPercent`      | translateX/Y (%)           |
+| `scale`, `scaleX`, `scaleY` | scale                      |
+| `rotation`                  | rotate (deg)               |
+| `rotationX`, `rotationY`    | 3D rotate                  |
+| `skewX`, `skewY`            | skew                       |
+| `transformOrigin`           | transform-origin           |
+
+- **`autoAlpha`** — prefer over `opacity`. At 0, also sets `visibility: hidden`.
+- **CSS variables** — `"--hue": 180`.
+- **Directional rotation** — `"360_cw"`, `"-170_short"`, `"90_ccw"`.
+- **`clearProps`** — `"all"` or comma-separated; removes inline styles on complete.
+- **Relative values** — `"+=20"`, `"-=10"`, `"*=2"`.
+
+## Function-based Values
+
+```js
+gsap.to(".item", {
+  x: (i, target, targets) => i * 50,
+  stagger: 0.1,
+});
+```
+
+## Easing
+
+Built-in eases: `power1` through `power4`, `back`, `bounce`, `circ`, `elastic`, `expo`, `sine`. Each has `.in`, `.out`, `.inOut`.
+
+Rule of thumb:
+- Entrances: `power3.out`, `expo.out`, `back.out(1.4)`
+- Exits: `power2.in`, `expo.in`
+- Scrubbed sections: `none` (linear)
+- Vary eases across entrance tweens within a scene — at least 3 different eases.
+
+## Defaults
+
+```js
+gsap.defaults({ duration: 0.6, ease: "power2.out" });
+```
+
+## Timelines (HyperFrames primary pattern)
+
+```js
+window.__timelines = window.__timelines || {};
+
+const tl = gsap.timeline({ paused: true, defaults: { duration: 0.6, ease: "power2.out" } });
+
+tl.from(".title",    { y: 50, opacity: 0 }, 0.3);
+tl.from(".subtitle", { y: 30, opacity: 0 }, 0.5);
+tl.from(".cta",      { scale: 0.8, opacity: 0, ease: "back.out(1.7)" }, 0.8);
+
+window.__timelines["root"] = tl;
+```
+
+### Position parameter
+
+The position parameter is the third argument to `.from()` / `.to()` and the second argument to `.add()`:
+
+- Absolute seconds: `0.5`, `2.1`.
+- Relative to the previous tween: `">+0.2"` (0.2s after the previous tween ends), `"<"` (same start time as the previous tween), `"<+0.3"` (0.3s after the previous tween's start).
+- Named labels: `tl.addLabel("act2", 5); tl.from(".x", { y: 30 }, "act2");`
+
+### Nesting
+
+HyperFrames auto-nests sub-composition timelines. **Do not** manually `tl.add(subTl)` — the framework wires sub-timelines into the parent at the sub-composition's `data-start`.
+
+### Playback
+
+The player controls playback. Don't call `tl.play()`, `tl.pause()`, or `tl.reverse()` at construction time. `{ paused: true }` is required.
+
+## Stagger
+
+```js
+// even distribution
+tl.from(".card", { opacity: 0, y: 40, stagger: 0.1 });
+
+// control total amount
+tl.from(".card", { opacity: 0, stagger: { amount: 0.6, from: "center" } });
+
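+// "random" stagger order — NOTE: GSAP shuffles this order with Math.random(), so it can differ
+// between page loads; use a fixed `from` (index, "center", "edges") when renders must be reproducible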
+tl.from(".dot", { opacity: 0, stagger: { each: 0.05, from: "random" } });
+```
+
+`stagger.from`: `"start"` | `"end"` | `"center"` | `"edges"` | `"random"` | index | `[x, y]` for grid.
+
+## Performance
+
+- Animate transforms (`x`, `y`, `scale`, `rotation`) and `opacity` — cheap, GPU-accelerated.
+- Avoid animating `width`, `height`, `top`, `left`, `margin` — causes layout thrash.
+- Avoid box-shadow or filter animations on large elements — expensive.
+- `will-change` is rarely needed; GSAP handles promotion.
+
+## gsap.matchMedia (rarely needed in HyperFrames)
+
+Compositions have fixed dimensions (`data-width`/`data-height`), so responsive breakpoints don't apply. You may still use `matchMedia` for `prefers-reduced-motion` when authoring UI previews, but it's not used in rendered video output.
+
+## Don't Do
+
+- `repeat: -1` anywhere — breaks the capture engine.
+- `Math.random()`, `Date.now()`, `performance.now()` inside tween values — non-deterministic.
+- `async` / `setTimeout` / `Promise` around timeline construction — the capture engine reads `window.__timelines` synchronously.
+- Animate `visibility` or `display` directly — use `autoAlpha`.
+- `gsap.set()` on clip elements that enter later in the timeline — they don't exist in the DOM at page-load. Use `tl.set(sel, vars, time)` inside the timeline.
@@ -0,0 +1,137 @@
+# Troubleshooting
+
+## `'HeadlessExperimental.beginFrame' wasn't found` (first thing to check)
+
+**Symptom:** `npx hyperframes render` fails with:
+
+```
+✗ Render failed
+Protocol error (HeadlessExperimental.beginFrame):
+'HeadlessExperimental.beginFrame' wasn't found
+```
+
+**Cause:** Chromium 147+ removed the `HeadlessExperimental.beginFrame` CDP command. This affected sandbox environments (e.g., OpenClaw, some containerized agent hosts) that ship modern Chromium as the system browser. See [hyperframes#294](https://github.com/heygen-com/hyperframes/issues/294).
+
+**Fix (permanent — preferred):** upgrade.
+
+```bash
+npx hyperframes upgrade -y
+# or
+npm install -g hyperframes@latest
+```
+
+`hyperframes >= 0.4.2` auto-detects whether the resolved browser supports `beginFrame` (checks for `chrome-headless-shell` in the binary path) and falls back to screenshot capture mode when it doesn't. Commit [`4c72ba4`](https://github.com/heygen-com/hyperframes/commit/4c72ba4a36ec2bd6733f7b9cb2a9e63f9fb234b9) (March 2026) shipped this auto-detect.
+
+**Fix (escape hatch — if you can't upgrade):**
+
+```bash
+export PRODUCER_FORCE_SCREENSHOT=true
+npx hyperframes render
+```
+
+This forces screenshot mode regardless of the binary. Screenshot mode is slightly slower but visually identical.
+
+**Fix (prevent — recommended):** install `chrome-headless-shell` so the engine can use the fast BeginFrame path:
+
+```bash
+npx puppeteer browsers install chrome-headless-shell
+# or let the CLI do it
+npx hyperframes browser --install
+```
+
+`scripts/setup.sh` runs this automatically.
+
+## `npx hyperframes render` hangs for 120s then times out
+
+**Cause:** the resolved browser is system Chrome (e.g., `/usr/bin/google-chrome`), which doesn't support the BeginFrame path, and auto-detect didn't catch it (older `hyperframes` version).
+
+**Fix:**
+1. Check which binary is being used: `npx hyperframes browser --path`
+2. If it's system Chrome, either:
+   - Install `chrome-headless-shell`: `npx hyperframes browser --install`, OR
+   - Set the escape hatch: `export PRODUCER_FORCE_SCREENSHOT=true`, OR
+   - Upgrade: `npx hyperframes upgrade -y`
+
+## `ffmpeg: command not found`
+
+Install FFmpeg via your system package manager:
+
+| OS / distro     | Command                             |
+| --------------- | ----------------------------------- |
+| Ubuntu / Debian | `sudo apt-get install -y ffmpeg`    |
+| Fedora / RHEL   | `sudo dnf install -y ffmpeg`        |
+| Arch            | `sudo pacman -S ffmpeg`             |
+| macOS           | `brew install ffmpeg`               |
+| Windows         | `winget install Gyan.FFmpeg`        |
+
+Verify: `ffmpeg -version`.
+
+## `Node version X is not supported`
+
+HyperFrames requires Node.js >= 22. Check with `node --version`.
+
+- **nvm:** `nvm install 22 && nvm use 22`
+- **Homebrew (macOS):** `brew install node@22 && brew link --overwrite node@22`
+- **apt:** follow [nodesource](https://github.com/nodesource/distributions) for Node 22 LTS.
+
+## `ENOSPC: no space left on device` or OOM kills during render
+
+Renders are memory- and disk-hungry. Minimums:
+
+- **RAM:** 4 GB free (8 GB recommended for 60fps / `--quality high`)
+- **Disk:** 2 GB free scratch space — frames are written to `/tmp` during capture
+
+Mitigations:
+- Lower quality: `--quality draft`.
+- Lower fps: `--fps 24`.
+- Lower worker count: `--workers 1`.
+- Set `TMPDIR` to a volume with more space: `export TMPDIR=/mnt/scratch`.
+
+## Lint passes but the render is blank / black frames
+
+Check the browser console in `preview` — usually:
+- A timeline was registered with the wrong key (`__timelines["typo"]` instead of `__timelines["root"]`).
+- The root composition was wrapped in `<template>` (only sub-compositions use `<template>`).
+- A script tag failed to load — check Network tab in preview.
+
+Run `npx hyperframes lint --verbose` to see info-level findings.
+
+## Contrast warnings from `hyperframes validate`
+
+```
+⚠ WCAG AA contrast warnings (3):
+  · .subtitle "secondary text" — 2.67:1 (need 4.5:1, t=5.3s)
+```
+
+- **Dark backgrounds:** brighten the failing color until it clears 4.5:1 (normal text) or 3:1 (large text — 24px+ or 19px+ bold).
+- **Light backgrounds:** darken it.
+- Stay within the palette family — don't invent a new color, adjust the existing one.
+- Skip the check temporarily with `--no-contrast` if iterating rapidly, but clear it before delivery.
+
+## `Font family 'X' not supported by compiler`
+
+The compiler embeds a curated set of web-safe + open-source fonts. If a font isn't supported, either:
+- Swap to a supported alternative from the warning.
+- Register a custom font via `@font-face` pointing to a `.woff2` in the project directory (the compiler embeds referenced `@font-face` files).
+
+## Video plays back muted or with no audio
+
+Check:
+- The `<video>` element has `muted playsinline` (required — browser autoplay policy).
+- Audio is a **separate** `<audio>` element, not the video element.
+- Audio `data-volume` is not `0` (it defaults to 1 when omitted).
+- The audio file is at the expected path — compositions load relative to their own directory.
+
+## Docker render fails on Linux with rootless Docker
+
+Add `--privileged` or pass `--cap-add=SYS_ADMIN`:
+
+```bash
+npx hyperframes render --docker --docker-args "--cap-add=SYS_ADMIN"
+```
+
+The headless browser needs namespace permissions for sandboxing.
+
+## Bug reports
+
+Include `npx hyperframes info` output + the full error log. File at [github.com/heygen-com/hyperframes](https://github.com/heygen-com/hyperframes/issues).
@@ -0,0 +1,145 @@
+# Website to Video
+
+Capture a website, produce a professional video from it. Use when the user provides a URL and wants a video — social ad, product tour, 30-second promo, etc.
+
+The workflow has 7 steps. Each produces an artifact that gates the next. **Do not skip steps** — each artifact prevents a downstream failure mode.
+
+## Step 1: Capture & Understand
+
+```bash
+npx hyperframes capture https://example.com -o example-video
+```
+
+Produces `example-video/capture/` with:
+- `capture/screenshots/` — above-the-fold + section screenshots (up to `--max-screenshots`)
+- `capture/assets/` — logos, hero images, background video (if any)
+- `capture/extracted/tokens.json` — colors, fonts, and spacing tokens
+- `capture/extracted/visible-text.txt` — extracted headings, paragraphs, CTAs
+- `capture/extracted/fonts.json` — font families and stacks detected in computed styles
+- `capture/asset-descriptions.md` — auto-generated asset catalog
+
+All subsequent steps read from the `capture/` subfolder — `capture/extracted/tokens.json`, `capture/assets/hero.png`, etc. Never strip the `capture/` prefix when referencing these files.
+
+**Gate:** Print a site summary — name, top 3 colors, primary + display fonts, hero asset path, one-sentence vibe. Keep it in your context — don't re-capture.
+
+## Step 2: Write DESIGN.md
+
+Small brand reference at the project root. 6 sections, ~90 lines. This is the cheat sheet — not the creative plan.
+
+```markdown
+# DESIGN
+
+## Brand
+- Name: Example Co.
+- One-line mission: "…"
+
+## Colors
+- Background: #0B0F14
+- Primary: #00E0A4 (accent, CTA)
+- Secondary: #7A8B9B (body text)
+- Text: #FFFFFF
+
+## Typography
+- Display: "Inter Tight", 700, tight letter-spacing
+- Body: "Inter", 400
+
+## Motion
+- Mood: precise, technical, confident
+- Eases: `power3.out` for entrances, `expo.in` for exits
+
+## Assets
+- Logo: `capture/assets/logo.svg`
+- Hero image: `capture/assets/hero.png`
+
+## What NOT to Do
+- No purple, no pastels, no serif body
+- No playful/bubbly eases (`elastic`, `bounce`)
+- No drop shadows on text
+```
+
+**Gate:** `DESIGN.md` exists in the project directory.
+
+## Step 3: Write SCRIPT.md
+
+Narration script. Story backbone. **Scene durations come from the narration, not from guessing.**
+
+```markdown
+# SCRIPT
+
+## Scene 1 — Hook (0:00–0:04)
+"What if your dashboards wrote themselves?"
+
+## Scene 2 — Problem (0:04–0:11)
+"Teams spend hours stitching together queries, charts, and callouts — every Monday."
+
+## Scene 3 — Solution (0:11–0:22)
+"Example Co. watches your data streams and proposes the dashboard you'd have built — in seconds."
+
+## Scene 4 — CTA (0:22–0:28)
+"Try it free at example.com."
+```
+
+Run `npx hyperframes tts SCRIPT.md --voice af_nova --output narration.wav` to generate TTS audio. Note the exact duration — it sets the video's length (the composition's `data-duration` should be the narration length plus a 0.5s buffer).
+
+**Gate:** `SCRIPT.md` + `narration.wav` exist and durations match the plan (±0.3s).
+
+## Step 4: Storyboard
+
+Text-only scene plan: for each scene, describe the hero frame — what's on screen at the scene's most-visible moment.
+
+```markdown
+# STORYBOARD
+
+## Scene 1 (0:00–0:04) — Hook
+Hero frame: giant "WHAT IF YOUR DASHBOARDS WROTE THEMSELVES?" in display font, centered, on near-black. Logo top-left at 40% opacity.
+Entrance: each word staggers in, 0.08s apart.
+Transition out: flash-through-white into Scene 2.
+```
+
+One paragraph per scene. Do NOT skip this step — it's where you catch narrative gaps before writing HTML.
+
+**Gate:** `STORYBOARD.md` exists. Each scene has: hero frame, entrance, transition.
+
+## Step 5: Composition
+
+Write `index.html` scene-by-scene:
+- Each scene is a `<div class="scene scene-N">` positioned absolutely, full-bleed.
+- Static HTML+CSS for the hero frame first (no GSAP).
+- Layer the narration `<audio>` at `data-start="0"` on a high track index.
+- Add a transitions component (`flash-through-white`, `liquid-wipe`, etc.) between each scene.
+- THEN add GSAP entrances (`gsap.from()`), no exits — transitions own the exit.
+- Register `window.__timelines["root"] = tl`.
+
+Install transitions as needed:
+
+```bash
+npx hyperframes add flash-through-white
+```
+
+## Step 6: Render
+
+```bash
+npx hyperframes lint --strict          # must pass
+npx hyperframes validate               # WCAG contrast audit
+npx hyperframes render --quality draft --output draft.mp4
+```
+
+Watch the draft. Note issues in a `REVIEW.md` bullet list (scene, timestamp, issue). Fix, re-render.
+
+When happy:
+
+```bash
+npx hyperframes render --quality high --output final.mp4
+```
+
+## Step 7: Deliver
+
+- Report file path + duration + file size to the user.
+- If the user wants a vertical cut, re-render with a 9:16 composition (`data-width="1080" data-height="1920"`) — typically requires a separate `index-vertical.html` with tighter typography and re-stacked scene layout.
+
+## Common Failure Modes
+
+- **Skipped DESIGN.md** → colors drift scene-to-scene; output feels like "AI slides."
+- **Skipped STORYBOARD.md** → scenes overlap or hero frames collide with transitions.
+- **Exit animations** before transitions → empty frames when the transition fires.
+- **Narration longer than `data-duration`** → audio clips mid-sentence. Update the composition's `data-duration` to match the TTS output length + 0.5s buffer.
@@ -0,0 +1,135 @@
+#!/usr/bin/env bash
+# HyperFrames setup for Hermes.
+#
+# Verifies Node >= 22 and FFmpeg, installs the `hyperframes` CLI globally,
+# pre-caches `chrome-headless-shell`, and runs `hyperframes doctor`.
+#
+# Installs the latest `hyperframes` and enforces >= 0.4.2 so the OpenClaw/Chromium-147 fix from
+# https://github.com/heygen-com/hyperframes/issues/294 (commit 4c72ba4)
+# is always present — the engine auto-detects `HeadlessExperimental.beginFrame`
+# support and falls back to screenshot capture otherwise.
+#
+# Idempotent: safe to re-run.
+
+set -euo pipefail
+
+MIN_NODE_MAJOR=22
+MIN_HYPERFRAMES_VERSION="0.4.2"
+
+red()    { printf '\033[31m%s\033[0m\n' "$*"; }
+green()  { printf '\033[32m%s\033[0m\n' "$*"; }
+yellow() { printf '\033[33m%s\033[0m\n' "$*"; }
+bold()   { printf '\033[1m%s\033[0m\n' "$*"; }
+
+bold "==> HyperFrames setup"
+
+# --- 1. Node.js --------------------------------------------------------------
+
+# Node must be on PATH before its version can be inspected.
+command -v node >/dev/null 2>&1 || {
+  red "✗ Node.js is not installed."
+  echo "   Install Node.js >= ${MIN_NODE_MAJOR} (nvm, Homebrew, or your package manager) and re-run."
+  exit 1
+}
+
+# Drop the leading "v" from `node --version`, then keep everything before the
+# first dot as the major version.
+node_version="$(node --version)"
+node_version="${node_version#v}"
+node_major="${node_version%%.*}"
+
+if [ "$node_major" -lt "$MIN_NODE_MAJOR" ]; then
+  red "✗ Node.js ${node_version} is too old. HyperFrames requires Node.js >= ${MIN_NODE_MAJOR}."
+  echo "   Upgrade with 'nvm install ${MIN_NODE_MAJOR} && nvm use ${MIN_NODE_MAJOR}' or your package manager."
+  exit 1
+fi
+
+green "✓ Node.js ${node_version}"
+
+# --- 2. FFmpeg ---------------------------------------------------------------
+
+if ! command -v ffmpeg >/dev/null 2>&1; then
+  red "✗ FFmpeg is not installed."
+  case "$(uname -s)" in
+    Linux*)   echo "   sudo apt-get install -y ffmpeg   # Debian/Ubuntu"
+              echo "   sudo dnf install -y ffmpeg       # Fedora/RHEL";;
+    Darwin*)  echo "   brew install ffmpeg";;
+    MINGW*|MSYS*|CYGWIN*) echo "   winget install Gyan.FFmpeg";;
+    *)        echo "   See https://ffmpeg.org/download.html";;
+  esac
+  exit 1
+fi
+green "✓ FFmpeg $(ffmpeg -version 2>&1 | head -1 | awk '{print $3}')"
+
+# --- 3. npm ------------------------------------------------------------------
+
+if ! command -v npm >/dev/null 2>&1; then
+  red "✗ npm is not installed (should ship with Node.js)."
+  exit 1
+fi
+
+# --- 4. Install / upgrade hyperframes CLI -----------------------------------
+
+bold "==> Installing hyperframes CLI (>= ${MIN_HYPERFRAMES_VERSION})"
+
+current_hyperframes=""
+if command -v hyperframes >/dev/null 2>&1; then
+  current_hyperframes="$(hyperframes --version 2>/dev/null | tail -1 | sed 's/^v//')"
+fi
+
+if [ -n "$current_hyperframes" ]; then
+  yellow "   Found hyperframes ${current_hyperframes}"
+fi
+
+# Always install/upgrade to >= MIN version.
+# Using 'latest' so we pick up any newer auto-detect/capture fixes.
+if ! npm install -g "hyperframes@latest" >/dev/null 2>&1; then
+  red "✗ npm install -g hyperframes@latest failed."
+  echo "   Try: sudo npm install -g hyperframes@latest"
+  echo "   Or use a user-scoped npm prefix: npm config set prefix ~/.npm-global && export PATH=\"\$HOME/.npm-global/bin:\$PATH\""
+  exit 1
+fi
+
+installed_version="$(hyperframes --version 2>/dev/null | tail -1 | sed 's/^v//')"
+green "✓ hyperframes ${installed_version} installed globally"
+
+# Sanity-check minimum version.
+#
+# version_ge A B — succeed when version A >= version B. Both are sorted with
+# `sort -V`; A >= B exactly when B comes out first.
+version_ge() {
+  local lowest
+  lowest="$(printf '%s\n%s\n' "$1" "$2" | sort -V | head -n 1)"
+  [ "$lowest" = "$2" ]
+}
+if ! version_ge "$installed_version" "$MIN_HYPERFRAMES_VERSION"; then
+  red "✗ hyperframes ${installed_version} is below required minimum ${MIN_HYPERFRAMES_VERSION}."
+  echo "   Try 'npm install -g hyperframes@latest' or 'sudo npm install -g hyperframes@latest'."
+  exit 1
+fi
+
+# --- 5. Pre-cache chrome-headless-shell --------------------------------------
+#
+# Chromium 147+ removed HeadlessExperimental.beginFrame. System Chrome (e.g.
+# /usr/bin/google-chrome) can't render with the fast path, so the engine
+# auto-detects and falls back to screenshot mode — but BeginFrame mode is
+# faster and produces higher-quality output. Install chrome-headless-shell
+# up front so the engine picks it over system Chrome.
+
+bold "==> Pre-caching chrome-headless-shell (for best render quality)"
+
+if ! npx --yes puppeteer browsers install chrome-headless-shell >/dev/null 2>&1; then
+  yellow "⚠ Could not pre-install chrome-headless-shell."
+  yellow "  Rendering will still work via screenshot-mode fallback (slower)."
+  yellow "  If you hit HeadlessExperimental.beginFrame errors:"
+  yellow "     export PRODUCER_FORCE_SCREENSHOT=true"
+  yellow "  See references/troubleshooting.md."
+else
+  green "✓ chrome-headless-shell installed"
+fi
+
+# --- 6. Doctor ---------------------------------------------------------------
+
+bold "==> Running hyperframes doctor"
+
+# Guard clause: a failing doctor run ends the script with a warning and a
+# non-zero status; otherwise fall through to the success summary.
+if ! hyperframes doctor; then
+  yellow "⚠ hyperframes doctor reported issues."
+  yellow "  See references/troubleshooting.md or re-run 'hyperframes doctor'."
+  exit 1
+fi
+
+green "✓ HyperFrames is ready"
+# One printf argument per output line; the leading empty string is the blank
+# separator line.
+printf '%s\n' \
+  "" \
+  "   Scaffold a project:   npx hyperframes init my-video" \
+  "   Preview:              npx hyperframes preview" \
+  "   Render:               npx hyperframes render"
@@ -345,10 +345,6 @@ Flash Attention uses float16/bfloat16 for speed. Float32 not supported.

 **Performance benchmarks**: See [references/benchmarks.md](references/benchmarks.md) for detailed speed and memory comparisons across GPUs and sequence lengths.

-**Algorithm details**: See [references/algorithm.md](references/algorithm.md) for tiling strategy, recomputation, and IO complexity analysis.
-
-**Advanced features**: See [references/advanced-features.md](references/advanced-features.md) for rotary embeddings, ALiBi, paged KV cache, and custom attention masks.
-
 ## Hardware requirements

 - **GPU**: NVIDIA Ampere+ (A100, A10, A30) or AMD MI200+
@@ -6,7 +6,6 @@ This directory contains comprehensive reference materials for SAELens.

 - [api.md](api.md) - Complete API reference for SAE, TrainingSAE, and configuration classes
 - [tutorials.md](tutorials.md) - Step-by-step tutorials for training and analyzing SAEs
- [papers.md](papers.md) - Key research papers on sparse autoencoders

 ## Quick Links

--- a/Show More
+++ b/Show More