fix: head+tail truncation for execute_code stdout (inspired by openclaw context-pruning)

Previously, _drain() only captured the first MAX_STDOUT_BYTES (50KB) of stdout, silently dropping all tail output. Scripts that print() their final results at the end lost those results. _drain() now uses a two-buffer approach: 40% head + 60% tail (rolling window). This matches the pattern already used in terminal_tool.py (lines 1042-1051) but gives the tail more space, since execute_code scripts typically print() their final results at the end. Inspired by openclaw's softTrim context-pruning (headChars/tailChars).
Merge PR #754: fix: stabilize system prompt across gateway turns for cache hits
2026-03-09 02:15:48 -07:00 · 2026-03-09 02:00:14 -07:00 · 2026-03-09 01:50:58 -07:00 · 2026-03-09 01:28:27 -07:00 · 2026-03-09 01:12:49 -07:00 · 2026-03-09 01:04:33 -07:00
127 changed files with 18577 additions and 2477 deletions
--- a/.env.example
+++ b/.env.example
@@ -24,10 +24,14 @@ GLM_API_KEY=
 # =============================================================================
 # LLM PROVIDER (Kimi / Moonshot)
 # =============================================================================
-# Kimi/Moonshot provides access to Moonshot AI coding models
-# Get your key at: https://platform.moonshot.ai
+# Kimi Code provides access to Moonshot AI coding models (kimi-k2.5, etc.)
+# Get your key at: https://platform.kimi.ai (Kimi Code console)
+# Keys prefixed sk-kimi- use the Kimi Code API (api.kimi.com) by default.
+# Legacy keys from platform.moonshot.ai need KIMI_BASE_URL override below.
 KIMI_API_KEY=
-# KIMI_BASE_URL=https://api.moonshot.ai/v1  # Override default base URL
+# KIMI_BASE_URL=https://api.kimi.com/coding/v1  # Default for sk-kimi- keys
+# KIMI_BASE_URL=https://api.moonshot.ai/v1      # For legacy Moonshot keys
+# KIMI_BASE_URL=https://api.moonshot.cn/v1      # For Moonshot China keys

 # =============================================================================
 # LLM PROVIDER (MiniMax)
@@ -49,10 +53,6 @@ MINIMAX_CN_API_KEY=
 # Get at: https://firecrawl.dev/
 FIRECRAWL_API_KEY=

-# Nous Research API Key - Vision analysis and multi-model reasoning
-# Get at: https://inference-api.nousresearch.com/
-NOUS_API_KEY=
-
 # FAL.ai API Key - Image generation
 # Get at: https://fal.ai/
 FAL_KEY=
--- a/.gitignore
+++ b/.gitignore
@@ -47,4 +47,5 @@ cli-config.yaml

 # Skills Hub state (lives in ~/.hermes/skills/.hub/ at runtime, but just in case)
 skills/.hub/
-ignored/
+ignored/
+.worktrees/
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,78 +1,60 @@
 # Hermes Agent - Development Guide

-Instructions for AI coding assistants (GitHub Copilot, Cursor, etc.) and human developers.
-
-Hermes Agent is an AI agent harness with tool-calling capabilities, interactive CLI, messaging integrations, and scheduled tasks.
+Instructions for AI coding assistants and developers working on the hermes-agent codebase.

 ## Development Environment

-**IMPORTANT**: Always use the virtual environment if it exists:
 ```bash
-source venv/bin/activate  # Before running any Python commands
+source .venv/bin/activate  # ALWAYS activate before running Python
 ```

 ## Project Structure

 ```
 hermes-agent/
-├── agent/                # Agent internals (extracted from run_agent.py)
-│   ├── model_metadata.py     # Model context lengths, token estimation
+├── run_agent.py          # AIAgent class — core conversation loop
+├── model_tools.py        # Tool orchestration, _discover_tools(), handle_function_call()
+├── toolsets.py           # Toolset definitions, _HERMES_CORE_TOOLS list
+├── cli.py                # HermesCLI class — interactive CLI orchestrator
+├── hermes_state.py       # SessionDB — SQLite session store (FTS5 search)
+├── agent/                # Agent internals
+│   ├── prompt_builder.py     # System prompt assembly
 │   ├── context_compressor.py # Auto context compression
 │   ├── prompt_caching.py     # Anthropic prompt caching
-│   ├── prompt_builder.py     # System prompt assembly (identity, skills index, context files)
+│   ├── auxiliary_client.py   # Auxiliary LLM client (vision, summarization)
+│   ├── model_metadata.py     # Model context lengths, token estimation
 │   ├── display.py            # KawaiiSpinner, tool preview formatting
+│   ├── skill_commands.py     # Skill slash commands (shared CLI/gateway)
 │   └── trajectory.py         # Trajectory saving helpers
-├── hermes_cli/           # CLI implementation
-│   ├── main.py           # Entry point, command dispatcher
-│   ├── banner.py         # Welcome banner, ASCII art, skills summary
-│   ├── commands.py       # Slash command definitions + autocomplete
-│   ├── callbacks.py      # Interactive prompt callbacks (clarify, sudo, approval)
-│   ├── setup.py          # Interactive setup wizard
-│   ├── config.py         # Config management & migration
-│   ├── status.py         # Status display
-│   ├── doctor.py         # Diagnostics
-│   ├── gateway.py        # Gateway management
-│   ├── uninstall.py      # Uninstaller
-│   ├── cron.py           # Cron job management
-│   └── skills_hub.py     # Skills Hub CLI + /skills slash command
-├── tools/                # Tool implementations
-│   ├── registry.py            # Central tool registry (schemas, handlers, dispatch)
-│   ├── approval.py            # Dangerous command detection + per-session approval
-│   ├── environments/          # Terminal execution backends
-│   │   ├── base.py            # BaseEnvironment ABC
-│   │   ├── local.py           # Local execution with interrupt support
-│   │   ├── docker.py          # Docker container execution
-│   │   ├── ssh.py             # SSH remote execution
-│   │   ├── singularity.py     # Singularity/Apptainer + SIF management
-│   │   ├── modal.py           # Modal cloud execution
-│   │   └── daytona.py         # Daytona cloud sandboxes
-│   ├── terminal_tool.py       # Terminal orchestration (sudo, lifecycle, factory)
-│   ├── todo_tool.py           # Planning & task management
-│   ├── process_registry.py    # Background process management
-│   └── ...                    # Other tool files
-├── gateway/              # Messaging platform adapters
-│   ├── platforms/        # Platform-specific adapters (telegram, discord, slack, whatsapp)
-│   └── ...
-├── cron/                 # Scheduler implementation
-├── environments/         # RL training environments (Atropos integration)
-├── skills/               # Bundled skill sources
-├── optional-skills/      # Official optional skills (not activated by default)
-├── cli.py                # Interactive CLI orchestrator (HermesCLI class)
-├── run_agent.py          # AIAgent class (core conversation loop)
-├── model_tools.py        # Tool orchestration (thin layer over tools/registry.py)
-├── toolsets.py           # Tool groupings
-├── toolset_distributions.py  # Probability-based tool selection
+├── hermes_cli/           # CLI subcommands and setup
+│   ├── main.py           # Entry point — all `hermes` subcommands
+│   ├── config.py         # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
+│   ├── commands.py       # Slash command definitions + SlashCommandCompleter
+│   ├── callbacks.py      # Terminal callbacks (clarify, sudo, approval)
+│   └── setup.py          # Interactive setup wizard
+├── tools/                # Tool implementations (one file per tool)
+│   ├── registry.py       # Central tool registry (schemas, handlers, dispatch)
+│   ├── approval.py       # Dangerous command detection
+│   ├── terminal_tool.py  # Terminal orchestration
+│   ├── process_registry.py # Background process management
+│   ├── file_tools.py     # File read/write/search/patch
+│   ├── web_tools.py      # Firecrawl search/extract
+│   ├── browser_tool.py   # Browserbase browser automation
+│   ├── code_execution_tool.py # execute_code sandbox
+│   ├── delegate_tool.py  # Subagent delegation
+│   ├── mcp_tool.py       # MCP client (~1050 lines)
+│   └── environments/     # Terminal backends (local, docker, ssh, modal, daytona, singularity)
+├── gateway/              # Messaging platform gateway
+│   ├── run.py            # Main loop, slash commands, message dispatch
+│   ├── session.py        # SessionStore — conversation persistence
+│   └── platforms/        # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal
+├── cron/                 # Scheduler (jobs.py, scheduler.py)
+├── environments/         # RL training environments (Atropos)
+├── tests/                # Pytest suite (2500+ tests)
 └── batch_runner.py       # Parallel batch processing
 ```

-**User Configuration** (stored in `~/.hermes/`):
- `~/.hermes/config.yaml` - Settings (model, terminal, toolsets, etc.)
- `~/.hermes/.env` - API keys and secrets
- `~/.hermes/pairing/` - DM pairing data
- `~/.hermes/hooks/` - Custom event hooks
- `~/.hermes/image_cache/` - Cached user images
- `~/.hermes/audio_cache/` - Cached user voice messages
- `~/.hermes/sticker_cache.json` - Telegram sticker descriptions
+**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)

 ## File Dependency Chain

@@ -86,603 +68,175 @@ model_tools.py  (imports tools/registry + triggers tool discovery)
 run_agent.py, cli.py, batch_runner.py, environments/
 ```

-Each tool file co-locates its schema, handler, and registration. `model_tools.py` is a thin orchestration layer.
-
 ---

-## AIAgent Class
-
-The main agent is implemented in `run_agent.py`:
+## AIAgent Class (run_agent.py)

 ```python
 class AIAgent:
-    def __init__(
-        self,
-        model: str = "anthropic/claude-sonnet-4",
-        api_key: str = None,
-        base_url: str = "https://openrouter.ai/api/v1",
-        max_iterations: int = 60,        # Max tool-calling loops
+    def __init__(self,
+        model: str = "anthropic/claude-opus-4.6",
+        max_iterations: int = 90,
        enabled_toolsets: list = None,
        disabled_toolsets: list = None,
-        verbose_logging: bool = False,
-        quiet_mode: bool = False,         # Suppress progress output
-        tool_progress_callback: callable = None,  # Called on each tool use
-    ):
-        # Initialize OpenAI client, load tools based on toolsets
-        ...
-    
-    def chat(self, user_message: str, task_id: str = None) -> str:
-        # Main entry point - runs the agent loop
-        ...
+        quiet_mode: bool = False,
+        save_trajectories: bool = False,
+        platform: str = None,           # "cli", "telegram", etc.
+        session_id: str = None,
+        skip_context_files: bool = False,
+        skip_memory: bool = False,
+        # ... plus provider, api_mode, callbacks, routing params
+    ): ...
+
+    def chat(self, message: str) -> str:
+        """Simple interface — returns final response string."""
+
+    def run_conversation(self, user_message: str, system_message: str = None,
+                         conversation_history: list = None, task_id: str = None) -> dict:
+        """Full interface — returns dict with final_response + messages."""
 ```

 ### Agent Loop

-The core loop in `_run_agent_loop()`:
-
-```
-1. Add user message to conversation
-2. Call LLM with tools
-3. If LLM returns tool calls:
-   - Execute each tool
-   - Add tool results to conversation
-   - Go to step 2
-4. If LLM returns text response:
-   - Return response to user
-```
+The core loop is inside `run_conversation()` — entirely synchronous:

 ```python
-while turns < max_turns:
-    response = client.chat.completions.create(
-        model=model,
-        messages=messages,
-        tools=tool_schemas,
-    )
-    
+while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
+    response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
    if response.tool_calls:
        for tool_call in response.tool_calls:
-            result = await execute_tool(tool_call)
+            result = handle_function_call(tool_call.name, tool_call.args, task_id)
            messages.append(tool_result_message(result))
-        turns += 1
+        api_call_count += 1
    else:
        return response.content
 ```

-### Conversation Management
-
-Messages are stored as a list of dicts following OpenAI format:
-
-```python
-messages = [
-    {"role": "system", "content": "You are a helpful assistant..."},
-    {"role": "user", "content": "Search for Python tutorials"},
-    {"role": "assistant", "content": None, "tool_calls": [...]},
-    {"role": "tool", "tool_call_id": "...", "content": "..."},
-    {"role": "assistant", "content": "Here's what I found..."},
-]
-```
-
-### Reasoning Model Support
-
-For models that support chain-of-thought reasoning:
- Extract `reasoning_content` from API responses
- Store in `assistant_msg["reasoning"]` for trajectory export
- Pass back via `reasoning_content` field on subsequent turns
+Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.

 ---

 ## CLI Architecture (cli.py)

-The interactive CLI uses:
- **Rich** - For the welcome banner and styled panels
- **prompt_toolkit** - For fixed input area with history, `patch_stdout`, slash command autocomplete, and floating completion menus
- **KawaiiSpinner** (in run_agent.py) - Animated kawaii faces during API calls; clean `┊` activity feed for tool execution results
-
-Key components:
- `HermesCLI` class - Main CLI controller with commands and conversation loop
- `SlashCommandCompleter` - Autocomplete dropdown for `/commands` (type `/` to see all)
- `agent/skill_commands.py` - Scans skills and builds invocation messages (shared with gateway)
- `load_cli_config()` - Loads config, sets environment variables for terminal
- `build_welcome_banner()` - Displays ASCII art logo, tools, and skills summary
-
-CLI UX notes:
- Thinking spinner (during LLM API call) shows animated kawaii face + verb (`(⌐■_■) deliberating...`)
- When LLM returns tool calls, the spinner clears silently (no "got it!" noise)
- Tool execution results appear as a clean activity feed: `┊ {emoji} {verb} {detail} {duration}`
- "got it!" only appears when the LLM returns a final text response (`⚕ ready`)
- The prompt shows `⚕ ❯` when the agent is working, `❯` when idle
- Pasting 5+ lines auto-saves to `~/.hermes/pastes/` and collapses to a reference
- Multi-line input via Alt+Enter or Ctrl+J
- `/commands` - Process user commands like `/help`, `/clear`, `/personality`, etc.
- `/skill-name` - Invoke installed skills directly (e.g., `/axolotl`, `/gif-search`)
-
-CLI uses `quiet_mode=True` when creating AIAgent to suppress verbose logging.
-
-### Skill Slash Commands
-
-Every installed skill in `~/.hermes/skills/` is automatically registered as a slash command.
-The skill name (from frontmatter or folder name) becomes the command: `axolotl` → `/axolotl`.
-
-Implementation (`agent/skill_commands.py`, shared between CLI and gateway):
-1. `scan_skill_commands()` scans all SKILL.md files at startup, filtering out skills incompatible with the current OS platform (via the `platforms` frontmatter field)
-2. `build_skill_invocation_message()` loads the SKILL.md content and builds a user-turn message
-3. The message includes the full skill content, a list of supporting files (not loaded), and the user's instruction
-4. Supporting files can be loaded on demand via the `skill_view` tool
-5. Injected as a **user message** (not system prompt) to preserve prompt caching
+- **Rich** for banner/panels, **prompt_toolkit** for input with autocomplete
+- **KawaiiSpinner** (`agent/display.py`) — animated faces during API calls, `┊` activity feed for tool results
+- `load_cli_config()` in cli.py merges hardcoded defaults + user config YAML
+- `process_command()` is a method on `HermesCLI` (not in commands.py)
+- Skill slash commands: `agent/skill_commands.py` scans `~/.hermes/skills/`, injects as **user message** (not system prompt) to preserve prompt caching

 ### Adding CLI Commands

-1. Add to `COMMANDS` dict with description
-2. Add handler in `process_command()` method
-3. For persistent settings, use `save_config_value()` to update config
-
---
-
-## Hermes CLI Commands
-
-The unified `hermes` command provides all functionality:
-
-| Command | Description |
-|---------|-------------|
-| `hermes` | Interactive chat (default) |
-| `hermes chat -q "..."` | Single query mode |
-| `hermes setup` | Configure API keys and settings |
-| `hermes config` | View current configuration |
-| `hermes config edit` | Open config in editor |
-| `hermes config set KEY VAL` | Set a specific value |
-| `hermes config check` | Check for missing config |
-| `hermes config migrate` | Prompt for missing config interactively |
-| `hermes status` | Show configuration status |
-| `hermes doctor` | Diagnose issues |
-| `hermes update` | Update to latest (checks for new config) |
-| `hermes uninstall` | Uninstall (can keep configs for reinstall) |
-| `hermes gateway` | Start gateway (messaging + cron scheduler) |
-| `hermes gateway setup` | Configure messaging platforms interactively |
-| `hermes gateway install` | Install gateway as system service |
-| `hermes cron list` | View scheduled jobs |
-| `hermes cron status` | Check if cron scheduler is running |
-| `hermes version` | Show version info |
-| `hermes pairing list/approve/revoke` | Manage DM pairing codes |
-
---
-
-## Messaging Gateway
-
-The gateway connects Hermes to Telegram, Discord, Slack, and WhatsApp.
-
-### Setup
-
-The interactive setup wizard handles platform configuration:
-
-```bash
-hermes gateway setup      # Arrow-key menu of all platforms, configure tokens/allowlists/home channels
-```
-
-This is the recommended way to configure messaging. It shows which platforms are already set up, walks through each one interactively, and offers to start/restart the gateway service at the end.
-
-Platforms can also be configured manually in `~/.hermes/.env`:
-
-### Configuration (in `~/.hermes/.env`):
-
-```bash
-# Telegram
-TELEGRAM_BOT_TOKEN=123456:ABC-DEF...      # From @BotFather
-TELEGRAM_ALLOWED_USERS=123456789,987654   # Comma-separated user IDs (from @userinfobot)
-
-# Discord  
-DISCORD_BOT_TOKEN=MTIz...                 # From Developer Portal
-DISCORD_ALLOWED_USERS=123456789012345678  # Comma-separated user IDs
-
-# Agent Behavior
-HERMES_MAX_ITERATIONS=60                  # Max tool-calling iterations
-MESSAGING_CWD=/home/myuser                # Terminal working directory for messaging
-
-# Tool progress is configured in config.yaml (display.tool_progress: off|new|all|verbose)
-```
-
-### Working Directory Behavior
-
- **CLI (`hermes` command)**: Uses current directory (`.` → `os.getcwd()`)
- **Messaging (Telegram/Discord)**: Uses `MESSAGING_CWD` (default: home directory)
-
-This is intentional: CLI users are in a terminal and expect the agent to work in their current directory, while messaging users need a consistent starting location.
-
-### Security (User Allowlists):
-
-**IMPORTANT**: By default, the gateway denies all users who are not in an allowlist or paired via DM.
-
-The gateway checks `{PLATFORM}_ALLOWED_USERS` environment variables:
- If set: Only listed user IDs can interact with the bot
- If unset: All users are denied unless `GATEWAY_ALLOW_ALL_USERS=true` is set
-
-Users can find their IDs:
- **Telegram**: Message [@userinfobot](https://t.me/userinfobot)
- **Discord**: Enable Developer Mode, right-click name → Copy ID
-
-### DM Pairing System
-
-Instead of static allowlists, users can pair via one-time codes:
-1. Unknown user DMs the bot → receives pairing code
-2. Owner runs `hermes pairing approve <platform> <code>`
-3. User is permanently authorized
-
-Security: 8-char codes, 1-hour expiry, rate-limited (1/10min/user), max 3 pending per platform, lockout after 5 failed attempts, `chmod 0600` on data files.
-
-Files: `gateway/pairing.py`, `hermes_cli/pairing.py`
-
-### Event Hooks
-
-Hooks fire at lifecycle points. Place hook directories in `~/.hermes/hooks/`:
-
-```
-~/.hermes/hooks/my-hook/
-├── HOOK.yaml    # name, description, events list
-└── handler.py   # async def handle(event_type, context): ...
-```
-
-Events: `gateway:startup`, `session:start`, `session:reset`, `agent:start`, `agent:step`, `agent:end`, `command:*`
-
-The `agent:step` event fires each iteration of the tool-calling loop with tool names and results.
-
-Files: `gateway/hooks.py`
-
-### Tool Progress Notifications
-
-When `tool_progress` is enabled in `config.yaml`, the bot sends status messages as it works:
- `💻 \`ls -la\`...` (terminal commands show the actual command)
- `🔍 web_search...`
- `📄 web_extract...`
- `🐍 execute_code...` (programmatic tool calling sandbox)
- `🔀 delegate_task...` (subagent delegation)
- `❓ clarify...` (user question, CLI-only)
-
-Modes:
- `new`: Only when switching to a different tool (less spam)
- `all`: Every single tool call
-
-### Typing Indicator
-
-The gateway keeps the "typing..." indicator active throughout processing, refreshing every 4 seconds. This lets users know the bot is working even during long tool-calling sequences.
-
-### Platform Toolsets:
-
-Each platform has a dedicated toolset in `toolsets.py`:
- `hermes-telegram`: Full tools including terminal (with safety checks)
- `hermes-discord`: Full tools including terminal
- `hermes-whatsapp`: Full tools including terminal
-
---
-
-## Configuration System
-
-Configuration files are stored in `~/.hermes/` for easy user access:
- `~/.hermes/config.yaml` - All settings (model, terminal, compression, etc.)
- `~/.hermes/.env` - API keys and secrets
-
-### Adding New Configuration Options
-
-When adding new configuration variables, you MUST follow this process:
-
-#### For config.yaml options:
-
-1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
-2. **CRITICAL**: Bump `_config_version` in `DEFAULT_CONFIG` when adding required fields
-3. This triggers migration prompts for existing users on next `hermes update` or `hermes setup`
-
-Example:
-```python
-DEFAULT_CONFIG = {
-    # ... existing config ...
-    
-    "new_feature": {
-        "enabled": True,
-        "option": "default_value",
-    },
-    
-    # BUMP THIS when adding required fields
-    "_config_version": 2,  # Was 1, now 2
-}
-```
-
-#### For .env variables (API keys/secrets):
-
-1. Add to `REQUIRED_ENV_VARS` or `OPTIONAL_ENV_VARS` in `hermes_cli/config.py`
-2. Include metadata for the migration system:
-
-```python
-OPTIONAL_ENV_VARS = {
-    # ... existing vars ...
-    "NEW_API_KEY": {
-        "description": "What this key is for",
-        "prompt": "Display name in prompts",
-        "url": "https://where-to-get-it.com/",
-        "tools": ["tools_it_enables"],  # What tools need this
-        "password": True,  # Mask input
-    },
-}
-```
-
-#### Update related files:
-
- `hermes_cli/setup.py` - Add prompts in the setup wizard
- `cli-config.yaml.example` - Add example with comments
- Update README.md if user-facing
-
-### Config Version Migration
-
-The system uses `_config_version` to detect outdated configs:
-
-1. `check_for_missing_config()` compares user config to `DEFAULT_CONFIG`
-2. `migrate_config()` interactively prompts for missing values
-3. Called automatically by `hermes update` and optionally by `hermes setup`
-
---
-
-## Environment Variables
-
-API keys are loaded from `~/.hermes/.env`:
- `OPENROUTER_API_KEY` - Main LLM API access (primary provider)
- `FIRECRAWL_API_KEY` - Web search/extract tools
- `FIRECRAWL_API_URL` - Self-hosted Firecrawl endpoint (optional)
- `BROWSERBASE_API_KEY` / `BROWSERBASE_PROJECT_ID` - Browser automation
- `FAL_KEY` - Image generation (FLUX model)
- `NOUS_API_KEY` - Vision and Mixture-of-Agents tools
-
-Terminal tool configuration (in `~/.hermes/config.yaml`):
- `terminal.backend` - Backend: local, docker, singularity, modal, daytona, or ssh
- `terminal.cwd` - Working directory ("." = host CWD for local only; for remote backends set an absolute path inside the target, or omit to use the backend's default)
- `terminal.docker_image` - Image for Docker backend
- `terminal.singularity_image` - Image for Singularity backend
- `terminal.modal_image` - Image for Modal backend
- `terminal.daytona_image` - Image for Daytona backend
- `DAYTONA_API_KEY` - API key for Daytona backend (in .env)
- SSH: `TERMINAL_SSH_HOST`, `TERMINAL_SSH_USER`, `TERMINAL_SSH_KEY` in .env
-
-Agent behavior (in `~/.hermes/.env`):
- `HERMES_MAX_ITERATIONS` - Max tool-calling iterations (default: 60)
- `MESSAGING_CWD` - Working directory for messaging platforms (default: ~)
- `display.tool_progress` in config.yaml - Tool progress: `off`, `new`, `all`, `verbose`
- `OPENAI_API_KEY` - Voice transcription (Whisper STT)
- `SLACK_BOT_TOKEN` / `SLACK_APP_TOKEN` - Slack integration (Socket Mode)
- `SLACK_ALLOWED_USERS` - Comma-separated Slack user IDs
- `HERMES_HUMAN_DELAY_MODE` - Response pacing: off/natural/custom
- `HERMES_HUMAN_DELAY_MIN_MS` / `HERMES_HUMAN_DELAY_MAX_MS` - Custom delay range
-
-### Dangerous Command Approval
-
-The terminal tool includes safety checks for potentially destructive commands (e.g., `rm -rf`, `DROP TABLE`, `chmod 777`, etc.):
-
-**Behavior by Backend:**
- **Docker/Singularity/Modal**: Commands run unrestricted (isolated containers)
- **Local/SSH**: Dangerous commands trigger approval flow
-
-**Approval Flow (CLI):**
-```
-⚠️  Potentially dangerous command detected: recursive delete
-    rm -rf /tmp/test
-
-    [o]nce  |  [s]ession  |  [a]lways  |  [d]eny
-    Choice [o/s/a/D]: 
-```
-
-**Approval Flow (Messaging):**
- Command is blocked with explanation
- Agent explains the command was blocked for safety
- User must add the pattern to their allowlist via `hermes config edit` or run the command directly on their machine
-
-**Configuration:**
- `command_allowlist` in `~/.hermes/config.yaml` stores permanently allowed patterns
- Add patterns via "always" approval or edit directly
-
-**Sudo Handling (Messaging):**
- If sudo fails over messaging, output includes tip to add `SUDO_PASSWORD` to `~/.hermes/.env`
-
---
-
-## Background Process Management
-
-The `process` tool works alongside `terminal` for managing long-running background processes:
-
-**Starting a background process:**
-```python
-terminal(command="pytest -v tests/", background=true)
-# Returns: {"session_id": "proc_abc123", "pid": 12345, ...}
-```
-
-**Managing it with the process tool:**
- `process(action="list")` -- show all running/recent processes
- `process(action="poll", session_id="proc_abc123")` -- check status + new output
- `process(action="log", session_id="proc_abc123")` -- full output with pagination
- `process(action="wait", session_id="proc_abc123", timeout=600)` -- block until done
- `process(action="kill", session_id="proc_abc123")` -- terminate
- `process(action="write", session_id="proc_abc123", data="y")` -- send stdin
- `process(action="submit", session_id="proc_abc123", data="yes")` -- send + Enter
-
-**Key behaviors:**
- Background processes execute through the configured terminal backend (local/Docker/Modal/Daytona/SSH/Singularity) -- never directly on the host unless `TERMINAL_ENV=local`
- The `wait` action blocks the tool call until the process finishes, times out, or is interrupted by a new user message
- PTY mode (`pty=true` on terminal) enables interactive CLI tools (Codex, Claude Code)
- In RL training, background processes are auto-killed when the episode ends (`tool_context.cleanup()`)
- In the gateway, sessions with active background processes are exempt from idle reset
- The process registry checkpoints to `~/.hermes/processes.json` for crash recovery
-
-Files: `tools/process_registry.py` (registry + handler), `tools/terminal_tool.py` (spawn integration)
+1. Add to `COMMANDS` dict in `hermes_cli/commands.py`
+2. Add handler in `HermesCLI.process_command()` in `cli.py`
+3. For persistent settings, use `save_config_value()` in `cli.py`

 ---

 ## Adding New Tools

-Adding a tool requires changes in **2 files** (the tool file and `toolsets.py`):
-
-1. **Create `tools/your_tool.py`** with handler, schema, check function, and registry call:
+Requires changes in **3 files**:

+**1. Create `tools/your_tool.py`:**
 ```python
-# tools/example_tool.py
-import json
-import os
+import json, os
 from tools.registry import registry

-def check_example_requirements() -> bool:
-    """Check if required API keys/dependencies are available."""
+def check_requirements() -> bool:
    return bool(os.getenv("EXAMPLE_API_KEY"))

 def example_tool(param: str, task_id: str = None) -> str:
-    """Execute the tool and return JSON string result."""
-    try:
-        result = {"success": True, "data": "..."}
-        return json.dumps(result, ensure_ascii=False)
-    except Exception as e:
-        return json.dumps({"error": str(e)}, ensure_ascii=False)
-
-EXAMPLE_SCHEMA = {
-    "name": "example_tool",
-    "description": "Does something useful.",
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "param": {"type": "string", "description": "The parameter"}
-        },
-        "required": ["param"]
-    }
-}
+    return json.dumps({"success": True, "data": "..."})

 registry.register(
    name="example_tool",
    toolset="example",
-    schema=EXAMPLE_SCHEMA,
-    handler=lambda args, **kw: example_tool(
-        param=args.get("param", ""), task_id=kw.get("task_id")),
-    check_fn=check_example_requirements,
+    schema={"name": "example_tool", "description": "...", "parameters": {...}},
+    handler=lambda args, **kw: example_tool(param=args.get("param", ""), task_id=kw.get("task_id")),
+    check_fn=check_requirements,
    requires_env=["EXAMPLE_API_KEY"],
 )
 ```

-2. **Add to `toolsets.py`**: Add `"example_tool"` to `_HERMES_CORE_TOOLS` if it should be in all platform toolsets, or create a new toolset entry.
+**2. Add import** in `model_tools.py` `_discover_tools()` list.

-3. **Add discovery import** in `model_tools.py`'s `_discover_tools()` list: `"tools.example_tool"`.
+**3. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.

-That's it. The registry handles schema collection, dispatch, availability checking, and error wrapping automatically. No edits to `TOOLSET_REQUIREMENTS`, `handle_function_call()`, `get_all_tool_names()`, or any other data structure.
+The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.

-**Optional:** Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` for the setup wizard, and to `toolset_distributions.py` for batch processing.
-
-**Special case: tools that need agent-level state** (like `todo`, `memory`):
-These are intercepted by `run_agent.py`'s tool dispatch loop *before* `handle_function_call()`. The registry still holds their schemas, but dispatch returns a stub error as a safety fallback. See `todo_tool.py` for the pattern.
-
-All tool handlers MUST return a JSON string. The registry's `dispatch()` wraps all exceptions in `{"error": "..."}` automatically.
-
-### Dynamic Tool Availability
-
-Tools declare their requirements at registration time via `check_fn` and `requires_env`. The registry checks `check_fn()` when building tool definitions -- tools whose check fails are silently excluded.
-
-### Stateful Tools
-
-Tools that maintain state (terminal, browser) require:
- `task_id` parameter for session isolation between concurrent tasks
- `cleanup_*()` function to release resources
- Cleanup is called automatically in run_agent.py after conversation completes
+**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.

 ---

-## Trajectory Format
+## Adding Configuration

-Conversations are saved in ShareGPT format for training:
-```json
-{"from": "system", "value": "System prompt with <tools>...</tools>"}
-{"from": "human", "value": "User message"}
-{"from": "gpt", "value": "<think>reasoning</think>\n<tool_call>{...}</tool_call>"}
-{"from": "tool", "value": "<tool_response>{...}</tool_response>"}
-{"from": "gpt", "value": "Final response"}
-```
-
-Tool calls use `<tool_call>` XML tags, responses use `<tool_response>` tags, reasoning uses `<think>` tags.
-
-### Trajectory Export
+### config.yaml options:
+1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
+2. Bump `_config_version` (currently 5) to trigger migration for existing users

+### .env variables:
+1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
 ```python
-agent = AIAgent(save_trajectories=True)
-agent.chat("Do something")
-# Saves to trajectories/*.jsonl in ShareGPT format
+"NEW_API_KEY": {
+    "description": "What it's for",
+    "prompt": "Display name",
+    "url": "https://...",
+    "password": True,
+    "category": "tool",  # provider, tool, messaging, setting
+},
 ```

+### Config loaders (three separate code paths):
+
+| Loader | Used by | Location |
+|--------|---------|----------|
+| `load_cli_config()` | CLI mode | `cli.py` |
+| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
+| Direct YAML load | Gateway | `gateway/run.py` |
+
 ---

-## Batch Processing (batch_runner.py)
+## Important Policies

-For processing multiple prompts:
- Parallel execution with multiprocessing
- Content-based resume for fault tolerance (matches on prompt text, not indices)
- Toolset distributions control probabilistic tool availability per prompt
- Output: `data/<run_name>/trajectories.jsonl` (combined) + individual batch files
+### Prompt Caching Must Not Break
+
+Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
+- Alter past context mid-conversation
+- Change toolsets mid-conversation
+- Reload memories or rebuild system prompts mid-conversation
+
+Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.
+
+### Working Directory Behavior
+- **CLI**: Uses current directory (`.` → `os.getcwd()`)
+- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
+
+---
+
+## Known Pitfalls
+
+### DO NOT use `simple_term_menu` for interactive menus
+Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
+
+### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
+Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
+
+### `_last_resolved_tool_names` is a process-global in `model_tools.py`
+When subagents overwrite this global, `execute_code` calls after delegation may fail with missing tool imports. Known bug.
+
+### Tests must not write to `~/.hermes/`
+The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
+
+---
+
+## Testing

 ```bash
-python batch_runner.py \
-    --dataset_file=prompts.jsonl \
-    --batch_size=20 \
-    --num_workers=4 \
-    --run_name=my_run
+source .venv/bin/activate
+python -m pytest tests/ -q          # Full suite (~2500 tests, ~2 min)
+python -m pytest tests/test_model_tools.py -q   # Toolset resolution
+python -m pytest tests/test_cli_init.py -q       # CLI config loading
+python -m pytest tests/gateway/ -q               # Gateway tests
+python -m pytest tests/tools/ -q                 # Tool-level tests
 ```

---
-
-## Skills System
-
-Skills are on-demand knowledge documents the agent can load. Compatible with the [agentskills.io](https://agentskills.io/specification) open standard.
-
-```
-skills/
-├── mlops/                    # Category folder
-│   ├── axolotl/             # Skill folder
-│   │   ├── SKILL.md         # Main instructions (required)
-│   │   ├── references/      # Additional docs, API specs
-│   │   ├── templates/       # Output formats, configs
-│   │   └── assets/          # Supplementary files (agentskills.io)
-│   └── vllm/
-│       └── SKILL.md
-├── .hub/                    # Skills Hub state (gitignored)
-│   ├── lock.json            # Installed skill provenance
-│   ├── quarantine/          # Pending security review
-│   ├── audit.log            # Security scan history
-│   ├── taps.json            # Custom source repos
-│   └── index-cache/         # Cached remote indexes
-```
-
-**Progressive disclosure** (token-efficient):
-1. `skills_categories()` - List category names (~50 tokens)
-2. `skills_list(category)` - Name + description per skill (~3k tokens)
-3. `skill_view(name)` - Full content + tags + linked files
-
-SKILL.md files use YAML frontmatter (agentskills.io format):
-```yaml
---
-name: skill-name
-description: Brief description for listing
-version: 1.0.0
-platforms: [macos]              # Optional — restrict to specific OS (macos/linux/windows)
-metadata:
-  hermes:
-    tags: [tag1, tag2]
-    related_skills: [other-skill]
---
-# Skill Content...
-```
-
-**Platform filtering** — Skills with a `platforms` field are automatically excluded from the system prompt index, `skills_list()`, and slash commands on incompatible platforms. Skills without the field load everywhere (backward compatible). See `skills/apple/` for macOS-only examples (iMessage, Reminders, Notes, FindMy).
-
-**Skills Hub** — user-driven skill search/install from online registries and official optional skills. Sources: official optional skills (shipped with repo, labeled "official"), GitHub (openai/skills, anthropics/skills, custom taps), ClawHub, Claude marketplace, LobeHub. Not exposed as an agent tool — the model cannot search for or install skills. Users manage skills via `hermes skills browse/search/install` CLI commands or the `/skills` slash command in chat.
-
-Key files:
- `tools/skills_tool.py` — Agent-facing skill list/view (progressive disclosure)
- `tools/skills_guard.py` — Security scanner (regex + LLM audit, trust-aware install policy)
- `tools/skills_hub.py` — Source adapters (OptionalSkillSource, GitHub, ClawHub, Claude marketplace, LobeHub), lock file, auth
- `hermes_cli/skills_hub.py` — CLI subcommands + `/skills` slash command handler
-
---
-
-## Testing Changes
-
-After making changes:
-
-1. Run `hermes doctor` to check setup
-2. Run `hermes config check` to verify config
-3. Test with `hermes chat -q "test message"`
-4. For new config options, test fresh install: `rm -rf ~/.hermes && hermes setup`
+Always run the full suite before pushing changes.
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -118,7 +118,7 @@ hermes-agent/
 ├── cli.py                    # HermesCLI class — interactive TUI, prompt_toolkit integration
 ├── model_tools.py            # Tool orchestration (thin layer over tools/registry.py)
 ├── toolsets.py               # Tool groupings and presets (hermes-cli, hermes-telegram, etc.)
-├── hermes_state.py           # SQLite session database with FTS5 full-text search
+├── hermes_state.py           # SQLite session database with FTS5 full-text search, session titles
 ├── batch_runner.py           # Parallel batch processing for trajectory generation
 │
 ├── agent/                    # Agent internals (extracted modules)
@@ -218,7 +218,7 @@ User message → AIAgent._run_agent_loop()

 - **Self-registering tools**: Each tool file calls `registry.register()` at import time. `model_tools.py` triggers discovery by importing all tool modules.
 - **Toolset grouping**: Tools are grouped into toolsets (`web`, `terminal`, `file`, `browser`, etc.) that can be enabled/disabled per platform.
- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search. JSON logs go to `~/.hermes/sessions/`.
+- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. JSON logs go to `~/.hermes/sessions/`.
 - **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs.
 - **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint).
 - **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests.
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Nous Research
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open

 <table>
 <tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
-<tr><td><b>Lives where you do</b></td><td>Telegram, Discord, Slack, WhatsApp, and CLI — all from a single gateway process. Voice memo transcription, cross-platform conversation continuity.</td></tr>
+<tr><td><b>Lives where you do</b></td><td>Telegram, Discord, Slack, WhatsApp, Signal, and CLI — all from a single gateway process. Voice memo transcription, cross-platform conversation continuity.</td></tr>
 <tr><td><b>A closed learning loop</b></td><td>Agent-curated memory with periodic nudges. Autonomous skill creation after complex tasks. Skills self-improve during use. FTS5 session search with LLM summarization for cross-session recall. <a href="https://github.com/plastic-labs/honcho">Honcho</a> dialectic user modeling. Compatible with the <a href="https://agentskills.io">agentskills.io</a> open standard.</td></tr>
 <tr><td><b>Scheduled automations</b></td><td>Built-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended.</td></tr>
 <tr><td><b>Delegates and parallelizes</b></td><td>Spawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns.</td></tr>
@@ -71,7 +71,7 @@ All documentation lives at **[hermes-agent.nousresearch.com/docs](https://hermes
 | [Quickstart](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart) | Install → setup → first conversation in 2 minutes |
 | [CLI Usage](https://hermes-agent.nousresearch.com/docs/user-guide/cli) | Commands, keybindings, personalities, sessions |
 | [Configuration](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | Config file, providers, models, all options |
-| [Messaging Gateway](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram, Discord, Slack, WhatsApp, Home Assistant |
+| [Messaging Gateway](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram, Discord, Slack, WhatsApp, Signal, Home Assistant |
 | [Security](https://hermes-agent.nousresearch.com/docs/user-guide/security) | Command approval, DM pairing, container isolation |
 | [Tools & Toolsets](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) | 40+ tools, toolset system, terminal backends |
 | [Skills System](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills) | Procedural memory, Skills Hub, creating skills |
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -4,7 +4,7 @@ Provides a single resolution chain so every consumer (context compression,
 session search, web extraction, vision analysis, browser vision) picks up
 the best available backend without duplicating fallback logic.

-Resolution order for text tasks:
+Resolution order for text tasks (auto mode):
  1. OpenRouter  (OPENROUTER_API_KEY)
  2. Nous Portal (~/.hermes/auth.json active provider)
  3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
@@ -14,10 +14,19 @@ Resolution order for text tasks:
     — checked via PROVIDER_REGISTRY entries with auth_type='api_key'
  6. None

-Resolution order for vision/multimodal tasks:
+Resolution order for vision/multimodal tasks (auto mode):
  1. OpenRouter
  2. Nous Portal
-  3. None  (custom endpoints can't substitute for Gemini multimodal)
+  3. Codex OAuth, then None  (custom endpoints and API-key providers are skipped — they may not support multimodal)
+
+Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
+CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task:
+"openrouter", "nous", "codex", or "main" (= steps 3-5).
+Default "auto" follows the chains above.
+
+Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
+AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
+than the provider's default.
 """

 import json
@@ -73,6 +82,55 @@ _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
 # read response.choices[0].message.content. This adapter translates those
 # calls to the Codex Responses API so callers don't need any changes.

+
+def _convert_content_for_responses(content: Any) -> Any:
+    """Convert chat.completions content to Responses API format.
+
+    chat.completions uses:
+      {"type": "text", "text": "..."}
+      {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
+
+    Responses API uses:
+      {"type": "input_text", "text": "..."}
+      {"type": "input_image", "image_url": "data:image/png;base64,..."}
+
+    If content is a plain string, it's returned as-is (the Responses API
+    accepts strings directly for text-only messages).
+    """
+    if isinstance(content, str):
+        return content
+    if not isinstance(content, list):
+        return str(content) if content else ""
+
+    converted: List[Dict[str, Any]] = []
+    for part in content:
+        if not isinstance(part, dict):
+            continue
+        ptype = part.get("type", "")
+        if ptype == "text":
+            converted.append({"type": "input_text", "text": part.get("text", "")})
+        elif ptype == "image_url":
+            # chat.completions nests the URL: {"image_url": {"url": "..."}}
+            image_data = part.get("image_url", {})
+            url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data)
+            entry: Dict[str, Any] = {"type": "input_image", "image_url": url}
+            # Preserve detail if specified
+            detail = image_data.get("detail") if isinstance(image_data, dict) else None
+            if detail:
+                entry["detail"] = detail
+            converted.append(entry)
+        elif ptype in ("input_text", "input_image"):
+            # Already in Responses format — pass through
+            converted.append(part)
+        else:
+            # Unknown content type — try to preserve as text
+            text = part.get("text", "")
+            if text:
+                converted.append({"type": "input_text", "text": text})
+
+    return converted or ""
+
+
 class _CodexCompletionsAdapter:
    """Drop-in shim that accepts chat.completions.create() kwargs and
    routes them through the Codex Responses streaming API."""
@@ -86,30 +144,31 @@ class _CodexCompletionsAdapter:
        model = kwargs.get("model", self._model)
        temperature = kwargs.get("temperature")

-        # Separate system/instructions from conversation messages
+        # Separate system/instructions from conversation messages.
+        # Convert chat.completions multimodal content blocks to Responses
+        # API format (input_text / input_image instead of text / image_url).
        instructions = "You are a helpful assistant."
        input_msgs: List[Dict[str, Any]] = []
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content") or ""
            if role == "system":
-                instructions = content
+                instructions = content if isinstance(content, str) else str(content)
            else:
-                input_msgs.append({"role": role, "content": content})
+                input_msgs.append({
+                    "role": role,
+                    "content": _convert_content_for_responses(content),
+                })

        resp_kwargs: Dict[str, Any] = {
            "model": model,
            "instructions": instructions,
            "input": input_msgs or [{"role": "user", "content": ""}],
-            "stream": True,
            "store": False,
        }

-        max_tokens = kwargs.get("max_output_tokens") or kwargs.get("max_completion_tokens") or kwargs.get("max_tokens")
-        if max_tokens is not None:
-            resp_kwargs["max_output_tokens"] = int(max_tokens)
-        if temperature is not None:
-            resp_kwargs["temperature"] = temperature
+        # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
+        # support max_output_tokens or temperature — omit to avoid 400 errors.

        # Tools support for flush_memories and similar callers
        tools = kwargs.get("tools")
@@ -317,71 +376,148 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        if not api_key:
            continue
        # Resolve base URL (with optional env-var override)
-        base_url = pconfig.inference_base_url
+        # Kimi Code keys (sk-kimi-) need api.kimi.com/coding/v1
+        env_url = ""
        if pconfig.base_url_env_var:
            env_url = os.getenv(pconfig.base_url_env_var, "").strip()
-            if env_url:
-                base_url = env_url.rstrip("/")
+        if env_url:
+            base_url = env_url.rstrip("/")
+        elif provider_id == "kimi-coding" and api_key.startswith("sk-kimi-"):
+            base_url = "https://api.kimi.com/coding/v1"
+        else:
+            base_url = pconfig.inference_base_url
        model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
        logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
-        return OpenAI(api_key=api_key, base_url=base_url), model
+        extra = {}
+        if "api.kimi.com" in base_url.lower():
+            extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
+        return OpenAI(api_key=api_key, base_url=base_url, **extra), model

    return None, None


+# ── Provider resolution helpers ─────────────────────────────────────────────
+
+def _get_auxiliary_provider(task: str = "") -> str:
+    """Read the provider override for a specific auxiliary task.
+
+    Checks AUXILIARY_{TASK}_PROVIDER first (e.g. AUXILIARY_VISION_PROVIDER),
+    then CONTEXT_{TASK}_PROVIDER (for the compression section's summary_provider),
+    then falls back to "auto".  Expected values: "auto", "openrouter", "nous", "codex", "main".
+    """
+    if task:
+        for prefix in ("AUXILIARY_", "CONTEXT_"):
+            val = os.getenv(f"{prefix}{task.upper()}_PROVIDER", "").strip().lower()
+            if val and val != "auto":
+                return val
+    return "auto"
+
+
+def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
+    or_key = os.getenv("OPENROUTER_API_KEY")
+    if not or_key:
+        return None, None
+    logger.debug("Auxiliary client: OpenRouter")
+    return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
+                   default_headers=_OR_HEADERS), _OPENROUTER_MODEL
+
+
+def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]:
+    nous = _read_nous_auth()
+    if not nous:
+        return None, None
+    global auxiliary_is_nous
+    auxiliary_is_nous = True
+    logger.debug("Auxiliary client: Nous Portal")
+    return (
+        OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()),
+        _NOUS_MODEL,
+    )
+
+
+def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
+    custom_base = os.getenv("OPENAI_BASE_URL")
+    custom_key = os.getenv("OPENAI_API_KEY")
+    if not custom_base or not custom_key:
+        return None, None
+    model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
+    logger.debug("Auxiliary client: custom endpoint (%s)", model)
+    return OpenAI(api_key=custom_key, base_url=custom_base), model
+
+
+def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
+    codex_token = _read_codex_access_token()
+    if not codex_token:
+        return None, None
+    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
+    real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
+    return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
+
+
+def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]:
+    """Resolve a specific forced provider.  Returns (None, None) if creds missing."""
+    if forced == "openrouter":
+        client, model = _try_openrouter()
+        if client is None:
+            logger.warning("auxiliary.provider=openrouter but OPENROUTER_API_KEY not set")
+        return client, model
+
+    if forced == "nous":
+        client, model = _try_nous()
+        if client is None:
+            logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes login)")
+        return client, model
+
+    if forced == "codex":
+        client, model = _try_codex()
+        if client is None:
+            logger.warning("auxiliary.provider=codex but no Codex OAuth token found (run: hermes model)")
+        return client, model
+
+    if forced == "main":
+        # "main" = skip OpenRouter/Nous, use the main chat model's credentials.
+        for try_fn in (_try_custom_endpoint, _try_codex, _resolve_api_key_provider):
+            client, model = try_fn()
+            if client is not None:
+                return client, model
+        logger.warning("auxiliary.provider=main but no main endpoint credentials found")
+        return None, None
+
+    # Unknown provider name — NOTE: despite the log text, this returns (None, None); callers do not retry with auto
+    logger.warning("Unknown auxiliary.provider=%r, falling back to auto", forced)
+    return None, None
+
+
+def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
+    """Full auto-detection chain: OpenRouter → Nous → custom → Codex → API-key → None."""
+    for try_fn in (_try_openrouter, _try_nous, _try_custom_endpoint,
+                   _try_codex, _resolve_api_key_provider):
+        client, model = try_fn()
+        if client is not None:
+            return client, model
+    logger.debug("Auxiliary client: none available")
+    return None, None
+
+
 # ── Public API ──────────────────────────────────────────────────────────────

-def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
-    """Return (client, model_slug) for text-only auxiliary tasks.
+def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
+    """Return (client, default_model_slug) for text-only auxiliary tasks.

-    Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth
-    -> direct API-key providers -> (None, None).
+    Args:
+        task: Optional task name ("compression", "web_extract") to check
+              for a task-specific provider override.
+
+    Callers may override the returned model with a per-task env var
+    (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
    """
-    # 1. OpenRouter
-    or_key = os.getenv("OPENROUTER_API_KEY")
-    if or_key:
-        logger.debug("Auxiliary text client: OpenRouter")
-        return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
-                       default_headers=_OR_HEADERS), _OPENROUTER_MODEL
-
-    # 2. Nous Portal
-    nous = _read_nous_auth()
-    if nous:
-        global auxiliary_is_nous
-        auxiliary_is_nous = True
-        logger.debug("Auxiliary text client: Nous Portal")
-        return (
-            OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()),
-            _NOUS_MODEL,
-        )
-
-    # 3. Custom endpoint (both base URL and key must be set)
-    custom_base = os.getenv("OPENAI_BASE_URL")
-    custom_key = os.getenv("OPENAI_API_KEY")
-    if custom_base and custom_key:
-        model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
-        logger.debug("Auxiliary text client: custom endpoint (%s)", model)
-        return OpenAI(api_key=custom_key, base_url=custom_base), model
-
-    # 4. Codex OAuth -- uses the Responses API (only endpoint the token
-    # can access), wrapped to look like a chat.completions client.
-    codex_token = _read_codex_access_token()
-    if codex_token:
-        logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
-        real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
-        return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
-
-    # 5. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, etc.)
-    api_client, api_model = _resolve_api_key_provider()
-    if api_client is not None:
-        return api_client, api_model
-
-    # 6. Nothing available
-    logger.debug("Auxiliary text client: none available")
-    return None, None
+    forced = _get_auxiliary_provider(task)
+    if forced != "auto":
+        return _resolve_forced_provider(forced)
+    return _resolve_auto()


-def get_async_text_auxiliary_client():
+def get_async_text_auxiliary_client(task: str = ""):
    """Return (async_client, model_slug) for async consumers.

    For standard providers returns (AsyncOpenAI, model). For Codex returns
@@ -390,7 +526,7 @@ def get_async_text_auxiliary_client():
    """
    from openai import AsyncOpenAI

-    sync_client, model = get_text_auxiliary_client()
+    sync_client, model = get_text_auxiliary_client(task)
    if sync_client is None:
        return None, None

@@ -403,33 +539,33 @@ def get_async_text_auxiliary_client():
    }
    if "openrouter" in str(sync_client.base_url).lower():
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
+    elif "api.kimi.com" in str(sync_client.base_url).lower():
+        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
    return AsyncOpenAI(**async_kwargs), model


 def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
-    """Return (client, model_slug) for vision/multimodal auxiliary tasks.
+    """Return (client, default_model_slug) for vision/multimodal auxiliary tasks.

-    Only OpenRouter and Nous Portal qualify — custom endpoints cannot
-    substitute for Gemini multimodal.
+    Checks AUXILIARY_VISION_PROVIDER for a forced provider, otherwise
+    auto-detects.  Callers may override the returned model with
+    AUXILIARY_VISION_MODEL.
+
+    In auto mode, only providers known to support multimodal are tried:
+    OpenRouter, Nous Portal, and Codex OAuth (gpt-5.3-codex supports
+    vision via the Responses API).  Custom endpoints and API-key
+    providers are skipped — they may not handle vision input.  To use
+    them, set AUXILIARY_VISION_PROVIDER explicitly.
    """
-    # 1. OpenRouter
-    or_key = os.getenv("OPENROUTER_API_KEY")
-    if or_key:
-        logger.debug("Auxiliary vision client: OpenRouter")
-        return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
-                       default_headers=_OR_HEADERS), _OPENROUTER_MODEL
-
-    # 2. Nous Portal
-    nous = _read_nous_auth()
-    if nous:
-        logger.debug("Auxiliary vision client: Nous Portal")
-        return (
-            OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()),
-            _NOUS_MODEL,
-        )
-
-    # 3. Nothing suitable
-    logger.debug("Auxiliary vision client: none available")
+    forced = _get_auxiliary_provider("vision")
+    if forced != "auto":
+        return _resolve_forced_provider(forced)
+    # Auto: only multimodal-capable providers
+    for try_fn in (_try_openrouter, _try_nous, _try_codex):
+        client, model = try_fn()
+        if client is not None:
+            return client, model
+    logger.debug("Auxiliary vision client: none available (auto only tries OpenRouter/Nous/Codex)")
    return None, None


--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -7,7 +7,7 @@ protecting head and tail context.

 import logging
 import os
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

 from agent.auxiliary_client import get_text_auxiliary_client
 from agent.model_metadata import (
@@ -53,7 +53,7 @@ class ContextCompressor:
        self.last_completion_tokens = 0
        self.last_total_tokens = 0

-        self.client, default_model = get_text_auxiliary_client()
+        self.client, default_model = get_text_auxiliary_client("compression")
        self.summary_model = summary_model_override or default_model

    def update_from_response(self, usage: Dict[str, Any]):
@@ -82,11 +82,14 @@ class ContextCompressor:
            "compression_count": self.compression_count,
        }

-    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> str:
-        """Generate a concise summary of conversation turns using a fast model."""
-        if not self.client:
-            return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed to save space. The assistant performed various actions and received responses."
+    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]:
+        """Generate a concise summary of conversation turns.

+        Tries the auxiliary model first, then falls back to the user's main
+        model.  Returns None if all attempts fail — the caller should drop
+        the middle turns without a summary rather than inject a useless
+        placeholder.
+        """
        parts = []
        for msg in turns_to_summarize:
            role = msg.get("role", "unknown")
@@ -117,28 +120,28 @@ TURNS TO SUMMARIZE:

 Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""

-        try:
-            return self._call_summary_model(self.client, self.summary_model, prompt)
-        except Exception as e:
-            logging.warning(f"Failed to generate context summary with auxiliary model: {e}")
+        # 1. Try the auxiliary model (cheap/fast)
+        if self.client:
+            try:
+                return self._call_summary_model(self.client, self.summary_model, prompt)
+            except Exception as e:
+                logging.warning(f"Failed to generate context summary with auxiliary model: {e}")

-            # Fallback: try the main model's endpoint.  This handles the common
-            # case where the user switched providers (e.g. OpenRouter → local LLM)
-            # but a stale API key causes the auxiliary client to pick the old
-            # provider which then fails (402, auth error, etc.).
-            fallback_client, fallback_model = self._get_fallback_client()
-            if fallback_client is not None:
-                try:
-                    logger.info("Retrying context summary with fallback client (%s)", fallback_model)
-                    summary = self._call_summary_model(fallback_client, fallback_model, prompt)
-                    # Success — swap in the working client for future compressions
-                    self.client = fallback_client
-                    self.summary_model = fallback_model
-                    return summary
-                except Exception as fallback_err:
-                    logging.warning(f"Fallback summary model also failed: {fallback_err}")
+        # 2. Fallback: try the user's main model endpoint
+        fallback_client, fallback_model = self._get_fallback_client()
+        if fallback_client is not None:
+            try:
+                logger.info("Retrying context summary with main model (%s)", fallback_model)
+                summary = self._call_summary_model(fallback_client, fallback_model, prompt)
+                self.client = fallback_client
+                self.summary_model = fallback_model
+                return summary
+            except Exception as fallback_err:
+                logging.warning(f"Main model summary also failed: {fallback_err}")

-            return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed. The assistant performed tool calls and received responses."
+        # 3. All models failed — return None so the caller drops turns without a summary
+        logging.warning("Context compression: no model available for summary. Middle turns will be dropped without summary.")
+        return None

    def _call_summary_model(self, client, model: str, prompt: str) -> str:
        """Make the actual LLM call to generate a summary. Raises on failure."""
@@ -326,25 +329,6 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
            print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
            print(f"   📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")

-        # Truncation fallback when no auxiliary model is available
-        if self.client is None:
-            print("⚠️  Context compression: no auxiliary model available. Falling back to message truncation.")
-            # Keep system message(s) at the front and the protected tail;
-            # simply drop the oldest non-system messages until under threshold.
-            kept = []
-            for msg in messages:
-                if msg.get("role") == "system":
-                    kept.append(msg.copy())
-                else:
-                    break
-            tail = messages[-self.protect_last_n:]
-            kept.extend(m.copy() for m in tail)
-            self.compression_count += 1
-            kept = self._sanitize_tool_pairs(kept)
-            if not self.quiet_mode:
-                print(f"   ✂️  Truncated: {len(messages)} → {len(kept)} messages (dropped middle turns)")
-            return kept
-
        if not self.quiet_mode:
            print(f"   🗜️  Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")

@@ -357,7 +341,13 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
            compressed.append(msg)

-        compressed.append({"role": "user", "content": summary})
+        if summary:
+            last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user"
+            summary_role = "user" if last_head_role in ("assistant", "tool") else "assistant"
+            compressed.append({"role": summary_role, "content": summary})
+        else:
+            if not self.quiet_mode:
+                print("   ⚠️  No summary model available — middle turns dropped without summary")

        for i in range(compress_end, n_messages):
            compressed.append(messages[i].copy())
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -66,7 +66,8 @@ DEFAULT_AGENT_IDENTITY = (
    "range of tasks including answering questions, writing and editing code, "
    "analyzing information, creative work, and executing actions via your tools. "
    "You communicate clearly, admit uncertainty when appropriate, and prioritize "
-    "being genuinely useful over being verbose unless otherwise directed below."
+    "being genuinely useful over being verbose unless otherwise directed below. "
+    "Be targeted and efficient in your exploration and investigations."
 )

 MEMORY_GUIDANCE = (
@@ -102,12 +103,33 @@ PLATFORM_HINTS = {
        "You are on a text messaging communication platform, Telegram. "
        "Please do not use markdown as it does not render. "
        "You can send media files natively: to deliver a file to the user, "
-        "include MEDIA:/absolute/path/to/file in your response. Audio "
-        "(.ogg) sends as voice bubbles. You can also include image URLs "
-        "in markdown format ![alt](url) and they will be sent as native photos."
+        "include MEDIA:/absolute/path/to/file in your response. Images "
+        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
+        "bubbles, and videos (.mp4) play inline. You can also include image "
+        "URLs in markdown format ![alt](url) and they will be sent as native photos."
    ),
    "discord": (
-        "You are in a Discord server or group chat communicating with your user."
+        "You are in a Discord server or group chat communicating with your user. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.png, .jpg, .webp) are sent as photo "
+        "attachments, audio as file attachments. You can also include image URLs "
+        "in markdown format ![alt](url) and they will be sent as attachments."
+    ),
+    "slack": (
+        "You are in a Slack workspace communicating with your user. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.png, .jpg, .webp) are uploaded as photo "
+        "attachments, audio as file attachments. You can also include image URLs "
+        "in markdown format ![alt](url) and they will be uploaded as attachments."
+    ),
+    "signal": (
+        "You are on a text messaging communication platform, Signal. "
+        "Please do not use markdown as it does not render. "
+        "You can send media files natively: to deliver a file to the user, "
+        "include MEDIA:/absolute/path/to/file in your response. Images "
+        "(.png, .jpg, .webp) appear as photos, audio as attachments, and other "
+        "files arrive as downloadable documents. You can also include image "
+        "URLs in markdown format ![alt](url) and they will be sent as photos."
    ),
    "cli": (
        "You are a CLI AI Agent. Try not to use markdown but simple text "
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -8,6 +8,7 @@ the first 6 and last 4 characters for debuggability.
 """

 import logging
+import os
 import re
 from typing import Optional

@@ -15,7 +16,7 @@ logger = logging.getLogger(__name__)

 # Known API key prefixes -- match the prefix + contiguous token chars
 _PREFIX_PATTERNS = [
-    r"sk-[A-Za-z0-9_-]{10,}",           # OpenAI / OpenRouter
+    r"sk-[A-Za-z0-9_-]{10,}",           # OpenAI / OpenRouter / Anthropic (sk-ant-*)
    r"ghp_[A-Za-z0-9]{10,}",            # GitHub PAT (classic)
    r"github_pat_[A-Za-z0-9_]{10,}",    # GitHub PAT (fine-grained)
    r"xox[baprs]-[A-Za-z0-9-]{10,}",    # Slack tokens
@@ -25,6 +26,18 @@ _PREFIX_PATTERNS = [
    r"fc-[A-Za-z0-9]{10,}",             # Firecrawl
    r"bb_live_[A-Za-z0-9_-]{10,}",      # BrowserBase
    r"gAAAA[A-Za-z0-9_=-]{20,}",        # Codex encrypted tokens
+    r"AKIA[A-Z0-9]{16}",                # AWS Access Key ID
+    r"sk_live_[A-Za-z0-9]{10,}",        # Stripe secret key (live)
+    r"sk_test_[A-Za-z0-9]{10,}",        # Stripe secret key (test)
+    r"rk_live_[A-Za-z0-9]{10,}",        # Stripe restricted key
+    r"SG\.[A-Za-z0-9_-]{10,}",          # SendGrid API key
+    r"hf_[A-Za-z0-9]{10,}",             # HuggingFace token
+    r"r8_[A-Za-z0-9]{10,}",             # Replicate API token
+    r"npm_[A-Za-z0-9]{10,}",            # npm access token
+    r"pypi-[A-Za-z0-9_-]{10,}",         # PyPI API token
+    r"dop_v1_[A-Za-z0-9]{10,}",         # DigitalOcean PAT
+    r"doo_v1_[A-Za-z0-9]{10,}",         # DigitalOcean OAuth
+    r"am_[A-Za-z0-9_-]{10,}",           # AgentMail API key
 ]

 # ENV assignment patterns: KEY=value where KEY contains a secret-like name
@@ -52,6 +65,22 @@ _TELEGRAM_RE = re.compile(
    r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})",
 )

+# Private key blocks: -----BEGIN RSA PRIVATE KEY----- ... -----END RSA PRIVATE KEY-----
+_PRIVATE_KEY_RE = re.compile(
+    r"-----BEGIN[A-Z ]*PRIVATE KEY-----[\s\S]*?-----END[A-Z ]*PRIVATE KEY-----"
+)
+
+# Database connection strings: protocol://user:PASSWORD@host
+# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password
+_DB_CONNSTR_RE = re.compile(
+    r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)",
+    re.IGNORECASE,
+)
+
+# E.164 phone numbers: +<country><number>, 7-15 digits
+# Negative lookahead rejects a number that runs straight into more letters/digits (hex strings, identifiers)
+_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
+
 # Compile known prefix patterns into one alternation
 _PREFIX_RE = re.compile(
    r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
@@ -69,9 +98,12 @@ def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged.
+    Disabled when HERMES_REDACT_SECRETS is 0/false/no/off (presumably set from security.redact_secrets in config.yaml).
    """
    if not text:
        return text
+    if os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("0", "false", "no", "off"):
+        return text

    # Known prefixes (sk-, ghp_, etc.)
    text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
@@ -101,6 +133,20 @@ def redact_sensitive_text(text: str) -> str:
        return f"{prefix}{digits}:***"
    text = _TELEGRAM_RE.sub(_redact_telegram, text)

+    # Private key blocks
+    text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text)
+
+    # Database connection string passwords
+    text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
+
+    # E.164 phone numbers (Signal, WhatsApp)
+    def _redact_phone(m):
+        phone = m.group(1)
+        if len(phone) <= 8:
+            return phone[:2] + "****" + phone[-2:]
+        return phone[:4] + "****" + phone[-4:]
+    text = _SIGNAL_PHONE_RE.sub(_redact_phone, text)
+
    return text


--- a/batch_runner.py
+++ b/batch_runner.py
@@ -1112,7 +1112,7 @@ def main(
    batch_size: int = None,
    run_name: str = None,
    distribution: str = "default",
-    model: str = "anthropic/claude-sonnet-4-20250514",
+    model: str = "anthropic/claude-sonnet-4.6",
    api_key: str = None,
    base_url: str = "https://openrouter.ai/api/v1",
    max_turns: int = 10,
@@ -1155,7 +1155,7 @@ def main(
        providers_order (str): Comma-separated list of OpenRouter providers to try in order (e.g. "anthropic,openai,google")
        provider_sort (str): Sort providers by "price", "throughput", or "latency" (OpenRouter only)
        max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
-        reasoning_effort (str): OpenRouter reasoning effort level: "xhigh", "high", "medium", "low", "minimal", "none" (default: "xhigh")
+        reasoning_effort (str): OpenRouter reasoning effort level: "xhigh", "high", "medium", "low", "minimal", "none" (default: "medium")
        reasoning_disabled (bool): Completely disable reasoning/thinking tokens (default: False)
        prefill_messages_file (str): Path to JSON file containing prefill messages (list of {role, content} dicts)
        max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set)
@@ -1216,7 +1216,7 @@ def main(
    providers_order_list = [p.strip() for p in providers_order.split(",")] if providers_order else None
    
    # Build reasoning_config from CLI flags
-    # --reasoning_disabled takes priority, then --reasoning_effort, then default (xhigh)
+    # --reasoning_disabled takes priority, then --reasoning_effort, then default (medium)
    reasoning_config = None
    if reasoning_disabled:
        # Completely disable reasoning/thinking tokens
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -50,6 +50,16 @@ model:
 #   # Data policy: "allow" (default) or "deny" to exclude providers that may store data
 #   # data_collection: "deny"

+# =============================================================================
+# Git Worktree Isolation
+# =============================================================================
+# When enabled, each CLI session creates an isolated git worktree so multiple
+# agents can work on the same repo concurrently without file collisions.
+# Equivalent to always passing --worktree / -w on the command line.
+#
+# worktree: true    # Always create a worktree when in a git repo
+# worktree: false   # Default — only create when -w flag is passed
+
 # =============================================================================
 # Terminal Tool Configuration
 # =============================================================================
@@ -199,8 +209,58 @@ compression:
  threshold: 0.85
  
  # Model to use for generating summaries (fast/cheap recommended)
-  # This model compresses the middle turns into a concise summary
+  # This model compresses the middle turns into a concise summary.
+  # IMPORTANT: it receives the full middle section of the conversation, so it
+  # MUST support a context length at least as large as your main model's.
  summary_model: "google/gemini-3-flash-preview"
+  
+  # Provider for the summary model (default: "auto")
+  # Options: "auto", "openrouter", "nous", "main"
+  # summary_provider: "auto"
+
+# =============================================================================
+# Auxiliary Models (Advanced — Experimental)
+# =============================================================================
+# Hermes uses lightweight "auxiliary" models for side tasks: image analysis,
+# browser screenshot analysis, web page summarization, and context compression.
+#
+# By default these use Gemini Flash via OpenRouter or Nous Portal and are
+# auto-detected from your credentials.  You do NOT need to change anything
+# here for normal usage.
+#
+# WARNING: Overriding these with providers other than OpenRouter or Nous Portal
+# is EXPERIMENTAL and may not work.  Not all models/providers support vision,
+# produce usable summaries, or accept the same API format.  Change at your own
+# risk — if things break, reset to "auto" / empty values.
+#
+# Each task has its own provider + model pair so you can mix providers.
+# For example: OpenRouter for vision (needs multimodal), but your main
+# local endpoint for compression (just needs text).
+#
+# Provider options:
+#   "auto"       - Best available: OpenRouter → Nous Portal → main endpoint (default)
+#   "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
+#   "nous"       - Force Nous Portal (requires: hermes login)
+#   "codex"      - Force Codex OAuth (requires: hermes model → Codex).
+#                  Uses gpt-5.3-codex which supports vision.
+#   "main"       - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
+#                  Works with OpenAI API, local models, or any OpenAI-compatible
+#                  endpoint.  Also falls back to Codex OAuth and API-key providers.
+#
+# Model: leave empty to use the provider's default.  When empty, OpenRouter
+# uses "google/gemini-3-flash-preview" and Nous uses "gemini-3-flash".
+# Other providers pick a sensible default automatically.
+#
+# auxiliary:
+#   # Image analysis: vision_analyze tool + browser screenshots
+#   vision:
+#     provider: "auto"
+#     model: ""              # e.g. "google/gemini-2.5-flash", "openai/gpt-4o"
+#
+#   # Web page scraping / summarization + browser page text extraction
+#   web_extract:
+#     provider: "auto"
+#     model: ""

 # =============================================================================
 # Persistent Memory
@@ -285,7 +345,7 @@ agent:
  # Reasoning effort level (OpenRouter and Nous Portal)
  # Controls how much "thinking" the model does before responding.
  # Options: "xhigh" (max), "high", "medium", "low", "minimal", "none" (disable)
-  reasoning_effort: "xhigh"
+  reasoning_effort: "medium"
  
  # Predefined personalities (use with /personality command)
  personalities:
@@ -575,3 +635,8 @@ display:
  #   verbose: Full args, results, and debug logs (same as /verbose)
  # Toggle at runtime with /verbose in the CLI
  tool_progress: all
+
+  # Play terminal bell when agent finishes a response.
+  # Useful for long-running tasks — your terminal will ding when the agent is done.
+  # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
+  bell_on_complete: false
--- a/cli.py
+++ b/cli.py
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -98,6 +98,7 @@ def _deliver_result(job: dict, content: str) -> None:
        "discord": Platform.DISCORD,
        "slack": Platform.SLACK,
        "whatsapp": Platform.WHATSAPP,
+        "signal": Platform.SIGNAL,
    }
    platform = platform_map.get(platform_name.lower())
    if not platform:
@@ -176,6 +177,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:

        model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"

+        # Load config.yaml for model, reasoning, prefill, toolsets, provider routing
+        _cfg = {}
        try:
            import yaml
            _cfg_path = str(_hermes_home / "config.yaml")
@@ -190,6 +193,41 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        except Exception:
            pass

+        # Reasoning config from env or config.yaml
+        reasoning_config = None
+        effort = os.getenv("HERMES_REASONING_EFFORT", "")
+        if not effort:
+            effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()
+        if effort and effort.lower() != "none":
+            valid = ("xhigh", "high", "medium", "low", "minimal")
+            if effort.lower() in valid:
+                reasoning_config = {"enabled": True, "effort": effort.lower()}
+        elif effort.lower() == "none":
+            reasoning_config = {"enabled": False}
+
+        # Prefill messages from env or config.yaml
+        prefill_messages = None
+        prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
+        if prefill_file:
+            import json as _json
+            pfpath = Path(prefill_file).expanduser()
+            if not pfpath.is_absolute():
+                pfpath = _hermes_home / pfpath
+            if pfpath.exists():
+                try:
+                    with open(pfpath, "r", encoding="utf-8") as _pf:
+                        prefill_messages = _json.load(_pf)
+                    if not isinstance(prefill_messages, list):
+                        prefill_messages = None
+                except Exception:
+                    prefill_messages = None
+
+        # Max iterations: agent.max_turns, then top-level max_turns, else 90
+        max_iterations = _cfg.get("agent", {}).get("max_turns") or _cfg.get("max_turns") or 90
+
+        # Provider routing
+        pr = _cfg.get("provider_routing", {})
+
        from hermes_cli.runtime_provider import (
            resolve_runtime_provider,
            format_runtime_provider_error,
@@ -208,6 +246,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            base_url=runtime.get("base_url"),
            provider=runtime.get("provider"),
            api_mode=runtime.get("api_mode"),
+            max_iterations=max_iterations,
+            reasoning_config=reasoning_config,
+            prefill_messages=prefill_messages,
+            providers_allowed=pr.get("only"),
+            providers_ignored=pr.get("ignore"),
+            providers_order=pr.get("order"),
+            provider_sort=pr.get("sort"),
            quiet_mode=True,
            session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"
        )
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,7 +0,0 @@
-# Documentation
-
-All documentation has moved to the website:
-
-**📖 [hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)**
-
-The documentation source files live in [`website/docs/`](../website/docs/).
--- a/docs/send_file_integration_map.md
+++ b/docs/send_file_integration_map.md
@@ -1,344 +0,0 @@
-# send_file Integration Map — Hermes Agent Codebase Deep Dive
-
-## 1. environments/tool_context.py — Base64 File Transfer Implementation
-
-### upload_file() (lines 153-205)
- Reads local file as raw bytes, base64-encodes to ASCII string
- Creates parent dirs in sandbox via `self.terminal(f"mkdir -p {parent}")`
- **Chunk size:** 60,000 chars (~60KB per shell command)
- **Small files (<=60KB b64):** Single `printf '%s' '{b64}' | base64 -d > {remote_path}`
- **Large files:** Writes chunks to `/tmp/_hermes_upload.b64` via `printf >> append`, then `base64 -d` to target
- **Error handling:** Checks local file exists; returns `{exit_code, output}`
- **Size limits:** No explicit limit, but shell arg limit ~2MB means chunking is necessary for files >~45KB raw
- **No theoretical max** — but very large files would be slow (many terminal round trips)
-
-### download_file() (lines 234-278)
- Runs `base64 {remote_path}` inside sandbox, captures stdout
- Strips output, base64-decodes to raw bytes
- Writes to host filesystem with parent dir creation
- **Error handling:** Checks exit code, empty output, decode errors
- Returns `{success: bool, bytes: int}` or `{success: false, error: str}`
- **Size limit:** Bounded by terminal output buffer (practical limit ~few MB via base64 terminal output)
-
-### Promotion potential:
- These methods work via `self.terminal()` — they're environment-agnostic
- Could be directly lifted into a new tool that operates on the agent's current sandbox
- For send_file, this `download_file()` pattern is the key: it extracts files from sandbox → host
-
-## 2. tools/environments/base.py — BaseEnvironment Interface
-
-### Current methods:
- `execute(command, cwd, timeout, stdin_data)` → `{output, returncode}`
- `cleanup()` — release resources
- `stop()` — alias for cleanup
- `_prepare_command()` — sudo transformation
- `_build_run_kwargs()` — subprocess kwargs
- `_timeout_result()` — standard timeout dict
-
-### What would need to be added for file transfer:
- **Nothing required at this level.** File transfer can be implemented via `execute()` (base64 over terminal, like ToolContext does) or via environment-specific methods.
- Optional: `upload_file(local_path, remote_path)` and `download_file(remote_path, local_path)` methods could be added to BaseEnvironment for optimized per-backend transfers, but the base64-over-terminal approach already works universally.
-
-## 3. tools/environments/docker.py — Docker Container Details
-
-### Container ID tracking:
- `self._container_id` stored at init from `self._inner.container_id`
- Inner is `minisweagent.environments.docker.DockerEnvironment`
- Container ID is a standard Docker container hash
-
-### docker cp feasibility:
- **YES**, `docker cp` could be used for optimized file transfer:
-  - `docker cp {container_id}:{remote_path} {local_path}` (download)
-  - `docker cp {local_path} {container_id}:{remote_path}` (upload)
- Much faster than base64-over-terminal for large files
- Container ID is directly accessible via `env._container_id` or `env._inner.container_id`
-
-### Volumes mounted:
- **Persistent mode:** Bind mounts at `~/.hermes/sandboxes/docker/{task_id}/workspace` → `/workspace` and `.../home` → `/root`
- **Ephemeral mode:** tmpfs at `/workspace` (10GB), `/home` (1GB), `/root` (1GB)
- **User volumes:** From `config.yaml docker_volumes` (arbitrary `-v` mounts)
- **Security tmpfs:** `/tmp` (512MB), `/var/tmp` (256MB), `/run` (64MB)
-
-### Direct host access for persistent mode:
- If persistent, files at `/workspace/foo.txt` are just `~/.hermes/sandboxes/docker/{task_id}/workspace/foo.txt` on host — no transfer needed!
-
-## 4. tools/environments/ssh.py — SSH Connection Management
-
-### Connection management:
- Uses SSH ControlMaster for persistent connection
- Control socket at `/tmp/hermes-ssh/{user}@{host}:{port}.sock`
- ControlPersist=300 (5 min keepalive)
- BatchMode=yes (non-interactive)
- Stores: `self.host`, `self.user`, `self.port`, `self.key_path`
-
-### SCP/SFTP feasibility:
- **YES**, SCP can piggyback on the ControlMaster socket:
-  - `scp -o ControlPath={socket} {user}@{host}:{remote} {local}` (download)
-  - `scp -o ControlPath={socket} {local} {user}@{host}:{remote}` (upload)
- Same SSH key and connection reuse — zero additional auth
- Would be much faster than base64-over-terminal for large files
-
-## 5. tools/environments/modal.py — Modal Sandbox Filesystem
-
-### Filesystem API exposure:
- **Not directly.** The inner `SwerexModalEnvironment` wraps Modal's sandbox
- The sandbox object is accessible at: `env._inner.deployment._sandbox`
- Modal's Python SDK exposes `sandbox.open()` for file I/O — but only via async API
- Currently only used for `snapshot_filesystem()` during cleanup
- **Could use:** `sandbox.open(path, "rb")` to read files or `sandbox.open(path, "wb")` to write
- **Alternative:** Base64-over-terminal already works via `execute()` — simpler, no SDK dependency
-
-## 6. gateway/platforms/base.py — MEDIA: Tag Flow (Complete)
-
-### extract_media() (lines 587-620):
- **Pattern:** `MEDIA:\S+` — extracts file paths after MEDIA: prefix
- **Voice flag:** `[[audio_as_voice]]` global directive sets `is_voice=True` for all media in message
- Returns `List[Tuple[str, bool]]` (path, is_voice) and cleaned content
-
-### _process_message_background() media routing (lines 752-786):
- After extracting MEDIA tags, routes by file extension:
-  - `.ogg .opus .mp3 .wav .m4a` → `send_voice()`
-  - `.mp4 .mov .avi .mkv .3gp` → `send_video()`
-  - `.jpg .jpeg .png .webp .gif` → `send_image_file()`
-  - **Everything else** → `send_document()`
- This routing already supports arbitrary files!
-
-### send_* method inventory (base class):
- `send(chat_id, content, reply_to, metadata)` — ABSTRACT, text
- `send_image(chat_id, image_url, caption, reply_to)` — URL-based images
- `send_animation(chat_id, animation_url, caption, reply_to)` — GIF animations
- `send_voice(chat_id, audio_path, caption, reply_to)` — voice messages
- `send_video(chat_id, video_path, caption, reply_to)` — video files
- `send_document(chat_id, file_path, caption, file_name, reply_to)` — generic files
- `send_image_file(chat_id, image_path, caption, reply_to)` — local image files
- `send_typing(chat_id)` — typing indicator
- `edit_message(chat_id, message_id, content)` — edit sent messages
-
-### What's missing:
- **Telegram:** No override for `send_document` or `send_image_file` — falls back to text!
- **Discord:** No override for `send_document` — falls back to text!
- **WhatsApp:** Has `send_document` and `send_image_file` via bridge — COMPLETE.
- The base class defaults just send "📎 File: /path" as text — useless for actual file delivery.
-
-## 7. gateway/platforms/telegram.py — Send Method Analysis
-
-### Implemented send methods:
- `send()` — MarkdownV2 text with fallback to plain
- `send_voice()` — `.ogg`/`.opus` as `send_voice()`, others as `send_audio()`
- `send_image()` — URL-based via `send_photo()`
- `send_animation()` — GIF via `send_animation()`
- `send_typing()` — "typing" chat action
- `edit_message()` — edit text messages
-
-### MISSING:
- **`send_document()` NOT overridden** — Need to add `self._bot.send_document(chat_id, document=open(file_path, 'rb'), ...)`
- **`send_image_file()` NOT overridden** — Need to add `self._bot.send_photo(chat_id, photo=open(path, 'rb'), ...)`
- **`send_video()` NOT overridden** — Need to add `self._bot.send_video(...)`
-
-## 8. gateway/platforms/discord.py — Send Method Analysis
-
-### Implemented send methods:
- `send()` — text messages with chunking
- `send_voice()` — discord.File attachment
- `send_image()` — downloads URL, creates discord.File attachment
- `send_typing()` — channel.typing()
- `edit_message()` — edit text messages
-
-### MISSING:
- **`send_document()` NOT overridden** — Need to add discord.File attachment
- **`send_image_file()` NOT overridden** — Need to add discord.File from local path
- **`send_video()` NOT overridden** — Need to add discord.File attachment
-
-## 9. gateway/run.py — User File Attachment Handling
-
-### Current attachment flow:
-1. **Telegram photos** (line 509-529): Download via `photo.get_file()` → `cache_image_from_bytes()` → vision auto-analysis
-2. **Telegram voice** (line 532-541): Download → `cache_audio_from_bytes()` → STT transcription
-3. **Telegram audio** (line 542-551): Same pattern
-4. **Telegram documents** (line 553-617): Extension validation against `SUPPORTED_DOCUMENT_TYPES`, 20MB limit, content injection for text files
-5. **Discord attachments** (line 717-751): Content-type detection, image/audio caching, URL fallback for other types
-6. **Gateway run.py** (lines 818-883): Auto-analyzes images with vision, transcribes audio, enriches document messages with context notes
-
-### Key insight: Files are always cached to host filesystem first, then processed. The agent sees local file paths.
-
-## 10. tools/terminal_tool.py — Terminal Tool & Environment Interaction
-
-### How it manages environments:
- Global dict `_active_environments: Dict[str, Any]` keyed by task_id
- Per-task creation locks prevent duplicate sandbox creation
- Auto-cleanup thread kills idle environments after `TERMINAL_LIFETIME_SECONDS`
- `_get_env_config()` reads all TERMINAL_* env vars for backend selection
- `_create_environment()` factory creates the right backend type
-
-### Could send_file piggyback?
- **YES.** send_file needs access to the same environment to extract files from sandboxes.
- It can reuse `_active_environments[task_id]` to get the environment, then:
-  - Docker: Use `docker cp` via `env._container_id`
-  - SSH: Use `scp` via `env.control_socket`
-  - Local: Just read the file directly
-  - Modal: Use base64-over-terminal via `env.execute()`
- The file_tools.py module already does this with `ShellFileOperations` — read_file/write_file/search/patch all share the same env instance.
-
-## 11. tools/tts_tool.py — Working Example of File Delivery
-
-### Flow:
-1. Generate audio file to `~/.hermes/audio_cache/tts_TIMESTAMP.{ogg,mp3}`
-2. Return JSON with `media_tag: "MEDIA:/path/to/file"`
-3. For Telegram voice: prepend `[[audio_as_voice]]` directive
-4. The LLM includes the MEDIA tag in its response text
-5. `BasePlatformAdapter._process_message_background()` calls `extract_media()` to find the tag
-6. Routes by extension → `send_voice()` for audio files
-7. Platform adapter sends the file natively
-
-### Key pattern: Tool saves file to host → returns MEDIA: path → LLM echoes it → gateway extracts → platform delivers
-
-## 12. tools/image_generation_tool.py — Working Example of Image Delivery
-
-### Flow:
-1. Call FAL.ai API → get image URL
-2. Return JSON with `image: "https://fal.media/..."` URL
-3. The LLM includes the URL in markdown: `![description](URL)`
-4. `BasePlatformAdapter.extract_images()` finds `![alt](url)` patterns
-5. Routes through `send_image()` (URL) or `send_animation()` (GIF)
-6. Platform downloads and sends natively
-
-### Key difference from TTS: Images are URL-based, not local files. The gateway downloads at send time.
-
---
-
-# INTEGRATION MAP: Where send_file Hooks In
-
-## Architecture Decision: MEDIA: Tag Protocol vs. New Tool
-
-The MEDIA: tag protocol is already the established pattern for file delivery. Two options:
-
-### Option A: Pure MEDIA: Tag (Minimal Change)
- No new tool needed
- Agent downloads file from sandbox to host using terminal (base64)
- Saves to known location (e.g., `~/.hermes/file_cache/`)
- Includes `MEDIA:/path` in response text
- Existing routing in `_process_message_background()` handles delivery
- **Problem:** Agent has to manually do base64 dance + know about MEDIA: convention
-
-### Option B: Dedicated send_file Tool (Recommended)
- New tool that the agent calls with `(file_path, caption?)`
- Tool handles the sandbox → host extraction automatically
- Returns MEDIA: tag that gets routed through existing pipeline
- Much cleaner agent experience
-
-## Implementation Plan for Option B
-
-### Files to CREATE:
-
-1. **`tools/send_file_tool.py`** — The new tool
-   - Accepts: `file_path` (path in sandbox), `caption` (optional)
-   - Detects environment backend from `_active_environments`
-   - Extracts file from sandbox:
-     - **local:** `shutil.copy()` or direct path
-     - **docker:** `docker cp {container_id}:{path} {local_cache}/` 
-     - **ssh:** `scp -o ControlPath=... {user}@{host}:{path} {local_cache}/`
-     - **modal:** base64-over-terminal via `env.execute("base64 {path}")`
-   - Saves to `~/.hermes/file_cache/{uuid}_{filename}`
-   - Returns: `MEDIA:/cached/path` in response for gateway to pick up
-   - Register with `registry.register(name="send_file", toolset="file", ...)`
-
-### Files to MODIFY:
-
-2. **`gateway/platforms/telegram.py`** — Add missing send methods:
-   ```python
-   async def send_document(self, chat_id, file_path, caption=None, file_name=None, reply_to=None):
-       with open(file_path, "rb") as f:
-           msg = await self._bot.send_document(
-               chat_id=int(chat_id), document=f,
-               caption=caption, filename=file_name or os.path.basename(file_path))
-       return SendResult(success=True, message_id=str(msg.message_id))
-   
-   async def send_image_file(self, chat_id, image_path, caption=None, reply_to=None):
-       with open(image_path, "rb") as f:
-           msg = await self._bot.send_photo(chat_id=int(chat_id), photo=f, caption=caption)
-       return SendResult(success=True, message_id=str(msg.message_id))
-   
-   async def send_video(self, chat_id, video_path, caption=None, reply_to=None):
-       with open(video_path, "rb") as f:
-           msg = await self._bot.send_video(chat_id=int(chat_id), video=f, caption=caption)
-       return SendResult(success=True, message_id=str(msg.message_id))
-   ```
-
-3. **`gateway/platforms/discord.py`** — Add missing send methods:
-   ```python
-   async def send_document(self, chat_id, file_path, caption=None, file_name=None, reply_to=None):
-       channel = self._client.get_channel(int(chat_id)) or await self._client.fetch_channel(int(chat_id))
-       with open(file_path, "rb") as f:
-           file = discord.File(io.BytesIO(f.read()), filename=file_name or os.path.basename(file_path))
-           msg = await channel.send(content=caption, file=file)
-       return SendResult(success=True, message_id=str(msg.id))
-   
-   async def send_image_file(self, chat_id, image_path, caption=None, reply_to=None):
-       # Same pattern as send_document with image filename
-   
-   async def send_video(self, chat_id, video_path, caption=None, reply_to=None):
-       # Same pattern, discord renders video attachments inline
-   ```
-
-4. **`toolsets.py`** — Add `"send_file"` to `_HERMES_CORE_TOOLS` list
-
-5. **`agent/prompt_builder.py`** — Update platform hints to mention send_file tool
-
-### Code that can be REUSED (zero rewrite):
-
- `BasePlatformAdapter.extract_media()` — Already extracts MEDIA: tags
- `BasePlatformAdapter._process_message_background()` — Already routes by extension
- `ToolContext.download_file()` — Base64-over-terminal extraction pattern
- `tools/terminal_tool.py` _active_environments dict — Environment access
- `tools/registry.py` — Tool registration infrastructure
- `gateway/platforms/base.py` send_document/send_image_file/send_video signatures — Already defined
-
-### Code that needs to be WRITTEN from scratch:
-
-1. `tools/send_file_tool.py` (~150 lines):
-   - File extraction from each environment backend type
-   - Local file cache management
-   - Registry registration
-   
-2. Telegram `send_document` + `send_image_file` + `send_video` overrides (~40 lines)
-3. Discord `send_document` + `send_image_file` + `send_video` overrides (~50 lines)
-
-### Total effort: ~240 lines of new code, ~5 lines of config changes
-
-## Key Environment-Specific Extract Strategies
-
-| Backend    | Extract Method                 | Speed    | Complexity |
-|------------|-------------------------------|----------|------------|
-| local      | shutil.copy / direct path     | Instant  | None       |
-| docker     | `docker cp container:path .`  | Fast     | Low        |
-| docker+vol | Direct host path access       | Instant  | None       |
-| ssh        | `scp -o ControlPath=...`      | Fast     | Low        |
-| modal      | base64-over-terminal          | Moderate | Medium     |
-| singularity| Direct path (overlay mount)   | Fast     | Low        |
-
-## Data Flow Summary
-
-```
-Agent calls send_file(file_path="/workspace/output.pdf", caption="Here's the report")
-    │
-    ▼
-send_file_tool.py:
-    1. Get environment from _active_environments[task_id]
-    2. Detect backend type (docker/ssh/modal/local)
-    3. Extract file to ~/.hermes/file_cache/{uuid}_{filename}
-    4. Return: '{"success": true, "media_tag": "MEDIA:/home/user/.hermes/file_cache/abc123_output.pdf"}'
-    │
-    ▼
-LLM includes MEDIA: tag in its response text
-    │
-    ▼
-BasePlatformAdapter._process_message_background():
-    1. extract_media(response) → finds MEDIA:/path
-    2. Checks extension: .pdf → send_document()
-    3. Calls platform-specific send_document(chat_id, file_path, caption)
-    │
-    ▼
-TelegramAdapter.send_document() / DiscordAdapter.send_document():
-    Opens file, sends via platform API as native document attachment
-    User receives downloadable file in chat
-```
--- a/gateway/channel_directory.py
+++ b/gateway/channel_directory.py
@@ -40,8 +40,8 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
        except Exception as e:
            logger.warning("Channel directory: failed to build %s: %s", platform.value, e)

-    # Telegram & WhatsApp can't enumerate chats -- pull from session history
-    for plat_name in ("telegram", "whatsapp"):
+    # Telegram, WhatsApp & Signal can't enumerate chats -- pull from session history
+    for plat_name in ("telegram", "whatsapp", "signal"):
        if plat_name not in platforms:
            platforms[plat_name] = _build_from_sessions(plat_name)

@@ -52,7 +52,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:

    try:
        DIRECTORY_PATH.parent.mkdir(parents=True, exist_ok=True)
-        with open(DIRECTORY_PATH, "w") as f:
+        with open(DIRECTORY_PATH, "w", encoding="utf-8") as f:
            json.dump(directory, f, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.warning("Channel directory: failed to write: %s", e)
@@ -115,7 +115,7 @@ def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:

    entries = []
    try:
-        with open(sessions_path) as f:
+        with open(sessions_path, encoding="utf-8") as f:
            data = json.load(f)

        seen_ids = set()
@@ -147,7 +147,7 @@ def load_directory() -> Dict[str, Any]:
    if not DIRECTORY_PATH.exists():
        return {"updated_at": None, "platforms": {}}
    try:
-        with open(DIRECTORY_PATH) as f:
+        with open(DIRECTORY_PATH, encoding="utf-8") as f:
            return json.load(f)
    except Exception:
        return {"updated_at": None, "platforms": {}}
--- a/gateway/config.py
+++ b/gateway/config.py
@@ -26,6 +26,7 @@ class Platform(Enum):
    DISCORD = "discord"
    WHATSAPP = "whatsapp"
    SLACK = "slack"
+    SIGNAL = "signal"
    HOMEASSISTANT = "homeassistant"


@@ -155,7 +156,16 @@ class GatewayConfig:
        """Return list of platforms that are enabled and configured."""
        connected = []
        for platform, config in self.platforms.items():
-            if config.enabled and (config.token or config.api_key):
+            if not config.enabled:
+                continue
+            # Platforms that use token/api_key auth
+            if config.token or config.api_key:
+                connected.append(platform)
+            # WhatsApp uses enabled flag only (bridge handles auth)
+            elif platform == Platform.WHATSAPP:
+                connected.append(platform)
+            # Signal uses extra dict for config (http_url + account)
+            elif platform == Platform.SIGNAL and config.extra.get("http_url"):
                connected.append(platform)
        return connected
    
@@ -379,6 +389,26 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
                name=os.getenv("SLACK_HOME_CHANNEL_NAME", ""),
            )
    
+    # Signal
+    signal_url = os.getenv("SIGNAL_HTTP_URL")
+    signal_account = os.getenv("SIGNAL_ACCOUNT")
+    if signal_url and signal_account:
+        if Platform.SIGNAL not in config.platforms:
+            config.platforms[Platform.SIGNAL] = PlatformConfig()
+        config.platforms[Platform.SIGNAL].enabled = True
+        config.platforms[Platform.SIGNAL].extra.update({
+            "http_url": signal_url,
+            "account": signal_account,
+            "ignore_stories": os.getenv("SIGNAL_IGNORE_STORIES", "true").lower() in ("true", "1", "yes"),
+        })
+        signal_home = os.getenv("SIGNAL_HOME_CHANNEL")
+        if signal_home:
+            config.platforms[Platform.SIGNAL].home_channel = HomeChannel(
+                platform=Platform.SIGNAL,
+                chat_id=signal_home,
+                name=os.getenv("SIGNAL_HOME_CHANNEL_NAME", "Home"),
+            )
+
    # Home Assistant
    hass_token = os.getenv("HASS_TOKEN")
    if hass_token:
--- a/gateway/mirror.py
+++ b/gateway/mirror.py
@@ -73,7 +73,7 @@ def _find_session_id(platform: str, chat_id: str) -> Optional[str]:
        return None

    try:
-        with open(_SESSIONS_INDEX) as f:
+        with open(_SESSIONS_INDEX, encoding="utf-8") as f:
            data = json.load(f)
    except Exception:
        return None
@@ -103,7 +103,7 @@ def _append_to_jsonl(session_id: str, message: dict) -> None:
    """Append a message to the JSONL transcript file."""
    transcript_path = _SESSIONS_DIR / f"{session_id}.jsonl"
    try:
-        with open(transcript_path, "a") as f:
+        with open(transcript_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(message, ensure_ascii=False) + "\n")
    except Exception as e:
        logger.debug("Mirror JSONL write failed: %s", e)
--- a/gateway/platforms/ADDING_A_PLATFORM.md
+++ b/gateway/platforms/ADDING_A_PLATFORM.md
@@ -0,0 +1,313 @@
+# Adding a New Messaging Platform
+
+Checklist for integrating a new messaging platform into the Hermes gateway.
+Use this as a reference when building a new adapter — every item here is a
+real integration point that exists in the codebase. Missing any of them will
+cause broken functionality, missing features, or inconsistent behavior.
+
+---
+
+## 1. Core Adapter (`gateway/platforms/<platform>.py`)
+
+The adapter is a subclass of `BasePlatformAdapter` from `gateway/platforms/base.py`.
+
+### Required methods
+
+| Method | Purpose |
+|--------|---------|
+| `__init__(self, config)` | Parse config, init state. Call `super().__init__(config, Platform.YOUR_PLATFORM)` |
+| `connect() -> bool` | Connect to the platform, start listeners. Return True on success |
+| `disconnect()` | Stop listeners, close connections, cancel tasks |
+| `send(chat_id, text, ...) -> SendResult` | Send a text message |
+| `send_typing(chat_id)` | Send typing indicator |
+| `send_image(chat_id, image_url, caption) -> SendResult` | Send an image |
+| `get_chat_info(chat_id) -> dict` | Return `{name, type, chat_id}` for a chat |
+
+### Optional methods (have default stubs in base)
+
+| Method | Purpose |
+|--------|---------|
+| `send_document(chat_id, path, caption)` | Send a file attachment |
+| `send_voice(chat_id, path)` | Send a voice message |
+| `send_video(chat_id, path, caption)` | Send a video |
+| `send_animation(chat_id, path, caption)` | Send a GIF/animation |
+| `send_image_file(chat_id, path, caption)` | Send image from local file |
+
+### Required function
+
+```python
+def check_<platform>_requirements() -> bool:
+    """Check if this platform's dependencies are available."""
+```
+
+### Key patterns to follow
+
+- Use `self.build_source(...)` to construct `SessionSource` objects
+- Call `self.handle_message(event)` to dispatch inbound messages to the gateway
+- Use `MessageEvent`, `MessageType`, `SendResult` from base
+- Use `cache_image_from_bytes`, `cache_audio_from_bytes`, `cache_document_from_bytes` for attachments
+- Filter self-messages (prevent reply loops)
+- Filter sync/echo messages if the platform has them
+- Redact sensitive identifiers (phone numbers, tokens) in all log output
+- Implement reconnection with exponential backoff + jitter for streaming connections
+- Set `MAX_MESSAGE_LENGTH` if the platform has message size limits
+
+---
+
+## 2. Platform Enum (`gateway/config.py`)
+
+Add the platform to the `Platform` enum:
+
+```python
+class Platform(Enum):
+    ...
+    YOUR_PLATFORM = "your_platform"
+```
+
+Add env var loading in `_apply_env_overrides()`:
+
+```python
+# Your Platform
+your_token = os.getenv("YOUR_PLATFORM_TOKEN")
+if your_token:
+    if Platform.YOUR_PLATFORM not in config.platforms:
+        config.platforms[Platform.YOUR_PLATFORM] = PlatformConfig()
+    config.platforms[Platform.YOUR_PLATFORM].enabled = True
+    config.platforms[Platform.YOUR_PLATFORM].token = your_token
+```
+
+Update `get_connected_platforms()` if your platform doesn't use token/api_key
+(e.g., WhatsApp only needs the `enabled` flag; Signal needs `http_url` in its `extra` dict).
+
+---
+
+## 3. Adapter Factory (`gateway/run.py`)
+
+Add to `_create_adapter()`:
+
+```python
+elif platform == Platform.YOUR_PLATFORM:
+    from gateway.platforms.your_platform import YourAdapter, check_your_requirements
+    if not check_your_requirements():
+        logger.warning("Your Platform: dependencies not met")
+        return None
+    return YourAdapter(config)
+```
+
+---
+
+## 4. Authorization Maps (`gateway/run.py`)
+
+Add to BOTH dicts in `_is_user_authorized()`:
+
+```python
+platform_env_map = {
+    ...
+    Platform.YOUR_PLATFORM: "YOUR_PLATFORM_ALLOWED_USERS",
+}
+platform_allow_all_map = {
+    ...
+    Platform.YOUR_PLATFORM: "YOUR_PLATFORM_ALLOW_ALL_USERS",
+}
+```
+
+---
+
+## 5. Session Source (`gateway/session.py`)
+
+If your platform needs extra identity fields (e.g., Signal's UUID alongside
+phone number), add them to the `SessionSource` dataclass with `Optional` defaults,
+and update `to_dict()`, `from_dict()`, and `build_source()` in base.py.
+
+---
+
+## 6. System Prompt Hints (`agent/prompt_builder.py`)
+
+Add a `PLATFORM_HINTS` entry so the agent knows what platform it's on:
+
+```python
+PLATFORM_HINTS = {
+    ...
+    "your_platform": (
+        "You are on Your Platform. "
+        "Describe formatting capabilities, media support, etc."
+    ),
+}
+```
+
+Without this, the agent won't know it's on your platform and may use
+inappropriate formatting (e.g., markdown on platforms that don't render it).
+
+---
+
+## 7. Toolset (`toolsets.py`)
+
+Add a named toolset for your platform:
+
+```python
+"hermes-your-platform": {
+    "description": "Your Platform bot toolset",
+    "tools": _HERMES_CORE_TOOLS,
+    "includes": []
+},
+```
+
+And add it to the `hermes-gateway` composite:
+
+```python
+"hermes-gateway": {
+    "includes": [..., "hermes-your-platform"]
+}
+```
+
+---
+
+## 8. Cron Delivery (`cron/scheduler.py`)
+
+Add to `platform_map` in `_deliver_result()`:
+
+```python
+platform_map = {
+    ...
+    "your_platform": Platform.YOUR_PLATFORM,
+}
+```
+
+Without this, `schedule_cronjob(deliver="your_platform")` silently fails.
+
+---
+
+## 9. Send Message Tool (`tools/send_message_tool.py`)
+
+Add to `platform_map` in `send_message_tool()`:
+
+```python
+platform_map = {
+    ...
+    "your_platform": Platform.YOUR_PLATFORM,
+}
+```
+
+Add routing in `_send_to_platform()`:
+
+```python
+elif platform == Platform.YOUR_PLATFORM:
+    return await _send_your_platform(pconfig, chat_id, message)
+```
+
+Implement `_send_your_platform()` — a standalone async function that sends
+a single message without requiring the full adapter (for use by cron jobs
+and the send_message tool outside the gateway process).
+
+Update the tool schema `target` description to include your platform example.
+
+---
+
+## 10. Cronjob Tool Schema (`tools/cronjob_tools.py`)
+
+Update the `deliver` parameter description and docstring to mention your
+platform as a delivery option.
+
+---
+
+## 11. Channel Directory (`gateway/channel_directory.py`)
+
+If your platform can't enumerate chats (most can't), add it to the
+session-based discovery list:
+
+```python
+for plat_name in ("telegram", "whatsapp", "signal", "your_platform"):
+```
+
+---
+
+## 12. Status Display (`hermes_cli/status.py`)
+
+Add to the `platforms` dict in the Messaging Platforms section:
+
+```python
+platforms = {
+    ...
+    "Your Platform": ("YOUR_PLATFORM_TOKEN", "YOUR_PLATFORM_HOME_CHANNEL"),
+}
+```
+
+---
+
+## 13. Gateway Setup Wizard (`hermes_cli/gateway.py`)
+
+Add to the `_PLATFORMS` list:
+
+```python
+{
+    "key": "your_platform",
+    "label": "Your Platform",
+    "emoji": "📱",
+    "token_var": "YOUR_PLATFORM_TOKEN",
+    "setup_instructions": [...],
+    "vars": [...],
+}
+```
+
+If your platform needs custom setup logic (connectivity testing, QR codes,
+policy choices), add a `_setup_your_platform()` function and route to it
+in the platform selection switch.
+
+Update `_platform_status()` if your platform's "configured" check differs
+from the standard `bool(get_env_value(token_var))`.
+
+---
+
+## 14. Phone/ID Redaction (`agent/redact.py`)
+
+If your platform uses sensitive identifiers (phone numbers, etc.), add a
+regex pattern and redaction function to `agent/redact.py`. This ensures
+identifiers are masked in ALL log output, not just your adapter's logs.
+
+---
+
+## 15. Documentation
+
+| File | What to update |
+|------|---------------|
+| `README.md` | Platform list in feature table + documentation table |
+| `AGENTS.md` | Gateway description + env var config section |
+| `website/docs/user-guide/messaging/<platform>.md` | **NEW** — Full setup guide (see existing platform docs for template) |
+| `website/docs/user-guide/messaging/index.md` | Architecture diagram, toolset table, security examples, Next Steps links |
+| `website/docs/reference/environment-variables.md` | All env vars for the platform |
+
+---
+
+## 16. Tests (`tests/gateway/test_<platform>.py`)
+
+Recommended test coverage:
+
+- Platform enum exists with correct value
+- Config loading from env vars via `_apply_env_overrides`
+- Adapter init (config parsing, allowlist handling, default values)
+- Helper functions (redaction, parsing, file type detection)
+- Session source round-trip (to_dict → from_dict)
+- Authorization integration (platform in allowlist maps)
+- Send message tool routing (platform in platform_map)
+
+Optional but valuable:
+- Async tests for message handling flow (mock the platform API)
+- SSE/WebSocket reconnection logic
+- Attachment processing
+- Group message filtering
+
+---
+
+## Quick Verification
+
+After implementing everything, verify with:
+
+```bash
+# All tests pass
+python -m pytest tests/ -q
+
+# List every file that mentions an existing platform to find any missed integration points
+grep -r "telegram\|discord\|whatsapp\|slack" gateway/ tools/ agent/ cron/ hermes_cli/ toolsets.py \
+  --include="*.py" -l | sort -u
+# Check each file in the output — if it mentions other platforms but not yours, you missed it
+```
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -701,6 +701,8 @@ class BasePlatformAdapter(ABC):
                
                # Extract image URLs and send them as native platform attachments
                images, text_content = self.extract_images(response)
+                if images:
+                    logger.info("[%s] extract_images found %d image(s) in response (%d chars)", self.name, len(images), len(response))
                
                # Send the text portion first (if any remains after extractions)
                if text_content:
@@ -727,10 +729,13 @@ class BasePlatformAdapter(ABC):
                human_delay = self._get_human_delay()
                
                # Send extracted images as native attachments
+                if images:
+                    logger.info("[%s] Extracted %d image(s) to send as attachments", self.name, len(images))
                for image_url, alt_text in images:
                    if human_delay > 0:
                        await asyncio.sleep(human_delay)
                    try:
+                        logger.info("[%s] Sending image: %s (alt=%s)", self.name, image_url[:80], alt_text[:30] if alt_text else "")
                        # Route animated GIFs through send_animation for proper playback
                        if self._is_animation_url(image_url):
                            img_result = await self.send_animation(
@@ -745,9 +750,9 @@ class BasePlatformAdapter(ABC):
                                caption=alt_text if alt_text else None,
                            )
                        if not img_result.success:
-                            print(f"[{self.name}] Failed to send image: {img_result.error}")
+                            logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
                    except Exception as img_err:
-                        print(f"[{self.name}] Error sending image: {img_err}")
+                        logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
                
                # Send extracted media files — route by file type
                _AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
@@ -833,6 +838,8 @@ class BasePlatformAdapter(ABC):
        user_name: Optional[str] = None,
        thread_id: Optional[str] = None,
        chat_topic: Optional[str] = None,
+        user_id_alt: Optional[str] = None,
+        chat_id_alt: Optional[str] = None,
    ) -> SessionSource:
        """Helper to build a SessionSource for this platform."""
        # Normalize empty topic to None
@@ -847,6 +854,8 @@ class BasePlatformAdapter(ABC):
            user_name=user_name,
            thread_id=str(thread_id) if thread_id else None,
            chat_topic=chat_topic.strip() if chat_topic else None,
+            user_id_alt=user_id_alt,
+            chat_id_alt=chat_id_alt,
        )
    
    @abstractmethod
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -267,6 +267,43 @@ class DiscordAdapter(BasePlatformAdapter):
            print(f"[{self.name}] Failed to send audio: {e}")
            return await super().send_voice(chat_id, audio_path, caption, reply_to)
    
+    async def send_image_file(
+        self,
+        chat_id: str,
+        image_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+    ) -> SendResult:
+        """Send a local image file natively as a Discord file attachment."""
+        if not self._client:
+            return SendResult(success=False, error="Not connected")
+        
+        try:
+            import io
+            
+            channel = self._client.get_channel(int(chat_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(chat_id))
+            if not channel:
+                return SendResult(success=False, error=f"Channel {chat_id} not found")
+            
+            if not os.path.exists(image_path):
+                return SendResult(success=False, error=f"Image file not found: {image_path}")
+            
+            filename = os.path.basename(image_path)
+            
+            with open(image_path, "rb") as f:
+                file = discord.File(io.BytesIO(f.read()), filename=filename)
+                msg = await channel.send(
+                    content=caption if caption else None,
+                    file=file,
+                )
+                return SendResult(success=True, message_id=str(msg.id))
+        
+        except Exception as e:
+            print(f"[{self.name}] Failed to send local image: {e}")
+            return await super().send_image_file(chat_id, image_path, caption, reply_to)
+
    async def send_image(
        self,
        chat_id: str,
@@ -555,6 +592,89 @@ class DiscordAdapter(BasePlatformAdapter):
            except Exception as e:
                logger.debug("Discord followup failed: %s", e)

+        @tree.command(name="compress", description="Compress conversation context")
+        async def slash_compress(interaction: discord.Interaction):
+            await interaction.response.defer(ephemeral=True)
+            event = self._build_slash_event(interaction, "/compress")
+            await self.handle_message(event)
+            try:
+                await interaction.followup.send("Done~", ephemeral=True)
+            except Exception as e:
+                logger.debug("Discord followup failed: %s", e)
+
+        @tree.command(name="title", description="Set or show the session title")
+        @discord.app_commands.describe(name="Session title. Leave empty to show current.")
+        async def slash_title(interaction: discord.Interaction, name: str = ""):
+            await interaction.response.defer(ephemeral=True)
+            event = self._build_slash_event(interaction, f"/title {name}".strip())
+            await self.handle_message(event)
+            try:
+                await interaction.followup.send("Done~", ephemeral=True)
+            except Exception as e:
+                logger.debug("Discord followup failed: %s", e)
+
+        @tree.command(name="resume", description="Resume a previously-named session")
+        @discord.app_commands.describe(name="Session name to resume. Leave empty to list sessions.")
+        async def slash_resume(interaction: discord.Interaction, name: str = ""):
+            await interaction.response.defer(ephemeral=True)
+            event = self._build_slash_event(interaction, f"/resume {name}".strip())
+            await self.handle_message(event)
+            try:
+                await interaction.followup.send("Done~", ephemeral=True)
+            except Exception as e:
+                logger.debug("Discord followup failed: %s", e)
+
+        @tree.command(name="usage", description="Show token usage for this session")
+        async def slash_usage(interaction: discord.Interaction):
+            await interaction.response.defer(ephemeral=True)
+            event = self._build_slash_event(interaction, "/usage")
+            await self.handle_message(event)
+            try:
+                await interaction.followup.send("Done~", ephemeral=True)
+            except Exception as e:
+                logger.debug("Discord followup failed: %s", e)
+
+        @tree.command(name="provider", description="Show available providers")
+        async def slash_provider(interaction: discord.Interaction):
+            await interaction.response.defer(ephemeral=True)
+            event = self._build_slash_event(interaction, "/provider")
+            await self.handle_message(event)
+            try:
+                await interaction.followup.send("Done~", ephemeral=True)
+            except Exception as e:
+                logger.debug("Discord followup failed: %s", e)
+
+        @tree.command(name="help", description="Show available commands")
+        async def slash_help(interaction: discord.Interaction):
+            await interaction.response.defer(ephemeral=True)
+            event = self._build_slash_event(interaction, "/help")
+            await self.handle_message(event)
+            try:
+                await interaction.followup.send("Done~", ephemeral=True)
+            except Exception as e:
+                logger.debug("Discord followup failed: %s", e)
+
+        @tree.command(name="insights", description="Show usage insights and analytics")
+        @discord.app_commands.describe(days="Number of days to analyze (default: 7)")
+        async def slash_insights(interaction: discord.Interaction, days: int = 7):
+            await interaction.response.defer(ephemeral=True)
+            event = self._build_slash_event(interaction, f"/insights {days}")
+            await self.handle_message(event)
+            try:
+                await interaction.followup.send("Done~", ephemeral=True)
+            except Exception as e:
+                logger.debug("Discord followup failed: %s", e)
+
+        @tree.command(name="reload-mcp", description="Reload MCP servers from config")
+        async def slash_reload_mcp(interaction: discord.Interaction):
+            await interaction.response.defer(ephemeral=True)
+            event = self._build_slash_event(interaction, "/reload-mcp")
+            await self.handle_message(event)
+            try:
+                await interaction.followup.send("Done~", ephemeral=True)
+            except Exception as e:
+                logger.debug("Discord followup failed: %s", e)
+
        @tree.command(name="update", description="Update Hermes Agent to the latest version")
        async def slash_update(interaction: discord.Interaction):
            await interaction.response.defer(ephemeral=True)
--- a/gateway/platforms/signal.py
+++ b/gateway/platforms/signal.py
@@ -0,0 +1,716 @@
+"""Signal messenger platform adapter.
+
+Connects to a signal-cli daemon running in HTTP mode.
+Inbound messages arrive via SSE (Server-Sent Events) streaming.
+Outbound messages and actions use JSON-RPC 2.0 over HTTP.
+
+Based on PR #268 by ibhagwan, rebuilt with bug fixes.
+
+Requires:
+  - signal-cli installed and running: signal-cli daemon --http 127.0.0.1:8080
+  - SIGNAL_HTTP_URL and SIGNAL_ACCOUNT environment variables set
+"""
+
+import asyncio
+import base64
+import json
+import logging
+import os
+import random
+import re
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+from urllib.parse import quote, unquote
+
+import httpx
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SendResult,
+    cache_image_from_bytes,
+    cache_audio_from_bytes,
+    cache_document_from_bytes,
+    cache_image_from_url,
+)
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+SIGNAL_MAX_ATTACHMENT_SIZE = 100 * 1024 * 1024  # 100 MB
+MAX_MESSAGE_LENGTH = 8000  # Signal message size limit
+TYPING_INTERVAL = 8.0  # seconds between typing indicator refreshes
+SSE_RETRY_DELAY_INITIAL = 2.0  # seconds before the first SSE reconnect attempt
+SSE_RETRY_DELAY_MAX = 60.0  # upper bound (seconds) for the doubling reconnect delay
+HEALTH_CHECK_INTERVAL = 30.0  # seconds between health checks
+HEALTH_CHECK_STALE_THRESHOLD = 120.0  # seconds without SSE activity before concern
+
+# E.164 phone number pattern for redaction
+_PHONE_RE = re.compile(r"\+[1-9]\d{6,14}")
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _redact_phone(phone: str) -> str:
+    """Mask the middle of a phone number for logs, e.g. +15551234567 -> +155****4567."""
+    if not phone:
+        return "<none>"
+    size = len(phone)
+    keep = 4 if size > 8 else (2 if size > 4 else 0)
+    return f"{phone[:keep]}****{phone[-keep:]}" if keep else "****"
+
+
+def _parse_comma_list(value: str) -> List[str]:
+    """Return the non-empty, whitespace-trimmed items of a comma-separated string."""
+    return [item for item in map(str.strip, value.split(",")) if item]
+
+
+def _guess_extension(data: bytes) -> str:
+    """Map a payload's leading magic bytes to a file extension (".bin" if unknown)."""
+    size = len(data)
+    # Ordered (matched, extension) pairs: the first signature that matches wins.
+    signatures = (
+        (data.startswith(b"\x89PNG"), ".png"),
+        (data.startswith(b"\xff\xd8"), ".jpg"),
+        (data.startswith(b"GIF8"), ".gif"),
+        # RIFF container whose form type (bytes 8-11) is WEBP
+        (size >= 12 and data.startswith(b"RIFF") and data[8:12] == b"WEBP", ".webp"),
+        (data.startswith(b"%PDF"), ".pdf"),
+        (size >= 8 and data[4:8] == b"ftyp", ".mp4"),
+        (data.startswith(b"OggS"), ".ogg"),
+        # 0xFF followed by a byte whose top three bits are all set
+        (size >= 2 and data[0] == 0xFF and (data[1] & 0xE0) == 0xE0, ".mp3"),
+        (data.startswith(b"PK"), ".zip"),
+    )
+    for matched, ext in signatures:
+        if matched:
+            return ext
+    return ".bin"
+
+
+def _is_image_ext(ext: str) -> bool:
+    return ext.lower() in {".jpg", ".jpeg", ".png", ".gif", ".webp"}  # case-insensitive match
+
+
+def _is_audio_ext(ext: str) -> bool:
+    return ext.lower() in {".mp3", ".wav", ".ogg", ".m4a", ".aac"}  # case-insensitive match
+
+
+def _render_mentions(text: str, mentions: list) -> str:
+    """Substitute readable ``@identifier`` text for Signal mention placeholders.
+
+    Signal marks each @mention in the body with U+FFFC (object replacement
+    character). Every mention dict supplies ``start``/``length`` for the span
+    to replace and a ``number`` or ``uuid`` to display. Text without a
+    placeholder, or with no mentions, is returned unchanged.
+    """
+    if not mentions or "\uFFFC" not in text:
+        return text
+    # Walk the mentions from the highest offset down so that earlier offsets
+    # remain valid after each substitution.
+    for entry in sorted(mentions, key=lambda m: m.get("start", 0), reverse=True):
+        begin = entry.get("start", 0)
+        end = begin + entry.get("length", 1)
+        # Prefer the phone number, then the UUID, then a generic label.
+        label = entry.get("number") or entry.get("uuid") or "user"
+        text = f"{text[:begin]}@{label}{text[end:]}"
+    return text
+
+
+def check_signal_requirements() -> bool:
+    """Return True when both SIGNAL_HTTP_URL and SIGNAL_ACCOUNT are set and non-empty."""
+    return all(os.getenv(name) for name in ("SIGNAL_HTTP_URL", "SIGNAL_ACCOUNT"))
+
+
+# ---------------------------------------------------------------------------
+# Signal Adapter
+# ---------------------------------------------------------------------------
+
+class SignalAdapter(BasePlatformAdapter):
+    """Signal messenger adapter using signal-cli HTTP daemon."""
+
+    platform = Platform.SIGNAL
+
+    def __init__(self, config: PlatformConfig) -> None:
+        """Load settings from ``config.extra`` and the environment; nothing is connected yet."""
+        super().__init__(config, Platform.SIGNAL)
+        extra = config.extra or {}
+        self.http_url = extra.get("http_url", "http://127.0.0.1:8080").rstrip("/")
+        self.account = extra.get("account", "")
+        self.ignore_stories = extra.get("ignore_stories", True)
+
+        # Group allowlist (comma-separated entries); an empty set leaves groups disabled
+        group_allowed_str = os.getenv("SIGNAL_GROUP_ALLOWED_USERS", "")
+        self.group_allow_from = set(_parse_comma_list(group_allowed_str))
+
+        # HTTP client — created in connect(), closed in disconnect()
+        self.client: Optional[httpx.AsyncClient] = None
+
+        # Background tasks and SSE connection state
+        self._sse_task: Optional[asyncio.Task] = None
+        self._health_monitor_task: Optional[asyncio.Task] = None
+        self._typing_tasks: Dict[str, asyncio.Task] = {}
+        self._running = False
+        self._last_sse_activity = 0.0
+        self._sse_response: Optional[httpx.Response] = None
+
+        # Stripped copy of the account, compared against senders to drop our own messages
+        self._account_normalized = self.account.strip()
+
+        logger.info("Signal adapter initialized: url=%s account=%s groups=%s",
+                     self.http_url, _redact_phone(self.account),
+                     "enabled" if self.group_allow_from else "disabled")
+
+    # ------------------------------------------------------------------
+    # Lifecycle
+    # ------------------------------------------------------------------
+
+    async def connect(self) -> bool:
+        """Check the signal-cli daemon and start the SSE/health tasks; False on failure."""
+        if not self.http_url or not self.account:
+            logger.error("Signal: SIGNAL_HTTP_URL and SIGNAL_ACCOUNT are required")
+            return False
+        self.client = httpx.AsyncClient(timeout=30.0)
+        try:
+            resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0)
+            healthy = resp.status_code == 200
+            if not healthy:
+                logger.error("Signal: health check failed (status %d)", resp.status_code)
+        except Exception as e:
+            logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e)
+            healthy = False
+        if not healthy:
+            # Close the client opened above so a failed connect does not leak it.
+            await self.client.aclose()
+            self.client = None
+            return False
+        self._running = True
+        self._last_sse_activity = time.time()
+        self._sse_task = asyncio.create_task(self._sse_listener())
+        self._health_monitor_task = asyncio.create_task(self._health_monitor())
+        logger.info("Signal: connected to %s", self.http_url)
+        return True
+
+    async def disconnect(self) -> None:
+        """Shut the adapter down.
+
+        Cancels the background and typing tasks, then closes the HTTP client.
+        """
+        self._running = False
+
+        # Cancel the SSE listener and the health monitor, waiting for each one
+        # to finish unwinding before moving on.
+        for background in (self._sse_task, self._health_monitor_task):
+            if not background:
+                continue
+            background.cancel()
+            try:
+                await background
+            except asyncio.CancelledError:
+                pass
+
+        # Typing loops are cancelled without being awaited.
+        for typing_task in self._typing_tasks.values():
+            typing_task.cancel()
+        self._typing_tasks.clear()
+
+        if self.client:
+            await self.client.aclose()
+            self.client = None
+
+        logger.info("Signal: disconnected")
+
+    # ------------------------------------------------------------------
+    # SSE Streaming (inbound messages)
+    # ------------------------------------------------------------------
+
+    async def _sse_listener(self) -> None:
+        """Consume the signal-cli SSE stream, reconnecting with backoff until stopped.
+
+        Each non-empty ``data:`` line is parsed as JSON and passed to ``_handle_envelope``.
+        """
+        # Percent-encode the account: an unescaped "+" in a query string may be decoded as a space.
+        url = f"{self.http_url}/api/v1/events?account={quote(self.account, safe='')}"
+        backoff = SSE_RETRY_DELAY_INITIAL
+        while self._running:
+            try:
+                # Log only the base URL so the account number stays out of the logs.
+                logger.debug("Signal SSE: connecting to %s", self.http_url)
+                async with self.client.stream(
+                    "GET", url,
+                    headers={"Accept": "text/event-stream"},
+                    timeout=None,
+                ) as response:
+                    self._sse_response = response
+                    backoff = SSE_RETRY_DELAY_INITIAL  # Reset on successful connection
+                    self._last_sse_activity = time.time()
+                    logger.info("Signal SSE: connected")
+                    buffer = ""
+                    async for chunk in response.aiter_text():
+                        if not self._running:
+                            break
+                        buffer += chunk
+                        while "\n" in buffer:
+                            line, buffer = buffer.split("\n", 1)
+                            line = line.strip()
+                            if not line:
+                                continue
+                            # Parse SSE data lines
+                            if line.startswith("data:"):
+                                data_str = line[5:].strip()
+                                if not data_str:
+                                    continue
+                                self._last_sse_activity = time.time()
+                                try:
+                                    data = json.loads(data_str)
+                                    await self._handle_envelope(data)
+                                except json.JSONDecodeError:
+                                    logger.debug("Signal SSE: invalid JSON: %s", data_str[:100])
+                                except Exception:
+                                    logger.exception("Signal SSE: error handling event")
+            except asyncio.CancelledError:
+                break
+            except httpx.HTTPError as e:
+                if self._running:
+                    logger.warning("Signal SSE: HTTP error: %s (reconnecting in %.0fs)", e, backoff)
+            except Exception as e:
+                if self._running:
+                    logger.warning("Signal SSE: error: %s (reconnecting in %.0fs)", e, backoff)
+            if self._running:
+                # Add 20% jitter to prevent thundering herd on reconnection
+                jitter = backoff * 0.2 * random.random()
+                await asyncio.sleep(backoff + jitter)
+                backoff = min(backoff * 2, SSE_RETRY_DELAY_MAX)
+        self._sse_response = None
+
+    # ------------------------------------------------------------------
+    # Health Monitor
+    # ------------------------------------------------------------------
+
+    async def _health_monitor(self) -> None:
+        """Periodically probe the daemon when the SSE stream has been silent for too long."""
+        while self._running:
+            await asyncio.sleep(HEALTH_CHECK_INTERVAL)
+            if not self._running:
+                break
+            # Probe only after HEALTH_CHECK_STALE_THRESHOLD seconds without SSE data.
+            elapsed = time.time() - self._last_sse_activity
+            if elapsed > HEALTH_CHECK_STALE_THRESHOLD:
+                logger.warning("Signal: SSE idle for %.0fs, checking daemon health", elapsed)
+                try:
+                    resp = await self.client.get(
+                        f"{self.http_url}/api/v1/check", timeout=10.0
+                    )
+                    if resp.status_code == 200:
+                        # Daemon is alive but SSE is idle — update activity to
+                        # avoid repeated warnings (connection may just be quiet)
+                        self._last_sse_activity = time.time()
+                        logger.debug("Signal: daemon healthy, SSE idle")
+                    else:
+                        logger.warning("Signal: health check failed (%d), forcing reconnect", resp.status_code)
+                        self._force_reconnect()
+                except Exception as e:
+                    logger.warning("Signal: health check error: %s, forcing reconnect", e)
+                    self._force_reconnect()
+
+    def _force_reconnect(self) -> None:
+        """Force SSE reconnection by closing the current response, if it is still open."""
+        response, self._sse_response = self._sse_response, None
+        # httpx marks a stream "consumed" as soon as iteration begins, so is_closed
+        # (not is_stream_consumed) is the right test for a live SSE response.
+        if response is not None and not response.is_closed:
+            # Keep a reference so the close task is not garbage-collected mid-flight.
+            self._sse_close_task = asyncio.create_task(response.aclose())
+
+    # ------------------------------------------------------------------
+    # Message Handling
+    # ------------------------------------------------------------------
+
+    async def _handle_envelope(self, envelope: dict) -> None:
+        """Filter an incoming signal-cli envelope and dispatch it as a MessageEvent."""
+        # Unwrap nested envelope if present
+        envelope_data = envelope.get("envelope", envelope)
+
+        # Filter syncMessage envelopes (sent transcripts, read receipts, etc.)
+        # signal-cli may set syncMessage to null vs omitting it, so check key existence
+        if "syncMessage" in envelope_data:
+            return
+
+        # Extract sender info
+        sender = (
+            envelope_data.get("sourceNumber")
+            or envelope_data.get("sourceUuid")
+            or envelope_data.get("source")
+        )
+        sender_name = envelope_data.get("sourceName", "")
+        sender_uuid = envelope_data.get("sourceUuid", "")
+
+        if not sender:
+            logger.debug("Signal: ignoring envelope with no sender")
+            return
+
+        # Self-message filtering — prevent reply loops
+        if self._account_normalized and sender == self._account_normalized:
+            return
+
+        # Filter stories
+        if self.ignore_stories and envelope_data.get("storyMessage"):
+            return
+
+        # Get data message — also check editMessage (edited messages contain
+        # their updated dataMessage inside editMessage.dataMessage)
+        data_message = (
+            envelope_data.get("dataMessage")
+            or (envelope_data.get("editMessage") or {}).get("dataMessage")
+        )
+        if not data_message:
+            return
+
+        # Check for group message
+        group_info = data_message.get("groupInfo")
+        group_id = group_info.get("groupId") if group_info else None
+        is_group = bool(group_id)
+
+        # Group message filtering — derived from SIGNAL_GROUP_ALLOWED_USERS:
+        # - No env var set → groups disabled (default safe behavior)
+        # - Env var set with group IDs → only those groups allowed
+        # - Env var set with "*" → all groups allowed
+        # DM auth is fully handled by run.py (_is_user_authorized)
+        if is_group:
+            if not self.group_allow_from:
+                logger.debug("Signal: ignoring group message (no SIGNAL_GROUP_ALLOWED_USERS)")
+                return
+            if "*" not in self.group_allow_from and group_id not in self.group_allow_from:
+                logger.debug("Signal: group %s not in allowlist", group_id[:8] if group_id else "?")
+                return
+
+        # Build chat info
+        chat_id = sender if not is_group else f"group:{group_id}"
+        chat_type = "group" if is_group else "dm"
+
+        # Extract text and render mentions
+        text = data_message.get("message", "")
+        mentions = data_message.get("mentions", [])
+        if text and mentions:
+            text = _render_mentions(text, mentions)
+
+        # Process attachments
+        attachments_data = data_message.get("attachments", [])
+        image_paths = []
+        audio_path = None
+        document_paths = []
+
+        if attachments_data and not getattr(self, "ignore_attachments", False):
+            for att in attachments_data:
+                att_id = att.get("id")
+                att_size = att.get("size", 0)
+                if not att_id:
+                    continue
+                if att_size > SIGNAL_MAX_ATTACHMENT_SIZE:
+                    logger.warning("Signal: attachment too large (%d bytes), skipping", att_size)
+                    continue
+                try:
+                    cached_path, ext = await self._fetch_attachment(att_id)
+                    if cached_path:
+                        if _is_image_ext(ext):
+                            image_paths.append(cached_path)
+                        elif _is_audio_ext(ext):
+                            audio_path = cached_path
+                        else:
+                            document_paths.append(cached_path)
+                except Exception:
+                    logger.exception("Signal: failed to fetch attachment %s", att_id)
+
+        # Build session source
+        source = self.build_source(
+            chat_id=chat_id,
+            chat_name=group_info.get("groupName") if group_info else sender_name,
+            chat_type=chat_type,
+            user_id=sender,
+            user_name=sender_name or sender,
+            user_id_alt=sender_uuid if sender_uuid else None,
+            chat_id_alt=group_id if is_group else None,
+        )
+
+        # Determine message type
+        msg_type = MessageType.TEXT
+        if audio_path:
+            msg_type = MessageType.VOICE
+        elif image_paths:
+            msg_type = MessageType.IMAGE
+
+        # Parse timestamp from envelope data (milliseconds since epoch)
+        ts_ms = envelope_data.get("timestamp", 0)
+        if ts_ms:
+            try:
+                timestamp = datetime.fromtimestamp(ts_ms / 1000, tz=timezone.utc)
+            except (ValueError, OSError, OverflowError):
+                timestamp = datetime.now(tz=timezone.utc)
+        else:
+            timestamp = datetime.now(tz=timezone.utc)
+
+        # Build and dispatch event
+        event = MessageEvent(
+            source=source,
+            text=text or "",
+            message_type=msg_type,
+            image_paths=image_paths,
+            audio_path=audio_path,
+            document_paths=document_paths,
+            timestamp=timestamp,
+        )
+
+        # For DMs chat_id is the sender's number, so it is redacted like the sender.
+        logger.debug("Signal: message from %s in %s: %s", _redact_phone(sender),
+                      chat_id[:20] if is_group else _redact_phone(chat_id), (text or "")[:50])
+        await self.handle_message(event)
+
+    # ------------------------------------------------------------------
+    # Attachment Handling
+    # ------------------------------------------------------------------
+
+    async def _fetch_attachment(self, attachment_id: str) -> tuple:
+        """Download one attachment over JSON-RPC and store it in the local cache.
+
+        Returns ``(cached_path, extension)``, or ``(None, "")`` when the RPC
+        yields nothing. The extension is sniffed from the decoded bytes.
+        """
+        request = {"account": self.account, "attachmentId": attachment_id}
+        encoded = await self._rpc("getAttachment", request)
+        if not encoded:
+            return None, ""
+        # The RPC result is decoded as base64 to recover the file content.
+        payload = base64.b64decode(encoded)
+        suffix = _guess_extension(payload)
+
+        # Pick the cache helper that matches the sniffed media kind.
+        if _is_image_ext(suffix):
+            store = cache_image_from_bytes
+        elif _is_audio_ext(suffix):
+            store = cache_audio_from_bytes
+        else:
+            store = cache_document_from_bytes
+        return store(payload, suffix), suffix
+
+    # ------------------------------------------------------------------
+    # JSON-RPC Communication
+    # ------------------------------------------------------------------
+
+    async def _rpc(self, method: str, params: dict, rpc_id: Optional[str] = None) -> Any:
+        """Send a JSON-RPC 2.0 request to the signal-cli daemon and return its ``result``.
+
+        Returns None when the client is not connected, the HTTP call or JSON
+        decoding fails, or the response carries an ``error`` member.
+        """
+        if not self.client:
+            logger.warning("Signal: RPC called but client not connected")
+            return None
+        # Default id: the method name plus the current time in milliseconds.
+        if rpc_id is None:
+            rpc_id = f"{method}_{int(time.time() * 1000)}"
+        payload = {
+            "jsonrpc": "2.0",
+            "method": method,
+            "params": params,
+            "id": rpc_id,
+        }
+
+        try:
+            resp = await self.client.post(
+                f"{self.http_url}/api/v1/rpc",
+                json=payload,
+                timeout=30.0,
+            )
+            resp.raise_for_status()
+            data = resp.json()
+            if "error" in data:
+                logger.warning("Signal RPC error (%s): %s", method, data["error"])
+                return None
+            return data.get("result")
+        except Exception as e:
+            logger.warning("Signal RPC %s failed: %s", method, e)
+            return None
+
+    # ------------------------------------------------------------------
+    # Sending
+    # ------------------------------------------------------------------
+
+    async def send(
+        self,
+        chat_id: str,
+        text: str,
+        reply_to_message_id: Optional[str] = None,
+        **kwargs,
+    ) -> SendResult:
+        """Deliver ``text`` to a single recipient or, for ``group:`` chat IDs, to that group.
+
+        ``reply_to_message_id`` and extra keyword arguments are accepted but unused.
+        """
+        await self._stop_typing_indicator(chat_id)
+
+        # "group:<id>" addresses a group; anything else is a single recipient.
+        if chat_id.startswith("group:"):
+            target: Dict[str, Any] = {"groupId": chat_id[6:]}
+        else:
+            target = {"recipient": [chat_id]}
+        params: Dict[str, Any] = {"account": self.account, "message": text, **target}
+
+        # A None result is treated as a failed send.
+        outcome = await self._rpc("send", params)
+        if outcome is None:
+            return SendResult(success=False, error="RPC send failed")
+        return SendResult(success=True)
+
+    async def send_typing(self, chat_id: str) -> None:
+        """Ask signal-cli to show a typing indicator in the given chat."""
+        params: Dict[str, Any] = {"account": self.account}
+        is_group = chat_id.startswith("group:")
+        if is_group:
+            # Strip the "group:" prefix to recover the raw group ID.
+            params["groupId"] = chat_id[6:]
+        else:
+            params["recipient"] = [chat_id]
+
+        # Every typing request reuses the fixed JSON-RPC id "typing".
+        await self._rpc("sendTyping", params, rpc_id="typing")
+
+    async def send_image(
+        self,
+        chat_id: str,
+        image_url: str,
+        caption: Optional[str] = None,
+        **kwargs,
+    ) -> SendResult:
+        """Send an image attachment with an optional caption.
+
+        A ``file://`` URL is used in place; any other URL is first downloaded
+        into the image cache. Files over SIGNAL_MAX_ATTACHMENT_SIZE are refused.
+        """
+        await self._stop_typing_indicator(chat_id)
+
+        if image_url.startswith("file://"):
+            # Local file: drop the scheme and undo percent-encoding.
+            file_path = unquote(image_url[7:])
+        else:
+            try:
+                file_path = await cache_image_from_url(image_url)
+            except Exception as e:
+                logger.warning("Signal: failed to download image: %s", e)
+                return SendResult(success=False, error=str(e))
+
+        local = Path(file_path) if file_path else None
+        if local is None or not local.exists():
+            return SendResult(success=False, error="Image file not found")
+        file_size = local.stat().st_size
+        if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
+            return SendResult(success=False, error=f"Image too large ({file_size} bytes)")
+
+        params: Dict[str, Any] = {
+            "account": self.account,
+            "message": caption or "",
+            "attachments": [file_path],
+        }
+        if chat_id.startswith("group:"):
+            params["groupId"] = chat_id[6:]
+        else:
+            params["recipient"] = [chat_id]
+
+        if await self._rpc("send", params) is None:
+            return SendResult(success=False, error="RPC send with attachment failed")
+        return SendResult(success=True)
+
+    async def send_document(
+        self,
+        chat_id: str,
+        file_path: str,
+        caption: Optional[str] = None,
+        filename: Optional[str] = None,
+        **kwargs,
+    ) -> SendResult:
+        """Send a local file as an attachment; ``filename`` is accepted but not used."""
+        await self._stop_typing_indicator(chat_id)
+        document = Path(file_path)
+        if not document.exists():
+            return SendResult(success=False, error="File not found")
+        # Enforce the same attachment size cap that send_image applies.
+        file_size = document.stat().st_size
+        if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
+            return SendResult(success=False, error=f"File too large ({file_size} bytes)")
+        params: Dict[str, Any] = {
+            "account": self.account,
+            "message": caption or "",
+            "attachments": [file_path],
+        }
+        if chat_id.startswith("group:"):
+            params["groupId"] = chat_id[6:]
+        else:
+            params["recipient"] = [chat_id]
+        if await self._rpc("send", params) is None:
+            return SendResult(success=False, error="RPC send document failed")
+        return SendResult(success=True)
+
+    # ------------------------------------------------------------------
+    # Typing Indicators
+    # ------------------------------------------------------------------
+
+    async def _start_typing_indicator(self, chat_id: str) -> None:
+        """Begin refreshing the typing indicator for ``chat_id`` every TYPING_INTERVAL seconds."""
+        if chat_id in self._typing_tasks:
+            return  # one loop per chat is enough
+
+        async def _keep_typing():
+            try:
+                while True:
+                    await self.send_typing(chat_id)
+                    await asyncio.sleep(TYPING_INTERVAL)
+            except asyncio.CancelledError:
+                pass
+
+        self._typing_tasks[chat_id] = asyncio.create_task(_keep_typing())
+
+    async def _stop_typing_indicator(self, chat_id: str) -> None:
+        """Cancel the typing loop registered for ``chat_id`` (if any) and wait for it to end."""
+        pending = self._typing_tasks.pop(chat_id, None)
+        if pending:
+            pending.cancel()
+            try:
+                await pending
+            except asyncio.CancelledError:
+                pass
+
+    # ------------------------------------------------------------------
+    # Chat Info
+    # ------------------------------------------------------------------
+
+    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
+        """Describe a chat as a dict with ``name``, ``type`` and ``chat_id`` keys.
+
+        Groups are reported under their chat ID; for DMs the contact's name or
+        profile name is looked up via the ``getContact`` RPC when available.
+        """
+        if chat_id.startswith("group:"):
+            return {
+                "name": chat_id,
+                "type": "group",
+                "chat_id": chat_id,
+            }
+
+        lookup = {"account": self.account, "contactAddress": chat_id}
+        contact = await self._rpc("getContact", lookup)
+        # Fall back to the raw chat ID when no usable contact record comes back.
+        display_name = chat_id
+        if contact and isinstance(contact, dict):
+            display_name = contact.get("name") or contact.get("profileName") or chat_id
+        return {
+            "name": display_name,
+            "type": "dm",
+            "chat_id": chat_id,
+        }
--- a/gateway/platforms/slack.py
+++ b/gateway/platforms/slack.py
@@ -179,6 +179,35 @@ class SlackAdapter(BasePlatformAdapter):
        """Slack doesn't have a direct typing indicator API for bots."""
        pass

+    async def send_image_file(
+        self,
+        chat_id: str,
+        image_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+    ) -> SendResult:
+        """Send a local image file to Slack by uploading it.
+
+        Falls back to the base-class implementation if the upload raises.
+        """
+        if not self._app:
+            return SendResult(success=False, error="Not connected")
+        try:
+            import os
+            if not os.path.exists(image_path):
+                return SendResult(success=False, error=f"Image file not found: {image_path}")
+            result = await self._app.client.files_upload_v2(
+                channel=chat_id,
+                file=image_path,
+                filename=os.path.basename(image_path),
+                initial_comment=caption or "",
+                thread_ts=reply_to,
+            )
+            return SendResult(success=True, raw_response=result)
+        except Exception as e:
+            print(f"[{self.name}] Failed to send local image: {e}")
+            return await super().send_image_file(chat_id, image_path, caption, reply_to)
+
    async def send_image(
        self,
        chat_id: str,
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -155,6 +155,14 @@ class TelegramAdapter(BasePlatformAdapter):
                    BotCommand("status", "Show session info"),
                    BotCommand("stop", "Stop the running agent"),
                    BotCommand("sethome", "Set this chat as the home channel"),
+                    BotCommand("compress", "Compress conversation context"),
+                    BotCommand("title", "Set or show the session title"),
+                    BotCommand("resume", "Resume a previously-named session"),
+                    BotCommand("usage", "Show token usage for this session"),
+                    BotCommand("provider", "Show available providers"),
+                    BotCommand("insights", "Show usage insights and analytics"),
+                    BotCommand("update", "Update Hermes to the latest version"),
+                    BotCommand("reload_mcp", "Reload MCP servers from config"),
                    BotCommand("help", "Show available commands"),
                ])
            except Exception as e:
@@ -306,6 +314,34 @@ class TelegramAdapter(BasePlatformAdapter):
            print(f"[{self.name}] Failed to send voice/audio: {e}")
            return await super().send_voice(chat_id, audio_path, caption, reply_to)
    
+    async def send_image_file(
+        self,
+        chat_id: str,
+        image_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+    ) -> SendResult:
+        """Send a local image as a Telegram photo; defer to the base class if that raises."""
+        if not self._bot:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            import os
+            if not os.path.exists(image_path):
+                return SendResult(success=False, error=f"Image file not found: {image_path}")
+            with open(image_path, "rb") as image_file:
+                msg = await self._bot.send_photo(
+                    chat_id=int(chat_id),
+                    photo=image_file,
+                    # Only the first 1024 characters of the caption are sent.
+                    caption=caption[:1024] if caption else None,
+                    reply_to_message_id=int(reply_to) if reply_to else None,
+                )
+            return SendResult(success=True, message_id=str(msg.message_id))
+        except Exception as e:
+            logger.warning("[%s] Failed to send local image: %s", self.name, e)
+            return await super().send_image_file(chat_id, image_path, caption, reply_to)
+
    async def send_image(
        self,
        chat_id: str,
@@ -313,12 +349,16 @@ class TelegramAdapter(BasePlatformAdapter):
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
    ) -> SendResult:
-        """Send an image natively as a Telegram photo."""
+        """Send an image natively as a Telegram photo.
+        
+        Tries URL-based send first (fast, works for <5MB images).
+        Falls back to downloading and uploading as file (supports up to 10MB).
+        """
        if not self._bot:
            return SendResult(success=False, error="Not connected")
        
        try:
-            # Telegram can send photos directly from URLs
+            # Telegram can send photos directly from URLs (up to ~5MB)
            msg = await self._bot.send_photo(
                chat_id=int(chat_id),
                photo=image_url,
@@ -327,9 +367,26 @@ class TelegramAdapter(BasePlatformAdapter):
            )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
-            print(f"[{self.name}] Failed to send photo, falling back to URL: {e}")
-            # Fallback: send as text link
-            return await super().send_image(chat_id, image_url, caption, reply_to)
+            logger.warning("[%s] URL-based send_photo failed (%s), trying file upload", self.name, e)
+            # Fallback: download and upload as file (supports up to 10MB)
+            try:
+                import httpx
+                async with httpx.AsyncClient(timeout=30.0) as client:
+                    resp = await client.get(image_url)
+                    resp.raise_for_status()
+                    image_data = resp.content
+                
+                msg = await self._bot.send_photo(
+                    chat_id=int(chat_id),
+                    photo=image_data,
+                    caption=caption[:1024] if caption else None,
+                    reply_to_message_id=int(reply_to) if reply_to else None,
+                )
+                return SendResult(success=True, message_id=str(msg.message_id))
+            except Exception as e2:
+                logger.error("[%s] File upload send_photo also failed: %s", self.name, e2)
+                # Final fallback: send URL as text
+                return await super().send_image(chat_id, image_url, caption, reply_to)
    
    async def send_animation(
        self,
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -75,6 +75,7 @@ if _config_path.exists():
                "container_memory": "TERMINAL_CONTAINER_MEMORY",
                "container_disk": "TERMINAL_CONTAINER_DISK",
                "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
+                "sandbox_dir": "TERMINAL_SANDBOX_DIR",
            }
            for _cfg_key, _env_var in _terminal_env_map.items():
                if _cfg_key in _terminal_cfg:
@@ -85,10 +86,29 @@ if _config_path.exists():
                "enabled": "CONTEXT_COMPRESSION_ENABLED",
                "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
                "summary_model": "CONTEXT_COMPRESSION_MODEL",
+                "summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
            }
            for _cfg_key, _env_var in _compression_env_map.items():
                if _cfg_key in _compression_cfg:
                    os.environ[_env_var] = str(_compression_cfg[_cfg_key])
+        # Auxiliary model overrides (vision, web_extract).
+        # Each task has provider + model; bridge non-default values to env vars.
+        _auxiliary_cfg = _cfg.get("auxiliary", {})
+        if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
+            _aux_task_env = {
+                "vision":      ("AUXILIARY_VISION_PROVIDER",      "AUXILIARY_VISION_MODEL"),
+                "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER",  "AUXILIARY_WEB_EXTRACT_MODEL"),
+            }
+            for _task_key, (_prov_env, _model_env) in _aux_task_env.items():
+                _task_cfg = _auxiliary_cfg.get(_task_key, {})
+                if not isinstance(_task_cfg, dict):
+                    continue
+                _prov = str(_task_cfg.get("provider", "")).strip()
+                _model = str(_task_cfg.get("model", "")).strip()
+                if _prov and _prov != "auto":
+                    os.environ[_prov_env] = _prov
+                if _model:
+                    os.environ[_model_env] = _model
        _agent_cfg = _cfg.get("agent", {})
        if _agent_cfg and isinstance(_agent_cfg, dict):
            if "max_turns" in _agent_cfg:
@@ -98,6 +118,12 @@ if _config_path.exists():
        _tz_cfg = _cfg.get("timezone", "")
        if _tz_cfg and isinstance(_tz_cfg, str) and "HERMES_TIMEZONE" not in os.environ:
            os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip()
+        # Security settings
+        _security_cfg = _cfg.get("security", {})
+        if isinstance(_security_cfg, dict):
+            _redact = _security_cfg.get("redact_secrets")
+            if _redact is not None:
+                os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
    except Exception:
        pass  # Non-fatal; gateway can still run with .env values

@@ -107,11 +133,13 @@ os.environ["HERMES_QUIET"] = "1"
 # Enable interactive exec approval for dangerous commands on messaging platforms
 os.environ["HERMES_EXEC_ASK"] = "1"

-# Set terminal working directory for messaging platforms
-# Uses MESSAGING_CWD if set, otherwise defaults to home directory
-# This is separate from CLI which uses the directory where `hermes` is run
-messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home())
-os.environ["TERMINAL_CWD"] = messaging_cwd
+# Set terminal working directory for messaging platforms.
+# If the user set an explicit path in config.yaml (not ".", "auto" or "cwd"),
+# respect it. Otherwise use MESSAGING_CWD or default to home directory.
+_configured_cwd = os.environ.get("TERMINAL_CWD", "")
+if not _configured_cwd or _configured_cwd in (".", "auto", "cwd"):
+    messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home())
+    os.environ["TERMINAL_CWD"] = messaging_cwd

 from gateway.config import (
    Platform,
@@ -172,13 +200,13 @@ class GatewayRunner:
        self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
        self._reasoning_config = self._load_reasoning_config()
        self._provider_routing = self._load_provider_routing()
+        self._fallback_model = self._load_fallback_model()

        # Wire process registry into session store for reset protection
        from tools.process_registry import process_registry
        self.session_store = SessionStore(
            self.config.sessions_dir, self.config,
            has_active_processes_fn=lambda key: process_registry.has_active_for_session(key),
-            on_auto_reset=self._flush_memories_before_reset,
        )
        self.delivery_router = DeliveryRouter(self.config)
        self._running = False
@@ -209,15 +237,14 @@ class GatewayRunner:
        from gateway.hooks import HookRegistry
        self.hooks = HookRegistry()
    
-    def _flush_memories_before_reset(self, old_entry):
-        """Prompt the agent to save memories/skills before an auto-reset.
-        
-        Called synchronously by SessionStore before destroying an expired session.
-        Loads the transcript, gives the agent a real turn with memory + skills
-        tools, and explicitly asks it to preserve anything worth keeping.
+    def _flush_memories_for_session(self, old_session_id: str):
+        """Prompt the agent to save memories/skills before context is lost.
+
+        Synchronous worker — meant to be called via run_in_executor from
+        an async context so it doesn't block the event loop.
        """
        try:
-            history = self.session_store.load_transcript(old_entry.session_id)
+            history = self.session_store.load_transcript(old_session_id)
            if not history or len(history) < 4:
                return

@@ -231,7 +258,7 @@ class GatewayRunner:
                max_iterations=8,
                quiet_mode=True,
                enabled_toolsets=["memory", "skills"],
-                session_id=old_entry.session_id,
+                session_id=old_session_id,
            )

            # Build conversation history from transcript
@@ -260,9 +287,14 @@ class GatewayRunner:
                user_message=flush_prompt,
                conversation_history=msgs,
            )
-            logger.info("Pre-reset save completed for session %s", old_entry.session_id)
+            logger.info("Pre-reset memory flush completed for session %s", old_session_id)
        except Exception as e:
-            logger.debug("Pre-reset save failed for session %s: %s", old_entry.session_id, e)
+            logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
+
+    async def _async_flush_memories(self, old_session_id: str):
+        """Run the blocking memory flush in the default executor so the event loop stays free."""
+        loop = asyncio.get_running_loop()
+        await loop.run_in_executor(None, self._flush_memories_for_session, old_session_id)
    
    @staticmethod
    def _load_prefill_messages() -> List[Dict[str, Any]]:
@@ -330,7 +362,7 @@ class GatewayRunner:
        
        Checks HERMES_REASONING_EFFORT env var first, then agent.reasoning_effort
        in config.yaml. Valid: "xhigh", "high", "medium", "low", "minimal", "none".
-        Returns None to use default (xhigh).
+        Returns None to use default (medium).
        """
        effort = os.getenv("HERMES_REASONING_EFFORT", "")
        if not effort:
@@ -351,7 +383,7 @@ class GatewayRunner:
        valid = ("xhigh", "high", "medium", "low", "minimal")
        if effort in valid:
            return {"enabled": True, "effort": effort}
-        logger.warning("Unknown reasoning_effort '%s', using default (xhigh)", effort)
+        logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
        return None

    @staticmethod
@@ -368,6 +400,26 @@ class GatewayRunner:
            pass
        return {}

+    @staticmethod
+    def _load_fallback_model() -> dict | None:
+        """Load fallback model config from config.yaml (best-effort, never raises).
+
+        Returns the 'fallback_model' mapping when both its 'provider' and 'model'
+        are set; returns None if it is absent, malformed, or unreadable.
+        """
+        try:
+            import yaml as _y
+            cfg_path = _hermes_home / "config.yaml"
+            if cfg_path.exists():
+                with open(cfg_path, encoding="utf-8") as _f:
+                    cfg = _y.safe_load(_f) or {}
+                fb = cfg.get("fallback_model") if isinstance(cfg, dict) else None
+                if isinstance(fb, dict) and fb.get("provider") and fb.get("model"):
+                    return fb
+        except Exception as e:
+            logger.debug("Could not load fallback_model from config.yaml: %s", e)
+        return None
+
    async def start(self) -> bool:
        """
        Start the gateway and all configured platform adapters.
@@ -464,10 +516,50 @@ class GatewayRunner:
        # Check if we're restarting after a /update command
        await self._send_update_notification()

+        # Start background session expiry watcher for proactive memory flushing
+        asyncio.create_task(self._session_expiry_watcher())
+
        logger.info("Press Ctrl+C to stop")
        
        return True
    
+    async def _session_expiry_watcher(self, interval: int = 300):
+        """Periodically flush memories for sessions whose reset policy has expired.
+
+        After a 60-second startup delay, loops while ``self._running``: every
+        `interval` seconds (default 5 min) it scans the session store and, for
+        each expired session not yet recorded as pre-flushed, runs the memory
+        flush in a thread pool and then records the session id so it is not
+        flushed again.  Doing this ahead of time keeps the flush off the path
+        of the user's next message.
+        """
+        await asyncio.sleep(60)  # initial delay — let the gateway fully start
+        while self._running:
+            try:
+                self.session_store._ensure_loaded()
+                for session_key, session in list(self.session_store._entries.items()):
+                    if (
+                        session.session_id in self.session_store._pre_flushed_sessions
+                        or not self.session_store._is_session_expired(session)
+                    ):
+                        continue  # already flushed, or still active
+                    logger.info(
+                        "Session %s expired (key=%s), flushing memories proactively",
+                        session.session_id, session_key,
+                    )
+                    try:
+                        await self._async_flush_memories(session.session_id)
+                        self.session_store._pre_flushed_sessions.add(session.session_id)
+                    except Exception as flush_err:
+                        logger.debug("Proactive memory flush failed for %s: %s", session.session_id, flush_err)
+            except Exception as watcher_err:
+                logger.debug("Session expiry watcher error: %s", watcher_err)
+            # One-second ticks so a stop request is noticed promptly.
+            for _tick in range(interval):
+                if not self._running:
+                    break
+                await asyncio.sleep(1)
+
    async def stop(self) -> None:
        """Stop the gateway and disconnect all adapters."""
        logger.info("Stopping gateway...")
@@ -526,6 +618,13 @@ class GatewayRunner:
                return None
            return SlackAdapter(config)

+        elif platform == Platform.SIGNAL:
+            from gateway.platforms.signal import SignalAdapter, check_signal_requirements
+            if not check_signal_requirements():
+                logger.warning("Signal: SIGNAL_HTTP_URL or SIGNAL_ACCOUNT not configured")
+                return None
+            return SignalAdapter(config)
+
        elif platform == Platform.HOMEASSISTANT:
            from gateway.platforms.homeassistant import HomeAssistantAdapter, check_ha_requirements
            if not check_ha_requirements():
@@ -561,12 +660,14 @@ class GatewayRunner:
            Platform.DISCORD: "DISCORD_ALLOWED_USERS",
            Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
            Platform.SLACK: "SLACK_ALLOWED_USERS",
+            Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
        }
        platform_allow_all_map = {
            Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
            Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS",
            Platform.WHATSAPP: "WHATSAPP_ALLOW_ALL_USERS",
            Platform.SLACK: "SLACK_ALLOW_ALL_USERS",
+            Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS",
        }

        # Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
@@ -664,7 +765,8 @@ class GatewayRunner:
        # Emit command:* hook for any recognized slash command
        _known_commands = {"new", "reset", "help", "status", "stop", "model",
                          "personality", "retry", "undo", "sethome", "set-home",
-                          "compress", "usage", "insights", "reload-mcp", "update"}
+                          "compress", "usage", "insights", "reload-mcp", "reload_mcp",
+                          "update", "title", "resume", "provider"}
        if command and command in _known_commands:
            await self.hooks.emit(f"command:{command}", {
                "platform": source.platform.value if source.platform else "",
@@ -688,6 +790,9 @@ class GatewayRunner:
        if command == "model":
            return await self._handle_model_command(event)
        
+        if command == "provider":
+            return await self._handle_provider_command(event)
+        
        if command == "personality":
            return await self._handle_personality_command(event)
        
@@ -709,11 +814,17 @@ class GatewayRunner:
        if command == "insights":
            return await self._handle_insights_command(event)

-        if command == "reload-mcp":
+        if command in ("reload-mcp", "reload_mcp"):
            return await self._handle_reload_mcp_command(event)

        if command == "update":
            return await self._handle_update_command(event)
+
+        if command == "title":
+            return await self._handle_title_command(event)
+
+        if command == "resume":
+            return await self._handle_resume_command(event)
        
        # Skill slash commands: /skill-name loads the skill and sends to agent
        if command:
@@ -788,6 +899,195 @@ class GatewayRunner:
        # Load conversation history from transcript
        history = self.session_store.load_transcript(session_entry.session_id)
        
+        # -----------------------------------------------------------------
+        # Session hygiene: auto-compress pathologically large transcripts
+        #
+        # Long-lived gateway sessions can accumulate enough history that
+        # every new message rehydrates an oversized transcript, causing
+        # repeated truncation/context failures.  Detect this early and
+        # compress proactively — before the agent even starts.  (#628)
+        #
+        # Thresholds are derived from the SAME compression config the
+        # agent uses (compression.threshold × model context length) so
+        # CLI and messaging platforms behave identically.
+        # -----------------------------------------------------------------
+        if history and len(history) >= 4:
+            from agent.model_metadata import (
+                estimate_messages_tokens_rough,
+                get_model_context_length,
+            )
+
+            # Read model + compression config from config.yaml — same
+            # source of truth the agent itself uses.
+            _hyg_model = "anthropic/claude-sonnet-4.6"
+            _hyg_threshold_pct = 0.85
+            _hyg_compression_enabled = True
+            try:
+                _hyg_cfg_path = _hermes_home / "config.yaml"
+                if _hyg_cfg_path.exists():
+                    import yaml as _hyg_yaml
+                    with open(_hyg_cfg_path) as _hyg_f:
+                        _hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}
+
+                    # Resolve model name (same logic as run_sync)
+                    _model_cfg = _hyg_data.get("model", {})
+                    if isinstance(_model_cfg, str):
+                        _hyg_model = _model_cfg
+                    elif isinstance(_model_cfg, dict):
+                        _hyg_model = _model_cfg.get("default", _hyg_model)
+
+                    # Read compression settings
+                    _comp_cfg = _hyg_data.get("compression", {})
+                    if isinstance(_comp_cfg, dict):
+                        _hyg_threshold_pct = float(
+                            _comp_cfg.get("threshold", _hyg_threshold_pct)
+                        )
+                        _hyg_compression_enabled = str(
+                            _comp_cfg.get("enabled", True)
+                        ).lower() in ("true", "1", "yes")
+            except Exception:
+                pass
+
+            # Also check env overrides (same as run_agent.py)
+            _hyg_threshold_pct = float(
+                os.getenv("CONTEXT_COMPRESSION_THRESHOLD", str(_hyg_threshold_pct))
+            )
+            if os.getenv("CONTEXT_COMPRESSION_ENABLED", "").lower() in ("false", "0", "no"):
+                _hyg_compression_enabled = False
+
+            if _hyg_compression_enabled:
+                _hyg_context_length = get_model_context_length(_hyg_model)
+                _compress_token_threshold = int(
+                    _hyg_context_length * _hyg_threshold_pct
+                )
+                # Warn if still huge after compression (95% of context)
+                _warn_token_threshold = int(_hyg_context_length * 0.95)
+
+                _msg_count = len(history)
+                _approx_tokens = estimate_messages_tokens_rough(history)
+
+                _needs_compress = _approx_tokens >= _compress_token_threshold
+
+                if _needs_compress:
+                    logger.info(
+                        "Session hygiene: %s messages, ~%s tokens — auto-compressing "
+                        "(threshold: %s%% of %s = %s tokens)",
+                        _msg_count, f"{_approx_tokens:,}",
+                        int(_hyg_threshold_pct * 100),
+                        f"{_hyg_context_length:,}",
+                        f"{_compress_token_threshold:,}",
+                    )
+
+                    _hyg_adapter = self.adapters.get(source.platform)
+                    if _hyg_adapter:
+                        try:
+                            await _hyg_adapter.send(
+                                source.chat_id,
+                                f"🗜️ Session is large ({_msg_count} messages, "
+                                f"~{_approx_tokens:,} tokens). Auto-compressing..."
+                            )
+                        except Exception:
+                            pass
+
+                    try:
+                        from run_agent import AIAgent
+
+                        _hyg_runtime = _resolve_runtime_agent_kwargs()
+                        if _hyg_runtime.get("api_key"):
+                            _hyg_msgs = [
+                                {"role": m.get("role"), "content": m.get("content")}
+                                for m in history
+                                if m.get("role") in ("user", "assistant")
+                                and m.get("content")
+                            ]
+
+                            if len(_hyg_msgs) >= 4:
+                                _hyg_agent = AIAgent(
+                                    **_hyg_runtime,
+                                    max_iterations=4,
+                                    quiet_mode=True,
+                                    enabled_toolsets=["memory"],
+                                    session_id=session_entry.session_id,
+                                )
+
+                                loop = asyncio.get_event_loop()
+                                _compressed, _ = await loop.run_in_executor(
+                                    None,
+                                    lambda: _hyg_agent._compress_context(
+                                        _hyg_msgs, "",
+                                        approx_tokens=_approx_tokens,
+                                    ),
+                                )
+
+                                self.session_store.rewrite_transcript(
+                                    session_entry.session_id, _compressed
+                                )
+                                history = _compressed
+                                _new_count = len(_compressed)
+                                _new_tokens = estimate_messages_tokens_rough(
+                                    _compressed
+                                )
+
+                                logger.info(
+                                    "Session hygiene: compressed %s → %s msgs, "
+                                    "~%s → ~%s tokens",
+                                    _msg_count, _new_count,
+                                    f"{_approx_tokens:,}", f"{_new_tokens:,}",
+                                )
+
+                                if _hyg_adapter:
+                                    try:
+                                        await _hyg_adapter.send(
+                                            source.chat_id,
+                                            f"🗜️ Compressed: {_msg_count} → "
+                                            f"{_new_count} messages, "
+                                            f"~{_approx_tokens:,} → "
+                                            f"~{_new_tokens:,} tokens"
+                                        )
+                                    except Exception:
+                                        pass
+
+                                # Still too large after compression — warn user
+                                if _new_tokens >= _warn_token_threshold:
+                                    logger.warning(
+                                        "Session hygiene: still ~%s tokens after "
+                                        "compression — suggesting /reset",
+                                        f"{_new_tokens:,}",
+                                    )
+                                    if _hyg_adapter:
+                                        try:
+                                            await _hyg_adapter.send(
+                                                source.chat_id,
+                                                "⚠️ Session is still very large "
+                                                "after compression "
+                                                f"(~{_new_tokens:,} tokens). "
+                                                "Consider using /reset to start "
+                                                "fresh if you experience issues."
+                                            )
+                                        except Exception:
+                                            pass
+
+                    except Exception as e:
+                        logger.warning(
+                            "Session hygiene auto-compress failed: %s", e
+                        )
+                        # Compression failed and session is dangerously large
+                        if _approx_tokens >= _warn_token_threshold:
+                            _hyg_adapter = self.adapters.get(source.platform)
+                            if _hyg_adapter:
+                                try:
+                                    await _hyg_adapter.send(
+                                        source.chat_id,
+                                        f"⚠️ Session is very large "
+                                        f"({_msg_count} messages, "
+                                        f"~{_approx_tokens:,} tokens) and "
+                                        "auto-compression failed. Consider "
+                                        "using /compress or /reset to avoid "
+                                        "issues."
+                                    )
+                                except Exception:
+                                    pass
+
        # First-message onboarding -- only on the very first interaction ever
        if not history and not self.session_store.has_any_sessions():
            context_prompt += (
@@ -1012,33 +1312,12 @@ class GatewayRunner:
        # Get existing session key
        session_key = self.session_store._generate_session_key(source)
        
-        # Memory flush before reset: load the old transcript and let a
-        # temporary agent save memories before the session is wiped.
+        # Flush memories in the background (fire-and-forget) so the user
+        # gets the "Session reset!" response immediately.
        try:
            old_entry = self.session_store._entries.get(session_key)
            if old_entry:
-                old_history = self.session_store.load_transcript(old_entry.session_id)
-                if old_history:
-                    from run_agent import AIAgent
-                    loop = asyncio.get_event_loop()
-                    _flush_kwargs = _resolve_runtime_agent_kwargs()
-                    def _do_flush():
-                        tmp_agent = AIAgent(
-                            **_flush_kwargs,
-                            max_iterations=5,
-                            quiet_mode=True,
-                            enabled_toolsets=["memory"],
-                            session_id=old_entry.session_id,
-                        )
-                        # Build simple message list from transcript
-                        msgs = []
-                        for m in old_history:
-                            role = m.get("role")
-                            content = m.get("content")
-                            if role in ("user", "assistant") and content:
-                                msgs.append({"role": role, "content": content})
-                        tmp_agent.flush_memories(msgs)
-                    await loop.run_in_executor(None, _do_flush)
+                asyncio.create_task(self._async_flush_memories(old_entry.session_id))
        except Exception as e:
            logger.debug("Gateway memory flush on reset failed: %s", e)
        
@@ -1105,12 +1384,15 @@ class GatewayRunner:
            "`/reset` — Reset conversation history",
            "`/status` — Show session info",
            "`/stop` — Interrupt the running agent",
-            "`/model [name]` — Show or change the model",
+            "`/model [provider:model]` — Show/change model (or switch provider)",
+            "`/provider` — Show available providers and auth status",
            "`/personality [name]` — Set a personality",
            "`/retry` — Retry your last message",
            "`/undo` — Remove the last exchange",
            "`/sethome` — Set this chat as the home channel",
            "`/compress` — Compress conversation context",
+            "`/title [name]` — Set or show the session title",
+            "`/resume [name]` — Resume a previously-named session",
            "`/usage` — Show token usage for this session",
            "`/insights [days]` — Show usage insights and analytics",
            "`/reload-mcp` — Reload MCP servers from config",
@@ -1131,13 +1413,20 @@ class GatewayRunner:
    async def _handle_model_command(self, event: MessageEvent) -> str:
        """Handle /model command - show or change the current model."""
        import yaml
+        from hermes_cli.models import (
+            parse_model_input,
+            validate_requested_model,
+            curated_models_for_provider,
+            normalize_provider,
+            _PROVIDER_LABELS,
+        )

        args = event.get_command_args().strip()
        config_path = _hermes_home / 'config.yaml'

-        # Resolve current model the same way the agent init does:
-        # env vars first, then config.yaml always overrides.
+        # Resolve current model and provider from config
        current = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
+        current_provider = "openrouter"
        try:
            if config_path.exists():
                with open(config_path) as f:
@@ -1147,39 +1436,164 @@ class GatewayRunner:
                    current = model_cfg
                elif isinstance(model_cfg, dict):
                    current = model_cfg.get("default", current)
+                    current_provider = model_cfg.get("provider", current_provider)
        except Exception:
            pass

+        # Resolve "auto" to the actual provider using credential detection
+        current_provider = normalize_provider(current_provider)
+        if current_provider == "auto":
+            try:
+                from hermes_cli.auth import resolve_provider as _resolve_provider
+                current_provider = _resolve_provider(current_provider)
+            except Exception:
+                current_provider = "openrouter"
+
        if not args:
-            return f"🤖 **Current model:** `{current}`\n\nTo change: `/model provider/model-name`"
+            provider_label = _PROVIDER_LABELS.get(current_provider, current_provider)
+            lines = [
+                f"🤖 **Current model:** `{current}`",
+                f"**Provider:** {provider_label}",
+                "",
+            ]
+            curated = curated_models_for_provider(current_provider)
+            if curated:
+                lines.append(f"**Available models ({provider_label}):**")
+                for mid, desc in curated:
+                    marker = " ←" if mid == current else ""
+                    label = f"  _{desc}_" if desc else ""
+                    lines.append(f"• `{mid}`{label}{marker}")
+                lines.append("")
+            lines.append("To change: `/model model-name`")
+            lines.append("Switch provider: `/model provider:model-name`")
+            return "\n".join(lines)

-        if "/" not in args:
-            return (
-                f"🤖 Invalid model format: `{args}`\n\n"
-                f"Use `provider/model-name` format, e.g.:\n"
-                f"• `anthropic/claude-sonnet-4`\n"
-                f"• `google/gemini-2.5-pro`\n"
-                f"• `openai/gpt-4o`"
-            )
+        # Parse provider:model syntax
+        target_provider, new_model = parse_model_input(args, current_provider)
+        provider_changed = target_provider != current_provider

-        # Write to config.yaml (source of truth), same pattern as CLI save_config_value.
+        # Resolve credentials for the target provider (for API probe)
+        api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or ""
+        base_url = "https://openrouter.ai/api/v1"
+        if provider_changed:
+            try:
+                from hermes_cli.runtime_provider import resolve_runtime_provider
+                runtime = resolve_runtime_provider(requested=target_provider)
+                api_key = runtime.get("api_key", "")
+                base_url = runtime.get("base_url", "")
+            except Exception as e:
+                provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
+                return f"⚠️ Could not resolve credentials for provider '{provider_label}': {e}"
+        else:
+            # Use current provider's base_url from config or registry
+            try:
+                from hermes_cli.runtime_provider import resolve_runtime_provider
+                runtime = resolve_runtime_provider(requested=current_provider)
+                api_key = runtime.get("api_key", "")
+                base_url = runtime.get("base_url", "")
+            except Exception:
+                pass
+
+        # Validate the model against the live API
+        try:
+            validation = validate_requested_model(
+                new_model,
+                target_provider,
+                api_key=api_key,
+                base_url=base_url,
+            )
+        except Exception:
+            validation = {"accepted": True, "persist": True, "recognized": False, "message": None}
+
+        if not validation.get("accepted"):
+            msg = validation.get("message") or "Invalid model"  # key may exist with a None/empty value
+            tip = "\n\nUse `/model` to see available models, `/provider` to see providers" if "Did you mean" not in msg else ""
+            return f"⚠️ {msg}{tip}"
+
+        # Persist to config only if validation approves
+        if validation.get("persist"):
+            try:
+                user_config = {}
+                if config_path.exists():
+                    with open(config_path) as f:
+                        user_config = yaml.safe_load(f) or {}
+                if "model" not in user_config or not isinstance(user_config["model"], dict):
+                    user_config["model"] = {}
+                user_config["model"]["default"] = new_model
+                if provider_changed:
+                    user_config["model"]["provider"] = target_provider
+                with open(config_path, 'w') as f:
+                    yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
+            except Exception as e:
+                return f"⚠️ Failed to save model change: {e}"
+
+        # Set env vars so the next agent run picks up the change
+        os.environ["HERMES_MODEL"] = new_model
+        if provider_changed:
+            os.environ["HERMES_INFERENCE_PROVIDER"] = target_provider
+
+        provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
+        provider_note = f"\n**Provider:** {provider_label}" if provider_changed else ""
+
+        warning = ""
+        if validation.get("message"):
+            warning = f"\n⚠️ {validation['message']}"
+
+        if validation.get("persist"):
+            persist_note = "saved to config"
+        else:
+            persist_note = "this session only — will revert on restart"
+        return f"🤖 Model changed to `{new_model}` ({persist_note}){provider_note}{warning}\n_(takes effect on next message)_"
+
+    async def _handle_provider_command(self, event: MessageEvent) -> str:
+        """Handle /provider command - show available providers."""
+        import yaml
+        from hermes_cli.models import (
+            list_available_providers,
+            normalize_provider,
+            _PROVIDER_LABELS,
+        )
+
+        # Resolve current provider from config
+        current_provider = "openrouter"
+        config_path = _hermes_home / 'config.yaml'
        try:
-            user_config = {}
            if config_path.exists():
                with open(config_path) as f:
-                    user_config = yaml.safe_load(f) or {}
-            if "model" not in user_config or not isinstance(user_config["model"], dict):
-                user_config["model"] = {}
-            user_config["model"]["default"] = args
-            with open(config_path, 'w') as f:
-                yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
-        except Exception as e:
-            return f"⚠️ Failed to save model change: {e}"
+                    cfg = yaml.safe_load(f) or {}
+                model_cfg = cfg.get("model", {})
+                if isinstance(model_cfg, dict):
+                    current_provider = model_cfg.get("provider", current_provider)
+        except Exception:
+            pass

-        # Also set env var so code reading it before the next agent init sees the update.
-        os.environ["HERMES_MODEL"] = args
+        current_provider = normalize_provider(current_provider)
+        if current_provider == "auto":
+            try:
+                from hermes_cli.auth import resolve_provider as _resolve_provider
+                current_provider = _resolve_provider(current_provider)
+            except Exception:
+                current_provider = "openrouter"

-        return f"🤖 Model changed to `{args}`\n_(takes effect on next message)_"
+        current_label = _PROVIDER_LABELS.get(current_provider, current_provider)
+
+        lines = [
+            f"🔌 **Current provider:** {current_label} (`{current_provider}`)",
+            "",
+            "**Available providers:**",
+        ]
+
+        providers = list_available_providers()
+        for p in providers:
+            marker = " ← active" if p["id"] == current_provider else ""
+            auth = "✅" if p["authenticated"] else "❌"
+            aliases = f"  _(also: {', '.join(p['aliases'])})_" if p["aliases"] else ""
+            lines.append(f"{auth} `{p['id']}` — {p['label']}{aliases}{marker}")
+
+        lines.append("")
+        lines.append("Switch: `/model provider:model-name`")
+        lines.append("Setup: `hermes setup`")
+        return "\n".join(lines)
    
    async def _handle_personality_command(self, event: MessageEvent) -> str:
        """Handle /personality command - list or set a personality."""
@@ -1369,6 +1783,113 @@ class GatewayRunner:
            logger.warning("Manual compress failed: %s", e)
            return f"Compression failed: {e}"

+    async def _handle_title_command(self, event: MessageEvent) -> str:
+        """Handle /title command — set or show the current session's title."""
+        source = event.source
+        session_entry = self.session_store.get_or_create_session(source)
+        session_id = session_entry.session_id
+
+        if not self._session_db:
+            return "Session database not available."
+
+        title_arg = event.get_command_args().strip()
+        if title_arg:
+            # Sanitize the title before setting
+            try:
+                sanitized = self._session_db.sanitize_title(title_arg)
+            except ValueError as e:
+                return f"⚠️ {e}"
+            if not sanitized:
+                return "⚠️ Title is empty after cleanup. Please use printable characters."
+            # Set the title
+            try:
+                if self._session_db.set_session_title(session_id, sanitized):
+                    return f"✏️ Session title set: **{sanitized}**"
+                else:
+                    return "Session not found in database."
+            except ValueError as e:
+                return f"⚠️ {e}"
+        else:
+            # Show the current title
+            title = self._session_db.get_session_title(session_id)
+            if title:
+                return f"📌 Session title: **{title}**"
+            else:
+                return "No title set. Usage: `/title My Session Name`"
+
+    async def _handle_resume_command(self, event: MessageEvent) -> str:
+        """Handle /resume command — switch to a previously-named session."""
+        if not self._session_db:
+            return "Session database not available."
+
+        source = event.source
+        session_key = build_session_key(source)
+        name = event.get_command_args().strip()
+
+        if not name:
+            # List recent titled sessions for this user/platform
+            try:
+                user_source = source.platform.value if source.platform else None
+                sessions = self._session_db.list_sessions_rich(
+                    source=user_source, limit=10
+                )
+                titled = [s for s in sessions if s.get("title")]
+                if not titled:
+                    return (
+                        "No named sessions found.\n"
+                        "Use `/title My Session` to name your current session, "
+                        "then `/resume My Session` to return to it later."
+                    )
+                lines = ["📋 **Named Sessions**\n"]
+                for s in titled[:10]:
+                    title = s["title"]
+                    preview = (s.get("preview") or "")[:40]  # tolerate a preview that is present but None
+                    preview_part = f" — _{preview}_" if preview else ""
+                    lines.append(f"• **{title}**{preview_part}")
+                lines.append("\nUsage: `/resume <session name>`")
+                return "\n".join(lines)
+            except Exception as e:
+                logger.debug("Failed to list titled sessions: %s", e)
+                return f"Could not list sessions: {e}"
+
+        # Resolve the name to a session ID
+        target_id = self._session_db.resolve_session_by_title(name)
+        if not target_id:
+            return (
+                f"No session found matching '**{name}**'.\n"
+                "Use `/resume` with no arguments to see available sessions."
+            )
+
+        # Check if already on that session
+        current_entry = self.session_store.get_or_create_session(source)
+        if current_entry.session_id == target_id:
+            return f"📌 Already on session **{name}**."
+
+        # Flush memories for current session before switching
+        try:
+            asyncio.create_task(self._async_flush_memories(current_entry.session_id))
+        except Exception as e:
+            logger.debug("Memory flush on resume failed: %s", e)
+
+        # Clear any running agent for this session key
+        if session_key in self._running_agents:
+            del self._running_agents[session_key]
+
+        # Switch the session entry to point at the old session
+        new_entry = self.session_store.switch_session(session_key, target_id)
+        if not new_entry:
+            return "Failed to switch session."
+
+        # Get the title for confirmation
+        title = self._session_db.get_session_title(target_id) or name
+
+        # Count messages for context
+        history = self.session_store.load_transcript(target_id)
+        msg_count = len([m for m in history if m.get("role") == "user"]) if history else 0
+        msg_part = f" ({msg_count} message{'s' if msg_count != 1 else ''})" if msg_count else ""
+
+        return f"↻ Resumed session **{title}**{msg_part}. Conversation restored."
+
    async def _handle_usage_command(self, event: MessageEvent) -> str:
        """Handle /usage command -- show token usage for the session's last agent run."""
        source = event.source
@@ -2166,6 +2687,7 @@ class GatewayRunner:
                platform=platform_key,
                honcho_session_key=session_key,
                session_db=self._session_db,
+                fallback_model=self._fallback_model,
            )
            
            # Store agent reference for interrupt support
@@ -2437,34 +2959,77 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, interval: int
    logger.info("Cron ticker stopped")


-async def start_gateway(config: Optional[GatewayConfig] = None) -> bool:
+async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False) -> bool:
    """
    Start the gateway and run until interrupted.
    
    This is the main entry point for running the gateway.
    Returns True if the gateway ran successfully, False if it failed to start.
    A False return causes a non-zero exit code so systemd can auto-restart.
+    
+    Args:
+        config: Optional gateway configuration override.
+        replace: If True, kill any existing gateway instance before starting.
+                 Useful for systemd services to avoid restart-loop deadlocks
+                 when the previous process hasn't fully exited yet.
    """
    # ── Duplicate-instance guard ──────────────────────────────────────
    # Prevent two gateways from running under the same HERMES_HOME.
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
-    from gateway.status import get_running_pid
+    import time as _time
+    from gateway.status import get_running_pid, remove_pid_file
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
-        hermes_home = os.getenv("HERMES_HOME", "~/.hermes")
-        logger.error(
-            "Another gateway instance is already running (PID %d, HERMES_HOME=%s). "
-            "Use 'hermes gateway restart' to replace it, or 'hermes gateway stop' first.",
-            existing_pid, hermes_home,
-        )
-        print(
-            f"\n❌ Gateway already running (PID {existing_pid}).\n"
-            f"   Use 'hermes gateway restart' to replace it,\n"
-            f"   or 'hermes gateway stop' to kill it first.\n"
-        )
-        return False
+        if replace:
+            logger.info(
+                "Replacing existing gateway instance (PID %d) with --replace.",
+                existing_pid,
+            )
+            try:
+                os.kill(existing_pid, signal.SIGTERM)
+            except ProcessLookupError:
+                pass  # Already gone
+            except PermissionError:
+                logger.error(
+                    "Permission denied killing PID %d. Cannot replace.",
+                    existing_pid,
+                )
+                return False
+            # Wait up to 10 seconds for the old process to exit
+            for _ in range(20):
+                try:
+                    os.kill(existing_pid, 0)
+                    _time.sleep(0.5)
+                except (ProcessLookupError, PermissionError):
+                    break  # Process is gone
+            else:
+                # Still alive after 10s — force kill
+                logger.warning(
+                    "Old gateway (PID %d) did not exit after SIGTERM, sending SIGKILL.",
+                    existing_pid,
+                )
+                try:
+                    os.kill(existing_pid, signal.SIGKILL)
+                    _time.sleep(0.5)
+                except (ProcessLookupError, PermissionError):
+                    pass
+            remove_pid_file()
+        else:
+            hermes_home = os.getenv("HERMES_HOME", "~/.hermes")
+            logger.error(
+                "Another gateway instance is already running (PID %d, HERMES_HOME=%s). "
+                "Use 'hermes gateway restart' to replace it, or 'hermes gateway stop' first.",
+                existing_pid, hermes_home,
+            )
+            print(
+                f"\n❌ Gateway already running (PID {existing_pid}).\n"
+                f"   Use 'hermes gateway restart' to replace it,\n"
+                f"   or 'hermes gateway stop' to kill it first.\n"
+                f"   Or use 'hermes gateway run --replace' to auto-replace.\n"
+            )
+            return False

    # Sync bundled skills on gateway start (fast -- skips unchanged)
    try:
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -45,6 +45,8 @@ class SessionSource:
    user_name: Optional[str] = None
    thread_id: Optional[str] = None  # For forum topics, Discord threads, etc.
    chat_topic: Optional[str] = None  # Channel topic/description (Discord, Slack)
+    user_id_alt: Optional[str] = None  # Signal UUID (alternative to phone number)
+    chat_id_alt: Optional[str] = None  # Signal group internal ID
    
    @property
    def description(self) -> str:
@@ -68,7 +70,7 @@ class SessionSource:
        return ", ".join(parts)
    
    def to_dict(self) -> Dict[str, Any]:
-        return {
+        d = {
            "platform": self.platform.value,
            "chat_id": self.chat_id,
            "chat_name": self.chat_name,
@@ -78,6 +80,11 @@ class SessionSource:
            "thread_id": self.thread_id,
            "chat_topic": self.chat_topic,
        }
+        if self.user_id_alt:
+            d["user_id_alt"] = self.user_id_alt
+        if self.chat_id_alt:
+            d["chat_id_alt"] = self.chat_id_alt
+        return d
    
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
@@ -90,6 +97,8 @@ class SessionSource:
            user_name=data.get("user_name"),
            thread_id=data.get("thread_id"),
            chat_topic=data.get("chat_topic"),
+            user_id_alt=data.get("user_id_alt"),
+            chat_id_alt=data.get("chat_id_alt"),
        )
    
    @classmethod
@@ -311,7 +320,9 @@ class SessionStore:
        self._entries: Dict[str, SessionEntry] = {}
        self._loaded = False
        self._has_active_processes_fn = has_active_processes_fn
-        self._on_auto_reset = on_auto_reset  # callback(old_entry) before auto-reset
+        # on_auto_reset is deprecated — memory flush now runs proactively
+        # via the background session expiry watcher in GatewayRunner.
+        self._pre_flushed_sessions: set = set()  # session_ids already flushed by watcher
        
        # Initialize SQLite session database
        self._db = None
@@ -331,7 +342,7 @@ class SessionStore:
        
        if sessions_file.exists():
            try:
-                with open(sessions_file, "r") as f:
+                with open(sessions_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                    for key, entry_data in data.items():
                        self._entries[key] = SessionEntry.from_dict(entry_data)
@@ -346,13 +357,51 @@ class SessionStore:
        sessions_file = self.sessions_dir / "sessions.json"
        
        data = {key: entry.to_dict() for key, entry in self._entries.items()}
-        with open(sessions_file, "w") as f:
+        with open(sessions_file, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
    
    def _generate_session_key(self, source: SessionSource) -> str:
        """Generate a session key from a source."""
        return build_session_key(source)
    
+    def _is_session_expired(self, entry: SessionEntry) -> bool:
+        """Check if a session has expired based on its reset policy.
+        
+        Works from the entry alone — no SessionSource needed.
+        Used by the background expiry watcher to proactively flush memories.
+        Sessions with active background processes are never considered expired.
+        """
+        if self._has_active_processes_fn:
+            if self._has_active_processes_fn(entry.session_key):
+                return False
+
+        policy = self.config.get_reset_policy(
+            platform=entry.platform,
+            session_type=entry.chat_type,
+        )
+
+        if policy.mode == "none":
+            return False
+
+        now = datetime.now()
+
+        if policy.mode in ("idle", "both"):
+            idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
+            if now > idle_deadline:
+                return True
+
+        if policy.mode in ("daily", "both"):
+            today_reset = now.replace(
+                hour=policy.at_hour,
+                minute=0, second=0, microsecond=0,
+            )
+            if now.hour < policy.at_hour:
+                today_reset -= timedelta(days=1)
+            if entry.updated_at < today_reset:
+                return True
+
+        return False
+
    def _should_reset(self, entry: SessionEntry, source: SessionSource) -> bool:
        """
        Check if a session should be reset based on policy.
@@ -439,13 +488,11 @@ class SessionStore:
                self._save()
                return entry
            else:
-                # Session is being auto-reset — flush memories before destroying
+                # Session is being auto-reset.  The background expiry watcher
+                # should have already flushed memories proactively; discard
+                # the marker so it doesn't accumulate.
                was_auto_reset = True
-                if self._on_auto_reset:
-                    try:
-                        self._on_auto_reset(entry)
-                    except Exception as e:
-                        logger.debug("Auto-reset callback failed: %s", e)
+                self._pre_flushed_sessions.discard(entry.session_id)
                if self._db:
                    try:
                        self._db.end_session(entry.session_id, "session_reset")
@@ -555,7 +602,49 @@ class SessionStore:
                logger.debug("Session DB operation failed: %s", e)
        
        return new_entry
-    
+
+    def switch_session(self, session_key: str, target_session_id: str) -> Optional[SessionEntry]:
+        """Switch a session key to point at an existing session ID.
+
+        Used by ``/resume`` to restore a previously-named session.
+        Ends the current session in SQLite (like reset), but instead of
+        generating a fresh session ID, re-uses ``target_session_id`` so the
+        old transcript is loaded on the next message.
+        """
+        self._ensure_loaded()
+
+        if session_key not in self._entries:
+            return None
+
+        old_entry = self._entries[session_key]
+
+        # Don't switch if already on that session
+        if old_entry.session_id == target_session_id:
+            return old_entry
+
+        # End the current session in SQLite
+        if self._db:
+            try:
+                self._db.end_session(old_entry.session_id, "session_switch")
+            except Exception as e:
+                logger.debug("Session DB end_session failed: %s", e)
+
+        now = datetime.now()
+        new_entry = SessionEntry(
+            session_key=session_key,
+            session_id=target_session_id,
+            created_at=now,
+            updated_at=now,
+            origin=old_entry.origin,
+            display_name=old_entry.display_name,
+            platform=old_entry.platform,
+            chat_type=old_entry.chat_type,
+        )
+
+        self._entries[session_key] = new_entry
+        self._save()
+        return new_entry
+
    def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]:
        """List all sessions, optionally filtered by activity."""
        self._ensure_loaded()
@@ -592,7 +681,7 @@ class SessionStore:
        
        # Also write legacy JSONL (keeps existing tooling working during transition)
        transcript_path = self.get_transcript_path(session_id)
-        with open(transcript_path, "a") as f:
+        with open(transcript_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(message, ensure_ascii=False) + "\n")
    
    def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
@@ -619,7 +708,7 @@ class SessionStore:
        
        # JSONL: overwrite the file
        transcript_path = self.get_transcript_path(session_id)
-        with open(transcript_path, "w") as f:
+        with open(transcript_path, "w", encoding="utf-8") as f:
            for msg in messages:
                f.write(json.dumps(msg, ensure_ascii=False) + "\n")

@@ -641,7 +730,7 @@ class SessionStore:
            return []
        
        messages = []
-        with open(transcript_path, "r") as f:
+        with open(transcript_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line:
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -138,6 +138,83 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
 }


+# =============================================================================
+# Kimi Code Endpoint Detection
+# =============================================================================
+
+# Kimi Code (platform.kimi.ai) issues keys prefixed "sk-kimi-" that only work
+# on api.kimi.com/coding/v1.  Legacy keys from platform.moonshot.ai work on
+# api.moonshot.ai/v1 (the default).  Auto-detect when user hasn't set
+# KIMI_BASE_URL explicitly.
+KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1"
+
+
+def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str:
+    """Return the correct Kimi base URL based on the API key prefix.
+
+    If the user has explicitly set KIMI_BASE_URL, that always wins.
+    Otherwise, sk-kimi- prefixed keys route to api.kimi.com/coding/v1.
+    """
+    if env_override:
+        return env_override
+    if api_key.startswith("sk-kimi-"):
+        return KIMI_CODE_BASE_URL
+    return default_url
+
+
+# =============================================================================
+# Z.AI Endpoint Detection
+# =============================================================================
+
+# Z.AI has separate billing for general vs coding plans, and global vs China
+# endpoints.  A key that works on one may return "Insufficient balance" on
+# another.  We probe at setup time and store the working endpoint.
+
+ZAI_ENDPOINTS = [
+    # (id, base_url, default_model, label)
+    ("global",        "https://api.z.ai/api/paas/v4",        "glm-5",   "Global"),
+    ("cn",            "https://open.bigmodel.cn/api/paas/v4", "glm-5",   "China"),
+    ("coding-global", "https://api.z.ai/api/coding/paas/v4",  "glm-4.7", "Global (Coding Plan)"),
+    ("coding-cn",     "https://open.bigmodel.cn/api/coding/paas/v4", "glm-4.7", "China (Coding Plan)"),
+]
+
+
+def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str, str]]:
+    """Probe z.ai endpoints to find one that accepts this API key.
+
+    Returns {"id": ..., "base_url": ..., "model": ..., "label": ...} for the
+    first working endpoint, or None if all fail.
+    """
+    for ep_id, base_url, model, label in ZAI_ENDPOINTS:
+        try:
+            resp = httpx.post(
+                f"{base_url}/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {api_key}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": model,
+                    "stream": False,
+                    "max_tokens": 1,
+                    "messages": [{"role": "user", "content": "ping"}],
+                },
+                timeout=timeout,
+            )
+            if resp.status_code == 200:
+                logger.debug("Z.AI endpoint probe: %s (%s) OK", ep_id, base_url)
+                return {
+                    "id": ep_id,
+                    "base_url": base_url,
+                    "model": model,
+                    "label": label,
+                }
+            logger.debug("Z.AI endpoint probe: %s returned %s", ep_id, resp.status_code)
+        except Exception as exc:
+            logger.debug("Z.AI endpoint probe: %s failed: %s", ep_id, exc)
+    return None
+
+
 # =============================================================================
 # Error Types
 # =============================================================================
@@ -1298,11 +1375,16 @@ def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]:
            key_source = env_var
            break

-    base_url = pconfig.inference_base_url
+    env_url = ""
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()
-        if env_url:
-            base_url = env_url
+
+    if provider_id == "kimi-coding":
+        base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
+    elif env_url:
+        base_url = env_url
+    else:
+        base_url = pconfig.inference_base_url

    return {
        "configured": bool(api_key),
@@ -1350,11 +1432,16 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
            key_source = env_var
            break

-    base_url = pconfig.inference_base_url
+    env_url = ""
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()
-        if env_url:
-            base_url = env_url.rstrip("/")
+
+    if provider_id == "kimi-coding":  # strip the override here too, as the replaced code did for every provider
+        base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url.rstrip("/"))
+    elif env_url:
+        base_url = env_url.rstrip("/")
+    else:
+        base_url = pconfig.inference_base_url

    return {
        "provider": provider_id,
--- a/hermes_cli/clipboard.py
+++ b/hermes_cli/clipboard.py
@@ -285,8 +285,8 @@ def _convert_to_png(path: Path) -> bool:
        logger.debug("Pillow BMP→PNG conversion failed: %s", e)

    # Fall back to ImageMagick convert
+    tmp = path.with_suffix(".bmp")
    try:
-        tmp = path.with_suffix(".bmp")
        path.rename(tmp)
        r = subprocess.run(
            ["convert", str(tmp), "png:" + str(path)],
@@ -297,8 +297,12 @@ def _convert_to_png(path: Path) -> bool:
            return True
    except FileNotFoundError:
        logger.debug("ImageMagick not installed — cannot convert BMP to PNG")
+        if tmp.exists() and not path.exists():
+            tmp.rename(path)
    except Exception as e:
        logger.debug("ImageMagick BMP→PNG conversion failed: %s", e)
+        if tmp.exists() and not path.exists():
+            tmp.rename(path)

    # Can't convert — BMP is still usable as-is for most APIs
    return path.exists() and path.stat().st_size > 0
--- a/hermes_cli/codex_models.py
+++ b/hermes_cli/codex_models.py
@@ -94,8 +94,6 @@ def _read_cache_models(codex_home: Path) -> List[str]:
            if not isinstance(slug, str) or not slug.strip():
                continue
            slug = slug.strip()
-            if "codex" not in slug.lower():
-                continue
            if item.get("supported_in_api") is False:
                continue
            visibility = item.get("visibility")
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -1,9 +1,15 @@
 """Slash command definitions and autocomplete for the Hermes CLI.

-Contains the COMMANDS dict and the SlashCommandCompleter class.
-These are pure data/UI with no HermesCLI state dependency.
+Contains the shared built-in ``COMMANDS`` dict and ``SlashCommandCompleter``.
+The completer can optionally include dynamic skill slash commands supplied by the
+interactive CLI.
 """

+from __future__ import annotations
+
+from collections.abc import Callable, Mapping
+from typing import Any
+
 from prompt_toolkit.completion import Completer, Completion


@@ -12,6 +18,7 @@ COMMANDS = {
    "/tools": "List available tools",
    "/toolsets": "List available toolsets",
    "/model": "Show or change the current model",
+    "/provider": "Show available providers and current provider",
    "/prompt": "View/set custom system prompt",
    "/personality": "Set a predefined personality",
    "/clear": "Clear screen and reset conversation (fresh start)",
@@ -27,26 +34,68 @@ COMMANDS = {
    "/platforms": "Show gateway/messaging platform status",
    "/verbose": "Cycle tool progress display: off → new → all → verbose",
    "/compress": "Manually compress conversation context (flush memories + summarize)",
+    "/title": "Set a title for the current session (usage: /title My Session Name)",
    "/usage": "Show token usage for the current session",
    "/insights": "Show usage insights and analytics (last 30 days)",
+    "/paste": "Check clipboard for an image and attach it",
+    "/reload-mcp": "Reload MCP servers from config.yaml",
    "/quit": "Exit the CLI (also: /exit, /q)",
 }


 class SlashCommandCompleter(Completer):
-    """Autocomplete for /commands in the input area."""
+    """Autocomplete for built-in slash commands and optional skill commands."""
+
+    def __init__(
+        self,
+        skill_commands_provider: Callable[[], Mapping[str, dict[str, Any]]] | None = None,
+    ) -> None:
+        self._skill_commands_provider = skill_commands_provider
+
+    def _iter_skill_commands(self) -> Mapping[str, dict[str, Any]]:
+        if self._skill_commands_provider is None:
+            return {}
+        try:
+            return self._skill_commands_provider() or {}
+        except Exception:
+            return {}
+
+    @staticmethod
+    def _completion_text(cmd_name: str, word: str) -> str:
+        """Return replacement text for a completion.
+
+        When the user has already typed the full command exactly (``/help``),
+        returning ``help`` would be a no-op and prompt_toolkit suppresses the
+        menu. Appending a trailing space keeps the dropdown visible and makes
+        backspacing retrigger it naturally.
+        """
+        return f"{cmd_name} " if cmd_name == word else cmd_name

    def get_completions(self, document, complete_event):
        text = document.text_before_cursor
        if not text.startswith("/"):
            return
+
        word = text[1:]
+
        for cmd, desc in COMMANDS.items():
            cmd_name = cmd[1:]
            if cmd_name.startswith(word):
                yield Completion(
-                    cmd_name,
+                    self._completion_text(cmd_name, word),
                    start_position=-len(word),
                    display=cmd,
                    display_meta=desc,
                )
+
+        for cmd, info in self._iter_skill_commands().items():
+            cmd_name = cmd[1:]
+            if cmd_name.startswith(word):
+                description = str(info.get("description", "Skill command"))
+                short_desc = description[:50] + ("..." if len(description) > 50 else "")
+                yield Completion(
+                    self._completion_text(cmd_name, word),
+                    start_position=-len(word),
+                    display=cmd,
+                    display_meta=f"⚡ {short_desc}",
+                )
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -81,17 +81,34 @@ DEFAULT_CONFIG = {
    
    "browser": {
        "inactivity_timeout": 120,
+        "record_sessions": False,  # Auto-record browser sessions as WebM videos
    },
    
    "compression": {
        "enabled": True,
        "threshold": 0.85,
        "summary_model": "google/gemini-3-flash-preview",
+        "summary_provider": "auto",
+    },
+    
+    # Auxiliary model overrides (advanced).  By default Hermes auto-selects
+    # the provider and model for each side task.  Set these to override.
+    "auxiliary": {
+        "vision": {
+            "provider": "auto",    # auto | openrouter | nous | main
+            "model": "",           # e.g. "google/gemini-2.5-flash", "gpt-4o"
+        },
+        "web_extract": {
+            "provider": "auto",
+            "model": "",
+        },
    },
    
    "display": {
        "compact": False,
        "personality": "kawaii",
+        "resume_display": "full",  # "full" (show previous messages) | "minimal" (one-liner only)
+        "bell_on_complete": False,  # Play terminal bell (\a) when agent finishes a response
    },
    
    # Text-to-speech configuration
@@ -156,6 +173,15 @@ DEFAULT_CONFIG = {
 # Config Migration System
 # =============================================================================

+# Track which env vars were introduced in each config version.
+# Migration only mentions vars new since the user's previous version.
+ENV_VARS_BY_VERSION: Dict[int, List[str]] = {
+    3: ["FIRECRAWL_API_KEY", "BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID", "FAL_KEY"],
+    4: ["VOICE_TOOLS_OPENAI_KEY", "ELEVENLABS_API_KEY"],
+    5: ["WHATSAPP_ENABLED", "WHATSAPP_MODE", "WHATSAPP_ALLOWED_USERS",
+        "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"],
+}
+
 # Required environment variables with metadata for migration prompts.
 # LLM provider is required but handled in the setup wizard's provider
 # selection step (Nous Portal / OpenRouter / Custom endpoint), so this
@@ -413,7 +439,7 @@ OPTIONAL_ENV_VARS = {
        "category": "setting",
    },
    "HERMES_MAX_ITERATIONS": {
-        "description": "Maximum tool-calling iterations per conversation (default: 60)",
+        "description": "Maximum tool-calling iterations per conversation (default: 90)",
        "prompt": "Max iterations",
        "url": None,
        "password": False,
@@ -625,34 +651,47 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
        if v["name"] not in required_names and not v.get("advanced")
    ]
    
-    if interactive and missing_optional:
-        print("  Would you like to configure any optional keys now?")
-        try:
-            answer = input("  Configure optional keys? [y/N]: ").strip().lower()
-        except (EOFError, KeyboardInterrupt):
-            answer = "n"
-        
-        if answer in ("y", "yes"):
+    # Only offer to configure env vars that are NEW since the user's previous version
+    new_var_names = set()
+    for ver in range(current_ver + 1, latest_ver + 1):
+        new_var_names.update(ENV_VARS_BY_VERSION.get(ver, []))
+
+    if new_var_names and interactive and not quiet:
+        new_and_unset = [
+            (name, OPTIONAL_ENV_VARS[name])
+            for name in sorted(new_var_names)
+            if not get_env_value(name) and name in OPTIONAL_ENV_VARS
+        ]
+        if new_and_unset:
+            print(f"\n  {len(new_and_unset)} new optional key(s) in this update:")
+            for name, info in new_and_unset:
+                print(f"    • {name} — {info.get('description', '')}")
            print()
-            for var in missing_optional:
-                desc = var.get("description", "")
-                if var.get("url"):
-                    print(f"  {desc}")
-                    print(f"  Get your key at: {var['url']}")
-                else:
-                    print(f"  {desc}")
-                
-                if var.get("password"):
-                    import getpass
-                    value = getpass.getpass(f"  {var['prompt']} (Enter to skip): ")
-                else:
-                    value = input(f"  {var['prompt']} (Enter to skip): ").strip()
-                
-                if value:
-                    save_env_value(var["name"], value)
-                    results["env_added"].append(var["name"])
-                    print(f"  ✓ Saved {var['name']}")
+            try:
+                answer = input("  Configure new keys? [y/N]: ").strip().lower()
+            except (EOFError, KeyboardInterrupt):
+                answer = "n"
+
+            if answer in ("y", "yes"):
                print()
+                for name, info in new_and_unset:
+                    if info.get("url"):
+                        print(f"  {info.get('description', name)}")
+                        print(f"  Get your key at: {info['url']}")
+                    else:
+                        print(f"  {info.get('description', name)}")
+                    if info.get("password"):
+                        import getpass
+                        value = getpass.getpass(f"  {info.get('prompt', name)} (Enter to skip): ")
+                    else:
+                        value = input(f"  {info.get('prompt', name)} (Enter to skip): ").strip()
+                    if value:
+                        save_env_value(name, value)
+                        results["env_added"].append(name)
+                        print(f"  ✓ Saved {name}")
+                    print()
+            else:
+                print("  Set later with: hermes config set KEY VALUE")
    
    # Check for missing config fields
    missing_config = get_missing_config_fields()
@@ -720,6 +759,36 @@ def load_config() -> Dict[str, Any]:
    return config


+_COMMENTED_SECTIONS = """
+# ── Security ──────────────────────────────────────────────────────────
+# API keys, tokens, and passwords are redacted from tool output by default.
+# Set to false to see full values (useful for debugging auth issues).
+#
+# security:
+#   redact_secrets: false
+
+# ── Fallback Model ────────────────────────────────────────────────────
+# Automatic provider failover when primary is unavailable.
+# Uncomment and configure to enable. Triggers on rate limits (429),
+# overload (529), service errors (503), or connection failures.
+#
+# Supported providers:
+#   openrouter   (OPENROUTER_API_KEY)  — routes to any model
+#   openai-codex (OAuth — hermes login) — OpenAI Codex
+#   nous         (OAuth — hermes login) — Nous Portal
+#   zai          (ZAI_API_KEY)         — Z.AI / GLM
+#   kimi-coding  (KIMI_API_KEY)        — Kimi / Moonshot
+#   minimax      (MINIMAX_API_KEY)     — MiniMax
+#   minimax-cn   (MINIMAX_CN_API_KEY)  — MiniMax (China)
+#
+# For custom OpenAI-compatible endpoints, add base_url and api_key_env.
+#
+# fallback_model:
+#   provider: openrouter
+#   model: anthropic/claude-sonnet-4
+"""
+
+
 def save_config(config: Dict[str, Any]):
    """Save configuration to ~/.hermes/config.yaml."""
    ensure_hermes_home()
@@ -727,6 +796,18 @@ def save_config(config: Dict[str, Any]):
    
    with open(config_path, 'w') as f:
        yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+        # Append the commented-out template for features that are off by default
+        # or only relevant when explicitly configured. The whole template is
+        # written unless the user has already configured both sections.
+        sections = []
+        sec = config.get("security", {})
+        if not sec or sec.get("redact_secrets") is None:
+            sections.append("security")
+        fb = config.get("fallback_model", {})
+        if not fb or not (fb.get("provider") and fb.get("model")):
+            sections.append("fallback")
+        if sections:
+            f.write(_COMMENTED_SECTIONS)


 def load_env() -> Dict[str, str]:
@@ -890,6 +971,31 @@ def show_config():
    if enabled:
        print(f"  Threshold:    {compression.get('threshold', 0.85) * 100:.0f}%")
        print(f"  Model:        {compression.get('summary_model', 'google/gemini-3-flash-preview')}")
+        comp_provider = compression.get('summary_provider', 'auto')
+        if comp_provider != 'auto':
+            print(f"  Provider:     {comp_provider}")
+    
+    # Auxiliary models
+    auxiliary = config.get('auxiliary', {})
+    aux_tasks = {
+        "Vision":      auxiliary.get('vision', {}),
+        "Web extract": auxiliary.get('web_extract', {}),
+    }
+    has_overrides = any(
+        t.get('provider', 'auto') != 'auto' or t.get('model', '')
+        for t in aux_tasks.values()
+    )
+    if has_overrides:
+        print()
+        print(color("◆ Auxiliary Models (overrides)", Colors.CYAN, Colors.BOLD))
+        for label, task_cfg in aux_tasks.items():
+            prov = task_cfg.get('provider', 'auto')
+            mdl = task_cfg.get('model', '')
+            if prov != 'auto' or mdl:
+                parts = [f"provider={prov}"]
+                if mdl:
+                    parts.append(f"model={mdl}")
+                print(f"  {label:12s}  {', '.join(parts)}")
    
    # Messaging
    print()
@@ -947,7 +1053,7 @@ def set_config_value(key: str, value: str):
        'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
        'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
        'SUDO_PASSWORD', 'SLACK_BOT_TOKEN', 'SLACK_APP_TOKEN',
-        'GITHUB_TOKEN', 'HONCHO_API_KEY', 'NOUS_API_KEY', 'WANDB_API_KEY',
+        'GITHUB_TOKEN', 'HONCHO_API_KEY', 'WANDB_API_KEY',
        'TINKER_API_KEY',
    ]
    
@@ -1004,6 +1110,7 @@ def set_config_value(key: str, value: str):
        "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
        "terminal.cwd": "TERMINAL_CWD",
        "terminal.timeout": "TERMINAL_TIMEOUT",
+        "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
    }
    if key in _config_to_env_sync:
        save_env_value(_config_to_env_sync[key], str(value))
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -33,6 +33,26 @@ os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")
 from hermes_cli.colors import Colors, color
 from hermes_constants import OPENROUTER_MODELS_URL

+
+_PROVIDER_ENV_HINTS = (
+    "OPENROUTER_API_KEY",
+    "OPENAI_API_KEY",
+    "ANTHROPIC_API_KEY",
+    "OPENAI_BASE_URL",
+    "GLM_API_KEY",
+    "ZAI_API_KEY",
+    "Z_AI_API_KEY",
+    "KIMI_API_KEY",
+    "MINIMAX_API_KEY",
+    "MINIMAX_CN_API_KEY",
+)
+
+
+def _has_provider_env_config(content: str) -> bool:
+    """Return True when *content* (the ~/.hermes/.env text) mentions any provider auth/base URL variable name."""
+    return any(key in content for key in _PROVIDER_ENV_HINTS)
+
+
 def check_ok(text: str, detail: str = ""):
    print(f"  {color('✓', Colors.GREEN)} {text}" + (f" {color(detail, Colors.DIM)}" if detail else ""))

@@ -132,12 +152,8 @@ def run_doctor(args):
        
        # Check for common issues
        content = env_path.read_text()
-        if any(k in content for k in (
-            "OPENROUTER_API_KEY", "ANTHROPIC_API_KEY",
-            "GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY",
-            "KIMI_API_KEY", "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY",
-        )):
-            check_ok("API key configured")
+        if _has_provider_env_config(content):
+            check_ok("API key or custom endpoint configured")
        else:
            check_warn("No API key found in ~/.hermes/.env")
            issues.append("Run 'hermes setup' to configure API keys")
@@ -492,10 +508,16 @@ def run_doctor(args):
            try:
                import httpx
                _base = os.getenv(_base_env, "")
+                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
+                if not _base and _key.startswith("sk-kimi-"):
+                    _base = "https://api.kimi.com/coding/v1"
                _url = (_base.rstrip("/") + "/models") if _base else _default_url
+                _headers = {"Authorization": f"Bearer {_key}"}
+                if "api.kimi.com" in _url.lower():
+                    _headers["User-Agent"] = "KimiCLI/1.0"
                _resp = httpx.get(
                    _url,
-                    headers={"Authorization": f"Bearer {_key}"},
+                    headers=_headers,
                    timeout=10,
                )
                if _resp.status_code == 200:
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -154,19 +154,33 @@ def get_hermes_cli_path() -> str:
 # =============================================================================

 def generate_systemd_unit() -> str:
+    import shutil
    python_path = get_python_path()
    working_dir = str(PROJECT_ROOT)
+    venv_dir = str(PROJECT_ROOT / "venv")
+    venv_bin = str(PROJECT_ROOT / "venv" / "bin")
+    node_bin = str(PROJECT_ROOT / "node_modules" / ".bin")
+
+    # Build a PATH that includes the venv, node_modules, and standard system dirs
+    sane_path = f"{venv_bin}:{node_bin}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
    
+    hermes_cli = shutil.which("hermes") or f"{python_path} -m hermes_cli.main"
    return f"""[Unit]
 Description={SERVICE_DESCRIPTION}
 After=network.target

 [Service]
 Type=simple
-ExecStart={python_path} -m hermes_cli.main gateway run
+ExecStart={python_path} -m hermes_cli.main gateway run --replace
+ExecStop={hermes_cli} gateway stop
 WorkingDirectory={working_dir}
+Environment="PATH={sane_path}"
+Environment="VIRTUAL_ENV={venv_dir}"
 Restart=on-failure
 RestartSec=10
+KillMode=mixed
+KillSignal=SIGTERM
+TimeoutStopSec=15
 StandardOutput=journal
 StandardError=journal

@@ -377,8 +391,15 @@ def launchd_status(deep: bool = False):
 # Gateway Runner
 # =============================================================================

-def run_gateway(verbose: bool = False):
-    """Run the gateway in foreground."""
+def run_gateway(verbose: bool = False, replace: bool = False):
+    """Run the gateway in foreground.
+    
+    Args:
+        verbose: Enable verbose logging output.
+        replace: If True, kill any existing gateway instance before starting.
+                 This prevents systemd restart loops when the old process
+                 hasn't fully exited yet.
+    """
    sys.path.insert(0, str(PROJECT_ROOT))
    
    from gateway.run import start_gateway
@@ -393,7 +414,7 @@ def run_gateway(verbose: bool = False):
    
    # Exit with code 1 if gateway fails to connect any platform,
    # so systemd Restart=on-failure will retry on transient errors
-    success = asyncio.run(start_gateway())
+    success = asyncio.run(start_gateway(replace=replace))
    if not success:
        sys.exit(1)

@@ -486,6 +507,12 @@ _PLATFORMS = [
        "emoji": "📲",
        "token_var": "WHATSAPP_ENABLED",
    },
+    {
+        "key": "signal",
+        "label": "Signal",
+        "emoji": "📡",
+        "token_var": "SIGNAL_HTTP_URL",
+    },
 ]


@@ -504,6 +531,13 @@ def _platform_status(platform: dict) -> str:
                return "configured + paired"
            return "enabled, not paired"
        return "not configured"
+    if platform.get("key") == "signal":
+        account = get_env_value("SIGNAL_ACCOUNT")
+        if val and account:
+            return "configured"
+        if val or account:
+            return "partially configured"
+        return "not configured"
    if val:
        return "configured"
    return "not configured"
@@ -629,6 +663,121 @@ def _is_service_running() -> bool:
    return len(find_gateway_pids()) > 0


+def _setup_signal():
+    """Interactive setup for Signal messenger."""
+    import shutil
+
+    print()
+    print(color("  ─── 📡 Signal Setup ───", Colors.CYAN))
+
+    existing_url = get_env_value("SIGNAL_HTTP_URL")
+    existing_account = get_env_value("SIGNAL_ACCOUNT")
+    if existing_url and existing_account:
+        print()
+        print_success("Signal is already configured.")
+        if not prompt_yes_no("  Reconfigure Signal?", False):
+            return
+
+    # Check if signal-cli is available
+    print()
+    if shutil.which("signal-cli"):
+        print_success("signal-cli found on PATH.")
+    else:
+        print_warning("signal-cli not found on PATH.")
+        print_info("  Signal requires signal-cli running as an HTTP daemon.")
+        print_info("  Install options:")
+        print_info("    Linux:  sudo apt install signal-cli")
+        print_info("            or download from https://github.com/AsamK/signal-cli")
+        print_info("    macOS:  brew install signal-cli")
+        print_info("    Docker: bbernhard/signal-cli-rest-api")
+        print()
+        print_info("  After installing, link your account and start the daemon:")
+        print_info("    signal-cli link -n \"HermesAgent\"")
+        print_info("    signal-cli --account +YOURNUMBER daemon --http 127.0.0.1:8080")
+        print()
+
+    # HTTP URL
+    print()
+    print_info("  Enter the URL where signal-cli HTTP daemon is running.")
+    default_url = existing_url or "http://127.0.0.1:8080"
+    try:
+        url = input(f"  HTTP URL [{default_url}]: ").strip() or default_url
+    except (EOFError, KeyboardInterrupt):
+        print("\n  Setup cancelled.")
+        return
+
+    # Test connectivity
+    print_info("  Testing connection...")
+    try:
+        import httpx
+        resp = httpx.get(f"{url.rstrip('/')}/api/v1/check", timeout=10.0)
+        if resp.status_code == 200:
+            print_success("  signal-cli daemon is reachable!")
+        else:
+            print_warning(f"  signal-cli responded with status {resp.status_code}.")
+            if not prompt_yes_no("  Continue anyway?", False):
+                return
+    except Exception as e:
+        print_warning(f"  Could not reach signal-cli at {url}: {e}")
+        if not prompt_yes_no("  Save this URL anyway? (you can start signal-cli later)", True):
+            return
+
+    save_env_value("SIGNAL_HTTP_URL", url)
+
+    # Account phone number
+    print()
+    print_info("  Enter your Signal account phone number in E.164 format.")
+    print_info("  Example: +15551234567")
+    default_account = existing_account or ""
+    try:
+        account = input(f"  Account number{f' [{default_account}]' if default_account else ''}: ").strip()
+        if not account:
+            account = default_account
+    except (EOFError, KeyboardInterrupt):
+        print("\n  Setup cancelled.")
+        return
+
+    if not account:
+        print_error("  Account number is required.")
+        return
+
+    save_env_value("SIGNAL_ACCOUNT", account)
+
+    # Allowed users
+    print()
+    print_info("  The gateway DENIES all users by default for security.")
+    print_info("  Enter phone numbers or UUIDs of allowed users (comma-separated).")
+    existing_allowed = get_env_value("SIGNAL_ALLOWED_USERS") or ""
+    default_allowed = existing_allowed or account
+    try:
+        allowed = input(f"  Allowed users [{default_allowed}]: ").strip() or default_allowed
+    except (EOFError, KeyboardInterrupt):
+        print("\n  Setup cancelled.")
+        return
+
+    save_env_value("SIGNAL_ALLOWED_USERS", allowed)
+
+    # Group messaging
+    print()
+    if prompt_yes_no("  Enable group messaging? (disabled by default for security)", False):
+        print()
+        print_info("  Enter group IDs to allow, or * for all groups.")
+        existing_groups = get_env_value("SIGNAL_GROUP_ALLOWED_USERS") or ""
+        try:
+            groups = input(f"  Group IDs [{existing_groups or '*'}]: ").strip() or existing_groups or "*"
+        except (EOFError, KeyboardInterrupt):
+            print("\n  Setup cancelled.")
+            return
+        save_env_value("SIGNAL_GROUP_ALLOWED_USERS", groups)
+
+    print()
+    print_success("Signal configured!")
+    print_info(f"  URL: {url}")
+    print_info(f"  Account: {account}")
+    print_info(f"  DM auth: via SIGNAL_ALLOWED_USERS + DM pairing")
+    print_info(f"  Groups: {'enabled' if get_env_value('SIGNAL_GROUP_ALLOWED_USERS') else 'disabled'}")
+
+
 def gateway_setup():
    """Interactive setup for messaging platforms + gateway service."""

@@ -681,6 +830,8 @@ def gateway_setup():

        if platform["key"] == "whatsapp":
            _setup_whatsapp()
+        elif platform["key"] == "signal":
+            _setup_signal()
        else:
            _setup_standard_platform(platform)

@@ -765,7 +916,8 @@ def gateway_command(args):
    # Default to run if no subcommand
    if subcmd is None or subcmd == "run":
        verbose = getattr(args, 'verbose', False)
-        run_gateway(verbose)
+        replace = getattr(args, 'replace', False)
+        run_gateway(verbose, replace=replace)
        return

    if subcmd == "setup":
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -21,6 +21,7 @@ Usage:
    hermes version             # Show version
    hermes update              # Update to latest version
    hermes uninstall           # Uninstall Hermes Agent
+    hermes sessions browse     # Interactive session picker with search
 """

 import argparse
@@ -106,6 +107,279 @@ def _has_any_provider_configured() -> bool:
    return False


+def _session_browse_picker(sessions: list) -> Optional[str]:
+    """Interactive curses-based session browser with live search filtering.
+
+    Returns the selected session ID, or None if cancelled.
+    Uses curses (not simple_term_menu) to avoid the ghost-duplication rendering
+    bug in tmux/iTerm when arrow keys are used.
+    """
+    if not sessions:
+        print("No sessions found.")
+        return None
+
+    # Try curses-based picker first
+    try:
+        import curses
+        import time as _time
+        from datetime import datetime
+
+        result_holder = [None]
+
+        def _relative_time(ts):
+            if not ts:
+                return "?"
+            delta = _time.time() - ts
+            if delta < 60:
+                return "just now"
+            elif delta < 3600:
+                return f"{int(delta / 60)}m ago"
+            elif delta < 86400:
+                return f"{int(delta / 3600)}h ago"
+            elif delta < 172800:
+                return "yesterday"
+            elif delta < 604800:
+                return f"{int(delta / 86400)}d ago"
+            else:
+                return datetime.fromtimestamp(ts).strftime("%Y-%m-%d")
+
+        def _format_row(s, max_x):
+            """Format a session row for display."""
+            title = (s.get("title") or "").strip()
+            preview = (s.get("preview") or "").strip()
+            source = s.get("source", "")[:6]
+            last_active = _relative_time(s.get("last_active"))
+            sid = s["id"][:18]
+
+            # Adaptive column widths based on terminal width
+            # Layout: [arrow 3] [title/preview flexible] [active 12] [src 6] [id 18]
+            fixed_cols = 3 + 12 + 6 + 18 + 6  # arrow + active + src + id + padding
+            name_width = max(20, max_x - fixed_cols)
+
+            if title:
+                name = title[:name_width]
+            elif preview:
+                name = preview[:name_width]
+            else:
+                name = sid
+
+            return f"{name:<{name_width}}  {last_active:<10}  {source:<5} {sid}"
+
+        def _match(s, query):
+            """Check if a session matches the search query (case-insensitive)."""
+            q = query.lower()
+            return (
+                q in (s.get("title") or "").lower()
+                or q in (s.get("preview") or "").lower()
+                or q in s.get("id", "").lower()
+                or q in (s.get("source") or "").lower()
+            )
+
+        def _curses_browse(stdscr):
+            curses.curs_set(0)
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(1, curses.COLOR_GREEN, -1)   # selected
+                curses.init_pair(2, curses.COLOR_YELLOW, -1)  # header
+                curses.init_pair(3, curses.COLOR_CYAN, -1)    # search
+                curses.init_pair(4, 8, -1)                    # dim
+
+            cursor = 0
+            scroll_offset = 0
+            search_text = ""
+            filtered = list(sessions)
+
+            while True:
+                stdscr.clear()
+                max_y, max_x = stdscr.getmaxyx()
+                if max_y < 5 or max_x < 40:
+                    # Terminal too small
+                    try:
+                        stdscr.addstr(0, 0, "Terminal too small")
+                    except curses.error:
+                        pass
+                    stdscr.refresh()
+                    stdscr.getch()
+                    return
+
+                # Header line
+                if search_text:
+                    header = f"  Browse sessions — filter: {search_text}█"
+                    header_attr = curses.A_BOLD
+                    if curses.has_colors():
+                        header_attr |= curses.color_pair(3)
+                else:
+                    header = "  Browse sessions — ↑↓ navigate  Enter select  Type to filter  Esc quit"
+                    header_attr = curses.A_BOLD
+                    if curses.has_colors():
+                        header_attr |= curses.color_pair(2)
+                try:
+                    stdscr.addnstr(0, 0, header, max_x - 1, header_attr)
+                except curses.error:
+                    pass
+
+                # Column header line
+                fixed_cols = 3 + 12 + 6 + 18 + 6
+                name_width = max(20, max_x - fixed_cols)
+                col_header = f"   {'Title / Preview':<{name_width}}  {'Active':<10}  {'Src':<5} {'ID'}"
+                try:
+                    dim_attr = curses.color_pair(4) if curses.has_colors() else curses.A_DIM
+                    stdscr.addnstr(1, 0, col_header, max_x - 1, dim_attr)
+                except curses.error:
+                    pass
+
+                # Compute visible area
+                visible_rows = max_y - 4  # header + col header + blank + footer
+                if visible_rows < 1:
+                    visible_rows = 1
+
+                # Clamp cursor and scroll
+                if not filtered:
+                    try:
+                        msg = "  No sessions match the filter."
+                        stdscr.addnstr(3, 0, msg, max_x - 1, curses.A_DIM)
+                    except curses.error:
+                        pass
+                else:
+                    if cursor >= len(filtered):
+                        cursor = len(filtered) - 1
+                    if cursor < 0:
+                        cursor = 0
+                    if cursor < scroll_offset:
+                        scroll_offset = cursor
+                    elif cursor >= scroll_offset + visible_rows:
+                        scroll_offset = cursor - visible_rows + 1
+
+                    for draw_i, i in enumerate(range(
+                        scroll_offset,
+                        min(len(filtered), scroll_offset + visible_rows)
+                    )):
+                        y = draw_i + 3
+                        if y >= max_y - 1:
+                            break
+                        s = filtered[i]
+                        arrow = " → " if i == cursor else "   "
+                        row = arrow + _format_row(s, max_x - 3)
+                        attr = curses.A_NORMAL
+                        if i == cursor:
+                            attr = curses.A_BOLD
+                            if curses.has_colors():
+                                attr |= curses.color_pair(1)
+                        try:
+                            stdscr.addnstr(y, 0, row, max_x - 1, attr)
+                        except curses.error:
+                            pass
+
+                # Footer
+                footer_y = max_y - 1
+                if filtered:
+                    footer = f"  {cursor + 1}/{len(filtered)} sessions"
+                    if len(filtered) < len(sessions):
+                        footer += f" (filtered from {len(sessions)})"
+                else:
+                    footer = f"  0/{len(sessions)} sessions"
+                try:
+                    stdscr.addnstr(footer_y, 0, footer, max_x - 1,
+                                   curses.color_pair(4) if curses.has_colors() else curses.A_DIM)
+                except curses.error:
+                    pass
+
+                stdscr.refresh()
+                key = stdscr.getch()
+
+                if key in (curses.KEY_UP, ):
+                    if filtered:
+                        cursor = (cursor - 1) % len(filtered)
+                elif key in (curses.KEY_DOWN, ):
+                    if filtered:
+                        cursor = (cursor + 1) % len(filtered)
+                elif key in (curses.KEY_ENTER, 10, 13):
+                    if filtered:
+                        result_holder[0] = filtered[cursor]["id"]
+                    return
+                elif key == 27:  # Esc
+                    if search_text:
+                        # First Esc clears the search
+                        search_text = ""
+                        filtered = list(sessions)
+                        cursor = 0
+                        scroll_offset = 0
+                    else:
+                        # Second Esc exits
+                        return
+                elif key in (curses.KEY_BACKSPACE, 127, 8):
+                    if search_text:
+                        search_text = search_text[:-1]
+                        if search_text:
+                            filtered = [s for s in sessions if _match(s, search_text)]
+                        else:
+                            filtered = list(sessions)
+                        cursor = 0
+                        scroll_offset = 0
+                elif key == ord('q') and not search_text:
+                    return
+                elif 32 <= key <= 126:
+                    # Printable character → add to search filter
+                    search_text += chr(key)
+                    filtered = [s for s in sessions if _match(s, search_text)]
+                    cursor = 0
+                    scroll_offset = 0
+
+        curses.wrapper(_curses_browse)
+        return result_holder[0]
+
+    except Exception:
+        pass
+
+    # Fallback: numbered list (Windows without curses, etc.)
+    import time as _time
+    from datetime import datetime
+
+    def _relative_time_fb(ts):
+        if not ts:
+            return "?"
+        delta = _time.time() - ts
+        if delta < 60:
+            return "just now"
+        elif delta < 3600:
+            return f"{int(delta / 60)}m ago"
+        elif delta < 86400:
+            return f"{int(delta / 3600)}h ago"
+        elif delta < 172800:
+            return "yesterday"
+        elif delta < 604800:
+            return f"{int(delta / 86400)}d ago"
+        else:
+            return datetime.fromtimestamp(ts).strftime("%Y-%m-%d")
+
+    print("\n  Browse sessions  (enter number to resume, q to cancel)\n")
+    for i, s in enumerate(sessions):
+        title = (s.get("title") or "").strip()
+        preview = (s.get("preview") or "").strip()
+        label = title or preview or s["id"]
+        if len(label) > 50:
+            label = label[:47] + "..."
+        last_active = _relative_time_fb(s.get("last_active"))
+        src = s.get("source", "")[:6]
+        print(f"  {i + 1:>3}. {label:<50}  {last_active:<10}  {src}")
+
+    while True:
+        try:
+            val = input(f"\n  Select [1-{len(sessions)}]: ").strip()
+            if not val or val.lower() in ("q", "quit", "exit"):
+                return None
+            idx = int(val) - 1
+            if 0 <= idx < len(sessions):
+                return sessions[idx]["id"]
+            print(f"  Invalid selection. Enter 1-{len(sessions)} or q to cancel.")
+        except ValueError:
+            print(f"  Invalid input. Enter a number or q to cancel.")
+        except (KeyboardInterrupt, EOFError):
+            print()
+            return None
+
+
 def _resolve_last_cli_session() -> Optional[str]:
    """Look up the most recent CLI session ID from SQLite. Returns None if unavailable."""
    try:
@@ -120,16 +394,63 @@ def _resolve_last_cli_session() -> Optional[str]:
    return None


+def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]:
+    """Resolve a session name (title) or ID to a session ID.
+
+    An exact session-ID lookup is always tried first; only when no session
+    has that ID is the value treated as a title and handed to
+    resolve_session_by_title. Returns None on no match or on any error.
+    """
+    try:
+        from hermes_state import SessionDB
+        db = SessionDB()
+
+        try:
+            # Try as exact session ID first
+            session = db.get_session(name_or_id)
+            if session:
+                return session["id"]
+            # Try as title (with auto-latest for lineage)
+            return db.resolve_session_by_title(name_or_id)
+        finally:
+            # Always release the handle, even when a lookup raises.
+            db.close()
+    except Exception:
+        pass
+    return None
+
+
 def cmd_chat(args):
    """Run interactive chat CLI."""
-    # Resolve --continue into --resume with the latest CLI session
-    if getattr(args, "continue_last", False) and not getattr(args, "resume", None):
-        last_id = _resolve_last_cli_session()
-        if last_id:
-            args.resume = last_id
+    # Resolve --continue into --resume with the latest CLI session or by name
+    continue_val = getattr(args, "continue_last", None)
+    if continue_val and not getattr(args, "resume", None):
+        if isinstance(continue_val, str):
+            # -c "session name" — resolve by title or ID
+            resolved = _resolve_session_by_name_or_id(continue_val)
+            if resolved:
+                args.resume = resolved
+            else:
+                print(f"No session found matching '{continue_val}'.")
+                print("Use 'hermes sessions list' to see available sessions.")
+                sys.exit(1)
        else:
-            print("No previous CLI session found to continue.")
-            sys.exit(1)
+            # -c with no argument — continue the most recent session
+            last_id = _resolve_last_cli_session()
+            if last_id:
+                args.resume = last_id
+            else:
+                print("No previous CLI session found to continue.")
+                sys.exit(1)
+
+    # Resolve --resume by title if it's not a direct session ID
+    resume_val = getattr(args, "resume", None)
+    if resume_val:
+        resolved = _resolve_session_by_name_or_id(resume_val)
+        if resolved:
+            args.resume = resolved
+        # If resolution fails, keep the original value — _init_agent will
+        # report "Session not found" with the original input

    # First-run guard: check if any provider is configured before launching
    if not _has_any_provider_configured():
@@ -167,6 +488,7 @@ def cmd_chat(args):
        "verbose": args.verbose,
        "query": args.query,
        "resume": getattr(args, "resume", None),
+        "worktree": getattr(args, "worktree", False),
    }
    # Filter out None values
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
@@ -1208,8 +1530,9 @@ def main():
 Examples:
    hermes                        Start interactive chat
    hermes chat -q "Hello"        Single query mode
-    hermes --continue             Resume the most recent session
-    hermes --resume <session_id>  Resume a specific session
+    hermes -c                     Resume the most recent session
+    hermes -c "my project"        Resume a session by name (latest in lineage)
+    hermes --resume <session_id>  Resume a specific session by ID
    hermes setup                  Run setup wizard
    hermes logout                 Clear stored authentication
    hermes model                  Select default model
@@ -1217,8 +1540,11 @@ Examples:
    hermes config edit            Edit config in $EDITOR
    hermes config set model gpt-4 Set a config value
    hermes gateway                Run messaging gateway
+    hermes -w                     Start in isolated git worktree
    hermes gateway install        Install as system service
    hermes sessions list          List past sessions
+    hermes sessions browse        Interactive session picker
+    hermes sessions rename ID T   Rename/title a session
    hermes update                 Update to latest version

 For more help on a command:
@@ -1233,16 +1559,24 @@ For more help on a command:
    )
    parser.add_argument(
        "--resume", "-r",
-        metavar="SESSION_ID",
+        metavar="SESSION",
        default=None,
-        help="Resume a previous session by ID (shortcut for: hermes chat --resume ID)"
+        help="Resume a previous session by ID or title"
    )
    parser.add_argument(
        "--continue", "-c",
        dest="continue_last",
+        nargs="?",
+        const=True,
+        default=None,
+        metavar="SESSION_NAME",
+        help="Resume a session by name, or the most recent if no name given"
+    )
+    parser.add_argument(
+        "--worktree", "-w",
        action="store_true",
        default=False,
-        help="Resume the most recent CLI session"
+        help="Run in an isolated git worktree (for parallel agents)"
    )
    
    subparsers = parser.add_subparsers(dest="command", help="Command to run")
@@ -1286,9 +1620,17 @@ For more help on a command:
    chat_parser.add_argument(
        "--continue", "-c",
        dest="continue_last",
+        nargs="?",
+        const=True,
+        default=None,
+        metavar="SESSION_NAME",
+        help="Resume a session by name, or the most recent if no name given"
+    )
+    chat_parser.add_argument(
+        "--worktree", "-w",
        action="store_true",
        default=False,
-        help="Resume the most recent CLI session"
+        help="Run in an isolated git worktree (for parallel agents on the same repo)"
    )
    chat_parser.set_defaults(func=cmd_chat)

@@ -1315,6 +1657,8 @@ For more help on a command:
    # gateway run (default)
    gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground")
    gateway_run.add_argument("-v", "--verbose", action="store_true")
+    gateway_run.add_argument("--replace", action="store_true",
+                             help="Replace any existing gateway instance (useful for systemd)")
    
    # gateway start
    gateway_start = gateway_subparsers.add_parser("start", help="Start gateway service")
@@ -1655,7 +1999,7 @@ For more help on a command:
    # =========================================================================
    sessions_parser = subparsers.add_parser(
        "sessions",
-        help="Manage session history (list, export, prune, delete)",
+        help="Manage session history (list, rename, export, prune, delete)",
        description="View and manage the SQLite session store"
    )
    sessions_subparsers = sessions_parser.add_subparsers(dest="sessions_action")
@@ -1680,6 +2024,17 @@ For more help on a command:

    sessions_stats = sessions_subparsers.add_parser("stats", help="Show session store statistics")

+    sessions_rename = sessions_subparsers.add_parser("rename", help="Set or change a session's title")
+    sessions_rename.add_argument("session_id", help="Session ID to rename")
+    sessions_rename.add_argument("title", nargs="+", help="New title for the session")
+
+    sessions_browse = sessions_subparsers.add_parser(
+        "browse",
+        help="Interactive session picker — browse, search, and resume sessions",
+    )
+    sessions_browse.add_argument("--source", help="Filter by source (cli, telegram, discord, etc.)")
+    sessions_browse.add_argument("--limit", type=int, default=50, help="Max sessions to load (default: 50)")
+
    def cmd_sessions(args):
        import json as _json
        try:
@@ -1692,18 +2047,51 @@ For more help on a command:
        action = args.sessions_action

        if action == "list":
-            sessions = db.search_sessions(source=args.source, limit=args.limit)
+            sessions = db.list_sessions_rich(source=args.source, limit=args.limit)
            if not sessions:
                print("No sessions found.")
                return
-            print(f"{'ID':<30} {'Source':<12} {'Model':<30} {'Messages':>8} {'Started'}")
-            print("─" * 100)
            from datetime import datetime
+            import time as _time
+
+            def _relative_time(ts):
+                """Format a timestamp as relative time (e.g., '2h ago', 'yesterday')."""
+                if not ts:
+                    return "?"
+                delta = _time.time() - ts
+                if delta < 60:
+                    return "just now"
+                elif delta < 3600:
+                    mins = int(delta / 60)
+                    return f"{mins}m ago"
+                elif delta < 86400:
+                    hours = int(delta / 3600)
+                    return f"{hours}h ago"
+                elif delta < 172800:
+                    return "yesterday"
+                elif delta < 604800:
+                    days = int(delta / 86400)
+                    return f"{days}d ago"
+                else:
+                    return datetime.fromtimestamp(ts).strftime("%Y-%m-%d")
+
+            has_titles = any(s.get("title") for s in sessions)
+            if has_titles:
+                print(f"{'Title':<22} {'Preview':<40} {'Last Active':<13} {'ID'}")
+                print("─" * 100)
+            else:
+                print(f"{'Preview':<50} {'Last Active':<13} {'Src':<6} {'ID'}")
+                print("─" * 90)
            for s in sessions:
-                started = datetime.fromtimestamp(s["started_at"]).strftime("%Y-%m-%d %H:%M") if s["started_at"] else "?"
-                model = (s.get("model") or "?")[:28]
-                ended = " (ended)" if s.get("ended_at") else ""
-                print(f"{s['id']:<30} {s['source']:<12} {model:<30} {s['message_count']:>8} {started}{ended}")
+                last_active = _relative_time(s.get("last_active"))
+                preview = s.get("preview", "")[:38] if has_titles else s.get("preview", "")[:48]
+                if has_titles:
+                    title = (s.get("title") or "—")[:20]
+                    sid = s["id"][:20]
+                    print(f"{title:<22} {preview:<40} {last_active:<13} {sid}")
+                else:
+                    sid = s["id"][:20]
+                    print(f"{preview:<50} {last_active:<13} {s['source']:<6} {sid}")

        elif action == "export":
            if args.session_id:
@@ -1743,6 +2131,44 @@ For more help on a command:
            count = db.prune_sessions(older_than_days=days, source=args.source)
            print(f"Pruned {count} session(s).")

+        elif action == "rename":
+            title = " ".join(args.title)
+            try:
+                if db.set_session_title(args.session_id, title):
+                    print(f"Session '{args.session_id}' renamed to: {title}")
+                else:
+                    print(f"Session '{args.session_id}' not found.")
+            except ValueError as e:
+                print(f"Error: {e}")
+
+        elif action == "browse":
+            limit = getattr(args, "limit", 50) or 50
+            source = getattr(args, "source", None)
+            sessions = db.list_sessions_rich(source=source, limit=limit)
+            db.close()
+            if not sessions:
+                print("No sessions found.")
+                return
+
+            selected_id = _session_browse_picker(sessions)
+            if not selected_id:
+                print("Cancelled.")
+                return
+
+            # Launch hermes --resume <id> by replacing the current process
+            print(f"Resuming session: {selected_id}")
+            import shutil
+            hermes_bin = shutil.which("hermes")
+            if hermes_bin:
+                os.execvp(hermes_bin, ["hermes", "--resume", selected_id])
+            else:
+                # Fallback: re-invoke via python -m
+                os.execvp(
+                    sys.executable,
+                    [sys.executable, "-m", "hermes_cli.main", "--resume", selected_id],
+                )
+            return  # won't reach here after execvp
+
        elif action == "stats":
            total = db.session_count()
            msgs = db.message_count()
@@ -1752,7 +2178,6 @@ For more help on a command:
                c = db.session_count(source=src)
                if c > 0:
                    print(f"  {src}: {c} sessions")
-            import os
            db_path = db.db_path
            if db_path.exists():
                size_mb = os.path.getsize(db_path) / (1024 * 1024)
@@ -1848,6 +2273,8 @@ For more help on a command:
        args.provider = None
        args.toolsets = None
        args.verbose = False
+        if not hasattr(args, "worktree"):
+            args.worktree = False
        cmd_chat(args)
        return
    
@@ -1859,7 +2286,9 @@ For more help on a command:
        args.toolsets = None
        args.verbose = False
        args.resume = None
-        args.continue_last = False
+        args.continue_last = None
+        if not hasattr(args, "worktree"):
+            args.worktree = False
        cmd_chat(args)
        return
    
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -1,10 +1,18 @@
 """
-Canonical list of OpenRouter models offered in CLI and setup wizards.
+Canonical model catalogs and lightweight validation helpers.

 Add, remove, or reorder entries here — both `hermes setup` and
 `hermes` provider-selection will pick up the change automatically.
 """

+from __future__ import annotations
+
+import json
+import urllib.request
+import urllib.error
+from difflib import get_close_matches
+from typing import Any, Optional
+
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.6",       "recommended"),
@@ -14,17 +22,64 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("openai/gpt-5.3-codex",            ""),
    ("google/gemini-3-pro-preview",     ""),
    ("google/gemini-3-flash-preview",   ""),
-    ("qwen/qwen3.5-plus-02-15",        ""),
-    ("qwen/qwen3.5-35b-a3b",           ""),
+    ("qwen/qwen3.5-plus-02-15",         ""),
+    ("qwen/qwen3.5-35b-a3b",            ""),
    ("stepfun/step-3.5-flash",          ""),
    ("z-ai/glm-5",                      ""),
    ("moonshotai/kimi-k2.5",            ""),
    ("minimax/minimax-m2.5",            ""),
 ]

+_PROVIDER_MODELS: dict[str, list[str]] = {
+    "zai": [
+        "glm-5",
+        "glm-4.7",
+        "glm-4.5",
+        "glm-4.5-flash",
+    ],
+    "kimi-coding": [
+        "kimi-k2.5",
+        "kimi-k2-thinking",
+        "kimi-k2-turbo-preview",
+        "kimi-k2-0905-preview",
+    ],
+    "minimax": [
+        "MiniMax-M2.5",
+        "MiniMax-M2.5-highspeed",
+        "MiniMax-M2.1",
+    ],
+    "minimax-cn": [
+        "MiniMax-M2.5",
+        "MiniMax-M2.5-highspeed",
+        "MiniMax-M2.1",
+    ],
+}
+
+_PROVIDER_LABELS = {
+    "openrouter": "OpenRouter",
+    "openai-codex": "OpenAI Codex",
+    "nous": "Nous Portal",
+    "zai": "Z.AI / GLM",
+    "kimi-coding": "Kimi / Moonshot",
+    "minimax": "MiniMax",
+    "minimax-cn": "MiniMax (China)",
+    "custom": "custom endpoint",
+}
+
+_PROVIDER_ALIASES = {
+    "glm": "zai",
+    "z-ai": "zai",
+    "z.ai": "zai",
+    "zhipu": "zai",
+    "kimi": "kimi-coding",
+    "moonshot": "kimi-coding",
+    "minimax-china": "minimax-cn",
+    "minimax_cn": "minimax-cn",
+}
+

 def model_ids() -> list[str]:
-    """Return just the model-id strings (convenience helper)."""
+    """Return just the OpenRouter model-id strings."""
    return [mid for mid, _ in OPENROUTER_MODELS]


@@ -34,3 +89,231 @@ def menu_labels() -> list[str]:
    for mid, desc in OPENROUTER_MODELS:
        labels.append(f"{mid} ({desc})" if desc else mid)
    return labels
+
+
+# All provider IDs and aliases that are valid for the provider:model syntax.
+_KNOWN_PROVIDER_NAMES: set[str] = (
+    set(_PROVIDER_LABELS.keys())
+    | set(_PROVIDER_ALIASES.keys())
+    | {"openrouter", "custom"}
+)
+
+
+def list_available_providers() -> list[dict[str, Any]]:
+    """Return info about all providers the user could use with ``provider:model``.
+
+    Each dict has ``id``, ``label``, ``aliases``, and ``authenticated``.
+    ``authenticated`` is True when the resolved runtime has an ``api_key``.
+    """
+    # Canonical providers in display order
+    _PROVIDER_ORDER = [
+        "openrouter", "nous", "openai-codex",
+        "zai", "kimi-coding", "minimax", "minimax-cn",
+    ]
+    # Build reverse alias map
+    aliases_for: dict[str, list[str]] = {}
+    for alias, canonical in _PROVIDER_ALIASES.items():
+        aliases_for.setdefault(canonical, []).append(alias)
+
+    result = []
+    for pid in _PROVIDER_ORDER:
+        label = _PROVIDER_LABELS.get(pid, pid)
+        alias_list = aliases_for.get(pid, [])
+        # Check if this provider has credentials available
+        has_creds = False
+        try:
+            from hermes_cli.runtime_provider import resolve_runtime_provider
+            runtime = resolve_runtime_provider(requested=pid)
+            has_creds = bool(runtime.get("api_key"))
+        except Exception:
+            pass
+        result.append({
+            "id": pid,
+            "label": label,
+            "aliases": alias_list,
+            "authenticated": has_creds,
+        })
+    return result
+
+
+def parse_model_input(raw: str, current_provider: str) -> tuple[str, str]:
+    """Parse ``/model`` input into ``(provider, model)``.
+
+    Supports ``provider:model`` syntax to switch providers at runtime::
+
+        openrouter:anthropic/claude-sonnet-4.5  →  ("openrouter", "anthropic/claude-sonnet-4.5")
+        nous:hermes-3                           →  ("nous", "hermes-3")
+        anthropic/claude-sonnet-4.5             →  (current_provider, "anthropic/claude-sonnet-4.5")
+        gpt-5.4                                 →  (current_provider, "gpt-5.4")
+
+    The colon is only treated as a provider delimiter if the left side is a
+    recognized provider name or alias.  This avoids misinterpreting model names
+    that happen to contain colons (e.g. ``anthropic/claude-3.5-sonnet:beta``).
+
+    Returns ``(provider, model)`` where *provider* is either the explicit
+    provider from the input or *current_provider* if none was specified.
+    """
+    stripped = raw.strip()
+    colon = stripped.find(":")
+    if colon > 0:
+        provider_part = stripped[:colon].strip().lower()
+        model_part = stripped[colon + 1:].strip()
+        if provider_part and model_part and provider_part in _KNOWN_PROVIDER_NAMES:
+            return (normalize_provider(provider_part), model_part)
+    return (current_provider, stripped)
+
+
+def curated_models_for_provider(provider: Optional[str]) -> list[tuple[str, str]]:
+    """Return ``(model_id, description)`` tuples for a provider's curated list."""
+    normalized = normalize_provider(provider)
+    if normalized == "openrouter":
+        return list(OPENROUTER_MODELS)
+    models = _PROVIDER_MODELS.get(normalized, [])
+    return [(m, "") for m in models]
+
+
+def normalize_provider(provider: Optional[str]) -> str:
+    """Normalize provider aliases to Hermes' canonical provider ids.
+
+    Note: ``"auto"`` passes through unchanged — use
+    ``hermes_cli.auth.resolve_provider()`` to resolve it to a concrete
+    provider based on credentials and environment.
+    """
+    normalized = (provider or "openrouter").strip().lower()
+    return _PROVIDER_ALIASES.get(normalized, normalized)
+
+
+def provider_model_ids(provider: Optional[str]) -> list[str]:
+    """Return the best known model catalog for a provider."""
+    normalized = normalize_provider(provider)
+    if normalized == "openrouter":
+        return model_ids()
+    if normalized == "openai-codex":
+        from hermes_cli.codex_models import get_codex_model_ids
+
+        return get_codex_model_ids()
+    return list(_PROVIDER_MODELS.get(normalized, []))
+
+
+def fetch_api_models(
+    api_key: Optional[str],
+    base_url: Optional[str],
+    timeout: float = 5.0,
+) -> Optional[list[str]]:
+    """Fetch the list of available model IDs from the provider's ``/models`` endpoint.
+
+    Returns a list of model ID strings, or ``None`` if the endpoint could not
+    be reached (network error, timeout, auth failure, etc.).
+    """
+    if not base_url:
+        return None
+
+    url = base_url.rstrip("/") + "/models"
+    headers: dict[str, str] = {}
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
+
+    req = urllib.request.Request(url, headers=headers)
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            data = json.loads(resp.read().decode())
+            # Standard OpenAI format: {"data": [{"id": "model-name", ...}, ...]}
+            return [m.get("id", "") for m in data.get("data", [])]
+    except Exception:
+        return None
+
+
+def validate_requested_model(
+    model_name: str,
+    provider: Optional[str],
+    *,
+    api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
+) -> dict[str, Any]:
+    """
+    Validate a ``/model`` value for the active provider.
+
+    Performs format checks first, then probes the live ``/models`` endpoint;
+    if it cannot be reached, falls back to the static provider catalog.
+
+    Returns a dict with:
+      - accepted: whether the CLI should switch to the requested model now
+      - persist: whether it is safe to save to config
+      - recognized: whether the live API or a known catalog lists the model
+      - message: optional warning / guidance for the user
+    """
+    requested = (model_name or "").strip()
+    normalized = normalize_provider(provider)
+    if normalized == "openrouter" and base_url and "openrouter.ai" not in base_url:
+        normalized = "custom"
+
+    if not requested:
+        return {
+            "accepted": False,
+            "persist": False,
+            "recognized": False,
+            "message": "Model name cannot be empty.",
+        }
+
+    if any(ch.isspace() for ch in requested):
+        return {
+            "accepted": False,
+            "persist": False,
+            "recognized": False,
+            "message": "Model names cannot contain spaces.",
+        }
+
+    # Probe the live API to check if the model actually exists
+    api_models = fetch_api_models(api_key, base_url)
+
+    if api_models is not None:
+        if requested in set(api_models):
+            # API confirmed the model exists
+            return {
+                "accepted": True,
+                "persist": True,
+                "recognized": True,
+                "message": None,
+            }
+        else:
+            # API responded but model is not listed
+            suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5)
+            suggestion_text = ""
+            if suggestions:
+                suggestion_text = "\n  Did you mean: " + ", ".join(f"`{s}`" for s in suggestions)
+
+            return {
+                "accepted": False,
+                "persist": False,
+                "recognized": False,
+                "message": (
+                    f"Error: `{requested}` is not a valid model for this provider."
+                    f"{suggestion_text}"
+                ),
+            }
+
+    # api_models is None — couldn't reach API, fall back to catalog check
+    provider_label = _PROVIDER_LABELS.get(normalized, normalized)
+    known_models = provider_model_ids(normalized)
+
+    if requested in known_models:
+        return {
+            "accepted": True,
+            "persist": True,
+            "recognized": True,
+            "message": None,
+        }
+
+    # Can't validate — accept for session only
+    suggestion = get_close_matches(requested, known_models, n=1, cutoff=0.6)
+    suggestion_text = f" Did you mean `{suggestion[0]}`?" if suggestion else ""
+    return {
+        "accepted": True,
+        "persist": False,
+        "recognized": False,
+        "message": (
+            f"Could not validate `{requested}` against the live {provider_label} API. "
+            "Using it for this session only; config unchanged."
+            f"{suggestion_text}"
+        ),
+    }
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -667,16 +667,17 @@ def setup_model_provider(config: dict):
        print_header("Z.AI / GLM API Key")
        pconfig = PROVIDER_REGISTRY["zai"]
        print_info(f"Provider: {pconfig.name}")
-        print_info(f"Base URL: {pconfig.inference_base_url}")
        print_info("Get your API key at: https://open.bigmodel.cn/")
        print()

        existing_key = get_env_value("GLM_API_KEY") or get_env_value("ZAI_API_KEY")
+        api_key = existing_key  # will be overwritten if user enters a new one
        if existing_key:
            print_info(f"Current: {existing_key[:8]}... (configured)")
            if prompt_yes_no("Update API key?", False):
-                api_key = prompt("  GLM API key", password=True)
-                if api_key:
+                new_key = prompt("  GLM API key", password=True)
+                if new_key:
+                    api_key = new_key
                    save_env_value("GLM_API_KEY", api_key)
                    print_success("GLM API key updated")
        else:
@@ -687,11 +688,32 @@ def setup_model_provider(config: dict):
            else:
                print_warning("Skipped - agent won't work without an API key")

+        # Detect the correct z.ai endpoint for this key.
+        # Z.AI has separate billing for general vs coding plans and
+        # global vs China endpoints — we probe to find the right one.
+        zai_base_url = pconfig.inference_base_url
+        if api_key:
+            print()
+            print_info("Detecting your z.ai endpoint...")
+            from hermes_cli.auth import detect_zai_endpoint
+            detected = detect_zai_endpoint(api_key)
+            if detected:
+                zai_base_url = detected["base_url"]
+                print_success(f"Detected: {detected['label']} endpoint")
+                print_info(f"  URL: {detected['base_url']}")
+                if detected["id"].startswith("coding"):
+                    print_info(f"  Note: Coding Plan detected — GLM-5 is not available, using {detected['model']}")
+                save_env_value("GLM_BASE_URL", zai_base_url)
+            else:
+                print_warning("Could not verify any z.ai endpoint with this key.")
+                print_info(f"  Using default: {zai_base_url}")
+                print_info("  If you get billing errors, check your plan at https://open.bigmodel.cn/")
+
        # Clear custom endpoint vars if switching
        if existing_custom:
            save_env_value("OPENAI_BASE_URL", "")
            save_env_value("OPENAI_API_KEY", "")
-        _update_config_for_provider("zai", pconfig.inference_base_url)
+        _update_config_for_provider("zai", zai_base_url)

    elif provider_idx == 5:  # Kimi / Moonshot
        selected_provider = "kimi-coding"
@@ -838,9 +860,18 @@ def setup_model_provider(config: dict):
                    config['model'] = model_name
            # else: keep current

+        elif selected_provider == "nous":
+            # Nous login succeeded but model fetch failed — prompt manually
+            # instead of falling through to the OpenRouter static list.
+            print_warning("Could not fetch available models from Nous Portal.")
+            print_info("Enter a Nous model name manually (e.g., claude-opus-4-6).")
+            custom = prompt(f"  Model name (Enter to keep '{current_model}')")
+            if custom:
+                config['model'] = custom
+                save_env_value("LLM_MODEL", custom)
        elif selected_provider == "openai-codex":
-            from hermes_cli.codex_models import get_codex_models
-            codex_models = get_codex_models()
+            from hermes_cli.codex_models import get_codex_model_ids
+            codex_models = get_codex_model_ids()
            model_choices = codex_models + [f"Keep current ({current_model})"]
            default_codex = 0
            if current_model in codex_models:
@@ -859,7 +890,12 @@ def setup_model_provider(config: dict):
                    save_env_value("LLM_MODEL", custom)
            _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
        elif selected_provider == "zai":
-            zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]
+            # Coding Plan endpoints don't have GLM-5
+            is_coding_plan = get_env_value("GLM_BASE_URL") and "coding" in (get_env_value("GLM_BASE_URL") or "")
+            if is_coding_plan:
+                zai_models = ["glm-4.7", "glm-4.5", "glm-4.5-flash"]
+            else:
+                zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]
            model_choices = list(zai_models)
            model_choices.append("Custom model")
            model_choices.append(f"Keep current ({current_model})")
@@ -1228,7 +1264,7 @@ def setup_agent_settings(config: dict):
    # ── Max Iterations ──
    print_header("Agent Settings")

-    current_max = get_env_value('HERMES_MAX_ITERATIONS') or '60'
+    current_max = get_env_value('HERMES_MAX_ITERATIONS') or '90'
    print_info("Maximum tool-calling iterations per conversation.")
    print_info("Higher = more complex tasks, but costs more tokens.")
    print_info("Recommended: 30-60 for most tasks, 100+ for open exploration.")
@@ -1624,14 +1660,18 @@ def setup_gateway(config: dict):
 # Section 5: Tool Configuration (delegates to unified tools_config.py)
 # =============================================================================

-def setup_tools(config: dict):
+def setup_tools(config: dict, first_install: bool = False):
    """Configure tools — delegates to the unified tools_command() in tools_config.py.
    
    Both `hermes setup tools` and `hermes tools` use the same flow:
    platform selection → toolset toggles → provider/API key configuration.
+    
+    Args:
+        first_install: When True, uses the simplified first-install flow
+            (no platform menu, prompts for all unconfigured API keys).
    """
    from hermes_cli.tools_config import tools_command
-    tools_command()
+    tools_command(first_install=first_install, config=config)


 # =============================================================================
@@ -1784,7 +1824,7 @@ def run_setup_wizard(args):
    setup_gateway(config)

    # Section 5: Tools
-    setup_tools(config)
+    setup_tools(config, first_install=not is_existing)

    # Save and show summary
    save_config(config)
--- a/hermes_cli/skills_hub.py
+++ b/hermes_cli/skills_hub.py
@@ -408,10 +408,11 @@ def do_inspect(identifier: str, console: Optional[Console] = None) -> None:

 def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None:
    """List installed skills, distinguishing builtins from hub-installed."""
-    from tools.skills_hub import HubLockFile, SKILLS_DIR
+    from tools.skills_hub import HubLockFile, ensure_hub_dirs
    from tools.skills_tool import _find_all_skills

    c = console or _console
+    ensure_hub_dirs()
    lock = HubLockFile()
    hub_installed = {e["name"]: e for e in lock.list_installed()}

--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@@ -206,6 +206,8 @@ def show_status(args):
        "Telegram": ("TELEGRAM_BOT_TOKEN", "TELEGRAM_HOME_CHANNEL"),
        "Discord": ("DISCORD_BOT_TOKEN", "DISCORD_HOME_CHANNEL"),
        "WhatsApp": ("WHATSAPP_ENABLED", None),
+        "Signal": ("SIGNAL_HTTP_URL", "SIGNAL_HOME_CHANNEL"),
+        "Slack": ("SLACK_BOT_TOKEN", None),
    }
    
    for name, (token_var, home_var) in platforms.items():
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -96,6 +96,11 @@ CONFIGURABLE_TOOLSETS = [
    ("homeassistant",    "🏠 Home Assistant",           "smart home device control"),
 ]

+# Toolsets that are OFF by default for new installs.
+# They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
+# but the setup checklist won't pre-select them for first-time users.
+_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl"}
+
 # Platform display config
 PLATFORMS = {
    "cli":      {"label": "🖥️  CLI",       "default_toolset": "hermes-cli"},
@@ -142,6 +147,8 @@ TOOL_CATEGORIES = {
    },
    "web": {
        "name": "Web Search & Extract",
+        "setup_title": "Select Search Provider",
+        "setup_note": "A free DuckDuckGo search skill is also included — skip this if you don't need Firecrawl.",
        "icon": "🔍",
        "providers": [
            {
@@ -308,7 +315,7 @@ def _get_platform_tools(config: dict, platform: str) -> Set[str]:
    platform_toolsets = config.get("platform_toolsets", {})
    toolset_names = platform_toolsets.get(platform)

-    if not toolset_names or not isinstance(toolset_names, list):
+    if toolset_names is None or not isinstance(toolset_names, list):
        default_ts = PLATFORMS[platform]["default_toolset"]
        toolset_names = [default_ts]

@@ -358,46 +365,88 @@ def _toolset_has_keys(ts_key: str) -> bool:
 # ─── Menu Helpers ─────────────────────────────────────────────────────────────

 def _prompt_choice(question: str, choices: list, default: int = 0) -> int:
-    """Single-select menu (arrow keys)."""
-    print(color(question, Colors.YELLOW))
+    """Single-select menu (arrow keys). Uses curses to avoid simple_term_menu
+    rendering bugs in tmux, iTerm, and other non-standard terminals."""

+    # Curses-based single-select — works in tmux, iTerm, and standard terminals
    try:
-        from simple_term_menu import TerminalMenu
-        menu = TerminalMenu(
-            [f"  {c}" for c in choices],
-            cursor_index=default,
-            menu_cursor="→ ",
-            menu_cursor_style=("fg_green", "bold"),
-            menu_highlight_style=("fg_green",),
-            cycle_cursor=True,
-            clear_screen=False,
-        )
-        idx = menu.show()
-        if idx is None:
-            return default
-        print()
-        return idx
-    except (ImportError, NotImplementedError):
-        for i, c in enumerate(choices):
-            marker = "●" if i == default else "○"
-            style = Colors.GREEN if i == default else ""
-            print(color(f"  {marker} {c}", style) if style else f"  {marker} {c}")
-        while True:
-            try:
-                val = input(color(f"  Select [1-{len(choices)}] ({default + 1}): ", Colors.DIM))
-                if not val:
-                    return default
-                idx = int(val) - 1
-                if 0 <= idx < len(choices):
-                    return idx
-            except (ValueError, KeyboardInterrupt, EOFError):
-                print()
+        import curses
+        result_holder = [default]
+
+        def _curses_menu(stdscr):
+            curses.curs_set(0)
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(1, curses.COLOR_GREEN, -1)
+                curses.init_pair(2, curses.COLOR_YELLOW, -1)
+            cursor = default
+
+            while True:
+                stdscr.clear()
+                max_y, max_x = stdscr.getmaxyx()
+                try:
+                    stdscr.addnstr(0, 0, question, max_x - 1,
+                                   curses.A_BOLD | (curses.color_pair(2) if curses.has_colors() else 0))
+                except curses.error:
+                    pass
+
+                for i, c in enumerate(choices):
+                    y = i + 2
+                    if y >= max_y - 1:
+                        break
+                    arrow = "→" if i == cursor else " "
+                    line = f" {arrow}  {c}"
+                    attr = curses.A_NORMAL
+                    if i == cursor:
+                        attr = curses.A_BOLD
+                        if curses.has_colors():
+                            attr |= curses.color_pair(1)
+                    try:
+                        stdscr.addnstr(y, 0, line, max_x - 1, attr)
+                    except curses.error:
+                        pass
+
+                stdscr.refresh()
+                key = stdscr.getch()
+
+                if key in (curses.KEY_UP, ord('k')):
+                    cursor = (cursor - 1) % len(choices)
+                elif key in (curses.KEY_DOWN, ord('j')):
+                    cursor = (cursor + 1) % len(choices)
+                elif key in (curses.KEY_ENTER, 10, 13):
+                    result_holder[0] = cursor
+                    return
+                elif key in (27, ord('q')):
+                    return
+
+        curses.wrapper(_curses_menu)
+        return result_holder[0]
+
+    except Exception:
+        pass
+
+    # Fallback: numbered input (Windows without curses, etc.)
+    print(color(question, Colors.YELLOW))
+    for i, c in enumerate(choices):
+        marker = "●" if i == default else "○"
+        style = Colors.GREEN if i == default else ""
+        print(color(f"  {marker} {i+1}. {c}", style) if style else f"  {marker} {i+1}. {c}")
+    while True:
+        try:
+            val = input(color(f"  Select [1-{len(choices)}] ({default + 1}): ", Colors.DIM))
+            if not val:
                return default
+            idx = int(val) - 1
+            if 0 <= idx < len(choices):
+                return idx
+        except (ValueError, KeyboardInterrupt, EOFError):
+            print()
+            return default


 def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
    """Multi-select checklist of toolsets. Returns set of selected toolset keys."""
-    import platform as _platform

    labels = []
    for ts_key, ts_label, ts_desc in CONFIGURABLE_TOOLSETS:
@@ -411,48 +460,8 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str
        if ts_key in enabled
    ]

-    # simple_term_menu multi-select has rendering bugs on macOS terminals,
-    # so we use a curses-based fallback there.
-    use_term_menu = _platform.system() != "Darwin"
-
-    if use_term_menu:
-        try:
-            from simple_term_menu import TerminalMenu
-
-            print(color(f"Tools for {platform_label}", Colors.YELLOW))
-            print(color("  SPACE to toggle, ENTER to confirm.", Colors.DIM))
-            print()
-
-            menu_items = [f"  {label}" for label in labels]
-            menu = TerminalMenu(
-                menu_items,
-                multi_select=True,
-                show_multi_select_hint=False,
-                multi_select_cursor="[✓] ",
-                multi_select_select_on_accept=False,
-                multi_select_empty_ok=True,
-                preselected_entries=pre_selected_indices if pre_selected_indices else None,
-                menu_cursor="→ ",
-                menu_cursor_style=("fg_green", "bold"),
-                menu_highlight_style=("fg_green",),
-                cycle_cursor=True,
-                clear_screen=False,
-                clear_menu_on_exit=False,
-            )
-
-            menu.show()
-
-            if menu.chosen_menu_entries is None:
-                return enabled
-
-            selected_indices = list(menu.chosen_menu_indices or [])
-            return {CONFIGURABLE_TOOLSETS[i][0] for i in selected_indices}
-
-        except (ImportError, NotImplementedError):
-            pass  # fall through to curses/numbered fallback
-
    # Curses-based multi-select — arrow keys + space to toggle + enter to confirm.
-    # Used on macOS (where simple_term_menu ghosts) and as a fallback.
+    # simple_term_menu has rendering bugs in tmux, iTerm, and other terminals.
    try:
        import curses
        selected = set(pre_selected_indices)
@@ -593,11 +602,18 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
        print(color(f"  --- {icon} {name} ({provider['name']}) ---", Colors.CYAN))
        if provider.get("tag"):
            _print_info(f"  {provider['tag']}")
+        # For single-provider tools, show a note if available
+        if cat.get("setup_note"):
+            _print_info(f"  {cat['setup_note']}")
        _configure_provider(provider, config)
    else:
        # Multiple providers - let user choose
        print()
-        print(color(f"  --- {icon} {name} - Choose a provider ---", Colors.CYAN))
+        # Use custom title if provided (e.g. "Select Search Provider")
+        title = cat.get("setup_title", f"Choose a provider")
+        print(color(f"  --- {icon} {name} - {title} ---", Colors.CYAN))
+        if cat.get("setup_note"):
+            _print_info(f"  {cat['setup_note']}")
        print()

        # Plain text labels only (no ANSI codes in menu items)
@@ -615,6 +631,9 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
                    configured = " [configured]"
            provider_choices.append(f"{p['name']}{tag}{configured}")

+        # Add skip option
+        provider_choices.append("Skip — keep defaults / configure later")
+
        # Detect current provider as default
        default_idx = 0
        for i, p in enumerate(providers):
@@ -626,7 +645,13 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
                default_idx = i
                break

-        provider_idx = _prompt_choice("  Select provider:", provider_choices, default_idx)
+        provider_idx = _prompt_choice(f"  {title}:", provider_choices, default_idx)
+
+        # Skip selected
+        if provider_idx >= len(providers):
+            _print_info(f"  Skipped {name}")
+            return
+
        _configure_provider(providers[provider_idx], config)


@@ -833,9 +858,19 @@ def _reconfigure_simple_requirements(ts_key: str):

 # ─── Main Entry Point ─────────────────────────────────────────────────────────

-def tools_command(args=None):
-    """Entry point for `hermes tools` and `hermes setup tools`."""
-    config = load_config()
+def tools_command(args=None, first_install: bool = False, config: dict = None):
+    """Entry point for `hermes tools` and `hermes setup tools`.
+
+    Args:
+        first_install: When True (set by the setup wizard on fresh installs),
+            skip the platform menu, go straight to the CLI checklist, and
+            prompt for API keys on all enabled tools that need them.
+        config: Optional config dict to use.  When called from the setup
+            wizard, the wizard passes its own dict so that platform_toolsets
+            are written into it and survive the wizard's final save_config().
+    """
+    if config is None:
+        config = load_config()
    enabled_platforms = _get_enabled_platforms()

    print()
@@ -844,6 +879,57 @@ def tools_command(args=None):
    print(color("  Tools that need API keys will be configured when enabled.", Colors.DIM))
    print()

+    # ── First-time install: linear flow, no platform menu ──
+    if first_install:
+        for pkey in enabled_platforms:
+            pinfo = PLATFORMS[pkey]
+            current_enabled = _get_platform_tools(config, pkey)
+
+            # Uncheck toolsets that should be off by default
+            checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS
+
+            # Show checklist
+            new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected)
+
+            added = new_enabled - current_enabled
+            removed = current_enabled - new_enabled
+            if added:
+                for ts in sorted(added):
+                    label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts), ts)
+                    print(color(f"  + {label}", Colors.GREEN))
+            if removed:
+                for ts in sorted(removed):
+                    label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts), ts)
+                    print(color(f"  - {label}", Colors.RED))
+
+            # Walk through ALL selected tools that have provider options or
+            # need API keys.  This ensures browser (Local vs Browserbase),
+            # TTS (Edge vs OpenAI vs ElevenLabs), etc. are shown even when
+            # a free provider exists.
+            to_configure = [
+                ts_key for ts_key in sorted(new_enabled)
+                if TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)
+            ]
+
+            if to_configure:
+                print()
+                print(color(f"  Configuring {len(to_configure)} tool(s):", Colors.YELLOW))
+                for ts_key in to_configure:
+                    label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key)
+                    print(color(f"    • {label}", Colors.DIM))
+                print(color("  You can skip any tool you don't need right now.", Colors.DIM))
+                print()
+                for ts_key in to_configure:
+                    _configure_toolset(ts_key, config)
+
+            _save_platform_tools(config, pkey, new_enabled)
+            save_config(config)
+            print(color(f"  ✓ Saved {pinfo['label']} tool configuration", Colors.GREEN))
+            print()
+
+        return
+
+    # ── Returning user: platform menu loop ──
    # Build platform choices
    platform_choices = []
    platform_keys = []
@@ -894,11 +980,10 @@ def tools_command(args=None):
                    print(color(f"  - {label}", Colors.RED))

            # Configure newly enabled toolsets that need API keys
-            if added:
-                for ts_key in sorted(added):
-                    if TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key):
-                        if not _toolset_has_keys(ts_key):
-                            _configure_toolset(ts_key, config)
+            for ts_key in sorted(added):
+                if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)):
+                    if not _toolset_has_keys(ts_key):
+                        _configure_toolset(ts_key, config)

            _save_platform_tools(config, pkey, new_enabled)
            save_config(config)
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -24,7 +24,7 @@ from typing import Dict, Any, List, Optional

 DEFAULT_DB_PATH = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) / "state.db"

-SCHEMA_VERSION = 2
+SCHEMA_VERSION = 4

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -46,6 +46,7 @@ CREATE TABLE IF NOT EXISTS sessions (
    tool_call_count INTEGER DEFAULT 0,
    input_tokens INTEGER DEFAULT 0,
    output_tokens INTEGER DEFAULT 0,
+    title TEXT,
    FOREIGN KEY (parent_session_id) REFERENCES sessions(id)
 );

@@ -133,7 +134,33 @@ class SessionDB:
                except sqlite3.OperationalError:
                    pass  # Column already exists
                cursor.execute("UPDATE schema_version SET version = 2")
+            if current_version < 3:
+                # v3: add title column to sessions
+                try:
+                    cursor.execute("ALTER TABLE sessions ADD COLUMN title TEXT")
+                except sqlite3.OperationalError:
+                    pass  # Column already exists
+                cursor.execute("UPDATE schema_version SET version = 3")
+            if current_version < 4:
+                # v4: add unique index on title (NULLs allowed, only non-NULL must be unique)
+                try:
+                    cursor.execute(
+                        "CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique "
+                        "ON sessions(title) WHERE title IS NOT NULL"
+                    )
+                except sqlite3.OperationalError:
+                    pass  # Best-effort: IF NOT EXISTS already covers an existing index
+                cursor.execute("UPDATE schema_version SET version = 4")

+        # Unique title index — always ensure it exists (safe to run after migrations
+        # since the title column is guaranteed to exist at this point)
+        try:
+            cursor.execute(
+                "CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique "
+                "ON sessions(title) WHERE title IS NOT NULL"
+            )
+        except sqlite3.OperationalError:
+            pass  # Best-effort: IF NOT EXISTS already covers an existing index

        # FTS5 setup (separate because CREATE VIRTUAL TABLE can't be in executescript with IF NOT EXISTS reliably)
        try:
@@ -219,6 +246,210 @@ class SessionDB:
        row = cursor.fetchone()
        return dict(row) if row else None

+    # Maximum length for session titles
+    MAX_TITLE_LENGTH = 100
+
+    @staticmethod
+    def sanitize_title(title: Optional[str]) -> Optional[str]:
+        """Validate and sanitize a session title.
+
+        - Strips leading/trailing whitespace
+        - Removes ASCII control characters (0x00-0x1F, 0x7F) and problematic
+          Unicode control chars (zero-width, RTL/LTR overrides, etc.)
+        - Collapses internal whitespace runs to single spaces
+        - Normalizes empty/whitespace-only strings to None
+        - Enforces MAX_TITLE_LENGTH
+
+        Returns the cleaned title string or None.
+        Raises ValueError if the title exceeds MAX_TITLE_LENGTH after cleaning.
+        """
+        if not title:
+            return None
+
+        import re
+
+        # Remove ASCII control characters (0x00-0x1F, 0x7F) but keep
+        # whitespace chars (\t=0x09, \n=0x0A, \r=0x0D) so they can be
+        # normalized to spaces by the whitespace collapsing step below
+        cleaned = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', title)
+
+        # Remove problematic Unicode control characters:
+        # - Zero-width/invisible chars (U+200B-U+200F, U+2060-U+2065, U+FEFF)
+        # - Line/paragraph separators and directional controls (U+2028-U+202E, U+2066-U+2069)
+        # - Object replacement (U+FFFC), interlinear annotation (U+FFF9-U+FFFB)
+        cleaned = re.sub(
+            r'[\u200b-\u200f\u2028-\u202e\u2060-\u2069\ufeff\ufffc\ufff9-\ufffb]',
+            '', cleaned,
+        )
+
+        # Collapse internal whitespace runs and strip
+        cleaned = re.sub(r'\s+', ' ', cleaned).strip()
+
+        if not cleaned:
+            return None
+
+        if len(cleaned) > SessionDB.MAX_TITLE_LENGTH:
+            raise ValueError(
+                f"Title too long ({len(cleaned)} chars, max {SessionDB.MAX_TITLE_LENGTH})"
+            )
+
+        return cleaned
+
+    def set_session_title(self, session_id: str, title: str) -> bool:
+        """Set or update a session's title.
+
+        Returns True if session was found and title was set.
+        Raises ValueError if title is already in use by another session, or if
+        it is still too long after sanitizing (bad characters are stripped, not rejected).
+        Empty/whitespace-only strings are normalized to None (clearing the title).
+        """
+        title = self.sanitize_title(title)
+        if title:
+            # Check uniqueness (allow the same session to keep its own title)
+            cursor = self._conn.execute(
+                "SELECT id FROM sessions WHERE title = ? AND id != ?",
+                (title, session_id),
+            )
+            conflict = cursor.fetchone()
+            if conflict:
+                raise ValueError(
+                    f"Title '{title}' is already in use by session {conflict['id']}"
+                )
+        cursor = self._conn.execute(
+            "UPDATE sessions SET title = ? WHERE id = ?",
+            (title, session_id),
+        )
+        self._conn.commit()
+        return cursor.rowcount > 0
+
+    def get_session_title(self, session_id: str) -> Optional[str]:
+        """Get the title for a session, or None."""
+        cursor = self._conn.execute(
+            "SELECT title FROM sessions WHERE id = ?", (session_id,)
+        )
+        row = cursor.fetchone()
+        return row["title"] if row else None
+
+    def get_session_by_title(self, title: str) -> Optional[Dict[str, Any]]:
+        """Look up a session by exact title. Returns session dict or None."""
+        cursor = self._conn.execute(
+            "SELECT * FROM sessions WHERE title = ?", (title,)
+        )
+        row = cursor.fetchone()
+        return dict(row) if row else None
+
+    def resolve_session_by_title(self, title: str) -> Optional[str]:
+        """Resolve a title to a session ID, preferring the latest in a lineage.
+
+        Numbered variants ("title #2", "title #3", ...) take precedence: if any
+        exist, the one with the latest started_at is returned, even when the
+        exact title also exists. Otherwise the exact-title session's ID is
+        returned, or None if nothing matches.
+        """
+        # First try exact match
+        exact = self.get_session_by_title(title)
+
+        # Also search for numbered variants: "title #2", "title #3", etc.
+        # Escape SQL LIKE wildcards (%, _) in the title to prevent false matches
+        escaped = title.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+        cursor = self._conn.execute(
+            "SELECT id, title, started_at FROM sessions "
+            "WHERE title LIKE ? ESCAPE '\\' ORDER BY started_at DESC",
+            (f"{escaped} #%",),
+        )
+        numbered = cursor.fetchall()
+
+        if numbered:
+            # Return the most recent numbered variant
+            return numbered[0]["id"]
+        elif exact:
+            return exact["id"]
+        return None
+
+    def get_next_title_in_lineage(self, base_title: str) -> str:
+        """Generate the next title in a lineage (e.g., "my session" → "my session #2").
+
+        Strips any existing " #N" suffix to find the base name, then finds
+        the highest existing number and increments.
+        """
+        import re
+        # Strip existing #N suffix to find the true base
+        match = re.match(r'^(.*?) #(\d+)$', base_title)
+        if match:
+            base = match.group(1)
+        else:
+            base = base_title
+
+        # Find all existing numbered variants
+        # Escape SQL LIKE wildcards (%, _) in the base to prevent false matches
+        escaped = base.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+        cursor = self._conn.execute(
+            "SELECT title FROM sessions WHERE title = ? OR title LIKE ? ESCAPE '\\'",
+            (base, f"{escaped} #%"),
+        )
+        existing = [row["title"] for row in cursor.fetchall()]
+
+        if not existing:
+            return base  # No conflict, use the base name as-is
+
+        # Find the highest number
+        max_num = 1  # The unnumbered original counts as #1
+        for t in existing:
+            m = re.match(r'^.* #(\d+)$', t)
+            if m:
+                max_num = max(max_num, int(m.group(1)))
+
+        return f"{base} #{max_num + 1}"
+
+    def list_sessions_rich(
+        self,
+        source: str = None,
+        limit: int = 20,
+        offset: int = 0,
+    ) -> List[Dict[str, Any]]:
+        """List sessions with preview (first user message) and last active timestamp.
+
+        Returns dicts with keys: id, source, model, title, started_at, ended_at,
+        message_count, preview (first 60 chars of first user message),
+        last_active (timestamp of last message).
+
+        Uses a single query with correlated subqueries instead of N+2 queries.
+        """
+        source_clause = "WHERE s.source = ?" if source else ""
+        query = f"""
+            SELECT s.*,
+                COALESCE(
+                    (SELECT SUBSTR(REPLACE(REPLACE(m.content, X'0A', ' '), X'0D', ' '), 1, 63)
+                     FROM messages m
+                     WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL
+                     ORDER BY m.timestamp, m.id LIMIT 1),
+                    ''
+                ) AS _preview_raw,
+                COALESCE(
+                    (SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id),
+                    s.started_at
+                ) AS last_active
+            FROM sessions s
+            {source_clause}
+            ORDER BY s.started_at DESC
+            LIMIT ? OFFSET ?
+        """
+        params = (source, limit, offset) if source else (limit, offset)
+        cursor = self._conn.execute(query, params)
+        sessions = []
+        for row in cursor.fetchall():
+            s = dict(row)
+            # Build the preview from the raw substring
+            raw = s.pop("_preview_raw", "").strip()
+            if raw:
+                text = raw[:60]
+                s["preview"] = text + ("..." if len(raw) > 60 else "")
+            else:
+                s["preview"] = ""
+            sessions.append(s)
+
+        return sessions
+
    # =========================================================================
    # Message storage
    # =========================================================================
--- a/mini_swe_runner.py
+++ b/mini_swe_runner.py
@@ -149,7 +149,7 @@ class MiniSWERunner:
    
    def __init__(
        self,
-        model: str = "anthropic/claude-sonnet-4-20250514",
+        model: str = "anthropic/claude-sonnet-4.6",
        base_url: str = None,
        api_key: str = None,
        env_type: str = "local",
@@ -200,13 +200,7 @@ class MiniSWERunner:
        else:
            client_kwargs["base_url"] = "https://openrouter.ai/api/v1"

-        if base_url and "api.anthropic.com" in base_url.strip().lower():
-            raise ValueError(
-                "Anthropic's native /v1/messages API is not supported yet (planned for a future release). "
-                "Hermes currently requires OpenAI-compatible /chat/completions endpoints. "
-                "To use Claude models now, route through OpenRouter (OPENROUTER_API_KEY) "
-                "or any OpenAI-compatible proxy that wraps the Anthropic API."
-            )
+
        
        # Handle API key - OpenRouter is the primary provider
        if api_key:
--- a/optional-skills/blockchain/solana/SKILL.md
+++ b/optional-skills/blockchain/solana/SKILL.md
@@ -0,0 +1,207 @@
+---
+name: solana
+description: Query Solana blockchain data with USD pricing — wallet balances, token portfolios with values, transaction details, NFTs, whale detection, and live network stats. Uses Solana RPC + CoinGecko. No API key required.
+version: 0.2.0
+author: Deniz Alagoz (gizdusum), enhanced by Hermes Agent
+license: MIT
+metadata:
+  hermes:
+    tags: [Solana, Blockchain, Crypto, Web3, RPC, DeFi, NFT]
+    related_skills: []
+---
+
+# Solana Blockchain Skill
+
+Query Solana on-chain data enriched with USD pricing via CoinGecko.
+8 commands: wallet portfolio, token info, transactions, activity, NFTs,
+whale detection, network stats, and price lookup.
+
+No API key needed. Uses only Python standard library (urllib, json, argparse).
+
+---
+
+## When to Use
+
+- User asks for a Solana wallet balance, token holdings, or portfolio value
+- User wants to inspect a specific transaction by signature
+- User wants SPL token metadata, price, supply, or top holders
+- User wants recent transaction history for an address
+- User wants NFTs owned by a wallet
+- User wants to find large SOL transfers (whale detection)
+- User wants Solana network health, TPS, epoch, or SOL price
+- User asks "what's the price of BONK/JUP/SOL?"
+
+---
+
+## Prerequisites
+
+The helper script uses only Python standard library (urllib, json, argparse).
+No external packages required.
+
+Pricing data comes from CoinGecko's free API (no key needed, rate-limited
+to ~10-30 requests/minute). For faster lookups, use `--no-prices` flag.
+
+---
+
+## Quick Reference
+
+RPC endpoint (default): https://api.mainnet-beta.solana.com
+Override: export SOLANA_RPC_URL=https://your-private-rpc.com
+
+Helper script path: ~/.hermes/skills/blockchain/solana/scripts/solana_client.py
+
+```
+python3 solana_client.py wallet   <address> [--limit N] [--all] [--no-prices]
+python3 solana_client.py tx       <signature>
+python3 solana_client.py token    <mint_address>
+python3 solana_client.py activity <address> [--limit N]
+python3 solana_client.py nft      <address>
+python3 solana_client.py whales   [--min-sol N]
+python3 solana_client.py stats
+python3 solana_client.py price    <mint_or_symbol>
+```
+
+---
+
+## Procedure
+
+### 0. Setup Check
+
+```bash
+python3 --version
+
+# Optional: override the RPC endpoint (public default shown; a private RPC gives better rate limits)
+export SOLANA_RPC_URL="https://api.mainnet-beta.solana.com"
+
+# Confirm connectivity
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py stats
+```
+
+### 1. Wallet Portfolio
+
+Get SOL balance, SPL token holdings with USD values, NFT count, and
+portfolio total. Tokens sorted by value, dust filtered, known tokens
+labeled by name (BONK, JUP, USDC, etc.).
+
+```bash
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \
+  wallet 9WzDXwBbmkg8ZTbNMqUxvQRAyrZzDsGYdLVL9zYtAWWM
+```
+
+Flags:
+- `--limit N` — show top N tokens (default: 20)
+- `--all` — show all tokens, no dust filter, no limit
+- `--no-prices` — skip CoinGecko price lookups (faster, RPC-only)
+
+Output includes: SOL balance + USD value, token list with prices sorted
+by value, dust count, NFT summary, total portfolio value in USD.
+
+### 2. Transaction Details
+
+Inspect a full transaction by its base58 signature. Shows balance changes
+in both SOL and USD.
+
+```bash
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \
+  tx 5j7s8K...your_signature_here
+```
+
+Output: slot, timestamp, fee, status, balance changes (SOL + USD),
+program invocations.
+
+### 3. Token Info
+
+Get SPL token metadata, current price, market cap, supply, decimals,
+mint/freeze authorities, and top 5 holders.
+
+```bash
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \
+  token DezXAZ8z7PnrnRJjz3wXBoRgixCa6xjnB7YaB1pPB263
+```
+
+Output: name, symbol, decimals, supply, price, market cap, top 5
+holders with percentages.
+
+### 4. Recent Activity
+
+List recent transactions for an address (default: last 10, max: 25).
+
+```bash
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \
+  activity 9WzDXwBbmkg8ZTbNMqUxvQRAyrZzDsGYdLVL9zYtAWWM --limit 25
+```
+
+### 5. NFT Portfolio
+
+List NFTs owned by a wallet (heuristic: SPL tokens with amount=1, decimals=0).
+
+```bash
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \
+  nft 9WzDXwBbmkg8ZTbNMqUxvQRAyrZzDsGYdLVL9zYtAWWM
+```
+
+Note: Compressed NFTs (cNFTs) are not detected by this heuristic.
+
+### 6. Whale Detector
+
+Scan the most recent block for large SOL transfers with USD values.
+
+```bash
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \
+  whales --min-sol 500
+```
+
+Note: scans the latest block only — point-in-time snapshot, not historical.
+
+### 7. Network Stats
+
+Live Solana network health: current slot, epoch, TPS, supply, validator
+version, SOL price, and market cap.
+
+```bash
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py stats
+```
+
+### 8. Price Lookup
+
+Quick price check for any token by mint address or known symbol.
+
+```bash
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py price BONK
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py price JUP
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py price SOL
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py price DezXAZ8z7PnrnRJjz3wXBoRgixCa6xjnB7YaB1pPB263
+```
+
+Known symbols: SOL, USDC, USDT, BONK, JUP, WETH, JTO, mSOL, stSOL,
+PYTH, HNT, RNDR, WEN, W, TNSR, DRIFT, bSOL, JLP, WIF, MEW, BOME, PENGU.
+
+---
+
+## Pitfalls
+
+- **CoinGecko rate-limits** — free tier allows ~10-30 requests/minute.
+  Price lookups use 1 request per token, so `wallet` prices at most 30
+  tokens (known tokens first). Use `--no-prices` for speed.
+- **Public RPC rate-limits** — Solana mainnet public RPC limits requests.
+  For production use, set SOLANA_RPC_URL to a private endpoint
+  (Helius, QuickNode, Triton).
+- **NFT detection is heuristic** — amount=1 + decimals=0. Compressed
+  NFTs (cNFTs) and Token-2022 NFTs won't appear.
+- **Whale detector scans latest block only** — not historical. Results
+  vary by the moment you query.
+- **Transaction history** — public RPC keeps ~2 days. Older transactions
+  may not be available.
+- **Token names** — ~25 well-known tokens are labeled by name. Others
+  show abbreviated mint addresses. Use the `token` command for full info.
+- **Retry on 429** — both RPC and CoinGecko calls retry up to 2 times
+  with a linearly increasing delay on rate-limit errors.
+
+---
+
+## Verification
+
+```bash
+# Should print current Solana slot, TPS, and SOL price
+python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py stats
+```
--- a/optional-skills/blockchain/solana/scripts/solana_client.py
+++ b/optional-skills/blockchain/solana/scripts/solana_client.py
@@ -0,0 +1,698 @@
+#!/usr/bin/env python3
+"""
+Solana Blockchain CLI Tool for Hermes Agent
+--------------------------------------------
+Queries the Solana JSON-RPC API and CoinGecko for enriched on-chain data.
+Uses only Python standard library — no external packages required.
+
+Usage:
+  python3 solana_client.py stats
+  python3 solana_client.py wallet   <address> [--limit N] [--all] [--no-prices]
+  python3 solana_client.py tx       <signature>
+  python3 solana_client.py token    <mint_address>
+  python3 solana_client.py activity <address> [--limit N]
+  python3 solana_client.py nft      <address>
+  python3 solana_client.py whales   [--min-sol N]
+  python3 solana_client.py price    <mint_address_or_symbol>
+
+Environment:
+  SOLANA_RPC_URL  Override the default RPC endpoint (default: mainnet-beta public)
+"""
+
+import argparse
+import json
+import os
+import sys
+import time
+import urllib.request
+import urllib.error
+from typing import Any, Dict, List, Optional
+
+RPC_URL = os.environ.get(
+    "SOLANA_RPC_URL",
+    "https://api.mainnet-beta.solana.com",
+)
+
+LAMPORTS_PER_SOL = 1_000_000_000
+
+# Well-known Solana token names — avoids API calls for common tokens.
+# Maps mint address → (symbol, name).
+KNOWN_TOKENS: Dict[str, tuple] = {
+    "So11111111111111111111111111111111111111112":  ("SOL",   "Solana"),
+    "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v": ("USDC",  "USD Coin"),
+    "Es9vMFrzaCERmJfrF4H2FYD4KCoNkY11McCe8BenwNYB":  ("USDT",  "Tether"),
+    "DezXAZ8z7PnrnRJjz3wXBoRgixCa6xjnB7YaB1pPB263": ("BONK",  "Bonk"),
+    "JUPyiwrYJFskUPiHa7hkeR8VUtAeFoSYbKedZNsDvCN":  ("JUP",   "Jupiter"),
+    "7vfCXTUXx5WJV5JADk17DUJ4ksgau7utNKj4b963voxs": ("WETH",  "Wrapped Ether"),
+    "jtojtomepa8beP8AuQc6eXt5FriJwfFMwQx2v2f9mCL":  ("JTO",   "Jito"),
+    "mSoLzYCxHdYgdzU16g5QSh3i5K3z3KZK7ytfqcJm7So":  ("mSOL",  "Marinade Staked SOL"),
+    "7dHbWXmci3dT8UFYWYZweBLXgycu7Y3iL6trKn1Y7ARj": ("stSOL", "Lido Staked SOL"),
+    "HZ1JovNiVvGrGNiiYvEozEVgZ58xaU3RKwX8eACQBCt3": ("PYTH",  "Pyth Network"),
+    "RLBxxFkseAZ4RgJH3Sqn8jXxhmGoz9jWxDNJMh8pL7a":  ("RLBB",  "Rollbit"),
+    "hntyVP6YFm1Hg25TN9WGLqM12b8TQmcknKrdu1oxWux":  ("HNT",   "Helium"),
+    "rndrizKT3MK1iimdxRdWabcF7Zg7AR5T4nud4EkHBof":  ("RNDR",  "Render"),
+    "WENWENvqqNya429ubCdR81ZmD69brwQaaBYY6p91oHQQ":  ("WEN",   "Wen"),
+    "85VBFQZC9TZkfaptBWjvUw7YbZjy52A6mjtPGjstQAmQ": ("W",     "Wormhole"),
+    "TNSRxcUxoT9xBG3de7PiJyTDYu7kskLqcpddxnEJAS6":  ("TNSR",  "Tensor"),
+    "DriFtupJYLTosbwoN8koMbEYSx54aFAVLddWsbksjwg7":  ("DRIFT", "Drift"),
+    "bSo13r4TkiE4KumL71LsHTPpL2euBYLFx6h9HP3piy1":  ("bSOL",  "BlazeStake Staked SOL"),
+    "27G8MtK7VtTcCHkpASjSDdkWWYfoqT6ggEuKidVJidD4": ("JLP",   "Jupiter LP"),
+    "EKpQGSJtjMFqKZ9KQanSqYXRcF8fBopzLHYxdM65zcjm": ("WIF",   "dogwifhat"),
+    "MEW1gQWJ3nEXg2qgERiKu7FAFj79PHvQVREQUzScPP5":  ("MEW",   "cat in a dogs world"),
+    "ukHH6c7mMyiWCf1b9pnWe25TSpkDDt3H5pQZgZ74J82":  ("BOME",  "Book of Meme"),
+    "A8C3xuqscfmyLrte3VwJvtPHXvcSN3FjDbUaSMAkQrCS": ("PENGU", "Pudgy Penguins"),
+}
+
+# Reverse lookup: symbol → mint (for the `price` command).
+_SYMBOL_TO_MINT = {v[0].upper(): k for k, v in KNOWN_TOKENS.items()}
+
+
+# ---------------------------------------------------------------------------
+# HTTP / RPC helpers
+# ---------------------------------------------------------------------------
+
+def _http_get_json(url: str, timeout: int = 10, retries: int = 2) -> Any:
+    """GET JSON from a URL with retry on 429 rate-limit. Returns parsed JSON or None."""
+    for attempt in range(retries + 1):
+        req = urllib.request.Request(
+            url, headers={"Accept": "application/json", "User-Agent": "HermesAgent/1.0"},
+        )
+        try:
+            with urllib.request.urlopen(req, timeout=timeout) as resp:
+                return json.load(resp)
+        except urllib.error.HTTPError as exc:
+            if exc.code == 429 and attempt < retries:
+                time.sleep(2.0 * (attempt + 1))
+                continue
+            return None
+        except Exception:
+            return None
+    return None
+
+
+def _rpc_call(method: str, params: list = None, retries: int = 2) -> Any:
+    """Send one JSON-RPC request to RPC_URL and return its ``result`` field.
+
+    Rate limits (HTTP 429 or a JSON-RPC error with code 429) are retried up
+    to *retries* times with a growing delay; any other failure ends the
+    script via ``sys.exit`` with a one-line message.
+    """
+    payload = json.dumps(
+        {"jsonrpc": "2.0", "id": 1, "method": method, "params": params or []}
+    ).encode()
+    for attempt in range(retries + 1):
+        req = urllib.request.Request(RPC_URL, data=payload, method="POST",
+                                     headers={"Content-Type": "application/json"})
+        try:
+            with urllib.request.urlopen(req, timeout=20) as resp:
+                body = json.load(resp)
+            if not isinstance(body, dict):
+                sys.exit(f"RPC error: unexpected response {body!r}")
+            if "error" not in body:
+                return body.get("result")
+            err = body["error"]
+            rate_limited = isinstance(err, dict) and err.get("code") == 429
+            if not (rate_limited and attempt < retries):
+                sys.exit(f"RPC error: {err}")
+            time.sleep(1.5 * (attempt + 1))
+        except urllib.error.HTTPError as exc:
+            if exc.code != 429 or attempt >= retries:
+                sys.exit(f"RPC HTTP error: {exc}")
+            time.sleep(1.5 * (attempt + 1))
+        except (OSError, ValueError) as exc:  # timeouts, resets, malformed JSON
+            sys.exit(f"RPC connection error: {exc}")
+    return None
+
+
+# Keep backward compat — the rest of the code uses `rpc()`.
+rpc = _rpc_call
+
+
+def rpc_batch(calls: list) -> list:
+    """Send *calls* as one JSON-RPC batch; return the response list (429 is retried)."""
+    # Each call is a dict with "method" and optional "params"; its index is the request id.
+    payload = json.dumps([
+        {"jsonrpc": "2.0", "id": i, "method": c["method"], "params": c.get("params", [])}
+        for i, c in enumerate(calls)
+    ]).encode()
+    for attempt in range(3):
+        req = urllib.request.Request(RPC_URL, data=payload, method="POST",
+                                     headers={"Content-Type": "application/json"})
+        try:
+            with urllib.request.urlopen(req, timeout=20) as resp:
+                body = json.load(resp)
+            if isinstance(body, list):
+                return body
+            sys.exit(f"RPC batch error: {body}")  # whole batch rejected as one object
+        except urllib.error.HTTPError as exc:
+            if exc.code != 429 or attempt >= 2:
+                sys.exit(f"RPC batch HTTP error: {exc}")
+            time.sleep(1.5 * (attempt + 1))
+        except (OSError, ValueError) as exc:  # timeouts, resets, malformed JSON
+            sys.exit(f"RPC batch error: {exc}")
+    return []
+
+
+def lamports_to_sol(lamports: int) -> float:
+    return lamports / LAMPORTS_PER_SOL
+
+
+def print_json(obj: Any) -> None:
+    print(json.dumps(obj, indent=2))
+
+
+def _short_mint(mint: str) -> str:
+    """Abbreviate a mint address for display: first 4 + last 4."""
+    if len(mint) <= 12:
+        return mint
+    return f"{mint[:4]}...{mint[-4:]}"
+
+
+# ---------------------------------------------------------------------------
+# Price & token name helpers (CoinGecko — free, no API key)
+# ---------------------------------------------------------------------------
+
+def fetch_prices(mints: List[str], max_lookups: int = 20) -> Dict[str, float]:
+    """Fetch USD prices for mint addresses via CoinGecko (one per request).
+
+    CoinGecko free tier doesn't support batch Solana token lookups,
+    so we do individual calls — capped at *max_lookups* to stay within
+    rate limits. Returns {mint: usd_price}.
+    """
+    prices: Dict[str, float] = {}
+    for i, mint in enumerate(mints[:max_lookups]):
+        url = (
+            f"https://api.coingecko.com/api/v3/simple/token_price/solana"
+            f"?contract_addresses={mint}&vs_currencies=usd"
+        )
+        data = _http_get_json(url, timeout=10)
+        if data and isinstance(data, dict):
+            for addr, info in data.items():
+                if isinstance(info, dict) and "usd" in info:
+                    prices[mint] = info["usd"]
+                    break
+        # Pause between calls to respect CoinGecko free-tier rate-limits
+        if i < len(mints[:max_lookups]) - 1:
+            time.sleep(1.0)
+    return prices
+
+
+def fetch_sol_price() -> Optional[float]:
+    """Fetch current SOL price in USD via CoinGecko."""
+    data = _http_get_json(
+        "https://api.coingecko.com/api/v3/simple/price?ids=solana&vs_currencies=usd"
+    )
+    if data and "solana" in data:
+        return data["solana"].get("usd")
+    return None
+
+
+def resolve_token_name(mint: str) -> Optional[Dict[str, str]]:
+    """Look up token name and symbol from CoinGecko by mint address.
+
+    Returns {"name": ..., "symbol": ...} or None.
+    """
+    if mint in KNOWN_TOKENS:
+        sym, name = KNOWN_TOKENS[mint]
+        return {"symbol": sym, "name": name}
+    url = f"https://api.coingecko.com/api/v3/coins/solana/contract/{mint}"
+    data = _http_get_json(url, timeout=10)
+    if data and "symbol" in data:
+        return {"symbol": data["symbol"].upper(), "name": data.get("name", "")}
+    return None
+
+
+def _token_label(mint: str) -> str:
+    """Return a human-readable label for a mint: symbol if known, else abbreviated address."""
+    if mint in KNOWN_TOKENS:
+        return KNOWN_TOKENS[mint][0]
+    return _short_mint(mint)
+
+
+# ---------------------------------------------------------------------------
+# 1. Network Stats
+# ---------------------------------------------------------------------------
+
+def cmd_stats(_args):
+    """Live Solana network: slot, epoch, TPS, supply, version, SOL price."""
+    results = rpc_batch([
+        {"method": "getSlot"},
+        {"method": "getEpochInfo"},
+        {"method": "getRecentPerformanceSamples", "params": [1]},
+        {"method": "getSupply"},
+        {"method": "getVersion"},
+    ])
+
+    by_id = {r["id"]: r.get("result") for r in results}
+
+    slot         = by_id.get(0)
+    epoch_info   = by_id.get(1)
+    perf_samples = by_id.get(2)
+    supply       = by_id.get(3)
+    version      = by_id.get(4)
+
+    tps = None
+    if perf_samples:
+        s = perf_samples[0]
+        tps = round(s["numTransactions"] / s["samplePeriodSecs"], 1)
+
+    total_supply = lamports_to_sol(supply["value"]["total"])      if supply else None
+    circ_supply  = lamports_to_sol(supply["value"]["circulating"]) if supply else None
+
+    sol_price = fetch_sol_price()
+
+    out = {
+        "slot":                   slot,
+        "epoch":                  epoch_info.get("epoch")     if epoch_info else None,
+        "slot_in_epoch":          epoch_info.get("slotIndex") if epoch_info else None,
+        "tps":                    tps,
+        "total_supply_SOL":       round(total_supply, 2) if total_supply else None,
+        "circulating_supply_SOL": round(circ_supply, 2)  if circ_supply  else None,
+        "validator_version":      version.get("solana-core")  if version   else None,
+    }
+    if sol_price is not None:
+        out["sol_price_usd"] = sol_price
+        if circ_supply:
+            out["market_cap_usd"] = round(sol_price * circ_supply, 0)
+    print_json(out)
+
+
+# ---------------------------------------------------------------------------
+# 2. Wallet Info (enhanced with prices, sorting, filtering)
+# ---------------------------------------------------------------------------
+
+def cmd_wallet(args):
+    """Print SOL balance and SPL token holdings, optionally valued in USD.
+
+    Reads ``args.address`` and the optional ``all`` (no dust filter, no
+    limit), ``limit`` (max tokens shown, default 20) and ``no_prices`` (skip
+    CoinGecko) flags. The portfolio total covers SOL plus every priced
+    token, including dust and tokens hidden by the limit.
+    """
+    address = args.address
+    show_all = getattr(args, "all", False)
+    limit = getattr(args, "limit", 20) or 20
+    skip_prices = getattr(args, "no_prices", False)
+
+    # Fetch SOL balance
+    sol_balance = lamports_to_sol(rpc("getBalance", [address])["value"])
+
+    # Fetch all SPL token accounts
+    token_result = rpc("getTokenAccountsByOwner", [
+        address,
+        {"programId": "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"},
+        {"encoding": "jsonParsed"},
+    ])
+
+    raw_tokens = []
+    for acct in (token_result.get("value") or []):
+        info = acct["account"]["data"]["parsed"]["info"]
+        ta = info["tokenAmount"]
+        amount = float(ta.get("uiAmountString") or 0)
+        if amount > 0:
+            raw_tokens.append(
+                {"mint": info["mint"], "amount": amount, "decimals": ta["decimals"]}
+            )
+
+    # Separate NFTs (amount=1, decimals=0) from fungible tokens
+    nfts = [t for t in raw_tokens if t["decimals"] == 0 and t["amount"] == 1]
+    fungible = [t for t in raw_tokens if not (t["decimals"] == 0 and t["amount"] == 1)]
+
+    # SOL is priced even when the wallet holds no SPL tokens.
+    sol_price = None if skip_prices else fetch_sol_price()
+    prices: Dict[str, float] = {}
+    if not skip_prices and fungible:
+        # Prioritize known tokens, then a small sample of unknowns.
+        # CoinGecko free tier = 1 request per mint, so we cap lookups.
+        known_mints = [t["mint"] for t in fungible if t["mint"] in KNOWN_TOKENS]
+        other_mints = [t["mint"] for t in fungible if t["mint"] not in KNOWN_TOKENS][:15]
+        mints_to_price = known_mints + other_mints
+        if mints_to_price:
+            prices = fetch_prices(mints_to_price, max_lookups=30)
+
+    # Enrich tokens with labels and USD values
+    enriched = []
+    dust_count = 0
+    dust_value = 0.0
+    for t in fungible:
+        mint = t["mint"]
+        usd_price = prices.get(mint)
+        # ``is not None`` so a quoted price of 0 yields 0.0 rather than None.
+        usd_value = round(usd_price * t["amount"], 2) if usd_price is not None else None
+
+        # Filter dust (< $0.01) unless --all
+        if not show_all and usd_value is not None and usd_value < 0.01:
+            dust_count += 1
+            dust_value += usd_value
+            continue
+
+        entry = {"token": _token_label(mint), "mint": mint, "amount": t["amount"]}
+        if usd_price is not None:
+            entry["price_usd"] = usd_price
+            entry["value_usd"] = usd_value
+        enriched.append(entry)
+
+    # Sort: tokens with known USD value first (highest→lowest), then unknowns
+    enriched.sort(key=lambda x: (x.get("value_usd") is not None, x.get("value_usd") or 0), reverse=True)
+
+    # Total before truncating, so tokens hidden by the limit still count.
+    sol_value_usd = round(sol_price * sol_balance, 2) if sol_price else None
+    total_usd = sum(t.get("value_usd") or 0 for t in enriched)
+    total_usd += dust_value + (sol_value_usd or 0)
+
+    # Apply limit unless --all
+    total_tokens = len(enriched)
+    if not show_all and len(enriched) > limit:
+        enriched = enriched[:limit]
+
+    output = {
+        "address":     address,
+        "sol_balance":  round(sol_balance, 9),
+    }
+    if sol_price:
+        output["sol_price_usd"] = sol_price
+        output["sol_value_usd"] = sol_value_usd
+    output["tokens_shown"] = len(enriched)
+    if total_tokens > len(enriched):
+        output["tokens_hidden"] = total_tokens - len(enriched)
+    output["spl_tokens"] = enriched
+    if dust_count > 0:
+        output["dust_filtered"] = {"count": dust_count, "total_value_usd": round(dust_value, 4)}
+    output["nft_count"] = len(nfts)
+    if nfts:
+        output["nfts"] = [_token_label(n["mint"]) + f" ({_short_mint(n['mint'])})" for n in nfts[:10]]
+        if len(nfts) > 10:
+            output["nfts"].append(f"... and {len(nfts) - 10} more")
+    if total_usd > 0:
+        output["portfolio_total_usd"] = round(total_usd, 2)
+
+    print_json(output)
+
+
+# ---------------------------------------------------------------------------
+# 3. Transaction Details
+# ---------------------------------------------------------------------------
+
+def cmd_tx(args):
+    """Full transaction details by signature."""
+    result = rpc("getTransaction", [
+        args.signature,
+        {"encoding": "jsonParsed", "maxSupportedTransactionVersion": 0},
+    ])
+
+    if result is None:
+        sys.exit("Transaction not found (may be too old for public RPC history).")
+
+    meta         = result.get("meta", {}) or {}
+    msg          = result.get("transaction", {}).get("message", {})
+    account_keys = msg.get("accountKeys", [])
+
+    pre  = meta.get("preBalances",  [])
+    post = meta.get("postBalances", [])
+
+    balance_changes = []
+    for i, key in enumerate(account_keys):
+        acct_key = key["pubkey"] if isinstance(key, dict) else key
+        if i < len(pre) and i < len(post):
+            change = lamports_to_sol(post[i] - pre[i])
+            if change != 0:
+                balance_changes.append({"account": acct_key, "change_SOL": round(change, 9)})
+
+    programs = []
+    for ix in msg.get("instructions", []):
+        prog = ix.get("programId")
+        if prog is None and "programIdIndex" in ix:
+            k = account_keys[ix["programIdIndex"]]
+            prog = k["pubkey"] if isinstance(k, dict) else k
+        if prog:
+            programs.append(prog)
+
+    # Add USD value for SOL changes
+    sol_price = fetch_sol_price()
+    if sol_price and balance_changes:
+        for bc in balance_changes:
+            bc["change_USD"] = round(bc["change_SOL"] * sol_price, 2)
+
+    print_json({
+        "signature":        args.signature,
+        "slot":             result.get("slot"),
+        "block_time":       result.get("blockTime"),
+        "fee_SOL":          lamports_to_sol(meta.get("fee", 0)),
+        "status":           "success" if meta.get("err") is None else "failed",
+        "balance_changes":  balance_changes,
+        "programs_invoked": list(dict.fromkeys(programs)),
+    })
+
+
+# ---------------------------------------------------------------------------
+# 4. Token Info (enhanced with name + price)
+# ---------------------------------------------------------------------------
+
+def cmd_token(args):
+    """SPL token metadata, supply, decimals, price, top holders."""
+    mint = args.mint
+
+    mint_info = rpc("getAccountInfo", [mint, {"encoding": "jsonParsed"}])
+    if mint_info is None or mint_info.get("value") is None:
+        sys.exit("Mint account not found.")
+
+    parsed       = mint_info["value"]["data"]["parsed"]["info"]
+    decimals     = parsed.get("decimals", 0)
+    supply_raw   = int(parsed.get("supply", 0))
+    supply_human = supply_raw / (10 ** decimals) if decimals else supply_raw
+
+    largest = rpc("getTokenLargestAccounts", [mint])
+    holders = []
+    for acct in (largest.get("value") or [])[:5]:
+        amount = float(acct.get("uiAmountString") or 0)
+        pct = round((amount / supply_human * 100), 4) if supply_human > 0 else 0
+        holders.append({
+            "account": acct["address"],
+            "amount":  amount,
+            "percent": pct,
+        })
+
+    # Resolve name + price
+    token_meta = resolve_token_name(mint)
+    price_data = fetch_prices([mint])
+
+    out = {"mint": mint}
+    if token_meta:
+        out["name"] = token_meta["name"]
+        out["symbol"] = token_meta["symbol"]
+    out["decimals"] = decimals
+    out["supply"] = round(supply_human, min(decimals, 6))
+    out["mint_authority"] = parsed.get("mintAuthority")
+    out["freeze_authority"] = parsed.get("freezeAuthority")
+    if mint in price_data:
+        out["price_usd"] = price_data[mint]
+        out["market_cap_usd"] = round(price_data[mint] * supply_human, 0)
+    out["top_5_holders"] = holders
+
+    print_json(out)
+
+
+# ---------------------------------------------------------------------------
+# 5. Recent Activity
+# ---------------------------------------------------------------------------
+
+def cmd_activity(args):
+    """Print the most recent transaction signatures for ``args.address``.
+
+    ``args.limit`` is clamped to 1..25: the RPC documents a minimum of 1,
+    and 25 is this tool's advertised maximum.
+    """
+    limit = max(1, min(args.limit, 25))
+    result = rpc("getSignaturesForAddress", [args.address, {"limit": limit}])
+
+    txs = [
+        {"signature": item["signature"], "slot": item.get("slot"),
+         "block_time": item.get("blockTime"), "err": item.get("err")}
+        for item in (result or [])
+    ]
+
+    print_json({"address": args.address, "transactions": txs})
+
+
+# ---------------------------------------------------------------------------
+# 6. NFT Portfolio
+# ---------------------------------------------------------------------------
+
+def cmd_nft(args):
+    """NFTs owned by a wallet (amount=1 && decimals=0 heuristic)."""
+    result = rpc("getTokenAccountsByOwner", [
+        args.address,
+        {"programId": "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"},
+        {"encoding": "jsonParsed"},
+    ])
+
+    nfts = [
+        acct["account"]["data"]["parsed"]["info"]["mint"]
+        for acct in (result.get("value") or [])
+        if acct["account"]["data"]["parsed"]["info"]["tokenAmount"]["decimals"] == 0
+        and int(acct["account"]["data"]["parsed"]["info"]["tokenAmount"]["amount"]) == 1
+    ]
+
+    print_json({
+        "address":   args.address,
+        "nft_count": len(nfts),
+        "nfts":      nfts,
+        "note":      "Heuristic only. Compressed NFTs (cNFTs) are not detected.",
+    })
+
+
+# ---------------------------------------------------------------------------
+# 7. Whale Detector (enhanced with USD values)
+# ---------------------------------------------------------------------------
+
+def cmd_whales(args):
+    """Scan the latest block for large SOL transfers."""
+    min_lamports = int(args.min_sol * LAMPORTS_PER_SOL)
+
+    slot  = rpc("getSlot")
+    block = rpc("getBlock", [
+        slot,
+        {
+            "encoding": "jsonParsed",
+            "transactionDetails": "full",
+            "maxSupportedTransactionVersion": 0,
+            "rewards": False,
+        },
+    ])
+
+    if block is None:
+        sys.exit("Could not retrieve latest block.")
+
+    sol_price = fetch_sol_price()
+
+    whales = []
+    for tx in (block.get("transactions") or []):
+        meta = tx.get("meta", {}) or {}
+        if meta.get("err") is not None:
+            continue
+
+        msg          = tx["transaction"].get("message", {})
+        account_keys = msg.get("accountKeys", [])
+        pre          = meta.get("preBalances",  [])
+        post         = meta.get("postBalances", [])
+
+        for i in range(len(pre)):
+            change = post[i] - pre[i]
+            if change >= min_lamports:
+                k        = account_keys[i]
+                receiver = k["pubkey"] if isinstance(k, dict) else k
+                sender   = None
+                for j in range(len(pre)):
+                    if pre[j] - post[j] >= min_lamports:
+                        sk     = account_keys[j]
+                        sender = sk["pubkey"] if isinstance(sk, dict) else sk
+                        break
+                entry = {
+                    "sender":     sender,
+                    "receiver":   receiver,
+                    "amount_SOL": round(lamports_to_sol(change), 4),
+                }
+                if sol_price:
+                    entry["amount_USD"] = round(lamports_to_sol(change) * sol_price, 2)
+                whales.append(entry)
+
+    out = {
+        "slot":              slot,
+        "min_threshold_SOL": args.min_sol,
+        "large_transfers":   whales,
+        "note":              "Scans latest block only — point-in-time snapshot.",
+    }
+    if sol_price:
+        out["sol_price_usd"] = sol_price
+    print_json(out)
+
+
+# ---------------------------------------------------------------------------
+# 8. Price Lookup
+# ---------------------------------------------------------------------------
+
+def cmd_price(args):
+    """Quick price lookup for a token by mint address or known symbol."""
+    query = args.token
+
+    # Check if it's a known symbol
+    mint = _SYMBOL_TO_MINT.get(query.upper(), query)
+
+    # Try to resolve name
+    token_meta = resolve_token_name(mint)
+
+    # Fetch price
+    prices = fetch_prices([mint])
+
+    out = {"query": query, "mint": mint}
+    if token_meta:
+        out["name"] = token_meta["name"]
+        out["symbol"] = token_meta["symbol"]
+    if mint in prices:
+        out["price_usd"] = prices[mint]
+    else:
+        out["price_usd"] = None
+        out["note"] = "Price not available — token may not be listed on CoinGecko."
+    print_json(out)
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def main():
+    parser = argparse.ArgumentParser(
+        prog="solana_client.py",
+        description="Solana blockchain query tool for Hermes Agent",
+    )
+    sub = parser.add_subparsers(dest="command", required=True)
+
+    sub.add_parser("stats", help="Network stats: slot, epoch, TPS, supply, SOL price")
+
+    p_wallet = sub.add_parser("wallet", help="SOL balance + SPL tokens with USD values")
+    p_wallet.add_argument("address")
+    p_wallet.add_argument("--limit", type=int, default=20,
+                          help="Max tokens to display (default: 20)")
+    p_wallet.add_argument("--all", action="store_true",
+                          help="Show all tokens (no limit, no dust filter)")
+    p_wallet.add_argument("--no-prices", action="store_true",
+                          help="Skip price lookups (faster, RPC-only)")
+
+    p_tx = sub.add_parser("tx", help="Transaction details by signature")
+    p_tx.add_argument("signature")
+
+    p_token = sub.add_parser("token", help="SPL token metadata, price, and top holders")
+    p_token.add_argument("mint")
+
+    p_activity = sub.add_parser("activity", help="Recent transactions for an address")
+    p_activity.add_argument("address")
+    p_activity.add_argument("--limit", type=int, default=10,
+                            help="Number of transactions (max 25, default 10)")
+
+    p_nft = sub.add_parser("nft", help="NFT portfolio for a wallet")
+    p_nft.add_argument("address")
+
+    p_whales = sub.add_parser("whales", help="Large SOL transfers in the latest block")
+    p_whales.add_argument("--min-sol", type=float, default=1000.0,
+                          help="Minimum SOL transfer size (default: 1000)")
+
+    p_price = sub.add_parser("price", help="Quick price lookup by mint or symbol")
+    p_price.add_argument("token", help="Mint address or known symbol (SOL, BONK, JUP, ...)")
+
+    args = parser.parse_args()
+
+    dispatch = {
+        "stats":    cmd_stats,
+        "wallet":   cmd_wallet,
+        "tx":       cmd_tx,
+        "token":    cmd_token,
+        "activity": cmd_activity,
+        "nft":      cmd_nft,
+        "whales":   cmd_whales,
+        "price":    cmd_price,
+    }
+    dispatch[args.command](args)
+
+
+if __name__ == "__main__":
+    main()
--- a/optional-skills/email/agentmail/SKILL.md
+++ b/optional-skills/email/agentmail/SKILL.md
@@ -0,0 +1,125 @@
+---
+name: agentmail
+description: Give the agent its own dedicated email inbox via AgentMail. Send, receive, and manage email autonomously using agent-owned email addresses (e.g. hermes-agent@agentmail.to).
+version: 1.0.0
+metadata:
+  hermes:
+    tags: [email, communication, agentmail, mcp]
+    category: email
+---
+
+# AgentMail — Agent-Owned Email Inboxes
+
+## Requirements
+
+- **AgentMail API key** (required) — sign up at https://console.agentmail.to (free tier: 3 inboxes, 3,000 emails/month; paid plans from $20/mo)
+- Node.js 18+ (for the MCP server)
+
+## When to Use
+Use this skill when you need to:
+- Give the agent its own dedicated email address
+- Send emails autonomously on behalf of the agent
+- Receive and read incoming emails
+- Manage email threads and conversations
+- Sign up for services or authenticate via email
+- Communicate with other agents or humans via email
+
+This is NOT for reading the user's personal email (use himalaya or Gmail for that).
+AgentMail gives the agent its own identity and inbox.
+
+## Setup
+
+### 1. Get an API Key
+- Go to https://console.agentmail.to
+- Create an account and generate an API key (starts with `am_`)
+
+### 2. Configure MCP Server
+Add to `~/.hermes/config.yaml` (paste your actual key — MCP env vars are not expanded from .env):
+```yaml
+mcp_servers:
+  agentmail:
+    command: "npx"
+    args: ["-y", "agentmail-mcp"]
+    env:
+      AGENTMAIL_API_KEY: "am_your_key_here"
+```
+
+### 3. Restart Hermes
+```bash
+hermes
+```
+All 11 AgentMail tools are now available automatically.
+
+## Available Tools (via MCP)
+
+| Tool | Description |
+|------|-------------|
+| `list_inboxes` | List all agent inboxes |
+| `get_inbox` | Get details of a specific inbox |
+| `create_inbox` | Create a new inbox (gets a real email address) |
+| `delete_inbox` | Delete an inbox |
+| `list_threads` | List email threads in an inbox |
+| `get_thread` | Get a specific email thread |
+| `send_message` | Send a new email |
+| `reply_to_message` | Reply to an existing email |
+| `forward_message` | Forward an email |
+| `update_message` | Update message labels/status |
+| `get_attachment` | Download an email attachment |
+
+## Procedure
+
+### Create an inbox and send an email
+1. Create a dedicated inbox:
+   - Use `create_inbox` with a username (e.g. `hermes-agent`)
+   - The agent gets address: `hermes-agent@agentmail.to`
+2. Send an email:
+   - Use `send_message` with `inbox_id`, `to`, `subject`, `text`
+3. Check for replies:
+   - Use `list_threads` to see incoming conversations
+   - Use `get_thread` to read a specific thread
+
+### Check incoming email
+1. Use `list_inboxes` to find your inbox ID
+2. Use `list_threads` with the inbox ID to see conversations
+3. Use `get_thread` to read a thread and its messages
+
+### Reply to an email
+1. Get the thread with `get_thread`
+2. Use `reply_to_message` with the message ID and your reply text
+
+## Example Workflows
+
+**Sign up for a service:**
+```
+1. create_inbox (username: "signup-bot")
+2. Use the inbox address to register on the service
+3. list_threads to check for verification email
+4. get_thread to read the verification code
+```
+
+**Agent-to-human outreach:**
+```
+1. create_inbox (username: "hermes-outreach")
+2. send_message (to: user@example.com, subject: "Hello", text: "...")
+3. list_threads to check for replies
+```
+
+## Pitfalls
+- Free tier limited to 3 inboxes and 3,000 emails/month
+- Emails come from `@agentmail.to` domain on free tier (custom domains on paid plans)
+- Node.js (18+) is required for the MCP server (`npx -y agentmail-mcp`)
+- The `mcp` Python package must be installed: `pip install mcp`
+- Real-time inbound email (webhooks) requires a publicly reachable server — for personal use, poll `list_threads` from a cronjob instead
+
+## Verification
+After setup, test with:
+```
+hermes --toolsets mcp -q "Create an AgentMail inbox called test-agent and tell me its email address"
+```
+You should see the new inbox address returned.
+
+## References
+- AgentMail docs: https://docs.agentmail.to/
+- AgentMail console: https://console.agentmail.to
+- AgentMail MCP repo: https://github.com/agentmail-to/agentmail-mcp
+- Pricing: https://www.agentmail.to/pricing
--- a/optional-skills/research/qmd/SKILL.md
+++ b/optional-skills/research/qmd/SKILL.md
@@ -0,0 +1,441 @@
+---
+name: qmd
+description: Search personal knowledge bases, notes, docs, and meeting transcripts locally using qmd — a hybrid retrieval engine with BM25, vector search, and LLM reranking. Supports CLI and MCP integration.
+version: 1.0.0
+author: Hermes Agent + Teknium
+license: MIT
+platforms: [macos, linux]
+metadata:
+  hermes:
+    tags: [Search, Knowledge-Base, RAG, Notes, MCP, Local-AI]
+    related_skills: [obsidian, native-mcp, arxiv]
+---
+
+# QMD — Query Markup Documents
+
+Local, on-device search engine for personal knowledge bases. Indexes markdown
+notes, meeting transcripts, documentation, and any text-based files, then
+provides hybrid search combining keyword matching, semantic understanding, and
+LLM-powered reranking — all running locally with no cloud dependencies.
+
+Created by [Tobi Lütke](https://github.com/tobi/qmd). MIT licensed.
+
+## When to Use
+
+- User asks to search their notes, docs, knowledge base, or meeting transcripts
+- User wants to find something across a large collection of markdown/text files
+- User wants semantic search ("find notes about X concept") not just keyword grep
+- User has already set up qmd collections and wants to query them
+- User asks to set up a local knowledge base or document search system
+- Keywords: "search my notes", "find in my docs", "knowledge base", "qmd"
+
+## Prerequisites
+
+### Node.js >= 22 (required)
+
+```bash
+# Check version
+node --version  # must be >= 22
+
+# macOS — install or upgrade via Homebrew
+brew install node@22
+
+# Linux — use NodeSource or nvm
+curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash -
+sudo apt-get install -y nodejs
+# or with nvm:
+nvm install 22 && nvm use 22
+```
+
+### SQLite with Extension Support (macOS only)
+
+macOS system SQLite lacks extension loading. Install via Homebrew:
+
+```bash
+brew install sqlite
+```
+
+### Install qmd
+
+```bash
+npm install -g @tobilu/qmd
+# or with Bun:
+bun install -g @tobilu/qmd
+```
+
+First run auto-downloads 3 local GGUF models (~2GB total):
+
+| Model | Purpose | Size |
+|-------|---------|------|
+| embeddinggemma-300M-Q8_0 | Vector embeddings | ~300MB |
+| qwen3-reranker-0.6b-q8_0 | Result reranking | ~640MB |
+| qmd-query-expansion-1.7B | Query expansion | ~1.1GB |
+
+### Verify Installation
+
+```bash
+qmd --version
+qmd status
+```
+
+## Quick Reference
+
+| Command | What It Does | Speed |
+|---------|-------------|-------|
+| `qmd search "query"` | BM25 keyword search (no models) | ~0.2s |
+| `qmd vsearch "query"` | Semantic vector search (1 model) | ~3s |
+| `qmd query "query"` | Hybrid + reranking (all 3 models) | ~2-3s warm, ~19s cold |
+| `qmd get <docid>` | Retrieve full document content | instant |
+| `qmd multi-get "glob"` | Retrieve multiple files | instant |
+| `qmd collection add <path> --name <n>` | Add a directory as a collection | instant |
+| `qmd context add <path> "description"` | Add context metadata to improve retrieval | instant |
+| `qmd embed` | Generate/update vector embeddings | varies |
+| `qmd status` | Show index health and collection info | instant |
+| `qmd mcp` | Start MCP server (stdio) | persistent |
+| `qmd mcp --http --daemon` | Start MCP server (HTTP, warm models) | persistent |
+
+## Setup Workflow
+
+### 1. Add Collections
+
+Point qmd at directories containing your documents:
+
+```bash
+# Add a notes directory
+qmd collection add ~/notes --name notes
+
+# Add project docs
+qmd collection add ~/projects/myproject/docs --name project-docs
+
+# Add meeting transcripts
+qmd collection add ~/meetings --name meetings
+
+# List all collections
+qmd collection list
+```
+
+### 2. Add Context Descriptions
+
+Context metadata helps the search engine understand what each collection
+contains. This significantly improves retrieval quality:
+
+```bash
+qmd context add qmd://notes "Personal notes, ideas, and journal entries"
+qmd context add qmd://project-docs "Technical documentation for the main project"
+qmd context add qmd://meetings "Meeting transcripts and action items from team syncs"
+```
+
+### 3. Generate Embeddings
+
+```bash
+qmd embed
+```
+
+This processes all documents in all collections and generates vector
+embeddings. Re-run after adding new documents or collections.
+
+### 4. Verify
+
+```bash
+qmd status   # shows index health, collection stats, model info
+```
+
+## Search Patterns
+
+### Fast Keyword Search (BM25)
+
+Best for: exact terms, code identifiers, names, known phrases.
+No models loaded — near-instant results.
+
+```bash
+qmd search "authentication middleware"
+qmd search "handleError async"
+```
+
+### Semantic Vector Search
+
+Best for: natural language questions, conceptual queries.
+Loads embedding model (~3s first query).
+
+```bash
+qmd vsearch "how does the rate limiter handle burst traffic"
+qmd vsearch "ideas for improving onboarding flow"
+```
+
+### Hybrid Search with Reranking (Best Quality)
+
+Best for: important queries where quality matters most.
+Uses all 3 models — query expansion, parallel BM25+vector, reranking.
+
+```bash
+qmd query "what decisions were made about the database migration"
+```
+
+### Structured Multi-Mode Queries
+
+Combine different search types in a single query for precision:
+
+```bash
+# BM25 for exact term + vector for concept
+qmd query $'lex: rate limiter\nvec: how does throttling work under load'
+
+# With query expansion
+qmd query $'expand: database migration plan\nlex: "schema change"'
+```
+
+### Query Syntax (lex/BM25 mode)
+
+| Syntax | Effect | Example |
+|--------|--------|---------|
+| `term` | Prefix match | `perf` matches "performance" |
+| `"phrase"` | Exact phrase | `"rate limiter"` |
+| `-term` | Exclude term | `performance -sports` |
+
+### HyDE (Hypothetical Document Embeddings)
+
+For complex topics, write what you expect the answer to look like:
+
+```bash
+qmd query $'hyde: The migration plan involves three phases. First, we add the new columns without dropping the old ones. Then we backfill data. Finally we cut over and remove legacy columns.'
+```
+
+### Scoping to Collections
+
+```bash
+qmd search "query" --collection notes
+qmd query "query" --collection project-docs
+```
+
+### Output Formats
+
+```bash
+qmd search "query" --json        # JSON output (best for parsing)
+qmd search "query" --limit 5     # Limit results
+qmd get "#abc123"                # Get by document ID
+qmd get "path/to/file.md"       # Get by file path
+qmd get "file.md:50" -l 100     # Get specific line range
+qmd multi-get "journals/*.md" --json  # Batch retrieve by glob
+```
+
+## MCP Integration (Recommended)
+
+qmd exposes an MCP server that provides search tools directly to
+Hermes Agent via the native MCP client. This is the preferred
+integration — once configured, the agent gets qmd tools automatically
+without needing to load this skill.
+
+### Option A: Stdio Mode (Simple)
+
+Add to `~/.hermes/config.yaml`:
+
+```yaml
+mcp_servers:
+  qmd:
+    command: "qmd"
+    args: ["mcp"]
+    timeout: 30
+    connect_timeout: 45
+```
+
+This registers tools: `mcp_qmd_search`, `mcp_qmd_vsearch`,
+`mcp_qmd_deep_search`, `mcp_qmd_get`, `mcp_qmd_status`.
+
+**Tradeoff:** Models load on first search call (~19s cold start),
+then stay warm for the session. Acceptable for occasional use.
+
+### Option B: HTTP Daemon Mode (Fast, Recommended for Heavy Use)
+
+Start the qmd daemon separately — it keeps models warm in memory:
+
+```bash
+# Start daemon (persists across agent restarts)
+qmd mcp --http --daemon
+
+# Runs on http://localhost:8181 by default
+```
+
+Then configure Hermes Agent to connect via HTTP:
+
+```yaml
+mcp_servers:
+  qmd:
+    url: "http://localhost:8181/mcp"
+    timeout: 30
+```
+
+**Tradeoff:** Uses ~2GB RAM while running, but every query is fast
+(~2-3s). Best for users who search frequently.
+
+### Keeping the Daemon Running
+
+#### macOS (launchd)
+
+```bash
+cat > ~/Library/LaunchAgents/com.qmd.daemon.plist << 'EOF'
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
+  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>Label</key>
+  <string>com.qmd.daemon</string>
+  <key>ProgramArguments</key>
+  <array>
+    <string>qmd</string>
+    <string>mcp</string>
+    <string>--http</string>
+    <string>--daemon</string>
+  </array>
+  <key>RunAtLoad</key>
+  <true/>
+  <key>KeepAlive</key>
+  <true/>
+  <key>StandardOutPath</key>
+  <string>/tmp/qmd-daemon.log</string>
+  <key>StandardErrorPath</key>
+  <string>/tmp/qmd-daemon.log</string>
+</dict>
+</plist>
+EOF
+
+launchctl load ~/Library/LaunchAgents/com.qmd.daemon.plist
+```
+
+#### Linux (systemd user service)
+
+```bash
+mkdir -p ~/.config/systemd/user
+
+cat > ~/.config/systemd/user/qmd-daemon.service << 'EOF'
+[Unit]
+Description=QMD MCP Daemon
+After=network.target
+
+[Service]
+ExecStart=qmd mcp --http --daemon
+Restart=on-failure
+RestartSec=10
+Environment=PATH=/usr/local/bin:/usr/bin:/bin
+
+[Install]
+WantedBy=default.target
+EOF
+
+systemctl --user daemon-reload
+systemctl --user enable --now qmd-daemon
+systemctl --user status qmd-daemon
+```
+
+### MCP Tools Reference
+
+Once connected, these tools are available as `mcp_qmd_*`:
+
+| MCP Tool | Maps To | Description |
+|----------|---------|-------------|
+| `mcp_qmd_search` | `qmd search` | BM25 keyword search |
+| `mcp_qmd_vsearch` | `qmd vsearch` | Semantic vector search |
+| `mcp_qmd_deep_search` | `qmd query` | Hybrid search + reranking |
+| `mcp_qmd_get` | `qmd get` | Retrieve document by ID or path |
+| `mcp_qmd_status` | `qmd status` | Index health and stats |
+
+The MCP tools accept structured JSON queries for multi-mode search:
+
+```json
+{
+  "searches": [
+    {"type": "lex", "query": "authentication middleware"},
+    {"type": "vec", "query": "how user login is verified"}
+  ],
+  "collections": ["project-docs"],
+  "limit": 10
+}
+```
+
+## CLI Usage (Without MCP)
+
+When MCP is not configured, use qmd directly via terminal:
+
+```
+terminal(command="qmd query 'what was decided about the API redesign' --json", timeout=30)
+```
+
+For setup and management tasks, always use terminal:
+
+```
+terminal(command="qmd collection add ~/Documents/notes --name notes")
+terminal(command="qmd context add qmd://notes 'Personal research notes and ideas'")
+terminal(command="qmd embed")
+terminal(command="qmd status")
+```
+
+## How the Search Pipeline Works
+
+Understanding the internals helps choose the right search mode:
+
+1. **Query Expansion** — A fine-tuned 1.7B model generates 2 alternative
+   queries. The original gets 2x weight in fusion.
+2. **Parallel Retrieval** — BM25 (SQLite FTS5) and vector search run
+   simultaneously across all query variants.
+3. **RRF Fusion** — Reciprocal Rank Fusion (k=60) merges results.
+   Top-rank bonus: #1 gets +0.05, #2-3 get +0.02.
+4. **LLM Reranking** — qwen3-reranker scores top 30 candidates (0.0-1.0).
+5. **Position-Aware Blending** — Ranks 1-3: 75% retrieval / 25% reranker.
+   Ranks 4-10: 60/40. Ranks 11+: 40/60 (trusts reranker more for long tail).
+
+**Smart Chunking:** Documents are split at natural break points (headings,
+code blocks, blank lines) targeting ~900 tokens with 15% overlap. Code
+blocks are never split mid-block.
+
+## Best Practices
+
+1. **Always add context descriptions** — `qmd context add` dramatically
+   improves retrieval accuracy. Describe what each collection contains.
+2. **Re-embed after adding documents** — `qmd embed` must be re-run when
+   new files are added to collections.
+3. **Use `qmd search` for speed** — when you need fast keyword lookup
+   (code identifiers, exact names), BM25 is instant and needs no models.
+4. **Use `qmd query` for quality** — when the question is conceptual or
+   the user needs the best possible results, use hybrid search.
+5. **Prefer MCP integration** — once configured, the agent gets native
+   tools without needing to load this skill each time.
+6. **Daemon mode for frequent users** — if the user searches their
+   knowledge base regularly, recommend the HTTP daemon setup.
+7. **First query in structured search gets 2x weight** — put the most
+   important/certain query first when combining lex and vec.
+
+## Troubleshooting
+
+### "Models downloading on first run"
+Normal — qmd auto-downloads ~2GB of GGUF models on first use.
+This is a one-time operation.
+
+### Cold start latency (~19s)
+This happens when models aren't loaded in memory. Solutions:
+- Use HTTP daemon mode (`qmd mcp --http --daemon`) to keep warm
+- Use `qmd search` (BM25 only) when models aren't needed
+- MCP stdio mode loads models on first search, stays warm for session
+
+### macOS: "unable to load extension"
+Install Homebrew SQLite: `brew install sqlite`
+Then make sure the Homebrew SQLite comes before the system SQLite on your PATH.
+
+### "No collections found"
+Run `qmd collection add <path> --name <name>` to add directories,
+then `qmd embed` to index them.
+
+### Embedding model override (CJK/multilingual)
+Set `QMD_EMBED_MODEL` environment variable for non-English content:
+```bash
+export QMD_EMBED_MODEL="your-multilingual-model"
+```
+
+## Data Storage
+
+- **Index & vectors:** `~/.cache/qmd/index.sqlite`
+- **Models:** Auto-downloaded to local cache on first run
+- **No cloud dependencies** — everything runs locally
+
+## References
+
+- [GitHub: tobi/qmd](https://github.com/tobi/qmd)
+- [QMD Changelog](https://github.com/tobi/qmd/blob/main/CHANGELOG.md)
--- a/run_agent.py
+++ b/run_agent.py
@@ -183,6 +183,7 @@ class AIAgent:
        session_db=None,
        honcho_session_key: str = None,
        iteration_budget: "IterationBudget" = None,
+        fallback_model: Dict[str, Any] = None,
    ):
        """
        Initialize the AI Agent.
@@ -213,7 +214,7 @@ class AIAgent:
                Provided by the platform layer (CLI or gateway). If None, the clarify tool returns an error.
            max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
            reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking).
-                If None, defaults to {"enabled": True, "effort": "xhigh"} for OpenRouter. Set to disable/customize reasoning.
+                If None, defaults to {"enabled": True, "effort": "medium"} for OpenRouter. Set to disable/customize reasoning.
            prefill_messages (List[Dict]): Messages to prepend to conversation history as prefilled context.
                Useful for injecting a few-shot example or priming the model's response style.
                Example: [{"role": "user", "content": "Hi!"}, {"role": "assistant", "content": "Hello!"}]
@@ -253,13 +254,7 @@ class AIAgent:
            self.provider = "openai-codex"
        else:
            self.api_mode = "chat_completions"
-        if base_url and "api.anthropic.com" in base_url.strip().lower():
-            raise ValueError(
-                "Anthropic's native /v1/messages API is not supported yet (planned for a future release). "
-                "Hermes currently requires OpenAI-compatible /chat/completions endpoints. "
-                "To use Claude models now, route through OpenRouter (OPENROUTER_API_KEY) "
-                "or any OpenAI-compatible proxy that wraps the Anthropic API."
-            )
+
        self.tool_progress_callback = tool_progress_callback
        self.clarify_callback = clarify_callback
        self.step_callback = step_callback
@@ -287,7 +282,7 @@ class AIAgent:
        
        # Model response configuration
        self.max_tokens = max_tokens  # None = use model default
-        self.reasoning_config = reasoning_config  # None = use default (xhigh for OpenRouter)
+        self.reasoning_config = reasoning_config  # None = use default (medium for OpenRouter)
        self.prefill_messages = prefill_messages or []  # Prefilled conversation turns
        
        # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
@@ -389,6 +384,12 @@ class AIAgent:
                "X-OpenRouter-Title": "Hermes Agent",
                "X-OpenRouter-Categories": "productivity,cli-agent",
            }
+        elif "api.kimi.com" in effective_base.lower():
+            # Kimi Code API requires a recognized coding-agent User-Agent
+            # (see https://github.com/MoonshotAI/kimi-cli)
+            client_kwargs["default_headers"] = {
+                "User-Agent": "KimiCLI/1.0",
+            }
        
        self._client_kwargs = client_kwargs  # stored for rebuilding after interrupt
        try:
@@ -406,6 +407,17 @@ class AIAgent:
        except Exception as e:
            raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
        
+        # Provider fallback — a single backup model/provider tried when the
+        # primary is exhausted (rate-limit, overload, connection failure).
+        # Config shape: {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
+        self._fallback_model = fallback_model if isinstance(fallback_model, dict) else None
+        self._fallback_activated = False
+        if self._fallback_model:
+            fb_p = self._fallback_model.get("provider", "")
+            fb_m = self._fallback_model.get("model", "")
+            if fb_p and fb_m and not self.quiet_mode:
+                print(f"🔄 Fallback model: {fb_m} ({fb_p})")
+
        # Get available tools with filtering
        self.tools = get_tool_definitions(
            enabled_toolsets=enabled_toolsets,
@@ -2146,6 +2158,141 @@ class AIAgent:
            raise result["error"]
        return result["response"]

+    # ── Provider fallback ──────────────────────────────────────────────────
+
+    # API-key providers: provider → (base_url, [env_var_names])
+    # The env var names are tried in list order by
+    # _resolve_fallback_credentials; the first non-empty value becomes the key.
+    # base_url is used only when the fallback config supplies no "base_url".
+    _FALLBACK_API_KEY_PROVIDERS = {
+        "openrouter": (OPENROUTER_BASE_URL, ["OPENROUTER_API_KEY"]),
+        "zai": ("https://api.z.ai/api/paas/v4", ["ZAI_API_KEY", "Z_AI_API_KEY"]),
+        "kimi-coding": ("https://api.moonshot.ai/v1", ["KIMI_API_KEY"]),
+        "minimax": ("https://api.minimax.io/v1", ["MINIMAX_API_KEY"]),
+        "minimax-cn": ("https://api.minimaxi.com/v1", ["MINIMAX_CN_API_KEY"]),
+    }
+
+    # OAuth providers: provider → (resolver_name, api_mode)
+    # resolver_name is looked up as an attribute of the hermes_cli.auth module
+    # and called with no arguments.
+    # Each resolver returns {"api_key": ..., "base_url": ...}.
+    _FALLBACK_OAUTH_PROVIDERS = {
+        "openai-codex": ("resolve_codex_runtime_credentials", "codex_responses"),
+        "nous": ("resolve_nous_runtime_credentials", "chat_completions"),
+    }
+
+    def _resolve_fallback_credentials(
+        self, fb_provider: str, fb_config: dict
+    ) -> Optional[tuple]:
+        """Resolve credentials for a fallback provider.
+
+        Returns (api_key, base_url, api_mode) on success, or None on failure.
+        Handles three cases:
+          1. OAuth providers (openai-codex, nous) — call credential resolver
+          2. API-key providers (openrouter, zai, etc.) — read env var
+          3. Custom endpoints — use base_url + api_key_env from config
+        """
+        # ── 1. OAuth providers ────────────────────────────────────────
+        if fb_provider in self._FALLBACK_OAUTH_PROVIDERS:
+            resolver_name, api_mode = self._FALLBACK_OAUTH_PROVIDERS[fb_provider]
+            try:
+                import hermes_cli.auth as _auth
+                resolver = getattr(_auth, resolver_name)
+                creds = resolver()
+                return creds["api_key"], creds["base_url"], api_mode
+            except Exception as e:
+                logging.warning(
+                    "Fallback to %s failed (credential resolution): %s",
+                    fb_provider, e,
+                )
+                return None
+
+        # ── 2. API-key providers ──────────────────────────────────────
+        fb_key = (fb_config.get("api_key") or "").strip()
+        if not fb_key:
+            key_env = (fb_config.get("api_key_env") or "").strip()
+            if key_env:
+                fb_key = os.getenv(key_env, "")
+            elif fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
+                for env_var in self._FALLBACK_API_KEY_PROVIDERS[fb_provider][1]:
+                    fb_key = os.getenv(env_var, "")
+                    if fb_key:
+                        break
+        if not fb_key:
+            logging.warning(
+                "Fallback model configured but no API key found for provider '%s'",
+                fb_provider,
+            )
+            return None
+
+        # ── 3. Resolve base URL ───────────────────────────────────────
+        fb_base_url = (fb_config.get("base_url") or "").strip()
+        if not fb_base_url and fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
+            fb_base_url = self._FALLBACK_API_KEY_PROVIDERS[fb_provider][0]
+        if not fb_base_url:
+            fb_base_url = OPENROUTER_BASE_URL
+
+        return fb_key, fb_base_url, "chat_completions"
+
+    def _try_activate_fallback(self) -> bool:
+        """Switch to the configured fallback model/provider.
+
+        Called when the primary model is failing after retries.  Swaps the
+        OpenAI client, model slug, and provider in-place so the retry loop
+        can continue with the new backend.  One-shot: returns False if
+        already activated or not configured.
+        """
+        if self._fallback_activated or not self._fallback_model:
+            return False
+
+        fb = self._fallback_model
+        fb_provider = (fb.get("provider") or "").strip().lower()
+        fb_model = (fb.get("model") or "").strip()
+        if not fb_provider or not fb_model:
+            return False
+
+        resolved = self._resolve_fallback_credentials(fb_provider, fb)
+        if resolved is None:
+            return False
+        fb_key, fb_base_url, fb_api_mode = resolved
+
+        # Build new client
+        try:
+            client_kwargs = {"api_key": fb_key, "base_url": fb_base_url}
+            if "openrouter" in fb_base_url.lower():
+                client_kwargs["default_headers"] = {
+                    "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
+                    "X-OpenRouter-Title": "Hermes Agent",
+                    "X-OpenRouter-Categories": "productivity,cli-agent",
+                }
+            elif "api.kimi.com" in fb_base_url.lower():
+                client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
+
+            self.client = OpenAI(**client_kwargs)
+            self._client_kwargs = client_kwargs
+            old_model = self.model
+            self.model = fb_model
+            self.provider = fb_provider
+            self.base_url = fb_base_url
+            self.api_mode = fb_api_mode
+            self._fallback_activated = True
+
+            # Re-evaluate prompt caching for the new provider/model
+            self._use_prompt_caching = (
+                "openrouter" in fb_base_url.lower()
+                and "claude" in fb_model.lower()
+            )
+
+            print(
+                f"{self.log_prefix}🔄 Primary model failed — switching to fallback: "
+                f"{fb_model} via {fb_provider}"
+            )
+            logging.info(
+                "Fallback activated: %s → %s (%s)",
+                old_model, fb_model, fb_provider,
+            )
+            return True
+        except Exception as e:
+            logging.error("Failed to activate fallback model: %s", e)
+            return False
+
+    # ── End provider fallback ──────────────────────────────────────────────
+
    def _build_api_kwargs(self, api_messages: list) -> dict:
        """Build the keyword arguments dict for the active API mode."""
        if self.api_mode == "codex_responses":
@@ -2157,8 +2304,8 @@ class AIAgent:
            if not instructions:
                instructions = DEFAULT_AGENT_IDENTITY

-            # Resolve reasoning effort: config > default (xhigh)
-            reasoning_effort = "xhigh"
+            # Resolve reasoning effort: config > default (medium)
+            reasoning_effort = "medium"
            reasoning_enabled = True
            if self.reasoning_config and isinstance(self.reasoning_config, dict):
                if self.reasoning_config.get("enabled") is False:
@@ -2224,7 +2371,7 @@ class AIAgent:
            else:
                extra_body["reasoning"] = {
                    "enabled": True,
-                    "effort": "xhigh"
+                    "effort": "medium"
                }

        # Nous Portal product attribution
@@ -2484,6 +2631,8 @@ class AIAgent:

        if self._session_db:
            try:
+                # Propagate title to the new session with auto-numbering
+                old_title = self._session_db.get_session_title(self.session_id)
                self._session_db.end_session(self.session_id, "compression")
                old_session_id = self.session_id
                self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
@@ -2493,6 +2642,13 @@ class AIAgent:
                    model=self.model,
                    parent_session_id=old_session_id,
                )
+                # Auto-number the title for the continuation session
+                if old_title:
+                    try:
+                        new_title = self._session_db.get_next_title_in_lineage(old_title)
+                        self._session_db.set_session_title(self.session_id, new_title)
+                    except (ValueError, Exception) as e:
+                        logger.debug("Could not propagate title on compression: %s", e)
                self._session_db.update_system_prompt(self.session_id, new_system_prompt)
            except Exception as e:
                logger.debug("Session DB compression split failed: %s", e)
@@ -2510,9 +2666,10 @@ class AIAgent:
                if remaining_calls:
                    print(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)")
                for skipped_tc in remaining_calls:
+                    skipped_name = skipped_tc.function.name
                    skip_msg = {
                        "role": "tool",
-                        "content": "[Tool execution cancelled - user interrupted]",
+                        "content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
                        "tool_call_id": skipped_tc.id,
                    }
                    messages.append(skip_msg)
@@ -2619,7 +2776,6 @@ class AIAgent:
                        context=function_args.get("context"),
                        toolsets=function_args.get("toolsets"),
                        tasks=tasks_arg,
-                        model=function_args.get("model"),
                        max_iterations=function_args.get("max_iterations"),
                        parent_agent=self,
                    )
@@ -2716,9 +2872,10 @@ class AIAgent:
                remaining = len(assistant_message.tool_calls) - i
                print(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)")
                for skipped_tc in assistant_message.tool_calls[i:]:
+                    skipped_name = skipped_tc.function.name
                    skip_msg = {
                        "role": "tool",
-                        "content": "[Tool execution skipped - user sent a new message]",
+                        "content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
                        "tool_call_id": skipped_tc.id
                    }
                    messages.append(skip_msg)
@@ -2768,7 +2925,7 @@ class AIAgent:
                else:
                    summary_extra_body["reasoning"] = {
                        "enabled": True,
-                        "effort": "xhigh"
+                        "effort": "medium"
                    }
            if _is_nous:
                summary_extra_body["tags"] = ["product=hermes-agent"]
@@ -2880,13 +3037,15 @@ class AIAgent:
        # Generate unique task_id if not provided to isolate VMs between concurrent tasks
        effective_task_id = task_id or str(uuid.uuid4())
        
-        # Reset retry counters at the start of each conversation to prevent state leakage
+        # Reset retry counters and iteration budget at the start of each turn
+        # so subagent usage from a previous turn doesn't eat into the next one.
        self._invalid_tool_retries = 0
        self._invalid_json_retries = 0
        self._empty_content_retries = 0
        self._last_content_with_tools = None
        self._turns_since_memory = 0
        self._iters_since_skill = 0
+        self.iteration_budget = IterationBudget(self.max_iterations)
        
        # Initialize conversation (copy to avoid mutating the caller's list)
        messages = list(conversation_history) if conversation_history else []
@@ -2933,9 +3092,14 @@ class AIAgent:
            )
            self._iters_since_skill = 0

-        # Honcho prefetch: retrieve user context for system prompt injection
+        # Honcho prefetch: retrieve user context for system prompt injection.
+        # Only on the FIRST turn of a session (empty history).  On subsequent
+        # turns the model already has all prior context in its conversation
+        # history, and the Honcho context is baked into the stored system
+        # prompt — re-fetching it would change the system message and break
+        # Anthropic prompt caching.
        self._honcho_context = ""
-        if self._honcho and self._honcho_session_key:
+        if self._honcho and self._honcho_session_key and not conversation_history:
            try:
                self._honcho_context = self._honcho_prefetch(user_message)
            except Exception as e:
@@ -2953,14 +3117,42 @@ class AIAgent:
        # Built once on first call, reused for all subsequent calls.
        # Only rebuilt after context compression events (which invalidate
        # the cache and reload memory from disk).
+        #
+        # For continuing sessions (gateway creates a fresh AIAgent per
+        # message), we load the stored system prompt from the session DB
+        # instead of rebuilding.  Rebuilding would pick up memory changes
+        # from disk that the model already knows about (it wrote them!),
+        # producing a different system prompt and breaking the Anthropic
+        # prefix cache.
        if self._cached_system_prompt is None:
-            self._cached_system_prompt = self._build_system_prompt(system_message)
-            # Store the system prompt snapshot in SQLite
-            if self._session_db:
+            stored_prompt = None
+            if conversation_history and self._session_db:
                try:
-                    self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt)
-                except Exception as e:
-                    logger.debug("Session DB update_system_prompt failed: %s", e)
+                    session_row = self._session_db.get_session(self.session_id)
+                    if session_row:
+                        stored_prompt = session_row.get("system_prompt") or None
+                except Exception:
+                    pass  # Fall through to build fresh
+
+            if stored_prompt:
+                # Continuing session — reuse the exact system prompt from
+                # the previous turn so the Anthropic cache prefix matches.
+                self._cached_system_prompt = stored_prompt
+            else:
+                # First turn of a new session — build from scratch.
+                self._cached_system_prompt = self._build_system_prompt(system_message)
+                # Bake Honcho context into the prompt so it's stable for
+                # the entire session (not re-fetched per turn).
+                if self._honcho_context:
+                    self._cached_system_prompt = (
+                        self._cached_system_prompt + "\n\n" + self._honcho_context
+                    ).strip()
+                # Store the system prompt snapshot in SQLite
+                if self._session_db:
+                    try:
+                        self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt)
+                    except Exception as e:
+                        logger.debug("Session DB update_system_prompt failed: %s", e)

        active_system_prompt = self._cached_system_prompt

@@ -3085,11 +3277,13 @@ class AIAgent:
            # Build the final system message: cached prompt + ephemeral system prompt.
            # The ephemeral part is appended here (not baked into the cached prompt)
            # so it stays out of the session DB and logs.
+            # Note: Honcho context is baked into _cached_system_prompt on the first
+            # turn and stored in the session DB, so it does NOT need to be injected
+            # here.  This keeps the system message identical across all turns in a
+            # session, maximizing Anthropic prompt cache hits.
            effective_system = active_system_prompt or ""
            if self.ephemeral_system_prompt:
                effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
-            if self._honcho_context:
-                effective_system = (effective_system + "\n\n" + self._honcho_context).strip()
            if effective_system:
                api_messages = [{"role": "system", "content": effective_system}] + api_messages
            
@@ -3142,10 +3336,13 @@ class AIAgent:
            api_start_time = time.time()
            retry_count = 0
            max_retries = 6  # Increased to allow longer backoff periods
+            compression_attempts = 0
+            max_compression_attempts = 3
            codex_auth_retry_attempted = False
            nous_auth_retry_attempted = False

            finish_reason = "stop"
+            response = None  # Guard against UnboundLocalError if all retries fail

            while retry_count < max_retries:
                try:
@@ -3237,6 +3434,10 @@ class AIAgent:
                        print(f"{self.log_prefix}   ⏱️  Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
                        
                        if retry_count >= max_retries:
+                            # Try fallback before giving up
+                            if self._try_activate_fallback():
+                                retry_count = 0
+                                continue
                            print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
                            logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
                            self._persist_session(messages, conversation_history)
@@ -3261,7 +3462,7 @@ class AIAgent:
                                self._persist_session(messages, conversation_history)
                                self.clear_interrupt()
                                return {
-                                    "final_response": "Operation interrupted.",
+                                    "final_response": f"Operation interrupted: retrying API call after rate limit (retry {retry_count}/{max_retries}).",
                                    "messages": messages,
                                    "api_calls": api_call_count,
                                    "completed": False,
@@ -3370,10 +3571,11 @@ class AIAgent:
                    if thinking_spinner:
                        thinking_spinner.stop("")
                        thinking_spinner = None
+                    api_elapsed = time.time() - api_start_time
                    print(f"{self.log_prefix}⚡ Interrupted during API call.")
                    self._persist_session(messages, conversation_history)
                    interrupted = True
-                    final_response = "Operation interrupted."
+                    final_response = f"Operation interrupted: waiting for model response ({api_elapsed:.1f}s elapsed)."
                    break

                except Exception as api_error:
@@ -3422,7 +3624,7 @@ class AIAgent:
                        self._persist_session(messages, conversation_history)
                        self.clear_interrupt()
                        return {
-                            "final_response": "Operation interrupted.",
+                            "final_response": f"Operation interrupted: handling API error ({error_type}: {str(api_error)[:80]}).",
                            "messages": messages,
                            "api_calls": api_call_count,
                            "completed": False,
@@ -3441,7 +3643,19 @@ class AIAgent:
                    )

                    if is_payload_too_large:
-                        print(f"{self.log_prefix}⚠️  Request payload too large (413) - attempting compression...")
+                        compression_attempts += 1
+                        if compression_attempts > max_compression_attempts:
+                            print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.")
+                            logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
+                            self._persist_session(messages, conversation_history)
+                            return {
+                                "messages": messages,
+                                "completed": False,
+                                "api_calls": api_call_count,
+                                "error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.",
+                                "partial": True
+                            }
+                        print(f"{self.log_prefix}⚠️  Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")

                        original_len = len(messages)
                        messages, active_system_prompt = self._compress_context(
@@ -3450,6 +3664,7 @@ class AIAgent:

                        if len(messages) < original_len:
                            print(f"{self.log_prefix}   🗜️  Compressed {original_len} → {len(messages)} messages, retrying...")
+                            time.sleep(2)  # Brief pause between compression retries
                            continue  # Retry with compressed messages
                        else:
                            print(f"{self.log_prefix}❌ Payload too large and cannot compress further.")
@@ -3495,6 +3710,20 @@ class AIAgent:
                        else:
                            print(f"{self.log_prefix}⚠️  Context length exceeded at minimum tier — attempting compression...")

+                        compression_attempts += 1
+                        if compression_attempts > max_compression_attempts:
+                            print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.")
+                            logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
+                            self._persist_session(messages, conversation_history)
+                            return {
+                                "messages": messages,
+                                "completed": False,
+                                "api_calls": api_call_count,
+                                "error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
+                                "partial": True
+                            }
+                        print(f"{self.log_prefix}   🗜️  Context compression attempt {compression_attempts}/{max_compression_attempts}...")
+
                        original_len = len(messages)
                        messages, active_system_prompt = self._compress_context(
                            messages, system_message, approx_tokens=approx_tokens
@@ -3503,6 +3732,7 @@ class AIAgent:
                        if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
                            if len(messages) < original_len:
                                print(f"{self.log_prefix}   🗜️  Compressed {original_len} → {len(messages)} messages, retrying...")
+                            time.sleep(2)  # Brief pause between compression retries
                            continue  # Retry with compressed messages or new tier
                        else:
                            # Can't compress further and already at minimum tier
@@ -3532,6 +3762,11 @@ class AIAgent:
                    ])) and not is_context_length_error

                    if is_client_error:
+                        # Try fallback before aborting — a different provider
+                        # may not have the same issue (rate limit, auth, etc.)
+                        if self._try_activate_fallback():
+                            retry_count = 0
+                            continue
                        self._dump_api_request_debug(
                            api_kwargs, reason="non_retryable_client_error", error=api_error,
                        )
@@ -3549,6 +3784,10 @@ class AIAgent:
                        }

                    if retry_count >= max_retries:
+                        # Try fallback before giving up entirely
+                        if self._try_activate_fallback():
+                            retry_count = 0
+                            continue
                        print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
                        logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
                        logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}")
@@ -3569,7 +3808,7 @@ class AIAgent:
                            self._persist_session(messages, conversation_history)
                            self.clear_interrupt()
                            return {
-                                "final_response": "Operation interrupted.",
+                                "final_response": f"Operation interrupted: retrying API call after error (retry {retry_count}/{max_retries}).",
                                "messages": messages,
                                "api_calls": api_call_count,
                                "completed": False,
@@ -3581,6 +3820,14 @@ class AIAgent:
            if interrupted:
                break

+            # Guard: if all retries exhausted without a successful response
+            # (e.g. repeated context-length errors that exhausted retry_count),
+            # the `response` variable is still None. Break out cleanly.
+            if response is None:
+                print(f"{self.log_prefix}❌ All API retries exhausted with no successful response.")
+                self._persist_session(messages, conversation_history)
+                break
+
            try:
                if self.api_mode == "codex_responses":
                    assistant_message, finish_reason = self._normalize_codex_response(response)
@@ -4006,7 +4253,12 @@ class AIAgent:
                    final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
                    break
        
-        if api_call_count >= self.max_iterations and final_response is None:
+        if final_response is None and (
+            api_call_count >= self.max_iterations
+            or self.iteration_budget.remaining <= 0
+        ):
+            if self.iteration_budget.remaining <= 0 and not self.quiet_mode:
+                print(f"\n⚠️  Session iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} used, including subagents)")
            final_response = self._handle_max_iterations(messages, api_call_count)
        
        # Determine if conversation completed successfully
@@ -4077,7 +4329,7 @@ def main(

    Args:
        query (str): Natural language query for the agent. Defaults to Python 3.13 example.
-        model (str): Model name to use (OpenRouter format: provider/model). Defaults to anthropic/claude-sonnet-4-20250514.
+        model (str): Model name to use (OpenRouter format: provider/model). Defaults to anthropic/claude-sonnet-4.6.
        api_key (str): API key for authentication. Uses OPENROUTER_API_KEY env var if not provided.
        base_url (str): Base URL for the model API. Defaults to https://openrouter.ai/api/v1
        max_turns (int): Maximum number of API call iterations. Defaults to 10.
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -492,9 +492,23 @@ install_system_packages() {
                        return 0
                    fi
                fi
+            elif [ -e /dev/tty ]; then
+                # Non-interactive (e.g. curl | bash) but a terminal is available.
+                # Read the user's answer from /dev/tty (same approach the setup wizard uses).
+                echo ""
+                log_info "Installing ${description} requires sudo."
+                read -p "Install? [Y/n] " -n 1 -r < /dev/tty
+                echo
+                if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+                    if sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a $install_cmd < /dev/tty; then
+                        [ "$need_ripgrep" = true ] && HAS_RIPGREP=true && log_success "ripgrep installed"
+                        [ "$need_ffmpeg" = true ]  && HAS_FFMPEG=true  && log_success "ffmpeg installed"
+                        return 0
+                    fi
+                fi
            else
-                log_warn "Non-interactive mode: cannot prompt for sudo password"
-                log_info "Install missing packages manually: sudo $install_cmd"
+                log_warn "Non-interactive mode and no terminal available — cannot install system packages"
+                log_info "Install manually after setup completes: sudo $install_cmd"
            fi
        fi
    fi
@@ -829,6 +843,33 @@ install_node_deps() {
            log_warn "npm install failed (browser tools may not work)"
        }
        log_success "Node.js dependencies installed"
+
+        # Install Playwright browser + system dependencies.
+        # Playwright's install-deps only supports apt/dnf/zypper natively.
+        # For Arch/Manjaro we install the system libs via pacman first.
+        log_info "Installing browser engine (Playwright Chromium)..."
+        case "$DISTRO" in
+            arch|manjaro)
+                if command -v pacman &> /dev/null; then
+                    log_info "Arch/Manjaro detected — installing Chromium system dependencies via pacman..."
+                    if command -v sudo &> /dev/null && sudo -n true 2>/dev/null; then
+                        sudo NEEDRESTART_MODE=a pacman -S --noconfirm --needed \
+                            nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib >/dev/null 2>&1 || true
+                    elif [ "$(id -u)" -eq 0 ]; then
+                        pacman -S --noconfirm --needed \
+                            nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib >/dev/null 2>&1 || true
+                    else
+                        log_warn "Cannot install browser deps without sudo. Run manually:"
+                        log_warn "  sudo pacman -S nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib"
+                    fi
+                fi
+                cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || true
+                ;;
+            *)
+                cd "$INSTALL_DIR" && npx playwright install --with-deps chromium 2>/dev/null || true
+                ;;
+        esac
+        log_success "Browser engine installed"
    fi

    # Install WhatsApp bridge dependencies
--- a/skills/creative/ascii-art/SKILL.md
+++ b/skills/creative/ascii-art/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: ascii-art
-description: Generate ASCII art using pyfiglet (571 fonts), cowsay, boxes, toilet, image-to-ascii conversion, and search curated art from emojicombos.com and asciiart.eu (11,000+ artworks). Falls back to LLM-generated art.
-version: 3.1.0
+description: Generate ASCII art using pyfiglet (571 fonts), cowsay, boxes, toilet, image-to-ascii, remote APIs (asciified, ascii.co.uk), and LLM fallback. No API keys required.
+version: 4.0.0
 author: 0xbyt4, Hermes Agent
 license: MIT
 dependencies: []
@@ -14,9 +14,9 @@ metadata:

 # ASCII Art Skill

-Multiple tools for different ASCII art needs. All tools are local CLI programs — no API keys required.
+Multiple tools for different ASCII art needs. All tools are local CLI programs or free REST APIs — no API keys required.

-## Tool 1: Text Banners (pyfiglet)
+## Tool 1: Text Banners (pyfiglet — local)

 Render text as large ASCII art banners. 571 built-in fonts.

@@ -53,7 +53,35 @@ python3 -m pyfiglet --list_fonts             # List all 571 fonts
 - Short text (1-8 chars) works best with detailed fonts like `doom` or `block`
 - Long text works better with compact fonts like `small` or `mini`

-## Tool 2: Cowsay (Message Art)
+## Tool 2: Text Banners (asciified API — remote, no install)
+
+Free REST API that converts text to ASCII art. 250+ FIGlet fonts. Returns plain text directly — no parsing needed. Use this when pyfiglet is not installed or as a quick alternative.
+
+### Usage (via terminal curl)
+
+```bash
+# Basic text banner (default font)
+curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello+World"
+
+# With a specific font
+curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello&font=Slant"
+curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello&font=Doom"
+curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello&font=Star+Wars"
+curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello&font=3-D"
+curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello&font=Banner3"
+
+# List all available fonts (returns JSON array)
+curl -s "https://asciified.thelicato.io/api/v2/fonts"
+```
+
+### Tips
+
+- URL-encode spaces as `+` in the text parameter
+- The response is plain text ASCII art — no JSON wrapping, ready to display
+- Font names are case-sensitive; use the fonts endpoint to get exact names
+- Works from any terminal with curl — no Python or pip needed
+
+## Tool 3: Cowsay (Message Art)

 Classic tool that wraps text in a speech bubble with an ASCII character.

@@ -97,7 +125,7 @@ cowsay -e "OO" "Msg"   # Custom eyes
 cowsay -T "U " "Msg"   # Custom tongue
 ```

-## Tool 3: Boxes (Decorative Borders)
+## Tool 4: Boxes (Decorative Borders)

 Draw decorative ASCII art borders/frames around any text. 70+ built-in designs.

@@ -124,13 +152,15 @@ echo "Hello World" | boxes -a c               # Center text
 boxes -l                                       # List all 70+ designs
 ```

-### Combine with pyfiglet
+### Combine with pyfiglet or asciified

 ```bash
 python3 -m pyfiglet "HERMES" -f slant | boxes -d stone
+# Or without pyfiglet installed:
+curl -s "https://asciified.thelicato.io/api/v2/ascii?text=HERMES&font=Slant" | boxes -d stone
 ```

-## Tool 4: TOIlet (Colored Text Art)
+## Tool 5: TOIlet (Colored Text Art)

 Like pyfiglet but with ANSI color effects and visual filters. Great for terminal eye candy.

@@ -160,14 +190,14 @@ toilet -F list                          # List available filters

 **Note**: toilet outputs ANSI escape codes for colors — works in terminals but may not render in all contexts (e.g., plain text files, some chat platforms).

-## Tool 5: Image to ASCII Art
+## Tool 6: Image to ASCII Art

 Convert images (PNG, JPEG, GIF, WEBP) to ASCII art.

 ### Option A: ascii-image-converter (recommended, modern)

 ```bash
-# Install via snap or Go
+# Install
 sudo snap install ascii-image-converter
 # OR: go install github.com/TheZoraiz/ascii-image-converter@latest
 ```
@@ -190,63 +220,77 @@ jp2a --width=80 image.jpg
 jp2a --colors image.jpg              # Colorized
 ```

-## Tool 6: Search Pre-Made ASCII Art (Web APIs)
+## Tool 7: Search Pre-Made ASCII Art

-Search curated ASCII art databases via `web_extract`. No API keys needed.
+Search curated ASCII art from the web. Use `terminal` with `curl`.

-### Source A: emojicombos.com (recommended first)
+### Source A: ascii.co.uk (recommended for pre-made art)

-Huge collection of ASCII art, dot art, kaomoji, and emoji combos. Modern, meme-aware, user-submitted content. Great for pop culture, animals, objects, aesthetics.
+Large collection of classic ASCII art organized by subject. Art is inside HTML `<pre>` tags. Fetch the page with curl, then extract art with a small Python snippet.

-**URL pattern:** `https://emojicombos.com/{term}-ascii-art`
+**URL pattern:** `https://ascii.co.uk/art/{subject}`

+**Step 1 — Fetch the page:**
+
+```bash
+curl -s 'https://ascii.co.uk/art/cat' -o /tmp/ascii_art.html
 ```
-web_extract(urls=["https://emojicombos.com/cat-ascii-art"])
-web_extract(urls=["https://emojicombos.com/rocket-ascii-art"])
-web_extract(urls=["https://emojicombos.com/dragon-ascii-art"])
-web_extract(urls=["https://emojicombos.com/skull-ascii-art"])
-web_extract(urls=["https://emojicombos.com/heart-ascii-art"])
+
+**Step 2 — Extract art from pre tags:**
+
+```python
+import re, html
+with open('/tmp/ascii_art.html') as f:
+    text = f.read()
+arts = re.findall(r'<pre[^>]*>(.*?)</pre>', text, re.DOTALL)
+for art in arts:
+    clean = re.sub(r'<[^>]+>', '', art)
+    clean = html.unescape(clean).strip()
+    if len(clean) > 30:
+        print(clean)
+        print('\n---\n')
 ```

+**Available subjects** (use as URL path):
+- Animals: `cat`, `dog`, `horse`, `bird`, `fish`, `dragon`, `snake`, `rabbit`, `elephant`, `dolphin`, `butterfly`, `owl`, `wolf`, `bear`, `penguin`, `turtle`
+- Objects: `car`, `ship`, `airplane`, `rocket`, `guitar`, `computer`, `coffee`, `beer`, `cake`, `house`, `castle`, `sword`, `crown`, `key`
+- Nature: `tree`, `flower`, `sun`, `moon`, `star`, `mountain`, `ocean`, `rainbow`
+- Characters: `skull`, `robot`, `angel`, `wizard`, `pirate`, `ninja`, `alien`
+- Holidays: `christmas`, `halloween`, `valentine`
+
 **Tips:**
-- Use hyphenated search terms: `hello-kitty-ascii-art`, `star-wars-ascii-art`
-- Returns a mix of classic ASCII, Braille dot art, and kaomoji — pick the best style for the user
-- Includes modern meme art and pop culture references
-- Great for kaomoji/emoticons too: `https://emojicombos.com/cat-kaomoji`
+- Preserve artist signatures/initials — important etiquette
+- Multiple art pieces per page — pick the best one for the user
+- Works reliably via curl, no JavaScript needed

-### Source B: asciiart.eu (classic archive)
+### Source B: GitHub Octocat API (fun easter egg)

-11,000+ classic ASCII artworks organized by category. More traditional/vintage art.
-
-**Browse by category** (use as URL paths):
-- `animals/cats`, `animals/dogs`, `animals/birds`, `animals/horses`
-- `animals/dolphins`, `animals/dragons`, `animals/insects`
-- `space/rockets`, `space/stars`, `space/planets`
-- `vehicles/cars`, `vehicles/ships`, `vehicles/airplanes`
-- `food-and-drinks/coffee`, `food-and-drinks/beer`
-- `computers/computers`, `electronics/robots`
-- `art-and-design/hearts`, `art-and-design/skulls`
-- `plants/flowers`, `plants/trees`
-- `mythology/dragons`, `mythology/unicorns`
-
-```
-web_extract(urls=["https://www.asciiart.eu/animals/cats"])
-web_extract(urls=["https://www.asciiart.eu/search?q=rocket"])
-```
-
-**Tips:**
-- Preserve artist initials/signatures (e.g., `jgs`, `hjw`) — this is important etiquette
-- Better for classic/vintage ASCII art style
-
-### Source C: GitHub Octocat API (fun easter egg)
-
-Returns a random GitHub Octocat with a quote. No auth needed.
+Returns a random GitHub Octocat with a wise quote. No auth needed.

 ```bash
 curl -s https://api.github.com/octocat
 ```

-## Tool 7: LLM-Generated Custom Art (Fallback)
+## Tool 8: Fun ASCII Utilities (via curl)
+
+These free services return ASCII art directly — great for fun extras.
+
+### QR Codes as ASCII Art
+
+```bash
+curl -s "qrenco.de/Hello+World"
+curl -s "qrenco.de/https://example.com"
+```
+
+### Weather as ASCII Art
+
+```bash
+curl -s "wttr.in/London"          # Full weather report with ASCII graphics
+curl -s "wttr.in/Moon"            # Moon phase in ASCII art
+curl -s "v2.wttr.in/London"       # Detailed version
+```
+
+## Tool 9: LLM-Generated Custom Art (Fallback)

 When tools above don't have what's needed, generate ASCII art directly using these Unicode characters:

@@ -264,28 +308,14 @@ When tools above don't have what's needed, generate ASCII art directly using the
 - Max height: 15 lines for banners, 25 for scenes
 - Monospace only: output must render correctly in fixed-width fonts

-## Fun Extras
-
-### Star Wars in ASCII (via telnet)
-
-```bash
-telnet towel.blinkenlights.nl
-```
-
-### Useful Resources
-
-- [asciiart.eu](https://www.asciiart.eu/) — 11,000+ artworks, searchable
-- [patorjk.com/software/taag](http://patorjk.com/software/taag/) — Web-based text-to-ASCII with font preview
-- [asciiflow.com](http://asciiflow.com/) — Interactive ASCII diagram editor (browser)
-- [awesome-ascii-art](https://github.com/moul/awesome-ascii-art) — Curated resource list
-
 ## Decision Flow

-1. **Text as a banner** → pyfiglet (or toilet for colored output)
+1. **Text as a banner** → pyfiglet if installed, otherwise asciified API via curl
 2. **Wrap a message in fun character art** → cowsay
-3. **Add decorative border/frame** → boxes (can combine with pyfiglet)
-4. **Art of a thing** (cat, rocket, dragon) → emojicombos.com first, then asciiart.eu
-5. **Kaomoji / emoticons** → emojicombos.com (`{term}-kaomoji`)
-6. **Convert an image to ASCII** → ascii-image-converter or jp2a
-7. **Something custom/creative** → LLM generation with Unicode palette
-8. **Any tool not installed** → install it, or fall back to next option
+3. **Add decorative border/frame** → boxes (can combine with pyfiglet/asciified)
+4. **Art of a specific thing** (cat, rocket, dragon) → ascii.co.uk via curl + parsing
+5. **Convert an image to ASCII** → ascii-image-converter or jp2a
+6. **QR code** → qrenco.de via curl
+7. **Weather/moon art** → wttr.in via curl
+8. **Something custom/creative** → LLM generation with Unicode palette
+9. **Any tool not installed** → install it, or fall back to next option
--- a/skills/dogfood/SKILL.md
+++ b/skills/dogfood/SKILL.md
@@ -0,0 +1,162 @@
+---
+name: dogfood
+description: Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports
+version: 1.0.0
+metadata:
+  hermes:
+    tags: [qa, testing, browser, web, dogfood]
+    related_skills: []
+---
+
+# Dogfood: Systematic Web Application QA Testing
+
+## Overview
+
+This skill guides you through systematic exploratory QA testing of web applications using the browser toolset. You will navigate the application, interact with elements, capture evidence of issues, and produce a structured bug report.
+
+## Prerequisites
+
+- Browser toolset must be available (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`, `browser_vision`, `browser_console`, `browser_scroll`, `browser_back`, `browser_press`, `browser_close`)
+- A target URL and testing scope from the user
+
+## Inputs
+
+The user provides:
+1. **Target URL** — the entry point for testing
+2. **Scope** — what areas/features to focus on (or "full site" for comprehensive testing)
+3. **Output directory** (optional) — where to save screenshots and the report (default: `./dogfood-output`)
+
+## Workflow
+
+Follow this 5-phase systematic workflow:
+
+### Phase 1: Plan
+
+1. Create the output directory structure:
+   ```
+   {output_dir}/
+   ├── screenshots/       # Evidence screenshots
+   └── report.md          # Final report (generated in Phase 5)
+   ```
+2. Identify the testing scope based on user input.
+3. Build a rough sitemap by planning which pages and features to test:
+   - Landing/home page
+   - Navigation links (header, footer, sidebar)
+   - Key user flows (sign up, login, search, checkout, etc.)
+   - Forms and interactive elements
+   - Edge cases (empty states, error pages, 404s)
+
+### Phase 2: Explore
+
+For each page or feature in your plan:
+
+1. **Navigate** to the page:
+   ```
+   browser_navigate(url="https://example.com/page")
+   ```
+
+2. **Take a snapshot** to understand the DOM structure:
+   ```
+   browser_snapshot()
+   ```
+
+3. **Check the console** for JavaScript errors:
+   ```
+   browser_console(clear=true)
+   ```
+   Do this after every navigation and after every significant interaction. Silent JS errors are high-value findings.
+
+4. **Take an annotated screenshot** to visually assess the page and identify interactive elements:
+   ```
+   browser_vision(question="Describe the page layout, identify any visual issues, broken elements, or accessibility concerns", annotate=true)
+   ```
+   The `annotate=true` flag overlays numbered `[N]` labels on interactive elements. Each `[N]` maps to ref `@eN` for subsequent browser commands.
+
+5. **Test interactive elements** systematically:
+   - Click buttons and links: `browser_click(ref="@eN")`
+   - Fill forms: `browser_type(ref="@eN", text="test input")`
+   - Test keyboard navigation: `browser_press(key="Tab")`, `browser_press(key="Enter")`
+   - Scroll through content: `browser_scroll(direction="down")`
+   - Test form validation with invalid inputs
+   - Test empty submissions
+
+6. **After each interaction**, check for:
+   - Console errors: `browser_console()`
+   - Visual changes: `browser_vision(question="What changed after the interaction?")`
+   - Expected vs actual behavior
+
+### Phase 3: Collect Evidence
+
+For every issue found:
+
+1. **Take a screenshot** showing the issue:
+   ```
+   browser_vision(question="Capture and describe the issue visible on this page", annotate=false)
+   ```
+   Save the `screenshot_path` from the response — you will reference it in the report.
+
+2. **Record the details**:
+   - URL where the issue occurs
+   - Steps to reproduce
+   - Expected behavior
+   - Actual behavior
+   - Console errors (if any)
+   - Screenshot path
+
+3. **Classify the issue** using the issue taxonomy (see `references/issue-taxonomy.md`):
+   - Severity: Critical / High / Medium / Low
+   - Category: Functional / Visual / Accessibility / Console / UX / Content
+
+### Phase 4: Categorize
+
+1. Review all collected issues.
+2. De-duplicate — merge issues that are the same bug manifesting in different places.
+3. Assign final severity and category to each issue.
+4. Sort by severity (Critical first, then High, Medium, Low).
+5. Count issues by severity and category for the executive summary.
+
+### Phase 5: Report
+
+Generate the final report using the template at `templates/dogfood-report-template.md`.
+
+The report must include:
+1. **Executive summary** with total issue count, breakdown by severity, and testing scope
+2. **Per-issue sections** with:
+   - Issue number and title
+   - Severity and category badges
+   - URL where observed
+   - Description of the issue
+   - Steps to reproduce
+   - Expected vs actual behavior
+   - Screenshot references (use `MEDIA:<screenshot_path>` for inline images)
+   - Console errors if relevant
+3. **Summary table** of all issues
+4. **Testing notes** — what was tested, what was not, any blockers
+
+Save the report to `{output_dir}/report.md`.
+
+## Tools Reference
+
+| Tool | Purpose |
+|------|---------|
+| `browser_navigate` | Go to a URL |
+| `browser_snapshot` | Get DOM text snapshot (accessibility tree) |
+| `browser_click` | Click an element by ref (`@eN`) or text |
+| `browser_type` | Type into an input field |
+| `browser_scroll` | Scroll up/down on the page |
+| `browser_back` | Go back in browser history |
+| `browser_press` | Press a keyboard key |
+| `browser_vision` | Screenshot + AI analysis; use `annotate=true` for element labels |
+| `browser_console` | Get JS console output and errors |
+| `browser_close` | Close the browser session |
+
+## Tips
+
+- **Always check `browser_console()` after navigating and after significant interactions.** Silent JS errors are among the most valuable findings.
+- **Use `annotate=true` with `browser_vision`** when you need to reason about interactive element positions or when the snapshot refs are unclear.
+- **Test with both valid and invalid inputs** — form validation bugs are common.
+- **Scroll through long pages** — content below the fold may have rendering issues.
+- **Test navigation flows** — click through multi-step processes end-to-end.
+- **Check responsive behavior** by noting any layout issues visible in screenshots.
+- **Don't forget edge cases**: empty states, very long text, special characters, rapid clicking.
+- When reporting screenshots to the user, include `MEDIA:<screenshot_path>` so they can see the evidence inline.
--- a/skills/dogfood/references/issue-taxonomy.md
+++ b/skills/dogfood/references/issue-taxonomy.md
@@ -0,0 +1,109 @@
+# Issue Taxonomy
+
+Use this taxonomy to classify issues found during dogfood QA testing.
+
+## Severity Levels
+
+### Critical
+The issue makes a core feature completely unusable or causes data loss.
+
+**Examples:**
+- Application crashes or shows a blank white page
+- Form submission silently loses user data
+- Authentication is completely broken (can't log in at all)
+- Payment flow fails and charges the user without completing the order
+- Security vulnerability (e.g., XSS, exposed credentials in console)
+
+### High
+The issue significantly impairs functionality but a workaround may exist.
+
+**Examples:**
+- A key button does nothing when clicked (but refreshing fixes it)
+- Search returns no results for valid queries
+- Form validation rejects valid input
+- Page loads but critical content is missing or garbled
+- Navigation link leads to a 404 or wrong page
+- Uncaught JavaScript exceptions in the console on core pages
+
+### Medium
+The issue is noticeable and affects user experience but doesn't block core functionality.
+
+**Examples:**
+- Layout is misaligned or overlapping on certain screen sections
+- Images fail to load (broken image icons)
+- Slow performance (visible loading delays > 3 seconds)
+- Form field lacks proper validation feedback (no error message on bad input)
+- Console warnings that suggest deprecated or misconfigured features
+- Inconsistent styling between similar pages
+
+### Low
+Minor polish issues that don't affect functionality.
+
+**Examples:**
+- Typos or grammatical errors in text content
+- Minor spacing or alignment inconsistencies
+- Placeholder text left in production ("Lorem ipsum")
+- Favicon missing
+- Console info/debug messages that shouldn't be in production
+- Subtle color contrast issues that don't fail WCAG requirements
+
+## Categories
+
+### Functional
+Issues where features don't work as expected.
+
+- Buttons/links that don't respond
+- Forms that don't submit or submit incorrectly
+- Broken user flows (can't complete a multi-step process)
+- Incorrect data displayed
+- Features that work partially
+
+### Visual
+Issues with the visual presentation of the page.
+
+- Layout problems (overlapping elements, broken grids)
+- Broken images or missing media
+- Styling inconsistencies
+- Responsive design failures
+- Z-index issues (elements hidden behind others)
+- Text overflow or truncation
+
+### Accessibility
+Issues that prevent or hinder access for users with disabilities.
+
+- Missing alt text on meaningful images
+- Poor color contrast (fails WCAG AA)
+- Elements not reachable via keyboard navigation
+- Missing form labels or ARIA attributes
+- Focus indicators missing or unclear
+- Screen reader incompatible content
+
+### Console
+Issues detected through JavaScript console output.
+
+- Uncaught exceptions and unhandled promise rejections
+- Failed network requests (4xx, 5xx errors in console)
+- Deprecation warnings
+- CORS errors
+- Mixed content warnings (HTTP resources on HTTPS page)
+- Excessive console.log output left from development
+
+### UX (User Experience)
+Issues where functionality works but the experience is poor.
+
+- Confusing navigation or information architecture
+- Missing loading indicators (user doesn't know something is happening)
+- No feedback after user actions (e.g., button click with no visible result)
+- Inconsistent interaction patterns
+- Missing confirmation dialogs for destructive actions
+- Poor error messages that don't help the user recover
+
+### Content
+Issues with the text, media, or information on the page.
+
+- Typos and grammatical errors
+- Placeholder/dummy content in production
+- Outdated information
+- Missing content (empty sections)
+- Broken or dead links to external resources
+- Incorrect or misleading labels
--- a/skills/dogfood/templates/dogfood-report-template.md
+++ b/skills/dogfood/templates/dogfood-report-template.md
@@ -0,0 +1,86 @@
+# Dogfood QA Report
+
+**Target:** {target_url}
+**Date:** {date}
+**Scope:** {scope_description}
+**Tester:** Hermes Agent (automated exploratory QA)
+
+---
+
+## Executive Summary
+
+| Severity | Count |
+|----------|-------|
+| 🔴 Critical | {critical_count} |
+| 🟠 High | {high_count} |
+| 🟡 Medium | {medium_count} |
+| 🔵 Low | {low_count} |
+| **Total** | **{total_count}** |
+
+**Overall Assessment:** {one_sentence_assessment}
+
+---
+
+## Issues
+
+<!-- Repeat this section for each issue found, sorted by severity (Critical first) -->
+
+### Issue #{issue_number}: {issue_title}
+
+| Field | Value |
+|-------|-------|
+| **Severity** | {severity} |
+| **Category** | {category} |
+| **URL** | {url_where_found} |
+
+**Description:**
+{detailed_description_of_the_issue}
+
+**Steps to Reproduce:**
+1. {step_1}
+2. {step_2}
+3. {step_3}
+
+**Expected Behavior:**
+{what_should_happen}
+
+**Actual Behavior:**
+{what_actually_happens}
+
+**Screenshot:**
+MEDIA:{screenshot_path}
+
+**Console Errors** (if applicable):
+```
+{console_error_output}
+```
+
+---
+
+<!-- End of per-issue section -->
+
+## Issues Summary Table
+
+| # | Title | Severity | Category | URL |
+|---|-------|----------|----------|-----|
+| {n} | {title} | {severity} | {category} | {url} |
+
+## Testing Coverage
+
+### Pages Tested
+- {list_of_pages_visited}
+
+### Features Tested
+- {list_of_features_exercised}
+
+### Not Tested / Out of Scope
+- {areas_not_covered_and_why}
+
+### Blockers
+- {any_issues_that_prevented_testing_certain_areas}
+
+---
+
+## Notes
+
+{any_additional_observations_or_recommendations}
--- a/skills/market-data/polymarket/SKILL.md
+++ b/skills/market-data/polymarket/SKILL.md
@@ -0,0 +1,76 @@
+---
+name: polymarket
+description: Query Polymarket prediction market data — search markets, get prices, orderbooks, and price history. Read-only via public REST APIs, no API key needed.
+version: 1.0.0
+author: Hermes Agent + Teknium
+tags: [polymarket, prediction-markets, market-data, trading]
+---
+
+# Polymarket — Prediction Market Data
+
+Query prediction market data from Polymarket using their public REST APIs.
+All endpoints are read-only and require zero authentication.
+
+See `references/api-endpoints.md` for the full endpoint reference with curl examples.
+
+## When to Use
+
+- User asks about prediction markets, betting odds, or event probabilities
+- User wants to know "what are the odds of X happening?"
+- User asks about Polymarket specifically
+- User wants market prices, orderbook data, or price history
+- User asks to monitor or track prediction market movements
+
+## Key Concepts
+
+- **Events** contain one or more **Markets** (1:many relationship)
+- **Markets** are binary outcomes with Yes/No prices between 0.00 and 1.00
+- Prices ARE probabilities: a price of 0.65 means the market considers that outcome 65% likely
+- `outcomePrices` field: JSON-encoded array like `["0.80", "0.20"]`
+- `clobTokenIds` field: JSON-encoded array of two token IDs [Yes, No] for price/book queries
+- `conditionId` field: hex string used for price history queries
+- Volume is in USDC (US dollars)
+
+## Three Public APIs
+
+1. **Gamma API** at `gamma-api.polymarket.com` — Discovery, search, browsing
+2. **CLOB API** at `clob.polymarket.com` — Real-time prices, orderbooks, history
+3. **Data API** at `data-api.polymarket.com` — Trades, open interest
+
+## Typical Workflow
+
+When a user asks about prediction market odds:
+
+1. **Search** using the Gamma API public-search endpoint with their query
+2. **Parse** the response — extract events and their nested markets
+3. **Present** market question, current prices as percentages, and volume
+4. **Deep dive** if asked — use clobTokenIds for orderbook, conditionId for history
+
+## Presenting Results
+
+Format prices as percentages for readability:
+- outcomePrices `["0.652", "0.348"]` becomes "Yes: 65.2%, No: 34.8%"
+- Always show the market question and probability
+- Include volume when available
+
+Example: `"Will X happen?" — 65.2% Yes ($1.2M volume)`
+
+## Parsing Double-Encoded Fields
+
+The Gamma API returns `outcomePrices`, `outcomes`, and `clobTokenIds` as JSON strings
+inside JSON responses (double-encoded). When processing with Python, parse them with
+`json.loads(market['outcomePrices'])` to get the actual array.
+
+## Rate Limits
+
+The limits are generous and unlikely to be hit in normal usage:
+- Gamma: 4,000 requests per 10 seconds (general)
+- CLOB: 9,000 requests per 10 seconds (general)
+- Data: 1,000 requests per 10 seconds (general)
+
+## Limitations
+
+- This skill is read-only — it does not support placing trades
+- Trading requires wallet-based crypto authentication (EIP-712 signatures)
+- Some new markets may have empty price history
+- Geographic restrictions apply to trading but read-only data is globally accessible
--- a/skills/market-data/polymarket/references/api-endpoints.md
+++ b/skills/market-data/polymarket/references/api-endpoints.md
@@ -0,0 +1,220 @@
+# Polymarket API Endpoints Reference
+
+All endpoints are public REST (GET), return JSON, and need no authentication.
+
+## Gamma API — gamma-api.polymarket.com
+
+### Search Markets
+
+```
+GET /public-search?q=QUERY
+```
+
+Response structure:
+```json
+{
+  "events": [
+    {
+      "id": "12345",
+      "title": "Event title",
+      "slug": "event-slug",
+      "volume": 1234567.89,
+      "markets": [
+        {
+          "question": "Will X happen?",
+          "outcomePrices": "[\"0.65\", \"0.35\"]",
+          "outcomes": "[\"Yes\", \"No\"]",
+          "clobTokenIds": "[\"TOKEN_YES\", \"TOKEN_NO\"]",
+          "conditionId": "0xabc...",
+          "volume": 500000
+        }
+      ]
+    }
+  ],
+  "pagination": {"hasMore": true, "totalResults": 100}
+}
+```
+
+### List Events
+
+```
+GET /events?limit=N&active=true&closed=false&order=volume&ascending=false
+```
+
+Parameters:
+- `limit` — max results (default varies)
+- `offset` — pagination offset
+- `active` — true/false
+- `closed` — true/false
+- `order` — sort field: `volume`, `createdAt`, `updatedAt`
+- `ascending` — true/false
+- `tag` — filter by tag slug
+- `slug` — get specific event by slug
+
+Response: array of event objects. Each event includes a `markets` array.
+
+Event fields: `id`, `title`, `slug`, `description`, `volume`, `liquidity`,
+`openInterest`, `active`, `closed`, `category`, `startDate`, `endDate`,
+`markets` (array of market objects).
+
+### List Markets
+
+```
+GET /markets?limit=N&active=true&closed=false&order=volume&ascending=false
+```
+
+Same filter parameters as events, plus:
+- `slug` — get a specific market by its slug (here the slug names a market, not an event)
+
+Market fields: `id`, `question`, `conditionId`, `slug`, `description`,
+`outcomes`, `outcomePrices`, `volume`, `liquidity`, `active`, `closed`,
+`marketType`, `clobTokenIds`, `endDate`, `category`, `createdAt`.
+
+Important: `outcomePrices`, `outcomes`, and `clobTokenIds` are JSON strings
+(double-encoded). Parse with json.loads() in Python.
+
+### List Tags
+
+```
+GET /tags
+```
+
+Returns array of tag objects: `id`, `label`, `slug`.
+Use the `slug` value when filtering events/markets by tag.
+
+---
+
+## CLOB API — clob.polymarket.com
+
+All CLOB price endpoints use `token_id` from the market's `clobTokenIds` field.
+Index 0 = Yes outcome, Index 1 = No outcome.
+
+### Current Price
+
+```
+GET /price?token_id=TOKEN_ID&side=buy
+```
+
+Response: `{"price": "0.650"}`
+
+The `side` parameter: `buy` or `sell`.
+
+### Midpoint Price
+
+```
+GET /midpoint?token_id=TOKEN_ID
+```
+
+Response: `{"mid": "0.645"}`
+
+### Spread
+
+```
+GET /spread?token_id=TOKEN_ID
+```
+
+Response: `{"spread": "0.02"}`
+
+### Orderbook
+
+```
+GET /book?token_id=TOKEN_ID
+```
+
+Response:
+```json
+{
+  "market": "condition_id",
+  "asset_id": "token_id",
+  "bids": [{"price": "0.64", "size": "500"}, ...],
+  "asks": [{"price": "0.66", "size": "300"}, ...],
+  "min_order_size": "5",
+  "tick_size": "0.01",
+  "last_trade_price": "0.65"
+}
+```
+
+Bids and asks are sorted by price. Size is in shares (USDC-denominated).
+
+### Price History
+
+```
+GET /prices-history?market=CONDITION_ID&interval=INTERVAL&fidelity=N
+```
+
+Parameters:
+- `market` — the conditionId (hex string with 0x prefix)
+- `interval` — time range: `all`, `1d`, `1w`, `1m`, `3m`, `6m`, `1y`
+- `fidelity` — number of data points to return
+
+Response:
+```json
+{
+  "history": [
+    {"t": 1709000000, "p": "0.55"},
+    {"t": 1709100000, "p": "0.58"}
+  ]
+}
+```
+
+`t` is Unix timestamp, `p` is price (probability).
+
+Note: Very new markets may return empty history.
+
+### CLOB Markets List
+
+```
+GET /markets?limit=N
+```
+
+Response:
+```json
+{
+  "data": [
+    {
+      "condition_id": "0xabc...",
+      "question": "Will X?",
+      "tokens": [
+        {"token_id": "123...", "outcome": "Yes", "price": 0.65},
+        {"token_id": "456...", "outcome": "No", "price": 0.35}
+      ],
+      "active": true,
+      "closed": false
+    }
+  ],
+  "next_cursor": "cursor_string",
+  "limit": 100,
+  "count": 1000
+}
+```
+
+---
+
+## Data API — data-api.polymarket.com
+
+### Recent Trades
+
+```
+GET /trades?limit=N
+GET /trades?market=CONDITION_ID&limit=N
+```
+
+Trade fields: `side` (BUY/SELL), `size`, `price`, `timestamp`,
+`title`, `slug`, `outcome`, `transactionHash`, `conditionId`.
+
+### Open Interest
+
+```
+GET /oi?market=CONDITION_ID
+```
+
+---
+
+## Field Cross-Reference
+
+To go from a Gamma market to CLOB data:
+
+1. Get market from Gamma: has `clobTokenIds` and `conditionId`
+2. Parse `clobTokenIds` (JSON string): `["YES_TOKEN", "NO_TOKEN"]`
+3. Use YES_TOKEN with `/price`, `/book`, `/midpoint`, `/spread`
+4. Use `conditionId` with `/prices-history` and Data API endpoints
--- a/skills/market-data/polymarket/scripts/polymarket.py
+++ b/skills/market-data/polymarket/scripts/polymarket.py
@@ -0,0 +1,284 @@
+#!/usr/bin/env python3
+"""Polymarket CLI helper — query prediction market data.
+
+Usage:
+    python3 polymarket.py search "bitcoin"
+    python3 polymarket.py trending [--limit 10]
+    python3 polymarket.py market <slug>
+    python3 polymarket.py event <slug>
+    python3 polymarket.py price <token_id>
+    python3 polymarket.py book <token_id>
+    python3 polymarket.py history <condition_id> [--interval all] [--fidelity 50]
+    python3 polymarket.py trades [--limit 10] [--market CONDITION_ID]
+"""
+
+import json
+import sys
+import urllib.request
+import urllib.parse
+import urllib.error
+
+GAMMA = "https://gamma-api.polymarket.com"  # Gamma API base: public-search, events, markets
+CLOB = "https://clob.polymarket.com"  # CLOB API base: price, midpoint, spread, book, prices-history
+DATA = "https://data-api.polymarket.com"  # Data API base: trades
+
+
+def _get(url: str) -> dict | list:
+    """GET *url* and return its parsed JSON body; on any failure report to stderr and exit 1."""
+    req = urllib.request.Request(url, headers={"User-Agent": "hermes-agent/1.0"})
+    try:
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            return json.loads(resp.read().decode())
+    except urllib.error.HTTPError as e:  # subclass of URLError, so it must be caught first
+        sys.exit(f"HTTP {e.code}: {e.reason}")  # sys.exit(str) prints to stderr, status 1
+    except urllib.error.URLError as e:
+        sys.exit(f"Connection error: {e.reason}")
+    except (TimeoutError, ValueError) as e:  # read timeout, or a body that is not UTF-8 JSON
+        sys.exit(f"Bad response from {url}: {e}")
+
+
+def _parse_json_field(val):
+    """Decode a JSON-in-a-string field (outcomePrices, outcomes, clobTokenIds) when possible."""
+    if not isinstance(val, str):
+        return val  # not a string: pass through untouched
+    try:
+        return json.loads(val)
+    except (json.JSONDecodeError, TypeError):
+        return val  # not valid JSON: hand the raw string back unchanged
+
+
+def _fmt_pct(price_str: str) -> str:
+    """Format a numeric price (fraction of 1) as a one-decimal percent; else return str(input)."""
+    try:
+        return f"{float(price_str) * 100:.1f}%"
+    except (ValueError, TypeError):
+        return str(price_str)  # always a str, so callers can apply width specs such as ":>7"
+
+
+def _fmt_volume(vol) -> str:
+    """Render a volume as $N, $N.NK or $N.NM; input that is not a number comes back via str()."""
+    try:
+        amount = float(vol)
+    except (ValueError, TypeError):
+        return str(vol)
+    if amount >= 1_000_000:
+        return f"${amount / 1_000_000:.1f}M"
+    if amount >= 1_000:
+        return f"${amount / 1_000:.1f}K"
+    return f"${amount:.0f}"
+
+
+def _print_market(m: dict, indent: str = ""):
+    """Print one market: question, per-outcome percentages, volume and (if set) its slug."""
+    question = m.get("question", "?")
+    prices = _parse_json_field(m.get("outcomePrices", "[]"))
+    outcomes = _parse_json_field(m.get("outcomes", "[]"))
+    vol = _fmt_volume(m.get("volume", 0))
+    status = " [CLOSED]" if m.get("closed", False) else ""
+
+    if isinstance(prices, list) and len(prices) >= 2:
+        # A missing "outcomes" key decodes to [], which is still a list; without the
+        # emptiness check it would pair with no price at all and print no prices.
+        labels = outcomes if isinstance(outcomes, list) and outcomes else ["Yes", "No"]
+        # zip() stops at the shorter of labels/prices, so surplus entries are ignored.
+        price_str = " / ".join(f"{name}: {_fmt_pct(p)}" for name, p in zip(labels, prices))
+        print(f"{indent}{question}{status}")
+        print(f"{indent}  {price_str}  |  Volume: {vol}")
+    else:
+        # Fewer than two prices (or an undecodable field): one-line summary only.
+        print(f"{indent}{question}{status}  |  Volume: {vol}")
+
+    slug = m.get("slug", "")
+    if slug:
+        print(f"{indent}  slug: {slug}")
+
+
+def cmd_search(query: str):
+    """Search Gamma for *query*; print at most 10 events, each with up to 5 of its markets."""
+    response = _get(f"{GAMMA}/public-search?q={urllib.parse.quote(query)}")
+    events = response.get("events", [])
+    total = response.get("pagination", {}).get("totalResults", len(events))
+    print(f"Found {total} results for \"{query}\":\n")
+    for event in events[:10]:
+        print(f"=== {event['title']} ===")
+        print(f"  Volume: {_fmt_volume(event.get('volume', 0))}  |  slug: {event.get('slug', '')}")
+        markets = event.get("markets", [])
+        for market in markets[:5]:
+            _print_market(market, indent="  ")
+        hidden = len(markets) - 5  # markets beyond the five that were printed
+        if hidden > 0:
+            print(f"  ... and {hidden} more markets")
+        print()
+
+
+def cmd_trending(limit: int = 10):
+    """Print up to *limit* active, non-closed events by descending volume, 3 markets each."""
+    events = _get(f"{GAMMA}/events?limit={limit}&active=true&closed=false&order=volume&ascending=false")
+    print(f"Top {len(events)} trending events:\n")
+    for rank, event in enumerate(events, 1):
+        print(f"{rank}. {event['title']}")
+        markets = event.get("markets", [])
+        print(f"   Volume: {_fmt_volume(event.get('volume', 0))}  |  Markets: {len(markets)}")
+        print(f"   slug: {event.get('slug', '')}")
+        for market in markets[:3]:
+            _print_market(market, indent="   ")
+        if len(markets) > 3:
+            print(f"   ... and {len(markets) - 3} more markets")
+        print()
+
+
+def cmd_market(slug: str):
+    """Look up one market by slug; print summary, conditionId, token IDs, 500-char description."""
+    found = _get(f"{GAMMA}/markets?slug={urllib.parse.quote(slug)}")
+    if not found:
+        print(f"No market found with slug: {slug}")
+        return
+    market = found[0]
+    print(f"Market: {market.get('question', '?')}")
+    print(f"Status: {'CLOSED' if market.get('closed') else 'ACTIVE'}")
+    _print_market(market)
+    print(f"\n  conditionId: {market.get('conditionId', 'N/A')}")
+    token_ids = _parse_json_field(market.get("clobTokenIds", "[]"))
+    if isinstance(token_ids, list):
+        names = _parse_json_field(market.get("outcomes", "[]"))
+        for idx, token in enumerate(token_ids):
+            label = names[idx] if isinstance(names, list) and idx < len(names) else f"Outcome {idx}"
+            print(f"  token ({label}): {token}")
+    description = market.get("description", "")
+    if description:
+        print(f"\n  Description: {description[:500]}")
+
+
+def cmd_event(slug: str):
+    """Look up one event by slug; print its title, volume, status and every nested market."""
+    matches = _get(f"{GAMMA}/events?slug={urllib.parse.quote(slug)}")
+    if not matches:
+        print(f"No event found with slug: {slug}")
+        return
+    event = matches[0]
+    print(f"Event: {event['title']}")
+    print(f"Volume: {_fmt_volume(event.get('volume', 0))}")
+    print(f"Status: {'CLOSED' if event.get('closed') else 'ACTIVE'}")
+    print(f"Markets: {len(event.get('markets', []))}\n")
+    for market in event.get("markets", []):
+        _print_market(market, indent="  ")
+        print()
+
+
+def cmd_price(token_id: str):
+    """Print buy price and midpoint (as percentages) and the raw spread for one CLOB token."""
+    buy_quote = _get(f"{CLOB}/price?token_id={token_id}&side=buy")
+    midpoint = _get(f"{CLOB}/midpoint?token_id={token_id}")
+    spread_quote = _get(f"{CLOB}/spread?token_id={token_id}")
+    print(f"Token: {token_id[:30]}...")
+    print(f"  Buy price: {_fmt_pct(buy_quote.get('price', '?'))}")
+    print(f"  Midpoint:  {_fmt_pct(midpoint.get('mid', '?'))}")
+    print(f"  Spread:    {spread_quote.get('spread', '?')}")
+
+
+def cmd_book(token_id: str):
+    """Print last trade, tick size and the 10 best bids and asks of a token's orderbook."""
+    book = _get(f"{CLOB}/book?token_id={token_id}")
+    bids, asks = book.get("bids", []), book.get("asks", [])
+    last = book.get("last_trade_price", "?")
+    def show(levels, best_is_highest):
+        # Best price first: bids sort descending, asks ascending; at most 10 rows each.
+        ranked = sorted(levels, key=lambda lv: float(lv.get("price", 0)), reverse=best_is_highest)
+        for lv in ranked[:10]:
+            print(f"    {_fmt_pct(lv['price']):>7}  |  Size: {float(lv['size']):>10.2f}")
+
+    print(f"Orderbook for {token_id[:30]}...")
+    print(f"Last trade: {_fmt_pct(last)}  |  Tick size: {book.get('tick_size', '?')}")
+    print(f"\n  Top bids ({len(bids)} total):")
+    show(bids, True)
+    print(f"\n  Top asks ({len(asks)} total):")
+    show(asks, False)
+
+
+def cmd_history(condition_id: str, interval: str = "all", fidelity: int = 50):
+    """Print a market's price history: UTC time, percentage and a bar (40 cells = price 1.0)."""
+    from datetime import datetime, timezone
+
+    payload = _get(f"{CLOB}/prices-history?market={condition_id}&interval={interval}&fidelity={fidelity}")
+    points = payload.get("history", [])
+    if not points:
+        print("No price history available for this market.")
+        return
+    print(f"Price history ({len(points)} points, interval={interval}):\n")
+    for point in points:
+        stamp = datetime.fromtimestamp(point["t"], tz=timezone.utc).strftime("%Y-%m-%d %H:%M")
+        cells = int(float(point["p"]) * 40)  # bar length, truncated to whole block characters
+        print(f"  {stamp}  {_fmt_pct(point['p']):>7}  {'█' * cells}")
+
+
+def cmd_trades(limit: int = 10, market: str | None = None):
+    """Print recent trades from the Data API; a truthy *market* is sent as the `market=` filter."""
+    url = f"{DATA}/trades?limit={limit}" + (f"&market={market}" if market else "")
+    trades = _get(url)
+    if not isinstance(trades, list):
+        print(f"Unexpected response: {trades}")
+        return
+    print(f"Recent trades ({len(trades)}):\n")
+    for t in trades:
+        side = t.get("side", "?")
+        outcome = t.get("outcome", "?")
+        price = _fmt_pct(t.get("price", "?"))
+        title = (t.get("title") or "?")[:50]  # a null title must not break the slice
+        try:
+            size = f"{float(t.get('size')):>8.2f}"
+        except (TypeError, ValueError):  # absent or non-numeric size: show "?" instead of crashing
+            size = f"{'?':>8}"
+        print(f"  {side:4}  {price:>7}  x{size}  [{outcome}]  {title}")
+
+
+def _flag(args: list[str], name: str, default, cast=str):
+    """Return the value after option *name* in *args*, converted by *cast*, else *default*."""
+    if name not in args:
+        return default
+    idx = args.index(name)
+    if idx + 1 >= len(args):
+        return default  # option given as the last word, without a value
+    try:
+        return cast(args[idx + 1])
+    except ValueError:
+        sys.exit(f"{name} expects {cast.__name__}, got {args[idx + 1]!r}")
+
+
+def main():
+    """Dispatch sys.argv[1] to its cmd_* handler; print usage and exit 1 on bad usage."""
+    args = sys.argv[1:]
+    if not args or args[0] in ("-h", "--help", "help"):
+        print(__doc__)
+        return
+
+    cmd = args[0]
+    has_target = len(args) >= 2  # search/market/event/price/book/history need a positional
+
+    if cmd == "search" and has_target:
+        cmd_search(" ".join(args[1:]))
+    elif cmd == "trending":
+        cmd_trending(_flag(args, "--limit", 10, int))
+    elif cmd == "market" and has_target:
+        cmd_market(args[1])
+    elif cmd == "event" and has_target:
+        cmd_event(args[1])
+    elif cmd == "price" and has_target:
+        cmd_price(args[1])
+    elif cmd == "book" and has_target:
+        cmd_book(args[1])
+    elif cmd == "history" and has_target:
+        interval = _flag(args, "--interval", "all")
+        fidelity = _flag(args, "--fidelity", 50, int)
+        cmd_history(args[1], interval, fidelity)
+    elif cmd == "trades":
+        limit = _flag(args, "--limit", 10, int)
+        market = _flag(args, "--market", None)
+        cmd_trades(limit, market)
+    else:
+        # Reached for an unknown command and for a known one missing its argument.
+        print(f"Unknown command or missing argument: {cmd}")
+        print(__doc__)
+        sys.exit(1)
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -1,4 +1,4 @@
-"""Tests for agent.auxiliary_client resolution chain, especially the Codex fallback."""
+"""Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides."""

 import json
 import os
@@ -12,6 +12,9 @@ from agent.auxiliary_client import (
    get_vision_auxiliary_client,
    auxiliary_max_tokens_param,
    _read_codex_access_token,
+    _get_auxiliary_provider,
+    _resolve_forced_provider,
+    _resolve_auto,
 )


@@ -21,6 +24,10 @@ def _clean_env(monkeypatch):
    for key in (
        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
        "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
+        # Per-task provider/model overrides
+        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
+        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
+        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
    ):
        monkeypatch.delenv(key, raising=False)

@@ -151,15 +158,230 @@ class TestGetTextAuxiliaryClient:
        assert model is None


-class TestCodexNotInVisionClient:
-    """Codex fallback should NOT apply to vision tasks."""
+class TestVisionClientFallback:
+    """Vision auto mode can resolve to OpenRouter, Nous, or Codex; a custom endpoint is used only when forced."""

-    def test_vision_returns_none_without_openrouter_nous(self):
+    def test_vision_returns_none_without_any_credentials(self):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            client, model = get_vision_auxiliary_client()
        assert client is None
        assert model is None

+    def test_vision_auto_includes_codex(self, codex_auth_dir):
+        """Codex supports vision (gpt-5.3-codex), so auto mode should use it."""
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI"):
+            client, model = get_vision_auxiliary_client()
+        from agent.auxiliary_client import CodexAuxiliaryClient
+        assert isinstance(client, CodexAuxiliaryClient)
+        assert model == "gpt-5.3-codex"
+
+    def test_vision_auto_skips_custom_endpoint(self, monkeypatch):
+        """Custom endpoint is skipped in vision auto mode (Nous and Codex credentials stubbed out)."""
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+            client, model = get_vision_auxiliary_client()
+        assert client is None and model is None
+
+    def test_vision_uses_openrouter_when_available(self, monkeypatch):
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = get_vision_auxiliary_client()
+        assert model == "google/gemini-3-flash-preview"
+        assert client is not None
+
+    def test_vision_uses_nous_when_available(self, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
+             patch("agent.auxiliary_client.OpenAI"):
+            mock_nous.return_value = {"access_token": "nous-tok"}
+            client, model = get_vision_auxiliary_client()
+        assert model == "gemini-3-flash"
+        assert client is not None
+
+    def test_vision_forced_main_uses_custom_endpoint(self, monkeypatch):
+        """When explicitly forced to 'main', vision CAN use custom endpoint."""
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = get_vision_auxiliary_client()
+        assert client is not None
+        assert model == "gpt-4o-mini"
+
+    def test_vision_forced_main_returns_none_without_creds(self, monkeypatch):
+        """Forced main with no credentials still returns None."""
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+            client, model = get_vision_auxiliary_client()
+        assert client is None
+        assert model is None
+
+    def test_vision_forced_codex(self, monkeypatch, codex_auth_dir):
+        """When forced to 'codex', vision uses Codex OAuth."""
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "codex")
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI"):
+            client, model = get_vision_auxiliary_client()
+        from agent.auxiliary_client import CodexAuxiliaryClient
+        assert isinstance(client, CodexAuxiliaryClient)
+        assert model == "gpt-5.3-codex"
+
+
+class TestGetAuxiliaryProvider:
+    """Tests for _get_auxiliary_provider env var resolution."""
+
+    def test_no_task_returns_auto(self):
+        assert _get_auxiliary_provider() == "auto"
+        assert _get_auxiliary_provider("") == "auto"
+
+    def test_auxiliary_prefix_takes_priority(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "openrouter")
+        assert _get_auxiliary_provider("vision") == "openrouter"
+
+    def test_context_prefix_fallback(self, monkeypatch):
+        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
+        assert _get_auxiliary_provider("compression") == "nous"
+
+    def test_auxiliary_prefix_over_context_prefix(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_COMPRESSION_PROVIDER", "openrouter")
+        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
+        assert _get_auxiliary_provider("compression") == "openrouter"
+
+    def test_auto_value_treated_as_auto(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "auto")
+        assert _get_auxiliary_provider("vision") == "auto"
+
+    def test_whitespace_stripped(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "  openrouter  ")
+        assert _get_auxiliary_provider("vision") == "openrouter"
+
+    def test_case_insensitive(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "OpenRouter")
+        assert _get_auxiliary_provider("vision") == "openrouter"
+
+    def test_main_provider(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "main")
+        assert _get_auxiliary_provider("web_extract") == "main"
+
+
+class TestResolveForcedProvider:
+    """Tests for _resolve_forced_provider with explicit provider selection."""
+
+    def test_forced_openrouter(self, monkeypatch):
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = _resolve_forced_provider("openrouter")
+        assert model == "google/gemini-3-flash-preview"
+        assert client is not None
+
+    def test_forced_openrouter_no_key(self, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
+            client, model = _resolve_forced_provider("openrouter")
+        assert client is None
+        assert model is None
+
+    def test_forced_nous(self, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
+             patch("agent.auxiliary_client.OpenAI"):
+            mock_nous.return_value = {"access_token": "nous-tok"}
+            client, model = _resolve_forced_provider("nous")
+        assert model == "gemini-3-flash"
+        assert client is not None
+
+    def test_forced_nous_not_configured(self, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
+            client, model = _resolve_forced_provider("nous")
+        assert client is None
+        assert model is None
+
+    def test_forced_main_uses_custom(self, monkeypatch):
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = _resolve_forced_provider("main")
+        assert model == "gpt-4o-mini"
+
+    def test_forced_main_skips_openrouter_nous(self, monkeypatch):
+        """Even if OpenRouter key is set, 'main' skips it."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = _resolve_forced_provider("main")
+        # Should use custom endpoint, not OpenRouter
+        assert model == "gpt-4o-mini"
+
+    def test_forced_main_falls_to_codex(self, codex_auth_dir, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI"):
+            client, model = _resolve_forced_provider("main")
+        from agent.auxiliary_client import CodexAuxiliaryClient
+        assert isinstance(client, CodexAuxiliaryClient)
+        assert model == "gpt-5.3-codex"
+
+    def test_forced_codex(self, codex_auth_dir, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI"):
+            client, model = _resolve_forced_provider("codex")
+        from agent.auxiliary_client import CodexAuxiliaryClient
+        assert isinstance(client, CodexAuxiliaryClient)
+        assert model == "gpt-5.3-codex"
+
+    def test_forced_codex_no_token(self, monkeypatch):
+        with patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+            client, model = _resolve_forced_provider("codex")
+        assert client is None
+        assert model is None
+
+    def test_forced_unknown_returns_none(self, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+            client, model = _resolve_forced_provider("invalid-provider")
+        assert client is None
+        assert model is None
+
+
+class TestTaskSpecificOverrides:
+    """Integration tests for per-task provider routing via get_text_auxiliary_client(task=...)."""
+
+    def test_text_with_vision_provider_override(self, monkeypatch):
+        """AUXILIARY_VISION_PROVIDER should not affect text tasks."""
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "nous")
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        with patch("agent.auxiliary_client.OpenAI"):
+            client, model = get_text_auxiliary_client()  # no task → auto
+        assert model == "google/gemini-3-flash-preview"  # OpenRouter, not Nous
+
+    def test_compression_task_reads_context_prefix(self, monkeypatch):
+        """Compression task should check CONTEXT_COMPRESSION_PROVIDER."""
+        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")  # would win in auto
+        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
+             patch("agent.auxiliary_client.OpenAI"):
+            mock_nous.return_value = {"access_token": "nous-tok"}
+            client, model = get_text_auxiliary_client("compression")
+        assert model == "gemini-3-flash"  # forced to Nous, not OpenRouter
+
+    def test_web_extract_task_override(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "openrouter")
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        with patch("agent.auxiliary_client.OpenAI"):
+            client, model = get_text_auxiliary_client("web_extract")
+        assert model == "google/gemini-3-flash-preview"
+
+    def test_task_without_override_uses_auto(self, monkeypatch):
+        """A task with no provider env var falls through to auto chain."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        with patch("agent.auxiliary_client.OpenAI"):
+            client, model = get_text_auxiliary_client("compression")
+        assert model == "google/gemini-3-flash-preview"  # auto → OpenRouter
+

 class TestAuxiliaryMaxTokensParam:
    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -176,3 +176,147 @@ class TestCompressWithClient:
        contents = [m.get("content", "") for m in result]
        assert any("CONTEXT SUMMARY" in c for c in contents)
        assert len(result) < len(msgs)
+
+    def test_summarization_does_not_split_tool_call_pairs(self):
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle"
+        mock_client.chat.completions.create.return_value = mock_response
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
+             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
+            c = ContextCompressor(
+                model="test",
+                quiet_mode=True,
+                protect_first_n=3,
+                protect_last_n=4,
+            )
+
+        msgs = [
+            {"role": "user", "content": "Could you address the reviewer comments in PR#71"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {"id": "call_a", "type": "function", "function": {"name": "skill_view", "arguments": "{}"}},
+                    {"id": "call_b", "type": "function", "function": {"name": "skill_view", "arguments": "{}"}},
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_a", "content": "output a"},
+            {"role": "tool", "tool_call_id": "call_b", "content": "output b"},
+            {"role": "user", "content": "later 1"},
+            {"role": "assistant", "content": "later 2"},
+            {"role": "tool", "tool_call_id": "call_x", "content": "later output"},
+            {"role": "assistant", "content": "later 3"},
+            {"role": "user", "content": "later 4"},
+        ]
+
+        result = c.compress(msgs)
+
+        answered_ids = {
+            msg.get("tool_call_id")
+            for msg in result
+            if msg.get("role") == "tool" and msg.get("tool_call_id")
+        }
+        for msg in result:
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                for tc in msg["tool_calls"]:
+                    assert tc["id"] in answered_ids
+
+    def test_summary_role_avoids_consecutive_user_messages(self):
+        """Summary role should alternate with the last head message to avoid consecutive same-role messages."""
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
+        mock_client.chat.completions.create.return_value = mock_response
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
+             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        # Last head message (index 1) is "assistant" → summary should be "user"
+        msgs = [
+            {"role": "user", "content": "msg 0"},
+            {"role": "assistant", "content": "msg 1"},
+            {"role": "user", "content": "msg 2"},
+            {"role": "assistant", "content": "msg 3"},
+            {"role": "user", "content": "msg 4"},
+            {"role": "assistant", "content": "msg 5"},
+        ]
+        result = c.compress(msgs)
+        summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
+        assert len(summary_msg) == 1
+        assert summary_msg[0]["role"] == "user"
+
+    def test_summary_role_avoids_consecutive_user_when_head_ends_with_user(self):
+        """When last head message is 'user', summary must be 'assistant' to avoid two consecutive user messages."""
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
+        mock_client.chat.completions.create.return_value = mock_response
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
+             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=2)
+
+        # Last head message (index 2) is "user" → summary should be "assistant"
+        msgs = [
+            {"role": "system", "content": "system prompt"},
+            {"role": "user", "content": "msg 1"},
+            {"role": "user", "content": "msg 2"},  # last head — user
+            {"role": "assistant", "content": "msg 3"},
+            {"role": "user", "content": "msg 4"},
+            {"role": "assistant", "content": "msg 5"},
+            {"role": "user", "content": "msg 6"},
+            {"role": "assistant", "content": "msg 7"},
+        ]
+        result = c.compress(msgs)
+        summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
+        assert len(summary_msg) == 1
+        assert summary_msg[0]["role"] == "assistant"
+
+    def test_summarization_does_not_start_tail_with_tool_outputs(self):
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle"
+        mock_client.chat.completions.create.return_value = mock_response
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
+             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
+            c = ContextCompressor(
+                model="test",
+                quiet_mode=True,
+                protect_first_n=2,
+                protect_last_n=3,
+            )
+
+        msgs = [
+            {"role": "user", "content": "earlier 1"},
+            {"role": "assistant", "content": "earlier 2"},
+            {"role": "user", "content": "earlier 3"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {"id": "call_c", "type": "function", "function": {"name": "search_files", "arguments": "{}"}},
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_c", "content": "output c"},
+            {"role": "user", "content": "latest user"},
+        ]
+
+        result = c.compress(msgs)
+
+        called_ids = {
+            tc["id"]
+            for msg in result
+            if msg.get("role") == "assistant" and msg.get("tool_calls")
+            for tc in msg["tool_calls"]
+        }
+        for msg in result:
+            if msg.get("role") == "tool" and msg.get("tool_call_id"):
+                assert msg["tool_call_id"] in called_ids
--- a/tests/gateway/test_async_memory_flush.py
+++ b/tests/gateway/test_async_memory_flush.py
@@ -0,0 +1,180 @@
+"""Tests for proactive memory flush on session expiry.
+
+Verifies that:
+1. _is_session_expired() works from a SessionEntry alone (no source needed)
+2. The sync callback is no longer called in get_or_create_session
+3. _pre_flushed_sessions tracking works correctly
+4. The background watcher can detect expired sessions
+"""
+
+import pytest
+from datetime import datetime, timedelta
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+from gateway.config import Platform, GatewayConfig, SessionResetPolicy
+from gateway.session import SessionSource, SessionStore, SessionEntry
+
+
+@pytest.fixture()
+def idle_store(tmp_path):
+    """SessionStore with a 60-minute idle reset policy."""
+    config = GatewayConfig(
+        default_reset_policy=SessionResetPolicy(mode="idle", idle_minutes=60),
+    )
+    with patch("gateway.session.SessionStore._ensure_loaded"):
+        s = SessionStore(sessions_dir=tmp_path, config=config)
+    s._db = None
+    s._loaded = True
+    return s
+
+
+@pytest.fixture()
+def no_reset_store(tmp_path):
+    """SessionStore with no reset policy (mode=none)."""
+    config = GatewayConfig(
+        default_reset_policy=SessionResetPolicy(mode="none"),
+    )
+    with patch("gateway.session.SessionStore._ensure_loaded"):
+        s = SessionStore(sessions_dir=tmp_path, config=config)
+    s._db = None
+    s._loaded = True
+    return s
+
+
+class TestIsSessionExpired:
+    """_is_session_expired should detect expiry from entry alone."""
+
+    def test_idle_session_expired(self, idle_store):
+        entry = SessionEntry(
+            session_key="agent:main:telegram:dm",
+            session_id="sid_1",
+            created_at=datetime.now() - timedelta(hours=3),
+            updated_at=datetime.now() - timedelta(minutes=120),
+            platform=Platform.TELEGRAM,
+            chat_type="dm",
+        )
+        assert idle_store._is_session_expired(entry) is True
+
+    def test_active_session_not_expired(self, idle_store):
+        entry = SessionEntry(
+            session_key="agent:main:telegram:dm",
+            session_id="sid_2",
+            created_at=datetime.now() - timedelta(hours=1),
+            updated_at=datetime.now() - timedelta(minutes=10),
+            platform=Platform.TELEGRAM,
+            chat_type="dm",
+        )
+        assert idle_store._is_session_expired(entry) is False
+
+    def test_none_mode_never_expires(self, no_reset_store):
+        entry = SessionEntry(
+            session_key="agent:main:telegram:dm",
+            session_id="sid_3",
+            created_at=datetime.now() - timedelta(days=30),
+            updated_at=datetime.now() - timedelta(days=30),
+            platform=Platform.TELEGRAM,
+            chat_type="dm",
+        )
+        assert no_reset_store._is_session_expired(entry) is False
+
+    def test_active_processes_prevent_expiry(self, idle_store):
+        """Sessions with active background processes should never expire."""
+        idle_store._has_active_processes_fn = lambda key: True
+        entry = SessionEntry(
+            session_key="agent:main:telegram:dm",
+            session_id="sid_4",
+            created_at=datetime.now() - timedelta(hours=5),
+            updated_at=datetime.now() - timedelta(hours=5),
+            platform=Platform.TELEGRAM,
+            chat_type="dm",
+        )
+        assert idle_store._is_session_expired(entry) is False
+
+    def test_daily_mode_expired(self, tmp_path):
+        """Daily mode should expire sessions from before today's reset hour."""
+        config = GatewayConfig(
+            default_reset_policy=SessionResetPolicy(mode="daily", at_hour=4),
+        )
+        with patch("gateway.session.SessionStore._ensure_loaded"):
+            store = SessionStore(sessions_dir=tmp_path, config=config)
+        store._db = None
+        store._loaded = True
+
+        entry = SessionEntry(
+            session_key="agent:main:telegram:dm",
+            session_id="sid_5",
+            created_at=datetime.now() - timedelta(days=2),
+            updated_at=datetime.now() - timedelta(days=2),
+            platform=Platform.TELEGRAM,
+            chat_type="dm",
+        )
+        assert store._is_session_expired(entry) is True
+
+
+class TestGetOrCreateSessionNoCallback:
+    """get_or_create_session should NOT call a sync flush callback."""
+
+    def test_auto_reset_cleans_pre_flushed_marker(self, idle_store):
+        """When a session auto-resets, the pre_flushed marker should be discarded."""
+        source = SessionSource(
+            platform=Platform.TELEGRAM,
+            chat_id="123",
+            chat_type="dm",
+        )
+        # Create initial session
+        entry1 = idle_store.get_or_create_session(source)
+        old_sid = entry1.session_id
+
+        # Simulate the watcher having flushed it
+        idle_store._pre_flushed_sessions.add(old_sid)
+
+        # Simulate the session going idle
+        entry1.updated_at = datetime.now() - timedelta(minutes=120)
+        idle_store._save()
+
+        # Next call should auto-reset
+        entry2 = idle_store.get_or_create_session(source)
+        assert entry2.session_id != old_sid
+        assert entry2.was_auto_reset is True
+
+        # The old session_id should be removed from pre_flushed
+        assert old_sid not in idle_store._pre_flushed_sessions
+
+    def test_no_sync_callback_invoked(self, idle_store):
+        """No synchronous callback should block during auto-reset."""
+        source = SessionSource(
+            platform=Platform.TELEGRAM,
+            chat_id="123",
+            chat_type="dm",
+        )
+        entry1 = idle_store.get_or_create_session(source)
+        entry1.updated_at = datetime.now() - timedelta(minutes=120)
+        idle_store._save()
+
+        # Verify no _on_auto_reset attribute
+        assert not hasattr(idle_store, '_on_auto_reset')
+
+        # This should NOT block (no sync LLM call)
+        entry2 = idle_store.get_or_create_session(source)
+        assert entry2.was_auto_reset is True
+
+
+class TestPreFlushedSessionsTracking:
+    """The _pre_flushed_sessions set should prevent double-flushing."""
+
+    def test_starts_empty(self, idle_store):
+        assert len(idle_store._pre_flushed_sessions) == 0
+
+    def test_add_and_check(self, idle_store):
+        idle_store._pre_flushed_sessions.add("sid_old")
+        assert "sid_old" in idle_store._pre_flushed_sessions
+        assert "sid_other" not in idle_store._pre_flushed_sessions
+
+    def test_discard_on_reset(self, idle_store):
+        """discard should remove without raising if not present."""
+        idle_store._pre_flushed_sessions.add("sid_a")
+        idle_store._pre_flushed_sessions.discard("sid_a")
+        assert "sid_a" not in idle_store._pre_flushed_sessions
+        # discard on non-existent should not raise
+        idle_store._pre_flushed_sessions.discard("sid_nonexistent")
--- a/tests/gateway/test_resume_command.py
+++ b/tests/gateway/test_resume_command.py
@@ -0,0 +1,200 @@
+"""Tests for /resume gateway slash command.
+
+Tests the _handle_resume_command handler (switch to a previously-named session)
+across gateway messenger platforms.
+"""
+
+from unittest.mock import MagicMock, AsyncMock
+
+import pytest
+
+from gateway.config import Platform
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionSource, build_session_key
+
+
+def _make_event(text="/resume", platform=Platform.TELEGRAM,
+                user_id="12345", chat_id="67890"):
+    """Build a MessageEvent for testing."""
+    source = SessionSource(
+        platform=platform,
+        user_id=user_id,
+        chat_id=chat_id,
+        user_name="testuser",
+    )
+    return MessageEvent(text=text, source=source)
+
+
+def _session_key_for_event(event):
+    """Get the session key that build_session_key produces for an event."""
+    return build_session_key(event.source)
+
+
+def _make_runner(session_db=None, current_session_id="current_session_001",
+                 event=None):
+    """Create a bare GatewayRunner with a mock session_store and optional session_db."""
+    from gateway.run import GatewayRunner
+    runner = object.__new__(GatewayRunner)
+    runner.adapters = {}
+    runner._session_db = session_db
+    runner._running_agents = {}
+
+    # Compute the real session key if an event is provided
+    session_key = build_session_key(event.source) if event else "agent:main:telegram:dm"
+
+    # Mock session_store that returns a session entry with a known session_id
+    mock_session_entry = MagicMock()
+    mock_session_entry.session_id = current_session_id
+    mock_session_entry.session_key = session_key
+    mock_store = MagicMock()
+    mock_store.get_or_create_session.return_value = mock_session_entry
+    mock_store.load_transcript.return_value = []
+    mock_store.switch_session.return_value = mock_session_entry
+    runner.session_store = mock_store
+
+    # Stub out memory flushing
+    runner._async_flush_memories = AsyncMock()
+
+    return runner
+
+
+# ---------------------------------------------------------------------------
+# _handle_resume_command
+# ---------------------------------------------------------------------------
+
+
+class TestHandleResumeCommand:
+    """Tests for GatewayRunner._handle_resume_command."""
+
+    @pytest.mark.asyncio
+    async def test_no_session_db(self):
+        """Returns error when session database is unavailable."""
+        runner = _make_runner(session_db=None)
+        event = _make_event(text="/resume My Project")
+        result = await runner._handle_resume_command(event)
+        assert "not available" in result.lower()
+
+    @pytest.mark.asyncio
+    async def test_list_named_sessions_when_no_arg(self, tmp_path):
+        """With no argument, lists recently titled sessions."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("sess_001", "telegram")
+        db.create_session("sess_002", "telegram")
+        db.set_session_title("sess_001", "Research")
+        db.set_session_title("sess_002", "Coding")
+
+        event = _make_event(text="/resume")
+        runner = _make_runner(session_db=db, event=event)
+        result = await runner._handle_resume_command(event)
+        assert "Research" in result
+        assert "Coding" in result
+        assert "Named Sessions" in result
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_list_shows_usage_when_no_titled(self, tmp_path):
+        """With no arg and no titled sessions, shows instructions."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("sess_001", "telegram")  # No title
+
+        event = _make_event(text="/resume")
+        runner = _make_runner(session_db=db, event=event)
+        result = await runner._handle_resume_command(event)
+        assert "No named sessions" in result
+        assert "/title" in result
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_resume_by_name(self, tmp_path):
+        """Resolves a title and switches to that session."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("old_session_abc", "telegram")
+        db.set_session_title("old_session_abc", "My Project")
+        db.create_session("current_session_001", "telegram")
+
+        event = _make_event(text="/resume My Project")
+        runner = _make_runner(session_db=db, current_session_id="current_session_001",
+                              event=event)
+        result = await runner._handle_resume_command(event)
+
+        assert "Resumed" in result
+        assert "My Project" in result
+        # Verify switch_session was called with the old session ID
+        runner.session_store.switch_session.assert_called_once()
+        call_args = runner.session_store.switch_session.call_args
+        assert call_args[0][1] == "old_session_abc"
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_resume_nonexistent_name(self, tmp_path):
+        """Returns error for unknown session name."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("current_session_001", "telegram")
+
+        event = _make_event(text="/resume Nonexistent Session")
+        runner = _make_runner(session_db=db, event=event)
+        result = await runner._handle_resume_command(event)
+        assert "No session found" in result
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_resume_already_on_session(self, tmp_path):
+        """Returns friendly message when already on the requested session."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("current_session_001", "telegram")
+        db.set_session_title("current_session_001", "Active Project")
+
+        event = _make_event(text="/resume Active Project")
+        runner = _make_runner(session_db=db, current_session_id="current_session_001",
+                              event=event)
+        result = await runner._handle_resume_command(event)
+        assert "Already on session" in result
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_resume_auto_lineage(self, tmp_path):
+        """Asking for 'My Project' when 'My Project #2' exists gets the latest."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("sess_v1", "telegram")
+        db.set_session_title("sess_v1", "My Project")
+        db.create_session("sess_v2", "telegram")
+        db.set_session_title("sess_v2", "My Project #2")
+        db.create_session("current_session_001", "telegram")
+
+        event = _make_event(text="/resume My Project")
+        runner = _make_runner(session_db=db, current_session_id="current_session_001",
+                              event=event)
+        result = await runner._handle_resume_command(event)
+
+        assert "Resumed" in result
+        # Should resolve to #2 (latest in lineage)
+        call_args = runner.session_store.switch_session.call_args
+        assert call_args[0][1] == "sess_v2"
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_resume_clears_running_agent(self, tmp_path):
+        """Switching sessions clears any cached running agent."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("old_session", "telegram")
+        db.set_session_title("old_session", "Old Work")
+        db.create_session("current_session_001", "telegram")
+
+        event = _make_event(text="/resume Old Work")
+        runner = _make_runner(session_db=db, current_session_id="current_session_001",
+                              event=event)
+        # Simulate a running agent using the real session key
+        real_key = _session_key_for_event(event)
+        runner._running_agents[real_key] = MagicMock()
+
+        await runner._handle_resume_command(event)
+
+        assert real_key not in runner._running_agents
+        db.close()
--- a/tests/gateway/test_send_image_file.py
+++ b/tests/gateway/test_send_image_file.py
@@ -0,0 +1,335 @@
+"""
+Tests for send_image_file() on Telegram, Discord, and Slack platforms,
+and MEDIA: .png extraction/routing in the base platform adapter.
+
+Covers: local image file sending, file-not-found and not-connected handling,
+        MEDIA: tag extraction for image extensions, and browser screenshot cleanup.
+"""
+
+import asyncio
+import os
+import sys
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter, SendResult
+
+
+# ---------------------------------------------------------------------------
+# MEDIA: extraction tests for image files
+# ---------------------------------------------------------------------------
+
+
+class TestExtractMediaImages:
+    """MEDIA: tags that point at image files are pulled out of the message text."""
+
+    def test_png_image_extracted(self):
+        text = "Here is the screenshot:\nMEDIA:/home/user/.hermes/browser_screenshots/shot.png"
+        found, remainder = BasePlatformAdapter.extract_media(text)
+        assert len(found) == 1
+        assert found[0][0] == "/home/user/.hermes/browser_screenshots/shot.png"
+        assert "MEDIA:" not in remainder
+        assert "Here is the screenshot" in remainder
+
+    def test_jpg_image_extracted(self):
+        text = "MEDIA:/tmp/photo.jpg"
+        found, _remainder = BasePlatformAdapter.extract_media(text)
+        assert len(found) == 1
+        assert found[0][0] == "/tmp/photo.jpg"
+
+    def test_webp_image_extracted(self):
+        text = "MEDIA:/tmp/image.webp"
+        found, _remainder = BasePlatformAdapter.extract_media(text)
+        assert len(found) == 1
+
+    def test_mixed_audio_and_image(self):
+        text = "MEDIA:/audio.ogg\nMEDIA:/screenshot.png"
+        found, _remainder = BasePlatformAdapter.extract_media(text)
+        assert len(found) == 2
+        extracted_paths = [entry[0] for entry in found]
+        assert "/audio.ogg" in extracted_paths
+        assert "/screenshot.png" in extracted_paths
+
+
+# ---------------------------------------------------------------------------
+# Telegram send_image_file tests
+# ---------------------------------------------------------------------------
+
+
+def _ensure_telegram_mock():
+    """Register one MagicMock as the telegram modules so TelegramAdapter imports."""
+    if hasattr(sys.modules.get("telegram"), "__file__"):
+        return  # a telegram module that has __file__ is already registered
+
+    stub = MagicMock()
+    stub.ext.ContextTypes.DEFAULT_TYPE = type(None)
+    stub.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
+    chat_type = stub.constants.ChatType
+    chat_type.GROUP, chat_type.SUPERGROUP = "group", "supergroup"
+    chat_type.CHANNEL, chat_type.PRIVATE = "channel", "private"
+
+    # setdefault leaves any entry already present under these names untouched.
+    for module_name in ("telegram", "telegram.ext", "telegram.constants"):
+        sys.modules.setdefault(module_name, stub)
+
+
+_ensure_telegram_mock()
+
+from gateway.platforms.telegram import TelegramAdapter  # noqa: E402
+
+
+class TestTelegramSendImageFile:
+    """TelegramAdapter.send_image_file exercised against a mocked bot."""
+
+    @pytest.fixture
+    def adapter(self):
+        config = PlatformConfig(enabled=True, token="fake-token")
+        a = TelegramAdapter(config)
+        a._bot = MagicMock()
+        return a
+
+    @pytest.mark.asyncio
+    async def test_sends_local_image_as_photo(self, adapter, tmp_path):
+        """A local image goes through bot.send_photo and the message id is returned."""
+        img = tmp_path / "screenshot.png"
+        img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100)  # Minimal PNG-like
+
+        mock_msg = MagicMock()
+        mock_msg.message_id = 42
+        adapter._bot.send_photo = AsyncMock(return_value=mock_msg)
+
+        result = await adapter.send_image_file(chat_id="12345", image_path=str(img))
+        assert result.success
+        assert result.message_id == "42"
+        adapter._bot.send_photo.assert_awaited_once()
+
+        # The string chat id must reach the bot as an int.
+        call_kwargs = adapter._bot.send_photo.call_args
+        assert call_kwargs.kwargs["chat_id"] == 12345
+
+    @pytest.mark.asyncio
+    async def test_returns_error_when_file_missing(self, adapter):
+        """send_image_file should return error for nonexistent file."""
+        result = await adapter.send_image_file(
+            chat_id="12345", image_path="/nonexistent/image.png"
+        )
+        assert not result.success
+        assert "not found" in result.error
+
+    @pytest.mark.asyncio
+    async def test_returns_error_when_not_connected(self, adapter):
+        """send_image_file should return error when bot is None."""
+        adapter._bot = None
+        result = await adapter.send_image_file(chat_id="12345", image_path="/tmp/img.png")
+        assert not result.success
+        assert "Not connected" in result.error
+
+    @pytest.mark.asyncio
+    async def test_caption_truncated_to_1024(self, adapter, tmp_path):
+        """A 2000-character caption reaches send_photo cut down to 1024 characters."""
+        img = tmp_path / "shot.png"
+        img.write_bytes(b"\x89PNG" + b"\x00" * 50)
+
+        mock_msg = MagicMock()
+        mock_msg.message_id = 1
+        adapter._bot.send_photo = AsyncMock(return_value=mock_msg)
+
+        long_caption = "A" * 2000
+        await adapter.send_image_file(chat_id="12345", image_path=str(img), caption=long_caption)
+
+        call_kwargs = adapter._bot.send_photo.call_args.kwargs
+        assert len(call_kwargs["caption"]) == 1024
+
+
+# ---------------------------------------------------------------------------
+# Discord send_image_file tests
+# ---------------------------------------------------------------------------
+
+
+def _ensure_discord_mock():
+    """Register one MagicMock as the discord modules so DiscordAdapter imports."""
+    if hasattr(sys.modules.get("discord"), "__file__"):
+        return  # a discord module that has __file__ is already registered
+
+    stub = MagicMock()
+    stub.Intents.default.return_value = MagicMock()
+    stub.Client = stub.File = MagicMock  # the MagicMock class, not an instance
+
+    # setdefault leaves any entry already present under these names untouched.
+    for module_name in ("discord", "discord.ext", "discord.ext.commands"):
+        sys.modules.setdefault(module_name, stub)
+
+
+_ensure_discord_mock()
+
+import discord as discord_mod_ref  # noqa: E402
+from gateway.platforms.discord import DiscordAdapter  # noqa: E402
+
+
+class TestDiscordSendImageFile:
+    """DiscordAdapter.send_image_file exercised against a mocked client."""
+
+    @pytest.fixture
+    def adapter(self):
+        config = PlatformConfig(enabled=True, token="fake-token")
+        a = DiscordAdapter(config)
+        a._client = MagicMock()
+        return a
+
+    @pytest.mark.asyncio
+    async def test_sends_local_image_as_attachment(self, adapter, tmp_path):
+        """A local image goes through channel.send and the message id is returned."""
+        img = tmp_path / "screenshot.png"
+        img.write_bytes(b"\x89PNG" + b"\x00" * 50)
+
+        mock_channel = MagicMock()
+        mock_msg = MagicMock()
+        mock_msg.id = 99
+        mock_channel.send = AsyncMock(return_value=mock_msg)
+        adapter._client.get_channel = MagicMock(return_value=mock_channel)
+
+        result = await adapter.send_image_file(chat_id="67890", image_path=str(img))
+        assert result.success
+        assert result.message_id == "99"
+        mock_channel.send.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_returns_error_when_file_missing(self, adapter):
+        """A nonexistent image path yields a 'not found' error."""
+        result = await adapter.send_image_file(chat_id="67890", image_path="/nonexistent.png")
+        assert not result.success
+        assert "not found" in result.error
+
+    @pytest.mark.asyncio
+    async def test_returns_error_when_not_connected(self, adapter):
+        """With no client set, the adapter reports 'Not connected'."""
+        adapter._client = None
+        result = await adapter.send_image_file(chat_id="67890", image_path="/tmp/img.png")
+        assert not result.success
+        assert "Not connected" in result.error
+
+    @pytest.mark.asyncio
+    async def test_handles_missing_channel(self, adapter):
+        """get_channel and fetch_channel both return None -> 'not found' error."""
+        adapter._client.get_channel = MagicMock(return_value=None)
+        adapter._client.fetch_channel = AsyncMock(return_value=None)
+        result = await adapter.send_image_file(chat_id="99999", image_path="/tmp/img.png")
+        assert not result.success
+        assert "not found" in result.error
+
+
+# ---------------------------------------------------------------------------
+# Slack send_image_file tests
+# ---------------------------------------------------------------------------
+
+
+def _ensure_slack_mock():
+    """Register one MagicMock as the slack modules so SlackAdapter imports."""
+    if hasattr(sys.modules.get("slack_bolt"), "__file__"):
+        return  # a slack_bolt module that has __file__ is already registered
+
+    stub = MagicMock()
+    for mod in ("slack_bolt", "slack_bolt.async_app", "slack_sdk", "slack_sdk.web.async_client"):
+        sys.modules.setdefault(mod, stub)
+
+
+_ensure_slack_mock()
+
+from gateway.platforms.slack import SlackAdapter  # noqa: E402
+
+
+class TestSlackSendImageFile:
+    """SlackAdapter.send_image_file exercised against a mocked Slack app."""
+
+    @pytest.fixture
+    def adapter(self):
+        config = PlatformConfig(enabled=True, token="xoxb-fake")
+        a = SlackAdapter(config)
+        a._app = MagicMock()
+        return a
+
+    @pytest.mark.asyncio
+    async def test_sends_local_image_via_upload(self, adapter, tmp_path):
+        """send_image_file should call files_upload_v2 with the local path."""
+        img = tmp_path / "screenshot.png"
+        img.write_bytes(b"\x89PNG" + b"\x00" * 50)
+        mock_result = MagicMock()
+        adapter._app.client.files_upload_v2 = AsyncMock(return_value=mock_result)
+
+        result = await adapter.send_image_file(chat_id="C12345", image_path=str(img))
+        assert result.success
+        adapter._app.client.files_upload_v2.assert_awaited_once()
+
+        call_kwargs = adapter._app.client.files_upload_v2.call_args.kwargs
+        assert call_kwargs["file"] == str(img)
+        assert call_kwargs["filename"] == "screenshot.png"
+        assert call_kwargs["channel"] == "C12345"
+
+    @pytest.mark.asyncio
+    async def test_returns_error_when_file_missing(self, adapter):
+        """A nonexistent image path yields a 'not found' error."""
+        result = await adapter.send_image_file(chat_id="C12345", image_path="/nonexistent.png")
+        assert not result.success
+        assert "not found" in result.error
+
+    @pytest.mark.asyncio
+    async def test_returns_error_when_not_connected(self, adapter):
+        """With no app set, the adapter reports 'Not connected'."""
+        adapter._app = None
+        result = await adapter.send_image_file(chat_id="C12345", image_path="/tmp/img.png")
+        assert not result.success
+        assert "Not connected" in result.error
+
+
+# ---------------------------------------------------------------------------
+# browser_vision screenshot cleanup tests
+# ---------------------------------------------------------------------------
+
+
+class TestScreenshotCleanup:
+    def test_cleanup_removes_old_screenshots(self, tmp_path):
+        """A screenshot backdated past max_age_hours is deleted; a new one is kept."""
+        import time
+        from tools.browser_tool import _cleanup_old_screenshots
+
+        # Written just now, so it is younger than the 24 hour cutoff used below.
+        recent = tmp_path / "browser_screenshot_fresh.png"
+        recent.write_bytes(b"new")
+
+        # Same naming scheme, but with atime/mtime pushed 25 hours into the past.
+        stale = tmp_path / "browser_screenshot_old.png"
+        stale.write_bytes(b"old")
+        backdated = time.time() - 25 * 3600
+        os.utime(str(stale), (backdated, backdated))
+
+        _cleanup_old_screenshots(tmp_path, max_age_hours=24)
+
+        assert recent.exists(), "Fresh screenshot should not be removed"
+        assert not stale.exists(), "Old screenshot should be removed"
+
+    def test_cleanup_ignores_non_screenshot_files(self, tmp_path):
+        """An aged file without the browser_screenshot_ name survives cleanup."""
+        import time
+        from tools.browser_tool import _cleanup_old_screenshots
+
+        unrelated = tmp_path / "important_data.txt"
+        unrelated.write_bytes(b"keep me")
+        backdated = time.time() - 48 * 3600
+        os.utime(str(unrelated), (backdated, backdated))
+
+        _cleanup_old_screenshots(tmp_path, max_age_hours=24)
+
+        assert unrelated.exists(), "Non-screenshot files should not be touched"
+
+    def test_cleanup_handles_empty_dir(self, tmp_path):
+        """Running cleanup on a directory with no files must not raise."""
+        from tools.browser_tool import _cleanup_old_screenshots
+        _cleanup_old_screenshots(tmp_path, max_age_hours=24)
+
+    def test_cleanup_handles_nonexistent_dir(self):
+        """Running cleanup on a missing directory must not raise."""
+        from pathlib import Path
+        from tools.browser_tool import _cleanup_old_screenshots
+        _cleanup_old_screenshots(Path("/nonexistent/dir"), max_age_hours=24)
--- a/tests/gateway/test_session_hygiene.py
+++ b/tests/gateway/test_session_hygiene.py
@@ -0,0 +1,204 @@
+"""Tests for gateway session hygiene — auto-compression of large sessions.
+
+Verifies that the gateway detects pathologically large transcripts and
+triggers auto-compression before running the agent.  (#628)
+
+The hygiene system uses the SAME compression config as the agent:
+  compression.threshold × model context length
+so CLI and messaging platforms behave identically.
+"""
+
+import pytest
+from unittest.mock import patch, MagicMock, AsyncMock
+from agent.model_metadata import estimate_messages_tokens_rough
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_history(n_messages: int, content_size: int = 100) -> list:
+    """Return n_messages fake messages alternating user/assistant, starting with user."""
+    filler = "x" * content_size
+    roles = ("user", "assistant")
+    return [
+        {"role": roles[idx % 2], "content": filler, "timestamp": f"t{idx}"}
+        for idx in range(n_messages)
+    ]
+
+
+def _make_large_history_tokens(target_tokens: int) -> list:
+    """Return a 50-message history sized to estimate at about target_tokens tokens."""
+    # Assumption carried over from the original note: the rough estimator
+    # counts about one token per 4 characters of str(msg) — TODO confirm.
+    message_count = 50
+    # Approximate per-message cost of the dict repr beyond the content itself.
+    per_message_overhead = 60
+    total_chars = target_tokens * 4
+    # Spread the characters evenly across the messages; never go below 10
+    # characters of content per message.
+    per_message_content = max(10, total_chars // message_count - per_message_overhead)
+    return _make_history(message_count, content_size=per_message_content)
+
+
+# ---------------------------------------------------------------------------
+# Detection threshold tests (model-aware, unified with compression config)
+# ---------------------------------------------------------------------------
+
+class TestSessionHygieneThresholds:
+    """Checks of the token-limit comparison used to spot oversized sessions.
+
+    Every test computes the limit as context length × threshold ratio and
+    compares it with estimate_messages_tokens_rough() of a synthetic history.
+    """
+
+    def test_small_session_below_thresholds(self):
+        """Ten default-sized messages stay under the limit."""
+        transcript = _make_history(10)
+        estimated = estimate_messages_tokens_rough(transcript)
+
+        # 200k context × 0.85 → 170k token limit
+        window = 200_000
+        ratio = 0.85
+        limit = int(window * ratio)
+
+        over_limit = estimated >= limit
+        assert not over_limit
+
+    def test_large_token_count_triggers(self):
+        """A history sized for about 180k tokens crosses the 170k limit."""
+        # Sized above 85% of a 200k context (170k tokens)
+        transcript = _make_large_history_tokens(180_000)
+        estimated = estimate_messages_tokens_rough(transcript)
+
+        window = 200_000
+        ratio = 0.85
+        limit = int(window * ratio)
+
+        over_limit = estimated >= limit
+        assert over_limit
+
+    def test_under_threshold_no_trigger(self):
+        """Many tiny messages still fall short of the token limit."""
+        # 250 messages of 10 characters each: high count, low token total
+        transcript = _make_history(250, content_size=10)
+        approx_tokens = estimate_messages_tokens_rough(transcript)
+
+        # 200k context × 0.85 → 170k token limit
+        window = 200_000
+        ratio = 0.85
+        compress_token_threshold = int(window * ratio)
+
+        over_limit = approx_tokens >= compress_token_threshold
+        assert not over_limit, (
+            f"250 short messages (~{approx_tokens} tokens) should NOT trigger "
+            f"compression at {compress_token_threshold} token threshold"
+        )
+
+    def test_message_count_alone_does_not_trigger(self):
+        """The number of messages by itself never decides; only tokens do.
+
+        Per the original author's note, an earlier OR of token-count and
+        message-count thresholds compressed tool-heavy sessions (200+
+        messages, few tokens) too early.
+        """
+        # 300 messages of 10 characters each must stay under the token limit
+        transcript = _make_history(300, content_size=10)
+        estimated = estimate_messages_tokens_rough(transcript)
+
+        window = 200_000
+        ratio = 0.85
+        limit = int(window * ratio)
+
+        # The comparison involves tokens only; message count plays no part
+        over_limit = estimated >= limit
+        assert not over_limit
+
+    def test_threshold_scales_with_model(self):
+        """The same history is over the limit for 128k but not for 200k or 1M."""
+        # 128k × 0.85 = 108,800 tokens
+        limit_128k = int(128_000 * 0.85)
+        # 200k × 0.85 = 170,000 tokens
+        limit_200k = int(200_000 * 0.85)
+        # 1M × 0.85 = 850,000 tokens
+        limit_1m = int(1_000_000 * 0.85)
+
+        # History sized for roughly 120k tokens
+        transcript = _make_large_history_tokens(120_000)
+        estimated = estimate_messages_tokens_rough(transcript)
+
+        # Over the 128k model's limit
+        assert estimated >= limit_128k
+        # Under the 200k model's limit
+        assert estimated < limit_200k
+        # Under the 1M model's limit
+        assert estimated < limit_1m
+
+    def test_custom_threshold_percentage(self):
+        """A 150k-token history is over a 50% limit and under a 90% limit."""
+        window = 200_000
+
+        # 50% of the context = 100k
+        lenient_limit = int(window * 0.50)
+        # 90% of the context = 180k
+        strict_limit = int(window * 0.90)
+
+        transcript = _make_large_history_tokens(150_000)
+        estimated = estimate_messages_tokens_rough(transcript)
+
+        # Between the two limits
+        assert estimated >= lenient_limit
+        assert estimated < strict_limit
+
+    def test_minimum_message_guard(self):
+        """A three-message history is below the four-message minimum."""
+        transcript = _make_history(3, content_size=100_000)
+        # Per the original note, the gateway only evaluates histories with
+        # `len(history) >= 4`; this test just pins the fixture size.
+        assert len(transcript) < 4
+
+
+class TestSessionHygieneWarnThreshold:
+    """Arithmetic checks for the 95%-of-context warning limit after compression."""
+
+    def test_warn_when_still_large(self):
+        """195k tokens left after compression reaches the 190k warning limit."""
+        window = 200_000
+        warn_limit = int(window * 0.95)  # 190k
+        remaining_tokens = 195_000
+        assert remaining_tokens >= warn_limit
+
+    def test_no_warn_when_under(self):
+        """150k tokens left after compression stays below the 190k warning limit."""
+        window = 200_000
+        warn_limit = int(window * 0.95)  # 190k
+        remaining_tokens = 150_000
+        assert remaining_tokens < warn_limit
+
+
+class TestTokenEstimation:
+    """Sanity checks on estimate_messages_tokens_rough as used by the hygiene tests."""
+
+    def test_empty_history(self):
+        assert estimate_messages_tokens_rough([]) == 0
+
+    def test_proportional_to_content(self):
+        terse = _make_history(10, content_size=100)
+        bulky = _make_history(10, content_size=10_000)
+        assert estimate_messages_tokens_rough(bulky) > estimate_messages_tokens_rough(terse)
+
+    def test_proportional_to_count(self):
+        ten = _make_history(10, content_size=1000)
+        hundred = _make_history(100, content_size=1000)
+        assert estimate_messages_tokens_rough(hundred) > estimate_messages_tokens_rough(ten)
+
+    def test_pathological_session_detected(self):
+        """648 messages of 1800 characters exceed the 170k limit of a 200k model.
+
+        Modelled on the reported ~299K-token session (per the original note).
+        """
+        transcript = _make_history(648, content_size=1800)
+        estimated = estimate_messages_tokens_rough(transcript)
+        # 85% of a 200k context
+        limit = int(200_000 * 0.85)
+        assert estimated > limit
--- a/tests/gateway/test_signal.py
+++ b/tests/gateway/test_signal.py
@@ -0,0 +1,294 @@
+"""Tests for Signal messenger platform adapter."""
+import json
+import pytest
+from unittest.mock import MagicMock, patch, AsyncMock
+
+from gateway.config import Platform, PlatformConfig
+
+
+# ---------------------------------------------------------------------------
+# Platform & Config
+# ---------------------------------------------------------------------------
+
+class TestSignalPlatformEnum:
+    def test_signal_enum_exists(self):
+        assert Platform.SIGNAL.value == "signal"  # member exists with this value
+
+    def test_signal_in_platform_list(self):
+        member_values = [member.value for member in Platform]
+        assert "signal" in member_values
+
+
+class TestSignalConfigLoading:
+    def test_apply_env_overrides_signal(self, monkeypatch):
+        """Both Signal variables set -> an enabled Signal platform entry is created."""
+        monkeypatch.setenv("SIGNAL_HTTP_URL", "http://localhost:9090")
+        monkeypatch.setenv("SIGNAL_ACCOUNT", "+15551234567")
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        assert Platform.SIGNAL in config.platforms
+        sc = config.platforms[Platform.SIGNAL]
+        assert sc.enabled is True
+        assert sc.extra["http_url"] == "http://localhost:9090"
+        assert sc.extra["account"] == "+15551234567"
+
+    def test_signal_not_loaded_without_both_vars(self, monkeypatch):
+        """Only SIGNAL_HTTP_URL set -> no Signal platform entry."""
+        monkeypatch.setenv("SIGNAL_HTTP_URL", "http://localhost:9090")
+        monkeypatch.delenv("SIGNAL_ACCOUNT", raising=False)  # ignore the caller's env
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        assert Platform.SIGNAL not in config.platforms
+
+    def test_connected_platforms_includes_signal(self, monkeypatch):
+        """A Signal entry created from the environment counts as connected."""
+        monkeypatch.setenv("SIGNAL_HTTP_URL", "http://localhost:8080")
+        monkeypatch.setenv("SIGNAL_ACCOUNT", "+15551234567")
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        connected = config.get_connected_platforms()
+        assert Platform.SIGNAL in connected
+
+
+# ---------------------------------------------------------------------------
+# Adapter Init & Helpers
+# ---------------------------------------------------------------------------
+
+class TestSignalAdapterInit:
+    def _make_config(self, **extra):
+        """Build an enabled PlatformConfig; keyword args override the default extras."""
+        # Later keys win, so a caller can replace http_url or account.
+        defaults = {"http_url": "http://localhost:8080", "account": "+15551234567"}
+
+        cfg = PlatformConfig()
+        cfg.enabled = True
+        cfg.extra = {**defaults, **extra}
+        return cfg
+
+    def test_init_parses_config(self, monkeypatch):
+        """URL, account and the comma-separated group allowlist end up on the adapter."""
+        monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "group123,group456")
+        from gateway.platforms.signal import SignalAdapter
+        sig = SignalAdapter(self._make_config())
+
+        assert sig.http_url == "http://localhost:8080"
+        assert sig.account == "+15551234567"
+        assert "group123" in sig.group_allow_from
+
+    def test_init_empty_allowlist(self, monkeypatch):
+        """An empty SIGNAL_GROUP_ALLOWED_USERS gives an empty group allowlist."""
+        monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "")
+        from gateway.platforms.signal import SignalAdapter
+        sig = SignalAdapter(self._make_config())
+
+        assert len(sig.group_allow_from) == 0
+
+    def test_init_strips_trailing_slash(self, monkeypatch):
+        """A trailing slash on http_url is not kept on the adapter."""
+        monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "")
+        from gateway.platforms.signal import SignalAdapter
+        sig = SignalAdapter(self._make_config(http_url="http://localhost:8080/"))
+
+        assert sig.http_url == "http://localhost:8080"
+
+    def test_self_message_filtering(self, monkeypatch):
+        """The configured account is exposed as _account_normalized."""
+        monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "")
+        from gateway.platforms.signal import SignalAdapter
+        sig = SignalAdapter(self._make_config())
+
+        assert sig._account_normalized == "+15551234567"
+
+
+class TestSignalHelpers:
+    def test_redact_phone_long(self):
+        from gateway.platforms.signal import _redact_phone as redact
+        assert redact("+15551234567") == "+155****4567"
+
+    def test_redact_phone_short(self):
+        from gateway.platforms.signal import _redact_phone as redact
+        assert redact("+12345") == "+1****45"
+
+    def test_redact_phone_empty(self):
+        from gateway.platforms.signal import _redact_phone as redact
+        assert redact("") == "<none>"
+
+    def test_parse_comma_list(self):
+        from gateway.platforms.signal import _parse_comma_list as split_list
+        assert split_list("+1234, +5678 , +9012") == ["+1234", "+5678", "+9012"]
+        assert split_list("") == []
+        assert split_list("  ,  ,  ") == []
+
+    def test_guess_extension_png(self):
+        from gateway.platforms.signal import _guess_extension as guess
+        assert guess(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100) == ".png"
+
+    def test_guess_extension_jpeg(self):
+        from gateway.platforms.signal import _guess_extension as guess
+        assert guess(b"\xff\xd8\xff\xe0" + b"\x00" * 100) == ".jpg"
+
+    def test_guess_extension_pdf(self):
+        from gateway.platforms.signal import _guess_extension as guess
+        assert guess(b"%PDF-1.4" + b"\x00" * 100) == ".pdf"
+
+    def test_guess_extension_zip(self):
+        from gateway.platforms.signal import _guess_extension as guess
+        assert guess(b"PK\x03\x04" + b"\x00" * 100) == ".zip"
+
+    def test_guess_extension_mp4(self):
+        from gateway.platforms.signal import _guess_extension as guess
+        assert guess(b"\x00\x00\x00\x18ftypisom" + b"\x00" * 100) == ".mp4"
+
+    def test_guess_extension_unknown(self):
+        from gateway.platforms.signal import _guess_extension as guess
+        assert guess(b"\x00\x01\x02\x03" * 10) == ".bin"
+
+    def test_is_image_ext(self):
+        from gateway.platforms.signal import _is_image_ext as is_image
+        assert is_image(".png") is True
+        assert is_image(".jpg") is True
+        assert is_image(".gif") is True
+        assert is_image(".pdf") is False
+
+    def test_is_audio_ext(self):
+        from gateway.platforms.signal import _is_audio_ext as is_audio
+        assert is_audio(".mp3") is True
+        assert is_audio(".ogg") is True
+        assert is_audio(".png") is False
+
+    def test_check_requirements(self, monkeypatch):
+        from gateway.platforms.signal import check_signal_requirements as requirements_met
+        monkeypatch.setenv("SIGNAL_HTTP_URL", "http://localhost:8080")
+        monkeypatch.setenv("SIGNAL_ACCOUNT", "+15551234567")
+        assert requirements_met() is True
+
+    def test_render_mentions(self):
+        from gateway.platforms.signal import _render_mentions as render
+        body = "Hello \uFFFC, how are you?"
+        mention_specs = [{"start": 6, "length": 1, "number": "+15559999999"}]
+        rendered = render(body, mention_specs)
+        assert "@+15559999999" in rendered
+        assert "\uFFFC" not in rendered
+
+    def test_render_mentions_no_mentions(self):
+        from gateway.platforms.signal import _render_mentions as render
+        body = "Hello world"
+        rendered = render(body, [])
+        assert rendered == "Hello world"
+
+    def test_check_requirements_missing(self, monkeypatch):
+        from gateway.platforms.signal import check_signal_requirements as requirements_met
+        monkeypatch.delenv("SIGNAL_HTTP_URL", raising=False)
+        monkeypatch.delenv("SIGNAL_ACCOUNT", raising=False)
+        assert requirements_met() is False
+
+
+# ---------------------------------------------------------------------------
+# Session Source
+# ---------------------------------------------------------------------------
+
+class TestSignalSessionSource:
+    def test_session_source_alt_fields(self):
+        from gateway.session import SessionSource
+        origin = SessionSource(
+            platform=Platform.SIGNAL,
+            chat_id="+15551234567",
+            user_id="+15551234567",
+            user_id_alt="uuid:abc-123",
+            chat_id_alt=None,
+        )
+        payload = origin.to_dict()
+        assert payload["user_id_alt"] == "uuid:abc-123"
+        assert "chat_id_alt" not in payload  # a None alt field is left out
+
+    def test_session_source_roundtrip(self):
+        from gateway.session import SessionSource
+        origin = SessionSource(
+            platform=Platform.SIGNAL,
+            chat_id="group:xyz",
+            chat_type="group",
+            user_id="+15551234567",
+            user_id_alt="uuid:abc",
+            chat_id_alt="xyz",
+        )
+        payload = origin.to_dict()
+        rebuilt = SessionSource.from_dict(payload)
+        assert rebuilt.user_id_alt == "uuid:abc"
+        assert rebuilt.chat_id_alt == "xyz"
+        assert rebuilt.platform == Platform.SIGNAL
+
+
+# ---------------------------------------------------------------------------
+# Phone Redaction in agent/redact.py
+# ---------------------------------------------------------------------------
+
+class TestSignalPhoneRedaction:
+    def test_us_number(self):
+        from agent.redact import redact_sensitive_text as redact
+        scrubbed = redact("Call +15551234567 now")
+        assert "+15551234567" not in scrubbed
+        assert "+155" in scrubbed  # leading digits stay visible
+        assert "4567" in scrubbed  # trailing digits stay visible
+
+    def test_uk_number(self):
+        from agent.redact import redact_sensitive_text as redact
+        scrubbed = redact("UK: +442071838750")
+        assert "+442071838750" not in scrubbed
+        assert "****" in scrubbed
+
+    def test_multiple_numbers(self):
+        from agent.redact import redact_sensitive_text as redact
+        message = "From +15551234567 to +442071838750"
+        scrubbed = redact(message)
+        assert "+15551234567" not in scrubbed
+        assert "+442071838750" not in scrubbed
+
+    def test_short_number_not_matched(self):
+        from agent.redact import redact_sensitive_text as redact
+        scrubbed = redact("Code: +12345")
+        # Original note: 5 digits is under the redactor's 7-digit minimum (not visible here)
+        assert "+12345" in scrubbed  # left untouched
+
+
+# ---------------------------------------------------------------------------
+# Authorization in run.py
+# ---------------------------------------------------------------------------
+
+class TestSignalAuthorization:
+    def test_signal_in_allowlist_maps(self):
+        """An unapproved Signal user is rejected when the environment is empty."""
+        from gateway.run import GatewayRunner
+        from gateway.config import GatewayConfig
+
+        bare_runner = GatewayRunner.__new__(GatewayRunner)
+        bare_runner.config = GatewayConfig()
+        bare_runner.pairing_store = MagicMock()
+        bare_runner.pairing_store.is_approved.return_value = False
+
+        sender = MagicMock()
+        sender.platform = Platform.SIGNAL
+        sender.user_id = "+15559999999"
+
+        # With os.environ emptied, no allowlist or allow-all variable can be set
+        with patch.dict("os.environ", {}, clear=True):
+            authorized = bare_runner._is_user_authorized(sender)
+            assert authorized is False
+
+
+# ---------------------------------------------------------------------------
+# Send Message Tool
+# ---------------------------------------------------------------------------
+
+class TestSignalSendMessage:
+    def test_signal_in_platform_map(self):
+        """The send_message tool imports cleanly and Platform.SIGNAL is 'signal'."""
+        from tools.send_message_tool import send_message_tool
+        # Only the import and the enum value are checked; the tool is not called
+        from gateway.config import Platform as platform_enum
+        assert platform_enum.SIGNAL.value == "signal"
--- a/tests/gateway/test_title_command.py
+++ b/tests/gateway/test_title_command.py
@@ -0,0 +1,207 @@
+"""Tests for /title gateway slash command.
+
+Tests the _handle_title_command handler (set/show session titles)
+across all gateway messenger platforms.
+"""
+
+import os
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from gateway.config import Platform
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionSource
+
+
+def _make_event(text="/title", platform=Platform.TELEGRAM,
+                user_id="12345", chat_id="67890"):
+    """Return a MessageEvent for *text* whose source is user "testuser" on *platform*."""
+    return MessageEvent(
+        text=text,
+        source=SessionSource(
+            platform=platform, user_id=user_id,
+            chat_id=chat_id, user_name="testuser",
+        ),
+    )
+
+
+def _make_runner(session_db=None):
+    """Build a GatewayRunner without calling __init__, backed by a mocked session_store."""
+    from gateway.run import GatewayRunner
+
+    # Every get_or_create_session() call yields this entry with fixed identifiers
+    entry = MagicMock(
+        session_id="test_session_123",
+        session_key="telegram:12345:67890",
+    )
+    store = MagicMock()
+    store.get_or_create_session.return_value = entry
+    gateway_runner = object.__new__(GatewayRunner)
+    gateway_runner._session_db = session_db
+    gateway_runner.adapters = {}
+    gateway_runner.session_store = store
+    return gateway_runner
+
+
+# ---------------------------------------------------------------------------
+# _handle_title_command
+# ---------------------------------------------------------------------------
+
+
+class TestHandleTitleCommand:
+    """Tests for GatewayRunner._handle_title_command."""
+
+    @pytest.mark.asyncio
+    async def test_set_title(self, tmp_path):
+        """Setting a title returns confirmation."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("test_session_123", "telegram")
+
+        runner = _make_runner(session_db=db)
+        event = _make_event(text="/title My Research Project")
+        result = await runner._handle_title_command(event)
+        assert "My Research Project" in result
+        assert "✏️" in result
+
+        # Verify in DB
+        assert db.get_session_title("test_session_123") == "My Research Project"
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_show_title_when_set(self, tmp_path):
+        """Showing title when one is set returns the title."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("test_session_123", "telegram")
+        db.set_session_title("test_session_123", "Existing Title")
+
+        runner = _make_runner(session_db=db)
+        event = _make_event(text="/title")
+        result = await runner._handle_title_command(event)
+        assert "Existing Title" in result
+        assert "📌" in result
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_show_title_when_not_set(self, tmp_path):
+        """Showing title when none is set returns usage hint."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("test_session_123", "telegram")
+
+        runner = _make_runner(session_db=db)
+        event = _make_event(text="/title")
+        result = await runner._handle_title_command(event)
+        assert "No title set" in result
+        assert "/title" in result
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_title_conflict(self, tmp_path):
+        """Setting a title already used by another session returns error."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("other_session", "telegram")
+        db.set_session_title("other_session", "Taken Title")
+        db.create_session("test_session_123", "telegram")
+
+        runner = _make_runner(session_db=db)
+        event = _make_event(text="/title Taken Title")
+        result = await runner._handle_title_command(event)
+        assert "already in use" in result
+        assert "⚠️" in result
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_no_session_db(self):
+        """Returns error when session database is not available."""
+        runner = _make_runner(session_db=None)
+        event = _make_event(text="/title My Title")
+        result = await runner._handle_title_command(event)
+        assert "not available" in result
+
+    @pytest.mark.asyncio
+    async def test_title_too_long(self, tmp_path):
+        """Setting a title that exceeds max length returns error."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("test_session_123", "telegram")
+
+        runner = _make_runner(session_db=db)
+        long_title = "A" * 150
+        event = _make_event(text=f"/title {long_title}")
+        result = await runner._handle_title_command(event)
+        assert "too long" in result
+        assert "⚠️" in result
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_title_control_chars_sanitized(self, tmp_path):
+        """Control characters are stripped and sanitized title is stored."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("test_session_123", "telegram")
+
+        runner = _make_runner(session_db=db)
+        event = _make_event(text="/title hello\x00world")
+        result = await runner._handle_title_command(event)
+        assert "helloworld" in result
+        assert db.get_session_title("test_session_123") == "helloworld"
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_title_only_control_chars(self, tmp_path):
+        """Title with only control chars returns empty error."""
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("test_session_123", "telegram")
+
+        runner = _make_runner(session_db=db)
+        event = _make_event(text="/title \x00\x01\x02")
+        result = await runner._handle_title_command(event)
+        assert "empty after cleanup" in result
+        db.close()
+
+    @pytest.mark.asyncio
+    async def test_works_across_platforms(self, tmp_path):
+        """The /title command sets and stores the title on both Discord and Telegram."""
+        from hermes_state import SessionDB
+        for platform in [Platform.DISCORD, Platform.TELEGRAM]:
+            db = SessionDB(db_path=tmp_path / f"state_{platform.value}.db")
+            db.create_session("test_session_123", platform.value)
+
+            runner = _make_runner(session_db=db)
+            event = _make_event(text="/title Cross-Platform Test", platform=platform)
+            result = await runner._handle_title_command(event)
+            assert "Cross-Platform Test" in result
+            assert db.get_session_title("test_session_123") == "Cross-Platform Test"
+            db.close()
+
+
+# ---------------------------------------------------------------------------
+# /title in help and known_commands
+# ---------------------------------------------------------------------------
+
+
+class TestTitleInHelp:
+    """Verify /title appears in help text and known commands."""
+
+    @pytest.mark.asyncio
+    async def test_title_in_help_output(self):
+        """The /help output includes /title."""
+        runner = _make_runner()
+        event = _make_event(text="/help")
+        # Need hooks for help command
+        from gateway.hooks import HookRegistry
+        runner.hooks = HookRegistry()
+        result = await runner._handle_help_command(event)
+        assert "/title" in result
+
+    def test_title_is_known_command(self):
+        """The source of GatewayRunner._handle_message contains the literal '"title"'."""
+        from gateway.run import GatewayRunner
+        import inspect
+        source = inspect.getsource(GatewayRunner._handle_message)
+        assert '"title"' in source
--- a/tests/hermes_cli/test_commands.py
+++ b/tests/hermes_cli/test_commands.py
@@ -0,0 +1,145 @@
+"""Tests for shared slash command definitions and autocomplete."""
+
+from prompt_toolkit.completion import CompleteEvent
+from prompt_toolkit.document import Document
+
+from hermes_cli.commands import COMMANDS, SlashCommandCompleter
+
+
+# All commands that must be present in the shared COMMANDS dict.
+EXPECTED_COMMANDS = {
+    "/help", "/tools", "/toolsets", "/model", "/provider", "/prompt",
+    "/personality", "/clear", "/history", "/new", "/reset", "/retry",
+    "/undo", "/save", "/config", "/cron", "/skills", "/platforms",
+    "/verbose", "/compress", "/title", "/usage", "/insights", "/paste",
+    "/reload-mcp", "/quit",
+}
+
+
+def _completions(completer: SlashCommandCompleter, text: str):
+    """Return, as a list, what *completer* yields for the input *text*."""
+    # Build the request first, with completion explicitly requested.
+    document = Document(text=text)
+    event = CompleteEvent(completion_requested=True)
+    matches = completer.get_completions(document, event)
+    return list(matches)
+
+
+class TestCommands:
+    def test_shared_commands_include_cli_specific_entries(self):
+        """Entries that previously only existed in cli.py are now in the shared dict."""
+        assert COMMANDS["/paste"] == "Check clipboard for an image and attach it"
+        assert COMMANDS["/reload-mcp"] == "Reload MCP servers from config.yaml"
+
+    def test_all_expected_commands_present(self):
+        """Regression guard — every known command must appear in the shared dict."""
+        assert set(COMMANDS.keys()) == EXPECTED_COMMANDS
+
+    def test_every_command_has_nonempty_description(self):
+        for cmd, desc in COMMANDS.items():
+            assert isinstance(desc, str) and len(desc) > 0, f"{cmd} has empty description"
+
+
+class TestSlashCommandCompleter:
+    # -- basic prefix completion -----------------------------------------
+
+    def test_builtin_prefix_completion_uses_shared_registry(self):
+        completions = _completions(SlashCommandCompleter(), "/re")
+        texts = {item.text for item in completions}
+
+        assert "reset" in texts
+        assert "retry" in texts
+        assert "reload-mcp" in texts
+
+    def test_builtin_completion_display_meta_shows_description(self):
+        completions = _completions(SlashCommandCompleter(), "/help")
+        assert len(completions) == 1
+        assert completions[0].display_meta_text == "Show this help message"
+
+    # -- exact-match trailing space --------------------------------------
+
+    def test_exact_match_completion_adds_trailing_space(self):
+        completions = _completions(SlashCommandCompleter(), "/help")
+
+        assert [item.text for item in completions] == ["help "]
+
+    def test_partial_match_does_not_add_trailing_space(self):
+        completions = _completions(SlashCommandCompleter(), "/hel")
+
+        assert [item.text for item in completions] == ["help"]
+
+    # -- non-slash input returns nothing ---------------------------------
+
+    def test_no_completions_for_non_slash_input(self):
+        assert _completions(SlashCommandCompleter(), "help") == []
+
+    def test_no_completions_for_empty_input(self):
+        assert _completions(SlashCommandCompleter(), "") == []
+
+    # -- skill commands via provider ------------------------------------
+
+    def test_skill_commands_are_completed_from_provider(self):
+        completer = SlashCommandCompleter(
+            skill_commands_provider=lambda: {
+                "/gif-search": {"description": "Search for GIFs across providers"},
+            }
+        )
+
+        completions = _completions(completer, "/gif")
+
+        assert len(completions) == 1
+        assert completions[0].text == "gif-search"
+        assert completions[0].display_text == "/gif-search"
+        assert completions[0].display_meta_text == "⚡ Search for GIFs across providers"
+
+    def test_skill_exact_match_adds_trailing_space(self):
+        completer = SlashCommandCompleter(
+            skill_commands_provider=lambda: {
+                "/gif-search": {"description": "Search for GIFs"},
+            }
+        )
+
+        completions = _completions(completer, "/gif-search")
+
+        assert len(completions) == 1
+        assert completions[0].text == "gif-search "
+
+    def test_no_skill_provider_means_no_skill_completions(self):
+        """Default (None) provider should not blow up or add completions."""
+        completer = SlashCommandCompleter()
+        completions = _completions(completer, "/gif")
+        # /gif doesn't match any builtin command
+        assert completions == []
+
+    def test_skill_provider_exception_is_swallowed(self):
+        """A broken provider should not crash autocomplete."""
+        completer = SlashCommandCompleter(
+            skill_commands_provider=lambda: (_ for _ in ()).throw(RuntimeError("boom")),
+        )
+        # Should return builtin matches only, no crash
+        completions = _completions(completer, "/he")
+        texts = {item.text for item in completions}
+        assert "help" in texts
+
+    def test_skill_description_truncated_at_50_chars(self):
+        long_desc = "A" * 80
+        completer = SlashCommandCompleter(
+            skill_commands_provider=lambda: {
+                "/long-skill": {"description": long_desc},
+            }
+        )
+        completions = _completions(completer, "/long")
+        assert len(completions) == 1
+        meta = completions[0].display_meta_text
+        # "⚡ " prefix + 50 chars + "..."
+        assert meta == f"⚡ {'A' * 50}..."
+
+    def test_skill_missing_description_uses_fallback(self):
+        completer = SlashCommandCompleter(
+            skill_commands_provider=lambda: {
+                "/no-desc": {},
+            }
+        )
+        completions = _completions(completer, "/no-desc")
+        assert len(completions) == 1
+        assert "Skill command" in completions[0].display_meta_text
--- a/tests/hermes_cli/test_doctor.py
+++ b/tests/hermes_cli/test_doctor.py
@@ -0,0 +1,17 @@
+"""Tests for hermes doctor helpers."""
+
+from hermes_cli.doctor import _has_provider_env_config
+
+
+class TestProviderEnvDetection:
+    def test_detects_openai_api_key(self):
+        content = "OPENAI_BASE_URL=http://localhost:1234/v1\nOPENAI_API_KEY=sk-test-key\n"
+        assert _has_provider_env_config(content)
+
+    def test_detects_custom_endpoint_without_openrouter_key(self):
+        content = "OPENAI_BASE_URL=http://localhost:8080/v1\n"
+        assert _has_provider_env_config(content)
+
+    def test_returns_false_when_no_provider_settings(self):
+        content = "TERMINAL_ENV=local\n"
+        assert not _has_provider_env_config(content)
--- a/tests/hermes_cli/test_model_validation.py
+++ b/tests/hermes_cli/test_model_validation.py
@@ -0,0 +1,220 @@
+"""Tests for provider-aware `/model` validation in hermes_cli.models."""
+
+from unittest.mock import patch
+
+from hermes_cli.models import (
+    curated_models_for_provider,
+    fetch_api_models,
+    normalize_provider,
+    parse_model_input,
+    provider_model_ids,
+    validate_requested_model,
+)
+
+
+# -- helpers -----------------------------------------------------------------
+
+FAKE_API_MODELS = [
+    "anthropic/claude-opus-4.6",
+    "anthropic/claude-sonnet-4.5",
+    "openai/gpt-5.4-pro",
+    "openai/gpt-5.4",
+    "google/gemini-3-pro-preview",
+]
+
+
+def _validate(model, provider="openrouter", api_models=FAKE_API_MODELS, **kw):
+    """Shortcut: call validate_requested_model with mocked API."""
+    with patch("hermes_cli.models.fetch_api_models", return_value=api_models):
+        return validate_requested_model(model, provider, **kw)
+
+
+# -- parse_model_input -------------------------------------------------------
+
+class TestParseModelInput:
+    def test_plain_model_keeps_current_provider(self):
+        provider, model = parse_model_input("anthropic/claude-sonnet-4.5", "openrouter")
+        assert provider == "openrouter"
+        assert model == "anthropic/claude-sonnet-4.5"
+
+    def test_provider_colon_model_switches_provider(self):
+        provider, model = parse_model_input("openrouter:anthropic/claude-sonnet-4.5", "nous")
+        assert provider == "openrouter"
+        assert model == "anthropic/claude-sonnet-4.5"
+
+    def test_provider_alias_resolved(self):
+        provider, model = parse_model_input("glm:glm-5", "openrouter")
+        assert provider == "zai"
+        assert model == "glm-5"
+
+    def test_no_slash_no_colon_keeps_provider(self):
+        provider, model = parse_model_input("gpt-5.4", "openrouter")
+        assert provider == "openrouter"
+        assert model == "gpt-5.4"
+
+    def test_nous_provider_switch(self):
+        provider, model = parse_model_input("nous:hermes-3", "openrouter")
+        assert provider == "nous"
+        assert model == "hermes-3"
+
+    def test_empty_model_after_colon_keeps_current(self):
+        provider, model = parse_model_input("openrouter:", "nous")
+        assert provider == "nous"
+        assert model == "openrouter:"
+
+    def test_colon_at_start_keeps_current(self):
+        provider, model = parse_model_input(":something", "openrouter")
+        assert provider == "openrouter"
+        assert model == ":something"
+
+    def test_unknown_prefix_colon_not_treated_as_provider(self):
+        """Colons are only provider delimiters if the left side is a known provider."""
+        provider, model = parse_model_input("anthropic/claude-3.5-sonnet:beta", "openrouter")
+        assert provider == "openrouter"
+        assert model == "anthropic/claude-3.5-sonnet:beta"
+
+    def test_http_url_not_treated_as_provider(self):
+        provider, model = parse_model_input("http://localhost:8080/model", "openrouter")
+        assert provider == "openrouter"
+        assert model == "http://localhost:8080/model"
+
+
+# -- curated_models_for_provider ---------------------------------------------
+
+class TestCuratedModelsForProvider:
+    def test_openrouter_returns_curated_list(self):
+        models = curated_models_for_provider("openrouter")
+        assert len(models) > 0
+        assert any("claude" in m[0] for m in models)
+
+    def test_zai_returns_glm_models(self):
+        models = curated_models_for_provider("zai")
+        assert any("glm" in m[0] for m in models)
+
+    def test_unknown_provider_returns_empty(self):
+        assert curated_models_for_provider("totally-unknown") == []
+
+
+# -- normalize_provider ------------------------------------------------------
+
+class TestNormalizeProvider:
+    def test_defaults_to_openrouter(self):
+        assert normalize_provider(None) == "openrouter"
+        assert normalize_provider("") == "openrouter"
+
+    def test_known_aliases(self):
+        assert normalize_provider("glm") == "zai"
+        assert normalize_provider("kimi") == "kimi-coding"
+        assert normalize_provider("moonshot") == "kimi-coding"
+
+    def test_case_insensitive(self):
+        assert normalize_provider("OpenRouter") == "openrouter"
+
+
+# -- provider_model_ids ------------------------------------------------------
+
+class TestProviderModelIds:
+    def test_openrouter_returns_curated_list(self):
+        ids = provider_model_ids("openrouter")
+        assert len(ids) > 0
+        assert all("/" in mid for mid in ids)
+
+    def test_unknown_provider_returns_empty(self):
+        assert provider_model_ids("some-unknown-provider") == []
+
+    def test_zai_returns_glm_models(self):
+        assert "glm-5" in provider_model_ids("zai")
+
+
+# -- fetch_api_models --------------------------------------------------------
+
+class TestFetchApiModels:
+    def test_returns_none_when_no_base_url(self):
+        assert fetch_api_models("key", None) is None
+
+    def test_returns_none_on_network_error(self):
+        with patch("hermes_cli.models.urllib.request.urlopen", side_effect=Exception("timeout")):
+            assert fetch_api_models("key", "https://example.com/v1") is None
+
+
+# -- validate — format checks -----------------------------------------------
+
+class TestValidateFormatChecks:
+    def test_empty_model_rejected(self):
+        result = _validate("")
+        assert result["accepted"] is False
+        assert "empty" in result["message"]
+
+    def test_whitespace_only_rejected(self):
+        result = _validate("   ")
+        assert result["accepted"] is False
+
+    def test_model_with_spaces_rejected(self):
+        result = _validate("anthropic/ claude-opus")
+        assert result["accepted"] is False
+
+    def test_no_slash_model_still_probes_api(self):
+        result = _validate("gpt-5.4", api_models=["gpt-5.4", "gpt-5.4-pro"])
+        assert result["accepted"] is True
+        assert result["persist"] is True
+
+    def test_no_slash_model_rejected_if_not_in_api(self):
+        result = _validate("gpt-5.4", api_models=["openai/gpt-5.4"])
+        assert result["accepted"] is False
+
+
+# -- validate — API found ----------------------------------------------------
+
+class TestValidateApiFound:
+    def test_model_found_in_api(self):
+        result = _validate("anthropic/claude-opus-4.6")
+        assert result["accepted"] is True
+        assert result["persist"] is True
+        assert result["recognized"] is True
+
+    def test_model_found_for_custom_endpoint(self):
+        result = _validate(
+            "my-model", provider="openrouter",
+            api_models=["my-model"], base_url="http://localhost:11434/v1",
+        )
+        assert result["accepted"] is True
+        assert result["persist"] is True
+
+
+# -- validate — API not found ------------------------------------------------
+
+class TestValidateApiNotFound:
+    def test_model_not_in_api_rejected(self):
+        result = _validate("anthropic/claude-nonexistent")
+        assert result["accepted"] is False
+        assert "not a valid model" in result["message"]
+
+    def test_rejection_includes_suggestions(self):
+        result = _validate("anthropic/claude-opus-4.5")
+        assert result["accepted"] is False
+        assert "Did you mean" in result["message"]
+
+
+# -- validate — API unreachable (fallback) -----------------------------------
+
+class TestValidateApiFallback:
+    def test_known_catalog_model_accepted_when_api_down(self):
+        result = _validate("anthropic/claude-opus-4.6", api_models=None)
+        assert result["accepted"] is True
+        assert result["persist"] is True
+
+    def test_unknown_model_session_only_when_api_down(self):
+        result = _validate("anthropic/claude-next-gen", api_models=None)
+        assert result["accepted"] is True
+        assert result["persist"] is False
+        assert "session only" in result["message"].lower()
+
+    def test_zai_known_model_accepted_when_api_down(self):
+        result = _validate("glm-5", provider="zai", api_models=None)
+        assert result["accepted"] is True
+        assert result["persist"] is True
+
+    def test_unknown_provider_session_only_when_api_down(self):
+        result = _validate("some-model", provider="totally-unknown", api_models=None)
+        assert result["accepted"] is True
+        assert result["persist"] is False
--- a/tests/hermes_cli/test_session_browse.py
+++ b/tests/hermes_cli/test_session_browse.py
@@ -0,0 +1,542 @@
+"""Tests for the interactive session browser (`hermes sessions browse`).
+
+Covers:
+- _session_browse_picker logic (curses mocked, fallback tested)
+- cmd_sessions 'browse' action integration
+- Argument parser registration
+"""
+
+import os
+import time
+from unittest.mock import MagicMock, patch, call
+
+import pytest
+
+from hermes_cli.main import _session_browse_picker
+
+
+# ─── Sample session data ──────────────────────────────────────────────────────
+
+def _make_sessions(n=5):
+    """Return *n* fake rich-session dicts, each one hour older than the previous."""
+    now = time.time()
+    return [
+        {
+            "id": f"20260308_{idx:06d}_abcdef",
+            "source": "telegram" if idx % 2 else "cli",
+            "model": "test/model",
+            "title": None if idx % 3 == 0 else f"Session {idx}",
+            "preview": f"Hello from session {idx}",
+            "last_active": now - idx * 3600,
+            "started_at": now - idx * 3600 - 60,
+            "message_count": 5 * (idx + 1),
+        }
+        for idx in range(n)
+    ]
+
+
+SAMPLE_SESSIONS = _make_sessions(5)
+
+
+# ─── _session_browse_picker ──────────────────────────────────────────────────
+
+class TestSessionBrowsePicker:
+    """Tests for the _session_browse_picker function."""
+
+    def test_empty_sessions_returns_none(self, capsys):
+        result = _session_browse_picker([])
+        assert result is None
+        assert "No sessions found" in capsys.readouterr().out
+
+    def test_returns_none_when_no_sessions(self, capsys):
+        result = _session_browse_picker([])
+        assert result is None
+
+    def test_fallback_mode_valid_selection(self):
+        """When curses is unavailable, fallback numbered list should work."""
+        sessions = _make_sessions(3)
+
+        # Mock curses import to fail, forcing fallback
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value="2"):
+                result = _session_browse_picker(sessions)
+
+        assert result == sessions[1]["id"]
+
+    def test_fallback_mode_cancel_q(self):
+        """Entering 'q' in fallback mode cancels."""
+        sessions = _make_sessions(3)
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value="q"):
+                result = _session_browse_picker(sessions)
+
+        assert result is None
+
+    def test_fallback_mode_cancel_empty(self):
+        """Entering empty string in fallback mode cancels."""
+        sessions = _make_sessions(3)
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value=""):
+                result = _session_browse_picker(sessions)
+
+        assert result is None
+
+    def test_fallback_mode_invalid_then_valid(self):
+        """Invalid selection followed by valid one works."""
+        sessions = _make_sessions(3)
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", side_effect=["99", "1"]):
+                result = _session_browse_picker(sessions)
+
+        assert result == sessions[0]["id"]
+
+    def test_fallback_mode_keyboard_interrupt(self):
+        """KeyboardInterrupt in fallback mode returns None."""
+        sessions = _make_sessions(3)
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", side_effect=KeyboardInterrupt):
+                result = _session_browse_picker(sessions)
+
+        assert result is None
+
+    def test_fallback_displays_all_sessions(self, capsys):
+        """Fallback mode should display all session entries."""
+        sessions = _make_sessions(4)
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value="q"):
+                _session_browse_picker(sessions)
+
+        output = capsys.readouterr().out
+        # All 4 entries should be shown
+        assert "1." in output
+        assert "2." in output
+        assert "3." in output
+        assert "4." in output
+
+    def test_fallback_shows_title_over_preview(self, capsys):
+        """When a session has a title, show it instead of the preview."""
+        sessions = [{
+            "id": "test_001",
+            "source": "cli",
+            "title": "My Cool Project",
+            "preview": "some preview text",
+            "last_active": time.time(),
+        }]
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value="q"):
+                _session_browse_picker(sessions)
+
+        output = capsys.readouterr().out
+        assert "My Cool Project" in output
+
+    def test_fallback_shows_preview_when_no_title(self, capsys):
+        """When no title, show preview."""
+        sessions = [{
+            "id": "test_002",
+            "source": "cli",
+            "title": None,
+            "preview": "Hello world test message",
+            "last_active": time.time(),
+        }]
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value="q"):
+                _session_browse_picker(sessions)
+
+        output = capsys.readouterr().out
+        assert "Hello world test message" in output
+
+    def test_fallback_shows_id_when_no_title_or_preview(self, capsys):
+        """When neither title nor preview, show session ID."""
+        sessions = [{
+            "id": "test_003_fallback",
+            "source": "cli",
+            "title": None,
+            "preview": "",
+            "last_active": time.time(),
+        }]
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value="q"):
+                _session_browse_picker(sessions)
+
+        output = capsys.readouterr().out
+        assert "test_003_fallback" in output
+
+
+# ─── Curses-based picker (mocked curses) ────────────────────────────────────
+
+class TestCursesBrowse:
+    """Tests for the curses-based interactive picker via simulated key sequences."""
+
+    def _run_with_keys(self, sessions, key_sequence):
+        """Simulate running the curses picker with a given key sequence."""
+        import curses
+
+        # Build a mock stdscr that returns keys from the sequence
+        mock_stdscr = MagicMock()
+        mock_stdscr.getmaxyx.return_value = (30, 120)
+        mock_stdscr.getch.side_effect = key_sequence
+
+        # Capture what curses.wrapper receives and call it with our mock
+        with patch("curses.wrapper") as mock_wrapper:
+            # When wrapper is called, invoke the function with our mock stdscr
+            def run_inner(func):
+                try:
+                    func(mock_stdscr)
+                except StopIteration:
+                    pass  # key sequence exhausted
+
+            mock_wrapper.side_effect = run_inner
+            with patch("curses.curs_set"):
+                with patch("curses.has_colors", return_value=False):
+                    return _session_browse_picker(sessions)
+
+    def test_enter_selects_first_session(self):
+        sessions = _make_sessions(3)
+        result = self._run_with_keys(sessions, [10])  # Enter key
+        assert result == sessions[0]["id"]
+
+    def test_down_then_enter_selects_second(self):
+        import curses
+        sessions = _make_sessions(3)
+        result = self._run_with_keys(sessions, [curses.KEY_DOWN, 10])
+        assert result == sessions[1]["id"]
+
+    def test_down_down_enter_selects_third(self):
+        import curses
+        sessions = _make_sessions(5)
+        result = self._run_with_keys(sessions, [curses.KEY_DOWN, curses.KEY_DOWN, 10])
+        assert result == sessions[2]["id"]
+
+    def test_up_wraps_to_last(self):
+        import curses
+        sessions = _make_sessions(3)
+        result = self._run_with_keys(sessions, [curses.KEY_UP, 10])
+        assert result == sessions[2]["id"]
+
+    def test_escape_cancels(self):
+        sessions = _make_sessions(3)
+        result = self._run_with_keys(sessions, [27])  # Esc
+        assert result is None
+
+    def test_q_cancels(self):
+        sessions = _make_sessions(3)
+        result = self._run_with_keys(sessions, [ord('q')])
+        assert result is None
+
+    def test_type_to_filter_then_enter(self):
+        """Typing characters filters the list, Enter selects from filtered."""
+        import curses
+        sessions = [
+            {"id": "s1", "source": "cli", "title": "Alpha project", "preview": "", "last_active": time.time()},
+            {"id": "s2", "source": "cli", "title": "Beta project", "preview": "", "last_active": time.time()},
+            {"id": "s3", "source": "cli", "title": "Gamma project", "preview": "", "last_active": time.time()},
+        ]
+        # Type "Beta" then Enter — should select s2
+        keys = [ord(c) for c in "Beta"] + [10]
+        result = self._run_with_keys(sessions, keys)
+        assert result == "s2"
+
+    def test_filter_no_match_enter_does_nothing(self):
+        """When filter produces no results, Enter shouldn't select."""
+        sessions = _make_sessions(3)
+        keys = [ord(c) for c in "zzzznonexistent"] + [10]
+        result = self._run_with_keys(sessions, keys)
+        assert result is None
+
+    def test_backspace_removes_filter_char(self):
+        """Backspace removes the last character from the filter."""
+        import curses
+        sessions = [
+            {"id": "s1", "source": "cli", "title": "Alpha", "preview": "", "last_active": time.time()},
+            {"id": "s2", "source": "cli", "title": "Beta", "preview": "", "last_active": time.time()},
+        ]
+        # Type "Bet", backspace, backspace, backspace (clears filter), then Enter (selects first)
+        keys = [ord('B'), ord('e'), ord('t'), 127, 127, 127, 10]
+        result = self._run_with_keys(sessions, keys)
+        assert result == "s1"
+
+    def test_escape_clears_filter_first(self):
+        """First Esc clears the search text, second Esc exits."""
+        import curses
+        sessions = _make_sessions(3)
+        # Type "ab" then Esc (clears filter) then Enter (selects first)
+        keys = [ord('a'), ord('b'), 27, 10]
+        result = self._run_with_keys(sessions, keys)
+        assert result == sessions[0]["id"]
+
+    def test_filter_matches_preview(self):
+        """Typing should match against session preview text."""
+        sessions = [
+            {"id": "s1", "source": "cli", "title": None, "preview": "Set up Minecraft server", "last_active": time.time()},
+            {"id": "s2", "source": "cli", "title": None, "preview": "Review PR 438", "last_active": time.time()},
+        ]
+        keys = [ord(c) for c in "Mine"] + [10]
+        result = self._run_with_keys(sessions, keys)
+        assert result == "s1"
+
+    def test_filter_matches_source(self):
+        """Typing a source name should filter by source."""
+        sessions = [
+            {"id": "s1", "source": "telegram", "title": "TG session", "preview": "", "last_active": time.time()},
+            {"id": "s2", "source": "cli", "title": "CLI session", "preview": "", "last_active": time.time()},
+        ]
+        keys = [ord(c) for c in "telegram"] + [10]
+        result = self._run_with_keys(sessions, keys)
+        assert result == "s1"
+
+    def test_q_quits_when_no_filter_active(self):
+        """When no search text is active, 'q' should quit (not filter)."""
+        sessions = _make_sessions(3)
+        result = self._run_with_keys(sessions, [ord('q')])
+        assert result is None
+
+    def test_q_types_into_filter_when_filter_active(self):
+        """When search text is already active, 'q' should add to filter, not quit."""
+        sessions = [
+            {"id": "s1", "source": "cli", "title": "the sequel", "preview": "", "last_active": time.time()},
+            {"id": "s2", "source": "cli", "title": "other thing", "preview": "", "last_active": time.time()},
+        ]
+        # Type "se" first (activates filter, matches "the sequel")
+        # Then type "q" — should add 'q' to filter (filter="seq"), NOT quit
+        # "seq" still matches "the sequel" → Enter selects it
+        keys = [ord('s'), ord('e'), ord('q'), 10]
+        result = self._run_with_keys(sessions, keys)
+        assert result == "s1"  # "the sequel" matches "seq"
+
+
+# ─── Argument parser registration ──────────────────────────────────────────
+
+class TestSessionBrowseArgparse:
+    """Verify the 'browse' subcommand is properly registered."""
+
+    def test_browse_subcommand_exists(self):
+        """hermes sessions browse should be parseable."""
+        from hermes_cli.main import main as _main_entry
+
+        # We can't run main() here, so the argparse handling of
+        # "sessions browse" is not exercised directly by this test.
+        import argparse
+        # Rather than re-creating the parser, just verify that the import
+        # works and that the picker function exists and is callable.
+        from hermes_cli.main import _session_browse_picker
+        assert callable(_session_browse_picker)
+
+    def test_browse_default_limit_is_50(self):
+        """The default --limit for browse should be 50."""
+        # NOTE: this does not exercise argparse or the --limit default itself.
+        # The subparser is not easy to extract, so this is only a smoke check
+        # that the _make_sessions helper can build a list of the size that
+        # browse is expected to request by default (50).
+        sessions = _make_sessions(50)
+        assert len(sessions) == 50
+
+
+# ─── Integration: cmd_sessions browse action ────────────────────────────────
+
+class TestCmdSessionsBrowse:
+    """Integration tests for the 'browse' action in cmd_sessions."""
+
+    def test_browse_no_sessions_prints_message(self, capsys):
+        """When no sessions exist, _session_browse_picker returns None and prints message."""
+        result = _session_browse_picker([])
+        assert result is None
+        output = capsys.readouterr().out
+        assert "No sessions found" in output
+
+    def test_browse_with_source_filter(self):
+        """A single-source session list is selectable by number in fallback mode."""
+        sessions = [
+            {"id": "s1", "source": "cli", "title": "CLI only", "preview": "", "last_active": time.time()},
+        ]
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value="1"):
+                result = _session_browse_picker(sessions)
+
+        assert result == "s1"
+
+
+# ─── Edge cases ──────────────────────────────────────────────────────────────
+
+class TestEdgeCases:
+    """Edge case handling for the session browser."""
+
+    def test_sessions_with_missing_fields(self):
+        """Sessions with missing optional fields should not crash."""
+        sessions = [
+            {"id": "minimal_001", "source": "cli"},  # No title, preview, last_active
+        ]
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value="1"):
+                result = _session_browse_picker(sessions)
+
+        assert result == "minimal_001"
+
+    def test_single_session(self):
+        """A single session in the list should work fine."""
+        sessions = [
+            {"id": "only_one", "source": "cli", "title": "Solo", "preview": "", "last_active": time.time()},
+        ]
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value="1"):
+                result = _session_browse_picker(sessions)
+
+        assert result == "only_one"
+
+    def test_long_title_truncated_in_fallback(self, capsys):
+        """Very long titles should be truncated in fallback mode."""
+        sessions = [{
+            "id": "long_title_001",
+            "source": "cli",
+            "title": "A" * 100,
+            "preview": "",
+            "last_active": time.time(),
+        }]
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value="q"):
+                _session_browse_picker(sessions)
+
+        output = capsys.readouterr().out
+        # Title should be truncated to 50 chars with "..."; only the ellipsis is asserted
+        assert "..." in output
+
+    def test_relative_time_formatting(self, capsys):
+        """Verify various time deltas format correctly."""
+        now = time.time()
+        sessions = [
+            {"id": "recent", "source": "cli", "title": None, "preview": "just now test", "last_active": now},
+            {"id": "hour_ago", "source": "cli", "title": None, "preview": "hour ago test", "last_active": now - 7200},
+            {"id": "days_ago", "source": "cli", "title": None, "preview": "days ago test", "last_active": now - 259200},
+        ]
+
+        import builtins
+        original_import = builtins.__import__
+
+        def mock_import(name, *args, **kwargs):
+            if name == "curses":
+                raise ImportError("no curses")
+            return original_import(name, *args, **kwargs)
+
+        with patch.object(builtins, "__import__", side_effect=mock_import):
+            with patch("builtins.input", return_value="q"):
+                _session_browse_picker(sessions)
+
+        output = capsys.readouterr().out
+        assert "just now" in output
+        assert "2h ago" in output
+        assert "3d ago" in output
--- a/tests/hermes_cli/test_set_config_value.py
+++ b/tests/hermes_cli/test_set_config_value.py
@@ -38,7 +38,6 @@ class TestExplicitAllowlist:
        "OPENROUTER_API_KEY",
        "OPENAI_API_KEY",
        "ANTHROPIC_API_KEY",
-        "NOUS_API_KEY",
        "WANDB_API_KEY",
        "TINKER_API_KEY",
        "HONCHO_API_KEY",
--- a/tests/hermes_cli/test_skills_hub.py
+++ b/tests/hermes_cli/test_skills_hub.py
@@ -0,0 +1,31 @@
+from io import StringIO
+
+from rich.console import Console
+
+from hermes_cli.skills_hub import do_list
+
+
+def test_do_list_initializes_hub_dir(monkeypatch, tmp_path):
+    import tools.skills_hub as hub
+    import tools.skills_tool as skills_tool
+
+    hub_dir = tmp_path / "skills" / ".hub"
+    monkeypatch.setattr(hub, "SKILLS_DIR", tmp_path / "skills")
+    monkeypatch.setattr(hub, "HUB_DIR", hub_dir)
+    monkeypatch.setattr(hub, "LOCK_FILE", hub_dir / "lock.json")
+    monkeypatch.setattr(hub, "QUARANTINE_DIR", hub_dir / "quarantine")
+    monkeypatch.setattr(hub, "AUDIT_LOG", hub_dir / "audit.log")
+    monkeypatch.setattr(hub, "TAPS_FILE", hub_dir / "taps.json")
+    monkeypatch.setattr(hub, "INDEX_CACHE_DIR", hub_dir / "index-cache")
+    monkeypatch.setattr(skills_tool, "_find_all_skills", lambda: [])
+
+    console = Console(file=StringIO(), force_terminal=False, color_system=None)
+
+    assert not hub_dir.exists()
+
+    do_list(console=console)
+
+    assert hub_dir.exists()
+    assert (hub_dir / "lock.json").exists()
+    assert (hub_dir / "quarantine").is_dir()
+    assert (hub_dir / "index-cache").is_dir()
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@@ -0,0 +1,19 @@
+"""Tests for hermes_cli.tools_config platform tool persistence."""
+
+from hermes_cli.tools_config import _get_platform_tools
+
+
+def test_get_platform_tools_uses_default_when_platform_not_configured():
+    config = {}
+
+    enabled = _get_platform_tools(config, "cli")
+
+    assert enabled
+
+
+def test_get_platform_tools_preserves_explicit_empty_selection():
+    config = {"platform_toolsets": {"cli": []}}
+
+    enabled = _get_platform_tools(config, "cli")
+
+    assert enabled == set()
--- a/tests/integration/test_web_tools.py
+++ b/tests/integration/test_web_tools.py
@@ -12,7 +12,7 @@ Usage:

 Requirements:
    - FIRECRAWL_API_KEY environment variable must be set
-    - NOUS_API_KEY environment variable (optional, for LLM tests)
+    - An auxiliary LLM provider (OPENROUTER_API_KEY or Nous Portal auth) (optional, for LLM tests)
 """

 import pytest
@@ -128,12 +128,12 @@ class WebToolsTester:
        else:
            self.log_result("Firecrawl API Key", "passed", "Found")
        
-        # Check Nous API key (optional)
+        # Check auxiliary LLM provider (optional)
        if not check_auxiliary_model():
-            self.log_result("Nous API Key", "skipped", "NOUS_API_KEY not set (LLM tests will be skipped)")
+            self.log_result("Auxiliary LLM", "skipped", "No auxiliary LLM provider available (LLM tests will be skipped)")
            self.test_llm = False
        else:
-            self.log_result("Nous API Key", "passed", "Found")
+            self.log_result("Auxiliary LLM", "passed", "Found")
        
        # Check debug mode
        debug_info = get_debug_session_info()
--- a/tests/test_api_key_providers.py
+++ b/tests/test_api_key_providers.py
@@ -20,6 +20,8 @@ from hermes_cli.auth import (
    resolve_api_key_provider_credentials,
    get_auth_status,
    AuthError,
+    KIMI_CODE_BASE_URL,
+    _resolve_kimi_base_url,
 )


@@ -84,7 +86,7 @@ class TestProviderRegistry:
 PROVIDER_ENV_VARS = (
    "OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
    "GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY",
-    "KIMI_API_KEY", "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY",
+    "KIMI_API_KEY", "KIMI_BASE_URL", "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY",
    "OPENAI_BASE_URL",
 )

@@ -340,3 +342,87 @@ class TestHasAnyProviderConfigured:
        monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
        from hermes_cli.main import _has_any_provider_configured
        assert _has_any_provider_configured() is True
+
+
+# =============================================================================
+# Kimi Code auto-detection tests
+# =============================================================================
+
+MOONSHOT_DEFAULT_URL = "https://api.moonshot.ai/v1"
+
+
+class TestResolveKimiBaseUrl:
+    """Test _resolve_kimi_base_url() helper for key-prefix auto-detection."""
+
+    def test_sk_kimi_prefix_routes_to_kimi_code(self):
+        url = _resolve_kimi_base_url("sk-kimi-abc123", MOONSHOT_DEFAULT_URL, "")
+        assert url == KIMI_CODE_BASE_URL
+
+    def test_legacy_key_uses_default(self):
+        url = _resolve_kimi_base_url("sk-abc123", MOONSHOT_DEFAULT_URL, "")
+        assert url == MOONSHOT_DEFAULT_URL
+
+    def test_empty_key_uses_default(self):
+        url = _resolve_kimi_base_url("", MOONSHOT_DEFAULT_URL, "")
+        assert url == MOONSHOT_DEFAULT_URL
+
+    def test_env_override_wins_over_sk_kimi(self):
+        """KIMI_BASE_URL env var should always take priority."""
+        custom = "https://custom.example.com/v1"
+        url = _resolve_kimi_base_url("sk-kimi-abc123", MOONSHOT_DEFAULT_URL, custom)
+        assert url == custom
+
+    def test_env_override_wins_over_legacy(self):
+        custom = "https://custom.example.com/v1"
+        url = _resolve_kimi_base_url("sk-abc123", MOONSHOT_DEFAULT_URL, custom)
+        assert url == custom
+
+
+class TestKimiCodeStatusAutoDetect:
+    """Test that get_api_key_provider_status auto-detects sk-kimi- keys."""
+
+    def test_sk_kimi_key_gets_kimi_code_url(self, monkeypatch):
+        monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-test-key-123")
+        status = get_api_key_provider_status("kimi-coding")
+        assert status["configured"] is True
+        assert status["base_url"] == KIMI_CODE_BASE_URL
+
+    def test_legacy_key_gets_moonshot_url(self, monkeypatch):
+        monkeypatch.setenv("KIMI_API_KEY", "sk-legacy-test-key")
+        status = get_api_key_provider_status("kimi-coding")
+        assert status["configured"] is True
+        assert status["base_url"] == MOONSHOT_DEFAULT_URL
+
+    def test_env_override_wins(self, monkeypatch):
+        monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-test-key")
+        monkeypatch.setenv("KIMI_BASE_URL", "https://override.example/v1")
+        status = get_api_key_provider_status("kimi-coding")
+        assert status["base_url"] == "https://override.example/v1"
+
+
+class TestKimiCodeCredentialAutoDetect:
+    """Test that resolve_api_key_provider_credentials auto-detects sk-kimi- keys."""
+
+    def test_sk_kimi_key_gets_kimi_code_url(self, monkeypatch):
+        monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-secret-key")
+        creds = resolve_api_key_provider_credentials("kimi-coding")
+        assert creds["api_key"] == "sk-kimi-secret-key"
+        assert creds["base_url"] == KIMI_CODE_BASE_URL
+
+    def test_legacy_key_gets_moonshot_url(self, monkeypatch):
+        monkeypatch.setenv("KIMI_API_KEY", "sk-legacy-secret-key")
+        creds = resolve_api_key_provider_credentials("kimi-coding")
+        assert creds["api_key"] == "sk-legacy-secret-key"
+        assert creds["base_url"] == MOONSHOT_DEFAULT_URL
+
+    def test_env_override_wins(self, monkeypatch):
+        monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-secret-key")
+        monkeypatch.setenv("KIMI_BASE_URL", "https://override.example/v1")
+        creds = resolve_api_key_provider_credentials("kimi-coding")
+        assert creds["base_url"] == "https://override.example/v1"
+
+    def test_non_kimi_providers_unaffected(self, monkeypatch):
+        """Ensure the auto-detect logic doesn't leak to other providers."""
+        monkeypatch.setenv("GLM_API_KEY", "sk-kimi-looks-like-kimi-but-isnt")
+        creds = resolve_api_key_provider_credentials("zai")
+        assert creds["base_url"] == "https://api.z.ai/api/paas/v4"
--- a/tests/test_auxiliary_config_bridge.py
+++ b/tests/test_auxiliary_config_bridge.py
@@ -0,0 +1,292 @@
+"""Tests for auxiliary model config bridging — verifies that config.yaml values
+are properly mapped to environment variables by both CLI and gateway loaders.
+
+Also tests the vision_tools and browser_tool model override env vars.
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import pytest
+import yaml
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+
+def _run_auxiliary_bridge(config_dict, monkeypatch):
+    """Simulate the auxiliary config → env var bridging logic shared by CLI and gateway.
+
+    This mirrors the code in cli.py load_cli_config() and gateway/run.py.
+    Both use the same pattern; we test it once here.
+    """
+    # Clear the provider/model env vars that the tests below assert on
+    for key in (
+        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
+        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
+        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
+    ):
+        monkeypatch.delenv(key, raising=False)
+
+    # Compression bridge. NOTE(review): writes os.environ directly, so values may outlive the test — confirm.
+    compression_cfg = config_dict.get("compression", {})
+    if compression_cfg and isinstance(compression_cfg, dict):
+        compression_env_map = {
+            "enabled": "CONTEXT_COMPRESSION_ENABLED",
+            "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
+            "summary_model": "CONTEXT_COMPRESSION_MODEL",
+            "summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
+        }
+        for cfg_key, env_var in compression_env_map.items():
+            if cfg_key in compression_cfg:
+                os.environ[env_var] = str(compression_cfg[cfg_key])
+
+    # Auxiliary bridge
+    auxiliary_cfg = config_dict.get("auxiliary", {})
+    if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
+        aux_task_env = {
+            "vision":      ("AUXILIARY_VISION_PROVIDER",      "AUXILIARY_VISION_MODEL"),
+            "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER",  "AUXILIARY_WEB_EXTRACT_MODEL"),
+        }
+        for task_key, (prov_env, model_env) in aux_task_env.items():
+            task_cfg = auxiliary_cfg.get(task_key, {})
+            if not isinstance(task_cfg, dict):
+                continue
+            prov = str(task_cfg.get("provider", "")).strip()
+            model = str(task_cfg.get("model", "")).strip()
+            if prov and prov != "auto":
+                os.environ[prov_env] = prov
+            if model:
+                os.environ[model_env] = model
+
+
+# ── Config bridging tests ────────────────────────────────────────────────────
+
+
+class TestAuxiliaryConfigBridge:
+    """Verify the config.yaml → env var bridging logic used by CLI and gateway."""
+
+    def test_vision_provider_bridged(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "vision": {"provider": "openrouter", "model": ""},
+                "web_extract": {"provider": "auto", "model": ""},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter"
+        # auto should not be set
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") is None
+
+    def test_vision_model_bridged(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "vision": {"provider": "auto", "model": "openai/gpt-4o"},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_MODEL") == "openai/gpt-4o"
+        # auto provider should not be set
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None
+
+    def test_web_extract_bridged(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "web_extract": {"provider": "nous", "model": "gemini-2.5-flash"},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
+
+    def test_compression_provider_bridged(self, monkeypatch):
+        config = {
+            "compression": {
+                "summary_provider": "nous",
+                "summary_model": "gemini-3-flash",
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("CONTEXT_COMPRESSION_PROVIDER") == "nous"
+        assert os.environ.get("CONTEXT_COMPRESSION_MODEL") == "gemini-3-flash"
+
+    def test_empty_values_not_bridged(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "vision": {"provider": "auto", "model": ""},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None
+        assert os.environ.get("AUXILIARY_VISION_MODEL") is None
+
+    def test_missing_auxiliary_section_safe(self, monkeypatch):
+        """Config without auxiliary section should not crash."""
+        config = {"model": {"default": "test-model"}}
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None
+
+    def test_non_dict_task_config_ignored(self, monkeypatch):
+        """Malformed task config (e.g. string instead of dict) is safely ignored."""
+        config = {
+            "auxiliary": {
+                "vision": "openrouter",  # should be a dict
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None
+
+    def test_mixed_tasks(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "vision": {"provider": "openrouter", "model": ""},
+                "web_extract": {"provider": "auto", "model": "custom-llm"},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter"
+        assert os.environ.get("AUXILIARY_VISION_MODEL") is None
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") is None
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "custom-llm"
+
+    def test_all_tasks_with_overrides(self, monkeypatch):
+        config = {
+            "compression": {
+                "summary_provider": "main",
+                "summary_model": "local-model",
+            },
+            "auxiliary": {
+                "vision": {"provider": "openrouter", "model": "google/gemini-2.5-flash"},
+                "web_extract": {"provider": "nous", "model": "gemini-3-flash"},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("CONTEXT_COMPRESSION_PROVIDER") == "main"
+        assert os.environ.get("CONTEXT_COMPRESSION_MODEL") == "local-model"
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter"
+        assert os.environ.get("AUXILIARY_VISION_MODEL") == "google/gemini-2.5-flash"
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-3-flash"
+
+    def test_whitespace_in_values_stripped(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "vision": {"provider": "  openrouter  ", "model": "  my-model  "},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter"
+        assert os.environ.get("AUXILIARY_VISION_MODEL") == "my-model"
+
+    def test_empty_auxiliary_dict_safe(self, monkeypatch):
+        config = {"auxiliary": {}}
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") is None
+
+
+# ── Gateway bridge parity test ───────────────────────────────────────────────
+
+
+class TestGatewayBridgeCodeParity:
+    """Verify the gateway/run.py config bridge contains the auxiliary section."""
+
+    def test_gateway_has_auxiliary_bridge(self):
+        """The gateway config bridge must include auxiliary.* bridging."""
+        gateway_path = Path(__file__).parent.parent / "gateway" / "run.py"
+        content = gateway_path.read_text()
+        # Check for key patterns that indicate the bridge is present
+        assert "AUXILIARY_VISION_PROVIDER" in content
+        assert "AUXILIARY_VISION_MODEL" in content
+        assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
+        assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
+
+    def test_gateway_has_compression_provider(self):
+        """Gateway must bridge compression.summary_provider."""
+        gateway_path = Path(__file__).parent.parent / "gateway" / "run.py"
+        content = gateway_path.read_text()
+        assert "summary_provider" in content
+        assert "CONTEXT_COMPRESSION_PROVIDER" in content
+
+
+# ── Vision model override tests ──────────────────────────────────────────────
+
+
+class TestVisionModelOverride:
+    """Test that AUXILIARY_VISION_MODEL env var overrides the default model in the handler."""
+
+    def test_env_var_overrides_default(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_VISION_MODEL", "openai/gpt-4o")
+        from tools.vision_tools import _handle_vision_analyze
+        with patch("tools.vision_tools.vision_analyze_tool", new_callable=MagicMock) as mock_tool:
+            mock_tool.return_value = '{"success": true}'
+            _handle_vision_analyze({"image_url": "http://test.jpg", "question": "test"})
+            call_args = mock_tool.call_args
+            # 3rd positional arg = model
+            assert call_args[0][2] == "openai/gpt-4o"
+
+    def test_default_model_when_no_override(self, monkeypatch):
+        monkeypatch.delenv("AUXILIARY_VISION_MODEL", raising=False)
+        from tools.vision_tools import _handle_vision_analyze, DEFAULT_VISION_MODEL
+        with patch("tools.vision_tools.vision_analyze_tool", new_callable=MagicMock) as mock_tool:
+            mock_tool.return_value = '{"success": true}'
+            _handle_vision_analyze({"image_url": "http://test.jpg", "question": "test"})
+            call_args = mock_tool.call_args
+            expected = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
+            assert call_args[0][2] == expected
+
+
+# ── DEFAULT_CONFIG shape tests ───────────────────────────────────────────────
+
+
+class TestDefaultConfigShape:
+    """Verify the DEFAULT_CONFIG in hermes_cli/config.py has correct auxiliary structure."""
+
+    def test_auxiliary_section_exists(self):
+        from hermes_cli.config import DEFAULT_CONFIG
+        assert "auxiliary" in DEFAULT_CONFIG
+
+    def test_vision_task_structure(self):
+        from hermes_cli.config import DEFAULT_CONFIG
+        vision = DEFAULT_CONFIG["auxiliary"]["vision"]
+        assert "provider" in vision
+        assert "model" in vision
+        assert vision["provider"] == "auto"
+        assert vision["model"] == ""
+
+    def test_web_extract_task_structure(self):
+        from hermes_cli.config import DEFAULT_CONFIG
+        web = DEFAULT_CONFIG["auxiliary"]["web_extract"]
+        assert "provider" in web
+        assert "model" in web
+        assert web["provider"] == "auto"
+        assert web["model"] == ""
+
+    def test_compression_provider_default(self):
+        from hermes_cli.config import DEFAULT_CONFIG
+        compression = DEFAULT_CONFIG["compression"]
+        assert "summary_provider" in compression
+        assert compression["summary_provider"] == "auto"
+
+
+# ── CLI defaults parity ─────────────────────────────────────────────────────
+
+
+class TestCLIDefaultsHaveAuxiliaryKeys:
+    """Verify cli.py load_cli_config() picks up ``auxiliary`` from the config.yaml
+    deep merge (the hardcoded defaults dict is not expected to define it)."""
+
+    def test_cli_defaults_can_merge_auxiliary(self):
+        """The load_cli_config deep merge logic handles keys not in defaults.
+        Verify auxiliary would be picked up from config.yaml."""
+        # This is a structural assertion: cli.py's second-pass loop
+        # carries over keys from file_config that aren't in defaults.
+        # So auxiliary config from config.yaml gets merged even though
+        # cli.py's defaults dict doesn't define it.
+        import cli as _cli_mod
+        source = Path(_cli_mod.__file__).read_text()
+        assert "auxiliary_config = defaults.get(\"auxiliary\"" in source
+        assert "AUXILIARY_VISION_PROVIDER" in source
+        assert "AUXILIARY_VISION_MODEL" in source
--- a/tests/test_cli_init.py
+++ b/tests/test_cli_init.py
@@ -3,14 +3,12 @@ that only manifest at runtime (not in mocked unit tests)."""

 import os
 import sys
-from unittest.mock import patch, MagicMock
-
-import pytest
+from unittest.mock import patch

 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))


-def _make_cli(**kwargs):
+def _make_cli(env_overrides=None, **kwargs):
    """Create a HermesCLI instance with minimal mocking."""
    import cli as _cli_mod
    from cli import HermesCLI
@@ -24,8 +22,11 @@ def _make_cli(**kwargs):
        "agent": {},
        "terminal": {"env_type": "local"},
    }
+    clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
+    if env_overrides:
+        clean_env.update(env_overrides)
    with patch("cli.get_tool_definitions", return_value=[]), \
-         patch.dict("os.environ", {"LLM_MODEL": ""}, clear=False), \
+         patch.dict("os.environ", clean_env, clear=False), \
         patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}):
        return HermesCLI(**kwargs)

@@ -36,7 +37,7 @@ class TestMaxTurnsResolution:
    def test_default_max_turns_is_integer(self):
        cli = _make_cli()
        assert isinstance(cli.max_turns, int)
-        assert cli.max_turns == 60
+        assert cli.max_turns == 90

    def test_explicit_max_turns_honored(self):
        cli = _make_cli(max_turns=25)
@@ -45,29 +46,17 @@ class TestMaxTurnsResolution:
    def test_none_max_turns_gets_default(self):
        cli = _make_cli(max_turns=None)
        assert isinstance(cli.max_turns, int)
-        assert cli.max_turns == 60
+        assert cli.max_turns == 90

-    def test_env_var_max_turns(self, monkeypatch):
+    def test_env_var_max_turns(self):
        """Env var is used when config file doesn't set max_turns."""
-        monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42")
-        import cli as cli_module
-        original_agent = cli_module.CLI_CONFIG["agent"].get("max_turns")
-        original_root = cli_module.CLI_CONFIG.get("max_turns")
-        cli_module.CLI_CONFIG["agent"]["max_turns"] = None
-        cli_module.CLI_CONFIG.pop("max_turns", None)
-        try:
-            cli_obj = _make_cli()
-            assert cli_obj.max_turns == 42
-        finally:
-            if original_agent is not None:
-                cli_module.CLI_CONFIG["agent"]["max_turns"] = original_agent
-            if original_root is not None:
-                cli_module.CLI_CONFIG["max_turns"] = original_root
+        cli_obj = _make_cli(env_overrides={"HERMES_MAX_ITERATIONS": "42"})
+        assert cli_obj.max_turns == 42

    def test_max_turns_never_none_for_agent(self):
        """The value passed to AIAgent must never be None (causes TypeError in run_conversation)."""
        cli = _make_cli()
-        assert isinstance(cli.max_turns, int) and cli.max_turns == 60
+        assert isinstance(cli.max_turns, int) and cli.max_turns == 90


 class TestVerboseAndToolProgress:
@@ -81,6 +70,38 @@ class TestVerboseAndToolProgress:
        assert cli.tool_progress_mode in ("off", "new", "all", "verbose")


+class TestHistoryDisplay:
+    def test_history_numbers_only_visible_messages_and_summarizes_tools(self, capsys):
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "system", "content": "system prompt"},
+            {"role": "user", "content": "Hello"},
+            {
+                "role": "assistant",
+                "content": None,
+                "tool_calls": [{"id": "call_1"}, {"id": "call_2"}],
+            },
+            {"role": "tool", "content": "tool output 1"},
+            {"role": "tool", "content": "tool output 2"},
+            {"role": "assistant", "content": "All set."},
+            {"role": "user", "content": "A" * 250},
+        ]
+
+        cli.show_history()
+        output = capsys.readouterr().out
+
+        assert "[You #1]" in output
+        assert "[Hermes #2]" in output
+        assert "(requested 2 tool calls)" in output
+        assert "[Tools]" in output
+        assert "(2 tool messages hidden)" in output
+        assert "[Hermes #3]" in output
+        assert "[You #4]" in output
+        assert "[You #5]" not in output
+        assert "A" * 250 in output
+        assert "A" * 250 + "..." not in output
+
+
 class TestProviderResolution:
    def test_api_key_is_string_or_none(self):
        cli = _make_cli()
--- a/tests/test_cli_model_command.py
+++ b/tests/test_cli_model_command.py
@@ -0,0 +1,133 @@
+"""Regression tests for the `/model` slash command in the interactive CLI."""
+
+from unittest.mock import patch, MagicMock
+
+from cli import HermesCLI
+
+
+class TestModelCommand:
+    def _make_cli(self):
+        cli_obj = HermesCLI.__new__(HermesCLI)
+        cli_obj.model = "anthropic/claude-opus-4.6"
+        cli_obj.agent = object()
+        cli_obj.provider = "openrouter"
+        cli_obj.requested_provider = "openrouter"
+        cli_obj.base_url = "https://openrouter.ai/api/v1"
+        cli_obj.api_key = "test-key"
+        cli_obj._explicit_api_key = None
+        cli_obj._explicit_base_url = None
+        return cli_obj
+
+    def test_valid_model_from_api_saved_to_config(self, capsys):
+        cli_obj = self._make_cli()
+
+        with patch("hermes_cli.models.fetch_api_models",
+                   return_value=["anthropic/claude-sonnet-4.5", "openai/gpt-5.4"]), \
+             patch("cli.save_config_value", return_value=True) as save_mock:
+            cli_obj.process_command("/model anthropic/claude-sonnet-4.5")
+
+        output = capsys.readouterr().out
+        assert "saved to config" in output
+        assert cli_obj.model == "anthropic/claude-sonnet-4.5"
+        save_mock.assert_called_once_with("model.default", "anthropic/claude-sonnet-4.5")
+
+    def test_invalid_model_from_api_is_rejected(self, capsys):
+        cli_obj = self._make_cli()
+
+        with patch("hermes_cli.models.fetch_api_models",
+                   return_value=["anthropic/claude-opus-4.6"]), \
+             patch("cli.save_config_value") as save_mock:
+            cli_obj.process_command("/model anthropic/fake-model")
+
+        output = capsys.readouterr().out
+        assert "not a valid model" in output
+        assert "Model unchanged" in output
+        assert cli_obj.model == "anthropic/claude-opus-4.6"
+        save_mock.assert_not_called()
+
+    def test_api_unreachable_falls_back_session_only(self, capsys):
+        cli_obj = self._make_cli()
+
+        with patch("hermes_cli.models.fetch_api_models", return_value=None), \
+             patch("cli.save_config_value") as save_mock:
+            cli_obj.process_command("/model anthropic/claude-sonnet-next")
+
+        output = capsys.readouterr().out
+        assert "session only" in output
+        assert "will revert on restart" in output
+        assert cli_obj.model == "anthropic/claude-sonnet-next"
+        save_mock.assert_not_called()
+
+    def test_no_slash_model_probes_api_and_rejects(self, capsys):
+        cli_obj = self._make_cli()
+
+        with patch("hermes_cli.models.fetch_api_models",
+                   return_value=["openai/gpt-5.4"]) as fetch_mock, \
+             patch("cli.save_config_value") as save_mock:
+            cli_obj.process_command("/model gpt-5.4")
+
+        output = capsys.readouterr().out
+        assert "not a valid model" in output
+        assert "Model unchanged" in output
+        assert cli_obj.model == "anthropic/claude-opus-4.6"  # unchanged
+        assert cli_obj.agent is not None  # not reset
+        save_mock.assert_not_called()
+
+    def test_validation_crash_falls_back_to_save(self, capsys):
+        cli_obj = self._make_cli()
+
+        with patch("hermes_cli.models.validate_requested_model",
+                   side_effect=RuntimeError("boom")), \
+             patch("cli.save_config_value", return_value=True) as save_mock:
+            cli_obj.process_command("/model anthropic/claude-sonnet-4.5")
+
+        output = capsys.readouterr().out
+        assert "saved to config" in output
+        assert cli_obj.model == "anthropic/claude-sonnet-4.5"
+        save_mock.assert_called_once()
+
+    def test_show_model_when_no_argument(self, capsys):
+        cli_obj = self._make_cli()
+        cli_obj.process_command("/model")
+
+        output = capsys.readouterr().out
+        assert "anthropic/claude-opus-4.6" in output
+        assert "OpenRouter" in output
+        assert "Available models" in output
+        assert "provider:model-name" in output
+
+    # -- provider switching tests -------------------------------------------
+
+    def test_provider_colon_model_switches_provider(self, capsys):
+        cli_obj = self._make_cli()
+
+        with patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value={
+                 "provider": "zai",
+                 "api_key": "zai-key",
+                 "base_url": "https://api.z.ai/api/paas/v4",
+             }), \
+             patch("hermes_cli.models.fetch_api_models",
+                   return_value=["glm-5", "glm-4.7"]), \
+             patch("cli.save_config_value", return_value=True) as save_mock:
+            cli_obj.process_command("/model zai:glm-5")
+
+        output = capsys.readouterr().out
+        assert "glm-5" in output
+        assert "provider:" in output.lower() or "Z.AI" in output
+        assert cli_obj.model == "glm-5"
+        assert cli_obj.provider == "zai"
+        assert cli_obj.base_url == "https://api.z.ai/api/paas/v4"
+        # Both model and provider should be saved
+        assert save_mock.call_count == 2
+
+    def test_provider_switch_fails_on_bad_credentials(self, capsys):
+        cli_obj = self._make_cli()
+
+        with patch("hermes_cli.runtime_provider.resolve_runtime_provider",
+                   side_effect=Exception("No API key found")):
+            cli_obj.process_command("/model nous:hermes-3")
+
+        output = capsys.readouterr().out
+        assert "Could not resolve credentials" in output
+        assert cli_obj.model == "anthropic/claude-opus-4.6"  # unchanged
+        assert cli_obj.provider == "openrouter"  # unchanged
--- a/tests/test_cli_provider_resolution.py
+++ b/tests/test_cli_provider_resolution.py
@@ -162,6 +162,124 @@ def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch):
    assert shell.api_mode == "codex_responses"


+def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
+    """When provider resolves to openai-codex and no model was explicitly
+    chosen, the global config default (e.g. anthropic/claude-opus-4.6) must
+    be replaced with a Codex-compatible model.  Fixes #651."""
+    cli = _import_cli()
+
+    monkeypatch.delenv("LLM_MODEL", raising=False)
+    monkeypatch.delenv("OPENAI_MODEL", raising=False)
+
+    def _runtime_resolve(**kwargs):
+        return {
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "test-key",
+            "source": "env/config",
+        }
+
+    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
+    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
+    monkeypatch.setattr(
+        "hermes_cli.codex_models.get_codex_model_ids",
+        lambda access_token=None: ["gpt-5.2-codex", "gpt-5.1-codex-mini"],
+    )
+
+    shell = cli.HermesCLI(compact=True, max_turns=1)
+
+    assert shell._model_is_default is True
+    assert shell._ensure_runtime_credentials() is True
+    assert shell.provider == "openai-codex"
+    assert "anthropic" not in shell.model
+    assert "claude" not in shell.model
+    assert shell.model == "gpt-5.2-codex"
+
+
+def test_codex_provider_trusts_explicit_envvar_model(monkeypatch):
+    """When the user explicitly sets LLM_MODEL, we trust their choice and
+    let the API be the judge — even if it's a non-OpenAI model.  Only
+    provider prefixes are stripped; the bare model passes through."""
+    cli = _import_cli()
+
+    monkeypatch.setenv("LLM_MODEL", "claude-opus-4-6")
+    monkeypatch.delenv("OPENAI_MODEL", raising=False)
+
+    def _runtime_resolve(**kwargs):
+        return {
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "test-key",
+            "source": "env/config",
+        }
+
+    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
+    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
+
+    shell = cli.HermesCLI(compact=True, max_turns=1)
+
+    assert shell._model_is_default is False
+    assert shell._ensure_runtime_credentials() is True
+    assert shell.provider == "openai-codex"
+    # User explicitly chose this model — it passes through untouched
+    assert shell.model == "claude-opus-4-6"
+
+
+def test_codex_provider_preserves_explicit_codex_model(monkeypatch):
+    """If the user explicitly passes a Codex-compatible model, it must be
+    preserved even when the provider resolves to openai-codex."""
+    cli = _import_cli()
+
+    monkeypatch.delenv("LLM_MODEL", raising=False)
+    monkeypatch.delenv("OPENAI_MODEL", raising=False)
+
+    def _runtime_resolve(**kwargs):
+        return {
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "test-key",
+            "source": "env/config",
+        }
+
+    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
+    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
+
+    shell = cli.HermesCLI(model="gpt-5.1-codex-mini", compact=True, max_turns=1)
+
+    assert shell._model_is_default is False
+    assert shell._ensure_runtime_credentials() is True
+    assert shell.model == "gpt-5.1-codex-mini"
+
+
+def test_codex_provider_strips_provider_prefix_from_model(monkeypatch):
+    """openai/gpt-5.3-codex should become gpt-5.3-codex — the Codex
+    Responses API does not accept provider-prefixed model slugs."""
+    cli = _import_cli()
+
+    monkeypatch.delenv("LLM_MODEL", raising=False)
+    monkeypatch.delenv("OPENAI_MODEL", raising=False)
+
+    def _runtime_resolve(**kwargs):
+        return {
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "test-key",
+            "source": "env/config",
+        }
+
+    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
+    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
+
+    shell = cli.HermesCLI(model="openai/gpt-5.3-codex", compact=True, max_turns=1)
+
+    assert shell._ensure_runtime_credentials() is True
+    assert shell.model == "gpt-5.3-codex"
+
+
 def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys):
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
--- a/tests/test_codex_execution_paths.py
+++ b/tests/test_codex_execution_paths.py
@@ -149,6 +149,7 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
    runner._prefill_messages = []
    runner._reasoning_config = None
    runner._provider_routing = {}
+    runner._fallback_model = None
    runner._running_agents = {}
    from unittest.mock import MagicMock, AsyncMock
    runner.hooks = MagicMock()
--- a/tests/test_codex_models.py
+++ b/tests/test_codex_models.py
@@ -1,4 +1,9 @@
 import json
+import os
+import sys
+from unittest.mock import patch
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

 from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, get_codex_model_ids

@@ -13,7 +18,7 @@ def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch
                "models": [
                    {"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True},
                    {"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True},
-                    {"slug": "gpt-4o", "priority": 1, "supported_in_api": True},
+                    {"slug": "gpt-5.4", "priority": 1, "supported_in_api": True},
                    {"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"},
                ]
            }
@@ -26,10 +31,19 @@ def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch
    assert models[0] == "gpt-5.2-codex"
    assert "gpt-5.1-codex" in models
    assert "gpt-5.3-codex" in models
-    assert "gpt-4o" not in models
+    # Non-codex-suffixed models are included when the cache says they're available
+    assert "gpt-5.4" in models
    assert "gpt-5-hidden-codex" not in models


+def test_setup_wizard_codex_import_resolves():
+    """Regression test for #712: setup.py must import the correct function name."""
+    # This mirrors the exact import used in hermes_cli/setup.py (line 873 at time of writing).
+    # A prior bug had 'get_codex_models' (wrong) instead of 'get_codex_model_ids'.
+    from hermes_cli.codex_models import get_codex_model_ids as setup_import
+    assert callable(setup_import)
+
+
 def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatch):
    codex_home = tmp_path / "codex-home"
    codex_home.mkdir(parents=True, exist_ok=True)
@@ -38,3 +52,144 @@ def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatc
    models = get_codex_model_ids()

    assert models[: len(DEFAULT_CODEX_MODELS)] == DEFAULT_CODEX_MODELS
+
+
+# ── Tests for _normalize_model_for_provider ──────────────────────────
+
+
+def _make_cli(model="anthropic/claude-opus-4.6", **kwargs):
+    """Create a HermesCLI with minimal mocking."""
+    import cli as _cli_mod
+    from cli import HermesCLI
+
+    _clean_config = {
+        "model": {
+            "default": "anthropic/claude-opus-4.6",
+            "base_url": "https://openrouter.ai/api/v1",
+            "provider": "auto",
+        },
+        "display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
+        "agent": {},
+        "terminal": {"env_type": "local"},
+    }
+    clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
+    with (
+        patch("cli.get_tool_definitions", return_value=[]),
+        patch.dict("os.environ", clean_env, clear=False),
+        patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}),
+    ):
+        cli = HermesCLI(model=model, **kwargs)
+    return cli
+
+
+class TestNormalizeModelForProvider:
+    """_normalize_model_for_provider() trusts user-selected models.
+
+    For the openai-codex provider only two things happen (other providers are a no-op):
+    1. Provider prefixes are stripped (API needs bare slugs)
+    2. The *untouched default* model is swapped for a Codex model
+    Everything else passes through — the API is the judge.
+    """
+
+    def test_non_codex_provider_is_noop(self):
+        cli = _make_cli(model="gpt-5.4")
+        changed = cli._normalize_model_for_provider("openrouter")
+        assert changed is False
+        assert cli.model == "gpt-5.4"
+
+    def test_bare_codex_model_passes_through(self):
+        cli = _make_cli(model="gpt-5.3-codex")
+        changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is False
+        assert cli.model == "gpt-5.3-codex"
+
+    def test_bare_non_codex_model_passes_through(self):
+        """gpt-5.4 (no 'codex' suffix) passes through — user chose it."""
+        cli = _make_cli(model="gpt-5.4")
+        changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is False
+        assert cli.model == "gpt-5.4"
+
+    def test_any_bare_model_trusted(self):
+        """Even a non-OpenAI bare model passes through — user explicitly set it."""
+        cli = _make_cli(model="claude-opus-4-6")
+        changed = cli._normalize_model_for_provider("openai-codex")
+        # User explicitly chose this model — we trust them, API will error if wrong
+        assert changed is False
+        assert cli.model == "claude-opus-4-6"
+
+    def test_provider_prefix_stripped(self):
+        """openai/gpt-5.4 → gpt-5.4 (strip prefix, keep model)."""
+        cli = _make_cli(model="openai/gpt-5.4")
+        changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is True
+        assert cli.model == "gpt-5.4"
+
+    def test_any_provider_prefix_stripped(self):
+        """anthropic/claude-opus-4.6 → claude-opus-4.6 (strip prefix only).
+        User explicitly chose this — let the API decide if it works."""
+        cli = _make_cli(model="anthropic/claude-opus-4.6")
+        changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is True
+        assert cli.model == "claude-opus-4.6"
+
+    def test_default_model_replaced(self):
+        """The untouched default (anthropic/claude-opus-4.6) gets swapped."""
+        import cli as _cli_mod
+        _clean_config = {
+            "model": {
+                "default": "anthropic/claude-opus-4.6",
+                "base_url": "https://openrouter.ai/api/v1",
+                "provider": "auto",
+            },
+            "display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
+            "agent": {},
+            "terminal": {"env_type": "local"},
+        }
+        # Don't pass model= so _model_is_default is True
+        with (
+            patch("cli.get_tool_definitions", return_value=[]),
+            patch.dict("os.environ", {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}, clear=False),
+            patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}),
+        ):
+            from cli import HermesCLI
+            cli = HermesCLI()
+
+        assert cli._model_is_default is True
+        with patch(
+            "hermes_cli.codex_models.get_codex_model_ids",
+            return_value=["gpt-5.3-codex", "gpt-5.4"],
+        ):
+            changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is True
+        # Uses first from available list
+        assert cli.model == "gpt-5.3-codex"
+
+    def test_default_fallback_when_api_fails(self):
+        """Default model falls back to gpt-5.3-codex when the Codex model lookup raises."""
+        import cli as _cli_mod
+        _clean_config = {
+            "model": {
+                "default": "anthropic/claude-opus-4.6",
+                "base_url": "https://openrouter.ai/api/v1",
+                "provider": "auto",
+            },
+            "display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
+            "agent": {},
+            "terminal": {"env_type": "local"},
+        }
+        with (
+            patch("cli.get_tool_definitions", return_value=[]),
+            patch.dict("os.environ", {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}, clear=False),
+            patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}),
+        ):
+            from cli import HermesCLI
+            cli = HermesCLI()
+
+        with patch(
+            "hermes_cli.codex_models.get_codex_model_ids",
+            side_effect=Exception("offline"),
+        ):
+            changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is True
+        assert cli.model == "gpt-5.3-codex"
--- a/tests/test_fallback_model.py
+++ b/tests/test_fallback_model.py
@@ -0,0 +1,339 @@
+"""Tests for the provider fallback model feature.
+
+Verifies that AIAgent can switch to a configured fallback model/provider
+when the primary fails after retries.
+"""
+
+import os
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from run_agent import AIAgent
+
+
+def _make_tool_defs(*names: str) -> list:
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": n,
+                "description": f"{n} tool",
+                "parameters": {"type": "object", "properties": {}},
+            },
+        }
+        for n in names
+    ]
+
+
+def _make_agent(fallback_model=None):
+    """Create a minimal AIAgent with optional fallback config."""
+    with (
+        patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+        patch("run_agent.check_toolset_requirements", return_value={}),
+        patch("run_agent.OpenAI"),
+    ):
+        agent = AIAgent(
+            api_key="test-key-primary",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+            fallback_model=fallback_model,
+        )
+        agent.client = MagicMock()
+        return agent
+
+
+# =============================================================================
+# _try_activate_fallback()
+# =============================================================================
+
+class TestTryActivateFallback:
+    def test_returns_false_when_not_configured(self):
+        agent = _make_agent(fallback_model=None)
+        assert agent._try_activate_fallback() is False
+        assert agent._fallback_activated is False
+
+    def test_returns_false_for_empty_config(self):
+        agent = _make_agent(fallback_model={"provider": "", "model": ""})
+        assert agent._try_activate_fallback() is False
+
+    def test_returns_false_for_missing_provider(self):
+        agent = _make_agent(fallback_model={"model": "gpt-4.1"})
+        assert agent._try_activate_fallback() is False
+
+    def test_returns_false_for_missing_model(self):
+        agent = _make_agent(fallback_model={"provider": "openrouter"})
+        assert agent._try_activate_fallback() is False
+
+    def test_activates_openrouter_fallback(self):
+        agent = _make_agent(
+            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
+        )
+        with (
+            patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-fallback-key"}),
+            patch("run_agent.OpenAI") as mock_openai,
+        ):
+            result = agent._try_activate_fallback()
+            assert result is True
+            assert agent._fallback_activated is True
+            assert agent.model == "anthropic/claude-sonnet-4"
+            assert agent.provider == "openrouter"
+            assert agent.api_mode == "chat_completions"
+            mock_openai.assert_called_once()
+            call_kwargs = mock_openai.call_args[1]
+            assert call_kwargs["api_key"] == "sk-or-fallback-key"
+            assert "openrouter" in call_kwargs["base_url"].lower()
+            # OpenRouter should get attribution headers
+            assert "default_headers" in call_kwargs
+
+    def test_activates_zai_fallback(self):
+        agent = _make_agent(
+            fallback_model={"provider": "zai", "model": "glm-5"},
+        )
+        with (
+            patch.dict("os.environ", {"ZAI_API_KEY": "sk-zai-key"}),
+            patch("run_agent.OpenAI") as mock_openai,
+        ):
+            result = agent._try_activate_fallback()
+            assert result is True
+            assert agent.model == "glm-5"
+            assert agent.provider == "zai"
+            call_kwargs = mock_openai.call_args[1]
+            assert call_kwargs["api_key"] == "sk-zai-key"
+            assert "z.ai" in call_kwargs["base_url"].lower()
+
+    def test_activates_kimi_fallback(self):
+        agent = _make_agent(
+            fallback_model={"provider": "kimi-coding", "model": "kimi-k2.5"},
+        )
+        with (
+            patch.dict("os.environ", {"KIMI_API_KEY": "sk-kimi-key"}),
+            patch("run_agent.OpenAI"),
+        ):
+            assert agent._try_activate_fallback() is True
+            assert agent.model == "kimi-k2.5"
+            assert agent.provider == "kimi-coding"
+
+    def test_activates_minimax_fallback(self):
+        agent = _make_agent(
+            fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
+        )
+        with (
+            patch.dict("os.environ", {"MINIMAX_API_KEY": "sk-mm-key"}),
+            patch("run_agent.OpenAI") as mock_openai,
+        ):
+            assert agent._try_activate_fallback() is True
+            assert agent.model == "MiniMax-M2.5"
+            assert agent.provider == "minimax"
+            call_kwargs = mock_openai.call_args[1]
+            assert "minimax.io" in call_kwargs["base_url"]
+
+    def test_only_fires_once(self):
+        agent = _make_agent(
+            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
+        )
+        with (
+            patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
+            patch("run_agent.OpenAI"),
+        ):
+            assert agent._try_activate_fallback() is True
+            # Second attempt should return False
+            assert agent._try_activate_fallback() is False
+
+    def test_returns_false_when_no_api_key(self):
+        """Fallback should fail gracefully when the API key env var is unset."""
+        agent = _make_agent(
+            fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
+        )
+        # Ensure MINIMAX_API_KEY is not in the environment
+        env = {k: v for k, v in os.environ.items() if k != "MINIMAX_API_KEY"}
+        with patch.dict("os.environ", env, clear=True):
+            assert agent._try_activate_fallback() is False
+            assert agent._fallback_activated is False
+
+    def test_custom_base_url(self):
+        """Custom base_url in config should override the provider default."""
+        agent = _make_agent(
+            fallback_model={
+                "provider": "custom",
+                "model": "my-model",
+                "base_url": "http://localhost:8080/v1",
+                "api_key_env": "MY_CUSTOM_KEY",
+            },
+        )
+        with (
+            patch.dict("os.environ", {"MY_CUSTOM_KEY": "custom-secret"}),
+            patch("run_agent.OpenAI") as mock_openai,
+        ):
+            assert agent._try_activate_fallback() is True
+            call_kwargs = mock_openai.call_args[1]
+            assert call_kwargs["base_url"] == "http://localhost:8080/v1"
+            assert call_kwargs["api_key"] == "custom-secret"
+
+    def test_prompt_caching_enabled_for_claude_on_openrouter(self):
+        agent = _make_agent(
+            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
+        )
+        with (
+            patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
+            patch("run_agent.OpenAI"),
+        ):
+            agent._try_activate_fallback()
+            assert agent._use_prompt_caching is True
+
+    def test_prompt_caching_disabled_for_non_claude(self):
+        agent = _make_agent(
+            fallback_model={"provider": "openrouter", "model": "google/gemini-2.5-flash"},
+        )
+        with (
+            patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
+            patch("run_agent.OpenAI"),
+        ):
+            agent._try_activate_fallback()
+            assert agent._use_prompt_caching is False
+
+    def test_prompt_caching_disabled_for_non_openrouter(self):
+        agent = _make_agent(
+            fallback_model={"provider": "zai", "model": "glm-5"},
+        )
+        with (
+            patch.dict("os.environ", {"ZAI_API_KEY": "sk-zai-key"}),
+            patch("run_agent.OpenAI"),
+        ):
+            agent._try_activate_fallback()
+            assert agent._use_prompt_caching is False
+
+    def test_zai_alt_env_var(self):
+        """Z.AI should also accept Z_AI_API_KEY as an alternate API-key env var."""
+        agent = _make_agent(
+            fallback_model={"provider": "zai", "model": "glm-5"},
+        )
+        with (
+            patch.dict("os.environ", {"Z_AI_API_KEY": "sk-alt-key"}),
+            patch("run_agent.OpenAI") as mock_openai,
+        ):
+            assert agent._try_activate_fallback() is True
+            call_kwargs = mock_openai.call_args[1]
+            assert call_kwargs["api_key"] == "sk-alt-key"
+
+    def test_activates_codex_fallback(self):
+        """OpenAI Codex fallback should use OAuth credentials and codex_responses mode."""
+        agent = _make_agent(
+            fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"},
+        )
+        mock_creds = {
+            "api_key": "codex-oauth-token",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+        }
+        with (
+            patch("hermes_cli.auth.resolve_codex_runtime_credentials", return_value=mock_creds),
+            patch("run_agent.OpenAI") as mock_openai,
+        ):
+            result = agent._try_activate_fallback()
+            assert result is True
+            assert agent.model == "gpt-5.3-codex"
+            assert agent.provider == "openai-codex"
+            assert agent.api_mode == "codex_responses"
+            call_kwargs = mock_openai.call_args[1]
+            assert call_kwargs["api_key"] == "codex-oauth-token"
+            assert "chatgpt.com" in call_kwargs["base_url"]
+
+    def test_codex_fallback_fails_gracefully_without_credentials(self):
+        """Codex fallback should return False if no OAuth credentials available."""
+        agent = _make_agent(
+            fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"},
+        )
+        with patch(
+            "hermes_cli.auth.resolve_codex_runtime_credentials",
+            side_effect=Exception("No Codex credentials"),
+        ):
+            assert agent._try_activate_fallback() is False
+            assert agent._fallback_activated is False
+
+    def test_activates_nous_fallback(self):
+        """Nous Portal fallback should use OAuth credentials and chat_completions mode."""
+        agent = _make_agent(
+            fallback_model={"provider": "nous", "model": "nous-hermes-3"},
+        )
+        mock_creds = {
+            "api_key": "nous-agent-key-abc",
+            "base_url": "https://inference-api.nousresearch.com/v1",
+        }
+        with (
+            patch("hermes_cli.auth.resolve_nous_runtime_credentials", return_value=mock_creds),
+            patch("run_agent.OpenAI") as mock_openai,
+        ):
+            result = agent._try_activate_fallback()
+            assert result is True
+            assert agent.model == "nous-hermes-3"
+            assert agent.provider == "nous"
+            assert agent.api_mode == "chat_completions"
+            call_kwargs = mock_openai.call_args[1]
+            assert call_kwargs["api_key"] == "nous-agent-key-abc"
+            assert "nousresearch.com" in call_kwargs["base_url"]
+
+    def test_nous_fallback_fails_gracefully_without_login(self):
+        """Nous fallback should return False if not logged in."""
+        agent = _make_agent(
+            fallback_model={"provider": "nous", "model": "nous-hermes-3"},
+        )
+        with patch(
+            "hermes_cli.auth.resolve_nous_runtime_credentials",
+            side_effect=Exception("Not logged in to Nous Portal"),
+        ):
+            assert agent._try_activate_fallback() is False
+            assert agent._fallback_activated is False
+
+
+# =============================================================================
+# Fallback config init
+# =============================================================================
+
+class TestFallbackInit:
+    def test_fallback_stored_when_configured(self):
+        agent = _make_agent(
+            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
+        )
+        assert agent._fallback_model is not None
+        assert agent._fallback_model["provider"] == "openrouter"
+        assert agent._fallback_activated is False
+
+    def test_fallback_none_when_not_configured(self):
+        agent = _make_agent(fallback_model=None)
+        assert agent._fallback_model is None
+        assert agent._fallback_activated is False
+
+    def test_fallback_none_for_non_dict(self):
+        agent = _make_agent(fallback_model="not-a-dict")
+        assert agent._fallback_model is None
+
+
+# =============================================================================
+# Provider credential resolution
+# =============================================================================
+
+class TestProviderCredentials:
+    """Verify that each supported provider resolves its API key correctly."""
+
+    @pytest.mark.parametrize("provider,env_var,base_url_fragment", [
+        ("openrouter", "OPENROUTER_API_KEY", "openrouter"),
+        ("zai", "ZAI_API_KEY", "z.ai"),
+        ("kimi-coding", "KIMI_API_KEY", "moonshot.ai"),
+        ("minimax", "MINIMAX_API_KEY", "minimax.io"),
+        ("minimax-cn", "MINIMAX_CN_API_KEY", "minimaxi.com"),
+    ])
+    def test_provider_resolves(self, provider, env_var, base_url_fragment):
+        agent = _make_agent(
+            fallback_model={"provider": provider, "model": "test-model"},
+        )
+        with (
+            patch.dict("os.environ", {env_var: "test-key-123"}),
+            patch("run_agent.OpenAI") as mock_openai,
+        ):
+            result = agent._try_activate_fallback()
+            assert result is True, f"Failed to activate fallback for {provider}"
+            call_kwargs = mock_openai.call_args[1]
+            assert call_kwargs["api_key"] == "test-key-123"
+            assert base_url_fragment in call_kwargs["base_url"].lower()
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -351,6 +351,173 @@ class TestPruneSessions:
 # Schema and WAL mode
 # =========================================================================

+# =========================================================================
+# Session title
+# =========================================================================
+
+class TestSessionTitle:
+    def test_set_and_get_title(self, db):
+        db.create_session(session_id="s1", source="cli")
+        assert db.set_session_title("s1", "My Session") is True
+
+        session = db.get_session("s1")
+        assert session["title"] == "My Session"
+
+    def test_set_title_nonexistent_session(self, db):
+        assert db.set_session_title("nonexistent", "Title") is False
+
+    def test_title_initially_none(self, db):
+        db.create_session(session_id="s1", source="cli")
+        session = db.get_session("s1")
+        assert session["title"] is None
+
+    def test_update_title(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.set_session_title("s1", "First Title")
+        db.set_session_title("s1", "Updated Title")
+
+        session = db.get_session("s1")
+        assert session["title"] == "Updated Title"
+
+    def test_title_in_search_sessions(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.set_session_title("s1", "Debugging Auth")
+        db.create_session(session_id="s2", source="cli")
+
+        sessions = db.search_sessions()
+        titled = [s for s in sessions if s.get("title") == "Debugging Auth"]
+        assert len(titled) == 1
+        assert titled[0]["id"] == "s1"
+
+    def test_title_in_export(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.set_session_title("s1", "Export Test")
+        db.append_message("s1", role="user", content="Hello")
+
+        export = db.export_session("s1")
+        assert export["title"] == "Export Test"
+
+    def test_title_with_special_characters(self, db):
+        db.create_session(session_id="s1", source="cli")
+        title = "PR #438 — fixing the 'auth' middleware"
+        db.set_session_title("s1", title)
+
+        session = db.get_session("s1")
+        assert session["title"] == title
+
+    def test_title_empty_string_normalized_to_none(self, db):
+        """Empty strings are normalized to None (clearing the title)."""
+        db.create_session(session_id="s1", source="cli")
+        db.set_session_title("s1", "My Title")
+        # Setting to empty string should clear the title (normalize to None)
+        db.set_session_title("s1", "")
+
+        session = db.get_session("s1")
+        assert session["title"] is None
+
+    def test_multiple_empty_titles_no_conflict(self, db):
+        """Multiple sessions can have empty-string (normalized to NULL) titles."""
+        db.create_session(session_id="s1", source="cli")
+        db.create_session(session_id="s2", source="cli")
+        db.set_session_title("s1", "")
+        db.set_session_title("s2", "")
+        # Both should be None, no uniqueness conflict
+        assert db.get_session("s1")["title"] is None
+        assert db.get_session("s2")["title"] is None
+
+    def test_title_survives_end_session(self, db):
+        db.create_session(session_id="s1", source="cli")
+        db.set_session_title("s1", "Before End")
+        db.end_session("s1", end_reason="user_exit")
+
+        session = db.get_session("s1")
+        assert session["title"] == "Before End"
+        assert session["ended_at"] is not None
+
+
+class TestSanitizeTitle:
+    """Tests for SessionDB.sanitize_title() validation and cleaning."""
+
+    def test_normal_title_unchanged(self):
+        assert SessionDB.sanitize_title("My Project") == "My Project"
+
+    def test_strips_whitespace(self):
+        assert SessionDB.sanitize_title("  hello world  ") == "hello world"
+
+    def test_collapses_internal_whitespace(self):
+        assert SessionDB.sanitize_title("hello   world") == "hello world"
+
+    def test_tabs_and_newlines_collapsed(self):
+        assert SessionDB.sanitize_title("hello\t\nworld") == "hello world"
+
+    def test_none_returns_none(self):
+        assert SessionDB.sanitize_title(None) is None
+
+    def test_empty_string_returns_none(self):
+        assert SessionDB.sanitize_title("") is None
+
+    def test_whitespace_only_returns_none(self):
+        assert SessionDB.sanitize_title("   \t\n  ") is None
+
+    def test_control_chars_stripped(self):
+        # Null byte, bell, backspace, etc.
+        assert SessionDB.sanitize_title("hello\x00world") == "helloworld"
+        assert SessionDB.sanitize_title("\x07\x08test\x1b") == "test"
+
+    def test_del_char_stripped(self):
+        assert SessionDB.sanitize_title("hello\x7fworld") == "helloworld"
+
+    def test_zero_width_chars_stripped(self):
+        # Zero-width space (U+200B), zero-width joiner (U+200D)
+        assert SessionDB.sanitize_title("hello\u200bworld") == "helloworld"
+        assert SessionDB.sanitize_title("hello\u200dworld") == "helloworld"
+
+    def test_rtl_override_stripped(self):
+        # Right-to-left override (U+202E) — used in filename spoofing attacks
+        assert SessionDB.sanitize_title("hello\u202eworld") == "helloworld"
+
+    def test_bom_stripped(self):
+        # Byte order mark (U+FEFF)
+        assert SessionDB.sanitize_title("\ufeffhello") == "hello"
+
+    def test_only_control_chars_returns_none(self):
+        assert SessionDB.sanitize_title("\x00\x01\x02\u200b\ufeff") is None
+
+    def test_max_length_allowed(self):
+        title = "A" * 100
+        assert SessionDB.sanitize_title(title) == title
+
+    def test_exceeds_max_length_raises(self):
+        title = "A" * 101
+        with pytest.raises(ValueError, match="too long"):
+            SessionDB.sanitize_title(title)
+
+    def test_unicode_emoji_allowed(self):
+        assert SessionDB.sanitize_title("🚀 My Project 🎉") == "🚀 My Project 🎉"
+
+    def test_cjk_characters_allowed(self):
+        assert SessionDB.sanitize_title("我的项目") == "我的项目"
+
+    def test_accented_characters_allowed(self):
+        assert SessionDB.sanitize_title("Résumé éditing") == "Résumé éditing"
+
+    def test_special_punctuation_allowed(self):
+        title = "PR #438 — fixing the 'auth' middleware"
+        assert SessionDB.sanitize_title(title) == title
+
+    def test_sanitize_applied_in_set_session_title(self, db):
+        """set_session_title applies sanitize_title internally."""
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "  hello\x00  world  ")
+        assert db.get_session("s1")["title"] == "hello world"
+
+    def test_too_long_title_rejected_by_set(self, db):
+        """set_session_title raises ValueError for overly long titles."""
+        db.create_session("s1", "cli")
+        with pytest.raises(ValueError, match="too long"):
+            db.set_session_title("s1", "X" * 150)
+
+
 class TestSchemaInit:
    def test_wal_mode(self, db):
        cursor = db._conn.execute("PRAGMA journal_mode")
@@ -373,4 +540,297 @@ class TestSchemaInit:
    def test_schema_version(self, db):
        cursor = db._conn.execute("SELECT version FROM schema_version")
        version = cursor.fetchone()[0]
-        assert version == 2
+        assert version == 4
+
+    def test_title_column_exists(self, db):
+        """Verify the title column was created in the sessions table."""
+        cursor = db._conn.execute("PRAGMA table_info(sessions)")
+        columns = {row[1] for row in cursor.fetchall()}
+        assert "title" in columns
+
+    def test_migration_from_v2(self, tmp_path):
+        """Simulate a v2 database and verify migration reaches v4 and adds the title column."""
+        import sqlite3
+
+        db_path = tmp_path / "migrate_test.db"
+        conn = sqlite3.connect(str(db_path))
+        # Create v2 schema (without title column)
+        conn.executescript("""
+            CREATE TABLE schema_version (version INTEGER NOT NULL);
+            INSERT INTO schema_version (version) VALUES (2);
+
+            CREATE TABLE sessions (
+                id TEXT PRIMARY KEY,
+                source TEXT NOT NULL,
+                user_id TEXT,
+                model TEXT,
+                model_config TEXT,
+                system_prompt TEXT,
+                parent_session_id TEXT,
+                started_at REAL NOT NULL,
+                ended_at REAL,
+                end_reason TEXT,
+                message_count INTEGER DEFAULT 0,
+                tool_call_count INTEGER DEFAULT 0,
+                input_tokens INTEGER DEFAULT 0,
+                output_tokens INTEGER DEFAULT 0
+            );
+
+            CREATE TABLE messages (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                session_id TEXT NOT NULL,
+                role TEXT NOT NULL,
+                content TEXT,
+                tool_call_id TEXT,
+                tool_calls TEXT,
+                tool_name TEXT,
+                timestamp REAL NOT NULL,
+                token_count INTEGER,
+                finish_reason TEXT
+            );
+        """)
+        conn.execute(
+            "INSERT INTO sessions (id, source, started_at) VALUES (?, ?, ?)",
+            ("existing", "cli", 1000.0),
+        )
+        conn.commit()
+        conn.close()
+
+        # Open with SessionDB — should migrate to v4
+        migrated_db = SessionDB(db_path=db_path)
+
+        # Verify migration
+        cursor = migrated_db._conn.execute("SELECT version FROM schema_version")
+        assert cursor.fetchone()[0] == 4
+
+        # Verify title column exists and is NULL for existing sessions
+        session = migrated_db.get_session("existing")
+        assert session is not None
+        assert session["title"] is None
+
+        # Verify we can set title on migrated session
+        assert migrated_db.set_session_title("existing", "Migrated Title") is True
+        session = migrated_db.get_session("existing")
+        assert session["title"] == "Migrated Title"
+
+        migrated_db.close()
+
+
+class TestTitleUniqueness:
+    """Tests for unique title enforcement and title-based lookups."""
+
+    def test_duplicate_title_raises(self, db):
+        """Setting a title already used by another session raises ValueError."""
+        db.create_session("s1", "cli")
+        db.create_session("s2", "cli")
+        db.set_session_title("s1", "my project")
+        with pytest.raises(ValueError, match="already in use"):
+            db.set_session_title("s2", "my project")
+
+    def test_same_session_can_keep_title(self, db):
+        """A session can re-set its own title without error."""
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "my project")
+        # Should not raise — it's the same session
+        assert db.set_session_title("s1", "my project") is True
+
+    def test_null_titles_not_unique(self, db):
+        """Multiple sessions can have NULL titles (no constraint violation)."""
+        db.create_session("s1", "cli")
+        db.create_session("s2", "cli")
+        # Both have NULL titles — no error
+        assert db.get_session("s1")["title"] is None
+        assert db.get_session("s2")["title"] is None
+
+    def test_get_session_by_title(self, db):
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "refactoring auth")
+        result = db.get_session_by_title("refactoring auth")
+        assert result is not None
+        assert result["id"] == "s1"
+
+    def test_get_session_by_title_not_found(self, db):
+        assert db.get_session_by_title("nonexistent") is None
+
+    def test_get_session_title(self, db):
+        db.create_session("s1", "cli")
+        assert db.get_session_title("s1") is None
+        db.set_session_title("s1", "my title")
+        assert db.get_session_title("s1") == "my title"
+
+    def test_get_session_title_nonexistent(self, db):
+        assert db.get_session_title("nonexistent") is None
+
+
+class TestTitleLineage:
+    """Tests for title lineage resolution and auto-numbering."""
+
+    def test_resolve_exact_title(self, db):
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "my project")
+        assert db.resolve_session_by_title("my project") == "s1"
+
+    def test_resolve_returns_latest_numbered(self, db):
+        """When numbered variants exist, return the most recent one."""
+        import time
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "my project")
+        time.sleep(0.01)
+        db.create_session("s2", "cli")
+        db.set_session_title("s2", "my project #2")
+        time.sleep(0.01)
+        db.create_session("s3", "cli")
+        db.set_session_title("s3", "my project #3")
+        # Resolving "my project" should return s3 (latest numbered variant)
+        assert db.resolve_session_by_title("my project") == "s3"
+
+    def test_resolve_exact_numbered(self, db):
+        """Resolving an exact numbered title returns that specific session."""
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "my project")
+        db.create_session("s2", "cli")
+        db.set_session_title("s2", "my project #2")
+        # Resolving "my project #2" exactly should return s2
+        assert db.resolve_session_by_title("my project #2") == "s2"
+
+    def test_resolve_nonexistent_title(self, db):
+        assert db.resolve_session_by_title("nonexistent") is None
+
+    def test_next_title_no_existing(self, db):
+        """With no existing sessions, base title is returned as-is."""
+        assert db.get_next_title_in_lineage("my project") == "my project"
+
+    def test_next_title_first_continuation(self, db):
+        """First continuation after the original gets #2."""
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "my project")
+        assert db.get_next_title_in_lineage("my project") == "my project #2"
+
+    def test_next_title_increments(self, db):
+        """Each continuation increments the number."""
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "my project")
+        db.create_session("s2", "cli")
+        db.set_session_title("s2", "my project #2")
+        db.create_session("s3", "cli")
+        db.set_session_title("s3", "my project #3")
+        assert db.get_next_title_in_lineage("my project") == "my project #4"
+
+    def test_next_title_strips_existing_number(self, db):
+        """Passing a numbered title strips the number and finds the base."""
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "my project")
+        db.create_session("s2", "cli")
+        db.set_session_title("s2", "my project #2")
+        # Even when called with "my project #2", it should return #3
+        assert db.get_next_title_in_lineage("my project #2") == "my project #3"
+
+
+class TestTitleSqlWildcards:
+    """Titles containing SQL LIKE wildcards (%, _) must not cause false matches."""
+
+    def test_resolve_title_with_underscore(self, db):
+        """A title like 'test_project' should not match 'testXproject #2'."""
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "test_project")
+        db.create_session("s2", "cli")
+        db.set_session_title("s2", "testXproject #2")
+        # Resolving "test_project" should return s1 (exact), not s2
+        assert db.resolve_session_by_title("test_project") == "s1"
+
+    def test_resolve_title_with_percent(self, db):
+        """A title with '%' should not wildcard-match unrelated sessions."""
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "100% done")
+        db.create_session("s2", "cli")
+        db.set_session_title("s2", "100X done #2")
+        # Should resolve to s1 (exact), not s2
+        assert db.resolve_session_by_title("100% done") == "s1"
+
+    def test_next_lineage_with_underscore(self, db):
+        """get_next_title_in_lineage with underscores doesn't match wrong sessions."""
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "test_project")
+        db.create_session("s2", "cli")
+        db.set_session_title("s2", "testXproject #2")
+        # "testXproject #2" must not count toward this lineage, so next is "test_project #2"
+        assert db.get_next_title_in_lineage("test_project") == "test_project #2"
+
+
+class TestListSessionsRich:
+    """Tests for enhanced session listing with preview and last_active."""
+
+    def test_preview_from_first_user_message(self, db):
+        db.create_session("s1", "cli")
+        db.append_message("s1", "system", "You are a helpful assistant.")
+        db.append_message("s1", "user", "Help me refactor the auth module please")
+        db.append_message("s1", "assistant", "Sure, let me look at it.")
+        sessions = db.list_sessions_rich()
+        assert len(sessions) == 1
+        assert "Help me refactor the auth module" in sessions[0]["preview"]
+
+    def test_preview_truncated_at_60(self, db):
+        db.create_session("s1", "cli")
+        long_msg = "A" * 100
+        db.append_message("s1", "user", long_msg)
+        sessions = db.list_sessions_rich()
+        assert len(sessions[0]["preview"]) == 63  # 60 chars + "..."
+        assert sessions[0]["preview"].endswith("...")
+
+    def test_preview_empty_when_no_user_messages(self, db):
+        db.create_session("s1", "cli")
+        db.append_message("s1", "system", "System prompt")
+        sessions = db.list_sessions_rich()
+        assert sessions[0]["preview"] == ""
+
+    def test_last_active_from_latest_message(self, db):
+        import time
+        db.create_session("s1", "cli")
+        db.append_message("s1", "user", "Hello")
+        time.sleep(0.01)
+        db.append_message("s1", "assistant", "Hi there!")
+        sessions = db.list_sessions_rich()
+        # last_active should track the latest (assistant) message, so it is after started_at
+        assert sessions[0]["last_active"] > sessions[0]["started_at"]
+
+    def test_last_active_fallback_to_started_at(self, db):
+        db.create_session("s1", "cli")
+        sessions = db.list_sessions_rich()
+        # No messages, so last_active falls back to started_at
+        assert sessions[0]["last_active"] == sessions[0]["started_at"]
+
+    def test_rich_list_includes_title(self, db):
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "refactoring auth")
+        sessions = db.list_sessions_rich()
+        assert sessions[0]["title"] == "refactoring auth"
+
+    def test_rich_list_source_filter(self, db):
+        db.create_session("s1", "cli")
+        db.create_session("s2", "telegram")
+        sessions = db.list_sessions_rich(source="cli")
+        assert len(sessions) == 1
+        assert sessions[0]["id"] == "s1"
+
+    def test_preview_newlines_collapsed(self, db):
+        db.create_session("s1", "cli")
+        db.append_message("s1", "user", "Line one\nLine two\nLine three")
+        sessions = db.list_sessions_rich()
+        assert "\n" not in sessions[0]["preview"]
+        assert "Line one Line two" in sessions[0]["preview"]
+
+
+class TestResolveSessionByNameOrId:
+    """Tests for the SessionDB lookups (by ID and by title) behind name-or-ID resolution."""
+
+    def test_resolve_by_id(self, db):
+        db.create_session("test-id-123", "cli")
+        session = db.get_session("test-id-123")
+        assert session is not None
+        assert session["id"] == "test-id-123"
+
+    def test_resolve_by_title_falls_back(self, db):
+        db.create_session("s1", "cli")
+        db.set_session_title("s1", "my project")
+        result = db.resolve_session_by_title("my project")
+        assert result == "s1"
--- a/tests/test_provider_parity.py
+++ b/tests/test_provider_parity.py
@@ -145,7 +145,7 @@ class TestBuildApiKwargsCodex:
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "reasoning" in kwargs
-        assert kwargs["reasoning"]["effort"] == "xhigh"
+        assert kwargs["reasoning"]["effort"] == "medium"

    def test_includes_encrypted_content_in_include(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
@@ -596,19 +596,19 @@ class TestCodexReasoningPreflight:
 # ── Reasoning effort consistency tests ───────────────────────────────────────

 class TestReasoningEffortDefaults:
-    """Verify reasoning effort defaults to xhigh across all provider paths."""
+    """Verify reasoning effort defaults to medium across all provider paths."""

-    def test_openrouter_default_xhigh(self, monkeypatch):
+    def test_openrouter_default_medium(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        reasoning = kwargs["extra_body"]["reasoning"]
-        assert reasoning["effort"] == "xhigh"
+        assert reasoning["effort"] == "medium"

-    def test_codex_default_xhigh(self, monkeypatch):
+    def test_codex_default_medium(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
-        assert kwargs["reasoning"]["effort"] == "xhigh"
+        assert kwargs["reasoning"]["effort"] == "medium"

    def test_codex_reasoning_disabled(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
--- a/tests/test_resume_display.py
+++ b/tests/test_resume_display.py
@@ -0,0 +1,488 @@
+"""Tests for session resume history display — _display_resumed_history() and
+_preload_resumed_session().
+
+Verifies that resuming a session shows a compact recap of the previous
+conversation with correct formatting, truncation, and config behavior.
+"""
+
+import os
+import sys
+from io import StringIO
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+
+def _make_cli(config_overrides=None, env_overrides=None, **kwargs):
+    """Create a HermesCLI instance with minimal mocking."""
+    import cli as _cli_mod
+    from cli import HermesCLI
+
+    _clean_config = {
+        "model": {
+            "default": "anthropic/claude-opus-4.6",
+            "base_url": "https://openrouter.ai/api/v1",
+            "provider": "auto",
+        },
+        "display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
+        "agent": {},
+        "terminal": {"env_type": "local"},
+    }
+    if config_overrides:
+        for k, v in config_overrides.items():
+            if isinstance(v, dict) and k in _clean_config and isinstance(_clean_config[k], dict):
+                _clean_config[k].update(v)
+            else:
+                _clean_config[k] = v
+
+    clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
+    if env_overrides:
+        clean_env.update(env_overrides)
+    with (
+        patch("cli.get_tool_definitions", return_value=[]),
+        patch.dict("os.environ", clean_env, clear=False),
+        patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}),
+    ):
+        return HermesCLI(**kwargs)
+
+
+# ── Sample conversation histories for tests ──────────────────────────
+
+
+def _simple_history():
+    """Two-turn conversation: user → assistant → user → assistant."""
+    return [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "What is Python?"},
+        {"role": "assistant", "content": "Python is a high-level programming language."},
+        {"role": "user", "content": "How do I install it?"},
+        {"role": "assistant", "content": "You can install Python from python.org."},
+    ]
+
+
+def _tool_call_history():
+    """Conversation with tool calls and tool results."""
+    return [
+        {"role": "system", "content": "system prompt"},
+        {"role": "user", "content": "Search for Python tutorials"},
+        {
+            "role": "assistant",
+            "content": None,
+            "tool_calls": [
+                {
+                    "id": "call_1",
+                    "type": "function",
+                    "function": {"name": "web_search", "arguments": '{"query":"python tutorials"}'},
+                },
+                {
+                    "id": "call_2",
+                    "type": "function",
+                    "function": {"name": "web_extract", "arguments": '{"urls":["https://example.com"]}'},
+                },
+            ],
+        },
+        {"role": "tool", "tool_call_id": "call_1", "content": "Found 5 results..."},
+        {"role": "tool", "tool_call_id": "call_2", "content": "Page content..."},
+        {"role": "assistant", "content": "Here are some great Python tutorials I found."},
+    ]
+
+
+def _large_history(n_exchanges=15):
+    """Build a history with many exchanges to test truncation."""
+    msgs = [{"role": "system", "content": "system prompt"}]
+    for i in range(n_exchanges):
+        msgs.append({"role": "user", "content": f"Question #{i + 1}: What is item {i + 1}?"})
+        msgs.append({"role": "assistant", "content": f"Answer #{i + 1}: Item {i + 1} is great."})
+    return msgs
+
+
+def _multimodal_history():
+    """Conversation with multimodal (image) content."""
+    return [
+        {"role": "system", "content": "system prompt"},
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What's in this image?"},
+                {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
+            ],
+        },
+        {"role": "assistant", "content": "I see a cat in the image."},
+    ]
+
+
+# ── Tests for _display_resumed_history ───────────────────────────────
+
+
+class TestDisplayResumedHistory:
+    """_display_resumed_history() renders a Rich panel with conversation recap."""
+
+    def _capture_display(self, cli_obj):
+        """Run _display_resumed_history and capture the Rich console output."""
+        buf = StringIO()
+        cli_obj.console.file = buf
+        cli_obj._display_resumed_history()
+        return buf.getvalue()
+
+    def test_simple_history_shows_user_and_assistant(self):
+        cli = _make_cli()
+        cli.conversation_history = _simple_history()
+        output = self._capture_display(cli)
+
+        assert "You:" in output
+        assert "Hermes:" in output
+        assert "What is Python?" in output
+        assert "Python is a high-level programming language." in output
+        assert "How do I install it?" in output
+
+    def test_system_messages_hidden(self):
+        cli = _make_cli()
+        cli.conversation_history = _simple_history()
+        output = self._capture_display(cli)
+
+        assert "You are a helpful assistant" not in output
+
+    def test_tool_messages_hidden(self):
+        cli = _make_cli()
+        cli.conversation_history = _tool_call_history()
+        output = self._capture_display(cli)
+
+        # Tool result content should NOT appear
+        assert "Found 5 results" not in output
+        assert "Page content" not in output
+
+    def test_tool_calls_shown_as_summary(self):
+        cli = _make_cli()
+        cli.conversation_history = _tool_call_history()
+        output = self._capture_display(cli)
+
+        assert "2 tool calls" in output
+        assert "web_search" in output
+        assert "web_extract" in output
+
+    def test_long_user_message_truncated(self):
+        cli = _make_cli()
+        long_text = "A" * 500
+        cli.conversation_history = [
+            {"role": "user", "content": long_text},
+            {"role": "assistant", "content": "OK."},
+        ]
+        output = self._capture_display(cli)
+
+        # Should have truncation indicator and NOT contain the full 500 chars
+        assert "..." in output
+        assert "A" * 500 not in output
+        # The 300-char truncated text is present but may be line-wrapped by
+        # Rich's panel renderer, so check the total A count in the output
+        a_count = output.count("A")
+        assert 200 <= a_count <= 310  # roughly 300 chars (±panel padding)
+
+    def test_long_assistant_message_truncated(self):
+        cli = _make_cli()
+        long_text = "B" * 400
+        cli.conversation_history = [
+            {"role": "user", "content": "Tell me a lot."},
+            {"role": "assistant", "content": long_text},
+        ]
+        output = self._capture_display(cli)
+
+        assert "..." in output
+        assert "B" * 400 not in output
+
+    def test_multiline_assistant_truncated(self):
+        cli = _make_cli()
+        multi = "\n".join([f"Line {i}" for i in range(20)])
+        cli.conversation_history = [
+            {"role": "user", "content": "Show me lines."},
+            {"role": "assistant", "content": multi},
+        ]
+        output = self._capture_display(cli)
+
+        # First 3 lines should be there
+        assert "Line 0" in output
+        assert "Line 1" in output
+        assert "Line 2" in output
+        # Line 19 should NOT be there (truncated after 3 lines)
+        assert "Line 19" not in output
+
+    def test_large_history_shows_truncation_indicator(self):
+        cli = _make_cli()
+        cli.conversation_history = _large_history(n_exchanges=15)
+        output = self._capture_display(cli)
+
+        # Should show "earlier messages" indicator
+        assert "earlier messages" in output
+        # Last question should still be visible
+        assert "Question #15" in output
+
+    def test_multimodal_content_handled(self):
+        cli = _make_cli()
+        cli.conversation_history = _multimodal_history()
+        output = self._capture_display(cli)
+
+        assert "What's in this image?" in output
+        assert "[image]" in output
+
+    def test_empty_history_no_output(self):
+        cli = _make_cli()
+        cli.conversation_history = []
+        output = self._capture_display(cli)
+
+        assert output.strip() == ""
+
+    def test_minimal_config_suppresses_display(self):
+        cli = _make_cli(config_overrides={"display": {"resume_display": "minimal"}})
+        # resume_display is captured as an instance variable during __init__
+        assert cli.resume_display == "minimal"
+        cli.conversation_history = _simple_history()
+        output = self._capture_display(cli)
+
+        assert output.strip() == ""
+
+    def test_panel_has_title(self):
+        cli = _make_cli()
+        cli.conversation_history = _simple_history()
+        output = self._capture_display(cli)
+
+        assert "Previous Conversation" in output
+
+    def test_assistant_with_no_content_no_tools_skipped(self):
+        """Assistant messages with no visible output (e.g. pure reasoning)
+        are skipped in the recap."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": None},
+        ]
+        output = self._capture_display(cli)
+
+        # The assistant entry should be skipped, only the user message shown
+        assert "You:" in output
+        assert "Hermes:" not in output
+
+    def test_only_system_messages_no_output(self):
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "system", "content": "You are helpful."},
+        ]
+        output = self._capture_display(cli)
+
+        assert output.strip() == ""
+
+    def test_reasoning_scratchpad_stripped(self):
+        """<REASONING_SCRATCHPAD> blocks should be stripped from display."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Think about this"},
+            {
+                "role": "assistant",
+                "content": (
+                    "<REASONING_SCRATCHPAD>\nLet me think step by step.\n"
+                    "</REASONING_SCRATCHPAD>\n\nThe answer is 42."
+                ),
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "REASONING_SCRATCHPAD" not in output
+        assert "Let me think step by step" not in output
+        assert "The answer is 42" in output
+
+    def test_pure_reasoning_message_skipped(self):
+        """Assistant messages that are only reasoning should be skipped."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Hello"},
+            {
+                "role": "assistant",
+                "content": "<REASONING_SCRATCHPAD>\nJust thinking...\n</REASONING_SCRATCHPAD>",
+            },
+            {"role": "assistant", "content": "Hi there!"},
+        ]
+        output = self._capture_display(cli)
+
+        assert "Just thinking" not in output
+        assert "Hi there!" in output
+
+    def test_assistant_with_text_and_tool_calls(self):
+        """When an assistant message has both text content AND tool_calls."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Do something complex"},
+            {
+                "role": "assistant",
+                "content": "Let me search for that.",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {"name": "terminal", "arguments": '{"command":"ls"}'},
+                    }
+                ],
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "Let me search for that." in output
+        assert "1 tool call" in output
+        assert "terminal" in output
+
+
+# ── Tests for _preload_resumed_session ──────────────────────────────
+
+
+class TestPreloadResumedSession:
+    """_preload_resumed_session() loads session from DB early."""
+
+    def test_returns_false_when_not_resumed(self):
+        cli = _make_cli()
+        assert cli._preload_resumed_session() is False
+
+    def test_returns_false_when_no_session_db(self):
+        cli = _make_cli(resume="test_session_id")
+        cli._session_db = None
+        assert cli._preload_resumed_session() is False
+
+    def test_returns_false_when_session_not_found(self):
+        cli = _make_cli(resume="nonexistent_session")
+        mock_db = MagicMock()
+        mock_db.get_session.return_value = None
+        cli._session_db = mock_db
+
+        buf = StringIO()
+        cli.console.file = buf
+        result = cli._preload_resumed_session()
+
+        assert result is False
+        output = buf.getvalue()
+        assert "Session not found" in output
+
+    def test_returns_false_when_session_has_no_messages(self):
+        cli = _make_cli(resume="empty_session")
+        mock_db = MagicMock()
+        mock_db.get_session.return_value = {"id": "empty_session", "title": None}
+        mock_db.get_messages_as_conversation.return_value = []
+        cli._session_db = mock_db
+
+        buf = StringIO()
+        cli.console.file = buf
+        result = cli._preload_resumed_session()
+
+        assert result is False
+        output = buf.getvalue()
+        assert "no messages" in output
+
+    def test_loads_session_successfully(self):
+        cli = _make_cli(resume="good_session")
+        messages = _simple_history()
+        mock_db = MagicMock()
+        mock_db.get_session.return_value = {"id": "good_session", "title": "Test Session"}
+        mock_db.get_messages_as_conversation.return_value = messages
+        cli._session_db = mock_db
+
+        buf = StringIO()
+        cli.console.file = buf
+        result = cli._preload_resumed_session()
+
+        assert result is True
+        assert cli.conversation_history == messages
+        output = buf.getvalue()
+        assert "Resumed session" in output
+        assert "good_session" in output
+        assert "Test Session" in output
+        assert "2 user messages" in output
+
+    def test_reopens_session_in_db(self):
+        cli = _make_cli(resume="reopen_session")
+        messages = [{"role": "user", "content": "hi"}]
+        mock_db = MagicMock()
+        mock_db.get_session.return_value = {"id": "reopen_session", "title": None}
+        mock_db.get_messages_as_conversation.return_value = messages
+        mock_conn = MagicMock()
+        mock_db._conn = mock_conn
+        cli._session_db = mock_db
+
+        buf = StringIO()
+        cli.console.file = buf
+        cli._preload_resumed_session()
+
+        # Should have executed UPDATE to clear ended_at
+        mock_conn.execute.assert_called_once()
+        call_args = mock_conn.execute.call_args
+        assert "ended_at = NULL" in call_args[0][0]
+        mock_conn.commit.assert_called_once()
+
+    def test_singular_user_message_grammar(self):
+        """1 user message should say 'message' not 'messages'."""
+        cli = _make_cli(resume="one_msg_session")
+        messages = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+        ]
+        mock_db = MagicMock()
+        mock_db.get_session.return_value = {"id": "one_msg_session", "title": None}
+        mock_db.get_messages_as_conversation.return_value = messages
+        mock_db._conn = MagicMock()
+        cli._session_db = mock_db
+
+        buf = StringIO()
+        cli.console.file = buf
+        cli._preload_resumed_session()
+
+        output = buf.getvalue()
+        assert "1 user message," in output
+        assert "1 user messages" not in output
+
+
+# ── Integration: _init_agent skips when preloaded ────────────────────
+
+
+class TestInitAgentSkipsPreloaded:
+    """_init_agent() should skip DB load when history is already populated."""
+
+    def test_init_agent_skips_db_when_preloaded(self):
+        """If conversation_history is already set, _init_agent should not
+        reload from the DB."""
+        cli = _make_cli(resume="preloaded_session")
+        cli.conversation_history = _simple_history()
+
+        mock_db = MagicMock()
+        cli._session_db = mock_db
+
+        # _init_agent will fail at credential resolution (no real API key),
+        # but the session-loading block should be skipped entirely
+        with patch.object(cli, "_ensure_runtime_credentials", return_value=False):
+            cli._init_agent()
+
+        # get_messages_as_conversation should NOT have been called
+        mock_db.get_messages_as_conversation.assert_not_called()
+
+
+# ── Config default tests ─────────────────────────────────────────────
+
+
+class TestResumeDisplayConfig:
+    """resume_display config option defaults and behavior."""
+
+    def test_default_config_has_resume_display(self):
+        """DEFAULT_CONFIG in hermes_cli/config.py includes resume_display."""
+        from hermes_cli.config import DEFAULT_CONFIG
+        display = DEFAULT_CONFIG.get("display", {})
+        assert "resume_display" in display
+        assert display["resume_display"] == "full"
+
+    def test_cli_defaults_have_resume_display(self):
+        """cli.py load_cli_config defaults include resume_display."""
+        import cli as _cli_mod
+        from cli import load_cli_config
+
+        with (
+            patch("pathlib.Path.exists", return_value=False),
+            patch.dict("os.environ", {"LLM_MODEL": ""}, clear=False),
+        ):
+            config = load_cli_config()
+
+        display = config.get("display", {})
+        assert display.get("resume_display") == "full"
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -280,22 +280,21 @@ class TestMaskApiKey:


 class TestInit:
-    def test_anthropic_base_url_fails_fast(self):
-        """Anthropic native endpoints should error before building an OpenAI client."""
+    def test_anthropic_base_url_accepted(self):
+        """Anthropic base URLs should be accepted (OpenAI-compatible endpoint)."""
        with (
            patch("run_agent.get_tool_definitions", return_value=[]),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI") as mock_openai,
        ):
-            with pytest.raises(ValueError, match="not supported yet"):
-                AIAgent(
-                    api_key="test-key-1234567890",
-                    base_url="https://api.anthropic.com/v1/messages",
-                    quiet_mode=True,
-                    skip_context_files=True,
-                    skip_memory=True,
-                )
-            mock_openai.assert_not_called()
+            AIAgent(
+                api_key="test-key-1234567890",
+                base_url="https://api.anthropic.com/v1/",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+            mock_openai.assert_called_once()

    def test_prompt_caching_claude_openrouter(self):
        """Claude model via OpenRouter should enable prompt caching."""
@@ -498,12 +497,12 @@ class TestBuildApiKwargs:
        assert kwargs["extra_body"]["provider"]["only"] == ["Anthropic"]

    def test_reasoning_config_default_openrouter(self, agent):
-        """Default reasoning config for OpenRouter should be xhigh."""
+        """Default reasoning config for OpenRouter should be medium."""
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        reasoning = kwargs["extra_body"]["reasoning"]
        assert reasoning["enabled"] is True
-        assert reasoning["effort"] == "xhigh"
+        assert reasoning["effort"] == "medium"

    def test_reasoning_config_custom(self, agent):
        agent.reasoning_config = {"enabled": False}
@@ -1041,3 +1040,136 @@ class TestMaxTokensParam:
        agent.base_url = "https://openrouter.ai/api/v1/api.openai.com"
        result = agent._max_tokens_param(4096)
        assert result == {"max_tokens": 4096}
+
+
+# ---------------------------------------------------------------------------
+# System prompt stability for prompt caching
+# ---------------------------------------------------------------------------
+
+class TestSystemPromptStability:
+    """Verify that the system prompt stays stable across turns for cache hits."""
+
+    def test_stored_prompt_reused_for_continuing_session(self, agent):
+        """When conversation_history is non-empty and session DB has a stored
+        prompt, it should be reused instead of rebuilding from disk."""
+        stored = "You are helpful. [stored from turn 1]"
+        mock_db = MagicMock()
+        mock_db.get_session.return_value = {"system_prompt": stored}
+        agent._session_db = mock_db
+
+        # Simulate a continuing session with history
+        history = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+        ]
+
+        # First call — _cached_system_prompt is None, history is non-empty
+        agent._cached_system_prompt = None
+
+        # Nothing is patched here and run_conversation is never called; the
+        # prompt-caching block is mirrored inline below, so it must be kept in
+        # sync with the real implementation by hand.
+        conversation_history = history
+
+        # The block under test (from run_conversation):
+        if agent._cached_system_prompt is None:
+            stored_prompt = None
+            if conversation_history and agent._session_db:
+                try:
+                    session_row = agent._session_db.get_session(agent.session_id)
+                    if session_row:
+                        stored_prompt = session_row.get("system_prompt") or None
+                except Exception:
+                    pass
+
+            if stored_prompt:
+                agent._cached_system_prompt = stored_prompt
+
+        assert agent._cached_system_prompt == stored
+        mock_db.get_session.assert_called_once_with(agent.session_id)
+
+    def test_fresh_build_when_no_history(self, agent):
+        """On the first turn (no history), system prompt should be built fresh."""
+        mock_db = MagicMock()
+        agent._session_db = mock_db
+
+        agent._cached_system_prompt = None
+        conversation_history = []
+
+        # The block under test:
+        if agent._cached_system_prompt is None:
+            stored_prompt = None
+            if conversation_history and agent._session_db:
+                session_row = agent._session_db.get_session(agent.session_id)
+                if session_row:
+                    stored_prompt = session_row.get("system_prompt") or None
+
+            if stored_prompt:
+                agent._cached_system_prompt = stored_prompt
+            else:
+                agent._cached_system_prompt = agent._build_system_prompt()
+
+        # Should have built fresh, not queried the DB
+        mock_db.get_session.assert_not_called()
+        assert agent._cached_system_prompt is not None
+        assert "Hermes Agent" in agent._cached_system_prompt
+
+    def test_fresh_build_when_db_has_no_prompt(self, agent):
+        """If the session DB has no stored prompt, build fresh even with history."""
+        mock_db = MagicMock()
+        mock_db.get_session.return_value = {"system_prompt": ""}
+        agent._session_db = mock_db
+
+        agent._cached_system_prompt = None
+        conversation_history = [{"role": "user", "content": "hi"}]
+
+        if agent._cached_system_prompt is None:
+            stored_prompt = None
+            if conversation_history and agent._session_db:
+                try:
+                    session_row = agent._session_db.get_session(agent.session_id)
+                    if session_row:
+                        stored_prompt = session_row.get("system_prompt") or None
+                except Exception:
+                    pass
+
+            if stored_prompt:
+                agent._cached_system_prompt = stored_prompt
+            else:
+                agent._cached_system_prompt = agent._build_system_prompt()
+
+        # Empty string is falsy, so should fall through to fresh build
+        assert "Hermes Agent" in agent._cached_system_prompt
+
+    def test_honcho_context_baked_into_prompt_on_first_turn(self, agent):
+        """Honcho context should be baked into _cached_system_prompt on
+        the first turn, not injected separately per API call."""
+        agent._honcho_context = "User prefers Python over JavaScript."
+        agent._cached_system_prompt = None
+
+        # Simulate first turn: build fresh and bake in Honcho
+        agent._cached_system_prompt = agent._build_system_prompt()
+        if agent._honcho_context:
+            agent._cached_system_prompt = (
+                agent._cached_system_prompt + "\n\n" + agent._honcho_context
+            ).strip()
+
+        assert "User prefers Python over JavaScript" in agent._cached_system_prompt
+
+    def test_honcho_prefetch_skipped_on_continuing_session(self):
+        """Honcho prefetch should not be called when conversation_history
+        is non-empty (continuing session)."""
+        conversation_history = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi there"},
+        ]
+
+        # The guard: `not conversation_history` is False when history exists
+        should_prefetch = not conversation_history
+        assert should_prefetch is False
+
+    def test_honcho_prefetch_runs_on_first_turn(self):
+        """Honcho prefetch should run when conversation_history is empty."""
+        conversation_history = []
+        should_prefetch = not conversation_history
+        assert should_prefetch is True
--- a/tests/test_worktree.py
+++ b/tests/test_worktree.py
@@ -0,0 +1,635 @@
+"""Tests for git worktree isolation (CLI --worktree / -w flag).
+
+Verifies worktree creation, cleanup, .worktreeinclude handling,
+.gitignore management, and integration with the CLI.  (#652)
+"""
+
+import os
+import shutil
+import subprocess
+import pytest
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+
+@pytest.fixture
+def git_repo(tmp_path):
+    """Create a temporary git repo for testing."""
+    repo = tmp_path / "test-repo"
+    repo.mkdir()
+    subprocess.run(["git", "init"], cwd=repo, capture_output=True)
+    subprocess.run(
+        ["git", "config", "user.email", "test@test.com"],
+        cwd=repo, capture_output=True,
+    )
+    subprocess.run(
+        ["git", "config", "user.name", "Test"],
+        cwd=repo, capture_output=True,
+    )
+    # Create initial commit (worktrees need at least one commit)
+    (repo / "README.md").write_text("# Test Repo\n")
+    subprocess.run(["git", "add", "."], cwd=repo, capture_output=True)
+    subprocess.run(
+        ["git", "commit", "-m", "Initial commit"],
+        cwd=repo, capture_output=True,
+    )
+    return repo
+
+
+# ---------------------------------------------------------------------------
+# Lightweight reimplementations for testing (avoid importing cli.py)
+# ---------------------------------------------------------------------------
+
+def _git_repo_root(cwd=None):
+    """Test version of _git_repo_root: returns the top-level path, or None on any git failure."""
+    try:
+        result = subprocess.run(
+            ["git", "rev-parse", "--show-toplevel"],
+            capture_output=True, text=True, timeout=5,
+            cwd=cwd,
+        )
+        if result.returncode == 0:
+            return result.stdout.strip()
+    except Exception:
+        pass
+    return None
+
+
+def _setup_worktree(repo_root):
+    """Test version of _setup_worktree — creates a worktree; returns its info dict or None on failure."""
+    import uuid
+    short_id = uuid.uuid4().hex[:8]
+    wt_name = f"hermes-{short_id}"
+    branch_name = f"hermes/{wt_name}"
+
+    worktrees_dir = Path(repo_root) / ".worktrees"
+    worktrees_dir.mkdir(parents=True, exist_ok=True)
+    wt_path = worktrees_dir / wt_name
+
+    result = subprocess.run(
+        ["git", "worktree", "add", str(wt_path), "-b", branch_name, "HEAD"],
+        capture_output=True, text=True, timeout=30, cwd=repo_root,
+    )
+    if result.returncode != 0:
+        return None
+
+    return {
+        "path": str(wt_path),
+        "branch": branch_name,
+        "repo_root": repo_root,
+    }
+
+
+def _cleanup_worktree(info):
+    """Test version of _cleanup_worktree: True if removed, False if dirty, None if already gone."""
+    wt_path = info["path"]
+    branch = info["branch"]
+    repo_root = info["repo_root"]
+
+    if not Path(wt_path).exists():
+        return
+
+    # Check for uncommitted changes
+    status = subprocess.run(
+        ["git", "status", "--porcelain"],
+        capture_output=True, text=True, timeout=10, cwd=wt_path,
+    )
+    has_changes = bool(status.stdout.strip())
+
+    if has_changes:
+        return False  # Did not clean up
+
+    subprocess.run(
+        ["git", "worktree", "remove", wt_path, "--force"],
+        capture_output=True, text=True, timeout=15, cwd=repo_root,
+    )
+    subprocess.run(
+        ["git", "branch", "-D", branch],
+        capture_output=True, text=True, timeout=10, cwd=repo_root,
+    )
+    return True  # Cleaned up
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestGitRepoDetection:
+    """Test git repo root detection."""
+
+    def test_detects_git_repo(self, git_repo):
+        root = _git_repo_root(cwd=str(git_repo))
+        assert root is not None
+        assert Path(root).resolve() == git_repo.resolve()
+
+    def test_detects_subdirectory(self, git_repo):
+        subdir = git_repo / "src" / "lib"
+        subdir.mkdir(parents=True)
+        root = _git_repo_root(cwd=str(subdir))
+        assert root is not None
+        assert Path(root).resolve() == git_repo.resolve()
+
+    def test_returns_none_outside_repo(self, tmp_path):
+        # tmp_path itself is not a git repo
+        bare_dir = tmp_path / "not-a-repo"
+        bare_dir.mkdir()
+        root = _git_repo_root(cwd=str(bare_dir))
+        assert root is None
+
+
+class TestWorktreeCreation:
+    """Test worktree setup: returned info, branch, isolation, and contents."""
+
+    def test_creates_worktree(self, git_repo):
+        """_setup_worktree returns path/branch/repo_root for a usable worktree."""
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+        assert Path(info["path"]).exists()
+        assert info["branch"].startswith("hermes/hermes-")
+        assert info["repo_root"] == str(git_repo)
+        # Verify it's a valid git worktree
+        result = subprocess.run(
+            ["git", "rev-parse", "--is-inside-work-tree"],
+            capture_output=True, text=True, cwd=info["path"],
+        )
+        assert result.stdout.strip() == "true"
+
+    def test_worktree_has_own_branch(self, git_repo):
+        """The branch checked out in the worktree is the one reported in info."""
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+        # Check branch name in worktree
+        result = subprocess.run(
+            ["git", "branch", "--show-current"],
+            capture_output=True, text=True, cwd=info["path"],
+        )
+        assert result.stdout.strip() == info["branch"]
+
+    def test_worktree_is_independent(self, git_repo):
+        """Two worktrees from the same repo are independent."""
+        info1 = _setup_worktree(str(git_repo))
+        info2 = _setup_worktree(str(git_repo))
+        assert info1 is not None
+        assert info2 is not None
+        assert info1["path"] != info2["path"]
+        assert info1["branch"] != info2["branch"]
+
+        # Create a file in worktree 1
+        (Path(info1["path"]) / "only-in-wt1.txt").write_text("hello")
+
+        # It should NOT appear in worktree 2
+        assert not (Path(info2["path"]) / "only-in-wt1.txt").exists()
+
+    def test_worktrees_dir_created(self, git_repo):
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+        assert (git_repo / ".worktrees").is_dir()
+
+    def test_worktree_has_repo_files(self, git_repo):
+        """Worktree should contain the repo's tracked files."""
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+        assert (Path(info["path"]) / "README.md").exists()
+
+
+class TestWorktreeCleanup:
+    """Test worktree cleanup on exit."""
+
+    def test_clean_worktree_removed(self, git_repo):
+        """A worktree with no changes is removed and cleanup reports True."""
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+        assert Path(info["path"]).exists()
+        result = _cleanup_worktree(info)
+        assert result is True
+        assert not Path(info["path"]).exists()
+
+    def test_dirty_worktree_kept(self, git_repo):
+        """A worktree with staged changes is kept and cleanup reports False."""
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+        # Make uncommitted changes
+        (Path(info["path"]) / "new-file.txt").write_text("uncommitted")
+        subprocess.run(
+            ["git", "add", "new-file.txt"],
+            cwd=info["path"], capture_output=True,
+        )
+
+        result = _cleanup_worktree(info)
+        assert result is False
+        assert Path(info["path"]).exists()  # Still there
+
+    def test_branch_deleted_on_cleanup(self, git_repo):
+        """After cleanup, the worktree's branch is no longer listed by git."""
+        info = _setup_worktree(str(git_repo))
+        assert info is not None  # fail clearly instead of a TypeError below
+        branch = info["branch"]
+        _cleanup_worktree(info)
+        # Branch should be gone
+        result = subprocess.run(
+            ["git", "branch", "--list", branch],
+            capture_output=True, text=True, cwd=str(git_repo),
+        )
+        assert branch not in result.stdout
+
+    def test_cleanup_nonexistent_worktree(self, git_repo):
+        """Cleanup should handle already-removed worktrees gracefully."""
+        info = {
+            "path": str(git_repo / ".worktrees" / "nonexistent"),
+            "branch": "hermes/nonexistent",
+            "repo_root": str(git_repo),
+        }
+        # Should not raise
+        _cleanup_worktree(info)
+
+
+class TestWorktreeInclude:
+    """Test .worktreeinclude file handling."""
+
+    def test_copies_included_files(self, git_repo):
+        """Files listed in .worktreeinclude should be copied to the worktree."""
+        # Create a .env file (gitignored)
+        (git_repo / ".env").write_text("SECRET=abc123")
+        (git_repo / ".gitignore").write_text(".env\n.worktrees/\n")
+        subprocess.run(
+            ["git", "add", ".gitignore"],
+            cwd=str(git_repo), capture_output=True,
+        )
+        subprocess.run(
+            ["git", "commit", "-m", "Add gitignore"],
+            cwd=str(git_repo), capture_output=True,
+        )
+
+        # Create .worktreeinclude
+        (git_repo / ".worktreeinclude").write_text(".env\n")
+
+        # Create the worktree; the include entries are then copied by hand below
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+
+        # Manually copy .worktreeinclude entries (mirrors cli.py logic)
+        import shutil
+        include_file = git_repo / ".worktreeinclude"
+        wt_path = Path(info["path"])
+        for line in include_file.read_text().splitlines():
+            entry = line.strip()
+            if not entry or entry.startswith("#"):
+                continue
+            src = git_repo / entry
+            dst = wt_path / entry
+            if src.is_file():
+                dst.parent.mkdir(parents=True, exist_ok=True)
+                shutil.copy2(str(src), str(dst))
+
+        # Verify .env was copied
+        assert (wt_path / ".env").exists()
+        assert (wt_path / ".env").read_text() == "SECRET=abc123"
+
+    def test_ignores_comments_and_blanks(self, git_repo):
+        """Comments and blank lines in .worktreeinclude should be skipped."""
+        (git_repo / ".worktreeinclude").write_text(
+            "# This is a comment\n"
+            "\n"
+            "  # Another comment\n"
+        )
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+        # Should not crash — just skip all lines
+
+
+class TestGitignoreManagement:
+    """Test that .worktrees/ is added to .gitignore."""
+
+    def test_adds_to_gitignore(self, git_repo):
+        """After setup plus the cli.py-style append, .gitignore lists .worktrees/."""
+        # Remove any existing .gitignore
+        gitignore = git_repo / ".gitignore"
+        if gitignore.exists():
+            gitignore.unlink()
+
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+
+        # Now manually add .worktrees/ to .gitignore (mirrors cli.py logic)
+        _ignore_entry = ".worktrees/"
+        existing = gitignore.read_text() if gitignore.exists() else ""
+        if _ignore_entry not in existing.splitlines():
+            with open(gitignore, "a") as f:
+                if existing and not existing.endswith("\n"):
+                    f.write("\n")
+                f.write(f"{_ignore_entry}\n")
+
+        content = gitignore.read_text()
+        assert ".worktrees/" in content
+
+    def test_does_not_duplicate_gitignore_entry(self, git_repo):
+        """An existing .worktrees/ line is found by the exact-line membership check."""
+        gitignore = git_repo / ".gitignore"
+        gitignore.write_text(".worktrees/\n")
+
+        # The check should see it's already there
+        existing = gitignore.read_text()
+        assert ".worktrees/" in existing.splitlines()
+
+
+class TestMultipleWorktrees:
+    """Test running multiple worktrees concurrently (the core use case)."""
+
+    def test_ten_concurrent_worktrees(self, git_repo):
+        """Create 10 worktrees — simulating 10 parallel agents."""
+        worktrees = []
+        for _ in range(10):
+            info = _setup_worktree(str(git_repo))
+            assert info is not None
+            worktrees.append(info)
+
+        # All should exist and be independent
+        paths = [info["path"] for info in worktrees]
+        assert len(set(paths)) == 10  # All unique
+
+        # Each should have the repo files
+        for info in worktrees:
+            assert (Path(info["path"]) / "README.md").exists()
+
+        # Edit a file in one worktree
+        (Path(worktrees[0]["path"]) / "README.md").write_text("Modified in wt0")
+
+        # Others should be unaffected
+        for info in worktrees[1:]:
+            assert (Path(info["path"]) / "README.md").read_text() == "# Test Repo\n"
+
+        # List worktrees via git
+        result = subprocess.run(
+            ["git", "worktree", "list"],
+            capture_output=True, text=True, cwd=str(git_repo),
+        )
+        # Should have 11 entries: main + 10 worktrees
+        lines = [line for line in result.stdout.strip().splitlines() if line.strip()]
+        assert len(lines) == 11
+
+        # Cleanup all
+        for info in worktrees:
+            # Discard changes first so cleanup works
+            subprocess.run(
+                ["git", "checkout", "--", "."],
+                cwd=info["path"], capture_output=True,
+            )
+            _cleanup_worktree(info)
+
+        # All should be removed
+        for info in worktrees:
+            assert not Path(info["path"]).exists()
+
+
+class TestWorktreeDirectorySymlink:
+    """Test .worktreeinclude with directories (symlinked)."""
+
+    def test_symlinks_directory(self, git_repo):
+        """A listed directory is reachable via a symlink (created here if absent)."""
+        # Create a .venv directory
+        venv_dir = git_repo / ".venv" / "lib"
+        venv_dir.mkdir(parents=True)
+        (venv_dir / "marker.txt").write_text("venv marker")
+        (git_repo / ".gitignore").write_text(".venv/\n.worktrees/\n")
+        subprocess.run(
+            ["git", "add", ".gitignore"], cwd=str(git_repo), capture_output=True
+        )
+        subprocess.run(
+            ["git", "commit", "-m", "gitignore"], cwd=str(git_repo), capture_output=True
+        )
+
+        (git_repo / ".worktreeinclude").write_text(".venv/\n")
+
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+
+        wt_path = Path(info["path"])
+        src = git_repo / ".venv"
+        dst = wt_path / ".venv"
+
+        # Manually symlink (mirrors cli.py logic)
+        if not dst.exists():
+            dst.parent.mkdir(parents=True, exist_ok=True)
+            os.symlink(str(src.resolve()), str(dst))
+
+        assert dst.is_symlink()
+        assert (dst / "lib" / "marker.txt").read_text() == "venv marker"
+
+
+class TestStaleWorktreePruning:
+    """Test stale-worktree pruning rules via a local copy of the prune loop."""
+
+    def test_prunes_old_clean_worktree(self, git_repo):
+        """Old clean worktrees should be removed on prune."""
+        import time
+
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+        assert Path(info["path"]).exists()
+
+        # Make the worktree look old (set mtime to 25h ago)
+        old_time = time.time() - (25 * 3600)
+        os.utime(info["path"], (old_time, old_time))
+
+        # Reimplementation of prune logic (said to match cli.py; cli.py is not called)
+        worktrees_dir = git_repo / ".worktrees"
+        cutoff = time.time() - (24 * 3600)
+
+        for entry in worktrees_dir.iterdir():
+            if not entry.is_dir() or not entry.name.startswith("hermes-"):
+                continue
+            try:
+                mtime = entry.stat().st_mtime
+                if mtime > cutoff:
+                    continue
+            except Exception:
+                continue
+
+            status = subprocess.run(
+                ["git", "status", "--porcelain"],
+                capture_output=True, text=True, timeout=5, cwd=str(entry),
+            )
+            if status.stdout.strip():
+                continue
+
+            branch_result = subprocess.run(
+                ["git", "branch", "--show-current"],
+                capture_output=True, text=True, timeout=5, cwd=str(entry),
+            )
+            branch = branch_result.stdout.strip()
+            subprocess.run(
+                ["git", "worktree", "remove", str(entry), "--force"],
+                capture_output=True, text=True, timeout=15, cwd=str(git_repo),
+            )
+            if branch:
+                subprocess.run(
+                    ["git", "branch", "-D", branch],
+                    capture_output=True, text=True, timeout=10, cwd=str(git_repo),
+                )
+
+        assert not Path(info["path"]).exists()
+
+    def test_keeps_recent_worktree(self, git_repo):
+        """Recent worktrees should NOT be pruned."""
+        import time
+
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+
+        # Don't modify mtime — it's recent
+        worktrees_dir = git_repo / ".worktrees"
+        cutoff = time.time() - (24 * 3600)
+
+        pruned = False
+        for entry in worktrees_dir.iterdir():
+            if not entry.is_dir() or not entry.name.startswith("hermes-"):
+                continue
+            mtime = entry.stat().st_mtime
+            if mtime > cutoff:
+                continue  # Too recent
+            pruned = True
+
+        assert not pruned
+        assert Path(info["path"]).exists()
+
+    def test_keeps_dirty_old_worktree(self, git_repo):
+        """Old worktrees with uncommitted changes should NOT be pruned."""
+        import time
+
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+
+        # Make it dirty
+        (Path(info["path"]) / "dirty.txt").write_text("uncommitted")
+        subprocess.run(
+            ["git", "add", "dirty.txt"],
+            cwd=info["path"], capture_output=True,
+        )
+
+        # Make it old
+        old_time = time.time() - (25 * 3600)
+        os.utime(info["path"], (old_time, old_time))
+
+        # Only the dirtiness check is evaluated here; no prune step is run
+        status = subprocess.run(
+            ["git", "status", "--porcelain"],
+            capture_output=True, text=True, cwd=info["path"],
+        )
+        has_changes = bool(status.stdout.strip())
+        assert has_changes  # Should be dirty → not pruned
+        assert Path(info["path"]).exists()
+
+
+class TestEdgeCases:
+    """Test edge cases for robustness."""
+
+    def test_no_commits_repo(self, tmp_path):
+        """Worktree creation should fail gracefully on a repo with no commits."""
+        repo = tmp_path / "empty-repo"
+        repo.mkdir()
+        # check=True: a failed `git init` must not leave a plain directory under test.
+        subprocess.run(["git", "init"], cwd=str(repo), capture_output=True, check=True)
+        info = _setup_worktree(str(repo))
+        assert info is None  # Should fail gracefully
+
+    def test_not_a_git_repo(self, tmp_path):
+        """Repo detection should return None for non-git directories."""
+        bare = tmp_path / "not-git"
+        bare.mkdir()
+        root = _git_repo_root(cwd=str(bare))
+        assert root is None
+
+    def test_worktrees_dir_already_exists(self, git_repo):
+        """Should work fine if .worktrees/ already exists."""
+        (git_repo / ".worktrees").mkdir(exist_ok=True)
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+        assert Path(info["path"]).exists()
+
+
+class TestCLIFlagLogic:
+    """Test the flag/config OR logic; each test restates the expression locally."""
+
+    def test_worktree_flag_triggers(self):
+        """--worktree flag should trigger worktree creation."""
+        worktree = True
+        w = False
+        config_worktree = False
+        use_worktree = worktree or w or config_worktree
+        assert use_worktree
+
+    def test_w_flag_triggers(self):
+        """-w flag should trigger worktree creation."""
+        worktree = False
+        w = True
+        config_worktree = False
+        use_worktree = worktree or w or config_worktree
+        assert use_worktree
+
+    def test_config_triggers(self):
+        """worktree: true in config should trigger worktree creation."""
+        worktree = False
+        w = False
+        config_worktree = True
+        use_worktree = worktree or w or config_worktree
+        assert use_worktree
+
+    def test_none_set_no_trigger(self):
+        """No flags and no config should not trigger."""
+        worktree = False
+        w = False
+        config_worktree = False
+        use_worktree = worktree or w or config_worktree
+        assert not use_worktree
+
+
+class TestTerminalCWDIntegration:
+    """Test that TERMINAL_CWD is correctly set to the worktree path."""
+
+    def test_terminal_cwd_set(self, git_repo, monkeypatch):
+        """After worktree setup, TERMINAL_CWD should point to the worktree."""
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+
+        # This is what main() does. monkeypatch undoes the change on teardown,
+        # restoring any previous TERMINAL_CWD even when an assertion fails,
+        # so the variable cannot leak into (or be deleted for) other tests.
+        monkeypatch.setenv("TERMINAL_CWD", info["path"])
+
+        assert os.environ["TERMINAL_CWD"] == info["path"]
+        assert Path(os.environ["TERMINAL_CWD"]).exists()
+
+    def test_terminal_cwd_is_valid_git_repo(self, git_repo):
+        """The TERMINAL_CWD worktree should be a valid git working tree."""
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+
+        result = subprocess.run(
+            ["git", "rev-parse", "--is-inside-work-tree"],
+            capture_output=True, text=True, cwd=info["path"],
+        )
+        assert result.stdout.strip() == "true"
+
+
+class TestSystemPromptInjection:
+    """Test that the agent gets worktree context in its system prompt."""
+
+    def test_prompt_note_format(self, git_repo):
+        """The worktree note embeds path, branch, repo root and key phrases."""
+        info = _setup_worktree(str(git_repo))
+        assert info is not None
+
+        # This is what main() does:
+        note = (
+            "\n\n[System note: You are working in an isolated git worktree at "
+            f"{info['path']}. Your branch is `{info['branch']}`. "
+            "Changes here do not affect the main working tree or other agents. "
+            "Remember to commit and push your changes, and create a PR if appropriate. "
+            f"The original repo is at {info['repo_root']}.]"
+        )
+
+        # Every identifying value from info must be embedded in the note.
+        required = (info["path"], info["branch"], info["repo_root"])
+        assert all(fragment in note for fragment in required)
+        assert "isolated git worktree" in note
+        assert "commit and push" in note
--- a/tests/tools/test_browser_console.py
+++ b/tests/tools/test_browser_console.py
@@ -0,0 +1,276 @@
+"""Tests for browser_console tool and browser_vision annotate param."""
+
+import json
+import os
+import sys
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
+
+
+# ── browser_console ──────────────────────────────────────────────────
+
+
+class TestBrowserConsole:
+    """browser_console() returns console messages + JS errors in one call."""
+
+    def test_returns_console_messages_and_errors(self):
+        """Messages and errors from the two commands are both returned and counted."""
+        from tools.browser_tool import browser_console
+        console_response = {
+            "success": True,
+            "data": {
+                "messages": [
+                    {"text": "hello", "type": "log", "timestamp": 1},
+                    {"text": "oops", "type": "error", "timestamp": 2},
+                ]
+            },
+        }
+        errors_response = {
+            "success": True,
+            "data": {
+                "errors": [
+                    {"message": "Uncaught TypeError", "timestamp": 3},
+                ]
+            },
+        }
+
+        with patch("tools.browser_tool._run_browser_command") as mock_cmd:
+            mock_cmd.side_effect = [console_response, errors_response]
+            result = json.loads(browser_console(task_id="test"))
+
+        assert result["success"] is True
+        assert result["total_messages"] == 2
+        assert result["total_errors"] == 1
+        assert result["console_messages"][0]["text"] == "hello"
+        assert result["console_messages"][1]["text"] == "oops"
+        assert result["js_errors"][0]["message"] == "Uncaught TypeError"
+
+    def test_passes_clear_flag(self):
+        """clear=True forwards --clear to both the console and errors commands."""
+        from tools.browser_tool import browser_console
+        empty = {"success": True, "data": {"messages": [], "errors": []}}
+        with patch("tools.browser_tool._run_browser_command", return_value=empty) as mock_cmd:
+            browser_console(clear=True, task_id="test")
+
+        calls = mock_cmd.call_args_list
+        # Both console and errors should get --clear
+        assert calls[0][0] == ("test", "console", ["--clear"])
+        assert calls[1][0] == ("test", "errors", ["--clear"])
+
+    def test_no_clear_by_default(self):
+        """Without clear, both commands receive an empty argument list."""
+        from tools.browser_tool import browser_console
+        empty = {"success": True, "data": {"messages": [], "errors": []}}
+        with patch("tools.browser_tool._run_browser_command", return_value=empty) as mock_cmd:
+            browser_console(task_id="test")
+
+        calls = mock_cmd.call_args_list
+        assert calls[0][0] == ("test", "console", [])
+        assert calls[1][0] == ("test", "errors", [])
+
+    def test_empty_console_and_errors(self):
+        """Empty command payloads produce zero counts and empty lists."""
+        from tools.browser_tool import browser_console
+        empty = {"success": True, "data": {"messages": [], "errors": []}}
+        with patch("tools.browser_tool._run_browser_command", return_value=empty):
+            result = json.loads(browser_console(task_id="test"))
+
+        assert result["total_messages"] == 0
+        assert result["total_errors"] == 0
+        assert result["console_messages"] == []
+        assert result["js_errors"] == []
+
+    def test_handles_failed_commands(self):
+        """Failed commands still yield success=True with zero counts."""
+        from tools.browser_tool import browser_console
+        failed = {"success": False, "error": "No session"}
+        with patch("tools.browser_tool._run_browser_command", return_value=failed):
+            result = json.loads(browser_console(task_id="test"))
+
+        # Should still return success with empty data
+        assert result["success"] is True
+        assert result["total_messages"] == 0
+        assert result["total_errors"] == 0
+
+
+# ── browser_console schema ───────────────────────────────────────────
+
+
+class TestBrowserConsoleSchema:
+    """browser_console has an entry in BROWSER_TOOL_SCHEMAS with a boolean `clear`."""
+
+    def test_schema_in_browser_schemas(self):
+        from tools.browser_tool import BROWSER_TOOL_SCHEMAS
+
+        registered = tuple(entry["name"] for entry in BROWSER_TOOL_SCHEMAS)
+        assert "browser_console" in registered
+
+    def test_schema_has_clear_param(self):
+        from tools.browser_tool import BROWSER_TOOL_SCHEMAS
+
+        match = next(filter(lambda e: e["name"] == "browser_console", BROWSER_TOOL_SCHEMAS))
+        params = match["parameters"]["properties"]
+        assert "clear" in params
+        assert params["clear"]["type"] == "boolean"
+
+
+# ── browser_vision annotate ──────────────────────────────────────────
+
+
+class TestBrowserVisionAnnotate:
+    """browser_vision supports annotate parameter."""
+
+    def test_schema_has_annotate_param(self):
+        from tools.browser_tool import BROWSER_TOOL_SCHEMAS
+
+        schema = next(s for s in BROWSER_TOOL_SCHEMAS if s["name"] == "browser_vision")
+        props = schema["parameters"]["properties"]
+        assert "annotate" in props
+        assert props["annotate"]["type"] == "boolean"
+
+    def test_annotate_false_no_flag(self):
+        """Without annotate, the last browser command issued has no --annotate flag."""
+        from tools.browser_tool import browser_vision
+
+        with (
+            patch("tools.browser_tool._run_browser_command") as mock_cmd,
+            patch("tools.browser_tool._aux_vision_client"),
+            patch("tools.browser_tool._DEFAULT_VISION_MODEL", "test-model"),
+            patch("tools.browser_tool._get_vision_model", return_value="test-model"),
+        ):
+            mock_cmd.return_value = {"success": True, "data": {}}
+            # Will fail at screenshot file read, but we can check the command
+            try:
+                browser_vision("test", annotate=False, task_id="test")
+            except Exception:
+                pass
+            # NOTE(review): vacuous if _run_browser_command is never called — confirm.
+            if mock_cmd.called:
+                args = mock_cmd.call_args[0]
+                cmd_args = args[2] if len(args) > 2 else []
+                assert "--annotate" not in cmd_args
+
+    def test_annotate_true_adds_flag(self):
+        """With annotate=True, the last browser command issued includes --annotate."""
+        from tools.browser_tool import browser_vision
+
+        with (
+            patch("tools.browser_tool._run_browser_command") as mock_cmd,
+            patch("tools.browser_tool._aux_vision_client"),
+            patch("tools.browser_tool._DEFAULT_VISION_MODEL", "test-model"),
+            patch("tools.browser_tool._get_vision_model", return_value="test-model"),
+        ):
+            mock_cmd.return_value = {"success": True, "data": {}}
+            try:
+                browser_vision("test", annotate=True, task_id="test")
+            except Exception:
+                pass
+            # NOTE(review): vacuous if _run_browser_command is never called — confirm.
+            if mock_cmd.called:
+                args = mock_cmd.call_args[0]
+                cmd_args = args[2] if len(args) > 2 else []
+                assert "--annotate" in cmd_args
+
+
+# ── auto-recording config ────────────────────────────────────────────
+
+
+class TestRecordSessionsConfig:
+    """browser.record_sessions config option."""
+
+    def test_default_config_has_record_sessions(self):
+        from hermes_cli.config import DEFAULT_CONFIG
+
+        browser_cfg = DEFAULT_CONFIG.get("browser", {})
+        assert "record_sessions" in browser_cfg
+        assert browser_cfg["record_sessions"] is False
+
+    def test_maybe_start_recording_disabled(self):
+        """Recording doesn't start when config says record_sessions: false."""
+        from tools.browser_tool import _maybe_start_recording, _recording_sessions
+        _recording_sessions.discard("test-task")  # shared set: start from a known state
+        with (
+            patch("tools.browser_tool._run_browser_command") as mock_cmd,
+            patch("builtins.open", side_effect=FileNotFoundError),
+        ):
+            _maybe_start_recording("test-task")
+
+        mock_cmd.assert_not_called()
+        assert "test-task" not in _recording_sessions
+
+    def test_maybe_stop_recording_noop_when_not_recording(self):
+        """Stopping when not recording is a no-op."""
+        from tools.browser_tool import _maybe_stop_recording, _recording_sessions
+
+        _recording_sessions.discard("test-task")  # ensure not in set
+        with patch("tools.browser_tool._run_browser_command") as mock_cmd:
+            _maybe_stop_recording("test-task")
+
+        mock_cmd.assert_not_called()
+
+
+# ── dogfood skill files ──────────────────────────────────────────────
+
+
+class TestDogfoodSkill:
+    """Dogfood skill files exist and have correct structure (read as UTF-8)."""
+
+    @pytest.fixture(autouse=True)
+    def _skill_dir(self):
+        # Use the actual repo skills dir (not temp)
+        self.skill_dir = os.path.join(
+            os.path.dirname(__file__), "..", "..", "skills", "dogfood"
+        )
+
+    def test_skill_md_exists(self):
+        assert os.path.exists(os.path.join(self.skill_dir, "SKILL.md"))
+
+    def test_taxonomy_exists(self):
+        assert os.path.exists(
+            os.path.join(self.skill_dir, "references", "issue-taxonomy.md")
+        )
+
+    def test_report_template_exists(self):
+        assert os.path.exists(
+            os.path.join(self.skill_dir, "templates", "dogfood-report-template.md")
+        )
+
+    def test_skill_md_has_frontmatter(self):
+        with open(os.path.join(self.skill_dir, "SKILL.md"), encoding="utf-8") as f:
+            content = f.read()
+        assert content.startswith("---")
+        assert "name: dogfood" in content
+        assert "description:" in content
+
+    def test_skill_references_browser_console(self):
+        with open(os.path.join(self.skill_dir, "SKILL.md"), encoding="utf-8") as f:
+            content = f.read()
+        assert "browser_console" in content
+
+    def test_skill_references_annotate(self):
+        with open(os.path.join(self.skill_dir, "SKILL.md"), encoding="utf-8") as f:
+            content = f.read()
+        assert "annotate" in content
+
+    def test_taxonomy_has_severity_levels(self):
+        # Decode as UTF-8 explicitly; open()'s default encoding is locale-dependent.
+        taxonomy = os.path.join(self.skill_dir, "references", "issue-taxonomy.md")
+        with open(taxonomy, encoding="utf-8") as f:
+            content = f.read()
+        assert "Critical" in content
+        assert "High" in content
+        assert "Medium" in content
+        assert "Low" in content
+
+    def test_taxonomy_has_categories(self):
+        # Decode as UTF-8 explicitly; open()'s default encoding is locale-dependent.
+        taxonomy = os.path.join(self.skill_dir, "references", "issue-taxonomy.md")
+        with open(taxonomy, encoding="utf-8") as f:
+            content = f.read()
+        assert "Functional" in content
+        assert "Visual" in content
+        assert "Accessibility" in content
+        assert "Console" in content
--- a/tests/tools/test_clipboard.py
+++ b/tests/tools/test_clipboard.py
@@ -550,14 +550,13 @@ class TestConvertToPng:
        """BMP file should still be reported as success if no converter available."""
        dest = tmp_path / "img.png"
        dest.write_bytes(FAKE_BMP)  # it's a BMP but named .png
-        # Both Pillow and ImageMagick fail
-        with patch("hermes_cli.clipboard.subprocess.run", side_effect=FileNotFoundError):
-            # Pillow import fails
-            with pytest.raises(Exception):
-                from PIL import Image  # noqa — this may or may not work
-            # The function should still return True if file exists and has content
-            # (raw BMP is better than nothing)
-            assert dest.exists() and dest.stat().st_size > 0
+        # Both Pillow and ImageMagick unavailable
+        with patch.dict(sys.modules, {"PIL": None, "PIL.Image": None}):
+            with patch("hermes_cli.clipboard.subprocess.run", side_effect=FileNotFoundError):
+                result = _convert_to_png(dest)
+                # Raw BMP is better than nothing — function should return True
+                assert result is True
+                assert dest.exists() and dest.stat().st_size > 0


 # ── has_clipboard_image dispatch ─────────────────────────────────────────
@@ -602,11 +601,11 @@ class TestHasClipboardImage:


 # ═════════════════════════════════════════════════════════════════════════
-# Level 2: _build_multimodal_content — image → OpenAI vision format
+# Level 2: _preprocess_images_with_vision — image → text via vision tool
 # ═════════════════════════════════════════════════════════════════════════

-class TestBuildMultimodalContent:
-    """Test the extracted _build_multimodal_content method directly."""
+class TestPreprocessImagesWithVision:
+    """Test vision-based image pre-processing for the CLI."""

    @pytest.fixture
    def cli(self):
@@ -637,55 +636,81 @@ class TestBuildMultimodalContent:
        img.write_bytes(content)
        return img

+    def _mock_vision_success(self, description="A test image with colored pixels."):
+        """Return an async mock that simulates a successful vision_analyze_tool call."""
+        import json
+        async def _fake_vision(**kwargs):
+            return json.dumps({"success": True, "analysis": description})
+        return _fake_vision
+
+    def _mock_vision_failure(self):
+        """Return an async mock that simulates a failed vision_analyze_tool call."""
+        import json
+        async def _fake_vision(**kwargs):
+            return json.dumps({"success": False, "analysis": "Error"})
+        return _fake_vision
+
    def test_single_image_with_text(self, cli, tmp_path):
        img = self._make_image(tmp_path)
-        result = cli._build_multimodal_content("Describe this", [img])
+        with patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_success()):
+            result = cli._preprocess_images_with_vision("Describe this", [img])

-        assert len(result) == 2
-        assert result[0] == {"type": "text", "text": "Describe this"}
-        assert result[1]["type"] == "image_url"
-        url = result[1]["image_url"]["url"]
-        assert url.startswith("data:image/png;base64,")
-        # Verify the base64 actually decodes to our image
-        b64_data = url.split(",", 1)[1]
-        assert base64.b64decode(b64_data) == FAKE_PNG
+        assert isinstance(result, str)
+        assert "A test image with colored pixels." in result
+        assert "Describe this" in result
+        assert str(img) in result
+        assert "base64," not in result  # no raw base64 image content

    def test_multiple_images(self, cli, tmp_path):
        imgs = [self._make_image(tmp_path, f"img{i}.png") for i in range(3)]
-        result = cli._build_multimodal_content("Compare", imgs)
-        assert len(result) == 4  # 1 text + 3 images
-        assert all(r["type"] == "image_url" for r in result[1:])
+        with patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_success()):
+            result = cli._preprocess_images_with_vision("Compare", imgs)
+
+        assert isinstance(result, str)
+        assert "Compare" in result
+        # Each image path should be referenced
+        for img in imgs:
+            assert str(img) in result

    def test_empty_text_gets_default_question(self, cli, tmp_path):
        img = self._make_image(tmp_path)
-        result = cli._build_multimodal_content("", [img])
-        assert result[0]["text"] == "What do you see in this image?"
-
-    def test_jpeg_mime_type(self, cli, tmp_path):
-        img = self._make_image(tmp_path, "photo.jpg", b"\xff\xd8\xff\x00" * 20)
-        result = cli._build_multimodal_content("test", [img])
-        assert "image/jpeg" in result[1]["image_url"]["url"]
-
-    def test_webp_mime_type(self, cli, tmp_path):
-        img = self._make_image(tmp_path, "img.webp", b"RIFF\x00\x00" * 10)
-        result = cli._build_multimodal_content("test", [img])
-        assert "image/webp" in result[1]["image_url"]["url"]
-
-    def test_unknown_extension_defaults_to_png(self, cli, tmp_path):
-        img = self._make_image(tmp_path, "data.bmp", b"\x00" * 50)
-        result = cli._build_multimodal_content("test", [img])
-        assert "image/png" in result[1]["image_url"]["url"]
+        with patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_success()):
+            result = cli._preprocess_images_with_vision("", [img])
+        assert isinstance(result, str)
+        assert "A test image with colored pixels." in result

    def test_missing_image_skipped(self, cli, tmp_path):
        missing = tmp_path / "gone.png"
-        result = cli._build_multimodal_content("test", [missing])
-        assert len(result) == 1  # only text
+        with patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_success()):
+            result = cli._preprocess_images_with_vision("test", [missing])
+        # No images analyzed, falls back to default
+        assert result == "test"

    def test_mix_of_existing_and_missing(self, cli, tmp_path):
        real = self._make_image(tmp_path, "real.png")
        missing = tmp_path / "gone.png"
-        result = cli._build_multimodal_content("test", [real, missing])
-        assert len(result) == 2  # text + 1 real image
+        with patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_success()):
+            result = cli._preprocess_images_with_vision("test", [real, missing])
+        assert str(real) in result
+        assert str(missing) not in result
+        assert "test" in result
+
+    def test_vision_failure_includes_path(self, cli, tmp_path):
+        img = self._make_image(tmp_path)
+        with patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_failure()):
+            result = cli._preprocess_images_with_vision("check this", [img])
+        assert isinstance(result, str)
+        assert str(img) in result  # path still included for retry
+        assert "check this" in result
+
+    def test_vision_exception_includes_path(self, cli, tmp_path):
+        img = self._make_image(tmp_path)
+        async def _explode(**kwargs):
+            raise RuntimeError("API down")
+        with patch("tools.vision_tools.vision_analyze_tool", side_effect=_explode):
+            result = cli._preprocess_images_with_vision("check this", [img])
+        assert isinstance(result, str)
+        assert str(img) in result  # path still included for retry


 # ═════════════════════════════════════════════════════════════════════════
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -393,5 +393,56 @@ class TestStubSchemaDrift(unittest.TestCase):
        self.assertIn("mode", src)


+class TestHeadTailTruncation(unittest.TestCase):
+    """Tests for head+tail truncation of large stdout in execute_code."""
+
+    def _run(self, code):
+        with patch("model_tools.handle_function_call", side_effect=_mock_handle_function_call):
+            result = execute_code(
+                code=code,
+                task_id="test-task",
+                enabled_tools=list(SANDBOX_ALLOWED_TOOLS),
+            )
+        return json.loads(result)
+
+    def test_short_output_not_truncated(self):
+        """Output under MAX_STDOUT_BYTES should not be truncated."""
+        result = self._run('print("small output")')
+        self.assertEqual(result["status"], "success")
+        self.assertIn("small output", result["output"])
+        self.assertNotIn("TRUNCATED", result["output"])
+
+    def test_large_output_preserves_head_and_tail(self):
+        """Output exceeding MAX_STDOUT_BYTES keeps both head and tail."""
+        code = '''
+# Print HEAD marker, then filler, then TAIL marker
+print("HEAD_MARKER_START")
+for i in range(15000):
+    print(f"filler_line_{i:06d}_padding_to_fill_buffer")
+print("TAIL_MARKER_END")
+'''
+        result = self._run(code)
+        self.assertEqual(result["status"], "success")
+        output = result["output"]
+        # Head should be preserved
+        self.assertIn("HEAD_MARKER_START", output)
+        # Tail should be preserved (this is the key improvement)
+        self.assertIn("TAIL_MARKER_END", output)
+        # Truncation notice should be present
+        self.assertIn("TRUNCATED", output)
+
+    def test_truncation_notice_format(self):
+        """Truncation notice is always emitted and includes character counts."""
+        code = '''
+for i in range(15000):
+    print(f"padding_line_{i:06d}_xxxxxxxxxxxxxxxxxxxxxxxxxx")
+'''
+        output = self._run(code)["output"]
+        # ~700 KB of stdout is far above the cap, so truncation is mandatory here.
+        self.assertIn("TRUNCATED", output)
+        self.assertIn("chars omitted", output)
+        self.assertIn("total", output)
+
+
 if __name__ == "__main__":
    unittest.main()
--- a/tests/tools/test_delegate.py
+++ b/tests/tools/test_delegate.py
@@ -56,7 +56,6 @@ class TestDelegateRequirements(unittest.TestCase):
        self.assertIn("tasks", props)
        self.assertIn("context", props)
        self.assertIn("toolsets", props)
-        self.assertIn("model", props)
        self.assertIn("max_iterations", props)
        self.assertEqual(props["tasks"]["maxItems"], 3)

--- a/tests/tools/test_file_operations.py
+++ b/tests/tools/test_file_operations.py
@@ -259,6 +259,70 @@ class TestShellFileOpsHelpers:
        assert ops.cwd == "/"


+class TestSearchPathValidation:
+    """Test that search() validates the path and surfaces search-tool failures."""
+
+    def test_search_nonexistent_path_returns_error(self, mock_env):
+        """search() should return an error when the path doesn't exist."""
+        def side_effect(command, **kwargs):
+            if "test -e" in command:
+                return {"output": "not_found", "returncode": 1}
+            if "command -v" in command:
+                return {"output": "yes", "returncode": 0}
+            return {"output": "", "returncode": 0}
+        mock_env.execute.side_effect = side_effect
+        ops = ShellFileOperations(mock_env)
+        result = ops.search("pattern", path="/nonexistent/path")
+        assert result.error is not None
+        assert "not found" in result.error.lower()
+
+    def test_search_nonexistent_path_files_mode(self, mock_env):
+        """search(target='files') should also return error for bad paths."""
+        def side_effect(command, **kwargs):
+            if "test -e" in command:
+                return {"output": "not_found", "returncode": 1}
+            if "command -v" in command:
+                return {"output": "yes", "returncode": 0}
+            return {"output": "", "returncode": 0}
+        mock_env.execute.side_effect = side_effect
+        ops = ShellFileOperations(mock_env)
+        result = ops.search("*.py", path="/nonexistent/path", target="files")
+        assert result.error is not None
+        assert "not found" in result.error.lower()
+
+    def test_search_existing_path_proceeds(self, mock_env):
+        """search() should proceed normally when the path exists."""
+        def side_effect(command, **kwargs):
+            if "test -e" in command:
+                return {"output": "exists", "returncode": 0}
+            if "command -v" in command:
+                return {"output": "yes", "returncode": 0}
+            # rg returns exit 1 (no matches) with empty output
+            return {"output": "", "returncode": 1}
+        mock_env.execute.side_effect = side_effect
+        ops = ShellFileOperations(mock_env)
+        result = ops.search("pattern", path="/existing/path")
+        assert result.error is None
+        assert result.total_count == 0  # No matches but no error
+
+    def test_search_rg_error_exit_code(self, mock_env):
+        """search() should report error when rg returns exit code 2."""
+        # The path check and tool lookup succeed; only the search command fails.
+        def side_effect(command, **kwargs):
+            """Answer the path/tool probes normally and fail everything else."""
+            if "test -e" in command:
+                return {"output": "exists", "returncode": 0}
+            if "command -v" in command:
+                return {"output": "yes", "returncode": 0}
+            # rg returns exit 2 (error) with empty output
+            return {"output": "", "returncode": 2}
+        mock_env.execute.side_effect = side_effect
+        ops = ShellFileOperations(mock_env)
+        result = ops.search("pattern", path="/some/path")
+        assert result.error is not None
+        assert "search failed" in result.error.lower() or "Search error" in result.error
+
+
 class TestShellFileOpsWriteDenied:
    def test_write_file_denied_path(self, file_ops):
        result = file_ops.write_file("~/.ssh/authorized_keys", "evil key")
--- a/tests/tools/test_file_tools.py
+++ b/tests/tools/test_file_tools.py
@@ -38,6 +38,7 @@ class TestReadFileHandler:
    def test_returns_file_content(self, mock_get):
        mock_ops = MagicMock()
        result_obj = MagicMock()
+        result_obj.content = "line1\nline2"
        result_obj.to_dict.return_value = {"content": "line1\nline2", "total_lines": 2}
        mock_ops.read_file.return_value = result_obj
        mock_get.return_value = mock_ops
@@ -52,6 +53,7 @@ class TestReadFileHandler:
    def test_custom_offset_and_limit(self, mock_get):
        mock_ops = MagicMock()
        result_obj = MagicMock()
+        result_obj.content = "line10"
        result_obj.to_dict.return_value = {"content": "line10", "total_lines": 50}
        mock_ops.read_file.return_value = result_obj
        mock_get.return_value = mock_ops
@@ -200,3 +202,91 @@ class TestSearchHandler:
        from tools.file_tools import search_tool
        result = json.loads(search_tool(pattern="x"))
        assert "error" in result
+
+
+# ---------------------------------------------------------------------------
+# Tool result hint tests (#722)
+# ---------------------------------------------------------------------------
+
+class TestPatchHints:
+    """patch_tool adds a recovery hint when old_string is not found, not on success."""
+
+    @patch("tools.file_tools._get_file_ops")
+    def test_no_match_includes_hint(self, mock_get):
+        # Stub the file-ops backend so patch_replace reports a failed match.
+        failed_patch = MagicMock()
+        failed_patch.to_dict.return_value = {
+            "error": "Could not find match for old_string in foo.py"
+        }
+        file_ops = MagicMock()
+        file_ops.patch_replace.return_value = failed_patch
+        mock_get.return_value = file_ops
+        from tools.file_tools import patch_tool
+
+        raw = patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y")
+        assert all(marker in raw for marker in ("[Hint:", "read_file"))
+
+    @patch("tools.file_tools._get_file_ops")
+    def test_success_no_hint(self, mock_get):
+        # A successful replacement must come back without any hint text.
+        applied_patch = MagicMock()
+        applied_patch.to_dict.return_value = {"success": True, "diff": "--- a\n+++ b"}
+        file_ops = MagicMock()
+        file_ops.patch_replace.return_value = applied_patch
+        mock_get.return_value = file_ops
+        from tools.file_tools import patch_tool
+
+        assert "[Hint:" not in patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y")
+
+
+class TestSearchHints:
+    """search_tool adds a next-offset hint for truncated results and none otherwise."""
+
+    @staticmethod
+    def _stub_search(mock_get, payload):
+        """Make the patched file-ops backend return *payload* from search()."""
+        search_result = MagicMock()
+        search_result.to_dict.return_value = payload
+        file_ops = MagicMock()
+        file_ops.search.return_value = search_result
+        mock_get.return_value = file_ops
+
+    @patch("tools.file_tools._get_file_ops")
+    def test_truncated_results_hint(self, mock_get):
+        # First page of a truncated result set: the hint names the next offset.
+        self._stub_search(mock_get, {
+            "total_count": 100,
+            "matches": [{"path": "a.py", "line": 1, "content": "x"}] * 50,
+            "truncated": True,
+        })
+        from tools.file_tools import search_tool
+
+        raw = search_tool(pattern="foo", offset=0, limit=50)
+        assert "[Hint:" in raw
+        assert "offset=50" in raw
+
+    @patch("tools.file_tools._get_file_ops")
+    def test_non_truncated_no_hint(self, mock_get):
+        # Complete result set (no "truncated" key): no hint expected.
+        self._stub_search(mock_get, {
+            "total_count": 3,
+            "matches": [{"path": "a.py", "line": 1, "content": "x"}] * 3,
+        })
+        from tools.file_tools import search_tool
+
+        raw = search_tool(pattern="foo")
+        assert "[Hint:" not in raw
+
+    @patch("tools.file_tools._get_file_ops")
+    def test_truncated_hint_with_nonzero_offset(self, mock_get):
+        # Second page: the suggested offset advances by the page size.
+        self._stub_search(mock_get, {
+            "total_count": 150,
+            "matches": [{"path": "a.py", "line": 1, "content": "x"}] * 50,
+            "truncated": True,
+        })
+        from tools.file_tools import search_tool
+
+        raw = search_tool(pattern="foo", offset=50, limit=50)
+        assert "[Hint:" in raw
+        assert "offset=100" in raw
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -63,7 +63,7 @@ import time
 import requests
 from typing import Dict, Any, Optional, List
 from pathlib import Path
-from agent.auxiliary_client import get_vision_auxiliary_client
+from agent.auxiliary_client import get_vision_auxiliary_client, get_text_auxiliary_client

 logger = logging.getLogger(__name__)

@@ -80,8 +80,38 @@ DEFAULT_SESSION_TIMEOUT = 300
 # Max tokens for snapshot content before summarization
 SNAPSHOT_SUMMARIZE_THRESHOLD = 8000

-# Resolve vision auxiliary client for extraction/vision tasks
-_aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client()
+# Vision client — for browser_vision (screenshot analysis)
+# Wrapped in try/except so a broken auxiliary config doesn't prevent the entire
+# browser_tool module from importing (which would disable every browser tool).
+try:
+    _aux_vision_client, _DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
+except Exception as _init_err:
+    logger.debug("Could not initialise vision auxiliary client: %s", _init_err)
+    _aux_vision_client, _DEFAULT_VISION_MODEL = None, None
+
+# Text client — for page snapshot summarization (same config as web_extract)
+try:
+    _aux_text_client, _DEFAULT_TEXT_MODEL = get_text_auxiliary_client("web_extract")
+except Exception as _init_err:
+    logger.debug("Could not initialise text auxiliary client: %s", _init_err)
+    _aux_text_client, _DEFAULT_TEXT_MODEL = None, None
+
+# Module-level alias for availability checks: text model first, else vision model
+EXTRACTION_MODEL = _DEFAULT_TEXT_MODEL or _DEFAULT_VISION_MODEL
+
+
+def _get_vision_model() -> str:
+    """Resolve the multimodal model used by browser_vision for screenshots."""
+    env_override = os.getenv("AUXILIARY_VISION_MODEL", "").strip()
+    # Precedence: env override, then the module default, then a fixed fallback.
+    return env_override or _DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
+
+
+def _get_extraction_model() -> str:
+    """Resolve the text model used to summarize page snapshots (as web_extract does)."""
+    env_override = os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
+    # Precedence: env override, then the module default, then a fixed fallback.
+    return env_override or _DEFAULT_TEXT_MODEL or "google/gemini-3-flash-preview"


 def _is_local_mode() -> bool:
@@ -94,9 +124,27 @@ def _is_local_mode() -> bool:
    return not (os.environ.get("BROWSERBASE_API_KEY") and os.environ.get("BROWSERBASE_PROJECT_ID"))


+def _socket_safe_tmpdir() -> str:
+    """Pick a temp directory short enough to hold Unix domain socket paths.
+
+    On macOS ``TMPDIR`` points at ``/var/folders/xx/.../T/`` (~51 chars).
+    Appending ``agent-browser-hermes_…`` to it pushes the socket path past
+    the 104-byte macOS limit for ``AF_UNIX`` addresses, and agent-browser
+    then fails with "Failed to create socket directory" or silently drops
+    screenshots, so macOS always gets the literal ``/tmp``.
+
+    Every other platform keeps whatever ``tempfile.gettempdir()`` reports.
+    """
+    on_macos = sys.platform == "darwin"
+    # Ignore TMPDIR on macOS only; elsewhere defer to the tempfile module.
+    chosen_dir = "/tmp" if on_macos else tempfile.gettempdir()
+    return chosen_dir
+
+
 # Track active sessions per task
 # Stores: session_name (always), bb_session_id + cdp_url (cloud mode only)
 _active_sessions: Dict[str, Dict[str, str]] = {}  # task_id -> {session_name, ...}
+_recording_sessions: set = set()  # task_ids with active recordings

 # Flag to track if cleanup has been done
 _cleanup_done = False
@@ -145,7 +193,7 @@ def _emergency_cleanup_all_sessions():
                    try:
                        browser_cmd = _find_agent_browser()
                        task_socket_dir = os.path.join(
-                            tempfile.gettempdir(),
+                            _socket_safe_tmpdir(),
                            f"agent-browser-{session_name}"
                        )
                        env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir}
@@ -424,18 +472,38 @@ BROWSER_TOOL_SCHEMAS = [
    },
    {
        "name": "browser_vision",
-        "description": "Take a screenshot of the current page and analyze it with vision AI. Use this when you need to visually understand what's on the page - especially useful for CAPTCHAs, visual verification challenges, complex layouts, or when the text snapshot doesn't capture important visual information. Requires browser_navigate to be called first.",
+        "description": "Take a screenshot of the current page and analyze it with vision AI. Use this when you need to visually understand what's on the page - especially useful for CAPTCHAs, visual verification challenges, complex layouts, or when the text snapshot doesn't capture important visual information. Returns both the AI analysis and a screenshot_path that you can share with the user by including MEDIA:<screenshot_path> in your response. Requires browser_navigate to be called first.",
        "parameters": {
            "type": "object",
            "properties": {
                "question": {
                    "type": "string",
                    "description": "What you want to know about the page visually. Be specific about what you're looking for."
+                },
+                "annotate": {
+                    "type": "boolean",
+                    "default": False,
+                    "description": "If true, overlay numbered [N] labels on interactive elements. Each [N] maps to ref @eN for subsequent browser commands. Useful for QA and spatial reasoning about page layout."
                }
            },
            "required": ["question"]
        }
    },
+    {
+        "name": "browser_console",
+        "description": "Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requires browser_navigate to be called first.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "clear": {
+                    "type": "boolean",
+                    "default": False,
+                    "description": "If true, clear the message buffers after reading"
+                }
+            },
+            "required": []
+        }
+    },
 ]


@@ -755,6 +823,7 @@ def _run_browser_command(
    try:
        browser_cmd = _find_agent_browser()
    except FileNotFoundError as e:
+        logger.warning("agent-browser CLI not found: %s", e)
        return {"success": False, "error": str(e)}
    
    from tools.interrupt import is_interrupted
@@ -765,6 +834,7 @@ def _run_browser_command(
    try:
        session_info = _get_session_info(task_id)
    except Exception as e:
+        logger.warning("Failed to create browser session for task=%s: %s", task_id, e)
        return {"success": False, "error": f"Failed to create browser session: {str(e)}"}
    
    # Build the command with the appropriate backend flag.
@@ -790,15 +860,19 @@ def _run_browser_command(
        # Without this, parallel workers fight over the same default socket path,
        # causing "Failed to create socket directory: Permission denied" errors.
        task_socket_dir = os.path.join(
-            tempfile.gettempdir(), 
+            _socket_safe_tmpdir(),
            f"agent-browser-{session_info['session_name']}"
        )
-        os.makedirs(task_socket_dir, exist_ok=True)
+        os.makedirs(task_socket_dir, mode=0o700, exist_ok=True)
+        logger.debug("browser cmd=%s task=%s socket_dir=%s (%d chars)",
+                     command, task_id, task_socket_dir, len(task_socket_dir))
        
-        browser_env = {
-            **os.environ,
-            "AGENT_BROWSER_SOCKET_DIR": task_socket_dir,
-        }
+        browser_env = {**os.environ}
+        # Ensure PATH includes standard dirs (systemd services may have minimal PATH)
+        _SANE_PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+        if "/usr/bin" not in browser_env.get("PATH", "").split(":"):
+            browser_env["PATH"] = f"{browser_env.get('PATH', '')}:{_SANE_PATH}"
+        browser_env["AGENT_BROWSER_SOCKET_DIR"] = task_socket_dir
        
        result = subprocess.run(
            cmd_parts,
@@ -808,10 +882,18 @@ def _run_browser_command(
            env=browser_env,
        )
        
-        # Log stderr for diagnostics (agent-browser may emit warnings there)
+        # Log stderr for diagnostics — use warning level on failure so it's visible
        if result.stderr and result.stderr.strip():
-            logger.debug("stderr from '%s': %s", command, result.stderr.strip()[:200])
+            level = logging.WARNING if result.returncode != 0 else logging.DEBUG
+            logger.log(level, "browser '%s' stderr: %s", command, result.stderr.strip()[:500])
        
+        # Log empty output as warning — common sign of broken agent-browser
+        if not result.stdout.strip() and result.returncode == 0:
+            logger.warning("browser '%s' returned empty stdout with rc=0. "
+                           "cmd=%s stderr=%s",
+                           command, " ".join(cmd_parts[:4]) + "...",
+                           (result.stderr or "")[:200])
+
        # Parse JSON output
        if result.stdout.strip():
            try:
@@ -825,22 +907,29 @@ def _run_browser_command(
                                       "returncode=%s", result.returncode)
                return parsed
            except json.JSONDecodeError:
-                # If not valid JSON, return as raw output
+                # Non-JSON output indicates agent-browser crash or version mismatch
+                raw = result.stdout.strip()[:500]
+                logger.warning("browser '%s' returned non-JSON output (rc=%s): %s",
+                               command, result.returncode, raw[:200])
                return {
                    "success": True,
-                    "data": {"raw": result.stdout.strip()}
+                    "data": {"raw": raw}
                }
        
        # Check for errors
        if result.returncode != 0:
            error_msg = result.stderr.strip() if result.stderr else f"Command failed with code {result.returncode}"
+            logger.warning("browser '%s' failed (rc=%s): %s", command, result.returncode, error_msg[:300])
            return {"success": False, "error": error_msg}
        
        return {"success": True, "data": {}}
        
    except subprocess.TimeoutExpired:
+        logger.warning("browser '%s' timed out after %ds (task=%s, socket_dir=%s)",
+                       command, timeout, task_id, task_socket_dir)
        return {"success": False, "error": f"Command timed out after {timeout} seconds"}
    except Exception as e:
+        logger.warning("browser '%s' exception: %s", command, e, exc_info=True)
        return {"success": False, "error": str(e)}


@@ -850,9 +939,9 @@ def _extract_relevant_content(
 ) -> str:
    """Use LLM to extract relevant content from a snapshot based on the user's task.

-    Falls back to simple truncation when no auxiliary vision model is configured.
+    Falls back to simple truncation when no auxiliary text model is configured.
    """
-    if _aux_vision_client is None or EXTRACTION_MODEL is None:
+    if _aux_text_client is None:
        return _truncate_snapshot(snapshot_text)

    if user_task:
@@ -880,8 +969,8 @@ def _extract_relevant_content(

    try:
        from agent.auxiliary_client import auxiliary_max_tokens_param
-        response = _aux_vision_client.chat.completions.create(
-            model=EXTRACTION_MODEL,
+        response = _aux_text_client.chat.completions.create(
+            model=_get_extraction_model(),
            messages=[{"role": "user", "content": extraction_prompt}],
            **auxiliary_max_tokens_param(4000),
            temperature=0.1,
@@ -930,9 +1019,10 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
    session_info = _get_session_info(effective_task_id)
    is_first_nav = session_info.get("_first_nav", True)
    
-    # Mark that we've done at least one navigation
+    # Auto-start recording if configured and this is first navigation
    if is_first_nav:
        session_info["_first_nav"] = False
+        _maybe_start_recording(effective_task_id)
    
    result = _run_browser_command(effective_task_id, "open", [url], timeout=60)
    
@@ -1196,6 +1286,10 @@ def browser_close(task_id: Optional[str] = None) -> str:
        JSON string with close result
    """
    effective_task_id = task_id or "default"
+    
+    # Stop auto-recording before closing
+    _maybe_stop_recording(effective_task_id)
+    
    result = _run_browser_command(effective_task_id, "close", [])
    
    # Close the backend session (Browserbase API in cloud mode, nothing extra in local mode)
@@ -1226,6 +1320,103 @@ def browser_close(task_id: Optional[str] = None) -> str:
        }, ensure_ascii=False)


+def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str:
+    """Get browser console messages and JavaScript errors.
+
+    Returns both console output (log/warn/error/info from the page's JS)
+    and uncaught exceptions (crashes, unhandled promise rejections), read
+    via the agent-browser ``console`` and ``errors`` commands.
+
+    Args:
+        clear: If True, clear the message/error buffers after reading
+        task_id: Task identifier for session isolation
+
+    Returns:
+        JSON string with console messages and JS errors. ``success`` is False
+        (with ``error``) only when both commands failed; a single failed
+        command is listed under ``warnings`` next to the data that was read.
+    """
+    effective_task_id = task_id or "default"
+    clear_args = ["--clear"] if clear else []
+    # Run "console" first, then "errors"; each gets its own copy of the args.
+    results = {
+        name: _run_browser_command(effective_task_id, name, list(clear_args))
+        for name in ("console", "errors")
+    }
+    failures = [
+        f"{name}: {res.get('error', 'Unknown error')}"
+        for name, res in results.items() if not res.get("success")
+    ]
+    if len(failures) == len(results):
+        # Nothing could be read — do not present that as a clean, empty console.
+        return json.dumps({"success": False, "error": "; ".join(failures)}, ensure_ascii=False)
+
+    def _entries(name: str, key: str) -> list:
+        """Return the list stored under *key* for a successful command, else []."""
+        res = results[name]
+        return ((res.get("data") or {}).get(key) or []) if res.get("success") else []
+
+    messages = [{"type": m.get("type", "log"), "text": m.get("text", ""), "source": "console"}
+                for m in _entries("console", "messages")]
+    errors = [{"message": e.get("message", ""), "source": "exception"}
+              for e in _entries("errors", "errors")]
+    payload = {"success": True, "console_messages": messages, "js_errors": errors,
+               "total_messages": len(messages), "total_errors": len(errors)}
+    if failures:
+        payload["warnings"] = failures
+    return json.dumps(payload, ensure_ascii=False)
+
+
+def _maybe_start_recording(task_id: str):
+    """Start recording if browser.record_sessions is enabled in config.
+
+    Best-effort: every failure is logged at debug level, never raised.
+    """
+    if task_id in _recording_sessions:
+        return
+    try:
+        hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+        config_path = hermes_home / "config.yaml"
+        if not config_path.exists():
+            return
+        import yaml
+        # Explicit UTF-8: the platform default encoding may not decode the file.
+        with open(config_path, encoding="utf-8") as f:
+            cfg = yaml.safe_load(f) or {}
+        # An empty ``browser:`` key loads as None; treat it as "no settings".
+        if not (cfg.get("browser") or {}).get("record_sessions", False):
+            return
+        recordings_dir = hermes_home / "browser_recordings"
+        recordings_dir.mkdir(parents=True, exist_ok=True)
+        _cleanup_old_recordings(max_age_hours=72)
+        import time
+        stamp = time.strftime("%Y%m%d_%H%M%S")
+        recording_path = recordings_dir / f"session_{stamp}_{task_id[:16]}.webm"
+        result = _run_browser_command(task_id, "record", ["start", str(recording_path)])
+        if result.get("success"):
+            _recording_sessions.add(task_id)
+            logger.info("Auto-recording browser session %s to %s", task_id, recording_path)
+        else:
+            logger.debug("Could not start auto-recording: %s", result.get("error"))
+    except Exception as e:
+        logger.debug("Auto-recording setup failed: %s", e)
+
+
+def _maybe_stop_recording(task_id: str):
+    """Stop this session's recording, if any, and stop tracking it either way."""
+    if task_id in _recording_sessions:
+        try:
+            stop_result = _run_browser_command(task_id, "record", ["stop"])
+            if stop_result.get("success"):
+                saved_to = stop_result.get("data", {}).get("path", "")
+                logger.info("Saved browser recording for session %s: %s", task_id, saved_to)
+        except Exception as exc:
+            logger.debug("Could not stop recording for %s: %s", task_id, exc)
+        finally:
+            # Drop the session even when stopping failed.
+            _recording_sessions.discard(task_id)
+
+
 def browser_get_images(task_id: Optional[str] = None) -> str:
    """
    Get all images on the current page.
@@ -1280,7 +1471,7 @@ def browser_get_images(task_id: Optional[str] = None) -> str:
        }, ensure_ascii=False)


-def browser_vision(question: str, task_id: Optional[str] = None) -> str:
+def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] = None) -> str:
    """
    Take a screenshot of the current page and analyze it with vision AI.
    
@@ -1289,52 +1480,72 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
    text-based snapshot may not capture (CAPTCHAs, verification challenges,
    images, complex layouts, etc.).
    
+    The screenshot is saved persistently and its file path is returned alongside
+    the analysis, so it can be shared with users via MEDIA:<path> in the response.
+    
    Args:
        question: What you want to know about the page visually
+        annotate: If True, overlay numbered [N] labels on interactive elements
        task_id: Task identifier for session isolation
        
    Returns:
-        JSON string with vision analysis results
+        JSON string with vision analysis results and screenshot_path
    """
    import base64
-    import tempfile
    import uuid as uuid_mod
    from pathlib import Path
    
    effective_task_id = task_id or "default"
    
    # Check auxiliary vision client
-    if _aux_vision_client is None or EXTRACTION_MODEL is None:
+    if _aux_vision_client is None or _DEFAULT_VISION_MODEL is None:
        return json.dumps({
            "success": False,
            "error": "Browser vision unavailable: no auxiliary vision model configured. "
                     "Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
        }, ensure_ascii=False)
    
-    # Create a temporary file for the screenshot
-    temp_dir = Path(tempfile.gettempdir())
-    screenshot_path = temp_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
+    # Save screenshot to persistent location so it can be shared with users
+    hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+    screenshots_dir = hermes_home / "browser_screenshots"
+    screenshot_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
    
    try:
+        screenshots_dir.mkdir(parents=True, exist_ok=True)
+        
+        # Prune old screenshots (older than 24 hours) to prevent unbounded disk growth
+        _cleanup_old_screenshots(screenshots_dir, max_age_hours=24)
+        
        # Take screenshot using agent-browser
+        screenshot_args = [str(screenshot_path)]
+        if annotate:
+            screenshot_args.insert(0, "--annotate")
        result = _run_browser_command(
            effective_task_id, 
            "screenshot", 
-            [str(screenshot_path)],
+            screenshot_args,
            timeout=30
        )
        
        if not result.get("success"):
+            error_detail = result.get("error", "Unknown error")
+            mode = "local" if _is_local_mode() else "cloud"
            return json.dumps({
                "success": False,
-                "error": f"Failed to take screenshot: {result.get('error', 'Unknown error')}"
+                "error": f"Failed to take screenshot ({mode} mode): {error_detail}"
            }, ensure_ascii=False)
        
        # Check if screenshot file was created
        if not screenshot_path.exists():
+            mode = "local" if _is_local_mode() else "cloud"
            return json.dumps({
                "success": False,
-                "error": "Screenshot file was not created"
+                "error": (
+                    f"Screenshot file was not created at {screenshot_path} ({mode} mode). "
+                    f"This may indicate a socket path issue (macOS /var/folders/), "
+                    f"a missing Chromium install ('agent-browser install'), "
+                    f"or a stale daemon process."
+                ),
            }, ensure_ascii=False)
        
        # Read and convert to base64
@@ -1353,8 +1564,11 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:

        # Use the sync auxiliary vision client directly
        from agent.auxiliary_client import auxiliary_max_tokens_param
+        vision_model = _get_vision_model()
+        logger.debug("browser_vision: analysing screenshot (%d bytes) with model=%s",
+                     len(image_data), vision_model)
        response = _aux_vision_client.chat.completions.create(
-            model=EXTRACTION_MODEL,
+            model=vision_model,
            messages=[
                {
                    "role": "user",
@@ -1369,24 +1583,61 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
        )
        
        analysis = response.choices[0].message.content
-        return json.dumps({
+        response_data = {
            "success": True,
            "analysis": analysis,
-        }, ensure_ascii=False)
+            "screenshot_path": str(screenshot_path),
+        }
+        # Include annotation data if annotated screenshot was taken
+        if annotate and result.get("data", {}).get("annotations"):
+            response_data["annotations"] = result["data"]["annotations"]
+        return json.dumps(response_data, ensure_ascii=False)
    
    except Exception as e:
-        return json.dumps({
-            "success": False,
-            "error": f"Error during vision analysis: {str(e)}"
-        }, ensure_ascii=False)
-    
-    finally:
-        # Clean up screenshot file
+        # Keep the screenshot if it was captured successfully — the failure is
+        # in the LLM vision analysis, not the capture.  Deleting a valid
+        # screenshot loses evidence the user might need.  The 24-hour cleanup
+        # in _cleanup_old_screenshots prevents unbounded disk growth.
+        logger.warning("browser_vision failed: %s", e, exc_info=True)
+        error_info = {"success": False, "error": f"Error during vision analysis: {str(e)}"}
        if screenshot_path.exists():
+            error_info["screenshot_path"] = str(screenshot_path)
+            error_info["note"] = "Screenshot was captured but vision analysis failed. You can still share it via MEDIA:<path>."
+        return json.dumps(error_info, ensure_ascii=False)
+
+
+def _cleanup_old_screenshots(screenshots_dir, max_age_hours=24):
+    """Remove browser screenshots older than max_age_hours to prevent disk bloat.
+
+    Args:
+        screenshots_dir: Directory to scan; only its glob() method is used, so
+            a pathlib.Path is expected.
+        max_age_hours: Files whose modification time is more than this many
+            hours in the past are deleted.
+
+    Only files matching ``browser_screenshot_*.png`` are considered.  Errors
+    are swallowed both per file and for the scan as a whole, so a failed
+    cleanup is never reported to the caller.
+    """
+    import time
+    try:
+        # Modification times below this epoch timestamp count as expired.
+        cutoff = time.time() - (max_age_hours * 3600)
+        for f in screenshots_dir.glob("browser_screenshot_*.png"):
            try:
-                screenshot_path.unlink()
+                if f.stat().st_mtime < cutoff:
+                    f.unlink()
            except Exception:
                pass
+    except Exception:
+        pass  # Non-critical — don't fail the screenshot operation
+
+
+def _cleanup_old_recordings(max_age_hours=72):
+    """Best-effort purge of stale browser recordings.
+
+    Looks in ``<HERMES_HOME>/browser_recordings`` (HERMES_HOME falls back to
+    ``~/.hermes`` when the environment variable is unset) and deletes every
+    ``session_*.webm`` whose modification time is more than *max_age_hours*
+    hours old.  A missing directory is a no-op, and all errors are swallowed.
+    """
+    import time
+    try:
+        home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+        target_dir = home / "browser_recordings"
+        if target_dir.exists():
+            oldest_allowed = time.time() - (max_age_hours * 3600)
+            for recording in target_dir.glob("session_*.webm"):
+                try:
+                    expired = recording.stat().st_mtime < oldest_allowed
+                    if expired:
+                        recording.unlink()
+                except Exception:
+                    pass  # one bad file must not stop the sweep
+    except Exception:
+        pass  # housekeeping only; never propagate


 # ============================================================================
@@ -1460,6 +1711,9 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
        bb_session_id = session_info.get("bb_session_id", "unknown")
        logger.debug("Found session for task %s: bb_session_id=%s", task_id, bb_session_id)
        
+        # Stop auto-recording before closing (saves the file)
+        _maybe_stop_recording(task_id)
+        
        # Try to close via agent-browser first (needs session in _active_sessions)
        try:
            _run_browser_command(task_id, "close", [], timeout=10)
@@ -1485,7 +1739,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
        # Kill the daemon process and clean up socket directory
        session_name = session_info.get("session_name", "")
        if session_name:
-            socket_dir = os.path.join(tempfile.gettempdir(), f"agent-browser-{session_name}")
+            socket_dir = os.path.join(_socket_safe_tmpdir(), f"agent-browser-{session_name}")
            if os.path.exists(socket_dir):
                # agent-browser writes {session}.pid in the socket dir
                pid_file = os.path.join(socket_dir, f"{session_name}.pid")
@@ -1675,6 +1929,13 @@ registry.register(
    name="browser_vision",
    toolset="browser",
    schema=_BROWSER_SCHEMA_MAP["browser_vision"],
-    handler=lambda args, **kw: browser_vision(question=args.get("question", ""), task_id=kw.get("task_id")),
+    handler=lambda args, **kw: browser_vision(question=args.get("question", ""), annotate=args.get("annotate", False), task_id=kw.get("task_id")),
+    check_fn=check_browser_requirements,
+)
+registry.register(
+    name="browser_console",
+    toolset="browser",
+    schema=_BROWSER_SCHEMA_MAP["browser_console"],
+    handler=lambda args, **kw: browser_console(clear=args.get("clear", False), task_id=kw.get("task_id")),
    check_fn=check_browser_requirements,
 )
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -78,7 +78,7 @@ _TOOL_STUBS = {
    "web_extract": (
        "web_extract",
        "urls: list",
-        '"""Extract content from URLs. Returns dict with results list of {url, content, error}."""',
+        '"""Extract content from URLs. Returns dict with results list of {url, title, content, error}."""',
        '{"urls": urls}',
    ),
    "read_file": (
@@ -385,7 +385,11 @@ def execute_code(

    # --- Set up temp directory with hermes_tools.py and script.py ---
    tmpdir = tempfile.mkdtemp(prefix="hermes_sandbox_")
-    sock_path = os.path.join(tempfile.gettempdir(), f"hermes_rpc_{uuid.uuid4().hex}.sock")
+    # Use /tmp on macOS to avoid the long /var/folders/... path that pushes
+    # Unix domain socket paths past the 104-byte macOS AF_UNIX limit.
+    # On Linux, tempfile.gettempdir() already returns /tmp.
+    _sock_tmpdir = "/tmp" if sys.platform == "darwin" else tempfile.gettempdir()
+    sock_path = os.path.join(_sock_tmpdir, f"hermes_rpc_{uuid.uuid4().hex}.sock")

    tool_call_log: list = []
    tool_call_counter = [0]  # mutable so the RPC thread can increment
@@ -453,11 +457,17 @@ def execute_code(

        # --- Poll loop: watch for exit, timeout, and interrupt ---
        deadline = time.monotonic() + timeout
-        stdout_chunks: list = []
        stderr_chunks: list = []

-        # Background readers to avoid pipe buffer deadlocks
+        # Background readers to avoid pipe buffer deadlocks.
+        # For stdout we use a head+tail strategy: keep the first HEAD_BYTES
+        # and a rolling window of the last TAIL_BYTES so the final print()
+        # output is never lost.  Stderr keeps head-only (errors appear early).
+        _STDOUT_HEAD_BYTES = int(MAX_STDOUT_BYTES * 0.4)   # 40% head
+        _STDOUT_TAIL_BYTES = MAX_STDOUT_BYTES - _STDOUT_HEAD_BYTES  # 60% tail
+
        def _drain(pipe, chunks, max_bytes):
+            """Simple head-only drain (used for stderr)."""
            total = 0
            try:
                while True:
@@ -471,8 +481,48 @@ def execute_code(
            except (ValueError, OSError):
                pass

+        stdout_total_bytes = [0]  # mutable ref for total bytes seen
+
+        def _drain_head_tail(pipe, head_chunks, tail_chunks, head_bytes, tail_bytes, total_ref):
+            """Read *pipe* to EOF, retaining the start and the end of the stream.
+
+            The first *head_bytes* bytes are appended to *head_chunks* as they
+            arrive.  Later data passes through a rolling window that drops
+            whole chunks from its old end whenever it exceeds *tail_bytes*;
+            the window is copied into *tail_chunks* once reading stops.
+            ``total_ref[0]`` is incremented by every byte read, kept or not.
+            """
+            from collections import deque
+            window = deque()
+            window_size = 0
+            head_room = head_bytes  # bytes the head may still accept
+            try:
+                while True:
+                    chunk = pipe.read(4096)
+                    if not chunk:
+                        break
+                    total_ref[0] += len(chunk)
+                    if head_room > 0:
+                        # Head fills first; any overflow spills into the tail.
+                        taken = min(len(chunk), head_room)
+                        head_chunks.append(chunk[:taken])
+                        head_room -= taken
+                        chunk = chunk[taken:]
+                    if chunk:
+                        window.append(chunk)
+                        window_size += len(chunk)
+                        # Evict oldest chunks until the window fits its budget.
+                        while window_size > tail_bytes and window:
+                            window_size -= len(window.popleft())
+            except (ValueError, OSError):
+                pass
+            # Publish the surviving tail only after the read loop has ended.
+            tail_chunks.extend(window)
+
+        stdout_head_chunks: list = []
+        stdout_tail_chunks: list = []
+
        stdout_reader = threading.Thread(
-            target=_drain, args=(proc.stdout, stdout_chunks, MAX_STDOUT_BYTES), daemon=True
+            target=_drain_head_tail,
+            args=(proc.stdout, stdout_head_chunks, stdout_tail_chunks,
+                  _STDOUT_HEAD_BYTES, _STDOUT_TAIL_BYTES, stdout_total_bytes),
+            daemon=True
        )
        stderr_reader = threading.Thread(
            target=_drain, args=(proc.stderr, stderr_chunks, MAX_STDERR_BYTES), daemon=True
@@ -496,12 +546,21 @@ def execute_code(
        stdout_reader.join(timeout=3)
        stderr_reader.join(timeout=3)

-        stdout_text = b"".join(stdout_chunks).decode("utf-8", errors="replace")
+        stdout_head = b"".join(stdout_head_chunks).decode("utf-8", errors="replace")
+        stdout_tail = b"".join(stdout_tail_chunks).decode("utf-8", errors="replace")
        stderr_text = b"".join(stderr_chunks).decode("utf-8", errors="replace")

-        # Truncation notice
-        if len(stdout_text) >= MAX_STDOUT_BYTES:
-            stdout_text = stdout_text[:MAX_STDOUT_BYTES] + "\n[output truncated at 50KB]"
+        # Assemble stdout with head+tail truncation
+        total_stdout = stdout_total_bytes[0]
+        if total_stdout > MAX_STDOUT_BYTES and stdout_tail:
+            omitted = total_stdout - len(stdout_head) - len(stdout_tail)
+            truncated_notice = (
+                f"\n\n... [OUTPUT TRUNCATED - {omitted:,} chars omitted "
+                f"out of {total_stdout:,} total] ...\n\n"
+            )
+            stdout_text = stdout_head + truncated_notice + stdout_tail
+        else:
+            stdout_text = stdout_head + stdout_tail

        exit_code = proc.returncode if proc.returncode is not None else -1
        duration = round(time.monotonic() - exec_start, 2)
@@ -605,7 +664,7 @@ _TOOL_DOC_LINES = [
     "    Returns {\"data\": {\"web\": [{\"url\", \"title\", \"description\"}, ...]}}"),
    ("web_extract",
     "  web_extract(urls: list[str]) -> dict\n"
-     "    Returns {\"results\": [{\"url\", \"content\", \"error\"}, ...]} where content is markdown"),
+     "    Returns {\"results\": [{\"url\", \"title\", \"content\", \"error\"}, ...]} where content is markdown"),
    ("read_file",
     "  read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n"
     "    Lines are 1-indexed. Returns {\"content\": \"...\", \"total_lines\": N}"),
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -102,7 +102,9 @@ def schedule_cronjob(
                 - "local": Save to local files only (~/.hermes/cron/output/)
                 - "telegram": Send to Telegram home channel
                 - "discord": Send to Discord home channel
+                 - "signal": Send to Signal home channel
                 - "telegram:123456": Send to specific chat ID
+                 - "signal:+15551234567": Send to specific Signal number
    
    Returns:
        JSON with job_id, next_run time, and confirmation
@@ -216,7 +218,7 @@ Use for: reminders, periodic checks, scheduled reports, automated maintenance.""
            },
            "deliver": {
                "type": "string",
-                "description": "Where to send output: 'origin' (back to this chat), 'local' (files only), 'telegram', 'discord', or 'platform:chat_id'"
+                "description": "Where to send output: 'origin' (back to this chat), 'local' (files only), 'telegram', 'discord', 'signal', or 'platform:chat_id'"
            }
        },
        "required": ["prompt", "schedule"]
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -205,6 +205,9 @@ def _run_single_child(
            provider=getattr(parent_agent, "provider", None),
            api_mode=getattr(parent_agent, "api_mode", None),
            max_iterations=max_iterations,
+            max_tokens=getattr(parent_agent, "max_tokens", None),
+            reasoning_config=getattr(parent_agent, "reasoning_config", None),
+            prefill_messages=getattr(parent_agent, "prefill_messages", None),
            enabled_toolsets=child_toolsets,
            quiet_mode=True,
            ephemeral_system_prompt=child_prompt,
@@ -293,7 +296,6 @@ def delegate_task(
    context: Optional[str] = None,
    toolsets: Optional[List[str]] = None,
    tasks: Optional[List[Dict[str, Any]]] = None,
-    model: Optional[str] = None,
    max_iterations: Optional[int] = None,
    parent_agent=None,
 ) -> str:
@@ -355,7 +357,7 @@ def delegate_task(
            goal=t["goal"],
            context=t.get("context"),
            toolsets=t.get("toolsets") or toolsets,
-            model=model,
+            model=None,
            max_iterations=effective_max_iter,
            parent_agent=parent_agent,
            task_count=1,
@@ -380,7 +382,7 @@ def delegate_task(
                    goal=t["goal"],
                    context=t.get("context"),
                    toolsets=t.get("toolsets") or toolsets,
-                    model=model,
+                    model=None,
                    max_iterations=effective_max_iter,
                    parent_agent=parent_agent,
                    task_count=n_tasks,
@@ -533,13 +535,6 @@ DELEGATE_TASK_SCHEMA = {
                    "When provided, top-level goal/context/toolsets are ignored."
                ),
            },
-            "model": {
-                "type": "string",
-                "description": (
-                    "Model override for the subagent(s). Omit to use your "
-                    "same model. Use a cheaper/faster model for simple subtasks."
-                ),
-            },
            "max_iterations": {
                "type": "integer",
                "description": (
@@ -565,7 +560,6 @@ registry.register(
        context=args.get("context"),
        toolsets=args.get("toolsets"),
        tasks=args.get("tasks"),
-        model=args.get("model"),
        max_iterations=args.get("max_iterations"),
        parent_agent=kw.get("parent_agent")),
    check_fn=check_delegate_requirements,
--- a/tools/environments/local.py
+++ b/tools/environments/local.py
@@ -17,15 +17,21 @@ from tools.environments.base import BaseEnvironment
 _OUTPUT_FENCE = "__HERMES_FENCE_a9f7b3__"


-def _find_shell() -> str:
-    """Find the best shell for command execution.
+def _find_bash() -> str:
+    """Find bash for command execution.

-    On Unix: uses $SHELL, falls back to bash.
+    The fence wrapper uses bash syntax (semicolons, $?, printf), so we
+    must use bash — not the user's $SHELL which could be fish/zsh/etc.
    On Windows: uses Git Bash (bundled with Git for Windows).
-    Raises RuntimeError if no suitable shell is found on Windows.
    """
    if not _IS_WINDOWS:
-        return os.environ.get("SHELL") or shutil.which("bash") or "/bin/bash"
+        return (
+            shutil.which("bash")
+            or ("/usr/bin/bash" if os.path.isfile("/usr/bin/bash") else None)
+            or ("/bin/bash" if os.path.isfile("/bin/bash") else None)
+            or os.environ.get("SHELL")  # last resort: whatever they have
+            or "/bin/sh"
+        )

    # Windows: look for Git Bash (installed with Git for Windows).
    # Allow override via env var (same pattern as Claude Code).
@@ -53,6 +59,11 @@ def _find_shell() -> str:
        "Or set HERMES_GIT_BASH_PATH to your bash.exe location."
    )

+
+# Backward compat — process_registry.py imports this name
+_find_shell = _find_bash
+
+
 # Noise lines emitted by interactive shells when stdin is not a terminal.
 # Used as a fallback when output fence markers are missing.
 _SHELL_NOISE_SUBSTRINGS = (
@@ -153,13 +164,11 @@ class LocalEnvironment(BaseEnvironment):
        exec_command = self._prepare_command(command)

        try:
-            # Use the user's shell as an interactive login shell (-lic) so
-            # that ALL rc files are sourced — including content after the
-            # interactive guard in .bashrc (case $- in *i*)..esac) where
-            # tools like nvm, pyenv, and cargo install their init scripts.
-            # -l alone isn't enough: .profile sources .bashrc, but the guard
-            # returns early because the shell isn't interactive.
-            user_shell = _find_shell()
+            # The fence wrapper uses bash syntax (semicolons, $?, printf).
+            # Always use bash for the wrapper — NOT $SHELL which could be
+            # fish, zsh, or another shell with incompatible syntax.
+            # The -lic flags source rc files so tools like nvm/pyenv work.
+            user_shell = _find_bash()
            # Wrap with output fences so we can later extract the real
            # command output and discard shell init/exit noise.
            fenced_cmd = (
@@ -169,11 +178,19 @@ class LocalEnvironment(BaseEnvironment):
                f" printf '{_OUTPUT_FENCE}';"
                f" exit $__hermes_rc"
            )
+            # Ensure PATH always includes standard dirs — systemd services
+            # and some terminal multiplexers inherit a minimal PATH.
+            _SANE_PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+            run_env = dict(os.environ | self.env)
+            existing_path = run_env.get("PATH", "")
+            if "/usr/bin" not in existing_path.split(":"):
+                run_env["PATH"] = f"{existing_path}:{_SANE_PATH}" if existing_path else _SANE_PATH
+
            proc = subprocess.Popen(
                [user_shell, "-lic", fenced_cmd],
                text=True,
                cwd=work_dir,
-                env=os.environ | self.env,
+                env=run_env,
                encoding="utf-8",
                errors="replace",
                stdout=subprocess.PIPE,
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@@ -819,6 +819,14 @@ class ShellFileOperations(FileOperations):
        # Expand ~ and other shell paths
        path = self._expand_path(path)
        
+        # Validate that the path exists before searching
+        check = self._exec(f"test -e {self._escape_shell_arg(path)} && echo exists || echo not_found")
+        if "not_found" in check.stdout:
+            return SearchResult(
+                error=f"Path not found: {path}. Verify the path exists (use 'terminal' to check).",
+                total_count=0
+            )
+        
        if target == "files":
            return self._search_files(pattern, path, limit, offset)
        else:
@@ -919,6 +927,11 @@ class ShellFileOperations(FileOperations):
        cmd = " ".join(cmd_parts)
        result = self._exec(cmd, timeout=60)
        
+        # rg exit codes: 0=matches found, 1=no matches, 2=error
+        if result.exit_code == 2 and not result.stdout.strip():
+            error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error"
+            return SearchResult(error=f"Search failed: {error_msg}", total_count=0)
+        
        # Parse results based on output mode
        if output_mode == "files_only":
            all_files = [f for f in result.stdout.strip().split('\n') if f]
@@ -1013,6 +1026,11 @@ class ShellFileOperations(FileOperations):
        cmd = " ".join(cmd_parts)
        result = self._exec(cmd, timeout=60)
        
+        # grep exit codes: 0=matches found, 1=no matches, 2=error
+        if result.exit_code == 2 and not result.stdout.strip():
+            error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error"
+            return SearchResult(error=f"Search failed: {error_msg}", total_count=0)
+        
        if output_mode == "files_only":
            all_files = [f for f in result.stdout.strip().split('\n') if f]
            total = len(all_files)
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@@ -7,6 +7,7 @@ import os
 import threading
 from typing import Optional
 from tools.file_operations import ShellFileOperations
+from agent.redact import redact_sensitive_text

 logger = logging.getLogger(__name__)

@@ -128,6 +129,8 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
    try:
        file_ops = _get_file_ops(task_id)
        result = file_ops.read_file(path, offset, limit)
+        if result.content:
+            result.content = redact_sensitive_text(result.content)
        return json.dumps(result.to_dict(), ensure_ascii=False)
    except Exception as e:
        return json.dumps({"error": str(e)}, ensure_ascii=False)
@@ -164,7 +167,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
        else:
            return json.dumps({"error": f"Unknown mode: {mode}"})
        
-        return json.dumps(result.to_dict(), ensure_ascii=False)
+        result_dict = result.to_dict()
+        result_json = json.dumps(result_dict, ensure_ascii=False)
+        # Hint when old_string not found — saves iterations where the agent
+        # retries with stale content instead of re-reading the file.
+        if result_dict.get("error") and "Could not find" in str(result_dict["error"]):
+            result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]"
+        return result_json
    except Exception as e:
        return json.dumps({"error": str(e)}, ensure_ascii=False)

@@ -180,7 +189,18 @@ def search_tool(pattern: str, target: str = "content", path: str = ".",
            pattern=pattern, path=path, target=target, file_glob=file_glob,
            limit=limit, offset=offset, output_mode=output_mode, context=context
        )
-        return json.dumps(result.to_dict(), ensure_ascii=False)
+        if hasattr(result, 'matches'):
+            for m in result.matches:
+                if hasattr(m, 'content') and m.content:
+                    m.content = redact_sensitive_text(m.content)
+        result_dict = result.to_dict()
+        result_json = json.dumps(result_dict, ensure_ascii=False)
+        # Hint when results were truncated — explicit next offset is clearer
+        # than relying on the model to infer it from total_count vs match count.
+        if result_dict.get("truncated"):
+            next_offset = offset + limit
+            result_json += f"\n\n[Hint: Results truncated. Use offset={next_offset} to see more, or narrow with a more specific pattern or file_glob.]"
+        return result_json
    except Exception as e:
        return json.dumps({"error": str(e)}, ensure_ascii=False)

--- a/tools/image_generation_tool.py
+++ b/tools/image_generation_tool.py
@@ -31,7 +31,6 @@ Usage:
 import json
 import logging
 import os
-import asyncio
 import datetime
 from typing import Dict, Any, Optional, Union
 import fal_client
@@ -153,10 +152,13 @@ def _validate_parameters(
    return validated


-async def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]:
+def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]:
    """
    Upscale an image using FAL.ai's Clarity Upscaler.
    
+    Uses the synchronous fal_client API to avoid event loop lifecycle issues
+    when called from threaded contexts (e.g. gateway thread pool).
+    
    Args:
        image_url (str): URL of the image to upscale
        original_prompt (str): Original prompt used to generate the image
@@ -180,14 +182,17 @@ async def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]
            "enable_safety_checker": UPSCALER_SAFETY_CHECKER
        }
        
-        # Submit upscaler request
-        handler = await fal_client.submit_async(
+        # Use sync API — fal_client.submit() uses httpx.Client (no event loop).
+        # The async API (submit_async) caches a global httpx.AsyncClient via
+        # @cached_property, which breaks when asyncio.run() destroys the loop
+        # between calls (gateway thread-pool pattern).
+        handler = fal_client.submit(
            UPSCALER_MODEL,
            arguments=upscaler_arguments
        )
        
-        # Get the upscaled result
-        result = await handler.get()
+        # Get the upscaled result (sync — blocks until done)
+        result = handler.get()
        
        if result and "image" in result:
            upscaled_image = result["image"]
@@ -208,7 +213,7 @@ async def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]
        return None


-async def image_generate_tool(
+def image_generate_tool(
    prompt: str,
    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
    num_inference_steps: int = DEFAULT_NUM_INFERENCE_STEPS,
@@ -220,10 +225,10 @@ async def image_generate_tool(
    """
    Generate images from text prompts using FAL.ai's FLUX 2 Pro model with automatic upscaling.
    
-    This tool uses FAL.ai's FLUX 2 Pro model for high-quality text-to-image generation 
-    with extensive customization options. Generated images are automatically upscaled 2x 
-    using FAL.ai's Clarity Upscaler for enhanced quality. The final upscaled images are 
-    returned as URLs that can be displayed using <img src="{URL}"></img> tags.
+    Uses the synchronous fal_client API to avoid event loop lifecycle issues.
+    The async API's global httpx.AsyncClient (cached via @cached_property) breaks
+    when asyncio.run() destroys and recreates event loops between calls, which
+    happens in the gateway's thread-pool pattern.
    
    Args:
        prompt (str): The text prompt describing the desired image
@@ -306,14 +311,14 @@ async def image_generate_tool(
        logger.info("  Steps: %s", validated_params['num_inference_steps'])
        logger.info("  Guidance: %s", validated_params['guidance_scale'])
        
-        # Submit request to FAL.ai
-        handler = await fal_client.submit_async(
+        # Submit request to FAL.ai using sync API (avoids cached event loop issues)
+        handler = fal_client.submit(
            DEFAULT_MODEL,
            arguments=arguments
        )
        
-        # Get the result
-        result = await handler.get()
+        # Get the result (sync — blocks until done)
+        result = handler.get()
        
        generation_time = (datetime.datetime.now() - start_time).total_seconds()
        
@@ -336,7 +341,7 @@ async def image_generate_tool(
                }
                
                # Attempt to upscale the image
-                upscaled_image = await _upscale_image(img["url"], prompt.strip())
+                upscaled_image = _upscale_image(img["url"], prompt.strip())
                
                if upscaled_image:
                    # Use upscaled image if successful
@@ -552,5 +557,5 @@ registry.register(
    handler=_handle_image_generate,
    check_fn=check_image_generation_requirements,
    requires_env=["FAL_KEY"],
-    is_async=True,
+    is_async=False,  # Switched to sync fal_client API to fix "Event loop is closed" in gateway
 )
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -8,6 +8,7 @@ human-friendly channel names to IDs. Works in both CLI and gateway contexts.
 import json
 import logging
 import os
+import time

 logger = logging.getLogger(__name__)

@@ -32,7 +33,7 @@ SEND_MESSAGE_SCHEMA = {
            },
            "target": {
                "type": "string",
-                "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', or 'platform:chat_id'. Examples: 'telegram', 'discord:#bot-home', 'slack:#engineering'"
+                "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', or 'platform:chat_id'. Examples: 'telegram', 'discord:#bot-home', 'slack:#engineering', 'signal:+15551234567'"
            },
            "message": {
                "type": "string",
@@ -107,6 +108,7 @@ def _handle_send(args):
        "discord": Platform.DISCORD,
        "slack": Platform.SLACK,
        "whatsapp": Platform.WHATSAPP,
+        "signal": Platform.SIGNAL,
    }
    platform = platform_map.get(platform_name)
    if not platform:
@@ -160,6 +162,8 @@ async def _send_to_platform(platform, pconfig, chat_id, message):
        return await _send_discord(pconfig.token, chat_id, message)
    elif platform == Platform.SLACK:
        return await _send_slack(pconfig.token, chat_id, message)
+    elif platform == Platform.SIGNAL:
+        return await _send_signal(pconfig.extra, chat_id, message)
    return {"error": f"Direct sending not yet implemented for {platform.value}"}


@@ -219,6 +223,42 @@ async def _send_slack(token, chat_id, message):
        return {"error": f"Slack send failed: {e}"}


+async def _send_signal(extra, chat_id, message):
+    """Deliver *message* through the signal-cli JSON-RPC HTTP endpoint.
+
+    ``extra`` supplies ``account`` (required) and ``http_url`` (defaults to
+    ``http://127.0.0.1:8080``).  A ``chat_id`` starting with ``group:`` is
+    sent as a group message with the prefix stripped; anything else is
+    passed as a single recipient.  Returns ``{"success": True, ...}`` on
+    success or ``{"error": ...}`` on failure; ordinary exceptions are
+    reported in the returned dict rather than raised.
+    """
+    try:
+        import httpx
+    except ImportError:
+        return {"error": "httpx not installed"}
+    try:
+        base_url = extra.get("http_url", "http://127.0.0.1:8080").rstrip("/")
+        sender = extra.get("account", "")
+        if not sender:
+            return {"error": "Signal account not configured"}
+
+        group_prefix = "group:"
+        if chat_id.startswith(group_prefix):
+            destination = {"groupId": chat_id[len(group_prefix):]}
+        else:
+            destination = {"recipient": [chat_id]}
+
+        rpc_request = {
+            "jsonrpc": "2.0",
+            "method": "send",
+            "params": {"account": sender, "message": message, **destination},
+            # Millisecond timestamp serves as the JSON-RPC request id.
+            "id": f"send_{int(time.time() * 1000)}",
+        }
+
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            reply = await client.post(f"{base_url}/api/v1/rpc", json=rpc_request)
+            reply.raise_for_status()
+            body = reply.json()
+        if "error" in body:
+            return {"error": f"Signal RPC error: {body['error']}"}
+        return {"success": True, "platform": "signal", "chat_id": chat_id}
+    except Exception as e:
+        return {"error": f"Signal send failed: {e}"}
+
+
 def _check_send_message():
    """Gate send_message on gateway running (always available on messaging platforms)."""
    platform = os.getenv("HERMES_SESSION_PLATFORM", "")
--- a/tools/skills_guard.py
+++ b/tools/skills_guard.py
@@ -946,6 +946,11 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
        client = OpenAI(
            base_url=OPENROUTER_BASE_URL,
            api_key=api_key,
+            default_headers={
+                "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
+                "X-OpenRouter-Title": "Hermes Agent",
+                "X-OpenRouter-Categories": "productivity,cli-agent",
+            },
        )
        response = client.chat.completions.create(
            model=model,
--- a/Show More
+++ b/Show More